Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 22 Jul 2011 13:58:06 -0700
From:      Craig Rodrigues <rodrigc@crodrigues.org>
To:        Kirk McKusick <mckusick@freebsd.org>
Cc:        svn-src-head@freebsd.org, svn-src-all@freebsd.org, src-committers@freebsd.org
Subject:   Re: svn commit: r224061 - in head/sys/ufs: ffs ufs
Message-ID:  <CAG=rPVcCRBDvZtUy2OLSf2TRCvhh2MLc%2BehWYK9qL1dYUKkuGQ@mail.gmail.com>
In-Reply-To: <201107151620.p6FGKX98086893@svn.freebsd.org>
References:  <201107151620.p6FGKX98086893@svn.freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help
On Fri, Jul 15, 2011 at 9:20 AM, Kirk McKusick <mckusick@freebsd.org> wrote:

> Author: mckusick
> Date: Fri Jul 15 16:20:33 2011
> New Revision: 224061
> URL: http://svn.freebsd.org/changeset/base/224061
>
> Log:
>  Add an FFS specific mount option to allow a filesystem checker
>  (typically fsck_ffs) to register that it wishes to use FFS specific
>  sysctl's to update the filesystem. This ensures that two checkers
>  cannot run on a given filesystem at the same time and that no other
>  process accidentally or maliciously uses the filesystem updating
>  sysctls inappropriately. This functionality is needed by the
>  journaling soft-updates recovery code.
>
> Modified:
>  head/sys/ufs/ffs/ffs_alloc.c
>  head/sys/ufs/ffs/ffs_inode.c
>  head/sys/ufs/ffs/ffs_vfsops.c
>  head/sys/ufs/ffs/fs.h
>  head/sys/ufs/ufs/ufsmount.h
>
> Modified: head/sys/ufs/ffs/ffs_alloc.c
>
> ==============================================================================
> --- head/sys/ufs/ffs/ffs_alloc.c        Fri Jul 15 15:56:23 2011
>  (r224060)
> +++ head/sys/ufs/ffs/ffs_alloc.c        Fri Jul 15 16:20:33 2011
>  (r224061)
> @@ -2381,6 +2381,18 @@ ffs_fserr(fs, inum, cp)
>  *     in the current directory is oldvalue then change it to newvalue.
>  * unlink(nameptr, oldvalue) - Verify that the inode number associated
>  *     with nameptr in the current directory is oldvalue then unlink it.
> + *
> + * The following functions may only be used on a quiescent filesystem
> + * by the soft updates journal. They are not safe to be run on an active
> + * filesystem.
> + *
> + * setinode(inode, dip) - the specified disk inode is replaced with the
> + *     contents pointed to by dip.
> + * setbufoutput(fd, flags) - output associated with the specified file
> + *     descriptor (which must reference the character device supporting
> + *     the filesystem) switches from using physio to running through the
> + *     buffer cache when flags is set to 1. The descriptor reverts to
> + *     physio for output when flags is set to zero.
>  */
>
>  static int sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS);
> @@ -2427,11 +2439,21 @@ static SYSCTL_NODE(_vfs_ffs, FFS_SET_DOT
>  static SYSCTL_NODE(_vfs_ffs, FFS_UNLINK, unlink, CTLFLAG_WR,
>        sysctl_ffs_fsck, "Unlink a Duplicate Name");
>
> +static SYSCTL_NODE(_vfs_ffs, FFS_SET_INODE, setinode, CTLFLAG_WR,
> +       sysctl_ffs_fsck, "Update an On-Disk Inode");
> +
> +static SYSCTL_NODE(_vfs_ffs, FFS_SET_BUFOUTPUT, setbufoutput, CTLFLAG_WR,
> +       sysctl_ffs_fsck, "Set Buffered Writing for Descriptor");
> +
> +#define DEBUG 1
>  #ifdef DEBUG
> -static int fsckcmds = 0;
> +static int fsckcmds = 1;
>  SYSCTL_INT(_debug, OID_AUTO, fsckcmds, CTLFLAG_RW, &fsckcmds, 0, "");
>  #endif /* DEBUG */
>
> +static int buffered_write(struct file *, struct uio *, struct ucred *,
> +       int, struct thread *);
> +
>  static int
>  sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
>  {
> @@ -2445,8 +2467,10 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
>        ufs2_daddr_t blkno;
>        long blkcnt, blksize;
>        struct filedesc *fdp;
> -       struct file *fp;
> +       struct file *fp, *vfp;
>        int vfslocked, filetype, error;
> +       static struct fileops *origops, bufferedops;
> +       static int outcnt = 0;
>
>        if (req->newlen > sizeof cmd)
>                return (EBADRPC);
> @@ -2454,7 +2478,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
>                return (error);
>        if (cmd.version != FFS_CMD_VERSION)
>                return (ERPCMISMATCH);
> -       if ((error = getvnode(curproc->p_fd, cmd.handle, &fp)) != 0)
> +       if ((error = getvnode(td->td_proc->p_fd, cmd.handle, &fp)) != 0)
>                return (error);
>        vp = fp->f_data;
>        if (vp->v_type != VREG && vp->v_type != VDIR) {
> @@ -2467,12 +2491,13 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
>                fdrop(fp, td);
>                return (EINVAL);
>        }
> -       if (mp->mnt_flag & MNT_RDONLY) {
> +       ump = VFSTOUFS(mp);
> +       if ((mp->mnt_flag & MNT_RDONLY) &&
> +           ump->um_fsckpid != td->td_proc->p_pid) {
>                vn_finished_write(mp);
>                fdrop(fp, td);
>                return (EROFS);
>        }
> -       ump = VFSTOUFS(mp);
>        fs = ump->um_fs;
>        filetype = IFREG;
>
> @@ -2493,7 +2518,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
>        case FFS_ADJ_REFCNT:
>  #ifdef DEBUG
>                if (fsckcmds) {
> -                       printf("%s: adjust inode %jd count by %jd\n",
> +                       printf("%s: adjust inode %jd link count by %jd\n",
>                            mp->mnt_stat.f_mntonname, (intmax_t)cmd.value,
>                            (intmax_t)cmd.size);
>                }
> @@ -2504,7 +2529,8 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
>                ip->i_nlink += cmd.size;
>                DIP_SET(ip, i_nlink, ip->i_nlink);
>                ip->i_effnlink += cmd.size;
> -               ip->i_flag |= IN_CHANGE;
> +               ip->i_flag |= IN_CHANGE | IN_MODIFIED;
> +               error = ffs_update(vp, 1);
>                if (DOINGSOFTDEP(vp))
>                        softdep_change_linkcnt(ip);
>                vput(vp);
> @@ -2522,7 +2548,8 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
>                        break;
>                ip = VTOI(vp);
>                DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + cmd.size);
> -               ip->i_flag |= IN_CHANGE;
> +               ip->i_flag |= IN_CHANGE | IN_MODIFIED;
> +               error = ffs_update(vp, 1);
>                vput(vp);
>                break;
>
> @@ -2722,6 +2749,78 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
>                    UIO_USERSPACE, (ino_t)cmd.size);
>                break;
>
> +       case FFS_SET_INODE:
> +               if (ump->um_fsckpid != td->td_proc->p_pid) {
> +                       error = EPERM;
> +                       break;
> +               }
> +#ifdef DEBUG
> +               if (fsckcmds && outcnt++ < 100) {
> +                       printf("%s: update inode %jd\n",
> +                           mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
> +               }
> +#endif /* DEBUG */
> +               if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE,
> &vp)))
> +                       break;
> +               vfslocked = VFS_LOCK_GIANT(vp->v_mount);
> +               AUDIT_ARG_VNODE1(vp);
> +               ip = VTOI(vp);
> +               if (ip->i_ump->um_fstype == UFS1)
> +                       error = copyin((void *)(intptr_t)cmd.size,
> ip->i_din1,
> +                           sizeof(struct ufs1_dinode));
> +               else
> +                       error = copyin((void *)(intptr_t)cmd.size,
> ip->i_din2,
> +                           sizeof(struct ufs2_dinode));
> +               if (error) {
> +                       vput(vp);
> +                       VFS_UNLOCK_GIANT(vfslocked);
> +                       break;
> +               }
> +               ip->i_flag |= IN_CHANGE | IN_MODIFIED;
> +               error = ffs_update(vp, 1);
> +               vput(vp);
> +               VFS_UNLOCK_GIANT(vfslocked);
> +               break;
> +
> +       case FFS_SET_BUFOUTPUT:
> +               if (ump->um_fsckpid != td->td_proc->p_pid) {
> +                       error = EPERM;
> +                       break;
> +               }
> +               if (VTOI(vp)->i_ump != ump) {
> +                       error = EINVAL;
> +                       break;
> +               }
> +#ifdef DEBUG
> +               if (fsckcmds) {
> +                       printf("%s: %s buffered output for descriptor
> %jd\n",
> +                           mp->mnt_stat.f_mntonname,
> +                           cmd.size == 1 ? "enable" : "disable",
> +                           (intmax_t)cmd.value);
> +               }
> +#endif /* DEBUG */
> +               if ((error = getvnode(td->td_proc->p_fd, cmd.value, &vfp))
> != 0)
> +                       break;
> +               if (vfp->f_vnode->v_type != VCHR) {
> +                       fdrop(vfp, td);
> +                       error = EINVAL;
> +                       break;
> +               }
> +               if (origops == NULL) {
> +                       origops = vfp->f_ops;
> +                       bcopy((void *)origops, (void *)&bufferedops,
> +                           sizeof(bufferedops));
> +                       bufferedops.fo_write = buffered_write;
> +               }
> +               if (cmd.size == 1)
> +                       atomic_store_rel_ptr((volatile uintptr_t
> *)&vfp->f_ops,
> +                           (uintptr_t)&bufferedops);
> +               else
> +                       atomic_store_rel_ptr((volatile uintptr_t
> *)&vfp->f_ops,
> +                           (uintptr_t)origops);
> +               fdrop(vfp, td);
> +               break;
> +
>        default:
>  #ifdef DEBUG
>                if (fsckcmds) {
> @@ -2737,3 +2836,73 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
>        vn_finished_write(mp);
>        return (error);
>  }
> +
> +/*
> + * Function to switch a descriptor to use the buffer cache to stage
> + * its I/O. This is needed so that writes to the filesystem device
> + * will give snapshots a chance to copy modified blocks for which it
> + * needs to retain copies.
> + */
> +static int
> +buffered_write(fp, uio, active_cred, flags, td)
> +       struct file *fp;
> +       struct uio *uio;
> +       struct ucred *active_cred;
> +       int flags;
> +       struct thread *td;
> +{
>


Please use ANSI-style prototypes for new functions.



>
> Modified: head/sys/ufs/ffs/ffs_vfsops.c
>
> ==============================================================================
> --- head/sys/ufs/ffs/ffs_vfsops.c       Fri Jul 15 15:56:23 2011
>  (r224060)
> +++ head/sys/ufs/ffs/ffs_vfsops.c       Fri Jul 15 16:20:33 2011
>  (r224061)
> @@ -132,8 +132,8 @@ static struct buf_ops ffs_ops = {
>  */
>  static const char *ffs_opts[] = { "acls", "async", "noatime",
> "noclusterr",
>     "noclusterw", "noexec", "export", "force", "from", "groupquota",
> -    "multilabel", "nfsv4acls", "snapshot", "nosuid", "suiddir",
> "nosymfollow",
> -    "sync", "union", "userquota", NULL };
> +    "multilabel", "nfsv4acls", "fsckpid", "snapshot", "nosuid", "suiddir",
> +    "nosymfollow", "sync", "union", "userquota", NULL };
>
>  static int
>  ffs_mount(struct mount *mp)
> @@ -142,6 +142,7 @@ ffs_mount(struct mount *mp)
>        struct thread *td;
>        struct ufsmount *ump = 0;
>        struct fs *fs;
> +       pid_t fsckpid = 0;
>        int error, flags;
>        u_int mntorflags;
>        accmode_t accmode;
> @@ -184,6 +185,29 @@ ffs_mount(struct mount *mp)
>                vfs_deleteopt(mp->mnt_opt, "snapshot");
>        }
>
> +       if (vfs_getopt(mp->mnt_optnew, "fsckpid", NULL, NULL) == 0 &&
> +           vfs_scanopt(mp->mnt_optnew, "fsckpid", "%d", &fsckpid) == 1) {
> +               /*
> +                * Once we have set the restricted PID, do not
> +                * persist "fsckpid" in the options list.
> +                */
> +               vfs_deleteopt(mp->mnt_optnew, "fsckpid");
> +               vfs_deleteopt(mp->mnt_opt, "fsckpid");
> +               if (mp->mnt_flag & MNT_UPDATE) {
> +                       if (VFSTOUFS(mp)->um_fs->fs_ronly == 0 &&
> +                            vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) ==
> 0) {
> +                               printf("Checker enable: Must be
> read-only\n");
> +                               return (EINVAL);
> +                       }
> +               } else if (vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0)
> {
> +                       printf("Checker enable: Must be read-only\n");
> +                       return (EINVAL);
> +               }
>


If you have error messages in an nmount() call, please consider
using the vfs_mount_error() function, which is in sys/kern/vfs_mount.c.
In src/sbin/mount/mount_fs.c, there is an example of how the "errmsg"
string can be passed from userspace into the kernel via nmount() in
order to retrieve the value of this string.

-- 
Craig Rodrigues
rodrigc@crodrigues.org



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAG=rPVcCRBDvZtUy2OLSf2TRCvhh2MLc%2BehWYK9qL1dYUKkuGQ>