Date: Thu, 20 Nov 2014 22:19:14 -0500 (EST) From: Rick Macklem <rmacklem@uoguelph.ca> To: FreeBSD Filesystems <freebsd-fs@freebsd.org> Subject: RFC: patch to make d_fileno 64bits Message-ID: <539201047.4538834.1416539954794.JavaMail.root@uoguelph.ca> In-Reply-To: <683927697.4538805.1416539949195.JavaMail.root@uoguelph.ca>
next in thread | previous in thread | raw e-mail | index | archive | help
[-- Attachment #1 --]
The attached patch covers the basics of a way to
convert the d_fileno field of "struct dirent" to
64 bits. This patch is incomplete and won't even
build, but I thought I'd post it in case anyone
wanted to take a look and comment on the approach
it uses. In outline, the patch:
- renames the old/current one "struct dirent32"
- changes d_fileno to 64bits and adds a 64bit
d_off field for the offset of the underlying
file system
- defines a new VOP_READDIR() that will return
the new "struct dirent" that is used as the
default one for a new getdirentries(2).
- the old/current getdirentries(2) uses the old
VOP_READDIR32() by default.
For the case of a file system that supports both
the new and old VOP_READDIR(), they are used by
the corresponding new and old getdirentries(2)
syscalls.
For a file system that only supports one of
the VOP_READDIR()s, the syscall falls back to the
one that is supported and each "struct dirent32"
is converted to "struct dirent" (or vice versa).
At this point, all file systems would support
the old VOP_READDIR() (now VOP_READDIR32()) and I
think the new VOP_READDIR() can easily be added for
NFS and ZFS. (OpenBSD already has UFS code for
essentially a new struct dirent and hopefully
that code could be ported easily, too.)
Anyhow, any comments on this approach? rick
[-- Attachment #2 --]
--- sys/dirent.h.sav 2014-10-23 18:12:59.000000000 -0400
+++ sys/dirent.h 2014-11-19 19:13:12.000000000 -0500
@@ -38,16 +38,31 @@
/*
* The dirent structure defines the format of directory entries returned by
- * the getdirentries(2) system call.
+ * the getdirentries(2) system call and dirent32 for the getdirentries32(2)
+ * system call.
*
- * A directory entry has a struct dirent at the front of it, containing its
+ * A directory entry has a struct dirent(32) at the front of it, containing its
* inode number, the length of the entry, and the length of the name
- * contained in the entry. These are followed by the name padded to a 4
+ * contained in the entry. These are followed by the name padded to an 8(4)
* byte boundary with null bytes. All names are guaranteed null terminated.
* The maximum length of a name in a directory is MAXNAMLEN.
*/
struct dirent {
+ __uint64_t d_off; /* dir offset for on-disk directory */
+ __uint64_t d_fileno; /* file number of entry */
+ __uint16_t d_reclen; /* length of this record */
+ __uint8_t d_type; /* file type, see below */
+ __uint8_t d_namlen; /* length of string in d_name */
+#if __BSD_VISIBLE
+#define MAXNAMLEN 255
+ char d_name[MAXNAMLEN + 1]; /* name must be no longer than this */
+#else
+ char d_name[255 + 1]; /* name must be no longer than this */
+#endif
+};
+
+struct dirent32 {
__uint32_t d_fileno; /* file number of entry */
__uint16_t d_reclen; /* length of this record */
__uint8_t d_type; /* file type, see below */
@@ -81,20 +96,26 @@ struct dirent {
#define DTTOIF(dirtype) ((dirtype) << 12)
/*
- * The _GENERIC_DIRSIZ macro gives the minimum record length which will hold
- * the directory entry. This returns the amount of space in struct direct
- * without the d_name field, plus enough space for the name with a terminating
- * null byte (dp->d_namlen+1), rounded up to a 4 byte boundary.
+ * The _GENERIC_xxx macros give the minimum record length which will
+ * hold the directory entry. They return the amount of space in struct
+ * dirent(32) without the d_name field, plus enough space for the name with a
+ * terminating null byte (dp->d_namlen+1), rounded up to an 8(4) byte boundary.
+ * The _GENERIC_DIRVAL() case takes the name length instead of dp as the
+ * argument.
*
* XXX although this macro is in the implementation namespace, it requires
* a manifest constant that is not.
*/
-#define _GENERIC_DIRSIZ(dp) \
- ((sizeof (struct dirent) - (MAXNAMLEN+1)) + (((dp)->d_namlen+1 + 3) &~ 3))
+#define _GENERIC_DIRVAL(namlen) \
+ ((sizeof(struct dirent) - (MAXNAMLEN + 1) + (namlen) + 1 + 7) & ~7)
+#define _GENERIC_DIRSIZ(dp) _GENERIC_DIRVAL((dp)->d_namlen)
+#define _GENERIC_DIRSIZ32(dp) \
+ ((sizeof (struct dirent32) - (MAXNAMLEN+1)) + (((dp)->d_namlen+1 + 3) &~ 3))
#endif /* __BSD_VISIBLE */
#ifdef _KERNEL
#define GENERIC_DIRSIZ(dp) _GENERIC_DIRSIZ(dp)
+#define GENERIC_DIRSIZ32(dp) _GENERIC_DIRSIZ32(dp)
#endif
#endif /* !_SYS_DIRENT_H_ */
--- kern/vfs_syscalls.c.sav 2014-10-24 16:45:39.000000000 -0400
+++ kern/vfs_syscalls.c 2014-11-20 21:46:29.000000000 -0500
@@ -4006,10 +4006,11 @@ unionread:
#endif /* COMPAT_43 */
/*
- * Read a block of directory entries in a filesystem independent format.
+ * Read the old "struct dirent32" block of directory entries in a
+ * filesystem independent format.
*/
#ifndef _SYS_SYSPROTO_H_
-struct getdirentries_args {
+struct getdirentries32_args {
int fd;
char *buf;
u_int count;
@@ -4017,9 +4018,9 @@ struct getdirentries_args {
};
#endif
int
-sys_getdirentries(td, uap)
+sys_getdirentries32(td, uap)
struct thread *td;
- register struct getdirentries_args /* {
+ register struct getdirentries32_args /* {
int fd;
char *buf;
u_int count;
@@ -4029,7 +4030,7 @@ sys_getdirentries(td, uap)
long base;
int error;
- error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base,
+ error = kern_getdirentries32(td, uap->fd, uap->buf, uap->count, &base,
NULL, UIO_USERSPACE);
if (error != 0)
return (error);
@@ -4039,7 +4040,7 @@ sys_getdirentries(td, uap)
}
int
-kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
+kern_getdirentries32(struct thread *td, int fd, char *buf, u_int count,
long *basep, ssize_t *residp, enum uio_seg bufseg)
{
struct vnode *vp;
@@ -4048,8 +4049,9 @@ kern_getdirentries(struct thread *td, in
struct iovec aiov;
cap_rights_t rights;
long loff;
- int error, eofflag;
+ int copy_dir = 0, error, eofflag;
off_t foffset;
+ char *tbuf = NULL;
AUDIT_ARG_FD(fd);
if (count > IOSIZE_MAX)
@@ -4070,22 +4072,46 @@ unionread:
error = EINVAL;
goto fail;
}
- aiov.iov_base = buf;
+ vn_lock(vp, LK_SHARED | LK_RETRY);
+trynew:
+ /*
+ * If this file system only returns the new struct dirent, allocate
+ * a kernel buffer to be read into, so it can be copied/converted.
+ */
+ if (copy_dir != 0 && bufseg == UIO_USERSPACE) {
+ if (tbuf == NULL)
+ tbuf = malloc(count, M_TEMP, M_WAITOK);
+ aiov.iov_base = tbuf;
+ } else
+ aiov.iov_base = buf;
aiov.iov_len = count;
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
auio.uio_rw = UIO_READ;
- auio.uio_segflg = bufseg;
+ if (copy_dir != 0 && bufseg == UIO_USERSPACE)
+ auio.uio_segflg = UIO_SYSSPACE;
+ else
+ auio.uio_segflg = bufseg;
auio.uio_td = td;
- vn_lock(vp, LK_SHARED | LK_RETRY);
AUDIT_ARG_VNODE1(vp);
loff = auio.uio_offset = foffset;
#ifdef MAC
error = mac_vnode_check_readdir(td->td_ucred, vp);
if (error == 0)
#endif
- error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
- NULL);
+ {
+ if (copy_dir == 0) {
+ error = VOP_READDIR32(vp, &auio, fp->f_cred, &eofflag,
+ NULL, NULL);
+ if (error == EOPNOTSUPP) {
+ copy_dir = 1;
+ error = 0;
+ goto trynew;
+ }
+ } else
+ error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
+ NULL, NULL);
+ }
foffset = auio.uio_offset;
if (error != 0) {
VOP_UNLOCK(vp, 0);
@@ -4102,14 +4128,209 @@ unionread:
fp->f_data = vp;
foffset = 0;
vput(tvp);
+ copy_dir = 0;
goto unionread;
}
VOP_UNLOCK(vp, 0);
+ if (copy_dir != 0 && count - auio.uio_resid > 0) {
+ if (bufseg == UIO_USERSPACE) {
+ copy_dirent32(tbuf, count - auio.uio_resid);
+ error = copyout(tbuf, buf, count - auio.uio_resid);
+ if (error != 0)
+ goto fail;
+ } else
+ copy_dirent32(buf, count - auio.uio_resid);
+ }
*basep = loff;
if (residp != NULL)
*residp = auio.uio_resid;
td->td_retval[0] = count - auio.uio_resid;
fail:
+ if (tbuf != NULL)
+ free(tbuf, M_TEMP);
+ foffset_unlock(fp, foffset, 0);
+ fdrop(fp, td);
+ return (error);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct getdents32_args {
+ int fd;
+ char *buf;
+ size_t count;
+};
+#endif
+int
+sys_getdents32(td, uap)
+ struct thread *td;
+ register struct getdents32_args /* {
+ int fd;
+ char *buf;
+ u_int count;
+ } */ *uap;
+{
+ struct getdirentries32_args ap;
+
+ ap.fd = uap->fd;
+ ap.buf = uap->buf;
+ ap.count = uap->count;
+ ap.basep = NULL;
+ return (sys_getdirentries32(td, &ap));
+}
+
+/*
+ * Read in the new "struct dirent" block of directory entries in a
+ * filesystem independent format.
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct getdirentries_args {
+ int fd;
+ char *buf;
+ u_int count;
+ uint64_t *basep;
+};
+#endif
+int
+sys_getdirentries(td, uap)
+ struct thread *td;
+ register struct getdirentries_args /* {
+ int fd;
+ char *buf;
+ u_int count;
+ uint64_t *basep;
+ } */ *uap;
+{
+ uint64_t base;
+ int error;
+
+ error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base,
+ NULL, UIO_USERSPACE);
+ if (error != 0)
+ return (error);
+ if (uap->basep != NULL)
+ error = copyout(&base, uap->basep, sizeof(uint64_t));
+ return (error);
+}
+
+int
+kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
+ uint64_t *basep, ssize_t *residp, enum uio_seg bufseg)
+{
+ struct vnode *vp;
+ struct file *fp;
+ struct uio auio;
+ struct iovec aiov;
+ cap_rights_t rights;
+ uint64_t loff;
+ int copy_dir = 0, error, eofflag;
+ off_t foffset;
+ char *ibuf = NULL, *obuf = NULL;
+ u_int obuflen;
+
+ AUDIT_ARG_FD(fd);
+ if (count > IOSIZE_MAX)
+ return (EINVAL);
+ auio.uio_resid = count;
+ error = getvnode(td->td_proc->p_fd, fd,
+ cap_rights_init(&rights, CAP_READ), &fp);
+ if (error != 0)
+ return (error);
+ if ((fp->f_flag & FREAD) == 0) {
+ fdrop(fp, td);
+ return (EBADF);
+ }
+ vp = fp->f_vnode;
+ foffset = foffset_lock(fp, 0);
+unionread:
+ if (vp->v_type != VDIR) {
+ error = EINVAL;
+ goto fail;
+ }
+ vn_lock(vp, LK_SHARED | LK_RETRY);
+tryold:
+ /*
+ * If this file system only returns the old struct dirent, allocate
+ * kernel buffers to be read and copied/converted into.
+ */
+ if (copy_dir != 0) {
+ if (ibuf == NULL)
+ ibuf = malloc(count, M_TEMP, M_WAITOK);
+ if (obuf == NULL)
+ obuf = malloc(count, M_TEMP, M_WAITOK);
+ aiov.iov_base = ibuf;
+ } else
+ aiov.iov_base = buf;
+ aiov.iov_len = count;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_rw = UIO_READ;
+ if (copy_dir != 0)
+ auio.uio_segflg = UIO_SYSSPACE;
+ else
+ auio.uio_segflg = bufseg;
+ auio.uio_td = td;
+ AUDIT_ARG_VNODE1(vp);
+ loff = auio.uio_offset = foffset;
+#ifdef MAC
+ error = mac_vnode_check_readdir(td->td_ucred, vp);
+ if (error == 0)
+#endif
+ {
+ if (copy_dir == 0) {
+ error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
+ NULL, NULL);
+ if (error == EOPNOTSUPP) {
+ copy_dir = 1;
+ error = 0;
+ goto tryold;
+ }
+ foffset = auio.uio_offset;
+ } else
+ error = VOP_READDIR32(vp, &auio, fp->f_cred, &eofflag,
+ NULL, NULL);
+ }
+ if (error != 0) {
+ VOP_UNLOCK(vp, 0);
+ goto fail;
+ }
+ if (count == auio.uio_resid &&
+ (vp->v_vflag & VV_ROOT) &&
+ (vp->v_mount->mnt_flag & MNT_UNION)) {
+ struct vnode *tvp = vp;
+
+ vp = vp->v_mount->mnt_vnodecovered;
+ VREF(vp);
+ fp->f_vnode = vp;
+ fp->f_data = vp;
+ foffset = 0;
+ vput(tvp);
+ copy_dir = 0;
+ goto unionread;
+ }
+ VOP_UNLOCK(vp, 0);
+ if (copy_dir != 0 && count - auio.uio_resid > 0) {
+ obuflen = copy_dirent(ibuf, count - auio.uio_resid, obuf, count,
+ &foffset);
+ if (bufseg == UIO_USERSPACE)
+ error = copyout(obuf, buf, obuflen);
+ else
+ bcopy(obuf, buf, obuflen);
+ if (error != 0)
+ goto fail;
+ if (residp != NULL)
+ *residp = count - obuflen;
+ td->td_retval[0] = obuflen;
+ } else {
+ if (residp != NULL)
+ *residp = auio.uio_resid;
+ td->td_retval[0] = count - auio.uio_resid;
+ }
+ *basep = loff;
+fail:
+ if (ibuf != NULL)
+ free(ibuf, M_TEMP);
+ if (obuf != NULL)
+ free(obuf, M_TEMP);
foffset_unlock(fp, foffset, 0);
fdrop(fp, td);
return (error);
@@ -4141,6 +4362,69 @@ sys_getdents(td, uap)
}
/*
+ * Copy the new struct dirent to the old struct dirent32 format.
+ */
+static void
+copy_dirent32(char *buf, u_int len)
+{
+ struct dirent *dp;
+ struct dirent32 *dp32;
+
+ while (len > 0) {
+ dp = (struct dirent *)buf;
+ dp32 = (struct dirent32 *)buf;
+ dp32->d_fileno = dp->d_fileno;
+ dp32->d_reclen = dp->d_reclen;
+ dp32->d_type = dp->d_type;
+ dp32->d_namlen = dp->d_namlen;
+ bcopy(dp->d_name, dp32->d_name, dp32->d_namlen + 1);
+ buf += dp32->d_reclen;
+ len -= dp32->d_reclen;
+ }
+}
+
+/*
+ * Copy the old struct dirent32 to new struct dirent format.
+ */
+static u_int
+copy_dirent(char *ibuf, u_int ilen, char *obuf, u_int olen, off_t *offp)
+{
+ struct dirent *dp;
+ struct dirent32 *dp32;
+ u_int left, ocnt;
+
+ dp32 = (struct dirent32 *)ibuf;
+ ocnt = 0;
+ while (ilen > 0 && olen >= ocnt + _GENERIC_DIRVAL(dp32->d_namlen)) {
+ dp = (struct dirent *)obuf;
+ dp->d_off = *offp;
+ dp->d_fileno = dp32->d_fileno;
+ dp->d_type = dp32->d_type;
+ dp->d_namlen = dp32->d_namlen;
+ bcopy(dp32->d_name, dp->d_name, dp32->d_namlen + 1);
+ dp->d_reclen = _GENERIC_DIRSIZ(dp);
+ ibuf += dp32->d_reclen;
+ ilen -= dp32->d_reclen;
+ *offp += dp32->d_reclen;
+ obuf += dp->d_reclen;
+ ocnt += dp->d_reclen;
+ left = DEV_BSIZE - (ocnt & (DEV_BSIZE - 1));
+ dp32 = (struct dirent32 *)ibuf;
+ if (ilen > 0 && left < _GENERIC_DIRVAL(dp32->d_namlen)) {
+ dp->d_reclen += left;
+ obuf += left;
+ ocnt += left;
+ }
+ }
+ if (ocnt < olen) {
+ left = DEV_BSIZE - (ocnt & (DEV_BSIZE - 1));
+ dp->d_reclen += left;
+ ocnt += left;
+ }
+ return (ocnt);
+}
+
+/*
* Set the mode mask for creation of filesystem nodes.
*/
#ifndef _SYS_SYSPROTO_H_
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?539201047.4538834.1416539954794.JavaMail.root>
