Date: Thu, 30 Oct 2014 05:10:33 +0000 (UTC) From: Mateusz Guzik <mjg@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r273842 - in head/sys: kern sys Message-ID: <201410300510.s9U5AXkX088397@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: mjg Date: Thu Oct 30 05:10:33 2014 New Revision: 273842 URL: https://svnweb.freebsd.org/changeset/base/273842 Log: filedesc: get rid of atomic_load_acq_int from fget_unlocked A read barrier was necessary because fd table pointer and table size were updated separately, opening a window where fget_unlocked could read new size and old pointer. This patch puts both these fields into one dedicated structure, pointer to which is later atomically updated. As such, fget_unlocked only needs data a dependency barrier which is a noop on all supported architectures. Reviewed by: kib (previous version) MFC after: 2 weeks Modified: head/sys/kern/kern_descrip.c head/sys/sys/filedesc.h Modified: head/sys/kern/kern_descrip.c ============================================================================== --- head/sys/kern/kern_descrip.c Wed Oct 29 23:10:48 2014 (r273841) +++ head/sys/kern/kern_descrip.c Thu Oct 30 05:10:33 2014 (r273842) @@ -150,7 +150,7 @@ static int getmaxfd(struct proc *p); * the process exits. */ struct freetable { - struct filedescent *ft_table; + struct fdescenttbl *ft_table; SLIST_ENTRY(freetable) ft_next; }; @@ -158,10 +158,16 @@ struct freetable { * Initial allocation: a filedesc structure + the head of SLIST used to * keep track of old ofiles + enough space for NDFILE descriptors. */ + +struct fdescenttbl0 { + int fdt_nfiles; + struct filedescent fdt_ofiles[NDFILE]; +}; + struct filedesc0 { struct filedesc fd_fd; SLIST_HEAD(, freetable) fd_free; - struct filedescent fd_dfiles[NDFILE]; + struct fdescenttbl0 fd_dfiles; NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)]; }; @@ -1516,8 +1522,8 @@ fdgrowtable(struct filedesc *fdp, int nf { struct filedesc0 *fdp0; struct freetable *ft; - struct filedescent *ntable; - struct filedescent *otable; + struct fdescenttbl *ntable; + struct fdescenttbl *otable; int nnfiles, onfiles; NDSLOTTYPE *nmap, *omap; @@ -1527,7 +1533,7 @@ fdgrowtable(struct filedesc *fdp, int nf /* save old values */ onfiles = fdp->fd_nfiles; - otable = fdp->fd_ofiles; + otable = fdp->fd_files; omap = fdp->fd_map; /* compute the size of the new table */ @@ -1537,17 +1543,20 @@ fdgrowtable(struct filedesc *fdp, int nf return; /* - * Allocate a new table. We need enough space for the - * file entries themselves and the struct freetable we will use + * Allocate a new table. We need enough space for the number of + * entries, file entries themselves and the struct freetable we will use * when we decommission the table and place it on the freelist. * We place the struct freetable in the middle so we don't have * to worry about padding. */ - ntable = malloc(nnfiles * sizeof(ntable[0]) + sizeof(struct freetable), + ntable = malloc(offsetof(struct fdescenttbl, fdt_ofiles) + + nnfiles * sizeof(ntable->fdt_ofiles[0]) + + sizeof(struct freetable), M_FILEDESC, M_ZERO | M_WAITOK); - /* copy the old data over and point at the new tables */ - memcpy(ntable, otable, onfiles * sizeof(*otable)); - fdp->fd_ofiles = ntable; + /* copy the old data */ + ntable->fdt_nfiles = nnfiles; + memcpy(ntable->fdt_ofiles, otable->fdt_ofiles, + onfiles * sizeof(ntable->fdt_ofiles[0])); /* * Allocate a new map only if the old is not large enough. It will @@ -1563,13 +1572,11 @@ fdgrowtable(struct filedesc *fdp, int nf } /* - * In order to have a valid pattern for fget_unlocked() - * fdp->fd_nfiles must be the last member to be updated, otherwise - * fget_unlocked() consumers may reference a new, higher value for - * fdp->fd_nfiles before to access the fdp->fd_ofiles array, - * resulting in OOB accesses. + * Make sure that ntable is correctly initialized before we replace + * fd_files poiner. Otherwise fget_unlocked() may see inconsistent + * data. */ - atomic_store_rel_int(&fdp->fd_nfiles, nnfiles); + atomic_store_rel_ptr((volatile void *)&fdp->fd_files, (uintptr_t)ntable); /* * Do not free the old file table, as some threads may still @@ -1581,7 +1588,7 @@ fdgrowtable(struct filedesc *fdp, int nf * which must not be freed. */ if (onfiles > NDFILE) { - ft = (struct freetable *)&otable[onfiles]; + ft = (struct freetable *)&otable->fdt_ofiles[onfiles]; fdp0 = (struct filedesc0 *)fdp; ft->ft_table = otable; SLIST_INSERT_HEAD(&fdp0->fd_free, ft, ft_next); @@ -1813,8 +1820,8 @@ fdinit(struct filedesc *fdp) newfdp->fd_fd.fd_refcnt = 1; newfdp->fd_fd.fd_holdcnt = 1; newfdp->fd_fd.fd_cmask = CMASK; - newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; - newfdp->fd_fd.fd_nfiles = NDFILE; + newfdp->fd_dfiles.fdt_nfiles = NDFILE; + newfdp->fd_fd.fd_files = (struct fdescenttbl *)&newfdp->fd_dfiles; newfdp->fd_fd.fd_map = newfdp->fd_dmap; newfdp->fd_fd.fd_lastfile = -1; return (&newfdp->fd_fd); @@ -2054,10 +2061,10 @@ fdescfree(struct thread *td) } } - if (fdp->fd_nfiles > NDFILE) - free(fdp->fd_ofiles, M_FILEDESC); if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE)) free(fdp->fd_map, M_FILEDESC); + if (fdp->fd_nfiles > NDFILE) + free(fdp->fd_files, M_FILEDESC); if (cdir != NULL) vrele(cdir); @@ -2305,6 +2312,7 @@ fget_unlocked(struct filedesc *fdp, int #ifdef CAPABILITIES struct filedescent fde; #endif + struct fdescenttbl *fdt; struct file *fp; u_int count; #ifdef CAPABILITIES @@ -2313,11 +2321,8 @@ fget_unlocked(struct filedesc *fdp, int int error; #endif - /* - * Avoid reads reordering and then a first access to the - * fdp->fd_ofiles table which could result in OOB operation. - */ - if (fd < 0 || fd >= atomic_load_acq_int(&fdp->fd_nfiles)) + fdt = fdp->fd_files; + if (fd < 0 || fd >= fdt->fdt_nfiles) return (EBADF); /* * Fetch the descriptor locklessly. We avoid fdrop() races by @@ -2329,15 +2334,15 @@ fget_unlocked(struct filedesc *fdp, int */ for (;;) { #ifdef CAPABILITIES - seq = seq_read(fd_seq(fdp, fd)); - fde = fdp->fd_ofiles[fd]; - if (!seq_consistent(fd_seq(fdp, fd), seq)) { + seq = seq_read(fd_seq(fdt, fd)); + fde = fdt->fdt_ofiles[fd]; + if (!seq_consistent(fd_seq(fdt, fd), seq)) { cpu_spinwait(); continue; } fp = fde.fde_file; #else - fp = fdp->fd_ofiles[fd].fde_file; + fp = fdt->fdt_ofiles[fd].fde_file; #endif if (fp == NULL) return (EBADF); @@ -2355,18 +2360,23 @@ fget_unlocked(struct filedesc *fdp, int } #endif count = fp->f_count; - if (count == 0) + if (count == 0) { + fdt = fdp->fd_files; continue; + } /* - * Use an acquire barrier to prevent caching of fd_ofiles - * so it is refreshed for verification. + * Use an acquire barrier to force re-reading of fdt so it is + * refreshed for verification. */ - if (atomic_cmpset_acq_int(&fp->f_count, count, count + 1) != 1) + if (atomic_cmpset_acq_int(&fp->f_count, count, count + 1) == 0) { + fdt = fdp->fd_files; continue; + } + fdt = fdp->fd_files; #ifdef CAPABILITIES - if (seq_consistent_nomb(fd_seq(fdp, fd), seq)) + if (seq_consistent_nomb(fd_seq(fdt, fd), seq)) #else - if (fp == fdp->fd_ofiles[fd].fde_file) + if (fp == fdt->fdt_ofiles[fd].fde_file) #endif break; fdrop(fp, curthread); Modified: head/sys/sys/filedesc.h ============================================================================== --- head/sys/sys/filedesc.h Wed Oct 29 23:10:48 2014 (r273841) +++ head/sys/sys/filedesc.h Thu Oct 30 05:10:33 2014 (r273842) @@ -62,6 +62,12 @@ struct filedescent { #define fde_nioctls fde_caps.fc_nioctls #define fde_change_size (offsetof(struct filedescent, fde_seq)) +struct fdescenttbl { + int fdt_nfiles; /* number of open files allocated */ + struct filedescent fdt_ofiles[0]; /* open files */ +}; +#define fd_seq(fdt, fd) (&(fdt)->fdt_ofiles[(fd)].fde_seq) + /* * This structure is used for the management of descriptors. It may be * shared by multiple processes. @@ -69,11 +75,10 @@ struct filedescent { #define NDSLOTTYPE u_long struct filedesc { - struct filedescent *fd_ofiles; /* open files */ + struct fdescenttbl *fd_files; /* open files table */ struct vnode *fd_cdir; /* current directory */ struct vnode *fd_rdir; /* root directory */ struct vnode *fd_jdir; /* jail root directory */ - int fd_nfiles; /* number of open files allocated */ NDSLOTTYPE *fd_map; /* bitmap of free fds */ int fd_lastfile; /* high-water mark of fd_ofiles */ int fd_freefile; /* approx. next free file */ @@ -85,7 +90,6 @@ struct filedesc { int fd_holdleaderscount; /* block fdfree() for shared close() */ int fd_holdleaderswakeup; /* fdfree() needs wakeup */ }; -#define fd_seq(fdp, fd) (&(fdp)->fd_ofiles[(fd)].fde_seq) /* * Structure to keep track of (process leader, struct fildedesc) tuples. @@ -105,6 +109,8 @@ struct filedesc_to_leader { struct filedesc_to_leader *fdl_prev; struct filedesc_to_leader *fdl_next; }; +#define fd_nfiles fd_files->fdt_nfiles +#define fd_ofiles fd_files->fdt_ofiles /* * Per-process open flags.
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201410300510.s9U5AXkX088397>