Date: Mon, 14 Jun 2010 19:25:54 GMT From: Gleb Kurtsou <gk@FreeBSD.org> To: Perforce Change Reviews <perforce@FreeBSD.org> Subject: PERFORCE change 179615 for review Message-ID: <201006141925.o5EJPsDO066140@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://p4web.freebsd.org/@@179615?ac=10 Change 179615 by gk@gk_h1 on 2010/06/14 19:25:33 Fix bugs, add missing bits to make cache operations for tmpfs. Affected files ... .. //depot/projects/soc2010/gk_namecache/sys/kern/subr_witness.c#2 edit .. //depot/projects/soc2010/gk_namecache/sys/kern/vfs_dircache.c#2 edit .. //depot/projects/soc2010/gk_namecache/sys/kern/vfs_subr.c#3 edit .. //depot/projects/soc2010/gk_namecache/sys/sys/dircache.h#2 edit Differences ... ==== //depot/projects/soc2010/gk_namecache/sys/kern/subr_witness.c#2 (text+ko) ==== @@ -614,6 +614,19 @@ { "vnode interlock", &lock_class_mtx_sleep }, { NULL, NULL }, /* + * dircache pool locks/vnode interlock + */ + { "dircache lock 0", &lock_class_mtx_sleep }, + { "dircache lock 1", &lock_class_mtx_sleep }, + { "dircache lock 2", &lock_class_mtx_sleep }, + { "dircache lock 3", &lock_class_mtx_sleep }, + { "dircache lock 4", &lock_class_mtx_sleep }, + { "dircache lock 5", &lock_class_mtx_sleep }, + { "dircache lock 6", &lock_class_mtx_sleep }, + { "dircache lock 7", &lock_class_mtx_sleep }, + { "vnode interlock", &lock_class_mtx_sleep }, + { NULL, NULL }, + /* * ZFS locking */ { "dn->dn_mtx", &lock_class_sx }, ==== //depot/projects/soc2010/gk_namecache/sys/kern/vfs_dircache.c#2 (text+ko) ==== @@ -49,13 +49,37 @@ #include <sys/dircache.h> -#define DCDEBUG(format, args...) printf(format ,## args) +#define DCDEBUG(format, args...) 
\ + do { \ + if (dircache_debug != 0) \ + printf(format ,## args); \ + } while (0) + +#define DIRCACHE_STAT(n, descr) \ + SYSCTL_PROC(_vfs_dircache, OID_AUTO, n, \ + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, \ + NULL, __CONCAT(dps_, n), dps_sysctlhandler, "LU", descr) + +#define DC_NAMEROUND 16 /* power of 2 */ + +enum { + dps_interlock_same, + dps_interlock_direct, + dps_interlock_reverse, + dps_interlock_reverse_fast, + dps_max +}; struct nspace; +struct dircache_poolstat { + u_long dps_stats[dps_max]; +}; + struct dircache_pool { struct mtx dp_mtx; u_long dp_gen; + struct dircache_poolstat dp_stat; }; struct dircache_root { @@ -63,30 +87,68 @@ struct dircache *dr_entry; }; +SYSCTL_NODE(_vfs, OID_AUTO, dircache, CTLFLAG_RW, 0, "Dircache"); static MALLOC_DEFINE(M_DIRCACHE, "dircache buf", "dircache buffers"); static uma_zone_t dircache_zone; static struct dircache_pool **pool; static size_t pool_size; static u_long pool_id; +static char **pool_mtxname; +static const int pool_mtxnamesz = 20; +static int dircache_debug = 1; +SYSCTL_UINT(_vfs_dircache, OID_AUTO, debug, CTLFLAG_RW, &dircache_debug, 0, + "Enable debug"); + +static int dps_sysctlhandler(SYSCTL_HANDLER_ARGS); + +DIRCACHE_STAT(interlock_same, + "Same lock hits in interlock"); +DIRCACHE_STAT(interlock_direct, + "Direct lock order hits in interlock"); +DIRCACHE_STAT(interlock_reverse, + "Reverse lock order hits in interlock"); +DIRCACHE_STAT(interlock_reverse_fast, + "Reverse lock order without sleeping hits in interlock"); + +static int +ptr_cmp(const void *a, const void *b) +{ + return (((uintptr_t)(*(void * const *)a)) - + ((uintptr_t)(*(void * const *)b))); +} + static void dircache_sysinit(void *arg __unused) { int i; - pool_size = 1; + pool_size = 4; dircache_zone = uma_zcreate("dircache", sizeof(struct dircache), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); pool = malloc(sizeof(void *) * pool_size, M_DIRCACHE, M_WAITOK); + pool_mtxname = malloc(sizeof(void *) * pool_size, + M_DIRCACHE, M_WAITOK); + 
/* + * Keep struct dircache_pool size minimal. (and align at cache + * pipeline?) + * Use pool address for lock ordering. + */ for (i = 0; i < pool_size; i++) { + pool_mtxname[i] = malloc(pool_mtxnamesz, + M_DIRCACHE, M_WAITOK | M_ZERO); pool[i] = malloc(sizeof(struct dircache_pool), M_DIRCACHE, M_WAITOK | M_ZERO); + } + qsort(pool, pool_size, sizeof(void *), ptr_cmp); + for (i = 0; i < pool_size; i++) { pool[i]->dp_gen = pool_id++; - mtx_init(&pool[i]->dp_mtx, "dircache lock", NULL, MTX_DEF); + snprintf(pool_mtxname[i], pool_mtxnamesz, "dircache lock %d", i); + mtx_init(&pool[i]->dp_mtx, pool_mtxname[i], NULL, MTX_DEF); } } SYSINIT(dircache, SI_SUB_VFS, SI_ORDER_SECOND, dircache_sysinit, NULL); @@ -99,13 +161,62 @@ for (i = 0; i < pool_size; i++) { mtx_destroy(&pool[i]->dp_mtx); free(pool[i], M_DIRCACHE); + free(pool_mtxname[i], M_DIRCACHE); } free(pool, M_DIRCACHE); + free(pool_mtxname, M_DIRCACHE); pool = NULL; uma_zdestroy(dircache_zone); } SYSUNINIT(dircache, SI_SUB_VFS, SI_ORDER_SECOND, dircache_sysuninit, NULL); +static void +pool_getstats(struct dircache_poolstat *ps) +{ + struct dircache_poolstat *stat; + int i, ind; + + for (i = 0; i < pool_size; i++) { + mtx_lock(&pool[i]->dp_mtx); + stat = &pool[i]->dp_stat; + for (ind = 0; ind < dps_max; ind++) + ps->dps_stats[ind] += stat->dps_stats[ind]; + mtx_unlock(&pool[i]->dp_mtx); + } +} + +static int +dps_sysctlhandler(SYSCTL_HANDLER_ARGS) +{ + struct dircache_poolstat st = {}; + u_long res; + int error; + + pool_getstats(&st); + res = st.dps_stats[arg2]; + error = SYSCTL_OUT(req, &res, sizeof(res)); + + return (error); +} + +static __inline void +dp_incstat(int ind, struct dircache_pool *dp, u_long val) +{ + dp->dp_stat.dps_stats[ind] += val; +} + +static __inline int +dc_cmpname(struct dircache *dc, char *name, size_t namelen) +{ + int r; + + r = dc->dc_namelen - namelen; + if (r != 0) + return (r); + r = bcmp(dc->dc_name, name, namelen); + return (r); +} + static __inline int dc_cmp(struct dircache *a, 
struct dircache *b) { @@ -123,57 +234,61 @@ RB_GENERATE_STATIC(dircache_tree, dircache, dc_listentry, dc_cmp); +#define DC_MTX(dc) (&(dc)->dc_pool->dp_mtx) + +#define dc_lock(dc) mtx_lock(DC_MTX(dc)) + +#define dc_trylock(dc) mtx_trylock(DC_MTX(dc)) + +#define dc_unlock(dc) mtx_unlock(DC_MTX(dc)) + +#define dc_assertlock(dc, w) mtx_assert(DC_MTX(dc), (w)) -static __inline void -dc_lock(struct dircache *dc) +static void +dc_relock(struct dircache *from, struct dircache *to) { - mtx_lock(&dc->dc_pool->dp_mtx); -} + dc_assertlock(from, MA_OWNED); -static __inline int -dc_trylock(struct dircache *dc) -{ - return (mtx_trylock(&dc->dc_pool->dp_mtx)); -} + if (from->dc_pool == to->dc_pool) + return; -static __inline void -dc_unlock(struct dircache *dc) -{ - mtx_unlock(&dc->dc_pool->dp_mtx); -} + dc_assertlock(to, MA_NOTOWNED); -static __inline void -dc_assertlock(struct dircache *dc, int what) -{ - mtx_assert(&dc->dc_pool->dp_mtx, what); + dc_unlock(from); + dc_lock(to); } static void -dc_interlock(struct dircache *a, struct dircache *b) +dc_interlock(struct dircache *from, struct dircache *to) { - dc_assertlock(a, MA_OWNED); + dc_assertlock(from, MA_OWNED); - if (a->dc_pool == b->dc_pool) + if (from->dc_pool == to->dc_pool) { + dp_incstat(dps_interlock_same, to->dc_pool, 1); return; + } - dc_assertlock(b, MA_NOTOWNED); - if ((uintptr_t)a->dc_pool < (uintptr_t)b->dc_pool) { - dc_lock(b); - dc_unlock(a); + dc_assertlock(to, MA_NOTOWNED); + if ((uintptr_t)from->dc_pool < (uintptr_t)to->dc_pool) { + dc_lock(to); + dc_unlock(from); + dp_incstat(dps_interlock_direct, to->dc_pool, 1); return; } critical_enter(); - if (dc_trylock(b) != 0) { - dc_unlock(a); + if (dc_trylock(to) != 0) { + dc_unlock(from); critical_exit(); + dp_incstat(dps_interlock_reverse_fast, to->dc_pool, 1); return; } critical_exit(); /* !!!! FIXME !!!! 
*/ - dc_unlock(a); - dc_lock(b); + dc_unlock(from); + dc_lock(to); + dp_incstat(dps_interlock_reverse, to->dc_pool, 1); } static __inline void @@ -184,16 +299,41 @@ dc->dc_namehash = hash32_buf(name, namelen, HASHINIT * namelen); } +static __inline size_t +dc_namebuflen(size_t namelen) +{ + return (roundup2(namelen + 1, DC_NAMEROUND)); +} + +static __inline int +dc_namebuffits(struct dircache *dc, size_t namelen) +{ + return (dc_namebuflen(dc->dc_namelen) < namelen + 1); +} + +static __inline char * +dc_allocnamebuf(size_t namelen) +{ + char * buf; + + buf = malloc(dc_namebuflen(namelen), M_DIRCACHE, M_WAITOK); + return (buf); +} + static __inline void -dc_setname(struct dircache *dc, char *name, size_t namelen) +dc_setname(struct dircache *dc, char *name, size_t namelen, char *namebuf) { MPASS(name != dc->dc_name); - if (dc->dc_name == NULL || dc->dc_namelen < namelen) { + if (dc->dc_name == NULL || dc_namebuffits(dc, namelen) == 0) { if (dc->dc_name != NULL) free(dc->dc_name, M_DIRCACHE); - dc->dc_name = malloc(namelen + 1, M_DIRCACHE, M_WAITOK); - } + if (namebuf == NULL) + dc->dc_name = dc_allocnamebuf(namelen); + else + dc->dc_name = namebuf; + } else + MPASS(namebuf == NULL); memcpy(dc->dc_name, name, namelen); dc->dc_name[namelen] = '\0'; dc_initname(dc, dc->dc_name, namelen); @@ -210,7 +350,6 @@ dc->dc_gen = *genp; } - static struct dircache * dc_alloc(struct dircache *pdc, enum dircache_type type, char *name, size_t namelen) @@ -220,22 +359,23 @@ dc = uma_zalloc(dircache_zone, M_WAITOK | M_ZERO); DCDEBUG("alloc: %p %s\n", dc, name); + + dc->dc_type = type; + dc->dc_flags = DC_CH_PARTIAL; + dc->dc_parent = pdc; + refcount_init(&dc->dc_refcnt, 1); cv_init(&dc->dc_condvar, "dircache cv"); + if (name != NULL && namelen != 0) { - dc_setname(dc, name, namelen); - dc->dc_parent = pdc; + dc_setname(dc, name, namelen, NULL); /* cheaper way to get pseudo-random value */ poolind = dc->dc_namehash; } else { poolind = arc4random(); } - poolind %= pool_size; dc->dc_pool 
= pool[poolind]; - dc->dc_flags = DC_CH_PARTIAL; - refcount_init(&dc->dc_refcnt, 1); - return (dc); } @@ -290,18 +430,6 @@ } static void -dc_refvnode_locked(struct dircache *dc, struct vnode *vp) -{ - dc_ref(dc); - MPASS(dc->dc_vnode == NULL); - dc->dc_vnode = vp; - TAILQ_INSERT_HEAD(&vp->v_dircache, dc, dc_vnodelist); - DCDEBUG("refvnode locked: %p %s; vp=%p; refcnt=%d\n", dc, dc->dc_name, - vp, dc->dc_refcnt); - MPASS(TAILQ_FIRST(&vp->v_dircache) == dc); -} - -static void dc_refvnode(struct dircache *dc, struct vnode *vp) { if (dc->dc_type != DT_ROOT) @@ -309,57 +437,102 @@ DCDEBUG("refvnode: %p %s; vp=%p; refcnt=%d\n", dc, dc->dc_name, vp, dc->dc_refcnt); + MPASS(vp->v_type != VNON && vp->v_type != VBAD); + MPASS(dc->dc_vnode == NULL); dc_ref(dc); - MPASS(dc->dc_vnode == NULL); dc->dc_vnode = vp; - dc_unlock(dc); VI_LOCK(vp); + if (vp->v_type == VDIR && !TAILQ_EMPTY(&vp->v_dircache)) + panic("dircache: multiple directory vnode references %p", vp); TAILQ_INSERT_HEAD(&vp->v_dircache, dc, dc_vnodelist); VI_UNLOCK(vp); } static void -dc_relevnode(struct dircache *dc, struct vnode *vp) +dc_relevnode(struct dircache *dc) { MPASS(dc->dc_vnode != NULL); + dc_assertlock(dc, MA_OWNED); DCDEBUG("relevnode: %p %s; vp=%p; refcnt=%d\n", dc, dc->dc_name, - vp, dc->dc_refcnt); + dc->dc_vnode, dc->dc_refcnt); - TAILQ_REMOVE(&vp->v_dircache, dc, dc_vnodelist); + VI_LOCK(dc->dc_vnode); + TAILQ_REMOVE(&dc->dc_vnode->v_dircache, dc, dc_vnodelist); + VI_UNLOCK(dc->dc_vnode); dc->dc_vnode = NULL; dc_unlock(dc); dc_rele(dc); } static struct dircache * -dc_getentry(struct vnode *vp, struct dircache *parent_hint, - struct nspace *nspace_hint, int flags) +dc_getentry(struct vnode *vp, struct componentname *cnp, struct vnode *dvp) { struct dircache *dc; +restart: VI_LOCK(vp); dc = TAILQ_FIRST(&vp->v_dircache); if (dc == NULL) { + VI_UNLOCK(vp); if ((vp->v_vflag & VV_ROOT) != 0) { dc = vp->v_mount->mnt_dircache; DCDEBUG("getentry: root %p vp=%p\n", dc, vp); MPASS(dc != NULL); dc_lock(dc); - 
dc_refvnode_locked(dc, vp); - goto out; + dc_refvnode(dc, vp); } else { - VI_UNLOCK(vp); +#if 0 DCDEBUG("getentry: not found vp=%p\n", vp); +#else + panic("dircache: entry not found for vnode %p\n", vp); +#endif return (NULL); } + } else { + if (TAILQ_NEXT(dc, dc_vnodelist) != NULL) { + MPASS(cnp != NULL && dvp != NULL); + MPASS(vp->v_type != VDIR); + MPASS(!(cnp->cn_nameptr[0] == '.' && + (cnp->cn_namelen == 1 || (cnp->cn_namelen == 2 && + cnp->cn_nameptr[1] == '.')))); + + for(; dc != NULL; dc = TAILQ_NEXT(dc, dc_vnodelist)) { + VI_UNLOCK(vp); + dc_lock(dc); + if (dc->dc_vnode != vp) { + dc_unlock(dc); + DCDEBUG("getenrty: restart; multiple entries; vp=%p\n", + vp); + goto restart; + } + /* FIXME: dc_parent locking */ + if (dc_cmpname(dc, cnp->cn_nameptr, + cnp->cn_namelen) == 0 && + dvp == dc->dc_parent->dc_vnode) + break; + dc_unlock(dc); + VI_LOCK(vp); + } + if (dc == NULL) { + VI_UNLOCK(vp); +#if 0 + return (NULL); +#else + panic("dircache: entry not found for vnode %p (multiple)\n", vp); +#endif + } + } else { + VI_UNLOCK(vp); + dc_lock(dc); + } } - MPASS(TAILQ_NEXT(dc, dc_vnodelist) == NULL); - dc_lock(dc); -out: - if ((flags & DC_OP_LOCKVP) == 0) - VI_UNLOCK(vp); - + dc_assertlock(dc, MA_OWNED); + if (dc->dc_vnode != vp) { + dc_unlock(dc); + DCDEBUG("getenrty: restart; vp=%p\n", vp); + goto restart; + } return (dc); } @@ -369,7 +542,7 @@ struct dircache key; struct dircache *pdc, *dc; - pdc = dc_getentry(dvp, NULL, NULL, 0); + pdc = dc_getentry(dvp, NULL, NULL); if (pdc == NULL) return (NULL); dc_assertlock(pdc, MA_OWNED); @@ -381,15 +554,107 @@ return (NULL); } + if ((flags & DC_OP_NOWAIT) == 0) + dc_wait(pdc); + dc_interlock(pdc, dc); dc_assertlock(dc, MA_OWNED); + MPASS(dc->dc_parent == pdc); + + return (dc); +} - if ((flags & DC_OP_NOWAIT) == 0) - dc_wait(dc); +static struct dircache * +dc_update(struct dircache_cursor *curs, struct vnode *vp, + enum dircache_type type, char *name, size_t namelen, + ino_t inode, off_t offset, void *fspriv) +{ + struct 
dircache *dc, *pdc, *col; + + pdc = curs->dcr_parent; + MPASS((pdc->dc_flags & DC_CH_UPDATING) != 0); + + DCDEBUG("update: parent=%p name=%s\n", pdc, name); + + dc = dc_alloc(pdc, type, name, namelen); + if (type == DT_WEAK) + curs->dcr_nflags |= DC_CH_HASWEAK; + dc->dc_fspriv = fspriv; + col = RB_INSERT(dircache_tree, &pdc->dc_children, dc); + if (col != NULL) { + if (type == col->dc_type) { + DCDEBUG("update: warn: same entry added: %s\n", dc->dc_name); + MPASS(col->dc_inode == inode && col->dc_offset == offset); + dc->dc_fspriv = NULL; + dc->dc_parent = NULL; + dc_rele(dc); + return (NULL); + } else if (col->dc_type == DT_NEGATIVE) { + DCDEBUG("update: replace negative entry: %p %s\n", dc, dc->dc_name); + dc_lock(col); + col->dc_type = type; + MPASS((col->dc_flags & DC_CH_COMPLETE) == 0); + col->dc_flags |= DC_CH_PARTIAL; + col->dc_inode = inode; + col->dc_offset = offset; + MPASS(col->dc_fspriv == NULL); + col->dc_fspriv = fspriv; + dc->dc_fspriv = NULL; + dc_unlock(col); + dc->dc_parent = NULL; + dc_rele(dc); + dc = col; + } else + panic("dircache: update: ivalid entry: %d %s\n", + dc->dc_type, dc->dc_name); + } else + dc_ref(pdc); + if (vp != NULL) { + dc_lock(dc); + dc_refvnode(dc, vp); + dc_unlock(dc); + } return (dc); } +static void +dc_removeentry(struct dircache *dc) +{ + struct dircache *parent; + MPASS(dc->dc_parent != NULL); + + dc->dc_fspriv = NULL; + dc->dc_type = DT_INVAL; + parent = dc->dc_parent; + dc->dc_parent = NULL; + RB_REMOVE(dircache_tree, &parent->dc_children, dc); + if (dc->dc_vnode != NULL) + dc_relevnode(dc); + else + dc_unlock(dc); + dc_rele(parent); + dc_rele(dc); +} + +static void +dc_marknegative(struct dircache *dc) +{ + DCDEBUG("mark negative: %p %s; vp=%p\n", dc, dc->dc_name, dc->dc_vnode); + dc->dc_inode = 0; + dc->dc_offset = 0; + dc->dc_fspriv = NULL; + dc->dc_type = DT_NEGATIVE; + dc->dc_flags &= ~DC_CH_COMPLETE; + dc->dc_flags |= DC_CH_PARTIAL; + dc_updategen(dc); + if (dc->dc_vnode != NULL) + dc_relevnode(dc); + else + 
dc_unlock(dc); + dc_assertlock(dc, MA_NOTOWNED); +} + void dircache_init(struct mount *mp, ino_t inode) { @@ -416,15 +681,22 @@ { struct dircache *dc, *ch, *tmp; +restart: VI_LOCK(vp); TAILQ_FOREACH(dc, &vp->v_dircache, dc_vnodelist) { DCDEBUG("purge negative: %p %s; vp=%p\n", dc, dc->dc_name, vp); + VI_UNLOCK(vp); dc_lock(dc); + if (dc->dc_vnode != vp) { + dc_unlock(dc); + goto restart; + } RB_FOREACH_SAFE(ch, dircache_tree, &dc->dc_children, tmp) { - if (dc->dc_type == DT_NEGATIVE) + if (ch->dc_type == DT_NEGATIVE) RB_REMOVE(dircache_tree, &dc->dc_children, ch); } + VI_LOCK(vp); dc_unlock(dc); } VI_UNLOCK(vp); @@ -434,29 +706,7 @@ dircache_update(struct dircache_cursor *curs, enum dircache_type type, char *name, size_t namelen, ino_t inode, off_t offset) { - struct dircache *dc, *pdc, *col; - - pdc = curs->dcr_parent; - MPASS((pdc->dc_flags & DC_CH_UPDATING) != 0); - - DCDEBUG("update: parent=%p name=%s\n", pdc, name); - - dc = dc_alloc(pdc, type, name, namelen); - - if (type == DT_WEAK) - curs->dcr_nflags |= DC_CH_HASWEAK; - col = RB_INSERT(dircache_tree, &pdc->dc_children, dc); - if (col == NULL) - dc_ref(pdc); - else { - if (type == col->dc_type) { - DCDEBUG("update: warn: same entry added: %s\n", dc->dc_name); - dc_free(dc); - return; - } - panic("dircache: unexpected entry during update: %d %s\n", - dc->dc_type, dc->dc_name); - } + dc_update(curs, NULL, type, name, namelen, inode, offset, NULL); } int @@ -468,7 +718,7 @@ curs->dcr_parent = NULL; - dc = dc_getentry(dvp, NULL, NULL, 0); + dc = dc_getentry(dvp, NULL, NULL); if (dc == NULL) { DCDEBUG("beginupdate: not found dvp=%p; path=%s\n", dvp, cnp->cn_nameptr); @@ -542,32 +792,100 @@ dircache_endupdate(curs, DC_CH_PARTIAL); } +static int +dircache_lookupdot(struct vnode *dvp, struct vnode **vpp, + struct componentname *cnp) +{ + int ltype; + + MPASS(cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.'); + + DCDEBUG("lookup dot: dvp=%p\n", dvp); + + *vpp = dvp; + vref(*vpp); + /* + * When we lookup "." 
we still can be asked to lock it + * differently. + */ + ltype = cnp->cn_lkflags & LK_TYPE_MASK; + if (ltype != VOP_ISLOCKED(*vpp)) { + if (ltype == LK_EXCLUSIVE) { + vn_lock(*vpp, LK_UPGRADE | LK_RETRY); + if ((*vpp)->v_iflag & VI_DOOMED) { + /* forced unmount */ + vrele(*vpp); + *vpp = NULL; + return (ENOENT); + } + } else + vn_lock(*vpp, LK_DOWNGRADE | LK_RETRY); + } + return (-1); +} + int dircache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) { - struct dircache *dc; - int error; + struct dircache *pdc, *dc; + int error, ltype; - dc = dc_find(dvp, cnp, 0); + if (cnp->cn_nameptr[0] == '.' && cnp->cn_namelen == 1) + return (dircache_lookupdot(dvp, vpp, cnp)); + else if (cnp->cn_nameptr[0] == '.' && cnp->cn_nameptr[1] == '.' && + cnp->cn_namelen == 2) { + MPASS((cnp->cn_flags & ISDOTDOT) != 0); + pdc = dc_getentry(dvp, NULL, NULL); + if (pdc != NULL) { + DCDEBUG("lookup dotdot: dvp=%p\n", dvp); + dc = pdc->dc_parent; + dc_interlock(pdc, dc); + } else + dc = NULL; + } else + dc = dc_find(dvp, cnp, 0); if (dc == NULL) { - DCDEBUG("lookup: not found: %s; dvp=%p\n", cnp->cn_nameptr, dvp); + DCDEBUG("lookup: not found: %s; dvp=%p; op=%ld\n", + cnp->cn_nameptr, dvp, cnp->cn_nameiop); return (0); } error = 0; if (dc->dc_type == DT_NEGATIVE) - error = ENOENT; + switch (cnp->cn_nameiop) { + case CREATE: + case RENAME: + error = 0; + break; + default: + error = ENOENT; + } else if (dc->dc_vnode != NULL) { *vpp = dc->dc_vnode; error = -1; } dc_unlock(dc); - DCDEBUG("lookup: error=%d: %p %s; dvp=%p\n", error, dc, dc->dc_name, dvp); + DCDEBUG("lookup: error=%d: %p %s; dvp=%p; op=%ld\n", error, dc, + dc->dc_name, dvp, cnp->cn_nameiop); if (error == -1) { + ltype = 0; + if ((cnp->cn_flags & ISDOTDOT) != 0) { + ltype = VOP_ISLOCKED(dvp); + VOP_UNLOCK(dvp, 0); + } if (vget(*vpp, cnp->cn_lkflags, cnp->cn_thread) != 0) { *vpp = NULL; error = 0; } + if (cnp->cn_flags & ISDOTDOT) { + vn_lock(dvp, ltype | LK_RETRY); + if (dvp->v_iflag & VI_DOOMED) { + if 
(error == 0) + vput(*vpp); + *vpp = NULL; + return (ENOENT); + } + } } return (error); } @@ -583,23 +901,12 @@ DCDEBUG("add: inode=%d %s; vp=%p\n", inode, cnp->cn_nameptr, vp); if (dircache_beginupdate(&curs, dvp, cnp, 0) != 0) return (ENOENT); - dircache_update(&curs, type, cnp->cn_nameptr, cnp->cn_namelen, - inode, offset); + dc_update(&curs, vp, type, cnp->cn_nameptr, cnp->cn_namelen, + inode, offset, NULL); dircache_endupdate(&curs, 0); return (0); } -static void -dc_remove(struct dircache *dc, struct vnode *vp) -{ - dc->dc_inode = 0; - dc->dc_offset = 0; - dc->dc_fspriv = NULL; - dc->dc_type = DT_NEGATIVE; - dc_updategen(dc); - dc_relevnode(dc, vp); -} - int dircache_remove(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) { @@ -607,16 +914,18 @@ struct dircache *dc; DCDEBUG("remove: %s; vp=%p\n", cnp->cn_nameptr, vp); - if (dircache_beginupdate(&curs, dvp, cnp, 0) == 0) + if (dircache_beginupdate(&curs, dvp, cnp, 0) != 0) return (ENOENT); - dc = dc_getentry(vp, curs.dcr_parent, NULL, DC_OP_LOCKVP); + dc = dc_getentry(vp, cnp, dvp); if (dc == NULL) { + DCDEBUG("remove: vp not found: %s vp=%p\n", cnp->cn_nameptr, + vp); MPASS(dc_find(dvp, cnp, DC_OP_NOWAIT) == 0); dircache_endupdate(&curs, 0); return (ENOENT); } - dc_remove(dc, vp); + dc_marknegative(dc); dircache_endupdate(&curs, 0); return (0); } @@ -628,6 +937,7 @@ { struct dircache_cursor fcr, tcr; struct dircache *fdc, *tdc, *col; + char *namebuf; int error; MPASS(fvp != tvp); @@ -638,48 +948,83 @@ if (fdvp == tdvp) { if (tcr.dcr_parent == NULL) return (ENOENT); + fcr.dcr_parent = tcr.dcr_parent; } else { dircache_beginupdate(&fcr, fdvp, fcnp, 0); +#if 0 if (tcr.dcr_parent == NULL && fcr.dcr_parent == NULL) { - MPASS(dc_getentry(fvp, NULL, NULL, 0) == NULL); + MPASS(dc_getentry(fvp, fcnp) == NULL); MPASS(tvp == NULL || - dc_getentry(tvp, NULL, NULL, 0) == NULL); + dc_getentry(tvp, NULL) == NULL); return (ENOENT); } else if (tcr.dcr_parent == NULL) { - error = dircache_remove(fdvp, fvp, fcnp); 
+ fdc = dc_getentry(fvp, fcnp); + if (fdc != NULL) { + dc_marknegative(fdc, fvp); + } else { + MPASS(dc_find(fdvp, fcnp, DC_OP_NOWAIT) == 0); + error = ENOENT; + } dircache_endupdate(&fcr, 0); return (error); } else if (fcr.dcr_parent == NULL) { dircache_partialupdate(&tcr); return (0); } +#else + MPASS(tcr.dcr_parent != NULL && fcr.dcr_parent != NULL); +#endif } if (tvp != NULL) { - tdc = dc_getentry(tvp, tcr.dcr_parent, NULL, DC_OP_LOCKVP); + tdc = dc_getentry(tvp, tcnp, tdvp); if (tdc != NULL) { - MPASS(fcr.dcr_parent == tdc->dc_parent); - dc_remove(tdc, tvp); + MPASS(tcr.dcr_parent == tdc->dc_parent); + dc_removeentry(tdc); } else MPASS(dc_find(tdvp, tcnp, DC_OP_NOWAIT) == 0); } - fdc = dc_getentry(fvp, fcr.dcr_parent, NULL, DC_OP_LOCKVP); + fdc = dc_getentry(fvp, fcnp, tdvp); if (fdc == NULL) { MPASS(dc_find(fdvp, fcnp, DC_OP_NOWAIT) == 0); error = ENOENT; goto out; } + DCDEBUG("rename: remove from tree: %p %s; parent=%p\n", fdc, + fdc->dc_name, fcr.dcr_parent); + RB_REMOVE(dircache_tree, &fcr.dcr_parent->dc_children, fdc); + DCDEBUG("rename: rename: %p %s\n", fdc, fdc->dc_name); + namebuf = NULL; + if (dc_namebuffits(fdc, tcnp->cn_namelen) == 0) { + MPASS(fdc->dc_namelen > 0 && fdc->dc_name != NULL); + dc_unlock(fdc); + namebuf = dc_allocnamebuf(tcnp->cn_namelen); + dc_lock(fdc); + } + dc_setname(fdc, tcnp->cn_nameptr, tcnp->cn_namelen, namebuf); dc_updategen(fdc); - RB_REMOVE(dircache_tree, &fcr.dcr_parent->dc_children, fdc); - dc_setname(fdc, tcnp->cn_nameptr, tcnp->cn_namelen); + DCDEBUG("rename: insert to tree: %p %s; parent=%p\n", fdc, fdc->dc_name, + tcr.dcr_parent); +reinsert: col = RB_INSERT(dircache_tree, &tcr.dcr_parent->dc_children, fdc); - MPASS(col == NULL); - if (fcr.dcr_parent != tcr.dcr_parent) { + if (col != NULL) { + DCDEBUG("rename: insert collision: %p %s; type=%d\n", col, + col->dc_name, col->dc_type); + if (col->dc_type != DT_NEGATIVE) + panic("dircache: rename: invalid entry: %d %s\n", + col->dc_type, col->dc_name); + dc_relock(fdc, 
col); + dc_removeentry(col); + dc_lock(fdc); + goto reinsert; + } + if (fdvp != tdvp) { dc_ref(tcr.dcr_parent); + fdc->dc_parent = tcr.dcr_parent; + dc_unlock(fdc); dc_rele(fcr.dcr_parent); - fdc->dc_parent = tcr.dcr_parent; - } - dc_unlock(fdc); + } else + dc_unlock(fdc); out: dircache_endupdate(&tcr, 0); @@ -694,10 +1039,14 @@ { struct dircache_cursor curs; + if (cnp->cn_nameptr[0] == '.' && (cnp->cn_namelen == 1 || + (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.'))) + panic("dircache: set negative for '.' or '..'"); + if (dircache_beginupdate(&curs, dvp, cnp, 0) != 0) return (ENOENT); - dircache_update(&curs, DT_NEGATIVE, cnp->cn_nameptr, cnp->cn_namelen, - 0, 0); + dc_update(&curs, NULL, DT_NEGATIVE, cnp->cn_nameptr, cnp->cn_namelen, + 0, 0, NULL); dircache_endupdate(&curs, 0); return (0); } @@ -708,19 +1057,29 @@ { struct dircache *dc; - dc = dc_getentry(vp, NULL, NULL, 0); + dc = dc_getentry(vp, cnp, dvp); if (dc != NULL) { - MPASS(dc->dc_namelen == cnp->cn_namelen && - bcmp(dc->dc_name, cnp->cn_nameptr, dc->dc_namelen) == 0); + DCDEBUG("setvnode found entry: %p %s; type=%d; vp=%p; cnp=%d %s\n", + dc, dc ? dc->dc_name : "??", dc->dc_type, vp, + (int)cnp->cn_namelen, cnp->cn_nameptr); + MPASS(dc->dc_type == DT_ROOT || + (cnp->cn_nameptr[0] == '.' && cnp->cn_namelen == 1) || + ((dc->dc_namelen == cnp->cn_namelen) && >>> TRUNCATED FOR MAIL (1000 lines) <<<
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201006141925.o5EJPsDO066140>
