Date: Tue, 12 Jul 2011 17:57:53 +0300 From: Kostik Belousov <kostikbel@gmail.com> To: Attilio Rao <attilio@freebsd.org> Cc: freebsd-current@freebsd.org, Kohji Okuno <okuno.kohji@jp.panasonic.com>, Ed Maste <emaste@sandvine.com> Subject: Re: Bug about devfs? Message-ID: <20110712145753.GI43872@deviant.kiev.zoral.com.ua> In-Reply-To: <CAJ-FndDz31pzH2dzrqdWWAFpMm%2BOOTFCj44MBpBoCS81-S4CVg@mail.gmail.com> References: <20110712.191028.650619413057975749.okuno.kohji@jp.panasonic.com> <20110712111925.GH43872@deviant.kiev.zoral.com.ua> <CAJ-FndDz31pzH2dzrqdWWAFpMm%2BOOTFCj44MBpBoCS81-S4CVg@mail.gmail.com>
next in thread | previous in thread | raw e-mail | index | archive | help
--jAJnlX6Iz2QeVWJH Content-Type: text/plain; charset=koi8-r Content-Disposition: inline Content-Transfer-Encoding: quoted-printable On Tue, Jul 12, 2011 at 03:02:44PM +0200, Attilio Rao wrote: > 2011/7/12 Kostik Belousov <kostikbel@gmail.com>: > > On Tue, Jul 12, 2011 at 07:10:28PM +0900, Kohji Okuno wrote: > >> Hello, > >> > >> I think that devfs has a problem. > >> I encountered the problem that open("/dev/AAA") returned ENOENT. > >> Of course, /dev/AAA exists. > >> > >> ENOENT was created by the point(***) in devfs_allocv(). > >> I think that the race condition had occurred between process A and > >> vnlru kernel thread. > >> > >> Please check the following. > >> > >> If vnlru set VI_DOOMED to vp->v_iflag but vnlru didn't still execute > >> VOP_RECLAIM(), process A cat get valid vp from de->de_vnode. > >> But, vget() will return ENOENT, because vp->v_iflag has VI_DOOMED. > >> > >> When I set the break point to (***), I checked that de->de_vnode and > >> vp->v_data were NULL. > >> > >> > >> process A: =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9Avnlr= u: > >> > >> devfs_allocv() { > >> =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A = =9A =9A vgonel(vp) { > >> =9A =9A... =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A= =9A =9A =9A =9A =9A =9A ... > >> =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A = =9A =9A =9A vp->v_iflag |=3D VI_DOOMED; > >> =9A mtx_lock(&devfs_de_interlock); =9A =9A =9A =9A ... > >> =9A vp =3D de->de_vnode; > >> =9A if (vp !=3D NULL) { =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A VI_UNL= OCK(vp); > >> =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A _____________/ ... > >> =9A VI_LOCK(vp); ____________/ =9A =9A =9A =9A =9A =9A =9Aif (VOP_RECL= AIM(vp, td)) > >> =9A mtx_unlock(&devfs_de_interlock); =9A =9A =9A ... > >> =9A =9A... =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A= =9A\ =9A =9A =9A =9A devfs_reclaim(ap) { > >> =9A error =3D vget(vp,...); =9A =9A =9A =9A =9A =9A =9A =9A\ > >> =9A =9A... =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A= =9A =9A\______ =9A mtx_lock(&devfs_de_interlock); > >> =9A if (devfs_allocv_drop_refs(...)) { =9A =9A =9A =9Ade =3D vp->v_dat= a; > >> =9A =9A ... =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A = =9A =9A =9A =9A =9A =9A =9A if (de !=3D NULL) { > >> =9A } =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A = =9A =9A =9A =9A de->de_vnode =3D NULL; > >> =9A else if (error) { =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A = vp->v_data =3D NULL; > >> =9A =9A ... =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A = =9A =9A =9A =9A =9A =9A =9A } > >> =9A =9A rturn (error); (***) =9A =9A =9A =9A =9A =9A =9A =9A =9Amtx_un= lock(&devfs_de_interlock); > >> =9A } =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A = =9A =9A =9A =9A ... > >> =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A = =9A =9A =9A } > >> > >> > >> > >> I think that devfs_allocv() should be fixed as below. > >> How do you think? > >> > >> devfs_allocv(struct devfs_dirent *de, struct mount *mp, struct vnode *= *vpp) > >> { > >> =9A =9A =9A =9A int error; > >> =9A =9A =9A struct vnode *vp; > >> =9A =9A =9A struct cdev *dev; > >> =9A =9A =9A struct devfs_mount *dmp; > >> > >> =9A =9A =9A dmp =3D VFSTODEVFS(mp); > >> +#if 1 > >> + retry: > >> +#endif > >> =9A =9A =9A =9A if (de->de_flags & DE_DOOMED) { > >> > >> =9A =9A =9A =9A =9A =9A... > >> > >> =9A =9A =9A =9A mtx_lock(&devfs_de_interlock); > >> =9A =9A =9A =9A vp =3D de->de_vnode; > >> =9A =9A =9A =9A if (vp !=3D NULL) { > >> =9A =9A =9A =9A =9A =9A =9A =9A VI_LOCK(vp); > >> =9A =9A =9A =9A =9A =9A =9A =9A mtx_unlock(&devfs_de_interlock); > >> =9A =9A =9A =9A =9A =9A =9A =9A sx_xunlock(&dmp->dm_lock); > >> =9A =9A =9A =9A =9A =9A =9A =9A error =3D vget(vp, LK_EXCLUSIVE | LK_I= NTERLOCK, curthread); > >> =9A =9A =9A =9A =9A =9A =9A =9A sx_xlock(&dmp->dm_lock); > >> =9A =9A =9A =9A =9A =9A =9A =9A if (devfs_allocv_drop_refs(0, dmp, de)= ) { > >> =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A if (error =3D=3D 0) > >> =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A vput(v= p); > >> =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A return (ENOENT); > >> =9A =9A =9A =9A =9A =9A =9A =9A } > >> =9A =9A =9A =9A =9A =9A =9A =9A else if (error) { > >> +#if 1 > >> + =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A if (error =3D=3D ENOENT) > >> + =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A goto retry; > >> +#endif > >> =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A sx_xunlock(&dmp->dm_lock); > >> =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A return (error); > >> =9A =9A =9A =9A =9A =9A =9A } > >> > > Thank you for the report. > > > > The proposed change would revert r179247, which also caused some issues. > > Are you able to reproduce the problem ? > > > > Could you try the following patch ? I cannot reproduce your situation, > > so the patch is untested by me. > > > > diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c > > index bf6dab8..bbbfff4 100644 > > --- a/sys/fs/devfs/devfs_vnops.c > > +++ b/sys/fs/devfs/devfs_vnops.c > > @@ -397,6 +397,7 @@ devfs_allocv(struct devfs_dirent *de, struct mount = *mp, int lockmode, > > =9A =9A =9A =9A =9A =9A =9A =9Asx_xunlock(&dmp->dm_lock); > > =9A =9A =9A =9A =9A =9A =9A =9Areturn (ENOENT); > > =9A =9A =9A =9A} > > +loop: > > =9A =9A =9A =9ADEVFS_DE_HOLD(de); > > =9A =9A =9A =9ADEVFS_DMP_HOLD(dmp); > > =9A =9A =9A =9Amtx_lock(&devfs_de_interlock); > > @@ -412,7 +413,16 @@ devfs_allocv(struct devfs_dirent *de, struct mount= *mp, int lockmode, > > =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9Avput(vp); > > =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9Areturn (ENOENT); > > =9A =9A =9A =9A =9A =9A =9A =9A} > > - =9A =9A =9A =9A =9A =9A =9A else if (error) { > > + =9A =9A =9A =9A =9A =9A =9A else if (error !=3D 0) { > > + =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A if (error =3D=3D ENOENT) { > > + =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A mtx_lock(= &devfs_de_interlock); > > + =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A while (de= ->de_vnode !=3D NULL) { > > + =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A = =9A =9A msleep(&de->de_vnode, > > + =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A = =9A =9A =9A =9A &devfs_de_interlock, 0, "dvall", 0); > > + =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A } > > + =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A mtx_unloc= k(&devfs_de_interlock); > > + =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A goto loop; > > + =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A } > > =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9Asx_xunlock(&dmp->dm_lock= ); > > =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9A =9Areturn (error); > > =9A =9A =9A =9A =9A =9A =9A =9A} > > @@ -1259,6 +1269,7 @@ devfs_reclaim(struct vop_reclaim_args *ap) > > =9A =9A =9A =9Aif (de !=3D NULL) { > > =9A =9A =9A =9A =9A =9A =9A =9Ade->de_vnode =3D NULL; > > =9A =9A =9A =9A =9A =9A =9A =9Avp->v_data =3D NULL; > > + =9A =9A =9A =9A =9A =9A =9A wakeup(&de->de_vnode); > > =9A =9A =9A =9A} > > =9A =9A =9A =9Amtx_unlock(&devfs_de_interlock); >=20 > I think that this approach may starve the thread for a while. > As I told you privately I was able to see on field a livelock because > of this check. In my case, it was a thread running for 63seconds (at > least, at that point the watchdog was tripping over). Feasible explanation was not found at the time, AFAIR. I could believe that this is possible with r179247 and driver stuck in the close cdevsw method. More risky change would be to clear de_vnode early. All devfs code shall be already safe by checking for VI_DOOMED, and if VI_DOOMED, v_data may be NULL. Again, I am unable to test. diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c index bf6dab8..955bd8b 100644 --- a/sys/fs/devfs/devfs_vnops.c +++ b/sys/fs/devfs/devfs_vnops.c @@ -397,6 +397,7 @@ devfs_allocv(struct devfs_dirent *de, struct mount *mp,= int lockmode, sx_xunlock(&dmp->dm_lock); return (ENOENT); } +loop: DEVFS_DE_HOLD(de); DEVFS_DMP_HOLD(dmp); mtx_lock(&devfs_de_interlock); @@ -405,16 +406,21 @@ devfs_allocv(struct devfs_dirent *de, struct mount *m= p, int lockmode, VI_LOCK(vp); mtx_unlock(&devfs_de_interlock); sx_xunlock(&dmp->dm_lock); - error =3D vget(vp, lockmode | LK_INTERLOCK, curthread); + vget(vp, lockmode | LK_INTERLOCK | LK_RETRY, curthread); sx_xlock(&dmp->dm_lock); if (devfs_allocv_drop_refs(0, dmp, de)) { - if (error =3D=3D 0) - vput(vp); + vput(vp); return (ENOENT); } - else if (error) { - sx_xunlock(&dmp->dm_lock); - return (error); + else if ((vp->v_iflag & VI_DOOMED) !=3D 0) { + mtx_lock(&devfs_de_interlock); + if (de->de_vnode =3D=3D vp) { + de->de_vnode =3D NULL; + vp->v_data =3D NULL; + } + mtx_unlock(&devfs_de_interlock); + vput(vp); + goto loop; } sx_xunlock(&dmp->dm_lock); *vpp =3D vp; --jAJnlX6Iz2QeVWJH Content-Type: application/pgp-signature Content-Disposition: inline -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.11 (FreeBSD) iEYEARECAAYFAk4cYPEACgkQC3+MBN1Mb4iVVACgmxg/VHxHjPnHEPLFw85wHfwh DFYAoKCfJRca+DqGmkn3/kR3w9mik8Cm =7FWP -----END PGP SIGNATURE----- --jAJnlX6Iz2QeVWJH--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20110712145753.GI43872>