Date: Tue, 10 Nov 2015 10:57:06 -0800 From: Adrian Chadd <adrian@freebsd.org> To: Eugene Grosbein <eugen@grosbein.net>, benno <benno@freebsd.org>, Jeff Roberson <jeff@freebsd.org>, Konstantin Belousov <kib@freebsd.org> Cc: "freebsd-mips@freebsd.org" <freebsd-mips@freebsd.org> Subject: Re: USB-related problem Message-ID: <CAJ-VmomOeSyQGxKPvaa9TiBENCcdvTJiTLvSi2FfbCH9c6CGAA@mail.gmail.com> In-Reply-To: <56423D82.5030203@grosbein.net> References: <56348063.3090508@grosbein.net> <563707A0.3040700@grosbein.net> <CANCZdfrB0hkjmTU-9NzimBv=X_h-=bF0D2azBxg=9B=kmitv7Q@mail.gmail.com> <56370E1D.3040801@grosbein.net> <CAJ-Vmo=0vOAq8db_GeLWmdXr7xJdzUh44ZZJrQ9vVdpvzT9hiQ@mail.gmail.com> <563F5630.2000407@grosbein.net> <563F6F6F.1010807@grosbein.net> <CAJ-Vmo=fPSi7yZO5Xjodg8HPtTLd44Y9Y_8qg4EgTGwEpHO10A@mail.gmail.com> <563F91A8.9080702@grosbein.net> <CAJ-VmomUvoUerMS20qQsQujcjULVA=_jaLp9Mh3fU1fEpdwzZA@mail.gmail.com> <CAJ-Vmo=eUUZ928KgQbyOi8EdDFSmxhjvDOyAMvfXsqwDbO96ng@mail.gmail.com> <5640C0FD.2040803@grosbein.net> <CAJ-Vmo=6mztfvvBd91LPO5H418K8vW=%2BLk=6V5Z_y5DHu7v7HA@mail.gmail.com> <5640F315.5020303@grosbein.net> <CAJ-VmokpWM=d%2BtEFv8a8eU91UimVZ9W8da2QkKCTDjd%2B2ZM_LQ@mail.gmail.com> <56410214.3070901@grosbein.net> <CAJ-Vmo=QUUkTfQ7pvr_V%2BCQ8zQWOoqp3H8hD9LUR8C5U-5N=Ag@mail.gmail.com> <CAJ-VmomA27NcNCjx00DrZzFOU9bGYucUhbPXodH2uvNd8eJ3wg@mail.gmail.com> <564231DE.7090308@grosbein.net> <56423D82.5030203@grosbein.net>
next in thread | previous in thread | raw e-mail | index | archive | help
+ benno, jeff, kib cool, so now we know where syncer is hanging. nice work! That syncer loop that calls it waiting for the list to be empty is also a bit suss; it looks like it could also get stuck in a loop and never yield. -a On 10 November 2015 at 10:54, Eugene Grosbein <eugen@grosbein.net> wrote: > On 11.11.2015 01:05, Eugene Grosbein wrote: >> On 10.11.2015 04:43, Adrian Chadd wrote: >>> Hi, >>> >>> How much C do you know? Are you able to add some debugging to >>> sched_sync() (which IIRC is the syncer thread) and work out where it's >>> getting stuck and spinning? >> >> I did. I've added new global kernel symbol, variable schedstat >> so I can check its value using ddb. And it's always equals to 5 >> when deadlock occurs, discovering the loop > > I've digged it a bit more. I tried to make double sure it loops > and added loop counter "schedloop" hoping to see some insane large value. > But it does not loop this way and schedloop is always 1. > > It just sits inside sync_vnode(). So I've added another variable "svnodestate" > to trace sync_vnode() and svnodestate is 9. > > That is, VOP_FSYNC(vp, MNT_LAZY, td) does not return: > > --- sys/kern/vfs_subr.c.orig 2015-10-08 23:33:53.935440000 +0700 > +++ /sys/kern/vfs_subr.c 2015-11-11 01:39:09.450390000 +0700 > @@ -1738,217 +1738,272 @@ sysctl_vfs_worklist_len(SYSCTL_HANDLER_A > return (error); > } > > SYSCTL_PROC(_vfs, OID_AUTO, worklist_len, CTLTYPE_INT | CTLFLAG_RD, NULL, 0, > sysctl_vfs_worklist_len, "I", "Syncer thread worklist length"); > > static struct proc *updateproc; > static void sched_sync(void); > static struct kproc_desc up_kp = { > "syncer", > sched_sync, > &updateproc > }; > SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp); > > +volatile unsigned svnodestate = 0; > + > static int > sync_vnode(struct synclist *slp, struct bufobj **bo, struct thread *td) > { > struct vnode *vp; > struct mount *mp; > > + svnodestate = 0; > *bo = LIST_FIRST(slp); > if (*bo == NULL) > return (0); > + > + svnodestate = 1; > vp = (*bo)->__bo_vnode; /* XXX */ > if (VOP_ISLOCKED(vp) != 0 || VI_TRYLOCK(vp) == 0) > return (1); > /* > * We use vhold in case the vnode does not > * successfully sync. vhold prevents the vnode from > * going away when we unlock the sync_mtx so that > * we can acquire the vnode interlock. > */ > + svnodestate = 2; > vholdl(vp); > + svnodestate = 3; > mtx_unlock(&sync_mtx); > VI_UNLOCK(vp); > + svnodestate = 4; > if (vn_start_write(vp, &mp, V_NOWAIT) != 0) { > + svnodestate = 5; > vdrop(vp); > + svnodestate = 6; > mtx_lock(&sync_mtx); > + svnodestate = 7; > return (*bo == LIST_FIRST(slp)); > } > + svnodestate = 8; > vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); > + svnodestate = 9; > (void) VOP_FSYNC(vp, MNT_LAZY, td); > + svnodestate = 10; > VOP_UNLOCK(vp, 0); > + svnodestate = 11; > vn_finished_write(mp); > + svnodestate = 12; > BO_LOCK(*bo); > + svnodestate = 13; > if (((*bo)->bo_flag & BO_ONWORKLST) != 0) { > /* > * Put us back on the worklist. The worklist > * routine will remove us from our current > * position and then add us back in at a later > * position. > */ > + svnodestate = 14; > vn_syncer_add_to_worklist(*bo, syncdelay); > } > + svnodestate = 15; > BO_UNLOCK(*bo); > + svnodestate = 16; > vdrop(vp); > + svnodestate = 17; > mtx_lock(&sync_mtx); > + svnodestate = 18; > return (0); > } > > static int first_printf = 1; > > +volatile unsigned schedstate = 0; > +volatile unsigned schedloop = 0; > + > /* > * System filesystem synchronizer daemon. > */ > static void > sched_sync(void) > { > struct synclist *next, *slp; > struct bufobj *bo; > long starttime; > struct thread *td = curthread; > int last_work_seen; > int net_worklist_len; > int syncer_final_iter; > int error; > > last_work_seen = 0; > syncer_final_iter = 0; > syncer_state = SYNCER_RUNNING; > starttime = time_uptime; > td->td_pflags |= TDP_NORUNNINGBUF; > > EVENTHANDLER_REGISTER(shutdown_pre_sync, syncer_shutdown, td->td_proc, > SHUTDOWN_PRI_LAST); > > + schedstate = 0; > mtx_lock(&sync_mtx); > for (;;) { > + schedstate = 1; > + > if (syncer_state == SYNCER_FINAL_DELAY && > syncer_final_iter == 0) { > mtx_unlock(&sync_mtx); > kproc_suspend_check(td->td_proc); > mtx_lock(&sync_mtx); > } > + schedstate = 2; > + > net_worklist_len = syncer_worklist_len - sync_vnode_count; > if (syncer_state != SYNCER_RUNNING && > starttime != time_uptime) { > if (first_printf) { > printf("\nSyncing disks, vnodes remaining..."); > first_printf = 0; > } > printf("%d ", net_worklist_len); > } > starttime = time_uptime; > > /* > * Push files whose dirty time has expired. Be careful > * of interrupt race on slp queue. > * > * Skip over empty worklist slots when shutting down. > */ > do { > + schedstate = 3; > slp = &syncer_workitem_pending[syncer_delayno]; > syncer_delayno += 1; > if (syncer_delayno == syncer_maxdelay) > syncer_delayno = 0; > next = &syncer_workitem_pending[syncer_delayno]; > /* > * If the worklist has wrapped since the > * it was emptied of all but syncer vnodes, > * switch to the FINAL_DELAY state and run > * for one more second. > */ > if (syncer_state == SYNCER_SHUTTING_DOWN && > net_worklist_len == 0 && > last_work_seen == syncer_delayno) { > syncer_state = SYNCER_FINAL_DELAY; > syncer_final_iter = SYNCER_SHUTDOWN_SPEEDUP; > } > } while (syncer_state != SYNCER_RUNNING && LIST_EMPTY(slp) && > syncer_worklist_len > 0); > > + schedstate = 4; > + > /* > * Keep track of the last time there was anything > * on the worklist other than syncer vnodes. > * Return to the SHUTTING_DOWN state if any > * new work appears. > */ > if (net_worklist_len > 0 || syncer_state == SYNCER_RUNNING) > last_work_seen = syncer_delayno; > if (net_worklist_len > 0 && syncer_state == SYNCER_FINAL_DELAY) > syncer_state = SYNCER_SHUTTING_DOWN; > + schedloop=0; > while (!LIST_EMPTY(slp)) { > + schedstate = 5; > + schedloop++; > + > error = sync_vnode(slp, &bo, td); > if (error == 1) { > LIST_REMOVE(bo, bo_synclist); > LIST_INSERT_HEAD(next, bo, bo_synclist); > continue; > } > + schedstate = 6; > > if (first_printf == 0) { > + schedstate = 7; > /* > * Drop the sync mutex, because some watchdog > * drivers need to sleep while patting > */ > mtx_unlock(&sync_mtx); > wdog_kern_pat(WD_LASTVAL); > mtx_lock(&sync_mtx); > } > > + schedstate = 8; > } > + schedstate = 9; > + > if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter > 0) > syncer_final_iter--; > /* > * The variable rushjob allows the kernel to speed up the > * processing of the filesystem syncer process. A rushjob > * value of N tells the filesystem syncer to process the next > * N seconds worth of work on its queue ASAP. Currently rushjob > * is used by the soft update code to speed up the filesystem > * syncer process when the incore state is getting so far > * ahead of the disk that the kernel memory pool is being > * threatened with exhaustion. > */ > if (rushjob > 0) { > rushjob -= 1; > continue; > } > /* > * Just sleep for a short period of time between > * iterations when shutting down to allow some I/O > * to happen. > * > * If it has taken us less than a second to process the > * current work, then wait. Otherwise start right over > * again. We can still lose time if any single round > * takes more than two seconds, but it does not really > * matter as we are just trying to generally pace the > * filesystem activity. > */ > + > + schedstate = 10; > + > if (syncer_state != SYNCER_RUNNING || > time_uptime == starttime) { > + schedstate = 11; > thread_lock(td); > sched_prio(td, PPAUSE); > thread_unlock(td); > } > - if (syncer_state != SYNCER_RUNNING) > + schedstate = 12; > + > + if (syncer_state != SYNCER_RUNNING) { > + schedstate = 13; > + > cv_timedwait(&sync_wakeup, &sync_mtx, > hz / SYNCER_SHUTDOWN_SPEEDUP); > - else if (time_uptime == starttime) > + } > + else if (time_uptime == starttime) { > + schedstate = 14; > cv_timedwait(&sync_wakeup, &sync_mtx, hz); > + } > + schedstate = 15; > } > + schedstate = 16; > } > > /* > * Request the syncer daemon to speed up its work. > * We never push it to speed up more than half of its > * normal turn time, otherwise it could take over the cpu. > */ > int > speedup_syncer(void) > { > int ret = 0; > > mtx_lock(&sync_mtx); > if (rushjob < syncdelay / 2) { > rushjob += 1; >
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAJ-VmomOeSyQGxKPvaa9TiBENCcdvTJiTLvSi2FfbCH9c6CGAA>