Date:        Wed, 11 Nov 2015 01:05:18 +0700
From:        Eugene Grosbein <eugen@grosbein.net>
To:          Adrian Chadd <adrian@freebsd.org>
Cc:          "freebsd-mips@freebsd.org" <freebsd-mips@freebsd.org>
Subject:     Re: USB-related problem
Message-ID:  <564231DE.7090308@grosbein.net>
In-Reply-To: <CAJ-VmomA27NcNCjx00DrZzFOU9bGYucUhbPXodH2uvNd8eJ3wg@mail.gmail.com>
References:  <56348063.3090508@grosbein.net>
 <CAJ-VmomtJ9eKfAJYfn4e2S1xWN8-YHo0M0KH-9V=VjNBf6vVVA@mail.gmail.com>
 <563707A0.3040700@grosbein.net>
 <CANCZdfrB0hkjmTU-9NzimBv=X_h-=bF0D2azBxg=9B=kmitv7Q@mail.gmail.com>
 <56370E1D.3040801@grosbein.net>
 <CAJ-Vmo=0vOAq8db_GeLWmdXr7xJdzUh44ZZJrQ9vVdpvzT9hiQ@mail.gmail.com>
 <563F5630.2000407@grosbein.net>
 <563F6F6F.1010807@grosbein.net>
 <CAJ-Vmo=fPSi7yZO5Xjodg8HPtTLd44Y9Y_8qg4EgTGwEpHO10A@mail.gmail.com>
 <563F91A8.9080702@grosbein.net>
 <CAJ-VmomUvoUerMS20qQsQujcjULVA=_jaLp9Mh3fU1fEpdwzZA@mail.gmail.com>
 <CAJ-Vmo=eUUZ928KgQbyOi8EdDFSmxhjvDOyAMvfXsqwDbO96ng@mail.gmail.com>
 <5640C0FD.2040803@grosbein.net>
 <CAJ-Vmo=6mztfvvBd91LPO5H418K8vW=+Lk=6V5Z_y5DHu7v7HA@mail.gmail.com>
 <5640F315.5020303@grosbein.net>
 <CAJ-VmokpWM=d+tEFv8a8eU91UimVZ9W8da2QkKCTDjd+2ZM_LQ@mail.gmail.com>
 <56410214.3070901@grosbein.net>
 <CAJ-Vmo=QUUkTfQ7pvr_V+CQ8zQWOoqp3H8hD9LUR8C5U-5N=Ag@mail.gmail.com>
 <CAJ-VmomA27NcNCjx00DrZzFOU9bGYucUhbPXodH2uvNd8eJ3wg@mail.gmail.com>
On 10.11.2015 04:43, Adrian Chadd wrote:
> Hi,
>
> How much C do you know? Are you able to add some debugging to
> sched_sync() (which IIRC is the syncer thread) and work out where it's
> getting stuck and spinning?

I did. I added a new global kernel symbol, the variable schedstate, so I
can check its value using ddb. It always equals 5 when the deadlock
occurs, revealing the loop:

--- sys/kern/vfs_subr.c.orig	2015-10-08 23:33:53.935440000 +0700
+++ sys/kern/vfs_subr.c	2015-11-11 01:01:06.248144000 +0700
@@ -1786,169 +1786,199 @@ sync_vnode(struct synclist *slp, struct
 		 * Put us back on the worklist.  The worklist
 		 * routine will remove us from our current
 		 * position and then add us back in at a later
 		 * position.
 		 */
 		vn_syncer_add_to_worklist(*bo, syncdelay);
 	}
 	BO_UNLOCK(*bo);
 	vdrop(vp);
 	mtx_lock(&sync_mtx);
 	return (0);
 }
 
 static int first_printf = 1;
 
+volatile unsigned schedstate = 0;
+
 /*
  * System filesystem synchronizer daemon.
  */
 static void
 sched_sync(void)
 {
 	struct synclist *next, *slp;
 	struct bufobj *bo;
 	long starttime;
 	struct thread *td = curthread;
 	int last_work_seen;
 	int net_worklist_len;
 	int syncer_final_iter;
 	int error;
 
 	last_work_seen = 0;
 	syncer_final_iter = 0;
 	syncer_state = SYNCER_RUNNING;
 	starttime = time_uptime;
 	td->td_pflags |= TDP_NORUNNINGBUF;
 
 	EVENTHANDLER_REGISTER(shutdown_pre_sync, syncer_shutdown, td->td_proc,
 	    SHUTDOWN_PRI_LAST);
 
+	schedstate = 0;
 	mtx_lock(&sync_mtx);
 	for (;;) {
+		schedstate = 1;
+
 		if (syncer_state == SYNCER_FINAL_DELAY &&
 		    syncer_final_iter == 0) {
 			mtx_unlock(&sync_mtx);
 			kproc_suspend_check(td->td_proc);
 			mtx_lock(&sync_mtx);
 		}
+		schedstate = 2;
+
 		net_worklist_len = syncer_worklist_len - sync_vnode_count;
 		if (syncer_state != SYNCER_RUNNING &&
 		    starttime != time_uptime) {
 			if (first_printf) {
 				printf("\nSyncing disks, vnodes remaining...");
 				first_printf = 0;
 			}
 			printf("%d ", net_worklist_len);
 		}
 		starttime = time_uptime;
 
 		/*
 		 * Push files whose dirty time has expired.  Be careful
 		 * of interrupt race on slp queue.
 		 *
 		 * Skip over empty worklist slots when shutting down.
 		 */
 		do {
+			schedstate = 3;
 			slp = &syncer_workitem_pending[syncer_delayno];
 			syncer_delayno += 1;
 			if (syncer_delayno == syncer_maxdelay)
 				syncer_delayno = 0;
 			next = &syncer_workitem_pending[syncer_delayno];
 			/*
 			 * If the worklist has wrapped since the
 			 * it was emptied of all but syncer vnodes,
 			 * switch to the FINAL_DELAY state and run
 			 * for one more second.
 			 */
 			if (syncer_state == SYNCER_SHUTTING_DOWN &&
 			    net_worklist_len == 0 &&
 			    last_work_seen == syncer_delayno) {
 				syncer_state = SYNCER_FINAL_DELAY;
 				syncer_final_iter = SYNCER_SHUTDOWN_SPEEDUP;
 			}
 		} while (syncer_state != SYNCER_RUNNING && LIST_EMPTY(slp) &&
 		    syncer_worklist_len > 0);
+		schedstate = 4;
+
 		/*
 		 * Keep track of the last time there was anything
 		 * on the worklist other than syncer vnodes.
 		 * Return to the SHUTTING_DOWN state if any
 		 * new work appears.
 		 */
 		if (net_worklist_len > 0 || syncer_state == SYNCER_RUNNING)
 			last_work_seen = syncer_delayno;
 		if (net_worklist_len > 0 && syncer_state == SYNCER_FINAL_DELAY)
 			syncer_state = SYNCER_SHUTTING_DOWN;
 		while (!LIST_EMPTY(slp)) {
+			schedstate = 5;
+
 			error = sync_vnode(slp, &bo, td);
 			if (error == 1) {
 				LIST_REMOVE(bo, bo_synclist);
 				LIST_INSERT_HEAD(next, bo, bo_synclist);
 				continue;
 			}
 
+			schedstate = 6;
 			if (first_printf == 0) {
+				schedstate = 7;
 				/*
 				 * Drop the sync mutex, because some watchdog
 				 * drivers need to sleep while patting
 				 */
 				mtx_unlock(&sync_mtx);
 				wdog_kern_pat(WD_LASTVAL);
 				mtx_lock(&sync_mtx);
 			}
+			schedstate = 8;
 
 		}
+		schedstate = 9;
+
 		if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter > 0)
 			syncer_final_iter--;
 		/*
 		 * The variable rushjob allows the kernel to speed up the
 		 * processing of the filesystem syncer process. A rushjob
 		 * value of N tells the filesystem syncer to process the next
 		 * N seconds worth of work on its queue ASAP. Currently rushjob
 		 * is used by the soft update code to speed up the filesystem
 		 * syncer process when the incore state is getting so far
 		 * ahead of the disk that the kernel memory pool is being
 		 * threatened with exhaustion.
 		 */
 		if (rushjob > 0) {
 			rushjob -= 1;
 			continue;
 		}
 		/*
 		 * Just sleep for a short period of time between
 		 * iterations when shutting down to allow some I/O
 		 * to happen.
 		 *
 		 * If it has taken us less than a second to process the
 		 * current work, then wait. Otherwise start right over
 		 * again. We can still lose time if any single round
 		 * takes more than two seconds, but it does not really
 		 * matter as we are just trying to generally pace the
 		 * filesystem activity.
 		 */
+
+		schedstate = 10;
+
 		if (syncer_state != SYNCER_RUNNING ||
 		    time_uptime == starttime) {
+			schedstate = 11;
 			thread_lock(td);
 			sched_prio(td, PPAUSE);
 			thread_unlock(td);
 		}
-		if (syncer_state != SYNCER_RUNNING)
+		schedstate = 12;
+
+		if (syncer_state != SYNCER_RUNNING) {
+			schedstate = 13;
+
 			cv_timedwait(&sync_wakeup, &sync_mtx,
 			    hz / SYNCER_SHUTDOWN_SPEEDUP);
-		else if (time_uptime == starttime)
+		}
+		else if (time_uptime == starttime) {
+			schedstate = 14;
 			cv_timedwait(&sync_wakeup, &sync_mtx, hz);
+		}
+		schedstate = 15;
 	}
+	schedstate = 16;
 }
 
 /*
  * Request the syncer daemon to speed up its work.
  * We never push it to speed up more than half of its
  * normal turn time, otherwise it could take over the cpu.
  */
 int
 speedup_syncer(void)
 {
 	int ret = 0;
 
 	mtx_lock(&sync_mtx);
 	if (rushjob < syncdelay / 2) {
 		rushjob += 1;
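
Since schedstate is a plain exported symbol, it can be read at the ddb
prompt with the examine command (e.g. "x/d schedstate") once the box is
wedged. If breaking into ddb is inconvenient, the same counter could also
be published read-only via sysctl. The following is only a sketch of that
idea, not part of the patch above; the sysctl name and its placement next
to the variable in vfs_subr.c are my own:

/* Hypothetical addition for illustration only; not in the patch above. */
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

/*
 * Publish the syncer loop marker read-only under debug.*, so it can be
 * polled with "sysctl debug.syncer_schedstate" while the system is still
 * responsive.  __DEVOLATILE() drops the volatile qualifier, which
 * SYSCTL_UINT() does not accept.
 */
SYSCTL_UINT(_debug, OID_AUTO, syncer_schedstate, CTLFLAG_RD,
    __DEVOLATILE(u_int *, &schedstate), 0,
    "last marker reached by the sched_sync() loop");

Either way, a steady reading of 5 matches what ddb shows here: the marker
set at the top of the while (!LIST_EMPTY(slp)) pass around sync_vnode(),
which appears to be where the syncer thread never makes progress.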