FreeBSD Mail Archives

Date:      Wed, 11 Nov 2015 01:05:18 +0700
From:      Eugene Grosbein <eugen@grosbein.net>
To:        Adrian Chadd <adrian@freebsd.org>
Cc:        "freebsd-mips@freebsd.org" <freebsd-mips@freebsd.org>
Subject:   Re: USB-related problem
Message-ID:  <564231DE.7090308@grosbein.net>
In-Reply-To: <CAJ-VmomA27NcNCjx00DrZzFOU9bGYucUhbPXodH2uvNd8eJ3wg@mail.gmail.com>
References:  <56348063.3090508@grosbein.net> <CAJ-VmomtJ9eKfAJYfn4e2S1xWN8-YHo0M0KH-9V=VjNBf6vVVA@mail.gmail.com> <563707A0.3040700@grosbein.net> <CANCZdfrB0hkjmTU-9NzimBv=X_h-=bF0D2azBxg=9B=kmitv7Q@mail.gmail.com> <56370E1D.3040801@grosbein.net> <CAJ-Vmo=0vOAq8db_GeLWmdXr7xJdzUh44ZZJrQ9vVdpvzT9hiQ@mail.gmail.com> <563F5630.2000407@grosbein.net> <563F6F6F.1010807@grosbein.net> <CAJ-Vmo=fPSi7yZO5Xjodg8HPtTLd44Y9Y_8qg4EgTGwEpHO10A@mail.gmail.com> <563F91A8.9080702@grosbein.net> <CAJ-VmomUvoUerMS20qQsQujcjULVA=_jaLp9Mh3fU1fEpdwzZA@mail.gmail.com> <CAJ-Vmo=eUUZ928KgQbyOi8EdDFSmxhjvDOyAMvfXsqwDbO96ng@mail.gmail.com> <5640C0FD.2040803@grosbein.net> <CAJ-Vmo=6mztfvvBd91LPO5H418K8vW=%2BLk=6V5Z_y5DHu7v7HA@mail.gmail.com> <5640F315.5020303@grosbein.net> <CAJ-VmokpWM=d%2BtEFv8a8eU91UimVZ9W8da2QkKCTDjd%2B2ZM_LQ@mail.gmail.com> <56410214.3070901@grosbein.net> <CAJ-Vmo=QUUkTfQ7pvr_V%2BCQ8zQWOoqp3H8hD9LUR8C5U-5N=Ag@mail.gmail.com> <CAJ-VmomA27NcNCjx00DrZzFOU9bGYucUhbPXodH2uvNd8eJ3wg@mail.gmail.com>

index | next in thread | previous in thread | raw e-mail


On 10.11.2015 04:43, Adrian Chadd wrote:
> Hi,
> 
> How much C do you know? Are you able to add some debugging to
> sched_sync() (which IIRC is the syncer thread) and work out where it's
> getting stuck and spinning?

I did. I've added a new global kernel symbol, the variable schedstate,
so I can check its value using ddb. It is always equal to 5
when the deadlock occurs, which reveals the loop:

--- sys/kern/vfs_subr.c.orig	2015-10-08 23:33:53.935440000 +0700
+++ sys/kern/vfs_subr.c	2015-11-11 01:01:06.248144000 +0700
@@ -1786,169 +1786,199 @@ sync_vnode(struct synclist *slp, struct 
 		 * Put us back on the worklist.  The worklist
 		 * routine will remove us from our current
 		 * position and then add us back in at a later
 		 * position.
 		 */
 		vn_syncer_add_to_worklist(*bo, syncdelay);
 	}
 	BO_UNLOCK(*bo);
 	vdrop(vp);
 	mtx_lock(&sync_mtx);
 	return (0);
 }
 
 static int first_printf = 1;
 
+volatile unsigned schedstate = 0;
+
 /*
  * System filesystem synchronizer daemon.
  */
 static void
 sched_sync(void)
 {
 	struct synclist *next, *slp;
 	struct bufobj *bo;
 	long starttime;
 	struct thread *td = curthread;
 	int last_work_seen;
 	int net_worklist_len;
 	int syncer_final_iter;
 	int error;
 
 	last_work_seen = 0;
 	syncer_final_iter = 0;
 	syncer_state = SYNCER_RUNNING;
 	starttime = time_uptime;
 	td->td_pflags |= TDP_NORUNNINGBUF;
 
 	EVENTHANDLER_REGISTER(shutdown_pre_sync, syncer_shutdown, td->td_proc,
 	    SHUTDOWN_PRI_LAST);
 
+	schedstate = 0;
 	mtx_lock(&sync_mtx);
 	for (;;) {
+		schedstate = 1;
+
 		if (syncer_state == SYNCER_FINAL_DELAY &&
 		    syncer_final_iter == 0) {
 			mtx_unlock(&sync_mtx);
 			kproc_suspend_check(td->td_proc);
 			mtx_lock(&sync_mtx);
 		}
+		schedstate = 2;
+
 		net_worklist_len = syncer_worklist_len - sync_vnode_count;
 		if (syncer_state != SYNCER_RUNNING &&
 		    starttime != time_uptime) {
 			if (first_printf) {
 				printf("\nSyncing disks, vnodes remaining...");
 				first_printf = 0;
 			}
 			printf("%d ", net_worklist_len);
 		}
 		starttime = time_uptime;
 
 		/*
 		 * Push files whose dirty time has expired.  Be careful
 		 * of interrupt race on slp queue.
 		 *
 		 * Skip over empty worklist slots when shutting down.
 		 */
 		do {
+			schedstate = 3;
 			slp = &syncer_workitem_pending[syncer_delayno];
 			syncer_delayno += 1;
 			if (syncer_delayno == syncer_maxdelay)
 				syncer_delayno = 0;
 			next = &syncer_workitem_pending[syncer_delayno];
 			/*
 			 * If the worklist has wrapped since the
 			 * it was emptied of all but syncer vnodes,
 			 * switch to the FINAL_DELAY state and run
 			 * for one more second.
 			 */
 			if (syncer_state == SYNCER_SHUTTING_DOWN &&
 			    net_worklist_len == 0 &&
 			    last_work_seen == syncer_delayno) {
 				syncer_state = SYNCER_FINAL_DELAY;
 				syncer_final_iter = SYNCER_SHUTDOWN_SPEEDUP;
 			}
 		} while (syncer_state != SYNCER_RUNNING && LIST_EMPTY(slp) &&
 		    syncer_worklist_len > 0);
 
+		schedstate = 4;
+
 		/*
 		 * Keep track of the last time there was anything
 		 * on the worklist other than syncer vnodes.
 		 * Return to the SHUTTING_DOWN state if any
 		 * new work appears.
 		 */
 		if (net_worklist_len > 0 || syncer_state == SYNCER_RUNNING)
 			last_work_seen = syncer_delayno;
 		if (net_worklist_len > 0 && syncer_state == SYNCER_FINAL_DELAY)
 			syncer_state = SYNCER_SHUTTING_DOWN;
 		while (!LIST_EMPTY(slp)) {
+	    		schedstate = 5;
+
 			error = sync_vnode(slp, &bo, td);
 			if (error == 1) {
 				LIST_REMOVE(bo, bo_synclist);
 				LIST_INSERT_HEAD(next, bo, bo_synclist);
 				continue;
 			}
+			schedstate = 6;
 
 			if (first_printf == 0) {
+				schedstate = 7;
 				/*
 				 * Drop the sync mutex, because some watchdog
 				 * drivers need to sleep while patting
 				 */
 				mtx_unlock(&sync_mtx);
 				wdog_kern_pat(WD_LASTVAL);
 				mtx_lock(&sync_mtx);
 			}
 
+			schedstate = 8;
 		}
+		schedstate = 9;
+
 		if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter > 0)
 			syncer_final_iter--;
 		/*
 		 * The variable rushjob allows the kernel to speed up the
 		 * processing of the filesystem syncer process. A rushjob
 		 * value of N tells the filesystem syncer to process the next
 		 * N seconds worth of work on its queue ASAP. Currently rushjob
 		 * is used by the soft update code to speed up the filesystem
 		 * syncer process when the incore state is getting so far
 		 * ahead of the disk that the kernel memory pool is being
 		 * threatened with exhaustion.
 		 */
 		if (rushjob > 0) {
 			rushjob -= 1;
 			continue;
 		}
 		/*
 		 * Just sleep for a short period of time between
 		 * iterations when shutting down to allow some I/O
 		 * to happen.
 		 *
 		 * If it has taken us less than a second to process the
 		 * current work, then wait. Otherwise start right over
 		 * again. We can still lose time if any single round
 		 * takes more than two seconds, but it does not really
 		 * matter as we are just trying to generally pace the
 		 * filesystem activity.
 		 */
+
+		schedstate = 10;
+
 		if (syncer_state != SYNCER_RUNNING ||
 		    time_uptime == starttime) {
+		    	schedstate = 11;
 			thread_lock(td);
 			sched_prio(td, PPAUSE);
 			thread_unlock(td);
 		}
-		if (syncer_state != SYNCER_RUNNING)
+		schedstate = 12;
+
+		if (syncer_state != SYNCER_RUNNING) {
+			schedstate = 13;
+
 			cv_timedwait(&sync_wakeup, &sync_mtx,
 			    hz / SYNCER_SHUTDOWN_SPEEDUP);
-		else if (time_uptime == starttime)
+		}
+		else if (time_uptime == starttime) {
+			schedstate = 14;
 			cv_timedwait(&sync_wakeup, &sync_mtx, hz);
+		}
+		schedstate = 15;
 	}
+	schedstate = 16;
 }
 
 /*
  * Request the syncer daemon to speed up its work.
  * We never push it to speed up more than half of its
  * normal turn time, otherwise it could take over the cpu.
  */
 int
 speedup_syncer(void)
 {
 	int ret = 0;
 
 	mtx_lock(&sync_mtx);
 	if (rushjob < syncdelay / 2) {
 		rushjob += 1;

help

Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?564231DE.7090308>

Header And Logo

Peripheral Links

Site Navigation

Header And Logo

Peripheral Links

Search

Site Navigation