Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 23 Sep 2011 21:27:01 -0400 (EDT)
From:      Rick Macklem <rmacklem@uoguelph.ca>
To:        Clinton Adams <clinton.adams@gmail.com>
Cc:        freebsd-fs@freebsd.org
Subject:   Re: kernel panics with RPCSEC_GSS
Message-ID:  <1461855405.1940757.1316827621857.JavaMail.root@erie.cs.uoguelph.ca>
In-Reply-To: <CAEuopLYJ6fvZ_7hCHQdjKgdNC39kzK1DwyGib7XvvrqE5Pqozw@mail.gmail.com>

next in thread | previous in thread | raw e-mail | index | archive | help

[-- Attachment #1 --]
Clinton Adams wrote:
> Hi,
> 
> On 8.2-RELEASE-p2, kernel occasionally panics during relatively high
> nfs usage (usually morning logins). Frequency of crashes has
> decreased as we have reduced the number of clients, about twice a week
> with 10 clients versus daily with 15.
> 
> Server is running nfsv4 with mit kerberos, clients are linux (ubuntu
> 10.04).
> 
> Backtraces from last 2 cores -
> 
> #1 0xffffffff805cbb5e in boot (howto=260)
> at /usr/src/sys/kern/kern_shutdown.c:419
> #2 0xffffffff805cbf91 in panic (fmt=Variable "fmt" is not available.
> ) at /usr/src/sys/kern/kern_shutdown.c:592
> #3 0xffffffff808d25c0 in trap_fatal (frame=0xc, eva=Variable "eva" is
> not available.
> )
> at /usr/src/sys/amd64/amd64/trap.c:783
> #4 0xffffffff808d299f in trap_pfault (frame=0xffffff8096bb7790,
> usermode=0)
> at /usr/src/sys/amd64/amd64/trap.c:699
> #5 0xffffffff808d2e7f in trap (frame=0xffffff8096bb7790)
> at /usr/src/sys/amd64/amd64/trap.c:449
> #6 0xffffffff808baf74 in calltrap ()
> at /usr/src/sys/amd64/amd64/exception.S:224
> #7 0xffffffff807db8d8 in svc_rpc_gss_forget_client (client=0x0)
> at /usr/src/sys/rpc/rpcsec_gss/svc_rpcsec_gss.c:616

Oops, I realized that if multiple threads made the call at line 642
concurrently, they could try to remove the same client from the tailq twice.

Please try this attached patch instead of the one I posted a few
minutes ago (I think it avoids this race).

Thanks for reporting this and please let us know if this patch
helps, rick

> #8 0xffffffff807dc1c3 in svc_rpc_gss (rqst=0xffffff005708c000,
> msg=0xffffff8096bb7b20) at
> /usr/src/sys/rpc/rpcsec_gss/svc_rpcsec_gss.c:642
> #9 0xffffffff807d49d3 in svc_run_internal (pool=0xffffff003d03d600,
> ismaster=0) at /usr/src/sys/rpc/svc.c:837
> #10 0xffffffff807d518b in svc_thread_start (arg=Variable "arg" is not
> available.
> )
> at /usr/src/sys/rpc/svc.c:1200
> #11 0xffffffff805a2798 in fork_exit (
> callout=0xffffffff807d5180 <svc_thread_start>, arg=0xffffff003d03d600,
> frame=0xffffff8096bb7c40) at /usr/src/sys/kern/kern_fork.c:845
> #12 0xffffffff808bb43e in fork_trampoline ()
> at /usr/src/sys/amd64/amd64/exception.S:565
> 
> 
> #1 0xffffffff805cbabe in boot (howto=260)
> at /usr/src/sys/kern/kern_shutdown.c:419
> #2 0xffffffff805cbed3 in panic (fmt=0x0)
> at /usr/src/sys/kern/kern_shutdown.c:592
> #3 0xffffffff808d239d in trap_fatal (frame=0xffffff0004c89460,
> eva=Variable "eva" is not available.
> )
> at /usr/src/sys/amd64/amd64/trap.c:783
> #4 0xffffffff808d275f in trap_pfault (frame=0xffffff8096c0d790,
> usermode=0)
> at /usr/src/sys/amd64/amd64/trap.c:699
> #5 0xffffffff808d2b5f in trap (frame=0xffffff8096c0d790)
> at /usr/src/sys/amd64/amd64/trap.c:449
> #6 0xffffffff808bada4 in calltrap ()
> at /usr/src/sys/amd64/amd64/exception.S:224
> #7 0xffffffff807db856 in svc_rpc_gss_forget_client
> (client=0xffffff001c015200)
> at atomic.h:158
> #8 0xffffffff807dc0e3 in svc_rpc_gss (rqst=0xffffff0004a24000,
> msg=0xffffff8096c0db20) at
> /usr/src/sys/rpc/rpcsec_gss/svc_rpcsec_gss.c:642
> #9 0xffffffff807d48f3 in svc_run_internal (pool=0xffffff0004ca6200,
> ismaster=0) at /usr/src/sys/rpc/svc.c:837
> #10 0xffffffff807d50ab in svc_thread_start (arg=Variable "arg" is not
> available.
> )
> at /usr/src/sys/rpc/svc.c:1200
> #11 0xffffffff805a26f8 in fork_exit (
> callout=0xffffffff807d50a0 <svc_thread_start>, arg=0xffffff0004ca6200,
> frame=0xffffff8096c0dc40) at /usr/src/sys/kern/kern_fork.c:845
> #12 0xffffffff808bb26e in fork_trampoline ()
> at /usr/src/sys/amd64/amd64/exception.S:565
> 
> Kernel is generic except for
> device crypto
> options KGSSAPI.
> 
> Ash /etc/make.conf
> WITHOUT_X11=yes
> KRB5_HOME=/usr/local
> KRB5_IMPL=mit
> # added by use.perl 2011-09-02 11:38:57
> PERL_VERSION=5.10.1
> 
> I'm happy to provide any additional info.
> 
> Thanks for any help,
> Clinton
> _______________________________________________
> freebsd-fs@freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-fs
> To unsubscribe, send any mail to "freebsd-fs-unsubscribe@freebsd.org"

[-- Attachment #2 --]
--- rpc/rpcsec_gss/svc_rpcsec_gss.c.sav	2011-09-23 20:14:07.000000000 -0400
+++ rpc/rpcsec_gss/svc_rpcsec_gss.c	2011-09-23 21:16:34.000000000 -0400
@@ -625,6 +625,23 @@ svc_rpc_gss_forget_client(struct svc_rpc
 	svc_rpc_gss_release_client(client);
 }
 
+/*
+ * Same as above, except that this one expects svc_rpc_gss_lock to
+ * be held when it is called. It releases this lock.
+ */
+static void
+svc_rpc_gss_forget_client_locked(struct svc_rpc_gss_client *client)
+{
+	struct svc_rpc_gss_client_list *list;
+
+	list = &svc_rpc_gss_client_hash[client->cl_id.ci_id % CLIENT_HASH_SIZE];
+	TAILQ_REMOVE(list, client, cl_link);
+	TAILQ_REMOVE(&svc_rpc_gss_clients, client, cl_alllink);
+	svc_rpc_gss_client_count--;
+	sx_xunlock(&svc_rpc_gss_lock);
+	svc_rpc_gss_release_client(client);
+}
+
 static void
 svc_rpc_gss_timeout_clients(void)
 {
@@ -638,16 +655,23 @@ svc_rpc_gss_timeout_clients(void)
 	 * First enforce the max client limit. We keep
 	 * svc_rpc_gss_clients in LRU order.
 	 */
-	while (svc_rpc_gss_client_count > CLIENT_MAX)
-		svc_rpc_gss_forget_client(TAILQ_LAST(&svc_rpc_gss_clients,
-			    svc_rpc_gss_client_list));
+	sx_xlock(&svc_rpc_gss_lock);
+	client = TAILQ_LAST(&svc_rpc_gss_clients, svc_rpc_gss_client_list);
+	while (svc_rpc_gss_client_count > CLIENT_MAX && client != NULL) {
+		svc_rpc_gss_forget_client_locked(client); /* releases lock */
+		sx_xlock(&svc_rpc_gss_lock);
+		client = TAILQ_LAST(&svc_rpc_gss_clients,
+		    svc_rpc_gss_client_list);
+	}
 	TAILQ_FOREACH_SAFE(client, &svc_rpc_gss_clients, cl_alllink, nclient) {
 		if (client->cl_state == CLIENT_STALE
 		    || now > client->cl_expiration) {
 			rpc_gss_log_debug("expiring client %p", client);
-			svc_rpc_gss_forget_client(client);
+			svc_rpc_gss_forget_client_locked(client);
+			sx_xlock(&svc_rpc_gss_lock);
 		}
 	}
+	sx_xunlock(&svc_rpc_gss_lock);
 }
 
 #ifdef DEBUG

Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?1461855405.1940757.1316827621857.JavaMail.root>