From owner-svn-src-head@FreeBSD.ORG  Fri Oct  7 01:15:04 2011
Delivered-To: svn-src-head@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id 76B62106564A;
	Fri, 7 Oct 2011 01:15:04 +0000 (UTC)
	(envelope-from rmacklem@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id 577238FC0A;
	Fri, 7 Oct 2011 01:15:04 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id p971F4en085536;
	Fri, 7 Oct 2011 01:15:04 GMT
	(envelope-from rmacklem@svn.freebsd.org)
Received: (from rmacklem@localhost)
	by svn.freebsd.org (8.14.4/8.14.4/Submit) id p971F4In085534;
	Fri, 7 Oct 2011 01:15:04 GMT
	(envelope-from rmacklem@svn.freebsd.org)
Message-Id: <201110070115.p971F4In085534@svn.freebsd.org>
From: Rick Macklem
Date: Fri, 7 Oct 2011 01:15:04 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
X-SVN-Group: head
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Subject: svn commit: r226081 - head/sys/rpc/rpcsec_gss
X-BeenThere: svn-src-head@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for the src tree for head/-current
X-List-Received-Date: Fri, 07 Oct 2011 01:15:04 -0000

Author: rmacklem
Date: Fri Oct  7 01:15:04 2011
New Revision: 226081
URL: http://svn.freebsd.org/changeset/base/226081

Log:
  A crash reported on freebsd-fs@ on Sep. 23, 2011 under the subject heading
  "kernel panics with RPCSEC_GSS" appears to be caused by a corrupted tailq
  list for the client structure. Looking at the code, calls to the function
  svc_rpc_gss_forget_client() were done in an SMP unsafe manner, with the
  svc_rpc_gss_lock only being acquired in the function and not before it.
  As such, when multiple threads called svc_rpc_gss_forget_client()
  concurrently, it could try and remove the same client structure from the
  tailq lists multiple times. The patch fixes this by moving the critical
  code into a separate function called svc_rpc_gss_forget_client_locked(),
  which must be called with the lock held. For the one case where the caller
  would have no interest in the lock, svc_rpc_gss_forget_client() was
  retained, but a loop was added to check that the client structure is still
  in the tailq lists before removing it, to make it safe for multiple
  concurrent calls.

  Tested by:	clinton.adams at gmail.com (earlier version)
  Reviewed by:	zkirsch
  MFC after:	3 days

Modified:
  head/sys/rpc/rpcsec_gss/svc_rpcsec_gss.c

Modified: head/sys/rpc/rpcsec_gss/svc_rpcsec_gss.c
==============================================================================
--- head/sys/rpc/rpcsec_gss/svc_rpcsec_gss.c	Fri Oct  7 00:20:07 2011	(r226080)
+++ head/sys/rpc/rpcsec_gss/svc_rpcsec_gss.c	Fri Oct  7 01:15:04 2011	(r226081)
@@ -609,27 +609,52 @@ svc_rpc_gss_release_client(struct svc_rp
 }
 
 /*
- * Remove a client from our global lists and free it if we can.
+ * Remove a client from our global lists.
+ * Must be called with svc_rpc_gss_lock held.
  */
 static void
-svc_rpc_gss_forget_client(struct svc_rpc_gss_client *client)
+svc_rpc_gss_forget_client_locked(struct svc_rpc_gss_client *client)
 {
 	struct svc_rpc_gss_client_list *list;
 
+	sx_assert(&svc_rpc_gss_lock, SX_XLOCKED);
 	list = &svc_rpc_gss_client_hash[client->cl_id.ci_id % CLIENT_HASH_SIZE];
-	sx_xlock(&svc_rpc_gss_lock);
 	TAILQ_REMOVE(list, client, cl_link);
 	TAILQ_REMOVE(&svc_rpc_gss_clients, client, cl_alllink);
 	svc_rpc_gss_client_count--;
+}
+
+/*
+ * Remove a client from our global lists and free it if we can.
+ */
+static void
+svc_rpc_gss_forget_client(struct svc_rpc_gss_client *client)
+{
+	struct svc_rpc_gss_client_list *list;
+	struct svc_rpc_gss_client *tclient;
+
+	list = &svc_rpc_gss_client_hash[client->cl_id.ci_id % CLIENT_HASH_SIZE];
+	sx_xlock(&svc_rpc_gss_lock);
+	TAILQ_FOREACH(tclient, list, cl_link) {
+		/*
+		 * Make sure this client has not already been removed
+		 * from the lists by svc_rpc_gss_forget_client() or
+		 * svc_rpc_gss_forget_client_locked() already.
+		 */
+		if (client == tclient) {
+			svc_rpc_gss_forget_client_locked(client);
+			sx_xunlock(&svc_rpc_gss_lock);
+			svc_rpc_gss_release_client(client);
+			return;
+		}
+	}
 	sx_xunlock(&svc_rpc_gss_lock);
-	svc_rpc_gss_release_client(client);
 }
 
 static void
 svc_rpc_gss_timeout_clients(void)
 {
 	struct svc_rpc_gss_client *client;
-	struct svc_rpc_gss_client *nclient;
 	time_t now = time_uptime;
 
 	rpc_gss_log_debug("in svc_rpc_gss_timeout_clients()");
@@ -638,16 +663,29 @@ svc_rpc_gss_timeout_clients(void)
 	 * First enforce the max client limit. We keep
 	 * svc_rpc_gss_clients in LRU order.
 	 */
-	while (svc_rpc_gss_client_count > CLIENT_MAX)
-		svc_rpc_gss_forget_client(TAILQ_LAST(&svc_rpc_gss_clients,
-		    svc_rpc_gss_client_list));
-	TAILQ_FOREACH_SAFE(client, &svc_rpc_gss_clients, cl_alllink, nclient) {
+	sx_xlock(&svc_rpc_gss_lock);
+	client = TAILQ_LAST(&svc_rpc_gss_clients, svc_rpc_gss_client_list);
+	while (svc_rpc_gss_client_count > CLIENT_MAX && client != NULL) {
+		svc_rpc_gss_forget_client_locked(client);
+		sx_xunlock(&svc_rpc_gss_lock);
+		svc_rpc_gss_release_client(client);
+		sx_xlock(&svc_rpc_gss_lock);
+		client = TAILQ_LAST(&svc_rpc_gss_clients,
+		    svc_rpc_gss_client_list);
+	}
+again:
+	TAILQ_FOREACH(client, &svc_rpc_gss_clients, cl_alllink) {
 		if (client->cl_state == CLIENT_STALE
 		    || now > client->cl_expiration) {
+			svc_rpc_gss_forget_client_locked(client);
+			sx_xunlock(&svc_rpc_gss_lock);
 			rpc_gss_log_debug("expiring client %p", client);
-			svc_rpc_gss_forget_client(client);
+			svc_rpc_gss_release_client(client);
+			sx_xlock(&svc_rpc_gss_lock);
+			goto again;
 		}
 	}
+	sx_xunlock(&svc_rpc_gss_lock);
 }
 
 #ifdef DEBUG
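
The pattern the commit applies is the usual locked/unlocked split: the list
manipulation moves into a _locked() helper that requires the caller to hold
the lock, while the unlocked wrapper takes the lock and re-checks that the
entry is still on the list before removing it, so two threads racing on the
same client cannot both unlink it. Below is a minimal userspace sketch of
that pattern only, not the kernel code itself: a pthread mutex stands in for
the kernel sx lock, and the structure and function names (client,
forget_client, forget_client_locked) are illustrative stand-ins for the
svc_rpc_gss ones.

/*
 * Minimal, illustrative userspace sketch of the locking pattern used in
 * r226081 (not the actual kernel code): a _locked() helper that requires
 * the list lock, plus an unlocked wrapper that re-checks list membership
 * under the lock so concurrent callers cannot remove the same entry twice.
 * A pthread mutex stands in for the kernel sx lock.
 */
#include <pthread.h>
#include <stdlib.h>
#include <sys/queue.h>

struct client {
	TAILQ_ENTRY(client) link;
};

static TAILQ_HEAD(client_head, client) clients =
    TAILQ_HEAD_INITIALIZER(clients);
static pthread_mutex_t clients_lock = PTHREAD_MUTEX_INITIALIZER;

/* Unlink a client from the list.  Caller must hold clients_lock. */
static void
forget_client_locked(struct client *cl)
{

	TAILQ_REMOVE(&clients, cl, link);
}

/*
 * Unlocked wrapper, safe for concurrent callers: only the caller that
 * still finds the client on the list unlinks and frees it; any racing
 * caller finds it already gone and simply drops the lock.
 */
static void
forget_client(struct client *cl)
{
	struct client *tcl;

	pthread_mutex_lock(&clients_lock);
	TAILQ_FOREACH(tcl, &clients, link) {
		if (tcl == cl) {
			forget_client_locked(cl);
			pthread_mutex_unlock(&clients_lock);
			/*
			 * The real code drops a reference count here via
			 * svc_rpc_gss_release_client(); freeing directly
			 * keeps the sketch short.
			 */
			free(cl);
			return;
		}
	}
	pthread_mutex_unlock(&clients_lock);
}

The timeout path in the diff above uses the same helper: the list lock is
held across the scan, dropped only around svc_rpc_gss_release_client() for
each expired client, and the scan is restarted with goto again because other
threads may have modified the list while the lock was released.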