Date: Sun, 19 Jul 2009 14:20:53 +0000 (UTC) From: Robert Watson <rwatson@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r195760 - in head/sys: kern net netgraph netinet netinet6 netipsec sys Message-ID: <200907191420.n6JEKrtL044792@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: rwatson Date: Sun Jul 19 14:20:53 2009 New Revision: 195760 URL: http://svn.freebsd.org/changeset/base/195760 Log: Reimplement and/or implement vnet list locking by replacing a mostly unused custom mutex/condvar-based sleep locks with two locks: an rwlock (for non-sleeping use) and sxlock (for sleeping use). Either acquired for read is sufficient to stabilize the vnet list, but both must be acquired for write to modify the list. Replace previous no-op read locking macros, used in various places in the stack, with actual locking to prevent race conditions. Callers must declare when they may perform unbounded sleeps or not when selecting how to lock. Refactor vnet sysinits so that the vnet list and locks are initialized before kernel modules are linked, as the kernel linker will use them for modules loaded by the boot loader. Update various consumers of these KPIs based on whether they may sleep or not. Reviewed by: bz Approved by: re (kib) Modified: head/sys/kern/kern_vimage.c head/sys/net/if.c head/sys/netgraph/ng_gif.c head/sys/netinet/igmp.c head/sys/netinet/in_pcb.c head/sys/netinet/in_rmx.c head/sys/netinet/ip_input.c head/sys/netinet/tcp_subr.c head/sys/netinet/tcp_timer.c head/sys/netinet6/frag6.c head/sys/netinet6/mld6.c head/sys/netipsec/key.c head/sys/sys/kernel.h head/sys/sys/vimage.h Modified: head/sys/kern/kern_vimage.c ============================================================================== --- head/sys/kern/kern_vimage.c Sun Jul 19 12:57:10 2009 (r195759) +++ head/sys/kern/kern_vimage.c Sun Jul 19 14:20:53 2009 (r195760) @@ -58,23 +58,22 @@ static void vnet_mod_complete_registrati static int vnet_mod_constructor(struct vnet_modlink *); static int vnet_mod_destructor(struct vnet_modlink *); -#define VNET_LIST_WLOCK() \ - mtx_lock(&vnet_list_refc_mtx); \ - while (vnet_list_refc != 0) \ - cv_wait(&vnet_list_condvar, &vnet_list_refc_mtx); +struct rwlock vnet_rwlock; +struct sx vnet_sxlock; -#define VNET_LIST_WUNLOCK() \ - mtx_unlock(&vnet_list_refc_mtx); +#define VNET_LIST_WLOCK() do { \ + sx_xlock(&vnet_sxlock); \ + rw_wlock(&vnet_rwlock); \ +} while (0) + +#define VNET_LIST_WUNLOCK() do { \ + rw_wunlock(&vnet_rwlock); \ + sx_xunlock(&vnet_sxlock); \ +} while (0) struct vnet_list_head vnet_head; - -struct cv vnet_list_condvar; -struct mtx vnet_list_refc_mtx; -int vnet_list_refc = 0; - struct vnet *vnet0; - /* * Move an ifnet to or from another vnet, specified by the jail id. */ @@ -373,16 +372,22 @@ vnet_foreach(void (*vnet_foreach_fn)(str } static void -vi_init(void *unused) +vnet_init_prelink(void *arg) { - TAILQ_INIT(&vnet_modlink_head); - TAILQ_INIT(&vnet_modpending_head); - + rw_init(&vnet_rwlock, "vnet_rwlock"); + sx_init(&vnet_sxlock, "vnet_sxlock"); LIST_INIT(&vnet_head); +} +SYSINIT(vnet_init_prelink, SI_SUB_VNET_PRELINK, SI_ORDER_FIRST, + vnet_init_prelink, NULL); + +static void +vnet0_init(void *arg) +{ - mtx_init(&vnet_list_refc_mtx, "vnet_list_refc_mtx", NULL, MTX_DEF); - cv_init(&vnet_list_condvar, "vnet_list_condvar"); + TAILQ_INIT(&vnet_modlink_head); + TAILQ_INIT(&vnet_modpending_head); /* * We MUST clear curvnet in vi_init_done() before going SMP, @@ -391,9 +396,10 @@ vi_init(void *unused) */ curvnet = prison0.pr_vnet = vnet0 = vnet_alloc(); } +SYSINIT(vnet0_init, SI_SUB_VNET, SI_ORDER_FIRST, vnet0_init, NULL); static void -vi_init_done(void *unused) +vnet_init_done(void *unused) { struct vnet_modlink *vml_iter; @@ -411,8 +417,8 @@ vi_init_done(void *unused) panic("going nowhere without my vnet modules!"); } -SYSINIT(vimage, SI_SUB_VIMAGE, SI_ORDER_FIRST, vi_init, NULL); -SYSINIT(vimage_done, SI_SUB_VIMAGE_DONE, SI_ORDER_FIRST, vi_init_done, NULL); +SYSINIT(vnet_init_done, SI_SUB_VNET_DONE, SI_ORDER_FIRST, vnet_init_done, + NULL); #ifdef DDB DB_SHOW_COMMAND(vnets, db_show_vnets) Modified: head/sys/net/if.c ============================================================================== --- head/sys/net/if.c Sun Jul 19 12:57:10 2009 (r195759) +++ head/sys/net/if.c Sun Jul 19 14:20:53 2009 (r195760) @@ -1793,8 +1793,8 @@ if_slowtimo(void *arg) struct ifnet *ifp; int s = splimp(); + VNET_LIST_RLOCK_NOSLEEP(); IFNET_RLOCK(); - VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { @@ -1805,8 +1805,8 @@ if_slowtimo(void *arg) } CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); IFNET_RUNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); splx(s); timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ); } Modified: head/sys/netgraph/ng_gif.c ============================================================================== --- head/sys/netgraph/ng_gif.c Sun Jul 19 12:57:10 2009 (r195759) +++ head/sys/netgraph/ng_gif.c Sun Jul 19 14:20:53 2009 (r195760) @@ -561,8 +561,8 @@ ng_gif_mod_event(module_t mod, int event ng_gif_input_orphan_p = ng_gif_input_orphan; /* Create nodes for any already-existing gif interfaces */ + VNET_LIST_RLOCK_NOSLEEP(); IFNET_RLOCK(); - VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET_QUIET(vnet_iter); /* XXX revisit quiet */ TAILQ_FOREACH(ifp, &V_ifnet, if_link) { @@ -571,8 +571,8 @@ ng_gif_mod_event(module_t mod, int event } CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); IFNET_RUNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); break; case MOD_UNLOAD: Modified: head/sys/netinet/igmp.c ============================================================================== --- head/sys/netinet/igmp.c Sun Jul 19 12:57:10 2009 (r195759) +++ head/sys/netinet/igmp.c Sun Jul 19 14:20:53 2009 (r195760) @@ -1616,13 +1616,13 @@ igmp_fasttimo(void) { VNET_ITERATOR_DECL(vnet_iter); - VNET_LIST_RLOCK(); + VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); igmp_fasttimo_vnet(); CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); } /* @@ -2159,13 +2159,13 @@ igmp_slowtimo(void) { VNET_ITERATOR_DECL(vnet_iter); - VNET_LIST_RLOCK(); + VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); igmp_slowtimo_vnet(); CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); } /* Modified: head/sys/netinet/in_pcb.c ============================================================================== --- head/sys/netinet/in_pcb.c Sun Jul 19 12:57:10 2009 (r195759) +++ head/sys/netinet/in_pcb.c Sun Jul 19 14:20:53 2009 (r195760) @@ -1570,7 +1570,7 @@ ipport_tick(void *xtp) { VNET_ITERATOR_DECL(vnet_iter); - VNET_LIST_RLOCK(); + VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); /* XXX appease INVARIANTS here */ if (V_ipport_tcpallocs <= @@ -1582,7 +1582,7 @@ ipport_tick(void *xtp) V_ipport_tcplastcount = V_ipport_tcpallocs; CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL); } Modified: head/sys/netinet/in_rmx.c ============================================================================== --- head/sys/netinet/in_rmx.c Sun Jul 19 12:57:10 2009 (r195759) +++ head/sys/netinet/in_rmx.c Sun Jul 19 14:20:53 2009 (r195760) @@ -319,7 +319,7 @@ in_rtqdrain(void) struct rtqk_arg arg; int fibnum; - VNET_LIST_RLOCK(); + VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); @@ -336,7 +336,7 @@ in_rtqdrain(void) } CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); } static int _in_rt_was_here; Modified: head/sys/netinet/ip_input.c ============================================================================== --- head/sys/netinet/ip_input.c Sun Jul 19 12:57:10 2009 (r195759) +++ head/sys/netinet/ip_input.c Sun Jul 19 14:20:53 2009 (r195760) @@ -1193,8 +1193,8 @@ ip_slowtimo(void) struct ipq *fp; int i; + VNET_LIST_RLOCK_NOSLEEP(); IPQ_LOCK(); - VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); for (i = 0; i < IPREASS_NHASH; i++) { @@ -1228,8 +1228,8 @@ ip_slowtimo(void) } CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); IPQ_UNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); } /* @@ -1241,8 +1241,8 @@ ip_drain(void) VNET_ITERATOR_DECL(vnet_iter); int i; + VNET_LIST_RLOCK_NOSLEEP(); IPQ_LOCK(); - VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); for (i = 0; i < IPREASS_NHASH; i++) { @@ -1254,8 +1254,8 @@ ip_drain(void) } CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); IPQ_UNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); in_rtqdrain(); } Modified: head/sys/netinet/tcp_subr.c ============================================================================== --- head/sys/netinet/tcp_subr.c Sun Jul 19 12:57:10 2009 (r195759) +++ head/sys/netinet/tcp_subr.c Sun Jul 19 14:20:53 2009 (r195760) @@ -940,7 +940,7 @@ tcp_drain(void) if (!do_tcpdrain) return; - VNET_LIST_RLOCK(); + VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); struct inpcb *inpb; @@ -976,7 +976,7 @@ tcp_drain(void) INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); } /* @@ -1576,7 +1576,7 @@ tcp_isn_tick(void *xtp) VNET_ITERATOR_DECL(vnet_iter); u_int32_t projected_offset; - VNET_LIST_RLOCK(); + VNET_LIST_RLOCK_NOSLEEP(); ISN_LOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); /* XXX appease INVARIANTS */ @@ -1590,7 +1590,7 @@ tcp_isn_tick(void *xtp) CURVNET_RESTORE(); } ISN_UNLOCK(); - VNET_LIST_RUNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); callout_reset(&isn_callout, hz/100, tcp_isn_tick, NULL); } Modified: head/sys/netinet/tcp_timer.c ============================================================================== --- head/sys/netinet/tcp_timer.c Sun Jul 19 12:57:10 2009 (r195759) +++ head/sys/netinet/tcp_timer.c Sun Jul 19 14:20:53 2009 (r195760) @@ -127,7 +127,7 @@ tcp_slowtimo(void) { VNET_ITERATOR_DECL(vnet_iter); - VNET_LIST_RLOCK(); + VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); tcp_maxidle = tcp_keepcnt * tcp_keepintvl; @@ -136,7 +136,7 @@ tcp_slowtimo(void) INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); } int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = Modified: head/sys/netinet6/frag6.c ============================================================================== --- head/sys/netinet6/frag6.c Sun Jul 19 12:57:10 2009 (r195759) +++ head/sys/netinet6/frag6.c Sun Jul 19 14:20:53 2009 (r195760) @@ -720,8 +720,8 @@ frag6_slowtimo(void) VNET_ITERATOR_DECL(vnet_iter); struct ip6q *q6; + VNET_LIST_RLOCK_NOSLEEP(); IP6Q_LOCK(); - VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); q6 = V_ip6q.ip6q_next; @@ -748,8 +748,8 @@ frag6_slowtimo(void) } CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); IP6Q_UNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); } /* @@ -760,9 +760,11 @@ frag6_drain(void) { VNET_ITERATOR_DECL(vnet_iter); - if (IP6Q_TRYLOCK() == 0) + VNET_LIST_RLOCK_NOSLEEP(); + if (IP6Q_TRYLOCK() == 0) { + VNET_LIST_RUNLOCK_NOSLEEP(); return; - VNET_LIST_RLOCK(); + } VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); while (V_ip6q.ip6q_next != &V_ip6q) { @@ -772,6 +774,6 @@ frag6_drain(void) } CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); IP6Q_UNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); } Modified: head/sys/netinet6/mld6.c ============================================================================== --- head/sys/netinet6/mld6.c Sun Jul 19 12:57:10 2009 (r195759) +++ head/sys/netinet6/mld6.c Sun Jul 19 14:20:53 2009 (r195760) @@ -1306,13 +1306,13 @@ mld_fasttimo(void) { VNET_ITERATOR_DECL(vnet_iter); - VNET_LIST_RLOCK(); + VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); mld_fasttimo_vnet(); CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); } /* @@ -1721,13 +1721,13 @@ mld_slowtimo(void) { VNET_ITERATOR_DECL(vnet_iter); - VNET_LIST_RLOCK(); + VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); mld_slowtimo_vnet(); CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); } /* Modified: head/sys/netipsec/key.c ============================================================================== --- head/sys/netipsec/key.c Sun Jul 19 12:57:10 2009 (r195759) +++ head/sys/netipsec/key.c Sun Jul 19 14:20:53 2009 (r195760) @@ -4537,7 +4537,7 @@ key_timehandler(void) VNET_ITERATOR_DECL(vnet_iter); time_t now = time_second; - VNET_LIST_RLOCK(); + VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); key_flush_spd(now); @@ -4546,7 +4546,7 @@ key_timehandler(void) key_flush_spacq(now); CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK(); + VNET_LIST_RUNLOCK_NOSLEEP(); #ifndef IPSEC_DEBUG2 /* do exchange to tick time !! */ Modified: head/sys/sys/kernel.h ============================================================================== --- head/sys/sys/kernel.h Sun Jul 19 12:57:10 2009 (r195759) +++ head/sys/sys/kernel.h Sun Jul 19 14:20:53 2009 (r195760) @@ -106,13 +106,14 @@ enum sysinit_sub_id { SI_SUB_MTX_POOL_DYNAMIC = 0x1AC0000, /* dynamic mutex pool */ SI_SUB_LOCK = 0x1B00000, /* various locks */ SI_SUB_EVENTHANDLER = 0x1C00000, /* eventhandler init */ + SI_SUB_VNET_PRELINK = 0x1E00000, /* vnet init before modules */ SI_SUB_KLD = 0x2000000, /* KLD and module setup */ SI_SUB_CPU = 0x2100000, /* CPU resource(s)*/ SI_SUB_KDTRACE = 0x2140000, /* Kernel dtrace hooks */ SI_SUB_MAC = 0x2180000, /* TrustedBSD MAC subsystem */ SI_SUB_MAC_POLICY = 0x21C0000, /* TrustedBSD MAC policies */ SI_SUB_MAC_LATE = 0x21D0000, /* TrustedBSD MAC subsystem */ - SI_SUB_VIMAGE = 0x21E0000, /* vimage infrastructure */ + SI_SUB_VNET = 0x21E0000, /* vnet 0 */ SI_SUB_INTRINSIC = 0x2200000, /* proc 0*/ SI_SUB_VM_CONF = 0x2300000, /* config VM, set limits*/ SI_SUB_DDB_SERVICES = 0x2380000, /* capture, scripting, etc. */ @@ -158,7 +159,7 @@ enum sysinit_sub_id { SI_SUB_SWAP = 0xc000000, /* swap */ SI_SUB_INTRINSIC_POST = 0xd000000, /* proc 0 cleanup*/ SI_SUB_SYSCALLS = 0xd800000, /* register system calls */ - SI_SUB_VIMAGE_DONE = 0xdc00000, /* vnet registration complete */ + SI_SUB_VNET_DONE = 0xdc00000, /* vnet registration complete */ SI_SUB_KTHREAD_INIT = 0xe000000, /* init process*/ SI_SUB_KTHREAD_PAGE = 0xe400000, /* pageout daemon*/ SI_SUB_KTHREAD_VM = 0xe800000, /* vm daemon*/ Modified: head/sys/sys/vimage.h ============================================================================== --- head/sys/sys/vimage.h Sun Jul 19 12:57:10 2009 (r195759) +++ head/sys/sys/vimage.h Sun Jul 19 14:20:53 2009 (r195760) @@ -38,6 +38,9 @@ #ifdef _KERNEL +#include <sys/lock.h> +#include <sys/sx.h> + #ifdef INVARIANTS #define VNET_DEBUG #endif @@ -176,17 +179,42 @@ struct vnet { #endif /* !VIMAGE */ #ifdef VIMAGE +/* + * Global linked list of all virtual network stacks, along with read locks to + * access it. If a caller may sleep while accessing the list, it must use + * the sleepable lock macros. + */ LIST_HEAD(vnet_list_head, vnet); extern struct vnet_list_head vnet_head; -extern struct vnet *vnet0; -#define VNET_ITERATOR_DECL(arg) struct vnet *arg; -#define VNET_FOREACH(arg) LIST_FOREACH(arg, &vnet_head, vnet_le) -#else +extern struct rwlock vnet_rwlock; +extern struct sx vnet_sxlock; + +#define VNET_LIST_RLOCK() sx_slock(&vnet_sxlock) +#define VNET_LIST_RLOCK_NOSLEEP() rw_rlock(&vnet_rwlock) +#define VNET_LIST_RUNLOCK() sx_sunlock(&vnet_sxlock) +#define VNET_LIST_RUNLOCK_NOSLEEP() rw_runlock(&vnet_rwlock) + +/* + * Iteration macros to walk the global list of virtual network stacks. + */ +#define VNET_ITERATOR_DECL(arg) struct vnet *arg +#define VNET_FOREACH(arg) LIST_FOREACH((arg), &vnet_head, vnet_le) + +#else /* !VIMAGE */ +/* + * No-op macros for the !VIMAGE case. + */ +#define VNET_LIST_RLOCK() +#define VNET_LIST_RLOCK_NOSLEEP() +#define VNET_LIST_RUNLOCK() +#define VNET_LIST_RUNLOCK_NOSLEEP() #define VNET_ITERATOR_DECL(arg) #define VNET_FOREACH(arg) -#endif + +#endif /* VIMAGE */ #ifdef VIMAGE +extern struct vnet *vnet0; #define IS_DEFAULT_VNET(arg) ((arg) == vnet0) #else #define IS_DEFAULT_VNET(arg) 1 @@ -202,10 +230,6 @@ extern struct vnet *vnet0; #define P_TO_VNET(p) NULL #endif /* VIMAGE */ -/* Non-VIMAGE null-macros */ -#define VNET_LIST_RLOCK() -#define VNET_LIST_RUNLOCK() - #endif /* _KERNEL */ #endif /* !_SYS_VIMAGE_H_ */
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200907191420.n6JEKrtL044792>