From owner-p4-projects@FreeBSD.ORG Wed Aug 12 21:58:48 2009 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id 5674F1065676; Wed, 12 Aug 2009 21:58:48 +0000 (UTC) Delivered-To: perforce@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 162CE1065673 for ; Wed, 12 Aug 2009 21:58:48 +0000 (UTC) (envelope-from julian@elischer.org) Received: from outC.internet-mail-service.net (outc.internet-mail-service.net [216.240.47.226]) by mx1.freebsd.org (Postfix) with ESMTP id F0D1A8FC4A for ; Wed, 12 Aug 2009 21:58:47 +0000 (UTC) Received: from idiom.com (mx0.idiom.com [216.240.32.160]) by out.internet-mail-service.net (Postfix) with ESMTP id C2999B2E72; Wed, 12 Aug 2009 14:58:47 -0700 (PDT) X-Client-Authorized: MaGic Cook1e X-Client-Authorized: MaGic Cook1e Received: from julian-mac.elischer.org (home.elischer.org [216.240.48.38]) by idiom.com (Postfix) with ESMTP id 2A6092D601B; Wed, 12 Aug 2009 14:58:47 -0700 (PDT) Message-ID: <4A833B16.2040301@elischer.org> Date: Wed, 12 Aug 2009 14:58:46 -0700 From: Julian Elischer User-Agent: Thunderbird 2.0.0.22 (Macintosh/20090605) MIME-Version: 1.0 To: Marko Zec References: <200908122108.n7CL8uhJ058398@repoman.freebsd.org> In-Reply-To: <200908122108.n7CL8uhJ058398@repoman.freebsd.org> Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Cc: Perforce Change Reviews Subject: Re: PERFORCE change 167260 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 12 Aug 2009 21:58:49 -0000 Marko Zec wrote: > http://perforce.freebsd.org/chv.cgi?CH=167260 > > Change 167260 by zec@zec_tpx32 on 2009/08/12 21:08:10 > > Significanlty simplify / reduce previous patch aimed at > making ipdivert work with VIMAGE. The restriction is that > with current patch it is not permitted to kldunload -f > ipdivert if built with options VIMAGE enabled. The patch > is verified to work with natd running in a non-default vimage. > > Diff size against head/sys/netinet/ip_divert.c: > > tpx32% wc before.diff > 287 1073 7891 before.diff > tpx32% wc after.diff > 90 261 2370 after.diff > > Affected files ... > > .. //depot/projects/vimage-commit2/src/sys/netinet/ip_divert.c#38 edit > > Differences ... > > ==== //depot/projects/vimage-commit2/src/sys/netinet/ip_divert.c#38 (text+ko) ==== > > @@ -125,11 +125,13 @@ > static u_long div_sendspace = DIVSNDQ; /* XXX sysctl ? */ > static u_long div_recvspace = DIVRCVQ; /* XXX sysctl ? */ > > +static eventhandler_tag ip_divert_event_tag; > + > /* > * Initialize divert connection block queue. > */ > static void > -div_zone_change(struct vnet *vnet) > +div_zone_change(void *tag) > { > VNET_ITERATOR_DECL(vnet_iter); > > @@ -139,7 +141,7 @@ > uma_zone_set_max(V_divcbinfo.ipi_zone, maxsockets); > CURVNET_RESTORE(); > } > - VNET_LIST_RUNLOCK_NOSLEEP(); > + VNET_LIST_RUNLOCK_NOSLEEP(); > } > > static int > @@ -159,6 +161,30 @@ > INP_LOCK_DESTROY(inp); > } > > +static void > +div_init(void) > +{ > + > + INP_INFO_LOCK_INIT(&V_divcbinfo, "div"); > + LIST_INIT(&V_divcb); > + V_divcbinfo.ipi_listhead = &V_divcb; > +#ifdef VIMAGE > + V_divcbinfo.ipi_vnet = curvnet; > +#endif > + /* > + * XXX We don't use the hash list for divert IP, but it's easier > + * to allocate a one entry hash list than it is to check all > + * over the place for hashbase == NULL. > + */ > + V_divcbinfo.ipi_hashbase = hashinit(1, M_PCB, &V_divcbinfo.ipi_hashmask); > + V_divcbinfo.ipi_porthashbase = hashinit(1, M_PCB, > + &V_divcbinfo.ipi_porthashmask); > + V_divcbinfo.ipi_zone = uma_zcreate("divcb", sizeof(struct inpcb), > + NULL, NULL, div_inpcb_init, div_inpcb_fini, UMA_ALIGN_PTR, > + UMA_ZONE_NOFREE); > + uma_zone_set_max(V_divcbinfo.ipi_zone, maxsockets); > +} > + > /* > - } > + error = in_pcbbind(inp, nam, td->td_ucred); > INP_WUNLOCK(inp); > INP_INFO_WUNLOCK(&V_divcbinfo); > return error; > @@ -710,22 +715,36 @@ > .pr_input = div_input, > .pr_ctlinput = div_ctlinput, > .pr_ctloutput = ip_ctloutput, > - .pr_init = NULL, > + .pr_init = div_init, > .pr_usrreqs = &div_usrreqs If you are going to make pr_init() called for every vnet then pr_destroy should be as well. But in fact that is not really safe. (either of them) The trouble is that we can not guarantee that other protocols can handle being called multiple times in their init and destroy methods. Especially 3rd party protocols. We need to ensure only protocols that have been converted to run with multiple vnets are ever called with multiple vnets. for this reason the only safe way to do this is via the VNET_SYSINIT and VNET_SYSUNINIT calls. > }; > > -static int div_loaded = 0; > -static eventhandler_tag div_evh_tag; > static int > div_modevent(module_t mod, int type, void *unused) > { > int err = 0; > +#ifndef VIMAGE > + int n; > +#endif > > switch (type) { > case MOD_LOAD: > - > + /* > + * Protocol will be initialized by pf_proto_register(). > + * We don't have to register ip_protox because we are not > + * a true IP protocol that goes over the wire. > + */ > + err = pf_proto_register(PF_INET, &div_protosw); > + if (err != 0) > + return (err); > + ip_divert_ptr = divert_packet; > + ip_divert_event_tag = EVENTHANDLER_REGISTER(maxsockets_change, > + div_zone_change, NULL, EVENTHANDLER_PRI_ANY); > break; > case MOD_QUIESCE: > +#ifdef VIMAGE > + case MOD_UNLOAD: > +#endif > /* > * IPDIVERT may normally not be unloaded because of the > * potential race conditions. Tell kldunload we can't be > @@ -733,8 +752,34 @@ > */ > err = EPERM; > break; > +#ifndef VIMAGE > case MOD_UNLOAD: > + /* > + * Forced unload. > + * > + * Module ipdivert can only be unloaded if no sockets are > + * connected. Maybe this can be changed later to forcefully > + * disconnect any open sockets. > + * > + * XXXRW: Note that there is a slight race here, as a new > + * socket open request could be spinning on the lock and then > + * we destroy the lock. > + */ > + INP_INFO_WLOCK(&V_divcbinfo); > + n = V_divcbinfo.ipi_count; > + if (n != 0) { > + err = EBUSY; > + INP_INFO_WUNLOCK(&V_divcbinfo); > + break; > + } > + ip_divert_ptr = NULL; > + err = pf_proto_unregister(PF_INET, IPPROTO_DIVERT, SOCK_RAW); > + INP_INFO_WUNLOCK(&V_divcbinfo); > + INP_INFO_LOCK_DESTROY(&V_divcbinfo); > + uma_zdestroy(V_divcbinfo.ipi_zone); > + EVENTHANDLER_DEREGISTER(maxsockets_change, ip_divert_event_tag); > break; > +#endif /* !VIMAGE */ > default: > err = EOPNOTSUPP; > break; > @@ -748,124 +793,6 @@ > 0 > }; > > -/* init on boot or module load */ > -static void > -div_init(void) > -{ > - int err; > - > - /* > - * Protocol will be initialized by pf_proto_register(). > - * We don't have to register ip_protox because we are not > - * a true IP protocol that goes over the wire. > - */ > - err = pf_proto_register(PF_INET, &div_protosw); > - if (err == 0) { > - ip_divert_ptr = divert_packet; > - div_evh_tag = > - EVENTHANDLER_REGISTER(maxsockets_change, div_zone_change, > - NULL, EVENTHANDLER_PRI_ANY); > - div_loaded = 1; > - } > - return; > -} > - > -/**************** > - * Stuff that must be initialized for every instance > - * (including the first of course). > - */ > -static int > -div_vnet_init(const void *unused) > -{ > - if (div_loaded == 0) > - return (0); > - INP_INFO_LOCK_INIT(&V_divcbinfo, "div"); > - LIST_INIT(&V_divcb); > - V_divcbinfo.ipi_listhead = &V_divcb; > -#ifdef VIMAGE > - V_divcbinfo.ipi_vnet = curvnet; > -#endif > - /* > - * XXX We don't use the hash list for divert IP, but it's easier > - * to allocate a one entry hash list than it is to check all > - * over the place for hashbase == NULL. > - */ > - V_divcbinfo.ipi_hashbase = hashinit(1, M_PCB, &V_divcbinfo.ipi_hashmask); > - V_divcbinfo.ipi_porthashbase = hashinit(1, M_PCB, > - &V_divcbinfo.ipi_porthashmask); > - V_divcbinfo.ipi_zone = uma_zcreate("divcb", sizeof(struct inpcb), > - NULL, NULL, div_inpcb_init, div_inpcb_fini, UMA_ALIGN_PTR, > - UMA_ZONE_NOFREE); > - uma_zone_set_max(V_divcbinfo.ipi_zone, maxsockets); > - return (0); > -} > - > -/********************** > - * Called for the removal of the last instance only on module unload. > - */ > -static void > -div_uninit(void) > -{ > - int err; > - > - if (div_loaded == 0) > - return; > - div_loaded = 0; > - ip_divert_ptr = NULL; > - EVENTHANDLER_DEREGISTER(maxsockets_change, div_evh_tag); > - err = pf_proto_unregister(PF_INET, IPPROTO_DIVERT, SOCK_RAW); > -} > - > -/*********************** > - * Called for the removal of each instance. > - */ > -static int > -div_vnet_uninit(const void *unused) > -{ > - int err = 0; > - int n; > - > - if (div_loaded == 0) > - return (0); > - /* > - * Forced unload. > - * > - * Module ipdivert can only be unloaded if no sockets are > - * connected. Maybe this can be changed later to forcefully > - * disconnect any open sockets. > - * > - * XXXRW: Note that there is a slight race here, as a new > - * socket open request could be spinning on the lock and then > - * we destroy the lock. > - */ > - INP_INFO_WLOCK(&V_divcbinfo); > - n = V_divcbinfo.ipi_count; > - INP_INFO_WUNLOCK(&V_divcbinfo); > - if (n != 0) { > - err = EBUSY; > - } else { > - INP_INFO_LOCK_DESTROY(&V_divcbinfo); > - uma_zdestroy(V_divcbinfo.ipi_zone); > - } > - return (err); > -} > - > - > -#define DIV_MAJOR_ORDER SI_SUB_PROTO_IFATTACHDOMAIN > -#define DIV_MODULE_ORDER (SI_ORDER_ANY + 64) > -#define DIV_SYSINIT_ORDER (DIV_MODULE_ORDER + 1) > -#define DIV_VNET_ORDER (DIV_SYSINIT_ORDER + 1 ) > - > -DECLARE_MODULE(ipdivert, ipdivertmod, DIV_MAJOR_ORDER, DIV_MODULE_ORDER); > +DECLARE_MODULE(ipdivert, ipdivertmod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); > MODULE_DEPEND(dummynet, ipfw, 2, 2, 2); > MODULE_VERSION(ipdivert, 1); > - > -SYSINIT(div_init, DIV_MAJOR_ORDER, DIV_SYSINIT_ORDER, div_init, NULL); > -SYSUNINIT(div_uninit, DIV_MAJOR_ORDER, DIV_SYSINIT_ORDER, > - div_uninit, NULL); > - > -VNET_SYSINIT(div_vnet_init, DIV_MAJOR_ORDER, DIV_VNET_ORDER, > - div_vnet_init, NULL); > -VNET_SYSUNINIT(div_vnet_uninit, DIV_MAJOR_ORDER, DIV_VNET_ORDER, > - div_vnet_uninit, NULL); > -