From owner-p4-projects@FreeBSD.ORG Wed Aug 12 21:08:57 2009 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id ED774106566C; Wed, 12 Aug 2009 21:08:56 +0000 (UTC) Delivered-To: perforce@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id ACCEC106564A for ; Wed, 12 Aug 2009 21:08:56 +0000 (UTC) (envelope-from zec@fer.hr) Received: from repoman.freebsd.org (repoman.freebsd.org [IPv6:2001:4f8:fff6::29]) by mx1.freebsd.org (Postfix) with ESMTP id 9C1DE8FC41 for ; Wed, 12 Aug 2009 21:08:56 +0000 (UTC) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.14.3/8.14.3) with ESMTP id n7CL8uBr058400 for ; Wed, 12 Aug 2009 21:08:56 GMT (envelope-from zec@fer.hr) Received: (from perforce@localhost) by repoman.freebsd.org (8.14.3/8.14.3/Submit) id n7CL8uhJ058398 for perforce@freebsd.org; Wed, 12 Aug 2009 21:08:56 GMT (envelope-from zec@fer.hr) Date: Wed, 12 Aug 2009 21:08:56 GMT Message-Id: <200908122108.n7CL8uhJ058398@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to zec@fer.hr using -f From: Marko Zec To: Perforce Change Reviews Cc: Subject: PERFORCE change 167260 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 12 Aug 2009 21:08:57 -0000 http://perforce.freebsd.org/chv.cgi?CH=167260 Change 167260 by zec@zec_tpx32 on 2009/08/12 21:08:10 Significanlty simplify / reduce previous patch aimed at making ipdivert work with VIMAGE. The restriction is that with current patch it is not permitted to kldunload -f ipdivert if built with options VIMAGE enabled. The patch is verified to work with natd running in a non-default vimage. Diff size against head/sys/netinet/ip_divert.c: tpx32% wc before.diff 287 1073 7891 before.diff tpx32% wc after.diff 90 261 2370 after.diff Affected files ... .. //depot/projects/vimage-commit2/src/sys/netinet/ip_divert.c#38 edit Differences ... ==== //depot/projects/vimage-commit2/src/sys/netinet/ip_divert.c#38 (text+ko) ==== @@ -125,11 +125,13 @@ static u_long div_sendspace = DIVSNDQ; /* XXX sysctl ? */ static u_long div_recvspace = DIVRCVQ; /* XXX sysctl ? */ +static eventhandler_tag ip_divert_event_tag; + /* * Initialize divert connection block queue. */ static void -div_zone_change(struct vnet *vnet) +div_zone_change(void *tag) { VNET_ITERATOR_DECL(vnet_iter); @@ -139,7 +141,7 @@ uma_zone_set_max(V_divcbinfo.ipi_zone, maxsockets); CURVNET_RESTORE(); } - VNET_LIST_RUNLOCK_NOSLEEP(); + VNET_LIST_RUNLOCK_NOSLEEP(); } static int @@ -159,6 +161,30 @@ INP_LOCK_DESTROY(inp); } +static void +div_init(void) +{ + + INP_INFO_LOCK_INIT(&V_divcbinfo, "div"); + LIST_INIT(&V_divcb); + V_divcbinfo.ipi_listhead = &V_divcb; +#ifdef VIMAGE + V_divcbinfo.ipi_vnet = curvnet; +#endif + /* + * XXX We don't use the hash list for divert IP, but it's easier + * to allocate a one entry hash list than it is to check all + * over the place for hashbase == NULL. + */ + V_divcbinfo.ipi_hashbase = hashinit(1, M_PCB, &V_divcbinfo.ipi_hashmask); + V_divcbinfo.ipi_porthashbase = hashinit(1, M_PCB, + &V_divcbinfo.ipi_porthashmask); + V_divcbinfo.ipi_zone = uma_zcreate("divcb", sizeof(struct inpcb), + NULL, NULL, div_inpcb_init, div_inpcb_fini, UMA_ALIGN_PTR, + UMA_ZONE_NOFREE); + uma_zone_set_max(V_divcbinfo.ipi_zone, maxsockets); +} + /* * IPPROTO_DIVERT is not in the real IP protocol number space; this * function should never be called. Just in case, drop any packets. @@ -505,8 +531,7 @@ div_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; - int error = 0; - struct in_addr laddr; + int error; inp = sotoinpcb(so); KASSERT(inp != NULL, ("div_bind: inp == NULL")); @@ -517,32 +542,12 @@ * XXX -- divert should not be abusing in_pcbind * and should probably have its own family. */ - if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) - return (EINVAL); - /* possibly not needed.. but play safe */ - inp->inp_fport = 0; - inp->inp_faddr.s_addr = INADDR_ANY; - if (nam->sa_family != AF_INET) return EAFNOSUPPORT; - - laddr.s_addr = INADDR_ANY; - /* just to be sure, since the man page says it is ignored. */ ((struct sockaddr_in *)nam)->sin_addr.s_addr = INADDR_ANY; - INP_INFO_WLOCK(&V_divcbinfo); INP_WLOCK(inp); - if (in_pcblookup_local(&V_divcbinfo, laddr, - ((struct sockaddr_in *)nam)->sin_port, 0/*not wild ok */, - td->td_ucred)) { - error = EADDRNOTAVAIL; - } else { - inp->inp_lport = ((struct sockaddr_in *)nam)->sin_port; - if (in_pcbinshash(inp) != 0) { - inp->inp_lport = 0; - error = EAGAIN; - } - } + error = in_pcbbind(inp, nam, td->td_ucred); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_divcbinfo); return error; @@ -710,22 +715,36 @@ .pr_input = div_input, .pr_ctlinput = div_ctlinput, .pr_ctloutput = ip_ctloutput, - .pr_init = NULL, + .pr_init = div_init, .pr_usrreqs = &div_usrreqs }; -static int div_loaded = 0; -static eventhandler_tag div_evh_tag; static int div_modevent(module_t mod, int type, void *unused) { int err = 0; +#ifndef VIMAGE + int n; +#endif switch (type) { case MOD_LOAD: - + /* + * Protocol will be initialized by pf_proto_register(). + * We don't have to register ip_protox because we are not + * a true IP protocol that goes over the wire. + */ + err = pf_proto_register(PF_INET, &div_protosw); + if (err != 0) + return (err); + ip_divert_ptr = divert_packet; + ip_divert_event_tag = EVENTHANDLER_REGISTER(maxsockets_change, + div_zone_change, NULL, EVENTHANDLER_PRI_ANY); break; case MOD_QUIESCE: +#ifdef VIMAGE + case MOD_UNLOAD: +#endif /* * IPDIVERT may normally not be unloaded because of the * potential race conditions. Tell kldunload we can't be @@ -733,8 +752,34 @@ */ err = EPERM; break; +#ifndef VIMAGE case MOD_UNLOAD: + /* + * Forced unload. + * + * Module ipdivert can only be unloaded if no sockets are + * connected. Maybe this can be changed later to forcefully + * disconnect any open sockets. + * + * XXXRW: Note that there is a slight race here, as a new + * socket open request could be spinning on the lock and then + * we destroy the lock. + */ + INP_INFO_WLOCK(&V_divcbinfo); + n = V_divcbinfo.ipi_count; + if (n != 0) { + err = EBUSY; + INP_INFO_WUNLOCK(&V_divcbinfo); + break; + } + ip_divert_ptr = NULL; + err = pf_proto_unregister(PF_INET, IPPROTO_DIVERT, SOCK_RAW); + INP_INFO_WUNLOCK(&V_divcbinfo); + INP_INFO_LOCK_DESTROY(&V_divcbinfo); + uma_zdestroy(V_divcbinfo.ipi_zone); + EVENTHANDLER_DEREGISTER(maxsockets_change, ip_divert_event_tag); break; +#endif /* !VIMAGE */ default: err = EOPNOTSUPP; break; @@ -748,124 +793,6 @@ 0 }; -/* init on boot or module load */ -static void -div_init(void) -{ - int err; - - /* - * Protocol will be initialized by pf_proto_register(). - * We don't have to register ip_protox because we are not - * a true IP protocol that goes over the wire. - */ - err = pf_proto_register(PF_INET, &div_protosw); - if (err == 0) { - ip_divert_ptr = divert_packet; - div_evh_tag = - EVENTHANDLER_REGISTER(maxsockets_change, div_zone_change, - NULL, EVENTHANDLER_PRI_ANY); - div_loaded = 1; - } - return; -} - -/**************** - * Stuff that must be initialized for every instance - * (including the first of course). - */ -static int -div_vnet_init(const void *unused) -{ - if (div_loaded == 0) - return (0); - INP_INFO_LOCK_INIT(&V_divcbinfo, "div"); - LIST_INIT(&V_divcb); - V_divcbinfo.ipi_listhead = &V_divcb; -#ifdef VIMAGE - V_divcbinfo.ipi_vnet = curvnet; -#endif - /* - * XXX We don't use the hash list for divert IP, but it's easier - * to allocate a one entry hash list than it is to check all - * over the place for hashbase == NULL. - */ - V_divcbinfo.ipi_hashbase = hashinit(1, M_PCB, &V_divcbinfo.ipi_hashmask); - V_divcbinfo.ipi_porthashbase = hashinit(1, M_PCB, - &V_divcbinfo.ipi_porthashmask); - V_divcbinfo.ipi_zone = uma_zcreate("divcb", sizeof(struct inpcb), - NULL, NULL, div_inpcb_init, div_inpcb_fini, UMA_ALIGN_PTR, - UMA_ZONE_NOFREE); - uma_zone_set_max(V_divcbinfo.ipi_zone, maxsockets); - return (0); -} - -/********************** - * Called for the removal of the last instance only on module unload. - */ -static void -div_uninit(void) -{ - int err; - - if (div_loaded == 0) - return; - div_loaded = 0; - ip_divert_ptr = NULL; - EVENTHANDLER_DEREGISTER(maxsockets_change, div_evh_tag); - err = pf_proto_unregister(PF_INET, IPPROTO_DIVERT, SOCK_RAW); -} - -/*********************** - * Called for the removal of each instance. - */ -static int -div_vnet_uninit(const void *unused) -{ - int err = 0; - int n; - - if (div_loaded == 0) - return (0); - /* - * Forced unload. - * - * Module ipdivert can only be unloaded if no sockets are - * connected. Maybe this can be changed later to forcefully - * disconnect any open sockets. - * - * XXXRW: Note that there is a slight race here, as a new - * socket open request could be spinning on the lock and then - * we destroy the lock. - */ - INP_INFO_WLOCK(&V_divcbinfo); - n = V_divcbinfo.ipi_count; - INP_INFO_WUNLOCK(&V_divcbinfo); - if (n != 0) { - err = EBUSY; - } else { - INP_INFO_LOCK_DESTROY(&V_divcbinfo); - uma_zdestroy(V_divcbinfo.ipi_zone); - } - return (err); -} - - -#define DIV_MAJOR_ORDER SI_SUB_PROTO_IFATTACHDOMAIN -#define DIV_MODULE_ORDER (SI_ORDER_ANY + 64) -#define DIV_SYSINIT_ORDER (DIV_MODULE_ORDER + 1) -#define DIV_VNET_ORDER (DIV_SYSINIT_ORDER + 1 ) - -DECLARE_MODULE(ipdivert, ipdivertmod, DIV_MAJOR_ORDER, DIV_MODULE_ORDER); +DECLARE_MODULE(ipdivert, ipdivertmod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); MODULE_DEPEND(dummynet, ipfw, 2, 2, 2); MODULE_VERSION(ipdivert, 1); - -SYSINIT(div_init, DIV_MAJOR_ORDER, DIV_SYSINIT_ORDER, div_init, NULL); -SYSUNINIT(div_uninit, DIV_MAJOR_ORDER, DIV_SYSINIT_ORDER, - div_uninit, NULL); - -VNET_SYSINIT(div_vnet_init, DIV_MAJOR_ORDER, DIV_VNET_ORDER, - div_vnet_init, NULL); -VNET_SYSUNINIT(div_vnet_uninit, DIV_MAJOR_ORDER, DIV_VNET_ORDER, - div_vnet_uninit, NULL); -