From owner-freebsd-net@FreeBSD.ORG Thu Jun 4 20:47:22 2009 Return-Path: Delivered-To: freebsd-net@FreeBSD.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id C8FB8106564A for ; Thu, 4 Jun 2009 20:47:22 +0000 (UTC) (envelope-from oleg@lath.rinet.ru) Received: from lath.rinet.ru (lath.rinet.ru [195.54.192.90]) by mx1.freebsd.org (Postfix) with ESMTP id EBC028FC0A for ; Thu, 4 Jun 2009 20:47:21 +0000 (UTC) (envelope-from oleg@lath.rinet.ru) Received: by lath.rinet.ru (Postfix, from userid 222) id 74E7A700B; Fri, 5 Jun 2009 00:47:20 +0400 (MSD) Date: Fri, 5 Jun 2009 00:47:20 +0400 From: Oleg Bulyzhin To: Mikolaj Golub Message-ID: <20090604204720.GA49677@lath.rinet.ru> References: <864ov9htgq.fsf@kopusha.onet> <81bpp8l6de.fsf@zhuzha.ua1> <20090603170311.GA18104@lath.rinet.ru> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="wRRV7LY7NUeQGEoC" Content-Disposition: inline In-Reply-To: <20090603170311.GA18104@lath.rinet.ru> User-Agent: Mutt/1.5.18 (2008-05-17) Cc: freebsd-net@FreeBSD.org Subject: Re: panic with ng_ipfw+ng_car and net.inet.ip.fw.one_pass=0 X-BeenThere: freebsd-net@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: Networking and TCP/IP with FreeBSD List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 04 Jun 2009 20:47:23 -0000 --wRRV7LY7NUeQGEoC Content-Type: text/plain; charset=us-ascii Content-Disposition: inline On Wed, Jun 03, 2009 at 09:03:11PM +0400, Oleg Bulyzhin wrote: > On Mon, Jun 01, 2009 at 11:12:45AM +0300, Mikolaj Golub wrote: > > > It looks the problem has not drawn much attention :-). > > I was on vacation so did not reply in time. > Dummynet like solution is not enough, dummynet is affected by this problem > too. > I'll send patch to you for testing tomorrow. Please test attached patch and let me know results. Patch made for -current and it changes ABI, so rebuilding ipfw with new headers required. -- Oleg. ================================================================ === Oleg Bulyzhin -- OBUL-RIPN -- OBUL-RIPE -- oleg@rinet.ru === ================================================================ --wRRV7LY7NUeQGEoC Content-Type: text/x-diff; charset=us-ascii Content-Disposition: attachment; filename="one_pass.diff" Index: sys/netinet/ip_dummynet.c =================================================================== RCS file: /home/ncvs/src/sys/netinet/ip_dummynet.c,v retrieving revision 1.120 diff -u -r1.120 ip_dummynet.c --- sys/netinet/ip_dummynet.c 9 Apr 2009 12:46:00 -0000 1.120 +++ sys/netinet/ip_dummynet.c 4 Jun 2009 20:38:12 -0000 @@ -1399,6 +1399,8 @@ * Build and enqueue packet + parameters. */ pkt->rule = fwa->rule; + pkt->rule_id = fwa->rule_id; + pkt->chain_id = fwa->chain_id; pkt->dn_dir = dir; pkt->ifp = fwa->oif; Index: sys/netinet/ip_dummynet.h =================================================================== RCS file: /home/ncvs/src/sys/netinet/ip_dummynet.h,v retrieving revision 1.44 diff -u -r1.44 ip_dummynet.h --- sys/netinet/ip_dummynet.h 4 Jun 2009 12:27:57 -0000 1.44 +++ sys/netinet/ip_dummynet.h 4 Jun 2009 20:38:12 -0000 @@ -113,6 +113,8 @@ */ struct dn_pkt_tag { struct ip_fw *rule; /* matching rule */ + uint32_t rule_id; /* matching rule id */ + uint32_t chain_id; /* ruleset id */ int dn_dir; /* action when packet comes out. */ #define DN_TO_IP_OUT 1 #define DN_TO_IP_IN 2 @@ -382,16 +384,16 @@ #define DUMMYNET_LOADED (ip_dn_io_ptr != NULL) /* - * Return the IPFW rule associated with the dummynet tag; if any. + * Return the dummynet tag; if any. * Make sure that the dummynet tag is not reused by lower layers. */ -static __inline struct ip_fw * -ip_dn_claim_rule(struct mbuf *m) +static __inline struct dn_pkt_tag * +ip_dn_claim_tag(struct mbuf *m) { struct m_tag *mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); if (mtag != NULL) { mtag->m_tag_id = PACKET_TAG_NONE; - return (((struct dn_pkt_tag *)(mtag+1))->rule); + return ((struct dn_pkt_tag *)(mtag + 1)); } else return (NULL); } Index: sys/netinet/ip_fw.h =================================================================== RCS file: /home/ncvs/src/sys/netinet/ip_fw.h,v retrieving revision 1.127 diff -u -r1.127 ip_fw.h --- sys/netinet/ip_fw.h 2 May 2009 08:16:26 -0000 1.127 +++ sys/netinet/ip_fw.h 4 Jun 2009 20:38:12 -0000 @@ -453,17 +453,18 @@ struct ip_fw *next_rule; /* ptr to next [skipto] rule */ /* 'next_rule' is used to pass up 'set_disable' status */ - u_int16_t act_ofs; /* offset of action in 32-bit units */ - u_int16_t cmd_len; /* # of 32-bit words in cmd */ - u_int16_t rulenum; /* rule number */ - u_int8_t set; /* rule set (0..31) */ + uint16_t act_ofs; /* offset of action in 32-bit units */ + uint16_t cmd_len; /* # of 32-bit words in cmd */ + uint16_t rulenum; /* rule number */ + uint8_t set; /* rule set (0..31) */ #define RESVD_SET 31 /* set for default and persistent rules */ - u_int8_t _pad; /* padding */ + uint8_t _pad; /* padding */ + uint32_t id; /* rule id */ /* These fields are present in all rules. */ - u_int64_t pcnt; /* Packet counter */ - u_int64_t bcnt; /* Byte counter */ - u_int32_t timestamp; /* tv_sec of last match */ + uint64_t pcnt; /* Packet counter */ + uint64_t bcnt; /* Byte counter */ + uint32_t timestamp; /* tv_sec of last match */ ipfw_insn cmd[1]; /* storage for commands */ }; @@ -607,10 +608,12 @@ struct ifnet *oif; /* output interface */ struct sockaddr_in *next_hop; /* forward address */ struct ip_fw *rule; /* matching rule */ + uint32_t rule_id; /* matching rule id */ + uint32_t chain_id; /* ruleset id */ struct ether_header *eh; /* for bridged packets */ struct ipfw_flow_id f_id; /* grabbed from IP header */ - u_int32_t cookie; /* a cookie depending on rule action */ + uint32_t cookie; /* a cookie depending on rule action */ struct inpcb *inp; struct _ip6dn_args dummypar; /* dummynet->ip6_output */ @@ -658,6 +661,7 @@ LIST_HEAD(, cfg_nat) nat; /* list of nat entries */ struct radix_node_head *tables[IPFW_TABLES_MAX]; struct rwlock rwmtx; + uint32_t id; /* ruleset id */ }; #ifdef IPFW_INTERNAL Index: sys/netinet/ip_fw2.c =================================================================== RCS file: /home/ncvs/src/sys/netinet/ip_fw2.c,v retrieving revision 1.223 diff -u -r1.223 ip_fw2.c --- sys/netinet/ip_fw2.c 18 May 2009 22:34:44 -0000 1.223 +++ sys/netinet/ip_fw2.c 4 Jun 2009 20:38:13 -0000 @@ -134,6 +134,16 @@ #endif static uma_zone_t ipfw_dyn_rule_zone; +/** + * dummynet needs a reference to the default rule, because rules can be + * deleted while packets hold a reference to them. When this happens, + * dummynet changes the reference to the default rule (it could well be a + * NULL pointer, but this way we do not need to check for the special + * case, plus here he have info on the default behaviour). + */ +struct ip_fw *ip_fw_default_rule; + + /* * Data structure to cache our ucred related * information. This structure only gets used if @@ -2522,9 +2532,22 @@ if (args->rule) { /* * Packet has already been tagged. Look for the next rule - * to restart processing. + * to restart processing. Make sure that args->rule still + * exists and not changed. */ - f = args->rule->next_rule; + if (chain->id != args->chain_id) { + for (f = chain->rules; f != NULL; f = f->next) + if (f == args->rule && f->id == args->rule_id) + break; + + if (f != NULL) + f = f->next_rule; + else + f = ip_fw_default_rule; + + } else + f = args->rule->next_rule; + if (f == NULL) f = lookup_next_rule(args->rule, 0); } else { @@ -3236,6 +3259,8 @@ case O_PIPE: case O_QUEUE: args->rule = f; /* report matching rule */ + args->rule_id = f->id; + args->chain_id = chain->id; if (cmd->arg1 == IP_FW_TABLEARG) args->cookie = tablearg; else @@ -3344,6 +3369,8 @@ case O_NETGRAPH: case O_NGTEE: args->rule = f; /* report matching rule */ + args->rule_id = f->id; + args->chain_id = chain->id; if (cmd->arg1 == IP_FW_TABLEARG) args->cookie = tablearg; else @@ -3366,6 +3393,8 @@ if (IPFW_NAT_LOADED) { args->rule = f; /* Report matching rule. */ + args->rule_id = f->id; + args->chain_id = chain->id; t = ((ipfw_insn_nat *)cmd)->nat; if (t == NULL) { nat_id = (cmd->arg1 == IP_FW_TABLEARG) ? @@ -3424,6 +3453,8 @@ ip->ip_sum = in_cksum(m, hlen); retval = IP_FW_REASS; args->rule = f; + args->rule_id = f->id; + args->chain_id = chain->id; goto done; } else { retval = IP_FW_DENY; @@ -3482,6 +3513,8 @@ IPFW_WLOCK_ASSERT(chain); + chain->id++; + for (rule = chain->rules; rule; rule = rule->next) rule->next_rule = NULL; } @@ -3518,6 +3551,7 @@ if (chain->rules == NULL) { /* default rule */ chain->rules = rule; + rule->id = ++chain->id; goto done; } @@ -3559,6 +3593,8 @@ } } flush_rule_ptrs(chain); + /* chain->id incremented inside flush_rule_ptrs() */ + rule->id = chain->id; done: V_static_count++; V_static_len += l; @@ -3614,8 +3650,6 @@ while ((rule = head) != NULL) { head = head->next; - if (DUMMYNET_LOADED) - ip_dn_ruledel_ptr(rule); free(rule, M_IPFW); } } @@ -3632,7 +3666,7 @@ IPFW_WLOCK_ASSERT(chain); - flush_rule_ptrs(chain); /* more efficient to do outside the loop */ + flush_rule_ptrs(chain); /* more efficient to do outside the loop */ for (prev = NULL, rule = chain->rules; rule ; ) if (kill_default || rule->set != RESVD_SET) rule = remove_rule(chain, rule, prev); @@ -4517,15 +4551,6 @@ #undef RULE_MAXSIZE } -/** - * dummynet needs a reference to the default rule, because rules can be - * deleted while packets hold a reference to them. When this happens, - * dummynet changes the reference to the default rule (it could well be a - * NULL pointer, but this way we do not need to check for the special - * case, plus here he have info on the default behaviour). - */ -struct ip_fw *ip_fw_default_rule; - /* * This procedure is only used to handle keepalives. It is invoked * every dyn_keepalive_period Index: sys/netinet/ip_fw_pfil.c =================================================================== RCS file: /home/ncvs/src/sys/netinet/ip_fw_pfil.c,v retrieving revision 1.36 diff -u -r1.36 ip_fw_pfil.c --- sys/netinet/ip_fw_pfil.c 30 Apr 2009 13:36:26 -0000 1.36 +++ sys/netinet/ip_fw_pfil.c 4 Jun 2009 20:38:13 -0000 @@ -116,6 +116,8 @@ KASSERT(ng_tag->dir == NG_IPFW_IN, ("ng_ipfw tag with wrong direction")); args.rule = ng_tag->rule; + args.rule_id = ng_tag->rule_id; + args.chain_id = ng_tag->chain_id; m_tag_delete(*m0, (struct m_tag *)ng_tag); } @@ -126,6 +128,8 @@ dt = (struct dn_pkt_tag *)(dn_tag+1); args.rule = dt->rule; + args.rule_id = dt->rule_id; + args.chain_id = dt->chain_id; m_tag_delete(*m0, dn_tag); } @@ -246,6 +250,8 @@ KASSERT(ng_tag->dir == NG_IPFW_OUT, ("ng_ipfw tag with wrong direction")); args.rule = ng_tag->rule; + args.rule_id = ng_tag->rule_id; + args.chain_id = ng_tag->chain_id; m_tag_delete(*m0, (struct m_tag *)ng_tag); } @@ -256,6 +262,8 @@ dt = (struct dn_pkt_tag *)(dn_tag+1); args.rule = dt->rule; + args.rule_id = dt->rule_id; + args.chain_id = dt->chain_id; m_tag_delete(*m0, dn_tag); } Index: sys/net/if_bridge.c =================================================================== RCS file: /home/ncvs/src/sys/net/if_bridge.c,v retrieving revision 1.125 diff -u -r1.125 if_bridge.c --- sys/net/if_bridge.c 1 May 2009 19:46:42 -0000 1.125 +++ sys/net/if_bridge.c 4 Jun 2009 20:38:13 -0000 @@ -3041,11 +3041,19 @@ if (IPFW_LOADED && pfil_ipfw != 0 && dir == PFIL_OUT && ifp != NULL) { INIT_VNET_INET(curvnet); + struct dn_pkt_tag *dn_tag; error = -1; - args.rule = ip_dn_claim_rule(*mp); - if (args.rule != NULL && V_fw_one_pass) - goto ipfwpass; /* packet already partially processed */ + dn_tag = ip_dn_claim_tag(*mp); + if (dn_tag != NULL) { + if (dn_tag->rule != NULL && V_fw_one_pass) + /* packet already partially processed */ + goto ipfwpass; + args.rule = dn_tag->rule; /* matching rule to restart */ + args.rule_id = dn_tag->rule_id; + args.chain_id = dn_tag->chain_id; + } else + args.rule = NULL; args.m = *mp; args.oif = ifp; Index: sys/net/if_ethersubr.c =================================================================== RCS file: /home/ncvs/src/sys/net/if_ethersubr.c,v retrieving revision 1.260 diff -u -r1.260 if_ethersubr.c --- sys/net/if_ethersubr.c 5 May 2009 10:56:12 -0000 1.260 +++ sys/net/if_ethersubr.c 4 Jun 2009 20:38:13 -0000 @@ -147,8 +147,7 @@ #if defined(INET) || defined(INET6) int -ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, - struct ip_fw **rule, int shared); +ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, int shared); #ifdef VIMAGE_GLOBALS static int ether_ipfw; #endif @@ -430,10 +429,9 @@ { #if defined(INET) || defined(INET6) INIT_VNET_NET(ifp->if_vnet); - struct ip_fw *rule = ip_dn_claim_rule(m); if (IPFW_LOADED && V_ether_ipfw != 0) { - if (ether_ipfw_chk(&m, ifp, &rule, 0) == 0) { + if (ether_ipfw_chk(&m, ifp, 0) == 0) { if (m) { m_freem(m); return EACCES; /* pkt dropped */ @@ -457,8 +455,7 @@ * ether_output_frame. */ int -ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, - struct ip_fw **rule, int shared) +ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, int shared) { INIT_VNET_INET(dst->if_vnet); struct ether_header *eh; @@ -466,9 +463,19 @@ struct mbuf *m; int i; struct ip_fw_args args; + struct dn_pkt_tag *dn_tag; - if (*rule != NULL && V_fw_one_pass) - return 1; /* dummynet packet, already partially processed */ + dn_tag = ip_dn_claim_tag(*m0); + + if (dn_tag != NULL) { + if (dn_tag->rule != NULL && V_fw_one_pass) + /* dummynet packet, already partially processed */ + return (1); + args.rule = dn_tag->rule; /* matching rule to restart */ + args.rule_id = dn_tag->rule_id; + args.chain_id = dn_tag->chain_id; + } else + args.rule = NULL; /* * I need some amt of data to be contiguous, and in case others need @@ -489,7 +496,6 @@ args.m = m; /* the packet we are looking at */ args.oif = dst; /* destination, if any */ - args.rule = *rule; /* matching rule to restart */ args.next_hop = NULL; /* we do not support forward yet */ args.eh = &save_eh; /* MAC header for bridged/MAC packets */ args.inp = NULL; /* used by ipfw uid/gid/jail rules */ @@ -510,7 +516,6 @@ ETHER_HDR_LEN); } *m0 = m; - *rule = args.rule; if (i == IP_FW_DENY) /* drop */ return 0; @@ -767,9 +772,7 @@ * Do not do this for PROMISC frames in case we are re-entered. */ if (IPFW_LOADED && V_ether_ipfw != 0 && !(m->m_flags & M_PROMISC)) { - struct ip_fw *rule = ip_dn_claim_rule(m); - - if (ether_ipfw_chk(&m, NULL, &rule, 0) == 0) { + if (ether_ipfw_chk(&m, NULL, 0) == 0) { if (m) m_freem(m); /* dropped; free mbuf chain */ return; /* consumed */ Index: sys/netgraph/ng_ipfw.c =================================================================== RCS file: /home/ncvs/src/sys/netgraph/ng_ipfw.c,v retrieving revision 1.11 diff -u -r1.11 ng_ipfw.c --- sys/netgraph/ng_ipfw.c 10 Dec 2008 23:12:39 -0000 1.11 +++ sys/netgraph/ng_ipfw.c 4 Jun 2009 20:38:13 -0000 @@ -293,6 +293,8 @@ return (ENOMEM); } ngit->rule = fwa->rule; + ngit->rule_id = fwa->rule_id; + ngit->chain_id = fwa->chain_id; ngit->dir = dir; ngit->ifp = fwa->oif; m_tag_prepend(m, &ngit->mt); Index: sys/netgraph/ng_ipfw.h =================================================================== RCS file: /home/ncvs/src/sys/netgraph/ng_ipfw.h,v retrieving revision 1.2 diff -u -r1.2 ng_ipfw.h --- sys/netgraph/ng_ipfw.h 17 Feb 2006 09:42:49 -0000 1.2 +++ sys/netgraph/ng_ipfw.h 4 Jun 2009 20:38:14 -0000 @@ -38,6 +38,8 @@ struct ng_ipfw_tag { struct m_tag mt; /* tag header */ struct ip_fw *rule; /* matching rule */ + uint32_t rule_id; /* matching rule id */ + uint32_t chain_id; /* ruleset id */ struct ifnet *ifp; /* interface, for ip_output */ int dir; #define NG_IPFW_OUT 0 --wRRV7LY7NUeQGEoC--