Date: Fri, 8 Mar 2013 19:37:25 GMT From: Kajetan Staszkiewicz <vegeta@tuxpowered.net> To: freebsd-gnats-submit@FreeBSD.org Subject: kern/176763: Removing pf Source entries locks kernel. Message-ID: <201303081937.r28JbPd4085970@red.freebsd.org> Resent-Message-ID: <201303081940.r28Je25r034114@freefall.freebsd.org>
next in thread | raw e-mail | index | archive | help
>Number: 176763 >Category: kern >Synopsis: Removing pf Source entries locks kernel. >Confidential: no >Severity: non-critical >Priority: low >Responsible: freebsd-bugs >State: open >Quarter: >Keywords: >Date-Required: >Class: sw-bug >Submitter-Id: current-users >Arrival-Date: Fri Mar 08 19:40:01 UTC 2013 >Closed-Date: >Last-Modified: >Originator: Kajetan Staszkiewicz >Release: 9.1-RELEASE >Organization: InnoGames GmbH >Environment: FreeBSD xxxxxxx 9.1-RELEASE FreeBSD 9.1-RELEASE #10 r247265M: Mon Feb 25 14:58:39 CET 2013 root@xxxxxxx:/usr/obj/usr/src/sys/IGLB3 amd64 >Description: The case will happen on a FreeBSD router with large amount of pf State and Source entries. Use pfctl to remove some Sources. For each matching source whole State table is searched element by element. With hundreds of matching Sources (and hundreds of thousands of States to search through) this can freeze the kernel for a few seconds. Under some conditions (e.g. a DDoS attack hitting the IP address for which the Source entries will be removed), kernel will freeze permanently. >How-To-Repeat: `pfctl -K` >Fix: Create a list of State entries and attach it to Source. This way only this short list must be searched when Source is removed. List is maintained during State creation and removal. Patch attached with submission follows: --- sys/contrib/pf/net/pf.c.orig 2013-03-05 11:27:01.000000000 +0100 +++ sys/contrib/pf/net/pf.c 2013-03-08 14:14:39.000000000 +0100 @@ -1517,6 +1517,19 @@ u_int32_t timeout; if (s->src_node != NULL) { + + /* Remove this pf_state from the list of states linked to pf_src_node */ + if (s->prev_state) /* not the first pf_state in list */ + s->prev_state->next_state = s->next_state; + else /* the fist pf_state in the list, modify list head in pf_src_node */ + s->src_node->linked_states = s->next_state; + + if (s->next_state) /* not the last pf_state in list */ + s->next_state->prev_state = s->prev_state; + + s->prev_state = NULL; + s->next_state = NULL; + if (s->src.tcp_est) --s->src_node->conn; if (--s->src_node->states <= 0) { @@ -1532,6 +1545,19 @@ } } if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) { + + /* Remove this pf_state from the list of states linked to pf_src_node */ + if (s->prev_state) /* not the first pf_state in list */ + s->prev_state->next_state = s->next_state; + else /* the fist pf_state in the list, modify list head in pf_src_node */ + s->nat_src_node->linked_states = s->next_state; + + if (s->next_state) /* not the last pf_state in list */ + s->next_state->prev_state = s->prev_state; + + s->prev_state = NULL; + s->next_state = NULL; + if (--s->nat_src_node->states <= 0) { timeout = s->rule.ptr->timeout[PFTM_SRC_NODE]; if (!timeout) @@ -3895,12 +3921,24 @@ if (sn != NULL) { s->src_node = sn; s->src_node->states++; + + /* attach this state to head of list */ + s->next_state = sn->linked_states; + if (s->next_state) + s->next_state->prev_state = s; + sn->linked_states = s; } if (nsn != NULL) { /* XXX We only modify one side for now. */ PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af); s->nat_src_node = nsn; s->nat_src_node->states++; + + /* attach this state to head of list */ + s->next_state = nsn->linked_states; + if (s->next_state) + s->next_state->prev_state = s; + nsn->linked_states = s; } if (pd->proto == IPPROTO_TCP) { if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m, --- sys/contrib/pf/net/pf_ioctl.c.orig 2013-03-05 11:26:44.000000000 +0100 +++ sys/contrib/pf/net/pf_ioctl.c 2013-03-08 14:10:08.000000000 +0100 @@ -3790,6 +3790,7 @@ case DIOCKILLSRCNODES: { struct pf_src_node *sn; struct pf_state *s; + struct pf_state **pns; /* pointer to next_state of previous state */ struct pfioc_src_node_kill *psnk = (struct pfioc_src_node_kill *)addr; u_int killed = 0; @@ -3808,20 +3809,17 @@ &psnk->psnk_dst.addr.v.a.mask, &sn->raddr, sn->af)) { /* Handle state to src_node linkage */ - if (sn->states != 0) { - RB_FOREACH(s, pf_state_tree_id, -#ifdef __FreeBSD__ - &V_tree_id) { -#else - &tree_id) { -#endif - if (s->src_node == sn) - s->src_node = NULL; - if (s->nat_src_node == sn) - s->nat_src_node = NULL; - } - sn->states = 0; + s = NULL; /* make gcc happy */ + pns = &sn->linked_states; + for (s = sn->linked_states; s != NULL; s = s->next_state) { + s->src_node = NULL; + s->nat_src_node = NULL; + *pns = NULL; + s->prev_state = NULL; + pns = &s->next_state; } + *pns = NULL; + sn->states = 0; sn->expire = 1; killed++; } --- sys/contrib/pf/net/pfvar.h.orig 2013-03-05 11:27:14.000000000 +0100 +++ sys/contrib/pf/net/pfvar.h 2013-03-08 11:02:29.000000000 +0100 @@ -748,6 +748,7 @@ u_int32_t expire; sa_family_t af; u_int8_t ruletype; + struct pf_state *linked_states; }; #define PFSNODE_HIWAT 10000 /* default source node table size */ @@ -852,6 +853,8 @@ struct pfi_kif *rt_kif; struct pf_src_node *src_node; struct pf_src_node *nat_src_node; + struct pf_state *prev_state; + struct pf_state *next_state; u_int64_t packets[2]; u_int64_t bytes[2]; u_int32_t creation; >Release-Note: >Audit-Trail: >Unformatted:
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201303081937.r28JbPd4085970>