Date: Thu, 13 Jun 2013 06:07:19 +0000 (UTC) From: Gleb Smirnoff <glebius@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r251681 - head/sys/netpfil/pf Message-ID: <201306130607.r5D67J1G021873@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: glebius Date: Thu Jun 13 06:07:19 2013 New Revision: 251681 URL: http://svnweb.freebsd.org/changeset/base/251681 Log: Improve locking strategy between keys hash and ID hash. Before this change state creating sequence was: 1) lock wire key hash 2) link state's wire key 3) unlock wire key hash 4) lock stack key hash 5) link state's stack key 6) unlock stack key hash 7) lock ID hash 8) link into ID hash 9) unlock ID hash What could happen here is that other thread finds the state via key hash lookup after 6), locks ID hash and does some processing of the state. When the thread creating state unblocks, it finds the state it was inserting already non-virgin. Now we perform proper interlocking between key hash locks and ID hash lock: 1) lock wire & stack hashes 2) link state's keys 3) lock ID hash 4) unlock wire & stack hashes 5) link into ID hash 6) unlock ID hash To achieve that, the following hacking was performed in pf_state_key_attach(): - Key hash mutex is marked with MTX_DUPOK. - To avoid deadlock on 2 key hash mutexes, we lock them in order determined by their address value. - pf_state_key_attach() had a magic to reuse a > FIN_WAIT_2 state. It unlinked the conflicting state synchronously. In theory this could require locking a third key hash, which we can't do now. Now we do not remove the state immediately, instead we leave this task to the purge thread. To avoid conflicts in a short period before state is purged, we push to the very end of the TAILQ. - On success, before dropping key hash locks, pf_state_key_attach() locks ID hash and returns. Tested by: Ian FREISLICH <ianf clue.co.za> Modified: head/sys/netpfil/pf/pf.c Modified: head/sys/netpfil/pf/pf.c ============================================================================== --- head/sys/netpfil/pf/pf.c Thu Jun 13 05:51:59 2013 (r251680) +++ head/sys/netpfil/pf/pf.c Thu Jun 13 06:07:19 2013 (r251681) @@ -724,7 +724,7 @@ pf_initialize() V_pf_hashmask = V_pf_hashsize - 1; for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= V_pf_hashmask; i++, kh++, ih++) { - mtx_init(&kh->lock, "pf_keyhash", NULL, MTX_DEF); + mtx_init(&kh->lock, "pf_keyhash", NULL, MTX_DEF | MTX_DUPOK); mtx_init(&ih->lock, "pf_idhash", NULL, MTX_DEF); } @@ -851,7 +851,7 @@ static int pf_state_key_attach(struct pf_state_key *skw, struct pf_state_key *sks, struct pf_state *s) { - struct pf_keyhash *kh; + struct pf_keyhash *khs, *khw, *kh; struct pf_state_key *sk, *cur; struct pf_state *si, *olds = NULL; int idx; @@ -861,15 +861,47 @@ pf_state_key_attach(struct pf_state_key KASSERT(s->key[PF_SK_STACK] == NULL, ("%s: state has key", __func__)); /* + * We need to lock hash slots of both keys. To avoid deadlock + * we always lock the slot with lower address first. Unlock order + * isn't important. + * + * We also need to lock ID hash slot before dropping key + * locks. On success we return with ID hash slot locked. + */ + + if (skw == sks) { + khs = khw = &V_pf_keyhash[pf_hashkey(skw)]; + PF_HASHROW_LOCK(khs); + } else { + khs = &V_pf_keyhash[pf_hashkey(sks)]; + khw = &V_pf_keyhash[pf_hashkey(skw)]; + if (khs == khw) { + PF_HASHROW_LOCK(khs); + } else if (khs < khw) { + PF_HASHROW_LOCK(khs); + PF_HASHROW_LOCK(khw); + } else { + PF_HASHROW_LOCK(khw); + PF_HASHROW_LOCK(khs); + } + } + +#define KEYS_UNLOCK() do { \ + if (khs != khw) { \ + PF_HASHROW_UNLOCK(khs); \ + PF_HASHROW_UNLOCK(khw); \ + } else \ + PF_HASHROW_UNLOCK(khs); \ +} while (0) + + /* * First run: start with wire key. */ sk = skw; + kh = khw; idx = PF_SK_WIRE; keyattach: - kh = &V_pf_keyhash[pf_hashkey(sk)]; - - PF_HASHROW_LOCK(kh); LIST_FOREACH(cur, &kh->keys, entry) if (bcmp(cur, sk, sizeof(struct pf_state_key_cmp)) == 0) break; @@ -885,10 +917,20 @@ keyattach: if (sk->proto == IPPROTO_TCP && si->src.state >= TCPS_FIN_WAIT_2 && si->dst.state >= TCPS_FIN_WAIT_2) { + /* + * New state matches an old >FIN_WAIT_2 + * state. We can't drop key hash locks, + * thus we can't unlink it properly. + * + * As a workaround we drop it into + * TCPS_CLOSED state, schedule purge + * ASAP and push it into the very end + * of the slot TAILQ, so that it won't + * conflict with our new state. + */ si->src.state = si->dst.state = TCPS_CLOSED; - /* Unlink later or cur can go away. */ - pf_ref_state(si); + si->timeout = PFTM_PURGE; olds = si; } else { if (V_pf_status.debug >= PF_DEBUG_MISC) { @@ -911,7 +953,7 @@ keyattach: printf("\n"); } PF_HASHROW_UNLOCK(ih); - PF_HASHROW_UNLOCK(kh); + KEYS_UNLOCK(); uma_zfree(V_pf_state_key_z, sk); if (idx == PF_SK_STACK) pf_detach_state(s); @@ -934,6 +976,13 @@ stateattach: else TAILQ_INSERT_HEAD(&s->key[idx]->states[idx], s, key_list[idx]); + if (olds) { + TAILQ_REMOVE(&s->key[idx]->states[idx], olds, key_list[idx]); + TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], olds, + key_list[idx]); + olds = NULL; + } + /* * Attach done. See how should we (or should not?) * attach a second key. @@ -944,31 +993,24 @@ stateattach: sks = NULL; goto stateattach; } else if (sks != NULL) { - PF_HASHROW_UNLOCK(kh); - if (olds) { - pf_unlink_state(olds, 0); - pf_release_state(olds); - olds = NULL; - } /* * Continue attaching with stack key. */ sk = sks; + kh = khs; idx = PF_SK_STACK; sks = NULL; goto keyattach; - } else - PF_HASHROW_UNLOCK(kh); - - if (olds) { - pf_unlink_state(olds, 0); - pf_release_state(olds); } + PF_STATE_LOCK(s); + KEYS_UNLOCK(); + KASSERT(s->key[PF_SK_WIRE] != NULL && s->key[PF_SK_STACK] != NULL, ("%s failure", __func__)); return (0); +#undef KEYS_UNLOCK } static void @@ -1091,11 +1133,12 @@ pf_state_insert(struct pfi_kif *kif, str s->creatorid = V_pf_status.hostid; } + /* Returns with ID locked on success. */ if ((error = pf_state_key_attach(skw, sks, s)) != 0) return (error); ih = &V_pf_idhash[PF_IDHASH(s)]; - PF_HASHROW_LOCK(ih); + PF_HASHROW_ASSERT(ih); LIST_FOREACH(cur, &ih->states, entry) if (cur->id == s->id && cur->creatorid == s->creatorid) break;
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201306130607.r5D67J1G021873>