Date: Sun, 21 Sep 2014 18:15:10 +0000 (UTC) From: "Alexander V. Chernikov" <melifaro@FreeBSD.org> To: src-committers@freebsd.org, svn-src-projects@freebsd.org Subject: svn commit: r271932 - in projects/ipfw/sys: modules/ipfw netpfil/ipfw Message-ID: <201409211815.s8LIFAqJ026505@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: melifaro Date: Sun Sep 21 18:15:09 2014 New Revision: 271932 URL: http://svnweb.freebsd.org/changeset/base/271932 Log: Add pre-alpha version of DXR lookup module. It does build but (currently) does not work. This change is not intended to be merged along with other ipfw changes. Added: projects/ipfw/sys/netpfil/ipfw/dxr_algo.c projects/ipfw/sys/netpfil/ipfw/dxr_fwd.c projects/ipfw/sys/netpfil/ipfw/dxr_fwd.h Modified: projects/ipfw/sys/modules/ipfw/Makefile projects/ipfw/sys/netpfil/ipfw/ip_fw_table.h projects/ipfw/sys/netpfil/ipfw/ip_fw_table_algo.c Modified: projects/ipfw/sys/modules/ipfw/Makefile ============================================================================== --- projects/ipfw/sys/modules/ipfw/Makefile Sun Sep 21 15:37:39 2014 (r271931) +++ projects/ipfw/sys/modules/ipfw/Makefile Sun Sep 21 18:15:09 2014 (r271932) @@ -9,6 +9,7 @@ SRCS= ip_fw2.c ip_fw_pfil.c SRCS+= ip_fw_dynamic.c ip_fw_log.c SRCS+= ip_fw_sockopt.c ip_fw_table.c ip_fw_table_algo.c ip_fw_iface.c SRCS+= ip_fw_table_value.c +SRCS+= dxr_fwd.c dxr_algo.c SRCS+= opt_inet.h opt_inet6.h opt_ipdivert.h opt_ipfw.h opt_ipsec.h CFLAGS+= -DIPFIREWALL Added: projects/ipfw/sys/netpfil/ipfw/dxr_algo.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ projects/ipfw/sys/netpfil/ipfw/dxr_algo.c Sun Sep 21 18:15:09 2014 (r271932) @@ -0,0 +1,847 @@ +/*- + * Copyright (c) 2014 Yandex LLC + * Copyright (c) 2014 Alexander V. Chernikov + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: projects/ipfw/sys/netpfil/ipfw/ip_fw_table.c 267384 2014-06-12 09:59:11Z melifaro $"); + +/* + * DXR algorithm bindings. + * + */ + +#include "opt_ipfw.h" +#include "opt_inet.h" +#ifndef INET +#error IPFIREWALL requires INET. 
+#endif /* INET */ +#include "opt_inet6.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/rwlock.h> +#include <sys/socket.h> +#include <sys/queue.h> +#include <net/if.h> /* ip_fw.h requires IFNAMSIZ */ +#include <net/radix.h> +#include <net/route.h> + +#include <netinet/in.h> +#include <netinet/ip_var.h> /* struct ipfw_rule_ref */ +#include <netinet/ip_fw.h> + +#include <vm/uma.h> + +#include <netpfil/ipfw/ip_fw_private.h> +#include <netpfil/ipfw/ip_fw_table.h> +#include <netpfil/ipfw/dxr_fwd.h> + +#define DXR_BUILD_DEBUG + +static uma_zone_t chunk_zone; + +/* + * ADDR implementation using dxr + * + */ + +/* + * The radix code expects addr and mask to be array of bytes, + * with the first byte being the length of the array. rn_inithead + * is called with the offset in bits of the lookup key within the + * array. If we use a sockaddr_in as the underlying type, + * sin_len is conveniently located at offset 0, sin_addr is at + * offset 4 and normally aligned. 
+ * But for portability, let's avoid assumption and make the code explicit + */ +#define KEY_LEN(v) *((uint8_t *)&(v)) +/* + * Do not require radix to compare more than actual IPv4/IPv6 address + */ +#define KEY_LEN_INET (offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t)) +#define KEY_LEN_INET6 (offsetof(struct sa_in6, sin6_addr) + sizeof(struct in6_addr)) + +#define OFF_LEN_INET (8 * offsetof(struct sockaddr_in, sin_addr)) +#define OFF_LEN_INET6 (8 * offsetof(struct sa_in6, sin6_addr)) + +struct radix_addr_entry { + struct radix_node rn[2]; + struct sockaddr_in addr; + uint32_t value; + uint8_t masklen; +}; + +struct sa_in6 { + uint8_t sin6_len; + uint8_t sin6_family; + uint8_t pad[2]; + struct in6_addr sin6_addr; +}; + +struct radix_addr_xentry { + struct radix_node rn[2]; + struct sa_in6 addr6; + uint32_t value; + uint8_t masklen; +}; + +struct radix_cfg { + struct radix_node_head *head4; + struct radix_node_head *head6; + size_t count4; + size_t count6; + struct dxr_instance *di; +}; + +struct ta_buf_radix +{ + void *ent_ptr; + struct sockaddr *addr_ptr; + struct sockaddr *mask_ptr; + union { + struct { + struct sockaddr_in sa; + struct sockaddr_in ma; + } a4; + struct { + struct sa_in6 sa; + struct sa_in6 ma; + } a6; + } addr; +}; + +static int +radix_lookup(void *tree_ptr, in_addr_t *pdst, in_addr_t *pmask, int *pnh) +{ + struct radix_node_head *rnh; + struct radix_addr_entry *ent; + struct sockaddr_in sin, *s_dst; + struct sockaddr *psa; + in_addr_t dst, mask; + + memset(&sin, 0, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_len = sizeof(sin); + sin.sin_addr.s_addr = htonl(*pdst); + psa = (struct sockaddr *)&sin; + + //TREE_LOCK_ASSERT(di); + rnh = (struct radix_node_head *)tree_ptr; + ent = (struct radix_addr_entry *)rnh->rnh_matchaddr(psa, rnh); + if (ent == NULL) + return (ENOENT); + + s_dst = (struct sockaddr_in *)&ent->addr; + + dst = s_dst->sin_addr.s_addr; + mask = htonl(ent->masklen ? 
~((1 << (32 - ent->masklen)) - 1) : 0); + +#ifdef DXR_BUILD_DEBUG + char kbuf[16], kbuf2[16]; + inet_ntop(AF_INET, pdst, kbuf, sizeof(kbuf)); + inet_ntop(AF_INET, &dst, kbuf2, sizeof(kbuf2)); + printf("RLookup for %s returned %s/%d value %d\n", kbuf, kbuf2, + ent->masklen, ent->value); +#endif + + *pnh = ent->value; + *pdst = dst; + *pmask = mask; + + return (0); +} + +struct radix_wa { + tree_walkf_cb_t *f; + void *arg; + struct dxr_instance *di; +}; + +static int +radix_walkf_f(struct radix_node *rn, void *arg) +{ + struct radix_wa *wa; + struct radix_addr_entry *ent; + struct sockaddr_in *s_dst; + in_addr_t dst, mask; + int nh; + + wa = (struct radix_wa *)arg; + ent = (struct radix_addr_entry *)rn; + + s_dst = (struct sockaddr_in *)&ent->addr; + + nh = ent->value; + dst = s_dst->sin_addr.s_addr; + mask = htonl(ent->masklen ? ~((1 << (32 - ent->masklen)) - 1) : 0); + +#ifdef DXR_BUILD_DEBUG + char kbuf[16]; + inet_ntop(AF_INET, &dst, kbuf, sizeof(kbuf)); + printf(" WALK returned %s/%d value %d\n", kbuf, + ent->masklen, ent->value); +#endif + + return (wa->f(wa->di, dst, mask, nh, wa->arg)); +} + + +static int +radix_walkf(void *tree_ptr, struct dxr_instance *di, in_addr_t dst, + in_addr_t mask, tree_walkf_cb_t *f, void *arg) +{ + struct radix_node_head *rnh; + struct sockaddr_in s_dst, s_mask; + struct radix_wa wa; + int error; + + rnh = (struct radix_node_head *)tree_ptr; + + memset(&s_dst, 0, sizeof(s_dst)); + memset(&s_mask, 0, sizeof(s_mask)); + s_dst.sin_family = AF_INET; + s_dst.sin_len = sizeof(s_dst); + s_dst.sin_addr.s_addr = dst; + s_mask.sin_family = AF_INET; + s_mask.sin_len = sizeof(s_mask); + s_mask.sin_addr.s_addr = mask; + + memset(&wa, 0, sizeof(wa)); + wa.f = f; + wa.arg = arg; + wa.di = di; + +#ifdef DXR_BUILD_DEBUG + char kbuf[16], kbuf2[16]; + inet_ntop(AF_INET, &dst, kbuf, sizeof(kbuf)); + inet_ntop(AF_INET, &mask, kbuf2, sizeof(kbuf2)); + printf("START walk for %s/%s\n", kbuf, kbuf2); +#endif + + error = rnh->rnh_walktree_from(rnh, &s_dst, 
&s_mask, radix_walkf_f, &wa); +#ifdef DXR_BUILD_DEBUG + printf("END walk\n"); +#endif + + return (error); +} + + +static void *slab_alloc(void *slab_ptr) +{ + uma_zone_t zone; + + zone = (uma_zone_t)slab_ptr; + + return (uma_zalloc(zone, M_NOWAIT)); +} + +static void slab_free(void *slab_ptr, void *obj_ptr) +{ + uma_zone_t zone; + + zone = (uma_zone_t)slab_ptr; + + uma_zfree(zone, obj_ptr); +} + +static int +ta_lookup_dxr(struct table_info *ti, void *key, uint32_t keylen, + uint32_t *val) +{ + struct radix_node_head *rnh; + struct dxr_instance *di; + + if (keylen == sizeof(in_addr_t)) { + di = (struct dxr_instance *)ti->state; + int idx = dxr_lookup(di, *((uint32_t *)key)); +#ifdef DXR_BUILD_DEBUG + char kbuf[16]; + inet_ntop(AF_INET, key, kbuf, sizeof(kbuf)); + printf("Lookup for %s returned %d\n", kbuf, idx); +#endif + if (idx == 0) { + /* No match, check for default route idx */ + if ((idx = ti->data & 0xFFFF) == 0) + return (0); + } + + *val = idx; + return (1); + } else { + struct radix_addr_xentry *xent; + struct sa_in6 sa6; + KEY_LEN(sa6) = KEY_LEN_INET6; + memcpy(&sa6.sin6_addr, key, sizeof(struct in6_addr)); + rnh = (struct radix_node_head *)ti->xstate; + xent = (struct radix_addr_xentry *)(rnh->rnh_matchaddr(&sa6, rnh)); + if (xent != NULL) { + *val = xent->value; + return (1); + } + } + + return (0); +} + +/* + * New table + */ +static int +ta_init_dxr(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, + char *data, uint8_t tflags) +{ + struct radix_cfg *cfg; + struct dxr_funcs f; + + cfg = malloc(sizeof(struct radix_cfg), M_IPFW, M_WAITOK | M_ZERO); + + if (!rn_inithead((void **)&cfg->head4, OFF_LEN_INET)) + return (ENOMEM); + if (!rn_inithead((void **)&cfg->head6, OFF_LEN_INET6)) { + rn_detachhead((void **)&cfg->head4); + return (ENOMEM); + } + + ti->xstate = cfg->head6; + *ta_state = cfg; + ti->lookup = ta_lookup_dxr; + + /* XXX: do this from per-algo hook */ + if (chunk_zone == NULL) { + /* Allocate the zone for chunk descriptors (XXX - 
get size) */ + chunk_zone = uma_zcreate("dxr_chunk", sizeof(struct chunk_desc), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); +#if 0 + /* Create updater thread */ + if (kproc_kthread_add(dxr_updater, NULL, &p, &td, RFHIGHPID, + 0, "dxr_update", "dxr_update")) + panic("Can't create the DXR updater thread"); +#endif + } + + memset(&f, 0, sizeof(f)); + f.slab_alloc = slab_alloc; + f.slab_free = slab_free; + f.slab_ptr = chunk_zone; + f.tree_walk = radix_walkf; + f.tree_lookup = radix_lookup; + f.tree_ptr = cfg->head4; + + + cfg->di = dxr_init(M_IPFW, M_WAITOK); + if (cfg == NULL) + return (ENOMEM); + + dxr_setfuncs(cfg->di, &f); + + ti->state = cfg->di; + + return (0); +} + +static int +flush_radix_entry(struct radix_node *rn, void *arg) +{ + struct radix_node_head * const rnh = arg; + struct radix_addr_entry *ent; + + ent = (struct radix_addr_entry *) + rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh); + if (ent != NULL) + free(ent, M_IPFW_TBL); + return (0); +} + +static void +ta_destroy_dxr(void *ta_state, struct table_info *ti) +{ + struct radix_cfg *cfg; + struct radix_node_head *rnh; + + cfg = (struct radix_cfg *)ta_state; + + dxr_destroy(cfg->di, M_IPFW); + + rnh = cfg->head4; + rnh->rnh_walktree(rnh, flush_radix_entry, rnh); + rn_detachhead((void **)&cfg->head4); + + rnh = cfg->head6; + rnh->rnh_walktree(rnh, flush_radix_entry, rnh); + rn_detachhead((void **)&cfg->head6); + + free(cfg, M_IPFW); +} + +/* + * Provide algo-specific table info + */ +static void +ta_dump_radix_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) +{ + struct radix_cfg *cfg; + + cfg = (struct radix_cfg *)ta_state; + + tinfo->flags = IPFW_TATFLAGS_AFDATA | IPFW_TATFLAGS_AFITEM; + tinfo->taclass4 = IPFW_TACLASS_RADIX; + tinfo->count4 = cfg->count4; + tinfo->itemsize4 = sizeof(struct radix_addr_entry); + tinfo->taclass6 = IPFW_TACLASS_RADIX; + tinfo->count6 = cfg->count6; + tinfo->itemsize6 = sizeof(struct radix_addr_xentry); +} + +static int +ta_dump_radix_tentry(void 
*ta_state, struct table_info *ti, void *e, + ipfw_obj_tentry *tent) +{ + struct radix_addr_entry *n; + struct radix_addr_xentry *xn; + + n = (struct radix_addr_entry *)e; + + /* Guess IPv4/IPv6 radix by sockaddr family */ + if (n->addr.sin_family == AF_INET) { + tent->k.addr.s_addr = n->addr.sin_addr.s_addr; + tent->masklen = n->masklen; + tent->subtype = AF_INET; + tent->v.kidx = n->value; +#ifdef INET6 + } else { + xn = (struct radix_addr_xentry *)e; + memcpy(&tent->k, &xn->addr6.sin6_addr, sizeof(struct in6_addr)); + tent->masklen = xn->masklen; + tent->subtype = AF_INET6; + tent->v.kidx = xn->value; +#endif + } + + return (0); +} + +static int +ta_find_radix_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent) +{ + struct radix_cfg *cfg; + struct radix_node_head *rnh; + void *e; + + cfg = (struct radix_cfg *)ta_state; + + e = NULL; + if (tent->subtype == AF_INET) { + struct sockaddr_in sa; + KEY_LEN(sa) = KEY_LEN_INET; + sa.sin_addr.s_addr = tent->k.addr.s_addr; + rnh = cfg->head4; + e = rnh->rnh_matchaddr(&sa, rnh); + } else { + struct sa_in6 sa6; + KEY_LEN(sa6) = KEY_LEN_INET6; + memcpy(&sa6.sin6_addr, &tent->k.addr6, sizeof(struct in6_addr)); + rnh = cfg->head6; + e = rnh->rnh_matchaddr(&sa6, rnh); + } + + if (e != NULL) { + ta_dump_radix_tentry(ta_state, ti, e, tent); + return (0); + } + + return (ENOENT); +} + +static void +ta_foreach_radix(void *ta_state, struct table_info *ti, ta_foreach_f *f, + void *arg) +{ + struct radix_cfg *cfg; + struct radix_node_head *rnh; + + cfg = (struct radix_cfg *)ta_state; + + rnh = cfg->head4; + rnh->rnh_walktree(rnh, (walktree_f_t *)f, arg); + + rnh = cfg->head6; + rnh->rnh_walktree(rnh, (walktree_f_t *)f, arg); +} + + +#ifdef INET6 +static inline void +ipv6_writemask(struct in6_addr *addr6, uint8_t mask) +{ + uint32_t *cp; + + for (cp = (uint32_t *)addr6; mask >= 32; mask -= 32) + *cp++ = 0xFFFFFFFF; + *cp = htonl(mask ? 
~((1 << (32 - mask)) - 1) : 0); +} +#endif + +static void +tei_to_sockaddr_ent(struct tentry_info *tei, struct sockaddr *sa, + struct sockaddr *ma, int *set_mask) +{ + int mlen; + struct sockaddr_in *addr, *mask; + struct sa_in6 *addr6, *mask6; + in_addr_t a4; + + mlen = tei->masklen; + + if (tei->subtype == AF_INET) { +#ifdef INET + addr = (struct sockaddr_in *)sa; + mask = (struct sockaddr_in *)ma; + /* Set 'total' structure length */ + KEY_LEN(*addr) = KEY_LEN_INET; + KEY_LEN(*mask) = KEY_LEN_INET; + addr->sin_family = AF_INET; + mask->sin_addr.s_addr = + htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0); + a4 = *((in_addr_t *)tei->paddr); + addr->sin_addr.s_addr = a4 & mask->sin_addr.s_addr; + if (mlen != 32) + *set_mask = 1; + else + *set_mask = 0; +#endif +#ifdef INET6 + } else if (tei->subtype == AF_INET6) { + /* IPv6 case */ + addr6 = (struct sa_in6 *)sa; + mask6 = (struct sa_in6 *)ma; + /* Set 'total' structure length */ + KEY_LEN(*addr6) = KEY_LEN_INET6; + KEY_LEN(*mask6) = KEY_LEN_INET6; + addr6->sin6_family = AF_INET6; + ipv6_writemask(&mask6->sin6_addr, mlen); + memcpy(&addr6->sin6_addr, tei->paddr, sizeof(struct in6_addr)); + APPLY_MASK(&addr6->sin6_addr, &mask6->sin6_addr); + if (mlen != 128) + *set_mask = 1; + else + *set_mask = 0; + } +#endif +} + +static int +ta_prepare_add_radix(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_radix *tb; + struct radix_addr_entry *ent; + struct radix_addr_xentry *xent; + struct sockaddr *addr, *mask; + int mlen, set_mask; + + tb = (struct ta_buf_radix *)ta_buf; + + mlen = tei->masklen; + set_mask = 0; + + if (tei->subtype == AF_INET) { +#ifdef INET + if (mlen > 32) + return (EINVAL); + ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO); + ent->masklen = mlen; + + addr = (struct sockaddr *)&ent->addr; + mask = (struct sockaddr *)&tb->addr.a4.ma; + tb->ent_ptr = ent; +#endif +#ifdef INET6 + } else if (tei->subtype == AF_INET6) { + /* IPv6 case */ + if (mlen > 128) + return 
(EINVAL); + xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO); + xent->masklen = mlen; + + addr = (struct sockaddr *)&xent->addr6; + mask = (struct sockaddr *)&tb->addr.a6.ma; + tb->ent_ptr = xent; +#endif + } else { + /* Unknown CIDR type */ + return (EINVAL); + } + + tei_to_sockaddr_ent(tei, addr, mask, &set_mask); + /* Set pointers */ + tb->addr_ptr = addr; + if (set_mask != 0) + tb->mask_ptr = mask; + + return (0); +} + +static int +dxr_req(struct table_info *ti, int req, struct tentry_info *tei) +{ + struct dxr_instance *di; + struct in_addr *a; + int error; + + if (tei->masklen == 0) { + + /* + * Handle 'default route' case - store + * value index in lower 2 bytes of ti->data + */ + ti->data &= ~((u_long)0xFFFF); + if (req != 0) + ti->data |= tei->value & 0xFFFF; + return (0); + } + + di = (struct dxr_instance *)ti->state; + a = (struct in_addr *)tei->paddr; + error = 0; + +#ifdef DXR_BUILD_DEBUG + char kbuf[16]; + inet_ntop(AF_INET, tei->paddr, kbuf, sizeof(kbuf)); + printf("%s for %s/%d value [%d]\n", (req == 0) ? 
"DEL":"ADD", kbuf, + tei->masklen, tei->value); +#endif + + /* Delete old record */ + if (req == 0 || (tei->flags & TEI_FLAGS_UPDATED) != 0) { + error = dxr_request(di, RTM_DELETE, *a, tei->masklen, 1); + if (error != 0) + printf("error doing del dxr_req\n"); + } + if (req != 0) { + error = dxr_request(di, RTM_ADD, *a, tei->masklen, 1); + if (error != 0) + printf("error doing del dxr_req\n"); + } + + return (error); +} + +static int +ta_add_dxr(void *ta_state, struct table_info *ti, struct tentry_info *tei, + void *ta_buf, uint32_t *pnum) +{ + struct radix_cfg *cfg; + struct radix_node_head *rnh; + struct radix_node *rn; + struct ta_buf_radix *tb; + uint32_t *old_value, value; + + cfg = (struct radix_cfg *)ta_state; + tb = (struct ta_buf_radix *)ta_buf; + + /* Save current entry value from @tei */ + if (tei->subtype == AF_INET) { + rnh = cfg->head4; + ((struct radix_addr_entry *)tb->ent_ptr)->value = tei->value; + } else { + rnh = ti->xstate; + ((struct radix_addr_xentry *)tb->ent_ptr)->value = tei->value; + } + + /* Search for an entry first */ + rn = rnh->rnh_lookup(tb->addr_ptr, tb->mask_ptr, rnh); + if (rn != NULL) { + if ((tei->flags & TEI_FLAGS_UPDATE) == 0) + return (EEXIST); + /* Record already exists. 
Update value if we're asked to */ + if (tei->subtype == AF_INET) + old_value = &((struct radix_addr_entry *)rn)->value; + else + old_value = &((struct radix_addr_xentry *)rn)->value; + + /* Indicate that update has happened instead of addition */ + tei->flags |= TEI_FLAGS_UPDATED; + + /* Update DXR data */ + if (tei->subtype == AF_INET) + dxr_req(ti, 1, tei); + + value = *old_value; + *old_value = tei->value; + tei->value = value; + + *pnum = 0; + + return (0); + } + + if ((tei->flags & TEI_FLAGS_DONTADD) != 0) + return (EFBIG); + + rn = rnh->rnh_addaddr(tb->addr_ptr, tb->mask_ptr, rnh, tb->ent_ptr); + if (rn == NULL) { + /* Unknown error */ + return (EINVAL); + } + + if (tei->subtype == AF_INET) { + dxr_req(ti, 1, tei); + cfg->count4++; + } else + cfg->count6++; + tb->ent_ptr = NULL; + *pnum = 1; + + return (0); +} + +static int +ta_prepare_del_radix(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_radix *tb; + struct sockaddr *addr, *mask; + int mlen, set_mask; + + tb = (struct ta_buf_radix *)ta_buf; + + mlen = tei->masklen; + set_mask = 0; + + if (tei->subtype == AF_INET) { + if (mlen > 32) + return (EINVAL); + + addr = (struct sockaddr *)&tb->addr.a4.sa; + mask = (struct sockaddr *)&tb->addr.a4.ma; +#ifdef INET6 + } else if (tei->subtype == AF_INET6) { + if (mlen > 128) + return (EINVAL); + + addr = (struct sockaddr *)&tb->addr.a6.sa; + mask = (struct sockaddr *)&tb->addr.a6.ma; +#endif + } else + return (EINVAL); + + tei_to_sockaddr_ent(tei, addr, mask, &set_mask); + tb->addr_ptr = addr; + if (set_mask != 0) + tb->mask_ptr = mask; + + return (0); +} + +static int +ta_del_dxr(void *ta_state, struct table_info *ti, struct tentry_info *tei, + void *ta_buf, uint32_t *pnum) +{ + struct radix_cfg *cfg; + struct radix_node_head *rnh; + struct radix_node *rn; + struct ta_buf_radix *tb; + + cfg = (struct radix_cfg *)ta_state; + tb = (struct ta_buf_radix *)ta_buf; + + if (tei->subtype == AF_INET) + rnh = cfg->head4; + else + rnh = 
cfg->head6; + + rn = rnh->rnh_deladdr(tb->addr_ptr, tb->mask_ptr, rnh); + + if (rn == NULL) + return (ENOENT); + + /* Save entry value to @tei */ + if (tei->subtype == AF_INET) + tei->value = ((struct radix_addr_entry *)rn)->value; + else + tei->value = ((struct radix_addr_xentry *)rn)->value; + + tb->ent_ptr = rn; + + if (tei->subtype == AF_INET) { + dxr_req(ti, 0, tei); + cfg->count4--; + } else + cfg->count6--; + *pnum = 1; + + return (0); +} + +static void +ta_flush_radix_entry(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_radix *tb; + + tb = (struct ta_buf_radix *)ta_buf; + + if (tb->ent_ptr != NULL) + free(tb->ent_ptr, M_IPFW_TBL); +} + +static int +ta_need_modify_radix(void *ta_state, struct table_info *ti, uint32_t count, + uint64_t *pflags) +{ + + /* + * radix does not require additional memory allocations + * other than the nodes themselves. Adding new masks to the tree does, + * but we don't have any API to call (and we don't know which + * sizes we need). 
+ */ + return (0); +} + +struct table_algo addr_dxr = { + .name = "addr:dxr", + .type = IPFW_TABLE_ADDR, + .flags = TA_FLAG_DEFAULT, + .ta_buf_size = sizeof(struct ta_buf_radix), + .init = ta_init_dxr, + .destroy = ta_destroy_dxr, + .prepare_add = ta_prepare_add_radix, + .prepare_del = ta_prepare_del_radix, + .add = ta_add_dxr, + .del = ta_del_dxr, + .flush_entry = ta_flush_radix_entry, + .foreach = ta_foreach_radix, + .dump_tentry = ta_dump_radix_tentry, + .find_tentry = ta_find_radix_tentry, + .dump_tinfo = ta_dump_radix_tinfo, + .need_modify = ta_need_modify_radix, +}; + Added: projects/ipfw/sys/netpfil/ipfw/dxr_fwd.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ projects/ipfw/sys/netpfil/ipfw/dxr_fwd.c Sun Sep 21 18:15:09 2014 (r271932) @@ -0,0 +1,2424 @@ +#define DXR_DIRECT_BITS 18 +#define ALLOW_OOO_EXEC +#define DXR_LOOKUP_TIMING +//#define DIR_24_8 +//#define RADIX_TIMING +//#define DXR_ITER_TIMING +//#define REPEAT_SAME_KEY +#define DXR_LOOKUP_CONSISTENCY_CHECK + +/* + * Copyright (c) 2005-2012 University of Zagreb + * Copyright (c) 2005 International Computer Science Institute + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +/* Compile-time tunables, overriding defaults from ip_fib.h */ +#define DXR_VPORTS_MAX 1024 + +/* Debugging options */ +#define DXR_BUILD_TIMING +#define DXR_BUILD_PARANOIC +//#define DXR_BUILD_DEBUG + +#if defined(DXR_ITER_TIMING) && defined(DXR_LOOKUP_TIMING) +#error DXR_ITER_TIMING and DXR_LOOKUP_TIMING are mutualy exclusive +#endif + +#include <sys/param.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/kernel.h> +#include <sys/kthread.h> +#include <sys/proc.h> +#include <sys/protosw.h> +#include <sys/sched.h> +#include <sys/smp.h> +#include <sys/socket.h> +#include <sys/sysctl.h> +#include <sys/unistd.h> + +#include <net/vnet.h> +#include <net/if.h> +#include <net/netisr.h> +#include <net/if_dl.h> +#include <net/route.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> + +#include <machine/clock.h> + +#include <vm/vm.h> +#include <vm/pmap.h> +#include <vm/vm_map.h> + +#include "dxr_fwd.h" + +#if 0 +static uint16_t nexthop_ref(struct in_addr, struct ifnet *); +static int nexthop_unref(uint16_t); +#endif +static void schedule_update(struct dxr_instance *di, struct in_addr dst, + int mlen); +static void update_chunk(struct dxr_instance *, int); +static void update_chunk_long(struct dxr_instance *, int); +static int dxr_walk(struct dxr_instance *di, in_addr_t dst, in_addr_t mask, + int nh, void *arg); +static int dxr_walk_long(struct dxr_instance *di, in_addr_t dst, in_addr_t 
mask, + int nh, void *arg); +static void dxr_initheap(struct dxr_instance *, uint32_t, uint32_t); +static void dxr_heap_inject(struct dxr_instance*, uint32_t, uint32_t, int, int); +static int dxr_parse(struct dxr_instance *, int, uint32_t, uint32_t, int, int); +static int dxr_parse_long(struct dxr_instance *, int, uint32_t, uint32_t, + int, int); +static void prune_empty_chunks(struct dxr_instance *); +static void chunk_ref(struct dxr_instance *, int); +static void chunk_unref(struct dxr_instance *, int); +static void apply_pending(struct dxr_instance *); +static void dxr_check_tables(struct dxr_instance *di); + +static int radix_lookup(struct dxr_instance *di, uint32_t dst); + +#ifdef DIR_24_8 +#if (DXR_DIRECT_BITS != 24) +#error DXR_DIRECT_BITS must be set to 24 when DIR_24_8 is configured +#endif +static void dir_24_8_rebuild(void); +static int dir_24_8_lookup(uint32_t); +#endif + +#if defined(DXR_LOOKUP_TIMING) || defined(DXR_ITER_TIMING) || defined(RADIX_TIMING) +static void dxr_lookup_exercise(void *arg); +#endif + +#ifdef DXR_BUILD_DEBUG +static void dxr_heap_dump(void); +static void dxr_chunk_dump(int); +static void print_in_route(struct rtentry *, const char *); +#endif + +#if defined(DXR_LOOKUP_TIMING) || defined(DXR_ITER_TIMING) || defined(RADIX_TIMING) +static DPCPU_DEFINE(int, valid_timing); +static int ex_preload; +static int ex_threads; +static int ex_iters = 100000; + +struct iter_stat { + uint64_t cnt; + uint64_t cycles; +} static iter_stats[MAXCPU][32]; + +static int reduce; +static int rdtsc_latency; + *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201409211815.s8LIFAqJ026505>