Date: Sun, 23 Nov 2008 00:12:29 GMT From: Gleb Kurtsou <gk@FreeBSD.org> To: Perforce Change Reviews <perforce@FreeBSD.org> Subject: PERFORCE change 153374 for review Message-ID: <200811230012.mAN0CTOx016515@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=153374 Change 153374 by gk@gk_h1 on 2008/11/23 00:11:54 Major improvement to ipfw tables. Extend tables to contain layer2 addresses only (= any ip address) and table lookup during layer2 filtering. Sponsored by: Korcett Holdings, Inc (www.korcett.com) Affected files ... .. //depot/projects/soc2008/gk_l2filter/sbin-ipfw/ipfw2.c#13 edit .. //depot/projects/soc2008/gk_l2filter/sys-netinet/ip_fw.h#13 edit .. //depot/projects/soc2008/gk_l2filter/sys-netinet/ip_fw2.c#19 edit Differences ... ==== //depot/projects/soc2008/gk_l2filter/sbin-ipfw/ipfw2.c#13 (text+ko) ==== @@ -6022,7 +6022,7 @@ do_add = **av == 'a'; ac--; av++; if (!ac) - errx(EX_USAGE, "IP address required"); + errx(EX_USAGE, "Address required"); p = strchr(*av, '/'); if (p) { *p++ = '\0'; @@ -6031,11 +6031,19 @@ errx(EX_DATAERR, "bad width ``%s''", p); } else ent.masklen = 32; - if (lookup_host(*av, (struct in_addr *)&ent.addr) != 0) - errx(EX_NOHOST, "hostname ``%s'' unknown", *av); - ac--; av++; + if (strcmp(*av, "ether") == 0 || strcmp(*av, "any") == 0) { + ent.addr = INADDR_ANY; + ent.masklen = 0; + if ((*av)[0] == 'a') { /* any */ + ac--; av++; + } + } else { + if (lookup_host(*av, (struct in_addr *)&ent.addr) != 0) + errx(EX_NOHOST, "hostname ``%s'' unknown", *av); + ac--; av++; + } bzero(&ent.ether_addr, sizeof(ent.ether_addr)); - if (do_add && ac >= 2 && strcmp(*av, "ether") == 0) { + if (ac >= 2 && strcmp(*av, "ether") == 0) { get_ether_addr(av[1], &ent.ether_addr); ac-=2; av+=2; } @@ -6136,9 +6144,13 @@ tether_buf[0] = 0; } - printf("%s/%u %s%s\n", - inet_ntoa(*(struct in_addr *)&tbl->ent[a].addr), - tbl->ent[a].masklen, tether_buf, tval_buf); + if (tbl->ent[a].addr == INADDR_ANY && tbl->ent[a].masklen == 0) + printf("any"); + else + printf("%s/%u", + inet_ntoa(*(struct in_addr *)&tbl->ent[a].addr), + tbl->ent[a].masklen); + printf(" %s%s\n", tether_buf, tval_buf); } free(tbl); } ==== //depot/projects/soc2008/gk_l2filter/sys-netinet/ip_fw.h#13 (text+ko) ==== 
@@ -682,11 +682,15 @@ #ifdef IPFW_INTERNAL +struct ipfw_table_head { + struct radix_node_head *in_rnh, *ether_rnh; +}; + struct ip_fw_chain { struct ip_fw *rules; /* list of rules */ struct ip_fw *reap; /* list of rules to reap */ LIST_HEAD(, cfg_nat) nat; /* list of nat entries */ - struct radix_node_head *tables[IPFW_TABLES_MAX]; + struct ipfw_table_head tables[IPFW_TABLES_MAX]; struct rwlock rwmtx; }; #define IPFW_LOCK_INIT(_chain) \ ==== //depot/projects/soc2008/gk_l2filter/sys-netinet/ip_fw2.c#19 (text+ko) ==== @@ -58,6 +58,7 @@ #include <sys/module.h> #include <sys/priv.h> #include <sys/proc.h> +#include <sys/refcount.h> #include <sys/rwlock.h> #include <sys/socket.h> #include <sys/socketvar.h> @@ -181,10 +182,17 @@ return (ether_addr_allow(want, a)); } +struct table_entry_addr { + u_char len; + u_char __reserved; + struct ether_addr ether_addr; + in_addr_t in_addr; +}; + struct table_entry { - struct radix_node rn[2]; - struct sockaddr_in addr, mask; - ipfw_ether_addr ether_addr; + struct radix_node in_rn[2], ether_rn[2]; + struct table_entry_addr addr, mask; + int refcnt; u_int32_t value; }; @@ -1802,86 +1810,149 @@ return rule; } +static void +init_table_entry_addr(struct table_entry_addr *addr, struct table_entry_addr *mask, + in_addr_t in_addr, uint8_t mlen, ipfw_ether_addr *ether_addr) +{ + addr->len = mask->len = sizeof(struct table_entry_addr); + mask->in_addr = htonl(mlen ? 
~((1 << (32 - mlen)) - 1) : 0); + addr->in_addr = in_addr & mask->in_addr; + if (ether_addr && (ether_addr->flags & IPFW_EA_CHECK)) { + if (ether_addr->flags & IPFW_EA_MULTICAST) { + bzero(addr->ether_addr.octet, ETHER_ADDR_LEN); + addr->ether_addr.octet[0] = 0x01; + bzero(mask->ether_addr.octet, ETHER_ADDR_LEN); + mask->ether_addr.octet[0] = 0x01; + } else { + memcpy(addr->ether_addr.octet, ether_addr->octet, ETHER_ADDR_LEN); + memset(mask->ether_addr.octet, 0xff, ETHER_ADDR_LEN); + } + } else { + /* set any ether addr */ + bzero(addr->ether_addr.octet, ETHER_ADDR_LEN); + memset(mask->ether_addr.octet, 0xff, ETHER_ADDR_LEN); + } +} + +static __inline struct table_entry * +__rn_to_table_entry(struct radix_node *_rn, int off) +{ + char *rn = (char*) _rn; + + if (rn == NULL) + return NULL; + return (struct table_entry*)(rn - off); + +} + +#define RN_TO_ENT(e, r) (__rn_to_table_entry(e, __offsetof(struct table_entry, r))) + +static __inline void +release_table_entry(struct ipfw_table_head *th, struct table_entry *ent) +{ + IPFW_WLOCK_ASSERT(&V_layer3_chain); /* FIXME */ + + if (refcount_release(&ent->refcnt)) { + if (ent->in_rn[0].rn_flags) + th->in_rnh->rnh_deladdr(&ent->addr, &ent->mask, th->in_rnh); + free(ent, M_IPFW_TBL); + } +} + static int add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, uint8_t mlen, ipfw_ether_addr *ether_addr, uint32_t value) { INIT_VNET_IPFW(curvnet); - struct radix_node_head *rnh; - struct table_entry *ent; + struct ipfw_table_head *th; + struct table_entry *ent, *in_ent; if (tbl >= IPFW_TABLES_MAX) return (EINVAL); - rnh = ch->tables[tbl]; + th = &ch->tables[tbl]; ent = malloc(sizeof(*ent), M_IPFW_TBL, M_NOWAIT | M_ZERO); if (ent == NULL) return (ENOMEM); + refcount_init(&ent->refcnt, 1); ent->value = value; - ent->addr.sin_len = ent->mask.sin_len = 8; - ent->mask.sin_addr.s_addr = htonl(mlen ? 
~((1 << (32 - mlen)) - 1) : 0); - ent->addr.sin_addr.s_addr = addr & ent->mask.sin_addr.s_addr; - ent->ether_addr = *ether_addr; + init_table_entry_addr(&ent->addr, &ent->mask, addr, mlen, ether_addr); IPFW_WLOCK(&V_layer3_chain); - if (rnh->rnh_addaddr(&ent->addr, &ent->mask, rnh, (void *)ent) == - NULL) { + if (th->ether_rnh->rnh_addaddr(&ent->addr, &ent->mask, th->ether_rnh, + ent->ether_rn) == NULL) { IPFW_WUNLOCK(&V_layer3_chain); free(ent, M_IPFW_TBL); return (EEXIST); } + in_ent = RN_TO_ENT(th->in_rnh->rnh_lookup(&ent->addr, &ent->mask, th->in_rnh), + in_rn); + if (in_ent == NULL) { + in_ent = RN_TO_ENT(th->in_rnh->rnh_addaddr(&ent->addr, &ent->mask, + th->in_rnh, ent->in_rn), in_rn); + if (in_ent == NULL) { + th->ether_rnh->rnh_deladdr(&ent->addr, &ent->mask, th->ether_rnh); + IPFW_WUNLOCK(&V_layer3_chain); + free(ent, M_IPFW_TBL); + return (EEXIST); + } + } + refcount_acquire(&in_ent->refcnt); IPFW_WUNLOCK(&V_layer3_chain); return (0); } +static __inline int +delete_table_entry_rn(struct ipfw_table_head *th, void *addr, void *mask) +{ + struct table_entry *ent, *in_ent; + + ent = RN_TO_ENT(th->ether_rnh->rnh_deladdr(addr, mask, th->ether_rnh), + ether_rn); + if (ent == NULL) + return (ESRCH); + in_ent = RN_TO_ENT(th->in_rnh->rnh_lookup(&ent->addr, &ent->mask, th->in_rnh), + in_rn); + release_table_entry(th, in_ent); + release_table_entry(th, ent); + return (0); +} + static int del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, - uint8_t mlen) + uint8_t mlen, ipfw_ether_addr *ether_addr) { - struct radix_node_head *rnh; - struct table_entry *ent; - struct sockaddr_in sa, mask; + struct ipfw_table_head *th; + struct table_entry_addr sa, mask; + int err; if (tbl >= IPFW_TABLES_MAX) return (EINVAL); - rnh = ch->tables[tbl]; - sa.sin_len = mask.sin_len = 8; - mask.sin_addr.s_addr = htonl(mlen ? 
~((1 << (32 - mlen)) - 1) : 0); - sa.sin_addr.s_addr = addr & mask.sin_addr.s_addr; + th = &ch->tables[tbl]; + init_table_entry_addr(&sa, &mask, addr, mlen, ether_addr); IPFW_WLOCK(ch); - ent = (struct table_entry *)rnh->rnh_deladdr(&sa, &mask, rnh); - if (ent == NULL) { - IPFW_WUNLOCK(ch); - return (ESRCH); - } + err = delete_table_entry_rn(th, &sa, &mask); IPFW_WUNLOCK(ch); - free(ent, M_IPFW_TBL); - return (0); + return (err); } static int flush_table_entry(struct radix_node *rn, void *arg) { - struct radix_node_head * const rnh = arg; - struct table_entry *ent; - - ent = (struct table_entry *) - rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh); - if (ent != NULL) - free(ent, M_IPFW_TBL); + delete_table_entry_rn((struct ipfw_table_head *)arg, rn->rn_key, rn->rn_mask); return (0); } static int flush_table(struct ip_fw_chain *ch, uint16_t tbl) { - struct radix_node_head *rnh; + struct ipfw_table_head *th; IPFW_WLOCK_ASSERT(ch); if (tbl >= IPFW_TABLES_MAX) return (EINVAL); - rnh = ch->tables[tbl]; - KASSERT(rnh != NULL, ("NULL IPFW table")); - rnh->rnh_walktree(rnh, flush_table_entry, rnh); + th = &ch->tables[tbl]; + KASSERT(th->ether_rnh != NULL, ("NULL IPFW table")); + th->ether_rnh->rnh_walktree(th->ether_rnh, flush_table_entry, th); return (0); } @@ -1903,7 +1974,12 @@ uint16_t j; for (i = 0; i < IPFW_TABLES_MAX; i++) { - if (!rn_inithead((void **)&ch->tables[i], 32)) { + struct ipfw_table_head *th = &ch->tables[i]; + + if (!rn_inithead((void**)&(th->in_rnh), + __offsetof(struct table_entry_addr, in_addr) * 8) || + !rn_inithead((void**)&(th->ether_rnh), + __offsetof(struct table_entry_addr, ether_addr) * 8)) { for (j = 0; j < i; j++) { (void) flush_table(ch, j); } @@ -1915,22 +1991,35 @@ static int lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, - ipfw_ether_addr *ea, uint32_t *val) + ipfw_ether_addr *ether_addr, uint32_t *val) { - struct radix_node_head *rnh; - struct table_entry *ent; - struct sockaddr_in sa; + struct ipfw_table_head *th; + 
struct table_entry_addr sa, mask; + struct table_entry *ent = NULL; + const int has_ether_addr = (ether_addr && (ether_addr->flags & IPFW_EA_CHECK)); + const int has_in_addr = (addr != INADDR_ANY); if (tbl >= IPFW_TABLES_MAX) return (0); - rnh = ch->tables[tbl]; - sa.sin_len = 8; - sa.sin_addr.s_addr = addr; - ent = (struct table_entry *)(rnh->rnh_lookup(&sa, NULL, rnh)); + th = &ch->tables[tbl]; + init_table_entry_addr(&sa, &mask, addr, (addr == INADDR_ANY ? 0 : 32), ether_addr); + if (has_ether_addr) { + ent = RN_TO_ENT(th->ether_rnh->rnh_lookup(&sa, NULL, th->ether_rnh), + ether_rn); + if (ent == NULL && has_in_addr) { + /* + * Try to lookup entry with any (zero) ether_addr. It's + * handled this way not to deal with non-continuous + * masks in radix trees. + */ + bzero(sa.ether_addr.octet, ETHER_ADDR_LEN); + ent = RN_TO_ENT(th->ether_rnh->rnh_lookup(&sa, NULL, th->ether_rnh), + ether_rn); + } + } else if (has_in_addr) { + ent = RN_TO_ENT(th->in_rnh->rnh_lookup(&sa, NULL, th->in_rnh), in_rn); + } if (ent != NULL) { - if (ea && !ether_addr_allow(&ent->ether_addr, ea)) - return (0); - /* use address to create dynamic rule */ *val = ent->value; return (1); } @@ -1949,20 +2038,20 @@ static int count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt) { - struct radix_node_head *rnh; + struct ipfw_table_head *th; if (tbl >= IPFW_TABLES_MAX) return (EINVAL); - rnh = ch->tables[tbl]; + th = &ch->tables[tbl]; *cnt = 0; - rnh->rnh_walktree(rnh, count_table_entry, cnt); + th->ether_rnh->rnh_walktree(th->ether_rnh, count_table_entry, cnt); return (0); } static int dump_table_entry(struct radix_node *rn, void *arg) { - struct table_entry * const n = (struct table_entry *)rn; + struct table_entry * const n = RN_TO_ENT(rn, ether_rn); ipfw_table * const tbl = arg; ipfw_table_entry *ent; @@ -1970,12 +2059,23 @@ return (1); ent = &tbl->ent[tbl->cnt]; ent->tbl = tbl->tbl; - if (in_nullhost(n->mask.sin_addr)) + if (n->mask.in_addr == INADDR_ANY) ent->masklen = 0; else - 
ent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr)); - ent->addr = n->addr.sin_addr.s_addr; - ent->ether_addr = n->ether_addr; + ent->masklen = 33 - ffs(ntohl(n->mask.in_addr)); + ent->addr = n->addr.in_addr; + memcpy(ent->ether_addr.octet, n->addr.ether_addr.octet, ETHER_ADDR_LEN); + ent->ether_addr.flags = 0; + +#define __ETHER_IS_ZERO(a) (((a)[0] | (a)[1] | (a)[2] | (a)[3] | (a)[4] | (a)[5]) == 0) + if (!__ETHER_IS_ZERO(n->mask.ether_addr.octet) && + !__ETHER_IS_ZERO(n->addr.ether_addr.octet)) { + ent->ether_addr.flags = IPFW_EA_CHECK; + /* Should be fixed after adding new flags */ + if (n->mask.ether_addr.octet[0] == 0x01) + ent->ether_addr.flags |= IPFW_EA_MULTICAST; + } +#undef __ETHER_IS_ZERO ent->value = n->value; tbl->cnt++; return (0); @@ -1984,13 +2084,13 @@ static int dump_table(struct ip_fw_chain *ch, ipfw_table *tbl) { - struct radix_node_head *rnh; + struct ipfw_table_head *th; if (tbl->tbl >= IPFW_TABLES_MAX) return (EINVAL); - rnh = ch->tables[tbl->tbl]; + th = &ch->tables[tbl->tbl]; tbl->cnt = 0; - rnh->rnh_walktree(rnh, dump_table_entry, tbl); + th->ether_rnh->rnh_walktree(th->ether_rnh, dump_table_entry, tbl); return (0); } @@ -2707,16 +2807,21 @@ case O_IP_SRC_LOOKUP: case O_IP_DST_LOOKUP: - if (is_ipv4) { - ipfw_ether_addr *ea = - (cmd->opcode == O_IP_DST_LOOKUP ? - &args->f_id.dst_ether : - &args->f_id.src_ether); - uint32_t a = - (cmd->opcode == O_IP_DST_LOOKUP) ? - dst_ip.s_addr : src_ip.s_addr; + if (is_ipv4 || (args->flags & IP_FW_ARGS_LAYER2)) { + ipfw_ether_addr *ea; + uint32_t a; uint32_t v; + if (cmd->opcode == O_IP_DST_LOOKUP) { + a = dst_ip.s_addr; + ea = &args->f_id.dst_ether; + } else { + a = src_ip.s_addr; + ea = &args->f_id.src_ether; + } + if (args->flags & IP_FW_ARGS_LAYER2) + a = INADDR_ANY; + match = lookup_table(chain, cmd->arg1, a, ea, &v); if (!match) @@ -4441,7 +4546,7 @@ if (error) break; error = del_table_entry(&V_layer3_chain, ent.tbl, - ent.addr, ent.masklen); + ent.addr, ent.masklen, &ent.ether_addr); } break;
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200811230012.mAN0CTOx016515>