Date: Sat, 9 Jan 2021 12:51:37 GMT From: "Alexander V. Chernikov" <melifaro@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org Subject: git: 537d13437314 - main - Bring DPDK route lookups to FreeBSD. Message-ID: <202101091251.109CpbLZ082315@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch main has been updated by melifaro: URL: https://cgit.FreeBSD.org/src/commit/?id=537d134373141c2d25bfb24af6d661d0e6102927 commit 537d134373141c2d25bfb24af6d661d0e6102927 Author: Alexander V. Chernikov <melifaro@FreeBSD.org> AuthorDate: 2021-01-09 12:08:00 +0000 Commit: Alexander V. Chernikov <melifaro@FreeBSD.org> CommitDate: 2021-01-09 12:41:04 +0000 Bring DPDK route lookups to FreeBSD. This change introduces loadable fib lookup modules based on the DPDK rte_lpm library, targeted at high-speed lookups in large-scale tables. It is based on the lookup framework described in D27401. The IPv4 module is called dpdk_lpm4. It wraps the rte_lpm [1] library. This library implements a variation of the DIR24-8 [2] lookup algorithm. The module provides lockless route lookups and in-place incremental updates, allowing for good RIB performance. The IPv6 module is called dpdk_lpm6. It wraps the rte_lpm6 [3] library. The implementation can be seen as a multi-bit trie where the stride, or number of bits inspected on each level, varies from level to level. A lookup can take from 1 to 14 memory accesses, with 5 being the average value for the prefix lengths that are most commonly used in IPv6. The module provides lockless route lookups for global unicast addresses and in-place incremental updates, allowing for good RIB performance. Implementation details: * wrapper code lives in `sys/contrib/dpdk_rte_lpm/dpdk_lpm[6].c`. * rte_lpm[6] implementation contains both RIB and FIB code. The RIB ("rule_") code, backed by an array of hash tables, has been commented out, as base radix already provides all the necessary primitives. * link-local lookups are currently implemented as a base radix lookup. This part should be converted to something like a read-only radix trie. Usage details: Compile the kernel with option FIB_ALGO and load the dpdk_lpm4/dpdk_lpm6 module at any time. They will be picked up automatically when the number of routes rises to several thousand. 
[1]: https://doc.dpdk.org/guides/prog_guide/lpm_lib.html [2]: http://yuba.stanford.edu/~nickm/papers/Infocom98_lookup.pdf [3]: https://doc.dpdk.org/guides/prog_guide/lpm6_lib.html Differential Revision: https://reviews.freebsd.org/D27412 --- sys/contrib/dpdk_rte_lpm/dpdk_lpm.c | 423 +++++++ sys/contrib/dpdk_rte_lpm/dpdk_lpm6.c | 487 ++++++++ sys/contrib/dpdk_rte_lpm/dpdk_lpm6.h | 57 + sys/contrib/dpdk_rte_lpm/rte_branch_prediction.h | 41 + sys/contrib/dpdk_rte_lpm/rte_common.h | 838 +++++++++++++ sys/contrib/dpdk_rte_lpm/rte_debug.h | 83 ++ sys/contrib/dpdk_rte_lpm/rte_jhash.h | 379 ++++++ sys/contrib/dpdk_rte_lpm/rte_log.h | 383 ++++++ sys/contrib/dpdk_rte_lpm/rte_lpm.c | 1107 +++++++++++++++++ sys/contrib/dpdk_rte_lpm/rte_lpm.h | 403 ++++++ sys/contrib/dpdk_rte_lpm/rte_lpm6.c | 1415 ++++++++++++++++++++++ sys/contrib/dpdk_rte_lpm/rte_lpm6.h | 209 ++++ sys/contrib/dpdk_rte_lpm/rte_shim.h | 31 + sys/contrib/dpdk_rte_lpm/rte_tailq.h | 140 +++ sys/modules/Makefile | 10 + sys/modules/dpdk_lpm4/Makefile | 12 + sys/modules/dpdk_lpm6/Makefile | 12 + 17 files changed, 6030 insertions(+) diff --git a/sys/contrib/dpdk_rte_lpm/dpdk_lpm.c b/sys/contrib/dpdk_rte_lpm/dpdk_lpm.c new file mode 100644 index 000000000000..af145997c4d6 --- /dev/null +++ b/sys/contrib/dpdk_rte_lpm/dpdk_lpm.c @@ -0,0 +1,423 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2020 Alexander V. Chernikov + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); +#include "opt_inet.h" + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/rmlock.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/kernel.h> +#include <sys/socket.h> +#include <sys/sysctl.h> +#include <sys/syslog.h> +#include <net/vnet.h> + +#include <net/if.h> +#include <net/if_var.h> + +#include <netinet/in.h> +#include <netinet/in_fib.h> +#include <netinet/ip.h> + +#include <net/route.h> +#include <net/route/nhop.h> +#include <net/route/route_ctl.h> +#include <net/route/fib_algo.h> + +#include "rte_shim.h" +#include "rte_lpm.h" + +#define LPM_MIN_TBL8 8 /* 2 pages of memory */ +#define LPM_MAX_TBL8 65536 * 16 /* 256M */ + +MALLOC_DECLARE(M_RTABLE); + +struct dpdk_lpm_data { + struct rte_lpm *lpm; + uint64_t routes_added; + uint64_t routes_failed; + uint32_t number_tbl8s; + uint32_t fibnum; + uint8_t hit_tables; + uint8_t hit_records; + struct fib_data *fd; +}; + +/* + * Main datapath routing + */ +static struct nhop_object * +lookup_ptr(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid) +{ + struct rte_lpm *lpm; + const struct rte_lpm_external *rte_ext; + uint32_t 
nhidx = 0; + int ret; + + lpm = (struct rte_lpm *)algo_data; + rte_ext = (const struct rte_lpm_external *)lpm; + + ret = rte_lpm_lookup(lpm, ntohl(key.addr4.s_addr), &nhidx); + if (ret == 0) { + /* Success! */ + return (rte_ext->nh_idx[nhidx]); + } else { + /* Not found. Check default route */ + return (rte_ext->nh_idx[rte_ext->default_idx]); + } + + return (NULL); +} + +static uint8_t +rte_get_pref(const struct rib_rtable_info *rinfo) +{ + + if (rinfo->num_prefixes < 10) + return (1); + else if (rinfo->num_prefixes < 1000) + return (rinfo->num_prefixes / 10); + else if (rinfo->num_prefixes < 500000) + return (100 + rinfo->num_prefixes / 3334); + else + return (250); +} + +static enum flm_op_result +handle_default_change(struct dpdk_lpm_data *dd, struct rib_cmd_info *rc) +{ + struct rte_lpm_external *rte_ext; + rte_ext = (struct rte_lpm_external *)dd->lpm; + + if (rc->rc_cmd != RTM_DELETE) { + /* Reference new */ + uint32_t nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new); + + if (nhidx == 0) + return (FLM_REBUILD); + rte_ext->default_idx = nhidx; + } else { + /* No default route */ + rte_ext->default_idx = 0; + } + + return (FLM_SUCCESS); +} + +static void +get_parent_rule(struct dpdk_lpm_data *dd, struct in_addr addr, uint8_t *plen, uint32_t *nhop_idx) +{ + struct route_nhop_data rnd; + struct rtentry *rt; + + rt = fib4_lookup_rt(dd->fibnum, addr, 0, NHR_UNLOCKED, &rnd); + if (rt != NULL) { + struct in_addr addr4; + uint32_t scopeid; + int inet_plen; + rt_get_inet_prefix_plen(rt, &addr4, &inet_plen, &scopeid); + if (inet_plen > 0) { + *plen = inet_plen; + *nhop_idx = fib_get_nhop_idx(dd->fd, rnd.rnd_nhop); + return; + } + } + + *nhop_idx = 0; + *plen = 0; +} + +static enum flm_op_result +handle_gu_change(struct dpdk_lpm_data *dd, const struct rib_cmd_info *rc, + const struct in_addr addr, int plen) +{ + uint32_t nhidx = 0; + int ret; + char abuf[INET_ADDRSTRLEN]; + uint32_t ip; + + ip = ntohl(addr.s_addr); + inet_ntop(AF_INET, &addr, abuf, sizeof(abuf)); + + /* So 
we get sin, plen and nhidx */ + if (rc->rc_cmd != RTM_DELETE) { + /* + * Addition or change. Save nhop in the internal table + * and get index. + */ + nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new); + if (nhidx == 0) { + FIB_PRINTF(LOG_INFO, dd->fd, "nhop limit reached, need rebuild"); + return (FLM_REBUILD); + } + + ret = rte_lpm_add(dd->lpm, ip, plen, nhidx); + FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u = %d", + (rc->rc_cmd == RTM_ADD) ? "ADD" : "UPDATE", + abuf, plen, nhidx, ret); + } else { + /* + * Need to lookup parent. Assume deletion happened already + */ + uint8_t parent_plen; + uint32_t parent_nhop_idx; + get_parent_rule(dd, addr, &parent_plen, &parent_nhop_idx); + + ret = rte_lpm_delete(dd->lpm, ip, plen, parent_plen, parent_nhop_idx); + FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK: %s %s/%d nhop %u = %d", + "DEL", abuf, plen, nhidx, ret); + } + + if (ret != 0) { + FIB_PRINTF(LOG_INFO, dd->fd, "error: %d", ret); + if (ret == -ENOSPC) + return (FLM_REBUILD); + return (FLM_ERROR); + } + return (FLM_SUCCESS); +} + +static enum flm_op_result +handle_rtable_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc, + void *_data) +{ + struct dpdk_lpm_data *dd; + enum flm_op_result ret; + struct in_addr addr4; + uint32_t scopeid; + int plen; + + dd = (struct dpdk_lpm_data *)_data; + rt_get_inet_prefix_plen(rc->rc_rt, &addr4, &plen, &scopeid); + + if (plen != 0) + ret = handle_gu_change(dd, rc, addr4, plen); + else + ret = handle_default_change(dd, rc); + + if (ret != 0) + FIB_PRINTF(LOG_INFO, dd->fd, "error handling route"); + return (ret); +} + +static void +destroy_table(void *_data) +{ + struct dpdk_lpm_data *dd = (struct dpdk_lpm_data *)_data; + + if (dd->lpm != NULL) + rte_lpm_free(dd->lpm); + free(dd, M_RTABLE); +} + +static enum flm_op_result +add_route_cb(struct rtentry *rt, void *_data) +{ + struct dpdk_lpm_data *dd = (struct dpdk_lpm_data *)_data; + struct nhop_object *nh; + int plen, ret; + struct in_addr addr4; + uint32_t scopeid; + + nh = 
rt_get_raw_nhop(rt); + rt_get_inet_prefix_plen(rt, &addr4, &plen, &scopeid); + + char abuf[INET_ADDRSTRLEN]; + inet_ntop(AF_INET, &addr4, abuf, sizeof(abuf)); + + FIB_PRINTF(LOG_DEBUG, dd->fd, "Operating on %s/%d", abuf, plen); + + if (plen == 0) { + struct rib_cmd_info rc = { + .rc_cmd = RTM_ADD, + .rc_nh_new = nh, + }; + + FIB_PRINTF(LOG_DEBUG, dd->fd, "Adding default route"); + return (handle_default_change(dd, &rc)); + } + + uint32_t nhidx = fib_get_nhop_idx(dd->fd, nh); + if (nhidx == 0) { + FIB_PRINTF(LOG_INFO, dd->fd, "unable to get nhop index"); + return (FLM_REBUILD); + } + ret = rte_lpm_add(dd->lpm, ntohl(addr4.s_addr), plen, nhidx); + FIB_PRINTF(LOG_DEBUG, dd->fd, "ADD %p %s/%d nh %u = %d", + dd->lpm, abuf, plen, nhidx, ret); + + if (ret != 0) { + FIB_PRINTF(LOG_INFO, dd->fd, "rte_lpm_add() returned %d", ret); + if (ret == -ENOSPC) { + dd->hit_tables = 1; + return (FLM_REBUILD); + } + dd->routes_failed++; + return (FLM_ERROR); + } else + dd->routes_added++; + + return (FLM_SUCCESS); +} + +static enum flm_op_result +check_dump_success(void *_data, struct fib_dp *dp) +{ + struct dpdk_lpm_data *dd; + + dd = (struct dpdk_lpm_data *)_data; + + FIB_PRINTF(LOG_INFO, dd->fd, "scan completed. 
added: %zu failed: %zu", + dd->routes_added, dd->routes_failed); + if (dd->hit_tables || dd->routes_failed > 0) + return (FLM_REBUILD); + + FIB_PRINTF(LOG_INFO, dd->fd, + "DPDK lookup engine synced with IPv4 RIB id %u, %zu routes", + dd->fibnum, dd->routes_added); + + dp->f = lookup_ptr; + dp->arg = dd->lpm; + + return (FLM_SUCCESS); +} + +static void +estimate_scale(const struct dpdk_lpm_data *dd_src, struct dpdk_lpm_data *dd) +{ + + /* XXX: update at 75% capacity */ + if (dd_src->hit_tables) + dd->number_tbl8s = dd_src->number_tbl8s * 2; + else + dd->number_tbl8s = dd_src->number_tbl8s; + + /* TODO: look into the appropriate RIB to adjust */ +} + +static struct dpdk_lpm_data * +build_table(struct dpdk_lpm_data *dd_prev, struct fib_data *fd) +{ + struct dpdk_lpm_data *dd; + struct rte_lpm *lpm; + + dd = malloc(sizeof(struct dpdk_lpm_data), M_RTABLE, M_NOWAIT | M_ZERO); + if (dd == NULL) { + FIB_PRINTF(LOG_INFO, fd, "Unable to allocate base datastructure"); + return (NULL); + } + dd->fibnum = dd_prev->fibnum; + dd->fd = fd; + + estimate_scale(dd_prev, dd); + + struct rte_lpm_config cfg = {.number_tbl8s = dd->number_tbl8s}; + lpm = rte_lpm_create("test", 0, &cfg); + if (lpm == NULL) { + FIB_PRINTF(LOG_INFO, fd, "unable to create lpm"); + free(dd, M_RTABLE); + return (NULL); + } + dd->lpm = lpm; + struct rte_lpm_external *ext = (struct rte_lpm_external *)lpm; + ext->nh_idx = fib_get_nhop_array(dd->fd); + + FIB_PRINTF(LOG_INFO, fd, "allocated %u tbl8s", dd->number_tbl8s); + + return (dd); +} + +static enum flm_op_result +init_table(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **data) +{ + struct dpdk_lpm_data *dd, dd_base; + + if (_old_data == NULL) { + bzero(&dd_base, sizeof(struct dpdk_lpm_data)); + dd_base.fibnum = fibnum; + /* TODO: get rib statistics */ + dd_base.number_tbl8s = LPM_MIN_TBL8; + dd = &dd_base; + } else { + FIB_PRINTF(LOG_DEBUG, fd, "Starting with old data"); + dd = (struct dpdk_lpm_data *)_old_data; + } + + /* Guaranteed to be in 
epoch */ + dd = build_table(dd, fd); + if (dd == NULL) { + FIB_PRINTF(LOG_NOTICE, fd, "table creation failed"); + return (FLM_REBUILD); + } + + *data = dd; + return (FLM_SUCCESS); +} + +static struct fib_lookup_module dpdk_lpm4 = { + .flm_name = "dpdk_lpm4", + .flm_family = AF_INET, + .flm_init_cb = init_table, + .flm_destroy_cb = destroy_table, + .flm_dump_rib_item_cb = add_route_cb, + .flm_dump_end_cb = check_dump_success, + .flm_change_rib_item_cb = handle_rtable_change_cb, + .flm_get_pref = rte_get_pref, +}; + +static int +lpm4_modevent(module_t mod, int type, void *unused) +{ + int error = 0; + + switch (type) { + case MOD_LOAD: + fib_module_register(&dpdk_lpm4); + break; + case MOD_UNLOAD: + error = fib_module_unregister(&dpdk_lpm4); + break; + default: + error = EOPNOTSUPP; + break; + } + return (error); +} + +static moduledata_t lpm4mod = { + "dpdk_lpm4", + lpm4_modevent, + 0 +}; + +DECLARE_MODULE(lpm4mod, lpm4mod, SI_SUB_PSEUDO, SI_ORDER_ANY); +MODULE_VERSION(lpm4mod, 1); diff --git a/sys/contrib/dpdk_rte_lpm/dpdk_lpm6.c b/sys/contrib/dpdk_rte_lpm/dpdk_lpm6.c new file mode 100644 index 000000000000..250e3e1bde4a --- /dev/null +++ b/sys/contrib/dpdk_rte_lpm/dpdk_lpm6.c @@ -0,0 +1,487 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2020 Alexander V. Chernikov + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); +#include "opt_inet6.h" + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/rmlock.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/kernel.h> +#include <sys/socket.h> +#include <sys/sysctl.h> +#include <sys/syslog.h> +#include <net/vnet.h> + +#include <net/if.h> +#include <net/if_var.h> + +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet6/ip6_var.h> +#include <netinet6/in6_fib.h> + +#include <net/route.h> +#include <net/route/nhop.h> +#include <net/route/route_ctl.h> +#include <net/route/fib_algo.h> +#define RTDEBUG + +#include "rte_lpm6.h" + +#define LPM6_MIN_TBL8 8 /* 2 pages of memory */ +#define LPM6_MAX_TBL8 65536 * 16 /* 256M */ + +struct fib_algo_calldata { + void *lookup; + void *arg; +}; + +struct dpdk_lpm6_data { + struct rte_lpm6 *lpm6; + uint64_t routes_added; + uint64_t routes_failed; + uint32_t number_tbl8s; + uint32_t fibnum; + uint8_t hit_tables; + struct fib_data *fd; +}; + +static struct nhop_object * +lookup_ptr_ll(const struct rte_lpm6 *lpm6, const struct in6_addr *dst6, + uint32_t scopeid) +{ + const struct 
rte_lpm6_external *rte_ext; + + rte_ext = (const struct rte_lpm6_external *)lpm6; + + return (fib6_radix_lookup_nh(rte_ext->fibnum, dst6, scopeid)); +} + +/* + * Main datapath routing + */ +static struct nhop_object * +lookup_ptr(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid) +{ + const struct rte_lpm6 *lpm6; + const struct rte_lpm6_external *rte_ext; + const struct in6_addr *addr6; + uint32_t nhidx = 0; + int ret; + + lpm6 = (const struct rte_lpm6 *)algo_data; + addr6 = key.addr6; + rte_ext = (const struct rte_lpm6_external *)lpm6; + + if (!IN6_IS_SCOPE_LINKLOCAL(addr6)) { + ret = rte_lpm6_lookup(lpm6, (const uint8_t *)addr6, &nhidx); + if (ret == 0) { + /* Success! */ + return (rte_ext->nh_idx[nhidx]); + } else { + /* Not found. Check default route */ + if (rte_ext->default_idx > 0) + return (rte_ext->nh_idx[rte_ext->default_idx]); + else + return (NULL); + } + } else { + /* LL */ + return (lookup_ptr_ll(lpm6, addr6, scopeid)); + } +} + +static uint8_t +rte6_get_pref(const struct rib_rtable_info *rinfo) +{ + + if (rinfo->num_prefixes < 10) + return (1); + else if (rinfo->num_prefixes < 1000) + return (rinfo->num_prefixes / 10); + else if (rinfo->num_prefixes < 500000) + return (100 + rinfo->num_prefixes / 3334); + else + return (250); +} + +static enum flm_op_result +handle_default_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc) +{ + struct rte_lpm6_external *rte_ext; + rte_ext = (struct rte_lpm6_external *)dd->lpm6; + + if (rc->rc_cmd != RTM_DELETE) { + /* Reference new */ + uint32_t nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new); + + if (nhidx == 0) + return (FLM_REBUILD); + rte_ext->default_idx = nhidx; + } else { + /* No default route */ + rte_ext->default_idx = 0; + } + + return (FLM_SUCCESS); +} + +static enum flm_op_result +handle_ll_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc, + const struct in6_addr addr6, int plen, uint32_t scopeid) +{ + + return (FLM_SUCCESS); +} + +static struct rte_lpm6_rule * 
+pack_parent_rule(struct dpdk_lpm6_data *dd, const struct in6_addr *addr6, + char *buffer) +{ + struct rte_lpm6_rule *lsp_rule = NULL; + struct route_nhop_data rnd; + struct rtentry *rt; + int plen; + + rt = fib6_lookup_rt(dd->fibnum, addr6, 0, NHR_UNLOCKED, &rnd); + /* plen = 0 means default route and it's out of scope */ + if (rt != NULL) { + uint32_t scopeid; + struct in6_addr new_addr6; + rt_get_inet6_prefix_plen(rt, &new_addr6, &plen, &scopeid); + if (plen > 0) { + uint32_t nhidx = fib_get_nhop_idx(dd->fd, rnd.rnd_nhop); + if (nhidx == 0) { + /* + * shouldn't happen as we already have parent route. + * It will trigger rebuild automatically. + */ + return (NULL); + } + lsp_rule = fill_rule6(buffer, (uint8_t *)&new_addr6, plen, nhidx); + } + } + + return (lsp_rule); +} + +static enum flm_op_result +handle_gu_change(struct dpdk_lpm6_data *dd, const struct rib_cmd_info *rc, + const struct in6_addr *addr6, int plen) +{ + int ret; + char abuf[INET6_ADDRSTRLEN]; + inet_ntop(AF_INET6, addr6, abuf, sizeof(abuf)); + + /* So we get sin6, plen and nhidx */ + if (rc->rc_cmd != RTM_DELETE) { + /* + * Addition or change. Save nhop in the internal table + * and get index. + */ + uint32_t nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new); + if (nhidx == 0) { + FIB_PRINTF(LOG_INFO, dd->fd, "nhop limit reached, need rebuild"); + return (FLM_REBUILD); + } + + ret = rte_lpm6_add(dd->lpm6, (const uint8_t *)addr6, + plen, nhidx, (rc->rc_cmd == RTM_ADD) ? 1 : 0); + FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u = %d", + (rc->rc_cmd == RTM_ADD) ? "ADD" : "UPDATE", + abuf, plen, nhidx, ret); + } else { + /* + * Need to lookup parent. Assume deletion happened already + */ + char buffer[RTE_LPM6_RULE_SIZE]; + struct rte_lpm6_rule *lsp_rule = NULL; + lsp_rule = pack_parent_rule(dd, addr6, buffer); + + ret = rte_lpm6_delete(dd->lpm6, (const uint8_t *)addr6, plen, lsp_rule); + FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop ? 
= %d", + "DEL", abuf, plen, ret); + } + + if (ret != 0) { + FIB_PRINTF(LOG_INFO, dd->fd, "error: %d", ret); + if (ret == -ENOSPC) + return (FLM_REBUILD); + return (FLM_ERROR); + } + return (FLM_SUCCESS); +} + +static enum flm_op_result +handle_any_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc) +{ + enum flm_op_result ret; + struct in6_addr addr6; + uint32_t scopeid; + int plen; + + rt_get_inet6_prefix_plen(rc->rc_rt, &addr6, &plen, &scopeid); + + if (IN6_IS_SCOPE_LINKLOCAL(&addr6)) + ret = handle_ll_change(dd, rc, addr6, plen, scopeid); + else if (plen == 0) + ret = handle_default_change(dd, rc); + else + ret = handle_gu_change(dd, rc, &addr6, plen); + + if (ret != 0) + FIB_PRINTF(LOG_INFO, dd->fd, "error handling route"); + return (ret); +} + +static enum flm_op_result +handle_rtable_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc, + void *_data) +{ + struct dpdk_lpm6_data *dd; + + dd = (struct dpdk_lpm6_data *)_data; + + return (handle_any_change(dd, rc)); +} + +static void +destroy_dd(struct dpdk_lpm6_data *dd) +{ + + FIB_PRINTF(LOG_INFO, dd->fd, "destroy dd %p", dd); + if (dd->lpm6 != NULL) + rte_lpm6_free(dd->lpm6); + free(dd, M_TEMP); +} + +static void +destroy_table(void *_data) +{ + + destroy_dd((struct dpdk_lpm6_data *)_data); +} + +static enum flm_op_result +add_route_cb(struct rtentry *rt, void *_data) +{ + struct dpdk_lpm6_data *dd = (struct dpdk_lpm6_data *)_data; + struct in6_addr addr6; + struct nhop_object *nh; + uint32_t scopeid; + int plen; + int ret; + + rt_get_inet6_prefix_plen(rt, &addr6, &plen, &scopeid); + nh = rt_get_raw_nhop(rt); + + if (IN6_IS_SCOPE_LINKLOCAL(&addr6)) { + + /* + * We don't operate on LL directly, however + * reference them to maintain guarantee on + * ability to refcount nhops in epoch. 
+ */ + fib_get_nhop_idx(dd->fd, nh); + return (FLM_SUCCESS); + } + + char abuf[INET6_ADDRSTRLEN]; + inet_ntop(AF_INET6, &addr6, abuf, sizeof(abuf)); + FIB_PRINTF(LOG_DEBUG, dd->fd, "Operating on %s/%d", abuf, plen); + + if (plen == 0) { + struct rib_cmd_info rc = { + .rc_cmd = RTM_ADD, + .rc_nh_new = nh, + }; + + FIB_PRINTF(LOG_DEBUG, dd->fd, "Adding default route"); + return (handle_default_change(dd, &rc)); + } + + uint32_t nhidx = fib_get_nhop_idx(dd->fd, nh); + if (nhidx == 0) { + FIB_PRINTF(LOG_INFO, dd->fd, "unable to get nhop index"); + return (FLM_REBUILD); + } + ret = rte_lpm6_add(dd->lpm6, (const uint8_t *)&addr6, plen, nhidx, 1); + FIB_PRINTF(LOG_DEBUG, dd->fd, "ADD %p %s/%d nh %u = %d", + dd->lpm6, abuf, plen, nhidx, ret); + + if (ret != 0) { + FIB_PRINTF(LOG_INFO, dd->fd, "rte_lpm6_add() returned %d", ret); + if (ret == -ENOSPC) { + dd->hit_tables = 1; + return (FLM_REBUILD); + } + dd->routes_failed++; + return (FLM_ERROR); + } else + dd->routes_added++; + + return (FLM_SUCCESS); +} + +static enum flm_op_result +check_dump_success(void *_data, struct fib_dp *dp) +{ + struct dpdk_lpm6_data *dd; + + dd = (struct dpdk_lpm6_data *)_data; + + FIB_PRINTF(LOG_INFO, dd->fd, "scan completed. 
added: %zu failed: %zu", + dd->routes_added, dd->routes_failed); + if (dd->hit_tables || dd->routes_failed > 0) + return (FLM_REBUILD); + + FIB_PRINTF(LOG_INFO, dd->fd, + "DPDK lookup engine synced with IPv6 RIB id %u, %zu routes", + dd->fibnum, dd->routes_added); + + dp->f = lookup_ptr; + dp->arg = dd->lpm6; + + return (FLM_SUCCESS); +} + +static void +estimate_scale(const struct dpdk_lpm6_data *dd_src, struct dpdk_lpm6_data *dd) +{ + + /* XXX: update at 75% capacity */ + if (dd_src->hit_tables) + dd->number_tbl8s = dd_src->number_tbl8s * 2; + else + dd->number_tbl8s = dd_src->number_tbl8s; + + /* TODO: look into the appropriate RIB to adjust */ +} + +static struct dpdk_lpm6_data * +build_table(struct dpdk_lpm6_data *dd_prev, struct fib_data *fd) +{ + struct dpdk_lpm6_data *dd; + struct rte_lpm6 *lpm6; + + dd = malloc(sizeof(struct dpdk_lpm6_data), M_TEMP, M_NOWAIT | M_ZERO); + if (dd == NULL) { + FIB_PRINTF(LOG_INFO, fd, "Unable to allocate base datastructure"); + return (NULL); + } + dd->fibnum = dd_prev->fibnum; + dd->fd = fd; + + estimate_scale(dd_prev, dd); + + struct rte_lpm6_config cfg = {.number_tbl8s = dd->number_tbl8s}; + lpm6 = rte_lpm6_create("test", 0, &cfg); + if (lpm6 == NULL) { + FIB_PRINTF(LOG_INFO, fd, "unable to create lpm6"); + free(dd, M_TEMP); + return (NULL); + } + dd->lpm6 = lpm6; + struct rte_lpm6_external *ext = (struct rte_lpm6_external *)lpm6; + ext->nh_idx = fib_get_nhop_array(dd->fd); + + FIB_PRINTF(LOG_INFO, fd, "allocated %u tbl8s", dd->number_tbl8s); + + return (dd); +} + +static enum flm_op_result +init_table(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **data) +{ + struct dpdk_lpm6_data *dd, dd_base; + + if (_old_data == NULL) { + bzero(&dd_base, sizeof(struct dpdk_lpm6_data)); + dd_base.fibnum = fibnum; + /* TODO: get rib statistics */ + dd_base.number_tbl8s = LPM6_MIN_TBL8; + dd = &dd_base; + } else { + FIB_PRINTF(LOG_INFO, fd, "Starting with old data"); + dd = (struct dpdk_lpm6_data *)_old_data; + } + + /* 
Guaranteed to be in epoch */ + dd = build_table(dd, fd); + if (dd == NULL) { + FIB_PRINTF(LOG_INFO, fd, "table creation failed"); + return (FLM_REBUILD); + } + + *data = dd; + return (FLM_SUCCESS); +} + +static struct fib_lookup_module dpdk_lpm6 = { + .flm_name = "dpdk_lpm6", + .flm_family = AF_INET6, + .flm_init_cb = init_table, + .flm_destroy_cb = destroy_table, + .flm_dump_rib_item_cb = add_route_cb, + .flm_dump_end_cb = check_dump_success, + .flm_change_rib_item_cb = handle_rtable_change_cb, + .flm_get_pref = rte6_get_pref, +}; + +static int +lpm6_modevent(module_t mod, int type, void *unused) +{ + int error = 0; + + switch (type) { + case MOD_LOAD: + fib_module_register(&dpdk_lpm6); + break; + case MOD_UNLOAD: + error = fib_module_unregister(&dpdk_lpm6); + break; + default: + error = EOPNOTSUPP; + break; + } + return (error); +} + +static moduledata_t lpm6mod = { + "dpdk_lpm6", + lpm6_modevent, + 0 +}; + *** 5224 LINES SKIPPED ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202101091251.109CpbLZ082315>