Date: Mon, 13 Apr 2026 14:16:56 +0000 From: Pouria Mousavizadeh Tehrani <pouria@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org Subject: git: e44d2e941e8e - main - if_geneve: Add Support for Geneve (RFC8926) Message-ID: <69dcfad8.47191.15534965@gitrepo.freebsd.org>
index | next in thread | raw e-mail
The branch main has been updated by pouria: URL: https://cgit.FreeBSD.org/src/commit/?id=e44d2e941e8ebd74e6a1b1fdbed83fe86671cbc6 commit e44d2e941e8ebd74e6a1b1fdbed83fe86671cbc6 Author: Pouria Mousavizadeh Tehrani <pouria@FreeBSD.org> AuthorDate: 2026-04-11 14:12:01 +0000 Commit: Pouria Mousavizadeh Tehrani <pouria@FreeBSD.org> CommitDate: 2026-04-13 14:14:58 +0000 if_geneve: Add Support for Geneve (RFC8926) geneve creates a generic network virtualization tunnel interface for Tenant Systems over an L3 (IP/UDP) underlay network that provides a Layer 2 (Ethernet) or Layer 3 service using the geneve protocol. This implementation is based on RFC8926. Reviewed by: glebius, adrian Discussed with: zlei, kp Relnotes: yes Differential Revision: https://reviews.freebsd.org/D54172 --- sys/conf/NOTES | 4 + sys/conf/files | 1 + sys/kern/kern_jail.c | 1 + sys/modules/Makefile | 1 + sys/modules/if_geneve/Makefile | 7 + sys/net/if.c | 2 + sys/net/if.h | 6 +- sys/net/if_geneve.c | 3967 ++++++++++++++++++++++++++++++++++++++++ sys/net/if_geneve.h | 70 + sys/net/if_strings.h | 12 +- sys/netlink/route/interface.h | 44 + sys/sys/mbuf.h | 6 +- sys/sys/priv.h | 1 + 13 files changed, 4115 insertions(+), 7 deletions(-) diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 4dda93e2ee70..4279fae4c547 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -880,6 +880,10 @@ device vlan # frames in UDP packets according to RFC7348. device vxlan +# The `geneve' device implements the GENEVE encapsulation of virtual +# overlays according to RFC8926. +device geneve + # The `wlan' device provides generic code to support 802.11 # drivers, including host AP mode; it is MANDATORY for the wi, # and ath drivers and will eventually be required by all 802.11 drivers. 
diff --git a/sys/conf/files b/sys/conf/files index b44fb46ef764..99ba7cdaba33 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -4238,6 +4238,7 @@ net/if_stf.c optional stf inet inet6 net/if_tuntap.c optional tuntap net/if_vlan.c optional vlan net/if_vxlan.c optional vxlan inet | vxlan inet6 +net/if_geneve.c optional geneve inet | geneve inet6 net/ifdi_if.m optional ether pci iflib net/iflib.c optional ether pci iflib net/mp_ring.c optional ether iflib diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c index 384825b7f8ac..bc80adb91cd6 100644 --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -4385,6 +4385,7 @@ prison_priv_check(struct ucred *cred, int priv) case PRIV_NET_SETIFVNET: case PRIV_NET_SETIFFIB: case PRIV_NET_OVPN: + case PRIV_NET_GENEVE: case PRIV_NET_ME: case PRIV_NET_WG: diff --git a/sys/modules/Makefile b/sys/modules/Makefile index a4100c31ef26..faedb856977c 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -169,6 +169,7 @@ SUBDIR= \ if_tuntap \ if_vlan \ if_vxlan \ + if_geneve \ ${_if_wg} \ iflib \ ${_igc} \ diff --git a/sys/modules/if_geneve/Makefile b/sys/modules/if_geneve/Makefile new file mode 100644 index 000000000000..1e65d4dbb168 --- /dev/null +++ b/sys/modules/if_geneve/Makefile @@ -0,0 +1,7 @@ +.PATH: ${SRCTOP}/sys/net + +KMOD= if_geneve +SRCS= if_geneve.c +SRCS+= opt_inet.h opt_inet6.h + +.include <bsd.kmod.mk> diff --git a/sys/net/if.c b/sys/net/if.c index 760ae94e842b..8a148ba0fd06 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -2273,6 +2273,8 @@ const struct ifcap_nv_bit_name ifcap2_nv_bit_names[] = { CAP2NV(RXTLS4), CAP2NV(RXTLS6), CAP2NV(IPSEC_OFFLOAD), + CAP2NV(GENEVE_HWCSUM), + CAP2NV(GENEVE_HWTSO), {0, NULL} }; #undef CAPNV diff --git a/sys/net/if.h b/sys/net/if.h index 1b47237e46bb..4bb6a2659ce7 100644 --- a/sys/net/if.h +++ b/sys/net/if.h @@ -255,7 +255,9 @@ struct if_data { #define IFCAP_B_RXTLS4 32 /* can do TLS receive for TCP */ #define IFCAP_B_RXTLS6 33 /* can do TLS receive for TCP6 */ #define 
IFCAP_B_IPSEC_OFFLOAD 34 /* inline IPSEC offload */ -#define __IFCAP_B_SIZE 35 +#define IFCAP_B_GENEVE_HWCSUM 35 /* can do IFCAN_HWCSUM on GENEVE */ +#define IFCAP_B_GENEVE_HWTSO 36 /* can do IFCAP_TSO on GENEVE */ +#define __IFCAP_B_SIZE 37 #define IFCAP_B_MAX (__IFCAP_B_MAX - 1) #define IFCAP_B_SIZE (__IFCAP_B_SIZE) @@ -299,6 +301,8 @@ struct if_data { #define IFCAP2_RXTLS4 (IFCAP_B_RXTLS4 - 32) #define IFCAP2_RXTLS6 (IFCAP_B_RXTLS6 - 32) #define IFCAP2_IPSEC_OFFLOAD (IFCAP_B_IPSEC_OFFLOAD - 32) +#define IFCAP2_GENEVE_HWCSUM (IFCAP_B_GENEVE_HWCSUM - 32) +#define IFCAP2_GENEVE_HWTSO (IFCAP_B_GENEVE_HWTSO - 32) #define IFCAP2_BIT(x) (1UL << (x)) diff --git a/sys/net/if_geneve.c b/sys/net/if_geneve.c new file mode 100644 index 000000000000..9562a3476099 --- /dev/null +++ b/sys/net/if_geneve.c @@ -0,0 +1,3967 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025-2026 Pouria Mousavizadeh Tehrani <pouria@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "opt_inet.h" +#include "opt_inet6.h" + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/hash.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/module.h> +#include <sys/refcount.h> +#include <sys/rmlock.h> +#include <sys/priv.h> +#include <sys/proc.h> +#include <sys/queue.h> +#include <sys/sdt.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/sockio.h> +#include <sys/sx.h> +#include <sys/systm.h> +#include <sys/counter.h> +#include <sys/jail.h> + +#include <net/bpf.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <net/if_var.h> +#include <net/if_private.h> +#include <net/if_arp.h> +#include <net/if_clone.h> +#include <net/if_media.h> +#include <net/if_types.h> +#include <net/netisr.h> +#include <net/route.h> +#include <net/route/nhop.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/in_var.h> +#include <netinet/in_pcb.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> +#include <netinet/ip6.h> +#include <netinet6/ip6_var.h> +#include <netinet6/in6_var.h> +#include <netinet6/scope6_var.h> +#include <netinet/udp.h> +#include <netinet/udp_var.h> +#include <netinet/in_fib.h> +#include <netinet6/in6_fib.h> +#include <netinet/ip_ecn.h> +#include <net/if_geneve.h> + +#include <netlink/netlink.h> +#include <netlink/netlink_ctl.h> +#include <netlink/netlink_var.h> +#include <netlink/netlink_route.h> +#include 
<netlink/route/route_var.h> + +#include <security/mac/mac_framework.h> + +SDT_PROVIDER_DEFINE(if_geneve); + +struct geneve_softc; +LIST_HEAD(geneve_softc_head, geneve_softc); + +static struct sx geneve_sx; +SX_SYSINIT(geneve, &geneve_sx, "GENEVE global start/stop lock"); + +static unsigned geneve_osd_jail_slot; + +union sockaddr_union { + struct sockaddr sa; + struct sockaddr_in sin; + struct sockaddr_in6 sin6; +}; + +struct geneve_socket_mc_info { + union sockaddr_union gnvsomc_saddr; + union sockaddr_union gnvsomc_gaddr; + int gnvsomc_ifidx; + int gnvsomc_users; +}; + +/* The maximum MTU of encapsulated geneve packet. */ +#define GENEVE_MAX_L3MTU (IP_MAXPACKET - \ + 60 /* Maximum IPv4 header len */ - \ + sizeof(struct udphdr) - \ + sizeof(struct genevehdr)) +#define GENEVE_MAX_MTU (GENEVE_MAX_L3MTU - \ + ETHER_HDR_LEN - ETHER_VLAN_ENCAP_LEN) + +#define GENEVE_BASIC_IFCAPS (IFCAP_LINKSTATE | IFCAP_JUMBO_MTU | IFCAP_NV) + +#define GENEVE_VERSION 0 +#define GENEVE_VNI_MASK (GENEVE_VNI_MAX - 1) + +#define GENEVE_HDR_VNI_SHIFT 8 + +#define GENEVE_SO_MC_MAX_GROUPS 32 + +#define GENEVE_SO_VNI_HASH_SHIFT 6 +#define GENEVE_SO_VNI_HASH_SIZE (1 << GENEVE_SO_VNI_HASH_SHIFT) +#define GENEVE_SO_VNI_HASH(_vni) ((_vni) % GENEVE_SO_VNI_HASH_SIZE) + +struct geneve_socket { + struct socket *gnvso_sock; + struct rmlock gnvso_lock; + u_int gnvso_refcnt; + union sockaddr_union gnvso_laddr; + LIST_ENTRY(geneve_socket) gnvso_entry; + struct geneve_softc_head gnvso_vni_hash[GENEVE_SO_VNI_HASH_SIZE]; + struct geneve_socket_mc_info gnvso_mc[GENEVE_SO_MC_MAX_GROUPS]; +}; + +#define GENEVE_SO_RLOCK(_gnvso, _p) rm_rlock(&(_gnvso)->gnvso_lock, (_p)) +#define GENEVE_SO_RUNLOCK(_gnvso, _p) rm_runlock(&(_gnvso)->gnvso_lock, (_p)) +#define GENEVE_SO_WLOCK(_gnvso) rm_wlock(&(_gnvso)->gnvso_lock) +#define GENEVE_SO_WUNLOCK(_gnvso) rm_wunlock(&(_gnvso)->gnvso_lock) +#define GENEVE_SO_LOCK_ASSERT(_gnvso) \ + rm_assert(&(_gnvso)->gnvso_lock, RA_LOCKED) +#define GENEVE_SO_LOCK_WASSERT(_gnvso) \ + 
rm_assert(&(_gnvso)->gnvso_lock, RA_WLOCKED) + +#define GENEVE_SO_ACQUIRE(_gnvso) refcount_acquire(&(_gnvso)->gnvso_refcnt) +#define GENEVE_SO_RELEASE(_gnvso) refcount_release(&(_gnvso)->gnvso_refcnt) + +struct gnv_ftable_entry { + LIST_ENTRY(gnv_ftable_entry) gnvfe_hash; + uint16_t gnvfe_flags; + uint8_t gnvfe_mac[ETHER_ADDR_LEN]; + union sockaddr_union gnvfe_raddr; + time_t gnvfe_expire; +}; + +#define GENEVE_FE_FLAG_DYNAMIC 0x01 +#define GENEVE_FE_FLAG_STATIC 0x02 + +#define GENEVE_FE_IS_DYNAMIC(_fe) \ + ((_fe)->gnvfe_flags & GENEVE_FE_FLAG_DYNAMIC) + +#define GENEVE_SC_FTABLE_SHIFT 9 +#define GENEVE_SC_FTABLE_SIZE (1 << GENEVE_SC_FTABLE_SHIFT) +#define GENEVE_SC_FTABLE_MASK (GENEVE_SC_FTABLE_SIZE - 1) +#define GENEVE_SC_FTABLE_HASH(_sc, _mac) \ + (geneve_mac_hash(_sc, _mac) % GENEVE_SC_FTABLE_SIZE) + +LIST_HEAD(geneve_ftable_head, gnv_ftable_entry); + +struct geneve_statistics { + uint32_t ftable_nospace; + uint32_t ftable_lock_upgrade_failed; + counter_u64_t txcsum; + counter_u64_t tso; + counter_u64_t rxcsum; +}; + +struct geneve_softc { + LIST_ENTRY(geneve_softc) gnv_entry; + + struct ifnet *gnv_ifp; + uint32_t gnv_flags; +#define GENEVE_FLAG_INIT 0x0001 +#define GENEVE_FLAG_RUNNING 0x0002 +#define GENEVE_FLAG_TEARDOWN 0x0004 +#define GENEVE_FLAG_LEARN 0x0008 +#define GENEVE_FLAG_USER_MTU 0x0010 +#define GENEVE_FLAG_TTL_INHERIT 0x0020 +#define GENEVE_FLAG_DSCP_INHERIT 0x0040 +#define GENEVE_FLAG_COLLECT_METADATA 0x0080 + + int gnv_reqcap; + int gnv_reqcap2; + struct geneve_socket *gnv_sock; + union sockaddr_union gnv_src_addr; + union sockaddr_union gnv_dst_addr; + uint32_t gnv_fibnum; + uint32_t gnv_vni; + uint32_t gnv_port_hash_key; + uint16_t gnv_proto; + uint16_t gnv_min_port; + uint16_t gnv_max_port; + uint8_t gnv_ttl; + enum ifla_geneve_df gnv_df; + + /* Lookup table from MAC address to forwarding entry. 
*/ + uint32_t gnv_ftable_cnt; + uint32_t gnv_ftable_max; + uint32_t gnv_ftable_timeout; + uint32_t gnv_ftable_hash_key; + struct geneve_ftable_head *gnv_ftable; + + /* Derived from gnv_dst_addr. */ + struct gnv_ftable_entry gnv_default_fe; + + struct ip_moptions *gnv_im4o; + struct ip6_moptions *gnv_im6o; + + struct rmlock gnv_lock; + volatile u_int gnv_refcnt; + + int gnv_so_mc_index; + struct geneve_statistics gnv_stats; + struct callout gnv_callout; + struct ether_addr gnv_hwaddr; + int gnv_mc_ifindex; + struct ifnet *gnv_mc_ifp; + struct ifmedia gnv_media; + char gnv_mc_ifname[IFNAMSIZ]; + + /* For rate limiting errors on the tx fast path. */ + struct timeval err_time; + int err_pps; +}; + +#define GENEVE_RLOCK(_sc, _p) rm_rlock(&(_sc)->gnv_lock, (_p)) +#define GENEVE_RUNLOCK(_sc, _p) rm_runlock(&(_sc)->gnv_lock, (_p)) +#define GENEVE_WLOCK(_sc) rm_wlock(&(_sc)->gnv_lock) +#define GENEVE_WUNLOCK(_sc) rm_wunlock(&(_sc)->gnv_lock) +#define GENEVE_LOCK_WOWNED(_sc) rm_wowned(&(_sc)->gnv_lock) +#define GENEVE_LOCK_ASSERT(_sc) rm_assert(&(_sc)->gnv_lock, RA_LOCKED) +#define GENEVE_LOCK_WASSERT(_sc) rm_assert(&(_sc)->gnv_lock, RA_WLOCKED) +#define GENEVE_UNLOCK(_sc, _p) do { \ + if (GENEVE_LOCK_WOWNED(_sc)) \ + GENEVE_WUNLOCK(_sc); \ + else \ + GENEVE_RUNLOCK(_sc, _p); \ +} while (0) + +#define GENEVE_ACQUIRE(_sc) refcount_acquire(&(_sc)->gnv_refcnt) +#define GENEVE_RELEASE(_sc) refcount_release(&(_sc)->gnv_refcnt) + +#define SATOCONSTSIN(sa) ((const struct sockaddr_in *)(sa)) +#define SATOCONSTSIN6(sa) ((const struct sockaddr_in6 *)(sa)) + +struct geneve_pkt_info { + u_int isr; + uint16_t ethertype; + uint8_t ecn; + uint8_t ttl; +}; + +struct nl_parsed_geneve { + /* essential */ + uint32_t ifla_vni; + uint16_t ifla_proto; + struct sockaddr *ifla_local; + struct sockaddr *ifla_remote; + uint16_t ifla_local_port; + uint16_t ifla_remote_port; + + /* optional */ + struct ifla_geneve_port_range ifla_port_range; + enum ifla_geneve_df ifla_df; + uint8_t ifla_ttl; + bool 
ifla_ttl_inherit; + bool ifla_dscp_inherit; + bool ifla_external; + + /* l2 specific */ + bool ifla_ftable_learn; + bool ifla_ftable_flush; + uint32_t ifla_ftable_max; + uint32_t ifla_ftable_timeout; + uint32_t ifla_ftable_count; /* read-only */ + + /* multicast specific */ + char *ifla_mc_ifname; + uint32_t ifla_mc_ifindex; /* read-only */ +}; + +/* The multicast-based learning parts of the code are taken from if_vxlan */ +static int geneve_ftable_addr_cmp(const uint8_t *, const uint8_t *); +static void geneve_ftable_init(struct geneve_softc *); +static void geneve_ftable_fini(struct geneve_softc *); +static void geneve_ftable_flush(struct geneve_softc *, int); +static void geneve_ftable_expire(struct geneve_softc *); +static int geneve_ftable_update_locked(struct geneve_softc *, + const union sockaddr_union *, const uint8_t *, + struct rm_priotracker *); +static int geneve_ftable_learn(struct geneve_softc *, + const struct sockaddr *, const uint8_t *); + +static struct gnv_ftable_entry * + geneve_ftable_entry_alloc(void); +static void geneve_ftable_entry_free(struct gnv_ftable_entry *); +static void geneve_ftable_entry_init(struct geneve_softc *, + struct gnv_ftable_entry *, const uint8_t *, + const struct sockaddr *, uint32_t); +static void geneve_ftable_entry_destroy(struct geneve_softc *, + struct gnv_ftable_entry *); +static int geneve_ftable_entry_insert(struct geneve_softc *, + struct gnv_ftable_entry *); +static struct gnv_ftable_entry * + geneve_ftable_entry_lookup(struct geneve_softc *, + const uint8_t *); + +static struct geneve_socket * + geneve_socket_alloc(union sockaddr_union *laddr); +static void geneve_socket_destroy(struct geneve_socket *); +static void geneve_socket_release(struct geneve_socket *); +static struct geneve_socket * + geneve_socket_lookup(union sockaddr_union *); +static void geneve_socket_insert(struct geneve_socket *); +static int geneve_socket_init(struct geneve_socket *, struct ifnet *); +static int geneve_socket_bind(struct 
geneve_socket *, struct ifnet *); +static int geneve_socket_create(struct ifnet *, int, + const union sockaddr_union *, struct geneve_socket **); +static int geneve_socket_set_df(struct geneve_socket *, bool); + +static struct geneve_socket * + geneve_socket_mc_lookup(const union sockaddr_union *); +static int geneve_sockaddr_mc_info_match( + const struct geneve_socket_mc_info *, + const union sockaddr_union *, + const union sockaddr_union *, int); +static int geneve_socket_mc_join_group(struct geneve_socket *, + const union sockaddr_union *, const union sockaddr_union *, + int *, union sockaddr_union *); +static int geneve_socket_mc_leave_group(struct geneve_socket *, + const union sockaddr_union *, + const union sockaddr_union *, int); +static int geneve_socket_mc_add_group(struct geneve_socket *, + const union sockaddr_union *, + const union sockaddr_union *, int, int *); +static void geneve_socket_mc_release_group(struct geneve_socket *, int); + +static struct geneve_softc * + geneve_socket_lookup_softc_locked(struct geneve_socket *, + uint32_t); +static struct geneve_softc * + geneve_socket_lookup_softc(struct geneve_socket *, uint32_t); +static int geneve_socket_insert_softc(struct geneve_socket *, + struct geneve_softc *); +static void geneve_socket_remove_softc(struct geneve_socket *, + struct geneve_softc *); + +static struct ifnet * + geneve_multicast_if_ref(struct geneve_softc *, uint32_t); +static void geneve_free_multicast(struct geneve_softc *); +static int geneve_setup_multicast_interface(struct geneve_softc *); + +static int geneve_setup_multicast(struct geneve_softc *); +static int geneve_setup_socket(struct geneve_softc *); +static void geneve_setup_interface_hdrlen(struct geneve_softc *); +static int geneve_valid_init_config(struct geneve_softc *); +static void geneve_init_complete(struct geneve_softc *); +static void geneve_init(void *); +static void geneve_release(struct geneve_softc *); +static void geneve_teardown_wait(struct geneve_softc *); 
+static void geneve_teardown_locked(struct geneve_softc *); +static void geneve_teardown(struct geneve_softc *); +static void geneve_timer(void *); + +static int geneve_flush_ftable(struct geneve_softc *, bool); +static uint16_t geneve_get_local_port(struct geneve_softc *); +static uint16_t geneve_get_remote_port(struct geneve_softc *); + +static int geneve_set_vni_nl(struct geneve_softc *, struct nl_pstate *, + uint32_t); +static int geneve_set_local_addr_nl(struct geneve_softc *, struct nl_pstate *, + struct sockaddr *); +static int geneve_set_remote_addr_nl(struct geneve_softc *, struct nl_pstate *, + struct sockaddr *); +static int geneve_set_local_port_nl(struct geneve_softc *, struct nl_pstate *, + uint16_t); +static int geneve_set_remote_port_nl(struct geneve_softc *, struct nl_pstate *, + uint16_t); +static int geneve_set_port_range_nl(struct geneve_softc *, struct nl_pstate *, + struct ifla_geneve_port_range); +static int geneve_set_df_nl(struct geneve_softc *, struct nl_pstate *, + enum ifla_geneve_df); +static int geneve_set_ttl_nl(struct geneve_softc *, struct nl_pstate *, + uint8_t); +static int geneve_set_ttl_inherit_nl(struct geneve_softc *, struct nl_pstate *, + bool); +static int geneve_set_dscp_inherit_nl(struct geneve_softc *, struct nl_pstate *, + bool); +static int geneve_set_collect_metadata_nl(struct geneve_softc *, + struct nl_pstate *, bool); +static int geneve_set_learn_nl(struct geneve_softc *, struct nl_pstate *, + bool); +static int geneve_set_ftable_max_nl(struct geneve_softc *, struct nl_pstate *, + uint32_t); +static int geneve_set_ftable_timeout_nl(struct geneve_softc *, + struct nl_pstate *, uint32_t); +static int geneve_set_mc_if_nl(struct geneve_softc *, struct nl_pstate *, + char *); +static int geneve_flush_ftable_nl(struct geneve_softc *, struct nl_pstate *, + bool); +static void geneve_get_local_addr_nl(struct geneve_softc *, struct nl_writer *); +static void geneve_get_remote_addr_nl(struct geneve_softc *, struct nl_writer 
*); + +static int geneve_ioctl_ifflags(struct geneve_softc *); +static int geneve_ioctl(struct ifnet *, u_long, caddr_t); + +static uint16_t geneve_pick_source_port(struct geneve_softc *, struct mbuf *); +static void geneve_encap_header(struct geneve_softc *, struct mbuf *, + int, uint16_t, uint16_t, uint16_t); +static uint16_t geneve_get_ethertype(struct mbuf *); +static int geneve_inherit_l3_hdr(struct mbuf *, struct geneve_softc *, + uint16_t, uint8_t *, uint8_t *, u_short *); +static int geneve_encap4(struct geneve_softc *, + const union sockaddr_union *, struct mbuf *); +static int geneve_encap6(struct geneve_softc *, + const union sockaddr_union *, struct mbuf *); +static int geneve_transmit(struct ifnet *, struct mbuf *); +static void geneve_qflush(struct ifnet *); +static int geneve_output(struct ifnet *, struct mbuf *, + const struct sockaddr *, struct route *); +static uint32_t geneve_map_etype_to_af(uint32_t); +static bool geneve_udp_input(struct mbuf *, int, struct inpcb *, + const struct sockaddr *, void *); +static int geneve_input_ether(struct geneve_softc *, struct mbuf **, + const struct sockaddr *, struct geneve_pkt_info *); +static int geneve_input_inherit(struct geneve_softc *, + struct mbuf **, int, struct geneve_pkt_info *); +static int geneve_next_option(struct geneve_socket *, struct genevehdr *, + struct mbuf **); +static void geneve_input_csum(struct mbuf *m, struct ifnet *ifp, + counter_u64_t rxcsum); + +static void geneve_stats_alloc(struct geneve_softc *); +static void geneve_stats_free(struct geneve_softc *); +static void geneve_set_default_config(struct geneve_softc *); +static int geneve_set_reqcap(struct geneve_softc *, struct ifnet *, int, + int); +static void geneve_set_hwcaps(struct geneve_softc *); +static int geneve_clone_create(struct if_clone *, char *, size_t, + struct ifc_data *, struct ifnet **); +static int geneve_clone_destroy(struct if_clone *, struct ifnet *, + uint32_t); +static int geneve_clone_create_nl(struct 
if_clone *, char *, size_t, + struct ifc_data_nl *); +static int geneve_clone_modify_nl(struct ifnet *, struct ifc_data_nl *); +static void geneve_clone_dump_nl(struct ifnet *, struct nl_writer *); + +static uint32_t geneve_mac_hash(struct geneve_softc *, const uint8_t *); +static int geneve_media_change(struct ifnet *); +static void geneve_media_status(struct ifnet *, struct ifmediareq *); + +static int geneve_sockaddr_cmp(const union sockaddr_union *, + const struct sockaddr *); +static void geneve_sockaddr_copy(union sockaddr_union *, + const struct sockaddr *); +static int geneve_sockaddr_in_equal(const union sockaddr_union *, + const struct sockaddr *); +static void geneve_sockaddr_in_copy(union sockaddr_union *, + const struct sockaddr *); +static int geneve_sockaddr_supported(const union sockaddr_union *, int); +static int geneve_sockaddr_in_any(const union sockaddr_union *); + +static int geneve_can_change_config(struct geneve_softc *); +static int geneve_check_proto(uint16_t); +static int geneve_check_multicast_addr(const union sockaddr_union *); +static int geneve_check_sockaddr(const union sockaddr_union *, const int); + +static int geneve_prison_remove(void *, void *); +static void vnet_geneve_load(void); +static void vnet_geneve_unload(void); +static void geneve_module_init(void); +static void geneve_module_deinit(void); +static int geneve_modevent(module_t, int, void *); + + +static const char geneve_name[] = "geneve"; +static MALLOC_DEFINE(M_GENEVE, geneve_name, + "Generic Network Virtualization Encapsulation Interface"); +#define MTAG_GENEVE_LOOP 0x93d66dc0 /* geneve mtag */ + +VNET_DEFINE_STATIC(struct if_clone *, geneve_cloner); +#define V_geneve_cloner VNET(geneve_cloner) + +static struct mtx geneve_list_mtx; +#define GENEVE_LIST_LOCK() mtx_lock(&geneve_list_mtx) +#define GENEVE_LIST_UNLOCK() mtx_unlock(&geneve_list_mtx) + +static LIST_HEAD(, geneve_socket) geneve_socket_list = LIST_HEAD_INITIALIZER(geneve_socket_list); + +/* Default maximum 
number of addresses in the forwarding table. */ +#define GENEVE_FTABLE_MAX 2000 + +/* Timeout (in seconds) of addresses learned in the forwarding table. */ +#define GENEVE_FTABLE_TIMEOUT (20 * 60) + +/* Maximum timeout (in seconds) of addresses learned in the forwarding table. */ +#define GENEVE_FTABLE_MAX_TIMEOUT (60 * 60 * 24) + +/* Number of seconds between pruning attempts of the forwarding table. */ +#define GENEVE_FTABLE_PRUNE (5 * 60) + +static int geneve_ftable_prune_period = GENEVE_FTABLE_PRUNE; + +#define _OUT(_field) offsetof(struct nl_parsed_geneve, _field) +static const struct nlattr_parser nla_p_geneve_create[] = { + { .type = IFLA_GENEVE_PROTOCOL, .off = _OUT(ifla_proto), .cb = nlattr_get_uint16 }, +}; +#undef _OUT +NL_DECLARE_ATTR_PARSER(geneve_create_parser, nla_p_geneve_create); + +#define _OUT(_field) offsetof(struct nl_parsed_geneve, _field) +static const struct nlattr_parser nla_p_geneve[] = { + { .type = IFLA_GENEVE_ID, .off = _OUT(ifla_vni), .cb = nlattr_get_uint32 }, + { .type = IFLA_GENEVE_PROTOCOL, .off = _OUT(ifla_proto), .cb = nlattr_get_uint16 }, + { .type = IFLA_GENEVE_LOCAL, .off = _OUT(ifla_local), .cb = nlattr_get_ip }, + { .type = IFLA_GENEVE_REMOTE, .off = _OUT(ifla_remote), .cb = nlattr_get_ip }, + { .type = IFLA_GENEVE_LOCAL_PORT, .off = _OUT(ifla_local_port), .cb = nlattr_get_uint16 }, + { .type = IFLA_GENEVE_PORT, .off = _OUT(ifla_remote_port), .cb = nlattr_get_uint16 }, + { .type = IFLA_GENEVE_PORT_RANGE, .off = _OUT(ifla_port_range), + .arg = (void *)sizeof(struct ifla_geneve_port_range), .cb = nlattr_get_bytes }, + { .type = IFLA_GENEVE_DF, .off = _OUT(ifla_df), .cb = nlattr_get_uint8 }, + { .type = IFLA_GENEVE_TTL, .off = _OUT(ifla_ttl), .cb = nlattr_get_uint8 }, + { .type = IFLA_GENEVE_TTL_INHERIT, .off = _OUT(ifla_ttl_inherit), .cb = nlattr_get_bool }, + { .type = IFLA_GENEVE_DSCP_INHERIT, .off = _OUT(ifla_dscp_inherit), .cb = nlattr_get_bool }, + { .type = IFLA_GENEVE_COLLECT_METADATA, .off = _OUT(ifla_external), .cb = 
nlattr_get_bool }, + { .type = IFLA_GENEVE_FTABLE_LEARN, .off = _OUT(ifla_ftable_learn), .cb = nlattr_get_bool }, + { .type = IFLA_GENEVE_FTABLE_FLUSH, .off = _OUT(ifla_ftable_flush), .cb = nlattr_get_bool }, + { .type = IFLA_GENEVE_FTABLE_MAX, .off = _OUT(ifla_ftable_max), .cb = nlattr_get_uint32 }, + { .type = IFLA_GENEVE_FTABLE_TIMEOUT, .off = _OUT(ifla_ftable_timeout), .cb = nlattr_get_uint32 }, + { .type = IFLA_GENEVE_MC_IFNAME, .off = _OUT(ifla_mc_ifname), .cb = nlattr_get_string }, +}; +#undef _OUT +NL_DECLARE_ATTR_PARSER(geneve_modify_parser, nla_p_geneve); + +static const struct nlhdr_parser *all_parsers[] = { + &geneve_create_parser, &geneve_modify_parser, +}; + +static int +geneve_ftable_addr_cmp(const uint8_t *a, const uint8_t *b) +{ + int i, d; + + for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++) + d = (int)a[i] - (int)b[i]; + + return (d); +} + +static void +geneve_ftable_init(struct geneve_softc *sc) +{ + int i; + + sc->gnv_ftable = malloc(sizeof(struct geneve_ftable_head) * + GENEVE_SC_FTABLE_SIZE, M_GENEVE, M_ZERO | M_WAITOK); + + for (i = 0; i < GENEVE_SC_FTABLE_SIZE; i++) + LIST_INIT(&sc->gnv_ftable[i]); + sc->gnv_ftable_hash_key = arc4random(); +} + +static void +geneve_ftable_fini(struct geneve_softc *sc) +{ + int i; + + for (i = 0; i < GENEVE_SC_FTABLE_SIZE; i++) { + KASSERT(LIST_EMPTY(&sc->gnv_ftable[i]), + ("%s: geneve %p ftable[%d] not empty", __func__, sc, i)); + } + MPASS(sc->gnv_ftable_cnt == 0); + + free(sc->gnv_ftable, M_GENEVE); + sc->gnv_ftable = NULL; +} + +static void +geneve_ftable_flush(struct geneve_softc *sc, int all) +{ + struct gnv_ftable_entry *fe, *tfe; + + for (int i = 0; i < GENEVE_SC_FTABLE_SIZE; i++) { + LIST_FOREACH_SAFE(fe, &sc->gnv_ftable[i], gnvfe_hash, tfe) { + if (all || GENEVE_FE_IS_DYNAMIC(fe)) + geneve_ftable_entry_destroy(sc, fe); + } + } +} + +static void +geneve_ftable_expire(struct geneve_softc *sc) +{ + struct gnv_ftable_entry *fe, *tfe; + + GENEVE_LOCK_WASSERT(sc); + + for (int i = 0; i < 
GENEVE_SC_FTABLE_SIZE; i++) { + LIST_FOREACH_SAFE(fe, &sc->gnv_ftable[i], gnvfe_hash, tfe) { + if (GENEVE_FE_IS_DYNAMIC(fe) && + time_uptime >= fe->gnvfe_expire) + geneve_ftable_entry_destroy(sc, fe); + } + } +} + +static int +geneve_ftable_update_locked(struct geneve_softc *sc, + const union sockaddr_union *unsa, const uint8_t *mac, + struct rm_priotracker *tracker) +{ + struct gnv_ftable_entry *fe; + int error; + + GENEVE_LOCK_ASSERT(sc); + +again: + /* + * A forwarding entry for this MAC address might already exist. If + * so, update it, otherwise create a new one. We may have to upgrade + * the lock if we have to change or create an entry. + */ + fe = geneve_ftable_entry_lookup(sc, mac); + if (fe != NULL) { + fe->gnvfe_expire = time_uptime + sc->gnv_ftable_timeout; + + if (!GENEVE_FE_IS_DYNAMIC(fe) || + geneve_sockaddr_in_equal(&fe->gnvfe_raddr, &unsa->sa)) + return (0); + if (!GENEVE_LOCK_WOWNED(sc)) { + GENEVE_RUNLOCK(sc, tracker); + GENEVE_WLOCK(sc); + sc->gnv_stats.ftable_lock_upgrade_failed++; + goto again; + } + geneve_sockaddr_in_copy(&fe->gnvfe_raddr, &unsa->sa); + return (0); + } + + if (!GENEVE_LOCK_WOWNED(sc)) { + GENEVE_RUNLOCK(sc, tracker); + GENEVE_WLOCK(sc); + sc->gnv_stats.ftable_lock_upgrade_failed++; + goto again; + } + + if (sc->gnv_ftable_cnt >= sc->gnv_ftable_max) { + sc->gnv_stats.ftable_nospace++; + return (ENOSPC); + } + + fe = geneve_ftable_entry_alloc(); + if (fe == NULL) + return (ENOMEM); + + geneve_ftable_entry_init(sc, fe, mac, &unsa->sa, GENEVE_FE_FLAG_DYNAMIC); + + /* The prior lookup failed, so the insert should not. */ + error = geneve_ftable_entry_insert(sc, fe); + MPASS(error == 0); + + return (error); +} + +static int +geneve_ftable_learn(struct geneve_softc *sc, const struct sockaddr *sa, + const uint8_t *mac) +{ + struct rm_priotracker tracker; + union sockaddr_union unsa; + int error; + + /* + * The source port may be randomly selected by the remote host, so + * use the port of the default destination address. 
+ */ + geneve_sockaddr_copy(&unsa, sa); + unsa.sin.sin_port = sc->gnv_dst_addr.sin.sin_port; + + if (unsa.sa.sa_family == AF_INET6) { + error = sa6_embedscope(&unsa.sin6, V_ip6_use_defzone); + if (error) + return (error); + } + + GENEVE_RLOCK(sc, &tracker); + error = geneve_ftable_update_locked(sc, &unsa, mac, &tracker); + GENEVE_UNLOCK(sc, &tracker); + + return (error); +} + +static struct gnv_ftable_entry * +geneve_ftable_entry_alloc(void) +{ + struct gnv_ftable_entry *fe; + + fe = malloc(sizeof(*fe), M_GENEVE, M_ZERO | M_NOWAIT); + + return (fe); +} + +static void +geneve_ftable_entry_free(struct gnv_ftable_entry *fe) +{ + + free(fe, M_GENEVE); +} + +static void +geneve_ftable_entry_init(struct geneve_softc *sc, struct gnv_ftable_entry *fe, + const uint8_t *mac, const struct sockaddr *sa, uint32_t flags) +{ + + fe->gnvfe_flags = flags; + fe->gnvfe_expire = time_uptime + sc->gnv_ftable_timeout; + memcpy(fe->gnvfe_mac, mac, ETHER_ADDR_LEN); + geneve_sockaddr_copy(&fe->gnvfe_raddr, sa); +} + +static void +geneve_ftable_entry_destroy(struct geneve_softc *sc, + struct gnv_ftable_entry *fe) +{ + + sc->gnv_ftable_cnt--; + LIST_REMOVE(fe, gnvfe_hash); + geneve_ftable_entry_free(fe); +} + +static int +geneve_ftable_entry_insert(struct geneve_softc *sc, + struct gnv_ftable_entry *fe) +{ + struct gnv_ftable_entry *lfe; + uint32_t hash; + int dir; + + GENEVE_LOCK_WASSERT(sc); + hash = GENEVE_SC_FTABLE_HASH(sc, fe->gnvfe_mac); + + lfe = LIST_FIRST(&sc->gnv_ftable[hash]); + if (lfe == NULL) { + LIST_INSERT_HEAD(&sc->gnv_ftable[hash], fe, gnvfe_hash); + goto out; + } + + do { + dir = geneve_ftable_addr_cmp(fe->gnvfe_mac, lfe->gnvfe_mac); + if (dir == 0) + return (EEXIST); + if (dir > 0) { + LIST_INSERT_BEFORE(lfe, fe, gnvfe_hash); + goto out; + } else if (LIST_NEXT(lfe, gnvfe_hash) == NULL) { + LIST_INSERT_AFTER(lfe, fe, gnvfe_hash); + goto out; + } else + lfe = LIST_NEXT(lfe, gnvfe_hash); + } while (lfe != NULL); + +out: + sc->gnv_ftable_cnt++; + + return (0); +} + +static 
struct gnv_ftable_entry * +geneve_ftable_entry_lookup(struct geneve_softc *sc, const uint8_t *mac) +{ + struct gnv_ftable_entry *fe; + uint32_t hash; + int dir; + + GENEVE_LOCK_ASSERT(sc); + + hash = GENEVE_SC_FTABLE_HASH(sc, mac); + LIST_FOREACH(fe, &sc->gnv_ftable[hash], gnvfe_hash) { + dir = geneve_ftable_addr_cmp(mac, fe->gnvfe_mac); + if (dir == 0) + return (fe); + if (dir > 0) + break; + } + + return (NULL); +} + +static struct geneve_socket * +geneve_socket_alloc(union sockaddr_union *laddr) +{ + struct geneve_socket *gnvso; + + gnvso = malloc(sizeof(*gnvso), M_GENEVE, M_WAITOK | M_ZERO); + rm_init(&gnvso->gnvso_lock, "genevesorm"); + refcount_init(&gnvso->gnvso_refcnt, 0); + for (int i = 0; i < GENEVE_SO_VNI_HASH_SIZE; i++) + LIST_INIT(&gnvso->gnvso_vni_hash[i]); + gnvso->gnvso_laddr = *laddr; + + return (gnvso); +} + +static void +geneve_socket_destroy(struct geneve_socket *gnvso) +{ + struct socket *so; + + so = gnvso->gnvso_sock; + if (so != NULL) { + gnvso->gnvso_sock = NULL; + soclose(so); + } + + rm_destroy(&gnvso->gnvso_lock); + free(gnvso, M_GENEVE); +} + +static void +geneve_socket_release(struct geneve_socket *gnvso) +{ + int destroy; + + GENEVE_LIST_LOCK(); *** 3327 LINES SKIPPED ***home | help
Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?69dcfad8.47191.15534965>
