Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 27 Mar 2023 13:55:52 GMT
From:      "Alexander V. Chernikov" <melifaro@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: 19e43c163c64 - main - netlink: add netlink KPI to the kernel by default
Message-ID:  <202303271355.32RDtqJ9006858@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by melifaro:

URL: https://cgit.FreeBSD.org/src/commit/?id=19e43c163c64636d2590dca006e22f18d22f48b2

commit 19e43c163c64636d2590dca006e22f18d22f48b2
Author:     Alexander V. Chernikov <melifaro@FreeBSD.org>
AuthorDate: 2023-03-27 11:59:30 +0000
Commit:     Alexander V. Chernikov <melifaro@FreeBSD.org>
CommitDate: 2023-03-27 13:55:44 +0000

    netlink: add netlink KPI to the kernel by default
    
    This change does the following:
    
    Base Netlink KPIs (ability to register the family, parse and/or
     write a Netlink message) are always present in the kernel. Specifically,
    * Implementation of genetlink family/group registration/removal,
      some base accessors (netlink_generic_kpi.c, 260 LoC) are compiled in
      unconditionally.
    * Basic TLV parser functions (netlink_message_parser.c, 507 LoC) are
      compiled in unconditionally.
    * Glue functions (netlink<>rtsock), malloc/core sysctl definitions
     (netlink_glue.c, 259 LoC) are compiled in unconditionally.
    * The rest of the KPI _functions_ are defined in netlink_glue.c,
     but their implementations call through a pointer to either the stub
     function or the actual function, depending on whether the module is loaded.
    
    This approach keeps only 1k LoC out of ~3.7k LoC (the current
     sys/netlink implementation) in the kernel, and that part will not
     grow further.
    It also allows kernel consumers of generic netlink to load
     successfully without requiring the Netlink module and to operate
     correctly once the Netlink module is loaded.
    
    Reviewed by:    imp
    MFC after:      2 weeks
    Differential Revision:  https://reviews.freebsd.org/D39269
---
 sys/conf/files                       |   4 +-
 sys/conf/options                     |   2 +-
 sys/modules/carp/Makefile            |   2 +-
 sys/modules/netlink/Makefile         |   6 +-
 sys/net/route.c                      |  19 ---
 sys/netinet/ip_carp.c                |   2 +
 sys/netlink/netlink_ctl.h            |  10 +-
 sys/netlink/netlink_domain.c         |  24 ---
 sys/netlink/netlink_generic.c        | 282 +++--------------------------------
 sys/netlink/netlink_generic_kpi.c    | 279 ++++++++++++++++++++++++++++++++++
 sys/netlink/netlink_io.c             |   2 +
 sys/netlink/netlink_message_writer.c |  22 +--
 sys/netlink/netlink_message_writer.h |  86 +++++++++++
 sys/netlink/netlink_module.c         |  26 +++-
 sys/netlink/netlink_var.h            |  44 ++++++
 sys/netlink/route/iface.c            |   2 +
 sys/netlink/route/neigh.c            |   2 +
 sys/netlink/route/nexthop.c          |   2 +
 sys/netlink/route/rt.c               |   2 +
 19 files changed, 497 insertions(+), 321 deletions(-)

diff --git a/sys/conf/files b/sys/conf/files
index 629283f7c071..94cd7135b277 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4424,10 +4424,12 @@ netipsec/xform_ipcomp.c		optional ipsec inet | ipsec inet6
 netipsec/xform_tcp.c		optional ipsec inet tcp_signature | \
 	 ipsec inet6 tcp_signature | ipsec_support inet tcp_signature | \
 	 ipsec_support inet6 tcp_signature
+netlink/netlink_generic_kpi.c	standard
+netlink/netlink_glue.c		standard
+netlink/netlink_message_parser.c	standard
 netlink/netlink_domain.c	optional netlink
 netlink/netlink_generic.c	optional netlink
 netlink/netlink_io.c		optional netlink
-netlink/netlink_message_parser.c	optional netlink
 netlink/netlink_message_writer.c	optional netlink
 netlink/netlink_module.c	optional netlink
 netlink/netlink_route.c		optional netlink
diff --git a/sys/conf/options b/sys/conf/options
index 6dd19582f346..173c56229084 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -463,7 +463,7 @@ MBUF_PROFILING
 MBUF_STRESS_TEST
 MROUTING		opt_mrouting.h
 NFSLOCKD
-NETLINK
+NETLINK			opt_netlink.h
 PF_DEFAULT_TO_DROP	opt_pf.h
 ROUTE_MPATH		opt_route.h
 ROUTETABLES		opt_route.h
diff --git a/sys/modules/carp/Makefile b/sys/modules/carp/Makefile
index 052687381ba6..faf3af66ece1 100644
--- a/sys/modules/carp/Makefile
+++ b/sys/modules/carp/Makefile
@@ -6,6 +6,6 @@
 KMOD=	carp
 SRCS=	ip_carp.c sha1.c
 SRCS+=	device_if.h bus_if.h vnode_if.h
-SRCS+=	opt_carp.h opt_bpf.h opt_inet.h opt_inet6.h opt_ofed.h
+SRCS+=	opt_carp.h opt_bpf.h opt_inet.h opt_inet6.h opt_ofed.h opt_netlink.h
 
 .include <bsd.kmod.mk>
diff --git a/sys/modules/netlink/Makefile b/sys/modules/netlink/Makefile
index 791b953e5b84..667c0b2475fe 100644
--- a/sys/modules/netlink/Makefile
+++ b/sys/modules/netlink/Makefile
@@ -2,10 +2,12 @@
 KMOD=	netlink
 
 SRCS =	netlink_module.c netlink_domain.c netlink_io.c \
-	netlink_message_parser.c netlink_message_writer.c  netlink_generic.c \
+	netlink_message_writer.c  netlink_generic.c \
 	netlink_route.c route/iface.c route/iface_drivers.c route/neigh.c \
 	route/nexthop.c route/rt.c
-SRCS+=	opt_inet.h opt_inet6.h opt_route.h
+SRCS+=	opt_inet.h opt_inet6.h opt_route.h opt_netlink.h
+
+CFLAGS+=	-DNETLINK_MODULE
 
 EXPORT_SYMS=
 EXPORT_SYMS+=	nlmsg_get_chain_writer
diff --git a/sys/net/route.c b/sys/net/route.c
index 1373b0986876..e8b2ca60da18 100644
--- a/sys/net/route.c
+++ b/sys/net/route.c
@@ -703,22 +703,3 @@ rt_ifmsg(struct ifnet *ifp, int if_flags_mask)
 	netlink_callback_p->ifmsg_f(ifp, if_flags_mask);
 }
 
-/* Netlink-related callbacks needed to glue rtsock, netlink and linuxolator */
-static void
-ignore_route_event(uint32_t fibnum, const struct rib_cmd_info *rc)
-{
-}
-
-static void
-ignore_ifmsg_event(struct ifnet *ifp, int if_flags_mask)
-{
-}
-
-static struct rtbridge ignore_cb = {
-	.route_f = ignore_route_event,
-	.ifmsg_f = ignore_ifmsg_event,
-};
-
-void *linux_netlink_p = NULL; /* Callback pointer for Linux translator functions */
-struct rtbridge *rtsock_callback_p = &ignore_cb;
-struct rtbridge *netlink_callback_p = &ignore_cb;
diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c
index a4ed121aeeb2..deb2f3f347d9 100644
--- a/sys/netinet/ip_carp.c
+++ b/sys/netinet/ip_carp.c
@@ -28,6 +28,8 @@
  * THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include "opt_netlink.h"
+
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
diff --git a/sys/netlink/netlink_ctl.h b/sys/netlink/netlink_ctl.h
index 9369194151af..6c195c0217a9 100644
--- a/sys/netlink/netlink_ctl.h
+++ b/sys/netlink/netlink_ctl.h
@@ -33,6 +33,7 @@
  * This file provides headers for the public KPI of the netlink
  * subsystem
  */
+#include <sys/_eventhandler.h>
 
 MALLOC_DECLARE(M_NETLINK);
 
@@ -81,6 +82,7 @@ bool netlink_unregister_proto(int proto);
 bool nl_has_listeners(int netlink_family, uint32_t groups_mask);
 bool nlp_has_priv(struct nlpcb *nlp, int priv);
 struct ucred *nlp_get_cred(struct nlpcb *nlp);
+uint32_t nlp_get_pid(const struct nlpcb *nlp);
 bool nlp_unconstrained_vnet(const struct nlpcb *nlp);
 
 /* netlink_generic.c */
@@ -99,8 +101,12 @@ bool genl_register_cmds(const char *family_name, const struct genl_cmd *cmds,
     int count);
 uint32_t genl_register_group(const char *family_name, const char *group_name);
 
-/* Debug */
-uint32_t nlp_get_pid(const struct nlpcb *nlp);
+struct genl_family;
+const char *genl_get_family_name(const struct genl_family *gf);
+uint32_t genl_get_family_id(const struct genl_family *gf);
+
+typedef void (*genl_family_event_handler_t)(void *arg, const struct genl_family *gf, int action);
+EVENTHANDLER_DECLARE(genl_family_event, genl_family_event_handler_t);
 
 #endif
 #endif
diff --git a/sys/netlink/netlink_domain.c b/sys/netlink/netlink_domain.c
index 24ca9de877f0..8b0d09ac0b66 100644
--- a/sys/netlink/netlink_domain.c
+++ b/sys/netlink/netlink_domain.c
@@ -84,12 +84,6 @@ SYSCTL_OID(_net_netlink, OID_AUTO, nl_maxsockbuf,
     sysctl_handle_nl_maxsockbuf, "LU",
     "Maximum Netlink socket buffer size");
 
-uint32_t
-nlp_get_pid(const struct nlpcb *nlp)
-{
-	return (nlp->nl_process_id);
-}
-
 /*
  * Looks up a nlpcb struct based on the @portid. Need to claim nlsock_mtx.
  * Returns nlpcb pointer if present else NULL
@@ -211,24 +205,6 @@ nl_has_listeners(int netlink_family, uint32_t groups_mask)
 	return (V_nl_ctl != NULL);
 }
 
-bool
-nlp_has_priv(struct nlpcb *nlp, int priv)
-{
-	return (priv_check_cred(nlp->nl_cred, priv) == 0);
-}
-
-bool
-nlp_unconstrained_vnet(const struct nlpcb *nlp)
-{
-	return (nlp->nl_unconstrained_vnet);
-}
-
-struct ucred *
-nlp_get_cred(struct nlpcb *nlp)
-{
-	return (nlp->nl_cred);
-}
-
 static uint32_t
 nl_find_port(void)
 {
diff --git a/sys/netlink/netlink_generic.c b/sys/netlink/netlink_generic.c
index 16b49d5aa9ce..4595b3074d50 100644
--- a/sys/netlink/netlink_generic.c
+++ b/sys/netlink/netlink_generic.c
@@ -25,11 +25,14 @@
  * SUCH DAMAGE.
  */
 
+#include "opt_netlink.h"
+
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 #include <sys/types.h>
 #include <sys/ck.h>
 #include <sys/epoch.h>
+#include <sys/eventhandler.h>
 #include <sys/kernel.h>
 #include <sys/jail.h>
 #include <sys/lock.h>
@@ -41,242 +44,15 @@ __FBSDID("$FreeBSD$");
 #include <netlink/netlink.h>
 #include <netlink/netlink_ctl.h>
 #include <netlink/netlink_generic.h>
+#include <netlink/netlink_var.h>
 
 #define	DEBUG_MOD_NAME	nl_generic
 #define	DEBUG_MAX_LEVEL	LOG_DEBUG3
 #include <netlink/netlink_debug.h>
 _DECLARE_DEBUG(LOG_DEBUG);
 
-#define	CTRL_FAMILY_NAME	"nlctrl"
-
-#define	MAX_FAMILIES	20
-#define	MAX_GROUPS	64
-
-#define	MIN_GROUP_NUM	48
-
-static struct sx sx_lock;
-
-#define	GENL_LOCK_INIT()	sx_init(&sx_lock, "genetlink lock")
-#define	GENL_LOCK_DESTROY()	sx_destroy(&sx_lock)
-#define	GENL_LOCK()		sx_xlock(&sx_lock)
-#define	GENL_UNLOCK()		sx_xunlock(&sx_lock)
-
-struct genl_family {
-	const char	*family_name;
-	uint16_t	family_hdrsize;
-	uint16_t	family_id;
-	uint16_t	family_version;
-	uint16_t	family_attr_max;
-	uint16_t	family_cmd_size;
-	uint16_t	family_num_groups;
-	struct genl_cmd	*family_cmds;
-};
-
-static struct genl_family	families[MAX_FAMILIES];
-
-
-struct genl_group {
-	struct genl_family	*group_family;
-	const char		*group_name;
-};
-static struct genl_group	groups[MAX_GROUPS];
-
-
 static int dump_family(struct nlmsghdr *hdr, struct genlmsghdr *ghdr,
     const struct genl_family *gf, struct nl_writer *nw);
-static void nlctrl_notify(const struct genl_family *gf, int action);
-
-static struct genl_family *
-find_family(const char *family_name)
-{
-	for (int i = 0; i < MAX_FAMILIES; i++) {
-		struct genl_family *gf = &families[i];
-		if (gf->family_name != NULL && !strcmp(gf->family_name, family_name))
-			return (gf);
-	}
-
-	return (NULL);
-}
-
-static struct genl_family *
-find_empty_family_id(const char *family_name)
-{
-	struct genl_family *gf = NULL;
-
-	if (!strcmp(family_name, CTRL_FAMILY_NAME)) {
-		gf = &families[0];
-		gf->family_id = GENL_MIN_ID;
-	} else {
-		/* Index 0 is reserved for the control family */
-		for (int i = 1; i < MAX_FAMILIES; i++) {
-			struct genl_family *gf = &families[i];
-			if (gf->family_name == NULL) {
-				gf->family_id = GENL_MIN_ID + i;
-				break;
-			}
-		}
-	}
-
-	return (gf);
-}
-
-uint32_t
-genl_register_family(const char *family_name, size_t hdrsize, int family_version,
-    int max_attr_idx)
-{
-	uint32_t family_id = 0;
-
-	MPASS(family_name != NULL);
-	if (find_family(family_name) != NULL)
-		return (0);
-
-	GENL_LOCK();
-
-	struct genl_family *gf = find_empty_family_id(family_name);
-	MPASS(gf != NULL);
-
-	gf->family_name = family_name;
-	gf->family_version = family_version;
-	gf->family_hdrsize = hdrsize;
-	gf->family_attr_max = max_attr_idx;
-	NL_LOG(LOG_DEBUG2, "Registered family %s id %d", gf->family_name, gf->family_id);
-	family_id = gf->family_id;
-	nlctrl_notify(gf, CTRL_CMD_NEWFAMILY);
-
-	GENL_UNLOCK();
-
-	return (family_id);
-}
-
-static void
-free_family(struct genl_family *gf)
-{
-	if (gf->family_cmds != NULL)
-		free(gf->family_cmds, M_NETLINK);
-}
-
-/*
- * unregister groups of a given family
- */
-static void
-unregister_groups(const struct genl_family *gf)
-{
-
-	for (int i = 0; i < MAX_GROUPS; i++) {
-		struct genl_group *gg = &groups[i];
-		if (gg->group_family == gf && gg->group_name != NULL) {
-			gg->group_family = NULL;
-			gg->group_name = NULL;
-		}
-	}
-}
-
-/*
- * Can sleep, I guess
- */
-bool
-genl_unregister_family(const char *family_name)
-{
-	bool found = false;
-
-	GENL_LOCK();
-	struct genl_family *gf = find_family(family_name);
-
-	if (gf != NULL) {
-		nlctrl_notify(gf, CTRL_CMD_DELFAMILY);
-		found = true;
-		unregister_groups(gf);
-		/* TODO: zero pointer first */
-		free_family(gf);
-		bzero(gf, sizeof(*gf));
-	}
-	GENL_UNLOCK();
-
-	return (found);
-}
-
-bool
-genl_register_cmds(const char *family_name, const struct genl_cmd *cmds, int count)
-{
-	GENL_LOCK();
-	struct genl_family *gf = find_family(family_name);
-	if (gf == NULL) {
-		GENL_UNLOCK();
-		return (false);
-	}
-
-	int cmd_size = gf->family_cmd_size;
-
-	for (int i = 0; i < count; i++) {
-		MPASS(cmds[i].cmd_cb != NULL);
-		if (cmds[i].cmd_num >= cmd_size)
-			cmd_size = cmds[i].cmd_num + 1;
-	}
-
-	if (cmd_size > gf->family_cmd_size) {
-		/* need to realloc */
-		size_t sz = cmd_size * sizeof(struct genl_cmd);
-		void *data = malloc(sz, M_NETLINK, M_WAITOK | M_ZERO);
-
-		memcpy(data, gf->family_cmds, gf->family_cmd_size * sizeof(struct genl_cmd));
-		void *old_data = gf->family_cmds;
-		gf->family_cmds = data;
-		gf->family_cmd_size = cmd_size;
-		free(old_data, M_NETLINK);
-	}
-
-	for (int i = 0; i < count; i++) {
-		const struct genl_cmd *cmd = &cmds[i];
-		MPASS(gf->family_cmds[cmd->cmd_num].cmd_cb == NULL);
-		gf->family_cmds[cmd->cmd_num] = cmds[i];
-		NL_LOG(LOG_DEBUG2, "Adding cmd %s(%d) to family %s",
-		    cmd->cmd_name, cmd->cmd_num, gf->family_name);
-	}
-	GENL_UNLOCK();
-	return (true);
-}
-
-static struct genl_group *
-find_group(const struct genl_family *gf, const char *group_name)
-{
-	for (int i = 0; i < MAX_GROUPS; i++) {
-		struct genl_group *gg = &groups[i];
-		if (gg->group_family == gf && !strcmp(gg->group_name, group_name))
-			return (gg);
-	}
-	return (NULL);
-}
-
-uint32_t
-genl_register_group(const char *family_name, const char *group_name)
-{
-	uint32_t group_id = 0;
-
-	MPASS(family_name != NULL);
-	MPASS(group_name != NULL);
-
-	GENL_LOCK();
-	struct genl_family *gf = find_family(family_name);
-
-	if (gf == NULL || find_group(gf, group_name) != NULL) {
-		GENL_UNLOCK();
-		return (0);
-	}
-
-	for (int i = 0; i < MAX_GROUPS; i++) {
-		struct genl_group *gg = &groups[i];
-		if (gg->group_family == NULL) {
-			gf->family_num_groups++;
-			gg->group_family = gf;
-			gg->group_name = group_name;
-			group_id = i + MIN_GROUP_NUM;
-			break;
-		}
-	}
-	GENL_UNLOCK();
-
-	return (group_id);
-}
 
 /*
  * Handler called by netlink subsystem when matching netlink message is received
@@ -285,11 +61,12 @@ static int
 genl_handle_message(struct nlmsghdr *hdr, struct nl_pstate *npt)
 {
 	struct nlpcb *nlp = npt->nlp;
+	struct genl_family *gf = NULL;
 	int error = 0;
 
 	int family_id = (int)hdr->nlmsg_type - GENL_MIN_ID;
 
-	if (__predict_false(family_id < 0 || family_id >= MAX_FAMILIES)) {
+	if (__predict_false(family_id < 0 || (gf = genl_get_family(family_id)) == NULL)) {
 		NLP_LOG(LOG_DEBUG, nlp, "invalid message type: %d", hdr->nlmsg_type);
 		return (ENOTSUP);
 	}
@@ -299,8 +76,6 @@ genl_handle_message(struct nlmsghdr *hdr, struct nl_pstate *npt)
 		return (EINVAL);
 	}
 
-	struct genl_family *gf = &families[family_id];
-
 	struct genlmsghdr *ghdr = (struct genlmsghdr *)(hdr + 1);
 
 	if (ghdr->cmd >= gf->family_cmd_size || gf->family_cmds[ghdr->cmd].cmd_cb == NULL) {
@@ -375,8 +150,8 @@ dump_family(struct nlmsghdr *hdr, struct genlmsghdr *ghdr,
 		if (off == 0)
 			goto enomem;
 		for (int i = 0, cnt = 0; i < MAX_GROUPS; i++) {
-			struct genl_group *gg = &groups[i];
-			if (gg->group_family != gf)
+			struct genl_group *gg = genl_get_group(i);
+			if (gg == NULL || gg->group_family != gf)
 				continue;
 
 			int cmd_off = nlattr_add_nested(nw, ++cnt);
@@ -398,6 +173,8 @@ enomem:
 
 
 /* Declare ourself as a user */
+static void nlctrl_notify(void *arg, const struct genl_family *gf, int action);
+static eventhandler_tag family_event_tag;
 
 static uint32_t ctrl_family_id;
 static uint32_t ctrl_group_id;
@@ -451,8 +228,8 @@ nlctrl_handle_getfamily(struct nlmsghdr *hdr, struct nl_pstate *npt)
 	if (attrs.family_id != 0 || attrs.family_name != NULL) {
 		/* Resolve request */
 		for (int i = 0; i < MAX_FAMILIES; i++) {
-			struct genl_family *gf = &families[i];
-			if (match_family(gf, &attrs)) {
+			struct genl_family *gf = genl_get_family(i);
+			if (gf != NULL && match_family(gf, &attrs)) {
 				error = dump_family(hdr, &ghdr, gf, npt->nw);
 				return (error);
 			}
@@ -462,8 +239,8 @@ nlctrl_handle_getfamily(struct nlmsghdr *hdr, struct nl_pstate *npt)
 
 	hdr->nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI;
 	for (int i = 0; i < MAX_FAMILIES; i++) {
-		struct genl_family *gf = &families[i];
-		if (match_family(gf, &attrs)) {
+		struct genl_family *gf = genl_get_family(i);
+		if (gf != NULL && match_family(gf, &attrs)) {
 			error = dump_family(hdr, &ghdr, gf, npt->nw);
 			if (error != 0)
 				break;
@@ -479,7 +256,7 @@ nlctrl_handle_getfamily(struct nlmsghdr *hdr, struct nl_pstate *npt)
 }
 
 static void
-nlctrl_notify(const struct genl_family *gf, int cmd)
+nlctrl_notify(void *arg __unused, const struct genl_family *gf, int cmd)
 {
 	struct nlmsghdr hdr = {.nlmsg_type = NETLINK_GENERIC };
 	struct genlmsghdr ghdr = { .cmd = cmd };
@@ -502,37 +279,26 @@ static const struct genl_cmd nlctrl_cmds[] = {
 	},
 };
 
-static void
-genl_nlctrl_init(void)
-{
-	ctrl_family_id = genl_register_family(CTRL_FAMILY_NAME, 0, 2, CTRL_ATTR_MAX);
-	genl_register_cmds(CTRL_FAMILY_NAME, nlctrl_cmds, NL_ARRAY_LEN(nlctrl_cmds));
-	ctrl_group_id = genl_register_group(CTRL_FAMILY_NAME, "notify");
-}
-
-static void
-genl_nlctrl_destroy(void)
-{
-	genl_unregister_family(CTRL_FAMILY_NAME);
-}
-
 static const struct nlhdr_parser *all_parsers[] = { &genl_parser };
 
 static void
-genl_load(void *u __unused)
+genl_load_all(void *u __unused)
 {
-	GENL_LOCK_INIT();
 	NL_VERIFY_PARSERS(all_parsers);
+	ctrl_family_id = genl_register_family(CTRL_FAMILY_NAME, 0, 2, CTRL_ATTR_MAX);
+	genl_register_cmds(CTRL_FAMILY_NAME, nlctrl_cmds, NL_ARRAY_LEN(nlctrl_cmds));
+	ctrl_group_id = genl_register_group(CTRL_FAMILY_NAME, "notify");
+	family_event_tag = EVENTHANDLER_REGISTER(genl_family_event, nlctrl_notify, NULL,
+	    EVENTHANDLER_PRI_ANY);
 	netlink_register_proto(NETLINK_GENERIC, "NETLINK_GENERIC", genl_handle_message);
-	genl_nlctrl_init();
 }
-SYSINIT(genl_load, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, genl_load, NULL);
+SYSINIT(genl_load_all, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, genl_load_all, NULL);
 
 static void
 genl_unload(void *u __unused)
 {
-	genl_nlctrl_destroy();
-	GENL_LOCK_DESTROY();
+	EVENTHANDLER_DEREGISTER(genl_family_event, family_event_tag);
+	genl_unregister_family(CTRL_FAMILY_NAME);
 	NET_EPOCH_WAIT();
 }
 SYSUNINIT(genl_unload, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, genl_unload, NULL);
diff --git a/sys/netlink/netlink_generic_kpi.c b/sys/netlink/netlink_generic_kpi.c
new file mode 100644
index 000000000000..b64f6bd3f1b6
--- /dev/null
+++ b/sys/netlink/netlink_generic_kpi.c
@@ -0,0 +1,279 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include <sys/types.h>
+#include <sys/ck.h>
+#include <sys/epoch.h>
+#include <sys/eventhandler.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/sx.h>
+
+#include <netlink/netlink.h>
+#include <netlink/netlink_ctl.h>
+#include <netlink/netlink_generic.h>
+#include <netlink/netlink_var.h>
+
+#define	DEBUG_MOD_NAME	nl_generic_kpi
+#define	DEBUG_MAX_LEVEL	LOG_DEBUG3
+#include <netlink/netlink_debug.h>
+_DECLARE_DEBUG(LOG_DEBUG3);
+
+
+/*
+ * NETLINK_GENERIC families/groups registration logic
+ */
+
+#define	GENL_LOCK()		sx_xlock(&sx_lock)
+#define	GENL_UNLOCK()		sx_xunlock(&sx_lock)
+static struct sx sx_lock;
+SX_SYSINIT(genl_lock, &sx_lock, "genetlink lock");
+
+static struct genl_family	families[MAX_FAMILIES];
+static struct genl_group	groups[MAX_GROUPS];
+
+static struct genl_family *
+find_family(const char *family_name)
+{
+	for (int i = 0; i < MAX_FAMILIES; i++) {
+		struct genl_family *gf = &families[i];
+		if (gf->family_name != NULL && !strcmp(gf->family_name, family_name))
+			return (gf);
+	}
+
+	return (NULL);
+}
+
+/* Returns a free slot with its family_id assigned, or NULL if the table is full. */
+static struct genl_family *
+find_empty_family_id(const char *family_name)
+{
+	if (!strcmp(family_name, CTRL_FAMILY_NAME)) {
+		families[0].family_id = GENL_MIN_ID;
+		return (&families[0]);
+	}
+
+	/* Index 0 is reserved for the control family */
+	for (int i = 1; i < MAX_FAMILIES; i++) {
+		if (families[i].family_name == NULL) {
+			families[i].family_id = GENL_MIN_ID + i;
+			return (&families[i]);
+		}
+	}
+	return (NULL);
+}
+
+/*
+ * Registers a generic netlink family and announces it via genl_family_event.
+ * Returns the assigned family id, or 0 if @family_name is already registered
+ * or no free slot is left.
+ */
+uint32_t
+genl_register_family(const char *family_name, size_t hdrsize, int family_version,
+    int max_attr_idx)
+{
+	struct genl_family *gf;
+	uint32_t family_id = 0;
+
+	MPASS(family_name != NULL);
+
+	GENL_LOCK();
+	/* Check for duplicates under the lock so concurrent callers cannot race. */
+	if (find_family(family_name) == NULL &&
+	    (gf = find_empty_family_id(family_name)) != NULL) {
+		gf->family_name = family_name;
+		gf->family_version = family_version;
+		gf->family_hdrsize = hdrsize;
+		gf->family_attr_max = max_attr_idx;
+		NL_LOG(LOG_DEBUG2, "Registered family %s id %d", gf->family_name, gf->family_id);
+		family_id = gf->family_id;
+		EVENTHANDLER_INVOKE(genl_family_event, gf, CTRL_CMD_NEWFAMILY);
+	}
+	GENL_UNLOCK();
+
+	return (family_id);
+}
+
+static void
+free_family(struct genl_family *gf)
+{
+	if (gf->family_cmds != NULL)
+		free(gf->family_cmds, M_NETLINK);
+}
+
+/*
+ * unregister groups of a given family
+ */
+static void
+unregister_groups(const struct genl_family *gf)
+{
+
+	for (int i = 0; i < MAX_GROUPS; i++) {
+		struct genl_group *gg = &groups[i];
+		if (gg->group_family == gf && gg->group_name != NULL) {
+			gg->group_family = NULL;
+			gg->group_name = NULL;
+		}
+	}
+}
+
+/*
+ * Can sleep, I guess
+ */
+bool
+genl_unregister_family(const char *family_name)
+{
+	bool found = false;
+
+	GENL_LOCK();
+	struct genl_family *gf = find_family(family_name);
+
+	if (gf != NULL) {
+		EVENTHANDLER_INVOKE(genl_family_event, gf, CTRL_CMD_DELFAMILY);
+		found = true;
+		unregister_groups(gf);
+		/* TODO: zero pointer first */
+		free_family(gf);
+		bzero(gf, sizeof(*gf));
+	}
+	GENL_UNLOCK();
+
+	return (found);
+}
+
+bool
+genl_register_cmds(const char *family_name, const struct genl_cmd *cmds, int count)
+{
+	GENL_LOCK();
+	struct genl_family *gf = find_family(family_name);
+	if (gf == NULL) {
+		GENL_UNLOCK();
+		return (false);
+	}
+
+	int cmd_size = gf->family_cmd_size;
+
+	for (int i = 0; i < count; i++) {
+		MPASS(cmds[i].cmd_cb != NULL);
+		if (cmds[i].cmd_num >= cmd_size)
+			cmd_size = cmds[i].cmd_num + 1;
+	}
+
+	if (cmd_size > gf->family_cmd_size) {
+		/* need to realloc */
+		size_t sz = cmd_size * sizeof(struct genl_cmd);
+		void *data = malloc(sz, M_NETLINK, M_WAITOK | M_ZERO);
+
+		memcpy(data, gf->family_cmds, gf->family_cmd_size * sizeof(struct genl_cmd));
+		void *old_data = gf->family_cmds;
+		gf->family_cmds = data;
+		gf->family_cmd_size = cmd_size;
+		free(old_data, M_NETLINK);
+	}
+
+	for (int i = 0; i < count; i++) {
+		const struct genl_cmd *cmd = &cmds[i];
+		MPASS(gf->family_cmds[cmd->cmd_num].cmd_cb == NULL);
+		gf->family_cmds[cmd->cmd_num] = cmds[i];
+		NL_LOG(LOG_DEBUG2, "Adding cmd %s(%d) to family %s",
+		    cmd->cmd_name, cmd->cmd_num, gf->family_name);
+	}
+	GENL_UNLOCK();
+	return (true);
+}
+
+static struct genl_group *
+find_group(const struct genl_family *gf, const char *group_name)
+{
+	for (int i = 0; i < MAX_GROUPS; i++) {
+		struct genl_group *gg = &groups[i];
+		if (gg->group_family == gf && !strcmp(gg->group_name, group_name))
+			return (gg);
+	}
+	return (NULL);
+}
+
+uint32_t
+genl_register_group(const char *family_name, const char *group_name)
+{
+	uint32_t group_id = 0;
+
+	MPASS(family_name != NULL);
+	MPASS(group_name != NULL);
+
+	GENL_LOCK();
+	struct genl_family *gf = find_family(family_name);
+
+	if (gf == NULL || find_group(gf, group_name) != NULL) {
+		GENL_UNLOCK();
+		return (0);
+	}
+
+	for (int i = 0; i < MAX_GROUPS; i++) {
+		struct genl_group *gg = &groups[i];
+		if (gg->group_family == NULL) {
+			gf->family_num_groups++;
+			gg->group_family = gf;
+			gg->group_name = group_name;
+			group_id = i + MIN_GROUP_NUM;
+			break;
+		}
+	}
+	GENL_UNLOCK();
+
+	return (group_id);
+}
+
+/* accessors */
+struct genl_family *
+genl_get_family(uint32_t family_id)
+{
+	return ((family_id < MAX_FAMILIES) ? &families[family_id] : NULL);
+}
+
+const char *
+genl_get_family_name(const struct genl_family *gf)
+{
+	return (gf->family_name);
+}
+
+uint32_t
+genl_get_family_id(const struct genl_family *gf)
+{
+	return (gf->family_id);
+}
+
+struct genl_group *
+genl_get_group(uint32_t group_id)
+{
+	return ((group_id < MAX_GROUPS) ? &groups[group_id] : NULL);
+}
+
diff --git a/sys/netlink/netlink_io.c b/sys/netlink/netlink_io.c
index 509065d04818..b40ffaab7dd9 100644
--- a/sys/netlink/netlink_io.c
+++ b/sys/netlink/netlink_io.c
@@ -26,6 +26,8 @@
  * SUCH DAMAGE.
  */
 
+#include "opt_netlink.h"
+
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 #include <sys/param.h>
diff --git a/sys/netlink/netlink_message_writer.c b/sys/netlink/netlink_message_writer.c
index 8a9315eedd1b..884295939ce5 100644
--- a/sys/netlink/netlink_message_writer.c
+++ b/sys/netlink/netlink_message_writer.c
@@ -25,6 +25,8 @@
  * SUCH DAMAGE.
  */
 
+#include "opt_netlink.h"
+
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 #include <sys/param.h>
@@ -434,7 +436,7 @@ nlmsg_get_buf(struct nl_writer *nw, int size, bool waitok, bool is_linux)
 }
 
 bool
-nlmsg_get_unicast_writer(struct nl_writer *nw, int size, struct nlpcb *nlp)
+_nlmsg_get_unicast_writer(struct nl_writer *nw, int size, struct nlpcb *nlp)
 {
 	if (!nlmsg_get_buf(nw, size, false, nlp->nl_linux))
 		return (false);
@@ -445,7 +447,7 @@ nlmsg_get_unicast_writer(struct nl_writer *nw, int size, struct nlpcb *nlp)
 }
 
 bool
-nlmsg_get_group_writer(struct nl_writer *nw, int size, int protocol, int group_id)
+_nlmsg_get_group_writer(struct nl_writer *nw, int size, int protocol, int group_id)
 {
 	if (!nlmsg_get_buf(nw, size, false, false))
 		return (false);
@@ -456,7 +458,7 @@ nlmsg_get_group_writer(struct nl_writer *nw, int size, int protocol, int group_i
 }
 
 bool
-nlmsg_get_chain_writer(struct nl_writer *nw, int size, struct mbuf **pm)
+_nlmsg_get_chain_writer(struct nl_writer *nw, int size, struct mbuf **pm)
 {
 	if (!nlmsg_get_buf(nw, size, false, false))
 		return (false);
@@ -469,13 +471,13 @@ nlmsg_get_chain_writer(struct nl_writer *nw, int size, struct mbuf **pm)
 }
 
 void
-nlmsg_ignore_limit(struct nl_writer *nw)
+_nlmsg_ignore_limit(struct nl_writer *nw)
 {
 	nw->ignore_limit = true;
 }
 
 bool
-nlmsg_flush(struct nl_writer *nw)
+_nlmsg_flush(struct nl_writer *nw)
 {
 
 	if (__predict_false(nw->hdr != NULL)) {
@@ -503,7 +505,7 @@ nlmsg_flush(struct nl_writer *nw)
  * Return true on success.
  */
 bool
-nlmsg_refill_buffer(struct nl_writer *nw, int required_len)
+_nlmsg_refill_buffer(struct nl_writer *nw, int required_len)
 {
 	struct nl_writer ns_new = {};
 	int completed_len, new_len;
@@ -561,7 +563,7 @@ nlmsg_refill_buffer(struct nl_writer *nw, int required_len)
 }
 
*** 329 LINES SKIPPED ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202303271355.32RDtqJ9006858>