Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 5 Feb 2025 00:54:37 GMT
From:      Gleb Smirnoff <glebius@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: 841dcdcd3f0a - main - netlink: initialize VNET context with VNET_SYSINIT()
Message-ID:  <202502050054.5150sbG9050744@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by glebius:

URL: https://cgit.FreeBSD.org/src/commit/?id=841dcdcd3f0a1dda65c080cec8377e3dc5b1aae7

commit 841dcdcd3f0a1dda65c080cec8377e3dc5b1aae7
Author:     Gleb Smirnoff <glebius@FreeBSD.org>
AuthorDate: 2025-02-05 00:36:14 +0000
Commit:     Gleb Smirnoff <glebius@FreeBSD.org>
CommitDate: 2025-02-05 00:54:21 +0000

    netlink: initialize VNET context with VNET_SYSINIT()
    
    With the initial check-in netlink(4) was very conservative with regards to
    using memory and intrusiveness to the kernel and network stack. In
    particular it would initialize the VNET context only on the first actuall
    call to socket(PF_NETLINK), saving on allocation of a struct nl_control of
    size 224 bytes.
    
    Now it is clear that netlink(4) is primary citizen of FreeBSD, with a set
    of system tools using it.  So resort to normal VNET_SYSINIT() and with
    that shave a lot of complexity, since after the change V_nl_ctl is
    immutable.
---
 sys/netlink/netlink_ctl.h    |  1 -
 sys/netlink/netlink_domain.c | 89 +++++++++++++++-----------------------------
 sys/netlink/netlink_module.c | 61 ++++++++----------------------
 sys/netlink/netlink_var.h    |  6 +--
 sys/netlink/route/iface.c    |  6 ---
 5 files changed, 47 insertions(+), 116 deletions(-)

diff --git a/sys/netlink/netlink_ctl.h b/sys/netlink/netlink_ctl.h
index 58f7f6c4abe3..895f70322a29 100644
--- a/sys/netlink/netlink_ctl.h
+++ b/sys/netlink/netlink_ctl.h
@@ -77,7 +77,6 @@ bool netlink_register_proto(int proto, const char *proto_name, nl_handler_f hand
 bool netlink_unregister_proto(int proto);
 
 /* Common helpers */
-bool nl_has_listeners(uint16_t netlink_family, uint32_t groups_mask);
 bool nlp_has_priv(struct nlpcb *nlp, int priv);
 struct ucred *nlp_get_cred(struct nlpcb *nlp);
 uint32_t nlp_get_pid(const struct nlpcb *nlp);
diff --git a/sys/netlink/netlink_domain.c b/sys/netlink/netlink_domain.c
index 94f880cbfb61..0567537d1d1d 100644
--- a/sys/netlink/netlink_domain.c
+++ b/sys/netlink/netlink_domain.c
@@ -65,11 +65,13 @@ _Static_assert(NLP_MAX_GROUPS >= 64,
     "NLP_MAX_GROUPS has to be at least 64");
 
 #define	NLCTL_TRACKER		struct rm_priotracker nl_tracker
-#define	NLCTL_RLOCK(_ctl)	rm_rlock(&((_ctl)->ctl_lock), &nl_tracker)
-#define	NLCTL_RUNLOCK(_ctl)	rm_runlock(&((_ctl)->ctl_lock), &nl_tracker)
+#define	NLCTL_RLOCK()		rm_rlock(&V_nl_ctl.ctl_lock, &nl_tracker)
+#define	NLCTL_RUNLOCK()		rm_runlock(&V_nl_ctl.ctl_lock, &nl_tracker)
+#define	NLCTL_LOCK_ASSERT()	rm_assert(&V_nl_ctl.ctl_lock, RA_LOCKED)
 
-#define	NLCTL_WLOCK(_ctl)	rm_wlock(&((_ctl)->ctl_lock))
-#define	NLCTL_WUNLOCK(_ctl)	rm_wunlock(&((_ctl)->ctl_lock))
+#define	NLCTL_WLOCK()		rm_wlock(&V_nl_ctl.ctl_lock)
+#define	NLCTL_WUNLOCK()		rm_wunlock(&V_nl_ctl.ctl_lock)
+#define	NLCTL_WLOCK_ASSERT()	rm_assert(&V_nl_ctl.ctl_lock, RA_WLOCKED)
 
 static u_long nl_sendspace = NLSNDQ;
 SYSCTL_ULONG(_net_netlink, OID_AUTO, sendspace, CTLFLAG_RW, &nl_sendspace, 0,
@@ -128,7 +130,7 @@ nl_port_lookup(uint32_t port_id)
 {
 	struct nlpcb *nlp;
 
-	CK_LIST_FOREACH(nlp, &V_nl_ctl->ctl_port_head, nl_port_next) {
+	CK_LIST_FOREACH(nlp, &V_nl_ctl.ctl_port_head, nl_port_next) {
 		if (nlp->nl_port == port_id)
 			return (nlp);
 	}
@@ -209,19 +211,8 @@ nl_send_group(struct nl_writer *nw)
 
 	nw->buf = NULL;
 
-	struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl);
-	if (__predict_false(ctl == NULL)) {
-		/*
-		 * Can be the case when notification is sent within VNET
-		 * which doesn't have any netlink sockets.
-		 */
-		nl_buf_free(nb);
-		return (false);
-	}
-
-	NLCTL_RLOCK(ctl);
-
-	CK_LIST_FOREACH(nlp, &ctl->ctl_pcb_head, nl_next) {
+	NLCTL_RLOCK();
+	CK_LIST_FOREACH(nlp, &V_nl_ctl.ctl_pcb_head, nl_next) {
 		if ((nw->group.priv == 0 || priv_check_cred(
 		    nlp->nl_socket->so_cred, nw->group.priv) == 0) &&
 		    nlp->nl_proto == nw->group.proto &&
@@ -249,17 +240,11 @@ nl_send_group(struct nl_writer *nw)
 	} else
 		nl_buf_free(nb);
 
-	NLCTL_RUNLOCK(ctl);
+	NLCTL_RUNLOCK();
 
 	return (true);
 }
 
-bool
-nl_has_listeners(uint16_t netlink_family, uint32_t groups_mask)
-{
-	return (V_nl_ctl != NULL);
-}
-
 static uint32_t
 nl_find_port(void)
 {
@@ -298,7 +283,7 @@ nl_bind_locked(struct nlpcb *nlp, struct sockaddr_nl *snl)
 			return (EADDRINUSE);
 		nlp->nl_port = snl->nl_pid;
 		nlp->nl_bound = true;
-		CK_LIST_INSERT_HEAD(&V_nl_ctl->ctl_port_head, nlp, nl_port_next);
+		CK_LIST_INSERT_HEAD(&V_nl_ctl.ctl_port_head, nlp, nl_port_next);
 	}
 	for (int i = 0; i < 32; i++) {
 		if (snl->nl_groups & ((uint32_t)1 << i))
@@ -328,14 +313,6 @@ nl_attach(struct socket *so, int proto, struct thread *td)
 	    so, is_linux ? "(linux) " : "", curproc->p_pid,
 	    nl_get_proto_name(proto));
 
-	/* Create per-VNET state on first socket init */
-	struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl);
-	if (ctl == NULL)
-		ctl = vnet_nl_ctl_init();
-	KASSERT(V_nl_ctl != NULL, ("nl_attach: vnet_sock_init() failed"));
-
-	MPASS(sotonlpcb(so) == NULL);
-
 	nlp = malloc(sizeof(struct nlpcb), M_PCB, M_WAITOK | M_ZERO);
 	error = soreserve(so, nl_sendspace, nl_recvspace);
 	if (error != 0) {
@@ -362,10 +339,9 @@ nl_attach(struct socket *so, int proto, struct thread *td)
 	taskqueue_start_threads(&nlp->nl_taskqueue, 1, PWAIT,
 	    "netlink_socket (PID %u)", nlp->nl_process_id);
 
-	NLCTL_WLOCK(ctl);
-	/* XXX: check ctl is still alive */
-	CK_LIST_INSERT_HEAD(&ctl->ctl_pcb_head, nlp, nl_next);
-	NLCTL_WUNLOCK(ctl);
+	NLCTL_WLOCK();
+	CK_LIST_INSERT_HEAD(&V_nl_ctl.ctl_pcb_head, nlp, nl_next);
+	NLCTL_WUNLOCK();
 
 	soisconnected(so);
 
@@ -375,7 +351,6 @@ nl_attach(struct socket *so, int proto, struct thread *td)
 static int
 nl_bind(struct socket *so, struct sockaddr *sa, struct thread *td)
 {
-	struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl);
 	struct nlpcb *nlp = sotonlpcb(so);
 	struct sockaddr_nl *snl = (struct sockaddr_nl *)sa;
 	int error;
@@ -387,11 +362,11 @@ nl_bind(struct socket *so, struct sockaddr *sa, struct thread *td)
 	}
 
 
-	NLCTL_WLOCK(ctl);
+	NLCTL_WLOCK();
 	NLP_LOCK(nlp);
 	error = nl_bind_locked(nlp, snl);
 	NLP_UNLOCK(nlp);
-	NLCTL_WUNLOCK(ctl);
+	NLCTL_WUNLOCK();
 	NL_LOG(LOG_DEBUG2, "socket %p, bind() to %u, groups %u, error %d", so,
 	    snl->nl_pid, snl->nl_groups, error);
 
@@ -402,18 +377,17 @@ nl_bind(struct socket *so, struct sockaddr *sa, struct thread *td)
 static int
 nl_assign_port(struct nlpcb *nlp, uint32_t port_id)
 {
-	struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl);
 	struct sockaddr_nl snl = {
 		.nl_pid = port_id,
 	};
 	int error;
 
-	NLCTL_WLOCK(ctl);
+	NLCTL_WLOCK();
 	NLP_LOCK(nlp);
 	snl.nl_groups = nl_get_groups_compat(nlp);
 	error = nl_bind_locked(nlp, &snl);
 	NLP_UNLOCK(nlp);
-	NLCTL_WUNLOCK(ctl);
+	NLCTL_WUNLOCK();
 
 	NL_LOG(LOG_DEBUG3, "socket %p, port assign: %d, error: %d", nlp->nl_socket, port_id, error);
 	return (error);
@@ -427,7 +401,6 @@ nl_assign_port(struct nlpcb *nlp, uint32_t port_id)
 static int
 nl_autobind_port(struct nlpcb *nlp, uint32_t candidate_id)
 {
-	struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl);
 	uint32_t port_id = candidate_id;
 	NLCTL_TRACKER;
 	bool exist;
@@ -435,9 +408,9 @@ nl_autobind_port(struct nlpcb *nlp, uint32_t candidate_id)
 
 	for (int i = 0; i < 10; i++) {
 		NL_LOG(LOG_DEBUG3, "socket %p, trying to assign port %d", nlp->nl_socket, port_id);
-		NLCTL_RLOCK(ctl);
+		NLCTL_RLOCK();
 		exist = nl_port_lookup(port_id) != 0;
-		NLCTL_RUNLOCK(ctl);
+		NLCTL_RUNLOCK();
 		if (!exist) {
 			error = nl_assign_port(nlp, port_id);
 			if (error != EADDRINUSE)
@@ -491,7 +464,6 @@ destroy_nlpcb_epoch(epoch_context_t ctx)
 static void
 nl_close(struct socket *so)
 {
-	struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl);
 	MPASS(sotonlpcb(so) != NULL);
 	struct nlpcb *nlp;
 	struct nl_buf *nb;
@@ -508,7 +480,7 @@ nl_close(struct socket *so)
 	taskqueue_drain_all(nlp->nl_taskqueue);
 	taskqueue_free(nlp->nl_taskqueue);
 
-	NLCTL_WLOCK(ctl);
+	NLCTL_WLOCK();
 	NLP_LOCK(nlp);
 	if (was_bound) {
 		CK_LIST_REMOVE(nlp, nl_port_next);
@@ -517,7 +489,7 @@ nl_close(struct socket *so)
 	CK_LIST_REMOVE(nlp, nl_next);
 	nlp->nl_socket = NULL;
 	NLP_UNLOCK(nlp);
-	NLCTL_WUNLOCK(ctl);
+	NLCTL_WUNLOCK();
 
 	so->so_pcb = NULL;
 
@@ -852,7 +824,6 @@ nl_getoptflag(int sopt_name)
 static int
 nl_ctloutput(struct socket *so, struct sockopt *sopt)
 {
-	struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl);
 	struct nlpcb *nlp = sotonlpcb(so);
 	uint32_t flag;
 	int optval, error = 0;
@@ -875,12 +846,12 @@ nl_ctloutput(struct socket *so, struct sockopt *sopt)
 			}
 			NL_LOG(LOG_DEBUG2, "ADD/DEL group %d", (uint32_t)optval);
 
-			NLCTL_WLOCK(ctl);
+			NLCTL_WLOCK();
 			if (sopt->sopt_name == NETLINK_ADD_MEMBERSHIP)
 				nl_add_group_locked(nlp, optval);
 			else
 				nl_del_group_locked(nlp, optval);
-			NLCTL_WUNLOCK(ctl);
+			NLCTL_WUNLOCK();
 			break;
 		case NETLINK_CAP_ACK:
 		case NETLINK_EXT_ACK:
@@ -897,12 +868,12 @@ nl_ctloutput(struct socket *so, struct sockopt *sopt)
 				break;
 			}
 
-			NLCTL_WLOCK(ctl);
+			NLCTL_WLOCK();
 			if (optval != 0)
 				nlp->nl_flags |= flag;
 			else
 				nlp->nl_flags &= ~flag;
-			NLCTL_WUNLOCK(ctl);
+			NLCTL_WUNLOCK();
 			break;
 		default:
 			error = ENOPROTOOPT;
@@ -911,18 +882,18 @@ nl_ctloutput(struct socket *so, struct sockopt *sopt)
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case NETLINK_LIST_MEMBERSHIPS:
-			NLCTL_RLOCK(ctl);
+			NLCTL_RLOCK();
 			optval = nl_get_groups_compat(nlp);
-			NLCTL_RUNLOCK(ctl);
+			NLCTL_RUNLOCK();
 			error = sooptcopyout(sopt, &optval, sizeof(optval));
 			break;
 		case NETLINK_CAP_ACK:
 		case NETLINK_EXT_ACK:
 		case NETLINK_GET_STRICT_CHK:
 		case NETLINK_MSG_INFO:
-			NLCTL_RLOCK(ctl);
+			NLCTL_RLOCK();
 			optval = (nlp->nl_flags & nl_getoptflag(sopt->sopt_name)) != 0;
-			NLCTL_RUNLOCK(ctl);
+			NLCTL_RUNLOCK();
 			error = sooptcopyout(sopt, &optval, sizeof(optval));
 			break;
 		default:
diff --git a/sys/netlink/netlink_module.c b/sys/netlink/netlink_module.c
index cf119a74cfb7..6c3cd90e61ab 100644
--- a/sys/netlink/netlink_module.c
+++ b/sys/netlink/netlink_module.c
@@ -58,7 +58,10 @@ struct nl_proto_handler *nl_handlers = _nl_handlers;
 CK_LIST_HEAD(nl_control_head, nl_control);
 static struct nl_control_head vnets_head = CK_LIST_HEAD_INITIALIZER();
 
-VNET_DEFINE(struct nl_control *, nl_ctl) = NULL;
+VNET_DEFINE(struct nl_control, nl_ctl) = {
+	.ctl_port_head = CK_LIST_HEAD_INITIALIZER(),
+	.ctl_pcb_head = CK_LIST_HEAD_INITIALIZER(),
+};
 
 struct mtx nl_global_mtx;
 MTX_SYSINIT(nl_global_mtx, &nl_global_mtx, "global netlink lock", MTX_DEF);
@@ -69,63 +72,31 @@ MTX_SYSINIT(nl_global_mtx, &nl_global_mtx, "global netlink lock", MTX_DEF);
 int netlink_unloading = 0;
 
 static void
-free_nl_ctl(struct nl_control *ctl)
+vnet_nl_init(const void *unused __unused)
 {
-	rm_destroy(&ctl->ctl_lock);
-	free(ctl, M_NETLINK);
-}
-
-struct nl_control *
-vnet_nl_ctl_init(void)
-{
-	struct nl_control *ctl;
-
-	ctl = malloc(sizeof(struct nl_control), M_NETLINK, M_WAITOK | M_ZERO);
-	rm_init(&ctl->ctl_lock, "netlink lock");
-	CK_LIST_INIT(&ctl->ctl_port_head);
-	CK_LIST_INIT(&ctl->ctl_pcb_head);
+	rm_init(&V_nl_ctl.ctl_lock, "netlink lock");
 
 	NL_GLOBAL_LOCK();
-
-	struct nl_control *tmp = atomic_load_ptr(&V_nl_ctl);
-
-	if (tmp == NULL) {
-		atomic_store_ptr(&V_nl_ctl, ctl);
-		CK_LIST_INSERT_HEAD(&vnets_head, ctl, ctl_next);
-		NL_LOG(LOG_DEBUG2, "VNET %p init done, inserted %p into global list",
-		    curvnet, ctl);
-	} else {
-		NL_LOG(LOG_DEBUG, "per-VNET init clash, dropping this instance");
-		free_nl_ctl(ctl);
-		ctl = tmp;
-	}
-
+	CK_LIST_INSERT_HEAD(&vnets_head, &V_nl_ctl, ctl_next);
+	NL_LOG(LOG_DEBUG2, "VNET %p init done, inserted %p into global list",
+	    curvnet, &V_nl_ctl);
 	NL_GLOBAL_UNLOCK();
-
-	return (ctl);
 }
+VNET_SYSINIT(vnet_nl_init, SI_SUB_INIT_IF, SI_ORDER_FIRST, vnet_nl_init, NULL);
 
 static void
-vnet_nl_ctl_destroy(const void *unused __unused)
+vnet_nl_uninit(const void *unused __unused)
 {
-	struct nl_control *ctl;
-
 	/* Assume at the time all of the processes / sockets are dead */
-
 	NL_GLOBAL_LOCK();
-	ctl = atomic_load_ptr(&V_nl_ctl);
-	atomic_store_ptr(&V_nl_ctl, NULL);
-	if (ctl != NULL) {
-		NL_LOG(LOG_DEBUG2, "Removing %p from global list", ctl);
-		CK_LIST_REMOVE(ctl, ctl_next);
-	}
+	NL_LOG(LOG_DEBUG2, "Removing %p from global list", &V_nl_ctl);
+	CK_LIST_REMOVE(&V_nl_ctl, ctl_next);
 	NL_GLOBAL_UNLOCK();
 
-	if (ctl != NULL)
-		free_nl_ctl(ctl);
+	rm_destroy(&V_nl_ctl.ctl_lock);
 }
-VNET_SYSUNINIT(vnet_nl_ctl_destroy, SI_SUB_PROTO_IF, SI_ORDER_ANY,
-    vnet_nl_ctl_destroy, NULL);
+VNET_SYSUNINIT(vnet_nl_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST, vnet_nl_uninit,
+    NULL);
 
 int
 nl_verify_proto(int proto)
diff --git a/sys/netlink/netlink_var.h b/sys/netlink/netlink_var.h
index c341abf7ca55..11b69eb604fe 100644
--- a/sys/netlink/netlink_var.h
+++ b/sys/netlink/netlink_var.h
@@ -102,17 +102,13 @@ struct nl_control {
 	CK_LIST_ENTRY(nl_control)		ctl_next;
 	struct rmlock				ctl_lock;
 };
-VNET_DECLARE(struct nl_control *, nl_ctl);
+VNET_DECLARE(struct nl_control, nl_ctl);
 #define	V_nl_ctl	VNET(nl_ctl)
 
-
 struct sockaddr_nl;
 struct sockaddr;
 struct nlmsghdr;
 
-/* netlink_module.c */
-struct nl_control *vnet_nl_ctl_init(void);
-
 int nl_verify_proto(int proto);
 const char *nl_get_proto_name(int proto);
 
diff --git a/sys/netlink/route/iface.c b/sys/netlink/route/iface.c
index d856498b975f..93465e55e6aa 100644
--- a/sys/netlink/route/iface.c
+++ b/sys/netlink/route/iface.c
@@ -1383,9 +1383,6 @@ rtnl_handle_ifaddr(void *arg __unused, struct ifaddr *ifa, int cmd)
 		return;
 	}
 
-	if (!nl_has_listeners(NETLINK_ROUTE, group))
-		return;
-
 	if (!nl_writer_group(&nw, NLMSG_LARGE, NETLINK_ROUTE, group, 0,
 	    false)) {
 		NL_LOG(LOG_DEBUG, "error allocating group writer");
@@ -1404,9 +1401,6 @@ rtnl_handle_ifevent(if_t ifp, int nlmsg_type, int if_flags_mask)
 	struct nlmsghdr hdr = { .nlmsg_type = nlmsg_type };
 	struct nl_writer nw;
 
-	if (!nl_has_listeners(NETLINK_ROUTE, RTNLGRP_LINK))
-		return;
-
 	if (!nl_writer_group(&nw, NLMSG_LARGE, NETLINK_ROUTE, RTNLGRP_LINK, 0,
 	    false)) {
 		NL_LOG(LOG_DEBUG, "error allocating group writer");



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202502050054.5150sbG9050744>