Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 28 Apr 2023 13:56:19 GMT
From:      "Alexander V. Chernikov" <melifaro@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: 30d7e724db0c - main - route: show originator PID in netlink monitor
Message-ID:  <202304281356.33SDuJ0R012420@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by melifaro:

URL: https://cgit.FreeBSD.org/src/commit/?id=30d7e724db0c9805c9cafdd70a33f546df168d8c

commit 30d7e724db0c9805c9cafdd70a33f546df168d8c
Author:     Alexander V. Chernikov <melifaro@FreeBSD.org>
AuthorDate: 2023-04-28 12:44:04 +0000
Commit:     Alexander V. Chernikov <melifaro@FreeBSD.org>
CommitDate: 2023-04-28 13:54:54 +0000

    route: show originator PID in netlink monitor
    
    Replacing rtsock with netlink also means providing similar tracing facilities.
    rtsock provides the `route -n monitor` interface, where each message can be
    traced to the originating PID.
    This diff closes the feature gap between rtsock and netlink in that regard.
    
    Netlink works slightly differently from rtsock, as it is a generic message
    "broker". It calls some kernel KPIs and returns the result to the caller.
    Other Netlink consumers get notified of the changed kernel state using the
    relevant subsystem callbacks. Typically, it is close to impossible to pass
    some data through these KPIs to enhance the notification.
    
    This diff approaches the problem by using osd(9) to assign the relevant
    socket pointer (`nlp`) to the per-socket taskqueue execution thread.
    This change allows the pointer to be recovered in the aforementioned
    notification callbacks and used to extract some additional data.
    Using `osd(9)` (and adding additional metadata) to the notification receiver
    comes with some additional cost attached, so this interface needs to be
    enabled explicitly by using a newly-created `NETLINK_MSG_INFO` `SOL_NETLINK`
    socket option.
    
    The actual metadata (which includes the originator PID) is provided via
    control messages. To enable extensibility, the control message data is
    encoded in the standard netlink (TLV-based) fashion. The list of the
    currently-provided properties can be found in `nlmsginfo_attrs`.
    snl(3) is extended to enable decoding of netlink messages with metadata
    (`snl_read_message_dbg()` stores the parsed structure in the provided buffer).
    
    Differential Revision: https://reviews.freebsd.org/D39391
---
 sbin/route/route_netlink.c   | 73 ++++++++++++++++++++++++++----------------
 sys/netlink/netlink.h        | 10 ++++++
 sys/netlink/netlink_ctl.h    | 17 ++++++++++
 sys/netlink/netlink_domain.c | 57 +++++++++++++++++++++++++++++++--
 sys/netlink/netlink_glue.c   | 13 ++++++++
 sys/netlink/netlink_io.c     | 75 ++++++++++++++++++++++++++++++++++++++++---
 sys/netlink/netlink_module.c |  3 ++
 sys/netlink/netlink_snl.h    | 76 ++++++++++++++++++++++++++++++++++++++++++++
 sys/netlink/netlink_var.h    |  8 +++++
 9 files changed, 298 insertions(+), 34 deletions(-)

diff --git a/sbin/route/route_netlink.c b/sbin/route/route_netlink.c
index e3305c0d7df1..01494fbefd51 100644
--- a/sbin/route/route_netlink.c
+++ b/sbin/route/route_netlink.c
@@ -39,8 +39,11 @@ int flushroutes_fib_nl(int fib, int af);
 void monitor_nl(int fib);
 
 struct nl_helper;
-static void print_getmsg(struct nl_helper *h, struct nlmsghdr *hdr, struct sockaddr *dst);
-static void print_nlmsg(struct nl_helper *h, struct nlmsghdr *hdr);
+struct snl_msg_info;
+static void print_getmsg(struct nl_helper *h, struct nlmsghdr *hdr,
+    struct sockaddr *dst);
+static void print_nlmsg(struct nl_helper *h, struct nlmsghdr *hdr,
+    struct snl_msg_info *cinfo);
 
 #define s6_addr32 __u6_addr.__u6_addr32
 #define	bitcount32(x)	__bitcount32((uint32_t)(x))
@@ -434,9 +437,9 @@ print_prefix(struct nl_helper *h, char *buf, int bufsize, struct sockaddr *sa, i
 		snprintf(buf + sz, bufsize - sz, "/%d", plen);
 }
 
-
 static int
-print_line_prefix(const char *cmd, const char *name)
+print_line_prefix(struct nlmsghdr *hdr, struct snl_msg_info *cinfo,
+    const char *cmd, const char *name)
 {
 	struct timespec tp;
 	struct tm tm;
@@ -446,7 +449,8 @@ print_line_prefix(const char *cmd, const char *name)
 	localtime_r(&tp.tv_sec, &tm);
 
 	strftime(buf, sizeof(buf), "%T", &tm);
-	int len = printf("%s.%03ld %s %s ", buf, tp.tv_nsec / 1000000, cmd, name);
+	int len = printf("%s.%03ld PID %4u %s %s ", buf, tp.tv_nsec / 1000000,
+	    cinfo->process_id, cmd, name);
 
 	return (len);
 }
@@ -498,7 +502,8 @@ print_nlmsg_route_nhop(struct nl_helper *h, struct snl_parsed_route *r,
 }
 
 static void
-print_nlmsg_route(struct nl_helper *h, struct nlmsghdr *hdr)
+print_nlmsg_route(struct nl_helper *h, struct nlmsghdr *hdr,
+    struct snl_msg_info *cinfo)
 {
 	struct snl_parsed_route r = { .rtax_weight = RT_DEFAULT_WEIGHT };
 	struct snl_state *ss = &h->ss_cmd;
@@ -509,7 +514,7 @@ print_nlmsg_route(struct nl_helper *h, struct nlmsghdr *hdr)
 	// 20:19:41.333 add route 10.0.0.0/24 gw 10.0.0.1 ifp vtnet0 mtu 1500 table inet.0
 
 	const char *cmd = get_action_name(hdr, RTM_NEWROUTE);
-	int len = print_line_prefix(cmd, "route");
+	int len = print_line_prefix(hdr, cinfo, cmd, "route");
 
 	char buf[128];
 	print_prefix(h, buf, sizeof(buf), r.rta_dst, r.rtm_dst_len);
@@ -564,7 +569,8 @@ static const char *operstate[] = {
 };
 
 static void
-print_nlmsg_link(struct nl_helper *h, struct nlmsghdr *hdr)
+print_nlmsg_link(struct nl_helper *h, struct nlmsghdr *hdr,
+    struct snl_msg_info *cinfo)
 {
 	struct snl_parsed_link l = {};
 	struct snl_state *ss = &h->ss_cmd;
@@ -574,7 +580,7 @@ print_nlmsg_link(struct nl_helper *h, struct nlmsghdr *hdr)
 
 	// 20:19:41.333 add iface#3 vtnet0 admin UP oper UP mtu 1500 table inet.0
 	const char *cmd = get_action_name(hdr, RTM_NEWLINK);
-	print_line_prefix(cmd, "iface");
+	print_line_prefix(hdr, cinfo, cmd, "iface");
 
 	printf("iface#%u %s ", l.ifi_index, l.ifla_ifname);
 	printf("admin %s ", (l.ifi_flags & IFF_UP) ? "UP" : "DOWN");
@@ -587,7 +593,8 @@ print_nlmsg_link(struct nl_helper *h, struct nlmsghdr *hdr)
 }
 
 static void
-print_nlmsg_addr(struct nl_helper *h, struct nlmsghdr *hdr)
+print_nlmsg_addr(struct nl_helper *h, struct nlmsghdr *hdr,
+    struct snl_msg_info *cinfo)
 {
 	struct snl_parsed_addr attrs = {};
 	struct snl_state *ss = &h->ss_cmd;
@@ -597,7 +604,7 @@ print_nlmsg_addr(struct nl_helper *h, struct nlmsghdr *hdr)
 
 	// add addr 192.168.1.1/24 iface vtnet0
 	const char *cmd = get_action_name(hdr, RTM_NEWADDR);
-	print_line_prefix(cmd, "addr");
+	print_line_prefix(hdr, cinfo, cmd, "addr");
 
 	char buf[128];
 	struct sockaddr *addr = attrs.ifa_local ? attrs.ifa_local : attrs.ifa_address;
@@ -636,7 +643,8 @@ static const char *nudstate[] = {
 
 
 static void
-print_nlmsg_neigh(struct nl_helper *h, struct nlmsghdr *hdr)
+print_nlmsg_neigh(struct nl_helper *h, struct nlmsghdr *hdr,
+    struct snl_msg_info *cinfo)
 {
 	struct snl_parsed_neigh attrs = {};
 	struct snl_state *ss = &h->ss_cmd;
@@ -646,7 +654,7 @@ print_nlmsg_neigh(struct nl_helper *h, struct nlmsghdr *hdr)
 
 	// add addr 192.168.1.1 state %s lladdr %s iface vtnet0
 	const char *cmd = get_action_name(hdr, RTM_NEWNEIGH);
-	print_line_prefix(cmd, "neigh");
+	print_line_prefix(hdr, cinfo, cmd, "neigh");
 
 	char buf[128];
 	print_prefix(h, buf, sizeof(buf), attrs.nda_dst, -1);
@@ -694,32 +702,35 @@ print_nlmsg_neigh(struct nl_helper *h, struct nlmsghdr *hdr)
 }
 
 static void
-print_nlmsg_generic(struct nl_helper *h, struct nlmsghdr *hdr)
+print_nlmsg_generic(struct nl_helper *h, struct nlmsghdr *hdr, struct snl_msg_info *cinfo)
 {
+	const char *cmd = get_action_name(hdr, 0);
+	print_line_prefix(hdr, cinfo, cmd, "unknown message");
+	printf(" type %u\n", hdr->nlmsg_type);
 }
 
 static void
-print_nlmsg(struct nl_helper *h, struct nlmsghdr *hdr)
+print_nlmsg(struct nl_helper *h, struct nlmsghdr *hdr, struct snl_msg_info *cinfo)
 {
 	switch (hdr->nlmsg_type) {
 	case RTM_NEWLINK:
 	case RTM_DELLINK:
-		print_nlmsg_link(h, hdr);
+		print_nlmsg_link(h, hdr, cinfo);
 		break;
 	case RTM_NEWADDR:
 	case RTM_DELADDR:
-		print_nlmsg_addr(h, hdr);
+		print_nlmsg_addr(h, hdr, cinfo);
 		break;
 	case RTM_NEWROUTE:
 	case RTM_DELROUTE:
-		print_nlmsg_route(h, hdr);
+		print_nlmsg_route(h, hdr, cinfo);
 		break;
 	case RTM_NEWNEIGH:
 	case RTM_DELNEIGH:
-		print_nlmsg_neigh(h, hdr);
+		print_nlmsg_neigh(h, hdr, cinfo);
 		break;
 	default:
-		print_nlmsg_generic(h, hdr);
+		print_nlmsg_generic(h, hdr, cinfo);
 	}
 
 	snl_clear_lb(&h->ss_cmd);
@@ -748,6 +759,10 @@ monitor_nl(int fib)
 #endif
 	};
 
+	int optval = 1;
+	socklen_t optlen = sizeof(optval);
+	setsockopt(ss_event.fd, SOL_NETLINK, NETLINK_MSG_INFO, &optval, optlen);
+
 	for (unsigned int i = 0; i < NL_ARRAY_LEN(groups); i++) {
 		int error;
 		int optval = groups[i];
@@ -758,11 +773,11 @@ monitor_nl(int fib)
 			warn("Unable to subscribe to group %d", optval);
 	}
 
+	struct snl_msg_info attrs = {};
 	struct nlmsghdr *hdr;
-	while ((hdr = snl_read_message(&ss_event)) != NULL)
+	while ((hdr = snl_read_message_dbg(&ss_event, &attrs)) != NULL)
 	{
-		// printf("-- MSG type %d--\n", hdr->nlmsg_type);
-		print_nlmsg(&h, hdr);
+		print_nlmsg(&h, hdr, &attrs);
 		snl_clear_lb(&h.ss_cmd);
 		snl_clear_lb(&ss_event);
 	}
@@ -814,8 +829,10 @@ flushroute_one(struct nl_helper *h, struct snl_parsed_route *r)
 		return (true);
 	};
 
-	if (verbose)
-		print_nlmsg(h, hdr);
+	if (verbose) {
+		struct snl_msg_info attrs = {};
+		print_nlmsg(h, hdr, &attrs);
+	}
 	else {
 		if (r->rta_multipath != NULL) {
 			for (int i = 0; i < r->rta_multipath->num_nhops; i++) {
@@ -863,8 +880,10 @@ flushroutes_fib_nl(int fib, int af)
 
 		if (!snl_parse_nlmsg(&ss, hdr, &snl_rtm_route_parser, &r))
 			continue;
-		if (verbose)
-			print_nlmsg(&h, hdr);
+		if (verbose) {
+			struct snl_msg_info attrs = {};
+			print_nlmsg(&h, hdr, &attrs);
+		}
 		if (r.rta_table != (uint32_t)fib || r.rtm_family != af)
 			continue;
 		if ((r.rta_rtflags & RTF_GATEWAY) == 0)
diff --git a/sys/netlink/netlink.h b/sys/netlink/netlink.h
index 3623ae754951..0021be4ea786 100644
--- a/sys/netlink/netlink.h
+++ b/sys/netlink/netlink.h
@@ -89,6 +89,7 @@ struct sockaddr_nl {
 #define NETLINK_EXT_ACK			11 /* Ack support for receiving additional TLVs in ack */
 #define NETLINK_GET_STRICT_CHK		12 /* Strict header checking */
 
+#define	NETLINK_MSG_INFO		257 /* (FreeBSD-specific) Receive message originator data in cmsg */
 
 /*
  * RFC 3549, 2.3.2 Netlink Message Header
@@ -183,6 +184,15 @@ enum nlmsgerr_attrs {
 	NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1
 };
 
+/* FreeBSD-specific debugging info */
+
+enum nlmsginfo_attrs {
+	NLMSGINFO_ATTR_UNUSED,
+	NLMSGINFO_ATTR_PROCESS_ID	= 1, /* u32, source process PID */
+	NLMSGINFO_ATTR_PORT_ID		= 2, /* u32, source socket nl_pid */
+	NLMSGINFO_ATTR_SEQ_ID		= 3, /* u32, source message seq_id */
+};
+
 
 #ifndef roundup2
 #define	roundup2(x, y)	(((x)+((y)-1))&(~((y)-1))) /* if y is powers of two */
diff --git a/sys/netlink/netlink_ctl.h b/sys/netlink/netlink_ctl.h
index 6c195c0217a9..9b51e9492a41 100644
--- a/sys/netlink/netlink_ctl.h
+++ b/sys/netlink/netlink_ctl.h
@@ -108,5 +108,22 @@ uint32_t genl_get_family_id(const struct genl_family *gf);
 typedef void (*genl_family_event_handler_t)(void *arg, const struct genl_family *gf, int action);
 EVENTHANDLER_DECLARE(genl_family_event, genl_family_event_handler_t);
 
+struct thread;
+#if defined(NETLINK) || defined(NETLINK_MODULE)
+/* Provide optimized calls to the functions inside the same linking unit */
+struct nlpcb *_nl_get_thread_nlp(struct thread *td);
+
+static inline struct nlpcb *
+nl_get_thread_nlp(struct thread *td)
+{
+	return (_nl_get_thread_nlp(td));
+}
+
+#else
+/* Provide access to the functions via netlink_glue.c */
+struct nlpcb *nl_get_thread_nlp(struct thread *td);
+
+#endif /* defined(NETLINK) || defined(NETLINK_MODULE) */
+
 #endif
 #endif
diff --git a/sys/netlink/netlink_domain.c b/sys/netlink/netlink_domain.c
index 8b0d09ac0b66..9cc2a5073fdd 100644
--- a/sys/netlink/netlink_domain.c
+++ b/sys/netlink/netlink_domain.c
@@ -38,6 +38,7 @@
 #include <sys/domain.h>
 #include <sys/jail.h>
 #include <sys/mbuf.h>
+#include <sys/osd.h>
 #include <sys/protosw.h>
 #include <sys/proc.h>
 #include <sys/ck.h>
@@ -84,6 +85,38 @@ SYSCTL_OID(_net_netlink, OID_AUTO, nl_maxsockbuf,
     sysctl_handle_nl_maxsockbuf, "LU",
     "Maximum Netlink socket buffer size");
 
+
+static unsigned int osd_slot_id = 0;
+
+void
+nl_osd_register(void)
+{
+	osd_slot_id = osd_register(OSD_THREAD, NULL, NULL);
+}
+
+void
+nl_osd_unregister(void)
+{
+	osd_deregister(OSD_THREAD, osd_slot_id);
+}
+
+struct nlpcb *
+_nl_get_thread_nlp(struct thread *td)
+{
+	return (osd_get(OSD_THREAD, &td->td_osd, osd_slot_id));
+}
+
+void
+nl_set_thread_nlp(struct thread *td, struct nlpcb *nlp)
+{
+	NLP_LOG(LOG_DEBUG2, nlp, "Set thread %p nlp to %p (slot %u)", td, nlp, osd_slot_id);
+	if (osd_set(OSD_THREAD, &td->td_osd, osd_slot_id, nlp) == 0)
+		return;
+	/* Failed, need to realloc */
+	void **rsv = osd_reserve(osd_slot_id);
+	osd_set_reserved(OSD_THREAD, &td->td_osd, osd_slot_id, rsv, nlp);
+}
+
 /*
  * Looks up a nlpcb struct based on the @portid. Need to claim nlsock_mtx.
  * Returns nlpcb pointer if present else NULL
@@ -144,6 +177,15 @@ nl_get_groups_compat(struct nlpcb *nlp)
 	return (groups_mask);
 }
 
+static void
+nl_send_one_group(struct mbuf *m, struct nlpcb *nlp, int num_messages,
+    int io_flags)
+{
+	if (__predict_false(nlp->nl_flags & NLF_MSG_INFO))
+		nl_add_msg_info(m);
+	nl_send_one(m, nlp, num_messages, io_flags);
+}
+
 /*
  * Broadcasts message @m to the protocol @proto group specified by @group_id
  */
@@ -180,7 +222,8 @@ nl_send_group(struct mbuf *m, int num_messages, int proto, int group_id)
 				struct mbuf *m_copy;
 				m_copy = m_copym(m, 0, M_COPYALL, M_NOWAIT);
 				if (m_copy != NULL)
-					nl_send_one(m_copy, nlp_last, num_messages, io_flags);
+					nl_send_one_group(m_copy, nlp_last,
+					    num_messages, io_flags);
 				else {
 					NLP_LOCK(nlp_last);
 					if (nlp_last->nl_socket != NULL)
@@ -192,7 +235,7 @@ nl_send_group(struct mbuf *m, int num_messages, int proto, int group_id)
 		}
 	}
 	if (nlp_last != NULL)
-		nl_send_one(m, nlp_last, num_messages, io_flags);
+		nl_send_one_group(m, nlp_last, num_messages, io_flags);
 	else
 		m_freem(m);
 
@@ -296,6 +339,7 @@ nl_pru_attach(struct socket *so, int proto, struct thread *td)
 	nlp->nl_linux = is_linux;
 	nlp->nl_active = true;
 	nlp->nl_unconstrained_vnet = !jailed_without_vnet(so->so_cred);
+	nlp->nl_need_thread_setup = true;
 	NLP_LOCK_INIT(nlp);
 	refcount_init(&nlp->nl_refcount, 1);
 	nl_init_io(nlp);
@@ -589,6 +633,8 @@ nl_getoptflag(int sopt_name)
 		return (NLF_EXT_ACK);
 	case NETLINK_GET_STRICT_CHK:
 		return (NLF_STRICT);
+	case NETLINK_MSG_INFO:
+		return (NLF_MSG_INFO);
 	}
 
 	return (0);
@@ -630,12 +676,18 @@ nl_ctloutput(struct socket *so, struct sockopt *sopt)
 		case NETLINK_CAP_ACK:
 		case NETLINK_EXT_ACK:
 		case NETLINK_GET_STRICT_CHK:
+		case NETLINK_MSG_INFO:
 			error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval));
 			if (error != 0)
 				break;
 
 			flag = nl_getoptflag(sopt->sopt_name);
 
+			if ((flag == NLF_MSG_INFO) && nlp->nl_linux) {
+				error = EINVAL;
+				break;
+			}
+
 			NLCTL_WLOCK(ctl);
 			if (optval != 0)
 				nlp->nl_flags |= flag;
@@ -658,6 +710,7 @@ nl_ctloutput(struct socket *so, struct sockopt *sopt)
 		case NETLINK_CAP_ACK:
 		case NETLINK_EXT_ACK:
 		case NETLINK_GET_STRICT_CHK:
+		case NETLINK_MSG_INFO:
 			NLCTL_RLOCK(ctl);
 			optval = (nlp->nl_flags & nl_getoptflag(sopt->sopt_name)) != 0;
 			NLCTL_RUNLOCK(ctl);
diff --git a/sys/netlink/netlink_glue.c b/sys/netlink/netlink_glue.c
index 940ff2de4859..e881bf019f52 100644
--- a/sys/netlink/netlink_glue.c
+++ b/sys/netlink/netlink_glue.c
@@ -191,6 +191,12 @@ nl_store_ifp_cookie_stub(struct nl_pstate *npt __unused, struct ifnet *ifp __unu
 {
 }
 
+static struct nlpcb *
+nl_get_thread_nlp_stub(struct thread *td __unused)
+{
+	return (NULL);
+}
+
 const static struct nl_function_wrapper nl_stub = {
 	.nlmsg_add = nlmsg_add_stub,
 	.nlmsg_refill_buffer = nlmsg_refill_buffer_stub,
@@ -204,6 +210,7 @@ const static struct nl_function_wrapper nl_stub = {
 	.nlmsg_end_dump = nlmsg_end_dump_stub,
 	.nl_modify_ifp_generic = nl_modify_ifp_generic_stub,
 	.nl_store_ifp_cookie = nl_store_ifp_cookie_stub,
+	.nl_get_thread_nlp = nl_get_thread_nlp_stub,
 };
 
 /*
@@ -292,5 +299,11 @@ nl_store_ifp_cookie(struct nl_pstate *npt, struct ifnet *ifp)
 	return (_nl->nl_store_ifp_cookie(npt, ifp));
 }
 
+struct nlpcb *
+nl_get_thread_nlp(struct thread *td)
+{
+	return (_nl->nl_get_thread_nlp(td));
+}
+
 #endif /* !NETLINK */
 
diff --git a/sys/netlink/netlink_io.c b/sys/netlink/netlink_io.c
index b40ffaab7dd9..db0a97eef0fd 100644
--- a/sys/netlink/netlink_io.c
+++ b/sys/netlink/netlink_io.c
@@ -125,6 +125,55 @@ queue_free(struct nl_io_queue *q)
 	q->length = 0;
 }
 
+void
+nl_add_msg_info(struct mbuf *m)
+{
+	struct nlpcb *nlp = nl_get_thread_nlp(curthread);
+	NL_LOG(LOG_DEBUG2, "Trying to recover nlp from thread %p: %p",
+	    curthread, nlp);
+
+	if (nlp == NULL)
+		return;
+
+	/* Prepare what we want to encode - PID, socket PID & msg seq */
+	struct {
+		struct nlattr nla;
+		uint32_t val;
+	} data[] = {
+		{
+			.nla.nla_len = sizeof(struct nlattr) + sizeof(uint32_t),
+			.nla.nla_type = NLMSGINFO_ATTR_PROCESS_ID,
+			.val = nlp->nl_process_id,
+		},
+		{
+			.nla.nla_len = sizeof(struct nlattr) + sizeof(uint32_t),
+			.nla.nla_type = NLMSGINFO_ATTR_PORT_ID,
+			.val = nlp->nl_port,
+		},
+	};
+
+
+	while (m->m_next != NULL)
+		m = m->m_next;
+	m->m_next = sbcreatecontrol(data, sizeof(data),
+	    NETLINK_MSG_INFO, SOL_NETLINK, M_NOWAIT);
+
+	NL_LOG(LOG_DEBUG2, "Storing %lu bytes of data, ctl: %p", sizeof(data), m->m_next);
+}
+
+static __noinline struct mbuf *
+extract_msg_info(struct mbuf *m)
+{
+	while (m->m_next != NULL) {
+		if (m->m_next->m_type == MT_CONTROL) {
+			struct mbuf *ctl = m->m_next;
+			m->m_next = NULL;
+			return (ctl);
+		}
+		m = m->m_next;
+	}
+	return (NULL);
+}
 
 static void
 nl_schedule_taskqueue(struct nlpcb *nlp)
@@ -181,10 +230,16 @@ tx_check_locked(struct nlpcb *nlp)
 
 	while (true) {
 		struct mbuf *m = queue_head(&nlp->tx_queue);
-		if (m && sbappendaddr_locked(sb, nl_empty_src, m, NULL) != 0) {
-			/* appended successfully */
-			queue_pop(&nlp->tx_queue);
-			appended = true;
+		if (m != NULL) {
+			struct mbuf *ctl = NULL;
+			if (__predict_false(m->m_next != NULL))
+				ctl = extract_msg_info(m);
+			if (sbappendaddr_locked(sb, nl_empty_src, m, ctl) != 0) {
+				/* appended successfully */
+				queue_pop(&nlp->tx_queue);
+				appended = true;
+			} else
+				break;
 		} else
 			break;
 	}
@@ -257,6 +312,13 @@ nl_process_received(struct nlpcb *nlp)
 {
 	NL_LOG(LOG_DEBUG3, "taskqueue called");
 
+	if (__predict_false(nlp->nl_need_thread_setup)) {
+		nl_set_thread_nlp(curthread, nlp);
+		NLP_LOCK(nlp);
+		nlp->nl_need_thread_setup = false;
+		NLP_UNLOCK(nlp);
+	}
+
 	while (nl_process_received_one(nlp))
 		;
 }
@@ -374,7 +436,10 @@ nl_send_one(struct mbuf *m, struct nlpcb *nlp, int num_messages, int io_flags)
 	}
 
 	struct socket *so = nlp->nl_socket;
-	if (sbappendaddr(&so->so_rcv, nl_empty_src, m, NULL) != 0) {
+	struct mbuf *ctl = NULL;
+	if (__predict_false(m->m_next != NULL))
+		ctl = extract_msg_info(m);
+	if (sbappendaddr(&so->so_rcv, nl_empty_src, m, ctl) != 0) {
 		sorwakeup(so);
 		NLP_LOG(LOG_DEBUG3, nlp, "appended data & woken up");
 	} else {
diff --git a/sys/netlink/netlink_module.c b/sys/netlink/netlink_module.c
index 31faf1d003d9..6835c4a0e730 100644
--- a/sys/netlink/netlink_module.c
+++ b/sys/netlink/netlink_module.c
@@ -189,6 +189,7 @@ const static struct nl_function_wrapper nl_module = {
 	.nlmsg_end_dump = _nlmsg_end_dump,
 	.nl_modify_ifp_generic = _nl_modify_ifp_generic,
 	.nl_store_ifp_cookie = _nl_store_ifp_cookie,
+	.nl_get_thread_nlp = _nl_get_thread_nlp,
 };
 #endif
 
@@ -222,6 +223,7 @@ netlink_modevent(module_t mod __unused, int what, void *priv __unused)
 	switch (what) {
 	case MOD_LOAD:
 		NL_LOG(LOG_DEBUG2, "Loading");
+		nl_osd_register();
 #if !defined(NETLINK) && defined(NETLINK_MODULE)
 		nl_set_functions(&nl_module);
 #endif
@@ -235,6 +237,7 @@ netlink_modevent(module_t mod __unused, int what, void *priv __unused)
 #if !defined(NETLINK) && defined(NETLINK_MODULE)
 			nl_set_functions(NULL);
 #endif
+			nl_osd_unregister();
 		} else
 			ret = EBUSY;
 		break;
diff --git a/sys/netlink/netlink_snl.h b/sys/netlink/netlink_snl.h
index 191a303111fa..4cb1b3e13abc 100644
--- a/sys/netlink/netlink_snl.h
+++ b/sys/netlink/netlink_snl.h
@@ -277,6 +277,55 @@ snl_get_seq(struct snl_state *ss)
 	return (++ss->seq);
 }
 
+struct snl_msg_info {
+	int		cmsg_type;
+	int		cmsg_level;
+	uint32_t	process_id;
+	uint8_t		port_id;
+	uint8_t		seq_id;
+};
+static inline bool parse_cmsg(struct snl_state *ss, const struct msghdr *msg,
+    struct snl_msg_info *attrs);
+
+static inline struct nlmsghdr *
+snl_read_message_dbg(struct snl_state *ss, struct snl_msg_info *cinfo)
+{
+	memset(cinfo, 0, sizeof(*cinfo));
+
+	if (ss->off == ss->datalen) {
+		struct sockaddr_nl nladdr;
+		char cbuf[64];
+
+		struct iovec iov = {
+			.iov_base = ss->buf,
+			.iov_len = ss->bufsize,
+		};
+		struct msghdr msg = {
+			.msg_name = &nladdr,
+			.msg_namelen = sizeof(nladdr),
+			.msg_iov = &iov,
+			.msg_iovlen = 1,
+			.msg_control = cbuf,
+			.msg_controllen = sizeof(cbuf),
+		};
+		ss->off = 0;
+		ss->datalen = 0;
+		for (;;) {
+			ssize_t datalen = recvmsg(ss->fd, &msg, 0);
+			if (datalen > 0) {
+				ss->datalen = datalen;
+				parse_cmsg(ss, &msg, cinfo);
+				break;
+			} else if (errno != EINTR)
+				return (NULL);
+		}
+	}
+	struct nlmsghdr *hdr = (struct nlmsghdr *)(void *)&ss->buf[ss->off];
+	ss->off += NLMSG_ALIGN(hdr->nlmsg_len);
+	return (hdr);
+}
+
+
 static inline struct nlmsghdr *
 snl_read_message(struct snl_state *ss)
 {
@@ -661,6 +710,33 @@ snl_read_reply_code(struct snl_state *ss, uint32_t nlmsg_seq, struct snl_errmsg_
 	return (false);
 }
 
+#define	_OUT(_field)	offsetof(struct snl_msg_info, _field)
+static const struct snl_attr_parser _nla_p_cinfo[] = {
+	{ .type = NLMSGINFO_ATTR_PROCESS_ID, .off = _OUT(process_id), .cb = snl_attr_get_uint32 },
+	{ .type = NLMSGINFO_ATTR_PORT_ID, .off = _OUT(port_id), .cb = snl_attr_get_uint32 },
+	{ .type = NLMSGINFO_ATTR_SEQ_ID, .off = _OUT(seq_id), .cb = snl_attr_get_uint32 },
+};
+#undef _OUT
+SNL_DECLARE_ATTR_PARSER(snl_msg_info_parser, _nla_p_cinfo);
+
+static inline bool
+parse_cmsg(struct snl_state *ss, const struct msghdr *msg, struct snl_msg_info *attrs)
+{
+	for (struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
+	    cmsg = CMSG_NXTHDR(msg, cmsg)) {
+		if (cmsg->cmsg_level != SOL_NETLINK || cmsg->cmsg_type != NETLINK_MSG_INFO)
+			continue;
+
+		void *data = CMSG_DATA(cmsg);
+		int len = cmsg->cmsg_len - ((char *)data - (char *)cmsg);
+		const struct snl_hdr_parser *ps = &snl_msg_info_parser;
+
+		return (snl_parse_attrs_raw(ss, data, len, ps->np, ps->np_size, attrs));
+	}
+
+	return (false);
+}
+
 /*
  * Assumes e is zeroed
  */
diff --git a/sys/netlink/netlink_var.h b/sys/netlink/netlink_var.h
index cb1e3974b5f5..7882bfbf5359 100644
--- a/sys/netlink/netlink_var.h
+++ b/sys/netlink/netlink_var.h
@@ -62,6 +62,7 @@ struct nlpcb {
 	bool			nl_tx_blocked; /* No new requests accepted */
 	bool			nl_linux; /* true if running under compat */
 	bool			nl_unconstrained_vnet; /* true if running under VNET jail (or without jail) */
+	bool			nl_need_thread_setup;
 	struct nl_io_queue	rx_queue;
 	struct nl_io_queue	tx_queue;
 	struct taskqueue	*nl_taskqueue;
@@ -88,6 +89,7 @@ struct nlpcb {
 #define NLF_CAP_ACK             0x01 /* Do not send message body with errmsg */
 #define NLF_EXT_ACK             0x02 /* Allow including extended TLVs in ack */
 #define	NLF_STRICT		0x04 /* Perform strict header checks */
+#define	NLF_MSG_INFO		0x08 /* Send caller info along with the notifications */
 
 SYSCTL_DECL(_net_netlink);
 SYSCTL_DECL(_net_netlink_debug);
@@ -130,6 +132,9 @@ extern struct nl_proto_handler *nl_handlers;
 
 /* netlink_domain.c */
 void nl_send_group(struct mbuf *m, int cnt, int proto, int group_id);
+void nl_osd_register(void);
+void nl_osd_unregister(void);
+void nl_set_thread_nlp(struct thread *td, struct nlpcb *nlp);
 
 /* netlink_io.c */
 #define	NL_IOF_UNTRANSLATED	0x01
@@ -144,6 +149,8 @@ void nl_free_io(struct nlpcb *nlp);
 void nl_taskqueue_handler(void *_arg, int pending);
 int nl_receive_async(struct mbuf *m, struct socket *so);
 void nl_process_receive_locked(struct nlpcb *nlp);
+void nl_set_source_metadata(struct mbuf *m, int num_messages);
+void nl_add_msg_info(struct mbuf *m);
 
 /* netlink_generic.c */
 struct genl_family {
@@ -193,6 +200,7 @@ struct nl_function_wrapper {
 	int (*nl_modify_ifp_generic)(struct ifnet *ifp, struct nl_parsed_link *lattrs,
 	    const struct nlattr_bmask *bm, struct nl_pstate *npt);
 	void (*nl_store_ifp_cookie)(struct nl_pstate *npt, struct ifnet *ifp);
+	struct nlpcb * (*nl_get_thread_nlp)(struct  thread *td);
 };
 void nl_set_functions(const struct nl_function_wrapper *nl);
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202304281356.33SDuJ0R012420>