Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 30 Jul 2022 18:46:50 GMT
From:      "Alexander V. Chernikov" <melifaro@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: be1f485d7d6b - main - sockets: add MSG_TRUNC flag handling for recvfrom()/recvmsg().
Message-ID:  <202207301846.26UIko7G080500@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by melifaro:

URL: https://cgit.FreeBSD.org/src/commit/?id=be1f485d7d6bebc53b055cc165a11ada0ab5fb17

commit be1f485d7d6bebc53b055cc165a11ada0ab5fb17
Author:     Alexander V. Chernikov <melifaro@FreeBSD.org>
AuthorDate: 2022-07-25 19:46:40 +0000
Commit:     Alexander V. Chernikov <melifaro@FreeBSD.org>
CommitDate: 2022-07-30 18:21:51 +0000

    sockets: add MSG_TRUNC flag handling for recvfrom()/recvmsg().
    
    Implement the Linux variant of the MSG_TRUNC input flag used in recv(), recvfrom() and recvmsg().
    POSIX defines MSG_TRUNC as an output flag, indicating packet/datagram truncation.
    Linux extended it a while (~15+ years) ago to also act as an input flag,
    resulting in returning the full packet size regardless of the input
    buffer size.
    It's a (relatively) popular pattern to do recvmsg(MSG_PEEK | MSG_TRUNC) to get the
    packet size, allocate a buffer and issue another call to fetch the packet.
    In particular, it's popular in userland netlink code, which is the primary driving factor of this change.
    
    This commit implements the MSG_TRUNC support for SOCK_DGRAM sockets (udp, unix and all soreceive_generic() users).
    
    PR:             kern/176322
    Reviewed by:    pauamma(doc)
    Differential Revision: https://reviews.freebsd.org/D35909
    MFC after:      1 month
---
 lib/libc/sys/recv.2               |  12 +++
 sys/kern/uipc_socket.c            |  13 ++-
 sys/kern/uipc_usrreq.c            |  29 +++++--
 tests/sys/kern/Makefile           |   1 +
 tests/sys/kern/socket_msg_trunc.c | 169 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 212 insertions(+), 12 deletions(-)

diff --git a/lib/libc/sys/recv.2 b/lib/libc/sys/recv.2
index 1f3bf531c3e7..21c8a570ef68 100644
--- a/lib/libc/sys/recv.2
+++ b/lib/libc/sys/recv.2
@@ -163,6 +163,7 @@ one or more of the values:
 .Bl -column ".Dv MSG_CMSG_CLOEXEC" -offset indent
 .It Dv MSG_OOB Ta process out-of-band data
 .It Dv MSG_PEEK Ta peek at incoming message
+.It Dv MSG_TRUNC Ta return real packet or datagram length
 .It Dv MSG_WAITALL Ta wait for full request or error
 .It Dv MSG_DONTWAIT Ta do not block
 .It Dv MSG_CMSG_CLOEXEC Ta set received fds close-on-exec
@@ -185,6 +186,17 @@ from the beginning of the receive queue without removing that
 data from the queue.
 Thus, a subsequent receive call will return the same data.
 The
+.Dv MSG_TRUNC
+flag causes the receive operation to return the full length of the packet
+or datagram even if larger than provided buffer. The flag is supported
+on SOCK_DGRAM sockets for
+.Dv AF_INET
+,
+.Dv AF_INET6
+and
+.Dv AF_UNIX
+families.
+The
 .Dv MSG_WAITALL
 flag requests that the operation block until
 the full request is satisfied.
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index dac1373773fb..8ecf83d30e28 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -1896,15 +1896,18 @@ soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio,
 	struct mbuf *nextrecord;
 	int moff, type = 0;
 	ssize_t orig_resid = uio->uio_resid;
+	bool report_real_len = false;
 
 	mp = mp0;
 	if (psa != NULL)
 		*psa = NULL;
 	if (controlp != NULL)
 		*controlp = NULL;
-	if (flagsp != NULL)
+	if (flagsp != NULL) {
+		report_real_len = *flagsp & MSG_TRUNC;
+		*flagsp &= ~MSG_TRUNC;
 		flags = *flagsp &~ MSG_EOR;
-	else
+	} else
 		flags = 0;
 	if (flags & MSG_OOB)
 		return (soreceive_rcvoob(so, uio, flags));
@@ -1978,7 +1981,7 @@ restart:
 			error = ENOTCONN;
 			goto release;
 		}
-		if (uio->uio_resid == 0) {
+		if (uio->uio_resid == 0 && !report_real_len) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			goto release;
 		}
@@ -2326,6 +2329,8 @@ dontblock:
 
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	if (m != NULL && pr->pr_flags & PR_ATOMIC) {
+		if (report_real_len)
+			uio->uio_resid -= m_length(m, NULL) - moff;
 		flags |= MSG_TRUNC;
 		if ((flags & MSG_PEEK) == 0)
 			(void) sbdroprecord_locked(&so->so_rcv);
@@ -2624,7 +2629,7 @@ soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio,
 	 * For any complicated cases, fall back to the full
 	 * soreceive_generic().
 	 */
-	if (mp0 != NULL || (flags & MSG_PEEK) || (flags & MSG_OOB))
+	if (mp0 != NULL || (flags & (MSG_PEEK | MSG_OOB | MSG_TRUNC)))
 		return (soreceive_generic(so, psa, uio, mp0, controlp,
 		    flagsp));
 
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index b0661c0d0daf..3b54c5f145be 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -1417,7 +1417,7 @@ static int
 uipc_peek_dgram(struct socket *so, struct mbuf *m, struct sockaddr **psa,
     struct uio *uio, struct mbuf **controlp, int *flagsp)
 {
-	ssize_t len;
+	ssize_t len = 0;
 	int error;
 
 	so->so_rcv.uxdg_peeked = m;
@@ -1459,8 +1459,16 @@ uipc_peek_dgram(struct socket *so, struct mbuf *m, struct sockaddr **psa,
 	}
 	SOCK_IO_RECV_UNLOCK(so);
 
-	if (m != NULL && flagsp != NULL)
-		*flagsp |= MSG_TRUNC;
+	if (flagsp != NULL) {
+		if (m != NULL) {
+			if (*flagsp & MSG_TRUNC) {
+				/* Report real length of the packet */
+				uio->uio_resid -= m_length(m, NULL) - len;
+			}
+			*flagsp |= MSG_TRUNC;
+		} else
+			*flagsp &= ~MSG_TRUNC;
+	}
 
 	return (0);
 }
@@ -1475,7 +1483,7 @@ uipc_soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio,
 	struct sockbuf *sb = NULL;
 	struct mbuf *m;
 	int flags, error;
-	ssize_t len;
+	ssize_t len = 0;
 	bool nonblock;
 
 	MPASS(mp0 == NULL);
@@ -1619,11 +1627,16 @@ uipc_soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio,
 	SOCK_IO_RECV_UNLOCK(so);
 
 	if (m != NULL) {
-		flags |= MSG_TRUNC;
+		if (flagsp != NULL) {
+			if (flags & MSG_TRUNC) {
+				/* Report real length of the packet */
+				uio->uio_resid -= m_length(m, NULL);
+			}
+			*flagsp |= MSG_TRUNC;
+		}
 		m_freem(m);
-	}
-	if (flagsp != NULL)
-		*flagsp |= flags;
+	} else if (flagsp != NULL)
+		*flagsp &= ~MSG_TRUNC;
 
 	return (0);
 }
diff --git a/tests/sys/kern/Makefile b/tests/sys/kern/Makefile
index 93f62022a0f2..3891d78f7629 100644
--- a/tests/sys/kern/Makefile
+++ b/tests/sys/kern/Makefile
@@ -29,6 +29,7 @@ ATF_TESTS_C+=	reaper
 ATF_TESTS_C+=	sched_affinity
 ATF_TESTS_C+=	sigaltstack
 ATF_TESTS_C+=	sigwait
+ATF_TESTS_C+=	socket_msg_trunc
 TEST_METADATA.sigwait+=	is_exclusive="true"
 .if ${MACHINE_ARCH} != "i386" && ${MACHINE_ARCH:Mpowerpc*} == ""
 ATF_TESTS_C+=	subr_physmem_test
diff --git a/tests/sys/kern/socket_msg_trunc.c b/tests/sys/kern/socket_msg_trunc.c
new file mode 100644
index 000000000000..5041842f32b7
--- /dev/null
+++ b/tests/sys/kern/socket_msg_trunc.c
@@ -0,0 +1,169 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2022 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/errno.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <netinet/in.h>
+#include <poll.h>
+
+#include <atf-c.h>
+
+static void
+check_recvmsg(const char *test_name)
+{
+	int ss, cs, rc;
+	struct sockaddr *sa;
+	struct sockaddr_in sin;
+	struct sockaddr_in6 sin6;
+	struct sockaddr_un saun;
+	int *sizes, sizes_count;
+	int one = 1;
+
+
+	if (!strcmp(test_name, "udp")) {
+		ss = socket(PF_INET, SOCK_DGRAM, 0);
+		ATF_CHECK(ss >= 0);
+		rc = setsockopt(ss, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+		ATF_CHECK_EQ(0, rc);
+		bzero(&sin, sizeof(sin));
+		sin.sin_family = AF_INET;
+		sin.sin_len = sizeof(sin);
+		sin.sin_port = htons(6666);
+		sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+		sa = (struct sockaddr *)&sin;
+		rc = bind(ss, sa, sa->sa_len);
+		ATF_CHECK_EQ(0, rc);
+
+		cs = socket(PF_INET, SOCK_DGRAM, 0);
+		ATF_CHECK(cs >= 0);
+		int inet_sizes[] = {80, 255, 256, 1024, 4096, 9000};
+		sizes_count = sizeof(inet_sizes) / sizeof(int);
+		sizes = malloc(sizeof(inet_sizes));
+		memcpy(sizes, inet_sizes, sizeof(inet_sizes));
+
+	} else if (!strcmp(test_name, "udp6")) {
+		ss = socket(PF_INET6, SOCK_DGRAM, 0);
+		ATF_CHECK(ss >= 0);
+		rc = setsockopt(ss, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+		ATF_CHECK_EQ(0, rc);
+		bzero(&sin6, sizeof(sin6));
+		sin6.sin6_family = AF_INET6;
+		sin6.sin6_len = sizeof(sin6);
+		sin6.sin6_port = htons(6666);
+		const struct in6_addr in6loopback = IN6ADDR_LOOPBACK_INIT;
+		sin6.sin6_addr = in6loopback;
+		sa = (struct sockaddr *)&sin6;
+		rc = bind(ss, sa, sa->sa_len);
+		ATF_CHECK_EQ(0, rc);
+
+		cs = socket(PF_INET6, SOCK_DGRAM, 0);
+		ATF_CHECK(cs >= 0);
+		int inet_sizes[] = {80, 255, 256, 1024, 4096, 9000};
+		sizes_count = sizeof(inet_sizes) / sizeof(int);
+		sizes = malloc(sizeof(inet_sizes));
+		memcpy(sizes, inet_sizes, sizeof(inet_sizes));
+
+	} else if (!strcmp(test_name, "unix")) {
+		const char *PATH = "/tmp/test_check_recvmsg_socket";
+		ss = socket(PF_UNIX, SOCK_DGRAM, 0);
+		ATF_CHECK(ss >= 0);
+		rc = setsockopt(ss, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+		ATF_CHECK_EQ(0, rc);
+		bzero(&saun, sizeof(saun));
+		saun.sun_family = AF_UNIX;
+		strcpy(saun.sun_path, PATH);
+		saun.sun_len = sizeof(saun);
+		sa = (struct sockaddr *)&saun;
+		unlink(PATH);
+		rc = bind(ss, sa, sa->sa_len);
+		ATF_CHECK_EQ(0, rc);
+
+		cs = socket(PF_UNIX, SOCK_DGRAM, 0);
+		ATF_CHECK(cs >= 0);
+		int unix_sizes[] = {80, 255, 256, 1024, 2000};
+		sizes_count = sizeof(unix_sizes) / sizeof(int);
+		sizes = malloc(sizeof(unix_sizes));
+		memcpy(sizes, unix_sizes, sizeof(unix_sizes));
+	} else
+		return;
+
+	char buf[4096];
+	memset(buf, 0xFF, sizeof(buf));
+	for (int i = 0; i < sizes_count; i++) {
+		int sz = sizes[i];
+		char tbuf[1];
+		rc = sendto(cs, buf, sz, 0, sa, sa->sa_len);
+		ATF_REQUIRE_EQ(rc, sz);
+
+		rc = recv(ss, NULL, 0, MSG_PEEK | MSG_TRUNC);
+		ATF_CHECK_EQ(rc, sz);
+
+		rc = recv(ss, tbuf, sizeof(tbuf), MSG_PEEK | MSG_TRUNC);
+		ATF_CHECK_EQ(rc, sz);
+
+		rc = recv(ss, tbuf, sizeof(tbuf), MSG_TRUNC);
+		ATF_CHECK_EQ(rc, sz);
+	}
+
+	close(ss);
+	close(cs);
+}
+
+ATF_TC_WITHOUT_HEAD(socket_afinet_udp_recv_trunc);
+ATF_TC_BODY(socket_afinet_udp_recv_trunc, tc)
+{
+	check_recvmsg("udp");
+}
+
+ATF_TC_WITHOUT_HEAD(socket_afinet6_udp_recv_trunc);
+ATF_TC_BODY(socket_afinet6_udp_recv_trunc, tc)
+{
+	check_recvmsg("udp6");
+}
+
+ATF_TC_WITHOUT_HEAD(socket_afunix_recv_trunc);
+ATF_TC_BODY(socket_afunix_recv_trunc, tc)
+{
+	check_recvmsg("unix");
+}
+
+
+ATF_TP_ADD_TCS(tp)
+{
+
+	ATF_TP_ADD_TC(tp, socket_afinet_udp_recv_trunc);
+	ATF_TP_ADD_TC(tp, socket_afinet6_udp_recv_trunc);
+	ATF_TP_ADD_TC(tp, socket_afunix_recv_trunc);
+
+	return atf_no_error();
+}



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202207301846.26UIko7G080500>