Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 14 Dec 1997 00:02:43 +0100
From:      pb@fasterix.freenix.org (Pierre Beyssac)
To:        hasty@rah.star-gate.com (Amancio Hasty)
Cc:        freebsd-current@FreeBSD.ORG
Subject:   Re: small patch to fix MSG_COMPAT in recvfrom()
Message-ID:  <19971214000243.EA63641@@>
In-Reply-To: <199712132009.MAA00640@rah.star-gate.com>; from Amancio Hasty on Dec 13, 1997 12:09:15 -0800
References:  <19971213181809.AU40295@@> <199712132009.MAA00640@rah.star-gate.com>

next in thread | previous in thread | raw e-mail | index | archive | help
Amancio Hasty writes:
> If you don't mind please post the patches to the list it will allow others
> to review them and test them.

Ok, here they are. They implement the following:

	- emulate Linux IP_HDRINCL behaviour in sendto(): byte order fixed
	  Note that we do an extra getsockopt() on every sendto()
	  to check if the option is set because we don't keep state
	  in the emulator code. Is there a better way to implement
	  this?
	- correct a bug (value of "name" not passed) with
	  getsockopt() (see last hunk)

I have a slightly different version (fewer comments and messier
code ;-)) of the IP_HDRINCL fix adapted to 2.2.5 with the int
*retval stuff everywhere, but I suppose there's too much new code
for it to be included in -stable. OTOH the getsockopt() fix is
only one line and can probably go into -stable, though it's not
much use without the HDRINCL stuff.

There may be other patches to come for recvfrom() (possibly the
same problem as sendto() with the byte order on ip_len and ip_off
on received packets; I haven't been able to check this yet).  I'd
also hope to emulate Linux "snoop" sockets (the equivalent of bpf)
but I'm not yet sure it's very easy...

As an anecdote: I hacked up this code after I got tired of porting
Linux raw IP code to BSD again and again because of those incompatible
Linux network includes. After I told my boss it would be a good
idea to convert our FreeBSD network testing box to a Linux box for
that very reason, I regretted this and decided it was way better
to improve the Linux emulator instead :-)

--- linux_socket.c.orig	Tue Dec  9 01:42:58 1997
+++ linux_socket.c	Sat Dec 13 23:32:15 1997
@@ -31,16 +31,20 @@
 /* XXX we use functions that might not exist. */
 #define	COMPAT_43	1
 
 #include <sys/param.h>
+#include <sys/proc.h>
 #include <sys/systm.h>
 #include <sys/sysproto.h>
 #include <sys/socket.h>
 
 #include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
 
 #include <i386/linux/linux.h>
 #include <i386/linux/linux_proto.h>
+#include <i386/linux/linux_util.h>
 
 static int
 linux_to_bsd_domain(int domain)
 {
@@ -92,8 +96,9 @@
 	return IP_ADD_MEMBERSHIP;
     case LINUX_IP_DROP_MEMBERSHIP:
 	return IP_DROP_MEMBERSHIP;
     case LINUX_IP_HDRINCL:
+        return IP_HDRINCL;
     default:
 	return -1;
     }
 }
@@ -130,8 +135,111 @@
 	return -1;
     }
 }
 
+/* Return 0 if IP_HDRINCL is set on socket s; non-zero if it is clear or on error */
+static int
+linux_check_hdrincl(struct proc *p, int s)
+{
+    struct getsockopt_args /* {
+	int s;
+	int level;
+	int name;
+	caddr_t val;
+	int *avalsize;
+    } */ bsd_args;
+    int error;
+    caddr_t sg, val, valsize;	/* val/valsize: scratch buffers handed to getsockopt() */
+    int optval;
+    int size_val = sizeof(optval);	/* length of the option buffer, not of a pointer */
+
+    sg = stackgap_init();
+    val = stackgap_alloc(&sg, sizeof(optval));
+    valsize = stackgap_alloc(&sg, sizeof(size_val));
+
+    if ((error=copyout(&size_val, valsize, sizeof(size_val))))
+	return error;
+    bsd_args.s = s;
+    bsd_args.level = IPPROTO_IP;
+    bsd_args.name = IP_HDRINCL;
+    bsd_args.val = val;
+    bsd_args.avalsize = (int *)valsize;
+    if ((error=getsockopt(p, &bsd_args)))
+	return error;
+    if ((error=copyin(val, &optval, sizeof(optval))))
+	return error;
+    return optval == 0;	/* 0 when the option is set, 1 when it is clear */
+}
+
+/*
+ * Replacement for sendto() when IP_HDRINCL is set: tweak endian-dependent IP
+ * header fields, then send with sendmsg(). EINVAL if the packet is too short.
+ */
+static int
+linux_sendto_hdrincl(struct proc *p, struct sendto_args *bsd_args)
+{
+/*
+ * linux_ip_copysize defines how many bytes we should copy
+ * from the beginning of the IP packet before we customize it for BSD.
+ * It should include all the fields we modify (ip_len and ip_off)
+ * and be as small as possible to minimize copying overhead.
+ */
+#define linux_ip_copysize	8
+
+    caddr_t sg;
+    struct ip *packet;
+    struct msghdr *msg;
+    struct iovec *iov;
+
+    int error;
+    struct  sendmsg_args /* {
+	int s;
+	caddr_t msg;
+	int flags;
+    } */ sendmsg_args;
+
+    /* Reject packets shorter than the header prefix we are about to rewrite */
+    if (bsd_args->len < linux_ip_copysize)
+	return EINVAL;
+
+    /*
+     * Tweaking the user buffer in place would be bad manners.
+     * We create a corrected IP header with just the needed length,
+     * then use an iovec to glue it to the rest of the user packet
+     * when calling sendmsg().
+     */
+    sg = stackgap_init();
+    packet = (struct ip *)stackgap_alloc(&sg, linux_ip_copysize);
+    msg = (struct msghdr *)stackgap_alloc(&sg, sizeof(*msg));
+    iov = (struct iovec *)stackgap_alloc(&sg, sizeof(*iov)*2);
+
+    /* Copy only the first linux_ip_copysize bytes of the caller's packet */
+    if ((error = copyin(bsd_args->buf, (caddr_t)packet, linux_ip_copysize)))
+	return error;
+
+    /* Convert fields from Linux to BSD raw IP socket format */
+    packet->ip_len = bsd_args->len;		/* length passed to sendto() */
+    packet->ip_off = ntohs(packet->ip_off);	/* network -> host byte order */
+
+    /* Prepare the msghdr and iovec structures describing the new packet */
+    msg->msg_name = bsd_args->to;
+    msg->msg_namelen = bsd_args->tolen;
+    msg->msg_iov = iov;
+    msg->msg_iovlen = 2;
+    msg->msg_control = NULL;
+    msg->msg_controllen = 0;
+    msg->msg_flags = 0;
+    iov[0].iov_base = (char *)packet;		/* rewritten header prefix */
+    iov[0].iov_len = linux_ip_copysize;
+    iov[1].iov_base = (char *)(bsd_args->buf) + linux_ip_copysize;	/* rest of caller's buffer */
+    iov[1].iov_len = bsd_args->len - linux_ip_copysize;
+
+    sendmsg_args.s = bsd_args->s;
+    sendmsg_args.msg = (caddr_t)msg;
+    sendmsg_args.flags = bsd_args->flags;
+    return sendmsg(p, &sendmsg_args);
+}
+
 struct linux_socket_args {
     int domain;
     int type;
     int protocol;
@@ -146,17 +254,48 @@
 	int type;
 	int protocol;
     } */ bsd_args;
     int error;
+    int retval_socket;
 
     if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args))))
 	return error;
     bsd_args.protocol = linux_args.protocol;
     bsd_args.type = linux_args.type;
     bsd_args.domain = linux_to_bsd_domain(linux_args.domain);
     if (bsd_args.domain == -1)
 	return EINVAL;
-    return socket(p, &bsd_args);
+
+    retval_socket = socket(p, &bsd_args);
+    if (bsd_args.type == SOCK_RAW
+	&& (bsd_args.protocol == IPPROTO_RAW || bsd_args.protocol == 0)
+	&& bsd_args.domain == AF_INET
+	&& retval_socket >= 0) {
+	/* It's a raw IP socket: set the IP_HDRINCL option. */
+	struct setsockopt_args /* {
+	    int s;
+	    int level;
+	    int name;
+	    caddr_t val;
+	    int valsize;
+	} */ bsd_setsockopt_args;
+	caddr_t sg;
+	int *hdrincl;
+
+	sg = stackgap_init();
+	hdrincl = (int *)stackgap_alloc(&sg, sizeof(*hdrincl));
+	*hdrincl = 1;
+	bsd_setsockopt_args.s = p->p_retval[0];
+	bsd_setsockopt_args.level = IPPROTO_IP;
+	bsd_setsockopt_args.name = IP_HDRINCL;
+	bsd_setsockopt_args.val = (caddr_t)hdrincl;
+	bsd_setsockopt_args.valsize = sizeof(*hdrincl);
+	/* We ignore any error returned by setsockopt() */
+	setsockopt(p, &bsd_setsockopt_args);
+	/* Copy back the return value from socket() */
+	p->p_retval[0] = bsd_setsockopt_args.s;
+    }
+    return retval_socket;
 }
 
 struct linux_bind_args {
     int s;
@@ -421,8 +560,13 @@
     bsd_args.len = linux_args.len;
     bsd_args.flags = linux_args.flags;
     bsd_args.to = linux_args.to;
     bsd_args.tolen = linux_args.tolen;
+
+    if (linux_check_hdrincl(p, linux_args.s) == 0)
+	/* IP_HDRINCL set, tweak the packet before sending */
+	return linux_sendto_hdrincl(p, &bsd_args);
+
     return sendto(p, &bsd_args);
 }
 
 struct linux_recvfrom_args {
@@ -560,8 +704,9 @@
 	return EINVAL;
     }
     if (name == -1)
 	return EINVAL;
+    bsd_args.name = name;
     bsd_args.val = linux_args.optval;
     bsd_args.avalsize = linux_args.optlen;
     return getsockopt(p, &bsd_args);
 }
-- 
Pierre Beyssac	      pb@fasterix.frmug.org pb@fasterix.freenix.org
{Free,Net,Open}BSD, Linux : il y a moins bien, mais c'est plus cher
    Free domains: http://www.eu.org/ or mail dns-manager@EU.org



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?19971214000243.EA63641>