From owner-freebsd-current Sat Dec 13 15:07:11 1997 Return-Path: Received: (from root@localhost) by hub.freebsd.org (8.8.7/8.8.7) id PAA22266 for current-outgoing; Sat, 13 Dec 1997 15:07:11 -0800 (PST) (envelope-from owner-freebsd-current) Received: from frmug.org (frmug-gw.frmug.org [193.56.58.252]) by hub.freebsd.org (8.8.7/8.8.7) with ESMTP id PAA22244 for ; Sat, 13 Dec 1997 15:06:53 -0800 (PST) (envelope-from pb@fasterix.frmug.org) Received: (from uucp@localhost) by frmug.org (8.8.8/frmug-2.1/nospam) with UUCP id AAA09258; Sun, 14 Dec 1997 00:06:10 +0100 (CET) (envelope-from pb@fasterix.frmug.org) Received: (from pb@localhost) by fasterix.frmug.org (8.8.8/8.8.5/pb-19970302) id AAA27183; Sun, 14 Dec 1997 00:02:43 +0100 (CET) Message-ID: <19971214000243.EA63641@@> Date: Sun, 14 Dec 1997 00:02:43 +0100 From: pb@fasterix.freenix.org (Pierre Beyssac) To: hasty@rah.star-gate.com (Amancio Hasty) Cc: freebsd-current@FreeBSD.ORG Subject: Re: small patch to fix MSG_COMPAT in recvfrom() References: <19971213181809.AU40295@@> <199712132009.MAA00640@rah.star-gate.com> X-Mailer: Mutt 0.59.1e Mime-Version: 1.0 In-Reply-To: <199712132009.MAA00640@rah.star-gate.com>; from Amancio Hasty on Dec 13, 1997 12:09:15 -0800 Sender: owner-freebsd-current@FreeBSD.ORG X-Loop: FreeBSD.org Precedence: bulk Amancio Hasty writes: > If you don't mind please post the patches to the list it will allow others > to review them and test them. Ok, here they are. They implement the following: - emulate Linux IP_HDRINCL behaviour in sendto(): byte order fixed Note that we do an extra getsockopt() on every sendto() to check if the option is set because we don't keep state in the emulator code. Is there a better way to implement this? 
- correct a bug (value of "name" not passed) with getsockopt() (see last hunk) I have a slightly different version (fewer comments and messier code ;-)) of the IP_HDRINCL fix adapted to 2.2.5 with the int *retval stuff everywhere, but I suppose there's too much new code for it to be included in -stable. OTOH the getsockopt() fix is only one line and can probably go into -stable, though it's not much use without the HDRINCL stuff. There may be other patches to come for recvfrom() (possibly the same problem as sendto() with the byte order on ip_len and ip_off on received packets, I haven't been able to check this yet). I also hope to emulate Linux "snoop" sockets (the equivalent of bpf) but I'm not yet sure it's very easy... As an anecdote, I hacked this code after I got tired of porting Linux raw IP code to BSD again and again because of those incompatible Linux network includes. After I told my boss it would be a good idea to convert our FreeBSD network testing box to a Linux box for that very reason, I regretted this and decided it was way better to improve the Linux emulator instead :-) --- linux_socket.c.orig Tue Dec 9 01:42:58 1997 +++ linux_socket.c Sat Dec 13 23:32:15 1997 @@ -31,16 +31,20 @@ /* XXX we use functions that might not exist. 
*/ #define COMPAT_43 1 #include +#include #include #include #include #include +#include +#include #include #include +#include static int linux_to_bsd_domain(int domain) { @@ -92,8 +96,9 @@ return IP_ADD_MEMBERSHIP; case LINUX_IP_DROP_MEMBERSHIP: return IP_DROP_MEMBERSHIP; case LINUX_IP_HDRINCL: + return IP_HDRINCL; default: return -1; } } @@ -130,8 +135,111 @@ return -1; } } +/* Return 0 if IP_HDRINCL is set of the given socket, not 0 otherwise */ +static int +linux_check_hdrincl(struct proc *p, int s) +{ + struct getsockopt_args /* { + int s; + int level; + int name; + caddr_t val; + int *avalsize; + } */ bsd_args; + int error; + caddr_t sg, val, valsize; + int size_val = sizeof val; + int optval; + + sg = stackgap_init(); + val = stackgap_alloc(&sg, sizeof(int)); + valsize = stackgap_alloc(&sg, sizeof(int)); + + if ((error=copyout(&size_val, valsize, sizeof(size_val)))) + return error; + bsd_args.s = s; + bsd_args.level = IPPROTO_IP; + bsd_args.name = IP_HDRINCL; + bsd_args.val = val; + bsd_args.avalsize = (int *)valsize; + if ((error=getsockopt(p, &bsd_args))) + return error; + if ((error=copyin(val, &optval, sizeof(optval)))) + return error; + return optval == 0; +} + +/* + * Updated sendto() when IP_HDRINCL is set: + * tweak endian-dependent fields in the IP packet. + */ +static int +linux_sendto_hdrincl(struct proc *p, struct sendto_args *bsd_args) +{ +/* + * linux_ip_copysize defines how many bytes we should copy + * from the beginning of the IP packet before we customize it for BSD. + * It should include all the fields we modify (ip_len and ip_off) + * and be as small as possible to minimize copying overhead. 
+ */ +#define linux_ip_copysize 8 + + caddr_t sg; + struct ip *packet; + struct msghdr *msg; + struct iovec *iov; + + int error; + struct sendmsg_args /* { + int s; + caddr_t msg; + int flags; + } */ sendmsg_args; + + /* Check the packet isn't too small before we mess with it */ + if (bsd_args->len < linux_ip_copysize) + return EINVAL; + + /* + * Tweaking the user buffer in place would be bad manners. + * We create a corrected IP header with just the needed length, + * then use an iovec to glue it to the rest of the user packet + * when calling sendmsg(). + */ + sg = stackgap_init(); + packet = (struct ip *)stackgap_alloc(&sg, linux_ip_copysize); + msg = (struct msghdr *)stackgap_alloc(&sg, sizeof(*msg)); + iov = (struct iovec *)stackgap_alloc(&sg, sizeof(*iov)*2); + + /* Make a copy of the beginning of the packet to be sent */ + if ((error = copyin(bsd_args->buf, (caddr_t)packet, linux_ip_copysize))) + return error; + + /* Convert fields from Linux to BSD raw IP socket format */ + packet->ip_len = bsd_args->len; + packet->ip_off = ntohs(packet->ip_off); + + /* Prepare the msghdr and iovec structures describing the new packet */ + msg->msg_name = bsd_args->to; + msg->msg_namelen = bsd_args->tolen; + msg->msg_iov = iov; + msg->msg_iovlen = 2; + msg->msg_control = NULL; + msg->msg_controllen = 0; + msg->msg_flags = 0; + iov[0].iov_base = (char *)packet; + iov[0].iov_len = linux_ip_copysize; + iov[1].iov_base = (char *)(bsd_args->buf) + linux_ip_copysize; + iov[1].iov_len = bsd_args->len - linux_ip_copysize; + + sendmsg_args.s = bsd_args->s; + sendmsg_args.msg = (caddr_t)msg; + sendmsg_args.flags = bsd_args->flags; + return sendmsg(p, &sendmsg_args); +} + struct linux_socket_args { int domain; int type; int protocol; @@ -146,17 +254,48 @@ int type; int protocol; } */ bsd_args; int error; + int retval_socket; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.protocol = linux_args.protocol; bsd_args.type = 
linux_args.type; bsd_args.domain = linux_to_bsd_domain(linux_args.domain); if (bsd_args.domain == -1) return EINVAL; - return socket(p, &bsd_args); + + retval_socket = socket(p, &bsd_args); + if (bsd_args.type == SOCK_RAW + && (bsd_args.protocol == IPPROTO_RAW || bsd_args.protocol == 0) + && bsd_args.domain == AF_INET + && retval_socket >= 0) { + /* It's a raw IP socket: set the IP_HDRINCL option. */ + struct setsockopt_args /* { + int s; + int level; + int name; + caddr_t val; + int valsize; + } */ bsd_setsockopt_args; + caddr_t sg; + int *hdrincl; + + sg = stackgap_init(); + hdrincl = (int *)stackgap_alloc(&sg, sizeof(*hdrincl)); + *hdrincl = 1; + bsd_setsockopt_args.s = p->p_retval[0]; + bsd_setsockopt_args.level = IPPROTO_IP; + bsd_setsockopt_args.name = IP_HDRINCL; + bsd_setsockopt_args.val = (caddr_t)hdrincl; + bsd_setsockopt_args.valsize = sizeof(*hdrincl); + /* We ignore any error returned by setsockopt() */ + setsockopt(p, &bsd_setsockopt_args); + /* Copy back the return value from socket() */ + p->p_retval[0] = bsd_setsockopt_args.s; + } + return retval_socket; } struct linux_bind_args { int s; @@ -421,8 +560,13 @@ bsd_args.len = linux_args.len; bsd_args.flags = linux_args.flags; bsd_args.to = linux_args.to; bsd_args.tolen = linux_args.tolen; + + if (linux_check_hdrincl(p, linux_args.s) == 0) + /* IP_HDRINCL set, tweak the packet before sending */ + return linux_sendto_hdrincl(p, &bsd_args); + return sendto(p, &bsd_args); } struct linux_recvfrom_args { @@ -560,8 +704,9 @@ return EINVAL; } if (name == -1) return EINVAL; + bsd_args.name = name; bsd_args.val = linux_args.optval; bsd_args.avalsize = linux_args.optlen; return getsockopt(p, &bsd_args); } -- Pierre Beyssac pb@fasterix.frmug.org pb@fasterix.freenix.org {Free,Net,Open}BSD, Linux : il y a moins bien, mais c'est plus cher Free domains: http://www.eu.org/ or mail dns-manager@EU.org