Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 20 May 2020 11:03:59 +0000 (UTC)
From:      Wei Hu <whu@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r361275 - in head/sys: conf dev/hyperv/hvsock dev/hyperv/include dev/hyperv/vmbus modules/hyperv modules/hyperv/hvsock sys
Message-ID:  <202005201103.04KB3xTp013965@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: whu
Date: Wed May 20 11:03:59 2020
New Revision: 361275
URL: https://svnweb.freebsd.org/changeset/base/361275

Log:
  HyperV socket implementation for FreeBSD
  
  This change adds the Hyper-V socket feature to FreeBSD. A new socket
  address family, AF_HYPERV, and its kernel support are added.
  
  Submitted by:	Wei Hu <weh@microsoft.com>
  Reviewed by:	Dexuan Cui <decui@microsoft.com>
  Relnotes:	yes
  Sponsored by:	Microsoft
  Differential Revision:	https://reviews.freebsd.org/D24061

Added:
  head/sys/dev/hyperv/hvsock/
  head/sys/dev/hyperv/hvsock/hv_sock.c   (contents, props changed)
  head/sys/dev/hyperv/hvsock/hv_sock.h   (contents, props changed)
  head/sys/modules/hyperv/hvsock/
  head/sys/modules/hyperv/hvsock/Makefile   (contents, props changed)
Modified:
  head/sys/conf/files.x86
  head/sys/dev/hyperv/include/vmbus.h
  head/sys/dev/hyperv/vmbus/vmbus.c
  head/sys/dev/hyperv/vmbus/vmbus_br.c
  head/sys/dev/hyperv/vmbus/vmbus_brvar.h
  head/sys/dev/hyperv/vmbus/vmbus_chan.c
  head/sys/dev/hyperv/vmbus/vmbus_chanvar.h
  head/sys/dev/hyperv/vmbus/vmbus_reg.h
  head/sys/modules/hyperv/Makefile
  head/sys/sys/socket.h

Modified: head/sys/conf/files.x86
==============================================================================
--- head/sys/conf/files.x86	Wed May 20 11:01:10 2020	(r361274)
+++ head/sys/conf/files.x86	Wed May 20 11:03:59 2020	(r361275)
@@ -133,6 +133,7 @@ dev/hwpmc/hwpmc_core.c		optional	hwpmc
 dev/hwpmc/hwpmc_uncore.c	optional	hwpmc
 dev/hwpmc/hwpmc_tsc.c		optional	hwpmc
 dev/hwpmc/hwpmc_x86.c		optional	hwpmc
+dev/hyperv/hvsock/hv_sock.c				optional	hyperv
 dev/hyperv/input/hv_kbd.c				optional	hyperv
 dev/hyperv/input/hv_kbdc.c				optional	hyperv
 dev/hyperv/pcib/vmbus_pcib.c				optional	hyperv pci

Added: head/sys/dev/hyperv/hvsock/hv_sock.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/dev/hyperv/hvsock/hv_sock.c	Wed May 20 11:03:59 2020	(r361275)
@@ -0,0 +1,1748 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/domain.h>
+#include <sys/lock.h>
+#include <sys/kernel.h>
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/sysproto.h>
+#include <sys/systm.h>
+#include <sys/sockbuf.h>
+#include <sys/sx.h>
+#include <sys/uio.h>
+
+#include <net/vnet.h>
+
+#include <dev/hyperv/vmbus/vmbus_reg.h>
+
+#include "hv_sock.h"
+
+/*
+ * Debug verbosity levels compared against the hvs_dbg_level sysctl by
+ * HVSOCK_DBG() below.
+ */
+#define HVSOCK_DBG_NONE			0x0
+#define HVSOCK_DBG_INFO			0x1
+#define HVSOCK_DBG_ERR			0x2
+#define HVSOCK_DBG_VERBOSE		0x3
+
+
+SYSCTL_NODE(_net, OID_AUTO, hvsock, CTLFLAG_RD, 0, "HyperV socket");
+
+/* Current debug level; exported as a read/write tunable under net.hvsock. */
+static int hvs_dbg_level;
+SYSCTL_INT(_net_hvsock, OID_AUTO, hvs_dbg_level, CTLFLAG_RWTUN, &hvs_dbg_level,
+    0, "hyperv socket debug level: 0 = none, 1 = info, 2 = error, 3 = verbose");
+
+
+/*
+ * Print a message when the configured debug level is at least 'level'.
+ * Because the comparison is ">=", a message tagged HVSOCK_DBG_ERR is only
+ * printed when hvs_dbg_level is 2 or higher.
+ */
+#define HVSOCK_DBG(level, ...) do {					\
+	if (hvs_dbg_level >= (level))					\
+		printf(__VA_ARGS__);					\
+	} while (0)
+
+MALLOC_DEFINE(M_HVSOCK, "hyperv_socket", "hyperv socket control structures");
+
+/* The MTU is 16KB per host side's design */
+#define HVSOCK_MTU_SIZE		(1024 * 16)
+/* Upper bound on the payload of one send: a page minus the vmpipe header. */
+#define HVSOCK_SEND_BUF_SZ	(PAGE_SIZE - sizeof(struct vmpipe_proto_header))
+
+#define HVSOCK_HEADER_LEN	(sizeof(struct hvs_pkt_header))
+
+/*
+ * Size computed for a packet carrying 'payload_len' bytes: the packet
+ * header, the payload rounded up to a multiple of 8, plus one uint64_t.
+ */
+#define HVSOCK_PKT_LEN(payload_len)	(HVSOCK_HEADER_LEN + \
+					 roundup2(payload_len, 8) + \
+					 sizeof(uint64_t))
+
+
+/*
+ * Tentative definition so that hv_socket_protosw below can take the
+ * address of the domain before its initializer appears.
+ */
+static struct domain		hv_socket_domain;
+
+/*
+ * HyperV Transport sockets
+ *
+ * User-request dispatch table wiring the socket layer entry points to
+ * the hvs_trans_* handlers.
+ */
+static struct pr_usrreqs	hvs_trans_usrreqs = {
+	.pru_attach =		hvs_trans_attach,
+	.pru_bind =		hvs_trans_bind,
+	.pru_listen =		hvs_trans_listen,
+	.pru_accept =		hvs_trans_accept,
+	.pru_connect =		hvs_trans_connect,
+	.pru_peeraddr =		hvs_trans_peeraddr,
+	.pru_sockaddr =		hvs_trans_sockaddr,
+	.pru_soreceive =	hvs_trans_soreceive,
+	.pru_sosend =		hvs_trans_sosend,
+	.pru_disconnect =	hvs_trans_disconnect,
+	.pru_close =		hvs_trans_close,
+	.pru_detach =		hvs_trans_detach,
+	.pru_shutdown =		hvs_trans_shutdown,
+	.pru_abort =		hvs_trans_abort,
+};
+
+/*
+ * Definitions of protocols supported in HyperV socket domain
+ *
+ * A single connection-oriented SOCK_STREAM protocol is provided.
+ */
+static struct protosw		hv_socket_protosw[] = {
+{
+	.pr_type =		SOCK_STREAM,
+	.pr_domain =		&hv_socket_domain,
+	.pr_protocol =		HYPERV_SOCK_PROTO_TRANS,
+	.pr_flags =		PR_CONNREQUIRED,
+	.pr_init =		hvs_trans_init,
+	.pr_usrreqs =		&hvs_trans_usrreqs,
+},
+};
+
+/* The AF_HYPERV domain; its protocol switch is the one-entry table above. */
+static struct domain		hv_socket_domain = {
+	.dom_family =		AF_HYPERV,
+	.dom_name =		"hyperv",
+	.dom_protosw =		hv_socket_protosw,
+	.dom_protoswNPROTOSW =	&hv_socket_protosw[nitems(hv_socket_protosw)]
+};
+
+VNET_DOMAIN_SET(hv_socket_);
+
+/* Bounds of the 32-bit port space walked when auto-binding in connect. */
+#define MAX_PORT			((uint32_t)0xFFFFFFFF)
+#define MIN_PORT			((uint32_t)0x0)
+
+/*
+ * Service ID template. The first four bytes hold the port (see
+ * get_port_by_srv_id()/set_port_by_srv_id()); the remaining twelve bytes
+ * are fixed and are what is_valid_srv_id() compares against.
+ */
+/* 00000000-facb-11e6-bd58-64006a7986d3 */
+static const struct hyperv_guid srv_id_template = {
+	.hv_guid = {
+	    0x00, 0x00, 0x00, 0x00, 0xcb, 0xfa, 0xe6, 0x11,
+	    0xbd, 0x58, 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3 }
+};
+
+/* Forward declarations of helpers defined later in this file. */
+static int		hvsock_br_callback(void *, int, void *);
+static uint32_t		hvsock_canread_check(struct hvs_pcb *);
+static uint32_t		hvsock_canwrite_check(struct hvs_pcb *);
+static int		hvsock_send_data(struct vmbus_channel *chan,
+    struct uio *uio, uint32_t to_write, struct sockbuf *sb);
+
+
+
+/* Globals */
+/* Taken exclusively by hvs_trans_lock() and released by hvs_trans_unlock(). */
+static struct sx		hvs_trans_socks_sx;
+/* Held around every access to the bound/connected lists below. */
+static struct mtx		hvs_trans_socks_mtx;
+static LIST_HEAD(, hvs_pcb)	hvs_trans_bound_socks;
+static LIST_HEAD(, hvs_pcb)	hvs_trans_connected_socks;
+/* Last port handed out by the auto-bind search in hvs_trans_connect(). */
+static uint32_t			previous_auto_bound_port;
+
+/*
+ * Emit a GUID at the INFO debug level: the first three fields are printed
+ * as a 32-bit and two 16-bit values read in host byte order, followed by
+ * the remaining eight bytes individually.
+ */
+static void
+hvsock_print_guid(struct hyperv_guid *guid)
+{
+	unsigned char *bytes = (unsigned char *)guid;
+
+	HVSOCK_DBG(HVSOCK_DBG_INFO,
+	    "0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x\n",
+	    *((unsigned int *)&bytes[0]),
+	    *((unsigned short *)&bytes[4]),
+	    *((unsigned short *)&bytes[6]),
+	    bytes[8], bytes[9], bytes[10], bytes[11],
+	    bytes[12], bytes[13], bytes[14], bytes[15]);
+}
+
+/*
+ * Return true when every byte of 'id' after the leading four (the port
+ * field) matches srv_id_template.
+ */
+static bool
+is_valid_srv_id(const struct hyperv_guid *id)
+{
+	int diff;
+
+	diff = memcmp(&id->hv_guid[4], &srv_id_template.hv_guid[4],
+	    sizeof(struct hyperv_guid) - 4);
+
+	return (diff == 0);
+}
+
+/*
+ * Extract the port stored in the first sizeof(unsigned int) bytes of a
+ * service ID, in host byte order.
+ *
+ * The GUID is copied out with memcpy() rather than read through a casted
+ * pointer: struct hyperv_guid is used as a byte array in this file, so it
+ * may not be suitably aligned for a direct unsigned int load.
+ */
+static unsigned int
+get_port_by_srv_id(const struct hyperv_guid *srv_id)
+{
+	unsigned int port;
+
+	memcpy(&port, srv_id, sizeof(port));
+
+	return (port);
+}
+
+/*
+ * Store 'port' into the first sizeof(unsigned int) bytes of a service ID,
+ * in host byte order, leaving the rest of the GUID untouched.
+ *
+ * memcpy() is used instead of a store through a casted pointer because
+ * struct hyperv_guid is used as a byte array in this file and may not be
+ * suitably aligned for a direct unsigned int store.
+ */
+static void
+set_port_by_srv_id(struct hyperv_guid *srv_id, unsigned int port)
+{
+	memcpy(srv_id, &port, sizeof(port));
+}
+
+
+/*
+ * Unlink 'pcb' from the bound and/or connected list, as selected by the
+ * HVS_LIST_* bits in 'list'. A NULL pcb is ignored. The pcb is only
+ * unlinked if it is actually found on the list in question.
+ *
+ * The list mutex is not taken here; the callers in this file hold
+ * hvs_trans_socks_mtx around the call.
+ */
+static void
+__hvs_remove_pcb_from_list(struct hvs_pcb *pcb, unsigned char list)
+{
+	struct hvs_pcb *p = NULL;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, pcb);
+
+	if (!pcb)
+		return;
+
+	if (list & HVS_LIST_BOUND) {
+		LIST_FOREACH(p, &hvs_trans_bound_socks, bound_next) {
+			if (p == pcb) {
+				LIST_REMOVE(p, bound_next);
+				/*
+				 * Stop here: continuing the traversal would
+				 * follow the link fields of the element that
+				 * was just removed.
+				 */
+				break;
+			}
+		}
+	}
+
+	if (list & HVS_LIST_CONNECTED) {
+		LIST_FOREACH(p, &hvs_trans_connected_socks, connected_next) {
+			if (p == pcb) {
+				LIST_REMOVE(pcb, connected_next);
+				/* Same as above: do not walk a removed entry. */
+				break;
+			}
+		}
+	}
+}
+
+/*
+ * Socket-based wrapper around __hvs_remove_pcb_from_list(): looks up the
+ * pcb attached to 'so' and unlinks it from the lists selected by 'list'.
+ */
+static void
+__hvs_remove_socket_from_list(struct socket *so, unsigned char list)
+{
+	struct hvs_pcb *sock_pcb;
+
+	sock_pcb = so2hvspcb(so);
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, sock_pcb);
+
+	__hvs_remove_pcb_from_list(sock_pcb, list);
+}
+
+/*
+ * Link the pcb attached to 'so' at the head of the bound and/or connected
+ * list, as selected by the HVS_LIST_* bits in 'list'. No locking is done
+ * here.
+ */
+static void
+__hvs_insert_socket_on_list(struct socket *so, unsigned char list)
+{
+	struct hvs_pcb *sock_pcb = so2hvspcb(so);
+
+	if ((list & HVS_LIST_BOUND) != 0) {
+		LIST_INSERT_HEAD(&hvs_trans_bound_socks, sock_pcb,
+		    bound_next);
+	}
+
+	if ((list & HVS_LIST_CONNECTED) != 0) {
+		LIST_INSERT_HEAD(&hvs_trans_connected_socks, sock_pcb,
+		    connected_next);
+	}
+}
+
+/*
+ * Locked variant of __hvs_remove_socket_from_list(). Does nothing (apart
+ * from a verbose debug message) when 'so' or its pcb is NULL.
+ */
+void
+hvs_remove_socket_from_list(struct socket *so, unsigned char list)
+{
+	if (so == NULL || so->so_pcb == NULL) {
+		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+		    "%s: socket or so_pcb is null\n", __func__);
+	} else {
+		mtx_lock(&hvs_trans_socks_mtx);
+		__hvs_remove_socket_from_list(so, list);
+		mtx_unlock(&hvs_trans_socks_mtx);
+	}
+}
+
+/*
+ * Locked variant of __hvs_insert_socket_on_list(). Does nothing (apart
+ * from a verbose debug message) when 'so' or its pcb is NULL.
+ */
+static void
+hvs_insert_socket_on_list(struct socket *so, unsigned char list)
+{
+	if (so == NULL || so->so_pcb == NULL) {
+		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+		    "%s: socket or so_pcb is null\n", __func__);
+	} else {
+		mtx_lock(&hvs_trans_socks_mtx);
+		__hvs_insert_socket_on_list(so, list);
+		mtx_unlock(&hvs_trans_socks_mtx);
+	}
+}
+
+/*
+ * Search the lists selected by 'list' (bound first, then connected) for a
+ * pcb whose local port equals addr->hvs_port and which still has a socket
+ * attached. Returns that socket, or NULL when there is no match. No
+ * locking is done here.
+ */
+static struct socket *
+__hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list)
+{
+	struct hvs_pcb *cur = NULL;
+
+	if ((list & HVS_LIST_BOUND) != 0) {
+		LIST_FOREACH(cur, &hvs_trans_bound_socks, bound_next) {
+			if (cur->so == NULL)
+				continue;
+			if (cur->local_addr.hvs_port == addr->hvs_port)
+				return (cur->so);
+		}
+	}
+
+	if ((list & HVS_LIST_CONNECTED) != 0) {
+		LIST_FOREACH(cur, &hvs_trans_connected_socks,
+		    connected_next) {
+			if (cur->so == NULL)
+				continue;
+			if (cur->local_addr.hvs_port == addr->hvs_port)
+				return (cur->so);
+		}
+	}
+
+	return (NULL);
+}
+
+/*
+ * Locked variant of __hvs_find_socket_on_list(): performs the lookup with
+ * hvs_trans_socks_mtx held and returns the result after dropping it.
+ */
+static struct socket *
+hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list)
+{
+	struct socket *found;
+
+	mtx_lock(&hvs_trans_socks_mtx);
+	found = __hvs_find_socket_on_list(addr, list);
+	mtx_unlock(&hvs_trans_socks_mtx);
+
+	return (found);
+}
+
+/*
+ * Reset 'addr' to an all-zero AF_HYPERV address carrying 'port'.
+ */
+static inline void
+hvs_addr_set(struct sockaddr_hvs *addr, unsigned int port)
+{
+	bzero(addr, sizeof(*addr));
+	addr->hvs_port = port;
+	addr->sa_family = AF_HYPERV;
+}
+
+/*
+ * Initialize 'addr' as an AF_HYPERV address whose port is the one encoded
+ * in the service ID 'svr_id'.
+ */
+void
+hvs_addr_init(struct sockaddr_hvs *addr, const struct hyperv_guid *svr_id)
+{
+	unsigned int port;
+
+	port = get_port_by_srv_id(svr_id);
+	hvs_addr_set(addr, port);
+}
+
+/*
+ * Acquire the transport-wide sx lock exclusively. Always returns 0.
+ */
+int
+hvs_trans_lock(void)
+{
+	sx_xlock(&hvs_trans_socks_sx);
+
+	return (0);
+}
+
+/*
+ * Release the exclusive hold on the transport-wide sx lock taken by
+ * hvs_trans_lock().
+ */
+void
+hvs_trans_unlock(void)
+{
+	sx_xunlock(&hvs_trans_socks_sx);
+}
+
+/*
+ * Protocol init hook. Sets up the file-level locks, socket lists and the
+ * auto-bind port cursor.
+ */
+void
+hvs_trans_init(void)
+{
+	/*
+	 * Globals are initialized only for the default vnet, and only when
+	 * running as a Hyper-V guest.
+	 */
+	if (!IS_DEFAULT_VNET(curvnet) || vm_guest != VM_GUEST_HV)
+		return;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_init called\n", __func__);
+
+	/* Socket lists and the locks used around them. */
+	LIST_INIT(&hvs_trans_bound_socks);
+	LIST_INIT(&hvs_trans_connected_socks);
+	mtx_init(&hvs_trans_socks_mtx,
+	    "hvs_trans_socks_mtx", NULL, MTX_DEF);
+	sx_init(&hvs_trans_socks_sx, "hvs_trans_sock_sx");
+
+	/* The auto-bind search in connect starts just below this value. */
+	previous_auto_bound_port = MAX_PORT;
+}
+
+/*
+ * Allocate a zeroed pcb and attach it to the socket.
+ *
+ * Called in two cases:
+ * 1) When user calls socket();
+ * 2) When we accept new incoming connection and call sonewconn().
+ *
+ * Returns ESOCKTNOSUPPORT for non-stream sockets, EPROTONOSUPPORT for an
+ * unknown protocol, EISCONN when a pcb is already attached, ENOMEM when
+ * the non-sleeping allocation fails, and 0 on success.
+ */
+int
+hvs_trans_attach(struct socket *so, int proto, struct thread *td)
+{
+	struct hvs_pcb *new_pcb;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_attach called\n", __func__);
+
+	if (so->so_type != SOCK_STREAM)
+		return (ESOCKTNOSUPPORT);
+
+	if (!(proto == 0 || proto == HYPERV_SOCK_PROTO_TRANS))
+		return (EPROTONOSUPPORT);
+
+	if (so2hvspcb(so) != NULL)
+		return (EISCONN);
+
+	new_pcb = malloc(sizeof(*new_pcb), M_HVSOCK, M_NOWAIT | M_ZERO);
+	if (new_pcb == NULL)
+		return (ENOMEM);
+
+	/* Cross-link the socket and its new pcb. */
+	new_pcb->so = so;
+	so->so_pcb = (void *)new_pcb;
+
+	return (0);
+}
+
+/*
+ * Detach the pcb from the socket under the transport lock.
+ *
+ * The pcb is wiped and freed here only for listening sockets; for every
+ * other socket the pointer is merely cleared.
+ * NOTE(review): presumably the pcb of a non-listening socket is released
+ * elsewhere (not visible in this part of the file) -- confirm.
+ */
+void
+hvs_trans_detach(struct socket *so)
+{
+	struct hvs_pcb *pcb;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_detach called\n", __func__);
+
+	(void) hvs_trans_lock();
+
+	pcb = so2hvspcb(so);
+	if (pcb != NULL) {
+		if (SOLISTENING(so)) {
+			bzero(pcb, sizeof(*pcb));
+			free(pcb, M_HVSOCK);
+		}
+		so->so_pcb = NULL;
+	}
+
+	hvs_trans_unlock();
+}
+
+/*
+ * Bind the socket to the local port given in 'addr'.
+ *
+ * Returns EINVAL when the address or the pcb is missing, EAFNOSUPPORT for
+ * a non-AF_HYPERV address, EADDRINUSE when a socket on the bound or
+ * connected list already uses the port, and 0 on success. On success the
+ * remote address is reset to HVADDR_PORT_ANY and the socket is placed on
+ * the bound list.
+ */
+int
+hvs_trans_bind(struct socket *so, struct sockaddr *addr, struct thread *td)
+{
+	struct hvs_pcb *pcb = so2hvspcb(so);
+	struct sockaddr_hvs *sa = (struct sockaddr_hvs *) addr;
+	struct socket *holder;
+	int error;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_bind called\n", __func__);
+
+	if (sa == NULL || pcb == NULL)
+		return (EINVAL);
+
+	if (sa->sa_family != AF_HYPERV) {
+		HVSOCK_DBG(HVSOCK_DBG_ERR,
+		    "%s: Not supported, sa_family is %u\n",
+		    __func__, sa->sa_family);
+		return (EAFNOSUPPORT);
+	}
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: binding port = 0x%x\n", __func__, sa->hvs_port);
+
+	/* Lookup and insertion happen under one hold of the list mutex. */
+	mtx_lock(&hvs_trans_socks_mtx);
+	holder = __hvs_find_socket_on_list(sa,
+	    HVS_LIST_BOUND | HVS_LIST_CONNECTED);
+	if (holder == NULL) {
+		/* The port is free: record it and join the bound list. */
+		hvs_addr_set(&pcb->local_addr, sa->hvs_port);
+		hvs_addr_set(&pcb->remote_addr, HVADDR_PORT_ANY);
+		__hvs_insert_socket_on_list(so, HVS_LIST_BOUND);
+		error = 0;
+	} else {
+		error = EADDRINUSE;
+	}
+	mtx_unlock(&hvs_trans_socks_mtx);
+
+	return (error);
+}
+
+/*
+ * Put a bound socket into the listening state.
+ *
+ * Returns EINVAL when no pcb is attached, EADDRNOTAVAIL when the socket's
+ * local port is not on the bound list or is bound by a different socket,
+ * otherwise the result of solisten_proto_check().
+ */
+int
+hvs_trans_listen(struct socket *so, int backlog, struct thread *td)
+{
+	struct hvs_pcb *pcb = so2hvspcb(so);
+	struct socket *owner;
+	int error;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_listen called\n", __func__);
+
+	if (pcb == NULL)
+		return (EINVAL);
+
+	/* The local port must already be bound, and bound by this socket. */
+	owner = hvs_find_socket_on_list(&pcb->local_addr, HVS_LIST_BOUND);
+	if (!(owner != NULL && owner == so)) {
+		HVSOCK_DBG(HVSOCK_DBG_ERR,
+		    "%s: Address not bound or not by us.\n", __func__);
+		return (EADDRNOTAVAIL);
+	}
+
+	SOCK_LOCK(so);
+	error = solisten_proto_check(so);
+	if (error == 0) {
+		solisten_proto(so, backlog);
+	}
+	SOCK_UNLOCK(so);
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket listen error = %d\n", __func__, error);
+
+	return (error);
+}
+
+/*
+ * Hand back a freshly allocated copy of the peer address in *nam.
+ * Returns EINVAL when no pcb is attached and ENOMEM when the copy cannot
+ * be allocated (in which case *nam is NULL).
+ */
+int
+hvs_trans_accept(struct socket *so, struct sockaddr **nam)
+{
+	struct hvs_pcb *pcb = so2hvspcb(so);
+	struct sockaddr *copy;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_accept called\n", __func__);
+
+	if (pcb == NULL)
+		return (EINVAL);
+
+	copy = sodupsockaddr((struct sockaddr *) &pcb->remote_addr,
+	    M_NOWAIT);
+	*nam = copy;
+
+	if (copy == NULL)
+		return (ENOMEM);
+	return (0);
+}
+
+/*
+ * Start a connection to the host port given in 'nam'.
+ *
+ * A free local port is picked automatically by walking the port space
+ * downwards from the previously chosen port; the socket is put on the
+ * bound list, the VM and host service IDs are derived from the template,
+ * the socket is marked as connecting and a connect request is issued via
+ * vmbus_req_tl_connect() after the list mutex has been dropped.
+ *
+ * Returns EINVAL when the pcb or the address is missing, EAFNOSUPPORT for
+ * a non-AF_HYPERV address, EINPROGRESS when the socket is already
+ * connected, connecting or disconnecting, EADDRINUSE when no local port
+ * could be found, and 0 once the request has been issued.
+ */
+int
+hvs_trans_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+	struct hvs_pcb *pcb = so2hvspcb(so);
+	struct sockaddr_hvs *raddr = (struct sockaddr_hvs *)nam;
+	bool found_auto_bound_port = false;
+	/*
+	 * Ports span the whole 32-bit range, so the search cursor must be
+	 * unsigned: decrementing a signed int through that range overflows.
+	 */
+	uint32_t i;
+	int error = 0;
+
+	if (pcb == NULL)
+		return (EINVAL);
+
+	/* Verify the remote address */
+	if (raddr == NULL)
+		return (EINVAL);
+	if (raddr->sa_family != AF_HYPERV)
+		return (EAFNOSUPPORT);
+
+	/* Only log the remote port once raddr is known to be valid. */
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_connect called, remote port is %x\n",
+	    __func__, raddr->hvs_port);
+
+	mtx_lock(&hvs_trans_socks_mtx);
+	if (so->so_state &
+	    (SS_ISCONNECTED|SS_ISDISCONNECTING|SS_ISCONNECTING)) {
+			HVSOCK_DBG(HVSOCK_DBG_ERR,
+			    "%s: socket connect in progress\n",
+			    __func__);
+			error = EINPROGRESS;
+			goto out;
+	}
+
+	/*
+	 * Find an available port for us to auto bind the local
+	 * address.
+	 */
+	hvs_addr_set(&pcb->local_addr, 0);
+
+	for (i = previous_auto_bound_port - 1;
+	    i != previous_auto_bound_port; i--) {
+		/* Port 0 is never handed out; wrap around to the top. */
+		if (i == MIN_PORT)
+			i = MAX_PORT;
+
+		pcb->local_addr.hvs_port = i;
+
+		if (__hvs_find_socket_on_list(&pcb->local_addr,
+		    HVS_LIST_BOUND | HVS_LIST_CONNECTED) == NULL) {
+			found_auto_bound_port = true;
+			previous_auto_bound_port = i;
+			HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+			    "%s: found local bound port is %x\n",
+			    __func__, pcb->local_addr.hvs_port);
+			break;
+		}
+	}
+
+	if (found_auto_bound_port == true) {
+		/* Found available port for auto bound, put on list */
+		__hvs_insert_socket_on_list(so, HVS_LIST_BOUND);
+		/* Set VM service ID */
+		pcb->vm_srv_id = srv_id_template;
+		set_port_by_srv_id(&pcb->vm_srv_id, pcb->local_addr.hvs_port);
+		/* Set host service ID and remote port */
+		pcb->host_srv_id = srv_id_template;
+		set_port_by_srv_id(&pcb->host_srv_id, raddr->hvs_port);
+		hvs_addr_set(&pcb->remote_addr, raddr->hvs_port);
+
+		/* Change the socket state to SS_ISCONNECTING */
+		soisconnecting(so);
+	} else {
+		HVSOCK_DBG(HVSOCK_DBG_ERR,
+		    "%s: No local port available for auto bound\n",
+		    __func__);
+		error = EADDRINUSE;
+	}
+
+	HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect vm_srv_id is ");
+	hvsock_print_guid(&pcb->vm_srv_id);
+	HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect host_srv_id is ");
+	hvsock_print_guid(&pcb->host_srv_id);
+
+out:
+	mtx_unlock(&hvs_trans_socks_mtx);
+
+	/* The connect request is sent without the list mutex held. */
+	if (found_auto_bound_port == true)
+		 vmbus_req_tl_connect(&pcb->vm_srv_id, &pcb->host_srv_id);
+
+	return (error);
+}
+
+/*
+ * Mark the socket as disconnecting, unless it is already disconnected.
+ * Runs under the transport lock. Returns EINVAL when no pcb is attached,
+ * 0 otherwise.
+ */
+int
+hvs_trans_disconnect(struct socket *so)
+{
+	int error = 0;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_disconnect called\n", __func__);
+
+	(void) hvs_trans_lock();
+
+	if (so2hvspcb(so) == NULL) {
+		error = EINVAL;
+	} else if ((so->so_state & SS_ISDISCONNECTED) == 0) {
+		/* Not yet disconnected: begin the disconnect. */
+		soisdisconnecting(so);
+	}
+
+	hvs_trans_unlock();
+
+	return (error);
+}
+
+/* Translate MSG_DONTWAIT in 'f' into the wait flag passed to sblock(). */
+#define SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
+/*
+ * Argument bundle handed by hvs_trans_soreceive() to hvsock_br_callback()
+ * through vmbus_chan_recv_peek_call().
+ */
+struct hvs_callback_arg {
+	struct uio *uio;	/* destination of the receive */
+	struct sockbuf *sb;	/* receive sockbuf of the socket */
+};
+
+/*
+ * Receive entry point. Data is taken straight from the channel via
+ * vmbus_chan_recv_peek_call() with hvsock_br_callback() as the copy-out
+ * callback, in chunks bounded by hvsock_canread_check() and the space
+ * left in 'uio'.
+ *
+ * Returns EINVAL for a non-stream socket, a missing pcb, an empty uio or
+ * a uio that is not a read; EOPNOTSUPP for MSG_PEEK; EPIPE when the
+ * receive side is shut and the socket is already disconnected; EAGAIN
+ * for a non-blocking call that read nothing; otherwise any error from
+ * sblock(), the channel read, so_error or sbwait(), or 0.
+ */
+int
+hvs_trans_soreceive(struct socket *so, struct sockaddr **paddr,
+    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
+{
+	struct hvs_pcb *pcb = so2hvspcb(so);
+	struct sockbuf *sb;
+	ssize_t orig_resid;
+	uint32_t canread, to_read;
+	int flags, error = 0;
+	struct hvs_callback_arg cbarg;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_soreceive called\n", __func__);
+
+	if (so->so_type != SOCK_STREAM)
+		return (EINVAL);
+	if (pcb == NULL)
+		return (EINVAL);
+
+	if (flagsp != NULL)
+		flags = *flagsp &~ MSG_EOR;
+	else
+		flags = 0;
+
+	if (flags & MSG_PEEK)
+		return (EOPNOTSUPP);
+
+	/* If no space to copy out anything */
+	if (uio->uio_resid == 0 || uio->uio_rw != UIO_READ)
+		return (EINVAL);
+
+	sb = &so->so_rcv;
+
+	/* Remembered so we can later tell whether anything was read. */
+	orig_resid = uio->uio_resid;
+
+	/* Prevent other readers from entering the socket. */
+	error = sblock(sb, SBLOCKWAIT(flags));
+	if (error) {
+		HVSOCK_DBG(HVSOCK_DBG_ERR,
+		    "%s: sblock returned error = %d\n", __func__, error);
+		return (error);
+	}
+
+	SOCKBUF_LOCK(sb);
+
+	cbarg.uio = uio;
+	cbarg.sb = sb;
+	/*
+	 * If the socket is closing, there might still be some data
+	 * in rx br to read. However we need to make sure
+	 * the channel is still open.
+	 */
+	if ((sb->sb_state & SBS_CANTRCVMORE) &&
+	    (so->so_state & SS_ISDISCONNECTED)) {
+		/* Other thread already closed the channel */
+		error = EPIPE;
+		goto out;
+	}
+
+	while (true) {
+		/* Drain whatever is readable right now into the uio. */
+		while (uio->uio_resid > 0 &&
+		    (canread = hvsock_canread_check(pcb)) > 0) {
+			to_read = MIN(canread, uio->uio_resid);
+			HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+			    "%s: to_read = %u, skip = %u\n", __func__, to_read,
+			    (unsigned int)(sizeof(struct hvs_pkt_header) +
+			    pcb->recv_data_off));
+
+			error = vmbus_chan_recv_peek_call(pcb->chan, to_read,
+			    sizeof(struct hvs_pkt_header) + pcb->recv_data_off,
+			    hvsock_br_callback, (void *)&cbarg);
+			/*
+			 * It is possible socket is disconnected because
+			 * we released lock in hvsock_br_callback. So we
+			 * need to check the state to make sure it is not
+			 * disconnected.
+			 */
+			if (error || so->so_state & SS_ISDISCONNECTED) {
+				break;
+			}
+
+			/* Account for the bytes consumed from this packet. */
+			pcb->recv_data_len -= to_read;
+			pcb->recv_data_off += to_read;
+		}
+
+		if (error)
+			break;
+
+		/* Abort if socket has reported problems. */
+		if (so->so_error) {
+			if (so->so_error == ESHUTDOWN &&
+			    orig_resid > uio->uio_resid) {
+				/*
+				 * Although we got a FIN, we also received
+				 * some data in this round. Deliver it
+				 * to user.
+				 */
+				error = 0;
+			} else {
+				if (so->so_error != ESHUTDOWN)
+					error = so->so_error;
+			}
+
+			break;
+		}
+
+		/* Cannot receive more. */
+		if (sb->sb_state & SBS_CANTRCVMORE)
+			break;
+
+		/* We are done if buffer has been filled */
+		if (uio->uio_resid == 0)
+			break;
+
+		/* Without MSG_WAITALL a partial read is returned at once. */
+		if (!(flags & MSG_WAITALL) && orig_resid > uio->uio_resid)
+			break;
+
+		/* Buffer ring is empty and we shall not block */
+		if ((so->so_state & SS_NBIO) ||
+		    (flags & (MSG_DONTWAIT|MSG_NBIO))) {
+			if (orig_resid == uio->uio_resid) {
+				/* We have not read anything */
+				error = EAGAIN;
+			}
+			HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+			    "%s: non blocked read return, error %d.\n",
+			    __func__, error);
+			break;
+		}
+
+		/*
+		 * Wait and block until (more) data comes in.
+		 * Note: Drops the sockbuf lock during wait.
+		 */
+		error = sbwait(sb);
+
+		if (error)
+			break;
+
+		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+		    "%s: wake up from sbwait, read available is %u\n",
+		    __func__, vmbus_chan_read_available(pcb->chan));
+	}
+
+out:
+	SOCKBUF_UNLOCK(sb);
+
+	sbunlock(sb);
+
+	/* We received a FIN in this call */
+	if (so->so_error == ESHUTDOWN) {
+		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
+			/* Send has already closed */
+			soisdisconnecting(so);
+		} else {
+			/* Just close the receive side */
+			socantrcvmore(so);
+		}
+	}
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: returning error = %d, so_error = %d\n",
+	    __func__, error, so->so_error);
+
+	return (error);
+}
+
+/*
+ * Send entry point. Data from 'uio' is written to the channel with
+ * hvsock_send_data() in chunks bounded by hvsock_canwrite_check(), the
+ * bytes left in the uio and HVSOCK_SEND_BUF_SZ.
+ *
+ * Returns EINVAL for a non-stream socket, a missing pcb, an empty uio or
+ * a uio that is not a write; EPIPE when the send side is shut or a FIN
+ * (ESHUTDOWN) has been recorded; EWOULDBLOCK for a non-blocking call that
+ * could not send anything; otherwise any error from sblock(), sbwait()
+ * or hvsock_send_data(), or 0. A call that has sent part of the data and
+ * then finds no more room returns without blocking.
+ */
+int
+hvs_trans_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
+    struct mbuf *top, struct mbuf *controlp, int flags, struct thread *td)
+{
+	struct hvs_pcb *pcb = so2hvspcb(so);
+	struct sockbuf *sb;
+	ssize_t orig_resid;
+	uint32_t canwrite, to_write;
+	int error = 0;
+
+	/* uio_resid is an ssize_t, hence %zd rather than %lu. */
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_sosend called, uio_resid = %zd\n",
+	    __func__, uio->uio_resid);
+
+	if (so->so_type != SOCK_STREAM)
+		return (EINVAL);
+	if (pcb == NULL)
+		return (EINVAL);
+
+	/* If nothing to send */
+	if (uio->uio_resid == 0 || uio->uio_rw != UIO_WRITE)
+		return (EINVAL);
+
+	sb = &so->so_snd;
+
+	/* Remembered so we can later tell whether anything was sent. */
+	orig_resid = uio->uio_resid;
+
+	/* Prevent other writers from entering the socket. */
+	error = sblock(sb, SBLOCKWAIT(flags));
+	if (error) {
+		HVSOCK_DBG(HVSOCK_DBG_ERR,
+		    "%s: sblock returned error = %d\n", __func__, error);
+		return (error);
+	}
+
+	SOCKBUF_LOCK(sb);
+
+	if ((sb->sb_state & SBS_CANTSENDMORE) ||
+	    so->so_error == ESHUTDOWN) {
+		error = EPIPE;
+		goto out;
+	}
+
+	while (uio->uio_resid > 0) {
+		canwrite = hvsock_canwrite_check(pcb);
+		if (canwrite == 0) {
+			/* We have sent some data */
+			if (orig_resid > uio->uio_resid)
+				break;
+			/*
+			 * We have not sent any data and it is
+			 * non-blocked io
+			 */
+			if (so->so_state & SS_NBIO ||
+			    (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) {
+				error = EWOULDBLOCK;
+				break;
+			} else {
+				/*
+				 * We are here because there is no space on
+				 * send buffer ring. Signal the other side
+				 * to read and free more space.
+				 * Sleep wait until space available to send
+				 * Note: Drops the sockbuf lock during wait.
+				 */
+				error = sbwait(sb);
+
+				if (error)
+					break;
+
+				HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+				    "%s: wake up from sbwait, space avail on "
+				    "tx ring is %u\n",
+				    __func__,
+				    vmbus_chan_write_available(pcb->chan));
+
+				continue;
+			}
+		}
+		/* Clamp the chunk to ring space, remaining data and buffer. */
+		to_write = MIN(canwrite, uio->uio_resid);
+		to_write = MIN(to_write, HVSOCK_SEND_BUF_SZ);
+
+		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+		    "%s: canwrite is %u, to_write = %u\n", __func__,
+		    canwrite, to_write);
+		error = hvsock_send_data(pcb->chan, uio, to_write, sb);
+
+		if (error)
+			break;
+	}
+
+out:
+	SOCKBUF_UNLOCK(sb);
+	sbunlock(sb);
+
+	return (error);
+}
+
+/*
+ * Return a freshly allocated copy of the remote address in *nam.
+ * Returns EINVAL when no pcb is attached and ENOMEM when the copy cannot
+ * be allocated (in which case *nam is NULL).
+ */
+int
+hvs_trans_peeraddr(struct socket *so, struct sockaddr **nam)
+{
+	struct hvs_pcb *pcb = so2hvspcb(so);
+	struct sockaddr *copy;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_peeraddr called\n", __func__);
+
+	if (pcb == NULL)
+		return (EINVAL);
+
+	copy = sodupsockaddr((struct sockaddr *) &pcb->remote_addr, M_NOWAIT);
+	*nam = copy;
+
+	if (copy == NULL)
+		return (ENOMEM);
+	return (0);
+}
+
+/*
+ * Return a freshly allocated copy of the local address in *nam.
+ * Returns EINVAL when no pcb is attached and ENOMEM when the copy cannot
+ * be allocated (in which case *nam is NULL).
+ */
+int
+hvs_trans_sockaddr(struct socket *so, struct sockaddr **nam)
+{
+	struct hvs_pcb *pcb = so2hvspcb(so);
+	struct sockaddr *copy;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_sockaddr called\n", __func__);
+
+	if (pcb == NULL)
+		return (EINVAL);
+
+	copy = sodupsockaddr((struct sockaddr *) &pcb->local_addr, M_NOWAIT);
+	*nam = copy;
+
+	if (copy == NULL)
+		return (ENOMEM);
+	return (0);
+}
+
+/*
+ * Close handler, run under the transport lock. For a connected socket a
+ * zero-length send is issued as a FIN to the peer; a socket that is
+ * connected, connecting or disconnecting is then marked disconnected.
+ * The pcb's channel and socket back-pointers are cleared, and a listening
+ * socket is taken off the bound list. Nothing is done when no pcb is
+ * attached.
+ */
+void
+hvs_trans_close(struct socket *so)
+{
+	struct hvs_pcb *pcb;
+	const int active_states =
+	    SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_close called\n", __func__);
+
+	(void) hvs_trans_lock();
+
+	pcb = so2hvspcb(so);
+	if (pcb == NULL)
+		goto done;
+
+	if ((so->so_state & SS_ISCONNECTED) != 0) {
+		/* Send a FIN to peer */
+		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+		    "%s: hvs_trans_close sending a FIN to host\n", __func__);
+		(void) hvsock_send_data(pcb->chan, NULL, 0, NULL);
+	}
+
+	if ((so->so_state & active_states) != 0)
+		soisdisconnected(so);
+
+	/* Sever the pcb from both the channel and the socket. */
+	pcb->chan = NULL;
+	pcb->so = NULL;
+
+	if (SOLISTENING(so)) {
+		/* Remove from bound list */
+		mtx_lock(&hvs_trans_socks_mtx);
+		__hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
+		mtx_unlock(&hvs_trans_socks_mtx);
+	}
+
+done:
+	hvs_trans_unlock();
+}
+
+/*
+ * Abort handler, run under the transport lock. A listening socket is
+ * taken off the bound list; a connected socket is disconnected via
+ * sodisconnect(). Nothing is done when no pcb is attached.
+ */
+void
+hvs_trans_abort(struct socket *so)
+{
+	struct hvs_pcb *pcb;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_abort called\n", __func__);
+
+	(void) hvs_trans_lock();
+	/*
+	 * Look the pcb up only once the lock is held, as the detach, close
+	 * and disconnect handlers do; detach clears so_pcb under this lock.
+	 */
+	pcb = so2hvspcb(so);
+	if (pcb == NULL) {
+		hvs_trans_unlock();
+		return;
+	}
+
+	if (SOLISTENING(so)) {
+		mtx_lock(&hvs_trans_socks_mtx);
+		/* Remove from bound list */
+		__hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
+		mtx_unlock(&hvs_trans_socks_mtx);
+	}
+
+	if (so->so_state & SS_ISCONNECTED) {
+		(void) sodisconnect(so);
+	}
+	hvs_trans_unlock();
+
+	return;
+}
+
+int
+hvs_trans_shutdown(struct socket *so)
+{
+	struct hvs_pcb *pcb = so2hvspcb(so);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202005201103.04KB3xTp013965>