Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 19 Jun 2024 12:13:25 GMT
From:      Doug Rabson <dfr@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: e97ad33a89a7 - main - Add an implementation of the 9P filesystem
Message-ID:  <202406191213.45JCDPPP051964@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by dfr:

URL: https://cgit.FreeBSD.org/src/commit/?id=e97ad33a89a78f55280b0485b3249ee9b907a718

commit e97ad33a89a78f55280b0485b3249ee9b907a718
Author:     Doug Rabson <dfr@FreeBSD.org>
AuthorDate: 2022-12-06 13:07:46 +0000
Commit:     Doug Rabson <dfr@FreeBSD.org>
CommitDate: 2024-06-19 12:12:04 +0000

    Add an implementation of the 9P filesystem
    
    This is derived from swills@ fork of the Juniper virtfs with many
    changes by me including bug fixes, style improvements, clearer layering
    and more consistent logging. The filesystem is renamed to p9fs to better
    reflect its function and to prevent possible future confusion with
    virtio-fs.
    
    Several updates and fixes from Juniper have been integrated into this
    version by Val Packett and these contributions along with the original
    Juniper authors are credited below.
    
    To use this with bhyve, add 'virtio_p9fs_load=YES' to loader.conf. The
    bhyve virtio-9p device allows access from the guest to files on the host
    by mapping a 'sharename' to a host path. It is possible to use p9fs as a
    root filesystem by adding this to /boot/loader.conf:
    
            vfs.root.mountfrom="p9fs:sharename"
    
    for non-root filesystems add something like this to /etc/fstab:
    
            sharename /mnt p9fs rw 0 0
    
    In both examples, substitute the share name used on the bhyve command
    line.
    
    The 9P filesystem protocol relies on stateful file opens which map
    protocol-level FIDs to host file descriptors. The FreeBSD vnode
    interface doesn't really support this and we use heuristics to guess the
    right FID to use for file operations.  This can be confused by privilege
    lowering and does not guarantee that the FID created for a given file
    open is always used for file operations, even if the calling process is
    using the file descriptor from the original open call. Improving this
    would involve changes to the vnode interface which is out-of-scope for
    this import.
    
    Differential Revision: https://reviews.freebsd.org/D41844
    Reviewed by: kib, emaste, dch
    MFC after: 3 months
    Co-authored-by: Val Packett <val@packett.cool>
    Co-authored-by: Ka Ho Ng <kahon@juniper.net>
    Co-authored-by: joyu <joyul@juniper.net>
    Co-authored-by: Kumara Babu Narayanaswamy <bkumara@juniper.net>
---
 share/man/man5/Makefile           |    1 +
 share/man/man5/p9fs.5             |  127 +++
 sys/conf/files                    |    7 +
 sys/conf/options                  |    1 +
 sys/dev/virtio/p9fs/virtio_p9fs.c |  511 +++++++++
 sys/dev/virtio/p9fs/virtio_p9fs.h |   39 +
 sys/fs/p9fs/p9_client.c           | 1311 ++++++++++++++++++++++
 sys/fs/p9fs/p9_client.h           |  168 +++
 sys/fs/p9fs/p9_debug.h            |   45 +
 sys/fs/p9fs/p9_protocol.c         |  632 +++++++++++
 sys/fs/p9fs/p9_protocol.h         |  280 +++++
 sys/fs/p9fs/p9_transport.c        |   70 ++
 sys/fs/p9fs/p9_transport.h        |   53 +
 sys/fs/p9fs/p9fs.h                |  203 ++++
 sys/fs/p9fs/p9fs_proto.h          |   42 +
 sys/fs/p9fs/p9fs_subr.c           |  411 +++++++
 sys/fs/p9fs/p9fs_vfsops.c         |  602 +++++++++++
 sys/fs/p9fs/p9fs_vnops.c          | 2148 +++++++++++++++++++++++++++++++++++++
 sys/kern/vfs_mountroot.c          |    1 +
 sys/modules/Makefile              |    1 +
 sys/modules/p9fs/Makefile         |    8 +
 sys/modules/virtio/Makefile       |    2 +-
 sys/modules/virtio/p9fs/Makefile  |   32 +
 23 files changed, 6694 insertions(+), 1 deletion(-)

diff --git a/share/man/man5/Makefile b/share/man/man5/Makefile
index bc345b42717c..465cc85a3feb 100644
--- a/share/man/man5/Makefile
+++ b/share/man/man5/Makefile
@@ -35,6 +35,7 @@ MAN=	acct.5 \
 	nsmb.conf.5 \
 	nsswitch.conf.5 \
 	os-release.5 \
+	p9fs.5 \
 	passwd.5 \
 	pbm.5 \
 	periodic.conf.5 \
diff --git a/share/man/man5/p9fs.5 b/share/man/man5/p9fs.5
new file mode 100644
index 000000000000..5c110e3dc963
--- /dev/null
+++ b/share/man/man5/p9fs.5
@@ -0,0 +1,127 @@
+.\"
+.\" Copyright (c) 2022-present Doug Rabson
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS DOCUMENTATION IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd December 7, 2022
+.Dt P9FS 5
+.Os
+.Sh NAME
+.Nm p9fs
+.Nd "9P file system"
+.Sh SYNOPSIS
+To use this filesystem,
+either add the following to the kernel config:
+.Bd -ragged -offset indent
+.Cd "options P9FS"
+.Cd "device virtio_p9fs"
+.Ed
+.Pp
+Alternatively, load the driver as a kernel module,
+either at boot time by adding the following to
+.Xr loader.conf 5 :
+.Bd -literal -offset indent
+virtio_p9fs_load="YES"
+.Ed
+.Pp
+or on system startup using the command:
+.Pp
+.Dl "# sysrc kld_list+=virtio_p9fs"
+.Sh DESCRIPTION
+The
+.Nm
+filesystem uses the 9P protocol to mount a host file system directory
+into a
+.Xr bhyve 8
+guest.
+Multiple host directories can be accessed using the
+.Xr bhyve 8
+virtio-9p virtual PCI device.
+Each device is configured with a share name and a host directory path.
+The share name can be used with
+.Xr mount 8
+to mount the host directory in the guest:
+.Pp
+.Dl "# mount -t p9fs mysharename /mnt"
+.Pp
+Host directories can be mounted on system startup using
+.Xr fstab 5
+like this:
+.Pp
+.Bd -literal -offset indent
+mysharename	/mnt	p9fs	rw	0	0
+.Ed
+.Pp
+Using
+.Nm
+as a root file system is supported by adding the following to
+.Xr loader.conf 5 :
+.Bd -literal -offset indent
+vfs.root.mountfrom="p9fs:mysharename"
+.Ed
+.Sh LIMITATIONS
+The 9P protocol relies on stateful file opens
+which map protocol-level FIDs to host file descriptors.
+The FreeBSD vnode interface doesn't support this and
+.Nm
+uses heuristics to guess the right FID to use for file operations.
+.Pp
+This can be confused by privilege lowering and
+does not guarantee that the FID created for a
+given file open is always used,
+even if the calling process is using the file descriptor from
+the original open call.
+.Pp
+In particular, accessing unlinked files using an open file descriptor
+may not work correctly.
+If
+.Nm
+is the root filesystem,
+it is recommended to use it with
+.Xr tmpfs 5
+to ensure that temporary files created in
+.Pa /tmp
+or
+.Pa /var/tmp
+have the expected semantics.
+.Sh SEE ALSO
+.Xr fstab 5
+.Sh HISTORY
+The 9P protocol first appeared in the Plan 9 operating system.
+More recently, the protocol has been widely used with virtual machines
+to allow the use of host file resources inside a guest VM.
+.Sh AUTHORS
+This is derived from software released by Juniper Networks, Inc.
+with many improvements and fixes from
+.An Steve Wills .
+.Pp
+This manual page was written by
+.An -nosplit
+.An Doug Rabson Aq Mt dfr@FreeBSD.org .
+.Sh BUGS
+A better name for this filesystem would be
+.Ar 9pfs
+but for technical reasons,
+the names of filesystems must be valid C identifiers.
+As a compromise,
+the filesystem is named
+.Nm .
diff --git a/sys/conf/files b/sys/conf/files
index 875021aaa357..968894ea948b 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3444,6 +3444,7 @@ dev/virtio/balloon/virtio_balloon.c	optional	virtio_balloon
 dev/virtio/block/virtio_blk.c		optional	virtio_blk
 dev/virtio/console/virtio_console.c	optional	virtio_console
 dev/virtio/gpu/virtio_gpu.c		optional	virtio_gpu
+dev/virtio/p9fs/virtio_p9fs.c		optional	virtio_p9fs
 dev/virtio/random/virtio_random.c	optional	virtio_random
 dev/virtio/scmi/virtio_scmi.c		optional	virtio_scmi
 dev/virtio/scsi/virtio_scsi.c		optional	virtio_scsi
@@ -3593,6 +3594,12 @@ fs/nfsserver/nfs_nfsdcache.c	optional nfsd inet
 fs/nullfs/null_subr.c		optional nullfs
 fs/nullfs/null_vfsops.c		optional nullfs
 fs/nullfs/null_vnops.c		optional nullfs
+fs/p9fs/p9_client.c		optional p9fs
+fs/p9fs/p9_protocol.c		optional p9fs
+fs/p9fs/p9_transport.c		optional p9fs
+fs/p9fs/p9fs_subr.c		optional p9fs
+fs/p9fs/p9fs_vfsops.c		optional p9fs
+fs/p9fs/p9fs_vnops.c		optional p9fs
 fs/procfs/procfs.c		optional procfs
 fs/procfs/procfs_dbregs.c	optional procfs
 fs/procfs/procfs_fpregs.c	optional procfs
diff --git a/sys/conf/options b/sys/conf/options
index d9bc981232c4..52fafffabd99 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -284,6 +284,7 @@ TMPFS		opt_dontuse.h
 UDF		opt_dontuse.h
 UNIONFS		opt_dontuse.h
 ZFS		opt_dontuse.h
+P9FS		opt_dontuse.h
 
 # Pseudofs debugging
 PSEUDOFS_TRACE	opt_pseudofs.h
diff --git a/sys/dev/virtio/p9fs/virtio_p9fs.c b/sys/dev/virtio/p9fs/virtio_p9fs.c
new file mode 100644
index 000000000000..48430b4f6b67
--- /dev/null
+++ b/sys/dev/virtio/p9fs/virtio_p9fs.c
@@ -0,0 +1,511 @@
+/*-
+ * Copyright (c) 2017 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *	notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *	notice, this list of conditions and the following disclaimer in the
+ *	documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+/*
+ * The Virtio 9P transport driver. This file contains all functions related to
+ * the virtqueue infrastructure which include creating the virtqueue, host
+ * interactions, interrupts etc.
+ */
+
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/module.h>
+#include <sys/sglist.h>
+#include <sys/queue.h>
+#include <sys/bus.h>
+#include <sys/kthread.h>
+#include <sys/condvar.h>
+#include <sys/sysctl.h>
+
+#include <machine/bus.h>
+
+#include <fs/p9fs/p9_client.h>
+#include <fs/p9fs/p9_debug.h>
+#include <fs/p9fs/p9_protocol.h>
+#include <fs/p9fs/p9_transport.h>
+
+#include <dev/virtio/virtio.h>
+#include <dev/virtio/virtqueue.h>
+#include <dev/virtio/virtio_ring.h>
+#include <dev/virtio/p9fs/virtio_p9fs.h>
+
+/*
+ * Helpers for the per-channel mutex stored in vt9p_softc.vt9p_mtx.
+ * VT9P_MTX() yields a pointer to the mutex; the others lock, unlock,
+ * initialise and destroy it.
+ */
+#define VT9P_MTX(_sc) (&(_sc)->vt9p_mtx)
+#define VT9P_LOCK(_sc) mtx_lock(VT9P_MTX(_sc))
+#define VT9P_UNLOCK(_sc) mtx_unlock(VT9P_MTX(_sc))
+#define VT9P_LOCK_INIT(_sc) mtx_init(VT9P_MTX(_sc), \
+    "VIRTIO 9P CHAN lock", NULL, MTX_DEF)
+#define VT9P_LOCK_DESTROY(_sc) mtx_destroy(VT9P_MTX(_sc))
+/*
+ * Segment count used both for the channel's sglist and for the virtqueue
+ * allocation (copied into vt9p_softc.max_nsegs at attach time).
+ */
+#define MAX_SUPPORTED_SGS 20
+/* Malloc type for the mount tag string read from the device config. */
+static MALLOC_DEFINE(M_P9FS_MNTTAG, "p9fs_mount_tag", "P9fs Mounttag");
+
+/*
+ * Per-device state for one virtio 9P channel. One instance exists per
+ * attached device and is linked into global_chan_list once attach succeeds.
+ */
+struct vt9p_softc {
+	/* Newbus device this channel belongs to. */
+	device_t vt9p_dev;
+	/* Channel mutex; see the VT9P_LOCK()/VT9P_UNLOCK() macros. */
+	struct mtx vt9p_mtx;
+	/* Scatter/gather list reused for every request on this channel. */
+	struct sglist *vt9p_sglist;
+	/* Submitters wait here when the virtqueue has no room (ENOSPC). */
+	struct cv submit_cv;
+	/* Set by vt9p_create() while a client owns the channel. */
+	bool busy;
+	/* The single request virtqueue. */
+	struct virtqueue *vt9p_vq;
+	/* Segment count for the sglist and virtqueue (MAX_SUPPORTED_SGS). */
+	int max_nsegs;
+	/* Length of mount_tag including the terminating NUL added at attach. */
+	uint16_t mount_tag_len;
+	/* NUL-terminated mount tag copied from the device config. */
+	char *mount_tag;
+	/* Linkage on global_chan_list. */
+	STAILQ_ENTRY(vt9p_softc) chan_next;
+};
+
+/*
+ * Global list of attached channels. vt9p_create() searches it by mount tag
+ * to bind a channel to a client; global_chan_list_mtx guards insertions,
+ * removals and lookups.
+ */
+static STAILQ_HEAD( ,vt9p_softc) global_chan_list;
+struct mtx global_chan_list_mtx;
+
+/*
+ * Human-readable names for the feature bits this driver negotiates;
+ * handed to virtio_set_feature_desc() at attach. The { 0, NULL } entry
+ * terminates the table.
+ */
+static struct virtio_feature_desc virtio_9p_feature_desc[] = {
+	{ VIRTIO_9PNET_F_MOUNT_TAG,	"9PMountTag" },
+	{ 0, NULL }
+};
+
+/*
+ * Prepare the global channel list and the mutex that guards it. Hooked up
+ * through SYSINIT below so it runs at SI_SUB_KLD, SI_ORDER_FIRST.
+ */
+static void
+global_chan_list_init(void)
+{
+
+	STAILQ_INIT(&global_chan_list);
+	mtx_init(&global_chan_list_mtx, "9pglobal", NULL, MTX_DEF);
+}
+SYSINIT(global_chan_list_init, SI_SUB_KLD, SI_ORDER_FIRST,
+    global_chan_list_init, NULL);
+
+/*
+ * Transport 'cancel' hook. Canceling an in-flight virtio request is not
+ * currently supported, so this ignores both arguments and always
+ * returns 1.
+ */
+static int
+vt9p_cancel(void *handle, struct p9_req_t *req)
+{
+
+	return (1);
+}
+
+/* Root sysctl node for this transport: vfs.9p */
+SYSCTL_NODE(_vfs, OID_AUTO, 9p, CTLFLAG_RW, 0, "9P File System Protocol");
+
+/*
+ * Maximum number of seconds a thread in vt9p_request() sleeps waiting for
+ * an ack from the host before giving up. Tunable at run time through
+ * vfs.9p.ackmaxidle.
+ */
+static unsigned int vt9p_ackmaxidle = 120;
+
+SYSCTL_UINT(_vfs_9p, OID_AUTO, ackmaxidle, CTLFLAG_RW, &vt9p_ackmaxidle, 0,
+    "Maximum time request thread waits for ack from host");
+
+/*
+ * Wait for completion of a p9 request.
+ *
+ * Must be called with the channel mutex held. If the response tag does not
+ * yet match the request tag, the thread sleeps on 'req' with msleep(),
+ * which drops the channel mutex while asleep and reacquires it before
+ * returning. vt9p_intr_complete() copies the tag and issues the wakeup.
+ *
+ * Returns 0 when the request has completed, or EIO when msleep() returns
+ * non-zero, i.e. no wakeup arrived within vt9p_ackmaxidle seconds.
+ */
+static int
+vt9p_req_wait(struct vt9p_softc *chan, struct p9_req_t *req)
+{
+	if (req->tc->tag != req->rc->tag) {
+		if (msleep(req, VT9P_MTX(chan), 0, "chan lock",
+		    vt9p_ackmaxidle * hz)) {
+			/*
+			 * No response from the host within the configured
+			 * vfs.9p.ackmaxidle interval. Can't wait for ever..
+			 */
+			P9_DEBUG(ERROR, "Timeout after waiting %u seconds "
+			    "for an ack from host\n", vt9p_ackmaxidle);
+			return (EIO);
+		}
+		KASSERT(req->tc->tag == req->rc->tag,
+		    ("Spurious event on p9 req"));
+	}
+	return (0);
+}
+
+/*
+ * Request handler. This is called for every request submitted to the host.
+ * It maps the tc/rc buffers onto the channel's sglist and submits them to
+ * the virtqueue. The implementation is synchronous: the submitting thread
+ * sleeps until the completion interrupt wakes it up, then returns to the
+ * p9fs layer, which processes the rc buffer.
+ *
+ * handle is the vt9p_softc returned by vt9p_create(); req carries the
+ * transmit (tc) and receive (rc) buffers.
+ *
+ * Returns 0 on success, the sglist_append() error if the buffers do not
+ * fit in the sglist, or EIO if the enqueue fails or the host does not
+ * answer in time.
+ *
+ * NOTE(review): on timeout the request is still posted on the virtqueue;
+ * presumably the caller must not free the buffers while the host may still
+ * write to them - confirm against the client layer.
+ */
+static int
+vt9p_request(void *handle, struct p9_req_t *req)
+{
+	int error;
+	struct vt9p_softc *chan;
+	int readable, writable;
+	struct sglist *sg;
+	struct virtqueue *vq;
+
+	chan = handle;
+	sg = chan->vt9p_sglist;
+	vq = chan->vt9p_vq;
+
+	P9_DEBUG(TRANS, "%s: req=%p\n", __func__, req);
+
+	/* Grab the channel lock*/
+	VT9P_LOCK(chan);
+
+req_retry:
+	/*
+	 * The sglist is shared by every thread using this channel. It must
+	 * be rebuilt on each attempt: cv_wait() below drops the channel lock,
+	 * so another thread may have reset and refilled it in the meantime.
+	 */
+	sglist_reset(sg);
+	/* Handle out VirtIO ring buffers */
+	error = sglist_append(sg, req->tc->sdata, req->tc->size);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: sglist append failed\n", __func__);
+		goto out;
+	}
+	readable = sg->sg_nseg;
+
+	error = sglist_append(sg, req->rc->sdata, req->rc->capacity);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: sglist append failed\n", __func__);
+		goto out;
+	}
+	writable = sg->sg_nseg - readable;
+
+	error = virtqueue_enqueue(vq, req, sg, readable, writable);
+	if (error == ENOSPC) {
+		/*
+		 * The virtqueue is full. Sleep on the submit condvar (this
+		 * releases the channel lock) until the interrupt handler
+		 * signals that completions were reaped, then start over.
+		 */
+		cv_wait(&chan->submit_cv, VT9P_MTX(chan));
+		P9_DEBUG(TRANS, "%s: retry virtio request\n", __func__);
+		goto req_retry;
+	}
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: virtio enuqueue failed \n", __func__);
+		error = EIO;
+		goto out;
+	}
+
+	/* We have to notify */
+	virtqueue_notify(vq);
+
+	error = vt9p_req_wait(chan, req);
+	if (error != 0)
+		goto out;
+
+	VT9P_UNLOCK(chan);
+
+	P9_DEBUG(TRANS, "%s: virtio request kicked\n", __func__);
+
+	return (0);
+out:
+	VT9P_UNLOCK(chan);
+	return (error);
+}
+
+/*
+ * Completion of requests from the virtqueue. This interrupt handler is set
+ * up at initialization and is called whenever the host completes requests.
+ * For each completed request it copies the request tag into the response
+ * buffer (the condition vt9p_req_wait() checks) and wakes the sleeping
+ * submitter. Finally it signals submit_cv so that a thread waiting for
+ * virtqueue space can retry.
+ *
+ * xsc is the channel's vt9p_softc.
+ */
+static void
+vt9p_intr_complete(void *xsc)
+{
+	struct vt9p_softc *chan;
+	struct virtqueue *vq;
+	struct p9_req_t *curreq;
+
+	chan = (struct vt9p_softc *)xsc;
+	vq = chan->vt9p_vq;
+
+	P9_DEBUG(TRANS, "%s: completing\n", __func__);
+
+	VT9P_LOCK(chan);
+again:
+	while ((curreq = virtqueue_dequeue(vq, NULL)) != NULL) {
+		curreq->rc->tag = curreq->tc->tag;
+		wakeup_one(curreq);
+	}
+	/*
+	 * A non-zero return means more completions showed up between the
+	 * last dequeue and re-enabling interrupts. Reap them now; otherwise
+	 * their submitters would sleep until the next interrupt or time out.
+	 */
+	if (virtqueue_enable_intr(vq) != 0) {
+		virtqueue_disable_intr(vq);
+		goto again;
+	}
+	cv_signal(&chan->submit_cv);
+	VT9P_UNLOCK(chan);
+}
+
+/*
+ * Allocate the channel's single request virtqueue, sized for max_nsegs
+ * segments and wired to vt9p_intr_complete() as its completion handler.
+ * Returns whatever virtio_alloc_virtqueues() returns.
+ */
+static int
+vt9p_alloc_virtqueue(struct vt9p_softc *sc)
+{
+	struct vq_alloc_info info;
+
+	VQ_ALLOC_INFO_INIT(&info, sc->max_nsegs, vt9p_intr_complete, sc,
+	    &sc->vt9p_vq, "%s request", device_get_nameunit(sc->vt9p_dev));
+
+	return (virtio_alloc_virtqueues(sc->vt9p_dev, 1, &info));
+}
+
+/*
+ * Probe method: claim the device only when its virtio device type is 9P.
+ * Returns BUS_PROBE_DEFAULT for a match and ENXIO otherwise.
+ */
+static int
+vt9p_probe(device_t dev)
+{
+
+	if (virtio_get_device_type(dev) == VIRTIO_ID_9P) {
+		device_set_desc(dev, "VirtIO 9P Transport");
+		return (BUS_PROBE_DEFAULT);
+	}
+
+	return (ENXIO);
+}
+
+/*
+ * Quiesce the device: disable interrupts on the request virtqueue, then
+ * stop the virtio device. Dereferences sc->vt9p_vq, so the virtqueue must
+ * already have been allocated. vt9p_detach() calls this with the channel
+ * lock held.
+ */
+static void
+vt9p_stop(struct vt9p_softc *sc)
+{
+
+	/* Device specific stops .*/
+	virtqueue_disable_intr(sc->vt9p_vq);
+	virtio_stop(sc->vt9p_dev);
+}
+
+/*
+ * Detach the 9P virtio PCI device.
+ *
+ * Refuses with EBUSY while a client still owns the channel, since the
+ * client holds a pointer to this softc. Otherwise the channel is first
+ * unpublished from the global list, so vt9p_create() can no longer find it
+ * or read its mount tag, then the device is stopped and the per-channel
+ * resources are released.
+ *
+ * This is also reached from the vt9p_attach() error path, so it tolerates
+ * a partially initialised softc: the virtqueue may not exist yet and the
+ * channel may not be on the global list. The lock and condvar are always
+ * initialised by then.
+ *
+ * Returns 0 on success or EBUSY if the channel is in use.
+ */
+static int
+vt9p_detach(device_t dev)
+{
+	struct vt9p_softc *sc, *it;
+
+	sc = device_get_softc(dev);
+
+	mtx_lock(&global_chan_list_mtx);
+	if (sc->busy) {
+		mtx_unlock(&global_chan_list_mtx);
+		return (EBUSY);
+	}
+	/* STAILQ_REMOVE() requires the element to be on the list. */
+	STAILQ_FOREACH(it, &global_chan_list, chan_next) {
+		if (it == sc) {
+			STAILQ_REMOVE(&global_chan_list, sc, vt9p_softc,
+			    chan_next);
+			break;
+		}
+	}
+	mtx_unlock(&global_chan_list_mtx);
+
+	/* vt9p_stop() dereferences the virtqueue; skip it if never set up. */
+	if (sc->vt9p_vq != NULL) {
+		VT9P_LOCK(sc);
+		vt9p_stop(sc);
+		VT9P_UNLOCK(sc);
+	}
+
+	if (sc->vt9p_sglist) {
+		sglist_free(sc->vt9p_sglist);
+		sc->vt9p_sglist = NULL;
+	}
+	if (sc->mount_tag) {
+		free(sc->mount_tag, M_P9FS_MNTTAG);
+		sc->mount_tag = NULL;
+	}
+
+	VT9P_LOCK_DESTROY(sc);
+	cv_destroy(&sc->submit_cv);
+
+	return (0);
+}
+
+/*
+ * Attach the 9P virtio PCI device.
+ *
+ * Initialises the channel lock, condvar and sglist, negotiates the mount
+ * tag feature, reads the mount tag from the device config and exports it
+ * as a sysctl. It then allocates the request virtqueue, sets up and
+ * enables its interrupt, and finally publishes the channel on the global
+ * list so that vt9p_create() can find it.
+ *
+ * Returns 0 on success. On failure returns EINVAL if the host does not
+ * offer the mount tag feature, or the error from the virtqueue/interrupt
+ * setup. In that case everything set up here is torn down again.
+ */
+static int
+vt9p_attach(device_t dev)
+{
+	struct sysctl_ctx_list *ctx;
+	struct sysctl_oid *tree;
+	struct vt9p_softc *chan;
+	char *mount_tag;
+	int error;
+	uint16_t mount_tag_len;
+
+	chan = device_get_softc(dev);
+	chan->vt9p_dev = dev;
+
+	/* Init the channel lock. */
+	VT9P_LOCK_INIT(chan);
+	/* Initialize the condition variable */
+	cv_init(&chan->submit_cv, "Conditional variable for submit queue" );
+	chan->max_nsegs = MAX_SUPPORTED_SGS;
+	/*
+	 * Attach may sleep (the mount tag below is allocated with M_WAITOK
+	 * as well), so use a sleeping allocation that cannot fail instead of
+	 * an M_NOWAIT one that is only checked much later.
+	 */
+	chan->vt9p_sglist = sglist_alloc(chan->max_nsegs, M_WAITOK);
+
+	/* Negotiate the features from the host */
+	virtio_set_feature_desc(dev, virtio_9p_feature_desc);
+	virtio_negotiate_features(dev, VIRTIO_9PNET_F_MOUNT_TAG);
+
+	/*
+	 * If mount tag feature is supported read the mount tag
+	 * from device config
+	 */
+	if (!virtio_with_feature(dev, VIRTIO_9PNET_F_MOUNT_TAG)) {
+		error = EINVAL;
+		P9_DEBUG(ERROR, "%s: Mount tag feature not supported by host\n", __func__);
+		goto out;
+	}
+	mount_tag_len = virtio_read_dev_config_2(dev,
+	    offsetof(struct virtio_9pnet_config, mount_tag_len));
+
+	/* One extra zeroed byte so the tag is always NUL-terminated. */
+	mount_tag = malloc(mount_tag_len + 1, M_P9FS_MNTTAG,
+	    M_WAITOK | M_ZERO);
+	/* Record it right away so the error path below can release it. */
+	chan->mount_tag = mount_tag;
+
+	virtio_read_device_config(dev,
+	    offsetof(struct virtio_9pnet_config, mount_tag),
+	    mount_tag, mount_tag_len);
+
+	device_printf(dev, "Mount tag: %s\n", mount_tag);
+
+	/* The stored length accounts for the terminating NUL. */
+	mount_tag_len++;
+	chan->mount_tag_len = mount_tag_len;
+
+	ctx = device_get_sysctl_ctx(dev);
+	tree = device_get_sysctl_tree(dev);
+	SYSCTL_ADD_STRING(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "p9fs_mount_tag",
+	    CTLFLAG_RD, chan->mount_tag, 0, "Mount tag");
+
+	/* We expect one virtqueue, for requests. */
+	error = vt9p_alloc_virtqueue(chan);
+
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: Allocating the virtqueue failed \n", __func__);
+		goto out;
+	}
+
+	error = virtio_setup_intr(dev, INTR_TYPE_MISC|INTR_MPSAFE);
+
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: Cannot setup virtqueue interrupt\n", __func__);
+		goto out;
+	}
+	error = virtqueue_enable_intr(chan->vt9p_vq);
+
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: Cannot enable virtqueue interrupt\n", __func__);
+		goto out;
+	}
+
+	mtx_lock(&global_chan_list_mtx);
+	/* Insert the channel in global channel list */
+	STAILQ_INSERT_HEAD(&global_chan_list, chan, chan_next);
+	mtx_unlock(&global_chan_list_mtx);
+
+	return (0);
+out:
+	/*
+	 * Something went wrong. Undo only what was set up above: the channel
+	 * was never put on the global list and the virtqueue may not exist,
+	 * so the full detach routine cannot be used on this softc.
+	 */
+	if (chan->vt9p_vq != NULL) {
+		VT9P_LOCK(chan);
+		vt9p_stop(chan);
+		VT9P_UNLOCK(chan);
+	}
+	if (chan->vt9p_sglist != NULL) {
+		sglist_free(chan->vt9p_sglist);
+		chan->vt9p_sglist = NULL;
+	}
+	if (chan->mount_tag != NULL) {
+		free(chan->mount_tag, M_P9FS_MNTTAG);
+		chan->mount_tag = NULL;
+	}
+	VT9P_LOCK_DESTROY(chan);
+	cv_destroy(&chan->submit_cv);
+	return (error);
+}
+
+/*
+ * Bind a virtio channel to a 9P client.
+ *
+ * Looks up the channel whose mount tag equals mount_tag on the global
+ * channel list and, if it is free, marks it busy and stores it in
+ * *handlep. The lookup and the busy check-and-set are done under the list
+ * mutex so that two concurrent callers cannot both claim the same channel.
+ *
+ * Returns 0 on success, EBUSY if the channel is already attached to a
+ * client, or EINVAL if no channel carries that mount tag. *handlep is only
+ * written on success.
+ */
+static int
+vt9p_create(const char *mount_tag, void **handlep)
+{
+	struct vt9p_softc *sc, *chan;
+	int error;
+
+	chan = NULL;
+
+	mtx_lock(&global_chan_list_mtx);
+	STAILQ_FOREACH(sc, &global_chan_list, chan_next) {
+		if (strcmp(sc->mount_tag, mount_tag) == 0) {
+			chan = sc;
+			break;
+		}
+	}
+	if (chan == NULL) {
+		/* If we dont have one, for now bail out.*/
+		error = EINVAL;
+	} else if (chan->busy) {
+		/* Already attached to a client; cannot be shared. */
+		error = EBUSY;
+	} else {
+		chan->busy = TRUE;
+		*handlep = (void *)chan;
+		error = 0;
+	}
+	mtx_unlock(&global_chan_list_mtx);
+
+	if (error == EINVAL) {
+		P9_DEBUG(TRANS, "%s: No Global channel with mount_tag=%s\n",
+		    __func__, mount_tag);
+	}
+
+	return (error);
+}
+
+/*
+ * Release a channel previously claimed through vt9p_create() by clearing
+ * its busy flag.
+ */
+static void
+vt9p_close(void *handle)
+{
+	struct vt9p_softc *sc;
+
+	sc = handle;
+	sc->busy = FALSE;
+}
+
+/*
+ * Transport operations exported to the 9P client layer under the name
+ * "virtio"; registered with p9_register_trans() on module load.
+ */
+static struct p9_trans_module vt9p_trans = {
+	.name = "virtio",
+	.create = vt9p_create,
+	.close = vt9p_close,
+	.request = vt9p_request,
+	.cancel = vt9p_cancel,
+};
+
+/* Newbus method table: probe, attach and detach entry points. */
+static device_method_t vt9p_mthds[] = {
+	/* Device methods. */
+	DEVMETHOD(device_probe,	 vt9p_probe),
+	DEVMETHOD(device_attach, vt9p_attach),
+	DEVMETHOD(device_detach, vt9p_detach),
+	DEVMETHOD_END
+};
+
+/*
+ * Driver description: driver name, method table, and the softc size given
+ * as sizeof(struct vt9p_softc).
+ */
+static driver_t vt9p_drv = {
+	"virtio_p9fs",
+	vt9p_mthds,
+	sizeof(struct vt9p_softc)
+};
+
+/*
+ * Module event handler.
+ *
+ * MOD_LOAD creates the p9 zones and registers the virtio transport;
+ * MOD_UNLOAD destroys the zones; MOD_SHUTDOWN is accepted and ignored.
+ * Any other event yields EOPNOTSUPP.
+ *
+ * NOTE(review): nothing here unregisters vt9p_trans on MOD_UNLOAD -
+ * confirm whether the transport layer needs that.
+ */
+static int
+vt9p_modevent(module_t mod, int type, void *unused)
+{
+
+	switch (type) {
+	case MOD_LOAD:
+		p9_init_zones();
+		p9_register_trans(&vt9p_trans);
+		return (0);
+	case MOD_UNLOAD:
+		p9_destroy_zones();
+		return (0);
+	case MOD_SHUTDOWN:
+		return (0);
+	default:
+		return (EOPNOTSUPP);
+	}
+}
+
+/*
+ * Register the driver on the virtio_pci bus with vt9p_modevent() as its
+ * module event handler, and declare the module version and its
+ * dependencies on the virtio and p9fs modules.
+ */
+DRIVER_MODULE(virtio_p9fs, virtio_pci, vt9p_drv, vt9p_modevent, 0);
+MODULE_VERSION(virtio_p9fs, 1);
+MODULE_DEPEND(virtio_p9fs, virtio, 1, 1, 1);
+MODULE_DEPEND(virtio_p9fs, p9fs, 1, 1, 1);
diff --git a/sys/dev/virtio/p9fs/virtio_p9fs.h b/sys/dev/virtio/p9fs/virtio_p9fs.h
new file mode 100644
index 000000000000..924b413d29a5
--- /dev/null
+++ b/sys/dev/virtio/p9fs/virtio_p9fs.h
@@ -0,0 +1,39 @@
+/*-
+ * Copyright (c) 2017 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef __VIRTIO_9P_CONFIG__
+#define __VIRTIO_9P_CONFIG__
+
+/* Feature bit: the device config space carries a mount tag. */
+#define VIRTIO_9PNET_F_MOUNT_TAG 1
+
+/*
+ * Layout of the virtio 9P device configuration space, as read by the
+ * transport driver with offsetof() on these fields.
+ */
+struct virtio_9pnet_config {
+	/* Length of mount_tag in bytes */
+	uint16_t mount_tag_len;
+	/* Tag name; NOT NUL-terminated, mount_tag_len bytes long */
+	uint8_t mount_tag[0];
+};
+#endif /* __VIRTIO_9P_CONFIG__ */
diff --git a/sys/fs/p9fs/p9_client.c b/sys/fs/p9fs/p9_client.c
new file mode 100644
index 000000000000..8f36cc4e775a
--- /dev/null
+++ b/sys/fs/p9fs/p9_client.c
@@ -0,0 +1,1311 @@
+/*-
+ * Copyright (c) 2017 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *	notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *	notice, this list of conditions and the following disclaimer in the
+ *	documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains the 9P client functions which prepare the messages to
+ * be sent to the server. Every file operation typically has a function
+ * defined here to interact with the host.
+ */
+
+#include <vm/uma.h>
+#include <sys/systm.h>
+#include <sys/dirent.h>
+#include <sys/fcntl.h>
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/sysctl.h>
+
+#include <fs/p9fs/p9_client.h>
+#include <fs/p9fs/p9_debug.h>
+#include <fs/p9fs/p9_transport.h>
+
+/*
+ * Client-layer constants. Their users are further down in this file.
+ * NOTE(review): judging by the names these bound FID and tag allocation
+ * and size the wstat messages - confirm at the use sites.
+ */
+#define QEMU_HEADER 7
+#define P9FS_MAX_FID_CNT (1024 * 1024 * 1024)
+#define P9FS_ROOT_FID_NO 2
+#define P9FS_MIN_TAG 1
+#define P9FS_MAX_TAG 65535
+#define WSTAT_SIZE 47
+#define WSTAT_EXTENSION_SIZE 14
+
+/* Malloc type for the client structure. */
+static MALLOC_DEFINE(M_P9CLNT, "p9_client", "p9fs client structure");
+/* UMA zones backing fids, requests and request/response buffers. */
+static uma_zone_t p9fs_fid_zone;
+static uma_zone_t p9fs_req_zone;
+static uma_zone_t p9fs_buf_zone;
+
+/* vfs.p9fs.debug_level: run-time knob for p9fs debug logging. */
+SYSCTL_DECL(_vfs_p9fs);
+int p9_debug_level = 0;
+SYSCTL_INT(_vfs_p9fs, OID_AUTO, debug_level, CTLFLAG_RW,
+    &p9_debug_level, 0, "p9fs debug logging");
+
+/* Forward declarations for helpers defined later in this file. */
+static struct p9_req_t *p9_get_request(struct p9_client *c, int *error);
+static struct p9_req_t *p9_client_request(
+    struct p9_client *c, int8_t type, int *error, const char *fmt, ...);
+
+/* Return non-zero when the client speaks the 9P2000.L protocol variant. */
+inline int
+p9_is_proto_dotl(struct p9_client *clnt)
+{
+	int is_dotl;
+
+	is_dotl = (clnt->proto_version == p9_proto_2000L);
+	return (is_dotl);
+}
+
+/* Return non-zero when the client speaks the 9P2000.u protocol variant. */
+inline int
+p9_is_proto_dotu(struct p9_client *clnt)
+{
+	int is_dotu;
+
+	is_dotu = (clnt->proto_version == p9_proto_2000u);
+	return (is_dotu);
+}
+
+/*
+ * Parse mount options into the client structure.
+ *
+ * Reads the "trans" mount option to select the transport, defaulting to
+ * "virtio" when the option is absent, since that is the only transport for
+ * now. The protocol version and msize are fixed defaults. clnt->ops is set
+ * to whatever p9_get_trans_by_name() returns for the chosen name.
+ *
+ * Returns 0 on success, or the vfs_getopt() error if the option lookup
+ * fails for a reason other than the option being absent.
+ */
+static int
+p9_parse_opts(struct mount *mp, struct p9_client *clnt)
+{
+	int error, len;
+	char *trans;
+
+	error = vfs_getopt(mp->mnt_optnew, "trans", (void **)&trans, &len);
+	if (error == ENOENT) {
+		/* Option not given: fall back to the only transport we have. */
+		trans = "virtio";
+	} else if (error != 0) {
+		/* 'trans' was not filled in; do not use it. */
+		return (error);
+	}
+
+	/* These are defaults for now */
+	clnt->proto_version = p9_proto_2000L;
+	clnt->msize = 8192;
+
+	/* Get the default trans callback */
+	clnt->ops = p9_get_trans_by_name(trans);
+
+	return (0);
+}
+
+/*
+ * Allocate a zeroed request/response buffer from the buffer zone. The
+ * payload pointer (sdata) is set to the bytes immediately following the
+ * p9_buffer header within the same zone item, and capacity records
+ * alloc_msize.
+ */
+static struct p9_buffer *
+p9_buffer_alloc(int alloc_msize)
+{
+	struct p9_buffer *buf;
+
+	buf = uma_zalloc(p9fs_buf_zone, M_WAITOK | M_ZERO);
+	buf->sdata = (char *)(buf + 1);
+	buf->size = 0;
+	buf->offset = 0;
+	buf->capacity = alloc_msize;
+
+	return (buf);
+}
+
+/*
+ * Return a request/response buffer to the buffer zone and clear the
+ * caller's pointer. Header and payload share one zone item, so a single
+ * free releases both.
+ */
+static void
+p9_buffer_free(struct p9_buffer **buf)
+{
+	struct p9_buffer *victim;
+
+	victim = *buf;
+	uma_zfree(p9fs_buf_zone, victim);
+	*buf = NULL;
+}
+
+/*
+ * Release a request: give back its tag (unless it is P9_NOTAG), free the
+ * transmit and receive buffers if present, then return the request itself
+ * to the request zone.
+ */
+static void
+p9_free_req(struct p9_client *clnt, struct p9_req_t *req)
+{
+	struct p9_buffer *txbuf;
+
+	txbuf = req->tc;
+	if (txbuf != NULL) {
+		if (txbuf->tag != P9_NOTAG)
+			p9_tag_destroy(clnt, txbuf->tag);
+		p9_buffer_free(&req->tc);
+	}
+
+	if (req->rc != NULL)
+		p9_buffer_free(&req->rc);
+
+	uma_zfree(p9fs_req_zone, req);
+}
+
+/* Allocate a request by tag */
+static struct p9_req_t *
+p9_get_request(struct p9_client *clnt, int *error)
+{
+	struct p9_req_t *req;
+	int alloc_msize;
+	uint16_t tag;
+
+	alloc_msize = P9FS_MTU;
+
*** 5962 LINES SKIPPED ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202406191213.45JCDPPP051964>