Skip site navigation (1) Skip section navigation (2)
Date:      Fri, 1 Jan 2021 17:20:01 GMT
From:      Alan Somers <asomers@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: 92bbfe1f0d1f - main - fusefs: implement FUSE_COPY_FILE_RANGE.
Message-ID:  <202101011720.101HK1d8090063@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by asomers:

URL: https://cgit.FreeBSD.org/src/commit/?id=92bbfe1f0d1f1c4436d1f064a16e5aaf682526ba

commit 92bbfe1f0d1f1c4436d1f064a16e5aaf682526ba
Author:     Alan Somers <asomers@gmail.com>
AuthorDate: 2020-12-29 01:25:21 +0000
Commit:     Alan Somers <asomers@FreeBSD.org>
CommitDate: 2021-01-01 17:18:23 +0000

    fusefs: implement FUSE_COPY_FILE_RANGE.
    
    This updates the FUSE protocol to 7.28, though most of the new features
    are optional and are not yet implemented.
    
    MFC after:      2 weeks
    Relnotes:       yes
    Reviewed by:    cem
    Differential Revision:  https://reviews.freebsd.org/D27818
---
 sys/fs/fuse/fuse_internal.c                |  48 ++++
 sys/fs/fuse/fuse_internal.h                |   4 +
 sys/fs/fuse/fuse_io.c                      |  42 +--
 sys/fs/fuse/fuse_ipc.c                     |   4 +
 sys/fs/fuse/fuse_kernel.h                  | 141 ++++++----
 sys/fs/fuse/fuse_vnops.c                   | 122 +++++++++
 tests/sys/fs/fusefs/Makefile               |   1 +
 tests/sys/fs/fusefs/copy_file_range.cc     | 401 +++++++++++++++++++++++++++++
 tests/sys/fs/fusefs/default_permissions.cc | 110 +++++++-
 tests/sys/fs/fusefs/mockfs.cc              |  20 ++
 tests/sys/fs/fusefs/mockfs.hh              |   1 +
 tests/sys/fs/fusefs/write.cc               |   1 -
 12 files changed, 803 insertions(+), 92 deletions(-)

diff --git a/sys/fs/fuse/fuse_internal.c b/sys/fs/fuse/fuse_internal.c
index 2faad7cd8651..60f9a7319e00 100644
--- a/sys/fs/fuse/fuse_internal.c
+++ b/sys/fs/fuse/fuse_internal.c
@@ -1054,6 +1054,9 @@ fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio)
 	if (!fuse_libabi_geq(data, 7, 24))
 		fsess_set_notimpl(data->mp, FUSE_LSEEK);
 
+	if (!fuse_libabi_geq(data, 7, 28))
+		fsess_set_notimpl(data->mp, FUSE_COPY_FILE_RANGE);
+
 out:
 	if (err) {
 		fdata_set_dead(data);
@@ -1098,6 +1101,12 @@ fuse_internal_send_init(struct fuse_data *data, struct thread *td)
 	 * FUSE_READDIRPLUS_AUTO: not yet implemented
 	 * FUSE_ASYNC_DIO: not yet implemented
 	 * FUSE_NO_OPEN_SUPPORT: not yet implemented
+	 * FUSE_PARALLEL_DIROPS: not yet implemented
+	 * FUSE_HANDLE_KILLPRIV: not yet implemented
+	 * FUSE_POSIX_ACL: not yet implemented
+	 * FUSE_ABORT_ERROR: not yet implemented
+	 * FUSE_CACHE_SYMLINKS: not yet implemented
+	 * FUSE_MAX_PAGES: not yet implemented
 	 */
 	fiii->flags = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_EXPORT_SUPPORT
 		| FUSE_BIG_WRITES | FUSE_WRITEBACK_CACHE;
@@ -1228,6 +1237,45 @@ out:
 	return err;
 }
 
+/*
+ * FreeBSD clears the SUID and SGID bits on any write by a non-root user.
+ */
+void
+fuse_internal_clear_suid_on_write(struct vnode *vp, struct ucred *cred,
+	struct thread *td)
+{
+	struct fuse_data *data;
+	struct mount *mp;
+	struct vattr va;
+	int dataflags;
+
+	mp = vnode_mount(vp);
+	data = fuse_get_mpdata(mp);
+	dataflags = data->dataflags;
+
+	ASSERT_VOP_LOCKED(vp, __func__);
+
+	if (dataflags & FSESS_DEFAULT_PERMISSIONS) {
+		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) {
+			fuse_internal_getattr(vp, &va, cred, td);
+			if (va.va_mode & (S_ISUID | S_ISGID)) {
+				mode_t mode = va.va_mode & ~(S_ISUID | S_ISGID);
+				/* Clear all vattr fields except mode */
+				vattr_null(&va);
+				va.va_mode = mode;
+
+				/*
+				 * Ignore fuse_internal_setattr's return value,
+				 * because at this point the write operation has
+				 * already succeeded and we don't want to return
+				 * failing status for that.
+				 */
+				(void)fuse_internal_setattr(vp, &va, td, NULL);
+			}
+		}
+	}
+}
+
 #ifdef ZERO_PAD_INCOMPLETE_BUFS
 static int
 isbzero(void *buf, size_t len)
diff --git a/sys/fs/fuse/fuse_internal.h b/sys/fs/fuse/fuse_internal.h
index de68861beae2..20a10d7dfda0 100644
--- a/sys/fs/fuse/fuse_internal.h
+++ b/sys/fs/fuse/fuse_internal.h
@@ -274,6 +274,10 @@ void fuse_internal_vnode_disappear(struct vnode *vp);
 int fuse_internal_setattr(struct vnode *vp, struct vattr *va,
 	struct thread *td, struct ucred *cred);
 
+/* write */
+void fuse_internal_clear_suid_on_write(struct vnode *vp, struct ucred *cred,
+    struct thread *td);
+
 /* strategy */
 
 /* entity creation */
diff --git a/sys/fs/fuse/fuse_io.c b/sys/fs/fuse/fuse_io.c
index 4e178bb5340a..3f23a35a8626 100644
--- a/sys/fs/fuse/fuse_io.c
+++ b/sys/fs/fuse/fuse_io.c
@@ -121,9 +121,6 @@ SDT_PROBE_DEFINE2(fusefs, , io, trace, "int", "char*");
 
 static int
 fuse_inval_buf_range(struct vnode *vp, off_t filesize, off_t start, off_t end);
-static void
-fuse_io_clear_suid_on_write(struct vnode *vp, struct ucred *cred,
-    struct thread *td);
 static int 
 fuse_read_directbackend(struct vnode *vp, struct uio *uio,
     struct ucred *cred, struct fuse_filehandle *fufh);
@@ -190,43 +187,6 @@ fuse_inval_buf_range(struct vnode *vp, off_t filesize, off_t start, off_t end)
 	return (0);
 }
 
-/*
- * FreeBSD clears the SUID and SGID bits on any write by a non-root user.
- */
-static void
-fuse_io_clear_suid_on_write(struct vnode *vp, struct ucred *cred,
-	struct thread *td)
-{
-	struct fuse_data *data;
-	struct mount *mp;
-	struct vattr va;
-	int dataflags;
-
-	mp = vnode_mount(vp);
-	data = fuse_get_mpdata(mp);
-	dataflags = data->dataflags;
-
-	if (dataflags & FSESS_DEFAULT_PERMISSIONS) {
-		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) {
-			fuse_internal_getattr(vp, &va, cred, td);
-			if (va.va_mode & (S_ISUID | S_ISGID)) {
-				mode_t mode = va.va_mode & ~(S_ISUID | S_ISGID);
-				/* Clear all vattr fields except mode */
-				vattr_null(&va);
-				va.va_mode = mode;
-
-				/*
-				 * Ignore fuse_internal_setattr's return value,
-				 * because at this point the write operation has
-				 * already succeeded and we don't want to return
-				 * failing status for that.
-				 */
-				(void)fuse_internal_setattr(vp, &va, td, NULL);
-			}
-		}
-	}
-}
-
 SDT_PROBE_DEFINE5(fusefs, , io, io_dispatch, "struct vnode*", "struct uio*",
 		"int", "struct ucred*", "struct fuse_filehandle*");
 SDT_PROBE_DEFINE4(fusefs, , io, io_dispatch_filehandles_closed, "struct vnode*",
@@ -318,7 +278,7 @@ fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag,
 			err = fuse_write_biobackend(vp, uio, cred, fufh, ioflag,
 				pid);
 		}
-		fuse_io_clear_suid_on_write(vp, cred, uio->uio_td);
+		fuse_internal_clear_suid_on_write(vp, cred, uio->uio_td);
 		break;
 	default:
 		panic("uninterpreted mode passed to fuse_io_dispatch");
diff --git a/sys/fs/fuse/fuse_ipc.c b/sys/fs/fuse/fuse_ipc.c
index d3738da26b34..791ee9f38444 100644
--- a/sys/fs/fuse/fuse_ipc.c
+++ b/sys/fs/fuse/fuse_ipc.c
@@ -855,6 +855,10 @@ fuse_body_audit(struct fuse_ticket *ftick, size_t blen)
 		err = (blen == sizeof(struct fuse_lseek_out)) ? 0 : EINVAL;
 		break;
 
+	case FUSE_COPY_FILE_RANGE:
+		err = (blen == sizeof(struct fuse_write_out)) ? 0 : EINVAL;
+		break;
+
 	default:
 		panic("FUSE: opcodes out of sync (%d)\n", opcode);
 	}
diff --git a/sys/fs/fuse/fuse_kernel.h b/sys/fs/fuse/fuse_kernel.h
index 6e97b04a733f..14cf4fabac14 100644
--- a/sys/fs/fuse/fuse_kernel.h
+++ b/sys/fs/fuse/fuse_kernel.h
@@ -1,4 +1,6 @@
-/*--
+/*-
+ * SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause)
+ *
  * This file defines the kernel interface of FUSE
  * Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
  *
@@ -105,6 +107,22 @@
  *
  * 7.24
  *  - add FUSE_LSEEK for SEEK_HOLE and SEEK_DATA support
+ *
+ *  7.25
+ *  - add FUSE_PARALLEL_DIROPS
+ *
+ *  7.26
+ *  - add FUSE_HANDLE_KILLPRIV
+ *  - add FUSE_POSIX_ACL
+ *
+ *  7.27
+ *  - add FUSE_ABORT_ERROR
+ *
+ *  7.28
+ *  - add FUSE_COPY_FILE_RANGE
+ *  - add FOPEN_CACHE_DIR
+ *  - add FUSE_MAX_PAGES, add max_pages to init_out
+ *  - add FUSE_CACHE_SYMLINKS
  */
 
 #ifndef _FUSE_FUSE_KERNEL_H
@@ -120,7 +138,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 24
+#define FUSE_KERNEL_MINOR_VERSION 28
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -188,10 +206,12 @@ struct fuse_file_lock {
  * FOPEN_DIRECT_IO: bypass page cache for this open file
  * FOPEN_KEEP_CACHE: don't invalidate the data cache on open
  * FOPEN_NONSEEKABLE: the file is not seekable
+ * FOPEN_CACHE_DIR: allow caching this directory
  */
 #define FOPEN_DIRECT_IO		(1 << 0)
 #define FOPEN_KEEP_CACHE	(1 << 1)
 #define FOPEN_NONSEEKABLE	(1 << 2)
+#define FOPEN_CACHE_DIR		(1 << 3)
 
 /**
  * INIT request/reply flags
@@ -214,6 +234,12 @@ struct fuse_file_lock {
  * FUSE_ASYNC_DIO: asynchronous direct I/O submission
  * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes
  * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens
+ * FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir
+ * FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc
+ * FUSE_POSIX_ACL: filesystem supports posix acls
+ * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED
+ * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages
+ * FUSE_CACHE_SYMLINKS: cache READLINK responses
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
@@ -233,6 +259,12 @@ struct fuse_file_lock {
 #define FUSE_ASYNC_DIO		(1 << 15)
 #define FUSE_WRITEBACK_CACHE	(1 << 16)
 #define FUSE_NO_OPEN_SUPPORT	(1 << 17)
+#define FUSE_PARALLEL_DIROPS    (1 << 18)
+#define FUSE_HANDLE_KILLPRIV	(1 << 19)
+#define FUSE_POSIX_ACL		(1 << 20)
+#define FUSE_ABORT_ERROR	(1 << 21)
+#define FUSE_MAX_PAGES		(1 << 22)
+#define FUSE_CACHE_SYMLINKS	(1 << 23)
 
 #ifdef linux
 /**
@@ -300,54 +332,55 @@ struct fuse_file_lock {
 #define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0)
 
 enum fuse_opcode {
-	FUSE_LOOKUP	   = 1,
-	FUSE_FORGET	   = 2,  /* no reply */
-	FUSE_GETATTR	   = 3,
-	FUSE_SETATTR	   = 4,
-	FUSE_READLINK	   = 5,
-	FUSE_SYMLINK	   = 6,
-	FUSE_MKNOD	   = 8,
-	FUSE_MKDIR	   = 9,
-	FUSE_UNLINK	   = 10,
-	FUSE_RMDIR	   = 11,
-	FUSE_RENAME	   = 12,
-	FUSE_LINK	   = 13,
-	FUSE_OPEN	   = 14,
-	FUSE_READ	   = 15,
-	FUSE_WRITE	   = 16,
-	FUSE_STATFS	   = 17,
-	FUSE_RELEASE       = 18,
-	FUSE_FSYNC         = 20,
-	FUSE_SETXATTR      = 21,
-	FUSE_GETXATTR      = 22,
-	FUSE_LISTXATTR     = 23,
-	FUSE_REMOVEXATTR   = 24,
-	FUSE_FLUSH         = 25,
-	FUSE_INIT          = 26,
-	FUSE_OPENDIR       = 27,
-	FUSE_READDIR       = 28,
-	FUSE_RELEASEDIR    = 29,
-	FUSE_FSYNCDIR      = 30,
-	FUSE_GETLK         = 31,
-	FUSE_SETLK         = 32,
-	FUSE_SETLKW        = 33,
-	FUSE_ACCESS        = 34,
-	FUSE_CREATE        = 35,
-	FUSE_INTERRUPT     = 36,
-	FUSE_BMAP          = 37,
-	FUSE_DESTROY       = 38,
-	FUSE_IOCTL         = 39,
-	FUSE_POLL          = 40,
-	FUSE_NOTIFY_REPLY  = 41,
-	FUSE_BATCH_FORGET  = 42,
-	FUSE_FALLOCATE     = 43,
-	FUSE_READDIRPLUS   = 44,
-	FUSE_RENAME2       = 45,
-	FUSE_LSEEK         = 46,
+	FUSE_LOOKUP		= 1,
+	FUSE_FORGET		= 2,  /* no reply */
+	FUSE_GETATTR		= 3,
+	FUSE_SETATTR		= 4,
+	FUSE_READLINK		= 5,
+	FUSE_SYMLINK		= 6,
+	FUSE_MKNOD		= 8,
+	FUSE_MKDIR		= 9,
+	FUSE_UNLINK		= 10,
+	FUSE_RMDIR		= 11,
+	FUSE_RENAME		= 12,
+	FUSE_LINK		= 13,
+	FUSE_OPEN		= 14,
+	FUSE_READ		= 15,
+	FUSE_WRITE		= 16,
+	FUSE_STATFS		= 17,
+	FUSE_RELEASE		= 18,
+	FUSE_FSYNC		= 20,
+	FUSE_SETXATTR		= 21,
+	FUSE_GETXATTR		= 22,
+	FUSE_LISTXATTR		= 23,
+	FUSE_REMOVEXATTR	= 24,
+	FUSE_FLUSH		= 25,
+	FUSE_INIT		= 26,
+	FUSE_OPENDIR		= 27,
+	FUSE_READDIR		= 28,
+	FUSE_RELEASEDIR		= 29,
+	FUSE_FSYNCDIR		= 30,
+	FUSE_GETLK		= 31,
+	FUSE_SETLK		= 32,
+	FUSE_SETLKW		= 33,
+	FUSE_ACCESS		= 34,
+	FUSE_CREATE		= 35,
+	FUSE_INTERRUPT		= 36,
+	FUSE_BMAP		= 37,
+	FUSE_DESTROY		= 38,
+	FUSE_IOCTL		= 39,
+	FUSE_POLL		= 40,
+	FUSE_NOTIFY_REPLY	= 41,
+	FUSE_BATCH_FORGET	= 42,
+	FUSE_FALLOCATE		= 43,
+	FUSE_READDIRPLUS	= 44,
+	FUSE_RENAME2		= 45,
+	FUSE_LSEEK		= 46,
+	FUSE_COPY_FILE_RANGE	= 47,
 
 #ifdef linux
 	/* CUSE specific operations */
-	CUSE_INIT          = 4096,
+	CUSE_INIT		= 4096,
 #endif /* linux */
 };
 
@@ -585,7 +618,9 @@ struct fuse_init_out {
 	uint16_t	congestion_threshold;
 	uint32_t	max_write;
 	uint32_t	time_gran;
-	uint32_t	unused[9];
+	uint16_t	max_pages;
+	uint16_t	padding;
+	uint32_t	unused[8];
 };
 
 #ifdef linux
@@ -766,4 +801,14 @@ struct fuse_lseek_out {
        uint64_t        offset;
 };
 
+struct fuse_copy_file_range_in {
+	uint64_t	fh_in;
+	uint64_t	off_in;
+	uint64_t	nodeid_out;
+	uint64_t	fh_out;
+	uint64_t	off_out;
+	uint64_t	len;
+	uint64_t	flags;
+};
+
 #endif /* _FUSE_FUSE_KERNEL_H */
diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c
index efac7e041cf6..1e9434f9403d 100644
--- a/sys/fs/fuse/fuse_vnops.c
+++ b/sys/fs/fuse/fuse_vnops.c
@@ -130,6 +130,7 @@ static vop_advlock_t fuse_vnop_advlock;
 static vop_bmap_t fuse_vnop_bmap;
 static vop_close_t fuse_fifo_close;
 static vop_close_t fuse_vnop_close;
+static vop_copy_file_range_t fuse_vnop_copy_file_range;
 static vop_create_t fuse_vnop_create;
 static vop_deleteextattr_t fuse_vnop_deleteextattr;
 static vop_fdatasync_t fuse_vnop_fdatasync;
@@ -185,6 +186,7 @@ struct vop_vector fuse_vnops = {
 	.vop_advlock = fuse_vnop_advlock,
 	.vop_bmap = fuse_vnop_bmap,
 	.vop_close = fuse_vnop_close,
+	.vop_copy_file_range = fuse_vnop_copy_file_range,
 	.vop_create = fuse_vnop_create,
 	.vop_deleteextattr = fuse_vnop_deleteextattr,
 	.vop_fsync = fuse_vnop_fsync,
@@ -609,6 +611,126 @@ fuse_vnop_close(struct vop_close_args *ap)
 	return err;
 }
 
+/*
+   struct vop_copy_file_range_args {
+	struct vop_generic_args a_gen;
+	struct vnode *a_invp;
+	off_t *a_inoffp;
+	struct vnode *a_outvp;
+	off_t *a_outoffp;
+	size_t *a_lenp;
+	unsigned int a_flags;
+	struct ucred *a_incred;
+	struct ucred *a_outcred;
+	struct thread *a_fsizetd;
+}
+ */
+static int
+fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap)
+{
+	struct vnode *invp = ap->a_invp;
+	struct vnode *outvp = ap->a_outvp;
+	struct mount *mp = vnode_mount(invp);
+	struct fuse_dispatcher fdi;
+	struct fuse_filehandle *infufh, *outfufh;
+	struct fuse_copy_file_range_in *fcfri;
+	struct ucred *incred = ap->a_incred;
+	struct ucred *outcred = ap->a_outcred;
+	struct fuse_write_out *fwo;
+	struct thread *td;
+	struct uio io;
+	pid_t pid;
+	int err;
+
+	if (mp != vnode_mount(outvp))
+		goto fallback;
+
+	if (incred->cr_uid != outcred->cr_uid)
+		goto fallback;
+
+	if (incred->cr_groups[0] != outcred->cr_groups[0])
+		goto fallback;
+
+	if (fsess_not_impl(mp, FUSE_COPY_FILE_RANGE))
+		goto fallback;
+
+	if (ap->a_fsizetd == NULL)
+		td = curthread;
+	else
+		td = ap->a_fsizetd;
+	pid = td->td_proc->p_pid;
+
+	err = fuse_filehandle_getrw(invp, FREAD, &infufh, incred, pid);
+	if (err)
+		return (err);
+
+	err = fuse_filehandle_getrw(outvp, FWRITE, &outfufh, outcred, pid);
+	if (err)
+		return (err);
+
+	/* Lock both vnodes, avoiding risk of deadlock. */
+	do {
+		err = vn_lock(outvp, LK_EXCLUSIVE);
+		if (invp == outvp)
+			break;
+		if (err == 0) {
+			err = vn_lock(invp, LK_SHARED | LK_NOWAIT);
+			if (err == 0)
+				break;
+			VOP_UNLOCK(outvp);
+			err = vn_lock(invp, LK_SHARED);
+			if (err == 0)
+				VOP_UNLOCK(invp);
+		}
+	} while (err == 0);
+	if (err != 0)
+		return (err);
+
+	if (ap->a_fsizetd) {
+		io.uio_offset = *ap->a_outoffp;
+		io.uio_resid = *ap->a_lenp;
+		err = vn_rlimit_fsize(outvp, &io, ap->a_fsizetd);
+		if (err)
+			goto unlock;
+	}
+
+	fdisp_init(&fdi, sizeof(*fcfri));
+	fdisp_make_vp(&fdi, FUSE_COPY_FILE_RANGE, invp, td, incred);
+	fcfri = fdi.indata;
+	fcfri->fh_in = infufh->fh_id;
+	fcfri->off_in = *ap->a_inoffp;
+	fcfri->nodeid_out = VTOI(outvp);
+	fcfri->fh_out = outfufh->fh_id;
+	fcfri->off_out = *ap->a_outoffp;
+	fcfri->len = *ap->a_lenp;
+	fcfri->flags = 0;
+
+	err = fdisp_wait_answ(&fdi);
+	if (err == 0) {
+		fwo = fdi.answ;
+		*ap->a_lenp = fwo->size;
+		*ap->a_inoffp += fwo->size;
+		*ap->a_outoffp += fwo->size;
+		fuse_internal_clear_suid_on_write(outvp, outcred, td);
+	}
+	fdisp_destroy(&fdi);
+
+unlock:
+	if (invp != outvp)
+		VOP_UNLOCK(invp);
+	VOP_UNLOCK(outvp);
+
+	if (err == ENOSYS) {
+		fsess_set_notimpl(mp, FUSE_COPY_FILE_RANGE);
+fallback:
+		err = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp,
+		    ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags,
+		    ap->a_incred, ap->a_outcred, ap->a_fsizetd);
+	}
+
+	return (err);
+}
+
 static void
 fdisp_make_mknod_for_fallback(
 	struct fuse_dispatcher *fdip,
diff --git a/tests/sys/fs/fusefs/Makefile b/tests/sys/fs/fusefs/Makefile
index 8d199a53c074..2c858ff42dd1 100644
--- a/tests/sys/fs/fusefs/Makefile
+++ b/tests/sys/fs/fusefs/Makefile
@@ -13,6 +13,7 @@ GTESTS+=	access
 GTESTS+=	allow_other
 GTESTS+=	bmap
 GTESTS+=	cache
+GTESTS+=	copy_file_range
 GTESTS+=	create
 GTESTS+=	default_permissions
 GTESTS+=	default_permissions_privileged
diff --git a/tests/sys/fs/fusefs/copy_file_range.cc b/tests/sys/fs/fusefs/copy_file_range.cc
new file mode 100644
index 000000000000..bb8eecf8b862
--- /dev/null
+++ b/tests/sys/fs/fusefs/copy_file_range.cc
@@ -0,0 +1,401 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Alan Somers
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+extern "C" {
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include <fcntl.h>
+#include <signal.h>
+#include <unistd.h>
+}
+
+#include "mockfs.hh"
+#include "utils.hh"
+
+using namespace testing;
+
+class CopyFileRange: public FuseTest {
+public:
+static sig_atomic_t s_sigxfsz;
+
+void SetUp() {
+	s_sigxfsz = 0;
+	FuseTest::SetUp();
+}
+
+void TearDown() {
+	struct sigaction sa;
+
+	bzero(&sa, sizeof(sa));
+	sa.sa_handler = SIG_DFL;
+	sigaction(SIGXFSZ, &sa, NULL);
+
+	FuseTest::TearDown();
+}
+
+void expect_maybe_lseek(uint64_t ino)
+{
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			return (in.header.opcode == FUSE_LSEEK &&
+				in.header.nodeid == ino);
+		}, Eq(true)),
+		_)
+	).Times(AtMost(1))
+	.WillRepeatedly(Invoke(ReturnErrno(ENOSYS)));
+}
+
+void expect_open(uint64_t ino, uint32_t flags, int times, uint64_t fh)
+{
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			return (in.header.opcode == FUSE_OPEN &&
+				in.header.nodeid == ino);
+		}, Eq(true)),
+		_)
+	).Times(times)
+	.WillRepeatedly(Invoke(
+		ReturnImmediate([=](auto in __unused, auto& out) {
+		out.header.len = sizeof(out.header);
+		SET_OUT_HEADER_LEN(out, open);
+		out.body.open.fh = fh;
+		out.body.open.open_flags = flags;
+	})));
+}
+
+void expect_write(uint64_t ino, uint64_t offset, uint64_t isize,
+	uint64_t osize, const void *contents)
+{
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			const char *buf = (const char*)in.body.bytes +
+				sizeof(struct fuse_write_in);
+
+			return (in.header.opcode == FUSE_WRITE &&
+				in.header.nodeid == ino &&
+				in.body.write.offset == offset  &&
+				in.body.write.size == isize &&
+				0 == bcmp(buf, contents, isize));
+		}, Eq(true)),
+		_)
+	).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) {
+		SET_OUT_HEADER_LEN(out, write);
+		out.body.write.size = osize;
+	})));
+}
+
+};
+
+sig_atomic_t CopyFileRange::s_sigxfsz = 0;
+
+void sigxfsz_handler(int __unused sig) {
+	CopyFileRange::s_sigxfsz = 1;
+}
+
+
+class CopyFileRange_7_27: public CopyFileRange {
+public:
+virtual void SetUp() {
+	m_kernel_minor_version = 27;
+	CopyFileRange::SetUp();
+}
+};
+
+TEST_F(CopyFileRange, eio)
+{
+	const char FULLPATH1[] = "mountpoint/src.txt";
+	const char RELPATH1[] = "src.txt";
+	const char FULLPATH2[] = "mountpoint/dst.txt";
+	const char RELPATH2[] = "dst.txt";
+	const uint64_t ino1 = 42;
+	const uint64_t ino2 = 43;
+	const uint64_t fh1 = 0xdeadbeef1a7ebabe;
+	const uint64_t fh2 = 0xdeadc0de88c0ffee;
+	off_t fsize1 = 1 << 20;		/* 1 MiB */
+	off_t fsize2 = 1 << 19;		/* 512 KiB */
+	off_t start1 = 1 << 18;
+	off_t start2 = 3 << 17;
+	ssize_t len = 65536;
+	int fd1, fd2;
+
+	expect_lookup(RELPATH1, ino1, S_IFREG | 0644, fsize1, 1);
+	expect_lookup(RELPATH2, ino2, S_IFREG | 0644, fsize2, 1);
+	expect_open(ino1, 0, 1, fh1);
+	expect_open(ino2, 0, 1, fh2);
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			return (in.header.opcode == FUSE_COPY_FILE_RANGE &&
+				in.header.nodeid == ino1 &&
+				in.body.copy_file_range.fh_in == fh1 &&
+				(off_t)in.body.copy_file_range.off_in == start1 &&
+				in.body.copy_file_range.nodeid_out == ino2 &&
+				in.body.copy_file_range.fh_out == fh2 &&
+				(off_t)in.body.copy_file_range.off_out == start2 &&
+				in.body.copy_file_range.len == (size_t)len &&
+				in.body.copy_file_range.flags == 0);
+		}, Eq(true)),
+		_)
+	).WillOnce(Invoke(ReturnErrno(EIO)));
+
+	fd1 = open(FULLPATH1, O_RDONLY);
+	fd2 = open(FULLPATH2, O_WRONLY);
+	ASSERT_EQ(-1, copy_file_range(fd1, &start1, fd2, &start2, len, 0));
+	EXPECT_EQ(EIO, errno);
+}
+
+/*
+ * If the server doesn't support FUSE_COPY_FILE_RANGE, the kernel should
+ * fall back to a read/write-based implementation.
+ */
+TEST_F(CopyFileRange, fallback)
+{
+	const char FULLPATH1[] = "mountpoint/src.txt";
+	const char RELPATH1[] = "src.txt";
+	const char FULLPATH2[] = "mountpoint/dst.txt";
+	const char RELPATH2[] = "dst.txt";
+	const uint64_t ino1 = 42;
+	const uint64_t ino2 = 43;
+	const uint64_t fh1 = 0xdeadbeef1a7ebabe;
+	const uint64_t fh2 = 0xdeadc0de88c0ffee;
+	off_t fsize2 = 0;
+	off_t start1 = 0;
+	off_t start2 = 0;
+	const char *contents = "Hello, world!";
+	ssize_t len;
+	int fd1, fd2;
+
+	len = strlen(contents);
+
+	/* 
+	 * Ensure that we read to EOF, just so the buffer cache's read size is
+	 * predictable.
+	 */
+	expect_lookup(RELPATH1, ino1, S_IFREG | 0644, start1 + len, 1);
+	expect_lookup(RELPATH2, ino2, S_IFREG | 0644, fsize2, 1);
+	expect_open(ino1, 0, 1, fh1);
+	expect_open(ino2, 0, 1, fh2);
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			return (in.header.opcode == FUSE_COPY_FILE_RANGE &&
+				in.header.nodeid == ino1 &&
+				in.body.copy_file_range.fh_in == fh1 &&
+				(off_t)in.body.copy_file_range.off_in == start1 &&
+				in.body.copy_file_range.nodeid_out == ino2 &&
+				in.body.copy_file_range.fh_out == fh2 &&
+				(off_t)in.body.copy_file_range.off_out == start2 &&
+				in.body.copy_file_range.len == (size_t)len &&
+				in.body.copy_file_range.flags == 0);
+		}, Eq(true)),
+		_)
+	).WillOnce(Invoke(ReturnErrno(ENOSYS)));
+	expect_maybe_lseek(ino1);
+	expect_read(ino1, start1, len, len, contents, 0);
+	expect_write(ino2, start2, len, len, contents);
+
+	fd1 = open(FULLPATH1, O_RDONLY);
+	ASSERT_GE(fd1, 0);
+	fd2 = open(FULLPATH2, O_WRONLY);
+	ASSERT_GE(fd2, 0);
+	ASSERT_EQ(len, copy_file_range(fd1, &start1, fd2, &start2, len, 0));
+}
+
+/* fusefs should respect RLIMIT_FSIZE */
+TEST_F(CopyFileRange, rlimit_fsize)
+{
+	const char FULLPATH1[] = "mountpoint/src.txt";
+	const char RELPATH1[] = "src.txt";
+	const char FULLPATH2[] = "mountpoint/dst.txt";
+	const char RELPATH2[] = "dst.txt";
+	struct rlimit rl;
+	const uint64_t ino1 = 42;
+	const uint64_t ino2 = 43;
+	const uint64_t fh1 = 0xdeadbeef1a7ebabe;
+	const uint64_t fh2 = 0xdeadc0de88c0ffee;
+	off_t fsize1 = 1 << 20;		/* 1 MiB */
+	off_t fsize2 = 1 << 19;		/* 512 KiB */
+	off_t start1 = 1 << 18;
+	off_t start2 = fsize2;
+	ssize_t len = 65536;
+	int fd1, fd2;
+
+	expect_lookup(RELPATH1, ino1, S_IFREG | 0644, fsize1, 1);
+	expect_lookup(RELPATH2, ino2, S_IFREG | 0644, fsize2, 1);
+	expect_open(ino1, 0, 1, fh1);
+	expect_open(ino2, 0, 1, fh2);
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			return (in.header.opcode == FUSE_COPY_FILE_RANGE);
+		}, Eq(true)),
+		_)
+	).Times(0);
+
+	rl.rlim_cur = fsize2;
+	rl.rlim_max = 10 * fsize2;
+	ASSERT_EQ(0, setrlimit(RLIMIT_FSIZE, &rl)) << strerror(errno);
+	ASSERT_NE(SIG_ERR, signal(SIGXFSZ, sigxfsz_handler)) << strerror(errno);
+
+	fd1 = open(FULLPATH1, O_RDONLY);
+	fd2 = open(FULLPATH2, O_WRONLY);
+	ASSERT_EQ(-1, copy_file_range(fd1, &start1, fd2, &start2, len, 0));
+	EXPECT_EQ(EFBIG, errno);
+	EXPECT_EQ(1, s_sigxfsz);
+}
+
+TEST_F(CopyFileRange, ok)
+{
+	const char FULLPATH1[] = "mountpoint/src.txt";
+	const char RELPATH1[] = "src.txt";
+	const char FULLPATH2[] = "mountpoint/dst.txt";
+	const char RELPATH2[] = "dst.txt";
+	const uint64_t ino1 = 42;
+	const uint64_t ino2 = 43;
+	const uint64_t fh1 = 0xdeadbeef1a7ebabe;
+	const uint64_t fh2 = 0xdeadc0de88c0ffee;
+	off_t fsize1 = 1 << 20;		/* 1 MiB */
+	off_t fsize2 = 1 << 19;		/* 512 KiB */
+	off_t start1 = 1 << 18;
+	off_t start2 = 3 << 17;
+	ssize_t len = 65536;
+	int fd1, fd2;
+
+	expect_lookup(RELPATH1, ino1, S_IFREG | 0644, fsize1, 1);
+	expect_lookup(RELPATH2, ino2, S_IFREG | 0644, fsize2, 1);
+	expect_open(ino1, 0, 1, fh1);
+	expect_open(ino2, 0, 1, fh2);
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			return (in.header.opcode == FUSE_COPY_FILE_RANGE &&
+				in.header.nodeid == ino1 &&
+				in.body.copy_file_range.fh_in == fh1 &&
+				(off_t)in.body.copy_file_range.off_in == start1 &&
+				in.body.copy_file_range.nodeid_out == ino2 &&
+				in.body.copy_file_range.fh_out == fh2 &&
+				(off_t)in.body.copy_file_range.off_out == start2 &&
+				in.body.copy_file_range.len == (size_t)len &&
+				in.body.copy_file_range.flags == 0);
+		}, Eq(true)),
+		_)
+	).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) {
+		SET_OUT_HEADER_LEN(out, write);
+		out.body.write.size = len;
+	})));
+
+	fd1 = open(FULLPATH1, O_RDONLY);
+	fd2 = open(FULLPATH2, O_WRONLY);
+	ASSERT_EQ(len, copy_file_range(fd1, &start1, fd2, &start2, len, 0));
+}
+
+/*
+ * copy_file_range can make copies within a single file, as long as the ranges
+ * don't overlap.
+ */
+TEST_F(CopyFileRange, same_file)
+{
+	const char FULLPATH[] = "mountpoint/src.txt";
+	const char RELPATH[] = "src.txt";
+	const uint64_t ino = 4;
+	const uint64_t fh = 0xdeadbeefa7ebabe;
+	off_t fsize = 1 << 20;		/* 1 MiB */
+	off_t off_in = 1 << 18;
+	off_t off_out = 3 << 17;
+	ssize_t len = 65536;
+	int fd;
+
+	expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1);
+	expect_open(ino, 0, 1, fh);
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			return (in.header.opcode == FUSE_COPY_FILE_RANGE &&
+				in.header.nodeid == ino &&
+				in.body.copy_file_range.fh_in == fh &&
+				(off_t)in.body.copy_file_range.off_in == off_in &&
+				in.body.copy_file_range.nodeid_out == ino &&
+				in.body.copy_file_range.fh_out == fh &&
+				(off_t)in.body.copy_file_range.off_out == off_out &&
+				in.body.copy_file_range.len == (size_t)len &&
+				in.body.copy_file_range.flags == 0);
+		}, Eq(true)),
+		_)
+	).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) {
+		SET_OUT_HEADER_LEN(out, write);
+		out.body.write.size = len;
+	})));
+
+	fd = open(FULLPATH, O_RDWR);
+	ASSERT_EQ(len, copy_file_range(fd, &off_in, fd, &off_out, len, 0));
+}
+
+/* With older protocol versions, no FUSE_COPY_FILE_RANGE should be attempted */
+TEST_F(CopyFileRange_7_27, fallback)
+{
+	const char FULLPATH1[] = "mountpoint/src.txt";
+	const char RELPATH1[] = "src.txt";
+	const char FULLPATH2[] = "mountpoint/dst.txt";
+	const char RELPATH2[] = "dst.txt";
+	const uint64_t ino1 = 42;
+	const uint64_t ino2 = 43;
+	const uint64_t fh1 = 0xdeadbeef1a7ebabe;
+	const uint64_t fh2 = 0xdeadc0de88c0ffee;
+	off_t fsize2 = 0;
+	off_t start1 = 0;
+	off_t start2 = 0;
+	const char *contents = "Hello, world!";
+	ssize_t len;
+	int fd1, fd2;
+
+	len = strlen(contents);
+
+	/* 
+	 * Ensure that we read to EOF, just so the buffer cache's read size is
+	 * predictable.
+	 */
+	expect_lookup(RELPATH1, ino1, S_IFREG | 0644, start1 + len, 1);
+	expect_lookup(RELPATH2, ino2, S_IFREG | 0644, fsize2, 1);
+	expect_open(ino1, 0, 1, fh1);
+	expect_open(ino2, 0, 1, fh2);
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			return (in.header.opcode == FUSE_COPY_FILE_RANGE);
+		}, Eq(true)),
+		_)
+	).Times(0);
+	expect_maybe_lseek(ino1);
+	expect_read(ino1, start1, len, len, contents, 0);
+	expect_write(ino2, start2, len, len, contents);
+
+	fd1 = open(FULLPATH1, O_RDONLY);
+	ASSERT_GE(fd1, 0);
+	fd2 = open(FULLPATH2, O_WRONLY);
+	ASSERT_GE(fd2, 0);
+	ASSERT_EQ(len, copy_file_range(fd1, &start1, fd2, &start2, len, 0));
+}
+
+
diff --git a/tests/sys/fs/fusefs/default_permissions.cc b/tests/sys/fs/fusefs/default_permissions.cc
index 368c28bbcb3f..6401f926bb49 100644
--- a/tests/sys/fs/fusefs/default_permissions.cc
+++ b/tests/sys/fs/fusefs/default_permissions.cc
@@ -109,6 +109,25 @@ void expect_create(const char *relpath, uint64_t ino)
 	})));
 }
*** 201 LINES SKIPPED ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202101011720.101HK1d8090063>