Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 8 Aug 2025 08:45:41 +0300
From:      Konstantin Belousov <kostikbel@gmail.com>
To:        Rick Macklem <rmacklem@freebsd.org>
Cc:        src-committers@freebsd.org, dev-commits-src-all@freebsd.org, dev-commits-src-main@freebsd.org
Subject:   Re: git: 37b2cb5ecb0f - main - vfs: Add support for file cloning to VOP_COPY_FILE_RANGE
Message-ID:  <aJWPBVIq2MURFu0d@kib.kiev.ua>
In-Reply-To: <202508080055.5780tYVq010160@gitrepo.freebsd.org>
References:  <202508080055.5780tYVq010160@gitrepo.freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help
On Fri, Aug 08, 2025 at 12:55:34AM +0000, Rick Macklem wrote:
> The branch main has been updated by rmacklem:
> 
> URL: https://cgit.FreeBSD.org/src/commit/?id=37b2cb5ecb0fb1b1f5a98ff4c08b61b8c05ec1d7
> 
> commit 37b2cb5ecb0fb1b1f5a98ff4c08b61b8c05ec1d7
> Author:     Rick Macklem <rmacklem@FreeBSD.org>
> AuthorDate: 2025-08-08 00:52:23 +0000
> Commit:     Rick Macklem <rmacklem@FreeBSD.org>
> CommitDate: 2025-08-08 00:52:23 +0000
> 
>     vfs: Add support for file cloning to VOP_COPY_FILE_RANGE
>     
>     NFSv4 has a separate CLONE operation from COPY with
>     a couple of semantics differences. Unlike COPY, CLONE
>     must complete the "copy on write" and cannot return
>     partially copied. It also is required to use offsets (and
>     the length if not to EOF) that are aligned to a buffer
>     boundary.
>     
>     Since VOP_COPY_FILE_RANGE() can already do "copy on write"
>     for file systems that support it, such as ZFS with block
>     cloning enabled, all this patch does is add a flag called
>     COPY_FILE_RANGE_CLONE so that it will conform to the
>     rule that it must do a "copy on write" to completion.
>     
>     The patch also adds a new pathconf(2) name _PC_CLONE_BLKSIZE,
>     which acquires the blocksize requirement for cloning and
>     returns 0 for file systems that do not support the
>     "copy on write" feature. (This is needed for the NFSv4.2
>     clone_blksize attribute.)
>     
>     This patch will allow the implementation of CLONE
>     for NFSv4.2.
>     
>     Reviewed by:    asomers
>     Differential Revision:  https://reviews.freebsd.org/D51808
> ---
>  sys/fs/fuse/fuse_vnops.c |  3 +++
>  sys/kern/vfs_default.c   |  1 +
>  sys/kern/vfs_syscalls.c  |  2 +-
>  sys/kern/vfs_vnops.c     |  5 +++++
>  sys/sys/unistd.h         |  1 +
>  sys/sys/vnode.h          | 17 +++++++++++++----
>  6 files changed, 24 insertions(+), 5 deletions(-)
> 
> diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c
> index b90ce60ec664..b782146b7278 100644
> --- a/sys/fs/fuse/fuse_vnops.c
> +++ b/sys/fs/fuse/fuse_vnops.c
> @@ -877,6 +877,9 @@ fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap)
>  	pid_t pid;
>  	int err;
>  
> +	if ((ap->a_flags & COPY_FILE_RANGE_CLONE) != 0)
> +		return (EXTERROR(ENOSYS, "Cannot clone"));
> +
>  	if (mp == NULL || mp != vnode_mount(outvp))
>  		return (EXTERROR(ENOSYS, "Mount points do not match"));
>  
> diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
> index fd6202a1424c..85f67731e1cc 100644
> --- a/sys/kern/vfs_default.c
> +++ b/sys/kern/vfs_default.c
> @@ -457,6 +457,7 @@ vop_stdpathconf(struct vop_pathconf_args *ap)
>  		case _PC_NAMEDATTR_ENABLED:
>  		case _PC_HAS_NAMEDATTR:
>  		case _PC_HAS_HIDDENSYSTEM:
> +		case _PC_CLONE_BLKSIZE:
>  			*ap->a_retval = 0;
>  			return (0);
>  		default:
> diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
> index 9704e9c160a8..c64618036733 100644
> --- a/sys/kern/vfs_syscalls.c
> +++ b/sys/kern/vfs_syscalls.c
> @@ -5058,7 +5058,7 @@ kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd,
>  	error = 0;
>  	retlen = 0;
>  
> -	if (flags != 0) {
> +	if ((flags & ~COPY_FILE_RANGE_USERFLAGS) != 0) {
>  		error = EINVAL;
>  		goto out;
>  	}
> diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
> index 6451c9e07a60..93f87ddae4de 100644
> --- a/sys/kern/vfs_vnops.c
> +++ b/sys/kern/vfs_vnops.c
> @@ -3443,6 +3443,11 @@ vn_generic_copy_file_range(struct vnode *invp, off_t *inoffp,
>  	interrupted = 0;
>  	dat = NULL;
>  
> +	if ((flags & COPY_FILE_RANGE_CLONE) != 0) {
> +		error = ENOSYS;
> +		goto out;
> +	}
> +
>  	error = vn_lock(invp, LK_SHARED);
>  	if (error != 0)
>  		goto out;
> diff --git a/sys/sys/unistd.h b/sys/sys/unistd.h
> index 85ed93fd359d..7ab2f021e408 100644
> --- a/sys/sys/unistd.h
> +++ b/sys/sys/unistd.h
> @@ -159,6 +159,7 @@
>  #define	_PC_XATTR_ENABLED	_PC_NAMEDATTR_ENABLED	/* Solaris Compatible */
>  #define	_PC_XATTR_EXISTS	_PC_HAS_NAMEDATTR	/* Solaris Compatible */
>  #define	_PC_HAS_HIDDENSYSTEM	68
> +#define	_PC_CLONE_BLKSIZE	69
>  #endif
>  
>  /* From OpenSolaris, used by SEEK_DATA/SEEK_HOLE. */
> diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
> index 074769d55c2d..8080e9edd8c3 100644
> --- a/sys/sys/vnode.h
> +++ b/sys/sys/vnode.h
> @@ -397,8 +397,21 @@ struct vattr {
>   */
>  #define VLKTIMEOUT	(hz / 20 + 1)
>  
> +/* copy_file_range flags */
> +#define	COPY_FILE_RANGE_KFLAGS		0xff000000
> +
> +/*
> + * copy_file_range flags visible to user space.
> + * Allocate high bits first, to try and avoid conflicting with Linux.
> + */
> +#define	COPY_FILE_RANGE_CLONE		0x00800000	/* Require cloning. */
> +#define	COPY_FILE_RANGE_USERFLAGS	(COPY_FILE_RANGE_CLONE)
> +

These are userspace flags for copy_file_range(2).  Now you require
userspace to include non-std-compliant and highly offensive WRT namespace
pollution sys/vnode.h header to get to some bits of the copy_file_range(2)
interface.  This is not right thing to do, IMO.

>  #ifdef _KERNEL
>  
> +/* copy_file_range flags only usable in the kernel */
> +#define	COPY_FILE_RANGE_TIMEO1SEC	0x01000000	/* Return after 1sec. */
> +
>  #ifdef MALLOC_DECLARE
>  MALLOC_DECLARE(M_VNODE);
>  #endif
> @@ -621,10 +634,6 @@ typedef void vop_getpages_iodone_t(void *, vm_page_t *, int, int);
>  #define	VN_OPEN_INVFS		0x00000008
>  #define	VN_OPEN_WANTIOCTLCAPS	0x00000010
>  
> -/* copy_file_range kernel flags */
> -#define	COPY_FILE_RANGE_KFLAGS		0xff000000
> -#define	COPY_FILE_RANGE_TIMEO1SEC	0x01000000	/* Return after 1sec. */
> -
>  /*
>   * Public vnode manipulation functions.
>   */



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?aJWPBVIq2MURFu0d>