Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 7 Apr 2023 15:50:34 GMT
From:      Mateusz Guzik <mjg@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: d012836fb616 - main - zfs: fix up EXDEV handling for clone_range
Message-ID:  <202304071550.337FoYj8060770@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by mjg:

URL: https://cgit.FreeBSD.org/src/commit/?id=d012836fb61654942c9d573c8e0f9def598d4ae2

commit d012836fb61654942c9d573c8e0f9def598d4ae2
Author:     Mateusz Guzik <mjg@FreeBSD.org>
AuthorDate: 2023-04-05 20:27:12 +0000
Commit:     Mateusz Guzik <mjg@FreeBSD.org>
CommitDate: 2023-04-07 15:46:20 +0000

    zfs: fix up EXDEV handling for clone_range
    
    API contract requires VOPs to handle EXDEV internally, worst case by
    falling back to the generic copy routine. This broke with the recent
    changes.
    
    While here whack custom loop to lock 2 vnodes with vn_lock_pair, which
    provides the same functionality internally. write start/finish around
    it plays no role so got eliminated.
    
    One difference is that vn_lock_pair always takes an exclusive lock on
    both vnodes. I did not patch around it because current code takes an
    exclusive lock on the target vnode. zfs supports shared-locking for
    writes, so this serializes different calls to the routine as is, despite
    range locking inside. At the same time you may notice the source vnode
    can get some traffic if only shared-locked, thus once more this goes
    the safer route of exclusive-locking. Note this should be patched to
    use shared-locking for both once the feature is considered stable.
    
    Technically the switch to vn_lock_pair should be a separate change, but
    it would only introduce churn immediately whacked by the rest of the
    patch.
    
    [note: technically the review is still in progress, but so is the
    active breakage]
    
    Sponsored by:   Rubicon Communications, LLC ("Netgate")
---
 .../openzfs/module/os/freebsd/zfs/zfs_vnops_os.c   | 56 +++++++++++-----------
 1 file changed, 27 insertions(+), 29 deletions(-)

diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
index baa2ee5b3824..67c1a6e3344b 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
@@ -6250,56 +6250,54 @@ zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap)
 	 * need something else than vn_generic_copy_file_range().
 	 */
 
-	/* Lock both vnodes, avoiding risk of deadlock. */
-	do {
-		mp = NULL;
-		error = vn_start_write(outvp, &mp, V_WAIT);
-		if (error == 0) {
-			error = vn_lock(outvp, LK_EXCLUSIVE);
-			if (error == 0) {
-				if (invp == outvp)
-					break;
-				error = vn_lock(invp, LK_SHARED | LK_NOWAIT);
-				if (error == 0)
-					break;
-				VOP_UNLOCK(outvp);
-				if (mp != NULL)
-					vn_finished_write(mp);
-				mp = NULL;
-				error = vn_lock(invp, LK_SHARED);
-				if (error == 0)
-					VOP_UNLOCK(invp);
-			}
+	vn_start_write(outvp, &mp, V_WAIT);
+	if (invp == outvp) {
+		if (vn_lock(outvp, LK_EXCLUSIVE) != 0) {
+			goto bad_write_fallback;
 		}
-		if (mp != NULL)
-			vn_finished_write(mp);
-	} while (error == 0);
-	if (error != 0)
-		return (error);
+	} else {
+		vn_lock_pair(invp, false, outvp, false);
+		if (VN_IS_DOOMED(invp) || VN_IS_DOOMED(outvp)) {
+			goto bad_locked_fallback;
+		}
+	}
+
 #ifdef MAC
 	error = mac_vnode_check_write(curthread->td_ucred, ap->a_outcred,
 	    outvp);
 	if (error != 0)
-		goto unlock;
+		goto out_locked;
 #endif
 
 	io.uio_offset = *ap->a_outoffp;
 	io.uio_resid = *ap->a_lenp;
 	error = vn_rlimit_fsize(outvp, &io, ap->a_fsizetd);
 	if (error != 0)
-		goto unlock;
+		goto out_locked;
 
 	error = zfs_clone_range(VTOZ(invp), ap->a_inoffp, VTOZ(outvp),
 	    ap->a_outoffp, &len, ap->a_outcred);
+	if (error == EXDEV)
+		goto bad_locked_fallback;
 	*ap->a_lenp = (size_t)len;
-
-unlock:
+out_locked:
 	if (invp != outvp)
 		VOP_UNLOCK(invp);
 	VOP_UNLOCK(outvp);
 	if (mp != NULL)
 		vn_finished_write(mp);
+	return (error);
 
+bad_locked_fallback:
+	if (invp != outvp)
+		VOP_UNLOCK(invp);
+	VOP_UNLOCK(outvp);
+bad_write_fallback:
+	if (mp != NULL)
+		vn_finished_write(mp);
+	error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp,
+	    ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags,
+	    ap->a_incred, ap->a_outcred, ap->a_fsizetd);
 	return (error);
 }
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202304071550.337FoYj8060770>