Skip site navigation (1) Skip section navigation (2)
Date:      Sun, 6 Jun 2010 13:08:36 +0000 (UTC)
From:      Martin Matuska <mm@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org
Subject:   svn commit: r208869 - stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs
Message-ID:  <201006061308.o56D8aRd049837@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mm
Date: Sun Jun  6 13:08:36 2010
New Revision: 208869
URL: http://svn.freebsd.org/changeset/base/208869

Log:
  MFC r208775:
  
  Fix freeing space after deleting large files with holes.
  
  OpenSolaris onnv revision:	9950:78fc41aa9bc5
  
  Reviewed by:	pjd, delphij (mentor)
  Obtained from:	OpenSolaris (Bug ID 6792701)
  Approved by:	re (kib)

Modified:
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
Directory Properties:
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)
  stable/8/sys/dev/xen/xenpci/   (props changed)
  stable/8/sys/geom/sched/   (props changed)

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c	Sun Jun  6 11:36:08 2010	(r208868)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c	Sun Jun  6 13:08:36 2010	(r208869)
@@ -371,56 +371,51 @@ dmu_prefetch(objset_t *os, uint64_t obje
 	dnode_rele(dn, FTAG);
 }
 
+/*
+ * Get the next "chunk" of file data to free.  We traverse the file from
+ * the end so that the file gets shorter over time (if we crash in the
+ * middle, this will leave us in a better state).  We find allocated file
+ * data by simply searching the allocated level 1 indirects.
+ */
 static int
-get_next_chunk(dnode_t *dn, uint64_t *offset, uint64_t limit)
+get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t limit)
 {
-	uint64_t len = *offset - limit;
-	uint64_t chunk_len = dn->dn_datablksz * DMU_MAX_DELETEBLKCNT;
-	uint64_t subchunk =
+	uint64_t len = *start - limit;
+	uint64_t blkcnt = 0;
+	uint64_t maxblks = DMU_MAX_ACCESS / (1ULL << (dn->dn_indblkshift + 1));
+	uint64_t iblkrange =
 	    dn->dn_datablksz * EPB(dn->dn_indblkshift, SPA_BLKPTRSHIFT);
 
-	ASSERT(limit <= *offset);
+	ASSERT(limit <= *start);
 
-	if (len <= chunk_len) {
-		*offset = limit;
+	if (len <= iblkrange * maxblks) {
+		*start = limit;
 		return (0);
 	}
+	ASSERT(ISP2(iblkrange));
 
-	ASSERT(ISP2(subchunk));
-
-	while (*offset > limit) {
-		uint64_t initial_offset = P2ROUNDUP(*offset, subchunk);
-		uint64_t delta;
+	while (*start > limit && blkcnt < maxblks) {
 		int err;
 
-		/* skip over allocated data */
+		/* find next allocated L1 indirect */
 		err = dnode_next_offset(dn,
-		    DNODE_FIND_HOLE|DNODE_FIND_BACKWARDS, offset, 1, 1, 0);
-		if (err == ESRCH)
-			*offset = limit;
-		else if (err)
-			return (err);
+		    DNODE_FIND_BACKWARDS, start, 2, 1, 0);
 
-		ASSERT3U(*offset, <=, initial_offset);
-		*offset = P2ALIGN(*offset, subchunk);
-		delta = initial_offset - *offset;
-		if (delta >= chunk_len) {
-			*offset += delta - chunk_len;
+		/* if there are no more, then we are done */
+		if (err == ESRCH) {
+			*start = limit;
 			return (0);
-		}
-		chunk_len -= delta;
-
-		/* skip over unallocated data */
-		err = dnode_next_offset(dn,
-		    DNODE_FIND_BACKWARDS, offset, 1, 1, 0);
-		if (err == ESRCH)
-			*offset = limit;
-		else if (err)
+		} else if (err) {
 			return (err);
+		}
+		blkcnt += 1;
 
-		if (*offset < limit)
-			*offset = limit;
-		ASSERT3U(*offset, <, initial_offset);
+		/* reset offset to end of "next" block back */
+		*start = P2ALIGN(*start, iblkrange);
+		if (*start <= limit)
+			*start = limit;
+		else
+			*start -= 1;
 	}
 	return (0);
 }

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c	Sun Jun  6 11:36:08 2010	(r208868)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c	Sun Jun  6 13:08:36 2010	(r208869)
@@ -1239,6 +1239,22 @@ dnode_willuse_space(dnode_t *dn, int64_t
 	dmu_tx_willuse_space(tx, space);
 }
 
+/*
+ * This function scans a block at the indicated "level" looking for
+ * a hole or data (depending on 'flags').  If level > 0, then we are
+ * scanning an indirect block looking at its pointers.  If level == 0,
+ * then we are looking at a block of dnodes.  If we don't find what we
+ * are looking for in the block, we return ESRCH.  Otherwise, return
+ * with *offset pointing to the beginning (if searching forwards) or
+ * end (if searching backwards) of the range covered by the block
+ * pointer we matched on (or dnode).
+ *
+ * The basic search algorithm used below by dnode_next_offset() is to
+ * use this function to search up the block tree (widen the search) until
+ * we find something (i.e., we don't return ESRCH) and then search back
+ * down the tree (narrow the search) until we reach our original search
+ * level.
+ */
 static int
 dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
 	int lvl, uint64_t blkfill, uint64_t txg)
@@ -1318,6 +1334,7 @@ dnode_next_offset_level(dnode_t *dn, int
 			error = ESRCH;
 	} else {
 		blkptr_t *bp = data;
+		uint64_t start = *offset;
 		span = (lvl - 1) * epbs + dn->dn_datablkshift;
 		minfill = 0;
 		maxfill = blkfill << ((lvl - 1) * epbs);
@@ -1327,18 +1344,25 @@ dnode_next_offset_level(dnode_t *dn, int
 		else
 			minfill++;
 
-		for (i = (*offset >> span) & ((1ULL << epbs) - 1);
+		*offset = *offset >> span;
+		for (i = BF64_GET(*offset, 0, epbs);
 		    i >= 0 && i < epb; i += inc) {
 			if (bp[i].blk_fill >= minfill &&
 			    bp[i].blk_fill <= maxfill &&
 			    (hole || bp[i].blk_birth > txg))
 				break;
-			if (inc < 0 && *offset < (1ULL << span))
-				*offset = 0;
-			else
-				*offset += (1ULL << span) * inc;
+			if (inc > 0 || *offset > 0)
+				*offset += inc;
+		}
+		*offset = *offset << span;
+		if (inc < 0) {
+			/* traversing backwards; position offset at the end */
+			ASSERT3U(*offset, <=, start);
+			*offset = MIN(*offset + (1ULL << span) - 1, start);
+		} else if (*offset < start) {
+			*offset = start;
 		}
-		if (i < 0 || i == epb)
+		if (i < 0 || i >= epb)
 			error = ESRCH;
 	}
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201006061308.o56D8aRd049837>