Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 6 Jul 2002 21:57:24 -0700 (PDT)
From:      Don Lewis <dl-freebsd@catspoiler.org>
To:        Georg.Koltermann@mscsoftware.com
Cc:        obrien@FreeBSD.ORG, current@FreeBSD.ORG, mckusick@FreeBSD.ORG
Subject:   Re: dump(8) is hosed
Message-ID:  <200207070458.g674wI0M021931@gw.catspoiler.org>
In-Reply-To: <1025887736.773.116.camel@hunter.muc.macsch.com>

next in thread | previous in thread | raw e-mail | index | archive | help
On  5 Jul, Georg-W. Koltermann wrote:
> Am Mi, 2002-07-03 um 17.31 schrieb David O'Brien:
>> On a 27-June-2002 23:02:00 UTC system (just before ipfw2 went in,
>> pre-KSE3), dump will not complete dumping more than 5GB.  At that point
>> it stops responding properly to ^T, which should give "DUMP: 47.52% done,
>> finished in 1:19".  At the 5GB mark, ^T gives:
>> 
>>     load: 0.00  cmd: dump 3981 [physstr] 2.11u 43.06s 0% 1536k
>> 
>> and never changes.  The user and system times never advance.  Anybody
>> have any ideas?
> 
> For me it is broken in a different way. For a small FS like / it works,
> but dumping my /home, which is 4G, I get 
> 
>      DUMP: read error from /dev/ad0s5e: Invalid argument: [sector -1054739789]: count=-1
>      DUMP: read error from /dev/ad0s5e: Invalid argument: [sector -1054739788]: count=-1
>      DUMP: read error from /dev/ad0s5e: Invalid argument: [sector -1054739787]: count=-1
>      DUMP: read error from /dev/ad0s5e: Invalid argument: [sector -1054739786]: count=-1
>     
> and on and on. 
> 
> Maybe a 32 bit <--> 64 bit mismatch caused by UFS2?  My -current is of
> date=2002.06.27.22.00.00.

I was finally able to reproduce this by creating a large file
before doing the dump.  Dump(8) is *very* hosed.  The UFS2 import broke
its ability to follow multiple levels of indirect blocks.

Here's a patch that fixes the problem and also corrects a bunch of print
format mismatches:

Index: tape.c
===================================================================
RCS file: /home/ncvs/src/sbin/dump/tape.c,v
retrieving revision 1.20
diff -u -r1.20 tape.c
--- tape.c	21 Jun 2002 06:17:57 -0000	1.20
+++ tape.c	7 Jul 2002 03:56:31 -0000
@@ -204,7 +204,7 @@
 		quit("Cannot recover\n");
 		/* NOTREACHED */
 	}
-	msg("write error %d blocks into volume %d\n", blocksthisvol, tapeno);
+	msg("write error %ld blocks into volume %d\n", blocksthisvol, tapeno);
 	broadcast("DUMP WRITE ERROR!\n");
 	if (!query("Do you want to restart?"))
 		dumpabort(0);
Index: traverse.c
===================================================================
RCS file: /home/ncvs/src/sbin/dump/traverse.c,v
retrieving revision 1.19
diff -u -r1.19 traverse.c
--- traverse.c	21 Jun 2002 06:17:57 -0000	1.19
+++ traverse.c	7 Jul 2002 04:24:14 -0000
@@ -275,9 +275,9 @@
 {
 	int ret = 0;
 	int i;
-	static caddr_t idblk;
+	caddr_t idblk;
 
-	if (idblk == NULL && (idblk = malloc(sblock->fs_bsize)) == NULL)
+	if ((idblk = malloc(sblock->fs_bsize)) == NULL)
 		quit("dirindir: cannot allocate indirect memory.\n");
 	bread(fsbtodb(sblock, blkno), idblk, (int)sblock->fs_bsize);
 	if (ind_level <= 0) {
@@ -294,6 +294,7 @@
 			else
 				*filesize -= sblock->fs_bsize;
 		}
+		free(idblk);
 		return (ret);
 	}
 	ind_level--;
@@ -306,6 +307,7 @@
 			ret |= dirindir(ino, blkno, ind_level, filesize,
 			    tapesize, nodump);
 	}
+	free(idblk);
 	return (ret);
 }
 
@@ -501,9 +503,9 @@
 dmpindir(ino_t ino, ufs2_daddr_t blk, int ind_level, off_t *size)
 {
 	int i, cnt;
-	static caddr_t idblk;
+	caddr_t idblk;
 
-	if (idblk == NULL && (idblk = malloc(sblock->fs_bsize)) == NULL)
+	if ((idblk = malloc(sblock->fs_bsize)) == NULL)
 		quit("dmpindir: cannot allocate indirect memory.\n");
 	if (blk != 0)
 		bread(fsbtodb(sblock, blk), idblk, (int) sblock->fs_bsize);
@@ -519,6 +521,7 @@
 			ufs1_blksout((ufs1_daddr_t *)idblk, cnt, ino);
 		else
 			ufs2_blksout((ufs2_daddr_t *)idblk, cnt, ino);
+		free(idblk);
 		return;
 	}
 	ind_level--;
@@ -529,9 +532,12 @@
 		else
 			dmpindir(ino, ((ufs2_daddr_t *)idblk)[i], ind_level,
 			    size);
-		if (*size <= 0)
+		if (*size <= 0) {
+			free(idblk);
 			return;
+		}
 	}
+	free(idblk);
 }
 
 /*
@@ -705,13 +711,13 @@
 		goto loop;
 	}
 	if (cnt == -1)
-		msg("read error from %s: %s: [block %d]: count=%d\n",
+		msg("read error from %s: %s: [block %qd]: count=%d\n",
 			disk, strerror(errno), blkno, size);
 	else
-		msg("short read error from %s: [block %d]: count=%d, got=%d\n",
+		msg("short read error from %s: [block %qd]: count=%d, got=%d\n",
 			disk, blkno, size, cnt);
 	if (++breaderrors > BREADEMAX) {
-		msg("More than %d block read errors from %d\n",
+		msg("More than %d block read errors from %s\n",
 			BREADEMAX, disk);
 		broadcast("DUMP IS AILING!\n");
 		msg("This is an unrecoverable error.\n");
@@ -730,11 +736,11 @@
 		    ((off_t)blkno << dev_bshift))) == dev_bsize)
 			continue;
 		if (cnt == -1) {
-			msg("read error from %s: %s: [sector %d]: count=%d\n",
+			msg("read error from %s: %s: [sector %qd]: count=%d\n",
 				disk, strerror(errno), blkno, dev_bsize);
 			continue;
 		}
-		msg("short read error from %s: [sector %d]: count=%d, got=%d\n",
+		msg("short read error from %s: [sector %qd]: count=%d, got=%d\n",
 			disk, blkno, dev_bsize, cnt);
 	}
 }



To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-current" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200207070458.g674wI0M021931>