Date: Mon, 09 Mar 1998 16:19:20 -0800 From: Julian Elischer <julian@whistle.com> To: Ollivier Robert <roberto@keltia.freenix.fr> Cc: current@FreeBSD.ORG, mckusick@mcKusick.com Subject: Re: page fault (-current && softupdates) Message-ID: <35048708.6201DD56@whistle.com> References: <19980308230222.A9458@keltia.freenix.fr> <13409.889399148@critter.freebsd.dk> <19980309233834.A297@keltia.freenix.fr>
next in thread | previous in thread | raw e-mail | index | archive | help
This is a multi-part message in MIME format.
--------------63DECDAD62319AC452BFA1D7
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Here's a bit more info on teh Soft Update problem..
Ollivier Robert wrote:
>
> According to Poul-Henning Kamp:
> > Considering that it mentions "allocindir" it would be obvious that it
> > involves files bigger than what we have in our source-tree. That
> > should also be the avenue to reproduction I think...
>
> Reproducting it is very easy :-)
>
> Finding the cause is not. One of the problem I have is that I can work only
> a kernel core dump, being unable to do remote debugging. The trace I get
> shows it stopped in _softdep_setup_allocindir_page when the real point of
> failure is the bcopy at the end of setup_allocindir_phase2.
>
> I don't have access to the latter's variables since it is a private
> function (I guess). I'll make it a public one and see if I can dig
> something out of this.
>
> Either one of the two pointers (newindirdep->ir_saveddata,
> newindirdep->ir_savebp->b_data) is bad or bp->b_bcount is too big (8192
> which seems right for an indirect block on a 8k/1k FS).
I have this expanded out here..
I replaced bcopy with BC() which in turn calls teh assembler bcopy()
this gave me the stack trace needed
here is a transcript of the debugging session..
from this it looks as if BOTH newindirdep->ir_saveddata AND
newindirdep->ir_savebp are NULL (!??)
looking at the code I can't see how this is possible.
>
> Just extending an existing file doen't seem to trigger it (just tried it).
> Using DDB and putting a break point in setup_allocindir_phrase2 shows that
> the function is called many times but I can't get the bug to manifest
> inside DDB <sigh>.
>
> I'd appreciate if someone with more internal knowledge than I have could
> help :-)
>
> > >It happens for me in two cases:
> > >- rnews batch processing,
> > >- procmail delivering mail into mailboxes.
yes it seems to happen in rathe rspecific cases, but just plain
HEAVY LOAD
is not enough.
>
> I can see why for the latter case (procmail tries to append a message in
> one mailbox). The former could be .overview or history append...
> --
--------------63DECDAD62319AC452BFA1D7
Content-Type: text/plain; charset=iso-8859-1; name="gdb2"
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="gdb2"
NOTE: BC() is an indirect bcopy that saves a stack frame...
Script started on Mon Mar 9 23:09:10 1998
phaser2# gdb
GDB is free software and you are welcome to distribute copies of it
under certain conditions; type "show copying" to see the conditions.
There is absolutely no warranty for GDB; type "show warranty" for details.
GDB 4.16 (i386-unknown-freebsd), Copyright 1996 Free Software Foundation, Inc.
Debugger (msg=0xf011953c "panic") at ../../i386/i386/db_interface.c:317
317 in_Debugger = 0;
(gdb) where
#0 Debugger (msg=0xf011953c "panic") at ../../i386/i386/db_interface.c:317
#1 0xf011959e in panic (
fmt=0xf01cbab0 "vm_fault: fault on nofault entry, addr: %lx")
at ../../kern/kern_shutdown.c:434
#2 0xf01cbbe4 in vm_fault (map=0xf026482c, vaddr=4049862656,
fault_type=3 '\003', fault_flags=0) at ../../vm/vm_fault.c:227
#3 0xf01eca48 in trap_pfault (frame=0xf2d0fc80, usermode=0)
at ../../i386/i386/trap.c:724
#4 0xf01ec6c7 in trap (frame={tf_es = -222429168, tf_ds = 16,
tf_edi = -245104640, tf_esi = -246204416, tf_ebp = -221184812,
tf_isp = -221184856, tf_ebx = -248986476, tf_edx = 8192, tf_ecx = 512,
tf_eax = 1099776, tf_trapno = 12, tf_err = 2, tf_eip = -266422854,
tf_cs = 8, tf_eflags = 66054, tf_esp = -258680576, tf_ss = -260642304})
at ../../i386/i386/trap.c:363
#5 0xf01e3401 in calltrap ()
[***POW***]
#6 0xf01bbd06 in setup_allocindir_phase2 (bp=0xf128c494, ip=0xf08f9c00,
aip=0xf094d900) at ../../ufs/ffs/ffs_softdep.c:1537
#7 0xf01bb9cb in softdep_setup_allocindir_page (ip=0xf08f9c00, lbn=13,
bp=0xf128c494, ptrno=1, newblkno=918504, oldblkno=0, nbp=0xf12a7970)
at ../../ufs/ffs/ffs_softdep.c:1419
#8 0xf01b8f3d in ffs_balloc (ap=0xf2d0fea4) at ../../ufs/ffs/ffs_balloc.c:302
#9 0xf01c270c in ffs_write (ap=0xf2d0fef8) at vnode_if.h:995
#10 0xf013dc77 in vn_write (fp=0xf0905480, uio=0xf2d0ff40, cred=0xf070c800)
at vnode_if.h:331
#11 0xf012116b in write (p=0xf2bf1f40, uap=0xf2d0ff94)
at ../../kern/sys_generic.c:268
#12 0xf01ed2af in syscall (frame={tf_es = 39, tf_ds = 39, tf_edi = 237568,
tf_esi = 103916, tf_ebp = -272640152, tf_isp = -221184028,
tf_ebx = 537440352, tf_edx = 103916, tf_ecx = -272640112, tf_eax = 4,
tf_trapno = 7, tf_err = 7, tf_eip = 537384545, tf_cs = 31,
tf_eflags = 646, tf_esp = -272640176, tf_ss = 39})
at ../../i386/i386/trap.c:994
#13 0xf01e3455 in Xsyscall ()
#14 0x2007b1d1 in ?? ()
#15 0x2006bb50 in ?? ()
#16 0x3bf7 in ?? ()
#17 0x1099 in ?? ()
(gdb) up 6
#6 0xf01bbd06 in setup_allocindir_phase2 (bp=0xf128c494, ip=0xf08f9c00,
aip=0xf094d900) at ../../ufs/ffs/ffs_softdep.c:1537
1537 BC((caddr_t)newindirdep->ir_saveddata,
(gdb) list
1532 LIST_INIT(&newindirdep->ir_deplisthd);
1533 LIST_INIT(&newindirdep->ir_donehd);
1534 newindirdep->ir_saveddata = (ufs_daddr_t *)bp->b_data;
1535 newindirdep->ir_savebp =
1536 getblk(ip->i_devvp, bp->b_blkno, bp->b_bcount, 0, 0);
1537 BC((caddr_t)newindirdep->ir_saveddata,
1538 newindirdep->ir_savebp->b_data, bp->b_bcount);
1539 }
1540 }
1541
(gdb) print newindirdep
$1 = (struct indirdep *) 0xf1533800
(gdb) p *$1
$2 = {ir_list = {wk_list = {le_next = 0x0, le_prev = 0x0}, wk_type = 0,
wk_state = 0}, ir_saveddata = 0x0, ir_savebp = 0x0, ir_donehd = {
lh_first = 0x0}, ir_deplisthd = {lh_first = 0x0}}
[*** EH? it's all NULLS??? ***]
(gdb) p $1->ir_savebp
$3 = (struct buf *) 0x0
[*** this could be a problem.. ***]
(gdb) p bp->b_bcount
$4 = 8192
(gdb) info local
aip = (struct allocindir *) 0xf1640000
wk = (struct worklist *) 0x12
indirdep = (struct indirdep *) 0x0
newindirdep = (struct indirdep *) 0xf1533800
bmsafemap = (struct bmsafemap *) 0xf0250240
oldaip = (struct allocindir *) 0x0
freefrag = (struct freefrag *) 0xf01e23f5
newblk = (struct newblk *) 0xf01ba325
(gdb) list setup_allocindir_phase2
1447 static void
1448 setup_allocindir_phase2(bp, ip, aip)
1449 struct buf *bp; /* in-memory copy of the indirect block */
1450 struct inode *ip; /* inode for file being extended */
1451 struct allocindir *aip; /* allocindir allocated by the above routines */
1452 {
1453 struct worklist *wk;
1454 struct indirdep *indirdep, *newindirdep;
1455 struct bmsafemap *bmsafemap;
1456 struct allocindir *oldaip;
1457 struct freefrag *freefrag;
1458 struct newblk *newblk;
1459
1460 if (bp->b_lblkno >= 0)
1461 panic("setup_allocindir_phase2: not indir blk");
1462 for (indirdep = NULL, newindirdep = NULL; ; ) {
1463 ACQUIRE_LOCK(&lk);
1464 for (wk = LIST_FIRST(&bp->b_dep); wk;
1465 wk = LIST_NEXT(wk, wk_list)) {
1466 if (wk->wk_type != D_INDIRDEP)
1467 continue;
1468 indirdep = WK_INDIRDEP(wk);
1469 break;
1470 }
1471 if (indirdep == NULL && newindirdep) {
1472 indirdep = newindirdep;
1473 WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list);
1474 newindirdep = NULL;
1475 }
1476 FREE_LOCK(&lk);
1477 if (indirdep) {
1478 if (newblk_lookup(ip->i_fs, aip->ai_newblkno, 0,
1479 &newblk) == 0)
1480 panic("setup_allocindir: lost block");
1481 ACQUIRE_LOCK(&lk);
1482 if (newblk->nb_state == DEPCOMPLETE) {
1483 aip->ai_state |= DEPCOMPLETE;
1484 aip->ai_buf = NULL;
1485 } else {
1486 bmsafemap = newblk->nb_bmsafemap;
1487 aip->ai_buf = bmsafemap->sm_buf;
1488 LIST_REMOVE(newblk, nb_deps);
1489 LIST_INSERT_HEAD(&bmsafemap->sm_allocindirhd,
1490 aip, ai_deps);
1491 }
1492 LIST_REMOVE(newblk, nb_hash);
1493 FREE(newblk, M_NEWBLK);
1494 aip->ai_indirdep = indirdep;
1495 /*
1496 * Check to see if there is an existing dependency
1497 * for this block. If there is, merge the old
1498 * dependency into the new one.
1499 */
1500 if (aip->ai_oldblkno == 0)
1501 oldaip = NULL;
1502 else
1503 for (oldaip=LIST_FIRST(&indirdep->ir_deplisthd);
1504 oldaip; oldaip = LIST_NEXT(oldaip, ai_next))
1505 if (oldaip->ai_offset == aip->ai_offset)
1506 break;
1507 if (oldaip != NULL) {
1508 if (oldaip->ai_newblkno != aip->ai_oldblkno)
1509 panic("setup_allocindir_phase2: blkno");
1510 aip->ai_oldblkno = oldaip->ai_oldblkno;
1511 freefrag = oldaip->ai_freefrag;
1512 oldaip->ai_freefrag = aip->ai_freefrag;
1513 aip->ai_freefrag = freefrag;
1514 free_allocindir(oldaip, NULL);
1515 }
1516 LIST_INSERT_HEAD(&indirdep->ir_deplisthd, aip, ai_next);
1517 ((ufs_daddr_t *)indirdep->ir_savebp->b_data)
1518 [aip->ai_offset] = aip->ai_oldblkno;
1519 FREE_LOCK(&lk);
1520 }
1521 if (newindirdep) {
1522 if (indirdep->ir_savebp != NULL)
1523 brelse(newindirdep->ir_savebp);
1524 WORKITEM_FREE((caddr_t)newindirdep, D_INDIRDEP);
1525 }
1526 if (indirdep)
1527 break;
1528 MALLOC(newindirdep, struct indirdep *, sizeof(struct indirdep),
1529 M_INDIRDEP, M_WAITOK);
1530 newindirdep->ir_list.wk_type = D_INDIRDEP;
1531 newindirdep->ir_state = ATTACHED;
1532 LIST_INIT(&newindirdep->ir_deplisthd);
1533 LIST_INIT(&newindirdep->ir_donehd);
1534 newindirdep->ir_saveddata = (ufs_daddr_t *)bp->b_data;
1535 newindirdep->ir_savebp =
1536 getblk(ip->i_devvp, bp->b_blkno, bp->b_bcount, 0, 0);
1537 BC((caddr_t)newindirdep->ir_saveddata,
1538 newindirdep->ir_savebp->b_data, bp->b_bcount);
1539 }
1540 }
1541
1542 /*
1543 * Block de-allocation dependencies.
1544 *
1545 * When blocks are de-allocated, the on-disk pointers must be nullified before
1546 * the blocks are made available for use by other files. (The true
[*** and the caller ***]
(gdb) list softdep_setup_allocindir_page
1398 struct buf *bp; /* buffer with indirect blk referencing page */
1399 int ptrno; /* offset of pointer in indirect block */
1400 ufs_daddr_t newblkno; /* disk block number being added */
1401 ufs_daddr_t oldblkno; /* previous block number, 0 if none */
1402 struct buf *nbp; /* buffer holding allocated page */
1403 {
1404 struct allocindir *aip;
1405 struct pagedep *pagedep;
1406
1407 aip = newallocindir(ip, ptrno, newblkno, oldblkno);
1408 ACQUIRE_LOCK(&lk);
1409 /*
1410 * If we are allocating a directory page, then we must
1411 * allocate an associated pagedep to track additions and
1412 * deletions.
1413 */
1414 if ((ip->i_mode & IFMT) == IFDIR &&
1415 pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0)
1416 WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list);
1417 WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list);
1418 FREE_LOCK(&lk);
1419 setup_allocindir_phase2(bp, ip, aip);
1420 }
1421
1422 /*
1423 * Called just before setting an indirect block pointer to a
1424 * newly allocated indirect block.
1425 */
1426 void
1427 softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, newblkno)
(gdb) up
#7 0xf01bb9cb in softdep_setup_allocindir_page (ip=0xf08f9c00, lbn=13,
bp=0xf128c494, ptrno=1, newblkno=918504, oldblkno=0, nbp=0xf12a7970)
at ../../ufs/ffs/ffs_softdep.c:1419
1419 setup_allocindir_phase2(bp, ip, aip);
(gdb) p aip
$5 = (struct allocindir *) 0xf094d900
(gdb) info local
ip = (struct inode *) 0xf08f9c00
inbp = (struct buf *) 0xf12a7970
aip = (struct allocindir *) 0xf094d900
pagedep = (struct pagedep *) 0xf12a7970
(gdb) info frame
Stack level 7, frame at 0xf2d0fd44:
eip = 0xf01bb9cb in softdep_setup_allocindir_page
(../../ufs/ffs/ffs_softdep.c:1419); saved eip 0xf01b8f3d
called by frame at 0xf2d0fe34, caller of frame at 0xf2d0fd14
source language c.
Arglist at 0xf2d0fd44, args: ip=0xf08f9c00, lbn=13, bp=0xf128c494, ptrno=1,
newblkno=918504, oldblkno=0, nbp=0xf12a7970
Locals at 0xf2d0fd44, Previous frame's sp is 0x0
Saved registers:
ebx at 0xf2d0fd34, ebp at 0xf2d0fd44, esi at 0xf2d0fd38, edi at 0xf2d0fd3c,
eip at 0xf2d0fd48
(gdb) down
#6 0xf01bbd06 in setup_allocindir_phase2 (bp=0xf128c494, ip=0xf08f9c00,
aip=0xf094d900) at ../../ufs/ffs/ffs_softdep.c:1537
1537 BC((caddr_t)newindirdep->ir_saveddata,
(gdb) p bp
$6 = (struct buf *) 0xf128c494
(gdb) p *bp
$7 = {b_hash = {le_next = 0x0, le_prev = 0xf025e340}, b_vnbufs = {
le_next = 0x0, le_prev = 0xf12a7978}, b_freelist = {tqe_next = 0xf12c0f80,
tqe_prev = 0xf12c1af8}, b_act = {tqe_next = 0xf1291100,
tqe_prev = 0xf0681dd4}, b_proc = 0x0, b_flags = 536870960, b_qindex = 0,
b_usecount = 6 '\006', b_error = 0, b_bufsize = 8192, b_bcount = 8192,
b_resid = 0, b_dev = 4294967295, b_data = 0xf1532000 "à\003\016",
b_kvabase = 0xf1532000 "à\003\016", b_kvasize = 8192, b_lblkno = -12,
b_blkno = -12, b_iodone = 0, b_iodone_chain = 0x0, b_vp = 0xf2c7d7c0,
b_dirtyoff = 0, b_dirtyend = 0, b_generation = 37739, b_rcred = 0x0,
b_wcred = 0x0, b_validoff = 0, b_validend = 0, b_pblkno = 2949231,
b_saveaddr = 0x0, b_savekva = 0x0, b_driver1 = 0x0, b_driver2 = 0x0,
b_spc = 0x0, b_cluster = {cluster_head = {tqh_first = 0xf1298d54,
tqh_last = 0xf12bf4b4}, cluster_entry = {tqe_next = 0xf1298d54,
tqe_prev = 0xf12bf4b4}}, b_pages = {0xf047a5b4, 0xf047b2e8,
0x0 <repeats 30 times>}, b_npages = 2, b_dep = {lh_first = 0x0}}
(gdb) p wk
$8 = (struct worklist *) 0x12
(gdb) info local
aip = (struct allocindir *) 0xf1640000
wk = (struct worklist *) 0x12
indirdep = (struct indirdep *) 0x0
newindirdep = (struct indirdep *) 0xf1533800
bmsafemap = (struct bmsafemap *) 0xf0250240
oldaip = (struct allocindir *) 0x0
freefrag = (struct freefrag *) 0xf01e23f5
newblk = (struct newblk *) 0xf01ba325
(gdb) p ip
$11 = (struct inode *) 0xf08f9c00
(gdb) p *ip
$12 = {i_lock = {lk_interlock = {lock_data = 0}, lk_flags = 1024,
lk_sharecount = 0, lk_waitcount = 0, lk_exclusivecount = 1, lk_prio = 8,
lk_wmesg = 0xf01c187b "inode", lk_timo = 0, lk_lockholder = 7712},
i_hash = {le_next = 0x0, le_prev = 0xf07e7c20}, i_vnode = 0xf2c7d7c0,
i_devvp = 0xf2bef060, i_flag = 142, i_dev = 132102, i_number = 215166,
i_effnlink = 1, inode_u = {fs = 0xf06ba000, e2fs = 0xf06ba000}, i_dquot = {
0x0, 0x0}, i_modrev = 0x3500d1f98f53e53d, i_lockf = 0x0, i_count = 0,
i_endoff = 0, i_diroff = 0, i_offset = 0, i_ino = 0, i_reclen = 0,
i_spare = {0, 0, 0, 0, 0}, i_din = {di_mode = 33188, di_nlink = 1, di_u = {
oldids = {0, 0}, inumber = 0}, di_size = 106496, di_atime = 889246204,
di_atimensec = 0, di_mtime = 889246315, di_mtimensec = 0,
di_ctime = 889246315, di_ctimensec = 0, di_db = {919968, 919976, 919984,
919992, 920000, 920160, 920168, 920176, 920184, 920192, 920200, 920208},
di_ib = {917512, 0, 0}, di_flags = 0, di_blocks = 240, di_gen = 329868342,
di_uid = 0, di_gid = 0, di_spare = {0, 0}}}
(gdb) list
1527 break;
1528 MALLOC(newindirdep, struct indirdep *, sizeof(struct indirdep),
1529 M_INDIRDEP, M_WAITOK);
1530 newindirdep->ir_list.wk_type = D_INDIRDEP;
1531 newindirdep->ir_state = ATTACHED;
1532 LIST_INIT(&newindirdep->ir_deplisthd);
1533 LIST_INIT(&newindirdep->ir_donehd);
1534 newindirdep->ir_saveddata = (ufs_daddr_t *)bp->b_data;
1535 newindirdep->ir_savebp =
1536 getblk(ip->i_devvp, bp->b_blkno, bp->b_bcount, 0, 0);
1537 BC((caddr_t)newindirdep->ir_saveddata,
1538 newindirdep->ir_savebp->b_data, bp->b_bcount);
1539 }
1540 }
1541
1542 /*
1543 * Block de-allocation dependencies.
1544 *
1545 * When blocks are de-allocated, the on-disk pointers must be nullified before
1546 * the blocks are made available for use by other files. (The true
(gdb) quit
Script done on Mon Mar 9 23:29:59 1998
--------------63DECDAD62319AC452BFA1D7--
To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-current" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?35048708.6201DD56>
