Date: Tue, 4 Mar 2008 22:31:53 +0100 From: Juergen Lock <nox@jelal.kn-bremen.de> To: qemu-devel@nongnu.org, freebsd-emulation@FreeBSD.org Subject: patch: tcg bug (was: Re: qemu 2008-03-02 snapshot FreeBSD 7.0/amd64 guest regression) (tcg?) Message-ID: <20080304213153.GB15959@saturn.kn-bremen.de> In-Reply-To: <20080303000134.GA68444@saturn.kn-bremen.de> References: <20080302204702.GA62895@saturn.kn-bremen.de> <20080303000134.GA68444@saturn.kn-bremen.de>
next in thread | previous in thread | raw e-mail | index | archive | help
--pWyiEgJYm5f9v55/ Content-Type: text/plain; charset=us-ascii Content-Disposition: inline On Mon, Mar 03, 2008 at 01:01:34AM +0100, Juergen Lock wrote: > On Sun, Mar 02, 2008 at 09:47:02PM +0100, Juergen Lock wrote: > > Hi! > > > > I've prepared a FreeBSD qemu-devel port update, as already mentioned > > on the freebsd-emulation list, and found the FreeBSD 7.0/amd64 isos > > now pagefault repeatedly, saying: > > > > panic: page fault > > cpuid = 0 > > kernel trap 12 with interrupts disabled > > > > > > Fatal trap 12: page fault while in kernel mode > > cpuid = 0; apic id = 00 > > fault virtual address = 0x20 > > fault code = supervisor read data, page not present > > instruction pointer = 0x8:0xffffffff8046c704 > > trap number = 12 > > frame pointer = 0x10:0x0 > > ... > > > > 0xffffffff8046c704 in the 7.0-RELEASE kernel used on the isos is > > in _thread_lock_flags: > > > > (kgdb) disassemble _thread_lock_flags > > Dump of assembler code for function _thread_lock_flags: > > 0xffffffff8046c6e0 <_thread_lock_flags+0>: push %r14 > > 0xffffffff8046c6e2 <_thread_lock_flags+2>: mov %rdi,%r14 > > 0xffffffff8046c6e5 <_thread_lock_flags+5>: push %r13 > > 0xffffffff8046c6e7 <_thread_lock_flags+7>: push %r12 > > 0xffffffff8046c6e9 <_thread_lock_flags+9>: push %rbp > > 0xffffffff8046c6ea <_thread_lock_flags+10>: push %rbx > > 0xffffffff8046c6eb <_thread_lock_flags+11>: mov %gs:0x0,%r13 > > 0xffffffff8046c6f4 <_thread_lock_flags+20>: xor %r12d,%r12d > > 0xffffffff8046c6f7 <_thread_lock_flags+23>: callq 0xffffffff8071df80 <spinlock_enter> > > 0xffffffff8046c6fc <_thread_lock_flags+28>: mov (%r14),%rbp > > 0xffffffff8046c6ff <_thread_lock_flags+31>: mov $0x4,%eax > > 0xffffffff8046c704 <_thread_lock_flags+36>: lock cmpxchg %r13,0x20(%rbp) > > 0xffffffff8046c70a <_thread_lock_flags+42>: sete %al > > 0xffffffff8046c70d <_thread_lock_flags+45>: test %al,%al > > 0xffffffff8046c70f <_thread_lock_flags+47>: jne 0xffffffff8046c799 <_thread_lock_flags+185> > > 0xffffffff8046c715 
<_thread_lock_flags+53>: mov 0x20(%rbp),%rdx > > 0xffffffff8046c719 <_thread_lock_flags+57>: cmp %r13,%rdx > > 0xffffffff8046c71c <_thread_lock_flags+60>: je 0xffffffff8046c7cd <_thread_lock_flags+237> > > 0xffffffff8046c722 <_thread_lock_flags+66>: callq 0xffffffff8071c4e0 <spinlock_exit> > > ---Type <return> to continue, or q <return> to quit--- > > 0xffffffff8046c727 <_thread_lock_flags+71>: jmp 0xffffffff8046c73c <_thread_lock_flags+92> > > 0xffffffff8046c729 <_thread_lock_flags+73>: data16 > > ... > > > > so this looks like either %rbp is indeed zero or that cmpxchg insn isnt > > getting correctly translated. If you want to reproduce just boot the 35 MB > > 7.0-RELEASE-amd64-bootonly.iso in qemu-system-x86_64 (without kqemu); you > > can find mirrors via > > http://mirrorlist.freebsd.org/ > > (search for isos, amd64 architecture, I used 7.0 as you can see.) > > > > Oh, if you want to look at the live kernel you can boot the > > 7.0-RELEASE-amd64-livefs.iso in 0.9.1 with the previously mentioned > > patch (see > > http://www.nabble.com/forum/ViewPost.jtp?post=14921171 > > ), select fixit->cdrom in the menu that comes up after choosing > > the keyboard layout, and run `kgdb /dist/boot/kernel/kernel /dev/mem'. > > Update: looks like the bug is i386 host only, at least I got a report > of amd64 host working. (will try to confirm later...) Ok, confirmed. Tho the real problem was another fault that I must have overlooked at first: There's a movsbq at (in this kernel) vm_phys_free_pages+4 that gets sign extended wrong, 1 gets turned into 0x100000001 in %r8 at vm_phys_free_pages+9, which causes the offset into vm_phys_segs (0xffffffff80a68340) to end up as 0x2000000020 instead of 0x20, causing the first fault at vm_phys_free_pages+43, which then only seems to trigger the repeated faults mentioned above. 
(kgdb) disassemble vm_phys_free_pages Dump of assembler code for function vm_phys_free_pages: 0xffffffff80692ae0 <vm_phys_free_pages+0>: push %r12 0xffffffff80692ae2 <vm_phys_free_pages+2>: push %rbp 0xffffffff80692ae3 <vm_phys_free_pages+3>: push %rbx 0xffffffff80692ae4 <vm_phys_free_pages+4>: movsbq 0x61(%rdi),%r8 0xffffffff80692ae9 <vm_phys_free_pages+9>: mov %esi,%ebx 0xffffffff80692aeb <vm_phys_free_pages+11>: mov 0x40(%rdi),%rbp 0xffffffff80692aef <vm_phys_free_pages+15>: shl $0x5,%r8 0xffffffff80692af3 <vm_phys_free_pages+19>: cmp $0xb,%esi 0xffffffff80692af6 <vm_phys_free_pages+22>: lea 0xffffffff80a68340(%r8),%r12 0xffffffff80692afd <vm_phys_free_pages+29>: jg 0xffffffff80692c4e <vm_phys_free_pages+366> 0xffffffff80692b03 <vm_phys_free_pages+35>: lea 0xc(%rbx),%ecx 0xffffffff80692b06 <vm_phys_free_pages+38>: mov $0x1,%eax 0xffffffff80692b0b <vm_phys_free_pages+43>: mov 0xffffffff80a68340(%r8),%rdx 0xffffffff80692b12 <vm_phys_free_pages+50>: shl %cl,%eax 0xffffffff80692b14 <vm_phys_free_pages+52>: cltq 0xffffffff80692b16 <vm_phys_free_pages+54>: xor %rbp,%rax 0xffffffff80692b19 <vm_phys_free_pages+57>: cmp %rdx,%rax ... I'll attach the fix for qemu/tcg/tcg-op.h (the same bug was in the movswq case). 
More interesting for the ppl reading -emulation might be a patch I applied to kgdb, to be able to do `kgdb -r <32bit-box>:1234 kernel.debug' from the amd64 box to talk to qemu -s -S (yeah I ended up using a debug kernel for this, as you can see above by the missing `mov %rsp,%rbp' the kernel on the isos got built with -fomit-frame-pointer): Index: src/gnu/usr.bin/gdb/kgdb/main.c =================================================================== RCS file: /home/ncvs/src/gnu/usr.bin/gdb/kgdb/main.c,v retrieving revision 1.11 diff -u -r1.11 main.c --- src/gnu/usr.bin/gdb/kgdb/main.c 4 Jan 2006 23:17:52 -0000 1.11 +++ src/gnu/usr.bin/gdb/kgdb/main.c 4 Mar 2008 15:38:39 -0000 @@ -408,19 +408,21 @@ errx(1, "%s: not a regular file", path); vmcore = strdup(path); } else if (remote != NULL && remote[0] != ':' && remote[0] != '|') { - if (stat(remote, &st) != 0) { - snprintf(path, sizeof(path), "/dev/%s", remote); - if (stat(path, &st) != 0) { - err(1, "%s", remote); + if (!strchr(remote, ':')) { + if (stat(remote, &st) != 0) { + snprintf(path, sizeof(path), "/dev/%s", remote); + if (stat(path, &st) != 0) { + err(1, "%s", remote); + /* NOTREACHED */ + } + free(remote); + remote = strdup(path); + } + if (!S_ISCHR(st.st_mode) && !S_ISFIFO(st.st_mode)) { + errx(1, "%s: not a special file, FIFO or socket", + remote); /* NOTREACHED */ } - free(remote); - remote = strdup(path); - } - if (!S_ISCHR(st.st_mode) && !S_ISFIFO(st.st_mode)) { - errx(1, "%s: not a special file, FIFO or socket", - remote); - /* NOTREACHED */ } } else if (argc > optind) { if (vmcore == NULL) enjoy, Juergen --pWyiEgJYm5f9v55/ Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename=patch-tcg3 Index: qemu/tcg/tcg-op.h @@ -1172,7 +1172,7 @@ tcg_gen_op3i(INDEX_op_qemu_ld8s, ret, addr, mem_index); #else tcg_gen_op4i(INDEX_op_qemu_ld8s, ret, addr, TCGV_HIGH(addr), mem_index); - tcg_gen_ext8s_i32(TCGV_HIGH(ret), ret); + tcg_gen_sari_i32(TCGV_HIGH(ret), ret, 31); #endif } @@ -1192,7 
+1192,7 @@ tcg_gen_op3i(INDEX_op_qemu_ld16s, ret, addr, mem_index); #else tcg_gen_op4i(INDEX_op_qemu_ld16s, ret, addr, TCGV_HIGH(addr), mem_index); - tcg_gen_ext16s_i32(TCGV_HIGH(ret), ret); + tcg_gen_sari_i32(TCGV_HIGH(ret), ret, 31); #endif } --pWyiEgJYm5f9v55/--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20080304213153.GB15959>