From owner-freebsd-hackers@FreeBSD.ORG Thu Aug 23 05:34:12 2007 Return-Path: Delivered-To: freebsd-hackers@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 6264816A421 for ; Thu, 23 Aug 2007 05:34:12 +0000 (UTC) (envelope-from yuriy.tsibizov@gmail.com) Received: from wr-out-0506.google.com (wr-out-0506.google.com [64.233.184.237]) by mx1.freebsd.org (Postfix) with ESMTP id 088F713C46B for ; Thu, 23 Aug 2007 05:34:11 +0000 (UTC) (envelope-from yuriy.tsibizov@gmail.com) Received: by wr-out-0506.google.com with SMTP id 70so299937wra for ; Wed, 22 Aug 2007 22:34:11 -0700 (PDT) DKIM-Signature: a=rsa-sha1; c=relaxed/relaxed; d=gmail.com; s=beta; h=domainkey-signature:received:received:message-id:date:from:to:subject:cc:in-reply-to:mime-version:content-type:content-transfer-encoding:content-disposition:references; b=okeTw7CCf3PdlGa7/nl0oxUuRuNCy56DiXfYspy64Uzuw3ZsrzthAg+cuzaWPCYW6Gx0bzvQn2jqYfE7XlVCDlgKteAYYNw/Z419vUFaml37FIQfMmcnoJAgtt0zD7DH8S7PTap8BfaMVjZHtCYgg4+uXtLqfFyhIuBnr+BdJao= DomainKey-Signature: a=rsa-sha1; c=nofws; d=gmail.com; s=beta; h=received:message-id:date:from:to:subject:cc:in-reply-to:mime-version:content-type:content-transfer-encoding:content-disposition:references; b=so9nXdG1vCKL7OFlVgi4asqQQu5eTZv2SiJRArPAiMH/GfxO+mL4wCJ+ZbMxOYkxXLL/ntdRRQzDRhCg/dY5udJJS7sXTaxCyPVI3/Kjbqee1+T7svIiK901EqhLOVBJX+ED4YQfIaaMddVPyYXOce4LmK1iVbzvEiLwkdoy7ZA= Received: by 10.90.88.13 with SMTP id l13mr5963135agb.1187847250834; Wed, 22 Aug 2007 22:34:10 -0700 (PDT) Received: by 10.90.84.15 with HTTP; Wed, 22 Aug 2007 22:34:10 -0700 (PDT) Message-ID: Date: Thu, 23 Aug 2007 09:34:10 +0400 From: "Yuriy Tsibizov" To: "Roman Divacky" In-Reply-To: <20070822211047.GA35783@freebsd.org> MIME-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Content-Disposition: inline References: <20070822211047.GA35783@freebsd.org> X-Mailman-Approved-At: Thu, 23 Aug 2007 05:39:21 +0000 Cc: freebsd-hackers@freebsd.org, freebsd-emulation@freebsd.org Subject: Re: User-mode Linux (Was: modify syscall nr on-the-fly) X-BeenThere: freebsd-hackers@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: Technical Discussions relating to FreeBSD List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 23 Aug 2007 05:34:12 -0000 2007/8/23, Roman Divacky : > here is a little review of mine... just little suggestions. > > Index: i386/i386/trap.c > =================================================================== > RCS file: /home/ncvs/src/sys/i386/i386/trap.c,v > retrieving revision 1.307 > diff -u -r1.307 trap.c > --- i386/i386/trap.c 26 Jul 2007 15:32:55 -0000 1.307 > +++ i386/i386/trap.c 22 Aug 2007 08:53:19 -0000 > @@ -1004,6 +1004,32 @@ > > PTRACESTOP_SC(p, td, S_PT_SCE); > > + if (__predict_false(p->p_sysent->sv_name[0]=='L')) { > > please use __predict_true(p->p_sysent != &elf_linux_sysvec) Will it be possible to link (GENERIC) kernel wih this check? I can't find elf_linux_sysvec in my /boot/kernel/kernel... > > + if (code != frame->tf_eax) { > + printf("linux sysctl patched: code %d return eax %d\n", code, frame->tf_eax); > + /* retry */ > + code = frame->tf_eax; > + > + if (p->p_sysent->sv_prepsyscall) > + /* > + * The prep code is MP aware. > + */ > + (*p->p_sysent->sv_prepsyscall)(frame, args, &code, ¶ms); > + /* else should always be null */ > + > + if (p->p_sysent->sv_mask) > + code &= p->p_sysent->sv_mask; > > the sv_mask should be removed.. dont use it in your code. its entirely pointless when dealing > with Linux binaries ok > > + if (code >= p->p_sysent->sv_size) > + callp = &p->p_sysent->sv_table[0]; > + else > + callp = &p->p_sysent->sv_table[code]; > + > + narg = callp->sy_narg; > + /* retry ends */ > + } > + } > + > AUDIT_SYSCALL_ENTER(code, td); > error = (*callp->sy_call)(td, args); > AUDIT_SYSCALL_EXIT(error, td); > Index: i386/linux/linux_ptrace.c > =================================================================== > RCS file: /home/ncvs/src/sys/i386/linux/linux_ptrace.c,v > retrieving revision 1.17 > diff -u -r1.17 linux_ptrace.c > --- i386/linux/linux_ptrace.c 22 Feb 2006 18:57:49 -0000 1.17 > +++ i386/linux/linux_ptrace.c 22 Aug 2007 09:27:01 -0000 > @@ -78,6 +78,7 @@ > #define PTRACE_SETFPXREGS 19 > > #define PTRACE_SETOPTIONS 21 > +#define PTRACE_O_TRACESYSGOOD 0x00000001 > > /* > * Linux keeps debug registers at the following > @@ -95,6 +96,10 @@ > return ((signum == SIGSTOP)? 0 : signum); > } > > +struct linux_pt_lreg { > + l_long reg[19]; > +}; > + > struct linux_pt_reg { > l_long ebx; > l_long ecx; > @@ -103,17 +108,17 @@ > l_long edi; > l_long ebp; > l_long eax; > - l_int xds; > - l_int xes; > - l_int xfs; > - l_int xgs; > + l_long xds; > + l_long xes; > + l_long xfs; > + l_long xgs; > l_long orig_eax; > l_long eip; > - l_int xcs; > + l_long xcs; > l_long eflags; > l_long esp; > - l_int xss; > -}; > + l_long xss; > +} __packed; > > why is this necessary? how does it affect amd64 linux32 emulator? I'll need to re-check this. It should not access segment registers. > > /* > * Translate i386 ptrace registers between Linux and FreeBSD formats. > @@ -247,6 +252,7 @@ > struct linux_pt_reg reg; > struct linux_pt_fpreg fpreg; > struct linux_pt_fpxreg fpxreg; > + struct linux_pt_lreg lreg; > } r; > union { > struct reg bsd_reg; > @@ -429,20 +435,21 @@ > * as necessary. > */ > if (uap->addr < sizeof(struct linux_pt_reg)) { > + if (uap->addr == (11 << 2)) /* orig_eax */ > + uap->addr = (6 << 2); /* eax */ > + > error = kern_ptrace(td, PT_GETREGS, pid, &u.bsd_reg, 0); > if (error != 0) > break; > > map_regs_to_linux(&u.bsd_reg, &r.reg); > if (req == PTRACE_PEEKUSR) { > - error = copyout((char *)&r.reg + uap->addr, > - (void *)uap->data, sizeof(l_int)); > + error = copyout((l_long*)(&r.lreg.reg[uap->addr>>2]), > + (void *)uap->data, sizeof(l_long)); > break; > } > > - *(l_int *)((char *)&r.reg + uap->addr) = > - (l_int)uap->data; > - > + r.lreg.reg[uap->addr>>2] = (l_long)uap->data; > map_regs_from_linux(&u.bsd_reg, &r.reg); > error = kern_ptrace(td, PT_SETREGS, pid, &u.bsd_reg, 0); > } > @@ -470,11 +477,34 @@ > error = kern_ptrace(td, PT_SETDBREGS, pid, > &u.bsd_dbreg, 0); > } > - > + } > + break; > + case PTRACE_SETOPTIONS: { > + struct proc *p; > + if (uap->data == PTRACE_O_TRACESYSGOOD) { > + p = td->td_proc; > + PROC_LOCK(p); > + p->p_stops |= S_PT_SYSGOOD; > + PROC_UNLOCK(p); > + break; > + } > + printf("linux: ptrace(21,...,%u) not implemented\n", > + (unsigned int)uap->data); > + error = EINVAL; > + } > break; > > braces around case "case" ? please remove (the blocking there is implicit) and introduce > procedure-wide "p" Similar blocks was in original code, I were trying to keep in it's style. > > + case PTRACE_SYSCALL: { > + struct proc *p; > + > + p = td->td_proc; > + PROC_LOCK(p); > + p->p_stops |= S_PT_LINUX; > + PROC_UNLOCK(p); > + > + if (addr == NULL) addr = (void *)1; > + error = kern_ptrace(td, PT_SYSCALL, pid, addr, uap->data); > } > - case PTRACE_SYSCALL: > - /* fall through */ > + break; > > ditto. > > default: > printf("linux: ptrace(%u, ...) not implemented\n", > (unsigned int)uap->req); > Index: sys/cdefs.h > =================================================================== > RCS file: /home/ncvs/src/sys/sys/cdefs.h,v > retrieving revision 1.93 > diff -u -r1.93 cdefs.h > --- sys/cdefs.h 21 Sep 2006 01:38:58 -0000 1.93 > +++ sys/cdefs.h 10 Aug 2007 18:01:34 -0000 > @@ -338,6 +338,10 @@ > #endif > > /* Compiler-dependent macros that rely on FreeBSD-specific extensions. */ > +#ifndef __FreeBSD_cc_version > +#define __FreeBSD_cc_version 0 > +#endif > + > #if __FreeBSD_cc_version >= 300001 && defined(__GNUC__) && !defined(__INTEL_COMPILER) > #define __printf0like(fmtarg, firstvararg) \ > __attribute__((__format__ (__printf0__, fmtarg, firstvararg))) > Index: sys/ptrace.h > =================================================================== > RCS file: /home/ncvs/src/sys/sys/ptrace.h,v > retrieving revision 1.28 > diff -u -r1.28 ptrace.h > --- sys/ptrace.h 6 Feb 2006 09:41:56 -0000 1.28 > +++ sys/ptrace.h 22 Aug 2007 08:53:45 -0000 > @@ -103,7 +103,12 @@ > #define PTRACESTOP_SC(p, td, flag) \ > if ((p)->p_flag & P_TRACED && (p)->p_stops & (flag)) { \ > PROC_LOCK(p); \ > - ptracestop((td), SIGTRAP); \ > + if (__predict_false(p->p_sysent->sv_name[0]=='L')) { \ > > please use p->p_sysent instead like stated above > > + (p)->p_stops &= ~(S_PT_SCE | S_PT_SCX); \ > + ptracestop((td), SIGTRAP | 0x80); \ > + } \ > + else \ > + ptracestop((td), SIGTRAP); \ > PROC_UNLOCK(p); \ > } > /* > @@ -112,6 +117,16 @@ > */ > #define S_PT_SCE 0x000010000 > #define S_PT_SCX 0x000020000 > +/* > + * Linux ptrace conventions: clear S_PT_SCE and S_PT_SCX before raising > + * signals > + */ > +#define S_PT_LINUX 0x000040000 > +/* > + * Linux ptrace option PTRACE_O_TRACESYSGOOD, when enabled, changes signal > + * number to ( SIGTRAP | 0x80 ) > + */ > +#define S_PT_SYSGOOD 0x000080000 > > int ptrace_set_pc(struct thread *_td, unsigned long _addr); > int ptrace_single_step(struct thread *_td); > Index: compat/linux/linux_misc.c > =================================================================== > RCS file: /home/ncvs/src/sys/compat/linux/linux_misc.c,v > retrieving revision 1.213 > diff -u -r1.213 linux_misc.c > --- compat/linux/linux_misc.c 12 Jun 2007 00:11:57 -0000 1.213 > +++ compat/linux/linux_misc.c 22 Aug 2007 09:07:34 -0000 > @@ -63,6 +63,7 @@ > #include > #include > #include > +#include > > #include > > @@ -852,6 +853,8 @@ > > if (args->status) { > tmpstat &= 0xffff; > + if (!(td->td_proc->p_stops & S_PT_SYSGOOD)) > + tmpstat &= 0x7fff; > if (WIFSIGNALED(tmpstat)) > tmpstat = (tmpstat & 0xffffff80) | > BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); > @@ -898,6 +901,8 @@ > > if (args->status) { > tmpstat &= 0xffff; > + if (!(td->td_proc->p_stops & S_PT_SYSGOOD)) > + tmpstat &= 0x7fff; > if (WIFSIGNALED(tmpstat)) > tmpstat = (tmpstat & 0xffffff80) | > BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); > > > thnx for the patch! Thanks for review, I'm also trying to get PTRACE_SYSEMU to work (looks like the same as PTRACE_SYSCTL, but it does not call syscall handler and relies on SIGTRAP handler to do all syscall work). UM Linux also uses PTRACE_FAULTINFO (returns pointer to something, like a last page fault address) and PTRACE_LDT (updates a process LDT entry), but they are not included in stock Linux kernel and there is no man page describing their behavior. It boots without them. bad news that UM Linux can't access hdd image ("hostfs" does not work too)-- it hangs detecting partitions. Yuriy.