Date: Thu, 23 Aug 2007 09:34:10 +0400 From: "Yuriy Tsibizov" <yuriy.tsibizov@gmail.com> To: "Roman Divacky" <rdivacky@freebsd.org> Cc: freebsd-hackers@freebsd.org, freebsd-emulation@freebsd.org Subject: Re: User-mode Linux (Was: modify syscall nr on-the-fly) Message-ID: <c019b3090708222234i497da7cax488c7343471ed6c3@mail.gmail.com> In-Reply-To: <20070822211047.GA35783@freebsd.org> References: <c019b3090708211153o7dce8365l945b24ad1c962d22@mail.gmail.com> <c019b3090708220242s690b673i601899c0be2b32d4@mail.gmail.com> <20070822211047.GA35783@freebsd.org>
next in thread | previous in thread | raw e-mail | index | archive | help
2007/8/23, Roman Divacky <rdivacky@freebsd.org>: > here is a little review of mine... just little suggestions. > > Index: i386/i386/trap.c > =================================================================== > RCS file: /home/ncvs/src/sys/i386/i386/trap.c,v > retrieving revision 1.307 > diff -u -r1.307 trap.c > --- i386/i386/trap.c 26 Jul 2007 15:32:55 -0000 1.307 > +++ i386/i386/trap.c 22 Aug 2007 08:53:19 -0000 > @@ -1004,6 +1004,32 @@ > > PTRACESTOP_SC(p, td, S_PT_SCE); > > + if (__predict_false(p->p_sysent->sv_name[0]=='L')) { > > please use __predict_true(p->p_sysent != &elf_linux_sysvec) Will it be possible to link (GENERIC) kernel wih this check? I can't find elf_linux_sysvec in my /boot/kernel/kernel... > > + if (code != frame->tf_eax) { > + printf("linux sysctl patched: code %d return eax %d\n", code, frame->tf_eax); > + /* retry */ > + code = frame->tf_eax; > + > + if (p->p_sysent->sv_prepsyscall) > + /* > + * The prep code is MP aware. > + */ > + (*p->p_sysent->sv_prepsyscall)(frame, args, &code, ¶ms); > + /* else should always be null */ > + > + if (p->p_sysent->sv_mask) > + code &= p->p_sysent->sv_mask; > > the sv_mask should be removed.. dont use it in your code. its entirely pointless when dealing > with Linux binaries ok > > + if (code >= p->p_sysent->sv_size) > + callp = &p->p_sysent->sv_table[0]; > + else > + callp = &p->p_sysent->sv_table[code]; > + > + narg = callp->sy_narg; > + /* retry ends */ > + } > + } > + > AUDIT_SYSCALL_ENTER(code, td); > error = (*callp->sy_call)(td, args); > AUDIT_SYSCALL_EXIT(error, td); > Index: i386/linux/linux_ptrace.c > =================================================================== > RCS file: /home/ncvs/src/sys/i386/linux/linux_ptrace.c,v > retrieving revision 1.17 > diff -u -r1.17 linux_ptrace.c > --- i386/linux/linux_ptrace.c 22 Feb 2006 18:57:49 -0000 1.17 > +++ i386/linux/linux_ptrace.c 22 Aug 2007 09:27:01 -0000 > @@ -78,6 +78,7 @@ > #define PTRACE_SETFPXREGS 19 > > #define PTRACE_SETOPTIONS 21 > +#define PTRACE_O_TRACESYSGOOD 0x00000001 > > /* > * Linux keeps debug registers at the following > @@ -95,6 +96,10 @@ > return ((signum == SIGSTOP)? 0 : signum); > } > > +struct linux_pt_lreg { > + l_long reg[19]; > +}; > + > struct linux_pt_reg { > l_long ebx; > l_long ecx; > @@ -103,17 +108,17 @@ > l_long edi; > l_long ebp; > l_long eax; > - l_int xds; > - l_int xes; > - l_int xfs; > - l_int xgs; > + l_long xds; > + l_long xes; > + l_long xfs; > + l_long xgs; > l_long orig_eax; > l_long eip; > - l_int xcs; > + l_long xcs; > l_long eflags; > l_long esp; > - l_int xss; > -}; > + l_long xss; > +} __packed; > > why is this necessary? how does it affect amd64 linux32 emulator? I'll need to re-check this. It should not access segment registers. > > /* > * Translate i386 ptrace registers between Linux and FreeBSD formats. > @@ -247,6 +252,7 @@ > struct linux_pt_reg reg; > struct linux_pt_fpreg fpreg; > struct linux_pt_fpxreg fpxreg; > + struct linux_pt_lreg lreg; > } r; > union { > struct reg bsd_reg; > @@ -429,20 +435,21 @@ > * as necessary. > */ > if (uap->addr < sizeof(struct linux_pt_reg)) { > + if (uap->addr == (11 << 2)) /* orig_eax */ > + uap->addr = (6 << 2); /* eax */ > + > error = kern_ptrace(td, PT_GETREGS, pid, &u.bsd_reg, 0); > if (error != 0) > break; > > map_regs_to_linux(&u.bsd_reg, &r.reg); > if (req == PTRACE_PEEKUSR) { > - error = copyout((char *)&r.reg + uap->addr, > - (void *)uap->data, sizeof(l_int)); > + error = copyout((l_long*)(&r.lreg.reg[uap->addr>>2]), > + (void *)uap->data, sizeof(l_long)); > break; > } > > - *(l_int *)((char *)&r.reg + uap->addr) = > - (l_int)uap->data; > - > + r.lreg.reg[uap->addr>>2] = (l_long)uap->data; > map_regs_from_linux(&u.bsd_reg, &r.reg); > error = kern_ptrace(td, PT_SETREGS, pid, &u.bsd_reg, 0); > } > @@ -470,11 +477,34 @@ > error = kern_ptrace(td, PT_SETDBREGS, pid, > &u.bsd_dbreg, 0); > } > - > + } > + break; > + case PTRACE_SETOPTIONS: { > + struct proc *p; > + if (uap->data == PTRACE_O_TRACESYSGOOD) { > + p = td->td_proc; > + PROC_LOCK(p); > + p->p_stops |= S_PT_SYSGOOD; > + PROC_UNLOCK(p); > + break; > + } > + printf("linux: ptrace(21,...,%u) not implemented\n", > + (unsigned int)uap->data); > + error = EINVAL; > + } > break; > > braces around case "case" ? please remove (the blocking there is implicit) and introduce > procedure-wide "p" Similar blocks was in original code, I were trying to keep in it's style. > > + case PTRACE_SYSCALL: { > + struct proc *p; > + > + p = td->td_proc; > + PROC_LOCK(p); > + p->p_stops |= S_PT_LINUX; > + PROC_UNLOCK(p); > + > + if (addr == NULL) addr = (void *)1; > + error = kern_ptrace(td, PT_SYSCALL, pid, addr, uap->data); > } > - case PTRACE_SYSCALL: > - /* fall through */ > + break; > > ditto. > > default: > printf("linux: ptrace(%u, ...) not implemented\n", > (unsigned int)uap->req); > Index: sys/cdefs.h > =================================================================== > RCS file: /home/ncvs/src/sys/sys/cdefs.h,v > retrieving revision 1.93 > diff -u -r1.93 cdefs.h > --- sys/cdefs.h 21 Sep 2006 01:38:58 -0000 1.93 > +++ sys/cdefs.h 10 Aug 2007 18:01:34 -0000 > @@ -338,6 +338,10 @@ > #endif > > /* Compiler-dependent macros that rely on FreeBSD-specific extensions. */ > +#ifndef __FreeBSD_cc_version > +#define __FreeBSD_cc_version 0 > +#endif > + > #if __FreeBSD_cc_version >= 300001 && defined(__GNUC__) && !defined(__INTEL_COMPILER) > #define __printf0like(fmtarg, firstvararg) \ > __attribute__((__format__ (__printf0__, fmtarg, firstvararg))) > Index: sys/ptrace.h > =================================================================== > RCS file: /home/ncvs/src/sys/sys/ptrace.h,v > retrieving revision 1.28 > diff -u -r1.28 ptrace.h > --- sys/ptrace.h 6 Feb 2006 09:41:56 -0000 1.28 > +++ sys/ptrace.h 22 Aug 2007 08:53:45 -0000 > @@ -103,7 +103,12 @@ > #define PTRACESTOP_SC(p, td, flag) \ > if ((p)->p_flag & P_TRACED && (p)->p_stops & (flag)) { \ > PROC_LOCK(p); \ > - ptracestop((td), SIGTRAP); \ > + if (__predict_false(p->p_sysent->sv_name[0]=='L')) { \ > > please use p->p_sysent instead like stated above > > + (p)->p_stops &= ~(S_PT_SCE | S_PT_SCX); \ > + ptracestop((td), SIGTRAP | 0x80); \ > + } \ > + else \ > + ptracestop((td), SIGTRAP); \ > PROC_UNLOCK(p); \ > } > /* > @@ -112,6 +117,16 @@ > */ > #define S_PT_SCE 0x000010000 > #define S_PT_SCX 0x000020000 > +/* > + * Linux ptrace conventions: clear S_PT_SCE and S_PT_SCX before raising > + * signals > + */ > +#define S_PT_LINUX 0x000040000 > +/* > + * Linux ptrace option PTRACE_O_TRACESYSGOOD, when enabled, changes signal > + * number to ( SIGTRAP | 0x80 ) > + */ > +#define S_PT_SYSGOOD 0x000080000 > > int ptrace_set_pc(struct thread *_td, unsigned long _addr); > int ptrace_single_step(struct thread *_td); > Index: compat/linux/linux_misc.c > =================================================================== > RCS file: /home/ncvs/src/sys/compat/linux/linux_misc.c,v > retrieving revision 1.213 > diff -u -r1.213 linux_misc.c > --- compat/linux/linux_misc.c 12 Jun 2007 00:11:57 -0000 1.213 > +++ compat/linux/linux_misc.c 22 Aug 2007 09:07:34 -0000 > @@ -63,6 +63,7 @@ > #include <sys/vmmeter.h> > #include <sys/vnode.h> > #include <sys/wait.h> > +#include <sys/ptrace.h> > > #include <security/mac/mac_framework.h> > > @@ -852,6 +853,8 @@ > > if (args->status) { > tmpstat &= 0xffff; > + if (!(td->td_proc->p_stops & S_PT_SYSGOOD)) > + tmpstat &= 0x7fff; > if (WIFSIGNALED(tmpstat)) > tmpstat = (tmpstat & 0xffffff80) | > BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); > @@ -898,6 +901,8 @@ > > if (args->status) { > tmpstat &= 0xffff; > + if (!(td->td_proc->p_stops & S_PT_SYSGOOD)) > + tmpstat &= 0x7fff; > if (WIFSIGNALED(tmpstat)) > tmpstat = (tmpstat & 0xffffff80) | > BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); > > > thnx for the patch! Thanks for review, I'm also trying to get PTRACE_SYSEMU to work (looks like the same as PTRACE_SYSCTL, but it does not call syscall handler and relies on SIGTRAP handler to do all syscall work). UM Linux also uses PTRACE_FAULTINFO (returns pointer to something, like a last page fault address) and PTRACE_LDT (updates a process LDT entry), but they are not included in stock Linux kernel and there is no man page describing their behavior. It boots without them. bad news that UM Linux can't access hdd image ("hostfs" does not work too)-- it hangs detecting partitions. Yuriy.
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?c019b3090708222234i497da7cax488c7343471ed6c3>