Date: Wed, 22 Aug 2007 23:10:47 +0200 From: Roman Divacky <rdivacky@freebsd.org> To: Yuriy Tsibizov <yuriy.tsibizov@gmail.com> Cc: freebsd-hackers@freebsd.org, freebsd-emulation@freebsd.org Subject: Re: User-mode Linux (Was: modify syscall nr on-the-fly) Message-ID: <20070822211047.GA35783@freebsd.org> In-Reply-To: <c019b3090708220242s690b673i601899c0be2b32d4@mail.gmail.com> References: <c019b3090708211153o7dce8365l945b24ad1c962d22@mail.gmail.com> <c019b3090708220242s690b673i601899c0be2b32d4@mail.gmail.com>
next in thread | previous in thread | raw e-mail | index | archive | help
here is a little review of mine... just little suggestions. Index: i386/i386/trap.c =================================================================== RCS file: /home/ncvs/src/sys/i386/i386/trap.c,v retrieving revision 1.307 diff -u -r1.307 trap.c --- i386/i386/trap.c 26 Jul 2007 15:32:55 -0000 1.307 +++ i386/i386/trap.c 22 Aug 2007 08:53:19 -0000 @@ -1004,6 +1004,32 @@ PTRACESTOP_SC(p, td, S_PT_SCE); + if (__predict_false(p->p_sysent->sv_name[0]=='L')) { please use __predict_true(p->p_sysent != &elf_linux_sysvec) + if (code != frame->tf_eax) { + printf("linux sysctl patched: code %d return eax %d\n", code, frame->tf_eax); + /* retry */ + code = frame->tf_eax; + + if (p->p_sysent->sv_prepsyscall) + /* + * The prep code is MP aware. + */ + (*p->p_sysent->sv_prepsyscall)(frame, args, &code, ¶ms); + /* else should always be null */ + + if (p->p_sysent->sv_mask) + code &= p->p_sysent->sv_mask; the sv_mask should be removed.. dont use it in your code. its entirely pointless when dealing with Linux binaries + if (code >= p->p_sysent->sv_size) + callp = &p->p_sysent->sv_table[0]; + else + callp = &p->p_sysent->sv_table[code]; + + narg = callp->sy_narg; + /* retry ends */ + } + } + AUDIT_SYSCALL_ENTER(code, td); error = (*callp->sy_call)(td, args); AUDIT_SYSCALL_EXIT(error, td); Index: i386/linux/linux_ptrace.c =================================================================== RCS file: /home/ncvs/src/sys/i386/linux/linux_ptrace.c,v retrieving revision 1.17 diff -u -r1.17 linux_ptrace.c --- i386/linux/linux_ptrace.c 22 Feb 2006 18:57:49 -0000 1.17 +++ i386/linux/linux_ptrace.c 22 Aug 2007 09:27:01 -0000 @@ -78,6 +78,7 @@ #define PTRACE_SETFPXREGS 19 #define PTRACE_SETOPTIONS 21 +#define PTRACE_O_TRACESYSGOOD 0x00000001 /* * Linux keeps debug registers at the following @@ -95,6 +96,10 @@ return ((signum == SIGSTOP)? 0 : signum); } +struct linux_pt_lreg { + l_long reg[19]; +}; + struct linux_pt_reg { l_long ebx; l_long ecx; @@ -103,17 +108,17 @@ l_long edi; l_long ebp; l_long eax; - l_int xds; - l_int xes; - l_int xfs; - l_int xgs; + l_long xds; + l_long xes; + l_long xfs; + l_long xgs; l_long orig_eax; l_long eip; - l_int xcs; + l_long xcs; l_long eflags; l_long esp; - l_int xss; -}; + l_long xss; +} __packed; why is this necessary? how does it affect amd64 linux32 emulator? /* * Translate i386 ptrace registers between Linux and FreeBSD formats. @@ -247,6 +252,7 @@ struct linux_pt_reg reg; struct linux_pt_fpreg fpreg; struct linux_pt_fpxreg fpxreg; + struct linux_pt_lreg lreg; } r; union { struct reg bsd_reg; @@ -429,20 +435,21 @@ * as necessary. */ if (uap->addr < sizeof(struct linux_pt_reg)) { + if (uap->addr == (11 << 2)) /* orig_eax */ + uap->addr = (6 << 2); /* eax */ + error = kern_ptrace(td, PT_GETREGS, pid, &u.bsd_reg, 0); if (error != 0) break; map_regs_to_linux(&u.bsd_reg, &r.reg); if (req == PTRACE_PEEKUSR) { - error = copyout((char *)&r.reg + uap->addr, - (void *)uap->data, sizeof(l_int)); + error = copyout((l_long*)(&r.lreg.reg[uap->addr>>2]), + (void *)uap->data, sizeof(l_long)); break; } - *(l_int *)((char *)&r.reg + uap->addr) = - (l_int)uap->data; - + r.lreg.reg[uap->addr>>2] = (l_long)uap->data; map_regs_from_linux(&u.bsd_reg, &r.reg); error = kern_ptrace(td, PT_SETREGS, pid, &u.bsd_reg, 0); } @@ -470,11 +477,34 @@ error = kern_ptrace(td, PT_SETDBREGS, pid, &u.bsd_dbreg, 0); } - + } + break; + case PTRACE_SETOPTIONS: { + struct proc *p; + if (uap->data == PTRACE_O_TRACESYSGOOD) { + p = td->td_proc; + PROC_LOCK(p); + p->p_stops |= S_PT_SYSGOOD; + PROC_UNLOCK(p); + break; + } + printf("linux: ptrace(21,...,%u) not implemented\n", + (unsigned int)uap->data); + error = EINVAL; + } break; braces around case "case" ? please remove (the blocking there is implicit) and introduce procedure-wide "p" + case PTRACE_SYSCALL: { + struct proc *p; + + p = td->td_proc; + PROC_LOCK(p); + p->p_stops |= S_PT_LINUX; + PROC_UNLOCK(p); + + if (addr == NULL) addr = (void *)1; + error = kern_ptrace(td, PT_SYSCALL, pid, addr, uap->data); } - case PTRACE_SYSCALL: - /* fall through */ + break; ditto. default: printf("linux: ptrace(%u, ...) not implemented\n", (unsigned int)uap->req); Index: sys/cdefs.h =================================================================== RCS file: /home/ncvs/src/sys/sys/cdefs.h,v retrieving revision 1.93 diff -u -r1.93 cdefs.h --- sys/cdefs.h 21 Sep 2006 01:38:58 -0000 1.93 +++ sys/cdefs.h 10 Aug 2007 18:01:34 -0000 @@ -338,6 +338,10 @@ #endif /* Compiler-dependent macros that rely on FreeBSD-specific extensions. */ +#ifndef __FreeBSD_cc_version +#define __FreeBSD_cc_version 0 +#endif + #if __FreeBSD_cc_version >= 300001 && defined(__GNUC__) && !defined(__INTEL_COMPILER) #define __printf0like(fmtarg, firstvararg) \ __attribute__((__format__ (__printf0__, fmtarg, firstvararg))) Index: sys/ptrace.h =================================================================== RCS file: /home/ncvs/src/sys/sys/ptrace.h,v retrieving revision 1.28 diff -u -r1.28 ptrace.h --- sys/ptrace.h 6 Feb 2006 09:41:56 -0000 1.28 +++ sys/ptrace.h 22 Aug 2007 08:53:45 -0000 @@ -103,7 +103,12 @@ #define PTRACESTOP_SC(p, td, flag) \ if ((p)->p_flag & P_TRACED && (p)->p_stops & (flag)) { \ PROC_LOCK(p); \ - ptracestop((td), SIGTRAP); \ + if (__predict_false(p->p_sysent->sv_name[0]=='L')) { \ please use p->p_sysent instead like stated above + (p)->p_stops &= ~(S_PT_SCE | S_PT_SCX); \ + ptracestop((td), SIGTRAP | 0x80); \ + } \ + else \ + ptracestop((td), SIGTRAP); \ PROC_UNLOCK(p); \ } /* @@ -112,6 +117,16 @@ */ #define S_PT_SCE 0x000010000 #define S_PT_SCX 0x000020000 +/* + * Linux ptrace conventions: clear S_PT_SCE and S_PT_SCX before raising + * signals + */ +#define S_PT_LINUX 0x000040000 +/* + * Linux ptrace option PTRACE_O_TRACESYSGOOD, when enabled, changes signal + * number to ( SIGTRAP | 0x80 ) + */ +#define S_PT_SYSGOOD 0x000080000 int ptrace_set_pc(struct thread *_td, unsigned long _addr); int ptrace_single_step(struct thread *_td); Index: compat/linux/linux_misc.c =================================================================== RCS file: /home/ncvs/src/sys/compat/linux/linux_misc.c,v retrieving revision 1.213 diff -u -r1.213 linux_misc.c --- compat/linux/linux_misc.c 12 Jun 2007 00:11:57 -0000 1.213 +++ compat/linux/linux_misc.c 22 Aug 2007 09:07:34 -0000 @@ -63,6 +63,7 @@ #include <sys/vmmeter.h> #include <sys/vnode.h> #include <sys/wait.h> +#include <sys/ptrace.h> #include <security/mac/mac_framework.h> @@ -852,6 +853,8 @@ if (args->status) { tmpstat &= 0xffff; + if (!(td->td_proc->p_stops & S_PT_SYSGOOD)) + tmpstat &= 0x7fff; if (WIFSIGNALED(tmpstat)) tmpstat = (tmpstat & 0xffffff80) | BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); @@ -898,6 +901,8 @@ if (args->status) { tmpstat &= 0xffff; + if (!(td->td_proc->p_stops & S_PT_SYSGOOD)) + tmpstat &= 0x7fff; if (WIFSIGNALED(tmpstat)) tmpstat = (tmpstat & 0xffffff80) | BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); thnx for the patch! roman
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20070822211047.GA35783>