Date: Mon, 12 May 2003 19:09:04 -0700 (PDT) From: Peter Wemm <peter@FreeBSD.org> To: Perforce Change Reviews <perforce@freebsd.org> Subject: PERFORCE change 31043 for review Message-ID: <200305130209.h4D294xs053232@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=31043 Change 31043 by peter@peter_hammer on 2003/05/12 19:08:19 And now for something completely different... i386 binary emulation! This was submitted by p4/i386 on the amd64 box itself! Affected files ... .. //depot/projects/hammer/sys/amd64/amd64/cpu_switch.S#4 edit .. //depot/projects/hammer/sys/amd64/amd64/exception.S#11 edit .. //depot/projects/hammer/sys/amd64/amd64/genassym.c#12 edit .. //depot/projects/hammer/sys/amd64/amd64/machdep.c#30 edit .. //depot/projects/hammer/sys/amd64/amd64/support.S#6 edit .. //depot/projects/hammer/sys/amd64/amd64/trap.c#14 edit .. //depot/projects/hammer/sys/amd64/amd64/vm_machdep.c#6 edit .. //depot/projects/hammer/sys/amd64/conf/GENERIC#8 edit .. //depot/projects/hammer/sys/amd64/ia32/Makefile#1 add .. //depot/projects/hammer/sys/amd64/ia32/ia32.h#1 add .. //depot/projects/hammer/sys/amd64/ia32/ia32_misc.c#1 add .. //depot/projects/hammer/sys/amd64/ia32/ia32_proto.h#1 add .. //depot/projects/hammer/sys/amd64/ia32/ia32_syscall.h#1 add .. //depot/projects/hammer/sys/amd64/ia32/ia32_sysent.c#1 add .. //depot/projects/hammer/sys/amd64/ia32/ia32_sysvec.c#1 add .. //depot/projects/hammer/sys/amd64/ia32/ia32_util.h#1 add .. //depot/projects/hammer/sys/amd64/ia32/syscalls.conf#1 add .. //depot/projects/hammer/sys/amd64/ia32/syscalls.master#1 add .. //depot/projects/hammer/sys/amd64/include/cpufunc.h#6 edit .. //depot/projects/hammer/sys/amd64/include/elf.h#7 edit .. //depot/projects/hammer/sys/amd64/include/pcb.h#8 edit .. //depot/projects/hammer/sys/conf/files.amd64#9 edit .. //depot/projects/hammer/sys/conf/options.amd64#8 edit Differences ... 
==== //depot/projects/hammer/sys/amd64/amd64/cpu_switch.S#4 (text+ko) ==== @@ -106,6 +106,12 @@ pushfq /* PSL */ popq PCB_RFLAGS(%r8) + /* Save kernel %gs.base */ + movl $MSR_GSBASE,%ecx + rdmsr + movl %eax,PCB_KGSBASE(%r8) + movl %edx,PCB_KGSBASE+4(%r8) + /* Save userland %fs */ movl $MSR_FSBASE,%ecx rdmsr @@ -118,6 +124,12 @@ movl %eax,PCB_GSBASE(%r8) movl %edx,PCB_GSBASE+4(%r8) + /* Save segment selector numbers */ + movl %ds,PCB_DS(%r8) + movl %es,PCB_ES(%r8) + movl %fs,PCB_FS(%r8) + movl %gs,PCB_GS(%r8) + /* have we used fp, and need a save? */ cmpq %rdi,PCPU(FPCURTHREAD) jne 1f @@ -160,6 +172,18 @@ */ movq TD_PCB(%rsi),%r8 + /* Restore segment selector numbers */ + movl PCB_DS(%r8),%ds + movl PCB_ES(%r8),%es + movl PCB_FS(%r8),%fs + movl PCB_GS(%r8),%gs + + /* Restore kernel %gs.base */ + movl $MSR_GSBASE,%ecx + movl PCB_KGSBASE(%r8),%eax + movl PCB_KGSBASE+4(%r8),%edx + wrmsr + /* Restore userland %fs */ movl $MSR_FSBASE,%ecx movl PCB_FSBASE(%r8),%eax ==== //depot/projects/hammer/sys/amd64/amd64/exception.S#11 (text+ko) ==== @@ -247,7 +247,7 @@ movq %r14,TF_R14(%rsp) movq %r15,TF_R15(%rsp) FAKE_MCOUNT(13*4(%rsp)) - call syscall + call ia32_syscall MEXITCOUNT jmp doreti ==== //depot/projects/hammer/sys/amd64/amd64/genassym.c#12 (text+ko) ==== @@ -125,6 +125,11 @@ ASSYM(PCB_RFLAGS, offsetof(struct pcb, pcb_rflags)); ASSYM(PCB_FSBASE, offsetof(struct pcb, pcb_fsbase)); ASSYM(PCB_GSBASE, offsetof(struct pcb, pcb_gsbase)); +ASSYM(PCB_KGSBASE, offsetof(struct pcb, pcb_kgsbase)); +ASSYM(PCB_DS, offsetof(struct pcb, pcb_ds)); +ASSYM(PCB_ES, offsetof(struct pcb, pcb_es)); +ASSYM(PCB_FS, offsetof(struct pcb, pcb_fs)); +ASSYM(PCB_GS, offsetof(struct pcb, pcb_gs)); ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags)); ASSYM(PCB_FULLCTX, PCB_FULLCTX); @@ -181,8 +186,10 @@ ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL)); ASSYM(KUCSEL, GSEL(GUCODE_SEL, SEL_UPL)); ASSYM(KUDSEL, GSEL(GUDATA_SEL, SEL_UPL)); +ASSYM(KUC32SEL, GSEL(GUCODE32_SEL, SEL_UPL)); ASSYM(MSR_FSBASE, MSR_FSBASE); 
+ASSYM(MSR_GSBASE, MSR_GSBASE); ASSYM(MSR_KGSBASE, MSR_KGSBASE); ASSYM(GPROC0_SEL, GPROC0_SEL); ==== //depot/projects/hammer/sys/amd64/amd64/machdep.c#30 (text+ko) ==== @@ -129,7 +129,7 @@ static int set_fpcontext(struct thread *td, const mcontext_t *mcp); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) -int _udatasel, _ucodesel; +int _udatasel, _ucodesel, _ucode32sel; u_long atdevbase; u_int64_t modulep; /* phys addr of metadata table */ @@ -466,11 +466,25 @@ { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; + u_int64_t pc; + wrmsr(MSR_FSBASE, 0); + wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; - wrmsr(MSR_FSBASE, 0); - wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ + pcb->pcb_kgsbase = rdmsr(MSR_GSBASE); + load_ds(_udatasel); + load_es(_udatasel); + load_fs(_udatasel); + critical_enter(); + pc = rdmsr(MSR_GSBASE); + load_gs(_udatasel); /* Clobbers kernel %GS.base */ + wrmsr(MSR_GSBASE, pc); + critical_exit(); + pcb->pcb_ds = _udatasel; + pcb->pcb_es = _udatasel; + pcb->pcb_fs = _udatasel; + pcb->pcb_gs = _udatasel; bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = entry; @@ -589,7 +603,7 @@ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ - 0, /* segment descriptor present */ + 1, /* segment descriptor present */ 0, /* long */ 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, @@ -1289,10 +1303,12 @@ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); + _ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; /* XXXKSE */ thread0.td_pcb->pcb_cr3 = IdlePML4; + thread0.td_pcb->pcb_kgsbase = (u_int64_t)pc; thread0.td_frame = &proc0_tf; } ==== //depot/projects/hammer/sys/amd64/amd64/support.S#6 (text+ko) ==== @@ -358,6 +358,9 @@ movq $0,PCB_ONFAULT(%rcx) ret +ENTRY(fuword) + 
jmp fuword64 + ENTRY(fuword32) movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) @@ -366,15 +369,10 @@ cmpq %rax,%rdi /* verify address is valid */ ja fusufault -/* XXX use the 64 extend */ - xorq %rax, %rax movl (%rdi),%eax movq $0,PCB_ONFAULT(%rcx) ret -ENTRY(fuword) - jmp fuword32 - /* * These two routines are called from the profiling code, potentially * at interrupt time. If they fail, that's okay, good things will @@ -397,8 +395,6 @@ cmpq %rax,%rdi ja fusufault -/* XXX use the 64 extend */ - xorq %rax, %rax movzwl (%rdi),%eax movq $0,PCB_ONFAULT(%rcx) ret @@ -414,8 +410,6 @@ cmpq %rax,%rdi ja fusufault -/* XXX use the 64 extend */ - xorq %rax, %rax movzbl (%rdi),%eax movq $0,PCB_ONFAULT(%rcx) ret @@ -448,6 +442,9 @@ movq %rax,PCB_ONFAULT(%rcx) ret +ENTRY(suword) + jmp suword64 + ENTRY(suword32) movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) @@ -462,9 +459,6 @@ movq %rax,PCB_ONFAULT(%rcx) ret -ENTRY(suword) - jmp suword32 - /* * suword16 - MP SAFE */ ==== //depot/projects/hammer/sys/amd64/amd64/trap.c#14 (text+ko) ==== @@ -91,7 +91,7 @@ extern void trap(struct trapframe frame); extern void syscall(struct trapframe frame); -static int trap_pfault(struct trapframe *, int, vm_offset_t); +static int trap_pfault(struct trapframe *, int); static void trap_fatal(struct trapframe *, vm_offset_t); void dblfault_handler(void); @@ -161,13 +161,13 @@ struct proc *p = td->td_proc; u_int sticks = 0; int i = 0, ucode = 0, type, code; - vm_offset_t eva; atomic_add_int(&cnt.v_trap, 1); type = frame.tf_trapno; #ifdef DDB if (db_active) { + vm_offset_t eva; eva = (type == T_PAGEFLT ? frame.tf_addr : 0); trap_fatal(&frame, eva); goto out; @@ -202,7 +202,6 @@ } } - eva = 0; code = frame.tf_err; if (type == T_PAGEFLT) { /* @@ -213,9 +212,8 @@ * kernel can print out a useful trap message and even get * to the debugger. 
*/ - eva = frame.tf_addr; if (PCPU_GET(spinlocks) != NULL) - trap_fatal(&frame, eva); + trap_fatal(&frame, frame.tf_addr); } #ifdef DEVICE_POLLING @@ -261,7 +259,7 @@ break; case T_PAGEFLT: /* page fault */ - i = trap_pfault(&frame, TRUE, eva); + i = trap_pfault(&frame, TRUE); if (i == -1) goto userout; if (i == 0) @@ -331,7 +329,7 @@ ("kernel trap doesn't have ucred")); switch (type) { case T_PAGEFLT: /* page fault */ - (void) trap_pfault(&frame, FALSE, eva); + (void) trap_pfault(&frame, FALSE); goto out; case T_DNA: @@ -430,7 +428,7 @@ #endif /* DEV_ISA */ } - trap_fatal(&frame, eva); + trap_fatal(&frame, 0); goto out; } @@ -445,7 +443,7 @@ uprintf("fatal process exception: %s", trap_msg[type]); if ((type == T_PAGEFLT) || (type == T_PROTFLT)) - uprintf(", fault VA = 0x%lx", eva); + uprintf(", fault VA = 0x%lx", frame.tf_addr); uprintf("\n"); } #endif @@ -462,10 +460,9 @@ } static int -trap_pfault(frame, usermode, eva) +trap_pfault(frame, usermode) struct trapframe *frame; int usermode; - vm_offset_t eva; { vm_offset_t va; struct vmspace *vm = NULL; @@ -474,6 +471,7 @@ vm_prot_t ftype; struct thread *td = curthread; struct proc *p = td->td_proc; + vm_offset_t eva = frame->tf_addr; va = trunc_page(eva); if (va >= KERNBASE) { @@ -542,7 +540,7 @@ return (-1); } -printf("trap_pfault: pid %d %s %s %s eva %p, rip %p, rax %p, rbx %p, rcx %p, rdx %p, rsp %p, rvp %p, rsi %p, rdi %p\n", p->p_pid, +printf("trap_pfault: pid %d %s %s %s eva %p, rip %p, rax %p, rbx %p, rcx %p, rdx %p, rsp %p, rbp %p, rsi %p, rdi %p\n", p->p_pid, frame->tf_err & PGEX_U ? "user" : "supervisor", frame->tf_err & PGEX_W ? "write" : "read", frame->tf_err & PGEX_P ? 
"protection violation" : "page not present", @@ -820,3 +818,173 @@ mtx_assert(&Giant, MA_NOTOWNED); } +void ia32_syscall(struct trapframe frame); +void +ia32_syscall(struct trapframe frame) +{ + caddr_t params; + int i; + struct sysent *callp; + struct thread *td = curthread; + struct proc *p = td->td_proc; + register_t orig_tf_rflags; + u_int sticks; + int error; + int narg; + u_int32_t args[8]; + u_int64_t args64[8]; + u_int code; + + /* + * note: PCPU_LAZY_INC() can only be used if we can afford + * occassional inaccuracy in the count. + */ + cnt.v_syscall++; + + sticks = td->td_sticks; + td->td_frame = &frame; + if (td->td_ucred != p->p_ucred) + cred_update_thread(td); + params = (caddr_t)frame.tf_rsp + sizeof(u_int32_t); + code = frame.tf_rax; + orig_tf_rflags = frame.tf_rflags; + + if (p->p_sysent->sv_prepsyscall) { + /* + * The prep code is MP aware. + */ + (*p->p_sysent->sv_prepsyscall)(&frame, args, &code, &params); + } else { + /* + * Need to check if this is a 32 bit or 64 bit syscall. + * fuword is MP aware. + */ + if (code == SYS_syscall) { + /* + * Code is first argument, followed by actual args. + */ + code = fuword32(params); + params += sizeof(int); + } else if (code == SYS___syscall) { + /* + * Like syscall, but code is a quad, so as to maintain + * quad alignment for the rest of the arguments. + * We use a 32-bit fetch in case params is not + * aligned. 
+ */ + code = fuword32(params); + params += sizeof(quad_t); + } + } + + if (p->p_sysent->sv_mask) + code &= p->p_sysent->sv_mask; + + if (code >= p->p_sysent->sv_size) + callp = &p->p_sysent->sv_table[0]; + else + callp = &p->p_sysent->sv_table[code]; + + narg = callp->sy_narg & SYF_ARGMASK; + + /* + * copyin and the ktrsyscall()/ktrsysret() code is MP-aware + */ + if (params != NULL && narg != 0) + error = copyin(params, (caddr_t)args, + (u_int)(narg * sizeof(int))); + else + error = 0; + + for (i = 0; i < narg; i++) + args64[i] = args[i]; + +#ifdef KTRACE + if (KTRPOINT(td, KTR_SYSCALL)) + ktrsyscall(code, narg, args64); +#endif + /* + * Try to run the syscall without Giant if the syscall + * is MP safe. + */ + if ((callp->sy_narg & SYF_MPSAFE) == 0) + mtx_lock(&Giant); + + if (error == 0) { + td->td_retval[0] = 0; + td->td_retval[1] = frame.tf_rdx; + + STOPEVENT(p, S_SCE, narg); + + error = (*callp->sy_call)(td, args64); + } + + switch (error) { + case 0: + frame.tf_rax = td->td_retval[0]; + frame.tf_rdx = td->td_retval[1]; + frame.tf_rflags &= ~PSL_C; + break; + + case ERESTART: + /* + * Reconstruct pc, assuming lcall $X,y is 7 bytes, + * int 0x80 is 2 bytes. We saved this in tf_err. + */ + frame.tf_rip -= frame.tf_err; + break; + + case EJUSTRETURN: + break; + + default: + if (p->p_sysent->sv_errsize) { + if (error >= p->p_sysent->sv_errsize) + error = -1; /* XXX */ + else + error = p->p_sysent->sv_errtbl[error]; + } + frame.tf_rax = error; + frame.tf_rflags |= PSL_C; + break; + } + + /* + * Release Giant if we previously set it. + */ + if ((callp->sy_narg & SYF_MPSAFE) == 0) + mtx_unlock(&Giant); + + /* + * Traced syscall. 
+ */ + if (orig_tf_rflags & PSL_T) { + frame.tf_rflags &= ~PSL_T; + trapsignal(td, SIGTRAP, 0); + } + + /* + * Handle reschedule and other end-of-syscall issues + */ + userret(td, &frame, sticks); + +#ifdef KTRACE + if (KTRPOINT(td, KTR_SYSRET)) + ktrsysret(code, error, td->td_retval[0]); +#endif + + /* + * This works because errno is findable through the + * register set. If we ever support an emulation where this + * is not the case, this code will need to be revisited. + */ + STOPEVENT(p, S_SCX, code); + +#ifdef DIAGNOSTIC + cred_free_thread(td); +#endif + WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning", + (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???"); + mtx_assert(&sched_lock, MA_NOTOWNED); + mtx_assert(&Giant, MA_NOTOWNED); +} ==== //depot/projects/hammer/sys/amd64/amd64/vm_machdep.c#6 (text+ko) ==== @@ -76,7 +76,6 @@ #include <amd64/isa/isa.h> static void cpu_reset_real(void); -extern int _ucodesel, _udatasel; /* * Finish a fork operation, with process p2 nearly set up. @@ -143,6 +142,7 @@ * pcb2->pcb_savefpu: cloned above. * pcb2->pcb_flags: cloned above. * pcb2->pcb_onfault: cloned above (always NULL here?). 
+ * pcb2->pcb_[fg]sbase: cloned above */ /* ==== //depot/projects/hammer/sys/amd64/conf/GENERIC#8 (text+ko) ==== @@ -23,6 +23,7 @@ ident GENERIC maxusers 0 options NDA #Avoid accidental cut/paste of NDA'ed stuff +options IA32 makeoptions NO_MODULES=not_yet @@ -42,6 +43,10 @@ options INVARIANTS #Enable calls of extra sanity checking options INVARIANT_SUPPORT #Extra sanity checks of internal structures, required by INVARIANTS +options SYSVMSG +options SYSVSEM +options SYSVSHM + device isa device pci ==== //depot/projects/hammer/sys/amd64/include/cpufunc.h#6 (text+ko) ==== @@ -447,7 +447,6 @@ __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); } -/* XXX these are replaced with rdmsr/wrmsr */ static __inline u_int rfs(void) { @@ -465,6 +464,18 @@ } static __inline void +load_ds(u_int sel) +{ + __asm __volatile("movl %0,%%ds" : : "rm" (sel)); +} + +static __inline void +load_es(u_int sel) +{ + __asm __volatile("movl %0,%%es" : : "rm" (sel)); +} + +static __inline void load_fs(u_int sel) { __asm __volatile("movl %0,%%fs" : : "rm" (sel)); ==== //depot/projects/hammer/sys/amd64/include/elf.h#7 (text+ko) ==== @@ -33,9 +33,12 @@ * ELF definitions for the AMD64 architecture. */ -#include <sys/elf64.h> /* Definitions common to all 64 bit architectures. */ +#ifndef __ELF_WORD_SIZE #define __ELF_WORD_SIZE 64 /* Used by <sys/elf_generic.h> */ +#endif +#include <sys/elf32.h> /* Definitions common to all 32 bit architectures. */ +#include <sys/elf64.h> /* Definitions common to all 64 bit architectures. */ #include <sys/elf_generic.h> #define ELF_ARCH EM_X86_64 @@ -48,6 +51,13 @@ * The i386 supplement to the SVR4 ABI specification names this "auxv_t", * but POSIX lays claim to all symbols ending with "_t". */ +typedef struct { /* Auxiliary vector entry on initial stack */ + int a_type; /* Entry type. */ + union { + int a_val; /* Integer value. */ + } a_un; +} Elf32_Auxinfo; + typedef struct { /* Auxiliary vector entry on initial stack */ long a_type; /* Entry type. 
*/ @@ -118,7 +128,11 @@ #define R_X86_64_COUNT 16 /* Count of defined relocation types. */ /* Define "machine" characteristics */ -#define ELF_TARG_CLASS ELFCLASS64 +#if __ELF_WORD_SIZE == 32 +#define ELF_TARG_CLASS ELFCLASS32 +#else +#define ELF_TARG_CLASS ELFCLASS64 +#endif #define ELF_TARG_DATA ELFDATA2LSB #define ELF_TARG_MACH EM_X86_64 #define ELF_TARG_VER 1 ==== //depot/projects/hammer/sys/amd64/include/pcb.h#8 (text+ko) ==== @@ -59,6 +59,11 @@ register_t pcb_rflags; register_t pcb_fsbase; register_t pcb_gsbase; + register_t pcb_kgsbase; + u_int32_t pcb_ds; + u_int32_t pcb_es; + u_int32_t pcb_fs; + u_int32_t pcb_gs; struct savefpu pcb_save; u_long pcb_flags; ==== //depot/projects/hammer/sys/conf/files.amd64#9 (text+ko) ==== @@ -64,7 +64,10 @@ amd64/pci/pci_cfgreg.c optional pci amd64/pci/pci_bus.c optional pci - +amd64/ia32/ia32_misc.c optional ia32 +amd64/ia32/ia32_sysent.c optional ia32 +amd64/ia32/ia32_sysvec.c optional ia32 +kern/imgact_elf32.c optional ia32 # This file tells config what files go into building a kernel, # files marked standard are always included. ==== //depot/projects/hammer/sys/conf/options.amd64#8 (text+ko) ==== @@ -60,3 +60,4 @@ PSM_HOOKRESUME opt_psm.h PSM_RESETAFTERSUSPEND opt_psm.h PSM_DEBUG opt_psm.h +IA32
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200305130209.h4D294xs053232>