From owner-p4-projects@FreeBSD.ORG Fri Jan 26 04:49:57 2007 Return-Path: X-Original-To: p4-projects@freebsd.org Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id 662E316A400; Fri, 26 Jan 2007 04:49:57 +0000 (UTC) X-Original-To: perforce@freebsd.org Delivered-To: perforce@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 3D41616A404 for ; Fri, 26 Jan 2007 04:49:57 +0000 (UTC) (envelope-from jkim@freebsd.org) Received: from repoman.freebsd.org (repoman.freebsd.org [69.147.83.41]) by mx1.freebsd.org (Postfix) with ESMTP id 2C2D713C48E for ; Fri, 26 Jan 2007 04:49:57 +0000 (UTC) (envelope-from jkim@freebsd.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.13.6/8.13.6) with ESMTP id l0Q4nv2s034235 for ; Fri, 26 Jan 2007 04:49:57 GMT (envelope-from jkim@freebsd.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.13.6/8.13.4/Submit) id l0Q4nuPf034232 for perforce@freebsd.org; Fri, 26 Jan 2007 04:49:56 GMT (envelope-from jkim@freebsd.org) Date: Fri, 26 Jan 2007 04:49:56 GMT Message-Id: <200701260449.l0Q4nuPf034232@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to jkim@freebsd.org using -f From: Jung-uk Kim To: Perforce Change Reviews Cc: Subject: PERFORCE change 113545 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 26 Jan 2007 04:49:57 -0000 http://perforce.freebsd.org/chv.cgi?CH=113545 Change 113545 by jkim@jkim_hammer on 2007/01/26 04:49:44 Almost working TLS implementation for AMD64. Simple applications run fine but complex ones may fail. Affected files ... .. //depot/projects/linuxolator/src/sys/amd64/amd64/cpu_switch.S#3 edit .. //depot/projects/linuxolator/src/sys/amd64/amd64/genassym.c#5 edit .. //depot/projects/linuxolator/src/sys/amd64/amd64/machdep.c#12 edit .. //depot/projects/linuxolator/src/sys/amd64/ia32/ia32_signal.c#3 edit .. //depot/projects/linuxolator/src/sys/amd64/include/pcb.h#2 edit .. //depot/projects/linuxolator/src/sys/amd64/include/segments.h#2 edit .. //depot/projects/linuxolator/src/sys/amd64/linux32/linux.h#16 edit .. //depot/projects/linuxolator/src/sys/amd64/linux32/linux32_machdep.c#28 edit .. //depot/projects/linuxolator/src/sys/amd64/linux32/linux32_sysvec.c#7 edit Differences ... ==== //depot/projects/linuxolator/src/sys/amd64/amd64/cpu_switch.S#3 (text+ko) ==== @@ -109,6 +109,7 @@ movl %es,PCB_ES(%r8) movl %fs,PCB_FS(%r8) movl %gs,PCB_GS(%r8) + 1: /* Test if debug registers should be saved. */ testl $PCB_DBREGS,PCB_FLAGS(%r8) @@ -183,22 +184,31 @@ rdmsr movl PCB_GS(%r8),%gs wrmsr + + /* Restore userland 32 bit GS descriptor for Linuxulator */ + movq PCB_GS32P(%r8),%rax + testq %rax,%rax + jz 3f /* no, skip over */ + + movq PCB_GS32SD(%r8),%rcx + movq %rcx,(%rax) jmp 2f + 1: - /* Restore userland %fs */ movl $MSR_FSBASE,%ecx movl PCB_FSBASE(%r8),%eax movl PCB_FSBASE+4(%r8),%edx wrmsr +2: /* Restore userland %gs */ movl $MSR_KGSBASE,%ecx movl PCB_GSBASE(%r8),%eax movl PCB_GSBASE+4(%r8),%edx wrmsr -2: +3: /* Update the TSS_RSP0 pointer for the next interrupt */ movq PCPU(TSSP), %rax addq $COMMON_TSS_RSP0, %rax ==== //depot/projects/linuxolator/src/sys/amd64/amd64/genassym.c#5 (text+ko) ==== @@ -147,6 +147,9 @@ ASSYM(COMMON_TSS_RSP0, offsetof(struct amd64tss, tss_rsp0)); +ASSYM(PCB_GS32P, offsetof(struct pcb, pcb_gs32p)); +ASSYM(PCB_GS32SD, offsetof(struct pcb, pcb_gs32sd)); + ASSYM(TF_R15, offsetof(struct trapframe, tf_r15)); ASSYM(TF_R14, offsetof(struct trapframe, tf_r14)); ASSYM(TF_R13, offsetof(struct trapframe, tf_r13)); ==== //depot/projects/linuxolator/src/sys/amd64/amd64/machdep.c#12 (text+ko) ==== @@ -156,7 +156,7 @@ extern vm_offset_t ksym_start, ksym_end; #endif -int _udatasel, _ucodesel, _ucode32sel; +int _udatasel, _ucodesel, _ucode32sel, _ugs32sel; int cold = 1; @@ -725,6 +725,15 @@ 0, /* long */ 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, +/* GUGS32_SEL 8 32 bit GS Descriptor for user */ +{ 0x0, /* segment base address */ + 0xfffff, /* length - all address space */ + SDT_MEMRWA, /* segment type */ + SEL_UPL, /* segment descriptor priority level */ + 1, /* segment descriptor present */ + 0, /* long */ + 1, /* default 32 vs 16 bit size */ + 1 /* limit granularity (byte/page units)*/ }, }; void @@ -1279,6 +1288,7 @@ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); _ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL); + _ugs32sel = GSEL(GUGS32_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; /* XXXKSE */ ==== //depot/projects/linuxolator/src/sys/amd64/ia32/ia32_signal.c#3 (text+ko) ==== @@ -727,6 +727,7 @@ pcb->pcb_es = _udatasel; pcb->pcb_fs = _udatasel; pcb->pcb_gs = _udatasel; + pcb->pcb_gs32p = NULL; bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = entry; ==== //depot/projects/linuxolator/src/sys/amd64/include/pcb.h#2 (text+ko) ==== @@ -73,6 +73,9 @@ #define PCB_FULLCTX 0x80 /* full context restore on sysret */ caddr_t pcb_onfault; /* copyin/out fault recovery */ + + caddr_t pcb_gs32p; /* XXX pointer to gdt[GUGS32_SEL] */ + u_int64_t pcb_gs32sd; /* 32 bit GS segment descriptor */ }; #ifdef _KERNEL ==== //depot/projects/linuxolator/src/sys/amd64/include/segments.h#2 (text+ko) ==== @@ -200,9 +200,10 @@ #define GUCODE32_SEL 3 /* User 32 bit code Descriptor */ #define GUDATA_SEL 4 /* User 32/64 bit Data Descriptor */ #define GUCODE_SEL 5 /* User 64 bit Code Descriptor */ -#define GPROC0_SEL 6 /* TSS for entering kernel etc */ +#define GPROC0_SEL 6 /* TSS for entering kernel etc */ /* slot 6 is second half of GPROC0_SEL */ -#define NGDT 8 +#define GUGS32_SEL 8 /* User 32 bit GS Descriptor */ +#define NGDT 9 #ifdef _KERNEL extern struct user_segment_descriptor gdt[]; ==== //depot/projects/linuxolator/src/sys/amd64/linux32/linux.h#16 (text+ko) ==== @@ -774,7 +774,6 @@ l_uint limit_in_pages:1; l_uint seg_not_present:1; l_uint useable:1; - l_uint lm:1; }; struct l_desc_struct { @@ -798,7 +797,6 @@ #define ENTRY_B_USEABLE 20 #define ENTRY_B_SEG32BIT 22 #define ENTRY_B_LIMIT 23 -#define ENTRY_B_LONGMODE 21 #define LDT_entry_b(info) \ (((info)->base_addr & 0xff000000) | \ @@ -819,8 +817,7 @@ (info)->read_exec_only == 1 && \ (info)->seg_32bit == 0 && \ (info)->limit_in_pages == 0 && \ - (info)->useable == 0 && \ - (info)->lm == 0) + (info)->useable == 0 ) /* macros for converting segments, they do the same as those in arch/i386/kernel/process.c */ #define GET_BASE(desc) ( \ @@ -838,7 +835,6 @@ #define GET_LIMIT_PAGES(desc) (((desc)->b >> ENTRY_B_LIMIT) & 1) #define GET_PRESENT(desc) (((desc)->b >> ENTRY_B_SEG_NOT_PRESENT) & 1) #define GET_USEABLE(desc) (((desc)->b >> ENTRY_B_USEABLE) & 1) -#define GET_LONGMODE(desc) (((desc)->b >> ENTRY_B_LONGMODE) & 1) #define LINUX_CLOCK_REALTIME 0 #define LINUX_CLOCK_MONOTONIC 1 ==== //depot/projects/linuxolator/src/sys/amd64/linux32/linux32_machdep.c#28 (text+ko) ==== @@ -52,6 +52,7 @@ #include #include +#include #include #include @@ -671,7 +672,9 @@ td2->td_frame->tf_rsp = PTROUT(args->stack); if (args->flags & CLONE_SETTLS) { + struct user_segment_descriptor sd; struct l_user_desc info; + int a[2]; int idx; error = copyin((void *)td->td_frame->tf_rsi, &info, sizeof(struct l_user_desc)); @@ -684,26 +687,43 @@ * looks like we're getting the idx we returned * in the set_thread_area() syscall */ - if (idx != 6 && idx != 4) + if (idx != 6 && idx != GUGS32_SEL) return (EINVAL); /* this doesnt happen in practice */ if (idx == 6) { - /* we might copy out the entry_number as 4 */ - info.entry_number = 4; + /* we might copy out the entry_number as GUGS32_SEL */ + info.entry_number = GUGS32_SEL; error = copyout(&info, (void *) td->td_frame->tf_rsi, sizeof(struct l_user_desc)); if (error) return (error); } - /* this is taken from amd64 version of cpu_set_user_tls() */ - critical_enter(); - /* set %gs */ + a[0] = LDT_entry_a(&info); + a[1] = LDT_entry_b(&info); + + memcpy(&sd, &a, sizeof(a)); +#ifdef DEBUG + if (ldebug(clone)) + printf("Segment created in clone with CLONE_SETTLS: " + "lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, " + "type: %i, dpl: %i, p: %i, xx: %i, long: %i, " + "def32: %i, gran: %i\n", + sd.sd_lobase, + sd.sd_hibase, + sd.sd_lolimit, + sd.sd_hilimit, + sd.sd_type, + sd.sd_dpl, + sd.sd_p, + sd.sd_xx, + sd.sd_long, + sd.sd_def32, + sd.sd_gran); +#endif td2->td_pcb->pcb_gsbase = (register_t)info.base_addr; -#if 0 - wrmsr(MSR_KGSBASE, td->td_pcb->pcb_fsbase); -#endif - critical_exit(); + td2->td_pcb->pcb_gs32p = (caddr_t)&gdt[GUGS32_SEL]; + memcpy(&td2->td_pcb->pcb_gs32sd, &sd, sizeof(sd)); } #ifdef DEBUG @@ -1239,6 +1259,8 @@ linux_set_thread_area(struct thread *td, struct linux_set_thread_area_args *args) { struct l_user_desc info; + struct user_segment_descriptor sd; + int a[2]; int error; int idx; @@ -1248,7 +1270,7 @@ #ifdef DEBUG if (ldebug(set_thread_area)) - printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, %i, %i, %i, %i\n"), + printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, %i, %i, %i"), info.entry_number, info.base_addr, info.limit, @@ -1257,8 +1279,7 @@ info.read_exec_only, info.limit_in_pages, info.seg_not_present, - info.useable, - info.lm); + info.useable); #endif idx = info.entry_number; @@ -1282,25 +1303,54 @@ * we should let 4 proceed as well because we use this segment so * if code does two subsequent calls it should succeed */ - if (idx != 6 && idx != -1 && idx != 4) + if (idx != 6 && idx != -1 && idx != GUGS32_SEL) return (EINVAL); /* * we have to copy out the GDT entry we use - * FreeBSD uses GDT entry #3 for storing %gs so load that + * FreeBSD uses GUGS32_SEL for storing %gs so load that * XXX: what if userspace program doesnt check this value and tries * to use 6, 7 or 8? */ - idx = info.entry_number = 4; + idx = info.entry_number = GUGS32_SEL; error = copyout(&info, args->desc, sizeof(struct l_user_desc)); if (error) return (error); + if (LDT_empty(&info)) { + a[0] = 0; + a[1] = 0; + } else { + a[0] = LDT_entry_a(&info); + a[1] = LDT_entry_b(&info); + } + + memcpy(&sd, &a, sizeof(a)); +#ifdef DEBUG + if (ldebug(set_thread_area)) + printf("Segment created in set_thread_area: " + "lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, " + "type: %i, dpl: %i, p: %i, xx: %i, long: %i, " + "def32: %i, gran: %i\n", + sd.sd_lobase, + sd.sd_hibase, + sd.sd_lolimit, + sd.sd_hilimit, + sd.sd_type, + sd.sd_dpl, + sd.sd_p, + sd.sd_xx, + sd.sd_long, + sd.sd_def32, + sd.sd_gran); +#endif + critical_enter(); - /* set %gs */ td->td_pcb->pcb_gsbase = (register_t)info.base_addr; + td->td_pcb->pcb_gs32p = (caddr_t)&gdt[GUGS32_SEL]; + memcpy(&td->td_pcb->pcb_gs32sd, &sd, sizeof(sd)); wrmsr(MSR_KGSBASE, td->td_pcb->pcb_gsbase); - + gdt[GUGS32_SEL] = sd; critical_exit(); return (0); ==== //depot/projects/linuxolator/src/sys/amd64/linux32/linux32_sysvec.c#7 (text+ko) ==== @@ -280,7 +280,7 @@ return 0; } -extern int _ucodesel, _ucode32sel, _udatasel; +extern int _ucodesel, _ucode32sel, _udatasel, _ugs32sel; extern unsigned long linux_sznonrtsigcode; static void @@ -820,11 +820,12 @@ load_ds(_udatasel); load_es(_udatasel); load_fs(_udatasel); - load_gs(0); + load_gs(_udatasel); pcb->pcb_ds = _udatasel; pcb->pcb_es = _udatasel; pcb->pcb_fs = _udatasel; - pcb->pcb_gs = 0; + pcb->pcb_gs = _udatasel; + pcb->pcb_gs32p = NULL; bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = entry;