From owner-svn-src-head@FreeBSD.ORG Sun May 24 15:28:21 2015 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTPS id C519561B; Sun, 24 May 2015 15:28:21 +0000 (UTC) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id B103B1CF9; Sun, 24 May 2015 15:28:21 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.9/8.14.9) with ESMTP id t4OFSLgk051618; Sun, 24 May 2015 15:28:21 GMT (envelope-from dchagin@FreeBSD.org) Received: (from dchagin@localhost) by svn.freebsd.org (8.14.9/8.14.9/Submit) id t4OFSILt051595; Sun, 24 May 2015 15:28:18 GMT (envelope-from dchagin@FreeBSD.org) Message-Id: <201505241528.t4OFSILt051595@svn.freebsd.org> X-Authentication-Warning: svn.freebsd.org: dchagin set sender to dchagin@FreeBSD.org using -f From: Dmitry Chagin Date: Sun, 24 May 2015 15:28:18 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r283407 - in head/sys: amd64/linux32 compat/linux conf i386/linux modules/linux X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 24 May 2015 15:28:21 -0000 Author: dchagin Date: Sun May 24 15:28:17 2015 New Revision: 283407 URL: https://svnweb.freebsd.org/changeset/base/283407 Log: Implement vdso - virtual dynamic shared object. Through vdso Linux exposes functions from kernel with proper DWARF CFI information so that it becomes easier to unwind through them. Using vdso is a mandatory for a thread cancelation && cleanup on a modern glibc. Differential Revision: https://reviews.freebsd.org/D1060 Added: head/sys/amd64/linux32/linux32_vdso.lds.s (contents, props changed) head/sys/compat/linux/linux_vdso.c (contents, props changed) head/sys/compat/linux/linux_vdso.h (contents, props changed) head/sys/i386/linux/linux_vdso.lds.s (contents, props changed) Modified: head/sys/amd64/linux32/linux.h head/sys/amd64/linux32/linux32_genassym.c head/sys/amd64/linux32/linux32_locore.s head/sys/amd64/linux32/linux32_sysvec.c head/sys/compat/linux/linux_misc.h head/sys/conf/files.amd64 head/sys/conf/files.i386 head/sys/i386/linux/linux.h head/sys/i386/linux/linux_genassym.c head/sys/i386/linux/linux_locore.s head/sys/i386/linux/linux_sysvec.c head/sys/modules/linux/Makefile Modified: head/sys/amd64/linux32/linux.h ============================================================================== --- head/sys/amd64/linux32/linux.h Sun May 24 15:27:31 2015 (r283406) +++ head/sys/amd64/linux32/linux.h Sun May 24 15:28:17 2015 (r283407) @@ -114,7 +114,7 @@ typedef struct { /* * Miscellaneous */ -#define LINUX_AT_COUNT 16 /* Count of used aux entry types. +#define LINUX_AT_COUNT 18 /* Count of used aux entry types. * Keep this synchronized with * elf_linux_fixup() code. */ Modified: head/sys/amd64/linux32/linux32_genassym.c ============================================================================== --- head/sys/amd64/linux32/linux32_genassym.c Sun May 24 15:27:31 2015 (r283406) +++ head/sys/amd64/linux32/linux32_genassym.c Sun May 24 15:28:17 2015 (r283407) @@ -7,9 +7,12 @@ __FBSDID("$FreeBSD$"); #include #include +#include ASSYM(LINUX_SIGF_HANDLER, offsetof(struct l_sigframe, sf_handler)); ASSYM(LINUX_SIGF_SC, offsetof(struct l_sigframe, sf_sc)); ASSYM(LINUX_RT_SIGF_HANDLER, offsetof(struct l_rt_sigframe, sf_handler)); ASSYM(LINUX_RT_SIGF_UC, offsetof(struct l_rt_sigframe, sf_sc)); ASSYM(LINUX_RT_SIGF_SC, offsetof(struct l_ucontext, uc_mcontext)); +ASSYM(LINUX_VERSION_CODE, LINUX_VERSION_CODE); +ASSYM(LINUX_SC_ESP, offsetof(struct l_sigcontext, sc_esp)); Modified: head/sys/amd64/linux32/linux32_locore.s ============================================================================== --- head/sys/amd64/linux32/linux32_locore.s Sun May 24 15:27:31 2015 (r283406) +++ head/sys/amd64/linux32/linux32_locore.s Sun May 24 15:28:17 2015 (r283407) @@ -8,31 +8,143 @@ .text .code32 -NON_GPROF_ENTRY(linux_sigcode) - call *LINUX_SIGF_HANDLER(%esp) - leal LINUX_SIGF_SC(%esp),%ebx /* linux scp */ - movl %esp, %ebx /* pass sigframe */ - push %eax /* fake ret addr */ +/* + * To avoid excess stack frame the signal trampoline code emulates + * the 'call' instruction. + */ +NON_GPROF_ENTRY(linux32_sigcode) + movl %esp, %ebx /* preserve sigframe */ + call .getip0 +.getip0: + popl %eax + add $.startsigcode-.getip0, %eax /* ret address */ + push %eax + jmp *LINUX_SIGF_HANDLER(%ebx) +.startsigcode: + popl %eax movl $LINUX_SYS_linux_sigreturn,%eax /* linux_sigreturn() */ int $0x80 /* enter kernel with args */ +.endsigcode: 0: jmp 0b - ALIGN_TEXT -/* XXXXX */ -linux_rt_sigcode: - call *LINUX_RT_SIGF_HANDLER(%esp) + +NON_GPROF_ENTRY(linux32_rt_sigcode) leal LINUX_RT_SIGF_UC(%esp),%ebx /* linux ucp */ leal LINUX_RT_SIGF_SC(%ebx),%ecx /* linux sigcontext */ - push %eax /* fake ret addr */ + movl %esp, %edi + call .getip1 +.getip1: + popl %eax + add $.startrtsigcode-.getip1, %eax /* ret address */ + push %eax + jmp *LINUX_RT_SIGF_HANDLER(%edi) +.startrtsigcode: movl $LINUX_SYS_linux_rt_sigreturn,%eax /* linux_rt_sigreturn() */ int $0x80 /* enter kernel with args */ +.endrtsigcode: 0: jmp 0b - ALIGN_TEXT -/* XXXXX */ -linux_esigcode: - - .data - .globl linux_szsigcode, linux_sznonrtsigcode -linux_szsigcode: - .long linux_esigcode-linux_sigcode -linux_sznonrtsigcode: - .long linux_rt_sigcode-linux_sigcode + +NON_GPROF_ENTRY(linux32_vsyscall) +.startvsyscall: + int $0x80 + ret +.endvsyscall: + + + .section .note.Linux, "a",@note + .long 2f - 1f /* namesz */ + .balign 4 + .long 4f - 3f /* descsz */ + .long 0 +1: + .asciz "Linux" +2: + .balign 4 +3: + .long LINUX_VERSION_CODE +4: + .balign 4 + .previous + + +#define do_cfa_expr(offset) \ + .byte 0x0f; /* DW_CFA_def_cfa_expression */ \ + .uleb128 11f-10f; /* length */ \ +10: .byte 0x74; /* DW_OP_breg4 */ \ + .sleb128 offset; /* offset */ \ + .byte 0x06; /* DW_OP_deref */ \ +11: + + + /* CIE */ + .section .eh_frame,"a",@progbits +.LSTARTFRAMEDLSI1: + .long .LENDCIEDLSI1-.LSTARTCIEDLSI1 +.LSTARTCIEDLSI1: + .long 0 /* CIE ID */ + .byte 1 /* Version number */ + .string "zRS" /* NULL-terminated + * augmentation string + */ + .uleb128 1 /* Code alignment factor */ + .sleb128 -4 /* Data alignment factor */ + .byte 8 /* Return address + * register column + */ + .uleb128 1 /* Augmentation value length */ + .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ + .byte 0 /* DW_CFA_nop */ + .align 4 +.LENDCIEDLSI1: + + /* FDE */ + .long .LENDFDEDLSI1-.LSTARTFDEDLSI1 /* Length FDE */ +.LSTARTFDEDLSI1: + .long .LSTARTFDEDLSI1-.LSTARTFRAMEDLSI1 /* CIE pointer */ + .long .startsigcode-. /* PC-relative start address */ + .long .endsigcode-.startsigcode + .uleb128 0 /* Augmentation */ + do_cfa_expr(LINUX_SIGF_SC-8) + .align 4 +.LENDFDEDLSI1: + + .long .LENDFDEDLSI2-.LSTARTFDEDLSI2 /* Length FDE */ +.LSTARTFDEDLSI2: + .long .LSTARTFDEDLSI2-.LSTARTFRAMEDLSI1 /* CIE pointer */ + .long .startrtsigcode-. /* PC-relative start address */ + .long .endrtsigcode-.startrtsigcode + .uleb128 0 /* Augmentation */ + do_cfa_expr(LINUX_RT_SIGF_SC-4+LINUX_SC_ESP) + .align 4 +.LENDFDEDLSI2: + .previous + + .section .eh_frame,"a",@progbits +.LSTARTFRAMEDLSI2: + .long .LENDCIEDLSI2-.LSTARTCIEDLSI2 +.LSTARTCIEDLSI2: + .long 0 /* CIE ID */ + .byte 1 /* Version number */ + .string "zR" /* NULL-terminated + * augmentation string + */ + .uleb128 1 /* Code alignment factor */ + .sleb128 -4 /* Data alignment factor */ + .byte 8 /* Return address register column */ + .uleb128 1 /* Augmentation value length */ + .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ + .byte 0x0c /* DW_CFA_def_cfa */ + .uleb128 4 + .uleb128 4 + .byte 0x88 /* DW_CFA_offset, column 0x8 */ + .uleb128 1 + .align 4 +.LENDCIEDLSI2: + .long .LENDFDEDLSI3-.LSTARTFDEDLSI3 /* Length FDE */ +.LSTARTFDEDLSI3: + .long .LSTARTFDEDLSI3-.LSTARTFRAMEDLSI2 /* CIE pointer */ + .long .startvsyscall-. /* PC-relative start address */ + .long .endvsyscall-.startvsyscall + .uleb128 0 + .align 4 +.LENDFDEDLSI3: + .previous Modified: head/sys/amd64/linux32/linux32_sysvec.c ============================================================================== --- head/sys/amd64/linux32/linux32_sysvec.c Sun May 24 15:27:31 2015 (r283406) +++ head/sys/amd64/linux32/linux32_sysvec.c Sun May 24 15:28:17 2015 (r283407) @@ -83,6 +83,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include MODULE_VERSION(linux, 1); @@ -111,8 +112,11 @@ MALLOC_DEFINE(M_LINUX, "linux", "Linux m const char *linux_platform = "i686"; static int linux_szplatform; -extern char linux_sigcode[]; -extern int linux_szsigcode; +static int linux_szsigcode; +static vm_object_t linux_shared_page_obj; +static char *linux_shared_page_mapping; +extern char _binary_linux32_locore_o_start; +extern char _binary_linux32_locore_o_end; extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; @@ -127,6 +131,8 @@ static void exec_linux_setregs(struct th struct image_params *imgp, u_long stack); static void linux32_fixlimit(struct rlimit *rl, int which); static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel); +static void linux_vdso_install(void *param); +static void linux_vdso_deinstall(void *param); static eventhandler_tag linux_exit_tag; static eventhandler_tag linux_exec_tag; @@ -220,6 +226,10 @@ struct linux32_ps_strings { u_int ps_nenvstr; /* the number of environment strings */ }; +LINUX_VDSO_SYM_INTPTR(linux32_sigcode); +LINUX_VDSO_SYM_INTPTR(linux32_rt_sigcode); +LINUX_VDSO_SYM_INTPTR(linux32_vsyscall); + /* * If FreeBSD & Linux have a difference of opinion about what a trap * means, deal with it here. @@ -259,6 +269,9 @@ elf_linux_fixup(register_t **stack_base, args = (Elf32_Auxargs *)imgp->auxargs; pos = base + (imgp->args->argc + imgp->args->envc + 2); + AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO_EHDR, + imgp->proc->p_sysent->sv_shared_page_base); + AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO, linux32_vsyscall); AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature); /* @@ -297,8 +310,6 @@ elf_linux_fixup(register_t **stack_base, return (0); } -extern unsigned long linux_sznonrtsigcode; - static void linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { @@ -353,7 +364,8 @@ linux_rt_sendsig(sig_t catcher, ksiginfo ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); /* - * Build the signal context to be used by sigreturn. + * Build the signal context to be used by sigreturn + * and libgcc unwind. */ frame.sf_sc.uc_flags = 0; /* XXX ??? */ frame.sf_sc.uc_link = 0; /* XXX ??? */ @@ -371,6 +383,7 @@ linux_rt_sendsig(sig_t catcher, ksiginfo frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi; frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp; frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx; + frame.sf_sc.uc_mcontext.sc_esp = regs->tf_rsp; frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx; frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx; frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax; @@ -412,7 +425,7 @@ linux_rt_sendsig(sig_t catcher, ksiginfo * Build context to run handler in. */ regs->tf_rsp = PTROUT(fp); - regs->tf_rip = p->p_sysent->sv_sigcode_base + linux_sznonrtsigcode; + regs->tf_rip = linux32_rt_sigcode; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucode32sel; regs->tf_ss = _udatasel; @@ -507,6 +520,7 @@ linux_sendsig(sig_t catcher, ksiginfo_t frame.sf_sc.sc_esi = regs->tf_rsi; frame.sf_sc.sc_ebp = regs->tf_rbp; frame.sf_sc.sc_ebx = regs->tf_rbx; + frame.sf_sc.sc_esp = regs->tf_rsp; frame.sf_sc.sc_edx = regs->tf_rdx; frame.sf_sc.sc_ecx = regs->tf_rcx; frame.sf_sc.sc_eax = regs->tf_rax; @@ -535,7 +549,7 @@ linux_sendsig(sig_t catcher, ksiginfo_t * Build context to run handler in. */ regs->tf_rsp = PTROUT(fp); - regs->tf_rip = p->p_sysent->sv_sigcode_base; + regs->tf_rip = linux32_sigcode; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucode32sel; regs->tf_ss = _udatasel; @@ -1014,7 +1028,7 @@ struct sysentvec elf_linux_sysvec = { .sv_transtrap = translate_traps, .sv_fixup = elf_linux_fixup, .sv_sendsig = linux_sendsig, - .sv_sigcode = linux_sigcode, + .sv_sigcode = &_binary_linux32_locore_o_start, .sv_szsigcode = &linux_szsigcode, .sv_prepsyscall = NULL, .sv_name = "Linux ELF32", @@ -1040,7 +1054,39 @@ struct sysentvec elf_linux_sysvec = { .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, }; -INIT_SYSENTVEC(elf_sysvec, &elf_linux_sysvec); + +static void +linux_vdso_install(void *param) +{ + + linux_szsigcode = (&_binary_linux32_locore_o_end - + &_binary_linux32_locore_o_start); + + if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) + panic("Linux invalid vdso size\n"); + + __elfN(linux_vdso_fixup)(&elf_linux_sysvec); + + linux_shared_page_obj = __elfN(linux_shared_page_init) + (&linux_shared_page_mapping); + + __elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX32_SHAREDPAGE); + + bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, + linux_szsigcode); + elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; +} +SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, + (sysinit_cfunc_t)linux_vdso_install, NULL); + +static void +linux_vdso_deinstall(void *param) +{ + + __elfN(linux_shared_page_fini)(linux_shared_page_obj); +}; +SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, + (sysinit_cfunc_t)linux_vdso_deinstall, NULL); static char GNU_ABI_VENDOR[] = "GNU"; static int GNULINUX_ABI_DESC = 0; Added: head/sys/amd64/linux32/linux32_vdso.lds.s ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/sys/amd64/linux32/linux32_vdso.lds.s Sun May 24 15:28:17 2015 (r283407) @@ -0,0 +1,65 @@ +/* + * Linker script for 32-bit vDSO. + * Copied from Linux kernel arch/x86/vdso/vdso-layout.lds.S + * and arch/x86/vdso/vdso32/vdso32.lds.S + * + * $FreeBSD$ + */ + +SECTIONS +{ + . = . + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + + .dynamic : { *(.dynamic) } :text :dynamic + + .rodata : { *(.rodata*) } :text + .data : { + *(.data*) + *(.sdata*) + *(.got.plt) *(.got) + *(.gnu.linkonce.d.*) + *(.bss*) + *(.dynbss*) + *(.gnu.linkonce.b.*) + } + + .altinstructions : { *(.altinstructions) } + .altinstr_replacement : { *(.altinstr_replacement) } + + . = ALIGN(0x100); + .text : { *(.text*) } :text =0x90909090 +} + +PHDRS +{ + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +ENTRY(linux32_vsyscall); + +VERSION +{ + LINUX_2.5 { + global: + linux32_vsyscall; + linux32_sigcode; + linux32_rt_sigcode; + local: *; + }; +} Modified: head/sys/compat/linux/linux_misc.h ============================================================================== --- head/sys/compat/linux/linux_misc.h Sun May 24 15:27:31 2015 (r283406) +++ head/sys/compat/linux/linux_misc.h Sun May 24 15:28:17 2015 (r283407) @@ -71,6 +71,8 @@ extern const char *linux_platform; * differ from AT_PLATFORM. */ #define LINUX_AT_EXECFN 31 /* filename of program */ +#define LINUX_AT_SYSINFO 32 /* vsyscall */ +#define LINUX_AT_SYSINFO_EHDR 33 /* vdso header */ /* Linux sets the i387 to extended precision. */ #if defined(__i386__) || defined(__amd64__) Added: head/sys/compat/linux/linux_vdso.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/sys/compat/linux/linux_vdso.c Sun May 24 15:28:17 2015 (r283407) @@ -0,0 +1,240 @@ +/*- + * Copyright (c) 2013 Dmitry Chagin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_compat.h" + +#define __ELF_WORD_SIZE 32 + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +SLIST_HEAD(, linux_vdso_sym) __elfN(linux_vdso_syms) = + SLIST_HEAD_INITIALIZER(__elfN(linux_vdso_syms)); + +static int __elfN(symtabindex); +static int __elfN(symstrindex); + +static void +__elfN(linux_vdso_lookup)(Elf_Ehdr *, struct linux_vdso_sym *); + + +void +__elfN(linux_vdso_sym_init)(struct linux_vdso_sym *s) +{ + + SLIST_INSERT_HEAD(&__elfN(linux_vdso_syms), s, sym); +} + +vm_object_t +__elfN(linux_shared_page_init)(char **mapping) +{ + vm_page_t m; + vm_object_t obj; + vm_offset_t addr; + + obj = vm_pager_allocate(OBJT_PHYS, 0, PAGE_SIZE, + VM_PROT_DEFAULT, 0, NULL); + VM_OBJECT_WLOCK(obj); + m = vm_page_grab(obj, 0, VM_ALLOC_NOBUSY | VM_ALLOC_ZERO); + m->valid = VM_PAGE_BITS_ALL; + VM_OBJECT_WUNLOCK(obj); + addr = kva_alloc(PAGE_SIZE); + pmap_qenter(addr, &m, 1); + *mapping = (char *)addr; + return (obj); +} + +void +__elfN(linux_shared_page_fini)(vm_object_t obj) +{ + + vm_object_deallocate(obj); +} + +void +__elfN(linux_vdso_fixup)(struct sysentvec *sv) +{ + Elf_Ehdr *ehdr; + Elf_Shdr *shdr; + int i; + + ehdr = (Elf_Ehdr *) sv->sv_sigcode; + + if (!IS_ELF(*ehdr) || + ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || + ehdr->e_ident[EI_DATA] != ELF_TARG_DATA || + ehdr->e_ident[EI_VERSION] != EV_CURRENT || + ehdr->e_shoff == 0 || + ehdr->e_shentsize != sizeof(Elf_Shdr)) + panic("Linux invalid vdso header.\n"); + + if (ehdr->e_type != ET_DYN) + panic("Linux invalid vdso header.\n"); + + shdr = (Elf_Shdr *) ((caddr_t)ehdr + ehdr->e_shoff); + + __elfN(symtabindex) = -1; + __elfN(symstrindex) = -1; + for (i = 0; i < ehdr->e_shnum; i++) { + if (shdr[i].sh_size == 0) + continue; + if (shdr[i].sh_type == SHT_DYNSYM) { + __elfN(symtabindex) = i; + __elfN(symstrindex) = shdr[i].sh_link; + } + } + + if (__elfN(symtabindex) == -1 || __elfN(symstrindex) == -1) + panic("Linux invalid vdso header.\n"); + + ehdr->e_ident[EI_OSABI] = ELFOSABI_LINUX; +} + +void +__elfN(linux_vdso_reloc)(struct sysentvec *sv, int vdso_adjust) +{ + struct linux_vdso_sym *lsym; + Elf_Ehdr *ehdr; + Elf_Phdr *phdr; + Elf_Shdr *shdr; + Elf_Dyn *dyn; + Elf_Sym *sym; + int i, symcnt; + + ehdr = (Elf_Ehdr *) sv->sv_sigcode; + + /* Adjust our so relative to the sigcode_base */ + if (vdso_adjust != 0) { + ehdr->e_entry += vdso_adjust; + phdr = (Elf_Phdr *)((caddr_t)ehdr + ehdr->e_phoff); + + /* phdrs */ + for (i = 0; i < ehdr->e_phnum; i++) { + phdr[i].p_vaddr += vdso_adjust; + if (phdr[i].p_type != PT_DYNAMIC) + continue; + dyn = (Elf_Dyn *)((caddr_t)ehdr + phdr[i].p_offset); + for(; dyn->d_tag != DT_NULL; dyn++) { + switch (dyn->d_tag) { + case DT_PLTGOT: + case DT_HASH: + case DT_STRTAB: + case DT_SYMTAB: + case DT_RELA: + case DT_INIT: + case DT_FINI: + case DT_REL: + case DT_DEBUG: + case DT_JMPREL: + case DT_VERSYM: + case DT_VERDEF: + case DT_VERNEED: + case DT_ADDRRNGLO ... DT_ADDRRNGHI: + dyn->d_un.d_ptr += vdso_adjust; + break; + case DT_ENCODING ... DT_LOOS-1: + case DT_LOOS ... DT_HIOS: + if (dyn->d_tag >= DT_ENCODING && + (dyn->d_tag & 1) == 0) + dyn->d_un.d_ptr += vdso_adjust; + break; + default: + break; + } + } + } + + /* sections */ + shdr = (Elf_Shdr *)((caddr_t)ehdr + ehdr->e_shoff); + for(i = 0; i < ehdr->e_shnum; i++) { + if (!(shdr[i].sh_flags & SHF_ALLOC)) + continue; + shdr[i].sh_addr += vdso_adjust; + if (shdr[i].sh_type != SHT_SYMTAB && + shdr[i].sh_type != SHT_DYNSYM) + continue; + + sym = (Elf_Sym *)((caddr_t)ehdr + shdr[i].sh_offset); + symcnt = shdr[i].sh_size / sizeof(*sym); + + for(i = 0; i < symcnt; i++, sym++) { + if (sym->st_shndx == SHN_UNDEF || + sym->st_shndx == SHN_ABS) + continue; + sym->st_value += vdso_adjust; + } + } + } + + SLIST_FOREACH(lsym, &__elfN(linux_vdso_syms), sym) + __elfN(linux_vdso_lookup)(ehdr, lsym); +} + +static void +__elfN(linux_vdso_lookup)(Elf_Ehdr *ehdr, struct linux_vdso_sym *vsym) +{ + vm_offset_t strtab, symname; + uint32_t symcnt; + Elf_Shdr *shdr; + int i; + + shdr = (Elf_Shdr *) ((caddr_t)ehdr + ehdr->e_shoff); + + strtab = (vm_offset_t)((caddr_t)ehdr + + shdr[__elfN(symstrindex)].sh_offset); + Elf_Sym *sym = (Elf_Sym *)((caddr_t)ehdr + + shdr[__elfN(symtabindex)].sh_offset); + symcnt = shdr[__elfN(symtabindex)].sh_size / sizeof(*sym); + + for (i = 0; i < symcnt; ++i, ++sym) { + symname = strtab + sym->st_name; + if (strncmp(vsym->symname, (char *)symname, vsym->size) == 0) { + *vsym->ptr = (uintptr_t)sym->st_value; + break; + } + } +} Added: head/sys/compat/linux/linux_vdso.h ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/sys/compat/linux/linux_vdso.h Sun May 24 15:28:17 2015 (r283407) @@ -0,0 +1,65 @@ +/*- + * Copyright (c) 2013 Dmitry Chagin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _LINUX_VDSO_H_ +#define _LINUX_VDSO_H_ + +#include + +struct linux_vdso_sym { + SLIST_ENTRY(linux_vdso_sym) sym; + uint32_t size; + uintptr_t * ptr; + char symname[]; +}; + +vm_object_t __elfN(linux_shared_page_init)(char **); +void __elfN(linux_shared_page_fini)(vm_object_t); +void __elfN(linux_vdso_fixup)(struct sysentvec *); +void __elfN(linux_vdso_reloc)(struct sysentvec *, int); +void __elfN(linux_vdso_sym_init)(struct linux_vdso_sym *); + +#define LINUX_VDSO_SYM_INTPTR(name) \ +uintptr_t name; \ +LINUX_VDSO_SYM_DEFINE(name) + +#define LINUX_VDSO_SYM_CHAR(name) \ +const char * name; \ +LINUX_VDSO_SYM_DEFINE(name) + +#define LINUX_VDSO_SYM_DEFINE(name) \ +static struct linux_vdso_sym name ## sym = { \ + .symname = #name, \ + .size = sizeof(#name), \ + .ptr = (uintptr_t *)&name \ +}; \ +SYSINIT(__elfN(name ## _sym_init), SI_SUB_EXEC, \ + SI_ORDER_FIRST, __elfN(linux_vdso_sym_init), &name ## sym); \ +struct __hack + +#endif /* _LINUX_VDSO_H_ */ Modified: head/sys/conf/files.amd64 ============================================================================== --- head/sys/conf/files.amd64 Sun May 24 15:27:31 2015 (r283406) +++ head/sys/conf/files.amd64 Sun May 24 15:28:17 2015 (r283407) @@ -20,6 +20,18 @@ linux32_assym.h optional compat_linux3 no-obj no-implicit-rule before-depend \ clean "linux32_assym.h" # +linux32_locore.o optional compat_linux32 \ + dependency "linux32_assym.h $S/amd64/linux32/linux32_locore.s" \ + compile-with "${CC} -x assembler-with-cpp -DLOCORE -m32 -shared -s -pipe -I. -I$S -Werror -Wall -fno-common -nostdinc -nostdlib -Wl,-T$S/amd64/linux32/linux32_vdso.lds.s -Wl,-soname=linux32_vdso.so,--eh-frame-hdr,-fPIC,-warn-common ${.IMPSRC} -o ${.TARGET}" \ + no-obj no-implicit-rule \ + clean "linux32_locore.o" +# +linux32_vdso.so optional compat_linux32 \ + dependency "linux32_locore.o" \ + compile-with "${OBJCOPY} --input binary --output elf64-x86-64-freebsd --binary-architecture i386 linux32_locore.o ${.TARGET}" \ + no-implicit-rule \ + clean "linux32_vdso.so" +# ia32_genassym.o standard \ dependency "$S/compat/ia32/ia32_genassym.c" \ compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ @@ -473,8 +485,6 @@ compat/linsysfs/linsysfs.c optional lins # Linux/i386 binary support # amd64/linux32/linux32_dummy.c optional compat_linux32 -amd64/linux32/linux32_locore.s optional compat_linux32 \ - dependency "linux32_assym.h" amd64/linux32/linux32_machdep.c optional compat_linux32 amd64/linux32/linux32_support.s optional compat_linux32 \ dependency "linux32_assym.h" @@ -497,6 +507,7 @@ compat/linux/linux_time.c optional compa compat/linux/linux_timer.c optional compat_linux32 compat/linux/linux_uid16.c optional compat_linux32 compat/linux/linux_util.c optional compat_linux32 +compat/linux/linux_vdso.c optional compat_linux32 dev/amr/amr_linux.c optional compat_linux32 amr dev/mfi/mfi_linux.c optional compat_linux32 mfi # Modified: head/sys/conf/files.i386 ============================================================================== --- head/sys/conf/files.i386 Sun May 24 15:27:31 2015 (r283406) +++ head/sys/conf/files.i386 Sun May 24 15:28:17 2015 (r283407) @@ -19,6 +19,18 @@ linux_assym.h optional compat_linux \ no-obj no-implicit-rule before-depend \ clean "linux_assym.h" # +linux_locore.o optional compat_linux \ + dependency "linux_assym.h $S/i386/linux/linux_locore.s" \ + compile-with "${CC} -x assembler-with-cpp -DLOCORE -shared -s -pipe -I. -I$S -Werror -Wall -fno-common -nostdinc -nostdlib -Wl,-T$S/i386/linux/linux_vdso.lds.s -Wl,-soname=linux_vdso.so,--eh-frame-hdr,-fPIC,-warn-common ${.IMPSRC} -o ${.TARGET}" \ + no-obj no-implicit-rule \ + clean "linux_locore.o" +# +linux_vdso.so optional compat_linux \ + dependency "linux_locore.o" \ + compile-with "${OBJCOPY} --input binary --output elf32-i386-freebsd --binary-architecture i386 linux_locore.o ${.TARGET}" \ + no-implicit-rule \ + clean "linux_vdso.so" +# svr4_genassym.o optional compat_svr4 \ dependency "$S/i386/svr4/svr4_genassym.c" \ compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ @@ -86,6 +98,7 @@ compat/linux/linux_time.c optional compa compat/linux/linux_timer.c optional compat_linux compat/linux/linux_uid16.c optional compat_linux compat/linux/linux_util.c optional compat_linux +compat/linux/linux_vdso.c optional compat_linux compat/ndis/kern_ndis.c optional ndisapi pci compat/ndis/kern_windrv.c optional ndisapi pci compat/ndis/subr_hal.c optional ndisapi pci @@ -490,8 +503,6 @@ i386/isa/prof_machdep.c optional profil i386/isa/spic.c optional spic i386/linux/imgact_linux.c optional compat_linux i386/linux/linux_dummy.c optional compat_linux -i386/linux/linux_locore.s optional compat_linux \ - dependency "linux_assym.h" i386/linux/linux_machdep.c optional compat_linux i386/linux/linux_ptrace.c optional compat_linux i386/linux/linux_support.s optional compat_linux \ Modified: head/sys/i386/linux/linux.h ============================================================================== --- head/sys/i386/linux/linux.h Sun May 24 15:27:31 2015 (r283406) +++ head/sys/i386/linux/linux.h Sun May 24 15:28:17 2015 (r283407) @@ -108,7 +108,7 @@ typedef struct { /* * Miscellaneous */ -#define LINUX_AT_COUNT 16 /* Count of used aux entry types. +#define LINUX_AT_COUNT 18 /* Count of used aux entry types. * Keep this synchronized with * elf_linux_fixup() code. */ Modified: head/sys/i386/linux/linux_genassym.c ============================================================================== --- head/sys/i386/linux/linux_genassym.c Sun May 24 15:27:31 2015 (r283406) +++ head/sys/i386/linux/linux_genassym.c Sun May 24 15:28:17 2015 (r283407) @@ -6,6 +6,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include ASSYM(LINUX_SIGF_HANDLER, offsetof(struct l_sigframe, sf_handler)); ASSYM(LINUX_SIGF_SC, offsetof(struct l_sigframe, sf_sc)); @@ -14,3 +15,5 @@ ASSYM(LINUX_SC_EFLAGS, offsetof(struct l ASSYM(LINUX_RT_SIGF_HANDLER, offsetof(struct l_rt_sigframe, sf_handler)); ASSYM(LINUX_RT_SIGF_UC, offsetof(struct l_rt_sigframe, sf_sc)); ASSYM(LINUX_RT_SIGF_SC, offsetof(struct l_ucontext, uc_mcontext)); +ASSYM(LINUX_SC_ESP, offsetof(struct l_sigcontext, sc_esp)); +ASSYM(LINUX_VERSION_CODE, LINUX_VERSION_CODE); Modified: head/sys/i386/linux/linux_locore.s ============================================================================== --- head/sys/i386/linux/linux_locore.s Sun May 24 15:27:31 2015 (r283406) +++ head/sys/i386/linux/linux_locore.s Sun May 24 15:28:17 2015 (r283407) @@ -5,33 +5,145 @@ #include /* system call numbers */ +#include "assym.s" + +/* + * To avoid excess stack frame the signal trampoline code emulates + * the 'call' instruction. + */ NON_GPROF_ENTRY(linux_sigcode) - call *LINUX_SIGF_HANDLER(%esp) - leal LINUX_SIGF_SC(%esp),%ebx /* linux scp */ - mov LINUX_SC_GS(%ebx),%gs - movl %esp, %ebx /* pass sigframe */ - push %eax /* fake ret addr */ + movl %esp, %ebx /* preserve sigframe */ + call .getip0 +.getip0: + popl %eax + add $.startsigcode-.getip0, %eax /* ret address */ + push %eax + jmp *LINUX_SIGF_HANDLER(%ebx) +.startsigcode: + popl %eax /* gcc unwind code need this */ movl $LINUX_SYS_linux_sigreturn,%eax /* linux_sigreturn() */ int $0x80 /* enter kernel with args */ +.endsigcode: 0: jmp 0b - ALIGN_TEXT -/* XXXXX */ -linux_rt_sigcode: - call *LINUX_RT_SIGF_HANDLER(%esp) + +NON_GPROF_ENTRY(linux_rt_sigcode) leal LINUX_RT_SIGF_UC(%esp),%ebx /* linux ucp */ leal LINUX_RT_SIGF_SC(%ebx),%ecx /* linux sigcontext */ - mov LINUX_SC_GS(%ecx),%gs - push %eax /* fake ret addr */ + movl %esp, %edi + call .getip1 +.getip1: + popl %eax + add $.startrtsigcode-.getip1, %eax /* ret address */ + push %eax + jmp *LINUX_RT_SIGF_HANDLER(%edi) +.startrtsigcode: movl $LINUX_SYS_linux_rt_sigreturn,%eax /* linux_rt_sigreturn() */ int $0x80 /* enter kernel with args */ +.endrtsigcode: 0: jmp 0b - ALIGN_TEXT -/* XXXXX */ -linux_esigcode: - - .data - .globl linux_szsigcode, linux_sznonrtsigcode -linux_szsigcode: - .long linux_esigcode-linux_sigcode -linux_sznonrtsigcode: - .long linux_rt_sigcode-linux_sigcode + +NON_GPROF_ENTRY(linux_vsyscall) +.startvsyscall: + int $0x80 + ret +.endvsyscall: + + + .section .note.Linux, "a",@note + .long 2f - 1f /* namesz */ + .balign 4 + .long 4f - 3f /* descsz */ + .long 0 +1: + .asciz "Linux" +2: + .balign 4 +3: + .long LINUX_VERSION_CODE +4: + .balign 4 + .previous + + +#define do_cfa_expr(offset) \ + .byte 0x0f; /* DW_CFA_def_cfa_expression */ \ + .uleb128 11f-10f; /* length */ \ +10: .byte 0x74; /* DW_OP_breg4 */ \ + .sleb128 offset; /* offset */ \ + .byte 0x06; /* DW_OP_deref */ \ +11: + + + /* CIE */ + .section .eh_frame,"a",@progbits +.LSTARTFRAMEDLSI1: + .long .LENDCIEDLSI1-.LSTARTCIEDLSI1 +.LSTARTCIEDLSI1: + .long 0 /* CIE ID */ + .byte 1 /* Version number */ + .string "zRS" /* NULL-terminated + * augmentation string + */ + .uleb128 1 /* Code alignment factor */ + .sleb128 -4 /* Data alignment factor */ + .byte 8 /* Return address + * register column + */ + .uleb128 1 /* Augmentation value length */ + .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ + .byte 0 /* DW_CFA_nop */ + .align 4 +.LENDCIEDLSI1: + + /* FDE */ + .long .LENDFDEDLSI1-.LSTARTFDEDLSI1 /* Length FDE */ +.LSTARTFDEDLSI1: + .long .LSTARTFDEDLSI1-.LSTARTFRAMEDLSI1 /* CIE pointer */ + .long .startsigcode-. /* PC-relative start address */ + .long .endsigcode-.startsigcode + .uleb128 0 /* Augmentation */ + do_cfa_expr(LINUX_SIGF_SC-8) + .align 4 +.LENDFDEDLSI1: + + .long .LENDFDEDLSI2-.LSTARTFDEDLSI2 /* Length FDE */ +.LSTARTFDEDLSI2: + .long .LSTARTFDEDLSI2-.LSTARTFRAMEDLSI1 /* CIE pointer */ + .long .startrtsigcode-. /* PC-relative start address */ + .long .endrtsigcode-.startrtsigcode + .uleb128 0 /* Augmentation */ + do_cfa_expr(LINUX_RT_SIGF_SC-4+LINUX_SC_ESP) + .align 4 +.LENDFDEDLSI2: + .previous + + .section .eh_frame,"a",@progbits +.LSTARTFRAMEDLSI2: + .long .LENDCIEDLSI2-.LSTARTCIEDLSI2 +.LSTARTCIEDLSI2: + .long 0 /* CIE ID */ + .byte 1 /* Version number */ + .string "zR" /* NULL-terminated + * augmentation string + */ + .uleb128 1 /* Code alignment factor */ + .sleb128 -4 /* Data alignment factor */ + .byte 8 /* Return address register column */ + .uleb128 1 /* Augmentation value length */ + .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ + .byte 0x0c /* DW_CFA_def_cfa */ + .uleb128 4 + .uleb128 4 + .byte 0x88 /* DW_CFA_offset, column 0x8 */ + .uleb128 1 *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***