Date: Sat, 23 Jun 2012 10:15:23 +0000 (UTC)
From: Konstantin Belousov <kib@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r237477 - in head/sys: conf kern
Message-ID: <201206231015.q5NAFN7v034274@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: kib Date: Sat Jun 23 10:15:23 2012 New Revision: 237477 URL: http://svn.freebsd.org/changeset/base/237477 Log: Move the code dealing with shared page into a dedicated kern_sharedpage.c source file from kern_exec.c. MFC after: 29 days Added: head/sys/kern/kern_sharedpage.c - copied, changed from r237475, head/sys/kern/kern_exec.c Modified: head/sys/conf/files head/sys/kern/kern_exec.c Modified: head/sys/conf/files ============================================================================== --- head/sys/conf/files Sat Jun 23 10:14:51 2012 (r237476) +++ head/sys/conf/files Sat Jun 23 10:15:23 2012 (r237477) @@ -2567,6 +2567,7 @@ kern/kern_rmlock.c standard kern/kern_rwlock.c standard kern/kern_sdt.c optional kdtrace_hooks kern/kern_sema.c standard +kern/kern_sharedpage.c standard kern/kern_shutdown.c standard kern/kern_sig.c standard kern/kern_switch.c standard Modified: head/sys/kern/kern_exec.c ============================================================================== --- head/sys/kern/kern_exec.c Sat Jun 23 10:14:51 2012 (r237476) +++ head/sys/kern/kern_exec.c Sat Jun 23 10:15:23 2012 (r237477) @@ -28,7 +28,6 @@ __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" -#include "opt_compat.h" #include "opt_hwpmc_hooks.h" #include "opt_kdtrace.h" #include "opt_ktrace.h" @@ -65,7 +64,6 @@ __FBSDID("$FreeBSD$"); #include <sys/sysent.h> #include <sys/shm.h> #include <sys/sysctl.h> -#include <sys/vdso.h> #include <sys/vnode.h> #include <sys/stat.h> #ifdef KTRACE @@ -1513,193 +1511,3 @@ exec_unregister(execsw_arg) execsw = newexecsw; return (0); } - -static struct sx shared_page_alloc_sx; -static vm_object_t shared_page_obj; -static int shared_page_free; -char *shared_page_mapping; - -void -shared_page_write(int base, int size, const void *data) -{ - - bcopy(data, shared_page_mapping + base, size); -} - -static int -shared_page_alloc_locked(int size, int align) -{ - int res; - - res = roundup(shared_page_free, align); - if (res + size >= 
IDX_TO_OFF(shared_page_obj->size)) - res = -1; - else - shared_page_free = res + size; - return (res); -} - -int -shared_page_alloc(int size, int align) -{ - int res; - - sx_xlock(&shared_page_alloc_sx); - res = shared_page_alloc_locked(size, align); - sx_xunlock(&shared_page_alloc_sx); - return (res); -} - -int -shared_page_fill(int size, int align, const void *data) -{ - int res; - - sx_xlock(&shared_page_alloc_sx); - res = shared_page_alloc_locked(size, align); - if (res != -1) - shared_page_write(res, size, data); - sx_xunlock(&shared_page_alloc_sx); - return (res); -} - -static void -shared_page_init(void *dummy __unused) -{ - vm_page_t m; - vm_offset_t addr; - - sx_init(&shared_page_alloc_sx, "shpsx"); - shared_page_obj = vm_pager_allocate(OBJT_PHYS, 0, PAGE_SIZE, - VM_PROT_DEFAULT, 0, NULL); - VM_OBJECT_LOCK(shared_page_obj); - m = vm_page_grab(shared_page_obj, 0, VM_ALLOC_RETRY | VM_ALLOC_NOBUSY | - VM_ALLOC_ZERO); - m->valid = VM_PAGE_BITS_ALL; - VM_OBJECT_UNLOCK(shared_page_obj); - addr = kmem_alloc_nofault(kernel_map, PAGE_SIZE); - pmap_qenter(addr, &m, 1); - shared_page_mapping = (char *)addr; -} - -SYSINIT(shp, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)shared_page_init, - NULL); - -static void -timehands_update(struct sysentvec *sv) -{ - struct vdso_timehands th; - struct vdso_timekeep *tk; - uint32_t enabled, idx; - - enabled = tc_fill_vdso_timehands(&th); - tk = (struct vdso_timekeep *)(shared_page_mapping + - sv->sv_timekeep_off); - idx = sv->sv_timekeep_curr; - atomic_store_rel_32(&tk->tk_th[idx].th_gen, 0); - if (++idx >= VDSO_TH_NUM) - idx = 0; - sv->sv_timekeep_curr = idx; - if (++sv->sv_timekeep_gen == 0) - sv->sv_timekeep_gen = 1; - th.th_gen = 0; - if (enabled) - tk->tk_th[idx] = th; - tk->tk_enabled = enabled; - atomic_store_rel_32(&tk->tk_th[idx].th_gen, sv->sv_timekeep_gen); - tk->tk_current = idx; -} - -#ifdef COMPAT_FREEBSD32 -static void -timehands_update32(struct sysentvec *sv) -{ - struct vdso_timekeep32 *tk; - struct 
vdso_timehands32 th; - uint32_t enabled, idx; - - enabled = tc_fill_vdso_timehands32(&th); - tk = (struct vdso_timekeep32 *)(shared_page_mapping + - sv->sv_timekeep_off); - idx = sv->sv_timekeep_curr; - atomic_store_rel_32(&tk->tk_th[idx].th_gen, 0); - if (++idx >= VDSO_TH_NUM) - idx = 0; - sv->sv_timekeep_curr = idx; - if (++sv->sv_timekeep_gen == 0) - sv->sv_timekeep_gen = 1; - th.th_gen = 0; - if (enabled) - tk->tk_th[idx] = th; - tk->tk_enabled = enabled; - atomic_store_rel_32(&tk->tk_th[idx].th_gen, sv->sv_timekeep_gen); - tk->tk_current = idx; -} -#endif - -/* - * This is hackish, but easiest way to avoid creating list structures - * that needs to be iterated over from the hardclock interrupt - * context. - */ -static struct sysentvec *host_sysentvec; -#ifdef COMPAT_FREEBSD32 -static struct sysentvec *compat32_sysentvec; -#endif - -void -timekeep_push_vdso(void) -{ - - if (host_sysentvec != NULL && host_sysentvec->sv_timekeep_base != 0) - timehands_update(host_sysentvec); -#ifdef COMPAT_FREEBSD32 - if (compat32_sysentvec != NULL && - compat32_sysentvec->sv_timekeep_base != 0) - timehands_update32(compat32_sysentvec); -#endif -} - -void -exec_sysvec_init(void *param) -{ - struct sysentvec *sv; - int tk_base; - uint32_t tk_ver; - - sv = (struct sysentvec *)param; - - if ((sv->sv_flags & SV_SHP) == 0) - return; - sv->sv_shared_page_obj = shared_page_obj; - sv->sv_sigcode_base = sv->sv_shared_page_base + - shared_page_fill(*(sv->sv_szsigcode), 16, sv->sv_sigcode); - if ((sv->sv_flags & SV_ABI_MASK) != SV_ABI_FREEBSD) - return; - tk_ver = VDSO_TK_VER_CURR; -#ifdef COMPAT_FREEBSD32 - if ((sv->sv_flags & SV_ILP32) != 0) { - tk_base = shared_page_alloc(sizeof(struct vdso_timekeep32) + - sizeof(struct vdso_timehands32) * VDSO_TH_NUM, 16); - KASSERT(tk_base != -1, ("tk_base -1 for 32bit")); - shared_page_write(tk_base + offsetof(struct vdso_timekeep32, - tk_ver), sizeof(uint32_t), &tk_ver); - KASSERT(compat32_sysentvec == 0, - ("Native compat32 already registered")); - 
compat32_sysentvec = sv; - } else { -#endif - tk_base = shared_page_alloc(sizeof(struct vdso_timekeep) + - sizeof(struct vdso_timehands) * VDSO_TH_NUM, 16); - KASSERT(tk_base != -1, ("tk_base -1 for native")); - shared_page_write(tk_base + offsetof(struct vdso_timekeep, - tk_ver), sizeof(uint32_t), &tk_ver); - KASSERT(host_sysentvec == 0, ("Native already registered")); - host_sysentvec = sv; -#ifdef COMPAT_FREEBSD32 - } -#endif - sv->sv_timekeep_base = sv->sv_shared_page_base + tk_base; - sv->sv_timekeep_off = tk_base; - timekeep_push_vdso(); -} Copied and modified: head/sys/kern/kern_sharedpage.c (from r237475, head/sys/kern/kern_exec.c) ============================================================================== --- head/sys/kern/kern_exec.c Sat Jun 23 09:50:41 2012 (r237475, copy source) +++ head/sys/kern/kern_sharedpage.c Sat Jun 23 10:15:23 2012 (r237477) @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1993, David Greenman + * Copyright (c) 2010, 2012 Konstantin Belousov <kib@FreeBSD.org> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -27,1493 +27,28 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#include "opt_capsicum.h" #include "opt_compat.h" -#include "opt_hwpmc_hooks.h" -#include "opt_kdtrace.h" -#include "opt_ktrace.h" #include "opt_vm.h" #include <sys/param.h> -#include <sys/capability.h> #include <sys/systm.h> -#include <sys/capability.h> -#include <sys/eventhandler.h> +#include <sys/kernel.h> #include <sys/lock.h> #include <sys/mutex.h> -#include <sys/sysproto.h> -#include <sys/signalvar.h> -#include <sys/kernel.h> -#include <sys/mount.h> -#include <sys/filedesc.h> -#include <sys/fcntl.h> -#include <sys/acct.h> -#include <sys/exec.h> -#include <sys/imgact.h> -#include <sys/imgact_elf.h> -#include <sys/wait.h> -#include <sys/malloc.h> -#include <sys/priv.h> -#include <sys/proc.h> -#include <sys/pioctl.h> -#include <sys/namei.h> -#include <sys/resourcevar.h> -#include <sys/sched.h> -#include <sys/sdt.h> -#include <sys/sf_buf.h> -#include <sys/syscallsubr.h> #include <sys/sysent.h> -#include <sys/shm.h> #include <sys/sysctl.h> #include <sys/vdso.h> -#include <sys/vnode.h> -#include <sys/stat.h> -#ifdef KTRACE -#include <sys/ktrace.h> -#endif #include <vm/vm.h> #include <vm/vm_param.h> #include <vm/pmap.h> -#include <vm/vm_page.h> -#include <vm/vm_map.h> -#include <vm/vm_kern.h> #include <vm/vm_extern.h> +#include <vm/vm_kern.h> +#include <vm/vm_map.h> #include <vm/vm_object.h> +#include <vm/vm_page.h> #include <vm/vm_pager.h> -#ifdef HWPMC_HOOKS -#include <sys/pmckern.h> -#endif - -#include <machine/reg.h> - -#include <security/audit/audit.h> -#include <security/mac/mac_framework.h> - -#ifdef KDTRACE_HOOKS -#include <sys/dtrace_bsd.h> -dtrace_execexit_func_t dtrace_fasttrap_exec; -#endif - -SDT_PROVIDER_DECLARE(proc); -SDT_PROBE_DEFINE(proc, kernel, , exec, exec); -SDT_PROBE_ARGTYPE(proc, kernel, , exec, 0, "char *"); -SDT_PROBE_DEFINE(proc, kernel, , exec_failure, exec-failure); -SDT_PROBE_ARGTYPE(proc, kernel, , 
exec_failure, 0, "int"); -SDT_PROBE_DEFINE(proc, kernel, , exec_success, exec-success); -SDT_PROBE_ARGTYPE(proc, kernel, , exec_success, 0, "char *"); - -MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments"); - -static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS); -static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS); -static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS); -static int do_execve(struct thread *td, struct image_args *args, - struct mac *mac_p); - -/* XXX This should be vm_size_t. */ -SYSCTL_PROC(_kern, KERN_PS_STRINGS, ps_strings, CTLTYPE_ULONG|CTLFLAG_RD, - NULL, 0, sysctl_kern_ps_strings, "LU", ""); - -/* XXX This should be vm_size_t. */ -SYSCTL_PROC(_kern, KERN_USRSTACK, usrstack, CTLTYPE_ULONG|CTLFLAG_RD| - CTLFLAG_CAPRD, NULL, 0, sysctl_kern_usrstack, "LU", ""); - -SYSCTL_PROC(_kern, OID_AUTO, stackprot, CTLTYPE_INT|CTLFLAG_RD, - NULL, 0, sysctl_kern_stackprot, "I", ""); - -u_long ps_arg_cache_limit = PAGE_SIZE / 16; -SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW, - &ps_arg_cache_limit, 0, ""); - -static int map_at_zero = 0; -TUNABLE_INT("security.bsd.map_at_zero", &map_at_zero); -SYSCTL_INT(_security_bsd, OID_AUTO, map_at_zero, CTLFLAG_RW, &map_at_zero, 0, - "Permit processes to map an object at virtual address 0."); - -static int -sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS) -{ - struct proc *p; - int error; - - p = curproc; -#ifdef SCTL_MASK32 - if (req->flags & SCTL_MASK32) { - unsigned int val; - val = (unsigned int)p->p_sysent->sv_psstrings; - error = SYSCTL_OUT(req, &val, sizeof(val)); - } else -#endif - error = SYSCTL_OUT(req, &p->p_sysent->sv_psstrings, - sizeof(p->p_sysent->sv_psstrings)); - return error; -} - -static int -sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS) -{ - struct proc *p; - int error; - - p = curproc; -#ifdef SCTL_MASK32 - if (req->flags & SCTL_MASK32) { - unsigned int val; - val = (unsigned int)p->p_sysent->sv_usrstack; - error = SYSCTL_OUT(req, &val, sizeof(val)); - } else -#endif - error = 
SYSCTL_OUT(req, &p->p_sysent->sv_usrstack, - sizeof(p->p_sysent->sv_usrstack)); - return error; -} - -static int -sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS) -{ - struct proc *p; - - p = curproc; - return (SYSCTL_OUT(req, &p->p_sysent->sv_stackprot, - sizeof(p->p_sysent->sv_stackprot))); -} - -/* - * Each of the items is a pointer to a `const struct execsw', hence the - * double pointer here. - */ -static const struct execsw **execsw; - -#ifndef _SYS_SYSPROTO_H_ -struct execve_args { - char *fname; - char **argv; - char **envv; -}; -#endif - -int -sys_execve(td, uap) - struct thread *td; - struct execve_args /* { - char *fname; - char **argv; - char **envv; - } */ *uap; -{ - int error; - struct image_args args; - - error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, - uap->argv, uap->envv); - if (error == 0) - error = kern_execve(td, &args, NULL); - return (error); -} - -#ifndef _SYS_SYSPROTO_H_ -struct fexecve_args { - int fd; - char **argv; - char **envv; -} -#endif -int -sys_fexecve(struct thread *td, struct fexecve_args *uap) -{ - int error; - struct image_args args; - - error = exec_copyin_args(&args, NULL, UIO_SYSSPACE, - uap->argv, uap->envv); - if (error == 0) { - args.fd = uap->fd; - error = kern_execve(td, &args, NULL); - } - return (error); -} - -#ifndef _SYS_SYSPROTO_H_ -struct __mac_execve_args { - char *fname; - char **argv; - char **envv; - struct mac *mac_p; -}; -#endif - -int -sys___mac_execve(td, uap) - struct thread *td; - struct __mac_execve_args /* { - char *fname; - char **argv; - char **envv; - struct mac *mac_p; - } */ *uap; -{ -#ifdef MAC - int error; - struct image_args args; - - error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, - uap->argv, uap->envv); - if (error == 0) - error = kern_execve(td, &args, uap->mac_p); - return (error); -#else - return (ENOSYS); -#endif -} - -/* - * XXX: kern_execve has the astonishing property of not always returning to - * the caller. 
If sufficiently bad things happen during the call to - * do_execve(), it can end up calling exit1(); as a result, callers must - * avoid doing anything which they might need to undo (e.g., allocating - * memory). - */ -int -kern_execve(td, args, mac_p) - struct thread *td; - struct image_args *args; - struct mac *mac_p; -{ - struct proc *p = td->td_proc; - int error; - - AUDIT_ARG_ARGV(args->begin_argv, args->argc, - args->begin_envv - args->begin_argv); - AUDIT_ARG_ENVV(args->begin_envv, args->envc, - args->endp - args->begin_envv); - if (p->p_flag & P_HADTHREADS) { - PROC_LOCK(p); - if (thread_single(SINGLE_BOUNDARY)) { - PROC_UNLOCK(p); - exec_free_args(args); - return (ERESTART); /* Try again later. */ - } - PROC_UNLOCK(p); - } - - error = do_execve(td, args, mac_p); - - if (p->p_flag & P_HADTHREADS) { - PROC_LOCK(p); - /* - * If success, we upgrade to SINGLE_EXIT state to - * force other threads to suicide. - */ - if (error == 0) - thread_single(SINGLE_EXIT); - else - thread_single_end(); - PROC_UNLOCK(p); - } - - return (error); -} - -/* - * In-kernel implementation of execve(). All arguments are assumed to be - * userspace pointers from the passed thread. 
- */ -static int -do_execve(td, args, mac_p) - struct thread *td; - struct image_args *args; - struct mac *mac_p; -{ - struct proc *p = td->td_proc; - struct nameidata nd; - struct ucred *newcred = NULL, *oldcred; - struct uidinfo *euip; - register_t *stack_base; - int error, i; - struct image_params image_params, *imgp; - struct vattr attr; - int (*img_first)(struct image_params *); - struct pargs *oldargs = NULL, *newargs = NULL; - struct sigacts *oldsigacts, *newsigacts; -#ifdef KTRACE - struct vnode *tracevp = NULL; - struct ucred *tracecred = NULL; -#endif - struct vnode *textvp = NULL, *binvp = NULL; - int credential_changing; - int vfslocked; - int textset; -#ifdef MAC - struct label *interpvplabel = NULL; - int will_transition; -#endif -#ifdef HWPMC_HOOKS - struct pmckern_procexec pe; -#endif - static const char fexecv_proc_title[] = "(fexecv)"; - - vfslocked = 0; - imgp = &image_params; - - /* - * Lock the process and set the P_INEXEC flag to indicate that - * it should be left alone until we're done here. This is - * necessary to avoid race conditions - e.g. in ptrace() - - * that might allow a local user to illicitly obtain elevated - * privileges. 
- */ - PROC_LOCK(p); - KASSERT((p->p_flag & P_INEXEC) == 0, - ("%s(): process already has P_INEXEC flag", __func__)); - p->p_flag |= P_INEXEC; - PROC_UNLOCK(p); - - /* - * Initialize part of the common data - */ - imgp->proc = p; - imgp->execlabel = NULL; - imgp->attr = &attr; - imgp->entry_addr = 0; - imgp->reloc_base = 0; - imgp->vmspace_destroyed = 0; - imgp->interpreted = 0; - imgp->opened = 0; - imgp->interpreter_name = NULL; - imgp->auxargs = NULL; - imgp->vp = NULL; - imgp->object = NULL; - imgp->firstpage = NULL; - imgp->ps_strings = 0; - imgp->auxarg_size = 0; - imgp->args = args; - imgp->execpath = imgp->freepath = NULL; - imgp->execpathp = 0; - imgp->canary = 0; - imgp->canarylen = 0; - imgp->pagesizes = 0; - imgp->pagesizeslen = 0; - imgp->stack_prot = 0; - -#ifdef MAC - error = mac_execve_enter(imgp, mac_p); - if (error) - goto exec_fail; -#endif - - imgp->image_header = NULL; - - /* - * Translate the file name. namei() returns a vnode pointer - * in ni_vp amoung other things. - * - * XXXAUDIT: It would be desirable to also audit the name of the - * interpreter if this is an interpreted binary. - */ - if (args->fname != NULL) { - NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | FOLLOW | SAVENAME - | MPSAFE | AUDITVNODE1, UIO_SYSSPACE, args->fname, td); - } - - SDT_PROBE(proc, kernel, , exec, args->fname, 0, 0, 0, 0 ); - -interpret: - if (args->fname != NULL) { -#ifdef CAPABILITY_MODE - /* - * While capability mode can't reach this point via direct - * path arguments to execve(), we also don't allow - * interpreters to be used in capability mode (for now). - * Catch indirect lookups and return a permissions error. 
- */ - if (IN_CAPABILITY_MODE(td)) { - error = ECAPMODE; - goto exec_fail; - } -#endif - error = namei(&nd); - if (error) - goto exec_fail; - - vfslocked = NDHASGIANT(&nd); - binvp = nd.ni_vp; - imgp->vp = binvp; - } else { - AUDIT_ARG_FD(args->fd); - /* - * Some might argue that CAP_READ and/or CAP_MMAP should also - * be required here; such arguments will be entertained. - */ - error = fgetvp_read(td, args->fd, CAP_FEXECVE, &binvp); - if (error) - goto exec_fail; - vfslocked = VFS_LOCK_GIANT(binvp->v_mount); - vn_lock(binvp, LK_EXCLUSIVE | LK_RETRY); - AUDIT_ARG_VNODE1(binvp); - imgp->vp = binvp; - } - - /* - * Check file permissions (also 'opens' file) - */ - error = exec_check_permissions(imgp); - if (error) - goto exec_fail_dealloc; - - imgp->object = imgp->vp->v_object; - if (imgp->object != NULL) - vm_object_reference(imgp->object); - - /* - * Set VV_TEXT now so no one can write to the executable while we're - * activating it. - * - * Remember if this was set before and unset it in case this is not - * actually an executable image. - */ - textset = imgp->vp->v_vflag & VV_TEXT; - ASSERT_VOP_ELOCKED(imgp->vp, "vv_text"); - imgp->vp->v_vflag |= VV_TEXT; - - error = exec_map_first_page(imgp); - if (error) - goto exec_fail_dealloc; - - imgp->proc->p_osrel = 0; - /* - * If the current process has a special image activator it - * wants to try first, call it. For example, emulating shell - * scripts differently. - */ - error = -1; - if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL) - error = img_first(imgp); - - /* - * Loop through the list of image activators, calling each one. - * An activator returns -1 if there is no match, 0 on success, - * and an error otherwise. 
- */ - for (i = 0; error == -1 && execsw[i]; ++i) { - if (execsw[i]->ex_imgact == NULL || - execsw[i]->ex_imgact == img_first) { - continue; - } - error = (*execsw[i]->ex_imgact)(imgp); - } - - if (error) { - if (error == -1) { - if (textset == 0) { - ASSERT_VOP_ELOCKED(imgp->vp, "vv_text"); - imgp->vp->v_vflag &= ~VV_TEXT; - } - error = ENOEXEC; - } - goto exec_fail_dealloc; - } - - /* - * Special interpreter operation, cleanup and loop up to try to - * activate the interpreter. - */ - if (imgp->interpreted) { - exec_unmap_first_page(imgp); - /* - * VV_TEXT needs to be unset for scripts. There is a short - * period before we determine that something is a script where - * VV_TEXT will be set. The vnode lock is held over this - * entire period so nothing should illegitimately be blocked. - */ - imgp->vp->v_vflag &= ~VV_TEXT; - /* free name buffer and old vnode */ - if (args->fname != NULL) - NDFREE(&nd, NDF_ONLY_PNBUF); -#ifdef MAC - mac_execve_interpreter_enter(binvp, &interpvplabel); -#endif - if (imgp->opened) { - VOP_CLOSE(binvp, FREAD, td->td_ucred, td); - imgp->opened = 0; - } - vput(binvp); - vm_object_deallocate(imgp->object); - imgp->object = NULL; - VFS_UNLOCK_GIANT(vfslocked); - vfslocked = 0; - /* set new name to that of the interpreter */ - NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME | MPSAFE, - UIO_SYSSPACE, imgp->interpreter_name, td); - args->fname = imgp->interpreter_name; - goto interpret; - } - - /* - * NB: We unlock the vnode here because it is believed that none - * of the sv_copyout_strings/sv_fixup operations require the vnode. - */ - VOP_UNLOCK(imgp->vp, 0); - - /* - * Do the best to calculate the full path to the image file. 
- */ - if (imgp->auxargs != NULL && - ((args->fname != NULL && args->fname[0] == '/') || - vn_fullpath(td, imgp->vp, &imgp->execpath, &imgp->freepath) != 0)) - imgp->execpath = args->fname; - - /* - * Copy out strings (args and env) and initialize stack base - */ - if (p->p_sysent->sv_copyout_strings) - stack_base = (*p->p_sysent->sv_copyout_strings)(imgp); - else - stack_base = exec_copyout_strings(imgp); - - /* - * If custom stack fixup routine present for this process - * let it do the stack setup. - * Else stuff argument count as first item on stack - */ - if (p->p_sysent->sv_fixup != NULL) - (*p->p_sysent->sv_fixup)(&stack_base, imgp); - else - suword(--stack_base, imgp->args->argc); - - /* - * For security and other reasons, the file descriptor table cannot - * be shared after an exec. - */ - fdunshare(p, td); - - /* - * Malloc things before we need locks. - */ - newcred = crget(); - euip = uifind(attr.va_uid); - i = imgp->args->begin_envv - imgp->args->begin_argv; - /* Cache arguments if they fit inside our allowance */ - if (ps_arg_cache_limit >= i + sizeof(struct pargs)) { - newargs = pargs_alloc(i); - bcopy(imgp->args->begin_argv, newargs->ar_args, i); - } - - /* close files on exec */ - fdcloseexec(td); - vn_lock(imgp->vp, LK_SHARED | LK_RETRY); - - /* Get a reference to the vnode prior to locking the proc */ - VREF(binvp); - - /* - * For security and other reasons, signal handlers cannot - * be shared after an exec. The new process gets a copy of the old - * handlers. In execsigs(), the new process will have its signals - * reset. 
- */ - PROC_LOCK(p); - oldcred = crcopysafe(p, newcred); - if (sigacts_shared(p->p_sigacts)) { - oldsigacts = p->p_sigacts; - PROC_UNLOCK(p); - newsigacts = sigacts_alloc(); - sigacts_copy(newsigacts, oldsigacts); - PROC_LOCK(p); - p->p_sigacts = newsigacts; - } else - oldsigacts = NULL; - - /* Stop profiling */ - stopprofclock(p); - - /* reset caught signals */ - execsigs(p); - - /* name this process - nameiexec(p, ndp) */ - bzero(p->p_comm, sizeof(p->p_comm)); - if (args->fname) - bcopy(nd.ni_cnd.cn_nameptr, p->p_comm, - min(nd.ni_cnd.cn_namelen, MAXCOMLEN)); - else if (vn_commname(binvp, p->p_comm, sizeof(p->p_comm)) != 0) - bcopy(fexecv_proc_title, p->p_comm, sizeof(fexecv_proc_title)); - bcopy(p->p_comm, td->td_name, sizeof(td->td_name)); -#ifdef KTR - sched_clear_tdname(td); -#endif - - /* - * mark as execed, wakeup the process that vforked (if any) and tell - * it that it now has its own resources back - */ - p->p_flag |= P_EXEC; - if (p->p_pptr && (p->p_flag & P_PPWAIT)) { - p->p_flag &= ~P_PPWAIT; - cv_broadcast(&p->p_pwait); - } - - /* - * Implement image setuid/setgid. - * - * Don't honor setuid/setgid if the filesystem prohibits it or if - * the process is being traced. - * - * We disable setuid/setgid/etc in compatibility mode on the basis - * that most setugid applications are not written with that - * environment in mind, and will therefore almost certainly operate - * incorrectly. In principle there's no reason that setugid - * applications might not be useful in capability mode, so we may want - * to reconsider this conservative design choice in the future. - * - * XXXMAC: For the time being, use NOSUID to also prohibit - * transitions on the file system. 
- */ - credential_changing = 0; - credential_changing |= (attr.va_mode & S_ISUID) && oldcred->cr_uid != - attr.va_uid; - credential_changing |= (attr.va_mode & S_ISGID) && oldcred->cr_gid != - attr.va_gid; -#ifdef MAC - will_transition = mac_vnode_execve_will_transition(oldcred, imgp->vp, - interpvplabel, imgp); - credential_changing |= will_transition; -#endif - - if (credential_changing && -#ifdef CAPABILITY_MODE - ((oldcred->cr_flags & CRED_FLAG_CAPMODE) == 0) && -#endif - (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 && - (p->p_flag & P_TRACED) == 0) { - /* - * Turn off syscall tracing for set-id programs, except for - * root. Record any set-id flags first to make sure that - * we do not regain any tracing during a possible block. - */ - setsugid(p); - -#ifdef KTRACE - if (priv_check_cred(oldcred, PRIV_DEBUG_DIFFCRED, 0)) - ktrprocexec(p, &tracecred, &tracevp); -#endif - /* - * Close any file descriptors 0..2 that reference procfs, - * then make sure file descriptors 0..2 are in use. - * - * setugidsafety() may call closef() and then pfind() - * which may grab the process lock. - * fdcheckstd() may call falloc() which may block to - * allocate memory, so temporarily drop the process lock. - */ - PROC_UNLOCK(p); - VOP_UNLOCK(imgp->vp, 0); - setugidsafety(td); - error = fdcheckstd(td); - vn_lock(imgp->vp, LK_SHARED | LK_RETRY); - if (error != 0) - goto done1; - PROC_LOCK(p); - /* - * Set the new credentials. - */ - if (attr.va_mode & S_ISUID) - change_euid(newcred, euip); - if (attr.va_mode & S_ISGID) - change_egid(newcred, attr.va_gid); -#ifdef MAC - if (will_transition) { - mac_vnode_execve_transition(oldcred, newcred, imgp->vp, - interpvplabel, imgp); - } -#endif - /* - * Implement correct POSIX saved-id behavior. - * - * XXXMAC: Note that the current logic will save the - * uid and gid if a MAC domain transition occurs, even - * though maybe it shouldn't. 
- */ - change_svuid(newcred, newcred->cr_uid); - change_svgid(newcred, newcred->cr_gid); - p->p_ucred = newcred; - newcred = NULL; - } else { - if (oldcred->cr_uid == oldcred->cr_ruid && - oldcred->cr_gid == oldcred->cr_rgid) - p->p_flag &= ~P_SUGID; - /* - * Implement correct POSIX saved-id behavior. - * - * XXX: It's not clear that the existing behavior is - * POSIX-compliant. A number of sources indicate that the - * saved uid/gid should only be updated if the new ruid is - * not equal to the old ruid, or the new euid is not equal - * to the old euid and the new euid is not equal to the old - * ruid. The FreeBSD code always updates the saved uid/gid. - * Also, this code uses the new (replaced) euid and egid as - * the source, which may or may not be the right ones to use. - */ - if (oldcred->cr_svuid != oldcred->cr_uid || - oldcred->cr_svgid != oldcred->cr_gid) { - change_svuid(newcred, newcred->cr_uid); - change_svgid(newcred, newcred->cr_gid); - p->p_ucred = newcred; - newcred = NULL; - } - } - - /* - * Store the vp for use in procfs. This vnode was referenced prior - * to locking the proc lock. - */ - textvp = p->p_textvp; - p->p_textvp = binvp; - -#ifdef KDTRACE_HOOKS - /* - * Tell the DTrace fasttrap provider about the exec if it - * has declared an interest. - */ - if (dtrace_fasttrap_exec) - dtrace_fasttrap_exec(p); -#endif - - /* - * Notify others that we exec'd, and clear the P_INEXEC flag - * as we're now a bona fide freshly-execed process. - */ - KNOTE_LOCKED(&p->p_klist, NOTE_EXEC); - p->p_flag &= ~P_INEXEC; - - /* clear "fork but no exec" flag, as we _are_ execing */ - p->p_acflag &= ~AFORK; *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201206231015.q5NAFN7v034274>