Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 1 Mar 2011 13:28:27 +0000 (UTC)
From:      Robert Watson <rwatson@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r219131 - in head/sys: kern sys
Message-ID:  <201103011328.p21DSRNj028078@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: rwatson
Date: Tue Mar  1 13:28:27 2011
New Revision: 219131
URL: http://svn.freebsd.org/changeset/base/219131

Log:
  Continue to introduce Capsicum Capability Mode support:
  
  Add a new system call flag, SYF_CAPENABLED, which indicates that a
  particular system call is available in capability mode.
  
  Add a new configuration file, kern/capabilities.conf (similar files
  may be introduced for other ABIs in the future), which enumerates
  system calls that are available in capability mode.  When a new
  system call is added to syscalls.master, it will also need to be
  added here (if needed).  Teach sysent parts to use this file to set
  values for SYF_CAPENABLED for the native ABI.
  
  Reviewed by:	anderson
  Discussed with:	benl, kris, pjd
  Obtained from:	Capsicum Project
  MFC after:	3 months

Added:
  head/sys/kern/capabilities.conf   (contents, props changed)
Modified:
  head/sys/kern/Makefile
  head/sys/kern/makesyscalls.sh
  head/sys/sys/sysent.h

Modified: head/sys/kern/Makefile
==============================================================================
--- head/sys/kern/Makefile	Tue Mar  1 13:24:49 2011	(r219130)
+++ head/sys/kern/Makefile	Tue Mar  1 13:28:27 2011	(r219131)
@@ -10,7 +10,8 @@ sysent: init_sysent.c syscalls.c ../sys/
 ../sys/sysproto.h
 
 init_sysent.c syscalls.c systrace_args.c ../sys/syscall.h \
-../sys/syscall.mk ../sys/sysproto.h: makesyscalls.sh syscalls.master
+../sys/syscall.mk ../sys/sysproto.h: makesyscalls.sh syscalls.master \
+capabilities.conf
 	-mv -f init_sysent.c init_sysent.c.bak
 	-mv -f syscalls.c syscalls.c.bak
 	-mv -f systrace_args.c systrace_args.c.bak

Added: head/sys/kern/capabilities.conf
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/kern/capabilities.conf	Tue Mar  1 13:28:27 2011	(r219131)
@@ -0,0 +1,756 @@
+##
+## Copyright (c) 2008-2010 Robert N. M. Watson
+## All rights reserved.
+##
+## This software was developed at the University of Cambridge Computer
+## Laboratory with support from a grant from Google, Inc.
+##
+## Redistribution and use in source and binary forms, with or without
+## modification, are permitted provided that the following conditions
+## are met:
+## 1. Redistributions of source code must retain the above copyright
+##    notice, this list of conditions and the following disclaimer.
+## 2. Redistributions in binary form must reproduce the above copyright
+##    notice, this list of conditions and the following disclaimer in the
+##    documentation and/or other materials provided with the distribution.
+##
+## THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+## ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+## IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+## ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+## FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+## DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+## OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+## LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+## OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+## SUCH DAMAGE.
+##
+## List of system calls enabled in capability mode, one name per line.
+##
+## Notes:
+## - sys_exit(2), abort2(2) and close(2) are very important.
+## - Sorted alphabetically, please keep it that way.
+##
+## $FreeBSD$
+##
+
+##
+## Allow ACL and MAC label operations by file descriptor, subject to
+## capability rights.  Allow MAC label operations on the current process but
+## we will need to scope __mac_get_pid(2).
+##
+__acl_aclcheck_fd
+__acl_delete_fd
+__acl_get_fd
+__acl_set_fd
+__mac_get_fd
+#__mac_get_pid
+__mac_get_proc
+__mac_set_fd
+__mac_set_proc
+
+##
+## Allow sysctl(2) as we scope internal to the call; this is a global
+## namespace, but there are several critical sysctls required for almost
+## anything to run, such as hw.pagesize.  For now that policy lives in the
+## kernel for performance and simplicity, but perhaps it could move to a
+## proxying daemon in userspace.
+##
+__sysctl
+
+##
+## Allow umtx operations as these are scoped by address space.
+##
+## XXXRW: Need to check this very carefully.
+##
+_umtx_lock
+_umtx_op
+_umtx_unlock
+
+##
+## Allow process termination using abort2(2).
+##
+abort2
+
+##
+## Allow accept(2) since it doesn't manipulate namespaces directly, rather
+## relies on existing bindings on a socket, subject to capability rights.
+##
+accept
+
+##
+## Allow AIO operations by file descriptor, subject to capability rights.
+##
+aio_cancel
+aio_error
+aio_fsync
+aio_read
+aio_return
+aio_suspend
+aio_waitcomplete
+aio_write
+
+##
+## audit(2) is a global operation, submitting to the global trail, but it is
+## controlled by privilege, and it might be useful to be able to submit
+## records from sandboxes.  For now, disallow, but we may want to think about
+## providing some sort of proxy service for this.
+##
+#audit
+
+##
+## Disallow bind(2) for now, even though we support CAP_BIND.
+##
+## XXXRW: Revisit this.
+##
+#bind
+
+##
+## Allow capability mode and capability system calls.
+##
+cap_enter
+cap_getmode
+cap_getrights
+cap_new
+
+##
+## Allow read-only clock operations.
+##
+clock_gettime
+clock_getres
+
+##
+## Always allow file descriptor close(2).
+##
+close
+closefrom
+
+##
+## Disallow connect(2) for now, despite CAP_CONNECT.
+##
+## XXXRW: Revisit this.
+##
+#connect
+
+##
+## cpuset(2) and related calls require scoping by process, but should
+## eventually be allowed, at least in the current process case.
+##
+#cpuset
+#cpuset_getaffinity
+#cpuset_getid
+#cpuset_setaffinity
+#cpuset_setid
+
+##
+## Always allow dup(2) and dup2(2) manipulation of the file descriptor table.
+##
+dup
+dup2
+
+##
+## Allow extended attribute operations by file descriptor, subject to
+## capability rights.
+##
+extattr_delete_fd
+extattr_get_fd
+extattr_list_fd
+extattr_set_fd
+
+##
+## Allow changing file flags, mode, and owner by file descriptor, subject to
+## capability rights.
+##
+fchflags
+fchmod
+fchown
+
+##
+## For now, allow fcntl(2), subject to capability rights, but this probably
+## needs additional scoping.
+##
+fcntl
+
+##
+## Allow fexecve(2), subject to capability rights.  We perform some scoping,
+## such as disallowing privilege escalation.
+##
+fexecve
+
+##
+## Allow flock(2), subject to capability rights.
+##
+flock
+
+##
+## Allow fork(2), even though it returns pids -- some applications seem to
+## prefer this interface.
+##
+fork
+
+##
+## Allow fpathconf(2), subject to capability rights.
+##
+fpathconf
+
+##
+## Allow various file descriptor-based I/O operations, subject to capability
+## rights.  mmap(2) requires further attention.
+##
+freebsd6_ftruncate
+freebsd6_lseek
+freebsd6_mmap
+freebsd6_pread
+freebsd6_pwrite
+
+##
+## Allow querying file and file system state with fstat(2) and fstatfs(2),
+## subject to capability rights.
+##
+fstat
+fstatfs
+
+##
+## Allow further file descriptor-based I/O operations, subject to capability
+## rights.
+##
+fsync
+ftruncate
+
+##
+## Allow futimes(2), subject to capability rights.
+##
+futimes
+
+##
+## Allow querying process audit state, subject to normal access control.
+##
+getaudit
+getaudit_addr
+getauid
+
+##
+## Allow thread context management with getcontext(2).
+##
+getcontext
+
+##
+## Allow directory I/O on a file descriptor, subject to capability rights.
+## Originally we had separate capabilities for directory-specific read
+## operations, but on BSD we allow reading the raw directory data, so we just
+## rely on CAP_READ (etc) now.
+##
+## XXXRW: Possibly these should also use CAP_SEEK.
+##
+getdents
+getdirentries
+
+##
+## Allow querying certain trivial global state.
+##
+getdomainname
+
+##
+## Allow querying current process credential state.
+##
+getegid
+geteuid
+
+##
+## Allow querying certain trivial global state.
+##
+gethostid
+gethostname
+
+##
+## Allow querying per-process timer.
+##
+getitimer
+
+##
+## Allow querying current process credential state.
+##
+getgid
+getgroups
+getlogin
+
+##
+## Allow querying certain trivial global state.
+##
+getpagesize
+getpeername
+
+##
+## Allow querying certain per-process scheduling, resource limit, and
+## credential state.
+##
+## XXXRW: getpgid(2) needs scoping.  It's not clear if it's worth scoping
+## getppid(2).  getpriority(2) needs scoping.  getrusage(2) needs scoping.
+## getsid(2) needs scoping.
+##
+getpgid
+getpgrp
+getpid
+getppid
+getpriority
+getresgid
+getresuid
+getrlimit
+getrusage
+getsid
+
+##
+## Allow querying socket state, subject to capability rights.
+##
+## XXXRW: getsockopt(2) may need more attention.
+##
+getsockname
+getsockopt
+
+##
+## Allow querying the global clock.
+##
+gettimeofday
+
+##
+## Allow querying current process credential state.
+##
+getuid
+
+##
+## Disallow ioctl(2) for now, as frequently ioctl(2) operations have global
+## scope, but this is a tricky one as it is also required for tty control.
+## We do have a capability right for this operation.
+##
+## XXXRW: This needs to be revisited.
+##
+#ioctl
+
+##
+## Allow querying current process credential state.
+##
+issetugid
+
+##
+## Allow kevent(2), as we will authorize based on capability rights on the
+## target descriptor.
+##
+## XXXRW: Do we do this?
+##
+kevent
+
+##
+## Allow message queue operations on file descriptors, subject to capability
+## rights.
+##
+kmq_notify
+kmq_setattr
+kmq_timedreceive
+kmq_timedsend
+
+##
+## Allow kqueue(2), we will control use.
+##
+kqueue
+
+##
+## Allow managing per-process timers.
+##
+ktimer_create
+ktimer_delete
+ktimer_getoverrun
+ktimer_gettime
+ktimer_settime
+
+##
+## We can't allow ktrace(2) because it relies on a global namespace, but we
+## might want to introduce an fktrace(2) of some sort.
+##
+#ktrace
+
+##
+## Allow AIO operations by file descriptor, subject to capability rights.
+##
+lio_listio
+
+##
+## Allow listen(2), subject to capability rights.
+##
+## XXXRW: One might argue this manipulates a global namespace.
+##
+listen
+
+##
+## Allow I/O-related file descriptors, subject to capability rights.
+##
+lseek
+
+##
+## Allow MAC label operations by file descriptor, subject to capability
+## rights.
+##
+mac_get_fd
+mac_set_fd
+
+##
+## Allow simple VM operations on the current process.
+##
+madvise
+mincore
+minherit
+mlock
+mlockall
+
+##
+## Allow memory mapping a file descriptor, and updating protections, subject
+## to capability rights.
+##
+## XXXRW: We currently don't properly mask VM protections using capability
+## rights.
+##
+mmap
+mprotect
+
+##
+## Allow simple VM operations on the current process.
+##
+msync
+munlock
+munlockall
+munmap
+
+##
+## Allow the current process to sleep.
+##
+nanosleep
+
+##
+## Allow querying the global clock.
+##
+ntp_gettime
+
+##
+## Allow AIO operations by file descriptor, subject to capability rights.
+##
+oaio_read
+oaio_write
+
+##
+## Allow simple VM operations on the current process.
+##
+obreak
+
+##
+## Allow AIO operations by file descriptor, subject to capability rights.
+##
+olio_listio
+
+##
+## Once Capsicum is fully merged, some of the *at(2) calls which can be
+## semantically constrained will be permitted in capability mode. For now,
+## we will simply not allow them to be called.
+##
+#faccessat
+#fstatat
+#fchmodat
+#futimesat
+#mkdirat
+#rmdirat
+#mkfifoat
+#mknodat
+#openat
+#renameat
+
+##
+## ONCE CAPSICUM IS FULLY MERGED:
+## Allow entry into open(2). This system call will fail, since access to the global
+## file namespace has been disallowed, but allowing entry into the syscall means
+## that an audit trail will be generated (which is also very useful for debugging).
+##
+#open
+
+##
+## Allow poll(2), which will be scoped by capability rights.
+##
+## XXXRW: Perhaps we don't need the OpenBSD version?
+## XXXRW: We don't yet do that scoping.
+##
+openbsd_poll
+
+##
+## Process descriptor-related system calls are allowed.
+##
+pdfork
+pdgetpid
+pdkill
+pdwait4
+
+##
+## Allow pipe(2).
+##
+pipe
+
+##
+## Allow poll(2), which will be scoped by capability rights.
+## XXXRW: We don't yet do that scoping.
+##
+poll
+
+##
+## Allow I/O-related file descriptors, subject to capability rights.
+##
+pread
+preadv
+
+##
+## Allow access to profiling state on the current process.
+##
+profil
+
+##
+## Disallow ptrace(2) for now, but we do need debugging facilities in
+## capability mode, so we will want to revisit this, possibly by scoping its
+## operation.
+##
+#ptrace
+
+##
+## Allow I/O-related file descriptors, subject to capability rights.
+##
+pwrite
+pwritev
+read
+readv
+recv
+recvfrom
+recvmsg
+
+##
+## Allow real-time scheduling primitives to be used.
+##
+## XXXRW: These require scoping.
+##
+rtprio
+rtprio_thread
+
+##
+## Allow simple VM operations on the current process.
+##
+sbrk
+
+##
+## Allow querying trivial global scheduler state.
+##
+sched_get_priority_max
+sched_get_priority_min
+
+##
+## Allow various thread/process scheduler operations.
+##
+## XXXRW: Some of these require further scoping.
+##
+sched_getparam
+sched_getscheduler
+sched_rr_getinterval
+sched_setparam
+sched_setscheduler
+sched_yield
+
+##
+## Allow I/O-related file descriptors, subject to capability rights.
+##
+sctp_generic_recvmsg
+sctp_generic_sendmsg
+sctp_generic_sendmsg_iov
+sctp_peeloff
+
+##
+## Allow select(2), which will be scoped by capability rights.
+##
+## XXXRW: But is it?
+##
+select
+
+##
+## Allow I/O-related file descriptors, subject to capability rights.  Use of
+## explicit addresses here is restricted by the system calls themselves.
+##
+send
+sendfile
+sendmsg
+sendto
+
+##
+## Allow setting per-process audit state, which is controlled separately by
+## privileges.
+##
+setaudit
+setaudit_addr
+setauid
+
+##
+## Allow setting thread context.
+##
+setcontext
+
+##
+## Allow setting current process credential state, which is controlled
+## separately by privilege.
+##
+setegid
+seteuid
+setgid
+
+##
+## Allow use of the process interval timer.
+##
+setitimer
+
+##
+## Allow setpriority(2).
+##
+## XXXRW: Requires scoping.
+##
+setpriority
+
+##
+## Allow setting current process credential state, which is controlled
+## separately by privilege.
+##
+setregid
+setresgid
+setresuid
+setreuid
+
+##
+## Allow setting process resource limits with setrlimit(2).
+##
+setrlimit
+
+##
+## Allow creating a new session with setsid(2).
+##
+setsid
+
+##
+## Allow setting socket options with setsockopt(2), subject to capability
+## rights.
+##
+## XXXRW: Might require scoping.
+##
+setsockopt
+
+##
+## Allow setting current process credential state, which is controlled
+## separately by privilege.
+##
+setuid
+
+##
+## ONCE CAPSICUM IS FULLY MERGED:
+## Allow shm_open(2), which is scoped so as to allow only access to new
+## anonymous objects.
+##
+#shm_open
+
+##
+## Allow I/O-related file descriptors, subject to capability rights.
+##
+shutdown
+
+##
+## Allow signal control on current process.
+##
+sigaction
+sigaltstack
+sigblock
+sigpending
+sigprocmask
+sigqueue
+sigreturn
+sigsetmask
+sigstack
+sigsuspend
+sigtimedwait
+sigvec
+sigwaitinfo
+
+##
+## Allow creating sockets and socket pairs with socket(2) and socketpair(2).
+##
+socket
+socketpair
+
+##
+## Allow simple VM operations on the current process.
+##
+## XXXRW: Kernel doesn't implement this, so drop?
+##
+sstk
+
+##
+## Do allow sync(2) for now, but possibly shouldn't.
+##
+sync
+
+##
+## Always allow process termination with sys_exit(2).
+##
+sys_exit
+
+##
+## sysarch(2) does rather diverse things, but is required on at least i386
+## in order to configure per-thread data.  As such, it's scoped on each
+## architecture.
+##
+sysarch
+
+##
+## Allow thread operations operating only on current process.
+##
+thr_create
+thr_exit
+thr_kill
+
+##
+## Disallow thr_kill2(2), as it may operate beyond the current process.
+##
+## XXXRW: Requires scoping.
+##
+#thr_kill2
+
+##
+## Allow thread operations operating only on current process.
+##
+thr_new
+thr_self
+thr_set_name
+thr_suspend
+thr_wake
+
+##
+## Allow manipulation of the current process umask with umask(2).
+##
+umask
+
+##
+## Allow submitting of process trace entries with utrace(2).
+##
+utrace
+
+##
+## Allow generating UUIDs with uuidgen(2).
+##
+uuidgen
+
+##
+## Allow I/O-related file descriptors, subject to capability rights.
+##
+write
+writev
+
+##
+## Allow processes to yield(2).
+##
+yield

Modified: head/sys/kern/makesyscalls.sh
==============================================================================
--- head/sys/kern/makesyscalls.sh	Tue Mar  1 13:24:49 2011	(r219130)
+++ head/sys/kern/makesyscalls.sh	Tue Mar  1 13:28:27 2011	(r219131)
@@ -39,6 +39,13 @@ sysarg="sysarg.switch.$$"
 sysprotoend="sysprotoend.$$"
 systracetmp="systrace.$$"
 
+if [ -r capabilities.conf ]; then
+	capenabled=`cat capabilities.conf | grep -v "^#" | grep -v "^$"`
+	capenabled=`echo $capenabled | sed 's/ /,/g'`
+else
+	capenabled=""
+fi
+
 trap "rm $sysaue $sysdcl $syscompat $syscompatdcl $syscompat4 $syscompat4dcl $syscompat6 $syscompat6dcl $syscompat7 $syscompat7dcl $sysent $sysinc $sysarg $sysprotoend $systracetmp" 0
 
 touch $sysaue $sysdcl $syscompat $syscompatdcl $syscompat4 $syscompat4dcl $syscompat6 $syscompat6dcl $syscompat7 $syscompat7dcl $sysent $sysinc $sysarg $sysprotoend $systracetmp
@@ -97,8 +104,11 @@ s/\$//g
 		switchname = \"$switchname\"
 		namesname = \"$namesname\"
 		infile = \"$1\"
+		capenabled_string = \"$capenabled\"
 		"'
 
+		split(capenabled_string, capenabled, ",");
+
 		printf "/*\n * System call switch table.\n *\n" > syssw
 		printf " * DO NOT EDIT-- this file is automatically generated.\n" > syssw
 		printf " * $%s$\n", "FreeBSD" > syssw
@@ -290,6 +300,18 @@ s/\$//g
 		f++	#function return type
 
 		funcname=$f
+
+		#
+		# We now know the func name, so define a flags field for it.
+		# Do this before any other processing as we may return early
+		# from it.
+		#
+		for (cap in capenabled) {
+			if (funcname == capenabled[cap]) {
+				flags = "SYF_CAPENABLED";
+			}
+		}
+
 		if (funcalias == "")
 			funcalias = funcname
 		if (argalias == "") {
@@ -348,7 +370,7 @@ s/\$//g
 	}
 
 	#
-	# The currently-empty flags field.
+	# The flags, if any.
 	#
 	{
 		flags = "0";

Modified: head/sys/sys/sysent.h
==============================================================================
--- head/sys/sys/sysent.h	Tue Mar  1 13:24:49 2011	(r219130)
+++ head/sys/sys/sysent.h	Tue Mar  1 13:28:27 2011	(r219131)
@@ -65,6 +65,11 @@ struct sysent {			/* system call table *
 	u_int32_t sy_thrcnt;
 };
 
+/*
+ * A system call is permitted in capability mode.
+ */
+#define	SYF_CAPENABLED	0x00000001
+
 #define	SY_THR_FLAGMASK	0x7
 #define	SY_THR_STATIC	0x1
 #define	SY_THR_DRAINING	0x2



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201103011328.p21DSRNj028078>