From owner-svn-src-head@FreeBSD.ORG Sun May 31 12:10:04 2009 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 723281065673; Sun, 31 May 2009 12:10:04 +0000 (UTC) (envelope-from zec@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 5F3628FC15; Sun, 31 May 2009 12:10:04 +0000 (UTC) (envelope-from zec@FreeBSD.org) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id n4VCA4lt042726; Sun, 31 May 2009 12:10:04 GMT (envelope-from zec@svn.freebsd.org) Received: (from zec@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id n4VCA4rA042721; Sun, 31 May 2009 12:10:04 GMT (envelope-from zec@svn.freebsd.org) Message-Id: <200905311210.n4VCA4rA042721@svn.freebsd.org> From: Marko Zec Date: Sun, 31 May 2009 12:10:04 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r193166 - in head/sys: kern net sys X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 31 May 2009 12:10:04 -0000 Author: zec Date: Sun May 31 12:10:04 2009 New Revision: 193166 URL: http://svn.freebsd.org/changeset/base/193166 Log: Introduce an interim userland-kernel API for creating vnets and assigning ifnets from one vnet to another. Deletion of vnets is not yet supported. The interface is implemented as an ioctl extension so that no syscalls had to be introduced. 
This should be acceptable given that the new interface will be used for a short / interim period only, until the new jail management framework gains the capability of managing vnets. This method for managing vimages / vnets has been in use for the past 7 years without any observable issues. The userland tool to be used in conjunction with the interim API can be found in p4: //depot/projects/vimage-commit2/src/usr.sbin/vimage/... and will most probably never get committed to svn. While here, bump copyright notices in kern_vimage.c and vimage.h to cover work done in year 2009. Approved by: julian (mentor) Discussed with: bz, rwatson Modified: head/sys/kern/kern_prot.c head/sys/kern/kern_vimage.c head/sys/net/if.c head/sys/sys/sockio.h head/sys/sys/vimage.h Modified: head/sys/kern/kern_prot.c ============================================================================== --- head/sys/kern/kern_prot.c Sun May 31 12:04:01 2009 (r193165) +++ head/sys/kern/kern_prot.c Sun May 31 12:10:04 2009 (r193166) @@ -1748,7 +1748,11 @@ p_canwait(struct thread *td, struct proc KASSERT(td == curthread, ("%s: td not curthread", __func__)); PROC_LOCK_ASSERT(p, MA_OWNED); - if ((error = prison_check(td->td_ucred, p->p_ucred))) + if ( +#ifdef VIMAGE /* XXX temporary until struct vimage goes away */ + !vi_child_of(TD_TO_VIMAGE(td), P_TO_VIMAGE(p)) && +#endif + (error = prison_check(td->td_ucred, p->p_ucred))) return (error); #ifdef MAC if ((error = mac_proc_check_wait(td->td_ucred, p))) Modified: head/sys/kern/kern_vimage.c ============================================================================== --- head/sys/kern/kern_vimage.c Sun May 31 12:04:01 2009 (r193165) +++ head/sys/kern/kern_vimage.c Sun May 31 12:10:04 2009 (r193166) @@ -1,6 +1,6 @@ /*- - * Copyright (c) 2004-2008 University of Zagreb - * Copyright (c) 2006-2008 FreeBSD Foundation + * Copyright (c) 2004-2009 University of Zagreb + * Copyright (c) 2006-2009 FreeBSD Foundation * * This software was developed by the University of 
Zagreb and the * FreeBSD Foundation under sponsorship by the Stichting NLnet and the @@ -34,16 +34,24 @@ __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include -#include #include #include +#include #include -#include +#include +#include +#include +#include +#include #include #ifdef DDB #include #endif +#include +#include +#include + #ifndef VIMAGE_GLOBALS MALLOC_DEFINE(M_VIMAGE, "vimage", "vimage resource container"); @@ -57,6 +65,22 @@ static int vnet_mod_constructor(struct v static int vnet_mod_destructor(struct vnet_modlink *); #ifdef VIMAGE +static struct vimage *vimage_by_name(struct vimage *, char *); +static struct vimage *vi_alloc(struct vimage *, char *); +static struct vimage *vimage_get_next(struct vimage *, struct vimage *, int); +static void vimage_relative_name(struct vimage *, struct vimage *, + char *, int); +#endif + +#define VNET_LIST_WLOCK() \ + mtx_lock(&vnet_list_refc_mtx); \ + while (vnet_list_refc != 0) \ + cv_wait(&vnet_list_condvar, &vnet_list_refc_mtx); + +#define VNET_LIST_WUNLOCK() \ + mtx_unlock(&vnet_list_refc_mtx); + +#ifdef VIMAGE struct vimage_list_head vimage_head; struct vnet_list_head vnet_head; struct vprocg_list_head vprocg_head; @@ -67,9 +91,294 @@ struct vprocg vprocg_0; #endif #ifdef VIMAGE +struct cv vnet_list_condvar; +struct mtx vnet_list_refc_mtx; +int vnet_list_refc = 0; + +static u_int last_vi_id = 0; +static u_int last_vnet_id = 0; +static u_int last_vprocg_id = 0; + struct vnet *vnet0; #endif +#ifdef VIMAGE + +/* + * Interim userspace interface - will be replaced by jail soon. + */ + +/* + * Move an ifnet to another vnet. The ifnet can be specified either + * by ifp argument, or by name contained in vi_req->vi_if_xname if NULL is + * passed as ifp. The target vnet can be specified either by vnet + * argument or by name. If vnet name equals to ".." or vi_req is set to + * NULL the interface is moved to the parent vnet. 
+ */ +int +vi_if_move(struct vi_req *vi_req, struct ifnet *ifp, struct vimage *vip) +{ + struct vimage *new_vip; + struct vnet *new_vnet = NULL; + + /* Check for API / ABI version mismatch. */ + if (vi_req->vi_api_cookie != VI_API_COOKIE) + return (EDOOFUS); + + /* Find the target vnet. */ + if (vi_req == NULL || strcmp(vi_req->vi_name, "..") == 0) { + if (IS_DEFAULT_VIMAGE(vip)) + return (ENXIO); + new_vnet = vip->vi_parent->v_net; + } else { + new_vip = vimage_by_name(vip, vi_req->vi_name); + if (new_vip == NULL) + return (ENXIO); + new_vnet = new_vip->v_net; + } + + /* Try to find the target ifnet by name. */ + if (ifp == NULL) + ifp = ifunit(vi_req->vi_if_xname); + + if (ifp == NULL) + return (ENXIO); + + /* + * Check for naming clashes in target vnet. Not locked so races + * are possible. + */ + if (vi_req != NULL) { + struct ifnet *t_ifp; + + CURVNET_SET_QUIET(new_vnet); + t_ifp = ifunit(vi_req->vi_if_xname); + CURVNET_RESTORE(); + if (t_ifp != NULL) + return (EEXIST); + } + + /* Detach from curvnet and attach to new_vnet. */ + if_vmove(ifp, new_vnet); + + /* Report the new if_xname back to the userland */ + if (vi_req != NULL) + sprintf(vi_req->vi_if_xname, "%s", ifp->if_xname); + + return (0); +} + +int +vi_td_ioctl(u_long cmd, struct vi_req *vi_req, struct thread *td) +{ + int error = 0; + struct vimage *vip = TD_TO_VIMAGE(td); + struct vimage *vip_r = NULL; + + /* Check for API / ABI version mismatch. */ + if (vi_req->vi_api_cookie != VI_API_COOKIE) + return (EDOOFUS); + + error = priv_check(td, PRIV_REBOOT); /* XXX temp. 
priv abuse */ + if (error) + return (error); + + vip_r = vimage_by_name(vip, vi_req->vi_name); + if (vip_r == NULL && !(vi_req->vi_req_action & VI_CREATE)) + return (ESRCH); + if (vip_r != NULL && vi_req->vi_req_action & VI_CREATE) + return (EADDRINUSE); + if (vi_req->vi_req_action == VI_GETNEXT) { + vip_r = vimage_get_next(vip, vip_r, 0); + if (vip_r == NULL) + return (ESRCH); + } + if (vi_req->vi_req_action == VI_GETNEXT_RECURSE) { + vip_r = vimage_get_next(vip, vip_r, 1); + if (vip_r == NULL) + return (ESRCH); + } + + if (vip_r && !vi_child_of(vip, vip_r) && /* XXX delete the rest? */ + vi_req->vi_req_action != VI_GET && + vi_req->vi_req_action != VI_GETNEXT) + return (EPERM); + + switch (cmd) { + + case SIOCGPVIMAGE: + vimage_relative_name(vip, vip_r, vi_req->vi_name, + sizeof (vi_req->vi_name)); + vi_req->vi_proc_count = vip_r->v_procg->nprocs; + vi_req->vi_if_count = vip_r->v_net->ifcnt; + vi_req->vi_sock_count = vip_r->v_net->sockcnt; + break; + + case SIOCSPVIMAGE: + if (vi_req->vi_req_action == VI_DESTROY) { +#ifdef NOTYET + error = vi_destroy(vip_r); +#else + error = EOPNOTSUPP; +#endif + break; + } + + if (vi_req->vi_req_action == VI_SWITCHTO) { + struct proc *p = td->td_proc; + struct ucred *oldcred, *newcred; + + /* + * XXX priv_check()? + * XXX allow only a single td per proc here? 
+ */ + newcred = crget(); + PROC_LOCK(p); + oldcred = p->p_ucred; + setsugid(p); + crcopy(newcred, oldcred); + refcount_release(&newcred->cr_vimage->vi_ucredrefc); + newcred->cr_vimage = vip_r; + refcount_acquire(&newcred->cr_vimage->vi_ucredrefc); + p->p_ucred = newcred; + PROC_UNLOCK(p); + sx_xlock(&allproc_lock); + oldcred->cr_vimage->v_procg->nprocs--; + refcount_release(&oldcred->cr_vimage->vi_ucredrefc); + P_TO_VPROCG(p)->nprocs++; + sx_xunlock(&allproc_lock); + crfree(oldcred); + break; + } + + if (vi_req->vi_req_action & VI_CREATE) { + char *dotpos; + + dotpos = strrchr(vi_req->vi_name, '.'); + if (dotpos != NULL) { + *dotpos = 0; + vip = vimage_by_name(vip, vi_req->vi_name); + if (vip == NULL) + return (ESRCH); + dotpos++; + vip_r = vi_alloc(vip, dotpos); + } else + vip_r = vi_alloc(vip, vi_req->vi_name); + if (vip_r == NULL) + return (ENOMEM); + } + } + return (error); +} + +int +vi_child_of(struct vimage *parent, struct vimage *child) +{ + + if (child == parent) + return (0); + for (; child; child = child->vi_parent) + if (child == parent) + return (1); + return (0); +} + +static struct vimage * +vimage_by_name(struct vimage *top, char *name) +{ + struct vimage *vip; + char *next_name; + int namelen; + + next_name = strchr(name, '.'); + if (next_name != NULL) { + namelen = next_name - name; + next_name++; + if (namelen == 0) { + if (strlen(next_name) == 0) + return (top); /* '.' 
== this vimage */ + else + return (NULL); + } + } else + namelen = strlen(name); + if (namelen == 0) + return (NULL); + LIST_FOREACH(vip, &top->vi_child_head, vi_sibling) { + if (strlen(vip->vi_name) == namelen && + strncmp(name, vip->vi_name, namelen) == 0) { + if (next_name != NULL) + return (vimage_by_name(vip, next_name)); + else + return (vip); + } + } + return (NULL); +} + +static void +vimage_relative_name(struct vimage *top, struct vimage *where, + char *buffer, int bufflen) +{ + int used = 1; + + if (where == top) { + sprintf(buffer, "."); + return; + } else + *buffer = 0; + + do { + int namelen = strlen(where->vi_name); + + if (namelen + used + 1 >= bufflen) + panic("buffer overflow"); + + if (used > 1) { + bcopy(buffer, &buffer[namelen + 1], used); + buffer[namelen] = '.'; + used++; + } else + bcopy(buffer, &buffer[namelen], used); + bcopy(where->vi_name, buffer, namelen); + used += namelen; + where = where->vi_parent; + } while (where != top); +} + +static struct vimage * +vimage_get_next(struct vimage *top, struct vimage *where, int recurse) +{ + struct vimage *next; + + if (recurse) { + /* Try to go deeper in the hierarchy */ + next = LIST_FIRST(&where->vi_child_head); + if (next != NULL) + return (next); + } + + do { + /* Try to find next sibling */ + next = LIST_NEXT(where, vi_sibling); + if (!recurse || next != NULL) + return (next); + + /* Nothing left on this level, go one level up */ + where = where->vi_parent; + } while (where != top->vi_parent); + + /* Nothing left to be visited, we are done */ + return (NULL); +} + +#endif /* VIMAGE */ /* User interface block */ + + +/* + * Kernel interfaces and handlers. + */ + void vnet_mod_register(const struct vnet_modinfo *vmi) { @@ -221,7 +530,7 @@ vnet_mod_constructor(struct vnet_modlink void *mem = malloc(vmi->vmi_size, M_VNET, M_NOWAIT | M_ZERO); if (mem == NULL) /* XXX should return error, not panic. 
*/ - panic("vi_alloc: malloc for %s\n", vmi->vmi_name); + panic("malloc for %s\n", vmi->vmi_name); curvnet->mod_data[vmi->vmi_id] = mem; } #endif @@ -301,43 +610,84 @@ vi_symlookup(struct kld_sym_lookup *look return (ENOENT); } -static void -vi_init(void *unused) -{ #ifdef VIMAGE +static struct vimage * +vi_alloc(struct vimage *parent, char *name) +{ struct vimage *vip; struct vprocg *vprocg; struct vnet *vnet; -#endif - - TAILQ_INIT(&vnet_modlink_head); - TAILQ_INIT(&vnet_modpending_head); - -#ifdef VIMAGE - LIST_INIT(&vimage_head); - LIST_INIT(&vprocg_head); - LIST_INIT(&vnet_head); + struct vnet_modlink *vml; vip = malloc(sizeof(struct vimage), M_VIMAGE, M_NOWAIT | M_ZERO); if (vip == NULL) - panic("malloc failed for struct vimage"); + panic("vi_alloc: malloc failed for vimage \"%s\"\n", name); + vip->vi_id = last_vi_id++; + LIST_INIT(&vip->vi_child_head); + sprintf(vip->vi_name, "%s", name); + vip->vi_parent = parent; + /* XXX locking */ + if (parent != NULL) + LIST_INSERT_HEAD(&parent->vi_child_head, vip, vi_sibling); + else if (!LIST_EMPTY(&vimage_head)) + panic("there can be only one default vimage!"); LIST_INSERT_HEAD(&vimage_head, vip, vi_le); + vnet = malloc(sizeof(struct vnet), M_VNET, M_NOWAIT | M_ZERO); + if (vnet == NULL) + panic("vi_alloc: malloc failed for vnet \"%s\"\n", name); + vip->v_net = vnet; + vnet->vnet_id = last_vnet_id++; + if (vnet->vnet_id == 0) + vnet0 = vnet; + vnet->vnet_magic_n = VNET_MAGIC_N; + vprocg = malloc(sizeof(struct vprocg), M_VPROCG, M_NOWAIT | M_ZERO); if (vprocg == NULL) - panic("malloc failed for struct vprocg"); + panic("vi_alloc: malloc failed for vprocg \"%s\"\n", name); vip->v_procg = vprocg; - LIST_INSERT_HEAD(&vprocg_head, vprocg, vprocg_le); + vprocg->vprocg_id = last_vprocg_id++; - vnet = malloc(sizeof(struct vnet), M_VNET, M_NOWAIT | M_ZERO); - if (vnet == NULL) - panic("vi_alloc: malloc failed"); + /* Initialize / attach vnet module instances. 
*/ + CURVNET_SET_QUIET(vnet); + TAILQ_FOREACH(vml, &vnet_modlink_head, vml_mod_le) + vnet_mod_constructor(vml); + CURVNET_RESTORE(); + + VNET_LIST_WLOCK(); LIST_INSERT_HEAD(&vnet_head, vnet, vnet_le); - vnet->vnet_magic_n = VNET_MAGIC_N; - vip->v_net = vnet; - vnet0 = vnet; + VNET_LIST_WUNLOCK(); + + /* XXX locking */ + LIST_INSERT_HEAD(&vprocg_head, vprocg, vprocg_le); + + return (vip); +} +#endif /* VIMAGE */ - /* We MUST clear curvnet in vi_init_done before going SMP. */ +static void +vi_init(void *unused) +{ + + TAILQ_INIT(&vnet_modlink_head); + TAILQ_INIT(&vnet_modpending_head); + +#ifdef VIMAGE + LIST_INIT(&vimage_head); + LIST_INIT(&vprocg_head); + LIST_INIT(&vnet_head); + + mtx_init(&vnet_list_refc_mtx, "vnet_list_refc_mtx", NULL, MTX_DEF); + cv_init(&vnet_list_condvar, "vnet_list_condvar"); + + /* Default image has no parent and no name. */ + vi_alloc(NULL, ""); + + /* + * We MUST clear curvnet in vi_init_done() before going SMP, + * otherwise CURVNET_SET() macros would scream about unnecessary + * curvnet recursions. + */ curvnet = LIST_FIRST(&vnet_head); #endif } Modified: head/sys/net/if.c ============================================================================== --- head/sys/net/if.c Sun May 31 12:04:01 2009 (r193165) +++ head/sys/net/if.c Sun May 31 12:10:04 2009 (r193166) @@ -2283,6 +2283,21 @@ ifioctl(struct socket *so, u_long cmd, c ifr = (struct ifreq *)data; switch (cmd) { +#ifdef VIMAGE + /* + * XXX vnet creation will be implemented through the new jail + * framework - this is just a temporary hack for testing the + * vnet create / destroy mechanisms. 
+ */ + case SIOCSIFVIMAGE: + error = vi_if_move((struct vi_req *) data, NULL, + TD_TO_VIMAGE(td)); + return (error); + case SIOCSPVIMAGE: + case SIOCGPVIMAGE: + error = vi_td_ioctl(cmd, (struct vi_req *) data, td); + return (error); +#endif case SIOCIFCREATE: case SIOCIFCREATE2: error = priv_check(td, PRIV_NET_IFCREATE); Modified: head/sys/sys/sockio.h ============================================================================== --- head/sys/sys/sockio.h Sun May 31 12:04:01 2009 (r193165) +++ head/sys/sys/sockio.h Sun May 31 12:10:04 2009 (r193166) @@ -108,6 +108,10 @@ #define SIOCGPRIVATE_0 _IOWR('i', 80, struct ifreq) /* device private 0 */ #define SIOCGPRIVATE_1 _IOWR('i', 81, struct ifreq) /* device private 1 */ +#define SIOCSPVIMAGE _IOW('i', 101, struct vi_req) /* set proc vimage */ +#define SIOCGPVIMAGE _IOWR('i', 102, struct vi_req) /* get proc vimage */ +#define SIOCSIFVIMAGE _IOWR('i', 103, struct vi_req) /* set ifc vi/net */ + #define SIOCSDRVSPEC _IOW('i', 123, struct ifdrv) /* set driver-specific parameters */ #define SIOCGDRVSPEC _IOWR('i', 123, struct ifdrv) /* get driver-specific Modified: head/sys/sys/vimage.h ============================================================================== --- head/sys/sys/vimage.h Sun May 31 12:04:01 2009 (r193165) +++ head/sys/sys/vimage.h Sun May 31 12:10:04 2009 (r193166) @@ -1,6 +1,6 @@ /*- - * Copyright (c) 2006-2008 University of Zagreb - * Copyright (c) 2006-2008 FreeBSD Foundation + * Copyright (c) 2006-2009 University of Zagreb + * Copyright (c) 2006-2009 FreeBSD Foundation * * This software was developed by the University of Zagreb and the * FreeBSD Foundation under sponsorship by the Stichting NLnet and the @@ -36,6 +36,31 @@ #include #include +/* Interim userspace API. */ +struct vi_req { + int vi_api_cookie; /* Catch API mismatch. */ + int vi_req_action; /* What to do with this request? */ + u_short vi_proc_count; /* Current number of processes. */ + int vi_if_count; /* Current number of ifnets. 
*/ + int vi_sock_count; + char vi_name[MAXPATHLEN]; + char vi_if_xname[MAXPATHLEN]; /* XXX should be IFNAMSIZ */ +}; + +#define VI_CREATE 0x00000001 +#define VI_DESTROY 0x00000002 +#define VI_SWITCHTO 0x00000008 +#define VI_IFACE 0x00000010 +#define VI_GET 0x00000100 +#define VI_GETNEXT 0x00000200 +#define VI_GETNEXT_RECURSE 0x00000300 + +#define VI_API_VERSION 1 /* Bump on struct changes. */ + +#define VI_API_COOKIE ((sizeof(struct vi_req) << 16) | VI_API_VERSION) + +#ifdef _KERNEL + #if defined(VIMAGE) && defined(VIMAGE_GLOBALS) #error "You cannot have both option VIMAGE and option VIMAGE_GLOBALS!" #endif @@ -46,6 +71,8 @@ struct vprocg; struct vnet; +struct vi_req; +struct ifnet; struct kld_sym_lookup; typedef int vnet_attach_fn(const void *); @@ -129,6 +156,9 @@ struct vnet_modlink { #define V_MOD_vprocg 0 /* no minor module ids like in vnet */ int vi_symlookup(struct kld_sym_lookup *, char *); +int vi_td_ioctl(u_long, struct vi_req *, struct thread *); +int vi_if_move(struct vi_req *, struct ifnet *, struct vimage *); +int vi_child_of(struct vimage *, struct vimage *); void vnet_mod_register(const struct vnet_modinfo *); void vnet_mod_register_multi(const struct vnet_modinfo *, void *, char *); void vnet_mod_deregister(const struct vnet_modinfo *); @@ -449,4 +479,6 @@ extern struct vprocg_list_head vprocg_he #define VIMAGE_CTASSERT(x, y) struct __hack #endif +#endif /* _KERNEL */ + #endif /* !_SYS_VIMAGE_H_ */