Date: Tue, 2 Aug 2016 00:42:49 +0530 From: Akshay Jaggi <akshay1994.leo@gmail.com> To: freebsd-virtualization@freebsd.org, freebsd-xen@freebsd.org, xen-devel@lists.xen.org Cc: Pedro Giffuni <pfg@freebsd.org>, =?UTF-8?Q?Roger_Pau_Monn=C3=A9?= <royger@freebsd.org> Subject: Call for Testing : Grant Table User-space Device Message-ID: <CAAeUNVmz521zKZErZqsMuJKHcS_fSvVCw=LByQXoxh_RAiLwiQ@mail.gmail.com>
next in thread | raw e-mail | index | archive | help
[-- Attachment #1 --] Hello Everyone, I am one of the students selected under FreeBSD for Google Summer of Code this year. My project was to implement a user-space grant table device for FreeBSD and add the appropriate bindings for the device in Xen, to enable the qdisk backends. (More details: http://akshayjaggi.me/blog/gsoc-freebsd-xen-progress-report/) The code is right out of the oven and cooked enough for testing now, and I would be glad if some community members would give it a try. Instructions for applying the patches: 1) Apply *freebsd_final_flash.patch* to /usr/src on FreeBSD. Buildkernel and Installkernel. 2) Add *xen_freebsd.patch* to /usr/ports/sysutils/xen-tools/files/, and bind it to be applied in /usr/ports/sysutils/xen-tools/Makefile. Make and Make install. Reboot and play! Change the backendtype to qdisk in the domain configuration files, and test. :) Meanwhile, I am working on getting my work pushed into FreeBSD, and once the gntdev device is ready on FreeBSD, I'll push the necessary bindings in Xen. Happy Testing! Cheers and Regards, Akshay Jaggi [-- Attachment #2 --] diff --git a/sys/conf/files b/sys/conf/files index 7b7dbeb..2a0eef4 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -2973,6 +2973,7 @@ dev/xen/xenstore/xenstore_dev.c optional xenhvm dev/xen/xenstore/xenstored_dev.c optional xenhvm dev/xen/evtchn/evtchn_dev.c optional xenhvm dev/xen/privcmd/privcmd.c optional xenhvm +dev/xen/gntdev/gntdev.c optional xenhvm dev/xen/debug/debug.c optional xenhvm dev/xl/if_xl.c optional xl pci dev/xl/xlphy.c optional xl pci diff --git a/sys/dev/xen/gntdev/gntdev.c b/sys/dev/xen/gntdev/gntdev.c new file mode 100644 index 0000000..dc3142e --- /dev/null +++ b/sys/dev/xen/gntdev/gntdev.c @@ -0,0 +1,1056 @@ +/*- + * Copyright (c) 2016 Akshay Jaggi <jaggi@FreeBSD.org> + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * gntdev.c + * + * Interface to /dev/xen/gntdev. 
+ * + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/uio.h> +#include <sys/bus.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/rwlock.h> +#include <sys/selinfo.h> +#include <sys/poll.h> +#include <sys/conf.h> +#include <sys/fcntl.h> +#include <sys/ioccom.h> +#include <sys/rman.h> +#include <sys/tree.h> +#include <sys/module.h> +#include <sys/proc.h> +#include <sys/bitset.h> +#include <sys/queue.h> +#include <sys/mman.h> +#include <sys/syslog.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_extern.h> +#include <vm/vm_kern.h> +#include <vm/vm_page.h> +#include <vm/vm_map.h> +#include <vm/vm_object.h> +#include <vm/vm_pager.h> +#include <vm/vm_phys.h> + +#include <machine/md_var.h> + +#include <xen/xen-os.h> +#include <xen/hypervisor.h> +#include <xen/error.h> +#include <xen/gnttab.h> +#include <xen/gntdev.h> + +MALLOC_DEFINE(M_GNTDEV, "gntdev", "Xen grant-table user-space device"); + +static d_open_t gntdev_open; +static d_ioctl_t gntdev_ioctl; +static d_mmap_single_t gntdev_mmap_single; + +static struct cdevsw gntdev_devsw = { + .d_version = D_VERSION, + .d_open = gntdev_open, + .d_ioctl = gntdev_ioctl, + .d_mmap_single = gntdev_mmap_single, + .d_name = "gntdev", +}; + +static device_t gntdev_dev = NULL; + +struct gntdev_gref; +struct gntdev_gmap; +TAILQ_HEAD(gref_list_head, gntdev_gref); +TAILQ_HEAD(gmap_list_head, gntdev_gmap); + +struct per_user_data { + struct mtx user_data_lock; + struct gref_list_head gref_list; + struct gmap_list_head gmap_list; + uint64_t file_offset; +}; + +/* + * Get offset into the file which will be used while mmapping the + * appropriate pages by the userspace program. 
+ */ +static uint64_t +get_file_offset(struct per_user_data *priv_user, uint32_t count) +{ + uint64_t file_offset; + + mtx_lock(&priv_user->user_data_lock); + file_offset = priv_user->file_offset; + /* + * count is 32-bit; widen it before multiplying so the byte length + * of a large request cannot wrap and hand out overlapping offsets. + */ + priv_user->file_offset += (uint64_t)count * PAGE_SIZE; + mtx_unlock(&priv_user->user_data_lock); + + return (file_offset); +} + +static int gntdev_gmap_pg_ctor(void *handle, vm_ooffset_t size, + vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred, u_short *color); +static void gntdev_gmap_pg_dtor(void *handle); +static int gntdev_gmap_pg_fault(vm_object_t object, vm_ooffset_t offset, + int prot, vm_page_t *mres); + +static struct cdev_pager_ops gntdev_gmap_pg_ops = { + .cdev_pg_fault = gntdev_gmap_pg_fault, + .cdev_pg_ctor = gntdev_gmap_pg_ctor, + .cdev_pg_dtor = gntdev_gmap_pg_dtor, +}; + +/*-------------------- Grant Allocation Methods -----------------------------*/ + +struct gntdev_gref { + TAILQ_ENTRY(gntdev_gref) gref_list_next; + uint64_t file_index; + grant_ref_t gref_id; + vm_page_t page; + struct ioctl_gntdev_unmap_notify *notify; +}; + +static struct gref_list_head to_kill_grefs = + TAILQ_HEAD_INITIALIZER(to_kill_grefs); + +static struct mtx to_kill_grefs_mtx; +MTX_SYSINIT(to_kill_grefs_mtx, &to_kill_grefs_mtx, + "gntdev to_kill_grefs mutex", MTX_DEF); + +/* + * Traverse over the device-list of to-be-deleted grants allocated, and + * if all accesses, both local mmaps and foreign maps, to them have ended, + * destroy them. + */ +static void +gref_list_dtor() +{ + struct gntdev_gref *gref, *gref_tmp; + + mtx_lock(&to_kill_grefs_mtx); + TAILQ_FOREACH_SAFE(gref, &to_kill_grefs, gref_list_next, gref_tmp) { + if (gref->page && gref->page->object == NULL) { + if (gref->notify) { + // TODO: Handle Notify .... 
+ } + if (gref->gref_id != GRANT_REF_INVALID) { + if (gnttab_query_foreign_access(gref->gref_id)) + continue; + if (gnttab_end_foreign_access_ref(gref->gref_id) + == 0) + continue; + gnttab_free_grant_reference(gref->gref_id); + } + vm_page_unwire(gref->page, PQ_NONE); + vm_page_free(gref->page); + gref->page = NULL; + } + if (gref->page == NULL) { + if (gref->notify) + free(gref->notify, M_GNTDEV); + TAILQ_REMOVE(&to_kill_grefs, gref, gref_list_next); + free(gref, M_GNTDEV); + } + } + mtx_unlock(&to_kill_grefs_mtx); +} + +/* + * Find count number of contiguous allocated grants for a given userspace + * program by file-offset (index). + */ +static struct gntdev_gref* +gntdev_find_grefs(struct per_user_data *priv_user, + uint64_t index, uint32_t count) +{ + struct gntdev_gref *gref, *gref_start = NULL; + + mtx_lock(&priv_user->user_data_lock); + TAILQ_FOREACH(gref, &priv_user->gref_list, gref_list_next) { + if (index == gref->file_index) { + if (gref_start == NULL) + gref_start = gref; + index += PAGE_SIZE; + count--; + if (count == 0) + break; + } + else if (gref_start) + break; + } + mtx_unlock(&priv_user->user_data_lock); + + if (count) + return (NULL); + return (gref_start); +} + +/* + * IOCTL_GNTDEV_ALLOC_GREF + * Allocate required number of wired pages for the request, grant foreign + * access to the physical frames for these pages, and add details about + * this allocation to the per user private data, so that these pages can + * be mmapped by the userspace program. + */ +static int +gntdev_alloc_gref(struct ioctl_gntdev_alloc_gref *arg) +{ + int i, error, readonly; + uint64_t file_offset; + struct gntdev_gref *gref; + struct per_user_data *priv_user; + struct gref_list_head tmp_gref_list = + TAILQ_HEAD_INITIALIZER(tmp_gref_list); + + readonly = !(arg->flags & GNTDEV_ALLOC_FLAG_WRITABLE); + + error = devfs_get_cdevpriv((void**) &priv_user); + if (error != 0) + return (EINVAL); + + /* Cleanup grefs and free pages. 
*/ + gref_list_dtor(); + + /* Get file offset for this request. */ + file_offset = get_file_offset(priv_user, arg->count); + + for (i = 0; i < arg->count; i++) { + struct gntdev_gref *gref; + gref = malloc(sizeof(*gref), M_GNTDEV, M_WAITOK | M_ZERO); + + TAILQ_INSERT_TAIL(&tmp_gref_list, gref, gref_list_next); + gref->file_index = file_offset + i * PAGE_SIZE; + gref->gref_id = GRANT_REF_INVALID; + gref->page = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL + | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); + if (gref->page == NULL) { + log(LOG_ERR, "Page allocation failed."); + error = ENOMEM; + break; + } + if ((gref->page->flags & PG_ZERO) == 0) { + /* + * Zero the allocated page, as we don't want to + * leak our memory to other domains. + */ + pmap_zero_page(gref->page); + } + gref->page->valid = VM_PAGE_BITS_ALL; + + error = gnttab_grant_foreign_access(arg->domid, + (VM_PAGE_TO_PHYS(gref->page) >> PAGE_SHIFT), + readonly, &gref->gref_id); + if (error != 0) { + log(LOG_ERR, "Grant Table Hypercall failed."); + break; + } + } + + if (error != 0) { + /* + * If target domain maps the gref (by guessing the gref-id), + * then we can't clean it up yet and we have to leave the + * page in place so as to not leak our memory to that domain. + * Add it to a global list to be cleaned up later. + */ + mtx_lock(&to_kill_grefs_mtx); + TAILQ_CONCAT(&to_kill_grefs, &tmp_gref_list, gref_list_next); + mtx_unlock(&to_kill_grefs_mtx); + + gref_list_dtor(); + + return (error); + } + + /* Copy the output values. */ + i = 0; + TAILQ_FOREACH(gref, &tmp_gref_list, gref_list_next) + arg->gref_ids[i++] = gref->gref_id; + + arg->index = file_offset; + + /* Modify the per user private data. 
*/ + mtx_lock(&priv_user->user_data_lock); + TAILQ_CONCAT(&priv_user->gref_list, &tmp_gref_list, gref_list_next); + mtx_unlock(&priv_user->user_data_lock); + + return (error); +} + +/* + * IOCTL_GNTDEV_DEALLOC_GREF + * Remove grant allocation information from the per user private data, so + * that it can't be mmapped anymore by the userspace program, and add it + * to the to-be-deleted grants global device-list. + */ +static int +gntdev_dealloc_gref(struct ioctl_gntdev_dealloc_gref *arg) +{ + int error; + uint32_t count; + struct gntdev_gref *gref, *gref_tmp; + struct per_user_data *priv_user; + + error = devfs_get_cdevpriv((void**) &priv_user); + if (error != 0) + return (EINVAL); + + gref = gntdev_find_grefs(priv_user, arg->index, arg->count); + if (gref == NULL) { + log(LOG_ERR, "Can't find requested grant-refs."); + return (EINVAL); + } + + /* Remove the grefs from user private data. */ + count = arg->count; + mtx_lock(&priv_user->user_data_lock); + mtx_lock(&to_kill_grefs_mtx); + TAILQ_FOREACH_FROM_SAFE(gref, + &priv_user->gref_list, gref_list_next, gref_tmp) { + TAILQ_REMOVE(&priv_user->gref_list, gref, gref_list_next); + TAILQ_INSERT_TAIL(&to_kill_grefs, gref, gref_list_next); + count--; + if (count == 0) + break; + } + mtx_unlock(&to_kill_grefs_mtx); + mtx_unlock(&priv_user->user_data_lock); + + gref_list_dtor(); + + return (0); +} + +/*-------------------- Grant Accessing Methods ------------------------------*/ + +struct gntdev_gmap_map { + vm_object_t mem; + struct resource *pseudo_phys_res; + int pseudo_phys_res_id; + vm_paddr_t phys_base_addr; +}; + +struct gntdev_gmap { + TAILQ_ENTRY(gntdev_gmap) gmap_list_next; + uint64_t file_index; + uint32_t count; + struct gnttab_map_grant_ref *grant_map_ops; + struct gntdev_gmap_map *map; + struct ioctl_gntdev_unmap_notify *notify; +}; + +static struct gmap_list_head to_kill_gmaps = + TAILQ_HEAD_INITIALIZER(to_kill_gmaps); + +static struct mtx to_kill_gmaps_mtx; +MTX_SYSINIT(to_kill_gmaps_mtx, 
&to_kill_gmaps_mtx, + "gntdev to_kill_gmaps mutex", MTX_DEF); + +/* + * Traverse over the device-list of to-be-deleted grant mappings, and if + * the region is no longer mmapped by anyone, free the memory used to + * store information about the mapping. + */ +static void +gmap_list_dtor() +{ + struct gntdev_gmap *gmap, *gmap_tmp; + + mtx_lock(&to_kill_gmaps_mtx); + TAILQ_FOREACH_SAFE(gmap, &to_kill_gmaps, gmap_list_next, gmap_tmp) { + if (gmap->map == NULL) { + if (gmap->notify) + free(gmap->notify, M_GNTDEV); + free(gmap->grant_map_ops, M_GNTDEV); + TAILQ_REMOVE(&to_kill_gmaps, gmap, gmap_list_next); + free(gmap, M_GNTDEV); + } + } + mtx_unlock(&to_kill_gmaps_mtx); +} + +/* + * Find mapped grants for a given userspace program, by file-offset (index) + * and count, as supplied during the map-ioctl. + */ +static struct gntdev_gmap* +gntdev_find_gmap(struct per_user_data *priv_user, + uint64_t index, uint32_t count) +{ + struct gntdev_gmap *gmap, *gmap_start = NULL; + + mtx_lock(&priv_user->user_data_lock); + TAILQ_FOREACH(gmap, &priv_user->gmap_list, gmap_list_next) { + if (gmap->file_index == index && gmap->count == count) { + gmap_start = gmap; + break; + } + } + mtx_unlock(&priv_user->user_data_lock); + + return (gmap_start); +} + +/* + * Remove the pages from the mgtdevice pager, call the unmap hypercall, + * free the xenmem resource. This function is called during the + * destruction of the mgtdevice pager, which happens when all mmaps to + * it have been removed, and the unmap-ioctl has been performed. + */ +static int +notify_unmap_cleanup(struct gntdev_gmap *gmap) +{ + int error, i, count; + vm_page_t m; + struct gnttab_unmap_grant_ref *unmap_ops; + + unmap_ops = malloc(sizeof(struct gnttab_unmap_grant_ref) * gmap->count, + M_GNTDEV, M_WAITOK); + + /* Enumerate freeable maps. 
*/ + count = 0; + for (i = 0; i < gmap->count; i++) { + if (gmap->grant_map_ops[i].handle != -1) { + unmap_ops[count].handle = gmap->grant_map_ops[i].handle; + unmap_ops[count].host_addr = + gmap->grant_map_ops[i].host_addr; + unmap_ops[count].dev_bus_addr = 0; + count++; + } + } + + /* Perform notification. */ + if (count > 0 && gmap->notify) { + // TODO: Handle Notify + } + + /* Free the pages. */ + VM_OBJECT_WLOCK(gmap->map->mem); +retry: + for (i = 0; i < gmap->count; i++) { + m = vm_page_lookup(gmap->map->mem, i); + if (m == NULL) + continue; + if (vm_page_sleep_if_busy(m, "pcmdum")) + goto retry; + cdev_pager_free_page(gmap->map->mem, m); + } + VM_OBJECT_WUNLOCK(gmap->map->mem); + + /* Perform unmap hypercall. */ + error = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, + unmap_ops, count); + + for (i = 0; i < gmap->count; i++) { + gmap->grant_map_ops[i].handle = -1; + gmap->grant_map_ops[i].host_addr = 0; + } + + if (gmap->map) { + error = xenmem_free(gntdev_dev, gmap->map->pseudo_phys_res_id, + gmap->map->pseudo_phys_res); + KASSERT(error == 0, + ("Unable to release memory resource: %d", error)); + + free(gmap->map, M_GNTDEV); + gmap->map = NULL; + } + + free(unmap_ops, M_GNTDEV); + + return (error); +} + +/* + * IOCTL_GNTDEV_MAP_GRANT_REF + * Populate structures for mapping the grant reference in the per user + * private data. Actual resource allocation and map hypercall is performed + * during the mmap. 
+ */ +static int +gntdev_map_grant_ref(struct ioctl_gntdev_map_grant_ref *arg) +{ + int i, error; + struct gntdev_gmap *gmap; + struct per_user_data *priv_user; + + error = devfs_get_cdevpriv((void**) &priv_user); + if (error != 0) + return (EINVAL); + + gmap = malloc(sizeof(*gmap), M_GNTDEV, M_WAITOK | M_ZERO); + + gmap->count = arg->count; + gmap->file_index = get_file_offset(priv_user, arg->count); + gmap->grant_map_ops = + malloc(sizeof(struct gnttab_map_grant_ref) * arg->count, + M_GNTDEV, M_WAITOK | M_ZERO); + + for (i = 0; i < arg->count; i++) { + gmap->grant_map_ops[i].dom = arg->refs[i].domid; + gmap->grant_map_ops[i].ref = arg->refs[i].ref; + gmap->grant_map_ops[i].handle = -1; + gmap->grant_map_ops[i].flags = GNTMAP_host_map; + } + + mtx_lock(&priv_user->user_data_lock); + TAILQ_INSERT_TAIL(&priv_user->gmap_list, gmap, gmap_list_next); + mtx_unlock(&priv_user->user_data_lock); + + arg->index = gmap->file_index; + + return (error); +} + +/* + * IOCTL_GNTDEV_UNMAP_GRANT_REF + * Remove the map information from the per user private data and add it + * to the global device-list of mappings to be deleted. A reference to + * the mgtdevice pager is also decreased, the reason for which is + * explained in mmap_gmap(). 
+ */ +static int +gntdev_unmap_grant_ref(struct ioctl_gntdev_unmap_grant_ref *arg) +{ + int error; + struct gntdev_gmap *gmap; + struct per_user_data *priv_user; + + error = devfs_get_cdevpriv((void**) &priv_user); + if (error != 0) + return (EINVAL); + + gmap = gntdev_find_gmap(priv_user, arg->index, arg->count); + if (gmap == NULL) { + log(LOG_ERR, "Can't find requested grant-map."); + return (EINVAL); + } + + mtx_lock(&priv_user->user_data_lock); + mtx_lock(&to_kill_gmaps_mtx); + TAILQ_REMOVE(&priv_user->gmap_list, gmap, gmap_list_next); + TAILQ_INSERT_TAIL(&to_kill_gmaps, gmap, gmap_list_next); + mtx_unlock(&to_kill_gmaps_mtx); + mtx_unlock(&priv_user->user_data_lock); + + if (gmap->map) + vm_object_deallocate(gmap->map->mem); + + gmap_list_dtor(); + + return (0); +} + +/* + * IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR + * Get file-offset and count for a given mapping, from the virtual address + * where the mapping is mmapped. + * Please note, this only works for grants mapped by this domain, and not + * grants allocated. Count doesn't make much sense in reference to grants + * allocated. Also, because this function is present in the linux gntdev + * device, but not in the linux gntalloc one, most userspace code only use + * it for mapped grants. 
+ */ +static int +gntdev_get_offset_for_vaddr(struct ioctl_gntdev_get_offset_for_vaddr *arg, + struct thread *td) +{ + int error; + vm_map_t map; + vm_map_entry_t entry; + vm_object_t mem; + vm_pindex_t pindex; + vm_prot_t prot; + boolean_t wired; + struct gntdev_gmap *gmap; + + map = &td->td_proc->p_vmspace->vm_map; + error = vm_map_lookup(&map, arg->vaddr, VM_PROT_NONE, &entry, + &mem, &pindex, &prot, &wired); + if (error != KERN_SUCCESS) + return (EINVAL); + vm_map_lookup_done(map, entry); + + if ((mem->type != OBJT_MGTDEVICE) || + (mem->un_pager.devp.ops != &gntdev_gmap_pg_ops)) + return (EINVAL); + + gmap = mem->handle; + if (gmap == NULL || + (entry->end - entry->start) != (gmap->count * PAGE_SIZE)) + return (EINVAL); + + arg->count = gmap->count; + arg->offset = gmap->file_index; + return (0); +} + +static int +gntdev_set_max_grants(struct ioctl_gntdev_set_max_grants *arg) +{ + + return (0); +} + +/*-------------------- Grant Accessing Pager --------------------------------*/ + +static int +gntdev_gmap_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, + vm_ooffset_t foff, struct ucred *cred, u_short *color) +{ + + return (0); +} + +static void +gntdev_gmap_pg_dtor(void *handle) +{ + + notify_unmap_cleanup((struct gntdev_gmap *)handle); +} + +static int +gntdev_gmap_pg_fault(vm_object_t object, vm_ooffset_t offset, int prot, + vm_page_t *mres) +{ + struct gntdev_gmap *gmap = object->handle; + vm_pindex_t pidx, ridx; + vm_page_t page, oldm; + vm_ooffset_t relative_offset; + + if (gmap->map == NULL) + return (VM_PAGER_FAIL); + + relative_offset = offset - gmap->file_index; + + pidx = OFF_TO_IDX(offset); + ridx = OFF_TO_IDX(relative_offset); + if (ridx >= gmap->count || + gmap->grant_map_ops[ridx].status != GNTST_okay) + return (VM_PAGER_FAIL); + + page = PHYS_TO_VM_PAGE(gmap->map->phys_base_addr + relative_offset); + if (page == NULL) + return (VM_PAGER_FAIL); + + KASSERT((page->flags & PG_FICTITIOUS) != 0, + ("not fictitious %p", page)); + 
KASSERT(page->wire_count == 1, ("wire_count not 1 %p", page)); + KASSERT(vm_page_busied(page) == 0, ("page %p is busy", page)); + + if (*mres != NULL) { + oldm = *mres; + vm_page_lock(oldm); + vm_page_free(oldm); + vm_page_unlock(oldm); + *mres = NULL; + } + + vm_page_insert(page, object, pidx); + page->valid = VM_PAGE_BITS_ALL; + vm_page_xbusy(page); + *mres = page; + return (VM_PAGER_OK); +} + +/*------------------ Grant Table Methods ------------------------------------*/ + +/* + * IOCTL_GNTDEV_SET_UNMAP_NOTIFY + * Set unmap notification inside the appropriate grant. It sends a + * notification when the grant is completely munmapped by this domain + * and ready for destruction. + * + * The request is copied into a private buffer that is attached either to + * the allocated grant at the page containing arg->index or, failing that, + * to the grant mapping whose file range contains arg->index. A notification + * set earlier on the same object is replaced and freed. Returns 0 on + * success and EINVAL when the caller has no per-open data or when no grant + * or mapping covers the index. + */ +static int +gntdev_set_unmap_notify(struct ioctl_gntdev_unmap_notify *arg) +{ + int error; + uint64_t index; + struct per_user_data *priv_user; + struct gntdev_gref *gref = NULL; + struct gntdev_gmap *gmap; + struct ioctl_gntdev_unmap_notify *notify, *old_notify; + + error = devfs_get_cdevpriv((void**) &priv_user); + if (error != 0) + return (EINVAL); + + /* + * Copy the request before taking any lock: an M_WAITOK allocation + * may sleep, which is not allowed while holding a mutex. + */ + notify = malloc(sizeof(*arg), M_GNTDEV, M_WAITOK); + memcpy(notify, arg, sizeof(*arg)); + + index = arg->index & ~(PAGE_SIZE - 1); + gref = gntdev_find_grefs(priv_user, index, 1); + if (gref) { + /* Replace, rather than leak, a previously set notification. */ + old_notify = gref->notify; + gref->notify = notify; + free(old_notify, M_GNTDEV); + return (error); + } + + mtx_lock(&priv_user->user_data_lock); + TAILQ_FOREACH(gmap, &priv_user->gmap_list, gmap_list_next) { + /* Widen count so the end of the range cannot wrap in 32 bits. */ + if (arg->index >= gmap->file_index && + arg->index < gmap->file_index + + (uint64_t)gmap->count * PAGE_SIZE) { + old_notify = gmap->notify; + gmap->notify = notify; + mtx_unlock(&priv_user->user_data_lock); + free(old_notify, M_GNTDEV); + return (error); + } + } + mtx_unlock(&priv_user->user_data_lock); + + /* Nothing covers the index; discard the unused copy. */ + free(notify, M_GNTDEV); + return (EINVAL); +} + +/*------------------ Gntdev Char Device Methods -----------------------------*/ + +static void +per_user_data_dtor(void *arg) +{ + struct gntdev_gmap *gmap, *gmap_tmp; + struct per_user_data *priv_user; + + priv_user = (struct per_user_data *) arg; + + mtx_lock(&priv_user->user_data_lock); + mtx_lock(&to_kill_grefs_mtx); + 
TAILQ_CONCAT(&to_kill_grefs, &priv_user->gref_list, gref_list_next); + mtx_unlock(&to_kill_grefs_mtx); + mtx_lock(&to_kill_gmaps_mtx); + + TAILQ_FOREACH_SAFE(gmap, &priv_user->gmap_list, gmap_list_next, + gmap_tmp) { + TAILQ_REMOVE(&priv_user->gmap_list, gmap, gmap_list_next); + TAILQ_INSERT_TAIL(&to_kill_gmaps, gmap, gmap_list_next); + if (gmap->map) + vm_object_deallocate(gmap->map->mem); + } + mtx_unlock(&to_kill_gmaps_mtx); + + mtx_unlock(&priv_user->user_data_lock); + + gref_list_dtor(); + gmap_list_dtor(); + + mtx_destroy(&priv_user->user_data_lock); + free(priv_user, M_GNTDEV); +} + +static int +gntdev_open(struct cdev *dev, int flag, int otyp, struct thread *td) +{ + int error; + struct per_user_data *priv_user; + + priv_user = malloc(sizeof(*priv_user), M_GNTDEV, M_WAITOK | M_ZERO); + TAILQ_INIT(&priv_user->gref_list); + TAILQ_INIT(&priv_user->gmap_list); + mtx_init(&priv_user->user_data_lock, + "per user data mutex", NULL, MTX_DEF); + + error = devfs_set_cdevpriv(priv_user, per_user_data_dtor); + if (error != 0) + per_user_data_dtor(priv_user); + + return (error); +} + +static int +gntdev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, + int fflag, struct thread *td) +{ + int error; + + switch (cmd) { + case IOCTL_GNTDEV_SET_UNMAP_NOTIFY: + error = gntdev_set_unmap_notify( + (struct ioctl_gntdev_unmap_notify*) data); + break; + case IOCTL_GNTDEV_ALLOC_GREF: + error = gntdev_alloc_gref( + (struct ioctl_gntdev_alloc_gref*) data); + break; + case IOCTL_GNTDEV_DEALLOC_GREF: + error = gntdev_dealloc_gref( + (struct ioctl_gntdev_dealloc_gref*) data); + break; + case IOCTL_GNTDEV_MAP_GRANT_REF: + error = gntdev_map_grant_ref( + (struct ioctl_gntdev_map_grant_ref*) data); + break; + case IOCTL_GNTDEV_UNMAP_GRANT_REF: + error = gntdev_unmap_grant_ref( + (struct ioctl_gntdev_unmap_grant_ref*) data); + break; + case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR: + error = gntdev_get_offset_for_vaddr( + (struct ioctl_gntdev_get_offset_for_vaddr*) data, td); + break; + case 
IOCTL_GNTDEV_SET_MAX_GRANTS: + error = gntdev_set_max_grants( + (struct ioctl_gntdev_set_max_grants*) data); + break; + default: + error = ENOSYS; + break; + } + + return (error); +} + +/* + * MMAP an allocated grant into user memory. + * Please note, that the grants must not already be mmapped, otherwise + * this function will fail. + */ +static int +mmap_gref(struct per_user_data *priv_user, struct gntdev_gref *gref_start, + uint32_t count, vm_size_t size, struct vm_object **object) +{ + vm_object_t mem_obj; + struct gntdev_gref *gref; + + mem_obj = vm_object_allocate(OBJT_PHYS, size); + if (mem_obj == NULL) + return (ENOMEM); + + gref = gref_start; + mtx_lock(&priv_user->user_data_lock); + VM_OBJECT_WLOCK(mem_obj); + TAILQ_FOREACH_FROM(gref, &priv_user->gref_list, gref_list_next) { + if (gref->page->object) + break; + + vm_page_insert(gref->page, mem_obj, + OFF_TO_IDX(gref->file_index)); + + count--; + if (count==0) + break; + } + VM_OBJECT_WUNLOCK(mem_obj); + mtx_unlock(&priv_user->user_data_lock); + + if (count) { + vm_object_deallocate(mem_obj); + return (EINVAL); + } + + *object = mem_obj; + + return (0); + +} + +/* + * MMAP a mapped grant into user memory. + */ +static int +mmap_gmap(struct per_user_data *priv_user, struct gntdev_gmap *gmap_start, + vm_ooffset_t *offset, vm_size_t size, struct vm_object **object, int nprot) +{ + int i, error; + + /* + * The grant map hypercall might already be done. + * If that is the case, increase a reference to the + * vm object and return the already allocated object. + */ + if (gmap_start->map) { + vm_object_reference(gmap_start->map->mem); + *object = gmap_start->map->mem; + return (0); + } + + gmap_start->map = malloc(sizeof(*(gmap_start->map)), M_GNTDEV, + M_WAITOK | M_ZERO); + + /* Allocate the xen pseudo physical memory resource. 
*/ + gmap_start->map->pseudo_phys_res_id = 0; + gmap_start->map->pseudo_phys_res = xenmem_alloc(gntdev_dev, + &gmap_start->map->pseudo_phys_res_id, size); + if (gmap_start->map->pseudo_phys_res == NULL) { + free(gmap_start->map, M_GNTDEV); + gmap_start->map = NULL; + return (ENOMEM); + } + gmap_start->map->phys_base_addr = + rman_get_start(gmap_start->map->pseudo_phys_res); + + /* Allocate the mgtdevice pager. */ + gmap_start->map->mem = cdev_pager_allocate(gmap_start, OBJT_MGTDEVICE, + &gntdev_gmap_pg_ops, size, nprot, *offset, NULL); + if (gmap_start->map->mem == NULL) { + xenmem_free(gntdev_dev, gmap_start->map->pseudo_phys_res_id, + gmap_start->map->pseudo_phys_res); + free(gmap_start->map, M_GNTDEV); + gmap_start->map = NULL; + return (ENOMEM); + } + + for(i = 0; i < gmap_start->count; i++) { + gmap_start->grant_map_ops[i].host_addr = + gmap_start->map->phys_base_addr + i * PAGE_SIZE; + + if ((nprot & PROT_WRITE) == 0) + gmap_start->grant_map_ops[i].flags |= GNTMAP_readonly; + } + /* Make the MAP hypercall. */ + error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, + gmap_start->grant_map_ops, gmap_start->count); + if (error != 0) { + /* + * Deallocate pager. + * Pager deallocation will automatically take care of + * xenmem deallocation, etc. + */ + vm_object_deallocate(gmap_start->map->mem); + + return (EINVAL); + } + + /* Retry EAGAIN maps. */ + for (i = 0; i < gmap_start->count; i++) { + int delay = 1; + while (delay < 256 && + gmap_start->grant_map_ops[i].status == GNTST_eagain) { + HYPERVISOR_grant_table_op( GNTTABOP_map_grant_ref, + &gmap_start->grant_map_ops[i], 1); + /* + * The back-off is expressed in sbintime_t units + * (SBT_1MS); pause() would interpret it as ticks. + */ + pause_sbt(("gntmap"), delay * SBT_1MS, 0, 0); + delay++; + } + if (gmap_start->grant_map_ops[i].status == GNTST_eagain) + gmap_start->grant_map_ops[i].status = GNTST_bad_page; + + if (gmap_start->grant_map_ops[i].status != GNTST_okay) { + /* + * Deallocate pager. + * Pager deallocation will automatically take care of + * xenmem deallocation, notification, unmap hypercall, + * etc. 
+ */ + vm_object_deallocate(gmap_start->map->mem); + + return (EINVAL); + } + } + + /* + * Add a reference to the vm object. We do not want + * the vm object to be deleted when all the mmaps are + * unmapped, because it may be re-mmapped. Instead, + * we want the object to be deleted, when along with + * munmaps, we have also processed the unmap-ioctl. + */ + vm_object_reference(gmap_start->map->mem); + + *object = gmap_start->map->mem; + + return (0); +} + +static int +gntdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size, + struct vm_object **object, int nprot) +{ + int error; + uint32_t count; + struct gntdev_gref *gref_start; + struct gntdev_gmap *gmap_start; + struct per_user_data *priv_user; + + error = devfs_get_cdevpriv((void**) &priv_user); + if (error != 0) + return (EINVAL); + + count = OFF_TO_IDX(size); + + gref_start = gntdev_find_grefs(priv_user, *offset, count); + if (gref_start) { + error = mmap_gref(priv_user, gref_start, count, size, object); + return (error); + } + + gmap_start = gntdev_find_gmap(priv_user, *offset, count); + if (gmap_start) { + error = mmap_gmap(priv_user, gmap_start, offset, size, object, + nprot); + return (error); + } + + return (EINVAL); +} + +/*------------------ Private Device Attachment Functions --------------------*/ +static void +gntdev_identify(driver_t *driver, device_t parent) +{ + + KASSERT((xen_domain()), + ("Trying to attach gntdev device on non Xen domain")); + + if (BUS_ADD_CHILD(parent, 0, "gntdev", 0) == NULL) + panic("unable to attach gntdev user-space device"); +} + +static int +gntdev_probe(device_t dev) +{ + + gntdev_dev = dev; + device_set_desc(dev, "Xen grant-table user-space device"); + return (BUS_PROBE_NOWILDCARD); +} + +static int +gntdev_attach(device_t dev) +{ + + make_dev_credf(MAKEDEV_ETERNAL, &gntdev_devsw, 0, NULL, UID_ROOT, + GID_WHEEL, 0600, "xen/gntdev"); + return (0); +} + +/*-------------------- Private Device Attachment Data -----------------------*/ +static 
device_method_t gntdev_methods[] = { + DEVMETHOD(device_identify, gntdev_identify), + DEVMETHOD(device_probe, gntdev_probe), + DEVMETHOD(device_attach, gntdev_attach), + DEVMETHOD_END +}; + +static driver_t gntdev_driver = { + "gntdev", + gntdev_methods, + 0, +}; + +devclass_t gntdev_devclass; + +DRIVER_MODULE(gntdev, xenpv, gntdev_driver, gntdev_devclass, 0, 0); +MODULE_DEPEND(gntdev, xenpv, 1, 1, 1); diff --git a/sys/xen/gntdev.h b/sys/xen/gntdev.h new file mode 100644 index 0000000..1d09c5d --- /dev/null +++ b/sys/xen/gntdev.h @@ -0,0 +1,118 @@ +/*- + * Copyright (c) 2016 Akshay Jaggi <jaggi@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * gntdev.h + * + * Interface to /dev/xen/gntdev. 
+ * + */ + +#ifndef __XEN_GNTDEV_H__ +#define __XEN_GNTDEV_H__ + +#include <sys/types.h> + +#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \ + _IOW('E', 0, struct ioctl_gntdev_unmap_notify) +struct ioctl_gntdev_unmap_notify { + /* IN parameters */ + uint64_t index; + uint32_t action; + uint32_t event_channel_port; +}; + +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1 +#define UNMAP_NOTIFY_SEND_EVENT 0x2 + +/*-------------------- Grant Creation IOCTLs --------------------------------*/ + +#define IOCTL_GNTDEV_ALLOC_GREF \ + _IOWR('E', 1, struct ioctl_gntdev_alloc_gref) +struct ioctl_gntdev_alloc_gref { + /* IN parameters */ + uint16_t domid; + uint16_t flags; + uint32_t count; + /* OUT parameters */ + uint64_t index; + /* Variable OUT parameter */ + uint32_t gref_ids[1]; +}; + +#define GNTDEV_ALLOC_FLAG_WRITABLE 1 + +#define IOCTL_GNTDEV_DEALLOC_GREF \ + _IOW('E', 2, struct ioctl_gntdev_dealloc_gref) +struct ioctl_gntdev_dealloc_gref { + /* IN parameters */ + uint64_t index; + uint32_t count; +}; + +/*-------------------- Grant Accessing IOCTLs -------------------------------*/ + +struct ioctl_gntdev_grant_ref { + uint32_t domid; + uint32_t ref; +}; + +#define IOCTL_GNTDEV_MAP_GRANT_REF \ + _IOWR('E', 3, struct ioctl_gntdev_map_grant_ref) +struct ioctl_gntdev_map_grant_ref { + /* IN parameters */ + uint32_t count; + uint32_t pad0; + /* OUT parameters */ + uint64_t index; + /* Variable IN parameter */ + struct ioctl_gntdev_grant_ref refs[1]; +}; + +#define IOCTL_GNTDEV_UNMAP_GRANT_REF \ + _IOW('E', 4, struct ioctl_gntdev_unmap_grant_ref) +struct ioctl_gntdev_unmap_grant_ref { + /* IN parameters */ + uint64_t index; + uint32_t count; +}; + +#define IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR \ + _IOWR('E', 5, struct ioctl_gntdev_get_offset_for_vaddr) +struct ioctl_gntdev_get_offset_for_vaddr { + /* IN parameters */ + uint64_t vaddr; + /* OUT parameters */ + uint64_t offset; + uint32_t count; +}; + +#define IOCTL_GNTDEV_SET_MAX_GRANTS \ + _IOW('E', 6, struct ioctl_gntdev_set_max_grants) +struct 
ioctl_gntdev_set_max_grants { + /* IN parameters */ + uint32_t count; +}; + +#endif /* __XEN_GNTDEV_H__ */ [-- Attachment #3 --] diff --git a/tools/include/xen-sys/FreeBSD/gntdev.h b/tools/include/xen-sys/FreeBSD/gntdev.h new file mode 100644 index 0000000..1d09c5d --- /dev/null +++ b/tools/include/xen-sys/FreeBSD/gntdev.h @@ -0,0 +1,118 @@ +/*- + * Copyright (c) 2016 Akshay Jaggi <jaggi@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * gntdev.h + * + * Interface to /dev/xen/gntdev. 
+ * + */ + +#ifndef __XEN_GNTDEV_H__ +#define __XEN_GNTDEV_H__ + +#include <sys/types.h> + +#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \ + _IOW('E', 0, struct ioctl_gntdev_unmap_notify) +struct ioctl_gntdev_unmap_notify { + /* IN parameters */ + uint64_t index; + uint32_t action; + uint32_t event_channel_port; +}; + +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1 +#define UNMAP_NOTIFY_SEND_EVENT 0x2 + +/*-------------------- Grant Creation IOCTLs --------------------------------*/ + +#define IOCTL_GNTDEV_ALLOC_GREF \ + _IOWR('E', 1, struct ioctl_gntdev_alloc_gref) +struct ioctl_gntdev_alloc_gref { + /* IN parameters */ + uint16_t domid; + uint16_t flags; + uint32_t count; + /* OUT parameters */ + uint64_t index; + /* Variable OUT parameter */ + uint32_t gref_ids[1]; +}; + +#define GNTDEV_ALLOC_FLAG_WRITABLE 1 + +#define IOCTL_GNTDEV_DEALLOC_GREF \ + _IOW('E', 2, struct ioctl_gntdev_dealloc_gref) +struct ioctl_gntdev_dealloc_gref { + /* IN parameters */ + uint64_t index; + uint32_t count; +}; + +/*-------------------- Grant Accessing IOCTLs -------------------------------*/ + +struct ioctl_gntdev_grant_ref { + uint32_t domid; + uint32_t ref; +}; + +#define IOCTL_GNTDEV_MAP_GRANT_REF \ + _IOWR('E', 3, struct ioctl_gntdev_map_grant_ref) +struct ioctl_gntdev_map_grant_ref { + /* IN parameters */ + uint32_t count; + uint32_t pad0; + /* OUT parameters */ + uint64_t index; + /* Variable IN parameter */ + struct ioctl_gntdev_grant_ref refs[1]; +}; + +#define IOCTL_GNTDEV_UNMAP_GRANT_REF \ + _IOW('E', 4, struct ioctl_gntdev_unmap_grant_ref) +struct ioctl_gntdev_unmap_grant_ref { + /* IN parameters */ + uint64_t index; + uint32_t count; +}; + +#define IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR \ + _IOWR('E', 5, struct ioctl_gntdev_get_offset_for_vaddr) +struct ioctl_gntdev_get_offset_for_vaddr { + /* IN parameters */ + uint64_t vaddr; + /* OUT parameters */ + uint64_t offset; + uint32_t count; +}; + +#define IOCTL_GNTDEV_SET_MAX_GRANTS \ + _IOW('E', 6, struct ioctl_gntdev_set_max_grants) +struct 
ioctl_gntdev_set_max_grants { + /* IN parameters */ + uint32_t count; +}; + +#endif /* __XEN_GNTDEV_H__ */ diff --git a/tools/libs/gnttab/Makefile b/tools/libs/gnttab/Makefile index af64542..69bb207 100644 --- a/tools/libs/gnttab/Makefile +++ b/tools/libs/gnttab/Makefile @@ -14,7 +14,7 @@ SRCS-GNTSHR += gntshr_core.c SRCS-$(CONFIG_Linux) += $(SRCS-GNTTAB) $(SRCS-GNTSHR) linux.c SRCS-$(CONFIG_MiniOS) += $(SRCS-GNTTAB) gntshr_unimp.c minios.c -SRCS-$(CONFIG_FreeBSD) += gnttab_unimp.c gntshr_unimp.c +SRCS-$(CONFIG_FreeBSD) += $(SRCS-GNTTAB) $(SRCS-GNTSHR) freebsd.c SRCS-$(CONFIG_SunOS) += gnttab_unimp.c gntshr_unimp.c SRCS-$(CONFIG_NetBSD) += gnttab_unimp.c gntshr_unimp.c diff --git a/tools/libs/gnttab/freebsd.c b/tools/libs/gnttab/freebsd.c new file mode 100644 index 0000000..eef0238 --- /dev/null +++ b/tools/libs/gnttab/freebsd.c @@ -0,0 +1,335 @@ +/* + * Copyright (c) 2007-2008, D G Murray <Derek.Murray@cl.cam.ac.uk> + * Copyright (c) 2016-2017, Akshay Jaggi <jaggi@FreeBSD.org> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; If not, see <http://www.gnu.org/licenses/>. 
+ * + * Split out from linux.c + */ + +#include <fcntl.h> +#include <errno.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> + +#include <sys/ioctl.h> +#include <sys/mman.h> + +#include <xen/sys/gntdev.h> + +#include "private.h" + +#define DEVXEN "/dev/xen/" + +#define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1)) + +#define GTERROR(_l, _f...) xtl_log(_l, XTL_ERROR, errno, "gnttab", _f) +#define GSERROR(_l, _f...) xtl_log(_l, XTL_ERROR, errno, "gntshr", _f) + +#define PAGE_SHIFT 12 +#define PAGE_SIZE (1UL << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#ifndef O_CLOEXEC +#define O_CLOEXEC 0 +#endif + +int osdep_gnttab_open(xengnttab_handle *xgt) +{ + int fd = open(DEVXEN "gntdev", O_RDWR|O_CLOEXEC); + if ( fd == -1 ) + return -1; + xgt->fd = fd; + return 0; +} + +int osdep_gnttab_close(xengnttab_handle *xgt) +{ + if ( xgt->fd == -1 ) + return 0; + + return close(xgt->fd); +} + +int osdep_gnttab_set_max_grants(xengnttab_handle *xgt, uint32_t count) +{ + int fd = xgt->fd, rc; + struct ioctl_gntdev_set_max_grants max_grants = { .count = count }; + + rc = ioctl(fd, IOCTL_GNTDEV_SET_MAX_GRANTS, &max_grants); + if (rc) { + /* + * FreeBSD kernel doesn't implement this IOCTL, + * so ignore the resulting specific failure, if any. 
+ */ + if (errno == ENOTTY) + rc = 0; + else + GTERROR(xgt->logger, "ioctl SET_MAX_GRANTS failed"); + } + + return rc; +} + +void *osdep_gnttab_grant_map(xengnttab_handle *xgt, + uint32_t count, int flags, int prot, + uint32_t *domids, uint32_t *refs, + uint32_t notify_offset, + evtchn_port_t notify_port) +{ + int fd = xgt->fd; + struct ioctl_gntdev_map_grant_ref *map; + unsigned int map_size = ROUNDUP((sizeof(*map) + (count - 1) * + sizeof(struct ioctl_gntdev_map_grant_ref)), + PAGE_SHIFT); + void *addr = NULL; + int domids_stride = 1; + int i; + + if (flags & XENGNTTAB_GRANT_MAP_SINGLE_DOMAIN) + domids_stride = 0; + + if ( map_size <= PAGE_SIZE ) + map = alloca(sizeof(*map) + + (count - 1) * sizeof(struct ioctl_gntdev_map_grant_ref)); + else + { + map = mmap(NULL, map_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if ( map == MAP_FAILED ) + { + GTERROR(xgt->logger, "mmap of map failed"); + return NULL; + } + } + + for ( i = 0; i < count; i++ ) + { + map->refs[i].domid = domids[i * domids_stride]; + map->refs[i].ref = refs[i]; + } + + map->count = count; + + if ( ioctl(fd, IOCTL_GNTDEV_MAP_GRANT_REF, map) ) { + GTERROR(xgt->logger, "ioctl MAP_GRANT_REF failed"); + goto out; + } + + retry: + addr = mmap(NULL, PAGE_SIZE * count, prot, MAP_SHARED, fd, + map->index); + + if (addr == MAP_FAILED && errno == EAGAIN) + { + /* + * The grant hypercall can return EAGAIN if the granted page + * is swapped out. Since the paging daemon may be in the same + * domain, the hypercall cannot block without causing a + * deadlock. + * + * Because there are no notifications when the page is swapped + * in, wait a bit before retrying, and hope that the page will + * arrive eventually. 
+ */ + usleep(1000); + goto retry; + } + + if (addr != MAP_FAILED) + { + int rv = 0; + struct ioctl_gntdev_unmap_notify notify; + notify.index = map->index; + notify.action = 0; + if (notify_offset < PAGE_SIZE * count) { + notify.index += notify_offset; + notify.action |= UNMAP_NOTIFY_CLEAR_BYTE; + } + if (notify_port != -1) { + notify.event_channel_port = notify_port; + notify.action |= UNMAP_NOTIFY_SEND_EVENT; + } + if (notify.action) + rv = ioctl(fd, IOCTL_GNTDEV_SET_UNMAP_NOTIFY, &notify); + if (rv) { + GTERROR(xgt->logger, "ioctl SET_UNMAP_NOTIFY failed"); + munmap(addr, count * PAGE_SIZE); + addr = MAP_FAILED; + } + } + + if (addr == MAP_FAILED) + { + int saved_errno = errno; + struct ioctl_gntdev_unmap_grant_ref unmap_grant; + + /* Unmap the driver slots used to store the grant information. */ + GTERROR(xgt->logger, "mmap failed"); + unmap_grant.index = map->index; + unmap_grant.count = count; + ioctl(fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &unmap_grant); + errno = saved_errno; + addr = NULL; + } + + out: + if ( map_size > PAGE_SIZE ) + munmap(map, map_size); + + return addr; +} + +int osdep_gnttab_unmap(xengnttab_handle *xgt, + void *start_address, + uint32_t count) +{ + int fd = xgt->fd; + struct ioctl_gntdev_get_offset_for_vaddr get_offset; + struct ioctl_gntdev_unmap_grant_ref unmap_grant; + int rc; + + if ( start_address == NULL ) + { + errno = EINVAL; + return -1; + } + + /* First, it is necessary to get the offset which was initially used to + * mmap() the pages. + */ + get_offset.vaddr = (unsigned long)start_address; + if ( (rc = ioctl(fd, IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR, + &get_offset)) ) + return rc; + + if ( get_offset.count != count ) + { + errno = EINVAL; + return -1; + } + + /* Next, unmap the memory. */ + if ( (rc = munmap(start_address, count * PAGE_SIZE)) ) + return rc; + + /* Finally, unmap the driver slots used to store the grant information. 
*/ + unmap_grant.index = get_offset.offset; + unmap_grant.count = count; + if ( (rc = ioctl(fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &unmap_grant)) ) + return rc; + + return 0; +} + +int osdep_gntshr_open(xengntshr_handle *xgs) +{ + int fd = open(DEVXEN "gntdev", O_RDWR); + if ( fd == -1 ) + return -1; + xgs->fd = fd; + return 0; +} + +int osdep_gntshr_close(xengntshr_handle *xgs) +{ + if ( xgs->fd == -1 ) + return 0; + + return close(xgs->fd); +} + +void *osdep_gntshr_share_pages(xengntshr_handle *xgs, + uint32_t domid, int count, + uint32_t *refs, int writable, + uint32_t notify_offset, + evtchn_port_t notify_port) +{ + struct ioctl_gntdev_alloc_gref *gref_info = NULL; + struct ioctl_gntdev_unmap_notify notify; + struct ioctl_gntdev_dealloc_gref gref_drop; + int fd = xgs->fd; + int err; + void *area = NULL; + gref_info = malloc(sizeof(*gref_info) + count * sizeof(uint32_t)); + if (!gref_info) + return NULL; + gref_info->domid = domid; + gref_info->flags = writable ? GNTDEV_ALLOC_FLAG_WRITABLE : 0; + gref_info->count = count; + + err = ioctl(fd, IOCTL_GNTDEV_ALLOC_GREF, gref_info); + if (err) { + GSERROR(xgs->logger, "ioctl failed"); + goto out; + } + + area = mmap(NULL, count * PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, gref_info->index); + + if (area == MAP_FAILED) { + area = NULL; + GSERROR(xgs->logger, "mmap failed"); + goto out_remove_fdmap; + } + + notify.index = gref_info->index; + notify.action = 0; + if (notify_offset < PAGE_SIZE * count) { + notify.index += notify_offset; + notify.action |= UNMAP_NOTIFY_CLEAR_BYTE; + } + if (notify_port != -1) { + notify.event_channel_port = notify_port; + notify.action |= UNMAP_NOTIFY_SEND_EVENT; + } + if (notify.action) + err = ioctl(fd, IOCTL_GNTDEV_SET_UNMAP_NOTIFY, &notify); + if (err) { + GSERROR(xgs->logger, "ioctl SET_UNMAP_NOTIFY failed"); + munmap(area, count * PAGE_SIZE); + area = NULL; + } + + memcpy(refs, gref_info->gref_ids, count * sizeof(uint32_t)); + + out_remove_fdmap: + /* Removing the mapping from the 
file descriptor does not cause the pages to + * be deallocated until the mapping is removed. + */ + gref_drop.index = gref_info->index; + gref_drop.count = count; + ioctl(fd, IOCTL_GNTDEV_DEALLOC_GREF, &gref_drop); + out: + free(gref_info); + return area; +} + +int osdep_gntshr_unshare(xengntshr_handle *xgs, + void *start_address, uint32_t count) +{ + return munmap(start_address, count * PAGE_SIZE); +} + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAAeUNVmz521zKZErZqsMuJKHcS_fSvVCw=LByQXoxh_RAiLwiQ>
