Date: Fri, 21 Aug 2015 05:02:27 +0000 (UTC) From: Marcel Moolenaar <marcel@FreeBSD.org> To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r286979 - in user/marcel/libvdsk: bhyve bhyveload Message-ID: <201508210502.t7L52RaI058201@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: marcel Date: Fri Aug 21 05:02:26 2015 New Revision: 286979 URL: https://svnweb.freebsd.org/changeset/base/286979 Log: Sync with ^/head@286949 Added: user/marcel/libvdsk/bhyve/Makefile.depend - copied unchanged from r286949, head/usr.sbin/bhyve/Makefile.depend user/marcel/libvdsk/bhyve/bootrom.c - copied unchanged from r286949, head/usr.sbin/bhyve/bootrom.c user/marcel/libvdsk/bhyve/bootrom.h - copied unchanged from r286949, head/usr.sbin/bhyve/bootrom.h Modified: user/marcel/libvdsk/bhyve/Makefile user/marcel/libvdsk/bhyve/acpi.c user/marcel/libvdsk/bhyve/ahci.h user/marcel/libvdsk/bhyve/bhyve.8 user/marcel/libvdsk/bhyve/bhyverun.c user/marcel/libvdsk/bhyve/block_if.c user/marcel/libvdsk/bhyve/block_if.h user/marcel/libvdsk/bhyve/dbgport.c user/marcel/libvdsk/bhyve/inout.c user/marcel/libvdsk/bhyve/ioapic.c user/marcel/libvdsk/bhyve/ioapic.h user/marcel/libvdsk/bhyve/pci_ahci.c user/marcel/libvdsk/bhyve/pci_emul.c user/marcel/libvdsk/bhyve/pci_hostbridge.c user/marcel/libvdsk/bhyve/pci_irq.c user/marcel/libvdsk/bhyve/pci_irq.h user/marcel/libvdsk/bhyve/pci_lpc.c user/marcel/libvdsk/bhyve/pci_lpc.h user/marcel/libvdsk/bhyve/pci_passthru.c user/marcel/libvdsk/bhyve/pci_virtio_block.c user/marcel/libvdsk/bhyve/pci_virtio_net.c user/marcel/libvdsk/bhyve/pci_virtio_rnd.c user/marcel/libvdsk/bhyve/pm.c user/marcel/libvdsk/bhyve/task_switch.c user/marcel/libvdsk/bhyve/uart_emul.c user/marcel/libvdsk/bhyve/virtio.c user/marcel/libvdsk/bhyve/virtio.h user/marcel/libvdsk/bhyveload/Makefile user/marcel/libvdsk/bhyveload/bhyveload.8 user/marcel/libvdsk/bhyveload/bhyveload.c Directory Properties: user/marcel/libvdsk/bhyve/ (props changed) user/marcel/libvdsk/bhyveload/ (props changed) Modified: user/marcel/libvdsk/bhyve/Makefile ============================================================================== --- user/marcel/libvdsk/bhyve/Makefile Fri Aug 21 02:42:14 2015 (r286978) +++ user/marcel/libvdsk/bhyve/Makefile Fri Aug 21 05:02:26 2015 (r286979) @@ -13,6 +13,7 @@ SRCS= \ acpi.c \ bhyverun.c \ block_if.c \ + bootrom.c \ consport.c \ dbgport.c \ inout.c \ @@ -43,8 +44,8 @@ SRCS= \ .PATH: /sys/amd64/vmm SRCS+= vmm_instruction_emul.c -DPADD= ${LIBVDSK} ${LIBVMMAPI} ${LIBMD} ${LIBUTIL} ${LIBPTHREAD} -LDADD= -lvdsk -lvmmapi -lmd -lutil -lpthread +DPADD= ${LIBVDSK} ${LIBVMMAPI} ${LIBMD} ${LIBPTHREAD} +LDADD= -lvdsk -lvmmapi -lmd -lpthread WARNS?= 2 Copied: user/marcel/libvdsk/bhyve/Makefile.depend (from r286949, head/usr.sbin/bhyve/Makefile.depend) ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/marcel/libvdsk/bhyve/Makefile.depend Fri Aug 21 05:02:26 2015 (r286979, copy of r286949, head/usr.sbin/bhyve/Makefile.depend) @@ -0,0 +1,22 @@ +# $FreeBSD$ +# Autogenerated - do NOT edit! + +DIRDEPS = \ + gnu/lib/csu \ + gnu/lib/libgcc \ + include \ + include/xlocale \ + lib/${CSU_DIR} \ + lib/libc \ + lib/libcompiler_rt \ + lib/libmd \ + lib/libthr \ + lib/libutil \ + lib/libvmmapi \ + + +.include <dirdeps.mk> + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +.endif Modified: user/marcel/libvdsk/bhyve/acpi.c ============================================================================== --- user/marcel/libvdsk/bhyve/acpi.c Fri Aug 21 02:42:14 2015 (r286978) +++ user/marcel/libvdsk/bhyve/acpi.c Fri Aug 21 05:02:26 2015 (r286979) @@ -386,7 +386,7 @@ basl_fwrite_fadt(FILE *fp) EFPRINTF(fp, "[0001]\t\tDuty Cycle Width : 00\n"); EFPRINTF(fp, "[0001]\t\tRTC Day Alarm Index : 00\n"); EFPRINTF(fp, "[0001]\t\tRTC Month Alarm Index : 00\n"); - EFPRINTF(fp, "[0001]\t\tRTC Century Index : 00\n"); + EFPRINTF(fp, "[0001]\t\tRTC Century Index : 32\n"); EFPRINTF(fp, "[0002]\t\tBoot Flags (decoded below) : 0000\n"); EFPRINTF(fp, "\t\t\tLegacy Devices Supported (V2) : 0\n"); EFPRINTF(fp, "\t\t\t8042 Present on ports 60/64 (V2) : 0\n"); Modified: user/marcel/libvdsk/bhyve/ahci.h ============================================================================== --- user/marcel/libvdsk/bhyve/ahci.h Fri Aug 21 02:42:14 2015 (r286978) +++ user/marcel/libvdsk/bhyve/ahci.h Fri Aug 21 05:02:26 2015 (r286979) @@ -96,13 +96,14 @@ #define ATA_SS_SPD_NO_SPEED 0x00000000 #define ATA_SS_SPD_GEN1 0x00000010 #define ATA_SS_SPD_GEN2 0x00000020 -#define ATA_SS_SPD_GEN3 0x00000040 +#define ATA_SS_SPD_GEN3 0x00000030 #define ATA_SS_IPM_MASK 0x00000f00 #define ATA_SS_IPM_NO_DEVICE 0x00000000 #define ATA_SS_IPM_ACTIVE 0x00000100 #define ATA_SS_IPM_PARTIAL 0x00000200 #define ATA_SS_IPM_SLUMBER 0x00000600 +#define ATA_SS_IPM_DEVSLEEP 0x00000800 #define ATA_SERROR 14 #define ATA_SE_DATA_CORRECTED 0x00000001 @@ -133,17 +134,19 @@ #define ATA_SC_SPD_NO_SPEED 0x00000000 #define ATA_SC_SPD_SPEED_GEN1 0x00000010 #define ATA_SC_SPD_SPEED_GEN2 0x00000020 -#define ATA_SC_SPD_SPEED_GEN3 0x00000040 +#define ATA_SC_SPD_SPEED_GEN3 0x00000030 #define ATA_SC_IPM_MASK 0x00000f00 #define ATA_SC_IPM_NONE 0x00000000 #define ATA_SC_IPM_DIS_PARTIAL 0x00000100 #define ATA_SC_IPM_DIS_SLUMBER 0x00000200 +#define ATA_SC_IPM_DIS_DEVSLEEP 0x00000400 #define ATA_SACTIVE 16 #define AHCI_MAX_PORTS 32 #define AHCI_MAX_SLOTS 32 +#define AHCI_MAX_IRQS 16 /* SATA AHCI v1.0 register defines */ #define AHCI_CAP 0x00 @@ -208,6 +211,9 @@ #define AHCI_CAP2_BOH 0x00000001 #define AHCI_CAP2_NVMP 0x00000002 #define AHCI_CAP2_APST 0x00000004 +#define AHCI_CAP2_SDS 0x00000008 +#define AHCI_CAP2_SADM 0x00000010 +#define AHCI_CAP2_DESO 0x00000020 #define AHCI_OFFSET 0x100 #define AHCI_STEP 0x80 @@ -265,6 +271,7 @@ #define AHCI_P_CMD_ACTIVE 0x10000000 #define AHCI_P_CMD_PARTIAL 0x20000000 #define AHCI_P_CMD_SLUMBER 0x60000000 +#define AHCI_P_CMD_DEVSLEEP 0x80000000 #define AHCI_P_TFD 0x20 #define AHCI_P_SIG 0x24 @@ -284,6 +291,17 @@ #define AHCI_P_FBS_ADO_SHIFT 12 #define AHCI_P_FBS_DWE 0x000f0000 #define AHCI_P_FBS_DWE_SHIFT 16 +#define AHCI_P_DEVSLP 0x44 +#define AHCI_P_DEVSLP_ADSE 0x00000001 +#define AHCI_P_DEVSLP_DSP 0x00000002 +#define AHCI_P_DEVSLP_DETO 0x000003fc +#define AHCI_P_DEVSLP_DETO_SHIFT 2 +#define AHCI_P_DEVSLP_MDAT 0x00007c00 +#define AHCI_P_DEVSLP_MDAT_SHIFT 10 +#define AHCI_P_DEVSLP_DITO 0x01ff8000 +#define AHCI_P_DEVSLP_DITO_SHIFT 15 +#define AHCI_P_DEVSLP_DM 0x0e000000 +#define AHCI_P_DEVSLP_DM_SHIFT 25 /* Just to be sure, if building as module. */ #if MAXPHYS < 512 * 1024 Modified: user/marcel/libvdsk/bhyve/bhyve.8 ============================================================================== --- user/marcel/libvdsk/bhyve/bhyve.8 Fri Aug 21 02:42:14 2015 (r286978) +++ user/marcel/libvdsk/bhyve/bhyve.8 Fri Aug 21 05:02:26 2015 (r286979) @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd September 17, 2014 +.Dd August 7, 2015 .Dt BHYVE 8 .Os .Sh NAME @@ -32,7 +32,7 @@ .Nd "run a guest operating system inside a virtual machine" .Sh SYNOPSIS .Nm -.Op Fl abehuwxACHPWY +.Op Fl abehuwxACHPSWY .Op Fl c Ar numcpus .Op Fl g Ar gdbport .Op Fl l Ar lpcdev Ns Op , Ns Ar conf @@ -50,7 +50,7 @@ Parameters such as the number of virtual I/O connectivity can be specified with command-line parameters. .Pp The guest operating system must be loaded with -.Xr bhyveload 4 +.Xr bhyveload 8 or a similar boot loader before running .Nm . .Pp @@ -61,8 +61,8 @@ exit is detected. .Bl -tag -width 10n .It Fl a The guest's local APIC is configured in xAPIC mode. -The xAPIC mode is the default setting so this option is redundant. It will be -deprecated in a future version. +The xAPIC mode is the default setting so this option is redundant. +It will be deprecated in a future version. .It Fl A Generate ACPI tables. Required for @@ -99,10 +99,12 @@ Yield the virtual CPU thread when a HLT If this option is not specified, virtual CPUs will use 100% of a host CPU. .It Fl l Ar lpcdev Ns Op , Ns Ar conf Allow devices behind the LPC PCI-ISA bridge to be configured. -The only supported devices are the TTY-class devices, -.Li com1 +The only supported devices are the TTY-class devices +.Ar com1 and -.Li com2 . +.Ar com2 +and the boot ROM device +.Ar bootrom . .It Fl m Ar size Ns Op Ar K|k|M|m|G|g|T|t Guest physical memory size in bytes. This must be the same size that was given to @@ -122,7 +124,7 @@ Force the guest virtual CPU to exit when .It Fl s Ar slot,emulation Ns Op , Ns Ar conf Configure a virtual PCI slot and function. .Pp -.Nm bhyve +.Nm provides PCI bus emulation and virtual devices that can be attached to slots on the bus. There are 32 available slots, with the option of providing up to 8 functions @@ -134,11 +136,19 @@ per slot. .Pp The .Ar pcislot -value is 0 to 31. The optional function value is 0 to 7. The optional +value is 0 to 31. +The optional +.Ar function +value is 0 to 7. +The optional .Ar bus value is 0 to 255. -If not specified, the function value defaults to 0. -If not specified, the bus value defaults to 0. +If not specified, the +.Ar function +value defaults to 0. +If not specified, the +.Ar bus +value defaults to 0. .It Ar emulation .Bl -tag -width 10n .It Li hostbridge | Li amd_hostbridge @@ -165,8 +175,8 @@ AHCI controller attached to a SATA hard- .It Li uart PCI 16550 serial device. .It Li lpc -LPC PCI-ISA bridge with COM1 and COM2 16550 serial ports. The LPC bridge -emulation can only be configured on bus 0. +LPC PCI-ISA bridge with COM1 and COM2 16550 serial ports and a boot ROM. +The LPC bridge emulation can only be configured on bus 0. .El .It Op Ar conf This optional parameter describes the backend for device emulations. @@ -193,8 +203,13 @@ format. .Pp Block storage devices: .Bl -tag -width 10n -.It Pa /filename Ns Oo , Ns Li nocache Oc Ns Oo , Ns Li direct Oc Ns Oo , Ns Li ro Oc -.It Pa /dev/xxx Ns Oo , Ns Ar nocache Oc Ns Oo , Ns Ar direct Oc Ns Oo , Ns Ar ro Oc +.It Pa /filename Ns Oo , Ns Ar block-device-options Oc +.It Pa /dev/xxx Ns Oo , Ns Ar block-device-options Oc +.El +.Pp +The +.Ar block-device-options +are: .Bl -tag -width 8n .It Li nocache Open the file with @@ -204,25 +219,31 @@ Open the file using .Dv O_SYNC . .It Li ro Force the file to be opened read-only. -.El -.Pp -The -.Li nocache , -.Li direct , -and -.Li ro -options are not available for virtio block devices. +.It Li sectorsize= Ns Ar logical Ns Oo / Ns Ar physical Oc +Specify the logical and physical sector sizes of the emulated disk. +The physical sector size is optional and is equal to the logical sector size +if not explicitly specified. .El .Pp TTY devices: .Bl -tag -width 10n .It Li stdio Connect the serial port to the standard input and output of -the bhyve process. +the +.Nm +process. .It Pa /dev/xxx Use the host TTY device for serial port I/O. .El .Pp +Boot ROM device: +.Bl -tag -width 10n +.It Pa romfile +Map +.Ar romfile +in the guest address space reserved for boot firmware. +.El +.Pp Pass-through devices: .Bl -tag -width 10n .It Ns Ar slot Ns / Ns Ar bus Ns / Ns Ar function @@ -234,11 +255,17 @@ and numbers. .El .Pp +Guest memory must be wired using the +.Fl S +option when a pass-through device is configured. +.Pp The host device must have been reserved at boot-time using the .Va pptdev loader variable as described in .Xr vmm 4 . .El +.It Fl S +Wire guest memory. .It Fl u RTC keeps UTC time. .It Fl U Ar uuid @@ -248,7 +275,8 @@ in the guest's System Management BIOS Sy By default a UUID is generated from the host's hostname and .Ar vmname . .It Fl w -Ignore accesses to unimplemented Model Specific Registers (MSRs). This is intended for debug purposes. +Ignore accesses to unimplemented Model Specific Registers (MSRs). +This is intended for debug purposes. .It Fl W Force virtio PCI device emulations to use MSI interrupts instead of MSI-X interrupts. @@ -263,7 +291,7 @@ This should be the same as that created .El .Sh EXAMPLES The guest operating system must have been loaded with -.Xr bhyveload 4 +.Xr bhyveload 8 or a similar boot loader before .Xr bhyve 4 can be run. @@ -291,9 +319,9 @@ Run an 8GB quad-CPU virtual machine with CD-ROM, a single virtio network port, an AMD hostbridge, and the console port connected to an .Xr nmdm 4 -null-model device. +null-modem device. .Bd -literal -offset indent -bhyve -c 4 \e\ +bhyve -c 4 \\ -s 0,amd_hostbridge -s 1,lpc \\ -s 1:0,ahci-hd,/images/disk.1 \\ -s 1:1,ahci-hd,/images/disk.2 \\ @@ -303,7 +331,7 @@ bhyve -c 4 \e\ -s 1:5,ahci-hd,/images/disk.6 \\ -s 1:6,ahci-hd,/images/disk.7 \\ -s 1:7,ahci-hd,/images/disk.8 \\ - -s 2,ahci-cd,/images.install.iso \\ + -s 2,ahci-cd,/images/install.iso \\ -s 3,virtio-net,tap0 \\ -l com1,/dev/nmdm0A \\ -A -H -P -m 8G Modified: user/marcel/libvdsk/bhyve/bhyverun.c ============================================================================== --- user/marcel/libvdsk/bhyve/bhyverun.c Fri Aug 21 02:42:14 2015 (r286978) +++ user/marcel/libvdsk/bhyve/bhyverun.c Fri Aug 21 05:02:26 2015 (r286979) @@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$"); #include <pthread.h> #include <pthread_np.h> #include <sysexits.h> +#include <stdbool.h> #include <machine/vmm.h> #include <vmmapi.h> @@ -100,7 +101,7 @@ static struct vm_exit vmexit[VM_MAXCPU]; struct bhyvestats { uint64_t vmexit_bogus; - uint64_t vmexit_bogus_switch; + uint64_t vmexit_reqidle; uint64_t vmexit_hlt; uint64_t vmexit_pause; uint64_t vmexit_mtrap; @@ -122,7 +123,7 @@ usage(int code) { fprintf(stderr, - "Usage: %s [-abehuwxACHPWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n" + "Usage: %s [-abehuwxACHPSWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n" " %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n" " -a: local apic is in xAPIC mode (deprecated)\n" " -A: create ACPI tables\n" @@ -137,6 +138,7 @@ usage(int code) " -p: pin 'vcpu' to 'hostcpu'\n" " -P: vmexit from the guest on pause\n" " -s: <slot,driver,configinfo> PCI slot config\n" + " -S: guest memory cannot be swapped\n" " -u: RTC keeps UTC time\n" " -U: uuid\n" " -w: ignore unimplemented MSRs\n" @@ -325,8 +327,10 @@ vmexit_inout(struct vmctx *ctx, struct v error = emulate_inout(ctx, vcpu, vme, strictio); if (error) { - fprintf(stderr, "Unhandled %s%c 0x%04x\n", in ? "in" : "out", - bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port); + fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n", + in ? "in" : "out", + bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), + port, vmexit->rip); return (VMEXIT_ABORT); } else { return (VMEXIT_CONTINUE); @@ -459,6 +463,17 @@ vmexit_bogus(struct vmctx *ctx, struct v } static int +vmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + + assert(vmexit->inst_length == 0); + + stats.vmexit_reqidle++; + + return (VMEXIT_CONTINUE); +} + +static int vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { @@ -495,22 +510,27 @@ vmexit_mtrap(struct vmctx *ctx, struct v static int vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { - int err; + int err, i; + struct vie *vie; + stats.vmexit_inst_emul++; + vie = &vmexit->u.inst_emul.vie; err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, - &vmexit->u.inst_emul.vie, &vmexit->u.inst_emul.paging); + vie, &vmexit->u.inst_emul.paging); if (err) { - if (err == EINVAL) { - fprintf(stderr, - "Failed to emulate instruction at 0x%lx\n", - vmexit->rip); - } else if (err == ESRCH) { + if (err == ESRCH) { fprintf(stderr, "Unhandled memory access to 0x%lx\n", vmexit->u.inst_emul.gpa); } + fprintf(stderr, "Failed to emulate instruction ["); + for (i = 0; i < vie->num_valid; i++) { + fprintf(stderr, "0x%02x%s", vie->inst[i], + i != (vie->num_valid - 1) ? " " : ""); + } + fprintf(stderr, "] at 0x%lx\n", vmexit->rip); return (VMEXIT_ABORT); } @@ -564,6 +584,7 @@ static vmexit_handler_t handler[VM_EXITC [VM_EXITCODE_VMX] = vmexit_vmx, [VM_EXITCODE_SVM] = vmexit_svm, [VM_EXITCODE_BOGUS] = vmexit_bogus, + [VM_EXITCODE_REQIDLE] = vmexit_reqidle, [VM_EXITCODE_RDMSR] = vmexit_rdmsr, [VM_EXITCODE_WRMSR] = vmexit_wrmsr, [VM_EXITCODE_MTRAP] = vmexit_mtrap, @@ -681,26 +702,82 @@ fbsdrun_set_capabilities(struct vmctx *c vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1); } +static struct vmctx * +do_open(const char *vmname) +{ + struct vmctx *ctx; + int error; + bool reinit, romboot; + + reinit = romboot = false; + + if (lpc_bootrom()) + romboot = true; + + error = vm_create(vmname); + if (error) { + if (errno == EEXIST) { + if (romboot) { + reinit = true; + } else { + /* + * The virtual machine has been setup by the + * userspace bootloader. + */ + } + } else { + perror("vm_create"); + exit(1); + } + } else { + if (!romboot) { + /* + * If the virtual machine was just created then a + * bootrom must be configured to boot it. + */ + fprintf(stderr, "virtual machine cannot be booted\n"); + exit(1); + } + } + + ctx = vm_open(vmname); + if (ctx == NULL) { + perror("vm_open"); + exit(1); + } + + if (reinit) { + error = vm_reinit(ctx); + if (error) { + perror("vm_reinit"); + exit(1); + } + } + return (ctx); +} + int main(int argc, char *argv[]) { int c, error, gdb_port, err, bvmcons; - int dump_guest_memory, max_vcpus, mptgen; + int max_vcpus, mptgen, memflags; int rtc_localtime; struct vmctx *ctx; uint64_t rip; size_t memsize; + char *optstr; bvmcons = 0; - dump_guest_memory = 0; progname = basename(argv[0]); gdb_port = 0; guest_ncpus = 1; memsize = 256 * MB; mptgen = 1; rtc_localtime = 1; + memflags = 0; - while ((c = getopt(argc, argv, "abehuwxACHIPWYp:g:c:s:m:l:U:")) != -1) { + optstr = "abehuwxACHIPSWYp:g:c:s:m:l:U:"; + while ((c = getopt(argc, argv, optstr)) != -1) { switch (c) { case 'a': x2apic_mode = 0; @@ -721,7 +798,7 @@ main(int argc, char *argv[]) guest_ncpus = atoi(optarg); break; case 'C': - dump_guest_memory = 1; + memflags |= VM_MEM_F_INCORE; break; case 'g': gdb_port = atoi(optarg); @@ -737,6 +814,9 @@ main(int argc, char *argv[]) exit(1); else break; + case 'S': + memflags |= VM_MEM_F_WIRED; + break; case 'm': error = vm_parse_memsize(optarg, &memsize); if (error) @@ -791,10 +871,10 @@ main(int argc, char *argv[]) usage(1); vmname = argv[0]; + ctx = do_open(vmname); - ctx = vm_open(vmname); - if (ctx == NULL) { - perror("vm_open"); + if (guest_ncpus < 1) { + fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus); exit(1); } @@ -807,11 +887,10 @@ main(int argc, char *argv[]) fbsdrun_set_capabilities(ctx, BSP); - if (dump_guest_memory) - vm_set_memflags(ctx, VM_MEM_F_INCORE); + vm_set_memflags(ctx, memflags); err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); if (err) { - fprintf(stderr, "Unable to setup memory (%d)\n", err); + fprintf(stderr, "Unable to setup memory (%d)\n", errno); exit(1); } @@ -841,6 +920,16 @@ main(int argc, char *argv[]) if (bvmcons) init_bvmcons(); + if (lpc_bootrom()) { + if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) { + fprintf(stderr, "ROM boot failed: unrestricted guest " + "capability not available\n"); + exit(1); + } + error = vcpu_reset(ctx, BSP); + assert(error == 0); + } + error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); assert(error == 0); Modified: user/marcel/libvdsk/bhyve/block_if.c ============================================================================== --- user/marcel/libvdsk/bhyve/block_if.c Fri Aug 21 02:42:14 2015 (r286978) +++ user/marcel/libvdsk/bhyve/block_if.c Fri Aug 21 05:02:26 2015 (r286979) @@ -53,16 +53,19 @@ __FBSDID("$FreeBSD$"); #define BLOCKIF_SIG 0xb109b109 -#define BLOCKIF_MAXREQ 33 +#define BLOCKIF_NUMTHR 8 +#define BLOCKIF_MAXREQ (64 + BLOCKIF_NUMTHR) enum blockop { BOP_READ, BOP_WRITE, - BOP_FLUSH + BOP_FLUSH, + BOP_DELETE }; enum blockstat { BST_FREE, + BST_BLOCK, BST_PEND, BST_BUSY, BST_DONE @@ -74,21 +77,22 @@ struct blockif_elem { enum blockop be_op; enum blockstat be_status; pthread_t be_tid; + off_t be_block; }; struct blockif_ctxt { int bc_magic; + int bc_candelete; int bc_rdonly; - pthread_t bc_btid; + int bc_closing; + pthread_t bc_btid[BLOCKIF_NUMTHR]; pthread_mutex_t bc_mtx; pthread_cond_t bc_cond; - int bc_closing; /* Request elements and free/pending/busy queues */ TAILQ_HEAD(, blockif_elem) bc_freeq; TAILQ_HEAD(, blockif_elem) bc_pendq; TAILQ_HEAD(, blockif_elem) bc_busyq; - u_int bc_req_count; struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; }; @@ -107,69 +111,95 @@ static int blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, enum blockop op) { - struct blockif_elem *be; - - assert(bc->bc_req_count < BLOCKIF_MAXREQ); + struct blockif_elem *be, *tbe; + off_t off; + int i; be = TAILQ_FIRST(&bc->bc_freeq); assert(be != NULL); assert(be->be_status == BST_FREE); - TAILQ_REMOVE(&bc->bc_freeq, be, be_link); - be->be_status = BST_PEND; be->be_req = breq; be->be_op = op; + switch (op) { + case BOP_READ: + case BOP_WRITE: + case BOP_DELETE: + off = breq->br_offset; + for (i = 0; i < breq->br_iovcnt; i++) + off += breq->br_iov[i].iov_len; + break; + default: + off = OFF_MAX; + } + be->be_block = off; + TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { + if (tbe->be_block == breq->br_offset) + break; + } + if (tbe == NULL) { + TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) { + if (tbe->be_block == breq->br_offset) + break; + } + } + if (tbe == NULL) + be->be_status = BST_PEND; + else + be->be_status = BST_BLOCK; TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link); - - bc->bc_req_count++; - - return (0); + return (be->be_status == BST_PEND); } static int -blockif_dequeue(struct blockif_ctxt *bc, struct blockif_elem **bep) +blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep) { struct blockif_elem *be; - if (bc->bc_req_count == 0) - return (ENOENT); - - be = TAILQ_FIRST(&bc->bc_pendq); - assert(be != NULL); - assert(be->be_status == BST_PEND); + TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { + if (be->be_status == BST_PEND) + break; + assert(be->be_status == BST_BLOCK); + } + if (be == NULL) + return (0); TAILQ_REMOVE(&bc->bc_pendq, be, be_link); be->be_status = BST_BUSY; - be->be_tid = bc->bc_btid; + be->be_tid = t; TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link); - *bep = be; - - return (0); + return (1); } static void blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be) { - assert(be->be_status == BST_DONE); + struct blockif_elem *tbe; - TAILQ_REMOVE(&bc->bc_busyq, be, be_link); + if (be->be_status == BST_DONE || be->be_status == BST_BUSY) + TAILQ_REMOVE(&bc->bc_busyq, be, be_link); + else + TAILQ_REMOVE(&bc->bc_pendq, be, be_link); + TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { + if (tbe->be_req->br_offset == be->be_block) + tbe->be_status = BST_PEND; + } be->be_tid = 0; be->be_status = BST_FREE; be->be_req = NULL; TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); - - bc->bc_req_count--; } static void -blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be) +blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) { struct blockif_req *br; int err; br = be->be_req; + if (br->br_iovcnt <= 1) + buf = NULL; err = 0; - switch (be->be_op) { case BOP_READ: err = vdsk_read(bc, br->br_iov, br->br_iovcnt, br->br_offset); @@ -180,6 +210,14 @@ blockif_proc(struct blockif_ctxt *bc, st case BOP_FLUSH: err = vdsk_flush(bc); break; + case BOP_DELETE: + if (!bc->bc_candelete) + err = EOPNOTSUPP; + else if (bc->bc_rdonly) + err = EROFS; + else + err = EOPNOTSUPP; + break; default: err = EINVAL; break; @@ -195,28 +233,27 @@ blockif_thr(void *arg) { struct blockif_ctxt *bc; struct blockif_elem *be; + pthread_t t; bc = arg; + t = pthread_self(); + pthread_mutex_lock(&bc->bc_mtx); for (;;) { - pthread_mutex_lock(&bc->bc_mtx); - while (!blockif_dequeue(bc, &be)) { + while (blockif_dequeue(bc, t, &be)) { pthread_mutex_unlock(&bc->bc_mtx); - blockif_proc(bc, be); + blockif_proc(bc, be, NULL); pthread_mutex_lock(&bc->bc_mtx); blockif_complete(bc, be); } - pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); - pthread_mutex_unlock(&bc->bc_mtx); - - /* - * Check ctxt status here to see if exit requested - */ + /* Check ctxt status here to see if exit requested */ if (bc->bc_closing) - pthread_exit(NULL); + break; + pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); } + pthread_mutex_unlock(&bc->bc_mtx); - /* Not reached */ + pthread_exit(NULL); return (NULL); } @@ -256,13 +293,14 @@ struct blockif_ctxt * blockif_open(const char *optstr, const char *ident) { char tname[MAXCOMLEN + 1]; - char *nopt, *xopts; + char *nopt, *xopts, *cp; struct blockif_ctxt *bc; int extra, i; - int nocache, sync, ro; + int nocache, sync, ro, candelete, ssopt, pssopt; pthread_once(&blockif_once, blockif_init); + ssopt = 0; nocache = 0; sync = 0; ro = 0; @@ -271,16 +309,25 @@ blockif_open(const char *optstr, const c * The first element in the optstring is always a pathname. * Optional elements follow */ - nopt = strdup(optstr); - for (xopts = strtok(nopt, ","); - xopts != NULL; - xopts = strtok(NULL, ",")) { - if (!strcmp(xopts, "nocache")) + nopt = xopts = strdup(optstr); + while (xopts != NULL) { + cp = strsep(&xopts, ","); + if (cp == nopt) /* file or device pathname */ + continue; + else if (!strcmp(cp, "nocache")) nocache = 1; - else if (!strcmp(xopts, "sync")) + else if (!strcmp(cp, "sync") || !strcmp(cp, "direct")) sync = 1; - else if (!strcmp(xopts, "ro")) + else if (!strcmp(cp, "ro")) ro = 1; + else if (sscanf(cp, "sectorsize=%d/%d", &ssopt, &pssopt) == 2) + ; + else if (sscanf(cp, "sectorsize=%d", &ssopt) == 1) + pssopt = ssopt; + else { + fprintf(stderr, "Invalid device option \"%s\"\n", cp); + return (NULL); + } } extra = 0; @@ -302,22 +349,23 @@ blockif_open(const char *optstr, const c } bc->bc_magic = BLOCKIF_SIG; + bc->bc_candelete = candelete; bc->bc_rdonly = ro; pthread_mutex_init(&bc->bc_mtx, NULL); pthread_cond_init(&bc->bc_cond, NULL); TAILQ_INIT(&bc->bc_freeq); TAILQ_INIT(&bc->bc_pendq); TAILQ_INIT(&bc->bc_busyq); - bc->bc_req_count = 0; for (i = 0; i < BLOCKIF_MAXREQ; i++) { bc->bc_reqs[i].be_status = BST_FREE; TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); } - pthread_create(&bc->bc_btid, NULL, blockif_thr, bc); - - snprintf(tname, sizeof(tname), "blk-%s", ident); - pthread_set_name_np(bc->bc_btid, tname); + for (i = 0; i < BLOCKIF_NUMTHR; i++) { + pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc); + snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i); + pthread_set_name_np(bc->bc_btid[i], tname); + } return (bc); } @@ -331,13 +379,13 @@ blockif_request(struct blockif_ctxt *bc, err = 0; pthread_mutex_lock(&bc->bc_mtx); - if (bc->bc_req_count < BLOCKIF_MAXREQ) { + if (!TAILQ_EMPTY(&bc->bc_freeq)) { /* * Enqueue and inform the block i/o thread * that there is work available */ - blockif_enqueue(bc, breq, op); - pthread_cond_signal(&bc->bc_cond); + if (blockif_enqueue(bc, breq, op)) + pthread_cond_signal(&bc->bc_cond); } else { /* * Callers are not allowed to enqueue more than @@ -377,6 +425,14 @@ blockif_flush(struct blockif_ctxt *bc, s } int +blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq) +{ + + assert(bc->bc_magic == BLOCKIF_SIG); + return (blockif_request(bc, breq, BOP_DELETE)); +} + +int blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) { struct blockif_elem *be; @@ -395,11 +451,7 @@ blockif_cancel(struct blockif_ctxt *bc, /* * Found it. */ - TAILQ_REMOVE(&bc->bc_pendq, be, be_link); - be->be_status = BST_FREE; - be->be_req = NULL; - TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); - bc->bc_req_count--; + blockif_complete(bc, be); pthread_mutex_unlock(&bc->bc_mtx); return (0); @@ -460,7 +512,7 @@ int blockif_close(struct blockif_ctxt *bc) { void *jval; - int err; + int err, i; err = 0; @@ -469,9 +521,12 @@ blockif_close(struct blockif_ctxt *bc) /* * Stop the block i/o thread */ + pthread_mutex_lock(&bc->bc_mtx); bc->bc_closing = 1; - pthread_cond_signal(&bc->bc_cond); - pthread_join(bc->bc_btid, &jval); + pthread_mutex_unlock(&bc->bc_mtx); + pthread_cond_broadcast(&bc->bc_cond); + for (i = 0; i < BLOCKIF_NUMTHR; i++) + pthread_join(bc->bc_btid[i], &jval); /* XXX Cancel queued i/o's ??? */ @@ -552,6 +607,15 @@ blockif_sectsz(struct blockif_ctxt *bc) return (vdsk_sectorsize(bc)); } +void +blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off) +{ + + assert(bc->bc_magic == BLOCKIF_SIG); + *size = vdsk_sectorsize(bc); + *off = 0; +} + int blockif_queuesz(struct blockif_ctxt *bc) { @@ -567,3 +631,11 @@ blockif_is_ro(struct blockif_ctxt *bc) assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_rdonly); } *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201508210502.t7L52RaI058201>