Date: Sat, 8 Nov 2014 11:15:47 -0800 From: Marcel Moolenaar <marcel@xcllnt.net> To: Peter Grehan <grehan@freebsd.org> Cc: virtualization@freebsd.org Subject: Re: [current] bhyve under VMware borked? Message-ID: <67DF3123-2654-44F7-BF54-E758843DF354@xcllnt.net> In-Reply-To: <545E5978.2010900@freebsd.org> References: <3054C397-B9F4-44A7-8D71-FF83CB058671@mac.com> <545D160E.7060208@freebsd.org> <BA848416-08C3-4B23-8BBB-C7A293909318@xcllnt.net> <545E5978.2010900@freebsd.org>
next in thread | previous in thread | raw e-mail | index | archive | help
[-- Attachment #1 --]
> On Nov 8, 2014, at 9:57 AM, Peter Grehan <grehan@freebsd.org> wrote:
>
> Fusion 7 doesn't have this issue so there's always the upgrade option :)
Good to know.
>> Ok. If we don't depend on PAT-related exits, then I definitely would
>> appreciate preserving the -stable behaviour. Maybe we should not check
>> for the PAT-related exists in the first place? I mean if we don't need
>> them or depend on them to be there at all...
>
> bhyve does depend on h/w save/restore of the PAT MSR, and this is the case for all bare metal that is supported.
>
> For the nested case, bhyve used to trap/emulate PAT MSR accesses and not propagate them to the host. I'll look at resurrecting that code.
That would be great. As I said: stable/10 works just fine,
BTW: Can you review the attached patch that moves disk
support from bhyveload and bhyve into a new library called
libvdsk. Once there, I plan to add support for VM image
formats like vmdk, vhd, qcow, etc. For now, it's just
moving the barebones support into the libary.
Thanks,
--
Marcel Moolenaar
marcel@xcllnt.net
[-- Attachment #2 --]
Index: lib/Makefile
===================================================================
--- lib/Makefile (revision 274256)
+++ lib/Makefile (working copy)
@@ -105,6 +105,7 @@
${_libusbhid} \
${_libusb} \
libutil \
+ libvdsk \
${_libvgl} \
${_libvmmapi} \
libwrap \
Index: lib/libvdsk/Makefile
===================================================================
--- lib/libvdsk/Makefile (revision 0)
+++ lib/libvdsk/Makefile (working copy)
@@ -0,0 +1,7 @@
+# $FreeBSD$
+
+LIB= vdsk
+SRCS= vdsk.c
+INCS= vdsk.h
+
+.include <bsd.lib.mk>
Property changes on: lib/libvdsk/Makefile
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: lib/libvdsk/vdsk.c
===================================================================
--- lib/libvdsk/vdsk.c (revision 0)
+++ lib/libvdsk/vdsk.c (working copy)
@@ -0,0 +1,150 @@
+/*-
+ * Copyright (c) 2014 Marcel Moolenaar
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/disk.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <vdsk.h>
+
+#include "vdsk_int.h"
+
+static struct vdsk *
+vdsk_deref(vdskctx ctx)
+{
+ struct vdsk *vdsk = ctx;
+
+ return (vdsk - 1);
+}
+
+vdskctx
+vdsk_open(const char *path, int flags, size_t size)
+{
+ struct stat sb;
+ struct vdsk *vdsk;
+
+ size += sizeof(struct vdsk);
+ vdsk = malloc(size);
+ if (vdsk == NULL)
+ return (NULL);
+
+ vdsk->fd = open(path, flags);
+ if (vdsk->fd == -1) {
+ free(vdsk);
+ return (NULL);
+ }
+
+ if (fstat(vdsk->fd, &sb) == -1) {
+ close(vdsk->fd);
+ free(vdsk);
+ return (NULL);
+ }
+
+ if (S_ISCHR(sb.st_mode)) {
+ if (ioctl(vdsk->fd, DIOCGMEDIASIZE, &vdsk->capacity) < 0 ||
+ ioctl(vdsk->fd, DIOCGSECTORSIZE, &vdsk->sectorsize) < 0) {
+ close(vdsk->fd);
+ free(vdsk);
+ return (NULL);
+ }
+ } else {
+ vdsk->capacity = sb.st_size;
+ vdsk->sectorsize = DEV_BSIZE;
+ }
+ return (vdsk + 1);
+}
+
+int
+vdsk_close(vdskctx ctx)
+{
+ struct vdsk *vdsk = vdsk_deref(ctx);
+
+ close(vdsk->fd);
+ free(vdsk);
+ return (0);
+}
+
+off_t
+vdsk_capacity(vdskctx ctx)
+{
+ struct vdsk *vdsk = vdsk_deref(ctx);
+
+ return (vdsk->capacity);
+}
+
+int
+vdsk_sectorsize(vdskctx ctx)
+{
+ struct vdsk *vdsk = vdsk_deref(ctx);
+
+ return (vdsk->sectorsize);
+}
+
+ssize_t
+vdsk_read(vdskctx ctx, void *buf, size_t nbytes, off_t offset)
+{
+ struct vdsk *vdsk = vdsk_deref(ctx);
+ ssize_t res;
+
+ res = pread(vdsk->fd, buf, nbytes, offset);
+ return (res);
+}
+
+ssize_t
+vdsk_readv(vdskctx ctx, const struct iovec *iov, int iovcnt, off_t offset)
+{
+ struct vdsk *vdsk = vdsk_deref(ctx);
+ ssize_t res;
+
+ res = preadv(vdsk->fd, iov, iovcnt, offset);
+ return (res);
+}
+
+ssize_t
+vdsk_writev(vdskctx ctx, const struct iovec *iov, int iovcnt, off_t offset)
+{
+ struct vdsk *vdsk = vdsk_deref(ctx);
+ ssize_t res;
+
+ res = pwritev(vdsk->fd, iov, iovcnt, offset);
+ return (res);
+}
+
+int
+vdsk_flush(vdskctx ctx __unused)
+{
+
+ return (0);
+}
+
Property changes on: lib/libvdsk/vdsk.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: lib/libvdsk/vdsk.h
===================================================================
--- lib/libvdsk/vdsk.h (revision 0)
+++ lib/libvdsk/vdsk.h (working copy)
@@ -0,0 +1,50 @@
+/*-
+ * Copyright (c) 2014 Marcel Moolenaar
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __VDSK_H__
+#define __VDSK_H__
+
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+typedef void *vdskctx;
+
+vdskctx vdsk_open(const char *, int, size_t);
+int vdsk_close(vdskctx);
+
+off_t vdsk_capacity(vdskctx);
+int vdsk_sectorsize(vdskctx);
+
+ssize_t vdsk_read(vdskctx, void *, size_t, off_t);
+ssize_t vdsk_readv(vdskctx, const struct iovec *, int, off_t);
+ssize_t vdsk_writev(vdskctx, const struct iovec *, int, off_t);
+
+int vdsk_flush(vdskctx);
+
+#endif /* __VDSK_H__ */
Property changes on: lib/libvdsk/vdsk.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: lib/libvdsk/vdsk_int.h
===================================================================
--- lib/libvdsk/vdsk_int.h (revision 0)
+++ lib/libvdsk/vdsk_int.h (working copy)
@@ -0,0 +1,38 @@
+/*-
+ * Copyright (c) 2014 Marcel Moolenaar
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __VDSK_INT_H__
+#define __VDSK_INT_H__
+
+struct vdsk {
+ int fd;
+ int sectorsize;
+ off_t capacity;
+};
+
+#endif /* __VDSK_INT_H__ */
Property changes on: lib/libvdsk/vdsk_int.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: share/mk/bsd.libnames.mk
===================================================================
--- share/mk/bsd.libnames.mk (revision 274256)
+++ share/mk/bsd.libnames.mk (working copy)
@@ -150,6 +150,7 @@
LIBULOG?= ${DESTDIR}${LIBDIR}/libulog.a
LIBUTIL?= ${DESTDIR}${LIBDIR}/libutil.a
LIBUUTIL?= ${DESTDIR}${LIBDIR}/libuutil.a
+LIBVDSK?= ${DESTDIR}${LIBDIR}/libvdsk.a
LIBVGL?= ${DESTDIR}${LIBDIR}/libvgl.a
LIBVMMAPI?= ${DESTDIR}${LIBDIR}/libvmmapi.a
LIBWIND?= ${DESTDIR}${LIBDIR}/libwind.a
Index: usr.sbin/bhyve/Makefile
===================================================================
--- usr.sbin/bhyve/Makefile (revision 274256)
+++ usr.sbin/bhyve/Makefile (working copy)
@@ -43,8 +43,8 @@
.PATH: ${.CURDIR}/../../sys/amd64/vmm
SRCS+= vmm_instruction_emul.c
-DPADD= ${LIBVMMAPI} ${LIBMD} ${LIBUTIL} ${LIBPTHREAD}
-LDADD= -lvmmapi -lmd -lutil -lpthread
+DPADD= ${LIBVDSK} ${LIBVMMAPI} ${LIBMD} ${LIBUTIL} ${LIBPTHREAD}
+LDADD= -lvdsk -lvmmapi -lmd -lutil -lpthread
WARNS?= 2
Index: usr.sbin/bhyve/block_if.c
===================================================================
--- usr.sbin/bhyve/block_if.c (revision 274256)
+++ usr.sbin/bhyve/block_if.c (working copy)
@@ -45,6 +45,7 @@
#include <pthread_np.h>
#include <signal.h>
#include <unistd.h>
+#include <vdsk.h>
#include <machine/atomic.h>
@@ -79,10 +80,7 @@
struct blockif_ctxt {
int bc_magic;
- int bc_fd;
int bc_rdonly;
- off_t bc_size;
- int bc_sectsz;
pthread_t bc_btid;
pthread_mutex_t bc_mtx;
pthread_cond_t bc_cond;
@@ -176,18 +174,17 @@
switch (be->be_op) {
case BOP_READ:
- if (preadv(bc->bc_fd, br->br_iov, br->br_iovcnt,
- br->br_offset) < 0)
+ if (vdsk_readv(bc, br->br_iov, br->br_iovcnt,
+ br->br_offset) < 0)
err = errno;
break;
case BOP_WRITE:
- if (bc->bc_rdonly)
- err = EROFS;
- else if (pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt,
- br->br_offset) < 0)
+ if (vdsk_writev(bc, br->br_iov, br->br_iovcnt,
+ br->br_offset) < 0)
err = errno;
break;
case BOP_FLUSH:
+ err = vdsk_flush(bc);
break;
default:
err = EINVAL;
@@ -267,9 +264,7 @@
char tname[MAXCOMLEN + 1];
char *nopt, *xopts;
struct blockif_ctxt *bc;
- struct stat sbuf;
- off_t size;
- int extra, fd, i, sectsz;
+ int extra, i;
int nocache, sync, ro;
pthread_once(&blockif_once, blockif_init);
@@ -300,51 +295,20 @@
if (sync)
extra |= O_SYNC;
- fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra);
- if (fd < 0 && !ro) {
+ bc = vdsk_open(nopt, (ro ? O_RDONLY : O_RDWR) | extra, sizeof(*bc));
+ if (bc == NULL && !ro) {
/* Attempt a r/w fail with a r/o open */
- fd = open(nopt, O_RDONLY | extra);
+ bc = vdsk_open(nopt, O_RDONLY | extra, sizeof(*bc));
ro = 1;
}
- if (fd < 0) {
+ if (bc == NULL) {
perror("Could not open backing file");
return (NULL);
}
- if (fstat(fd, &sbuf) < 0) {
- perror("Could not stat backing file");
- close(fd);
- return (NULL);
- }
-
- /*
- * Deal with raw devices
- */
- size = sbuf.st_size;
- sectsz = DEV_BSIZE;
- if (S_ISCHR(sbuf.st_mode)) {
- if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
- ioctl(fd, DIOCGSECTORSIZE, §sz)) {
- perror("Could not fetch dev blk/sector size");
- close(fd);
- return (NULL);
- }
- assert(size != 0);
- assert(sectsz != 0);
- }
-
- bc = calloc(1, sizeof(struct blockif_ctxt));
- if (bc == NULL) {
- close(fd);
- return (NULL);
- }
-
bc->bc_magic = BLOCKIF_SIG;
- bc->bc_fd = fd;
bc->bc_rdonly = ro;
- bc->bc_size = size;
- bc->bc_sectsz = sectsz;
pthread_mutex_init(&bc->bc_mtx, NULL);
pthread_cond_init(&bc->bc_cond, NULL);
TAILQ_INIT(&bc->bc_freeq);
@@ -521,8 +485,7 @@
* Release resources
*/
bc->bc_magic = 0;
- close(bc->bc_fd);
- free(bc);
+ vdsk_close(bc);
return (0);
}
@@ -541,7 +504,7 @@
assert(bc->bc_magic == BLOCKIF_SIG);
- sectors = bc->bc_size / bc->bc_sectsz;
+ sectors = vdsk_capacity(bc) / vdsk_sectorsize(bc);
/* Clamp the size to the largest possible with CHS */
if (sectors > 65535UL*16*255)
@@ -584,7 +547,7 @@
{
assert(bc->bc_magic == BLOCKIF_SIG);
- return (bc->bc_size);
+ return (vdsk_capacity(bc));
}
int
@@ -592,7 +555,7 @@
{
assert(bc->bc_magic == BLOCKIF_SIG);
- return (bc->bc_sectsz);
+ return (vdsk_sectorsize(bc));
}
int
Index: usr.sbin/bhyve/xmsr.c
===================================================================
--- usr.sbin/bhyve/xmsr.c (revision 274256)
+++ usr.sbin/bhyve/xmsr.c (working copy)
@@ -49,6 +49,9 @@
emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t num, uint64_t val)
{
+ if (num == MSR_PAT)
+ return 0;
+
if (cpu_vendor_intel) {
switch (num) {
case 0xd04: /* Sandy Bridge uncore PMCs */
Index: usr.sbin/bhyveload/Makefile
===================================================================
--- usr.sbin/bhyveload/Makefile (revision 274256)
+++ usr.sbin/bhyveload/Makefile (working copy)
@@ -4,8 +4,8 @@
SRCS= bhyveload.c
MAN= bhyveload.8
-DPADD+= ${LIBVMMAPI} ${LIBUTIL}
-LDADD+= -lvmmapi -lutil
+DPADD+= ${LIBVDSK} ${LIBVMMAPI} ${LIBUTIL}
+LDADD+= -lvdsk -lvmmapi -lutil
WARNS?= 3
Index: usr.sbin/bhyveload/bhyveload.c
===================================================================
--- usr.sbin/bhyveload/bhyveload.c (revision 274256)
+++ usr.sbin/bhyveload/bhyveload.c (working copy)
@@ -79,7 +79,7 @@
#include <sysexits.h>
#include <termios.h>
#include <unistd.h>
-
+#include <vdsk.h>
#include <vmmapi.h>
#include "userboot.h"
@@ -92,7 +92,7 @@
static char *host_base;
static struct termios term, oldterm;
-static int disk_fd[NDISKS];
+static vdskctx disk[NDISKS];
static int ndisks;
static int consin_fd, consout_fd;
@@ -290,9 +290,9 @@
{
ssize_t n;
- if (unit < 0 || unit >= ndisks )
+ if (unit < 0 || unit >= ndisks)
return (EIO);
- n = pread(disk_fd[unit], to, size, from);
+ n = vdsk_read(disk[unit], to, size, from);
if (n < 0)
return (errno);
*resid = size - n;
@@ -302,7 +302,6 @@
static int
cb_diskioctl(void *arg, int unit, u_long cmd, void *data)
{
- struct stat sb;
if (unit < 0 || unit >= ndisks)
return (EBADF);
@@ -309,13 +308,10 @@
switch (cmd) {
case DIOCGSECTORSIZE:
- *(u_int *)data = 512;
+ *(u_int *)data = vdsk_sectorsize(disk[unit]);
break;
case DIOCGMEDIASIZE:
- if (fstat(disk_fd[unit], &sb) == 0)
- *(off_t *)data = sb.st_size;
- else
- return (ENOTTY);
+ *(off_t *)data = vdsk_capacity(disk[unit]);
break;
default:
return (ENOTTY);
@@ -607,21 +603,17 @@
static int
disk_open(char *path)
{
- int err, fd;
+ vdskctx vdsk;
- if (ndisks > NDISKS)
+ if (ndisks >= NDISKS)
return (ERANGE);
- err = 0;
- fd = open(path, O_RDONLY);
+ vdsk = vdsk_open(path, O_RDONLY, 0);
+ if (vdsk == NULL)
+ return (errno);
- if (fd > 0) {
- disk_fd[ndisks] = fd;
- ndisks++;
- } else
- err = errno;
-
- return (err);
+ disk[ndisks++] = vdsk;
+ return (0);
}
static void
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?67DF3123-2654-44F7-BF54-E758843DF354>
