Skip site navigation (1) | Skip section navigation (2)
Date:      Sat, 19 Jul 2014 20:13:01 +0000 (UTC)
From:      John Baldwin <jhb@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject:   svn commit: r268887 - stable/10/usr.sbin/bhyve
Message-ID:  <201407192013.s6JKD1NE052720@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jhb
Date: Sat Jul 19 20:13:01 2014
New Revision: 268887
URL: http://svnweb.freebsd.org/changeset/base/268887

Log:
  MFC 261904,261905,262143,262184,264921,265211,267169,267292,267294:
  Various PCI fixes:
  - Allow PCI devices to be configured on all valid bus numbers from 0 to 255.
  - Tweak the handling of PCI capabilities in emulated devices to remove
    the non-standard zero capability list terminator.
  - Add a check to validate that memory BARs of passthru devices are 4KB
    aligned.
  - Respect and track the enable bit in the PCI configuration address word.
  - Handle quad-word access to 32-bit register pairs.

Modified:
  stable/10/usr.sbin/bhyve/acpi.c
  stable/10/usr.sbin/bhyve/bhyve.8
  stable/10/usr.sbin/bhyve/bhyverun.c
  stable/10/usr.sbin/bhyve/mptbl.c
  stable/10/usr.sbin/bhyve/pci_emul.c
  stable/10/usr.sbin/bhyve/pci_emul.h
  stable/10/usr.sbin/bhyve/pci_lpc.c
  stable/10/usr.sbin/bhyve/pci_passthru.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/usr.sbin/bhyve/acpi.c
==============================================================================
--- stable/10/usr.sbin/bhyve/acpi.c	Sat Jul 19 19:58:26 2014	(r268886)
+++ stable/10/usr.sbin/bhyve/acpi.c	Sat Jul 19 20:13:01 2014	(r268887)
@@ -713,7 +713,7 @@ basl_fwrite_dsdt(FILE *fp)
 	pci_write_dsdt();
 
 	dsdt_line("");
-	dsdt_line("  Scope (_SB.PCI0)");
+	dsdt_line("  Scope (_SB.PC00)");
 	dsdt_line("  {");
 	dsdt_line("    Device (HPET)");
 	dsdt_line("    {");

Modified: stable/10/usr.sbin/bhyve/bhyve.8
==============================================================================
--- stable/10/usr.sbin/bhyve/bhyve.8	Sat Jul 19 19:58:26 2014	(r268886)
+++ stable/10/usr.sbin/bhyve/bhyve.8	Sat Jul 19 20:13:01 2014	(r268887)
@@ -100,12 +100,16 @@ There are 32 available slots, with the o
 per slot.
 .Bl -tag -width 10n
 .It Ar slot
-.Ar pcislot Ns Op Ar :function
+.Ar pcislot[:function]
+.Ar bus:pcislot:function
 .Pp
 The
 .Ar pcislot
-value is 0 to 31 and the optional function value is 0 to 7.
+value is 0 to 31. The optional function value is 0 to 7. The optional
+.Ar bus
+value is 0 to 255.
 If not specified, the function value defaults to 0.
+If not specified, the bus value defaults to 0.
 .It Ar emulation
 .Bl -tag -width 10n
 .It Li hostbridge | Li amd_hostbridge
@@ -130,7 +134,8 @@ AHCI controller attached to a SATA hard-
 .It Li uart
 PCI 16550 serial device.
 .It Li lpc
-LPC PCI-ISA bridge with COM1 and COM2 16550 serial ports.
+LPC PCI-ISA bridge with COM1 and COM2 16550 serial ports. The LPC bridge
+emulation can only be configured on bus 0.
 .El
 .It Op Ar conf
 This optional parameter describes the backend for device emulations.
@@ -227,6 +232,8 @@ This is intended for debug purposes.
 Ignore accesses to unimplemented Model Specific Registers (MSRs). This is intended for debug purposes.
 .It Fl x
 The guest's local APIC is configured in x2APIC mode.
+.It Fl Y
+Disable MPtable generation.
 .It Fl h
 Print help message and exit.
 .It Ar vmname

Modified: stable/10/usr.sbin/bhyve/bhyverun.c
==============================================================================
--- stable/10/usr.sbin/bhyve/bhyverun.c	Sat Jul 19 19:58:26 2014	(r268886)
+++ stable/10/usr.sbin/bhyve/bhyverun.c	Sat Jul 19 20:13:01 2014	(r268887)
@@ -144,6 +144,7 @@ usage(int code)
 		"       -m: memory size in MB\n"
 		"       -w: ignore unimplemented MSRs\n"
 		"       -x: local apic is in x2APIC mode\n"
+		"       -Y: disable MPtable generation\n"
 		"       -U: uuid\n",
 		progname, (int)strlen(progname), "");
 
@@ -591,7 +592,7 @@ int
 main(int argc, char *argv[])
 {
 	int c, error, gdb_port, err, bvmcons;
-	int max_vcpus;
+	int max_vcpus, mptgen;
 	struct vmctx *ctx;
 	uint64_t rip;
 	size_t memsize;
@@ -601,8 +602,9 @@ main(int argc, char *argv[])
 	gdb_port = 0;
 	guest_ncpus = 1;
 	memsize = 256 * MB;
+	mptgen = 1;
 
-	while ((c = getopt(argc, argv, "abehwxAHIPWp:g:c:s:m:l:U:")) != -1) {
+	while ((c = getopt(argc, argv, "abehwxAHIPWYp:g:c:s:m:l:U:")) != -1) {
 		switch (c) {
 		case 'a':
 			x2apic_mode = 0;
@@ -668,6 +670,9 @@ main(int argc, char *argv[])
 		case 'x':
 			x2apic_mode = 1;
 			break;
+		case 'Y':
+			mptgen = 0;
+			break;
 		case 'h':
 			usage(0);			
 		default:
@@ -727,7 +732,11 @@ main(int argc, char *argv[])
 	/*
 	 * build the guest tables, MP etc.
 	 */
-	mptable_build(ctx, guest_ncpus);
+	if (mptgen) {
+		error = mptable_build(ctx, guest_ncpus);
+		if (error)
+			exit(1);
+	}
 
 	error = smbios_build(ctx);
 	assert(error == 0);

Modified: stable/10/usr.sbin/bhyve/mptbl.c
==============================================================================
--- stable/10/usr.sbin/bhyve/mptbl.c	Sat Jul 19 19:58:26 2014	(r268886)
+++ stable/10/usr.sbin/bhyve/mptbl.c	Sat Jul 19 20:13:01 2014	(r268887)
@@ -196,16 +196,21 @@ mpt_build_ioapic_entries(io_apic_entry_p
 static int
 mpt_count_ioint_entries(void)
 {
+	int bus, count;
+
+	count = 0;
+	for (bus = 0; bus <= PCI_BUSMAX; bus++)
+		count += pci_count_lintr(bus);
 
 	/*
 	 * Always include entries for the first 16 pins along with a entry
 	 * for each active PCI INTx pin.
 	 */
-	return (16 + pci_count_lintr());
+	return (16 + count);
 }
 
 static void
-mpt_generate_pci_int(int slot, int pin, int ioapic_irq, void *arg)
+mpt_generate_pci_int(int bus, int slot, int pin, int ioapic_irq, void *arg)
 {
 	int_entry_ptr *mpiep, mpie;
 
@@ -219,7 +224,7 @@ mpt_generate_pci_int(int slot, int pin, 
 	 */
 	mpie->type = MPCT_ENTRY_INT;
 	mpie->int_type = INTENTRY_TYPE_INT;
-	mpie->src_bus_id = 0;
+	mpie->src_bus_id = bus;
 	mpie->src_bus_irq = slot << 2 | (pin - 1);
 	mpie->dst_apic_id = mpie[-1].dst_apic_id;
 	mpie->dst_apic_int = ioapic_irq;
@@ -230,7 +235,7 @@ mpt_generate_pci_int(int slot, int pin, 
 static void
 mpt_build_ioint_entries(int_entry_ptr mpie, int id)
 {
-	int pin;
+	int pin, bus;
 
 	/*
 	 * The following config is taken from kernel mptable.c
@@ -277,7 +282,8 @@ mpt_build_ioint_entries(int_entry_ptr mp
 	}
 
 	/* Next, generate entries for any PCI INTx interrupts. */
-	pci_walk_lintr(mpt_generate_pci_int, &mpie); 
+	for (bus = 0; bus <= PCI_BUSMAX; bus++)
+		pci_walk_lintr(bus, mpt_generate_pci_int, &mpie); 
 }
 
 void
@@ -297,16 +303,31 @@ mptable_build(struct vmctx *ctx, int ncp
 	proc_entry_ptr		mpep;
 	mpfps_t			mpfp;
 	int_entry_ptr		mpie;
-	int			ioints;
+	int			ioints, bus;
 	char 			*curraddr;
 	char 			*startaddr;
 
 	startaddr = paddr_guest2host(ctx, MPTABLE_BASE, MPTABLE_MAX_LENGTH);
 	if (startaddr == NULL) {
-		printf("mptable requires mapped mem\n");
+		fprintf(stderr, "mptable requires mapped mem\n");
 		return (ENOMEM);
 	}
 
+	/*
+	 * There is no way to advertise multiple PCI hierarchies via MPtable
+	 * so require that there is no PCI hierarchy with a non-zero bus
+	 * number.
+	 */
+	for (bus = 1; bus <= PCI_BUSMAX; bus++) {
+		if (pci_bus_configured(bus)) {
+			fprintf(stderr, "MPtable is incompatible with "
+			    "multiple PCI hierarchies.\r\n");
+			fprintf(stderr, "MPtable generation can be disabled "
+			    "by passing the -Y option to bhyve(8).\r\n");
+			return (EINVAL);
+		}
+	}
+
 	curraddr = startaddr;
 	mpfp = (mpfps_t)curraddr;
 	mpt_build_mpfp(mpfp, MPTABLE_BASE);

Modified: stable/10/usr.sbin/bhyve/pci_emul.c
==============================================================================
--- stable/10/usr.sbin/bhyve/pci_emul.c	Sat Jul 19 19:58:26 2014	(r268886)
+++ stable/10/usr.sbin/bhyve/pci_emul.c	Sat Jul 19 20:13:01 2014	(r268887)
@@ -69,6 +69,7 @@ do {									\
 	}								\
 } while (0)
 
+#define	MAXBUSES	(PCI_BUSMAX + 1)
 #define MAXSLOTS	(PCI_SLOTMAX + 1)
 #define	MAXFUNCS	(PCI_FUNCMAX + 1)
 
@@ -86,7 +87,16 @@ struct intxinfo {
 struct slotinfo {
 	struct intxinfo si_intpins[4];
 	struct funcinfo si_funcs[MAXFUNCS];
-} pci_slotinfo[MAXSLOTS];
+};
+
+struct businfo {
+	uint16_t iobase, iolimit;		/* I/O window */
+	uint32_t membase32, memlimit32;		/* mmio window below 4GB */
+	uint64_t membase64, memlimit64;		/* mmio window above 4GB */
+	struct slotinfo slotinfo[MAXSLOTS];
+};
+
+static struct businfo *pci_businfo[MAXBUSES];
 
 SET_DECLARE(pci_devemu_set, struct pci_devemu);
 
@@ -97,7 +107,7 @@ static uint64_t pci_emul_membase64;
 #define	PCI_EMUL_IOBASE		0x2000
 #define	PCI_EMUL_IOLIMIT	0x10000
 
-#define	PCI_EMUL_MEMLIMIT32	0xE0000000		/* 3.5GB */
+#define	PCI_EMUL_MEMLIMIT32	0xE0000000	/* 3.5GB */
 
 #define	PCI_EMUL_MEMBASE64	0xD000000000UL
 #define	PCI_EMUL_MEMLIMIT64	0xFD00000000UL
@@ -105,7 +115,6 @@ static uint64_t pci_emul_membase64;
 static struct pci_devemu *pci_emul_finddev(char *name);
 static void	pci_lintr_update(struct pci_devinst *pi);
 
-static int pci_emul_devices;
 static struct mem_range pci_mem_hole;
 
 /*
@@ -115,6 +124,7 @@ static struct mem_range pci_mem_hole;
 /*
  * Slot options are in the form:
  *
+ *  <bus>:<slot>:<func>,<emul>[,<config>]
  *  <slot>[:<func>],<emul>[,<config>]
  *
  *  slot is 0..31
@@ -136,37 +146,53 @@ pci_parse_slot_usage(char *aopt)
 int
 pci_parse_slot(char *opt)
 {
-	char *slot, *func, *emul, *config;
-	char *str, *cpy;
-	int error, snum, fnum;
+	struct businfo *bi;
+	struct slotinfo *si;
+	char *emul, *config, *str, *cp;
+	int error, bnum, snum, fnum;
 
 	error = -1;
-	str = cpy = strdup(opt);
-
-        slot = strsep(&str, ",");
-        func = NULL;
-        if (strchr(slot, ':') != NULL) {
-		func = cpy;
-		(void) strsep(&func, ":");
-        }
-	
-	emul = strsep(&str, ",");
-	config = str;
+	str = strdup(opt);
 
-	if (emul == NULL) {
+	emul = config = NULL;
+	if ((cp = strchr(str, ',')) != NULL) {
+		*cp = '\0';
+		emul = cp + 1;
+		if ((cp = strchr(emul, ',')) != NULL) {
+			*cp = '\0';
+			config = cp + 1;
+		}
+	} else {
 		pci_parse_slot_usage(opt);
 		goto done;
 	}
 
-	snum = atoi(slot);
-	fnum = func ? atoi(func) : 0;
+	/* <bus>:<slot>:<func> */
+	if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) {
+		bnum = 0;
+		/* <slot>:<func> */
+		if (sscanf(str, "%d:%d", &snum, &fnum) != 2) {
+			fnum = 0;
+			/* <slot> */
+			if (sscanf(str, "%d", &snum) != 1) {
+				snum = -1;
+			}
+		}
+	}
 
-	if (snum < 0 || snum >= MAXSLOTS || fnum < 0 || fnum >= MAXFUNCS) {
+	if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS ||
+	    fnum < 0 || fnum >= MAXFUNCS) {
 		pci_parse_slot_usage(opt);
 		goto done;
 	}
 
-	if (pci_slotinfo[snum].si_funcs[fnum].fi_name != NULL) {
+	if (pci_businfo[bnum] == NULL)
+		pci_businfo[bnum] = calloc(1, sizeof(struct businfo));
+
+	bi = pci_businfo[bnum];
+	si = &bi->slotinfo[snum];
+
+	if (si->si_funcs[fnum].fi_name != NULL) {
 		fprintf(stderr, "pci slot %d:%d already occupied!\n",
 			snum, fnum);
 		goto done;
@@ -179,12 +205,12 @@ pci_parse_slot(char *opt)
 	}
 
 	error = 0;
-	pci_slotinfo[snum].si_funcs[fnum].fi_name = emul;
-	pci_slotinfo[snum].si_funcs[fnum].fi_param = config;
+	si->si_funcs[fnum].fi_name = emul;
+	si->si_funcs[fnum].fi_param = config;
 
 done:
 	if (error)
-		free(cpy);
+		free(str);
 
 	return (error);
 }
@@ -346,10 +372,27 @@ pci_emul_mem_handler(struct vmctx *ctx, 
 
 	offset = addr - pdi->pi_bar[bidx].addr;
 
-	if (dir == MEM_F_WRITE)
-		(*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, size, *val);
-	else
-		*val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, offset, size);
+	if (dir == MEM_F_WRITE) {
+		if (size == 8) {
+			(*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset,
+					   4, *val & 0xffffffff);
+			(*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset + 4,
+					   4, *val >> 32);
+		} else {
+			(*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset,
+					   size, *val);
+		}
+	} else {
+		if (size == 8) {
+			*val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx,
+						 offset, 4);
+			*val |= (*pe->pe_barread)(ctx, vcpu, pdi, bidx,
+						  offset + 4, 4) << 32;
+		} else {
+			*val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx,
+						 offset, size);
+		}
+	}
 
 	return (0);
 }
@@ -604,48 +647,39 @@ pci_emul_alloc_pbar(struct pci_devinst *
 static int
 pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen)
 {
-	int i, capoff, capid, reallen;
+	int i, capoff, reallen;
 	uint16_t sts;
 
-	static u_char endofcap[4] = {
-		PCIY_RESERVED, 0, 0, 0
-	};
-
-	assert(caplen > 0 && capdata[0] != PCIY_RESERVED);
+	assert(caplen > 0);
 
 	reallen = roundup2(caplen, 4);		/* dword aligned */
 
 	sts = pci_get_cfgdata16(pi, PCIR_STATUS);
-	if ((sts & PCIM_STATUS_CAPPRESENT) == 0) {
+	if ((sts & PCIM_STATUS_CAPPRESENT) == 0)
 		capoff = CAP_START_OFFSET;
-		pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff);
-		pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT);
-	} else {
-		capoff = pci_get_cfgdata8(pi, PCIR_CAP_PTR);
-		while (1) {
-			assert((capoff & 0x3) == 0);
-			capid = pci_get_cfgdata8(pi, capoff);
-			if (capid == PCIY_RESERVED)
-				break;
-			capoff = pci_get_cfgdata8(pi, capoff + 1);
-		}
-	}
+	else
+		capoff = pi->pi_capend + 1;
 
 	/* Check if we have enough space */
-	if (capoff + reallen + sizeof(endofcap) > PCI_REGMAX + 1)
+	if (capoff + reallen > PCI_REGMAX + 1)
 		return (-1);
 
+	/* Set the previous capability pointer */
+	if ((sts & PCIM_STATUS_CAPPRESENT) == 0) {
+		pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff);
+		pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT);
+	} else
+		pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff);
+
 	/* Copy the capability */
 	for (i = 0; i < caplen; i++)
 		pci_set_cfgdata8(pi, capoff + i, capdata[i]);
 
 	/* Set the next capability pointer */
-	pci_set_cfgdata8(pi, capoff + 1, capoff + reallen);
-
-	/* Copy of the reserved capability which serves as the end marker */
-	for (i = 0; i < sizeof(endofcap); i++)
-		pci_set_cfgdata8(pi, capoff + reallen + i, endofcap[i]);
+	pci_set_cfgdata8(pi, capoff + 1, 0);
 
+	pi->pi_prevcap = capoff;
+	pi->pi_capend = capoff + reallen - 1;
 	return (0);
 }
 
@@ -665,8 +699,8 @@ pci_emul_finddev(char *name)
 }
 
 static int
-pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int slot, int func,
-	      char *params)
+pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot,
+    int func, struct funcinfo *fi)
 {
 	struct pci_devinst *pdi;
 	int err;
@@ -675,7 +709,7 @@ pci_emul_init(struct vmctx *ctx, struct 
 	bzero(pdi, sizeof(*pdi));
 
 	pdi->pi_vmctx = ctx;
-	pdi->pi_bus = 0;
+	pdi->pi_bus = bus;
 	pdi->pi_slot = slot;
 	pdi->pi_func = func;
 	pthread_mutex_init(&pdi->pi_lintr.lock, NULL);
@@ -692,13 +726,11 @@ pci_emul_init(struct vmctx *ctx, struct 
 	pci_set_cfgdata8(pdi, PCIR_COMMAND,
 		    PCIM_CMD_PORTEN | PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
 
-	err = (*pde->pe_init)(ctx, pdi, params);
-	if (err != 0) {
+	err = (*pde->pe_init)(ctx, pdi, fi->fi_param);
+	if (err == 0)
+		fi->fi_devi = pdi;
+	else
 		free(pdi);
-	} else {
-		pci_emul_devices++;
-		pci_slotinfo[slot].si_funcs[func].fi_devi = pdi;
-	}
 
 	return (err);
 }
@@ -732,7 +764,7 @@ pci_emul_add_msicap(struct pci_devinst *
 
 static void
 pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum,
-		     uint32_t msix_tab_size, int nextptr)
+		     uint32_t msix_tab_size)
 {
 	CTASSERT(sizeof(struct msixcap) == 12);
 
@@ -740,7 +772,6 @@ pci_populate_msixcap(struct msixcap *msi
 
 	bzero(msixcap, sizeof(struct msixcap));
 	msixcap->capid = PCIY_MSIX;
-	msixcap->nextptr = nextptr;
 
 	/*
 	 * Message Control Register, all fields set to
@@ -778,7 +809,6 @@ pci_msix_table_init(struct pci_devinst *
 int
 pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum)
 {
-	uint16_t pba_index;
 	uint32_t tab_size;
 	struct msixcap msixcap;
 
@@ -795,14 +825,11 @@ pci_emul_add_msixcap(struct pci_devinst 
 	pi->pi_msix.table_offset = 0;
 	pi->pi_msix.table_count = msgnum;
 	pi->pi_msix.pba_offset = tab_size;
-
-	/* calculate the MMIO size required for MSI-X PBA */
-	pba_index = (msgnum - 1) / (PBA_TABLE_ENTRY_SIZE * 8);
-	pi->pi_msix.pba_size = (pba_index + 1) * PBA_TABLE_ENTRY_SIZE;
+	pi->pi_msix.pba_size = PBA_SIZE(msgnum);
 
 	pci_msix_table_init(pi, msgnum);
 
-	pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size, 0);
+	pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size);
 
 	/* allocate memory for MSI-X Table and PBA */
 	pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32,
@@ -925,11 +952,9 @@ pci_emul_capwrite(struct pci_devinst *pi
 	/* Find the capability that we want to update */
 	capoff = CAP_START_OFFSET;
 	while (1) {
-		capid = pci_get_cfgdata8(pi, capoff);
-		if (capid == PCIY_RESERVED)
-			break;
-
 		nextoff = pci_get_cfgdata8(pi, capoff + 1);
+		if (nextoff == 0)
+			break;
 		if (offset >= capoff && offset < nextoff)
 			break;
 
@@ -952,6 +977,7 @@ pci_emul_capwrite(struct pci_devinst *pi
 			return;
 	}
 
+	capid = pci_get_cfgdata8(pi, capoff);
 	switch (capid) {
 	case PCIY_MSI:
 		msicap_cfgwrite(pi, capoff, offset, bytes, val);
@@ -970,25 +996,14 @@ pci_emul_capwrite(struct pci_devinst *pi
 static int
 pci_emul_iscap(struct pci_devinst *pi, int offset)
 {
-	int found;
 	uint16_t sts;
-	uint8_t capid, lastoff;
 
-	found = 0;
 	sts = pci_get_cfgdata16(pi, PCIR_STATUS);
 	if ((sts & PCIM_STATUS_CAPPRESENT) != 0) {
-		lastoff = pci_get_cfgdata8(pi, PCIR_CAP_PTR);
-		while (1) {
-			assert((lastoff & 0x3) == 0);
-			capid = pci_get_cfgdata8(pi, lastoff);
-			if (capid == PCIY_RESERVED)
-				break;
-			lastoff = pci_get_cfgdata8(pi, lastoff + 1);
-		}
-		if (offset >= CAP_START_OFFSET && offset <= lastoff)
-			found = 1;
+		if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend)
+			return (1);
 	}
-	return (found);
+	return (0);
 }
 
 static int
@@ -1006,31 +1021,68 @@ pci_emul_fallback_handler(struct vmctx *
 	return (0);
 }
 
+#define	BUSIO_ROUNDUP		32
+#define	BUSMEM_ROUNDUP		(1024 * 1024)
+
 int
 init_pci(struct vmctx *ctx)
 {
 	struct pci_devemu *pde;
+	struct businfo *bi;
+	struct slotinfo *si;
 	struct funcinfo *fi;
 	size_t lowmem;
-	int slot, func;
+	int bus, slot, func;
 	int error;
 
 	pci_emul_iobase = PCI_EMUL_IOBASE;
 	pci_emul_membase32 = vm_get_lowmem_limit(ctx);
 	pci_emul_membase64 = PCI_EMUL_MEMBASE64;
 
-	for (slot = 0; slot < MAXSLOTS; slot++) {
-		for (func = 0; func < MAXFUNCS; func++) {
-			fi = &pci_slotinfo[slot].si_funcs[func];
-			if (fi->fi_name != NULL) {
+	for (bus = 0; bus < MAXBUSES; bus++) {
+		if ((bi = pci_businfo[bus]) == NULL)
+			continue;
+		/* 
+		 * Keep track of the i/o and memory resources allocated to
+		 * this bus.
+		 */
+		bi->iobase = pci_emul_iobase;
+		bi->membase32 = pci_emul_membase32;
+		bi->membase64 = pci_emul_membase64;
+
+		for (slot = 0; slot < MAXSLOTS; slot++) {
+			si = &bi->slotinfo[slot];
+			for (func = 0; func < MAXFUNCS; func++) {
+				fi = &si->si_funcs[func];
+				if (fi->fi_name == NULL)
+					continue;
 				pde = pci_emul_finddev(fi->fi_name);
 				assert(pde != NULL);
-				error = pci_emul_init(ctx, pde, slot, func,
-					    fi->fi_param);
+				error = pci_emul_init(ctx, pde, bus, slot,
+				    func, fi);
 				if (error)
 					return (error);
 			}
 		}
+
+		/*
+		 * Add some slop to the I/O and memory resources decoded by
+		 * this bus to give a guest some flexibility if it wants to
+		 * reprogram the BARs.
+		 */
+		pci_emul_iobase += BUSIO_ROUNDUP;
+		pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP);
+		bi->iolimit = pci_emul_iobase;
+
+		pci_emul_membase32 += BUSMEM_ROUNDUP;
+		pci_emul_membase32 = roundup2(pci_emul_membase32,
+		    BUSMEM_ROUNDUP);
+		bi->memlimit32 = pci_emul_membase32;
+
+		pci_emul_membase64 += BUSMEM_ROUNDUP;
+		pci_emul_membase64 = roundup2(pci_emul_membase64,
+		    BUSMEM_ROUNDUP);
+		bi->memlimit64 = pci_emul_membase64;
 	}
 
 	/*
@@ -1060,7 +1112,7 @@ init_pci(struct vmctx *ctx)
 }
 
 static void
-pci_prt_entry(int slot, int pin, int ioapic_irq, void *arg)
+pci_prt_entry(int bus, int slot, int pin, int ioapic_irq, void *arg)
 {
 	int *count;
 
@@ -1075,96 +1127,165 @@ pci_prt_entry(int slot, int pin, int ioa
 	(*count)--;
 }
 
-void
-pci_write_dsdt(void)
+/*
+ * A bhyve virtual machine has a flat PCI hierarchy with a root port
+ * corresponding to each PCI bus.
+ */
+static void
+pci_bus_write_dsdt(int bus)
 {
+	struct businfo *bi;
+	struct slotinfo *si;
 	struct pci_devinst *pi;
 	int count, slot, func;
 
+	/*
+	 * If there are no devices on this 'bus' then just return.
+	 */
+	if ((bi = pci_businfo[bus]) == NULL) {
+		/*
+		 * Bus 0 is special because it decodes the I/O ports used
+		 * for PCI config space access even if there are no devices
+		 * on it.
+		 */
+		if (bus != 0)
+			return;
+	}
+
 	dsdt_indent(1);
 	dsdt_line("Scope (_SB)");
 	dsdt_line("{");
-	dsdt_line("  Device (PCI0)");
+	dsdt_line("  Device (PC%02X)", bus);
 	dsdt_line("  {");
 	dsdt_line("    Name (_HID, EisaId (\"PNP0A03\"))");
 	dsdt_line("    Name (_ADR, Zero)");
+
+	dsdt_line("    Method (_BBN, 0, NotSerialized)");
+	dsdt_line("    {");
+	dsdt_line("        Return (0x%08X)", bus);
+	dsdt_line("    }");
 	dsdt_line("    Name (_CRS, ResourceTemplate ()");
 	dsdt_line("    {");
 	dsdt_line("      WordBusNumber (ResourceProducer, MinFixed, "
 	    "MaxFixed, PosDecode,");
 	dsdt_line("        0x0000,             // Granularity");
-	dsdt_line("        0x0000,             // Range Minimum");
-	dsdt_line("        0x00FF,             // Range Maximum");
+	dsdt_line("        0x%04X,             // Range Minimum", bus);
+	dsdt_line("        0x%04X,             // Range Maximum", bus);
 	dsdt_line("        0x0000,             // Translation Offset");
-	dsdt_line("        0x0100,             // Length");
+	dsdt_line("        0x0001,             // Length");
 	dsdt_line("        ,, )");
-	dsdt_indent(3);
-	dsdt_fixed_ioport(0xCF8, 8);
-	dsdt_unindent(3);
-	dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
-	    "PosDecode, EntireRange,");
-	dsdt_line("        0x0000,             // Granularity");
-	dsdt_line("        0x0000,             // Range Minimum");
-	dsdt_line("        0x0CF7,             // Range Maximum");
-	dsdt_line("        0x0000,             // Translation Offset");
-	dsdt_line("        0x0CF8,             // Length");
-	dsdt_line("        ,, , TypeStatic)");
+
+	if (bus == 0) {
+		dsdt_indent(3);
+		dsdt_fixed_ioport(0xCF8, 8);
+		dsdt_unindent(3);
+
+		dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
+		    "PosDecode, EntireRange,");
+		dsdt_line("        0x0000,             // Granularity");
+		dsdt_line("        0x0000,             // Range Minimum");
+		dsdt_line("        0x0CF7,             // Range Maximum");
+		dsdt_line("        0x0000,             // Translation Offset");
+		dsdt_line("        0x0CF8,             // Length");
+		dsdt_line("        ,, , TypeStatic)");
+
+		dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
+		    "PosDecode, EntireRange,");
+		dsdt_line("        0x0000,             // Granularity");
+		dsdt_line("        0x0D00,             // Range Minimum");
+		dsdt_line("        0x%04X,             // Range Maximum",
+		    PCI_EMUL_IOBASE - 1);
+		dsdt_line("        0x0000,             // Translation Offset");
+		dsdt_line("        0x%04X,             // Length",
+		    PCI_EMUL_IOBASE - 0x0D00);
+		dsdt_line("        ,, , TypeStatic)");
+
+		if (bi == NULL) {
+			dsdt_line("    })");
+			goto done;
+		}
+	}
+	assert(bi != NULL);
+
+	/* i/o window */
 	dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
 	    "PosDecode, EntireRange,");
 	dsdt_line("        0x0000,             // Granularity");
-	dsdt_line("        0x0D00,             // Range Minimum");
-	dsdt_line("        0xFFFF,             // Range Maximum");
+	dsdt_line("        0x%04X,             // Range Minimum", bi->iobase);
+	dsdt_line("        0x%04X,             // Range Maximum",
+	    bi->iolimit - 1);
 	dsdt_line("        0x0000,             // Translation Offset");
-	dsdt_line("        0xF300,             // Length");
+	dsdt_line("        0x%04X,             // Length",
+	    bi->iolimit - bi->iobase);
 	dsdt_line("        ,, , TypeStatic)");
+
+	/* mmio window (32-bit) */
 	dsdt_line("      DWordMemory (ResourceProducer, PosDecode, "
 	    "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
 	dsdt_line("        0x00000000,         // Granularity");
-	dsdt_line("        0x%08lX,         // Range Minimum\n",
-	    pci_mem_hole.base);
+	dsdt_line("        0x%08X,         // Range Minimum\n", bi->membase32);
 	dsdt_line("        0x%08X,         // Range Maximum\n",
-	    PCI_EMUL_MEMLIMIT32 - 1);
+	    bi->memlimit32 - 1);
 	dsdt_line("        0x00000000,         // Translation Offset");
-	dsdt_line("        0x%08lX,         // Length\n",
-	    PCI_EMUL_MEMLIMIT32 - pci_mem_hole.base);
+	dsdt_line("        0x%08X,         // Length\n",
+	    bi->memlimit32 - bi->membase32);
 	dsdt_line("        ,, , AddressRangeMemory, TypeStatic)");
+
+	/* mmio window (64-bit) */
 	dsdt_line("      QWordMemory (ResourceProducer, PosDecode, "
 	    "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
 	dsdt_line("        0x0000000000000000, // Granularity");
-	dsdt_line("        0x%016lX, // Range Minimum\n",
-	    PCI_EMUL_MEMBASE64);
+	dsdt_line("        0x%016lX, // Range Minimum\n", bi->membase64);
 	dsdt_line("        0x%016lX, // Range Maximum\n",
-	    PCI_EMUL_MEMLIMIT64 - 1);
+	    bi->memlimit64 - 1);
 	dsdt_line("        0x0000000000000000, // Translation Offset");
 	dsdt_line("        0x%016lX, // Length\n",
-	    PCI_EMUL_MEMLIMIT64 - PCI_EMUL_MEMBASE64);
+	    bi->memlimit64 - bi->membase64);
 	dsdt_line("        ,, , AddressRangeMemory, TypeStatic)");
 	dsdt_line("    })");
-	count = pci_count_lintr();
+
+	count = pci_count_lintr(bus);
 	if (count != 0) {
 		dsdt_indent(2);
 		dsdt_line("Name (_PRT, Package (0x%02X)", count);
 		dsdt_line("{");
-		pci_walk_lintr(pci_prt_entry, &count);
+		pci_walk_lintr(bus, pci_prt_entry, &count);
 		dsdt_line("})");
 		dsdt_unindent(2);
 	}
 
 	dsdt_indent(2);
 	for (slot = 0; slot < MAXSLOTS; slot++) {
+		si = &bi->slotinfo[slot];
 		for (func = 0; func < MAXFUNCS; func++) {
-			pi = pci_slotinfo[slot].si_funcs[func].fi_devi;
+			pi = si->si_funcs[func].fi_devi;
 			if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL)
 				pi->pi_d->pe_write_dsdt(pi);
 		}
 	}
 	dsdt_unindent(2);
-
+done:
 	dsdt_line("  }");
 	dsdt_line("}");
 	dsdt_unindent(1);
 }
 
+void
+pci_write_dsdt(void)
+{
+	int bus;
+
+	for (bus = 0; bus < MAXBUSES; bus++)
+		pci_bus_write_dsdt(bus);
+}
+
+int
+pci_bus_configured(int bus)
+{
+	assert(bus >= 0 && bus < MAXBUSES);
+	return (pci_businfo[bus] != NULL);
+}
+
 int
 pci_msi_enabled(struct pci_devinst *pi)
 {
@@ -1231,13 +1352,17 @@ pci_lintr_permitted(struct pci_devinst *
 int
 pci_lintr_request(struct pci_devinst *pi)
 {
+	struct businfo *bi;
 	struct slotinfo *si;
 	int bestpin, bestcount, irq, pin;
 
+	bi = pci_businfo[pi->pi_bus];
+	assert(bi != NULL);
+
 	/*
 	 * First, allocate a pin from our slot.
 	 */
-	si = &pci_slotinfo[pi->pi_slot];
+	si = &bi->slotinfo[pi->pi_slot];
 	bestpin = 0;
 	bestcount = si->si_intpins[0].ii_count;
 	for (pin = 1; pin < 4; pin++) {
@@ -1318,31 +1443,41 @@ pci_lintr_update(struct pci_devinst *pi)
 }
 
 int
-pci_count_lintr(void)
+pci_count_lintr(int bus)
 {
 	int count, slot, pin;
+	struct slotinfo *slotinfo;
 
 	count = 0;
-	for (slot = 0; slot < MAXSLOTS; slot++) {
-		for (pin = 0; pin < 4; pin++) {
-			if (pci_slotinfo[slot].si_intpins[pin].ii_count != 0)
-				count++;
+	if (pci_businfo[bus] != NULL) {
+		for (slot = 0; slot < MAXSLOTS; slot++) {
+			slotinfo = &pci_businfo[bus]->slotinfo[slot];
+			for (pin = 0; pin < 4; pin++) {
+				if (slotinfo->si_intpins[pin].ii_count != 0)
+					count++;
+			}
 		}
 	}
 	return (count);
 }
 
 void
-pci_walk_lintr(pci_lintr_cb cb, void *arg)
+pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg)
 {
+	struct businfo *bi;
+	struct slotinfo *si;
 	struct intxinfo *ii;
 	int slot, pin;
 
+	if ((bi = pci_businfo[bus]) == NULL)
+		return;
+
 	for (slot = 0; slot < MAXSLOTS; slot++) {
+		si = &bi->slotinfo[slot];
 		for (pin = 0; pin < 4; pin++) {
-			ii = &pci_slotinfo[slot].si_intpins[pin];
+			ii = &si->si_intpins[pin];
 			if (ii->ii_count != 0)
-				cb(slot, pin + 1, ii->ii_ioapic_irq, arg);
+				cb(bus, slot, pin + 1, ii->ii_ioapic_irq, arg);
 		}
 	}
 }
@@ -1352,14 +1487,19 @@ pci_walk_lintr(pci_lintr_cb cb, void *ar
  * Return 0 otherwise.
  */
 static int
-pci_emul_is_mfdev(int slot)
+pci_emul_is_mfdev(int bus, int slot)
 {
+	struct businfo *bi;
+	struct slotinfo *si;
 	int f, numfuncs;
 
 	numfuncs = 0;
-	for (f = 0; f < MAXFUNCS; f++) {
-		if (pci_slotinfo[slot].si_funcs[f].fi_devi != NULL) {
-			numfuncs++;
+	if ((bi = pci_businfo[bus]) != NULL) {
+		si = &bi->slotinfo[slot];
+		for (f = 0; f < MAXFUNCS; f++) {
+			if (si->si_funcs[f].fi_devi != NULL) {
+				numfuncs++;
+			}
 		}
 	}
 	return (numfuncs > 1);
@@ -1370,12 +1510,12 @@ pci_emul_is_mfdev(int slot)
  * whether or not is a multi-function being emulated in the pci 'slot'.
  */
 static void
-pci_emul_hdrtype_fixup(int slot, int off, int bytes, uint32_t *rv)
+pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv)
 {
 	int mfdev;
 
 	if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) {
-		mfdev = pci_emul_is_mfdev(slot);
+		mfdev = pci_emul_is_mfdev(bus, slot);
 		switch (bytes) {
 		case 1:
 		case 2:
@@ -1394,7 +1534,7 @@ pci_emul_hdrtype_fixup(int slot, int off
 	}
 }
 
-static int cfgbus, cfgslot, cfgfunc, cfgoff;
+static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff;
 
 static int
 pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
@@ -1413,9 +1553,12 @@ pci_emul_cfgaddr(struct vmctx *ctx, int 
 		    (cfgslot << 11) |
 		    (cfgfunc << 8) |
 		    cfgoff;
-		*eax = x | CONF1_ENABLE;
+                if (cfgenable)
+			x |= CONF1_ENABLE;	       
+		*eax = x;
 	} else {
 		x = *eax;
+		cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE;
 		cfgoff = x & PCI_REGMAX;
 		cfgfunc = (x >> 8) & PCI_FUNCMAX;
 		cfgslot = (x >> 11) & PCI_SLOTMAX;
@@ -1492,16 +1635,19 @@ static int
 pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
 		 uint32_t *eax, void *arg)
 {
+	struct businfo *bi;
+	struct slotinfo *si;
 	struct pci_devinst *pi;
 	struct pci_devemu *pe;
 	int coff, idx, needcfg;
 	uint64_t addr, bar, mask;
 
 	assert(bytes == 1 || bytes == 2 || bytes == 4);
-	
-	if (cfgbus == 0)
-		pi = pci_slotinfo[cfgslot].si_funcs[cfgfunc].fi_devi;
-	else
+
+	if ((bi = pci_businfo[cfgbus]) != NULL) {
+		si = &bi->slotinfo[cfgslot];
+		pi = si->si_funcs[cfgfunc].fi_devi;
+	} else
 		pi = NULL;
 

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201407192013.s6JKD1NE052720>