Date: Sat, 9 Jul 2016 20:41:59 +0000 (UTC) From: Alexander Motin <mav@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r302504 - head/usr.sbin/bhyve Message-ID: <201607092041.u69Kfxk2095184@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: mav Date: Sat Jul 9 20:41:59 2016 New Revision: 302504 URL: https://svnweb.freebsd.org/changeset/base/302504 Log: Add emulation for Intel e1000 (e82545) network adapter. The code was successfully tested with FreeBSD, Linux, Solaris and Windows guests. This interface is predictably slower (about 2x) than virtio-net, but it is very helpful for guests not supporting virtio-net by default. Thanks to Jeremiah Lott and Peter Grehan for doing original heavy lifting. Added: head/usr.sbin/bhyve/pci_e82545.c (contents, props changed) Modified: head/usr.sbin/bhyve/Makefile head/usr.sbin/bhyve/bhyve.8 Modified: head/usr.sbin/bhyve/Makefile ============================================================================== --- head/usr.sbin/bhyve/Makefile Sat Jul 9 20:01:07 2016 (r302503) +++ head/usr.sbin/bhyve/Makefile Sat Jul 9 20:41:59 2016 (r302504) @@ -28,6 +28,7 @@ SRCS= \ mevent.c \ mptbl.c \ pci_ahci.c \ + pci_e82545.c \ pci_emul.c \ pci_fbuf.c \ pci_hostbridge.c \ @@ -61,6 +62,8 @@ SRCS+= vmm_instruction_emul.c LIBADD= vmmapi md pthread z +CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/e1000 +CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/mii CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/usb/controller WARNS?= 2 Modified: head/usr.sbin/bhyve/bhyve.8 ============================================================================== --- head/usr.sbin/bhyve/bhyve.8 Sat Jul 9 20:01:07 2016 (r302503) +++ head/usr.sbin/bhyve/bhyve.8 Sat Jul 9 20:41:59 2016 (r302504) @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd July 8, 2016 +.Dd July 9, 2016 .Dt BHYVE 8 .Os .Sh NAME @@ -177,6 +177,8 @@ AHCI controller attached to arbitraty de AHCI controller attached to an ATAPI CD/DVD. .It Li ahci-hd AHCI controller attached to a SATA hard-drive. +.It Li e1000 +Intel e82545 network interface. .It Li uart PCI 16550 serial device. 
.It Li lpc Added: head/usr.sbin/bhyve/pci_e82545.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/usr.sbin/bhyve/pci_e82545.c Sat Jul 9 20:41:59 2016 (r302504) @@ -0,0 +1,2372 @@ +/* + * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org> + * Copyright (c) 2015 Peter Grehan <grehan@freebsd.org> + * Copyright (c) 2013 Jeremiah Lott, Avere Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#include <sys/limits.h> +#include <sys/ioctl.h> +#include <sys/uio.h> +#include <net/ethernet.h> +#include <netinet/in.h> +#include <netinet/tcp.h> + +#include <errno.h> +#include <fcntl.h> +#include <md5.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <pthread.h> +#include <pthread_np.h> + +#include "e1000_regs.h" +#include "e1000_defines.h" +#include "mii.h" + +#include "bhyverun.h" +#include "pci_emul.h" +#include "mevent.h" + +/* Hardware/register definitions XXX: move some to common code. */ +#define E82545_VENDOR_ID_INTEL 0x8086 +#define E82545_DEV_ID_82545EM_COPPER 0x100F +#define E82545_SUBDEV_ID 0x1008 + +#define E82545_REVISION_4 4 + +#define E82545_MDIC_DATA_MASK 0x0000FFFF +#define E82545_MDIC_OP_MASK 0x0c000000 +#define E82545_MDIC_IE 0x20000000 + +#define E82545_EECD_FWE_DIS 0x00000010 /* Flash writes disabled */ +#define E82545_EECD_FWE_EN 0x00000020 /* Flash writes enabled */ +#define E82545_EECD_FWE_MASK 0x00000030 /* Flash writes mask */ + +#define E82545_BAR_REGISTER 0 +#define E82545_BAR_REGISTER_LEN (128*1024) +#define E82545_BAR_FLASH 1 +#define E82545_BAR_FLASH_LEN (64*1024) +#define E82545_BAR_IO 2 +#define E82545_BAR_IO_LEN 8 + +#define E82545_IOADDR 0x00000000 +#define E82545_IODATA 0x00000004 +#define E82545_IO_REGISTER_MAX 0x0001FFFF +#define E82545_IO_FLASH_BASE 0x00080000 +#define E82545_IO_FLASH_MAX 0x000FFFFF + +#define E82545_ARRAY_ENTRY(reg, offset) (reg + (offset<<2)) +#define E82545_RAR_MAX 15 +#define E82545_MTA_MAX 127 +#define E82545_VFTA_MAX 127 + +/* Slightly modified from the driver versions, hardcoded for 3 opcode bits, + * followed by 6 address bits. + * TODO: make opcode bits and addr bits configurable? 
+ * NVM Commands - Microwire */ +#define E82545_NVM_OPCODE_BITS 3 +#define E82545_NVM_ADDR_BITS 6 +#define E82545_NVM_DATA_BITS 16 +#define E82545_NVM_OPADDR_BITS (E82545_NVM_OPCODE_BITS + E82545_NVM_ADDR_BITS) +#define E82545_NVM_ADDR_MASK ((1 << E82545_NVM_ADDR_BITS)-1) +#define E82545_NVM_OPCODE_MASK \ + (((1 << E82545_NVM_OPCODE_BITS) - 1) << E82545_NVM_ADDR_BITS) +#define E82545_NVM_OPCODE_READ (0x6 << E82545_NVM_ADDR_BITS) /* read */ +#define E82545_NVM_OPCODE_WRITE (0x5 << E82545_NVM_ADDR_BITS) /* write */ +#define E82545_NVM_OPCODE_ERASE (0x7 << E82545_NVM_ADDR_BITS) /* erase */ +#define E82545_NVM_OPCODE_EWEN (0x4 << E82545_NVM_ADDR_BITS) /* wr-enable */ + +#define E82545_NVM_EEPROM_SIZE 64 /* 64 * 16-bit values == 128K */ + +#define E1000_ICR_SRPD 0x00010000 + +/* + * XXX does this actually have a limit on the 82545 ? + * There is a limit on the max number of bytes, but perhaps not + * on descriptors ?? + */ +#define I82545_MAX_TXSEGS 20 + +/* Legacy receive descriptor */ +struct e1000_rx_desc { + uint64_t buffer_addr; /* Address of the descriptor's data buffer */ + uint16_t length; /* Length of data DMAed into data buffer */ + uint16_t csum; /* Packet checksum */ + uint8_t status; /* Descriptor status */ + uint8_t errors; /* Descriptor Errors */ + uint16_t special; +}; + +/* Transmit descriptor types */ +#define E1000_TXD_MASK (E1000_TXD_CMD_DEXT | 0x00F00000) +#define E1000_TXD_TYP_L (0) +#define E1000_TXD_TYP_C (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_C) +#define E1000_TXD_TYP_D (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D) + +/* Legacy transmit descriptor */ +struct e1000_tx_desc { + uint64_t buffer_addr; /* Address of the descriptor's data buffer */ + union { + uint32_t data; + struct { + uint16_t length; /* Data buffer length */ + uint8_t cso; /* Checksum offset */ + uint8_t cmd; /* Descriptor control */ + } flags; + } lower; + union { + uint32_t data; + struct { + uint8_t status; /* Descriptor status */ + uint8_t css; /* Checksum start */ + uint16_t special; 
+ } fields; + } upper; +}; + +/* Context descriptor */ +struct e1000_context_desc { + union { + uint32_t ip_config; + struct { + uint8_t ipcss; /* IP checksum start */ + uint8_t ipcso; /* IP checksum offset */ + uint16_t ipcse; /* IP checksum end */ + } ip_fields; + } lower_setup; + union { + uint32_t tcp_config; + struct { + uint8_t tucss; /* TCP checksum start */ + uint8_t tucso; /* TCP checksum offset */ + uint16_t tucse; /* TCP checksum end */ + } tcp_fields; + } upper_setup; + uint32_t cmd_and_length; + union { + uint32_t data; + struct { + uint8_t status; /* Descriptor status */ + uint8_t hdr_len; /* Header length */ + uint16_t mss; /* Maximum segment size */ + } fields; + } tcp_seg_setup; +}; + +/* Data descriptor */ +struct e1000_data_desc { + uint64_t buffer_addr; /* Address of the descriptor's buffer address */ + union { + uint32_t data; + struct { + uint16_t length; /* Data buffer length */ + uint8_t typ_len_ext; + uint8_t cmd; + } flags; + } lower; + union { + uint32_t data; + struct { + uint8_t status; /* Descriptor status */ + uint8_t popts; /* Packet Options */ + uint16_t special; + } fields; + } upper; +}; + +union e1000_tx_udesc { + struct e1000_tx_desc td; + struct e1000_context_desc cd; + struct e1000_data_desc dd; +}; + +/* Tx checksum info for a packet. */ +struct ck_info { + int ck_valid; /* ck_info is valid */ + uint8_t ck_start; /* start byte of cksum calcuation */ + uint8_t ck_off; /* offset of cksum insertion */ + uint16_t ck_len; /* length of cksum calc: 0 is to packet-end */ +}; + +/* + * Debug printf + */ +static int e82545_debug = 0; +#define DPRINTF(msg,params...) if (e82545_debug) fprintf(stderr, "e82545: " msg, params) +#define WPRINTF(msg,params...) 
fprintf(stderr, "e82545: " msg, params) + +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a,b) (((a)>(b))?(a):(b)) + +/* s/w representation of the RAL/RAH regs */ +struct eth_uni { + int eu_valid; + int eu_addrsel; + struct ether_addr eu_eth; +}; + + +struct e82545_softc { + struct pci_devinst *esc_pi; + struct vmctx *esc_ctx; + struct mevent *esc_mevp; + struct mevent *esc_mevpitr; + pthread_mutex_t esc_mtx; + struct ether_addr esc_mac; + int esc_tapfd; + + /* General */ + uint32_t esc_CTRL; /* x0000 device ctl */ + uint32_t esc_FCAL; /* x0028 flow ctl addr lo */ + uint32_t esc_FCAH; /* x002C flow ctl addr hi */ + uint32_t esc_FCT; /* x0030 flow ctl type */ + uint32_t esc_VET; /* x0038 VLAN eth type */ + uint32_t esc_FCTTV; /* x0170 flow ctl tx timer */ + uint32_t esc_LEDCTL; /* x0E00 LED control */ + uint32_t esc_PBA; /* x1000 pkt buffer allocation */ + + /* Interrupt control */ + int esc_irq_asserted; + uint32_t esc_ICR; /* x00C0 cause read/clear */ + uint32_t esc_ITR; /* x00C4 intr throttling */ + uint32_t esc_ICS; /* x00C8 cause set */ + uint32_t esc_IMS; /* x00D0 mask set/read */ + uint32_t esc_IMC; /* x00D8 mask clear */ + + /* Transmit */ + union e1000_tx_udesc *esc_txdesc; + struct e1000_context_desc esc_txctx; + pthread_t esc_tx_tid; + pthread_cond_t esc_tx_cond; + int esc_tx_enabled; + int esc_tx_active; + uint32_t esc_TXCW; /* x0178 transmit config */ + uint32_t esc_TCTL; /* x0400 transmit ctl */ + uint32_t esc_TIPG; /* x0410 inter-packet gap */ + uint16_t esc_AIT; /* x0458 Adaptive Interframe Throttle */ + uint64_t esc_tdba; /* verified 64-bit desc table addr */ + uint32_t esc_TDBAL; /* x3800 desc table addr, low bits */ + uint32_t esc_TDBAH; /* x3804 desc table addr, hi 32-bits */ + uint32_t esc_TDLEN; /* x3808 # descriptors in bytes */ + uint16_t esc_TDH; /* x3810 desc table head idx */ + uint16_t esc_TDHr; /* internal read version of TDH */ + uint16_t esc_TDT; /* x3818 desc table tail idx */ + uint32_t esc_TIDV; /* x3820 intr delay */ + uint32_t 
esc_TXDCTL; /* x3828 desc control */ + uint32_t esc_TADV; /* x382C intr absolute delay */ + + /* L2 frame acceptance */ + struct eth_uni esc_uni[16]; /* 16 x unicast MAC addresses */ + uint32_t esc_fmcast[128]; /* Multicast filter bit-match */ + uint32_t esc_fvlan[128]; /* VLAN 4096-bit filter */ + + /* Receive */ + struct e1000_rx_desc *esc_rxdesc; + pthread_cond_t esc_rx_cond; + int esc_rx_enabled; + int esc_rx_active; + int esc_rx_loopback; + uint32_t esc_RCTL; /* x0100 receive ctl */ + uint32_t esc_FCRTL; /* x2160 flow cntl thresh, low */ + uint32_t esc_FCRTH; /* x2168 flow cntl thresh, hi */ + uint64_t esc_rdba; /* verified 64-bit desc table addr */ + uint32_t esc_RDBAL; /* x2800 desc table addr, low bits */ + uint32_t esc_RDBAH; /* x2804 desc table addr, hi 32-bits*/ + uint32_t esc_RDLEN; /* x2808 #descriptors */ + uint16_t esc_RDH; /* x2810 desc table head idx */ + uint16_t esc_RDT; /* x2818 desc table tail idx */ + uint32_t esc_RDTR; /* x2820 intr delay */ + uint32_t esc_RXDCTL; /* x2828 desc control */ + uint32_t esc_RADV; /* x282C intr absolute delay */ + uint32_t esc_RSRPD; /* x2C00 recv small packet detect */ + uint32_t esc_RXCSUM; /* x5000 receive cksum ctl */ + + /* IO Port register access */ + uint32_t io_addr; + + /* Shadow copy of MDIC */ + uint32_t mdi_control; + /* Shadow copy of EECD */ + uint32_t eeprom_control; + /* Latest NVM in/out */ + uint16_t nvm_data; + uint16_t nvm_opaddr; + /* stats */ + uint32_t missed_pkt_count; /* dropped for no room in rx queue */ + uint32_t pkt_rx_by_size[6]; + uint32_t pkt_tx_by_size[6]; + uint32_t good_pkt_rx_count; + uint32_t bcast_pkt_rx_count; + uint32_t mcast_pkt_rx_count; + uint32_t good_pkt_tx_count; + uint32_t bcast_pkt_tx_count; + uint32_t mcast_pkt_tx_count; + uint32_t oversize_rx_count; + uint32_t tso_tx_count; + uint64_t good_octets_rx; + uint64_t good_octets_tx; + uint64_t missed_octets; /* counts missed and oversized */ + + uint8_t nvm_bits:6; /* number of bits remaining in/out */ + uint8_t 
nvm_mode:2; +#define E82545_NVM_MODE_OPADDR 0x0 +#define E82545_NVM_MODE_DATAIN 0x1 +#define E82545_NVM_MODE_DATAOUT 0x2 + /* EEPROM data */ + uint16_t eeprom_data[E82545_NVM_EEPROM_SIZE]; +}; + +static void e82545_reset(struct e82545_softc *sc, int dev); +static void e82545_rx_enable(struct e82545_softc *sc); +static void e82545_rx_disable(struct e82545_softc *sc); +static void e82545_tap_callback(int fd, enum ev_type type, void *param); +static void e82545_tx_start(struct e82545_softc *sc); +static void e82545_tx_enable(struct e82545_softc *sc); +static void e82545_tx_disable(struct e82545_softc *sc); + +static inline int +e82545_size_stat_index(uint32_t size) +{ + if (size <= 64) { + return 0; + } else if (size >= 1024) { + return 5; + } else { + /* should be 1-4 */ + return (ffs(size) - 6); + } +} + +static void +e82545_init_eeprom(struct e82545_softc *sc) +{ + uint16_t checksum, i; + + /* mac addr */ + sc->eeprom_data[NVM_MAC_ADDR] = ((uint16_t)sc->esc_mac.octet[0]) | + (((uint16_t)sc->esc_mac.octet[1]) << 8); + sc->eeprom_data[NVM_MAC_ADDR+1] = ((uint16_t)sc->esc_mac.octet[2]) | + (((uint16_t)sc->esc_mac.octet[3]) << 8); + sc->eeprom_data[NVM_MAC_ADDR+2] = ((uint16_t)sc->esc_mac.octet[4]) | + (((uint16_t)sc->esc_mac.octet[5]) << 8); + + /* pci ids */ + sc->eeprom_data[NVM_SUB_DEV_ID] = E82545_SUBDEV_ID; + sc->eeprom_data[NVM_SUB_VEN_ID] = E82545_VENDOR_ID_INTEL; + sc->eeprom_data[NVM_DEV_ID] = E82545_DEV_ID_82545EM_COPPER; + sc->eeprom_data[NVM_VEN_ID] = E82545_VENDOR_ID_INTEL; + + /* fill in the checksum */ + checksum = 0; + for (i = 0; i < NVM_CHECKSUM_REG; i++) { + checksum += sc->eeprom_data[i]; + } + checksum = NVM_SUM - checksum; + sc->eeprom_data[NVM_CHECKSUM_REG] = checksum; + DPRINTF("eeprom checksum: 0x%x\r\n", checksum); +} + +static void +e82545_write_mdi(struct e82545_softc *sc, uint8_t reg_addr, + uint8_t phy_addr, uint32_t data) +{ + DPRINTF("Write mdi reg:0x%x phy:0x%x data: 0x%x\r\n", reg_addr, phy_addr, data); +} + +static uint32_t 
+e82545_read_mdi(struct e82545_softc *sc, uint8_t reg_addr, + uint8_t phy_addr) +{ + //DPRINTF("Read mdi reg:0x%x phy:0x%x\r\n", reg_addr, phy_addr); + switch (reg_addr) { + case PHY_STATUS: + return (MII_SR_LINK_STATUS | MII_SR_AUTONEG_CAPS | + MII_SR_AUTONEG_COMPLETE); + case PHY_AUTONEG_ADV: + return NWAY_AR_SELECTOR_FIELD; + case PHY_LP_ABILITY: + return 0; + case PHY_1000T_STATUS: + return (SR_1000T_LP_FD_CAPS | SR_1000T_REMOTE_RX_STATUS | + SR_1000T_LOCAL_RX_STATUS); + case PHY_ID1: + return (M88E1011_I_PHY_ID >> 16) & 0xFFFF; + case PHY_ID2: + return (M88E1011_I_PHY_ID | E82545_REVISION_4) & 0xFFFF; + default: + DPRINTF("Unknown mdi read reg:0x%x phy:0x%x\r\n", reg_addr, phy_addr); + return 0; + } + /* not reached */ +} + +static void +e82545_eecd_strobe(struct e82545_softc *sc) +{ + /* Microwire state machine */ + /* + DPRINTF("eeprom state machine srtobe " + "0x%x 0x%x 0x%x 0x%x\r\n", + sc->nvm_mode, sc->nvm_bits, + sc->nvm_opaddr, sc->nvm_data);*/ + + if (sc->nvm_bits == 0) { + DPRINTF("eeprom state machine not expecting data! " + "0x%x 0x%x 0x%x 0x%x\r\n", + sc->nvm_mode, sc->nvm_bits, + sc->nvm_opaddr, sc->nvm_data); + return; + } + sc->nvm_bits--; + if (sc->nvm_mode == E82545_NVM_MODE_DATAOUT) { + /* shifting out */ + if (sc->nvm_data & 0x8000) { + sc->eeprom_control |= E1000_EECD_DO; + } else { + sc->eeprom_control &= ~E1000_EECD_DO; + } + sc->nvm_data <<= 1; + if (sc->nvm_bits == 0) { + /* read done, back to opcode mode. 
*/ + sc->nvm_opaddr = 0; + sc->nvm_mode = E82545_NVM_MODE_OPADDR; + sc->nvm_bits = E82545_NVM_OPADDR_BITS; + } + } else if (sc->nvm_mode == E82545_NVM_MODE_DATAIN) { + /* shifting in */ + sc->nvm_data <<= 1; + if (sc->eeprom_control & E1000_EECD_DI) { + sc->nvm_data |= 1; + } + if (sc->nvm_bits == 0) { + /* eeprom write */ + uint16_t op = sc->nvm_opaddr & E82545_NVM_OPCODE_MASK; + uint16_t addr = sc->nvm_opaddr & E82545_NVM_ADDR_MASK; + if (op != E82545_NVM_OPCODE_WRITE) { + DPRINTF("Illegal eeprom write op 0x%x\r\n", + sc->nvm_opaddr); + } else if (addr >= E82545_NVM_EEPROM_SIZE) { + DPRINTF("Illegal eeprom write addr 0x%x\r\n", + sc->nvm_opaddr); + } else { + DPRINTF("eeprom write eeprom[0x%x] = 0x%x\r\n", + addr, sc->nvm_data); + sc->eeprom_data[addr] = sc->nvm_data; + } + /* back to opcode mode */ + sc->nvm_opaddr = 0; + sc->nvm_mode = E82545_NVM_MODE_OPADDR; + sc->nvm_bits = E82545_NVM_OPADDR_BITS; + } + } else if (sc->nvm_mode == E82545_NVM_MODE_OPADDR) { + sc->nvm_opaddr <<= 1; + if (sc->eeprom_control & E1000_EECD_DI) { + sc->nvm_opaddr |= 1; + } + if (sc->nvm_bits == 0) { + uint16_t op = sc->nvm_opaddr & E82545_NVM_OPCODE_MASK; + switch (op) { + case E82545_NVM_OPCODE_EWEN: + DPRINTF("eeprom write enable: 0x%x\r\n", + sc->nvm_opaddr); + /* back to opcode mode */ + sc->nvm_opaddr = 0; + sc->nvm_mode = E82545_NVM_MODE_OPADDR; + sc->nvm_bits = E82545_NVM_OPADDR_BITS; + break; + case E82545_NVM_OPCODE_READ: + { + uint16_t addr = sc->nvm_opaddr & + E82545_NVM_ADDR_MASK; + sc->nvm_mode = E82545_NVM_MODE_DATAOUT; + sc->nvm_bits = E82545_NVM_DATA_BITS; + if (addr < E82545_NVM_EEPROM_SIZE) { + sc->nvm_data = sc->eeprom_data[addr]; + DPRINTF("eeprom read: eeprom[0x%x] = 0x%x\r\n", + addr, sc->nvm_data); + } else { + DPRINTF("eeprom illegal read: 0x%x\r\n", + sc->nvm_opaddr); + sc->nvm_data = 0; + } + break; + } + case E82545_NVM_OPCODE_WRITE: + sc->nvm_mode = E82545_NVM_MODE_DATAIN; + sc->nvm_bits = E82545_NVM_DATA_BITS; + sc->nvm_data = 0; + break; + default: + 
DPRINTF("eeprom unknown op: 0x%x\r\r", + sc->nvm_opaddr); + /* back to opcode mode */ + sc->nvm_opaddr = 0; + sc->nvm_mode = E82545_NVM_MODE_OPADDR; + sc->nvm_bits = E82545_NVM_OPADDR_BITS; + } + } + } else { + DPRINTF("eeprom state machine wrong state! " + "0x%x 0x%x 0x%x 0x%x\r\n", + sc->nvm_mode, sc->nvm_bits, + sc->nvm_opaddr, sc->nvm_data); + } +} + +static void +e82545_itr_callback(int fd, enum ev_type type, void *param) +{ + uint32_t new; + struct e82545_softc *sc = param; + + pthread_mutex_lock(&sc->esc_mtx); + new = sc->esc_ICR & sc->esc_IMS; + if (new && !sc->esc_irq_asserted) { + DPRINTF("itr callback: lintr assert %x\r\n", new); + sc->esc_irq_asserted = 1; + pci_lintr_assert(sc->esc_pi); + } else { + mevent_delete(sc->esc_mevpitr); + sc->esc_mevpitr = NULL; + } + pthread_mutex_unlock(&sc->esc_mtx); +} + +static void +e82545_icr_assert(struct e82545_softc *sc, uint32_t bits) +{ + uint32_t new; + + DPRINTF("icr assert: 0x%x\r\n", bits); + + /* + * An interrupt is only generated if bits are set that + * aren't already in the ICR, these bits are unmasked, + * and there isn't an interrupt already pending. + */ + new = bits & ~sc->esc_ICR & sc->esc_IMS; + sc->esc_ICR |= bits; + + if (new == 0) { + DPRINTF("icr assert: masked %x, ims %x\r\n", new, sc->esc_IMS); + } else if (sc->esc_mevpitr != NULL) { + DPRINTF("icr assert: throttled %x, ims %x\r\n", new, sc->esc_IMS); + } else if (!sc->esc_irq_asserted) { + DPRINTF("icr assert: lintr assert %x\r\n", new); + sc->esc_irq_asserted = 1; + pci_lintr_assert(sc->esc_pi); + if (sc->esc_ITR != 0) { + sc->esc_mevpitr = mevent_add( + (sc->esc_ITR + 3905) / 3906, /* 256ns -> 1ms */ + EVF_TIMER, e82545_itr_callback, sc); + } + } +} + +static void +e82545_ims_change(struct e82545_softc *sc, uint32_t bits) +{ + uint32_t new; + + /* + * Changing the mask may allow previously asserted + * but masked interrupt requests to generate an interrupt. 
+ */ + new = bits & sc->esc_ICR & ~sc->esc_IMS; + sc->esc_IMS |= bits; + + if (new == 0) { + DPRINTF("ims change: masked %x, ims %x\r\n", new, sc->esc_IMS); + } else if (sc->esc_mevpitr != NULL) { + DPRINTF("ims change: throttled %x, ims %x\r\n", new, sc->esc_IMS); + } else if (!sc->esc_irq_asserted) { + DPRINTF("ims change: lintr assert %x\n\r", new); + sc->esc_irq_asserted = 1; + pci_lintr_assert(sc->esc_pi); + if (sc->esc_ITR != 0) { + sc->esc_mevpitr = mevent_add( + (sc->esc_ITR + 3905) / 3906, /* 256ns -> 1ms */ + EVF_TIMER, e82545_itr_callback, sc); + } + } +} + +static void +e82545_icr_deassert(struct e82545_softc *sc, uint32_t bits) +{ + + DPRINTF("icr deassert: 0x%x\r\n", bits); + sc->esc_ICR &= ~bits; + + /* + * If there are no longer any interrupt sources and there + * was an asserted interrupt, clear it + */ + if (sc->esc_irq_asserted && !(sc->esc_ICR & sc->esc_IMS)) { + DPRINTF("icr deassert: lintr deassert %x\r\n", bits); + pci_lintr_deassert(sc->esc_pi); + sc->esc_irq_asserted = 0; + } +} + +static void +e82545_intr_write(struct e82545_softc *sc, uint32_t offset, uint32_t value) +{ + + DPRINTF("intr_write: off %x, val %x\n\r", offset, value); + + switch (offset) { + case E1000_ICR: + e82545_icr_deassert(sc, value); + break; + case E1000_ITR: + sc->esc_ITR = value; + break; + case E1000_ICS: + sc->esc_ICS = value; /* not used: store for debug */ + e82545_icr_assert(sc, value); + break; + case E1000_IMS: + e82545_ims_change(sc, value); + break; + case E1000_IMC: + sc->esc_IMC = value; /* for debug */ + sc->esc_IMS &= ~value; + // XXX clear interrupts if all ICR bits now masked + // and interrupt was pending ? 
+ break; + default: + break; + } +} + +static uint32_t +e82545_intr_read(struct e82545_softc *sc, uint32_t offset) +{ + uint32_t retval; + + retval = 0; + + DPRINTF("intr_read: off %x\n\r", offset); + + switch (offset) { + case E1000_ICR: + retval = sc->esc_ICR; + sc->esc_ICR = 0; + e82545_icr_deassert(sc, ~0); + break; + case E1000_ITR: + retval = sc->esc_ITR; + break; + case E1000_ICS: + /* write-only register */ + break; + case E1000_IMS: + retval = sc->esc_IMS; + break; + case E1000_IMC: + /* write-only register */ + break; + default: + break; + } + + return (retval); +} + +static void +e82545_devctl(struct e82545_softc *sc, uint32_t val) +{ + + sc->esc_CTRL = val & ~E1000_CTRL_RST; + + if (val & E1000_CTRL_RST) { + DPRINTF("e1k: s/w reset, ctl %x\n", val); + e82545_reset(sc, 1); + } + /* XXX check for phy reset ? */ +} + +static void +e82545_rx_update_rdba(struct e82545_softc *sc) +{ + + /* XXX verify desc base/len within phys mem range */ + sc->esc_rdba = (uint64_t)sc->esc_RDBAH << 32 | + sc->esc_RDBAL; + + /* Cache host mapping of guest descriptor array */ + sc->esc_rxdesc = paddr_guest2host(sc->esc_ctx, + sc->esc_rdba, sc->esc_RDLEN); +} + +static void +e82545_rx_ctl(struct e82545_softc *sc, uint32_t val) +{ + int on; + + on = ((val & E1000_RCTL_EN) == E1000_RCTL_EN); + + /* Save RCTL after stripping reserved bits 31:27,24,21,14,11:10,0 */ + sc->esc_RCTL = val & ~0xF9204c01; + + DPRINTF("rx_ctl - %s RCTL %x, val %x\n", + on ? 
"on" : "off", sc->esc_RCTL, val); + + /* state change requested */ + if (on != sc->esc_rx_enabled) { + if (on) { + /* Catch disallowed/unimplemented settings */ + //assert(!(val & E1000_RCTL_LBM_TCVR)); + + if (sc->esc_RCTL & E1000_RCTL_LBM_TCVR) { + sc->esc_rx_loopback = 1; + } else { + sc->esc_rx_loopback = 0; + } + + e82545_rx_update_rdba(sc); + e82545_rx_enable(sc); + } else { + e82545_rx_disable(sc); + sc->esc_rx_loopback = 0; + sc->esc_rdba = 0; + sc->esc_rxdesc = NULL; + } + } +} + +static void +e82545_tx_update_tdba(struct e82545_softc *sc) +{ + + /* XXX verify desc base/len within phys mem range */ + sc->esc_tdba = (uint64_t)sc->esc_TDBAH << 32 | sc->esc_TDBAL; + + /* Cache host mapping of guest descriptor array */ + sc->esc_txdesc = paddr_guest2host(sc->esc_ctx, sc->esc_tdba, + sc->esc_TDLEN); +} + +static void +e82545_tx_ctl(struct e82545_softc *sc, uint32_t val) +{ + int on; + + on = ((val & E1000_TCTL_EN) == E1000_TCTL_EN); + + /* ignore TCTL_EN settings that don't change state */ + if (on == sc->esc_tx_enabled) + return; + + if (on) { + e82545_tx_update_tdba(sc); + e82545_tx_enable(sc); + } else { + e82545_tx_disable(sc); + sc->esc_tdba = 0; + sc->esc_txdesc = NULL; + } + + /* Save TCTL value after stripping reserved bits 31:25,23,2,0 */ + sc->esc_TCTL = val & ~0xFE800005; +} + +int +e82545_bufsz(uint32_t rctl) +{ + + switch (rctl & (E1000_RCTL_BSEX | E1000_RCTL_SZ_256)) { + case (E1000_RCTL_SZ_2048): return (2048); + case (E1000_RCTL_SZ_1024): return (1024); + case (E1000_RCTL_SZ_512): return (512); + case (E1000_RCTL_SZ_256): return (256); + case (E1000_RCTL_BSEX|E1000_RCTL_SZ_16384): return (16384); + case (E1000_RCTL_BSEX|E1000_RCTL_SZ_8192): return (8192); + case (E1000_RCTL_BSEX|E1000_RCTL_SZ_4096): return (4096); + } + return (256); /* Forbidden value. 
*/ +} + +static uint8_t dummybuf[2048]; + +/* XXX one packet at a time until this is debugged */ +static void +e82545_tap_callback(int fd, enum ev_type type, void *param) +{ + struct e82545_softc *sc = param; + struct e1000_rx_desc *rxd; + struct iovec vec[64]; + int left, len, lim, maxpktsz, maxpktdesc, bufsz, i, n, size; + uint32_t cause = 0; + uint16_t *tp, tag, head; + + pthread_mutex_lock(&sc->esc_mtx); + DPRINTF("rx_run: head %x, tail %x\r\n", sc->esc_RDH, sc->esc_RDT); + + if (!sc->esc_rx_enabled || sc->esc_rx_loopback) { + DPRINTF("rx disabled (!%d || %d) -- packet(s) dropped\r\n", + sc->esc_rx_enabled, sc->esc_rx_loopback); + while (read(sc->esc_tapfd, dummybuf, sizeof(dummybuf)) > 0) { + } + goto done1; + } + bufsz = e82545_bufsz(sc->esc_RCTL); + maxpktsz = (sc->esc_RCTL & E1000_RCTL_LPE) ? 16384 : 1522; + maxpktdesc = (maxpktsz + bufsz - 1) / bufsz; + size = sc->esc_RDLEN / 16; + head = sc->esc_RDH; + left = (size + sc->esc_RDT - head) % size; + if (left < maxpktdesc) { + DPRINTF("rx overflow (%d < %d) -- packet(s) dropped\r\n", + left, maxpktdesc); + while (read(sc->esc_tapfd, dummybuf, sizeof(dummybuf)) > 0) { + } + goto done1; + } + + sc->esc_rx_active = 1; + pthread_mutex_unlock(&sc->esc_mtx); + + for (lim = size / 4; lim > 0 && left >= maxpktdesc; lim -= n) { + + /* Grab rx descriptor pointed to by the head pointer */ + for (i = 0; i < maxpktdesc; i++) { + rxd = &sc->esc_rxdesc[(head + i) % size]; + vec[i].iov_base = paddr_guest2host(sc->esc_ctx, + rxd->buffer_addr, bufsz); + vec[i].iov_len = bufsz; + } + len = readv(sc->esc_tapfd, vec, maxpktdesc); + if (len <= 0) { + DPRINTF("tap: readv() returned %d\n", len); + goto done; + } + + /* + * Adjust the packet length based on whether the CRC needs + * to be stripped or if the packet is less than the minimum + * eth packet size. 
+ */ + if (len < ETHER_MIN_LEN - ETHER_CRC_LEN) + len = ETHER_MIN_LEN - ETHER_CRC_LEN; + if (!(sc->esc_RCTL & E1000_RCTL_SECRC)) + len += ETHER_CRC_LEN; + n = (len + bufsz - 1) / bufsz; + + DPRINTF("packet read %d bytes, %d segs, head %d\r\n", + len, n, head); + + /* Apply VLAN filter. */ + tp = (uint16_t *)vec[0].iov_base + 6; + if ((sc->esc_RCTL & E1000_RCTL_VFE) && + (ntohs(tp[0]) == sc->esc_VET)) { + tag = ntohs(tp[1]) & 0x0fff; + if ((sc->esc_fvlan[tag >> 5] & + (1 << (tag & 0x1f))) != 0) { + DPRINTF("known VLAN %d\r\n", tag); + } else { + DPRINTF("unknown VLAN %d\r\n", tag); + n = 0; + continue; + } + } + + /* Update all consumed descriptors. */ + for (i = 0; i < n - 1; i++) { + rxd = &sc->esc_rxdesc[(head + i) % size]; + rxd->length = bufsz; + rxd->csum = 0; + rxd->errors = 0; + rxd->special = 0; + rxd->status = E1000_RXD_STAT_DD; + } + rxd = &sc->esc_rxdesc[(head + i) % size]; + rxd->length = len % bufsz; + rxd->csum = 0; + rxd->errors = 0; + rxd->special = 0; + /* XXX signal no checksum for now */ + rxd->status = E1000_RXD_STAT_PIF | E1000_RXD_STAT_IXSM | + E1000_RXD_STAT_EOP | E1000_RXD_STAT_DD; + + /* Schedule receive interrupts. */ + if (len <= sc->esc_RSRPD) { + cause |= E1000_ICR_SRPD | E1000_ICR_RXT0; + } else { + /* XXX: RDRT and RADV timers should be here. */ + cause |= E1000_ICR_RXT0; + } + + head = (head + n) % size; + left -= n; + } + +done: + pthread_mutex_lock(&sc->esc_mtx); + sc->esc_rx_active = 0; + if (sc->esc_rx_enabled == 0) + pthread_cond_signal(&sc->esc_rx_cond); + + sc->esc_RDH = head; + /* Respect E1000_RCTL_RDMTS */ + left = (size + sc->esc_RDT - head) % size; + if (left < (size >> (((sc->esc_RCTL >> 8) & 3) + 1))) + cause |= E1000_ICR_RXDMT0; + /* Assert all accumulated interrupts. 
*/ + if (cause != 0) + e82545_icr_assert(sc, cause); +done1: + DPRINTF("rx_run done: head %x, tail %x\r\n", sc->esc_RDH, sc->esc_RDT); + pthread_mutex_unlock(&sc->esc_mtx); +} + +static uint16_t +e82545_carry(uint32_t sum) +{ + + sum = (sum & 0xFFFF) + (sum >> 16); + if (sum > 0xFFFF) *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201607092041.u69Kfxk2095184>