Date: Mon, 11 Feb 2019 23:24:39 +0000 (UTC)
From: Patrick Kelsey <pkelsey@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org
Subject: svn commit: r344027 - in stable/12/sys: dev/vmware/vmxnet3 modules/vmware/vmxnet3 net
Message-ID: <201902112324.x1BNOdL4090379@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: pkelsey Date: Mon Feb 11 23:24:39 2019 New Revision: 344027 URL: https://svnweb.freebsd.org/changeset/base/344027 Log: MFC r343291: Convert vmx(4) to being an iflib driver. Also, expose IFLIB_MAX_RX_SEGS to iflib drivers and add iflib_dma_alloc_align() to the iflib API. Performance is generally better with the tunable/sysctl dev.vmx.<index>.iflib.tx_abdicate=1. Reviewed by: shurd Relnotes: yes Sponsored by: RG Nets Differential Revision: https://reviews.freebsd.org/D18761 MFC r343301: Add missing dependency to vmxnet3 Makefile and clean it up a bit otherwise. MFC r343688: Fix interrupt index configuration when using MSI interrupts. When in MSI mode, the device was only being configured with one interrupt index, but it needs two - one for the actual interrupt and one to park the tx queue at. Also clarified comments relating to interrupt index assignment. Reported by: Yuri Pankov <yuripv@yuripv.net> Modified: stable/12/sys/dev/vmware/vmxnet3/if_vmx.c stable/12/sys/dev/vmware/vmxnet3/if_vmxvar.h stable/12/sys/modules/vmware/vmxnet3/Makefile stable/12/sys/net/iflib.c stable/12/sys/net/iflib.h Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/dev/vmware/vmxnet3/if_vmx.c ============================================================================== --- stable/12/sys/dev/vmware/vmxnet3/if_vmx.c Mon Feb 11 23:13:38 2019 (r344026) +++ stable/12/sys/dev/vmware/vmxnet3/if_vmx.c Mon Feb 11 23:24:39 2019 (r344027) @@ -1,6 +1,7 @@ /*- * Copyright (c) 2013 Tsubai Masanari * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org> + * Copyright (c) 2018 Patrick Kelsey * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -24,7 +25,6 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> -#include <sys/eventhandler.h> #include <sys/kernel.h> #include <sys/endian.h> #include <sys/sockio.h> @@ -34,7 +34,6 @@ __FBSDID("$FreeBSD$"); #include 
<sys/socket.h> #include <sys/sysctl.h> #include <sys/smp.h> -#include <sys/taskqueue.h> #include <vm/vm.h> #include <vm/pmap.h> @@ -46,9 +45,8 @@ __FBSDID("$FreeBSD$"); #include <net/if_types.h> #include <net/if_media.h> #include <net/if_vlan_var.h> +#include <net/iflib.h> -#include <net/bpf.h> - #include <netinet/in_systm.h> #include <netinet/in.h> #include <netinet/ip.h> @@ -57,8 +55,6 @@ __FBSDID("$FreeBSD$"); #include <netinet/udp.h> #include <netinet/tcp.h> -#include <machine/in_cksum.h> - #include <machine/bus.h> #include <machine/resource.h> #include <sys/bus.h> @@ -67,139 +63,95 @@ __FBSDID("$FreeBSD$"); #include <dev/pci/pcireg.h> #include <dev/pci/pcivar.h> +#include "ifdi_if.h" + #include "if_vmxreg.h" #include "if_vmxvar.h" #include "opt_inet.h" #include "opt_inet6.h" -#ifdef VMXNET3_FAILPOINTS -#include <sys/fail.h> -static SYSCTL_NODE(DEBUG_FP, OID_AUTO, vmxnet3, CTLFLAG_RW, 0, - "vmxnet3 fail points"); -#define VMXNET3_FP _debug_fail_point_vmxnet3 -#endif -static int vmxnet3_probe(device_t); -static int vmxnet3_attach(device_t); -static int vmxnet3_detach(device_t); -static int vmxnet3_shutdown(device_t); +#define VMXNET3_VMWARE_VENDOR_ID 0x15AD +#define VMXNET3_VMWARE_DEVICE_ID 0x07B0 +static pci_vendor_info_t vmxnet3_vendor_info_array[] = +{ + PVID(VMXNET3_VMWARE_VENDOR_ID, VMXNET3_VMWARE_DEVICE_ID, "VMware VMXNET3 Ethernet Adapter"), + /* required last entry */ + PVID_END +}; + +static void *vmxnet3_register(device_t); +static int vmxnet3_attach_pre(if_ctx_t); +static int vmxnet3_msix_intr_assign(if_ctx_t, int); +static void vmxnet3_free_irqs(struct vmxnet3_softc *); +static int vmxnet3_attach_post(if_ctx_t); +static int vmxnet3_detach(if_ctx_t); +static int vmxnet3_shutdown(if_ctx_t); +static int vmxnet3_suspend(if_ctx_t); +static int vmxnet3_resume(if_ctx_t); + static int vmxnet3_alloc_resources(struct vmxnet3_softc *); static void vmxnet3_free_resources(struct vmxnet3_softc *); static int vmxnet3_check_version(struct vmxnet3_softc *); -static 
void vmxnet3_initial_config(struct vmxnet3_softc *); -static void vmxnet3_check_multiqueue(struct vmxnet3_softc *); +static void vmxnet3_set_interrupt_idx(struct vmxnet3_softc *); -static int vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *); -static int vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *); -static int vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *); -static int vmxnet3_alloc_interrupt(struct vmxnet3_softc *, int, int, - struct vmxnet3_interrupt *); -static int vmxnet3_alloc_intr_resources(struct vmxnet3_softc *); -static int vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *); -static int vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *); -static int vmxnet3_setup_interrupts(struct vmxnet3_softc *); -static int vmxnet3_alloc_interrupts(struct vmxnet3_softc *); +static int vmxnet3_queues_shared_alloc(struct vmxnet3_softc *); +static void vmxnet3_init_txq(struct vmxnet3_softc *, int); +static int vmxnet3_tx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int); +static void vmxnet3_init_rxq(struct vmxnet3_softc *, int, int); +static int vmxnet3_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int); +static void vmxnet3_queues_free(if_ctx_t); -static void vmxnet3_free_interrupt(struct vmxnet3_softc *, - struct vmxnet3_interrupt *); -static void vmxnet3_free_interrupts(struct vmxnet3_softc *); - -#ifndef VMXNET3_LEGACY_TX -static int vmxnet3_alloc_taskqueue(struct vmxnet3_softc *); -static void vmxnet3_start_taskqueue(struct vmxnet3_softc *); -static void vmxnet3_drain_taskqueue(struct vmxnet3_softc *); -static void vmxnet3_free_taskqueue(struct vmxnet3_softc *); -#endif - -static int vmxnet3_init_rxq(struct vmxnet3_softc *, int); -static int vmxnet3_init_txq(struct vmxnet3_softc *, int); -static int vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *); -static void vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *); -static void vmxnet3_destroy_txq(struct vmxnet3_txqueue *); -static void vmxnet3_free_rxtx_queues(struct vmxnet3_softc 
*); - static int vmxnet3_alloc_shared_data(struct vmxnet3_softc *); static void vmxnet3_free_shared_data(struct vmxnet3_softc *); -static int vmxnet3_alloc_txq_data(struct vmxnet3_softc *); -static void vmxnet3_free_txq_data(struct vmxnet3_softc *); -static int vmxnet3_alloc_rxq_data(struct vmxnet3_softc *); -static void vmxnet3_free_rxq_data(struct vmxnet3_softc *); -static int vmxnet3_alloc_queue_data(struct vmxnet3_softc *); -static void vmxnet3_free_queue_data(struct vmxnet3_softc *); static int vmxnet3_alloc_mcast_table(struct vmxnet3_softc *); +static void vmxnet3_free_mcast_table(struct vmxnet3_softc *); static void vmxnet3_init_shared_data(struct vmxnet3_softc *); -static void vmxnet3_init_hwassist(struct vmxnet3_softc *); -static void vmxnet3_reinit_interface(struct vmxnet3_softc *); static void vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *); static void vmxnet3_reinit_shared_data(struct vmxnet3_softc *); static int vmxnet3_alloc_data(struct vmxnet3_softc *); static void vmxnet3_free_data(struct vmxnet3_softc *); -static int vmxnet3_setup_interface(struct vmxnet3_softc *); static void vmxnet3_evintr(struct vmxnet3_softc *); -static void vmxnet3_txq_eof(struct vmxnet3_txqueue *); -static void vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *); -static int vmxnet3_newbuf(struct vmxnet3_softc *, struct vmxnet3_rxring *); -static void vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *, - struct vmxnet3_rxring *, int); -static void vmxnet3_rxq_eof(struct vmxnet3_rxqueue *); -static void vmxnet3_legacy_intr(void *); -static void vmxnet3_txq_intr(void *); -static void vmxnet3_rxq_intr(void *); -static void vmxnet3_event_intr(void *); +static int vmxnet3_isc_txd_encap(void *, if_pkt_info_t); +static void vmxnet3_isc_txd_flush(void *, uint16_t, qidx_t); +static int vmxnet3_isc_txd_credits_update(void *, uint16_t, bool); +static int vmxnet3_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t); +static int vmxnet3_isc_rxd_pkt_get(void *, if_rxd_info_t); 
+static void vmxnet3_isc_rxd_refill(void *, if_rxd_update_t); +static void vmxnet3_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t); +static int vmxnet3_legacy_intr(void *); +static int vmxnet3_rxq_intr(void *); +static int vmxnet3_event_intr(void *); -static void vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *); -static void vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *); -static void vmxnet3_stop(struct vmxnet3_softc *); +static void vmxnet3_stop(if_ctx_t); static void vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *); -static int vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *); -static int vmxnet3_reinit_queues(struct vmxnet3_softc *); +static void vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *); +static void vmxnet3_reinit_queues(struct vmxnet3_softc *); static int vmxnet3_enable_device(struct vmxnet3_softc *); static void vmxnet3_reinit_rxfilters(struct vmxnet3_softc *); -static int vmxnet3_reinit(struct vmxnet3_softc *); -static void vmxnet3_init_locked(struct vmxnet3_softc *); -static void vmxnet3_init(void *); +static void vmxnet3_init(if_ctx_t); +static void vmxnet3_multi_set(if_ctx_t); +static int vmxnet3_mtu_set(if_ctx_t, uint32_t); +static void vmxnet3_media_status(if_ctx_t, struct ifmediareq *); +static int vmxnet3_media_change(if_ctx_t); +static int vmxnet3_promisc_set(if_ctx_t, int); +static uint64_t vmxnet3_get_counter(if_ctx_t, ift_counter); +static void vmxnet3_update_admin_status(if_ctx_t); +static void vmxnet3_txq_timer(if_ctx_t, uint16_t); -static int vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *,struct mbuf *, - int *, int *, int *); -static int vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *, struct mbuf **, - bus_dmamap_t, bus_dma_segment_t [], int *); -static void vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *, bus_dmamap_t); -static int vmxnet3_txq_encap(struct vmxnet3_txqueue *, struct mbuf **); -#ifdef VMXNET3_LEGACY_TX -static void 
vmxnet3_start_locked(struct ifnet *); -static void vmxnet3_start(struct ifnet *); -#else -static int vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *, - struct mbuf *); -static int vmxnet3_txq_mq_start(struct ifnet *, struct mbuf *); -static void vmxnet3_txq_tq_deferred(void *, int); -#endif -static void vmxnet3_txq_start(struct vmxnet3_txqueue *); -static void vmxnet3_tx_start_all(struct vmxnet3_softc *); - static void vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int, uint16_t); -static void vmxnet3_register_vlan(void *, struct ifnet *, uint16_t); -static void vmxnet3_unregister_vlan(void *, struct ifnet *, uint16_t); -static void vmxnet3_set_rxfilter(struct vmxnet3_softc *); -static int vmxnet3_change_mtu(struct vmxnet3_softc *, int); -static int vmxnet3_ioctl(struct ifnet *, u_long, caddr_t); -static uint64_t vmxnet3_get_counter(struct ifnet *, ift_counter); +static void vmxnet3_vlan_register(if_ctx_t, uint16_t); +static void vmxnet3_vlan_unregister(if_ctx_t, uint16_t); +static void vmxnet3_set_rxfilter(struct vmxnet3_softc *, int); -#ifndef VMXNET3_LEGACY_TX -static void vmxnet3_qflush(struct ifnet *); -#endif - -static int vmxnet3_watchdog(struct vmxnet3_txqueue *); static void vmxnet3_refresh_host_stats(struct vmxnet3_softc *); -static void vmxnet3_tick(void *); +static int vmxnet3_link_is_up(struct vmxnet3_softc *); static void vmxnet3_link_status(struct vmxnet3_softc *); -static void vmxnet3_media_status(struct ifnet *, struct ifmediareq *); -static int vmxnet3_media_change(struct ifnet *); static void vmxnet3_set_lladdr(struct vmxnet3_softc *); static void vmxnet3_get_lladdr(struct vmxnet3_softc *); @@ -219,18 +171,14 @@ static void vmxnet3_write_bar1(struct vmxnet3_softc *, static void vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t); static uint32_t vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t); +static int vmxnet3_tx_queue_intr_enable(if_ctx_t, uint16_t); +static int vmxnet3_rx_queue_intr_enable(if_ctx_t, uint16_t); +static void 
vmxnet3_link_intr_enable(if_ctx_t); static void vmxnet3_enable_intr(struct vmxnet3_softc *, int); static void vmxnet3_disable_intr(struct vmxnet3_softc *, int); -static void vmxnet3_enable_all_intrs(struct vmxnet3_softc *); -static void vmxnet3_disable_all_intrs(struct vmxnet3_softc *); +static void vmxnet3_intr_enable_all(if_ctx_t); +static void vmxnet3_intr_disable_all(if_ctx_t); -static int vmxnet3_dma_malloc(struct vmxnet3_softc *, bus_size_t, - bus_size_t, struct vmxnet3_dma_alloc *); -static void vmxnet3_dma_free(struct vmxnet3_softc *, - struct vmxnet3_dma_alloc *); -static int vmxnet3_tunable_int(struct vmxnet3_softc *, - const char *, int); - typedef enum { VMXNET3_BARRIER_RD, VMXNET3_BARRIER_WR, @@ -239,25 +187,16 @@ typedef enum { static void vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t); -/* Tunables. */ -static int vmxnet3_mq_disable = 0; -TUNABLE_INT("hw.vmx.mq_disable", &vmxnet3_mq_disable); -static int vmxnet3_default_txnqueue = VMXNET3_DEF_TX_QUEUES; -TUNABLE_INT("hw.vmx.txnqueue", &vmxnet3_default_txnqueue); -static int vmxnet3_default_rxnqueue = VMXNET3_DEF_RX_QUEUES; -TUNABLE_INT("hw.vmx.rxnqueue", &vmxnet3_default_rxnqueue); -static int vmxnet3_default_txndesc = VMXNET3_DEF_TX_NDESC; -TUNABLE_INT("hw.vmx.txndesc", &vmxnet3_default_txndesc); -static int vmxnet3_default_rxndesc = VMXNET3_DEF_RX_NDESC; -TUNABLE_INT("hw.vmx.rxndesc", &vmxnet3_default_rxndesc); static device_method_t vmxnet3_methods[] = { - /* Device interface. 
*/ - DEVMETHOD(device_probe, vmxnet3_probe), - DEVMETHOD(device_attach, vmxnet3_attach), - DEVMETHOD(device_detach, vmxnet3_detach), - DEVMETHOD(device_shutdown, vmxnet3_shutdown), - + /* Device interface */ + DEVMETHOD(device_register, vmxnet3_register), + DEVMETHOD(device_probe, iflib_device_probe), + DEVMETHOD(device_attach, iflib_device_attach), + DEVMETHOD(device_detach, iflib_device_detach), + DEVMETHOD(device_shutdown, iflib_device_shutdown), + DEVMETHOD(device_suspend, iflib_device_suspend), + DEVMETHOD(device_resume, iflib_device_resume), DEVMETHOD_END }; @@ -267,153 +206,382 @@ static driver_t vmxnet3_driver = { static devclass_t vmxnet3_devclass; DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0); +IFLIB_PNP_INFO(pci, vmx, vmxnet3_vendor_info_array); +MODULE_VERSION(vmx, 2); MODULE_DEPEND(vmx, pci, 1, 1, 1); MODULE_DEPEND(vmx, ether, 1, 1, 1); +MODULE_DEPEND(vmx, iflib, 1, 1, 1); -#define VMXNET3_VMWARE_VENDOR_ID 0x15AD -#define VMXNET3_VMWARE_DEVICE_ID 0x07B0 +static device_method_t vmxnet3_iflib_methods[] = { + DEVMETHOD(ifdi_tx_queues_alloc, vmxnet3_tx_queues_alloc), + DEVMETHOD(ifdi_rx_queues_alloc, vmxnet3_rx_queues_alloc), + DEVMETHOD(ifdi_queues_free, vmxnet3_queues_free), -static int -vmxnet3_probe(device_t dev) -{ + DEVMETHOD(ifdi_attach_pre, vmxnet3_attach_pre), + DEVMETHOD(ifdi_attach_post, vmxnet3_attach_post), + DEVMETHOD(ifdi_detach, vmxnet3_detach), - if (pci_get_vendor(dev) == VMXNET3_VMWARE_VENDOR_ID && - pci_get_device(dev) == VMXNET3_VMWARE_DEVICE_ID) { - device_set_desc(dev, "VMware VMXNET3 Ethernet Adapter"); - return (BUS_PROBE_DEFAULT); - } + DEVMETHOD(ifdi_init, vmxnet3_init), + DEVMETHOD(ifdi_stop, vmxnet3_stop), + DEVMETHOD(ifdi_multi_set, vmxnet3_multi_set), + DEVMETHOD(ifdi_mtu_set, vmxnet3_mtu_set), + DEVMETHOD(ifdi_media_status, vmxnet3_media_status), + DEVMETHOD(ifdi_media_change, vmxnet3_media_change), + DEVMETHOD(ifdi_promisc_set, vmxnet3_promisc_set), + DEVMETHOD(ifdi_get_counter, vmxnet3_get_counter), + 
DEVMETHOD(ifdi_update_admin_status, vmxnet3_update_admin_status), + DEVMETHOD(ifdi_timer, vmxnet3_txq_timer), - return (ENXIO); + DEVMETHOD(ifdi_tx_queue_intr_enable, vmxnet3_tx_queue_intr_enable), + DEVMETHOD(ifdi_rx_queue_intr_enable, vmxnet3_rx_queue_intr_enable), + DEVMETHOD(ifdi_link_intr_enable, vmxnet3_link_intr_enable), + DEVMETHOD(ifdi_intr_enable, vmxnet3_intr_enable_all), + DEVMETHOD(ifdi_intr_disable, vmxnet3_intr_disable_all), + DEVMETHOD(ifdi_msix_intr_assign, vmxnet3_msix_intr_assign), + + DEVMETHOD(ifdi_vlan_register, vmxnet3_vlan_register), + DEVMETHOD(ifdi_vlan_unregister, vmxnet3_vlan_unregister), + + DEVMETHOD(ifdi_shutdown, vmxnet3_shutdown), + DEVMETHOD(ifdi_suspend, vmxnet3_suspend), + DEVMETHOD(ifdi_resume, vmxnet3_resume), + + DEVMETHOD_END +}; + +static driver_t vmxnet3_iflib_driver = { + "vmx", vmxnet3_iflib_methods, sizeof(struct vmxnet3_softc) +}; + +struct if_txrx vmxnet3_txrx = { + .ift_txd_encap = vmxnet3_isc_txd_encap, + .ift_txd_flush = vmxnet3_isc_txd_flush, + .ift_txd_credits_update = vmxnet3_isc_txd_credits_update, + .ift_rxd_available = vmxnet3_isc_rxd_available, + .ift_rxd_pkt_get = vmxnet3_isc_rxd_pkt_get, + .ift_rxd_refill = vmxnet3_isc_rxd_refill, + .ift_rxd_flush = vmxnet3_isc_rxd_flush, + .ift_legacy_intr = vmxnet3_legacy_intr +}; + +static struct if_shared_ctx vmxnet3_sctx_init = { + .isc_magic = IFLIB_MAGIC, + .isc_q_align = 512, + + .isc_tx_maxsize = VMXNET3_TX_MAXSIZE, + .isc_tx_maxsegsize = VMXNET3_TX_MAXSEGSIZE, + .isc_tso_maxsize = VMXNET3_TSO_MAXSIZE + sizeof(struct ether_vlan_header), + .isc_tso_maxsegsize = VMXNET3_TX_MAXSEGSIZE, + + /* + * These values are used to configure the busdma tag used for + * receive descriptors. Each receive descriptor only points to one + * buffer. 
+ */ + .isc_rx_maxsize = VMXNET3_RX_MAXSEGSIZE, /* One buf per descriptor */ + .isc_rx_nsegments = 1, /* One mapping per descriptor */ + .isc_rx_maxsegsize = VMXNET3_RX_MAXSEGSIZE, + + .isc_admin_intrcnt = 1, + .isc_vendor_info = vmxnet3_vendor_info_array, + .isc_driver_version = "2", + .isc_driver = &vmxnet3_iflib_driver, + .isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ, + + /* + * Number of receive queues per receive queue set, with associated + * descriptor settings for each. + */ + .isc_nrxqs = 3, + .isc_nfl = 2, /* one free list for each receive command queue */ + .isc_nrxd_min = {VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC}, + .isc_nrxd_max = {VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC}, + .isc_nrxd_default = {VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC}, + + /* + * Number of transmit queues per transmit queue set, with associated + * descriptor settings for each. + */ + .isc_ntxqs = 2, + .isc_ntxd_min = {VMXNET3_MIN_TX_NDESC, VMXNET3_MIN_TX_NDESC}, + .isc_ntxd_max = {VMXNET3_MAX_TX_NDESC, VMXNET3_MAX_TX_NDESC}, + .isc_ntxd_default = {VMXNET3_DEF_TX_NDESC, VMXNET3_DEF_TX_NDESC}, +}; + +static void * +vmxnet3_register(device_t dev) +{ + return (&vmxnet3_sctx_init); } static int -vmxnet3_attach(device_t dev) +vmxnet3_attach_pre(if_ctx_t ctx) { + device_t dev; + if_softc_ctx_t scctx; struct vmxnet3_softc *sc; + uint32_t intr_config; int error; - sc = device_get_softc(dev); + dev = iflib_get_dev(ctx); + sc = iflib_get_softc(ctx); sc->vmx_dev = dev; + sc->vmx_ctx = ctx; + sc->vmx_sctx = iflib_get_sctx(ctx); + sc->vmx_scctx = iflib_get_softc_ctx(ctx); + sc->vmx_ifp = iflib_get_ifp(ctx); + sc->vmx_media = iflib_get_media(ctx); + scctx = sc->vmx_scctx; - pci_enable_busmaster(dev); + scctx->isc_tx_nsegments = VMXNET3_TX_MAXSEGS; + scctx->isc_tx_tso_segments_max = VMXNET3_TX_MAXSEGS; + /* isc_tx_tso_size_max doesn't include possible vlan header */ + scctx->isc_tx_tso_size_max = VMXNET3_TSO_MAXSIZE; + 
scctx->isc_tx_tso_segsize_max = VMXNET3_TX_MAXSEGSIZE; + scctx->isc_txrx = &vmxnet3_txrx; - VMXNET3_CORE_LOCK_INIT(sc, device_get_nameunit(dev)); - callout_init_mtx(&sc->vmx_tick, &sc->vmx_mtx, 0); + /* If 0, the iflib tunable was not set, so set to the default */ + if (scctx->isc_nrxqsets == 0) + scctx->isc_nrxqsets = VMXNET3_DEF_RX_QUEUES; + scctx->isc_nrxqsets_max = min(VMXNET3_MAX_RX_QUEUES, mp_ncpus); - vmxnet3_initial_config(sc); + /* If 0, the iflib tunable was not set, so set to the default */ + if (scctx->isc_ntxqsets == 0) + scctx->isc_ntxqsets = VMXNET3_DEF_TX_QUEUES; + scctx->isc_ntxqsets_max = min(VMXNET3_MAX_TX_QUEUES, mp_ncpus); - error = vmxnet3_alloc_resources(sc); - if (error) - goto fail; + /* + * Enforce that the transmit completion queue descriptor count is + * the same as the transmit command queue descriptor count. + */ + scctx->isc_ntxd[0] = scctx->isc_ntxd[1]; + scctx->isc_txqsizes[0] = + sizeof(struct vmxnet3_txcompdesc) * scctx->isc_ntxd[0]; + scctx->isc_txqsizes[1] = + sizeof(struct vmxnet3_txdesc) * scctx->isc_ntxd[1]; - error = vmxnet3_check_version(sc); - if (error) - goto fail; + /* + * Enforce that the receive completion queue descriptor count is the + * sum of the receive command queue descriptor counts, and that the + * second receive command queue descriptor count is the same as the + * first one. 
+ */ + scctx->isc_nrxd[2] = scctx->isc_nrxd[1]; + scctx->isc_nrxd[0] = scctx->isc_nrxd[1] + scctx->isc_nrxd[2]; + scctx->isc_rxqsizes[0] = + sizeof(struct vmxnet3_rxcompdesc) * scctx->isc_nrxd[0]; + scctx->isc_rxqsizes[1] = + sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[1]; + scctx->isc_rxqsizes[2] = + sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[2]; - error = vmxnet3_alloc_rxtx_queues(sc); - if (error) - goto fail; + scctx->isc_rss_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE; -#ifndef VMXNET3_LEGACY_TX - error = vmxnet3_alloc_taskqueue(sc); + /* Map PCI BARs */ + error = vmxnet3_alloc_resources(sc); if (error) goto fail; -#endif - error = vmxnet3_alloc_interrupts(sc); + /* Check device versions */ + error = vmxnet3_check_version(sc); if (error) goto fail; - vmxnet3_check_multiqueue(sc); + /* + * The interrupt mode can be set in the hypervisor configuration via + * the parameter ethernet<N>.intrMode. + */ + intr_config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG); + sc->vmx_intr_mask_mode = (intr_config >> 2) & 0x03; - error = vmxnet3_alloc_data(sc); - if (error) - goto fail; + /* + * Configure the softc context to attempt to configure the interrupt + * mode now indicated by intr_config. iflib will follow the usual + * fallback path MSIX -> MSI -> LEGACY, starting at the configured + * starting mode. 
+ */ + switch (intr_config & 0x03) { + case VMXNET3_IT_AUTO: + case VMXNET3_IT_MSIX: + scctx->isc_msix_bar = pci_msix_table_bar(dev); + break; + case VMXNET3_IT_MSI: + scctx->isc_msix_bar = -1; + scctx->isc_disable_msix = 1; + break; + case VMXNET3_IT_LEGACY: + scctx->isc_msix_bar = 0; + break; + } - error = vmxnet3_setup_interface(sc); - if (error) - goto fail; + scctx->isc_tx_csum_flags = VMXNET3_CSUM_ALL_OFFLOAD; + scctx->isc_capabilities = scctx->isc_capenable = + IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 | + IFCAP_TSO4 | IFCAP_TSO6 | + IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | + IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | + IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO | + IFCAP_JUMBO_MTU; - error = vmxnet3_setup_interrupts(sc); - if (error) { - ether_ifdetach(sc->vmx_ifp); - device_printf(dev, "could not set up interrupt\n"); - goto fail; - } + /* These capabilities are not enabled by default. */ + scctx->isc_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER; - vmxnet3_setup_sysctl(sc); -#ifndef VMXNET3_LEGACY_TX - vmxnet3_start_taskqueue(sc); -#endif + vmxnet3_get_lladdr(sc); + iflib_set_mac(ctx, sc->vmx_lladdr); + return (0); fail: - if (error) - vmxnet3_detach(dev); + /* + * We must completely clean up anything allocated above as iflib + * will not invoke any other driver entry points as a result of this + * failure. 
+ */ + vmxnet3_free_resources(sc); return (error); } static int -vmxnet3_detach(device_t dev) +vmxnet3_msix_intr_assign(if_ctx_t ctx, int msix) { struct vmxnet3_softc *sc; - struct ifnet *ifp; + if_softc_ctx_t scctx; + struct vmxnet3_rxqueue *rxq; + int error; + int i; + char irq_name[16]; - sc = device_get_softc(dev); - ifp = sc->vmx_ifp; + sc = iflib_get_softc(ctx); + scctx = sc->vmx_scctx; + + for (i = 0; i < scctx->isc_nrxqsets; i++) { + snprintf(irq_name, sizeof(irq_name), "rxq%d", i); - if (device_is_attached(dev)) { - VMXNET3_CORE_LOCK(sc); - vmxnet3_stop(sc); - VMXNET3_CORE_UNLOCK(sc); + rxq = &sc->vmx_rxq[i]; + error = iflib_irq_alloc_generic(ctx, &rxq->vxrxq_irq, i + 1, + IFLIB_INTR_RX, vmxnet3_rxq_intr, rxq, i, irq_name); + if (error) { + device_printf(iflib_get_dev(ctx), + "Failed to register rxq %d interrupt handler\n", i); + return (error); + } + } - callout_drain(&sc->vmx_tick); -#ifndef VMXNET3_LEGACY_TX - vmxnet3_drain_taskqueue(sc); -#endif + for (i = 0; i < scctx->isc_ntxqsets; i++) { + snprintf(irq_name, sizeof(irq_name), "txq%d", i); - ether_ifdetach(ifp); + /* + * Don't provide the corresponding rxq irq for reference - + * we want the transmit task to be attached to a task queue + * that is different from the one used by the corresponding + * rxq irq. That is because the TX doorbell writes are very + * expensive as virtualized MMIO operations, so we want to + * be able to defer them to another core when possible so + * that they don't steal receive processing cycles during + * stack turnarounds like TCP ACK generation. The other + * piece to this approach is enabling the iflib abdicate + * option (currently via an interface-specific + * tunable/sysctl). 
+ */ + iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i, + irq_name); } - if (sc->vmx_vlan_attach != NULL) { - EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_attach); - sc->vmx_vlan_attach = NULL; + error = iflib_irq_alloc_generic(ctx, &sc->vmx_event_intr_irq, + scctx->isc_nrxqsets + 1, IFLIB_INTR_ADMIN, vmxnet3_event_intr, sc, 0, + "event"); + if (error) { + device_printf(iflib_get_dev(ctx), + "Failed to register event interrupt handler\n"); + return (error); } - if (sc->vmx_vlan_detach != NULL) { - EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_detach); - sc->vmx_vlan_detach = NULL; - } -#ifndef VMXNET3_LEGACY_TX - vmxnet3_free_taskqueue(sc); -#endif - vmxnet3_free_interrupts(sc); + return (0); +} - if (ifp != NULL) { - if_free(ifp); - sc->vmx_ifp = NULL; +static void +vmxnet3_free_irqs(struct vmxnet3_softc *sc) +{ + if_softc_ctx_t scctx; + struct vmxnet3_rxqueue *rxq; + int i; + + scctx = sc->vmx_scctx; + + for (i = 0; i < scctx->isc_nrxqsets; i++) { + rxq = &sc->vmx_rxq[i]; + iflib_irq_free(sc->vmx_ctx, &rxq->vxrxq_irq); } - ifmedia_removeall(&sc->vmx_media); + iflib_irq_free(sc->vmx_ctx, &sc->vmx_event_intr_irq); +} +static int +vmxnet3_attach_post(if_ctx_t ctx) +{ + device_t dev; + if_softc_ctx_t scctx; + struct vmxnet3_softc *sc; + int error; + + dev = iflib_get_dev(ctx); + scctx = iflib_get_softc_ctx(ctx); + sc = iflib_get_softc(ctx); + + if (scctx->isc_nrxqsets > 1) + sc->vmx_flags |= VMXNET3_FLAG_RSS; + + error = vmxnet3_alloc_data(sc); + if (error) + goto fail; + + vmxnet3_set_interrupt_idx(sc); + vmxnet3_setup_sysctl(sc); + + ifmedia_add(sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL); + ifmedia_set(sc->vmx_media, IFM_ETHER | IFM_AUTO); + +fail: + return (error); +} + +static int +vmxnet3_detach(if_ctx_t ctx) +{ + struct vmxnet3_softc *sc; + + sc = iflib_get_softc(ctx); + + vmxnet3_free_irqs(sc); vmxnet3_free_data(sc); vmxnet3_free_resources(sc); - vmxnet3_free_rxtx_queues(sc); - VMXNET3_CORE_LOCK_DESTROY(sc); + return (0); +} +static 
int +vmxnet3_shutdown(if_ctx_t ctx) +{ + return (0); } static int -vmxnet3_shutdown(device_t dev) +vmxnet3_suspend(if_ctx_t ctx) { return (0); } static int +vmxnet3_resume(if_ctx_t ctx) +{ + + return (0); +} + +static int vmxnet3_alloc_resources(struct vmxnet3_softc *sc) { device_t dev; @@ -445,15 +613,6 @@ vmxnet3_alloc_resources(struct vmxnet3_softc *sc) sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1); sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1); - if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) { - rid = PCIR_BAR(2); - sc->vmx_msix_res = bus_alloc_resource_any(dev, - SYS_RES_MEMORY, &rid, RF_ACTIVE); - } - - if (sc->vmx_msix_res == NULL) - sc->vmx_flags |= VMXNET3_FLAG_NO_MSIX; - return (0); } @@ -475,12 +634,6 @@ vmxnet3_free_resources(struct vmxnet3_softc *sc) rman_get_rid(sc->vmx_res1), sc->vmx_res1); sc->vmx_res1 = NULL; } - - if (sc->vmx_msix_res != NULL) { - bus_release_resource(dev, SYS_RES_MEMORY, - rman_get_rid(sc->vmx_msix_res), sc->vmx_msix_res); - sc->vmx_msix_res = NULL; - } } static int @@ -509,603 +662,284 @@ vmxnet3_check_version(struct vmxnet3_softc *sc) return (0); } -static int -trunc_powerof2(int val) -{ - - return (1U << (fls(val) - 1)); -} - static void -vmxnet3_initial_config(struct vmxnet3_softc *sc) -{ - int nqueue, ndesc; - - nqueue = vmxnet3_tunable_int(sc, "txnqueue", vmxnet3_default_txnqueue); - if (nqueue > VMXNET3_MAX_TX_QUEUES || nqueue < 1) - nqueue = VMXNET3_DEF_TX_QUEUES; - if (nqueue > mp_ncpus) - nqueue = mp_ncpus; - sc->vmx_max_ntxqueues = trunc_powerof2(nqueue); - - nqueue = vmxnet3_tunable_int(sc, "rxnqueue", vmxnet3_default_rxnqueue); - if (nqueue > VMXNET3_MAX_RX_QUEUES || nqueue < 1) - nqueue = VMXNET3_DEF_RX_QUEUES; - if (nqueue > mp_ncpus) - nqueue = mp_ncpus; - sc->vmx_max_nrxqueues = trunc_powerof2(nqueue); - - if (vmxnet3_tunable_int(sc, "mq_disable", vmxnet3_mq_disable)) { - sc->vmx_max_nrxqueues = 1; - sc->vmx_max_ntxqueues = 1; - } - - ndesc = vmxnet3_tunable_int(sc, "txd", vmxnet3_default_txndesc); - if (ndesc > 
VMXNET3_MAX_TX_NDESC || ndesc < VMXNET3_MIN_TX_NDESC) - ndesc = VMXNET3_DEF_TX_NDESC; - if (ndesc & VMXNET3_MASK_TX_NDESC) - ndesc &= ~VMXNET3_MASK_TX_NDESC; - sc->vmx_ntxdescs = ndesc; - - ndesc = vmxnet3_tunable_int(sc, "rxd", vmxnet3_default_rxndesc); - if (ndesc > VMXNET3_MAX_RX_NDESC || ndesc < VMXNET3_MIN_RX_NDESC) - ndesc = VMXNET3_DEF_RX_NDESC; - if (ndesc & VMXNET3_MASK_RX_NDESC) - ndesc &= ~VMXNET3_MASK_RX_NDESC; - sc->vmx_nrxdescs = ndesc; - sc->vmx_max_rxsegs = VMXNET3_MAX_RX_SEGS; -} - -static void -vmxnet3_check_multiqueue(struct vmxnet3_softc *sc) -{ - - if (sc->vmx_intr_type != VMXNET3_IT_MSIX) - goto out; - - /* BMV: Just use the maximum configured for now. */ - sc->vmx_nrxqueues = sc->vmx_max_nrxqueues; - sc->vmx_ntxqueues = sc->vmx_max_ntxqueues; - - if (sc->vmx_nrxqueues > 1) - sc->vmx_flags |= VMXNET3_FLAG_RSS; - - return; - -out: - sc->vmx_ntxqueues = 1; - sc->vmx_nrxqueues = 1; -} - -static int -vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *sc) -{ - device_t dev; - int nmsix, cnt, required; - - dev = sc->vmx_dev; - - if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX) - return (1); - - /* Allocate an additional vector for the events interrupt. */ - required = sc->vmx_max_nrxqueues + sc->vmx_max_ntxqueues + 1; - - nmsix = pci_msix_count(dev); - if (nmsix < required) - return (1); - - cnt = required; - if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) { - sc->vmx_nintrs = required; - return (0); - } else - pci_release_msi(dev); - - /* BMV TODO Fallback to sharing MSIX vectors if possible. 
*/ - - return (1); -} - -static int -vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *sc) -{ - device_t dev; - int nmsi, cnt, required; - - dev = sc->vmx_dev; - required = 1; - - nmsi = pci_msi_count(dev); - if (nmsi < required) - return (1); - - cnt = required; - if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required) { - sc->vmx_nintrs = 1; - return (0); - } else - pci_release_msi(dev); - - return (1); -} - -static int -vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *sc) -{ - - sc->vmx_nintrs = 1; - return (0); -} - -static int -vmxnet3_alloc_interrupt(struct vmxnet3_softc *sc, int rid, int flags, - struct vmxnet3_interrupt *intr) -{ - struct resource *irq; - - irq = bus_alloc_resource_any(sc->vmx_dev, SYS_RES_IRQ, &rid, flags); - if (irq == NULL) - return (ENXIO); - - intr->vmxi_irq = irq; - intr->vmxi_rid = rid; - - return (0); -} - -static int -vmxnet3_alloc_intr_resources(struct vmxnet3_softc *sc) -{ - int i, rid, flags, error; - - rid = 0; - flags = RF_ACTIVE; - - if (sc->vmx_intr_type == VMXNET3_IT_LEGACY) - flags |= RF_SHAREABLE; - else - rid = 1; - - for (i = 0; i < sc->vmx_nintrs; i++, rid++) { - error = vmxnet3_alloc_interrupt(sc, rid, flags, - &sc->vmx_intrs[i]); - if (error) - return (error); - } - - return (0); -} - -static int -vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *sc) -{ - device_t dev; - struct vmxnet3_txqueue *txq; - struct vmxnet3_rxqueue *rxq; - struct vmxnet3_interrupt *intr; - enum intr_type type; - int i, error; - - dev = sc->vmx_dev; - intr = &sc->vmx_intrs[0]; - type = INTR_TYPE_NET | INTR_MPSAFE; - - for (i = 0; i < sc->vmx_ntxqueues; i++, intr++) { - txq = &sc->vmx_txq[i]; - error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL, - vmxnet3_txq_intr, txq, &intr->vmxi_handler); - if (error) - return (error); - bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler, - "tq%d", i); - txq->vxtxq_intr_idx = intr->vmxi_rid - 1; - } - - for (i = 0; i < sc->vmx_nrxqueues; i++, intr++) { - rxq = &sc->vmx_rxq[i]; - error = 
bus_setup_intr(dev, intr->vmxi_irq, type, NULL, - vmxnet3_rxq_intr, rxq, &intr->vmxi_handler); - if (error) - return (error); - bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler, - "rq%d", i); - rxq->vxrxq_intr_idx = intr->vmxi_rid - 1; - } - - error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL, *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201902112324.x1BNOdL4090379>