Date: Sun, 18 Oct 2015 21:39:15 +0000 (UTC) From: Zbigniew Bodek <zbb@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r289550 - head/sys/dev/vnic Message-ID: <201510182139.t9ILdF6f017364@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: zbb Date: Sun Oct 18 21:39:15 2015 New Revision: 289550 URL: https://svnweb.freebsd.org/changeset/base/289550 Log: Raw import of ThunderX VNIC networking driver components This import brings following components of the Linux driver: - Thunder BGX (programmable MAC) - Physical Function driver - Virtual Function driver - Headers Revision: 1.0 Obtained from: Cavium License information: Cavium provided these files under BSD license Added: head/sys/dev/vnic/ head/sys/dev/vnic/nic.h (contents, props changed) head/sys/dev/vnic/nic_main.c (contents, props changed) head/sys/dev/vnic/nic_reg.h (contents, props changed) head/sys/dev/vnic/nicvf_main.c (contents, props changed) head/sys/dev/vnic/nicvf_queues.c (contents, props changed) head/sys/dev/vnic/nicvf_queues.h (contents, props changed) head/sys/dev/vnic/q_struct.h (contents, props changed) head/sys/dev/vnic/thunder_bgx.c (contents, props changed) head/sys/dev/vnic/thunder_bgx.h (contents, props changed) Added: head/sys/dev/vnic/nic.h ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/sys/dev/vnic/nic.h Sun Oct 18 21:39:15 2015 (r289550) @@ -0,0 +1,539 @@ +/* + * Copyright (C) 2015 Cavium Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#ifndef NIC_H +#define NIC_H + +#include <linux/netdevice.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include "thunder_bgx.h" + +/* PCI device IDs */ +#define PCI_DEVICE_ID_THUNDER_NIC_PF 0xA01E +#define PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF 0x0011 +#define PCI_DEVICE_ID_THUNDER_NIC_VF 0xA034 +#define PCI_DEVICE_ID_THUNDER_BGX 0xA026 + +/* PCI BAR nos */ +#define PCI_CFG_REG_BAR_NUM 0 +#define PCI_MSIX_REG_BAR_NUM 4 + +/* NIC SRIOV VF count */ +#define MAX_NUM_VFS_SUPPORTED 128 +#define DEFAULT_NUM_VF_ENABLED 8 + +#define NIC_TNS_BYPASS_MODE 0 +#define NIC_TNS_MODE 1 + +/* NIC priv flags */ +#define NIC_SRIOV_ENABLED BIT(0) +#define NIC_TNS_ENABLED BIT(1) + +/* VNIC HW optimiation features */ +#define VNIC_RSS_SUPPORT +#define VNIC_MULTI_QSET_SUPPORT + +/* Min/Max packet size */ +#define NIC_HW_MIN_FRS 64 +#define NIC_HW_MAX_FRS 9200 /* 9216 max packet including FCS */ + +/* Max pkinds */ +#define NIC_MAX_PKIND 16 + +/* Rx Channels */ +/* Receive channel configuration in TNS bypass mode + * Below is configuration in TNS bypass mode + * BGX0-LMAC0-CHAN0 - VNIC CHAN0 + * BGX0-LMAC1-CHAN0 - VNIC CHAN16 + * ... + * BGX1-LMAC0-CHAN0 - VNIC CHAN128 + * ... + * BGX1-LMAC3-CHAN0 - VNIC CHAN174 + */ +#define NIC_INTF_COUNT 2 /* Interfaces btw VNIC and TNS/BGX */ +#define NIC_CHANS_PER_INF 128 +#define NIC_MAX_CHANS (NIC_INTF_COUNT * NIC_CHANS_PER_INF) +#define NIC_CPI_COUNT 2048 /* No of channel parse indices */ + +/* TNS bypass mode: 1-1 mapping between VNIC and BGX:LMAC */ +#define NIC_MAX_BGX MAX_BGX_PER_CN88XX +#define NIC_CPI_PER_BGX (NIC_CPI_COUNT / NIC_MAX_BGX) +#define NIC_MAX_CPI_PER_LMAC 64 /* Max when CPI_ALG is IP diffserv */ +#define NIC_RSSI_PER_BGX (NIC_RSSI_COUNT / NIC_MAX_BGX) + +/* Tx scheduling */ +#define NIC_MAX_TL4 1024 +#define NIC_MAX_TL4_SHAPERS 256 /* 1 shaper for 4 TL4s */ +#define NIC_MAX_TL3 256 +#define NIC_MAX_TL3_SHAPERS 64 /* 1 shaper for 4 TL3s */ +#define NIC_MAX_TL2 64 +#define NIC_MAX_TL2_SHAPERS 2 /* 1 shaper for 32 TL2s */ +#define NIC_MAX_TL1 2 + +/* TNS bypass mode */ +#define NIC_TL2_PER_BGX 32 +#define NIC_TL4_PER_BGX (NIC_MAX_TL4 / NIC_MAX_BGX) +#define NIC_TL4_PER_LMAC (NIC_MAX_TL4 / NIC_CHANS_PER_INF) + +/* NIC VF Interrupts */ +#define NICVF_INTR_CQ 0 +#define NICVF_INTR_SQ 1 +#define NICVF_INTR_RBDR 2 +#define NICVF_INTR_PKT_DROP 3 +#define NICVF_INTR_TCP_TIMER 4 +#define NICVF_INTR_MBOX 5 +#define NICVF_INTR_QS_ERR 6 + +#define NICVF_INTR_CQ_SHIFT 0 +#define NICVF_INTR_SQ_SHIFT 8 +#define NICVF_INTR_RBDR_SHIFT 16 +#define NICVF_INTR_PKT_DROP_SHIFT 20 +#define NICVF_INTR_TCP_TIMER_SHIFT 21 +#define NICVF_INTR_MBOX_SHIFT 22 +#define NICVF_INTR_QS_ERR_SHIFT 23 + +#define NICVF_INTR_CQ_MASK (0xFF << NICVF_INTR_CQ_SHIFT) +#define NICVF_INTR_SQ_MASK (0xFF << NICVF_INTR_SQ_SHIFT) +#define NICVF_INTR_RBDR_MASK (0x03 << NICVF_INTR_RBDR_SHIFT) +#define NICVF_INTR_PKT_DROP_MASK BIT(NICVF_INTR_PKT_DROP_SHIFT) +#define NICVF_INTR_TCP_TIMER_MASK BIT(NICVF_INTR_TCP_TIMER_SHIFT) +#define NICVF_INTR_MBOX_MASK BIT(NICVF_INTR_MBOX_SHIFT) +#define NICVF_INTR_QS_ERR_MASK BIT(NICVF_INTR_QS_ERR_SHIFT) + +/* MSI-X interrupts */ +#define NIC_PF_MSIX_VECTORS 10 +#define NIC_VF_MSIX_VECTORS 20 + +#define NIC_PF_INTR_ID_ECC0_SBE 0 +#define NIC_PF_INTR_ID_ECC0_DBE 1 +#define NIC_PF_INTR_ID_ECC1_SBE 2 +#define NIC_PF_INTR_ID_ECC1_DBE 3 +#define NIC_PF_INTR_ID_ECC2_SBE 4 +#define NIC_PF_INTR_ID_ECC2_DBE 5 +#define NIC_PF_INTR_ID_ECC3_SBE 6 +#define NIC_PF_INTR_ID_ECC3_DBE 7 +#define NIC_PF_INTR_ID_MBOX0 8 +#define NIC_PF_INTR_ID_MBOX1 9 + +/* Global timer for CQ timer thresh interrupts + * Calculated for SCLK of 700Mhz + * value written should be a 1/16th of what is expected + * + * 1 tick per 0.05usec = value of 2.2 + * This 10% would be covered in CQ timer thresh value + */ +#define NICPF_CLK_PER_INT_TICK 2 + +/* Time to wait before we decide that a SQ is stuck. + * + * Since both pkt rx and tx notifications are done with same CQ, + * when packets are being received at very high rate (eg: L2 forwarding) + * then freeing transmitted skbs will be delayed and watchdog + * will kick in, resetting interface. Hence keeping this value high. + */ +#define NICVF_TX_TIMEOUT (50 * HZ) + +struct nicvf_cq_poll { + struct nicvf *nicvf; + u8 cq_idx; /* Completion queue index */ + struct napi_struct napi; +}; + +#define NIC_RSSI_COUNT 4096 /* Total no of RSS indices */ +#define NIC_MAX_RSS_HASH_BITS 8 +#define NIC_MAX_RSS_IDR_TBL_SIZE (1 << NIC_MAX_RSS_HASH_BITS) +#define RSS_HASH_KEY_SIZE 5 /* 320 bit key */ + +#ifdef VNIC_RSS_SUPPORT +struct nicvf_rss_info { + bool enable; +#define RSS_L2_EXTENDED_HASH_ENA BIT(0) +#define RSS_IP_HASH_ENA BIT(1) +#define RSS_TCP_HASH_ENA BIT(2) +#define RSS_TCP_SYN_DIS BIT(3) +#define RSS_UDP_HASH_ENA BIT(4) +#define RSS_L4_EXTENDED_HASH_ENA BIT(5) +#define RSS_ROCE_ENA BIT(6) +#define RSS_L3_BI_DIRECTION_ENA BIT(7) +#define RSS_L4_BI_DIRECTION_ENA BIT(8) + u64 cfg; + u8 hash_bits; + u16 rss_size; + u8 ind_tbl[NIC_MAX_RSS_IDR_TBL_SIZE]; + u64 key[RSS_HASH_KEY_SIZE]; +} ____cacheline_aligned_in_smp; +#endif + +enum rx_stats_reg_offset { + RX_OCTS = 0x0, + RX_UCAST = 0x1, + RX_BCAST = 0x2, + RX_MCAST = 0x3, + RX_RED = 0x4, + RX_RED_OCTS = 0x5, + RX_ORUN = 0x6, + RX_ORUN_OCTS = 0x7, + RX_FCS = 0x8, + RX_L2ERR = 0x9, + RX_DRP_BCAST = 0xa, + RX_DRP_MCAST = 0xb, + RX_DRP_L3BCAST = 0xc, + RX_DRP_L3MCAST = 0xd, + RX_STATS_ENUM_LAST, +}; + +enum tx_stats_reg_offset { + TX_OCTS = 0x0, + TX_UCAST = 0x1, + TX_BCAST = 0x2, + TX_MCAST = 0x3, + TX_DROP = 0x4, + TX_STATS_ENUM_LAST, +}; + +struct nicvf_hw_stats { + u64 rx_bytes; + u64 rx_ucast_frames; + u64 rx_bcast_frames; + u64 rx_mcast_frames; + u64 rx_fcs_errors; + u64 rx_l2_errors; + u64 rx_drop_red; + u64 rx_drop_red_bytes; + u64 rx_drop_overrun; + u64 rx_drop_overrun_bytes; + u64 rx_drop_bcast; + u64 rx_drop_mcast; + u64 rx_drop_l3_bcast; + u64 rx_drop_l3_mcast; + u64 rx_bgx_truncated_pkts; + u64 rx_jabber_errs; + u64 rx_fcs_errs; + u64 rx_bgx_errs; + u64 rx_prel2_errs; + u64 rx_l2_hdr_malformed; + u64 rx_oversize; + u64 rx_undersize; + u64 rx_l2_len_mismatch; + u64 rx_l2_pclp; + u64 rx_ip_ver_errs; + u64 rx_ip_csum_errs; + u64 rx_ip_hdr_malformed; + u64 rx_ip_payload_malformed; + u64 rx_ip_ttl_errs; + u64 rx_l3_pclp; + u64 rx_l4_malformed; + u64 rx_l4_csum_errs; + u64 rx_udp_len_errs; + u64 rx_l4_port_errs; + u64 rx_tcp_flag_errs; + u64 rx_tcp_offset_errs; + u64 rx_l4_pclp; + u64 rx_truncated_pkts; + + u64 tx_bytes_ok; + u64 tx_ucast_frames_ok; + u64 tx_bcast_frames_ok; + u64 tx_mcast_frames_ok; + u64 tx_drops; +}; + +struct nicvf_drv_stats { + /* Rx */ + u64 rx_frames_ok; + u64 rx_frames_64; + u64 rx_frames_127; + u64 rx_frames_255; + u64 rx_frames_511; + u64 rx_frames_1023; + u64 rx_frames_1518; + u64 rx_frames_jumbo; + u64 rx_drops; + + /* Tx */ + u64 tx_frames_ok; + u64 tx_drops; + u64 tx_tso; + u64 txq_stop; + u64 txq_wake; +}; + +struct nicvf { + struct nicvf *pnicvf; + struct net_device *netdev; + struct pci_dev *pdev; + u8 vf_id; + u8 node; + bool tns_mode:1; + bool sqs_mode:1; + bool loopback_supported:1; + u16 mtu; + struct queue_set *qs; +#ifdef VNIC_MULTI_QSET_SUPPORT +#define MAX_SQS_PER_VF_SINGLE_NODE 5 +#define MAX_SQS_PER_VF 11 + u8 sqs_id; + u8 sqs_count; /* Secondary Qset count */ + struct nicvf *snicvf[MAX_SQS_PER_VF]; +#endif + u8 rx_queues; + u8 tx_queues; + u8 max_queues; + void __iomem *reg_base; + bool link_up; + u8 duplex; + u32 speed; + struct page *rb_page; + u32 rb_page_offset; + bool rb_alloc_fail; + bool rb_work_scheduled; + struct delayed_work rbdr_work; + struct tasklet_struct rbdr_task; + struct tasklet_struct qs_err_task; + struct tasklet_struct cq_task; + struct nicvf_cq_poll *napi[8]; +#ifdef VNIC_RSS_SUPPORT + struct nicvf_rss_info rss_info; +#endif + u8 cpi_alg; + /* Interrupt coalescing settings */ + u32 cq_coalesce_usecs; + + u32 msg_enable; + struct nicvf_hw_stats hw_stats; + struct nicvf_drv_stats drv_stats; + struct bgx_stats bgx_stats; + struct work_struct reset_task; + + /* MSI-X */ + bool msix_enabled; + u8 num_vec; + struct msix_entry msix_entries[NIC_VF_MSIX_VECTORS]; + char irq_name[NIC_VF_MSIX_VECTORS][20]; + bool irq_allocated[NIC_VF_MSIX_VECTORS]; + + /* VF <-> PF mailbox communication */ + bool pf_acked; + bool pf_nacked; +} ____cacheline_aligned_in_smp; + +/* PF <--> VF Mailbox communication + * Eight 64bit registers are shared between PF and VF. + * Separate set for each VF. + * Writing '1' into last register mbx7 means end of message. + */ + +/* PF <--> VF mailbox communication */ +#define NIC_PF_VF_MAILBOX_SIZE 2 +#define NIC_MBOX_MSG_TIMEOUT 2000 /* ms */ + +/* Mailbox message types */ +#define NIC_MBOX_MSG_READY 0x01 /* Is PF ready to rcv msgs */ +#define NIC_MBOX_MSG_ACK 0x02 /* ACK the message received */ +#define NIC_MBOX_MSG_NACK 0x03 /* NACK the message received */ +#define NIC_MBOX_MSG_QS_CFG 0x04 /* Configure Qset */ +#define NIC_MBOX_MSG_RQ_CFG 0x05 /* Configure receive queue */ +#define NIC_MBOX_MSG_SQ_CFG 0x06 /* Configure Send queue */ +#define NIC_MBOX_MSG_RQ_DROP_CFG 0x07 /* Configure receive queue */ +#define NIC_MBOX_MSG_SET_MAC 0x08 /* Add MAC ID to DMAC filter */ +#define NIC_MBOX_MSG_SET_MAX_FRS 0x09 /* Set max frame size */ +#define NIC_MBOX_MSG_CPI_CFG 0x0A /* Config CPI, RSSI */ +#define NIC_MBOX_MSG_RSS_SIZE 0x0B /* Get RSS indir_tbl size */ +#define NIC_MBOX_MSG_RSS_CFG 0x0C /* Config RSS table */ +#define NIC_MBOX_MSG_RSS_CFG_CONT 0x0D /* RSS config continuation */ +#define NIC_MBOX_MSG_RQ_BP_CFG 0x0E /* RQ backpressure config */ +#define NIC_MBOX_MSG_RQ_SW_SYNC 0x0F /* Flush inflight pkts to RQ */ +#define NIC_MBOX_MSG_BGX_STATS 0x10 /* Get stats from BGX */ +#define NIC_MBOX_MSG_BGX_LINK_CHANGE 0x11 /* BGX:LMAC link status */ +#define NIC_MBOX_MSG_ALLOC_SQS 0x12 /* Allocate secondary Qset */ +#define NIC_MBOX_MSG_NICVF_PTR 0x13 /* Send nicvf ptr to PF */ +#define NIC_MBOX_MSG_PNICVF_PTR 0x14 /* Get primary qset nicvf ptr */ +#define NIC_MBOX_MSG_SNICVF_PTR 0x15 /* Send sqet nicvf ptr to PVF */ +#define NIC_MBOX_MSG_LOOPBACK 0x16 /* Set interface in loopback */ +#define NIC_MBOX_MSG_CFG_DONE 0xF0 /* VF configuration done */ +#define NIC_MBOX_MSG_SHUTDOWN 0xF1 /* VF is being shutdown */ + +struct nic_cfg_msg { + u8 msg; + u8 vf_id; + u8 node_id; + bool tns_mode:1; + bool sqs_mode:1; + bool loopback_supported:1; + u8 mac_addr[ETH_ALEN]; +}; + +/* Qset configuration */ +struct qs_cfg_msg { + u8 msg; + u8 num; + u8 sqs_count; + u64 cfg; +}; + +/* Receive queue configuration */ +struct rq_cfg_msg { + u8 msg; + u8 qs_num; + u8 rq_num; + u64 cfg; +}; + +/* Send queue configuration */ +struct sq_cfg_msg { + u8 msg; + u8 qs_num; + u8 sq_num; + bool sqs_mode; + u64 cfg; +}; + +/* Set VF's MAC address */ +struct set_mac_msg { + u8 msg; + u8 vf_id; + u8 mac_addr[ETH_ALEN]; +}; + +/* Set Maximum frame size */ +struct set_frs_msg { + u8 msg; + u8 vf_id; + u16 max_frs; +}; + +/* Set CPI algorithm type */ +struct cpi_cfg_msg { + u8 msg; + u8 vf_id; + u8 rq_cnt; + u8 cpi_alg; +}; + +/* Get RSS table size */ +struct rss_sz_msg { + u8 msg; + u8 vf_id; + u16 ind_tbl_size; +}; + +/* Set RSS configuration */ +struct rss_cfg_msg { + u8 msg; + u8 vf_id; + u8 hash_bits; + u8 tbl_len; + u8 tbl_offset; +#define RSS_IND_TBL_LEN_PER_MBX_MSG 8 + u8 ind_tbl[RSS_IND_TBL_LEN_PER_MBX_MSG]; +}; + +struct bgx_stats_msg { + u8 msg; + u8 vf_id; + u8 rx; + u8 idx; + u64 stats; +}; + +/* Physical interface link status */ +struct bgx_link_status { + u8 msg; + u8 link_up; + u8 duplex; + u32 speed; +}; + +#ifdef VNIC_MULTI_QSET_SUPPORT +/* Get Extra Qset IDs */ +struct sqs_alloc { + u8 msg; + u8 vf_id; + u8 qs_count; +}; + +struct nicvf_ptr { + u8 msg; + u8 vf_id; + bool sqs_mode; + u8 sqs_id; + u64 nicvf; +}; +#endif + +/* Set interface in loopback mode */ +struct set_loopback { + u8 msg; + u8 vf_id; + bool enable; +}; + +/* 128 bit shared memory between PF and each VF */ +union nic_mbx { + struct { u8 msg; } msg; + struct nic_cfg_msg nic_cfg; + struct qs_cfg_msg qs; + struct rq_cfg_msg rq; + struct sq_cfg_msg sq; + struct set_mac_msg mac; + struct set_frs_msg frs; + struct cpi_cfg_msg cpi_cfg; + struct rss_sz_msg rss_size; + struct rss_cfg_msg rss_cfg; + struct bgx_stats_msg bgx_stats; + struct bgx_link_status link_status; +#ifdef VNIC_MULTI_QSET_SUPPORT + struct sqs_alloc sqs_alloc; + struct nicvf_ptr nicvf; +#endif + struct set_loopback lbk; +}; + +#define NIC_NODE_ID_MASK 0x03 +#define NIC_NODE_ID_SHIFT 44 + +static inline int nic_get_node_id(struct pci_dev *pdev) +{ + u64 addr = pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM); + return ((addr >> NIC_NODE_ID_SHIFT) & NIC_NODE_ID_MASK); +} + +int nicvf_set_real_num_queues(struct net_device *netdev, + int tx_queues, int rx_queues); +int nicvf_open(struct net_device *netdev); +int nicvf_stop(struct net_device *netdev); +int nicvf_send_msg_to_pf(struct nicvf *vf, union nic_mbx *mbx); +void nicvf_config_rss(struct nicvf *nic); +void nicvf_set_rss_key(struct nicvf *nic); +void nicvf_set_ethtool_ops(struct net_device *netdev); +void nicvf_update_stats(struct nicvf *nic); +void nicvf_update_lmac_stats(struct nicvf *nic); + +#endif /* NIC_H */ Added: head/sys/dev/vnic/nic_main.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/sys/dev/vnic/nic_main.c Sun Oct 18 21:39:15 2015 (r289550) @@ -0,0 +1,1192 @@ +/* + * Copyright (C) 2015 Cavium Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/etherdevice.h> +#include <linux/of.h> + +#include "nic_reg.h" +#include "nic.h" +#include "q_struct.h" +#include "thunder_bgx.h" + +#define DRV_NAME "thunder-nic" +#define DRV_VERSION "1.0" + +struct nicpf { + struct pci_dev *pdev; + u8 rev_id; + u8 node; + unsigned int flags; + u8 num_vf_en; /* No of VF enabled */ + bool vf_enabled[MAX_NUM_VFS_SUPPORTED]; + void __iomem *reg_base; /* Register start address */ +#ifdef VNIC_MULTI_QSET_SUPPORT + u8 num_sqs_en; /* Secondary qsets enabled */ + u64 nicvf[MAX_NUM_VFS_SUPPORTED]; + u8 vf_sqs[MAX_NUM_VFS_SUPPORTED][MAX_SQS_PER_VF]; + u8 pqs_vf[MAX_NUM_VFS_SUPPORTED]; + bool sqs_used[MAX_NUM_VFS_SUPPORTED]; +#endif + struct pkind_cfg pkind; +#define NIC_SET_VF_LMAC_MAP(bgx, lmac) (((bgx & 0xF) << 4) | (lmac & 0xF)) +#define NIC_GET_BGX_FROM_VF_LMAC_MAP(map) ((map >> 4) & 0xF) +#define NIC_GET_LMAC_FROM_VF_LMAC_MAP(map) (map & 0xF) + u8 vf_lmac_map[MAX_LMAC]; + struct delayed_work dwork; + struct workqueue_struct *check_link; + u8 link[MAX_LMAC]; + u8 duplex[MAX_LMAC]; + u32 speed[MAX_LMAC]; + u16 cpi_base[MAX_NUM_VFS_SUPPORTED]; + u16 rss_ind_tbl_size; + bool mbx_lock[MAX_NUM_VFS_SUPPORTED]; + + /* MSI-X */ + bool msix_enabled; + u8 num_vec; + struct msix_entry msix_entries[NIC_PF_MSIX_VECTORS]; + bool irq_allocated[NIC_PF_MSIX_VECTORS]; +}; + +/* Supported devices */ +static const struct pci_device_id nic_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_NIC_PF) }, + { 0, } /* end of table */ +}; + +MODULE_AUTHOR("Sunil Goutham"); +MODULE_DESCRIPTION("Cavium Thunder NIC Physical Function Driver"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, nic_id_table); + +/* The Cavium ThunderX network controller can *only* be found in SoCs + * containing the ThunderX ARM64 CPU implementation. All accesses to the device + * registers on this platform are implicitly strongly ordered with respect + * to memory accesses. So writeq_relaxed() and readq_relaxed() are safe to use + * with no memory barriers in this driver. The readq()/writeq() functions add + * explicit ordering operation which in this case are redundant, and only + * add overhead. + */ + +/* Register read/write APIs */ +static void nic_reg_write(struct nicpf *nic, u64 offset, u64 val) +{ + writeq_relaxed(val, nic->reg_base + offset); +} + +static u64 nic_reg_read(struct nicpf *nic, u64 offset) +{ + return readq_relaxed(nic->reg_base + offset); +} + +/* PF -> VF mailbox communication APIs */ +static void nic_enable_mbx_intr(struct nicpf *nic) +{ + /* Enable mailbox interrupt for all 128 VFs */ + nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S, ~0ull); + nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S + sizeof(u64), ~0ull); +} + +static void nic_clear_mbx_intr(struct nicpf *nic, int vf, int mbx_reg) +{ + nic_reg_write(nic, NIC_PF_MAILBOX_INT + (mbx_reg << 3), BIT_ULL(vf)); +} + +static u64 nic_get_mbx_addr(int vf) +{ + return NIC_PF_VF_0_127_MAILBOX_0_1 + (vf << NIC_VF_NUM_SHIFT); +} + +/* Send a mailbox message to VF + * @vf: vf to which this message to be sent + * @mbx: Message to be sent + */ +static void nic_send_msg_to_vf(struct nicpf *nic, int vf, union nic_mbx *mbx) +{ + void __iomem *mbx_addr = nic->reg_base + nic_get_mbx_addr(vf); + u64 *msg = (u64 *)mbx; + + /* In first revision HW, mbox interrupt is triggerred + * when PF writes to MBOX(1), in next revisions when + * PF writes to MBOX(0) + */ + if (nic->rev_id == 0) { + /* see the comment for nic_reg_write()/nic_reg_read() + * functions above + */ + writeq_relaxed(msg[0], mbx_addr); + writeq_relaxed(msg[1], mbx_addr + 8); + } else { + writeq_relaxed(msg[1], mbx_addr + 8); + writeq_relaxed(msg[0], mbx_addr); + } +} + +/* Responds to VF's READY message with VF's + * ID, node, MAC address e.t.c + * @vf: VF which sent READY message + */ +static void nic_mbx_send_ready(struct nicpf *nic, int vf) +{ + union nic_mbx mbx = {}; + int bgx_idx, lmac; + const char *mac; + + mbx.nic_cfg.msg = NIC_MBOX_MSG_READY; + mbx.nic_cfg.vf_id = vf; + + if (nic->flags & NIC_TNS_ENABLED) + mbx.nic_cfg.tns_mode = NIC_TNS_MODE; + else + mbx.nic_cfg.tns_mode = NIC_TNS_BYPASS_MODE; + + if (vf < MAX_LMAC) { + bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + + mac = bgx_get_lmac_mac(nic->node, bgx_idx, lmac); + if (mac) + ether_addr_copy((u8 *)&mbx.nic_cfg.mac_addr, mac); + } +#ifdef VNIC_MULTI_QSET_SUPPORT + mbx.nic_cfg.sqs_mode = (vf >= nic->num_vf_en) ? true : false; +#endif + mbx.nic_cfg.node_id = nic->node; + + mbx.nic_cfg.loopback_supported = vf < MAX_LMAC; + + nic_send_msg_to_vf(nic, vf, &mbx); +} + +/* ACKs VF's mailbox message + * @vf: VF to which ACK to be sent + */ +static void nic_mbx_send_ack(struct nicpf *nic, int vf) +{ + union nic_mbx mbx = {}; + + mbx.msg.msg = NIC_MBOX_MSG_ACK; + nic_send_msg_to_vf(nic, vf, &mbx); +} + +/* NACKs VF's mailbox message that PF is not able to + * complete the action + * @vf: VF to which ACK to be sent + */ +static void nic_mbx_send_nack(struct nicpf *nic, int vf) +{ + union nic_mbx mbx = {}; + + mbx.msg.msg = NIC_MBOX_MSG_NACK; + nic_send_msg_to_vf(nic, vf, &mbx); +} + +/* Flush all in flight receive packets to memory and + * bring down an active RQ + */ +static int nic_rcv_queue_sw_sync(struct nicpf *nic) +{ + u16 timeout = ~0x00; + + nic_reg_write(nic, NIC_PF_SW_SYNC_RX, 0x01); + /* Wait till sync cycle is finished */ + while (timeout) { + if (nic_reg_read(nic, NIC_PF_SW_SYNC_RX_DONE) & 0x1) + break; + timeout--; + } + nic_reg_write(nic, NIC_PF_SW_SYNC_RX, 0x00); + if (!timeout) { + dev_err(&nic->pdev->dev, "Receive queue software sync failed"); + return 1; + } + return 0; +} + +/* Get BGX Rx/Tx stats and respond to VF's request */ +static void nic_get_bgx_stats(struct nicpf *nic, struct bgx_stats_msg *bgx) +{ + int bgx_idx, lmac; + union nic_mbx mbx = {}; + + bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[bgx->vf_id]); + lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[bgx->vf_id]); + + mbx.bgx_stats.msg = NIC_MBOX_MSG_BGX_STATS; + mbx.bgx_stats.vf_id = bgx->vf_id; + mbx.bgx_stats.rx = bgx->rx; + mbx.bgx_stats.idx = bgx->idx; + if (bgx->rx) + mbx.bgx_stats.stats = bgx_get_rx_stats(nic->node, bgx_idx, + lmac, bgx->idx); + else + mbx.bgx_stats.stats = bgx_get_tx_stats(nic->node, bgx_idx, + lmac, bgx->idx); + nic_send_msg_to_vf(nic, bgx->vf_id, &mbx); +} + +/* Update hardware min/max frame size */ +static int nic_update_hw_frs(struct nicpf *nic, int new_frs, int vf) +{ + if ((new_frs > NIC_HW_MAX_FRS) || (new_frs < NIC_HW_MIN_FRS)) { + dev_err(&nic->pdev->dev, + "Invalid MTU setting from VF%d rejected, should be between %d and %d\n", + vf, NIC_HW_MIN_FRS, NIC_HW_MAX_FRS); + return 1; + } + new_frs += ETH_HLEN; + if (new_frs <= nic->pkind.maxlen) + return 0; + + nic->pkind.maxlen = new_frs; + nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG, *(u64 *)&nic->pkind); + return 0; +} + +/* Set minimum transmit packet size */ +static void nic_set_tx_pkt_pad(struct nicpf *nic, int size) +{ + int lmac; + u64 lmac_cfg; + + /* Max value that can be set is 60 */ + if (size > 60) + size = 60; + + for (lmac = 0; lmac < (MAX_BGX_PER_CN88XX * MAX_LMAC_PER_BGX); lmac++) { + lmac_cfg = nic_reg_read(nic, NIC_PF_LMAC_0_7_CFG | (lmac << 3)); + lmac_cfg &= ~(0xF << 2); + lmac_cfg |= ((size / 4) << 2); + nic_reg_write(nic, NIC_PF_LMAC_0_7_CFG | (lmac << 3), lmac_cfg); + } +} + +/* Function to check number of LMACs present and set VF::LMAC mapping. + * Mapping will be used while initializing channels. + */ +static void nic_set_lmac_vf_mapping(struct nicpf *nic) +{ + unsigned bgx_map = bgx_get_map(nic->node); + int bgx, next_bgx_lmac = 0; + int lmac, lmac_cnt = 0; + u64 lmac_credit; + + nic->num_vf_en = 0; + if (nic->flags & NIC_TNS_ENABLED) { + nic->num_vf_en = DEFAULT_NUM_VF_ENABLED; + return; + } + + for (bgx = 0; bgx < NIC_MAX_BGX; bgx++) { + if (!(bgx_map & (1 << bgx))) + continue; + lmac_cnt = bgx_get_lmac_count(nic->node, bgx); + for (lmac = 0; lmac < lmac_cnt; lmac++) + nic->vf_lmac_map[next_bgx_lmac++] = + NIC_SET_VF_LMAC_MAP(bgx, lmac); + nic->num_vf_en += lmac_cnt; + + /* Program LMAC credits */ + lmac_credit = (1ull << 1); /* channel credit enable */ + lmac_credit |= (0x1ff << 2); /* Max outstanding pkt count */ + /* 48KB BGX Tx buffer size, each unit is of size 16bytes */ + lmac_credit |= (((((48 * 1024) / lmac_cnt) - + NIC_HW_MAX_FRS) / 16) << 12); + lmac = bgx * MAX_LMAC_PER_BGX; + for (; lmac < lmac_cnt + (bgx * MAX_LMAC_PER_BGX); lmac++) + nic_reg_write(nic, + NIC_PF_LMAC_0_7_CREDIT + (lmac * 8), + lmac_credit); + } +} + +#define TNS_PORT0_BLOCK 6 +#define TNS_PORT1_BLOCK 7 +#define BGX0_BLOCK 8 +#define BGX1_BLOCK 9 + +static void nic_init_hw(struct nicpf *nic) +{ + int i; + + /* Reset NIC, in case the driver is repeatedly inserted and removed */ + nic_reg_write(nic, NIC_PF_SOFT_RESET, 1); + + /* Enable NIC HW block */ + nic_reg_write(nic, NIC_PF_CFG, 0x3); + + /* Enable backpressure */ + nic_reg_write(nic, NIC_PF_BP_CFG, (1ULL << 6) | 0x03); + + if (nic->flags & NIC_TNS_ENABLED) { + nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG, + (NIC_TNS_MODE << 7) | TNS_PORT0_BLOCK); + nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG | (1 << 8), + (NIC_TNS_MODE << 7) | TNS_PORT1_BLOCK); + nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG, + (1ULL << 63) | TNS_PORT0_BLOCK); + nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG + (1 << 8), + (1ULL << 63) | TNS_PORT1_BLOCK); + + } else { + /* Disable TNS mode on both interfaces */ + nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG, + (NIC_TNS_BYPASS_MODE << 7) | BGX0_BLOCK); + nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG | (1 << 8), + (NIC_TNS_BYPASS_MODE << 7) | BGX1_BLOCK); + nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG, + (1ULL << 63) | BGX0_BLOCK); + nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG + (1 << 8), + (1ULL << 63) | BGX1_BLOCK); + } + + /* PKIND configuration */ + nic->pkind.minlen = 0; + nic->pkind.maxlen = NIC_HW_MAX_FRS + ETH_HLEN; + nic->pkind.lenerr_en = 1; + nic->pkind.rx_hdr = 0; + nic->pkind.hdr_sl = 0; + + for (i = 0; i < NIC_MAX_PKIND; i++) + nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG | (i << 3), + *(u64 *)&nic->pkind); + + nic_set_tx_pkt_pad(nic, NIC_HW_MIN_FRS); + + /* Timer config */ + nic_reg_write(nic, NIC_PF_INTR_TIMER_CFG, NICPF_CLK_PER_INT_TICK); + + /* Enable VLAN ethertype matching and stripping */ + nic_reg_write(nic, NIC_PF_RX_ETYPE_0_7, + (2 << 19) | (ETYPE_ALG_VLAN_STRIP << 16) | ETH_P_8021Q); +} + +/* Channel parse index configuration */ +static void nic_config_cpi(struct nicpf *nic, struct cpi_cfg_msg *cfg) +{ + u32 vnic, bgx, lmac, chan; + u32 padd, cpi_count = 0; + u64 cpi_base, cpi, rssi_base, rssi; + u8 qset, rq_idx = 0; + + vnic = cfg->vf_id; + bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]); + lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]); + + chan = (lmac * MAX_BGX_CHANS_PER_LMAC) + (bgx * NIC_CHANS_PER_INF); + cpi_base = (lmac * NIC_MAX_CPI_PER_LMAC) + (bgx * NIC_CPI_PER_BGX); + rssi_base = (lmac * nic->rss_ind_tbl_size) + (bgx * NIC_RSSI_PER_BGX); + + /* Rx channel configuration */ + nic_reg_write(nic, NIC_PF_CHAN_0_255_RX_BP_CFG | (chan << 3), + (1ull << 63) | (vnic << 0)); + nic_reg_write(nic, NIC_PF_CHAN_0_255_RX_CFG | (chan << 3), + ((u64)cfg->cpi_alg << 62) | (cpi_base << 48)); + + if (cfg->cpi_alg == CPI_ALG_NONE) + cpi_count = 1; + else if (cfg->cpi_alg == CPI_ALG_VLAN) /* 3 bits of PCP */ + cpi_count = 8; + else if (cfg->cpi_alg == CPI_ALG_VLAN16) /* 3 bits PCP + DEI */ + cpi_count = 16; + else if (cfg->cpi_alg == CPI_ALG_DIFF) /* 6bits DSCP */ + cpi_count = NIC_MAX_CPI_PER_LMAC; + + /* RSS Qset, Qidx mapping */ + qset = cfg->vf_id; + rssi = rssi_base; + for (; rssi < (rssi_base + cfg->rq_cnt); rssi++) { + nic_reg_write(nic, NIC_PF_RSSI_0_4097_RQ | (rssi << 3), + (qset << 3) | rq_idx); + rq_idx++; + } + + rssi = 0; + cpi = cpi_base; + for (; cpi < (cpi_base + cpi_count); cpi++) { + /* Determine port to channel adder */ + if (cfg->cpi_alg != CPI_ALG_DIFF) + padd = cpi % cpi_count; + else + padd = cpi % 8; /* 3 bits CS out of 6bits DSCP */ + + /* Leave RSS_SIZE as '0' to disable RSS */ + nic_reg_write(nic, NIC_PF_CPI_0_2047_CFG | (cpi << 3), + (vnic << 24) | (padd << 16) | (rssi_base + rssi)); + + if ((rssi + 1) >= cfg->rq_cnt) + continue; + + if (cfg->cpi_alg == CPI_ALG_VLAN) + rssi++; + else if (cfg->cpi_alg == CPI_ALG_VLAN16) + rssi = ((cpi - cpi_base) & 0xe) >> 1; + else if (cfg->cpi_alg == CPI_ALG_DIFF) + rssi = ((cpi - cpi_base) & 0x38) >> 3; + } *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201510182139.t9ILdF6f017364>