From owner-svn-src-user@FreeBSD.ORG Sat May 26 01:45:53 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id EBCA8106566B; Sat, 26 May 2012 01:45:53 +0000 (UTC) (envelope-from np@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id D57B78FC17; Sat, 26 May 2012 01:45:53 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id q4Q1jrXt045307; Sat, 26 May 2012 01:45:53 GMT (envelope-from np@svn.freebsd.org) Received: (from np@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id q4Q1jrak045301; Sat, 26 May 2012 01:45:53 GMT (envelope-from np@svn.freebsd.org) Message-Id: <201205260145.q4Q1jrak045301@svn.freebsd.org> From: Navdeep Parhar Date: Sat, 26 May 2012 01:45:53 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r236040 - in user/np/toe_iwarp/sys: dev/cxgbe dev/cxgbe/tom modules/cxgbe modules/cxgbe/tom X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 26 May 2012 01:45:54 -0000 Author: np Date: Sat May 26 01:45:53 2012 New Revision: 236040 URL: http://svn.freebsd.org/changeset/base/236040 Log: Full stateful TCP offload driver for the Terminator 4 ASIC. Added: user/np/toe_iwarp/sys/dev/cxgbe/tom/ user/np/toe_iwarp/sys/dev/cxgbe/tom/t4_connect.c (contents, props changed) user/np/toe_iwarp/sys/dev/cxgbe/tom/t4_cpl_io.c (contents, props changed) user/np/toe_iwarp/sys/dev/cxgbe/tom/t4_listen.c (contents, props changed) user/np/toe_iwarp/sys/dev/cxgbe/tom/t4_tom.c (contents, props changed) user/np/toe_iwarp/sys/dev/cxgbe/tom/t4_tom.h (contents, props changed) user/np/toe_iwarp/sys/dev/cxgbe/tom/t4_tom_l2t.c (contents, props changed) user/np/toe_iwarp/sys/dev/cxgbe/tom/t4_tom_l2t.h (contents, props changed) user/np/toe_iwarp/sys/modules/cxgbe/tom/ user/np/toe_iwarp/sys/modules/cxgbe/tom/Makefile (contents, props changed) Modified: user/np/toe_iwarp/sys/dev/cxgbe/adapter.h user/np/toe_iwarp/sys/dev/cxgbe/offload.h user/np/toe_iwarp/sys/dev/cxgbe/t4_l2t.c user/np/toe_iwarp/sys/dev/cxgbe/t4_l2t.h user/np/toe_iwarp/sys/dev/cxgbe/t4_main.c user/np/toe_iwarp/sys/dev/cxgbe/t4_sge.c user/np/toe_iwarp/sys/modules/cxgbe/Makefile Modified: user/np/toe_iwarp/sys/dev/cxgbe/adapter.h ============================================================================== --- user/np/toe_iwarp/sys/dev/cxgbe/adapter.h Sat May 26 01:36:25 2012 (r236039) +++ user/np/toe_iwarp/sys/dev/cxgbe/adapter.h Sat May 26 01:45:53 2012 (r236040) @@ -156,6 +156,7 @@ enum { INTR_DIRECT = (1 << 2), /* direct interrupts for everything */ MASTER_PF = (1 << 3), ADAP_SYSCTL_CTX = (1 << 4), + TOM_INIT_DONE = (1 << 5), CXGBE_BUSY = (1 << 9), @@ -198,7 +199,7 @@ struct port_info { int first_txq; /* index of first tx queue */ int nrxq; /* # of rx queues */ int first_rxq; /* index of first rx queue */ -#ifndef TCP_OFFLOAD_DISABLE +#ifdef TCP_OFFLOAD int nofldtxq; /* # of offload tx queues */ int first_ofld_txq; /* index of first offload tx queue */ int nofldrxq; /* # of offload rx queues */ @@ -295,7 +296,7 @@ struct sge_iq { enum { EQ_CTRL = 1, EQ_ETH = 2, -#ifndef TCP_OFFLOAD_DISABLE +#ifdef TCP_OFFLOAD EQ_OFLD = 3, #endif @@ -421,14 +422,36 @@ struct sge_rxq { } __aligned(CACHE_LINE_SIZE); -#ifndef TCP_OFFLOAD_DISABLE +static inline struct sge_rxq * +iq_to_rxq(struct sge_iq *iq) +{ + + return (member2struct(sge_rxq, iq, iq)); +} + + +#ifdef TCP_OFFLOAD /* ofld_rxq: SGE ingress queue + SGE free list + miscellaneous items */ struct sge_ofld_rxq { struct sge_iq iq; /* MUST be first */ struct sge_fl fl; /* MUST follow iq */ } __aligned(CACHE_LINE_SIZE); + +static inline struct sge_ofld_rxq * +iq_to_ofld_rxq(struct sge_iq *iq) +{ + + return (member2struct(sge_ofld_rxq, iq, iq)); +} #endif +struct wrqe { + STAILQ_ENTRY(wrqe) link; + struct sge_wrq *wrq; + int wr_len; + uint64_t wr[] __aligned(16); +}; + /* * wrq: SGE egress queue that is given prebuilt work requests. Both the control * and offload tx queues are of this type. @@ -437,8 +460,9 @@ struct sge_wrq { struct sge_eq eq; /* MUST be first */ struct adapter *adapter; - struct mbuf *head; /* held up due to lack of descriptors */ - struct mbuf *tail; /* valid only if head is valid */ + + /* List of WRs held up due to lack of tx descriptors */ + STAILQ_HEAD(, wrqe) wr_list; /* stats for common events first */ @@ -456,7 +480,7 @@ struct sge { int nrxq; /* total # of Ethernet rx queues */ int ntxq; /* total # of Ethernet tx tx queues */ -#ifndef TCP_OFFLOAD_DISABLE +#ifdef TCP_OFFLOAD int nofldrxq; /* total # of TOE rx queues */ int nofldtxq; /* total # of TOE tx queues */ #endif @@ -468,7 +492,7 @@ struct sge { struct sge_wrq *ctrlq; /* Control queues */ struct sge_txq *txq; /* NIC tx queues */ struct sge_rxq *rxq; /* NIC rx queues */ -#ifndef TCP_OFFLOAD_DISABLE +#ifdef TCP_OFFLOAD struct sge_wrq *ofld_txq; /* TOE tx queues */ struct sge_ofld_rxq *ofld_rxq; /* TOE rx queues */ #endif @@ -518,15 +542,15 @@ struct adapter { uint8_t chan_map[NCHAN]; uint32_t filter_mode; -#ifndef TCP_OFFLOAD_DISABLE - struct uld_softc tom; +#ifdef TCP_OFFLOAD + void *tom_softc; /* (struct tom_data *) */ struct tom_tunables tt; #endif struct l2t_data *l2t; /* L2 table */ struct tid_info tids; int open_device_map; -#ifndef TCP_OFFLOAD_DISABLE +#ifdef TCP_OFFLOAD int offload_map; #endif int flags; @@ -608,82 +632,96 @@ struct adapter { static inline uint32_t t4_read_reg(struct adapter *sc, uint32_t reg) { + return bus_space_read_4(sc->bt, sc->bh, reg); } static inline void t4_write_reg(struct adapter *sc, uint32_t reg, uint32_t val) { + bus_space_write_4(sc->bt, sc->bh, reg, val); } static inline uint64_t t4_read_reg64(struct adapter *sc, uint32_t reg) { + return t4_bus_space_read_8(sc->bt, sc->bh, reg); } static inline void t4_write_reg64(struct adapter *sc, uint32_t reg, uint64_t val) { + t4_bus_space_write_8(sc->bt, sc->bh, reg, val); } static inline void t4_os_pci_read_cfg1(struct adapter *sc, int reg, uint8_t *val) { + *val = pci_read_config(sc->dev, reg, 1); } static inline void t4_os_pci_write_cfg1(struct adapter *sc, int reg, uint8_t val) { + pci_write_config(sc->dev, reg, val, 1); } static inline void t4_os_pci_read_cfg2(struct adapter *sc, int reg, uint16_t *val) { + *val = pci_read_config(sc->dev, reg, 2); } static inline void t4_os_pci_write_cfg2(struct adapter *sc, int reg, uint16_t val) { + pci_write_config(sc->dev, reg, val, 2); } static inline void t4_os_pci_read_cfg4(struct adapter *sc, int reg, uint32_t *val) { + *val = pci_read_config(sc->dev, reg, 4); } static inline void t4_os_pci_write_cfg4(struct adapter *sc, int reg, uint32_t val) { + pci_write_config(sc->dev, reg, val, 4); } static inline struct port_info * adap2pinfo(struct adapter *sc, int idx) { + return (sc->port[idx]); } static inline void t4_os_set_hw_addr(struct adapter *sc, int idx, uint8_t hw_addr[]) { + bcopy(hw_addr, sc->port[idx]->hw_addr, ETHER_ADDR_LEN); } static inline bool is_10G_port(const struct port_info *pi) { + return ((pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G) != 0); } static inline int tx_resume_threshold(struct sge_eq *eq) { + return (eq->qsize / 4); } @@ -713,21 +751,45 @@ void t4_intr_all(void *); void t4_intr(void *); void t4_intr_err(void *); void t4_intr_evt(void *); -int t4_mgmt_tx(struct adapter *, struct mbuf *); -int t4_wrq_tx_locked(struct adapter *, struct sge_wrq *, struct mbuf *); +void t4_wrq_tx_locked(struct adapter *, struct sge_wrq *, struct wrqe *); int t4_eth_tx(struct ifnet *, struct sge_txq *, struct mbuf *); void t4_update_fl_bufsize(struct ifnet *); int can_resume_tx(struct sge_eq *); -static inline int t4_wrq_tx(struct adapter *sc, struct sge_wrq *wrq, struct mbuf *m) +static inline struct wrqe * +alloc_wrqe(int wr_len, struct sge_wrq *wrq) { - int rc; + int len = offsetof(struct wrqe, wr) + wr_len; + struct wrqe *wr; + + wr = malloc(len, M_CXGBE, M_NOWAIT); + if (__predict_false(wr == NULL)) + return (NULL); + wr->wr_len = wr_len; + wr->wrq = wrq; + return (wr); +} + +static inline void * +wrtod(struct wrqe *wr) +{ + return (&wr->wr[0]); +} + +static inline void +free_wrqe(struct wrqe *wr) +{ + free(wr, M_CXGBE); +} + +static inline void +t4_wrq_tx(struct adapter *sc, struct wrqe *wr) +{ + struct sge_wrq *wrq = wr->wrq; TXQ_LOCK(wrq); - rc = t4_wrq_tx_locked(sc, wrq, m); + t4_wrq_tx_locked(sc, wrq, wr); TXQ_UNLOCK(wrq); - return (rc); } - #endif Modified: user/np/toe_iwarp/sys/dev/cxgbe/offload.h ============================================================================== --- user/np/toe_iwarp/sys/dev/cxgbe/offload.h Sat May 26 01:36:25 2012 (r236039) +++ user/np/toe_iwarp/sys/dev/cxgbe/offload.h Sat May 26 01:45:53 2012 (r236040) @@ -31,12 +31,6 @@ #ifndef __T4_OFFLOAD_H__ #define __T4_OFFLOAD_H__ -/* XXX: flagrant misuse of mbuf fields (during tx by TOM) */ -#define MBUF_EQ(m) (*((void **)(&(m)->m_pkthdr.rcvif))) -/* These have to work for !M_PKTHDR so we use a field from m_hdr. */ -#define MBUF_TX_CREDITS(m) ((m)->m_hdr.pad[0]) -#define MBUF_DMA_MAPPED(m) ((m)->m_hdr.pad[1]) - #define INIT_ULPTX_WR(w, wrlen, atomic, tid) do { \ (w)->wr.wr_hi = htonl(V_FW_WR_OP(FW_ULPTX_WR) | V_FW_WR_ATOMIC(atomic)); \ (w)->wr.wr_mid = htonl(V_FW_WR_LEN16(DIV_ROUND_UP(wrlen, 16)) | \ @@ -119,7 +113,7 @@ struct t4_virt_res { struct t4_range ocq; }; -#ifndef TCP_OFFLOAD_DISABLE +#ifdef TCP_OFFLOAD enum { ULD_TOM = 1, }; @@ -130,13 +124,8 @@ struct uld_info { SLIST_ENTRY(uld_info) link; int refcount; int uld_id; - int (*attach)(struct adapter *, void **); - int (*detach)(void *); -}; - -struct uld_softc { - struct uld_info *uld; - void *softc; + int (*activate)(struct adapter *); + int (*deactivate)(struct adapter *); }; struct tom_tunables { @@ -148,6 +137,8 @@ struct tom_tunables { int t4_register_uld(struct uld_info *); int t4_unregister_uld(struct uld_info *); +int t4_activate_uld(struct adapter *, int); +int t4_deactivate_uld(struct adapter *, int); #endif #endif Modified: user/np/toe_iwarp/sys/dev/cxgbe/t4_l2t.c ============================================================================== --- user/np/toe_iwarp/sys/dev/cxgbe/t4_l2t.c Sat May 26 01:36:25 2012 (r236039) +++ user/np/toe_iwarp/sys/dev/cxgbe/t4_l2t.c Sat May 26 01:45:53 2012 (r236040) @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2011 Chelsio Communications, Inc. + * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -38,16 +38,7 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include #include "common/common.h" #include "common/jhash.h" @@ -72,42 +63,11 @@ __FBSDID("$FreeBSD$"); * lifetime of an L2T entry is fully contained in the lifetime of the TOE. */ -/* identifies sync vs async L2T_WRITE_REQs */ -#define S_SYNC_WR 12 -#define V_SYNC_WR(x) ((x) << S_SYNC_WR) -#define F_SYNC_WR V_SYNC_WR(1) - -enum { - L2T_STATE_VALID, /* entry is up to date */ - L2T_STATE_STALE, /* entry may be used but needs revalidation */ - L2T_STATE_RESOLVING, /* entry needs address resolution */ - L2T_STATE_SYNC_WRITE, /* synchronous write of entry underway */ - - /* when state is one of the below the entry is not hashed */ - L2T_STATE_SWITCHING, /* entry is being used by a switching filter */ - L2T_STATE_UNUSED /* entry not in use */ -}; - -struct l2t_data { - struct rwlock lock; - volatile int nfree; /* number of free entries */ - struct l2t_entry *rover;/* starting point for next allocation */ - struct l2t_entry l2tab[L2T_SIZE]; -}; - -static int do_l2t_write_rpl(struct sge_iq *, const struct rss_header *, - struct mbuf *); - -#define VLAN_NONE 0xfff -#define SA(x) ((struct sockaddr *)(x)) -#define SIN(x) ((struct sockaddr_in *)(x)) -#define SINADDR(x) (SIN(x)->sin_addr.s_addr) - /* * Allocate a free L2T entry. Must be called with l2t_data.lock held. */ -static struct l2t_entry * -alloc_l2e(struct l2t_data *d) +struct l2t_entry * +t4_alloc_l2e(struct l2t_data *d) { struct l2t_entry *end, *e, **p; @@ -121,7 +81,8 @@ alloc_l2e(struct l2t_data *d) if (atomic_load_acq_int(&e->refcnt) == 0) goto found; - for (e = d->l2tab; atomic_load_acq_int(&e->refcnt); ++e) ; + for (e = d->l2tab; atomic_load_acq_int(&e->refcnt); ++e) + continue; found: d->rover = e + 1; atomic_subtract_int(&d->nfree, 1); @@ -148,19 +109,18 @@ found: * Write an L2T entry. Must be called with the entry locked. * The write may be synchronous or asynchronous. */ -static int -write_l2e(struct adapter *sc, struct l2t_entry *e, int sync) +int +t4_write_l2e(struct adapter *sc, struct l2t_entry *e, int sync) { - struct mbuf *m; + struct wrqe *wr; struct cpl_l2t_write_req *req; mtx_assert(&e->lock, MA_OWNED); - if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) + wr = alloc_wrqe(sizeof(*req), &sc->sge.mgmtq); + if (wr == NULL) return (ENOMEM); - - req = mtod(m, struct cpl_l2t_write_req *); - m->m_pkthdr.len = m->m_len = sizeof(*req); + req = wrtod(wr); INIT_TP_WR(req, 0); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx | @@ -170,7 +130,7 @@ write_l2e(struct adapter *sc, struct l2t req->vlan = htons(e->vlan); memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac)); - t4_mgmt_tx(sc, m); + t4_wrq_tx(sc, wr); if (sync && e->state != L2T_STATE_SWITCHING) e->state = L2T_STATE_SYNC_WRITE; @@ -189,7 +149,7 @@ t4_l2t_alloc_switching(struct l2t_data * struct l2t_entry *e; rw_rlock(&d->lock); - e = alloc_l2e(d); + e = t4_alloc_l2e(d); if (e) { mtx_lock(&e->lock); /* avoid race with t4_l2t_free */ e->state = L2T_STATE_SWITCHING; @@ -214,7 +174,7 @@ t4_l2t_set_switching(struct adapter *sc, e->lport = port; memcpy(e->dmac, eth_addr, ETHER_ADDR_LEN); mtx_lock(&e->lock); - rc = write_l2e(sc, e, 0); + rc = t4_write_l2e(sc, e, 0); mtx_unlock(&e->lock); return (rc); } @@ -234,10 +194,13 @@ t4_init_l2t(struct adapter *sc, int flag rw_init(&d->lock, "L2T"); for (i = 0; i < L2T_SIZE; i++) { - d->l2tab[i].idx = i; - d->l2tab[i].state = L2T_STATE_UNUSED; - mtx_init(&d->l2tab[i].lock, "L2T_E", NULL, MTX_DEF); - atomic_store_rel_int(&d->l2tab[i].refcnt, 0); + struct l2t_entry *e = &d->l2tab[i]; + + e->idx = i; + e->state = L2T_STATE_UNUSED; + mtx_init(&e->lock, "L2T_E", NULL, MTX_DEF); + STAILQ_INIT(&e->wr_list); + atomic_store_rel_int(&e->refcnt, 0); } sc->l2t = d; @@ -259,6 +222,24 @@ t4_free_l2t(struct l2t_data *d) return (0); } +int +do_l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss, + struct mbuf *m) +{ + const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1); + unsigned int tid = GET_TID(rpl); + unsigned int idx = tid & (L2T_SIZE - 1); + + if (__predict_false(rpl->status != CPL_ERR_NONE)) { + log(LOG_ERR, + "Unexpected L2T_WRITE_RPL status %u for entry %u\n", + rpl->status, idx); + return (EINVAL); + } + + return (0); +} + #ifdef SBUF_DRAIN static inline unsigned int vlan_prio(const struct l2t_entry *e) @@ -273,7 +254,7 @@ l2e_state(const struct l2t_entry *e) case L2T_STATE_VALID: return 'V'; /* valid, fast-path entry */ case L2T_STATE_STALE: return 'S'; /* needs revalidation, but usable */ case L2T_STATE_SYNC_WRITE: return 'W'; - case L2T_STATE_RESOLVING: return e->arpq_head ? 'A' : 'R'; + case L2T_STATE_RESOLVING: return STAILQ_EMPTY(&e->wr_list) ? 'R' : 'A'; case L2T_STATE_SWITCHING: return 'X'; default: return 'U'; } @@ -311,20 +292,20 @@ sysctl_l2t(SYSCTL_HANDLER_ARGS) "Ethernet address VLAN/P LP State Users Port"); header = 1; } - if (e->state == L2T_STATE_SWITCHING || e->v6) + if (e->state == L2T_STATE_SWITCHING) ip[0] = 0; else snprintf(ip, sizeof(ip), "%s", - inet_ntoa(*(struct in_addr *)&e->addr[0])); + inet_ntoa(*(struct in_addr *)&e->addr)); - /* XXX: accessing lle probably not safe? */ + /* XXX: e->ifp may not be around */ sbuf_printf(sb, "\n%4u %-15s %02x:%02x:%02x:%02x:%02x:%02x %4d" " %u %2u %c %5u %s", e->idx, ip, e->dmac[0], e->dmac[1], e->dmac[2], e->dmac[3], e->dmac[4], e->dmac[5], e->vlan & 0xfff, vlan_prio(e), e->lport, l2e_state(e), atomic_load_acq_int(&e->refcnt), - e->lle ? e->lle->lle_tbl->llt_ifp->if_xname : ""); + e->ifp->if_xname); skip: mtx_unlock(&e->lock); } @@ -335,459 +316,3 @@ skip: return (rc); } #endif - -#ifndef TCP_OFFLOAD_DISABLE -static inline void -l2t_hold(struct l2t_data *d, struct l2t_entry *e) -{ - if (atomic_fetchadd_int(&e->refcnt, 1) == 0) /* 0 -> 1 transition */ - atomic_subtract_int(&d->nfree, 1); -} - -/* - * To avoid having to check address families we do not allow v4 and v6 - * neighbors to be on the same hash chain. We keep v4 entries in the first - * half of available hash buckets and v6 in the second. - */ -enum { - L2T_SZ_HALF = L2T_SIZE / 2, - L2T_HASH_MASK = L2T_SZ_HALF - 1 -}; - -static inline unsigned int -arp_hash(const uint32_t *key, int ifindex) -{ - return jhash_2words(*key, ifindex, 0) & L2T_HASH_MASK; -} - -static inline unsigned int -ipv6_hash(const uint32_t *key, int ifindex) -{ - uint32_t xor = key[0] ^ key[1] ^ key[2] ^ key[3]; - - return L2T_SZ_HALF + (jhash_2words(xor, ifindex, 0) & L2T_HASH_MASK); -} - -static inline unsigned int -addr_hash(const uint32_t *addr, int addr_len, int ifindex) -{ - return addr_len == 4 ? arp_hash(addr, ifindex) : - ipv6_hash(addr, ifindex); -} - -/* - * Checks if an L2T entry is for the given IP/IPv6 address. It does not check - * whether the L2T entry and the address are of the same address family. - * Callers ensure an address is only checked against L2T entries of the same - * family, something made trivial by the separation of IP and IPv6 hash chains - * mentioned above. Returns 0 if there's a match, - */ -static inline int -addreq(const struct l2t_entry *e, const uint32_t *addr) -{ - if (e->v6) - return (e->addr[0] ^ addr[0]) | (e->addr[1] ^ addr[1]) | - (e->addr[2] ^ addr[2]) | (e->addr[3] ^ addr[3]); - return e->addr[0] ^ addr[0]; -} - -/* - * Add a packet to an L2T entry's queue of packets awaiting resolution. - * Must be called with the entry's lock held. - */ -static inline void -arpq_enqueue(struct l2t_entry *e, struct mbuf *m) -{ - mtx_assert(&e->lock, MA_OWNED); - - KASSERT(m->m_nextpkt == NULL, ("%s: m_nextpkt not NULL", __func__)); - if (e->arpq_head) - e->arpq_tail->m_nextpkt = m; - else - e->arpq_head = m; - e->arpq_tail = m; -} - -static inline void -send_pending(struct adapter *sc, struct l2t_entry *e) -{ - struct mbuf *m, *next; - - mtx_assert(&e->lock, MA_OWNED); - - for (m = e->arpq_head; m; m = next) { - next = m->m_nextpkt; - m->m_nextpkt = NULL; - t4_wrq_tx(sc, MBUF_EQ(m), m); - } - e->arpq_head = e->arpq_tail = NULL; -} - -#ifdef INET -/* - * Looks up and fills up an l2t_entry's lle. We grab all the locks that we need - * ourself, and update e->state at the end if e->lle was successfully filled. - * - * The lle passed in comes from arpresolve and is ignored as it does not appear - * to be of much use. - */ -static int -l2t_fill_lle(struct adapter *sc, struct l2t_entry *e, struct llentry *unused) -{ - int rc = 0; - struct sockaddr_in sin; - struct ifnet *ifp = e->ifp; - struct llentry *lle; - - bzero(&sin, sizeof(struct sockaddr_in)); - if (e->v6) - panic("%s: IPv6 L2 resolution not supported yet.", __func__); - - sin.sin_family = AF_INET; - sin.sin_len = sizeof(struct sockaddr_in); - memcpy(&sin.sin_addr, e->addr, sizeof(struct sockaddr_in)); - - mtx_assert(&e->lock, MA_NOTOWNED); - KASSERT(e->addr && ifp, ("%s: bad prep before call", __func__)); - - IF_AFDATA_LOCK(ifp); - lle = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, SA(&sin)); - IF_AFDATA_UNLOCK(ifp); - if (!LLE_IS_VALID(lle)) - return (ENOMEM); - if (!(lle->la_flags & LLE_VALID)) { - rc = EINVAL; - goto done; - } - - LLE_ADDREF(lle); - - mtx_lock(&e->lock); - if (e->state == L2T_STATE_RESOLVING) { - KASSERT(e->lle == NULL, ("%s: lle already valid", __func__)); - e->lle = lle; - memcpy(e->dmac, &lle->ll_addr, ETHER_ADDR_LEN); - write_l2e(sc, e, 1); - } else { - KASSERT(e->lle == lle, ("%s: lle changed", __func__)); - LLE_REMREF(lle); - } - mtx_unlock(&e->lock); -done: - LLE_WUNLOCK(lle); - return (rc); -} -#endif - -int -t4_l2t_send(struct adapter *sc, struct mbuf *m, struct l2t_entry *e) -{ -#ifndef INET - return (EINVAL); -#else - struct llentry *lle = NULL; - struct sockaddr_in sin; - struct ifnet *ifp = e->ifp; - - if (e->v6) - panic("%s: IPv6 L2 resolution not supported yet.", __func__); - - bzero(&sin, sizeof(struct sockaddr_in)); - sin.sin_family = AF_INET; - sin.sin_len = sizeof(struct sockaddr_in); - memcpy(&sin.sin_addr, e->addr, sizeof(struct sockaddr_in)); - -again: - switch (e->state) { - case L2T_STATE_STALE: /* entry is stale, kick off revalidation */ - if (arpresolve(ifp, NULL, NULL, SA(&sin), e->dmac, &lle) == 0) - l2t_fill_lle(sc, e, lle); - - /* Fall through */ - - case L2T_STATE_VALID: /* fast-path, send the packet on */ - return t4_wrq_tx(sc, MBUF_EQ(m), m); - - case L2T_STATE_RESOLVING: - case L2T_STATE_SYNC_WRITE: - mtx_lock(&e->lock); - if (e->state != L2T_STATE_SYNC_WRITE && - e->state != L2T_STATE_RESOLVING) { - /* state changed by the time we got here */ - mtx_unlock(&e->lock); - goto again; - } - arpq_enqueue(e, m); - mtx_unlock(&e->lock); - - if (e->state == L2T_STATE_RESOLVING && - arpresolve(ifp, NULL, NULL, SA(&sin), e->dmac, &lle) == 0) - l2t_fill_lle(sc, e, lle); - } - - return (0); -#endif -} - -/* - * Called when an L2T entry has no more users. The entry is left in the hash - * table since it is likely to be reused but we also bump nfree to indicate - * that the entry can be reallocated for a different neighbor. We also drop - * the existing neighbor reference in case the neighbor is going away and is - * waiting on our reference. - * - * Because entries can be reallocated to other neighbors once their ref count - * drops to 0 we need to take the entry's lock to avoid races with a new - * incarnation. - */ -static void -t4_l2e_free(struct l2t_entry *e) -{ - struct llentry *lle = NULL; - struct l2t_data *d; - - mtx_lock(&e->lock); - if (atomic_load_acq_int(&e->refcnt) == 0) { /* hasn't been recycled */ - lle = e->lle; - e->lle = NULL; - /* - * Don't need to worry about the arpq, an L2T entry can't be - * released if any packets are waiting for resolution as we - * need to be able to communicate with the device to close a - * connection. - */ - } - mtx_unlock(&e->lock); - - d = container_of(e, struct l2t_data, l2tab[e->idx]); - atomic_add_int(&d->nfree, 1); - - if (lle) - LLE_FREE(lle); -} - -void -t4_l2t_release(struct l2t_entry *e) -{ - if (atomic_fetchadd_int(&e->refcnt, -1) == 1) - t4_l2e_free(e); -} - -static int -do_l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss, - struct mbuf *m) -{ - struct adapter *sc = iq->adapter; - const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1); - unsigned int tid = GET_TID(rpl); - unsigned int idx = tid & (L2T_SIZE - 1); - - if (__predict_false(rpl->status != CPL_ERR_NONE)) { - log(LOG_ERR, - "Unexpected L2T_WRITE_RPL status %u for entry %u\n", - rpl->status, idx); - return (EINVAL); - } - - if (tid & F_SYNC_WR) { - struct l2t_entry *e = &sc->l2t->l2tab[idx]; - - mtx_lock(&e->lock); - if (e->state != L2T_STATE_SWITCHING) { - send_pending(sc, e); - e->state = L2T_STATE_VALID; - } - mtx_unlock(&e->lock); - } - - return (0); -} - -/* - * Reuse an L2T entry that was previously used for the same next hop. - */ -static void -reuse_entry(struct l2t_entry *e) -{ - struct llentry *lle; - - mtx_lock(&e->lock); /* avoid race with t4_l2t_free */ - lle = e->lle; - if (lle) { - KASSERT(lle->la_flags & LLE_VALID, - ("%s: invalid lle stored in l2t_entry", __func__)); - - if (lle->la_expire >= time_uptime) - e->state = L2T_STATE_STALE; - else - e->state = L2T_STATE_VALID; - } else - e->state = L2T_STATE_RESOLVING; - mtx_unlock(&e->lock); -} - -/* - * The TOE wants an L2 table entry that it can use to reach the next hop over - * the specified port. Produce such an entry - create one if needed. - * - * Note that the ifnet could be a pseudo-device like if_vlan, if_lagg, etc. on - * top of the real cxgbe interface. - */ -struct l2t_entry * -t4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa) -{ - struct l2t_entry *e; - struct l2t_data *d = pi->adapter->l2t; - int addr_len; - uint32_t *addr; - int hash; - struct sockaddr_in6 *sin6; - unsigned int smt_idx = pi->port_id; - - if (sa->sa_family == AF_INET) { - addr = (uint32_t *)&SINADDR(sa); - addr_len = sizeof(SINADDR(sa)); - } else if (sa->sa_family == AF_INET6) { - sin6 = (struct sockaddr_in6 *)sa; - addr = (uint32_t *)&sin6->sin6_addr.s6_addr; - addr_len = sizeof(sin6->sin6_addr.s6_addr); - } else - return (NULL); - -#ifndef VLAN_TAG - if (ifp->if_type == IFT_L2VLAN) - return (NULL); -#endif - - hash = addr_hash(addr, addr_len, ifp->if_index); - - rw_wlock(&d->lock); - for (e = d->l2tab[hash].first; e; e = e->next) { - if (!addreq(e, addr) && e->ifp == ifp && e->smt_idx == smt_idx){ - l2t_hold(d, e); - if (atomic_load_acq_int(&e->refcnt) == 1) - reuse_entry(e); - goto done; - } - } - - /* Need to allocate a new entry */ - e = alloc_l2e(d); - if (e) { - mtx_lock(&e->lock); /* avoid race with t4_l2t_free */ - e->state = L2T_STATE_RESOLVING; - memcpy(e->addr, addr, addr_len); - e->ifindex = ifp->if_index; - e->smt_idx = smt_idx; - e->ifp = ifp; - e->hash = hash; - e->lport = pi->lport; - e->v6 = (addr_len == 16); - e->lle = NULL; - atomic_store_rel_int(&e->refcnt, 1); -#ifdef VLAN_TAG - if (ifp->if_type == IFT_L2VLAN) - VLAN_TAG(ifp, &e->vlan); - else - e->vlan = VLAN_NONE; -#endif - e->next = d->l2tab[hash].first; - d->l2tab[hash].first = e; - mtx_unlock(&e->lock); - } -done: - rw_wunlock(&d->lock); - return e; -} - -/* - * Called when the host's neighbor layer makes a change to some entry that is - * loaded into the HW L2 table. - */ -void -t4_l2t_update(struct adapter *sc, struct llentry *lle) -{ - struct l2t_entry *e; - struct l2t_data *d = sc->l2t; - struct sockaddr *sa = L3_ADDR(lle); - struct llentry *old_lle = NULL; - uint32_t *addr = (uint32_t *)&SINADDR(sa); - struct ifnet *ifp = lle->lle_tbl->llt_ifp; - int hash = addr_hash(addr, sizeof(*addr), ifp->if_index); - - KASSERT(d != NULL, ("%s: no L2 table", __func__)); - LLE_WLOCK_ASSERT(lle); - KASSERT(lle->la_flags & LLE_VALID || lle->la_flags & LLE_DELETED, - ("%s: entry neither valid nor deleted.", __func__)); - - rw_rlock(&d->lock); - for (e = d->l2tab[hash].first; e; e = e->next) { - if (!addreq(e, addr) && e->ifp == ifp) { - mtx_lock(&e->lock); - if (atomic_load_acq_int(&e->refcnt)) - goto found; - e->state = L2T_STATE_STALE; - mtx_unlock(&e->lock); - break; - } - } - rw_runlock(&d->lock); - - /* The TOE has no interest in this LLE */ - return; - - found: - rw_runlock(&d->lock); - - if (atomic_load_acq_int(&e->refcnt)) { - - /* Entry is referenced by at least 1 offloaded connection. */ - - /* Handle deletes first */ - if (lle->la_flags & LLE_DELETED) { - if (lle == e->lle) { - e->lle = NULL; - e->state = L2T_STATE_RESOLVING; - LLE_REMREF(lle); - } - goto done; - } - - if (lle != e->lle) { - old_lle = e->lle; - LLE_ADDREF(lle); - e->lle = lle; - } - - if (e->state == L2T_STATE_RESOLVING || - memcmp(e->dmac, &lle->ll_addr, ETHER_ADDR_LEN)) { - - /* unresolved -> resolved; or dmac changed */ - - memcpy(e->dmac, &lle->ll_addr, ETHER_ADDR_LEN); - write_l2e(sc, e, 1); - } else { - - /* +ve reinforcement of a valid or stale entry */ - - } - - e->state = L2T_STATE_VALID; - - } else { - /* - * Entry was used previously but is unreferenced right now. - * e->lle has been released and NULL'd out by t4_l2t_free, or - * l2t_release is about to call t4_l2t_free and do that. - * - * Either way this is of no interest to us. - */ - } - -done: - mtx_unlock(&e->lock); - if (old_lle) - LLE_FREE(old_lle); -} - -#endif Modified: user/np/toe_iwarp/sys/dev/cxgbe/t4_l2t.h ============================================================================== --- user/np/toe_iwarp/sys/dev/cxgbe/t4_l2t.h Sat May 26 01:36:25 2012 (r236039) +++ user/np/toe_iwarp/sys/dev/cxgbe/t4_l2t.h Sat May 26 01:45:53 2012 (r236040) @@ -30,8 +30,25 @@ #ifndef __T4_L2T_H #define __T4_L2T_H +/* identifies sync vs async L2T_WRITE_REQs */ +#define S_SYNC_WR 12 +#define V_SYNC_WR(x) ((x) << S_SYNC_WR) +#define F_SYNC_WR V_SYNC_WR(1) + enum { L2T_SIZE = 4096 }; /* # of L2T entries */ +enum { + L2T_STATE_VALID, /* entry is up to date */ *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***