Date: Tue, 7 Nov 2017 23:52:14 +0000 (UTC) From: Navdeep Parhar <np@FreeBSD.org> To: src-committers@freebsd.org, svn-src-projects@freebsd.org Subject: svn commit: r325522 - in projects/bsd_rdma_4_9: contrib/ofed/libcxgb4 sys/compat/linuxkpi/common/include/linux sys/dev/cxgbe sys/dev/cxgbe/common sys/dev/cxgbe/iw_cxgbe sys/modules/cxgbe/iw_cxgbe Message-ID: <201711072352.vA7NqEHU046048@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: np Date: Tue Nov 7 23:52:14 2017 New Revision: 325522 URL: https://svnweb.freebsd.org/changeset/base/325522 Log: Update the iw_cxgbe bits in the projects branch. Submitted by: Krishnamraju Eraparaju @ Chelsio Sponsored by: Chelsio Communications Modified: projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/cq.c projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/dev.c projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/qp.c projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/t4.h projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/verbs.c projects/bsd_rdma_4_9/sys/compat/linuxkpi/common/include/linux/dma-mapping.h projects/bsd_rdma_4_9/sys/dev/cxgbe/adapter.h projects/bsd_rdma_4_9/sys/dev/cxgbe/common/common.h projects/bsd_rdma_4_9/sys/dev/cxgbe/common/t4_hw.c projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/cm.c projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/cq.c projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/device.c projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/mem.c projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/provider.c projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/qp.c projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/t4.h projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/user.h projects/bsd_rdma_4_9/sys/dev/cxgbe/offload.h projects/bsd_rdma_4_9/sys/dev/cxgbe/t4_main.c projects/bsd_rdma_4_9/sys/modules/cxgbe/iw_cxgbe/Makefile Modified: projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/cq.c ============================================================================== --- projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/cq.c Tue Nov 7 19:12:20 2017 (r325521) +++ projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/cq.c Tue Nov 7 23:52:14 2017 (r325522) @@ -437,7 +437,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, if (!*cqe_flushed && CQE_STATUS(hw_cqe)) dump_cqe(hw_cqe); - BUG_ON((*cqe_flushed == 0) && !SW_CQE(hw_cqe)); + BUG_ON((cqe_flushed == 0) && !SW_CQE(hw_cqe)); goto proc_cqe; } Modified: projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/dev.c ============================================================================== --- projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/dev.c Tue Nov 7 19:12:20 2017 (r325521) +++ projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/dev.c Tue Nov 7 23:52:14 2017 (r325522) @@ -39,6 +39,7 @@ #include <pthread.h> #include <string.h> #include <signal.h> +#include <stdbool.h> #include "libcxgb4.h" #include "cxgb4-abi.h" @@ -194,6 +195,17 @@ static struct ibv_context *c4iw_alloc_context(struct i rhp->cqid2ptr = calloc(rhp->max_cq, sizeof(void *)); if (!rhp->cqid2ptr) goto err_unmap; + + /* Disable userspace WC if architecture/adapter does not + * support WC. + * Note: To forcefully disable WC in kernel driver use the + * loader tunable "hw.cxl.write_combine=0" + */ + if (t5_en_wc && !context->status_page->wc_supported) { + fprintf(stderr, "iw_cxgb4 driver doesn't support Write " + "Combine, so regular DB writes will be used\n"); + t5_en_wc = 0; + } } return &context->ibv_ctx; @@ -400,11 +412,44 @@ int c4iw_abi_version = 1; static struct verbs_device *cxgb4_driver_init(const char *uverbs_sys_path, int abi_version) { - char devstr[IBV_SYSFS_PATH_MAX], ibdev[16], value[32], *cp; + char devstr[IBV_SYSFS_PATH_MAX], ibdev[16], value[128], *cp; + char dev_str[IBV_SYSFS_PATH_MAX]; struct c4iw_dev *dev; unsigned vendor, device, fw_maj, fw_min; int i; + char devnum; + char ib_param[16]; +#ifndef __linux__ + if (ibv_read_sysfs_file(uverbs_sys_path, "ibdev", + ibdev, sizeof ibdev) < 0) + return NULL; + + devnum = atoi(&ibdev[5]); + + if (ibdev[0] == 't' && ibdev[1] >= '4' && ibdev[1] <= '6' && + strstr(&ibdev[2], "nex") && devnum >= 0) { + snprintf(dev_str, sizeof(dev_str), "/dev/t%cnex/%d", ibdev[1], + devnum); + } else + return NULL; + + if (ibv_read_sysfs_file(dev_str, "\%pnpinfo", value, sizeof value) < 0) + return NULL; + else { + if (strstr(value, "vendor=")) { + strncpy(ib_param, strstr(value, "vendor=") + + strlen("vendor="), 6); + sscanf(ib_param, "%i", &vendor); + } + + if (strstr(value, "device=")) { + strncpy(ib_param, strstr(value, "device=") + + strlen("device="), 6); + sscanf(ib_param, "%i", &device); + } + } +#else if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor", value, sizeof value) < 0) return NULL; @@ -414,6 +459,7 @@ static struct verbs_device *cxgb4_driver_init(const ch value, sizeof value) < 0) return NULL; sscanf(value, "%i", &device); +#endif for (i = 0; i < sizeof hca_table / sizeof hca_table[0]; ++i) if (vendor == hca_table[i].vendor && @@ -425,6 +471,11 @@ static struct verbs_device *cxgb4_driver_init(const ch found: c4iw_abi_version = abi_version; +#ifndef __linux__ + if (ibv_read_sysfs_file(dev_str, "firmware_version", + value, sizeof value) < 0) + return NULL; +#else /* * Verify that the firmware major number matches. Major number * mismatches are fatal. Minor number mismatches are tolerated. @@ -438,6 +489,7 @@ found: ibv_get_sysfs_path(), ibdev); if (ibv_read_sysfs_file(devstr, "fw_ver", value, sizeof value) < 0) return NULL; +#endif cp = strtok(value+1, "."); sscanf(cp, "%i", &fw_maj); Modified: projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/qp.c ============================================================================== --- projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/qp.c Tue Nov 7 19:12:20 2017 (r325521) +++ projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/qp.c Tue Nov 7 23:52:14 2017 (r325522) @@ -44,10 +44,13 @@ struct c4iw_stats c4iw_stats; static void copy_wr_to_sq(struct t4_wq *wq, union t4_wr *wqe, u8 len16) { - u64 *src, *dst; + void *src, *dst; + uintptr_t end; + int total, len; - src = (u64 *)wqe; - dst = (u64 *)((u8 *)wq->sq.queue + wq->sq.wq_pidx * T4_EQ_ENTRY_SIZE); + src = &wqe->flits[0]; + dst = &wq->sq.queue->flits[wq->sq.wq_pidx * + (T4_EQ_ENTRY_SIZE / sizeof(__be64))]; if (t4_sq_onchip(wq)) { len16 = align(len16, 4); @@ -57,17 +60,18 @@ static void copy_wr_to_sq(struct t4_wq *wq, union t4_w * happens */ mmio_wc_start(); } - while (len16) { - *dst++ = *src++; - if (dst == (u64 *)&wq->sq.queue[wq->sq.size]) - dst = (u64 *)wq->sq.queue; - *dst++ = *src++; - if (dst == (u64 *)&wq->sq.queue[wq->sq.size]) - dst = (u64 *)wq->sq.queue; - len16--; - /* NOTE len16 cannot be large enough to write to the - same sq.queue memory twice in this loop */ + /* NOTE len16 cannot be large enough to write to the + same sq.queue memory twice in this loop */ + total = len16 * 16; + end = (uintptr_t)&wq->sq.queue[wq->sq.size]; + if (__predict_true((uintptr_t)dst + total <= end)) { + /* Won't wrap around. */ + memcpy(dst, src, total); + } else { + len = end - (uintptr_t)dst; + memcpy(dst, src, len); + memcpy(wq->sq.queue, src + len, total - len); } if (t4_sq_onchip(wq)) @@ -76,18 +80,23 @@ static void copy_wr_to_sq(struct t4_wq *wq, union t4_w static void copy_wr_to_rq(struct t4_wq *wq, union t4_recv_wr *wqe, u8 len16) { - u64 *src, *dst; + void *src, *dst; + uintptr_t end; + int total, len; - src = (u64 *)wqe; - dst = (u64 *)((u8 *)wq->rq.queue + wq->rq.wq_pidx * T4_EQ_ENTRY_SIZE); - while (len16) { - *dst++ = *src++; - if (dst >= (u64 *)&wq->rq.queue[wq->rq.size]) - dst = (u64 *)wq->rq.queue; - *dst++ = *src++; - if (dst >= (u64 *)&wq->rq.queue[wq->rq.size]) - dst = (u64 *)wq->rq.queue; - len16--; + src = &wqe->flits[0]; + dst = &wq->rq.queue->flits[wq->rq.wq_pidx * + (T4_EQ_ENTRY_SIZE / sizeof(__be64))]; + + total = len16 * 16; + end = (uintptr_t)&wq->rq.queue[wq->rq.size]; + if (__predict_true((uintptr_t)dst + total <= end)) { + /* Won't wrap around. */ + memcpy(dst, src, total); + } else { + len = end - (uintptr_t)dst; + memcpy(dst, src, len); + memcpy(wq->rq.queue, src + len, total - len); } } Modified: projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/t4.h ============================================================================== --- projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/t4.h Tue Nov 7 19:12:20 2017 (r325521) +++ projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/t4.h Tue Nov 7 23:52:14 2017 (r325522) @@ -87,7 +87,7 @@ #define T4_MAX_CQ_DEPTH (T4_MAX_IQ_SIZE - 1) #define T4_MAX_NUM_STAG (1<<15) #define T4_MAX_MR_SIZE (~0ULL - 1) -#define T4_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */ +#define T4_PAGESIZE_MASK 0xffffffff000 /* 4KB-8TB */ #define T4_STAG_UNSET 0xffffffff #define T4_FW_MAJ 0 @@ -723,7 +723,7 @@ static inline void t4_reset_cq_in_error(struct t4_cq * struct t4_dev_status_page { u8 db_off; - u8 pad1; + u8 wc_supported; u16 pad2; u32 pad3; u64 qp_start; Modified: projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/verbs.c ============================================================================== --- projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/verbs.c Tue Nov 7 19:12:20 2017 (r325521) +++ projects/bsd_rdma_4_9/contrib/ofed/libcxgb4/verbs.c Tue Nov 7 23:52:14 2017 (r325522) @@ -468,7 +468,7 @@ static struct ibv_qp *create_qp(struct ibv_pd *pd, } qhp->wq.sq.queue = mmap(NULL, qhp->wq.sq.memsize, - PROT_WRITE, MAP_SHARED, + PROT_READ|PROT_WRITE, MAP_SHARED, pd->context->cmd_fd, resp.sq_key); if (qhp->wq.sq.queue == MAP_FAILED) goto err4; @@ -490,7 +490,7 @@ static struct ibv_qp *create_qp(struct ibv_pd *pd, qhp->wq.rq.udb += 2; } qhp->wq.rq.queue = mmap(NULL, qhp->wq.rq.memsize, - PROT_WRITE, MAP_SHARED, + PROT_READ|PROT_WRITE, MAP_SHARED, pd->context->cmd_fd, resp.rq_key); if (qhp->wq.rq.queue == MAP_FAILED) goto err6; Modified: projects/bsd_rdma_4_9/sys/compat/linuxkpi/common/include/linux/dma-mapping.h ============================================================================== --- projects/bsd_rdma_4_9/sys/compat/linuxkpi/common/include/linux/dma-mapping.h Tue Nov 7 19:12:20 2017 (r325521) +++ projects/bsd_rdma_4_9/sys/compat/linuxkpi/common/include/linux/dma-mapping.h Tue Nov 7 23:52:14 2017 (r325522) @@ -127,7 +127,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dm size_t align; void *mem; - if (dev->dma_mask) + if (dev != NULL && dev->dma_mask) high = *dev->dma_mask; else if (flag & GFP_DMA32) high = BUS_SPACE_MAXADDR_32BIT; Modified: projects/bsd_rdma_4_9/sys/dev/cxgbe/adapter.h ============================================================================== --- projects/bsd_rdma_4_9/sys/dev/cxgbe/adapter.h Tue Nov 7 19:12:20 2017 (r325521) +++ projects/bsd_rdma_4_9/sys/dev/cxgbe/adapter.h Tue Nov 7 23:52:14 2017 (r325522) @@ -801,6 +801,7 @@ struct adapter { void *tom_softc; /* (struct tom_data *) */ struct tom_tunables tt; + struct iw_tunables iwt; void *iwarp_softc; /* (struct c4iw_dev *) */ void *iscsi_ulp_softc; /* (struct cxgbei_data *) */ void *ccr_softc; /* (struct ccr_softc *) */ Modified: projects/bsd_rdma_4_9/sys/dev/cxgbe/common/common.h ============================================================================== --- projects/bsd_rdma_4_9/sys/dev/cxgbe/common/common.h Tue Nov 7 19:12:20 2017 (r325521) +++ projects/bsd_rdma_4_9/sys/dev/cxgbe/common/common.h Tue Nov 7 23:52:14 2017 (r325522) @@ -68,6 +68,8 @@ enum { FEC_RESERVED = 1 << 2, }; +enum t4_bar2_qtype { T4_BAR2_QTYPE_EGRESS, T4_BAR2_QTYPE_INGRESS }; + struct port_stats { u64 tx_octets; /* total # of octets in good frames */ u64 tx_frames; /* all good frames */ @@ -843,5 +845,8 @@ int t4vf_get_sge_params(struct adapter *adapter); int t4vf_get_rss_glb_config(struct adapter *adapter); int t4vf_get_vfres(struct adapter *adapter); int t4vf_prep_adapter(struct adapter *adapter); +int t4_bar2_sge_qregs(struct adapter *adapter, unsigned int qid, + enum t4_bar2_qtype qtype, int user, u64 *pbar2_qoffset, + unsigned int *pbar2_qid); #endif /* __CHELSIO_COMMON_H */ Modified: projects/bsd_rdma_4_9/sys/dev/cxgbe/common/t4_hw.c ============================================================================== --- projects/bsd_rdma_4_9/sys/dev/cxgbe/common/t4_hw.c Tue Nov 7 19:12:20 2017 (r325521) +++ projects/bsd_rdma_4_9/sys/dev/cxgbe/common/t4_hw.c Tue Nov 7 23:52:14 2017 (r325522) @@ -8081,6 +8081,98 @@ int t4_shutdown_adapter(struct adapter *adapter) } /** + * t4_bar2_sge_qregs - return BAR2 SGE Queue register information + * @adapter: the adapter + * @qid: the Queue ID + * @qtype: the Ingress or Egress type for @qid + * @user: true if this request is for a user mode queue + * @pbar2_qoffset: BAR2 Queue Offset + * @pbar2_qid: BAR2 Queue ID or 0 for Queue ID inferred SGE Queues + * + * Returns the BAR2 SGE Queue Registers information associated with the + * indicated Absolute Queue ID. These are passed back in return value + * pointers. @qtype should be T4_BAR2_QTYPE_EGRESS for Egress Queue + * and T4_BAR2_QTYPE_INGRESS for Ingress Queues. + * + * This may return an error which indicates that BAR2 SGE Queue + * registers aren't available. If an error is not returned, then the + * following values are returned: + * + * *@pbar2_qoffset: the BAR2 Offset of the @qid Registers + * *@pbar2_qid: the BAR2 SGE Queue ID or 0 of @qid + * + * If the returned BAR2 Queue ID is 0, then BAR2 SGE registers which + * require the "Inferred Queue ID" ability may be used. E.g. the + * Write Combining Doorbell Buffer. If the BAR2 Queue ID is not 0, + * then these "Inferred Queue ID" register may not be used. + */ +int t4_bar2_sge_qregs(struct adapter *adapter, + unsigned int qid, + enum t4_bar2_qtype qtype, + int user, + u64 *pbar2_qoffset, + unsigned int *pbar2_qid) +{ + unsigned int page_shift, page_size, qpp_shift, qpp_mask; + u64 bar2_page_offset, bar2_qoffset; + unsigned int bar2_qid, bar2_qid_offset, bar2_qinferred; + + /* T4 doesn't support BAR2 SGE Queue registers for kernel + * mode queues. + */ + if (!user && is_t4(adapter)) + return -EINVAL; + + /* Get our SGE Page Size parameters. + */ + page_shift = adapter->params.sge.page_shift; + page_size = 1 << page_shift; + + /* Get the right Queues per Page parameters for our Queue. + */ + qpp_shift = (qtype == T4_BAR2_QTYPE_EGRESS + ? adapter->params.sge.eq_s_qpp + : adapter->params.sge.iq_s_qpp); + qpp_mask = (1 << qpp_shift) - 1; + + /* Calculate the basics of the BAR2 SGE Queue register area: + * o The BAR2 page the Queue registers will be in. + * o The BAR2 Queue ID. + * o The BAR2 Queue ID Offset into the BAR2 page. + */ + bar2_page_offset = ((u64)(qid >> qpp_shift) << page_shift); + bar2_qid = qid & qpp_mask; + bar2_qid_offset = bar2_qid * SGE_UDB_SIZE; + + /* If the BAR2 Queue ID Offset is less than the Page Size, then the + * hardware will infer the Absolute Queue ID simply from the writes to + * the BAR2 Queue ID Offset within the BAR2 Page (and we need to use a + * BAR2 Queue ID of 0 for those writes). Otherwise, we'll simply + * write to the first BAR2 SGE Queue Area within the BAR2 Page with + * the BAR2 Queue ID and the hardware will infer the Absolute Queue ID + * from the BAR2 Page and BAR2 Queue ID. + * + * One important censequence of this is that some BAR2 SGE registers + * have a "Queue ID" field and we can write the BAR2 SGE Queue ID + * there. But other registers synthesize the SGE Queue ID purely + * from the writes to the registers -- the Write Combined Doorbell + * Buffer is a good example. These BAR2 SGE Registers are only + * available for those BAR2 SGE Register areas where the SGE Absolute + * Queue ID can be inferred from simple writes. + */ + bar2_qoffset = bar2_page_offset; + bar2_qinferred = (bar2_qid_offset < page_size); + if (bar2_qinferred) { + bar2_qoffset += bar2_qid_offset; + bar2_qid = 0; + } + + *pbar2_qoffset = bar2_qoffset; + *pbar2_qid = bar2_qid; + return 0; +} + +/** * t4_init_devlog_params - initialize adapter->params.devlog * @adap: the adapter * @fw_attach: whether we can talk to the firmware Modified: projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/cm.c ============================================================================== --- projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/cm.c Tue Nov 7 19:12:20 2017 (r325521) +++ projects/bsd_rdma_4_9/sys/dev/cxgbe/iw_cxgbe/cm.c Tue Nov 7 23:52:14 2017 (r325522) @@ -46,8 +46,11 @@ __FBSDID("$FreeBSD$"); #include <netinet/in_systm.h> #include <netinet/in_pcb.h> +#include <netinet6/in6_pcb.h> #include <netinet/ip.h> #include <netinet/in_fib.h> +#include <netinet6/in6_fib.h> +#include <netinet6/scope6_var.h> #include <netinet/ip_var.h> #include <netinet/tcp_var.h> #include <netinet/tcp.h> @@ -78,6 +81,8 @@ static struct work_struct c4iw_task; static struct workqueue_struct *c4iw_taskq; static LIST_HEAD(err_cqe_list); static spinlock_t err_cqe_lock; +static LIST_HEAD(listen_port_list); +static DEFINE_MUTEX(listen_port_mutex); static void process_req(struct work_struct *ctx); static void start_ep_timer(struct c4iw_ep *ep); @@ -85,12 +90,7 @@ static int stop_ep_timer(struct c4iw_ep *ep); static int set_tcpinfo(struct c4iw_ep *ep); static void process_timeout(struct c4iw_ep *ep); static void process_err_cqes(void); -static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc); -static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate); -static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate); static void *alloc_ep(int size, gfp_t flags); -static int find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port, - __be16 peer_port, u8 tos, struct nhop4_extended *pnh4); static void close_socket(struct socket *so); static int send_mpa_req(struct c4iw_ep *ep); static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen); @@ -120,6 +120,15 @@ static int process_terminate(struct c4iw_ep *ep); static int terminate(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m); static int add_ep_to_req_list(struct c4iw_ep *ep, int ep_events); +static struct listen_port_info * +add_ep_to_listenlist(struct c4iw_listen_ep *lep); +static int rem_ep_from_listenlist(struct c4iw_listen_ep *lep); +static struct c4iw_listen_ep * +find_real_listen_ep(struct c4iw_listen_ep *master_lep, struct socket *so); +static int get_ifnet_from_raddr(struct sockaddr_storage *raddr, + struct ifnet **ifp); +static void process_newconn(struct c4iw_listen_ep *master_lep, + struct socket *new_so); #define START_EP_TIMER(ep) \ do { \ CTR3(KTR_IW_CXGBE, "start_ep_timer (%s:%d) ep %p", \ @@ -134,6 +143,34 @@ static int add_ep_to_req_list(struct c4iw_ep *ep, int stop_ep_timer(ep); \ }) +#define GET_LOCAL_ADDR(pladdr, so) \ + do { \ + struct sockaddr_storage *__a = NULL; \ + struct inpcb *__inp = sotoinpcb(so); \ + KASSERT(__inp != NULL, \ + ("GET_LOCAL_ADDR(%s):so:%p, inp = NULL", __func__, so)); \ + if (__inp->inp_vflag & INP_IPV4) \ + in_getsockaddr(so, (struct sockaddr **)&__a); \ + else \ + in6_getsockaddr(so, (struct sockaddr **)&__a); \ + *(pladdr) = *__a; \ + free(__a, M_SONAME); \ + } while (0) + +#define GET_REMOTE_ADDR(praddr, so) \ + do { \ + struct sockaddr_storage *__a = NULL; \ + struct inpcb *__inp = sotoinpcb(so); \ + KASSERT(__inp != NULL, \ + ("GET_REMOTE_ADDR(%s):so:%p, inp = NULL", __func__, so)); \ + if (__inp->inp_vflag & INP_IPV4) \ + in_getpeeraddr(so, (struct sockaddr **)&__a); \ + else \ + in6_getpeeraddr(so, (struct sockaddr **)&__a); \ + *(praddr) = *__a; \ + free(__a, M_SONAME); \ + } while (0) + #ifdef KTR static char *states[] = { "idle", @@ -152,7 +189,6 @@ static char *states[] = { }; #endif - static void deref_cm_id(struct c4iw_ep_common *epc) { epc->cm_id->rem_ref(epc->cm_id); @@ -179,13 +215,184 @@ static void ref_qp(struct c4iw_ep *ep) set_bit(QP_REFED, &ep->com.history); c4iw_qp_add_ref(&ep->com.qp->ibqp); } +/* allocated per TCP port while listening */ +struct listen_port_info { + uint16_t port_num; /* TCP port address */ + struct list_head list; /* belongs to listen_port_list */ + struct list_head lep_list; /* per port lep list */ + uint32_t refcnt; /* number of lep's listening */ +}; +/* + * Following two lists are used to manage INADDR_ANY listeners: + * 1)listen_port_list + * 2)lep_list + * + * Below is the INADDR_ANY listener lists overview on a system with a two port + * adapter: + * |------------------| + * |listen_port_list | + * |------------------| + * | + * | |-----------| |-----------| + * | | port_num:X| | port_num:X| + * |--------------|-list------|-------|-list------|-------.... + * | lep_list----| | lep_list----| + * | refcnt | | | refcnt | | + * | | | | | | + * | | | | | | + * |-----------| | |-----------| | + * | | + * | | + * | | + * | | lep1 lep2 + * | | |----------------| |----------------| + * | |----| listen_ep_list |----| listen_ep_list | + * | |----------------| |----------------| + * | + * | + * | lep1 lep2 + * | |----------------| |----------------| + * |---| listen_ep_list |----| listen_ep_list | + * |----------------| |----------------| + * + * Because of two port adapter, the number of lep's are two(lep1 & lep2) for + * each TCP port number. + * + * Here 'lep1' is always marked as Master lep, because solisten() is always + * called through first lep. + * + */ +static struct listen_port_info * +add_ep_to_listenlist(struct c4iw_listen_ep *lep) +{ + uint16_t port; + struct listen_port_info *port_info = NULL; + struct sockaddr_storage *laddr = &lep->com.local_addr; + + port = (laddr->ss_family == AF_INET) ? + ((struct sockaddr_in *)laddr)->sin_port : + ((struct sockaddr_in6 *)laddr)->sin6_port; + + mutex_lock(&listen_port_mutex); + + list_for_each_entry(port_info, &listen_port_list, list) + if (port_info->port_num == port) + goto found_port; + + port_info = malloc(sizeof(*port_info), M_CXGBE, M_WAITOK); + port_info->port_num = port; + port_info->refcnt = 0; + + list_add_tail(&port_info->list, &listen_port_list); + INIT_LIST_HEAD(&port_info->lep_list); + +found_port: + port_info->refcnt++; + list_add_tail(&lep->listen_ep_list, &port_info->lep_list); + mutex_unlock(&listen_port_mutex); + return port_info; +} + +static int +rem_ep_from_listenlist(struct c4iw_listen_ep *lep) +{ + uint16_t port; + struct listen_port_info *port_info = NULL; + struct sockaddr_storage *laddr = &lep->com.local_addr; + int refcnt = 0; + + port = (laddr->ss_family == AF_INET) ? + ((struct sockaddr_in *)laddr)->sin_port : + ((struct sockaddr_in6 *)laddr)->sin6_port; + + mutex_lock(&listen_port_mutex); + + /* get the port_info structure based on the lep's port address */ + list_for_each_entry(port_info, &listen_port_list, list) { + if (port_info->port_num == port) { + port_info->refcnt--; + refcnt = port_info->refcnt; + /* remove the current lep from the listen list */ + list_del(&lep->listen_ep_list); + if (port_info->refcnt == 0) { + /* Remove this entry from the list as there + * are no more listeners for this port_num. + */ + list_del(&port_info->list); + kfree(port_info); + } + break; + } + } + mutex_unlock(&listen_port_mutex); + return refcnt; +} + +/* + * Find the lep that belongs to the ifnet on which the SYN frame was received. + */ +struct c4iw_listen_ep * +find_real_listen_ep(struct c4iw_listen_ep *master_lep, struct socket *so) +{ + struct adapter *adap = NULL; + struct c4iw_listen_ep *lep = NULL; + struct sockaddr_storage remote = { 0 }; + struct ifnet *new_conn_ifp = NULL; + struct listen_port_info *port_info = NULL; + int err = 0, i = 0, + found_portinfo = 0, found_lep = 0; + uint16_t port; + + /* STEP 1: get 'ifnet' based on socket's remote address */ + GET_REMOTE_ADDR(&remote, so); + + err = get_ifnet_from_raddr(&remote, &new_conn_ifp); + if (err) { + CTR4(KTR_IW_CXGBE, "%s: Failed to get ifnet, sock %p, " + "master_lep %p err %d", + __func__, so, master_lep, err); + return (NULL); + } + + /* STEP 2: Find 'port_info' with listener local port address. */ + port = (master_lep->com.local_addr.ss_family == AF_INET) ? + ((struct sockaddr_in *)&master_lep->com.local_addr)->sin_port : + ((struct sockaddr_in6 *)&master_lep->com.local_addr)->sin6_port; + + + mutex_lock(&listen_port_mutex); + list_for_each_entry(port_info, &listen_port_list, list) + if (port_info->port_num == port) { + found_portinfo =1; + break; + } + if (!found_portinfo) + goto out; + + /* STEP 3: Traverse through list of lep's that are bound to the current + * TCP port address and find the lep that belongs to the ifnet on which + * the SYN frame was received. + */ + list_for_each_entry(lep, &port_info->lep_list, listen_ep_list) { + adap = lep->com.dev->rdev.adap; + for_each_port(adap, i) { + if (new_conn_ifp == adap->port[i]->vi[0].ifp) { + found_lep =1; + goto out; + } + } + } +out: + mutex_unlock(&listen_port_mutex); + return found_lep ? lep : (NULL); +} + static void process_timeout(struct c4iw_ep *ep) { - struct c4iw_qp_attributes attrs; + struct c4iw_qp_attributes attrs = {0}; int abort = 1; - mutex_lock(&ep->com.mutex); CTR4(KTR_IW_CXGBE, "%s ep :%p, tid:%u, state %d", __func__, ep, ep->hwtid, ep->com.state); set_bit(TIMEDOUT, &ep->com.history); @@ -221,7 +428,6 @@ static void process_timeout(struct c4iw_ep *ep) , __func__, ep, ep->hwtid, ep->com.state); abort = 0; } - mutex_unlock(&ep->com.mutex); if (abort) c4iw_ep_disconnect(ep, 1, GFP_KERNEL); c4iw_put_ep(&ep->com); @@ -273,14 +479,16 @@ process_req(struct work_struct *ctx) ep_events = epc->ep_events; epc->ep_events = 0; spin_unlock_irqrestore(&req_lock, flag); - CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, events 0x%x", __func__, - epc->so, epc, ep_events); + mutex_lock(&epc->mutex); + CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, ep_state %s events 0x%x", + __func__, epc->so, epc, states[epc->state], ep_events); if (ep_events & C4IW_EVENT_TERM) process_terminate((struct c4iw_ep *)epc); if (ep_events & C4IW_EVENT_TIMEOUT) process_timeout((struct c4iw_ep *)epc); if (ep_events & C4IW_EVENT_SOCKET) process_socket_event((struct c4iw_ep *)epc); + mutex_unlock(&epc->mutex); c4iw_put_ep(epc); process_err_cqes(); spin_lock_irqsave(&req_lock, flag); @@ -321,55 +529,67 @@ done: return (rc); } - static int -find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port, - __be16 peer_port, u8 tos, struct nhop4_extended *pnh4) +get_ifnet_from_raddr(struct sockaddr_storage *raddr, struct ifnet **ifp) { - struct in_addr addr; - int err; + int err = 0; - CTR5(KTR_IW_CXGBE, "%s:frtB %x, %x, %d, %d", __func__, local_ip, - peer_ip, ntohs(local_port), ntohs(peer_port)); + if (raddr->ss_family == AF_INET) { + struct sockaddr_in *raddr4 = (struct sockaddr_in *)raddr; + struct nhop4_extended nh4 = {0}; - addr.s_addr = peer_ip; - err = fib4_lookup_nh_ext(RT_DEFAULT_FIB, addr, NHR_REF, 0, pnh4); + err = fib4_lookup_nh_ext(RT_DEFAULT_FIB, raddr4->sin_addr, + NHR_REF, 0, &nh4); + *ifp = nh4.nh_ifp; + if (err) + fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4); + } else { + struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)raddr; + struct nhop6_extended nh6 = {0}; + struct in6_addr addr6; + uint32_t scopeid; - CTR2(KTR_IW_CXGBE, "%s:frtE %d", __func__, err); + memset(&addr6, 0, sizeof(addr6)); + in6_splitscope((struct in6_addr *)&raddr6->sin6_addr, + &addr6, &scopeid); + err = fib6_lookup_nh_ext(RT_DEFAULT_FIB, &addr6, scopeid, + NHR_REF, 0, &nh6); + *ifp = nh6.nh_ifp; + if (err) + fib6_free_nh_ext(RT_DEFAULT_FIB, &nh6); + } + + CTR2(KTR_IW_CXGBE, "%s: return: %d", __func__, err); return err; } static void close_socket(struct socket *so) { - uninit_iwarp_socket(so); - sodisconnect(so); + soclose(so); } static void process_peer_close(struct c4iw_ep *ep) { - struct c4iw_qp_attributes attrs; + struct c4iw_qp_attributes attrs = {0}; int disconnect = 1; int release = 0; CTR4(KTR_IW_CXGBE, "%s:ppcB ep %p so %p state %s", __func__, ep, ep->com.so, states[ep->com.state]); - mutex_lock(&ep->com.mutex); switch (ep->com.state) { case MPA_REQ_WAIT: - CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT CLOSING", + CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT DEAD", __func__, ep); - __state_set(&ep->com, CLOSING); - break; - + /* Fallthrough */ case MPA_REQ_SENT: - CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT CLOSING", + CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT DEAD", __func__, ep); - __state_set(&ep->com, DEAD); + ep->com.state = DEAD; connect_reply_upcall(ep, -ECONNABORTED); disconnect = 0; @@ -388,21 +608,20 @@ process_peer_close(struct c4iw_ep *ep) */ CTR2(KTR_IW_CXGBE, "%s:ppc3 %p MPA_REQ_RCVD CLOSING", __func__, ep); - __state_set(&ep->com, CLOSING); - c4iw_get_ep(&ep->com); + ep->com.state = CLOSING; break; case MPA_REP_SENT: CTR2(KTR_IW_CXGBE, "%s:ppc4 %p MPA_REP_SENT CLOSING", __func__, ep); - __state_set(&ep->com, CLOSING); + ep->com.state = CLOSING; break; case FPDU_MODE: CTR2(KTR_IW_CXGBE, "%s:ppc5 %p FPDU_MODE CLOSING", __func__, ep); START_EP_TIMER(ep); - __state_set(&ep->com, CLOSING); + ep->com.state = CLOSING; attrs.next_state = C4IW_QP_STATE_CLOSING; c4iw_modify_qp(ep->com.dev, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); @@ -418,7 +637,7 @@ process_peer_close(struct c4iw_ep *ep) case CLOSING: CTR2(KTR_IW_CXGBE, "%s:ppc7 %p CLOSING MORIBUND", __func__, ep); - __state_set(&ep->com, MORIBUND); + ep->com.state = MORIBUND; disconnect = 0; break; @@ -433,7 +652,7 @@ process_peer_close(struct c4iw_ep *ep) } close_socket(ep->com.so); close_complete_upcall(ep, 0); - __state_set(&ep->com, DEAD); + ep->com.state = DEAD; release = 1; disconnect = 0; break; @@ -450,7 +669,6 @@ process_peer_close(struct c4iw_ep *ep) break; } - mutex_unlock(&ep->com.mutex); if (disconnect) { @@ -469,11 +687,10 @@ process_peer_close(struct c4iw_ep *ep) static void process_conn_error(struct c4iw_ep *ep) { - struct c4iw_qp_attributes attrs; + struct c4iw_qp_attributes attrs = {0}; int ret; int state; - mutex_lock(&ep->com.mutex); state = ep->com.state; CTR5(KTR_IW_CXGBE, "%s:pceB ep %p so %p so->so_error %u state %s", __func__, ep, ep->com.so, ep->com.so->so_error, @@ -483,6 +700,7 @@ process_conn_error(struct c4iw_ep *ep) case MPA_REQ_WAIT: STOP_EP_TIMER(ep); + c4iw_put_ep(&ep->parent_ep->com); break; case MPA_REQ_SENT: @@ -496,13 +714,6 @@ process_conn_error(struct c4iw_ep *ep) break; case MPA_REQ_RCVD: - - /* - * We're gonna mark this puppy DEAD, but keep - * the reference on it until the ULP accepts or - * rejects the CR. - */ - c4iw_get_ep(&ep->com); break; case MORIBUND: @@ -531,7 +742,6 @@ process_conn_error(struct c4iw_ep *ep) case DEAD: CTR2(KTR_IW_CXGBE, "%s so_error %d IN DEAD STATE!!!!", __func__, ep->com.so->so_error); - mutex_unlock(&ep->com.mutex); return; default: @@ -541,10 +751,9 @@ process_conn_error(struct c4iw_ep *ep) if (state != ABORTING) { close_socket(ep->com.so); - __state_set(&ep->com, DEAD); + ep->com.state = DEAD; c4iw_put_ep(&ep->com); } - mutex_unlock(&ep->com.mutex); CTR2(KTR_IW_CXGBE, "%s:pceE %p", __func__, ep); return; } @@ -552,14 +761,13 @@ process_conn_error(struct c4iw_ep *ep) static void process_close_complete(struct c4iw_ep *ep) { - struct c4iw_qp_attributes attrs; + struct c4iw_qp_attributes attrs = {0}; int release = 0; CTR4(KTR_IW_CXGBE, "%s:pccB ep %p so %p state %s", __func__, ep, ep->com.so, states[ep->com.state]); /* The cm_id may be null if we failed to connect */ - mutex_lock(&ep->com.mutex); set_bit(CLOSE_CON_RPL, &ep->com.history); switch (ep->com.state) { @@ -567,7 +775,7 @@ process_close_complete(struct c4iw_ep *ep) case CLOSING: CTR2(KTR_IW_CXGBE, "%s:pcc1 %p CLOSING MORIBUND", __func__, ep); - __state_set(&ep->com, MORIBUND); + ep->com.state = MORIBUND; break; case MORIBUND: @@ -588,7 +796,7 @@ process_close_complete(struct c4iw_ep *ep) close_socket(ep->com.so); close_complete_upcall(ep, 0); - __state_set(&ep->com, DEAD); + ep->com.state = DEAD; release = 1; break; @@ -605,12 +813,11 @@ process_close_complete(struct c4iw_ep *ep) panic("%s:pcc6 %p unknown ep state", __func__, ep); break; } - mutex_unlock(&ep->com.mutex); if (release) { CTR2(KTR_IW_CXGBE, "%s:pcc8 %p", __func__, ep); - c4iw_put_ep(&ep->com); + release_ep_resources(ep); } CTR2(KTR_IW_CXGBE, "%s:pccE %p", __func__, ep); return; @@ -639,49 +846,56 @@ setiwsockopt(struct socket *so) static void init_iwarp_socket(struct socket *so, void *arg) { - - SOCKBUF_LOCK(&so->so_rcv); - soupcall_set(so, SO_RCV, c4iw_so_upcall, arg); - so->so_state |= SS_NBIO; - SOCKBUF_UNLOCK(&so->so_rcv); + if (SOLISTENING(so)) { + SOLISTEN_LOCK(so); + solisten_upcall_set(so, c4iw_so_upcall, arg); + so->so_state |= SS_NBIO; + SOLISTEN_UNLOCK(so); + } else { + SOCKBUF_LOCK(&so->so_rcv); + soupcall_set(so, SO_RCV, c4iw_so_upcall, arg); + so->so_state |= SS_NBIO; + SOCKBUF_UNLOCK(&so->so_rcv); + } } static void uninit_iwarp_socket(struct socket *so) { - - SOCKBUF_LOCK(&so->so_rcv); - soupcall_clear(so, SO_RCV); - SOCKBUF_UNLOCK(&so->so_rcv); + if (SOLISTENING(so)) { + SOLISTEN_LOCK(so); + solisten_upcall_set(so, NULL, NULL); + SOLISTEN_UNLOCK(so); + } else { + SOCKBUF_LOCK(&so->so_rcv); + soupcall_clear(so, SO_RCV); + SOCKBUF_UNLOCK(&so->so_rcv); + } } static void process_data(struct c4iw_ep *ep) { - struct sockaddr_in *local, *remote; int disconnect = 0; CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sbused %d", __func__, ep->com.so, ep, states[ep->com.state], sbused(&ep->com.so->so_rcv)); - switch (state_read(&ep->com)) { + switch (ep->com.state) { case MPA_REQ_SENT: disconnect = process_mpa_reply(ep); break; case MPA_REQ_WAIT: - in_getsockaddr(ep->com.so, (struct sockaddr **)&local); - in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote); - ep->com.local_addr = *local; - ep->com.remote_addr = *remote; - free(local, M_SONAME); - free(remote, M_SONAME); disconnect = process_mpa_request(ep); + if (disconnect) + /* Refered in process_newconn() */ + c4iw_put_ep(&ep->parent_ep->com); break; default: if (sbused(&ep->com.so->so_rcv)) log(LOG_ERR, "%s: Unexpected streaming data. ep %p, " "state %d, so %p, so_state 0x%x, sbused %u\n", - __func__, ep, state_read(&ep->com), ep->com.so, *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201711072352.vA7NqEHU046048>