Date: Wed, 15 Dec 2010 01:08:19 +0000 (UTC)
From: Jeff Roberson <jeff@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject: svn commit: r216450 - projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp
Message-ID: <201012150108.oBF18JYg036405@svn.freebsd.org>
Author: jeff Date: Wed Dec 15 01:08:19 2010 New Revision: 216450 URL: http://svn.freebsd.org/changeset/base/216450 Log: Initial port and rewrite of Sockets Direct Protocol (IB socket layer) - sdp_main.c was GPL polluted as a result of copy & paste code from linux tcp sources. It was rewritten from BSD tcp sources and copyrights were properly retained. - Remaining files are dual BSD/GPL licensed and appear to be free of unsafe copy & paste code. Most linuxisms removed although the wrapper layer is still included as it must be for the rdma/* includes. Sponsored by: Isilon Systems, iX Systems, and Panasas. Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp.h projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_bcopy.c projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_cma.c projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_dbg.h projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_zcopy.c Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp.h ============================================================================== --- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp.h Tue Dec 14 21:33:17 2010 (r216449) +++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp.h Wed Dec 15 01:08:19 2010 (r216450) @@ -1,16 +1,66 @@ #ifndef _SDP_H_ #define _SDP_H_ +#include "opt_ddb.h" +#include "opt_inet.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <sys/sysctl.h> +#include <sys/mbuf.h> +#include <sys/lock.h> +#include <sys/rwlock.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/protosw.h> +#include <sys/proc.h> +#include <sys/jail.h> +#include <sys/domain.h> + +#ifdef DDB +#include <ddb/ddb.h> +#endif + +#include <net/if.h> +#include <net/route.h> +#include <net/vnet.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/in_var.h> +#include <netinet/tcp.h> +#include <netinet/tcp_fsm.h> +#include <netinet/tcp_timer.h> + +#include <linux/device.h> +#include <linux/err.h> +#include <linux/sched.h> #include <linux/workqueue.h> #include <linux/wait.h> -#include <net/inet_sock.h> -#include <net/tcp.h> /* For urgent data flags */ +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/pci.h> + #include <rdma/ib_verbs.h> -#include <linux/sched.h> #include <rdma/rdma_cm.h> #include <rdma/ib_cm.h> +#include <rdma/sdp_socket.h> +#include <rdma/ib_fmr_pool.h> + +#define CONFIG_INFINIBAND_SDP_DEBUG 1 +#define CONFIG_INFINIBAND_SDP_DEBUG_DATA 1 + #include "sdp_dbg.h" +#undef LIST_HEAD +/* From sys/queue.h */ +#define LIST_HEAD(name, type) \ +struct name { \ + struct type *lh_first; /* first element */ \ +} + /* Interval between sucessive polls in the Tx routine when polling is used instead of interrupts (in per-core Tx rings) - should be power of 2 */ #define SDP_TX_POLL_MODER 16 @@ -40,12 +90,13 @@ #define SDP_MAX_SEND_SGES 9 /* same as above */ /* mb inlined data len - rest will be rx'ed into frags */ -#define SDP_SKB_HEAD_SIZE (0x500 + sizeof(struct sdp_bsdh)) +#define SDP_HEAD_SIZE (sizeof(struct sdp_bsdh)) /* limit tx payload len, if the sink supports bigger buffers than the source * can handle. 
* or rx fragment size (limited by sge->length size) */ -#define SDP_MAX_PAYLOAD ((1 << 16) - SDP_SKB_HEAD_SIZE) +#define SDP_MAX_PACKET (1 << 16) +#define SDP_MAX_PAYLOAD (SDP_MAX_PACKET - SDP_HEAD_SIZE) #define SDP_NUM_WC 4 @@ -66,13 +117,14 @@ struct sdp_mb_cb { __u32 seq; /* Starting sequence number */ - __u32 end_seq; /* SEQ + FIN + SYN + datalen */ - __u8 flags; /* TCP header flags. */ struct bzcopy_state *bz; struct rx_srcavail_state *rx_sa; struct tx_srcavail_state *tx_sa; }; +#define M_PUSH M_PROTO1 /* Do a 'push'. */ +#define M_URG M_PROTO2 /* Mark as urgent (oob). */ + #define SDP_SKB_CB(__mb) ((struct sdp_mb_cb *)&((__mb)->cb[0])) #define BZCOPY_STATE(mb) (SDP_SKB_CB(mb)->bz) #define RX_SRCAVAIL_STATE(mb) (SDP_SKB_CB(mb)->rx_sa) @@ -87,29 +139,17 @@ struct sdp_mb_cb { #define ring_posted(ring) (ring_head(ring) - ring_tail(ring)) #define rx_ring_posted(ssk) ring_posted(ssk->rx_ring) +#ifdef SDP_ZCOPY #define tx_ring_posted(ssk) (ring_posted(ssk->tx_ring) + \ (ssk->tx_ring.rdma_inflight ? ssk->tx_ring.rdma_inflight->busy : 0)) - -#define posts_handler(ssk) atomic_read(&ssk->somebody_is_doing_posts) -#define posts_handler_get(ssk) atomic_inc(&ssk->somebody_is_doing_posts) -#define posts_handler_put(ssk) do {\ - atomic_dec(&ssk->somebody_is_doing_posts); \ - sdp_do_posts(ssk); \ -} while (0) +#else +#define tx_ring_posted(ssk) ring_posted(ssk->tx_ring) +#endif extern int sdp_zcopy_thresh; -extern struct workqueue_struct *sdp_wq; -extern struct list_head sock_list; -extern spinlock_t sock_list_lock; extern int rcvbuf_initial_size; -extern struct proto sdp_proto; extern struct workqueue_struct *rx_comp_wq; -extern atomic_t sdp_current_mem_usage; -extern spinlock_t sdp_large_sockets_lock; extern struct ib_client sdp_client; -#ifdef SDPSTATS_ON -DECLARE_PER_CPU(struct sdpstats, sdpstats); -#endif enum sdp_mid { SDP_MID_HELLO = 0x0, @@ -264,7 +304,9 @@ struct tx_srcavail_state { }; struct sdp_tx_ring { +#ifdef SDP_ZCOPY struct rx_srcavail_state *rdma_inflight; +#endif struct sdp_buf *buffer; atomic_t head; atomic_t tail; @@ -274,8 +316,7 @@ struct sdp_tx_ring { atomic_t credits; #define tx_credits(ssk) (atomic_read(&ssk->tx_ring.credits)) - struct timer_list timer; - struct tasklet_struct tasklet; + struct callout timer; u16 poll_cnt; }; @@ -286,9 +327,7 @@ struct sdp_rx_ring { struct ib_cq *cq; int destroyed; - rwlock_t destroyed_lock; - - struct tasklet_struct tasklet; + struct rwlock destroyed_lock; }; struct sdp_device { @@ -318,68 +357,49 @@ struct sdp_moderation { int moder_time; }; +#define SDP_TIMEWAIT 0x0001 /* In ssk timewait state. */ +#define SDP_DROPPED 0x0002 /* Socket has been dropped. */ +#define SDP_SOCKREF 0x0004 /* Holding a sockref for close. */ +#define SDP_NODELAY 0x0008 /* Disble nagle. */ +#define SDP_NEEDFIN 0x0010 /* Send a fin on the next tx. */ +#define SDP_DREQWAIT 0x0020 /* Waiting on DREQ. */ +#define SDP_HAVEOOB 0x0040 /* Have OOB data. */ +#define SDP_HADOOB 0x0080 /* Had OOB data. */ +#define SDP_DESTROY 0x0100 /* Being destroyed. 
*/ + struct sdp_sock { - /* sk has to be the first member of inet_sock */ - struct inet_sock isk; - struct list_head sock_list; - struct list_head accept_queue; - struct list_head backlog_queue; - struct mbuf_head rx_ctl_q; - struct socket *parent; + LIST_ENTRY(sdp_sock) list; + struct socket *socket; + struct rdma_cm_id *id; + struct ib_device *ib_device; struct sdp_device *sdp_dev; - - int qp_active; - struct tx_srcavail_state *tx_sa; - struct rx_srcavail_state *rx_sa; - spinlock_t tx_sa_lock; - struct delayed_work srcavail_cancel_work; - int srcavail_cancel_mseq; - + struct ib_qp *qp; + struct ucred *cred; + struct callout keep2msl; /* 2msl and keepalive timer. */ + struct callout nagle_timer; /* timeout waiting for ack */ struct ib_ucontext context; - - int max_sge; - - struct work_struct rx_comp_work; - wait_queue_head_t wq; - - struct delayed_work dreq_wait_work; - struct work_struct destroy_work; - - int tx_compl_pending; - atomic_t somebody_is_doing_posts; - - /* Like tcp_sock */ - u16 urg_data; - u32 urg_seq; - u32 copied_seq; -#define rcv_nxt(ssk) atomic_read(&(ssk->rcv_nxt)) - atomic_t rcv_nxt; - - int write_seq; - int pushed_seq; + in_port_t lport; + in_addr_t laddr; + in_port_t fport; + in_addr_t faddr; + int flags; + int state; + int softerror; + int recv_bytes; /* Bytes per recv. buf including header */ int xmit_size_goal; - int nonagle; - - int dreq_wait_timeout; - - unsigned keepalive_time; - - spinlock_t lock; - - /* tx_head/rx_head when keepalive timer started */ - unsigned keepalive_tx_head; - unsigned keepalive_rx_head; - - int destructed_already; - int sdp_disconnect; - int destruct_in_process; + char iobc; struct sdp_rx_ring rx_ring; struct sdp_tx_ring tx_ring; + struct rwlock lock; + struct mbuf *rx_ctl_q; + struct mbuf *rx_ctl_tail; - /* Data below will be reset on error */ - struct rdma_cm_id *id; - struct ib_device *ib_device; + int qp_active; /* XXX Flag. 
*/ + int max_sge; + struct work_struct rx_comp_work; +#define rcv_nxt(ssk) atomic_read(&(ssk->rcv_nxt)) + atomic_t rcv_nxt; /* SDP specific */ atomic_t mseq_ack; @@ -388,39 +408,42 @@ struct sdp_sock { unsigned min_bufs; /* Low water mark to wake senders */ unsigned long nagle_last_unacked; /* mseq of lastest unacked packet */ - struct timer_list nagle_timer; /* timeout waiting for ack */ atomic_t remote_credits; #define remote_credits(ssk) (atomic_read(&ssk->remote_credits)) int poll_cq; - /* rdma specific */ - struct ib_qp *qp; - /* SDP slow start */ - int rcvbuf_scale; /* local recv buf scale for each socket */ - int sent_request_head; /* mark the tx_head of the last send resize - request */ - int sent_request; /* 0 - not sent yet, 1 - request pending - -1 - resize done succesfully */ int recv_request_head; /* mark the rx_head when the resize request was recieved */ - int recv_request; /* flag if request to resize was recieved */ - int recv_frags; /* max mb frags in recv packets */ - int send_frags; /* max mb frags in send packets */ + int recv_request; /* XXX flag if request to resize was recieved */ unsigned long tx_packets; unsigned long rx_packets; unsigned long tx_bytes; unsigned long rx_bytes; struct sdp_moderation auto_mod; - +#ifdef SDP_ZCOPY + struct tx_srcavail_state *tx_sa; + struct rx_srcavail_state *rx_sa; + spinlock_t tx_sa_lock; + struct delayed_work srcavail_cancel_work; + int srcavail_cancel_mseq; /* ZCOPY data: -1:use global; 0:disable zcopy; >0: zcopy threshold */ int zcopy_thresh; - - int last_bind_err; +#endif }; +#define sdp_sk(so) ((struct sdp_sock *)(so->so_pcb)) + +#define SDP_RLOCK(ssk) rw_rlock(&(ssk)->lock) +#define SDP_WLOCK(ssk) rw_wlock(&(ssk)->lock) +#define SDP_RUNLOCK(ssk) rw_runlock(&(ssk)->lock) +#define SDP_WUNLOCK(ssk) rw_wunlock(&(ssk)->lock) +#define SDP_WLOCK_ASSERT(ssk) rw_assert(&(ssk)->lock, RA_WLOCKED) +#define SDP_RLOCK_ASSERT(ssk) rw_assert(&(ssk)->lock, RA_RLOCKED) +#define SDP_LOCK_ASSERT(ssk) rw_assert(&(ssk)->lock, RA_LOCKED) + static inline void tx_sa_reset(struct tx_srcavail_state *tx_sa) { memset((void *)&tx_sa->busy, 0, @@ -429,12 +452,12 @@ static inline void tx_sa_reset(struct tx static inline void rx_ring_unlock(struct sdp_rx_ring *rx_ring) { - read_unlock_bh(&rx_ring->destroyed_lock); + rw_runlock(&rx_ring->destroyed_lock); } static inline int rx_ring_trylock(struct sdp_rx_ring *rx_ring) { - read_lock_bh(&rx_ring->destroyed_lock); + rw_rlock(&rx_ring->destroyed_lock); if (rx_ring->destroyed) { rx_ring_unlock(rx_ring); return 0; @@ -444,76 +467,26 @@ static inline int rx_ring_trylock(struct static inline void rx_ring_destroy_lock(struct sdp_rx_ring *rx_ring) { - write_lock_bh(&rx_ring->destroyed_lock); + rw_wlock(&rx_ring->destroyed_lock); rx_ring->destroyed = 1; - write_unlock_bh(&rx_ring->destroyed_lock); -} - -static inline struct sdp_sock *sdp_sk(const struct socket *sk) -{ - return (struct sdp_sock *)sk; -} - -static inline int _sdp_exch_state(const char *func, int line, struct socket *sk, - int from_states, int state) -{ - unsigned long flags; - int old; - - spin_lock_irqsave(&sdp_sk(sk)->lock, flags); - - sdp_dbg(sk, "%s:%d - set state: %s -> %s 0x%x\n", func, line, - sdp_state_str(sk->sk_state), - sdp_state_str(state), from_states); - - if ((1 << sk->sk_state) & ~from_states) { - sdp_warn(sk, "trying to exchange state from unexpected state " - "%s to state %s. 
expected states: 0x%x\n", - sdp_state_str(sk->sk_state), sdp_state_str(state), - from_states); - } - - old = sk->sk_state; - sk->sk_state = state; - - spin_unlock_irqrestore(&sdp_sk(sk)->lock, flags); - - return old; -} -#define sdp_exch_state(sk, from_states, state) \ - _sdp_exch_state(__func__, __LINE__, sk, from_states, state) - -static inline void sdp_set_error(struct socket *sk, int err) -{ - int ib_teardown_states = TCPF_FIN_WAIT1 | TCPF_CLOSE_WAIT - | TCPF_LAST_ACK; - sk->sk_err = -err; - if (sk->sk_socket) - sk->sk_socket->state = SS_DISCONNECTING; - - if ((1 << sk->sk_state) & ib_teardown_states) - sdp_exch_state(sk, ib_teardown_states, TCPS_TIME_WAIT); - else - sdp_exch_state(sk, ~0, TCPS_CLOSED); - - sk->sk_error_report(sk); + rw_wunlock(&rx_ring->destroyed_lock); } -static inline void sdp_arm_rx_cq(struct socket *sk) +static inline void sdp_arm_rx_cq(struct sdp_sock *ssk) { - sdp_prf(sk, NULL, "Arming RX cq"); - sdp_dbg_data(sk, "Arming RX cq\n"); + sdp_prf(ssk->socket, NULL, "Arming RX cq"); + sdp_dbg_data(ssk->socket, "Arming RX cq\n"); - ib_req_notify_cq(sdp_sk(sk)->rx_ring.cq, IB_CQ_NEXT_COMP); + ib_req_notify_cq(ssk->rx_ring.cq, IB_CQ_NEXT_COMP); } -static inline void sdp_arm_tx_cq(struct socket *sk) +static inline void sdp_arm_tx_cq(struct sdp_sock *ssk) { - sdp_prf(sk, NULL, "Arming TX cq"); - sdp_dbg_data(sk, "Arming TX cq. credits: %d, posted: %d\n", - tx_credits(sdp_sk(sk)), tx_ring_posted(sdp_sk(sk))); + sdp_prf(ssk->socket, NULL, "Arming TX cq"); + sdp_dbg_data(ssk->socket, "Arming TX cq. credits: %d, posted: %d\n", + tx_credits(ssk), tx_ring_posted(ssk)); - ib_req_notify_cq(sdp_sk(sk)->tx_ring.cq, IB_CQ_NEXT_COMP); + ib_req_notify_cq(ssk->tx_ring.cq, IB_CQ_NEXT_COMP); } /* return the min of: @@ -557,90 +530,68 @@ static inline char *mid2str(int mid) return mid2str[mid]; } -static inline struct mbuf *sdp_stream_alloc_mb(struct socket *sk, int size, - gfp_t gfp) -{ - struct mbuf *mb; - - /* The TCP header must be at least 32-bit aligned. */ - size = ALIGN(size, 4); - - mb = alloc_mb_fclone(size + sk->sk_prot->max_header, gfp); - if (mb) { - if (sk_wmem_schedule(sk, mb->truesize)) { - /* - * Make sure that we have exactly size bytes - * available to the caller, no more, no less. 
- */ - mb_reserve(mb, mb_tailroom(mb) - size); - return mb; - } - m_freem(mb); - } else { - sk->sk_prot->enter_memory_pressure(sk); - sk_stream_moderate_sndbuf(sk); - } - return NULL; -} - -static inline struct mbuf *sdp_alloc_mb(struct socket *sk, u8 mid, int size, - gfp_t gfp) +static inline struct mbuf * +sdp_alloc_mb(struct socket *sk, u8 mid, int size, int wait) { struct sdp_bsdh *h; struct mbuf *mb; - if (!gfp) { - if (unlikely(sk->sk_allocation)) - gfp = sk->sk_allocation; - else - gfp = GFP_KERNEL; - } - - mb = sdp_stream_alloc_mb(sk, sizeof(struct sdp_bsdh) + size, gfp); - BUG_ON(!mb); - - mb_header_release(mb); - - h = (struct sdp_bsdh *)mb_push(mb, sizeof *h); + MGETHDR(mb, wait, MT_DATA); + if (mb == NULL) + return (NULL); + mb->m_pkthdr.len = mb->m_len = sizeof(struct sdp_bsdh); + h = mtod(mb, struct sdp_bsdh *); h->mid = mid; - mb_reset_transport_header(mb); - return mb; } -static inline struct mbuf *sdp_alloc_mb_data(struct socket *sk, gfp_t gfp) +static inline struct mbuf * +sdp_alloc_mb_data(struct socket *sk, int wait) { - return sdp_alloc_mb(sk, SDP_MID_DATA, 0, gfp); + return sdp_alloc_mb(sk, SDP_MID_DATA, 0, wait); } -static inline struct mbuf *sdp_alloc_mb_disconnect(struct socket *sk, - gfp_t gfp) +static inline struct mbuf * +sdp_alloc_mb_disconnect(struct socket *sk, int wait) { - return sdp_alloc_mb(sk, SDP_MID_DISCONN, 0, gfp); + return sdp_alloc_mb(sk, SDP_MID_DISCONN, 0, wait); } -static inline struct mbuf *sdp_alloc_mb_chrcvbuf_ack(struct socket *sk, - int size, gfp_t gfp) +static inline void * +mb_put(struct mbuf *mb, int len) +{ + uint8_t *data; + + data = mb->m_data; + data += mb->m_len; + mb->m_len += len; + return (void *)data; +} + +static inline struct mbuf * +sdp_alloc_mb_chrcvbuf_ack(struct socket *sk, int size, int wait) { struct mbuf *mb; struct sdp_chrecvbuf *resp_size; - mb = sdp_alloc_mb(sk, SDP_MID_CHRCVBUF_ACK, sizeof(*resp_size), gfp); - + mb = sdp_alloc_mb(sk, SDP_MID_CHRCVBUF_ACK, sizeof(*resp_size), wait); + if (mb == NULL) + return (NULL); resp_size = (struct sdp_chrecvbuf *)mb_put(mb, sizeof *resp_size); resp_size->size = htonl(size); return mb; } -static inline struct mbuf *sdp_alloc_mb_srcavail(struct socket *sk, - u32 len, u32 rkey, u64 vaddr, gfp_t gfp) +static inline struct mbuf * +sdp_alloc_mb_srcavail(struct socket *sk, u32 len, u32 rkey, u64 vaddr, int wait) { struct mbuf *mb; struct sdp_srcah *srcah; - mb = sdp_alloc_mb(sk, SDP_MID_SRCAVAIL, sizeof(*srcah), gfp); - + mb = sdp_alloc_mb(sk, SDP_MID_SRCAVAIL, sizeof(*srcah), wait); + if (mb == NULL) + return (NULL); srcah = (struct sdp_srcah *)mb_put(mb, sizeof(*srcah)); srcah->len = htonl(len); srcah->rkey = htonl(rkey); @@ -649,29 +600,31 @@ static inline struct mbuf *sdp_alloc_mb_ return mb; } -static inline struct mbuf *sdp_alloc_mb_srcavail_cancel(struct socket *sk, - gfp_t gfp) +static inline struct mbuf * +sdp_alloc_mb_srcavail_cancel(struct socket *sk, int wait) { - return sdp_alloc_mb(sk, SDP_MID_SRCAVAIL_CANCEL, 0, gfp); + return sdp_alloc_mb(sk, SDP_MID_SRCAVAIL_CANCEL, 0, wait); } -static inline struct mbuf *sdp_alloc_mb_rdmardcompl(struct socket *sk, - u32 len, gfp_t gfp) +static inline struct mbuf * +sdp_alloc_mb_rdmardcompl(struct socket *sk, u32 len, int wait) { struct mbuf *mb; struct sdp_rrch *rrch; - mb = sdp_alloc_mb(sk, SDP_MID_RDMARDCOMPL, sizeof(*rrch), gfp); - + mb = sdp_alloc_mb(sk, SDP_MID_RDMARDCOMPL, sizeof(*rrch), wait); + if (mb == NULL) + return (NULL); rrch = (struct sdp_rrch *)mb_put(mb, sizeof(*rrch)); rrch->len = htonl(len); return mb; } -static 
inline struct mbuf *sdp_alloc_mb_sendsm(struct socket *sk, gfp_t gfp) +static inline struct mbuf * +sdp_alloc_mb_sendsm(struct socket *sk, int wait) { - return sdp_alloc_mb(sk, SDP_MID_SENDSM, 0, gfp); + return sdp_alloc_mb(sk, SDP_MID_SENDSM, 0, wait); } static inline int sdp_tx_ring_slots_left(struct sdp_sock *ssk) { @@ -691,98 +644,33 @@ static inline int credit_update_needed(s } -#ifdef SDPSTATS_ON - -#define SDPSTATS_MAX_HIST_SIZE 256 -struct sdpstats { - u32 post_send[256]; - u32 sendmsg_bcopy_segment; - u32 sendmsg_bzcopy_segment; - u32 sendmsg_zcopy_segment; - u32 sendmsg; - u32 post_send_credits; - u32 sendmsg_nagle_skip; - u32 sendmsg_seglen[25]; - u32 send_size[25]; - u32 post_recv; - u32 rx_int_count; - u32 tx_int_count; - u32 bzcopy_poll_miss; - u32 send_wait_for_mem; - u32 send_miss_no_credits; - u32 rx_poll_miss; - u32 tx_poll_miss; - u32 tx_poll_hit; - u32 tx_poll_busy; - u32 memcpy_count; - u32 credits_before_update[64]; - u32 zcopy_tx_timeout; - u32 zcopy_cross_send; - u32 zcopy_tx_aborted; - u32 zcopy_tx_error; -}; - -static inline void sdpstats_hist(u32 *h, u32 val, u32 maxidx, int is_log) -{ - int idx = is_log ? ilog2(val) : val; - if (idx > maxidx) - idx = maxidx; - - h[idx]++; -} - -#define SDPSTATS_COUNTER_INC(stat) do { __get_cpu_var(sdpstats).stat++; } while (0) -#define SDPSTATS_COUNTER_ADD(stat, val) do { __get_cpu_var(sdpstats).stat += val; } while (0) -#define SDPSTATS_COUNTER_MID_INC(stat, mid) do { __get_cpu_var(sdpstats).stat[mid]++; } \ - while (0) -#define SDPSTATS_HIST(stat, size) \ - sdpstats_hist(__get_cpu_var(sdpstats).stat, size, ARRAY_SIZE(__get_cpu_var(sdpstats).stat) - 1, 1) - -#define SDPSTATS_HIST_LINEAR(stat, size) \ - sdpstats_hist(__get_cpu_var(sdpstats).stat, size, ARRAY_SIZE(__get_cpu_var(sdpstats).stat) - 1, 0) - -#else #define SDPSTATS_COUNTER_INC(stat) #define SDPSTATS_COUNTER_ADD(stat, val) #define SDPSTATS_COUNTER_MID_INC(stat, mid) #define SDPSTATS_HIST_LINEAR(stat, size) #define SDPSTATS_HIST(stat, size) -#endif -static inline void sdp_cleanup_sdp_buf(struct sdp_sock *ssk, struct sdp_buf *sbuf, - size_t head_size, enum dma_data_direction dir) +static inline void +sdp_cleanup_sdp_buf(struct sdp_sock *ssk, struct sdp_buf *sbuf, + enum dma_data_direction dir) { - int i; + struct ib_device *dev; struct mbuf *mb; - struct ib_device *dev = ssk->ib_device; - - mb = sbuf->mb; - - ib_dma_unmap_single(dev, sbuf->mapping[0], head_size, dir); + int i; - for (i = 0; i < mb_shinfo(mb)->nr_frags; i++) { - ib_dma_unmap_page(dev, sbuf->mapping[i + 1], - mb_shinfo(mb)->frags[i].size, - dir); - } + dev = ssk->ib_device; + for (i = 0, mb = sbuf->mb; mb != NULL; mb = mb->m_next, i++) + ib_dma_unmap_single(dev, sbuf->mapping[i], mb->m_len, dir); } /* sdp_main.c */ void sdp_set_default_moderation(struct sdp_sock *ssk); -int sdp_init_sock(struct socket *sk); void sdp_start_keepalive_timer(struct socket *sk); -void sdp_remove_sock(struct sdp_sock *ssk); -void sdp_add_sock(struct sdp_sock *ssk); void sdp_urg(struct sdp_sock *ssk, struct mbuf *mb); void sdp_cancel_dreq_wait_timeout(struct sdp_sock *ssk); -void sdp_reset_sk(struct socket *sk, int rc); -void sdp_reset(struct socket *sk); -int sdp_tx_wait_memory(struct sdp_sock *ssk, long *timeo_p, int *credits_needed); -void mb_entail(struct socket *sk, struct sdp_sock *ssk, struct mbuf *mb); - -/* sdp_proc.c */ -int __init sdp_proc_init(void); -void sdp_proc_unregister(void); +void sdp_abort(struct socket *sk); +struct sdp_sock *sdp_notify(struct sdp_sock *ssk, int error); + /* sdp_cma.c */ int 
sdp_cma_handler(struct rdma_cm_id *, struct rdma_cm_event *); @@ -792,8 +680,7 @@ int sdp_tx_ring_create(struct sdp_sock * void sdp_tx_ring_destroy(struct sdp_sock *ssk); int sdp_xmit_poll(struct sdp_sock *ssk, int force); void sdp_post_send(struct sdp_sock *ssk, struct mbuf *mb); -void sdp_post_sends(struct sdp_sock *ssk, gfp_t gfp); -void sdp_nagle_timeout(unsigned long data); +void sdp_post_sends(struct sdp_sock *ssk, int wait); void sdp_post_keepalive(struct sdp_sock *ssk); /* sdp_rx.c */ @@ -804,8 +691,6 @@ int sdp_resize_buffers(struct sdp_sock * int sdp_init_buffers(struct sdp_sock *ssk, u32 new_size); void sdp_do_posts(struct sdp_sock *ssk); void sdp_rx_comp_full(struct sdp_sock *ssk); -void sdp_remove_large_sock(struct sdp_sock *ssk); -void sdp_handle_disconn(struct socket *sk); /* sdp_zcopy.c */ int sdp_sendmsg_zcopy(struct kiocb *iocb, struct socket *sk, struct iovec *iov); Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_bcopy.c ============================================================================== --- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_bcopy.c Tue Dec 14 21:33:17 2010 (r216449) +++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_bcopy.c Wed Dec 15 01:08:19 2010 (r216450) @@ -33,6 +33,8 @@ */ #include "sdp.h" +static void sdp_nagle_timeout(void *data); + #ifdef CONFIG_INFINIBAND_SDP_DEBUG_DATA void _dump_packet(const char *func, int line, struct socket *sk, char *str, struct mbuf *mb, const struct sdp_bsdh *h) @@ -85,7 +87,7 @@ void _dump_packet(const char *func, int srcah = (struct sdp_srcah *)(h+1); len += snprintf(buf + len, 255-len, " | payload: 0x%lx, " - "len: 0x%x, rkey: 0x%x, vaddr: 0x%llx |", + "len: 0x%x, rkey: 0x%x, vaddr: 0x%jx |", ntohl(h->len) - sizeof(struct sdp_bsdh) - sizeof(struct sdp_srcah), ntohl(srcah->len), ntohl(srcah->rkey), @@ -99,95 +101,79 @@ void _dump_packet(const char *func, int } #endif -static inline void update_send_head(struct socket *sk, struct mbuf *mb) +static inline int +sdp_nagle_off(struct sdp_sock *ssk, struct mbuf *mb) { - struct page *page; - sk->sk_send_head = mb->next; - if (sk->sk_send_head == (struct mbuf *)&sk->sk_write_queue) { - sk->sk_send_head = NULL; - page = sk->sk_sndmsg_page; - if (page) { - put_page(page); - sk->sk_sndmsg_page = NULL; - } - } -} -static inline int sdp_nagle_off(struct sdp_sock *ssk, struct mbuf *mb) -{ - struct sdp_bsdh *h = (struct sdp_bsdh *)mb_transport_header(mb); + struct sdp_bsdh *h; + + h = mtod(mb, struct sdp_bsdh *); int send_now = +#ifdef SDP_ZCOPY BZCOPY_STATE(mb) || +#endif unlikely(h->mid != SDP_MID_DATA) || - (ssk->nonagle & TCP_NAGLE_OFF) || + (ssk->flags & SDP_NODELAY) || !ssk->nagle_last_unacked || - mb->next != (struct mbuf *)&ssk->isk.sk.sk_write_queue || - mb->len + sizeof(struct sdp_bsdh) >= ssk->xmit_size_goal || - (SDP_SKB_CB(mb)->flags & TCPCB_FLAG_PSH); + mb->m_pkthdr.len >= ssk->xmit_size_goal || + (mb->m_flags & M_PUSH); if (send_now) { unsigned long mseq = ring_head(ssk->tx_ring); ssk->nagle_last_unacked = mseq; } else { - if (!timer_pending(&ssk->nagle_timer)) { - mod_timer(&ssk->nagle_timer, - jiffies + SDP_NAGLE_TIMEOUT); - sdp_dbg_data(&ssk->isk.sk, "Starting nagle timer\n"); + if (!callout_pending(&ssk->nagle_timer)) { + callout_reset(&ssk->nagle_timer, SDP_NAGLE_TIMEOUT, + sdp_nagle_timeout, ssk); + sdp_dbg_data(ssk->socket, "Starting nagle timer\n"); } } - sdp_dbg_data(&ssk->isk.sk, "send_now = %d last_unacked = %ld\n", + sdp_dbg_data(ssk->socket, "send_now = %d last_unacked = %ld\n", send_now, 
ssk->nagle_last_unacked); return send_now; } -void sdp_nagle_timeout(unsigned long data) +static void +sdp_nagle_timeout(void *data) { struct sdp_sock *ssk = (struct sdp_sock *)data; - struct socket *sk = &ssk->isk.sk; + struct socket *sk = ssk->socket; sdp_dbg_data(sk, "last_unacked = %ld\n", ssk->nagle_last_unacked); - if (!ssk->nagle_last_unacked) - goto out2; + if (!callout_active(&ssk->nagle_timer)) + return; + callout_deactivate(&ssk->nagle_timer); - /* Only process if the socket is not in use */ - bh_lock_sock(sk); - if (sock_owned_by_user(sk)) { - sdp_dbg_data(sk, "socket is busy - will try later\n"); + if (!ssk->nagle_last_unacked) goto out; - } - - if (sk->sk_state == TCPS_CLOSED) { - bh_unlock_sock(sk); + if (ssk->state == TCPS_CLOSED) return; - } - ssk->nagle_last_unacked = 0; - sdp_post_sends(ssk, GFP_ATOMIC); + sdp_post_sends(ssk, M_DONTWAIT); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - sk_stream_write_space(&ssk->isk.sk); + sowwakeup(ssk->socket); out: - bh_unlock_sock(sk); -out2: - if (sk->sk_send_head) /* If has pending sends - rearm */ - mod_timer(&ssk->nagle_timer, jiffies + SDP_NAGLE_TIMEOUT); + if (sk->so_snd.sb_sndptr) + callout_reset(&ssk->nagle_timer, SDP_NAGLE_TIMEOUT, + sdp_nagle_timeout, ssk); } -void sdp_post_sends(struct sdp_sock *ssk, gfp_t gfp) +void +sdp_post_sends(struct sdp_sock *ssk, int wait) { /* TODO: nonagle? */ struct mbuf *mb; int post_count = 0; - struct socket *sk = &ssk->isk.sk; + struct socket *sk; + sk = ssk->socket; if (unlikely(!ssk->id)) { - if (ssk->isk.sk.sk_send_head) { - sdp_dbg(&ssk->isk.sk, + if (sk->so_snd.sb_sndptr) { + sdp_dbg(ssk->socket, "Send on socket without cmid ECONNRESET.\n"); - /* TODO: flush send queue? */ - sdp_reset(&ssk->isk.sk); + sdp_notify(ssk, ECONNRESET); } return; } @@ -199,39 +185,42 @@ void sdp_post_sends(struct sdp_sock *ssk ring_tail(ssk->rx_ring) >= ssk->recv_request_head && tx_credits(ssk) >= SDP_MIN_TX_CREDITS && sdp_tx_ring_slots_left(ssk)) { + mb = sdp_alloc_mb_chrcvbuf_ack(sk, + ssk->recv_bytes - SDP_HEAD_SIZE, wait); + if (mb == NULL) + goto allocfail; ssk->recv_request = 0; - - mb = sdp_alloc_mb_chrcvbuf_ack(sk, - ssk->recv_frags * PAGE_SIZE, gfp); - sdp_post_send(ssk, mb); post_count++; } if (tx_credits(ssk) <= SDP_MIN_TX_CREDITS && - sdp_tx_ring_slots_left(ssk) && - ssk->isk.sk.sk_send_head && - sdp_nagle_off(ssk, ssk->isk.sk.sk_send_head)) { + sdp_tx_ring_slots_left(ssk) && sk->so_snd.sb_sndptr && + sdp_nagle_off(ssk, sk->so_snd.sb_sndptr)) { SDPSTATS_COUNTER_INC(send_miss_no_credits); } while (tx_credits(ssk) > SDP_MIN_TX_CREDITS && - sdp_tx_ring_slots_left(ssk) && - (mb = ssk->isk.sk.sk_send_head) && - sdp_nagle_off(ssk, mb)) { - update_send_head(&ssk->isk.sk, mb); - __mb_dequeue(&ssk->isk.sk.sk_write_queue); - + sdp_tx_ring_slots_left(ssk) && (mb = sk->so_snd.sb_sndptr) && + sdp_nagle_off(ssk, mb)) { + struct mbuf *n; + + SOCKBUF_LOCK(&sk->so_snd); + sk->so_snd.sb_sndptr = mb->m_nextpkt; + sk->so_snd.sb_mb = mb->m_nextpkt; + for (n = mb; n != NULL; n = mb->m_next) + sbfree(&sk->so_snd, mb); + SB_EMPTY_FIXUP(&sk->so_snd); + SOCKBUF_UNLOCK(&sk->so_snd); sdp_post_send(ssk, mb); - post_count++; } - if (credit_update_needed(ssk) && - likely((1 << ssk->isk.sk.sk_state) & - (TCPF_ESTABLISHED | TCPF_FIN_WAIT1))) { - - mb = sdp_alloc_mb_data(&ssk->isk.sk, gfp); + if (credit_update_needed(ssk) && ssk->state >= TCPS_ESTABLISHED && + ssk->state < TCPS_FIN_WAIT_2) { + mb = sdp_alloc_mb_data(ssk->socket, wait); + if (mb == NULL) + goto allocfail; sdp_post_send(ssk, mb); 
SDPSTATS_COUNTER_INC(post_send_credits); @@ -243,17 +232,21 @@ void sdp_post_sends(struct sdp_sock *ssk * If one credit is available, an implementation shall only send SDP * messages that provide additional credits and also do not contain ULP * payload. */ - if (unlikely(ssk->sdp_disconnect) && - !ssk->isk.sk.sk_send_head && - tx_credits(ssk) > 1) { - ssk->sdp_disconnect = 0; - - mb = sdp_alloc_mb_disconnect(sk, gfp); + if ((ssk->flags & SDP_NEEDFIN) && !sk->so_snd.sb_sndptr && + tx_credits(ssk) > 1) { + mb = sdp_alloc_mb_disconnect(sk, wait); + if (mb == NULL) + goto allocfail; + ssk->flags &= ~SDP_NEEDFIN; sdp_post_send(ssk, mb); - post_count++; } - if (post_count) sdp_xmit_poll(ssk, 0); + return; + +allocfail: + ssk->nagle_last_unacked = -1; + callout_reset(&ssk->nagle_timer, 1, sdp_nagle_timeout, ssk); + return; } Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_cma.c ============================================================================== --- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_cma.c Tue Dec 14 21:33:17 2010 (r216449) +++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_cma.c Wed Dec 15 01:08:19 2010 (r216450) @@ -31,19 +31,6 @@ * * $Id$ */ -#include <linux/device.h> -#include <linux/in.h> -#include <linux/err.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/pci.h> -#include <linux/time.h> -#include <linux/workqueue.h> - -#include <rdma/ib_verbs.h> -#include <rdma/rdma_cm.h> -#include <net/tcp_states.h> -#include <rdma/sdp_socket.h> #include "sdp.h" #define SDP_MAJV_MINV 0x22 @@ -56,11 +43,13 @@ enum { SDP_HAH_SIZE = 180, }; -static void sdp_qp_event_handler(struct ib_event *event, void *data) *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
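
The most pervasive conversion visible in the diff above is the replacement of Linux timer_list/tasklet pairs with FreeBSD's callout(9), as in the new sdp_nagle_timeout(). Below is a minimal sketch of that idiom, not code from the commit; the struct and function names (conn, conn_timeout) are illustrative stand-ins for struct sdp_sock and its handler:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/callout.h>

/* Illustrative state block; stands in for struct sdp_sock. */
struct conn {
        struct callout  timer;
        int             have_unacked;
};

static void
conn_timeout(void *arg)
{
        struct conn *c = arg;

        /* A racing callout_stop()/callout_reset() may have beaten us. */
        if (!callout_active(&c->timer))
                return;
        callout_deactivate(&c->timer);
        if (!c->have_unacked)
                return;
        /* ... push queued data here, as sdp_post_sends() does ... */
        callout_reset(&c->timer, hz / 100, conn_timeout, c);
}

static void
conn_timer_start(struct conn *c)
{
        /* MP-safe handler; replaces the Linux timer+tasklet pair. */
        callout_init(&c->timer, CALLOUT_MPSAFE);
        callout_reset(&c->timer, hz / 100, conn_timeout, c);
}

Unlike mod_timer(), callout_reset() both arms and re-arms, so the timer_pending() test becomes callout_pending() (as in sdp_nagle_off()) and the handler deactivates itself with callout_deactivate() before doing any work.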
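Similarly, the Linux skb allocators (sdp_stream_alloc_mb et al.) were rewritten over mbuf(9), as in the new sdp_alloc_mb() above. A sketch of the MGETHDR/mtod pattern follows, under the assumption of an illustrative header type (proto_hdr is not from the commit; SDP's real header is struct sdp_bsdh):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>

struct proto_hdr {              /* illustrative stand-in for sdp_bsdh */
        uint8_t         mid;    /* message id, e.g. SDP_MID_DATA */
        uint8_t         flags;
        uint16_t        bufs;
        uint32_t        len;
};

static struct mbuf *
proto_alloc_mb(uint8_t mid, int wait)
{
        struct proto_hdr *h;
        struct mbuf *mb;

        /* wait is M_WAIT or M_DONTWAIT, replacing the gfp_t argument. */
        MGETHDR(mb, wait, MT_DATA);
        if (mb == NULL)
                return (NULL);
        mb->m_pkthdr.len = mb->m_len = sizeof(*h);
        h = mtod(mb, struct proto_hdr *);
        bzero(h, sizeof(*h));
        h->mid = mid;
        return (mb);
}

Note the NULL return in place of Linux's BUG_ON(!mb): with M_DONTWAIT the allocation may fail, which is why sdp_post_sends() gains the allocfail path that backs off and re-arms the nagle callout instead of crashing.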