Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 24 Jan 2008 07:36:01 GMT
From:      Kip Macy <kmacy@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 133977 for review
Message-ID:  <200801240736.m0O7a1ED093489@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=133977

Change 133977 by kmacy@kmacy:storage:toehead on 2008/01/24 07:35:33

	import cpl_io ddp support

Affected files ...

.. //depot/projects/toehead/sys/dev/cxgb/sys/mvec.h#6 edit
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#5 edit
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#4 edit
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h#4 edit

Differences ...

==== //depot/projects/toehead/sys/dev/cxgb/sys/mvec.h#6 (text+ko) ====

@@ -48,9 +48,10 @@
 extern int cxgb_mbufs_outstanding;
 extern int cxgb_pack_outstanding;
 
-#define mtomv(m)          ((struct mbuf_vec *)((m)->m_pktdat))
-#define M_IOVEC               0x100000 /* mbuf immediate data area is used for cluster ptrs */
-#define	EXT_PHYS	10	/* physical/bus address  */
+#define	mtomv(m)          ((struct mbuf_vec *)((m)->m_pktdat))
+#define	M_IOVEC		0x100000	/* mbuf immediate data area is used for cluster ptrs */
+#define	M_DDP		0x200000	/* direct data placement mbuf */
+#define	EXT_PHYS	10		/* physical/bus address  */
 
 
 /*
@@ -74,6 +75,11 @@
 #define EXT_CLIOVEC     9
 #define EXT_JMPIOVEC    10
 
+#define m_cur_offset	m_ext.ext_size		/* override to provide ddp offset */
+#define m_seq		m_pkthdr.csum_data	/* stored sequence */
+#define m_ddp_gl	m_ext.ext_buf		/* ddp list	*/
+#define m_ddp_flags	m_pkthdr.csum_flags	/* ddp flags	*/
+#define m_ulp_mode	m_ext.ext_type		/* upper level protocol	*/
 
 extern uma_zone_t zone_miovec;
 

==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#5 (text+ko) ====

@@ -478,6 +478,14 @@
 	return (credits);
 }
 
+/*
+ * Returns true if a socket cannot accept new Rx data.
+ */
+static inline int
+so_no_receive(const struct socket *so)
+{
+	return (so->so_state & (SS_ISDISCONNECTED|SS_ISDISCONNECTING));
+}
 
 /*
  * Set of states for which we should return RX credits.
@@ -1465,6 +1473,253 @@
 }
 
 /*
+ * Returns true if we need to explicitly request RST when we receive new data
+ * on an RX-closed connection.
+ */
+static inline int
+need_rst_on_excess_rx(const struct toepcb *toep)
+{
+	return (1);
+}
+
+/*
+ * Handles Rx data that arrives in a state where the socket isn't accepting
+ * new data.
+ */
+static void
+handle_excess_rx(struct toepcb *toep, struct mbuf *m)
+{
+	
+	if (need_rst_on_excess_rx(toep) && !(toep->tp_flags & TP_ABORT_SHUTDOWN))
+		t3_send_reset(toep);
+	m_freem(m); 
+}
+
+/*
+ * Process a get_tcb_rpl as a DDP completion (similar to RX_DDP_COMPLETE)
+ * by getting the DDP offset from the TCB.
+ */
+static void
+tcb_rpl_as_ddp_complete(struct toepcb *toep, struct mbuf *m)
+{
+	struct ddp_state *q = &toep->tp_ddp_state;
+	struct ddp_buf_state *bsp;
+	struct cpl_get_tcb_rpl *hdr;
+	unsigned int ddp_offset;
+	struct socket *so;
+	struct tcpcb *tp;
+	
+	uint64_t t;
+	__be64 *tcb;
+
+
+	/* Note that we only account for CPL_GET_TCB issued by the DDP code. We
+	 * really need a cookie in order to dispatch the RPLs.
+	 */
+	q->get_tcb_count--;
+
+	/* It is possible that a previous CPL already invalidated UBUF DDP
+	 * and moved the cur_buf idx and hence no further processing of this
+	 * mbuf is required. However, the app might be sleeping on
+	 * !q->get_tcb_count and we need to wake it up.
+	 */
+	if (q->cancel_ubuf && !t3_ddp_ubuf_pending(toep)) {
+		struct socket *so = toeptoso(toep);
+		
+		m_freem(m);
+		if (__predict_true((so->so_state & SS_NOFDREF) == 0))
+			sorwakeup(so);
+
+		return;
+	}
+
+	bsp = &q->buf_state[q->cur_buf];
+	hdr = cplhdr(m);
+	tcb = (__be64 *)(hdr + 1);
+	if (q->cur_buf == 0) {
+		t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF0_OFFSET) / 2]);
+		ddp_offset = t >> (32 + S_TCB_RX_DDP_BUF0_OFFSET);
+	} else {
+		t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF1_OFFSET) / 2]);
+		ddp_offset = t >> S_TCB_RX_DDP_BUF1_OFFSET;
+	}
+	ddp_offset &= M_TCB_RX_DDP_BUF0_OFFSET;
+
+#ifdef T3_TRACE
+	T3_TRACE3(TIDTB(so),
+		  "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u ddp_offset %u",
+		  tp->rcv_nxt, q->cur_buf, ddp_offset);
+#endif
+
+#if 0
+{
+	unsigned int ddp_flags, rcv_nxt, rx_hdr_offset, buf_idx;
+
+	t = be64toh(tcb[(31 - W_TCB_RX_DDP_FLAGS) / 2]);
+	ddp_flags = (t >> S_TCB_RX_DDP_FLAGS) & M_TCB_RX_DDP_FLAGS;
+
+        t = be64toh(tcb[(31 - W_TCB_RCV_NXT) / 2]);
+        rcv_nxt = t >> S_TCB_RCV_NXT;
+        rcv_nxt &= M_TCB_RCV_NXT;
+
+        t = be64toh(tcb[(31 - W_TCB_RX_HDR_OFFSET) / 2]);
+        rx_hdr_offset = t >> (32 + S_TCB_RX_HDR_OFFSET);
+        rx_hdr_offset &= M_TCB_RX_HDR_OFFSET;
+
+	T3_TRACE2(TIDTB(sk),
+		  "tcb_rpl_as_ddp_complete: DDP FLAGS 0x%x dma up to 0x%x",
+		  ddp_flags, rcv_nxt - rx_hdr_offset);
+	T3_TRACE4(TB(q),
+		  "tcb_rpl_as_ddp_complete: rcvnxt 0x%x hwbuf %u cur_offset %u cancel %u",
+		  tp->rcv_nxt, q->cur_buf, bsp->cur_offset, q->cancel_ubuf);
+	T3_TRACE3(TB(q),
+		  "tcb_rpl_as_ddp_complete: TCB rcvnxt 0x%x hwbuf 0x%x ddp_offset %u",
+		  rcv_nxt - rx_hdr_offset, ddp_flags, ddp_offset);
+	T3_TRACE2(TB(q),
+		  "tcb_rpl_as_ddp_complete: flags0 0x%x flags1 0x%x",
+		 q->buf_state[0].flags, q->buf_state[1].flags);
+
+}
+#endif
+	m->m_cur_offset = bsp->cur_offset;
+	bsp->cur_offset = ddp_offset;
+	m->m_pkthdr.len = ddp_offset - m->m_cur_offset;
+	so = toeptoso(toep);
+	
+	if (__predict_false(so_no_receive(so) && m->m_pkthdr.len)) {
+		handle_excess_rx(toep, m);
+		return;
+	}
+
+#ifdef T3_TRACE
+	if ((int)m->m_pkthdr.len < 0) {
+		t3_ddp_error(so, "tcb_rpl_as_ddp_complete: neg len");
+	}
+#endif
+	if (bsp->flags & DDP_BF_NOCOPY) {
+#ifdef T3_TRACE
+		T3_TRACE0(TB(q),
+			  "tcb_rpl_as_ddp_complete: CANCEL UBUF");
+
+		if (!q->cancel_ubuf && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
+			printk("!cancel_ubuf");
+			t3_ddp_error(sk, "tcb_rpl_as_ddp_complete: !cancel_ubuf");
+		}
+#endif
+		m->m_ddp_flags = DDP_BF_PSH | DDP_BF_NOCOPY | 1;
+		bsp->flags &= ~DDP_BF_NOCOPY;
+		q->cur_buf ^= 1;
+	} else if (bsp->flags & DDP_BF_NOFLIP) {
+
+		m->m_ddp_flags = 1;    /* always a kernel buffer */
+
+		/* now HW buffer carries a user buffer */
+		bsp->flags &= ~DDP_BF_NOFLIP;
+		bsp->flags |= DDP_BF_NOCOPY;
+
+		/* It is possible that the CPL_GET_TCB_RPL doesn't indicate
+		 * any new data in which case we're done. If in addition the
+		 * offset is 0, then there wasn't a completion for the kbuf
+		 * and we need to decrement the posted count.
+		 */
+		if (m->m_pkthdr.len == 0) {
+			if (ddp_offset == 0)
+				q->kbuf_posted--;
+			panic("length not set");
+			m_free(m);
+			return;
+		}
+	} else {
+		/* This reply is for a CPL_GET_TCB_RPL to cancel the UBUF DDP,
+		 * but it got here way late and nobody cares anymore.
+		 */
+		m_free(m);
+		return;
+	}
+
+	tp = toep->tp_tp;
+	m->m_ddp_gl = (unsigned char *)bsp->gl;
+	m->m_seq = tp->rcv_nxt;
+	tp->rcv_nxt += m->m_pkthdr.len;
+	tp->t_rcvtime = ticks;
+
+#if 0	
+	skb->h.th = tcphdr_skb->h.th;
+#endif
+#ifdef T3_TRACE
+	T3_TRACE3(TB(q),
+		  "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u lskb->len %u",
+		  m->m_seq, q->cur_buf, m->m_pkthdr.len);
+#endif
+#ifdef notyet
+	__skb_queue_tail(&sk->sk_receive_queue, skb);
+#endif
+	if (__predict_true((so->so_state & SS_NOFDREF) == 0))
+		sorwakeup(so);
+}
+
+/*
+ * Process a CPL_GET_TCB_RPL.  These can also be generated by the DDP code,
+ * in that case they are similar to DDP completions.
+ */
+static int
+do_get_tcb_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+{
+	struct toepcb *toep = (struct toepcb *)ctx;
+
+	/* OK if socket doesn't exist */
+	if (toep == NULL)
+		return (CPL_RET_BUF_DONE);
+
+	tcb_rpl_as_ddp_complete(toep, m);
+
+	return (0);
+}
+
+static void
+handle_ddp_data(struct toepcb *toep, struct mbuf *m)
+{
+	struct tcpcb *tp = toep->tp_tp;
+	struct ddp_state *q;
+	struct ddp_buf_state *bsp;
+	struct cpl_rx_data *hdr = cplhdr(m);
+	unsigned int rcv_nxt = ntohl(hdr->seq);
+
+	if (tp->rcv_nxt == rcv_nxt)
+		return;
+
+	q = &toep->tp_ddp_state;
+	bsp = &q->buf_state[q->cur_buf];
+	m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
+
+#ifdef T3_TRACE
+	if ((int)m->m_pkthdr.len < 0) {
+		t3_ddp_error(so, "handle_ddp_data: neg len");
+	}
+#endif
+
+	m->m_ddp_gl = (unsigned char *)bsp->gl;
+	m->m_cur_offset = bsp->cur_offset;
+	m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
+	if (bsp->flags & DDP_BF_NOCOPY)
+		bsp->flags &= ~DDP_BF_NOCOPY;
+
+	m->m_seq = tp->rcv_nxt;
+	tp->rcv_nxt = rcv_nxt;
+	bsp->cur_offset += m->m_pkthdr.len;
+	if (!(bsp->flags & DDP_BF_NOFLIP))
+		q->cur_buf ^= 1;
+	tp->t_rcvtime = ticks;
+#ifdef notyet	
+	__skb_queue_tail(&sk->sk_receive_queue, skb);
+#endif
+	/* For now, don't re-enable DDP after a connection fell out of  DDP
+	 * mode.
+	 */
+	q->ubuf_ddp_ready = 0;
+}
+
+/*
  * Process new data received for a connection.
  */
 static void
@@ -1477,26 +1732,25 @@
 
 	INP_LOCK(tp->t_inpcb);
 	
-#ifdef notyet	
-	if (__predict_false(sk_no_receive(sk))) {
-		handle_excess_rx(so, skb);
+	if (__predict_false(so_no_receive(so))) {
+		handle_excess_rx(toep, m);
 		return;
 	}
 
-	if (ULP_MODE(tp) == ULP_MODE_TCPDDP)
-		handle_ddp_data(so, skb);
+	if (toep->tp_ulp_mode == ULP_MODE_TCPDDP)
+		handle_ddp_data(toep, m);
+
+	m->m_seq = ntohl(hdr->seq);
+	m->m_ddp_flags = 0;
+	m->m_ulp_mode = 0;                    /* for iSCSI */
 
-	TCP_SKB_CB(skb)->seq = ntohl(hdr->seq);
-	TCP_SKB_CB(skb)->flags = 0;
-	skb_ulp_mode(skb) = 0;                    /* for iSCSI */
-#endif
 #if VALIDATE_SEQ
-	if (__predict_false(TCP_SKB_CB(skb)->seq != tp->rcv_nxt)) {
-		printk(KERN_ERR
+	if (__predict_false(m->m_seq != tp->rcv_nxt)) {
+		log(LOG_ERR,
 		       "%s: TID %u: Bad sequence number %u, expected %u\n",
-		       TOE_DEV(sk)->name, TID(tp), TCP_SKB_CB(skb)->seq,
+		    TOE_DEV(toeptoso(toep))->name, toep->tp_tid, m->m_seq,
 		       tp->rcv_nxt);
-		__kfree_skb(skb);
+		m_freem(m);
 		return;
 	}
 #endif
@@ -1528,8 +1782,8 @@
 	toep->tp_enqueued_bytes += m->m_pkthdr.len;
 #ifdef T3_TRACE
 	T3_TRACE2(TIDTB(sk),
-		  "new_rx_data: seq 0x%x len %u",
-		  TCP_SKB_CB(skb)->seq, skb->len);
+	    "new_rx_data: seq 0x%x len %u",
+	    m->m_seq, m->m_pkthdr.len);
 #endif
 	SOCKBUF_LOCK(&so->so_rcv);
 	if (sb_notify(&so->so_rcv))
@@ -1567,21 +1821,20 @@
 }
 
 static void
-new_rx_data_ddp(struct socket *so, struct mbuf *m)
+new_rx_data_ddp(struct toepcb *toep, struct mbuf *m)
 {
-	struct tcpcb *tp = sototcpcb(so);
-	struct toepcb *toep = tp->t_toe;
+	struct tcpcb *tp;
 	struct ddp_state *q;
 	struct ddp_buf_state *bsp;
 	struct cpl_rx_data_ddp *hdr;
 	unsigned int ddp_len, rcv_nxt, ddp_report, end_offset, buf_idx;
+	struct socket *so = toeptoso(toep);
 
-#ifdef notyet
-	if (unlikely(sk_no_receive(sk))) {
-		handle_excess_rx(so, m);
+	if (__predict_false(so_no_receive(so))) {
+		handle_excess_rx(toep, m);
 		return;
 	}
-#endif
+
 	tp = sototcpcb(so);
 	q = &toep->tp_ddp_state;
 	hdr = cplhdr(m);
@@ -1604,7 +1857,7 @@
 	rcv_nxt = ntohl(hdr->seq) + ddp_len;
 
 	/*
-	 * Overload to store old rcv_next
+	 * Overload to store old RCV_NXT
 	 */
 	m->m_pkthdr.csum_data = tp->rcv_nxt;
 	tp->rcv_nxt = rcv_nxt;
@@ -1622,15 +1875,8 @@
 	 * account for page pod's pg_offset.
 	 */
 	end_offset = G_DDP_OFFSET(ddp_report) + ddp_len;
-#ifdef notyet	
-	TCP_SKB_CB(skb)->when = end_offset - skb->len;
-
-	/*
-	 * We store in mac.raw the address of the gather list where the
-	 * placement happened.
-	 */
-	skb->mac.raw = (unsigned char *)bsp->gl;
-#endif	
+	m->m_cur_offset = end_offset - m->m_pkthdr.len;
+	m->m_ddp_gl = (unsigned char *)bsp->gl;
 	bsp->cur_offset = end_offset;
 
 	/*
@@ -1638,9 +1884,6 @@
 	 * Note that other parts of the code depend on this being in bit 0.
 	 */
 	if ((bsp->flags & DDP_BF_NOINVAL) && end_offset != bsp->gl->dgl_length) {
-#if 0		
-		TCP_SKB_CB(skb)->flags = 0;  /* potential spurious completion */
-#endif		
 		panic("spurious ddp completion");
 	} else {
 		m->m_pkthdr.csum_flags = !!(ddp_report & F_DDP_BUF_COMPLETE);
@@ -1676,7 +1919,6 @@
 do_rx_data_ddp(struct t3cdev *cdev, struct mbuf *m, void *ctx)
 {
 	struct toepcb *toep = ctx;
-	struct socket *so = toeptoso(toep);
 	const struct cpl_rx_data_ddp *hdr = cplhdr(m);
 
 	VALIDATE_SOCK(so);
@@ -1689,26 +1931,25 @@
 #if 0
 	skb->h.th = tcphdr_skb->h.th;
 #endif	
-	new_rx_data_ddp(so, m);
+	new_rx_data_ddp(toep, m);
 	return (0);
 }
 
 static void
-process_ddp_complete(struct socket *so, struct mbuf *m)
+process_ddp_complete(struct toepcb *toep, struct mbuf *m)
 {
-	struct tcpcb *tp = sototcpcb(so);
-	struct toepcb *toep = tp->t_toe;
+	struct tcpcb *tp = toep->tp_tp;
+	struct socket *so = toeptoso(toep);
 	struct ddp_state *q;
 	struct ddp_buf_state *bsp;
 	struct cpl_rx_ddp_complete *hdr;
 	unsigned int ddp_report, buf_idx, when;
 
-#ifdef notyet	
-	if (unlikely(sk_no_receive(sk))) {
-		handle_excess_rx(sk, skb);
+	if (__predict_false(so_no_receive(so))) {
+		handle_excess_rx(toep, m);
 		return;
 	}
-#endif
+
 	q = &toep->tp_ddp_state; 
 	hdr = cplhdr(m);
 	ddp_report = ntohl(hdr->ddp_report);
@@ -1748,11 +1989,11 @@
 	tp->rcv_nxt += m->m_len;
 
 	tp->t_rcvtime = ticks;
-	sbappendstream_locked(&so->so_rcv, m);
-#ifdef notyet	
-	if (!sock_flag(sk, SOCK_DEAD))
-		sk->sk_data_ready(sk, 0);
-#endif	
+	sbappendstream_locked(&so->so_rcv, m)
+	    ;
+	if ((so->so_state & SS_NOFDREF) == 0)
+		sorwakeup_locked(so);
+	
 }
 
 /*
@@ -1762,13 +2003,12 @@
 do_rx_ddp_complete(struct t3cdev *cdev, struct mbuf *m, void *ctx)
 {
 	struct toepcb *toep = ctx;
-	struct socket *so = toeptoso(toep);
 
 	VALIDATE_SOCK(so);
 #if 0
 	skb->h.th = tcphdr_skb->h.th;
 #endif	
-	process_ddp_complete(so, m);
+	process_ddp_complete(toep, m);
 	return (0);
 }
 
@@ -3413,8 +3653,8 @@
 #ifdef notyet	
 	t3tom_register_cpl_handler(CPL_RX_URG_NOTIFY, do_rx_urg_notify);
 	t3tom_register_cpl_handler(CPL_TRACE_PKT, do_trace_pkt);
+#endif
 	t3tom_register_cpl_handler(CPL_GET_TCB_RPL, do_get_tcb_rpl);
-#endif
 	return (0);
 }
 

==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#4 (text+ko) ====

@@ -1001,6 +1001,38 @@
 	           (TOM_TUNABLE(toep->tp_toedev, ddp_copy_limit) + 
 		    DDP_RSVD_WIN);
 }
+
+static inline int
+is_ddp(const struct mbuf *m)
+{
+	return (m->m_flags & M_DDP);
+}
+
+static inline int
+is_ddp_psh(const struct mbuf *m)
+{
+        return is_ddp(skb) && (m->m_pkthdr.csum_flags & DDP_BF_PSH);
+}
+
+/*
+ * Copy data from an mbuf to an iovec.  Deals with RX_DATA, which carry the
+ * data in the mbuf body, and with RX_DATA_DDP, which place the data in a
+ * DDP buffer.
+ */
+static inline int
+copy_data(const struct mbuf *m, int offset, struct iovec *to, int len)
+{
+	if (__predict_true(!is_ddp(m)))                             /* RX_DATA */
+		return mbuf_copy_datagram_iovec(m, offset, to, len);
+	if (__predict_true(m->pkthdr.csum_flags & DDP_BF_NOCOPY)) { /* user DDP */
+		to->iov_len -= len;
+		to->iov_base += len;
+		return 0;
+	}
+	return t3_ddp_copy(m, offset, to, len);             /* kernel DDP */
+}
+
+
 #endif
 /*
  * Clean up DDP state that needs to survive until socket close time, such as the
@@ -1014,9 +1046,6 @@
 	struct ddp_state *p = &toep->tp_ddp_state;
 	int idx;
 
-	if (!p)
-		return;
-	
 	for (idx = 0; idx < NUM_DDP_KBUF; idx++)
 		if (p->kbuf[idx]) {
 			ddp_gl_free_pages(p->kbuf[idx], 0);
@@ -1026,6 +1055,7 @@
 	if (p->ubuf) {
 		ddp_gl_free_pages(p->ubuf, 0);
 		free(p->ubuf, M_DEVBUF);
+		p->ubuf = NULL;
 	}
 	toep->tp_ulp_mode = 0;
 }

==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h#4 (text+ko) ====

@@ -135,9 +135,8 @@
 /*
  * Returns 1 if a UBUF DMA buffer might be active.
  */
-static inline int t3_ddp_ubuf_pending(struct socket *so)
+static inline int t3_ddp_ubuf_pending(struct toepcb *toep)
 {
-	struct toepcb *toep = sototcpcb(so)->t_toe;
 	struct ddp_state *p = &toep->tp_ddp_state;
 
 	/* When the TOM_TUNABLE(ddp) is enabled, we're always in ULP_MODE DDP,



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200801240736.m0O7a1ED093489>