Date: Thu, 28 Jun 2007 17:13:04 GMT From: Kip Macy <kmacy@FreeBSD.org> To: Perforce Change Reviews <perforce@freebsd.org> Subject: PERFORCE change 122495 for review Message-ID: <200706281713.l5SHD4b1031726@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=122495 Change 122495 by kmacy@kmacy_vt-x:opentoe_init on 2007/06/28 17:12:16 implement chelsio_usr_sosend down to t3_push_frames Affected files ... .. //depot/projects/opentoe/sys/dev/cxgb/notes.txt#1 add .. //depot/projects/opentoe/sys/dev/cxgb/ulp/t3_tom/t3_cpl_io.c#6 edit .. //depot/projects/opentoe/sys/dev/cxgb/ulp/t3_tom/t3_cpl_socket.c#4 edit .. //depot/projects/opentoe/sys/dev/cxgb/ulp/t3_tom/t3_defs.h#4 edit Differences ... ==== //depot/projects/opentoe/sys/dev/cxgb/ulp/t3_tom/t3_cpl_io.c#6 (text+ko) ==== @@ -66,9 +66,6 @@ #include <dev/cxgb/ulp/t3_tom/t3_ddp.h> #include <dev/cxgb/ulp/toecore/toedev.h> - - - #define DEBUG_WR 0 extern struct protosw t3_tcp_proto; @@ -471,9 +468,9 @@ * Returns true if an mbuf carries urgent data. */ static inline int -skb_urgent(struct mbuf *skb) +mbuf_urgent(struct mbuf *m) { - return (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_URG) != 0; + return (m->m_flags & TCPCB_FLAG_URG) != 0; } /* @@ -491,8 +488,7 @@ struct tx_data_wr *req; struct tcpcb *tp = sototcpcb(so); - skb->h.raw = skb->data; - req = (struct tx_data_wr *)__skb_push(skb, sizeof(*req)); + req = mtod(m, struct tx_data_wr *); req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); req->wr_lo = htonl(V_WR_TID(TID(so))); req->sndseq = htonl(tp->snd_nxt); @@ -501,24 +497,18 @@ req->param = htonl(V_TX_PORT(L2T_ENTRY(so)->smt_idx)); /* V_TX_ULP_SUBMODE sets both the mode and submode */ req->flags = htonl(V_TX_ULP_SUBMODE(skb_ulp_mode(m)) | - V_TX_URG(skb_urgent(m)) | - V_TX_SHOVE((!sock_flag(sk, TX_MORE_DATA)) && - (skb_peek(&sk->sk_write_queue) ? 0 : 1))); + V_TX_URG(mbuf_urgent(m)) | + V_TX_SHOVE((!GET_TOE_FLAG(so, TX_MORE_DATA)) && + (so->so_snd.sb_mb != NULL)); - if (!sock_flag(sk, TX_DATA_SENT)) { - req->flags |= htonl(F_TX_INIT | V_TX_CPU_IDX(qset(so))); - /* - * The send buffer size is in 32KB. In addition Linux doubles - * what the user requested to account for header and mbuf - * overhead. We care about pure payload here so divide by an - * extra 2 to get the user's requested value. - */ - req->param |= htonl((sk->sk_userlocks & SOCK_SNDBUF_LOCK) ? - V_TX_SNDBUF(sk->sk_sndbuf >> 16) : - V_TX_SNDBUF(M_TX_SNDBUF)); - sock_set_flag(sk, TX_DATA_SENT); - } -} + if (GET_TOE_FLAG(so, TX_DATA_SENT) == 0) { + req->flags |= htonl(F_TX_INIT | V_TX_CPU_IDX(qset(so))); + req->param |= htonl((so->so_snd.sb_flags & SB_AUTOSIZE) ? + V_TX_SNDBUF(tcp_autosndbuf_max) : + V_TX_SNDBUF(so->so_cred->cr_uidinfo->ui_sbsize)); + SET_TOE_FLAG(so, TX_DATA_SENT); + } + /* * Prepends TX_DATA_WR or CPL_CLOSE_CON_REQ headers to buffers waiting in a @@ -535,25 +525,27 @@ struct toedev *cdev; struct tom_data *d; - if (__predict_false(sk_in_state(sk, TCPF_SYN_SENT | TCPF_CLOSE))) + if (__predict_false((tp->t_state == TCPS_SYN_SENT) || + (tp->t_state == TCPS_CLOSE))) return 0; /* * We shouldn't really be called at all after an abort but check just * in case. - */ + * XXX not clear how to check this on FreeBSD + */ +#ifdef notyet if (__predict_false(sock_flag(sk, ABORT_SHUTDOWN))) return 0; - +#endif d = TOM_DATA(TOE_DEV(so)); cdev = d->cdev; - while (WR_AVAIL(so) && (skb = skb_peek(&sk->sk_write_queue)) != NULL && - !sock_flag(sk, TX_WAIT_IDLE) && - (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_HOLD) || - skb_queue_len(&sk->sk_write_queue) > 1)) { + while (WR_AVAIL(so) && (m = so->so_snd.sb_mb) != NULL && + !(TOE_FLAGS(so) & TX_WAIT_IDLE) && + (so->so_snd.sb_mb != so->so_snd.sb_lastrecord)) { - int len = skb->len; /* length before skb_push */ + int len = m->m_pkthdr.len; /* length before skb_push */ int frags = skb_shinfo(skb)->nr_frags + (len != skb->data_len); int wrs_needed = skb_wrs[frags]; @@ -561,39 +553,40 @@ if (WR_AVAIL(so) < wrs_needed) break; - __skb_unlink(skb, &sk->sk_write_queue); + so->so_snd.sb_mb = m->m_nextpkt; + m->m_priority = mkprio(CPL_PRIORITY_DATA, so); m->csum_data = wrs_needed; /* remember this until the WR_ACK */ WR_AVAIL(so) -= wrs_needed; WR_UNACKED(so) += wrs_needed; enqueue_wr(tp, m); - if (__predict_true(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_NEED_HDR)) { + if (__predict_true(m->m_flags & TCPCB_FLAG_NEED_HDR)) { len += ulp_extra_len(m); make_tx_data_wr(so, m, len); tp->snd_nxt += len; - tp->lsndtime = tcp_time_stamp; + tp->ts_recent_age = ticks; #if defined(CONFIG_T3_ZCOPY_SENDMSG) || defined(CONFIG_T3_ZCOPY_SENDMSG_MODULE) atomic_add(skb->len - sizeof (struct tx_data_wr), &d->tx_dma_pending); m->m_pkthdr.priv = so; #endif if ((req_completion && WR_UNACKED(so) == wrs_needed) || - (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_COMPL) || + (m->m_flags & TCPCB_FLAG_COMPL) || WR_UNACKED(so) >= WR_MAX(so) / 2) { struct work_request_hdr *wr = cplhdr(m); wr->wr_hi |= htonl(F_WR_COMPL); WR_UNACKED(so) = 0; } - TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_NEED_HDR; + m->m_flags &= ~TCPCB_FLAG_NEED_HDR; } else if (skb->data[0] == FW_WROPCODE_OFLD_CLOSE_CON) sock_set_flag(sk, CLOSE_CON_REQUESTED); total_size += skb->truesize; - if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_BARRIER) - sock_set_flag(sk, TX_WAIT_IDLE); - set_arp_failure_handler(skb, arp_failure_discard); + if (m->m_flags & TCPCB_FLAG_BARRIER) + SET_TOE_FLAG(so, TX_WAIT_IDLE); + set_arp_failure_handler(m, arp_failure_discard); l2t_send(cdev, m, L2T_ENTRY(so)); } @@ -1686,15 +1679,15 @@ /* * XXX ? */ - sototcpcb(so)->ts_recent = tcp_time_stamp; + sototcpcb(so)->ts_recent_age = ticks; + skb->h.th = tcphdr_skb->h.th; #ifdef T3_TRACE T3_TRACE3(TB(q), "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u lskb->len %u", TCP_SKB_CB(skb)->seq, q->cur_buf, m->m_len); -#endif - +#endif sbappend(&so->so_rcv, m); if (!sock_flag(so, SOCK_DEAD)) @@ -1753,7 +1746,7 @@ if (!(bsp->flags & DDP_BF_NOFLIP)) q->cur_buf ^= 1; - sototcpcb(so)->ts_recent = tcp_time_stamp; + sototcpcb(so)->ts_recent = ticks; sbappend(so->so_rcv, m); /* For now, don't re-enable DDP after a connection fell out of DDP @@ -1815,7 +1808,7 @@ "new_rx_data: seq 0x%x len %u", TCP_SKB_CB(skb)->seq, m->m_len); #endif - sototcpcb(so)->ts_recent = tcp_time_stamp; + sototcpcb(so)->ts_recent = ticks; sbappend(so->so_rcv, m); if (!sock_flag(sk, SOCK_DEAD)) sk->sk_data_ready(sk, 0); @@ -1919,7 +1912,7 @@ if (ddp_report & F_DDP_PSH) TCP_SKB_CB(skb)->flags |= DDP_BF_PSH; - sototcpcb(so)->ts_recent = tcp_time_stamp; + sototcpcb(so)->ts_recent = ticks; sbappend(&so->so_rcv, m); if (!sock_flag(sk, SOCK_DEAD)) sk->sk_data_ready(sk, 0); @@ -2004,7 +1997,7 @@ TCP_SKB_CB(skb)->seq = tp->rcv_nxt; tp->rcv_nxt += skb->len; - sototcpcb(so)->ts_recent = tcp_time_stamp; + sototcpcb(so)->ts_recent = ticks; sbappend(so->so_rcv, m); if (!sock_flag(sk, SOCK_DEAD)) @@ -2094,7 +2087,7 @@ bsp->cur_offset += skb->len; if (!(bsp->flags & DDP_BF_NOFLIP)) q->cur_buf ^= 1; - sototcpcb(so)->ts_recent = tcp_time_stamp; + sototcpcb(so)->ts_recent = ticks; sbappend(&so->so_rcv, m); if (!sock_flag(so, SOCK_DEAD)) sk->sk_data_ready(so, 0); @@ -3135,7 +3128,7 @@ unsigned int tid = TID(so); skb_queue_walk(&sk->sk_write_queue, skb) { - if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_NEED_HDR) { + if (m->m_flags & TCPCB_FLAG_NEED_HDR) { TCP_SKB_CB(skb)->seq = tp->write_seq; tp->write_seq += skb->len + ulp_extra_len(m); } else { @@ -3162,7 +3155,7 @@ log(LOG_ERR, "TID %u expected SYN_SENT, found %d\n", TID(so), tp->t_state); - tp->rcv_tstamp = tcp_time_stamp; + tp->ts_recent_age = ticks; DELACK_SEQ(tp) = tp->copied_seq = tp->rcv_wup = tp->rcv_nxt = rcv_isn; make_established(sk, ntohl(req->snd_isn), ntohs(req->tcp_opt)); @@ -3285,7 +3278,7 @@ if (tp->snd_una != snd_una) { tp->snd_una = snd_una; dst_confirm(so->sk_dst_cache); - tp->rcv_tstamp = tcp_time_stamp; + tp->ts_recent_age = ticks; if (tp->snd_una == tp->snd_nxt) sock_reset_flag(so, TX_WAIT_IDLE); } ==== //depot/projects/opentoe/sys/dev/cxgb/ulp/t3_tom/t3_cpl_socket.c#4 (text+ko) ==== @@ -90,6 +90,62 @@ #define TCPDEBUG2(req) #endif + +/* + * Returns true if a connection should send more data to the TOE ASAP. + */ +static inline int +should_push(const struct socket *so) +{ + struct tcpcb *tp = sototcpcb(so); + struct toedev *dev = TOE_DEV(so); + + return !(WR_MAX(tp) - WR_AVAIL(tp)) || + tp->snd_nxt - tp->snd_una <= TOM_TUNABLE(dev, tx_hold_thres) || + (tp->t_flags & TF_NODELAY); +} + +/* + * Decide if the last frame on the send queue needs any special annotations + * (e.g., marked URG) and whether it should be transmitted immediately or + * held for additional data. This is the only routine that performs the full + * suite of tests for a Tx packet and therefore must be called for the last + * packet added by the various send*() APIs. + */ +static void +tcp_push(struct socket *so, int flags) +{ + + if ((so->so_snd.sb_mb != so->so_snd.sb_lastrecord) && + should_push(so)) { +#ifdef notyet + /* + * XXX first cut doesn't support OOB data + */ + struct tcpcb *tp = sototcpcb(so); + struct sk_buff *skb = sk->sk_write_queue.prev; + + mark_urg(tp, flags, skb); +#endif + t3_push_frames(so, 1); + } +} + +/* + * Try to transmit the send queue if it has just one packet. This is intended + * to be called as full packets are added to the send queue by the various + * send*() APIs when we expect additional packets to be generated by the + * current API call. It should not be called for the last packet generated, + * use the full tcp_push call above for that. + */ +static inline void +push_frames_if_head(struct sock *so) +{ + if (so->so_snd.sb_mb == so->so_snd.sb_lastrecord) + t3_push_frames(so, 1); +} + + static int chelsio_ip_ctloutput(struct socket *so, struct sockopt *sopt) { @@ -105,7 +161,7 @@ sizeof optval); if (inp->inp_ip_tos != optval) { inp->inp_ip_tos = optval; -#ifdef notyet +#ifdef notyet sk->sk_priority = rt_tos2priority(optval); #endif t3_set_tos(so); @@ -147,7 +203,8 @@ } tp = intotcpcb(inp); - if (sopt->sopt_name == TCP_NODELAY) { + switch (sopt->sopt_name) { + case TCP_NODELAY: { switch (sopt->sopt_dir) { case SOPT_SET: int oldflags = tp->t_flags; @@ -162,7 +219,8 @@ else tp->t_flags &= ~TF_NODELAY; - if ((oldflags & TF_NODELAY) == 0) + if ((oldflags & TF_NODELAY) != + (tp->t_flags & TF_NODELAY)) t3_set_nagle(so); break; @@ -175,8 +233,42 @@ break; } - } else + break; + } + case TCP_NOPUSH: { + switch (sopt->sopt_dir) { + case SOPT_SET: + int oldflags = tp->t_flags; + + error = sooptcopyin(sopt, &optval, sizeof optval, + sizeof optval); + if (error) + break; + + if (optval > 0) + tp->t_flags |= TF_NOPUSH; + else + tp->t_flags &= ~TF_NODELAY; + + if ((oldflags & TF_NOPUSH) && + ((tp->t_flags & TF_NOPUSH) == 0)) + tcp_push(so, 0); + + break; + case SOPT_GET: + optval = tp->t_flags & TF_NOPUSH; + error = sooptcopyout(sopt, &optval, sizeof optval); + break; + default: + error = ENOPROTOOPT; + break; + + } + break; + } + default: error = tcp_ctloutput(so, sopt); + } out: INP_UNLOCK(inp); return (error); @@ -401,14 +493,14 @@ chelsio_usr_sosend(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { - long space, resid; + long space, resid, resid_init; int clen = 0, error, dontroute; int atomic = sosendallatonce(so) || top; if (uio != NULL) - resid = uio->uio_resid; + resid_init = resid = uio->uio_resid; else - resid = top->m_pkthdr.len; + resid_init = resid = top->m_pkthdr.len; /* * In theory resid should be unsigned. However, space must be * signed, as it might be less than 0 if we over-committed, and we @@ -521,8 +613,40 @@ #endif resid = uio->uio_resid; } - - + if (dontroute) { + SOCK_LOCK(so); + so->so_options |= SO_DONTROUTE; + SOCK_UNLOCK(so); + } + /* + * XXX ignore OOB date for now + */ + if (resid > 0 && space > 0) + push_frames_if_head(so); + + if (dontroute) { + SOCK_LOCK(so); + so->so_options &= ~SO_DONTROUTE; + SOCK_UNLOCK(so); + } + clen = 0; + control = NULL; + top = NULL; + if (error) + goto release; + } while (resid && space > 0); + } while (resid); + + if (resid_init != resid) + tcp_push(so, flags); +release: + sbunlock(&so->so_snd); +out: + if (top != NULL) + m_freem(top); + if (control != NULL) + m_freem(control); + return (error); } /* ==== //depot/projects/opentoe/sys/dev/cxgb/ulp/t3_tom/t3_defs.h#4 (text+ko) ==== @@ -22,6 +22,24 @@ struct toedev; struct tom_data; +struct t3_toe_private { + unsigned int t3_tid; + struct toedev t3_toedev; + struct l2t_entry t3_l2t_entry; + unsigned int t3_wr_avail; + unsigned int t3_wr_unacked; + unsigned int t3_wr_max; + struct ddp_state t3_ddp_state; + struct mbuf *t3_ctrl_mbuf_cache; + struct mbuf *t3_mbuf_ulp_lhdr; + uint8_t t3_mtu_idx; + uint8_t t3_ulp_mode; + uint8_t t3_hw_qset_idx; + uint8_t t3_rss_qset_idx; + uint32_t t3_flags; +}; + + /* * These flags track some close related events. They share the same space as * the sock_flags in include/net/sock.h, make sure there are no collisions. @@ -39,6 +57,8 @@ TX_FAILOVER // Tx traffic failing over }; +#define + /* * Flags for tcp_skb_cb.flags. Make sure there are no collisions with the * ones already defined, we use only TCPCB_FLAG_URG currently. @@ -53,8 +73,11 @@ TCPCB_FLAG_ZCOPY = 1 << 6, TCPCB_FLAG_ZCOPY_COW = 1 << 7, #endif +}; + +#define TCPCB_FLAG_NEED_HDR M_PROTO1 +#define TCPCB_FLAG_BARRIER M_PROTO2 -}; #define mtoso(m) ((struct socket *)m->m_pkthdr.priv) /* The ULP mode/submode of an mbuf */ @@ -94,6 +117,9 @@ /* the TOE device */ #define TOE_DEV(so) (T3_TOE(so)->t3_toedev) +/* socket TOE flags */ +#define TOE_FLAGS(so) (T3_TOE(so)->t3_flags) + /* pointer to the L2T entry. */ #define L2T_ENTRY(so) (T3_TOE(so)->t3_l2t_entry) @@ -133,6 +159,8 @@ /* TOE RSS queue set */ #define qset(so) (T3_TOE(so)->t3_qset) +#define GET_TOE_FLAG(so, flag) (TOE_FLAGS((so)) & (flag)) +#define SET_TOE_FLAG(so, flag) (TOE_FLAGS((so)) |= (flag)) typedef void (*defer_handler_t)(struct toedev *dev, struct mbuf *m); @@ -200,19 +228,6 @@ #endif } -/* - * Set the ULP mode and submode for a Tx packet. - */ -static inline void skb_set_ulp_mode(struct mbuf *m, int mode, int submode) -{ -#ifdef notyet - /* - * XXX need to flesh out t3_priv structure - */ - skb_ulp_mode(m) = (mode << 4) | submode; -#endif -} - extern const unsigned int t3_ulp_extra_len[]; /* * Return the length of any HW additions that will be made to a Tx packet. @@ -220,11 +235,7 @@ */ static inline unsigned int ulp_extra_len(const struct mbuf *m) { -#ifdef notyet - return t3_ulp_extra_len[skb_ulp_mode(m) & 3]; -#else - return (0); -#endif + return t3_ulp_extra_len[ULP_MODE(mtoso(m)) & 3]; } #include <dev/cxgb/cxgb_osdep.h>
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200706281713.l5SHD4b1031726>