Date: Tue, 6 Nov 2007 04:44:29 GMT From: Kip Macy <kmacy@FreeBSD.org> To: Perforce Change Reviews <perforce@freebsd.org> Subject: PERFORCE change 128722 for review Message-ID: <200711060444.lA64iTnj082386@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=128722 Change 128722 by kmacy@kmacy:storage:toestack on 2007/11/06 04:43:31 add interface for setting socket options add functions to set values in the tcb for options Affected files ... .. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#15 edit .. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_defs.h#6 edit .. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_tom.h#6 edit Differences ... ==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#15 (text+ko) ==== @@ -42,6 +42,7 @@ #include <sys/syslog.h> #include <sys/socketvar.h> #include <sys/protosw.h> +#include <sys/priv.h> #include <net/if.h> #include <net/route.h> @@ -55,6 +56,7 @@ #include <dev/cxgb/cxgb_osdep.h> #include <dev/cxgb/sys/mbufq.h> +#include <netinet/ip.h> #include <netinet/tcp.h> #include <netinet/tcp_var.h> #include <netinet/tcp_fsm.h> @@ -62,6 +64,7 @@ #include <netinet/tcp_seq.h> #include <net/route.h> + #include <dev/cxgb/t3cdev.h> #include <dev/cxgb/common/cxgb_firmware_exports.h> #include <dev/cxgb/common/cxgb_t3_cpl.h> @@ -122,6 +125,7 @@ * coalescing, handle jumbo frames, and not trigger sender SWS avoidance. */ #define MIN_RCV_WND (24 * 1024U) +#define SO_TOS(so) ((sotoinpcb(so)->inp_ip_tos >> 2) & M_TOS) #define VALIDATE_SEQ 0 #define VALIDATE_SOCK(so) @@ -134,6 +138,26 @@ static void t3_send_reset(struct socket *so); +/* + * Determine whether to send a CPL message now or defer it. A message is + * deferred if the connection is in SYN_SENT since we don't know the TID yet. + * For connections in other states the message is sent immediately. + * If through_l2t is set the message is subject to ARP processing, otherwise + * it is sent directly. 
+ */ +static inline void +send_or_defer(struct socket *so, struct tcpcb *tp, struct mbuf *m, int through_l2t) +{ + struct toepcb *toep = tp->t_toe; + + if (__predict_false(tp->t_state == TCPS_SYN_SENT)) + mbufq_tail(&toep->out_of_order_queue, m); // defer + else if (through_l2t) + l2t_send(T3C_DEV(so), m, toep->tp_l2t); // send through L2T + else + cxgb_ofld_send(T3C_DEV(so), m); // send directly +} + static inline unsigned int mkprio(unsigned int cntrl, const struct socket *so) { @@ -481,11 +505,191 @@ .tu_rcvd = cxgb_toe_rcvd, }; + +static void +__set_tcb_field(struct socket *so, struct mbuf *m, uint16_t word, + uint64_t mask, uint64_t val, int no_reply) +{ + struct cpl_set_tcb_field *req; + struct tcpcb *tp = sototcpcb(so); + struct toepcb *toep = tp->t_toe; + + req = mtod(m, struct cpl_set_tcb_field *); + m->m_pkthdr.len = m->m_len = sizeof(*req); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tp_tid)); + req->reply = V_NO_REPLY(no_reply); + req->cpu_idx = 0; + req->word = htons(word); + req->mask = htobe64(mask); + req->val = htobe64(val); + + m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, so)); + send_or_defer(so, tp, m, 0); +} + +static void +t3_set_tcb_field(struct socket *so, uint16_t word, uint64_t mask, uint64_t val) +{ + struct mbuf *m; + struct tcpcb *tp = sototcpcb(so); + struct toepcb *toep = tp->t_toe; + + if (tp->t_state == TCPS_CLOSED || (toep->tp_flags & TP_ABORT_SHUTDOWN)) + return; + + m = m_gethdr(M_NOWAIT, MT_DATA); + if (m == NULL) { + /* + * XXX need lowmem cache; until then drop the update, m must not be NULL below. + */ + return; + } + __set_tcb_field(so, m, word, mask, val, 1); +} + +/* + * Set one of the t_flags bits in the TCB. + */ +static void +set_tcb_tflag(struct socket *so, unsigned int bit_pos, int val) +{ + t3_set_tcb_field(so, W_TCB_T_FLAGS1, 1ULL << bit_pos, val << bit_pos); +} + +/* + * Send a SET_TCB_FIELD CPL message to change a connection's Nagle setting. 
+ */ +static void +t3_set_nagle(struct socket *so) +{ + struct tcpcb *tp = sototcpcb(so); + + set_tcb_tflag(so, S_TF_NAGLE, !(tp->t_flags & TF_NODELAY)); +} + +/* + * Send a SET_TCB_FIELD CPL message to change a connection's keepalive setting. + */ +void +t3_set_keepalive(struct socket *so, int on_off) +{ + set_tcb_tflag(so, S_TF_KEEPALIVE, on_off); +} + void +t3_set_rcv_coalesce_enable(struct socket *so, int on_off) +{ + set_tcb_tflag(so, S_TF_RCV_COALESCE_ENABLE, on_off); +} + +/* + * Send a SET_TCB_FIELD CPL message to change a connection's TOS setting. + */ +static void +t3_set_tos(struct socket *so) +{ + t3_set_tcb_field(so, W_TCB_TOS, V_TCB_TOS(M_TCB_TOS), + V_TCB_TOS(SO_TOS(so))); +} + + +/* + * In DDP mode, TP fails to schedule a timer to push RX data to the host when + * DDP is disabled (data is delivered to freelist). [Note that, the peer should + * set the PSH bit in the last segment, which would trigger delivery.] + * We work around the issue by setting a DDP buffer in a partial placed state, + * which guarantees that TP will schedule a timer. + */ +#define TP_DDP_TIMER_WORKAROUND_MASK\ + (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1) |\ + ((V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |\ + V_TCB_RX_DDP_BUF0_LEN(3)) << 32)) +#define TP_DDP_TIMER_WORKAROUND_VAL\ + (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(0) |\ + ((V_TCB_RX_DDP_BUF0_OFFSET((u64)1) | V_TCB_RX_DDP_BUF0_LEN((u64)2)) <<\ + 32)) + +static void t3_enable_ddp(struct socket *so, int on) { - printf("t3_enable_ddp unimplemented !!!! 
\n"); - + if (on) + t3_set_tcb_field(so, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1), + V_TF_DDP_OFF(0)); + else + t3_set_tcb_field(so, W_TCB_RX_DDP_FLAGS, + V_TF_DDP_OFF(1) | + TP_DDP_TIMER_WORKAROUND_MASK, + V_TF_DDP_OFF(1) | + TP_DDP_TIMER_WORKAROUND_VAL); + +} + + +void +t3_set_ddp_tag(struct socket *so, int buf_idx, unsigned int tag_color) +{ + t3_set_tcb_field(so, W_TCB_RX_DDP_BUF0_TAG + buf_idx, + V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG), + tag_color); +} + +void +t3_set_ddp_buf(struct socket *so, int buf_idx, unsigned int offset, + unsigned int len) +{ + if (buf_idx == 0) + t3_set_tcb_field(so, W_TCB_RX_DDP_BUF0_OFFSET, + V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) | + V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN), + V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset) | + V_TCB_RX_DDP_BUF0_LEN((uint64_t)len)); + else + t3_set_tcb_field(so, W_TCB_RX_DDP_BUF1_OFFSET, + V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) | + V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN << 32), + V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset) | + V_TCB_RX_DDP_BUF1_LEN(((uint64_t)len) << 32)); +} + +static int +t3_set_cong_control(struct socket *so, const char *name) +{ +#ifdef notyet + int cong_algo; + + for (cong_algo = 0; cong_algo < ARRAY_SIZE(t3_cong_ops); cong_algo++) + if (!strcmp(name, t3_cong_ops[cong_algo].name)) + break; + + if (cong_algo >= ARRAY_SIZE(t3_cong_ops)) + return -EINVAL; +#endif + return 0; +} + +int +t3_get_tcb(struct socket *so) +{ + struct cpl_get_tcb *req; + struct tcpcb *tp = sototcpcb(so); + struct toepcb *toep = tp->t_toe; + struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA); + + if (!m) + return (ENOMEM); + + m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, so)); + req = mtod(m, struct cpl_get_tcb *); + m->m_pkthdr.len = m->m_len = sizeof(*req); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, toep->tp_tid)); + req->cpuno = htons(toep->tp_qset); + if (sototcpcb(so)->t_state == TCPS_SYN_SENT) + 
mbufq_tail(&toep->out_of_order_queue, m); // defer + else + cxgb_ofld_send(T3C_DEV(so), m); + return 0; } static inline void @@ -607,7 +811,7 @@ toepcb_release(toep); } #ifdef notyet - t3_set_ca_ops(sk, &tcp_init_congestion_ops); + t3_set_ca_ops(so, &tcp_init_congestion_ops); #endif TOE_DEV(so) = NULL; #if 0 @@ -716,7 +920,6 @@ return (0); } -#define SO_TOS(so) ((sotoinpcb(so)->inp_ip_tos >> 2) & M_TOS) /* * The next two functions calculate the option 0 value for a socket. */ @@ -837,7 +1040,7 @@ if (rpl->status == CPL_ERR_CONN_EXIST && icsk->icsk_retransmit_timer.function != act_open_retry_timer) { icsk->icsk_retransmit_timer.function = act_open_retry_timer; - sk_reset_timer(sk, &icsk->icsk_retransmit_timer, + sk_reset_timer(so, &icsk->icsk_retransmit_timer, jiffies + HZ / 2); } else #endif @@ -979,7 +1182,7 @@ /* Purge the send queue so we don't send anything after an abort. */ sbflush(&so->so_snd); #ifdef notyet - if (sock_flag(sk, CLOSE_CON_REQUESTED) && is_t3a(TOE_DEV(sk))) + if (sock_flag(so, CLOSE_CON_REQUESTED) && is_t3a(TOE_DEV(sk))) mode |= CPL_ABORT_POST_CLOSE_REQ; #endif m = m_gethdr(M_NOWAIT, MT_DATA); @@ -1005,6 +1208,113 @@ l2t_send(T3C_DEV(so), m, toep->tp_l2t); } +static int +t3_ip_ctloutput(struct socket *so, struct sockopt *sopt) +{ + struct inpcb *inp; + int error, optval; + + if (sopt->sopt_name == IP_OPTIONS) + return (ENOPROTOOPT); + + if (sopt->sopt_name != IP_TOS) + return (EOPNOTSUPP); + + error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); + + if (error) + return (error); + + if (optval > IPTOS_PREC_CRITIC_ECP && !suser(curthread)) + return (EPERM); + + inp = sotoinpcb(so); + inp->inp_ip_tos = optval; + + t3_set_tos(so); + + return (0); +} + +static int +t3_tcp_ctloutput(struct socket *so, struct sockopt *sopt) +{ + int err = 0; + size_t copied; + + if (sopt->sopt_name != TCP_CONGESTION && + sopt->sopt_name != TCP_NODELAY) + return (EOPNOTSUPP); + + if (sopt->sopt_name == TCP_CONGESTION) { + char name[TCP_CA_NAME_MAX]; + int 
optlen = sopt->sopt_valsize; + struct tcpcb *tp; + + if (optlen < 1) + return (EINVAL); + + err = copyinstr(sopt->sopt_val, name, + min(TCP_CA_NAME_MAX - 1, optlen), &copied); + if (err) + return (err); + if (copied < 1) + return (EINVAL); + + tp = sototcpcb(so); + if ((err = t3_set_cong_control(so, name)) == 0) + tp->t_cong_control = strdup(name, M_DEVBUF); + else + return (err); + } else { + int optval, oldval; + struct inpcb *inp; + struct tcpcb *tp; + + err = sooptcopyin(sopt, &optval, sizeof optval, + sizeof optval); + + if (err) + return (err); + + inp = sotoinpcb(so); + tp = intotcpcb(inp); + + INP_LOCK(inp); + + oldval = tp->t_flags; + if (optval) + tp->t_flags |= TF_NODELAY; + else + tp->t_flags &= ~TF_NODELAY; + INP_UNLOCK(inp); + + if (oldval != tp->t_flags) + t3_set_nagle(so); + + } + + return (0); +} + +static int +t3_ctloutput(struct socket *so, struct sockopt *sopt) +{ + struct tcpcb *tp = sototcpcb(so); + struct toepcb *toep = tp->t_toe; + int err; + + if (sopt->sopt_level != IPPROTO_TCP) + err = t3_ip_ctloutput(so, sopt); + else + err = t3_tcp_ctloutput(so, sopt); + + if (err != EOPNOTSUPP) + return (err); + + return toep->tp_ctloutput(so, sopt); +} + /* * Process new data received for a connection. 
*/ @@ -1018,12 +1328,12 @@ #ifdef notyet if (__predict_false(sk_no_receive(sk))) { - handle_excess_rx(sk, skb); + handle_excess_rx(so, skb); return; } if (ULP_MODE(tp) == ULP_MODE_TCPDDP) - handle_ddp_data(sk, skb); + handle_ddp_data(so, skb); TCP_SKB_CB(skb)->seq = ntohl(hdr->seq); TCP_SKB_CB(skb)->flags = 0; @@ -1046,7 +1356,7 @@ * We don't handle urgent data yet */ if (__predict_false(hdr->urg)) - handle_urg_ptr(sk, tp->rcv_nxt + ntohs(hdr->urg)); + handle_urg_ptr(so, tp->rcv_nxt + ntohs(hdr->urg)); if (__predict_false(tp->urg_data == TCP_URG_NOTYET && tp->urg_seq - tp->rcv_nxt < skb->len)) tp->urg_data = TCP_URG_VALID | skb->data[tp->urg_seq - @@ -1129,16 +1439,16 @@ T3_TRACE0(TIDTB(sk),"do_peer_fin:"); #endif #ifdef notyet - if (!is_t3a(TOE_DEV(sk)) && sock_flag(sk, ABORT_RPL_PENDING)) + if (!is_t3a(TOE_DEV(sk)) && sock_flag(so, ABORT_RPL_PENDING)) goto out; if (ULP_MODE(tp) == ULP_MODE_TCPDDP) { - keep = handle_peer_close_data(sk, skb); + keep = handle_peer_close_data(so, skb); if (keep < 0) return; } sk->sk_shutdown |= RCV_SHUTDOWN; - sock_set_flag(sk, SOCK_DONE); + sock_set_flag(so, SOCK_DONE); #endif switch (tp->t_state) { case TCPS_SYN_RECEIVED: @@ -1177,9 +1487,9 @@ /* Do not send POLL_HUP for half duplex close. 
*/ if ((sk->sk_shutdown & SEND_SHUTDOWN) || sk->sk_state == TCP_CLOSE) - sk_wake_async(sk, 1, POLL_HUP); + sk_wake_async(so, 1, POLL_HUP); else - sk_wake_async(sk, 1, POLL_IN); + sk_wake_async(so, 1, POLL_IN); #endif } #ifdef notyet @@ -1250,8 +1560,8 @@ #if 0 else if (tcp_sk(sk)->linger2 < 0 && - !sock_flag(sk, ABORT_SHUTDOWN)) - abort_conn(sk, skb, LINUX_MIB_TCPABORTONLINGER); + !sock_flag(so, ABORT_SHUTDOWN)) + abort_conn(so, skb, LINUX_MIB_TCPABORTONLINGER); #endif break; default: @@ -1351,6 +1661,9 @@ toep->tp_write_seq = tp->iss = tp->snd_max = tp->snd_nxt = tp->snd_una = snd_isn; assign_rxopt(so, opt); + toep->tp_ctloutput = so->so_proto->pr_ctloutput; + so->so_proto->pr_ctloutput = t3_ctloutput; + #if 0 inet_sk(sk)->id = tp->write_seq ^ jiffies; #endif @@ -1406,7 +1719,7 @@ */ if (unlikely(sk->sk_socket)) { // simultaneous opens only sk->sk_state_change(sk); - sk_wake_async(sk, 0, POLL_OUT); + sk_wake_async(so, 0, POLL_OUT); } /* * The state for the new connection is now up to date. @@ -1490,7 +1803,7 @@ toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = tp->irs; make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt)); - + /* * Now that we finally have a TID send any CPL messages that we had to * defer for lack of a TID. @@ -1505,11 +1818,9 @@ * appears to correspond to sorwakeup_locked */ sk->sk_state_change(sk); - sk_wake_async(sk, 0, POLL_OUT); + sk_wake_async(so, 0, POLL_OUT); #endif } - printf("freeing %p\n", m); - m_free(m); #ifdef notyet /* @@ -1526,7 +1837,7 @@ * them on their way. 
*/ fixup_pending_writeq_buffers(sk); - if (t3_push_frames(sk, 1)) + if (t3_push_frames(so, 1)) sk->sk_write_space(sk); #endif ==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_defs.h#6 (text+ko) ==== @@ -12,7 +12,6 @@ void t3_listen_start(struct toedev *dev, struct socket *so, struct t3cdev *cdev); void t3_listen_stop(struct toedev *dev, struct socket *so, struct t3cdev *cdev); int t3_push_frames(struct socket *so, int req_completion); -void t3_enable_ddp(struct socket *so, int on); int t3_connect(struct toedev *tdev, struct socket *so, struct ifnet *egress_ifp); void t3_init_listen_cpl_handlers(void); int t3_init_cpl_io(void); @@ -28,4 +27,11 @@ void toepcb_release(struct toepcb *); void toepcb_init(struct toepcb *); +void t3_set_rcv_coalesce_enable(struct socket *so, int on_off); +void t3_set_keepalive(struct socket *so, int on_off); +void t3_set_ddp_tag(struct socket *so, int buf_idx, unsigned int tag); +void t3_set_ddp_buf(struct socket *so, int buf_idx, unsigned int offset, + unsigned int len); +int t3_get_tcb(struct socket *so); + #endif ==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_tom.h#6 (text+ko) ==== @@ -1,6 +1,6 @@ #ifndef CXGB_TOM_H_ #define CXGB_TOM_H_ - +#include <sys/protosw.h> #define LISTEN_INFO_HASH_SIZE 32 @@ -99,8 +99,9 @@ struct toepcb { struct toedev *tp_toedev; + struct l2t_entry *tp_l2t; + pr_ctloutput_t *tp_ctloutput; int tp_tid; - struct l2t_entry *tp_l2t; int tp_wr_max; int tp_wr_avail; int tp_wr_unacked;
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200711060444.lA64iTnj082386>