From owner-p4-projects@FreeBSD.ORG Sun May 6 02:14:34 2007 Return-Path: X-Original-To: p4-projects@freebsd.org Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id B7C2F16A406; Sun, 6 May 2007 02:14:34 +0000 (UTC) X-Original-To: perforce@freebsd.org Delivered-To: perforce@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 9489C16A402 for ; Sun, 6 May 2007 02:14:34 +0000 (UTC) (envelope-from kmacy@freebsd.org) Received: from repoman.freebsd.org (repoman.freebsd.org [69.147.83.41]) by mx1.freebsd.org (Postfix) with ESMTP id 83C6513C46C for ; Sun, 6 May 2007 02:14:34 +0000 (UTC) (envelope-from kmacy@freebsd.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.13.8/8.13.8) with ESMTP id l462EYJE066423 for ; Sun, 6 May 2007 02:14:34 GMT (envelope-from kmacy@freebsd.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.13.8/8.13.8/Submit) id l462EYpw066417 for perforce@freebsd.org; Sun, 6 May 2007 02:14:34 GMT (envelope-from kmacy@freebsd.org) Date: Sun, 6 May 2007 02:14:34 GMT Message-Id: <200705060214.l462EYpw066417@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to kmacy@freebsd.org using -f From: Kip Macy To: Perforce Change Reviews Cc: Subject: PERFORCE change 119317 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 06 May 2007 02:14:35 -0000 http://perforce.freebsd.org/chv.cgi?CH=119317 Change 119317 by kmacy@kmacy_vt-x:opentoe_init on 2007/05/06 02:14:19 more incremental BSD-ification of linux TOE code Affected files ... .. //depot/projects/opentoe/sys/dev/cxgb/cxgb_l2t.h#6 edit .. //depot/projects/opentoe/sys/dev/cxgb/ulp/t3_tom/t3_cpl_io.c#4 edit .. //depot/projects/opentoe/sys/dev/cxgb/ulp/t3_tom/t3_ddp.c#3 edit .. //depot/projects/opentoe/sys/dev/cxgb/ulp/t3_tom/t3_ddp.h#2 edit .. //depot/projects/opentoe/sys/dev/cxgb/ulp/t3_tom/t3_defs.h#3 edit .. //depot/projects/opentoe/sys/dev/cxgb/ulp/t3_tom/t3_listen.c#3 edit .. //depot/projects/opentoe/sys/dev/cxgb/ulp/t3_tom/t3_tom.c#3 edit .. //depot/projects/opentoe/sys/dev/cxgb/ulp/t3_tom/t3_tom.h#3 edit .. //depot/projects/opentoe/sys/dev/cxgb/ulp/toecore/toe_offload.c#3 edit .. //depot/projects/opentoe/sys/dev/cxgb/ulp/toecore/toe_offload.h#4 edit Differences ... ==== //depot/projects/opentoe/sys/dev/cxgb/cxgb_l2t.h#6 (text+ko) ==== @@ -2,6 +2,7 @@ #define _CHELSIO_L2T_H #include +#include #include enum { ==== //depot/projects/opentoe/sys/dev/cxgb/ulp/t3_tom/t3_cpl_io.c#4 (text+ko) ==== @@ -10,11 +10,7 @@ 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - 3. Neither the name of the Chelsio Corporation nor the names of its + 2. Neither the name of the Chelsio Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. @@ -31,17 +27,47 @@ POSSIBILITY OF SUCH DAMAGE. ***************************************************************************/ -#include + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include #include -#include -#include +#include #include #include #include #include +#include #include -#include +#include + + + #define DEBUG_WR 0 @@ -75,11 +101,6 @@ static unsigned int mbuf_wrs[MAX_MBUF_IOV + 2] __read_mostly; /* - * Socket filter that drops everything by specifying a 0-length filter program. - */ -static struct sk_filter drop_all = { .refcnt = ATOMIC_INIT(1) }; - -/* * TOE information returned through inet_diag for offloaded connections. */ struct t3_inet_diag_info { @@ -100,9 +121,9 @@ process_cpl_msg_ref(void (*fn)(struct socket *, struct mbuf *), struct socket *so, struct mbuf *m) { - sock_hold(so); + soref_lock(so); process_cpl_msg(fn, so, m); - sock_put(so); + sorele_lock(so); } static inline int @@ -121,6 +142,7 @@ static struct mbuf * get_cpl_reply_mbuf(struct mbuf *m, size_t len) { +#ifdef notyet if (__predict_true(!skb_cloned(skb))) { BUG_ON(skb->len < len); __skb_trim(skb, len); @@ -131,6 +153,8 @@ __skb_put(skb, len); } return skb; +#endif + return (NULL); } /* @@ -139,12 +163,15 @@ static struct mbuf * __get_cpl_reply_mbuf(struct mbuf *m, size_t len) { +#ifdef notyet if (__predict_true(!skb_cloned(skb) && !skb->data_len)) { __skb_trim(skb, 0); skb_get(skb); } else skb = alloc_skb(len, gfp); return skb; +#endif + return (NULL); } /* @@ -158,8 +185,8 @@ send_or_defer(struct socket *so, struct tcpcb *tp, struct mbuf *m, int through_l2t) { - if (__predict_false(sk->sk_state == TCP_SYN_SENT)) - __skb_queue_tail(&tp->out_of_order_queue, m); // defer + if (__predict_false(tp->t_state == TCPS_SYN_SENT)) + mbufq_tail(T3_TOE(so)->out_of_order_queue, m); // defer else if (through_l2t) l2t_send(TOE_DEV(so), m, L2T_ENTRY(so)); // send through L2T else @@ -198,8 +225,8 @@ static inline void sk_insert_tid(struct tom_data *d, struct socket *so, unsigned int tid) { - sock_hold(sk); - cxgb_insert_tid(d->cdev, d->client, sk, tid); + soref_lock(so); + cxgb_insert_tid(d->cdev, d->client, so, tid); } /** @@ -226,18 +253,18 @@ unsigned int idx; struct tcpcb *tp = sototcpcb(so); struct dst_entry *dst = __sk_dst_get(sk); - struct tom_data *d = TOM_DATA(TOE_DEV(sk)); + struct tom_data *d = TOM_DATA(TOE_DEV(so)); const struct t3_data *td = TOE_DATA(d->cdev); - tp->advmss = dst_metric(dst, RTAX_ADVMSS); - if (USER_MSS(tp) && tp->advmss > USER_MSS(tp)) - tp->advmss = USER_MSS(tp); - if (tp->advmss > pmtu - 40) - tp->advmss = pmtu - 40; - if (tp->advmss < td->mtus[0] - 40) - tp->advmss = td->mtus[0] - 40; - idx = find_best_mtu(td, tp->advmss + 40); - tp->advmss = td->mtus[idx] - 40; + tp->t_maxseg = dst_metric(dst, RTAX_ADVMSS); + if (USER_MSS(tp) && tp->t_maxseg > USER_MSS(tp)) + tp->t_maxseg = USER_MSS(tp); + if (tp->t_maxseg > pmtu - 40) + tp->t_maxseg = pmtu - 40; + if (tp->t_maxseg < td->mtus[0] - 40) + tp->t_maxseg = td->mtus[0] - 40; + idx = find_best_mtu(td, tp->t_maxseg + 40); + tp->t_maxseg = td->mtus[idx] - 40; inet_csk(sk)->icsk_pmtu_cookie = pmtu; return idx; } @@ -252,13 +279,13 @@ return tid < t->ntids && !t->tid_tab[tid].ctx; } -#define VALIDATE_SOCK(sk) \ +#define VALIDATE_SOCK(so) \ do { \ - if (__predict_false(!(sk))) \ + if (__predict_false(!(so))) \ return CPL_RET_UNKNOWN_TID | CPL_RET_BUF_DONE; \ } while (0) #else -#define VALIDATE_SOCK(sk) do {} while (0) +#define VALIDATE_SOCK(so) do {} while (0) #endif /* @@ -271,21 +298,21 @@ connection_done(struct socket *so) { #if 0 - printk("connection_done: TID: %u, state: %d, dead %d, refs %d\n", + log("connection_done: TID: %u, state: %d, dead %d, refs %d\n", TID(sototcpcb(so)), sk->sk_state, sock_flag(sk, SOCK_DEAD), atomic_read(&sk->sk_refcnt)); // dump_stack(); #endif #ifdef T3_TRACE - T3_TRACE1(TIDTB(sk), + T3_TRACE1(TIDTB(so), "connection_done: GTS rpl pending %d, if pending wake", sock_flag(sk, ABORT_RPL_PENDING)); #endif if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_all(sk->sk_sleep); - tcp_done(sk); + tcp_done(so); } /* @@ -320,7 +347,7 @@ } /* Returns bits 2:7 of a socket's TOS field */ -#define SK_TOS(sk) ((inet_sk(sk)->tos >> 2) & M_TOS) +#define SO_TOS(so) ((sotoinpcb(so)->inp_ip_tos >> 2) & M_TOS) /* * The next two functions calculate the option 0 value for a socket. @@ -330,9 +357,9 @@ { struct tcpcb *tp = sototcpcb(so); - return V_NAGLE((tp->nonagle & TCP_NAGLE_OFF) == 0) | - V_KEEP_ALIVE(sock_flag(so, SOCK_KEEPOPEN) != 0) | F_TCAM_BYPASS | - V_WND_SCALE(RCV_WSCALE(so)) | V_MSS_IDX(MTU_IDX(so)); + return V_NAGLE((tp->t_flags & TF_NODELAY) == 0) | + V_KEEP_ALIVE(!!(so->so_options & SO_KEEPALIVE)) | F_TCAM_BYPASS | + V_WND_SCALE(tp->rcv_scale) | V_MSS_IDX(MTU_IDX(so)); } static inline unsigned int @@ -340,14 +367,14 @@ { struct tcpcb *tp = sototcpcb(so); - return V_TOS(SK_TOS(so)) | V_ULP_MODE(ULP_MODE(so)) | + return V_TOS(SO_TOS(so)) | V_ULP_MODE(ULP_MODE(so)) | V_RCV_BUFSIZ(min(tp->rcv_wnd >> 10, (uint32_t)M_RCV_BUFSIZ)); } static inline unsigned int calc_opt2(const struct socket *so) { - const struct toedev *dev = TOE_DEV(sk); + const struct toedev *dev = TOE_DEV(so); int flv_valid = TOM_TUNABLE(dev, cong_alg) != -1; return V_FLAVORS_VALID(flv_valid) | @@ -420,13 +447,13 @@ } static inline void -free_wr_skb(struct mbuf *skb) +free_wr_skb(struct mbuf *m) { #if defined(CONFIG_T3_ZCOPY_SENDMSG) || defined(CONFIG_T3_ZCOPY_SENDMSG_MODULE) if (skb->data[0] == FW_WROPCODE_OFLD_TX_DATA) - t3_zcopy_cleanup_skb(skb); + t3_zcopy_cleanup_skb(m); #endif - kfree_skb(skb); + m_freem(m); } static void @@ -434,7 +461,7 @@ { struct mbuf *skb; while ((skb = dequeue_wr(tp)) != NULL) - free_wr_skb(skb); + free_wr_skb(m); } #define wr_queue_walk(tp, skb) \ @@ -467,19 +494,19 @@ skb->h.raw = skb->data; req = (struct tx_data_wr *)__skb_push(skb, sizeof(*req)); req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); - req->wr_lo = htonl(V_WR_TID(TID(tp))); + req->wr_lo = htonl(V_WR_TID(TID(so))); req->sndseq = htonl(tp->snd_nxt); /* len includes the length of any HW ULP additions */ req->len = htonl(len); - req->param = htonl(V_TX_PORT(L2T_ENTRY(sk)->smt_idx)); + req->param = htonl(V_TX_PORT(L2T_ENTRY(so)->smt_idx)); /* V_TX_ULP_SUBMODE sets both the mode and submode */ - req->flags = htonl(V_TX_ULP_SUBMODE(skb_ulp_mode(skb)) | - V_TX_URG(skb_urgent(skb)) | + req->flags = htonl(V_TX_ULP_SUBMODE(skb_ulp_mode(m)) | + V_TX_URG(skb_urgent(m)) | V_TX_SHOVE((!sock_flag(sk, TX_MORE_DATA)) && (skb_peek(&sk->sk_write_queue) ? 0 : 1))); if (!sock_flag(sk, TX_DATA_SENT)) { - req->flags |= htonl(F_TX_INIT | V_TX_CPU_IDX(qset(tp))); + req->flags |= htonl(F_TX_INIT | V_TX_CPU_IDX(qset(so))); /* * The send buffer size is in 32KB. In addition Linux doubles * what the user requested to account for header and mbuf @@ -518,10 +545,10 @@ if (__predict_false(sock_flag(sk, ABORT_SHUTDOWN))) return 0; - d = TOM_DATA(TOE_DEV(sk)); + d = TOM_DATA(TOE_DEV(so)); cdev = d->cdev; - while (WR_AVAIL(tp) && (skb = skb_peek(&sk->sk_write_queue)) != NULL && + while (WR_AVAIL(so) && (skb = skb_peek(&sk->sk_write_queue)) != NULL && !sock_flag(sk, TX_WAIT_IDLE) && (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_HOLD) || skb_queue_len(&sk->sk_write_queue) > 1)) { @@ -531,18 +558,18 @@ int wrs_needed = skb_wrs[frags]; WARN_ON(frags >= ARRAY_SIZE(skb_wrs) || wrs_needed < 1); - if (WR_AVAIL(tp) < wrs_needed) + if (WR_AVAIL(so) < wrs_needed) break; __skb_unlink(skb, &sk->sk_write_queue); skb->priority = mkprio(CPL_PRIORITY_DATA, sk); skb->csum = wrs_needed; /* remember this until the WR_ACK */ - WR_AVAIL(tp) -= wrs_needed; - WR_UNACKED(tp) += wrs_needed; + WR_AVAIL(so) -= wrs_needed; + WR_UNACKED(so) += wrs_needed; enqueue_wr(tp, skb); if (__predict_true(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_NEED_HDR)) { - len += ulp_extra_len(skb); + len += ulp_extra_len(m); make_tx_data_wr(sk, skb, len); tp->snd_nxt += len; tp->lsndtime = tcp_time_stamp; @@ -551,13 +578,13 @@ &d->tx_dma_pending); skb->sk = sk; #endif - if ((req_completion && WR_UNACKED(tp) == wrs_needed) || + if ((req_completion && WR_UNACKED(so) == wrs_needed) || (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_COMPL) || - WR_UNACKED(tp) >= WR_MAX(tp) / 2) { - struct work_request_hdr *wr = cplhdr(skb); + WR_UNACKED(so) >= WR_MAX(so) / 2) { + struct work_request_hdr *wr = cplhdr(m); wr->wr_hi |= htonl(F_WR_COMPL); - WR_UNACKED(tp) = 0; + WR_UNACKED(so) = 0; } TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_NEED_HDR; } else if (skb->data[0] == FW_WROPCODE_OFLD_CLOSE_CON) @@ -568,7 +595,7 @@ sock_set_flag(sk, TX_WAIT_IDLE); set_arp_failure_handler(skb, arp_failure_discard); - l2t_send(cdev, skb, L2T_ENTRY(sk)); + l2t_send(cdev, skb, L2T_ENTRY(so)); } sk->sk_wmem_queued -= total_size; return total_size; @@ -581,11 +608,12 @@ }; #endif -static inline void free_atid(struct toedev *cdev, unsigned int tid) +static inline void +free_atid(struct toedev *cdev, unsigned int tid) { struct socket *so = cxgb_free_atid(cdev, tid); - if (sk) - sock_put(sk); + if (so) + sorele_lock(so); } /* * Release resources held by an offload connection (TID, L2T entry, etc.) @@ -594,47 +622,47 @@ t3_release_offload_resources(struct socket *so) { struct tcpcb *tp = sototcpcb(so); - struct toedev *tdev = TOE_DEV(sk); + struct toedev *tdev = TOE_DEV(so); struct toedev *cdev; - unsigned int tid = TID(tp); + unsigned int tid = TID(so); if (!tdev) return; - cdev = TOE_DEV(sk); + cdev = TOE_DEV(so); if (!cdev) return; - qset(tp) = 0; - t3_release_ddp_resources(sk); + qset(so) = 0; + t3_release_ddp_resources(so); #ifdef CTRL_SKB_CACHE - kfree_skb(CTRL_SKB_CACHE(tp)); - CTRL_SKB_CACHE(tp) = NULL; + m_freem(CTRL_MBUF_CACHE(so)); + CTRL_MBUF_CACHE(so) = NULL; #endif - if (WR_AVAIL(tp) != WR_MAX(tp)) { + if (WR_AVAIL(so) != WR_MAX(so)) { purge_wr_queue(tp); reset_wr_list(tp); } - if (L2T_ENTRY(sk)) { - l2t_release(L2DATA(cdev), L2T_ENTRY(sk)); - L2T_ENTRY(sk) = NULL; + if (L2T_ENTRY(so)) { + l2t_release(L2DATA(cdev), L2T_ENTRY(so)); + L2T_ENTRY(so) = NULL; } - if (sk->sk_state == TCP_SYN_SENT) { // we have ATID + if (tp->t_state == TCPS_SYN_SENT) { // we have ATID free_atid(cdev, tid); - __skb_queue_purge(&tp->out_of_order_queue); + mbufq_purge(T3_TOE(so)->out_of_order_queue); } else { // we have TID - cxgb_remove_tid(cdev, (void *)sk, tid); - sock_put(sk); + cxgb_remove_tid(cdev, (void *)so, tid); + sorele_lock(so) } - t3_set_ca_ops(sk, &tcp_init_congestion_ops); - TOE_DEV(sk) = NULL; + t3_set_ca_ops(so, &tcp_init_congestion_ops); + TOE_DEV(so) = NULL; #if 0 - printk(KERN_INFO "closing TID %u, state %u\n", tid, sk->sk_state); + log(KERN_INFO "closing TID %u, state %u\n", tid, sk->sk_state); #endif } @@ -669,17 +697,17 @@ struct mbuf *p; struct tcpcb *tp = sototcpcb(so); - printk("TID %u info:\n", TID(tp)); + log("TID %u info:\n", TID(so)); skb_queue_walk(&sk->sk_write_queue, p) { d = cplhdr(p); - printk(" len %u, frags %u, flags %x, data %llx\n", + log(" len %u, frags %u, flags %x, data %llx\n", p->len, skb_shinfo(p)->nr_frags, TCP_SKB_CB(p)->flags, (unsigned long long)be64_to_cpu(*d)); } - printk("outstanding:\n"); + log("outstanding:\n"); wr_queue_walk(tp, p) { d = cplhdr(p); - printk(" len %u, frags %u, flags %x, data %llx,%llx,%llx\n", + log(" len %u, frags %u, flags %x, data %llx,%llx,%llx\n", p->len, skb_shinfo(p)->nr_frags, TCP_SKB_CB(p)->flags, (unsigned long long)be64_to_cpu(*d), (unsigned long long)be64_to_cpu(d[1]), @@ -703,10 +731,10 @@ { int pending = count_pending_wrs(tp); - if (__predict_false(WR_AVAIL(tp) + pending != WR_MAX(tp))) - printk(KERN_ERR "TID %u: credit imbalance: avail %u, " - "pending %u, total should be %u\n", TID(tp), - WR_AVAIL(tp), pending, WR_MAX(tp)); + if (__predict_false(WR_AVAIL(so) + pending != WR_MAX(so))) + log(LOG_ERR, "TID %u: credit imbalance: avail %u, " + "pending %u, total should be %u\n", TID(so), + WR_AVAIL(so), pending, WR_MAX(so)); } #endif @@ -715,10 +743,10 @@ { #if DEBUG_WR if (ext & (1 << (INET_DIAG_MEMINFO - 1))) { - bh_lock_sock(sk); - if (!sock_owned_by_user(sk)) - dump_wrs(sk); - bh_unlock_sock(sk); + SOCK_LOCK(so); + if (!sock_owned_by_user(so)) + dump_wrs(so); + SOCK_UNLOCK(so); } #endif @@ -730,11 +758,11 @@ rta = __RTA_PUT(skb, INET_DIAG_MAX + 1, sizeof(*info)); info = RTA_DATA(rta); info->toe_id = TOE_ID_CHELSIO_T3; - info->tid = TID(tp); - info->wrs = WR_MAX(tp) - WR_AVAIL(tp); - info->ulp_mode = ULP_MODE(tp); - info->ddp_enabled = DDP_STATE(tp) != NULL; - strcpy(info->dev_name, TOE_DEV(sk)->name); + info->tid = TID(so); + info->wrs = WR_MAX(so) - WR_AVAIL(so); + info->ulp_mode = ULP_MODE(so); + info->ddp_enabled = DDP_STATE(so) != NULL; + strcpy(info->dev_name, TOE_DEV(so)->name); rtattr_failure: ; } } @@ -757,15 +785,15 @@ req = (struct cpl_act_open_req *)__skb_put(skb, sizeof(*req)); req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid)); - req->local_port = inet_sk(sk)->sport; - req->peer_port = inet_sk(sk)->dport; - req->local_ip = inet_sk(sk)->saddr; - req->peer_ip = inet_sk(sk)->daddr; - req->opt0h = htonl(calc_opt0h(sk) | V_L2T_IDX(e->idx) | + req->local_port = sotoinpcb(so)->sport; + req->peer_port = sotoinpcb(so)->dport; + req->local_ip = sotoinpcb(so)->saddr; + req->peer_ip = sotoinpcb(so)->daddr; + req->opt0h = htonl(calc_opt0h(so) | V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx)); - req->opt0l = htonl(calc_opt0l(sk)); + req->opt0l = htonl(calc_opt0l(so)); req->params = 0; - req->opt2 = htonl(calc_opt2(sk)); + req->opt2 = htonl(calc_opt2(so)); } /* @@ -784,50 +812,52 @@ case CPL_ERR_TCAM_FULL: return ENOMEM; case CPL_ERR_CONN_EXIST: - printk(KERN_ERR "ACTIVE_OPEN_RPL: 4-tuple in use\n"); + log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n"); return EADDRINUSE; default: return EIO; } } -static void act_open_req_arp_failure(struct toedev *dev, struct mbuf *skb); +static void act_open_req_arp_failure(struct toedev *dev, struct mbuf *m); static void fail_act_open(struct socket *so, int errno) { sk->sk_err = errno; - sk->sk_error_report(sk); - t3_release_offload_resources(sk); - connection_done(sk); + sk->sk_error_report(so); + t3_release_offload_resources(so); + connection_done(so); TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); } static void act_open_retry_timer(unsigned long data) { - struct mbuf *skb; + struct mbuf *m; struct socket *so = (struct socket *)data; struct inet_connection_sock *icsk = inet_csk(sk); - bh_lock_sock(sk); + SOCK_LOCK(so); if (sock_owned_by_user(sk)) /* try in a bit */ sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + HZ / 20); else { - skb = alloc_skb(sizeof(struct cpl_act_open_req), GFP_ATOMIC); - if (!skb) - fail_act_open(sk, ENOMEM); + m = m_gethdr(M_NOWAIT, MT_DATA); + if (!m) + fail_act_open(so, ENOMEM); else { - skb->sk = sk; - set_arp_failure_handler(skb, act_open_req_arp_failure); - mk_act_open_req(sk, skb, TID(sototcpcb(so)), - L2T_ENTRY(sk)); - l2t_send(TOE_DEV(sk), skb, L2T_ENTRY(sk)); + m->m_pkthdr.priv = so; +#ifdef notyet + set_arp_failure_handler(m, act_open_req_arp_failure); +#endif + mk_act_open_req(so, m, TID(so), + L2T_ENTRY(so)); + l2t_send(TOE_DEV(so), m, L2T_ENTRY(so)); } } - bh_unlock_sock(sk); - sock_put(sk); + sorele(so); + SOCK_UNLOCK(so); } /* @@ -836,7 +866,7 @@ static void active_open_failed(struct socket *so, struct mbuf *m) { - struct cpl_act_open_rpl *rpl = cplhdr(skb); + struct cpl_act_open_rpl *rpl = cplhdr(m); struct inet_connection_sock *icsk = inet_csk(sk); if (rpl->status == CPL_ERR_CONN_EXIST && @@ -846,7 +876,7 @@ jiffies + HZ / 2); } else fail_act_open(sk, act_open_rpl_status_to_errno(rpl->status)); - __kfree_skb(skb); + m_freem(m); } /* @@ -873,7 +903,7 @@ if (cdev->type != T3A && act_open_has_tid(rpl->status)) cxgb_queue_tid_release(cdev, GET_TID(rpl)); - process_cpl_msg_ref(active_open_failed, sk, skb); + process_cpl_msg_ref(active_open_failed, sk, m); return 0; } @@ -885,16 +915,17 @@ * check SOCK_DEAD or sk->sk_sock. Or maybe generate the error here but don't * free the atid. Hmm. */ -static void act_open_req_arp_failure(struct toedev *dev, struct mbuf *skb) +static void +act_open_req_arp_failure(struct toedev *dev, struct mbuf *m) { - struct socket *so = skb->sk; + struct socket *so = m->m_pkthdr.priv; - sock_hold(sk); - bh_lock_sock(sk); - if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) { + SOCK_LOCK(so); + soref(so); + if (tp->t_state == TCPS_SYN_SENT || sk->sk_state == TCP_SYN_RECV) { if (!sock_owned_by_user(sk)) { fail_act_open(sk, EHOSTUNREACH); - __kfree_skb(skb); + m_freem(m); } else { /* * Smart solution: Synthesize an ACTIVE_OPEN_RPL in the @@ -902,12 +933,12 @@ * are certain the mbuf is not shared. We also * don't bother trimming the buffer. */ - struct cpl_act_open_rpl *rpl = cplhdr(skb); + struct cpl_act_open_rpl *rpl = cplhdr(m); rpl->ot.opcode = CPL_ACT_OPEN_RPL; rpl->status = CPL_ERR_ARP_MISS; - SET_BLOG_CPL_HANDLER(skb, active_open_failed); - sk_add_backlog(sk, skb); + SET_BLOG_CPL_HANDLER(m, active_open_failed); + sk_add_backlog(sk, m); /* * XXX Make sure a PASS_ACCEPT_RPL behind us doesn't @@ -917,14 +948,15 @@ */ } } - bh_unlock_sock(sk); - sock_put(sk); + sorele(so); + SOCK_UNLOCK(so); } /* * Determine the receive window size for a socket. */ -static unsigned int select_rcv_wnd(struct socket *so) +static unsigned int +select_rcv_wnd(struct socket *so) { unsigned int wnd = tcp_full_space(sk); @@ -939,7 +971,8 @@ } #if defined(TCP_CONGESTION_CONTROL) -static void pivot_ca_ops(struct socket *so, int cong) +static void +pivot_ca_ops(struct socket *so, int cong) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -956,25 +989,26 @@ * Assign offload parameters to some socket fields. This code is used by * both active and passive opens. */ -static inline void init_offload_sk(struct socket *so, struct toedev *dev, +static inline void +init_offload_so(struct socket *so, struct toedev *dev, unsigned int tid, struct l2t_entry *e, struct dst_entry *dst) { struct tcpcb *tp = sototcpcb(so); TOE_DEV(sk) = dev; - TID(tp) = tid; + TID(so) = tid; L2T_ENTRY(sk) = e; - WR_MAX(tp) = WR_AVAIL(tp) = TOM_TUNABLE(dev, max_wrs); - WR_UNACKED(tp) = 0; + WR_MAX(so) = WR_AVAIL(so) = TOM_TUNABLE(dev, max_wrs); + WR_UNACKED(so) = 0; DELACK_MODE(sk) = 0; - MTU_IDX(tp) = select_mss(sk, dst_mtu(dst)); + MTU_IDX(so) = select_mss(sk, dst_mtu(dst)); tp->rcv_wnd = select_rcv_wnd(sk); - ULP_MODE(tp) = TOM_TUNABLE(dev, ddp) && !sock_flag(sk, NO_DDP) && + ULP_MODE(so) = TOM_TUNABLE(dev, ddp) && !sock_flag(sk, NO_DDP) && tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0; QSET_IDX(sk) = 0; #ifdef CTRL_SKB_CACHE - CTRL_SKB_CACHE(tp) = alloc_skb(CTRL_SKB_LEN, gfp_any()); + CTRL_SKB_CACHE(so) = alloc_skb(CTRL_SKB_LEN, gfp_any()); #endif reset_wr_list(tp); @@ -988,10 +1022,11 @@ /* * Send an active open request. */ -int t3_connect(struct toedev *tdev, struct socket *so, +int +t3_connect(struct toedev *tdev, struct socket *so, struct net_device *egress_dev) { - struct mbuf *skb; + struct mbuf *m; struct l2t_entry *e; struct tom_data *d = TOM_DATA(tdev); struct tcpcb *tp = sototcpcb(so); @@ -1005,26 +1040,25 @@ if (!e) goto free_tid; - skb = alloc_skb_nofail(sizeof(struct cpl_act_open_req)); - skb->sk = sk; - set_arp_failure_handler(skb, act_open_req_arp_failure); + m = alloc_mbuf_nofail(sizeof(struct cpl_act_open_req)); + m->m_pkthdr.priv = so; + set_arp_failure_handler(m, act_open_req_arp_failure); - sock_hold(sk); - - install_offload_ops(sk); + soref_lock(so); + + install_offload_ops(so); init_offload_sk(sk, tdev, atid, e, dst); - RCV_WSCALE(tp) = select_rcv_wscale(tcp_full_space(sk), + tp->rcv_scale) = select_rcv_wscale(tcp_full_space(sk), sysctl_tcp_window_scaling, tp->window_clamp); - sk->sk_err = 0; sock_reset_flag(sk, SOCK_DONE); TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); - mk_act_open_req(sk, skb, atid, e); - l2t_send(d->cdev, skb, e); - if (ULP_MODE(tp)) - t3_enable_ddp(sk, 0); + mk_act_open_req(sk, m, atid, e); + l2t_send(d->cdev, m, e); + if (ULP_MODE(so)) + t3_enable_ddp(so, 0); return 0; free_tid: @@ -1037,12 +1071,13 @@ * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant * and send it along. */ -static void abort_arp_failure(struct toedev *cdev, struct mbuf *skb) +static void +abort_arp_failure(struct toedev *cdev, struct mbuf *m) { - struct cpl_abort_req *req = cplhdr(skb); + struct cpl_abort_req *req = cplhdr(m); req->cmd = CPL_ABORT_NO_RST; - cxgb_ofld_send(cdev, skb); + cxgb_ofld_send(cdev, m); } /* @@ -1051,20 +1086,24 @@ * not try to send a message after the connection has closed. Returns 1 if * an ABORT_REQ wasn't generated after all, 0 otherwise. */ -int t3_send_reset(struct socket *so, int mode, struct mbuf *skb) +int +t3_send_reset(struct socket *so, int mode, struct mbuf *m) { struct cpl_abort_req *req; struct tcpcb *tp = sototcpcb(so); - unsigned int tid = TID(tp); + unsigned int tid = TID(so); - if (__predict_false(sock_flag(sk, ABORT_SHUTDOWN) || !TOE_DEV(sk))) { - if (skb) - __kfree_skb(skb); + if (__predict_false(so->so_state == SS_DISCONNECTING || + so->so_state == SS_DISCONNECTED || !TOE_DEV(so))) { + if (m) + m_freem(m); return 1; } +#ifdef notyet sock_set_flag(sk, ABORT_RPL_PENDING); sock_set_flag(sk, ABORT_SHUTDOWN); +#endif /* Purge the send queue so we don't send anything after an abort. */ t3_purge_write_queue(sk); @@ -1072,25 +1111,26 @@ if (sock_flag(sk, CLOSE_CON_REQUESTED) && is_t3a(TOE_DEV(sk))) mode |= CPL_ABORT_POST_CLOSE_REQ; - if (!skb) - skb = alloc_skb_nofail(sizeof(*req)); - skb->priority = mkprio(CPL_PRIORITY_DATA, sk); - set_arp_failure_handler(skb, abort_arp_failure); - - req = (struct cpl_abort_req *)skb_put(skb, sizeof(*req)); + if (!m) + m = alloc_mbuf_nofail(sizeof(*req)); + m->m_priority = mkprio(CPL_PRIORITY_DATA, so); +#ifdef notyet + set_arp_failure_handler(m, abort_arp_failure); +#endif + MH_ALIGN(m, sizeof(*req)); + req = mtod(m, struct cpl_abort_req *); req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ)); req->wr.wr_lo = htonl(V_WR_TID(tid)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid)); req->rsvd0 = htonl(tp->snd_nxt); req->rsvd1 = !sock_flag(sk, TX_DATA_SENT); req->cmd = mode; - if (sk->sk_state == TCP_SYN_SENT) - __skb_queue_tail(&tp->out_of_order_queue, skb); // defer + if (tp->t_state == TCPS_SYN_SENT) + mbufq_tail(T3_TOE(so)->out_of_order_queue, m); // defer else - l2t_send(TOE_DEV(sk), skb, L2T_ENTRY(sk)); + l2t_send(TOE_DEV(so), m, L2T_ENTRY(so)); return 0; } -EXPORT_SYMBOL(t3_send_reset); /* * Reset a connection that is on a listener's SYN queue or accept queue, @@ -1099,23 +1139,23 @@ * * Modeled after code in inet_csk_listen_stop(). */ -static void reset_listen_child(struct socket *child) +static void +reset_listen_child(struct socket *child) { - struct mbuf *skb = alloc_skb_nofail(sizeof(struct cpl_abort_req)); + struct mbuf *m = alloc_mbuf_nofail(sizeof(struct cpl_abort_req)); + - sock_hold(child); // need to survive past inet_csk_destroy_sock() - local_bh_disable(); - bh_lock_sock(child); + SOCK_LOCK(child); + soref(child); - t3_send_reset(child, CPL_ABORT_SEND_RST, skb); + t3_send_reset(child, CPL_ABORT_SEND_RST, m); sock_orphan(child); INC_ORPHAN_COUNT(child); if (child->sk_state == TCP_CLOSE) inet_csk_destroy_sock(child); + sorele_lock(child); - bh_unlock_sock(child); - local_bh_enable(); - sock_put(child); + SOCK_UNLOCK(child); } /* @@ -1130,77 +1170,88 @@ /* * Process the reap list. */ -DECLARE_TASK_FUNC(process_reap_list, task_param) +static void +reap_list_proc(void *data, int npending) { + struct socket *so; + mtx_lock(&reap_list_lock); - while (reap_list) { - struct socket *so = reap_list; - - reap_list = sk->sk_user_data; - sk->sk_user_data = NULL; + while (!TAILQ_EMPTY(reap_list->so_incomp)) { + so = TAILQ_HEAD(&reap_list->so_incomp, so_list); + TAILQ_REMOVE_HEAD(&reap_list->so_incomp, so_list); mtx_unlock(&reap_list_lock); - reset_listen_child(sk); + /* + * XXX BSD equiv? + */ + reset_listen_child(so); mtx_lock(&reap_list_lock); } + so = reap_list; + reap_list = NULL; mtx_unlock(&reap_list_lock); + reset_listen_child(so); + } -static T3_DECLARE_WORK(reap_task, process_reap_list, NULL); - /* * Add a socket to the reap list and schedule a work request to process it. * We thread sockets through their sk_user_data pointers. May be called * from softirq context and any associated open request must have already * been freed. */ -static void add_to_reap_list(struct socket *so) +static void +add_to_reap_list(struct socket *so) { - BUG_ON(sk->sk_user_data); + release_tcp_port(so); // release the port immediately, it may be reused - release_tcp_port(sk); // release the port immediately, it may be reused - mtx_lock(&reap_list_lock); - sk->sk_user_data = reap_list; - reap_list = sk; - if (!sk->sk_user_data) - schedule_work(&reap_task); + if (reap_list == NULL) >>> TRUNCATED FOR MAIL (1000 lines) <<<