Date: Thu, 24 Jan 2008 02:05:56 GMT
From: Kip Macy <kmacy@FreeBSD.org>
To: Perforce Change Reviews <perforce@freebsd.org>
Subject: PERFORCE change 133966 for review
Message-ID: <200801240205.m0O25uYf051995@repoman.freebsd.org>
http://perforce.freebsd.org/chv.cgi?CH=133966

Change 133966 by kmacy@kmacy:storage:toehead on 2008/01/24 02:04:55

	first pass integration of ddp support code

Affected files ...

.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#4 edit
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#3 edit
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h#3 edit

Differences ...

==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#4 (text+ko) ====

@@ -867,14 +867,6 @@
 	return (idx);
 }
 
-void
-t3_release_ddp_resources(struct toepcb *toep)
-{
-	/*
-	 * This is a no-op until we have DDP support
-	 */
-}
-
 static inline void
 free_atid(struct t3cdev *cdev, unsigned int tid)
 {

==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#3 (text+ko) ====

@@ -679,7 +679,7 @@
 	p->dgl_length = len;
 	p->dgl_offset = pg_off;
 	p->dgl_nelem = npages;
-#if 0
+#ifdef notyet
 	p->phys_addr[0] = pci_map_page(pdev, p->pages[0], pg_off,
 				       PAGE_SIZE - pg_off,
 				       PCI_DMA_FROMDEVICE) - pg_off;
@@ -708,6 +708,47 @@
 	return (pages + PPOD_PAGES - 1) / PPOD_PAGES + NUM_SENTINEL_PPODS;
 }
 
+
+static void
+unmap_ddp_gl(const struct ddp_gather_list *gl)
+{
+#ifdef notyet
+	int i;
+
+	if (!gl->nelem)
+		return;
+
+	pci_unmap_page(pdev, gl->phys_addr[0] + gl->offset,
+		       PAGE_SIZE - gl->offset, PCI_DMA_FROMDEVICE);
+	for (i = 1; i < gl->nelem; ++i)
+		pci_unmap_page(pdev, gl->phys_addr[i], PAGE_SIZE,
+			       PCI_DMA_FROMDEVICE);
+
+#endif
+}
+
+static void
+ddp_gl_free_pages(struct ddp_gather_list *gl, int dirty)
+{
+#ifdef notyet
+	int i;
+
+	for (i = 0; i < gl->nelem; ++i) {
+		if (dirty)
+			set_page_dirty_lock(gl->pages[i]);
+		put_page(gl->pages[i]);
+	}
+#endif
+}
+
+void
+t3_free_ddp_gl(struct ddp_gather_list *gl)
+{
+	unmap_ddp_gl(gl);
+	ddp_gl_free_pages(gl, 0);
+	free(gl, M_DEVBUF);
+}
+
 /* Max # of page pods for a buffer, enough for 1MB buffer at 4KB page size */
 #define MAX_PPODS 64U
 
@@ -746,6 +787,46 @@
 	return 0;
 }
 
+
+
+/*
+ * Reposts the kernel DDP buffer after it has previously become full and been
+ * invalidated.  We just need to reset the offset and adjust the DDP flags.
+ * Conveniently, we can set the flags and the offset with a single message.
+ * Note that this function does not set the buffer length.  Again, conveniently,
+ * our kernel buffer is of fixed size.  If the length needs to be changed it
+ * needs to be done separately.
+ */
+static void
+t3_repost_kbuf(struct socket *so, unsigned int bufidx, int modulate,
+	       int activate)
+{
+	struct toepcb *toep = sototcpcb(so)->t_toe;
+	struct ddp_state *p = &toep->tp_ddp_state;
+
+	p->buf_state[bufidx].cur_offset = p->kbuf[bufidx]->dgl_offset;
+	p->buf_state[bufidx].flags = p->kbuf_noinval ? DDP_BF_NOINVAL : 0;
+	p->buf_state[bufidx].gl = p->kbuf[bufidx];
+	p->cur_buf = bufidx;
+	p->kbuf_idx = bufidx;
+	if (!bufidx)
+		t3_setup_ddpbufs(so, 0, 0, 0, 0,
+				 V_TF_DDP_PSH_NO_INVALIDATE(p->kbuf_noinval) |
+				 V_TF_DDP_BUF0_VALID(1),
+				 V_TF_DDP_PSH_NO_INVALIDATE(1) | V_TF_DDP_OFF(1) |
+				 V_TF_DDP_BUF0_VALID(1) |
+				 V_TF_DDP_ACTIVE_BUF(activate), modulate);
+	else
+		t3_setup_ddpbufs(so, 0, 0, 0, 0,
+				 V_TF_DDP_PSH_NO_INVALIDATE(p->kbuf_noinval) |
+				 V_TF_DDP_BUF1_VALID(1) |
+				 V_TF_DDP_ACTIVE_BUF(activate),
+				 V_TF_DDP_PSH_NO_INVALIDATE(1) | V_TF_DDP_OFF(1) |
+				 V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
+				 modulate);
+
+}
+
 /*
  * Starting offset for the user DDP buffer.  A non-0 value ensures a DDP flush
  * won't block indefinitely if there's nothing to place (which should be rare).
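Since each page pod covers PPOD_PAGES pages, the pages2ppods() round-up above is
what makes MAX_PPODS 64 enough for a 1MB buffer at 4KB pages.  A minimal
stand-alone sketch of that arithmetic follows; PPOD_PAGES = 4 and
NUM_SENTINEL_PPODS = 0 are assumptions for illustration, not values taken from
this change.

/* Hypothetical userland illustration of the pages2ppods() round-up. */
#include <stdio.h>

#define PPOD_PAGES		4	/* pages covered by one page pod (assumed) */
#define NUM_SENTINEL_PPODS	0	/* trailing sentinel pods (assumed) */

static unsigned int
pages2ppods(unsigned int pages)
{
	return (pages + PPOD_PAGES - 1) / PPOD_PAGES + NUM_SENTINEL_PPODS;
}

int
main(void)
{
	/* 1MB at 4KB pages is 256 pages -> 64 pods, matching MAX_PPODS. */
	printf("%u\n", pages2ppods(256));
	/* A 9-page (36KB) buffer still rounds up to 3 pods. */
	printf("%u\n", pages2ppods(9));
	return (0);
}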
@@ -820,7 +901,7 @@
 		return err;
 	if (gl) {
 		if (p->ubuf)
-			t3_free_ddp_gl(p->pdev, p->ubuf);
+			t3_free_ddp_gl(p->ubuf);
 		p->ubuf = gl;
 		t3_setup_ppods(so, gl, pages2ppods(gl->dgl_nelem), p->ubuf_tag,
 			       len, gl->dgl_offset, 0);
@@ -847,7 +928,7 @@
 	struct toepcb *toep = sototcpcb(so)->t_toe;
 	struct ddp_state *p = &toep->tp_ddp_state;
 
-	if (!p || !p->pdev)
+	if (!p)
 		return -1;
 
 	len = setup_iovec_ppods(so, iov, 0);
@@ -888,10 +969,251 @@
 				  OVERLAY_MASK | flags, 1);
 	}
 #ifdef T3_TRACE
-	T3_TRACE5(TIDTB(sk),
+	T3_TRACE5(TIDTB(so),
 		  "t3_overlay_ubuf: tag %u flags 0x%x mask 0x%x ubuf_idx %d "
 		  " kbuf_idx %d",
 		  p->ubuf_tag, flags, OVERLAY_MASK, ubuf_idx, p->kbuf_idx);
 #endif
 	return 0;
 }
+
+
+
+/*
+ * Returns whether a connection should enable DDP.  This happens when all of
+ * the following conditions are met:
+ * - the connection's ULP mode is DDP
+ * - DDP is not already enabled
+ * - the last receive was above the DDP threshold
+ * - receive buffers are in user space
+ * - receive side isn't shutdown (handled by caller)
+ * - the connection's receive window is big enough so that sizable buffers
+ *   can be posted without closing the window in the middle of DDP (checked
+ *   when the connection is offloaded)
+ */
+#ifdef notyet
+static int
+so_should_ddp(const struct toepcb *toep, int last_recv_len)
+{
+	return toep->tp_ulp_mode == ULP_MODE_TCPDDP && !toep->tp_ddp_state.cur_buf &&
+		last_recv_len > TOM_TUNABLE(toep->tp_toedev, ddp_thres) &&
+		toep->tp_tp->rcv_wnd >
+		(TOM_TUNABLE(toep->tp_toedev, ddp_copy_limit) +
+		 DDP_RSVD_WIN);
+}
+#endif
+/*
+ * Clean up DDP state that needs to survive until socket close time, such as the
+ * DDP buffers.  The buffers are already unmapped at this point as unmapping
+ * needs the PCI device and a socket may close long after the device is removed.
+ */
+void
+t3_cleanup_ddp(struct socket *so)
+{
+	struct toepcb *toep = sototcpcb(so)->t_toe;
+	struct ddp_state *p = &toep->tp_ddp_state;
+	int idx;
+
+	if (!p)
+		return;
+
+	for (idx = 0; idx < NUM_DDP_KBUF; idx++)
+		if (p->kbuf[idx]) {
+			ddp_gl_free_pages(p->kbuf[idx], 0);
+			free(p->kbuf[idx], M_DEVBUF);
+		}
+
+	if (p->ubuf) {
+		ddp_gl_free_pages(p->ubuf, 0);
+		free(p->ubuf, M_DEVBUF);
+	}
+	toep->tp_ulp_mode = 0;
+}
+
+/*
+ * This is a companion to t3_cleanup_ddp() and releases the HW resources
+ * associated with a connection's DDP state, such as the page pods.
+ * It's called when HW is done with a connection.  The rest of the state
+ * remains available until both HW and the app are done with the connection.
+ */
+void
+t3_release_ddp_resources(struct toepcb *toep)
+{
+	struct ddp_state *p = &toep->tp_ddp_state;
+	struct tom_data *d = TOM_DATA(toep->tp_toedev);
+	int idx;
+
+	for (idx = 0; idx < NUM_DDP_KBUF; idx++) {
+		t3_free_ppods(d, p->kbuf_tag[idx],
+		    p->kbuf_nppods[idx]);
+		unmap_ddp_gl(p->kbuf[idx]);
+	}
+
+	if (p->ubuf_nppods) {
+		t3_free_ppods(d, p->ubuf_tag, p->ubuf_nppods);
+		p->ubuf_nppods = 0;
+	}
+	if (p->ubuf)
+		unmap_ddp_gl(p->ubuf);
+
+}
+
+void
+t3_post_kbuf(struct socket *so, int modulate)
+{
+	struct toepcb *toep = sototcpcb(so)->t_toe;
+	struct ddp_state *p = &toep->tp_ddp_state;
+
+	t3_set_ddp_tag(so, p->cur_buf, p->kbuf_tag[p->cur_buf] << 6);
+	t3_set_ddp_buf(so, p->cur_buf, 0, p->kbuf[p->cur_buf]->dgl_length);
+	t3_repost_kbuf(so, p->cur_buf, modulate, 1);
+
+#ifdef T3_TRACE
+	T3_TRACE1(TIDTB(so),
+		  "t3_post_kbuf: cur_buf = kbuf_idx = %u ", p->cur_buf);
+#endif
+}
+
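The decision sketched in so_should_ddp() above boils down to "big enough
receive, big enough window".  The following stand-alone sketch illustrates just
that check; the threshold, copy-limit, and reserved-window values are made-up
placeholders, not the driver's TOM tunables or DDP_RSVD_WIN.

/* Hypothetical illustration of the DDP-enable test described above. */
#include <stdio.h>

#define RECV_THRES	16384		/* placeholder for the ddp_thres tunable */
#define COPY_LIMIT	8192		/* placeholder for the ddp_copy_limit tunable */
#define RSVD_WIN	(32 * 1024)	/* placeholder reserved receive window */

static int
should_ddp_sketch(int ulp_mode_is_ddp, int ddp_already_active,
    int last_recv_len, unsigned int rcv_wnd)
{
	return (ulp_mode_is_ddp && !ddp_already_active &&
	    last_recv_len > RECV_THRES &&
	    rcv_wnd > COPY_LIMIT + RSVD_WIN);
}

int
main(void)
{
	/* Small receives stay on the copy path; large ones flip to DDP. */
	printf("%d\n", should_ddp_sketch(1, 0, 4096, 256 * 1024));	/* 0 */
	printf("%d\n", should_ddp_sketch(1, 0, 65536, 256 * 1024));	/* 1 */
	return (0);
}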
+/*
+ * Prepare a socket for DDP.  Must be called when the socket is known to be
+ * open.
+ */
+int
+t3_enter_ddp(struct socket *so, unsigned int kbuf_size, unsigned int waitall)
+{
+	int err = ENOMEM;
+	unsigned int nppods, kbuf_pages, idx = 0;
+	struct toepcb *toep = sototcpcb(so)->t_toe;
+	struct ddp_state *p = &toep->tp_ddp_state;
+	struct tom_data *d = TOM_DATA(toep->tp_toedev);
+
+	if (kbuf_size > M_TCB_RX_DDP_BUF0_LEN)
+		return (EINVAL);
+
+	kbuf_pages = (kbuf_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	nppods = pages2ppods(kbuf_pages);
+
+	p->kbuf_noinval = !!waitall;
+
+	p->kbuf_tag[NUM_DDP_KBUF - 1] = -1;
+	for (idx = 0; idx < NUM_DDP_KBUF; idx++) {
+		p->kbuf[idx] =
+		    malloc(sizeof (struct ddp_gather_list) + kbuf_pages *
+			sizeof(vm_page_t *), M_DEVBUF, M_NOWAIT|M_ZERO);
+		if (!p->kbuf[idx])
+			goto err;
+
+		p->kbuf_tag[idx] = t3_alloc_ppods(d, nppods);
+		if (p->kbuf_tag[idx] < 0)
+			goto err;
+
+		p->kbuf_nppods[idx] = nppods;
+		p->kbuf[idx]->dgl_length = kbuf_size;
+		p->kbuf[idx]->dgl_offset = 0;
+		p->kbuf[idx]->dgl_nelem = kbuf_pages;
+#ifdef notyet
+		p->kbuf[idx]->pages =
+		    (struct page **)&p->kbuf[idx]->phys_addr[kbuf_pages];
+
+
+		for (i = 0; i < kbuf_pages; ++i) {
+
+			p->kbuf[idx]->pages[i] = alloc_page(sk->sk_allocation);
+			if (!p->kbuf[idx]->pages[i]) {
+				p->kbuf[idx]->nelem = i;
+				goto err;
+			}
+
+		}
+
+		for (i = 0; i < kbuf_pages; ++i)
+			p->kbuf[idx]->phys_addr[i] =
+			    pci_map_page(p->pdev, p->kbuf[idx]->pages[i],
+					 0, PAGE_SIZE, PCI_DMA_FROMDEVICE);
+#endif
+		t3_setup_ppods(so, p->kbuf[idx], nppods, p->kbuf_tag[idx],
+			       p->kbuf[idx]->dgl_length, 0, 0);
+	}
+	t3_set_ddp_tag(so, 0, p->kbuf_tag[0] << 6);
+	t3_set_ddp_buf(so, 0, 0, p->kbuf[0]->dgl_length);
+	t3_repost_kbuf(so, 0, 0, 1);
+	t3_set_rcv_coalesce_enable(so,
+	    TOM_TUNABLE(TOE_DEV(so), ddp_rcvcoalesce));
+
+#ifdef T3_TRACE
+	T3_TRACE4(TIDTB(so),
+		  "t3_enter_ddp: kbuf_size %u waitall %u tag0 %d tag1 %d",
+		  kbuf_size, waitall, p->kbuf_tag[0], p->kbuf_tag[1]);
+#endif
+
+	return 0;
+
+err:
+	t3_release_ddp_resources(toep);
+	t3_cleanup_ddp(so);
+	return err;
+}
+
+int
+t3_ddp_copy(const struct mbuf *m, int offset, struct iovec *to, int len)
+{
+#ifdef notyet
+	int err, page_no, page_off;
+	struct ddp_gather_list *gl = (struct ddp_gather_list *)skb->mac.raw;
+
+	if (!gl->pages) {
+		dump_stack();
+		BUG_ON(1);
+	}
+
+	offset += gl->offset + TCP_SKB_CB(skb)->when;
+	page_no = offset >> PAGE_SHIFT;
+	page_off = offset & ~PAGE_MASK;
+
+	while (len) {
+		int copy = min_t(int, len, PAGE_SIZE - page_off);
+
+		err = memcpy_toiovec(to, page_address(gl->pages[page_no]) +
+				     page_off, copy);
+		if (err)
+			return -EFAULT;
+		page_no++;
+		page_off = 0;
+		len -= copy;
+	}
+#endif
+	return 0;
+}
+
+/*
+ * Allocate n page pods.  Returns -1 on failure or the page pod tag.
+ */
+int t3_alloc_ppods(struct tom_data *td, unsigned int n)
+{
+	unsigned int i, j;
+
+	if (__predict_false(!td->ppod_map))
+		return -1;
+
+	mtx_lock(&td->ppod_map_lock);
+	for (i = 0; i < td->nppods; ) {
+		for (j = 0; j < n; ++j)          /* scan ppod_map[i..i+n-1] */
+			if (td->ppod_map[i + j]) {
+				i = i + j + 1;
+				goto next;
+			}
+
+		memset(&td->ppod_map[i], 1, n);   /* allocate range */
+		mtx_unlock(&td->ppod_map_lock);
+		return i;
+next:		;
+	}
+	mtx_unlock(&td->ppod_map_lock);
+	return (-1);
+}
+
+void t3_free_ppods(struct tom_data *td, unsigned int tag, unsigned int n)
+{
+	/* No need to take ppod_lock here */
+	memset(&td->ppod_map[tag], 0, n);
+}

==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h#3 (text+ko) ====

@@ -85,7 +85,6 @@
 #define M_PPOD_PGSZ 0x3
 #define V_PPOD_PGSZ(x) ((x) << S_PPOD_PGSZ)
 
-struct pci_dev;
 #include <vm/vm.h>
 #include <vm/vm_page.h>
 #include <machine/bus.h>
@@ -105,7 +104,6 @@
 };
 
 struct ddp_state {
-	struct pci_dev *pdev;
 	struct ddp_buf_state buf_state[2];   /* per buffer state */
 	int cur_buf;
 	unsigned short kbuf_noinval;
@@ -132,33 +130,30 @@
 				     PSH flag set */
 };
 
-#ifdef notyet
+#include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
+
 /*
  * Returns 1 if a UBUF DMA buffer might be active.
  */
-static inline int t3_ddp_ubuf_pending(struct sock *so)
+static inline int t3_ddp_ubuf_pending(struct socket *so)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct ddp_state *p = DDP_STATE(tp);
+	struct toepcb *toep = sototcpcb(so)->t_toe;
+	struct ddp_state *p = &toep->tp_ddp_state;
 
 	/* When the TOM_TUNABLE(ddp) is enabled, we're always in ULP_MODE DDP,
 	 * but DDP_STATE() is only valid if the connection actually enabled
 	 * DDP.
 	 */
-	if (!p)
-		return 0;
-
 	return (p->buf_state[0].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY)) ||
 	       (p->buf_state[1].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY));
 }
-#endif
 
 int t3_setup_ppods(struct socket *so, const struct ddp_gather_list *gl,
 		   unsigned int nppods, unsigned int tag, unsigned int maxoff,
 		   unsigned int pg_off, unsigned int color);
 int t3_alloc_ppods(struct tom_data *td, unsigned int n);
 void t3_free_ppods(struct tom_data *td, unsigned int tag, unsigned int n);
-void t3_free_ddp_gl(struct pci_dev *pdev, struct ddp_gather_list *gl);
+void t3_free_ddp_gl(struct ddp_gather_list *gl);
 int t3_ddp_copy(const struct mbuf *skb, int offset, struct iovec *to, int len);
 //void t3_repost_kbuf(struct socket *so, int modulate, int activate);
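The page-pod allocator above is a simple first-fit scan over ppod_map[], one
byte per pod: a run of n zero bytes is claimed by memset()ing it to 1, and the
run's start index becomes the tag.  A minimal stand-alone sketch of that scan
follows; the map size, the tightened bounds check, and the demo values are
inventions for illustration only and omit the tom_data structure and locking.

/* Hypothetical illustration of the first-fit page-pod map scan. */
#include <stdio.h>
#include <string.h>

#define NPPODS 16

static unsigned char ppod_map[NPPODS];

static int
alloc_ppods_sketch(unsigned int n)
{
	unsigned int i, j;

	for (i = 0; i + n <= NPPODS; ) {
		for (j = 0; j < n; ++j)		/* scan ppod_map[i..i+n-1] */
			if (ppod_map[i + j]) {
				i = i + j + 1;
				goto next;
			}
		memset(&ppod_map[i], 1, n);	/* claim the run */
		return (i);
next:		;
	}
	return (-1);				/* no free run of n pods */
}

static void
free_ppods_sketch(unsigned int tag, unsigned int n)
{
	memset(&ppod_map[tag], 0, n);
}

int
main(void)
{
	int a = alloc_ppods_sketch(4);		/* expect tag 0 */
	int b = alloc_ppods_sketch(4);		/* expect tag 4 */

	free_ppods_sketch(a, 4);
	printf("%d %d %d\n", a, b, alloc_ppods_sketch(2));	/* 0 4 0 */
	return (0);
}

Note that tag 0 is a perfectly valid allocation, which is why failure has to be
signalled with -1 rather than 0, as the comment on t3_alloc_ppods() says.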