Date: Sun, 6 Jan 2008 01:22:23 GMT From: Kip Macy <kmacy@FreeBSD.org> To: Perforce Change Reviews <perforce@freebsd.org> Subject: PERFORCE change 132604 for review Message-ID: <200801060122.m061MN2O074814@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=132604 Change 132604 by kmacy@pandemonium:kmacy:xen31 on 2008/01/06 01:21:40 get ping working - bring the rx control flow more in line with what exists in current Linux - switch to tracking pfn in ext_args to allow for larger than 4GB VMs - move page management structures into netfront_info (softc) so that multiple devices can operate in parallel Affected files ... .. //depot/projects/xen31/sys/dev/xen/netfront/netfront.c#4 edit Differences ... ==== //depot/projects/xen31/sys/dev/xen/netfront/netfront.c#4 (text+ko) ==== @@ -88,7 +88,11 @@ #define RX_COPY_THRESHOLD 256 +#define net_ratelimit() 0 + struct netfront_info; +struct netfront_rx_info; + static void xn_txeof(struct netfront_info *); static void xn_rxeof(struct netfront_info *); static void network_alloc_rx_buffers(struct netfront_info *); @@ -127,10 +131,12 @@ static void xn_free_tx_ring(struct netfront_info *); +static int xennet_get_responses(struct netfront_info *np, + struct netfront_rx_info *rinfo, RING_IDX rp, struct mbuf_head *list, + int *pages_flipped_p); + #define virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT) -/* XXX: This isn't supported in FreeBSD, so ignore it for now. 
*/ -#define TASK_UNINTERRUPTIBLE 0 #define INVALID_P2M_ENTRY (~0UL) /* @@ -144,10 +150,6 @@ }; -static unsigned long rx_pfn_array[NET_RX_RING_SIZE]; -static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1]; -static mmu_update_t rx_mmu[NET_RX_RING_SIZE]; - struct net_device_stats { unsigned long rx_packets; /* total packets received */ @@ -225,6 +227,11 @@ int xn_if_flags; struct callout xn_stat_ch; + + + unsigned long rx_pfn_array[NET_RX_RING_SIZE]; + multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1]; + mmu_update_t rx_mmu[NET_RX_RING_SIZE]; }; #define rx_mbufs xn_cdata.xn_rx_chain @@ -251,6 +258,11 @@ mtx_destroy(&(_sc)->tx_lock); \ sx_destroy(&(_sc)->sc_lock); +struct netfront_rx_info { + struct netif_rx_response rx; + struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; +}; + #define netfront_carrier_on(netif) ((netif)->carrier = 1) #define netfront_carrier_off(netif) ((netif)->carrier = 0) #define netfront_carrier_ok(netif) ((netif)->carrier) @@ -315,6 +327,7 @@ #define DPRINTK(fmt, args...) 
\ printf("[XEN] " fmt, ##args) + static __inline struct mbuf* makembuf (struct mbuf *buf) { @@ -331,11 +344,8 @@ m->m_pkthdr.len = buf->m_pkthdr.len; m->m_len = buf->m_len; m_copydata(buf, 0, buf->m_pkthdr.len, mtod(m,caddr_t) ); - /* - * XXX this will break on > 4GB - * - */ - m->m_ext.ext_args = (caddr_t *)(uintptr_t)vtophys(mtod(m,caddr_t)); + + m->m_ext.ext_args = (caddr_t *)(uintptr_t)(vtophys(mtod(m,caddr_t)) >> PAGE_SHIFT); return m; } @@ -737,7 +747,8 @@ if ((m_new = mbufq_dequeue(&sc->xn_rx_batch)) == NULL) break; - m_new->m_ext.ext_args = (vm_paddr_t *)(uintptr_t)vtophys(m_new->m_ext.ext_buf); + m_new->m_ext.ext_args = (vm_paddr_t *)(uintptr_t)(vtophys(m_new->m_ext.ext_buf) >> PAGE_SHIFT); + id = xennet_rxidx(req_prod + i); PANIC_IF(sc->xn_cdata.xn_rx_chain[id] != NULL); @@ -755,13 +766,13 @@ gnttab_grant_foreign_transfer_ref(ref, sc->xbdev->otherend_id, pfn); - rx_pfn_array[nr_flips] = PFNTOMFN(pfn); + sc->rx_pfn_array[nr_flips] = PFNTOMFN(pfn); if (!xen_feature(XENFEAT_auto_translated_physmap)) { /* Remove this page before passing * back to Xen. */ set_phys_to_machine(pfn, INVALID_P2M_ENTRY); - MULTI_update_va_mapping(&rx_mcl[i], + MULTI_update_va_mapping(&sc->rx_mcl[i], vaddr, 0, 0); } nr_flips++; @@ -774,7 +785,7 @@ req->id = id; req->gref = ref; - rx_pfn_array[i] = vtomach(mtod(m_new,vm_offset_t)) >> PAGE_SHIFT; + sc->rx_pfn_array[i] = vtomach(mtod(m_new,vm_offset_t)) >> PAGE_SHIFT; } KASSERT(i, ("no mbufs processed")); /* should have returned earlier */ @@ -789,7 +800,7 @@ /* Tell the ballon driver what is going on. */ balloon_update_driver_allowance(i); #endif - set_xen_guest_handle(reservation.extent_start, rx_pfn_array); + set_xen_guest_handle(reservation.extent_start, sc->rx_pfn_array); reservation.nr_extents = i; reservation.extent_order = 0; reservation.address_bits = 0; @@ -798,18 +809,18 @@ if (!xen_feature(XENFEAT_auto_translated_physmap)) { /* After all PTEs have been zapped, flush the TLB. 
*/ - rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = + sc->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; /* Give away a batch of pages. */ - rx_mcl[i].op = __HYPERVISOR_memory_op; - rx_mcl[i].args[0] = XENMEM_decrease_reservation; - rx_mcl[i].args[1] = (unsigned long)&reservation; + sc->rx_mcl[i].op = __HYPERVISOR_memory_op; + sc->rx_mcl[i].args[0] = XENMEM_decrease_reservation; + sc->rx_mcl[i].args[1] = (unsigned long)&reservation; /* Zap PTEs and give away pages in one big multicall. */ - (void)HYPERVISOR_multicall(rx_mcl, i+1); + (void)HYPERVISOR_multicall(sc->rx_mcl, i+1); /* Check return status of HYPERVISOR_dom_mem_op(). */ - if (unlikely(rx_mcl[i].result != i)) + if (unlikely(sc->rx_mcl[i].result != i)) panic("Unable to reduce memory reservation\n"); } else { @@ -832,144 +843,119 @@ static void xn_rxeof(struct netfront_info *np) { - struct ifnet *ifp; - netif_rx_response_t *rx; - RING_IDX i, rp; - mmu_update_t *mmu = rx_mmu; - multicall_entry_t *mcl = rx_mcl; - struct mbuf *tail_mbuf = NULL, *head_mbuf = NULL, *m, *next; - unsigned long mfn; - grant_ref_t ref; + struct ifnet *ifp; + struct netfront_rx_info rinfo; + struct netif_rx_response *rx = &rinfo.rx; + struct netif_extra_info *extras = rinfo.extras; + RING_IDX i, rp; + multicall_entry_t *mcl; + struct mbuf *m; + struct mbuf_head rxq, errq, tmpq; + int err, pages_flipped = 0; + + XN_RX_LOCK_ASSERT(np); + if (!netfront_carrier_ok(np)) + return; - XN_RX_LOCK_ASSERT(np); - if (!netfront_carrier_ok(np)) - return; + mbufq_init(&tmpq); + mbufq_init(&errq); + mbufq_init(&rxq); + + ifp = np->xn_ifp; - ifp = np->xn_ifp; - - rp = np->rx.sring->rsp_prod; - rmb(); /* Ensure we see queued responses up to 'rp'. */ - - for (i = np->rx.rsp_cons; i != rp; i++) { - - rx = RING_GET_RESPONSE(&np->rx, i); - KASSERT(rx->id != 0, ("xn_rxeof: found free receive index of 0\n")); - /* - * This definitely indicates a bug, either in this driver or - * in the backend driver. 
In future this should flag the bad - * situation to the system controller to reboot the backed. - */ - if ((ref = np->grant_rx_ref[rx->id]) == GRANT_INVALID_REF) { - WPRINTK("Bad rx response id %d.\n", rx->id); - continue; - } + rp = np->rx.sring->rsp_prod; + rmb(); /* Ensure we see queued responses up to 'rp'. */ + + i = np->rx.rsp_cons; + while ((i != rp)) { + memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx)); + memset(extras, 0, sizeof(rinfo.extras)); + + err = xennet_get_responses(np, &rinfo, rp, &tmpq, + &pages_flipped); + + if (unlikely(err)) { + while ((m = mbufq_dequeue(&tmpq))) + mbufq_tail(&errq, m); + np->stats.rx_errors++; + i = np->rx.rsp_cons; + continue; + } - - /* Memory pressure, insufficient buffer headroom, ... */ - if ((mfn = gnttab_end_foreign_transfer_ref(ref)) == 0) { -#if 0 - if (net_ratelimit()) - WPRINTK("Unfulfilled rx req (id=%d, st=%d).\n", - rx->id, rx->status); -#endif - RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = - rx->id; - RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = - ref; - np->rx.req_prod_pvt++; - RING_PUSH_REQUESTS(&np->rx); - continue; - } + m = mbufq_dequeue(&tmpq); - gnttab_release_grant_reference(&np->gref_rx_head, ref); - np->grant_rx_ref[rx->id] = GRANT_INVALID_REF; + m->m_data += rx->offset;/* (rx->addr & PAGE_MASK); */ + m->m_pkthdr.len = m->m_len = rx->status; + m->m_pkthdr.rcvif = ifp; - m = (struct mbuf *)np->xn_cdata.xn_rx_chain[rx->id]; - if (m->m_next) - panic("mbuf is already part of a valid mbuf chain"); - add_id_to_freelist(np->xn_cdata.xn_rx_chain, rx->id); - - m->m_data += rx->offset;/* (rx->addr & PAGE_MASK); */ - m->m_pkthdr.len = m->m_len = rx->status; - m->m_pkthdr.rcvif = ifp; - - if ( rx->flags & NETRXF_data_validated ) { - /* Tell the stack the checksums are okay */ - m->m_pkthdr.csum_flags |= - (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); - m->m_pkthdr.csum_data = 0xffff; - } + if ( rx->flags & NETRXF_data_validated ) { + /* Tell the stack the 
checksums are okay */ + /* + * XXX this isn't necessarily the case - need to add check + * + */ + + m->m_pkthdr.csum_flags |= + (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); + m->m_pkthdr.csum_data = 0xffff; + } - np->stats.rx_packets++; - np->stats.rx_bytes += rx->status; - - - /* Remap the page. */ - mmu->ptr = ((vm_offset_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; - mmu->val = (unsigned long)m->m_ext.ext_args >> PAGE_SHIFT; - mmu++; - /* XXX validate me */ - mcl->op = __HYPERVISOR_update_va_mapping; - mcl->args[0] = (unsigned long)m->m_data; - mcl->args[1] = (mfn << PAGE_SHIFT) | PG_V | PG_RW | PG_M | PG_A; - mcl->args[2] = 0; - mcl->args[3] = 0; - mcl++; + np->stats.rx_packets++; + np->stats.rx_bytes += rx->status; - xen_phys_machine[((unsigned long)m->m_ext.ext_args >> PAGE_SHIFT)] = - mfn; + mbufq_tail(&rxq, m); + np->rx.rsp_cons = ++i; + } - if (unlikely(!head_mbuf)) - head_mbuf = m; - - if (tail_mbuf) - tail_mbuf->m_next = m; - tail_mbuf = m; - - np->xn_cdata.xn_rx_chain[rx->id] = NULL; - } + if (pages_flipped) { + /* Some pages are no longer absent... */ +#ifdef notyet + balloon_update_driver_allowance(-pages_flipped); +#endif + /* Do all the remapping work, and M->P updates, in one big hypercall. */ + if (!!xen_feature(XENFEAT_auto_translated_physmap)) { + mcl = np->rx_mcl + pages_flipped; + mcl->op = __HYPERVISOR_mmu_update; + mcl->args[0] = (unsigned long)np->rx_mmu; + mcl->args[1] = pages_flipped; + mcl->args[2] = 0; + mcl->args[3] = DOMID_SELF; + (void)HYPERVISOR_multicall(np->rx_mcl, pages_flipped + 1); + } + } - /* Do all the remapping work, and M->P updates, in one big hypercall. 
*/ - if (likely((mcl - rx_mcl) != 0)) { - mcl->op = __HYPERVISOR_mmu_update; - mcl->args[0] = (unsigned long)rx_mmu; - mcl->args[1] = mmu - rx_mmu; - mcl->args[2] = 0; - mcl->args[3] = DOMID_SELF; - mcl++; - (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl); - } + while ((m = mbufq_dequeue(&errq))) + m_freem(m); + + + /* + * Process all the mbufs after the remapping is complete. + * Break the mbuf chain first though. + */ + while ((m = mbufq_dequeue(&rxq)) != NULL) { + ifp->if_ipackets++; + + /* + * Do we really need to drop the rx lock? + */ + XN_RX_UNLOCK(np); + /* Pass it up. */ + (*ifp->if_input)(ifp, m); + XN_RX_LOCK(np); + } + np->rx.rsp_cons = i; - /* - * Process all the mbufs after the remapping is complete. - * Break the mbuf chain first though. - */ - for (m = head_mbuf; m; m = next) { - next = m->m_next; - m->m_next = NULL; - - ifp->if_ipackets++; - - XN_RX_UNLOCK(np); - - /* Pass it up. */ - (*ifp->if_input)(ifp, m); - XN_RX_LOCK(np); - } + /* If we get a callback with very few responses, reduce fill target. */ + /* NB. Note exponential increase, linear decrease. */ + if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) > + ((3*np->rx_target) / 4)) && (--np->rx_target < np->rx_min_target)) + np->rx_target = np->rx_min_target; - np->rx.rsp_cons = i; - - /* If we get a callback with very few responses, reduce fill target. */ - /* NB. Note exponential increase, linear decrease. 
*/ - if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) > - ((3*np->rx_target) / 4)) && (--np->rx_target < np->rx_min_target)) - np->rx_target = np->rx_min_target; - - network_alloc_rx_buffers(np); + network_alloc_rx_buffers(np); - np->rx.sring->rsp_event = i + 1; + np->rx.sring->rsp_event = i + 1; } @@ -1054,23 +1040,212 @@ struct ifnet *ifp = np->xn_ifp; - while (np->rx.rsp_cons != np->rx.sring->rsp_prod && + if (!(np->rx.rsp_cons != np->rx.sring->rsp_prod && likely(netfront_carrier_ok(np)) && - ifp->if_drv_flags & IFF_DRV_RUNNING) { - - XN_RX_LOCK(np); - xn_rxeof(np); - XN_RX_UNLOCK(np); - if (np->tx.rsp_cons != np->tx.sring->rsp_prod) { + ifp->if_drv_flags & IFF_DRV_RUNNING)) + return; + if (np->tx.rsp_cons != np->tx.sring->rsp_prod) { XN_TX_LOCK(np); xn_txeof(np); XN_TX_UNLOCK(np); + } + + XN_RX_LOCK(np); + xn_rxeof(np); + XN_RX_UNLOCK(np); + + if (ifp->if_drv_flags & IFF_DRV_RUNNING && + !IFQ_DRV_IS_EMPTY(&ifp->if_snd)) + xn_start(ifp); +} + + +static void +xennet_move_rx_slot(struct netfront_info *np, struct mbuf *m, + grant_ref_t ref) +{ + int new = xennet_rxidx(np->rx.req_prod_pvt); + + PANIC_IF(np->rx_mbufs[new] != NULL); + np->rx_mbufs[new] = m; + np->grant_rx_ref[new] = ref; + RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new; + RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref; + np->rx.req_prod_pvt++; +} + +static int +xennet_get_extras(struct netfront_info *np, + struct netif_extra_info *extras, RING_IDX rp) + +{ + struct netif_extra_info *extra; + RING_IDX cons = np->rx.rsp_cons; + + int err = 0; + + do { + struct mbuf *m; + grant_ref_t ref; + + if (unlikely(cons + 1 == rp)) { +#if 0 + if (net_ratelimit()) + WPRINTK("Missing extra info\n"); +#endif + err = -EINVAL; + break; + } + + extra = (struct netif_extra_info *) + RING_GET_RESPONSE(&np->rx, ++cons); + + if (unlikely(!extra->type || + extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) { +#if 0 + if (net_ratelimit()) + WPRINTK("Invalid extra type: %d\n", + extra->type); +#endif + err = 
-EINVAL; + } else { + memcpy(&extras[extra->type - 1], extra, + sizeof(*extra)); + } + + m = xennet_get_rx_mbuf(np, cons); + ref = xennet_get_rx_ref(np, cons); + xennet_move_rx_slot(np, m, ref); + } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); + + np->rx.rsp_cons = cons; + return err; +} + +static int +xennet_get_responses(struct netfront_info *np, + struct netfront_rx_info *rinfo, RING_IDX rp, + struct mbuf_head *list, + int *pages_flipped_p) +{ + int pages_flipped = *pages_flipped_p; + struct mmu_update *mmu; + struct multicall_entry *mcl; + struct netif_rx_response *rx = &rinfo->rx; + struct netif_extra_info *extras = rinfo->extras; + RING_IDX cons = np->rx.rsp_cons; + struct mbuf *m = xennet_get_rx_mbuf(np, cons); + grant_ref_t ref = xennet_get_rx_ref(np, cons); + int max = 24 /* MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD) */; + int frags = 1; + int err = 0; + unsigned long ret; + + if (rx->flags & NETRXF_extra_info) { + err = xennet_get_extras(np, extras, rp); + cons = np->rx.rsp_cons; } - if (ifp->if_drv_flags & IFF_DRV_RUNNING && - !IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - xn_start(ifp); - } - return; + + for (;;) { + unsigned long mfn; + + if (unlikely(rx->status < 0 || + rx->offset + rx->status > PAGE_SIZE)) { +#if 0 + if (net_ratelimit()) + WPRINTK("rx->offset: %x, size: %u\n", + rx->offset, rx->status); +#endif + xennet_move_rx_slot(np, m, ref); + err = -EINVAL; + goto next; + } + + /* + * This definitely indicates a bug, either in this driver or in + * the backend driver. In future this should flag the bad + * situation to the system controller to reboot the backed. + */ + if (ref == GRANT_INVALID_REF) { +#if 0 + if (net_ratelimit()) + WPRINTK("Bad rx response id %d.\n", rx->id); +#endif + err = -EINVAL; + goto next; + } + + if (!np->copying_receiver) { + /* Memory pressure, insufficient buffer + * headroom, ... 
+ */ + if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) { + if (net_ratelimit()) + WPRINTK("Unfulfilled rx req " + "(id=%d, st=%d).\n", + rx->id, rx->status); + xennet_move_rx_slot(np, m, ref); + err = -ENOMEM; + goto next; + } + + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* Remap the page. */ + void *vaddr = mtod(m, void *); + uint32_t pfn; + + mcl = np->rx_mcl + pages_flipped; + mmu = np->rx_mmu + pages_flipped; + + MULTI_update_va_mapping(mcl, + (unsigned long)vaddr, + (mfn << PAGE_SHIFT) | PG_RW | PG_V | PG_M | PG_A, + 0); + pfn = (uint32_t)m->m_ext.ext_args; + mmu->ptr = ((vm_paddr_t)mfn << PAGE_SHIFT) + | MMU_MACHPHYS_UPDATE; + mmu->val = pfn; + + set_phys_to_machine(pfn, mfn); + } + pages_flipped++; + } else { + ret = gnttab_end_foreign_access_ref(ref, 0); + PANIC_IF(!ret); + } + + gnttab_release_grant_reference(&np->gref_rx_head, ref); + mbufq_tail(list, m); + + next: + if (!(rx->flags & NETRXF_more_data)) + break; + + if (cons + frags == rp) { + if (net_ratelimit()) + WPRINTK("Need more frags\n"); + err = -ENOENT; + break; + } + + rx = RING_GET_RESPONSE(&np->rx, cons + frags); + m = xennet_get_rx_mbuf(np, cons + frags); + ref = xennet_get_rx_ref(np, cons + frags); + frags++; + } + + if (unlikely(frags > max)) { + if (net_ratelimit()) + WPRINTK("Too many frags\n"); + err = -E2BIG; + } + + if (unlikely(err)) + np->rx.rsp_cons = cons + frags; + + *pages_flipped_p = pages_flipped; + + return err; } static void @@ -1164,9 +1339,7 @@ if (notify) notify_remote_via_irq(sc->irq); - XN_TX_LOCK(sc); xn_txeof(sc); - XN_TX_UNLOCK(sc); if (RING_FULL(&sc->tx)) { sc->tx_full = 1; @@ -1190,8 +1363,6 @@ XN_TX_UNLOCK(sc); } - - /* equivalent of network_open() in Linux */ static void xn_ifinit_locked(struct netfront_info *sc)
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200801060122.m061MN2O074814>