Skip site navigation (1) Skip section navigation (2)
Date:      Sun, 16 Oct 2016 21:51:59 +0200
From:      Oliver Pinter <oliver.pinter@hardenedbsd.org>
To:        Luigi Rizzo <luigi@freebsd.org>
Cc:        "src-committers@freebsd.org" <src-committers@freebsd.org>,  "svn-src-all@freebsd.org" <svn-src-all@freebsd.org>,  "svn-src-head@freebsd.org" <svn-src-head@freebsd.org>
Subject:   Re: svn commit: r307396 - head/sys/net
Message-ID:  <CAPQ4ffv6sx0ZPhVA1k3oVAinNtj6%2BB-Qy8tJ0=1_cYkyt1cefg@mail.gmail.com>
In-Reply-To: <201610161827.u9GIRfXG008590@repo.freebsd.org>
References:  <201610161827.u9GIRfXG008590@repo.freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help
On Sun, Oct 16, 2016 at 8:27 PM, Luigi Rizzo <luigi@freebsd.org> wrote:
> Author: luigi
> Date: Sun Oct 16 18:27:41 2016
> New Revision: 307396
> URL: https://svnweb.freebsd.org/changeset/base/307396
>
> Log:
>   add a missing header.
>
> Added:
>   head/sys/net/netmap_virt.h   (contents, props changed)
>
> Added: head/sys/net/netmap_virt.h
> ==============================================================================
> --- /dev/null   00:00:00 1970   (empty, because file is newly added)
> +++ head/sys/net/netmap_virt.h  Sun Oct 16 18:27:41 2016        (r307396)
> @@ -0,0 +1,325 @@
> +/*
> + * Copyright (C) 2013-2016 Luigi Rizzo
> + * Copyright (C) 2013-2016 Giuseppe Lettieri
> + * Copyright (C) 2013-2016 Vincenzo Maffione
> + * Copyright (C) 2015 Stefano Garzarella
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *   1. Redistributions of source code must retain the above copyright
> + *      notice, this list of conditions and the following disclaimer.
> + *   2. Redistributions in binary form must reproduce the above copyright
> + *      notice, this list of conditions and the following disclaimer in the
> + *    documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + *
> + * $FreeBSD$
> + */
> +
> +#ifndef NETMAP_VIRT_H
> +#define NETMAP_VIRT_H
> +
> +#define NETMAP_VIRT_CSB_SIZE   4096
> +
> +/* ptnetmap features */
> +#define PTNETMAP_F_BASE            1
> +#define PTNETMAP_F_FULL            2 /* not used */
> +#define PTNETMAP_F_VNET_HDR        4
> +
> +/*
> + * ptnetmap_memdev: device used to expose memory into the guest VM
> + *
> + * These macros are used in the hypervisor frontend (QEMU, bhyve) and in the
> + * guest device driver.
> + */
> +
> +/* PCI identifiers and PCI BARs for the ptnetmap memdev
> + * and ptnetmap network interface. */
> +#define PTNETMAP_MEMDEV_NAME            "ptnetmap-memdev"
> +#define PTNETMAP_PCI_VENDOR_ID          0x3333  /* TODO change vendor_id */

Is this TODO still valid?

> +#define PTNETMAP_PCI_DEVICE_ID          0x0001  /* memory device */
> +#define PTNETMAP_PCI_NETIF_ID           0x0002  /* ptnet network interface */
> +#define PTNETMAP_IO_PCI_BAR             0
> +#define PTNETMAP_MEM_PCI_BAR            1
> +#define PTNETMAP_MSIX_PCI_BAR           2
> +
> +/* Registers for the ptnetmap memdev */
> +/* 32 bit r/o */
> +#define PTNETMAP_IO_PCI_FEATURES        0      /* XXX should be removed */

And this?

> +/* 32 bit r/o */
> +#define PTNETMAP_IO_PCI_MEMSIZE         4      /* size of the netmap memory shared
> +                                                * between guest and host */
> +/* 16 bit r/o */
> +#define PTNETMAP_IO_PCI_HOSTID          8      /* memory allocator ID in netmap host */
> +#define PTNETMAP_IO_SIZE                10
> +
> +/*
> + * ptnetmap configuration
> + *
> + * The hypervisor (QEMU or bhyve) sends this struct to the host netmap
> + * module through an ioctl() command when it wants to start the ptnetmap
> + * kthreads.
> + */
> +struct ptnetmap_cfg {
> +#define PTNETMAP_CFG_FEAT_CSB           0x0001
> +#define PTNETMAP_CFG_FEAT_EVENTFD       0x0002
> +#define PTNETMAP_CFG_FEAT_IOCTL                0x0004
> +       uint32_t features;
> +       void *ptrings;                          /* ptrings inside CSB */
> +       uint32_t num_rings;                     /* number of entries */
> +       struct ptnet_ring_cfg entries[0];       /* per-ptring configuration */
> +};
> +
> +/*
> + * Function used to write the ptnetmap_cfg pointer into the nmreq.
> + * The user-space application writes the pointer of ptnetmap_cfg
> + * (user-space buffer) starting from nr_arg1 field, so that the kernel
> + * can read it with copyin (copy_from_user).
> + */
> +static inline void
> +ptnetmap_write_cfg(struct nmreq *nmr, struct ptnetmap_cfg *cfg)
> +{
> +       uintptr_t *nmr_ptncfg = (uintptr_t *)&nmr->nr_arg1;
> +       *nmr_ptncfg = (uintptr_t)cfg;
> +}
> +
> +/* ptnetmap control commands */
> +#define PTNETMAP_PTCTL_CONFIG  1
> +#define PTNETMAP_PTCTL_FINALIZE        2
> +#define PTNETMAP_PTCTL_IFNEW   3
> +#define PTNETMAP_PTCTL_IFDELETE        4
> +#define PTNETMAP_PTCTL_RINGSCREATE     5
> +#define PTNETMAP_PTCTL_RINGSDELETE     6
> +#define PTNETMAP_PTCTL_DEREF   7
> +#define PTNETMAP_PTCTL_TXSYNC  8
> +#define PTNETMAP_PTCTL_RXSYNC  9
> +#define PTNETMAP_PTCTL_REGIF        10
> +#define PTNETMAP_PTCTL_UNREGIF      11
> +#define PTNETMAP_PTCTL_HOSTMEMID       12
> +
> +
> +/* I/O registers for the ptnet device. */
> +#define PTNET_IO_PTFEAT                0
> +#define PTNET_IO_PTCTL         4
> +#define PTNET_IO_PTSTS         8
> +/* hole */

Hole?

> +#define PTNET_IO_MAC_LO                16
> +#define PTNET_IO_MAC_HI                20
> +#define PTNET_IO_CSBBAH         24
> +#define PTNET_IO_CSBBAL         28
> +#define PTNET_IO_NIFP_OFS      32
> +#define PTNET_IO_NUM_TX_RINGS  36
> +#define PTNET_IO_NUM_RX_RINGS  40
> +#define PTNET_IO_NUM_TX_SLOTS  44
> +#define PTNET_IO_NUM_RX_SLOTS  48
> +#define PTNET_IO_VNET_HDR_LEN  52
> +#define PTNET_IO_END           56
> +#define PTNET_IO_KICK_BASE     128
> +#define PTNET_IO_MASK           0xff
> +
> +/* If defined, CSB is allocated by the guest, not by the host. */
> +#define PTNET_CSB_ALLOC
> +
> +/* ptnetmap ring fields shared between guest and host */
> +struct ptnet_ring {
> +       /* XXX revise the layout to minimize cache bounces. */
> +       uint32_t head;            /* GW+ HR+ the head of the guest netmap_ring */
> +       uint32_t cur;             /* GW+ HR+ the cur of the guest netmap_ring */
> +       uint32_t guest_need_kick; /* GW+ HR+ host-->guest notification enable */
> +       char pad[4];
> +       uint32_t hwcur;           /* GR+ HW+ the hwcur of the host netmap_kring */
> +       uint32_t hwtail;          /* GR+ HW+ the hwtail of the host netmap_kring */
> +       uint32_t host_need_kick;  /* GR+ HW+ guest-->host notification enable */
> +       uint32_t sync_flags;      /* GW+ HR+ the flags of the guest [tx|rx]sync() */
> +};
> +
> +/* CSB for the ptnet device. */
> +struct ptnet_csb {
> +       struct ptnet_ring rings[NETMAP_VIRT_CSB_SIZE/sizeof(struct ptnet_ring)];
> +};
> +
> +#if defined (WITH_PTNETMAP_HOST) || defined (WITH_PTNETMAP_GUEST)
> +
> +/* return l_elem - r_elem with wraparound */
> +static inline uint32_t
> +ptn_sub(uint32_t l_elem, uint32_t r_elem, uint32_t num_slots)
> +{
> +    int64_t res;
> +
> +    res = (int64_t)(l_elem) - r_elem;
> +
> +    return (res < 0) ? res + num_slots : res;
> +}
> +#endif /* WITH_PTNETMAP_HOST || WITH_PTNETMAP_GUEST */
> +
> +#ifdef WITH_PTNETMAP_HOST
> +/*
> + * ptnetmap kernel thread routines
> + * */
> +
> +/* Functions to read and write CSB fields in the host */
> +#if defined (linux)
> +#define CSB_READ(csb, field, r) (get_user(r, &csb->field))
> +#define CSB_WRITE(csb, field, v) (put_user(v, &csb->field))
> +#else  /* ! linux */
> +#define CSB_READ(csb, field, r) (r = fuword32(&csb->field))
> +#define CSB_WRITE(csb, field, v) (suword32(&csb->field, v))
> +#endif /* ! linux */
> +
> +/*
> + * HOST read/write kring pointers from/in CSB
> + */
> +
> +/* Host: Read kring pointers (head, cur, sync_flags) from CSB */
> +static inline void
> +ptnetmap_host_read_kring_csb(struct ptnet_ring __user *ptr,
> +                            struct netmap_ring *g_ring,
> +                            uint32_t num_slots)
> +{
> +    uint32_t old_head = g_ring->head, old_cur = g_ring->cur;
> +    uint32_t d, inc_h, inc_c;
> +
> +    //mb(); /* Force memory complete before read CSB */

And what about all of these commented-out mb() calls?

> +
> +    /*
> +     * We must first read head and then cur with a barrier in the
> +     * middle, because cur can exceed head, but not vice versa.
> +     * The guest must first write cur and then head with a barrier.
> +     *
> +     * head <= cur
> +     *
> +     *          guest           host
> +     *
> +     *          STORE(cur)      LOAD(head)
> +     *            mb() ----------- mb()
> +     *          STORE(head)     LOAD(cur)
> +     *
> +     * This approach ensures that every head that we read is
> +     * associated with the correct cur. In this way head can not exceed cur.
> +     */
> +    CSB_READ(ptr, head, g_ring->head);
> +    mb();
> +    CSB_READ(ptr, cur, g_ring->cur);
> +    CSB_READ(ptr, sync_flags, g_ring->flags);
> +
> +    /*
> +     * Even with the previous barrier, it is still possible that we read an
> +     * updated cur and an old head.
> +     * To detect this situation, we can check if the new cur overtakes
> +     * the (apparently) new head.
> +     */
> +    d = ptn_sub(old_cur, old_head, num_slots);     /* previous distance */
> +    inc_c = ptn_sub(g_ring->cur, old_cur, num_slots);   /* increase of cur */
> +    inc_h = ptn_sub(g_ring->head, old_head, num_slots); /* increase of head */
> +
> +    if (unlikely(inc_c > num_slots - d + inc_h)) { /* cur overtakes head */
> +        ND(1,"ERROR cur overtakes head - old_cur: %u cur: %u old_head: %u head: %u",
> +                old_cur, g_ring->cur, old_head, g_ring->head);
> +        g_ring->cur = nm_prev(g_ring->head, num_slots - 1);
> +        //*g_cur = *g_head;

Why is this commented out? And why wasn't it removed before upstreaming?

> +    }
> +}
> +
> +/* Host: Write kring pointers (hwcur, hwtail) into the CSB */
> +static inline void
> +ptnetmap_host_write_kring_csb(struct ptnet_ring __user *ptr, uint32_t hwcur,
> +        uint32_t hwtail)
> +{
> +    /* We must write hwtail before hwcur (see below). */
> +    CSB_WRITE(ptr, hwtail, hwtail);
> +    mb();
> +    CSB_WRITE(ptr, hwcur, hwcur);
> +
> +    //mb(); /* Force memory complete before send notification */
> +}
> +
> +#endif /* WITH_PTNETMAP_HOST */
> +
> +#ifdef WITH_PTNETMAP_GUEST
> +/*
> + * GUEST read/write kring pointers from/in CSB.
> + * For use in the guest device driver.
> + */
> +
> +/* Guest: Write kring pointers (cur, head) into the CSB */
> +static inline void
> +ptnetmap_guest_write_kring_csb(struct ptnet_ring *ptr, uint32_t cur,
> +                              uint32_t head)
> +{
> +    /* We must write cur before head for sync reason (see above) */
> +    ptr->cur = cur;
> +    mb();
> +    ptr->head = head;
> +
> +    //mb(); /* Force memory complete before send notification */
> +}
> +
> +/* Guest: Read kring pointers (hwcur, hwtail) from CSB */
> +static inline void
> +ptnetmap_guest_read_kring_csb(struct ptnet_ring *ptr, struct netmap_kring *kring)
> +{
> +    uint32_t old_hwcur = kring->nr_hwcur, old_hwtail = kring->nr_hwtail;
> +    uint32_t num_slots = kring->nkr_num_slots;
> +    uint32_t d, inc_hc, inc_ht;
> +
> +    //mb(); /* Force memory complete before read CSB */
> +
> +    /*
> +     * We must first read hwcur and then hwtail with a barrier in the
> +     * middle, because hwtail can exceed hwcur, but not vice versa.
> +     * The host must first write hwtail and then hwcur with a barrier.
> +     *
> +     * hwcur <= hwtail
> +     *
> +     *          host            guest
> +     *
> +     *          STORE(hwtail)   LOAD(hwcur)
> +     *            mb()  ---------  mb()
> +     *          STORE(hwcur)    LOAD(hwtail)
> +     *
> +     * This approach ensures that every hwcur that the guest reads is
> +     * associated with the correct hwtail. In this way hwcur can not exceed
> +     * hwtail.
> +     */
> +    kring->nr_hwcur = ptr->hwcur;
> +    mb();
> +    kring->nr_hwtail = ptr->hwtail;
> +
> +    /*
> +     * Even with the previous barrier, it is still possible that we read an
> +     * updated hwtail and an old hwcur.
> +     * To detect this situation, we can check if the new hwtail overtakes
> +     * the (apparently) new hwcur.
> +     */
> +    d = ptn_sub(old_hwtail, old_hwcur, num_slots);       /* previous distance */
> +    inc_ht = ptn_sub(kring->nr_hwtail, old_hwtail, num_slots);  /* increase of hwtail */
> +    inc_hc = ptn_sub(kring->nr_hwcur, old_hwcur, num_slots);    /* increase of hwcur */
> +
> +    if (unlikely(inc_ht > num_slots - d + inc_hc)) {
> +        ND(1, "ERROR hwtail overtakes hwcur - old_hwtail: %u hwtail: %u old_hwcur: %u hwcur: %u",
> +                old_hwtail, kring->nr_hwtail, old_hwcur, kring->nr_hwcur);
> +        kring->nr_hwtail = nm_prev(kring->nr_hwcur, num_slots - 1);
> +        //kring->nr_hwtail = kring->nr_hwcur;

Uhm, which one is the correct one? And why does the other one exist?

> +    }
> +}
> +
> +/* ptnetmap_memdev routines used to talk with ptnetmap_memdev device driver */
> +struct ptnetmap_memdev;
> +int nm_os_pt_memdev_iomap(struct ptnetmap_memdev *, vm_paddr_t *, void **);
> +void nm_os_pt_memdev_iounmap(struct ptnetmap_memdev *);
> +#endif /* WITH_PTNETMAP_GUEST */
> +
> +#endif /* NETMAP_VIRT_H */
> _______________________________________________
> svn-src-head@freebsd.org mailing list
> https://lists.freebsd.org/mailman/listinfo/svn-src-head
> To unsubscribe, send any mail to "svn-src-head-unsubscribe@freebsd.org"



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAPQ4ffv6sx0ZPhVA1k3oVAinNtj6%2BB-Qy8tJ0=1_cYkyt1cefg>