Date: Tue, 20 May 2025 23:51:15 GMT
From: Mark Johnston <markj@FreeBSD.org>
To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject: git: b044f125373e - main - gve: Use load-acquire to fetch generation bits
Message-ID: <202505202351.54KNpFWE058687@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch main has been updated by markj: URL: https://cgit.FreeBSD.org/src/commit/?id=b044f125373e05c20ca16edfd29cccad1f4025bd commit b044f125373e05c20ca16edfd29cccad1f4025bd Author: Jasper Tran O'Leary <jtranoleary@google.com> AuthorDate: 2025-05-20 23:34:44 +0000 Commit: Mark Johnston <markj@FreeBSD.org> CommitDate: 2025-05-20 23:50:59 +0000 gve: Use load-acquire to fetch generation bits When running the driver using the DQO queue format, we must load the generation bit and check it before possibly reading the rest of the descriptor's fields. Previously, we guarded against reordering of reads using an explicit thread fence. This commit changes the thread fence to a load with acquire semantics. Because the tx and rx generation fields are in a bitfield, we cannot explicitly address them in an atomic load. Instead we load the respective containing bytes in the descriptor and mask them appropriately. Signed-off-by: Jasper Tran O'Leary <jtranoleary@google.com> Reviewed by: markj MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D50384 --- sys/dev/gve/gve_adminq.h | 3 ++- sys/dev/gve/gve_dqo.h | 20 ++++++++++++++++---- sys/dev/gve/gve_rx_dqo.c | 24 +++++++++++++++++------- sys/dev/gve/gve_tx_dqo.c | 25 +++++++++++++++++-------- 4 files changed, 52 insertions(+), 20 deletions(-) diff --git a/sys/dev/gve/gve_adminq.h b/sys/dev/gve/gve_adminq.h index bc51046a3037..531a844f7d90 100644 --- a/sys/dev/gve/gve_adminq.h +++ b/sys/dev/gve/gve_adminq.h @@ -377,7 +377,8 @@ struct stats { _Static_assert(sizeof(struct stats) == 16, "gve: bad admin queue struct length"); -/* These are control path types for PTYPE which are the same as the data path +/* + * These are control path types for PTYPE which are the same as the data path * types. 
*/ struct gve_ptype_entry { diff --git a/sys/dev/gve/gve_dqo.h b/sys/dev/gve/gve_dqo.h index 214138303a77..212bfa1a6ad3 100644 --- a/sys/dev/gve/gve_dqo.h +++ b/sys/dev/gve/gve_dqo.h @@ -208,9 +208,14 @@ _Static_assert(sizeof(struct gve_tx_metadata_dqo) == 12, #define GVE_TX_METADATA_VERSION_DQO 0 +/* Used to access the generation bit within a TX completion descriptor. */ +#define GVE_TX_DESC_DQO_GEN_BYTE_OFFSET 1 +#define GVE_TX_DESC_DQO_GEN_BIT_MASK 0x80 + /* TX completion descriptor */ struct gve_tx_compl_desc_dqo { - /* For types 0-4 this is the TX queue ID associated with this + /* + * For types 0-4 this is the TX queue ID associated with this * completion. */ uint16_t id:11; @@ -222,12 +227,14 @@ struct gve_tx_compl_desc_dqo { /* Flipped by HW to notify the descriptor is populated. */ uint16_t generation:1; union { - /* For descriptor completions, this is the last index fetched + /* + * For descriptor completions, this is the last index fetched * by HW + 1. */ __le16 tx_head; - /* For packet completions, this is the completion tag set on the + /* + * For packet completions, this is the completion tag set on the * TX packet descriptors. */ __le16 completion_tag; @@ -258,6 +265,10 @@ struct gve_rx_desc_dqo { _Static_assert(sizeof(struct gve_rx_desc_dqo) == 32, "gve: bad dqo desc struct length"); +/* Used to access the generation bit within an RX completion descriptor. */ +#define GVE_RX_DESC_DQO_GEN_BYTE_OFFSET 5 +#define GVE_RX_DESC_DQO_GEN_BIT_MASK 0x40 + /* Descriptor for HW to notify SW of new packets received on RX queue. */ struct gve_rx_compl_desc_dqo { /* Must be 1 */ @@ -266,7 +277,8 @@ struct gve_rx_compl_desc_dqo { /* Packet originated from this system rather than the network. */ uint8_t loopback:1; - /* Set when IPv6 packet contains a destination options header or routing + /* + * Set when IPv6 packet contains a destination options header or routing * header. 
*/ uint8_t ipv6_ex_add:1; diff --git a/sys/dev/gve/gve_rx_dqo.c b/sys/dev/gve/gve_rx_dqo.c index a499ac9d3c6a..11b2c7ea0c55 100644 --- a/sys/dev/gve/gve_rx_dqo.c +++ b/sys/dev/gve/gve_rx_dqo.c @@ -962,6 +962,19 @@ drop_frag_clear_ctx: rx->ctx = (struct gve_rx_ctx){}; } +static uint8_t +gve_rx_get_gen_bit(uint8_t *desc) +{ + uint8_t byte; + + /* + * Prevent generation bit from being read after the rest of the + * descriptor. + */ + byte = atomic_load_acq_8(desc + GVE_RX_DESC_DQO_GEN_BYTE_OFFSET); + return ((byte & GVE_RX_DESC_DQO_GEN_BIT_MASK) != 0); +} + static bool gve_rx_cleanup_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, int budget) { @@ -971,17 +984,14 @@ gve_rx_cleanup_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, int budget) NET_EPOCH_ASSERT(); while (work_done < budget) { - bus_dmamap_sync(rx->dqo.compl_ring_mem.tag, rx->dqo.compl_ring_mem.map, + bus_dmamap_sync(rx->dqo.compl_ring_mem.tag, + rx->dqo.compl_ring_mem.map, BUS_DMASYNC_POSTREAD); compl_desc = &rx->dqo.compl_ring[rx->dqo.tail]; - if (compl_desc->generation == rx->dqo.cur_gen_bit) + if (gve_rx_get_gen_bit((uint8_t *)compl_desc) == + rx->dqo.cur_gen_bit) break; - /* - * Prevent generation bit from being read after the rest of the - * descriptor. - */ - atomic_thread_fence_acq(); rx->cnt++; rx->dqo.tail = (rx->dqo.tail + 1) & rx->dqo.mask; diff --git a/sys/dev/gve/gve_tx_dqo.c b/sys/dev/gve/gve_tx_dqo.c index 7361d47b8ce6..8a1993c3e712 100644 --- a/sys/dev/gve/gve_tx_dqo.c +++ b/sys/dev/gve/gve_tx_dqo.c @@ -1029,6 +1029,19 @@ gve_clear_tx_ring_dqo(struct gve_priv *priv, int i) gve_tx_clear_compl_ring_dqo(tx); } +static uint8_t +gve_tx_get_gen_bit(uint8_t *desc) +{ + uint8_t byte; + + /* + * Prevent generation bit from being read after the rest of the + * descriptor. 
+ */ + byte = atomic_load_acq_8(desc + GVE_TX_DESC_DQO_GEN_BYTE_OFFSET); + return ((byte & GVE_TX_DESC_DQO_GEN_BIT_MASK) != 0); +} + static bool gve_tx_cleanup_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, int budget) { @@ -1041,20 +1054,16 @@ gve_tx_cleanup_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, int budget) uint16_t type; while (work_done < budget) { - bus_dmamap_sync(tx->dqo.compl_ring_mem.tag, tx->dqo.compl_ring_mem.map, + bus_dmamap_sync(tx->dqo.compl_ring_mem.tag, + tx->dqo.compl_ring_mem.map, BUS_DMASYNC_POSTREAD); compl_desc = &tx->dqo.compl_ring[tx->dqo.compl_head]; - if (compl_desc->generation == tx->dqo.cur_gen_bit) + if (gve_tx_get_gen_bit((uint8_t *)compl_desc) == + tx->dqo.cur_gen_bit) break; - /* - * Prevent generation bit from being read after the rest of the - * descriptor. - */ - atomic_thread_fence_acq(); type = compl_desc->type; - if (type == GVE_COMPL_TYPE_DQO_DESC) { /* This is the last descriptor fetched by HW plus one */ tx_head = le16toh(compl_desc->tx_head);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202505202351.54KNpFWE058687>