Date: Fri, 13 Jun 2025 18:54:48 GMT From: Mark Johnston <markj@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org Subject: git: 71702df61262 - main - gve: Add support for 4k RX Buffers when using DQO queue formats Message-ID: <202506131854.55DIsm67059734@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch main has been updated by markj: URL: https://cgit.FreeBSD.org/src/commit/?id=71702df6126226b31dc3ec66459388e32b993be1 commit 71702df6126226b31dc3ec66459388e32b993be1 Author: Vee Agarwal <veethebee@google.com> AuthorDate: 2025-06-13 17:52:25 +0000 Commit: Mark Johnston <markj@FreeBSD.org> CommitDate: 2025-06-13 18:53:51 +0000 gve: Add support for 4k RX Buffers when using DQO queue formats This change adds support for using 4K RX Buffers when using DQO queue formats when a boot-time tunable flag is set to true by the user. When this flag is enabled, the driver will use 4K RX Buffer size either when HW LRO is enabled or mtu > 2048. Signed-off-by: Vee Agarwal <veethebee@google.com> Reviewed by: markj, ziaee MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D50786 --- share/man/man4/gve.4 | 8 ++++++++ sys/dev/gve/gve.h | 20 ++++++++++++++++++++ sys/dev/gve/gve_adminq.c | 5 ++++- sys/dev/gve/gve_dqo.h | 8 ++++++-- sys/dev/gve/gve_main.c | 30 +++++++++++++++++++++++++++--- sys/dev/gve/gve_rx_dqo.c | 26 +++++++++++++++----------- sys/dev/gve/gve_sysctl.c | 4 ++++ 7 files changed, 84 insertions(+), 17 deletions(-) diff --git a/share/man/man4/gve.4 b/share/man/man4/gve.4 index 924a01a06d08..c5627e929044 100644 --- a/share/man/man4/gve.4 +++ b/share/man/man4/gve.4 @@ -230,6 +230,14 @@ The default value is 0, which means hardware LRO is enabled by default. The software LRO stack in the kernel is always used. This sysctl variable needs to be set before loading the driver, using .Xr loader.conf 5 . +.It Va hw.gve.allow_4k_rx_buffers +Setting this boot-time tunable to 1 enables support for 4K RX Buffers. +The default value is 0, which means 2K RX Buffers will be used. +4K RX Buffers are only supported on DQO_RDA and DQO_QPL queue formats. +When enabled, 4K RX Buffers will be used either when HW LRO is enabled +or mtu is greater than 2048. +This sysctl variable needs to be set before loading the driver, using +.Xr loader.conf 5 . 
.It Va dev.gve.X.num_rx_queues and dev.gve.X.num_tx_queues Run-time tunables that represent the number of currently used RX/TX queues. The default value is the max number of RX/TX queues the device can support. diff --git a/sys/dev/gve/gve.h b/sys/dev/gve/gve.h index 48e9a371df21..dd432dc0eb45 100644 --- a/sys/dev/gve/gve.h +++ b/sys/dev/gve/gve.h @@ -65,6 +65,7 @@ #define ADMINQ_SIZE PAGE_SIZE #define GVE_DEFAULT_RX_BUFFER_SIZE 2048 +#define GVE_4K_RX_BUFFER_SIZE_DQO 4096 /* Each RX bounce buffer page can fit two packet buffers. */ #define GVE_DEFAULT_RX_BUFFER_OFFSET (PAGE_SIZE / 2) @@ -84,6 +85,11 @@ static MALLOC_DEFINE(M_GVE, "gve", "gve allocations"); +_Static_assert(MCLBYTES == GVE_DEFAULT_RX_BUFFER_SIZE, + "gve: bad MCLBYTES length"); +_Static_assert(MJUMPAGESIZE >= GVE_4K_RX_BUFFER_SIZE_DQO, + "gve: bad MJUMPAGESIZE length"); + struct gve_dma_handle { bus_addr_t bus_addr; void *cpu_addr; @@ -633,6 +639,7 @@ struct gve_priv { /* The index of tx queue that the timer service will check on its next invocation */ uint16_t check_tx_queue_idx; + uint16_t rx_buf_size_dqo; }; static inline bool @@ -666,6 +673,18 @@ gve_is_qpl(struct gve_priv *priv) priv->queue_format == GVE_DQO_QPL_FORMAT); } +static inline bool +gve_is_4k_rx_buf(struct gve_priv *priv) +{ + return (priv->rx_buf_size_dqo == GVE_4K_RX_BUFFER_SIZE_DQO); +} + +static inline bus_size_t +gve_rx_dqo_mbuf_segment_size(struct gve_priv *priv) +{ + return (gve_is_4k_rx_buf(priv) ? 
MJUMPAGESIZE : MCLBYTES); +} + /* Defined in gve_main.c */ void gve_schedule_reset(struct gve_priv *priv); int gve_adjust_tx_queues(struct gve_priv *priv, uint16_t new_queue_cnt); @@ -746,6 +765,7 @@ bool gve_timestamp_valid(int64_t *timestamp_sec); /* Systcl functions defined in gve_sysctl.c */ extern bool gve_disable_hw_lro; +extern bool gve_allow_4k_rx_buffers; extern char gve_queue_format[8]; extern char gve_version[8]; void gve_setup_sysctl(struct gve_priv *priv); diff --git a/sys/dev/gve/gve_adminq.c b/sys/dev/gve/gve_adminq.c index 3415d2fa4b60..9b59570a2af4 100644 --- a/sys/dev/gve/gve_adminq.c +++ b/sys/dev/gve/gve_adminq.c @@ -296,7 +296,6 @@ gve_adminq_create_rx_queue(struct gve_priv *priv, uint32_t queue_index) .ntfy_id = htobe32(rx->com.ntfy_id), .queue_resources_addr = htobe64(qres_dma->bus_addr), .rx_ring_size = htobe16(priv->rx_desc_cnt), - .packet_buffer_size = htobe16(GVE_DEFAULT_RX_BUFFER_SIZE), }; if (gve_is_gqi(priv)) { @@ -308,6 +307,8 @@ gve_adminq_create_rx_queue(struct gve_priv *priv, uint32_t queue_index) htobe32(queue_index); cmd.create_rx_queue.queue_page_list_id = htobe32((rx->com.qpl)->id); + cmd.create_rx_queue.packet_buffer_size = + htobe16(GVE_DEFAULT_RX_BUFFER_SIZE); } else { cmd.create_rx_queue.queue_page_list_id = htobe32(GVE_RAW_ADDRESSING_QPL_ID); @@ -320,6 +321,8 @@ gve_adminq_create_rx_queue(struct gve_priv *priv, uint32_t queue_index) cmd.create_rx_queue.enable_rsc = !!((if_getcapenable(priv->ifp) & IFCAP_LRO) && !gve_disable_hw_lro); + cmd.create_rx_queue.packet_buffer_size = + htobe16(priv->rx_buf_size_dqo); } return (gve_adminq_execute_cmd(priv, &cmd)); diff --git a/sys/dev/gve/gve_dqo.h b/sys/dev/gve/gve_dqo.h index 212bfa1a6ad3..542f8ff7d888 100644 --- a/sys/dev/gve/gve_dqo.h +++ b/sys/dev/gve/gve_dqo.h @@ -59,8 +59,6 @@ */ #define GVE_RX_DQO_MIN_PENDING_BUFS 128 -#define GVE_DQ_NUM_FRAGS_IN_PAGE (PAGE_SIZE / GVE_DEFAULT_RX_BUFFER_SIZE) - /* * gve_rx_qpl_buf_id_dqo's 11 bit wide buf_id field limits the total * number of 
pages per QPL to 2048. @@ -330,4 +328,10 @@ struct gve_rx_compl_desc_dqo { _Static_assert(sizeof(struct gve_rx_compl_desc_dqo) == 32, "gve: bad dqo desc struct length"); + +static inline uint8_t +gve_get_dq_num_frags_in_page(struct gve_priv *priv) +{ + return (PAGE_SIZE / priv->rx_buf_size_dqo); +} #endif /* _GVE_DESC_DQO_H_ */ diff --git a/sys/dev/gve/gve_main.c b/sys/dev/gve/gve_main.c index 2abd9d1aa698..10197a8e15f8 100644 --- a/sys/dev/gve/gve_main.c +++ b/sys/dev/gve/gve_main.c @@ -35,7 +35,7 @@ #define GVE_DRIVER_VERSION "GVE-FBSD-1.3.4\n" #define GVE_VERSION_MAJOR 1 #define GVE_VERSION_MINOR 3 -#define GVE_VERSION_SUB 4 +#define GVE_VERSION_SUB 5 #define GVE_DEFAULT_RX_COPYBREAK 256 @@ -382,12 +382,27 @@ gve_adjust_ring_sizes(struct gve_priv *priv, uint16_t new_desc_cnt, bool is_rx) return (0); } +static int +gve_get_dqo_rx_buf_size(struct gve_priv *priv, uint16_t mtu) +{ + /* + * Use 4k buffers only if mode is DQ, 4k buffers flag is on, + * and either hw LRO is enabled or mtu is greater than 2048 + */ + if (!gve_is_gqi(priv) && gve_allow_4k_rx_buffers && + (!gve_disable_hw_lro || mtu > GVE_DEFAULT_RX_BUFFER_SIZE)) + return (GVE_4K_RX_BUFFER_SIZE_DQO); + + return (GVE_DEFAULT_RX_BUFFER_SIZE); +} + static int gve_set_mtu(if_t ifp, uint32_t new_mtu) { struct gve_priv *priv = if_getsoftc(ifp); const uint32_t max_problem_range = 8227; const uint32_t min_problem_range = 7822; + uint16_t new_rx_buf_size = gve_get_dqo_rx_buf_size(priv, new_mtu); int err; if ((new_mtu > priv->max_mtu) || (new_mtu < ETHERMIN)) { @@ -402,9 +417,10 @@ gve_set_mtu(if_t ifp, uint32_t new_mtu) * in throughput. 
*/ if (!gve_is_gqi(priv) && !gve_disable_hw_lro && - new_mtu >= min_problem_range && new_mtu <= max_problem_range) { + new_mtu >= min_problem_range && new_mtu <= max_problem_range && + new_rx_buf_size != GVE_4K_RX_BUFFER_SIZE_DQO) { device_printf(priv->dev, - "Cannot set to MTU to %d within the range [%d, %d] while hardware LRO is enabled\n", + "Cannot set to MTU to %d within the range [%d, %d] while HW LRO is enabled and not using 4k RX Buffers\n", new_mtu, min_problem_range, max_problem_range); return (EINVAL); } @@ -414,6 +430,13 @@ gve_set_mtu(if_t ifp, uint32_t new_mtu) if (bootverbose) device_printf(priv->dev, "MTU set to %d\n", new_mtu); if_setmtu(ifp, new_mtu); + /* Need to re-alloc RX queues if RX buffer size changed */ + if (!gve_is_gqi(priv) && + new_rx_buf_size != priv->rx_buf_size_dqo) { + gve_free_rx_rings(priv, 0, priv->rx_cfg.num_queues); + priv->rx_buf_size_dqo = new_rx_buf_size; + gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues); + } } else { device_printf(priv->dev, "Failed to set MTU to %d\n", new_mtu); } @@ -1064,6 +1087,7 @@ gve_attach(device_t dev) if (err != 0) goto abort; + priv->rx_buf_size_dqo = gve_get_dqo_rx_buf_size(priv, priv->max_mtu); err = gve_alloc_rings(priv); if (err != 0) goto abort; diff --git a/sys/dev/gve/gve_rx_dqo.c b/sys/dev/gve/gve_rx_dqo.c index 11b2c7ea0c55..cf914913da09 100644 --- a/sys/dev/gve/gve_rx_dqo.c +++ b/sys/dev/gve/gve_rx_dqo.c @@ -140,15 +140,17 @@ gve_rx_alloc_ring_dqo(struct gve_priv *priv, int i) return (0); } + bus_size_t max_seg_size = gve_rx_dqo_mbuf_segment_size(priv); + err = bus_dma_tag_create( bus_get_dma_tag(priv->dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ - MCLBYTES, /* maxsize */ + max_seg_size, /* maxsize */ 1, /* nsegments */ - MCLBYTES, /* maxsegsize */ + max_seg_size, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ @@ -317,7 +319,8 @@ 
gve_rx_post_new_mbuf_dqo(struct gve_rx_ring *rx, int how) } SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry); - buf->mbuf = m_getcl(how, MT_DATA, M_PKTHDR); + bus_size_t segment_size = gve_rx_dqo_mbuf_segment_size(rx->com.priv); + buf->mbuf = m_getjcl(how, MT_DATA, M_PKTHDR, segment_size); if (__predict_false(!buf->mbuf)) { err = ENOMEM; counter_enter(); @@ -325,7 +328,7 @@ gve_rx_post_new_mbuf_dqo(struct gve_rx_ring *rx, int how) counter_exit(); goto abort_with_buf; } - buf->mbuf->m_len = MCLBYTES; + buf->mbuf->m_len = segment_size; err = bus_dmamap_load_mbuf_sg(rx->dqo.buf_dmatag, buf->dmamap, buf->mbuf, segs, &nsegs, BUS_DMA_NOWAIT); @@ -371,7 +374,7 @@ gve_rx_post_qpl_buf_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf, bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map, BUS_DMASYNC_PREREAD); desc->buf_addr = htole64(page_dma_handle->bus_addr + - frag_num * GVE_DEFAULT_RX_BUFFER_SIZE); + frag_num * rx->com.priv->rx_buf_size_dqo); buf->num_nic_frags++; gve_rx_advance_head_dqo(rx); @@ -430,7 +433,7 @@ gve_rx_post_new_dqo_qpl_buf(struct gve_rx_ring *rx) } gve_rx_post_qpl_buf_dqo(rx, buf, buf->next_idx); - if (buf->next_idx == GVE_DQ_NUM_FRAGS_IN_PAGE - 1) + if (buf->next_idx == gve_get_dq_num_frags_in_page(rx->com.priv) - 1) buf->next_idx = 0; else buf->next_idx++; @@ -742,7 +745,7 @@ gve_get_cpu_addr_for_qpl_buf(struct gve_rx_ring *rx, int page_idx = buf - rx->dqo.bufs; void *va = rx->com.qpl->dmas[page_idx].cpu_addr; - va = (char *)va + (buf_frag_num * GVE_DEFAULT_RX_BUFFER_SIZE); + va = (char *)va + (buf_frag_num * rx->com.priv->rx_buf_size_dqo); return (va); } @@ -753,15 +756,16 @@ gve_rx_add_clmbuf_to_ctx(struct gve_rx_ring *rx, { void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num); struct mbuf *mbuf; + bus_size_t segment_size = gve_rx_dqo_mbuf_segment_size(rx->com.priv); if (ctx->mbuf_tail == NULL) { - mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); + mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, segment_size); if (mbuf == NULL) 
return (ENOMEM); ctx->mbuf_head = mbuf; ctx->mbuf_tail = mbuf; } else { - mbuf = m_getcl(M_NOWAIT, MT_DATA, 0); + mbuf = m_getjcl(M_NOWAIT, MT_DATA, 0, segment_size); if (mbuf == NULL) return (ENOMEM); ctx->mbuf_tail->m_next = mbuf; @@ -809,7 +813,7 @@ gve_rx_add_extmbuf_to_ctx(struct gve_rx_ring *rx, page_idx = buf - rx->dqo.bufs; page = rx->com.qpl->pages[page_idx]; page_addr = rx->com.qpl->dmas[page_idx].cpu_addr; - va = (char *)page_addr + (buf_frag_num * GVE_DEFAULT_RX_BUFFER_SIZE); + va = (char *)page_addr + (buf_frag_num * rx->com.priv->rx_buf_size_dqo); /* * Grab an extra ref to the page so that gve_mextadd_free @@ -855,7 +859,7 @@ gve_rx_dqo_qpl(struct gve_priv *priv, struct gve_rx_ring *rx, } buf = &rx->dqo.bufs[buf_id]; if (__predict_false(buf->num_nic_frags == 0 || - buf_frag_num > GVE_DQ_NUM_FRAGS_IN_PAGE - 1)) { + buf_frag_num > gve_get_dq_num_frags_in_page(priv) - 1)) { device_printf(priv->dev, "Spurious compl for buf id %d on rxq %d " "with buf_frag_num %d and num_nic_frags %d, issuing reset\n", buf_id, rx->com.id, buf_frag_num, buf->num_nic_frags); diff --git a/sys/dev/gve/gve_sysctl.c b/sys/dev/gve/gve_sysctl.c index 661f61918853..a3874cc921ee 100644 --- a/sys/dev/gve/gve_sysctl.c +++ b/sys/dev/gve/gve_sysctl.c @@ -37,6 +37,10 @@ bool gve_disable_hw_lro = false; SYSCTL_BOOL(_hw_gve, OID_AUTO, disable_hw_lro, CTLFLAG_RDTUN, &gve_disable_hw_lro, 0, "Controls if hardware LRO is used"); +bool gve_allow_4k_rx_buffers = false; +SYSCTL_BOOL(_hw_gve, OID_AUTO, allow_4k_rx_buffers, CTLFLAG_RDTUN, + &gve_allow_4k_rx_buffers, 0, "Controls if 4K RX Buffers are allowed"); + char gve_queue_format[8]; SYSCTL_STRING(_hw_gve, OID_AUTO, queue_format, CTLFLAG_RD, &gve_queue_format, 0, "Queue format being used by the iface");
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202506131854.55DIsm67059734>