From owner-dev-commits-src-branches@freebsd.org Sat Sep 4 07:45:57 2021 Return-Path: Delivered-To: dev-commits-src-branches@mailman.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.nyi.freebsd.org (Postfix) with ESMTP id 46BBB6ADB63; Sat, 4 Sep 2021 07:45:57 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "R3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4H1mtX3jzBz3tcs; Sat, 4 Sep 2021 07:45:56 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org (gitrepo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:5]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 594EB141AF; Sat, 4 Sep 2021 07:45:56 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org ([127.0.1.44]) by gitrepo.freebsd.org (8.16.1/8.16.1) with ESMTP id 1847ju7N062667; Sat, 4 Sep 2021 07:45:56 GMT (envelope-from git@gitrepo.freebsd.org) Received: (from git@localhost) by gitrepo.freebsd.org (8.16.1/8.16.1/Submit) id 1847juhe062666; Sat, 4 Sep 2021 07:45:56 GMT (envelope-from git) Date: Sat, 4 Sep 2021 07:45:56 GMT Message-Id: <202109040745.1847juhe062666@gitrepo.freebsd.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org From: Kyle Evans Subject: git: c0674b71720b - stable/11 - pxeboot: improve and simplify rx handling MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit X-Git-Committer: kevans X-Git-Repository: src X-Git-Refname: refs/heads/stable/11 X-Git-Reftype: branch X-Git-Commit: c0674b71720bb333cbc2da823e2f67412dc00e78 Auto-Submitted: auto-generated X-BeenThere: dev-commits-src-branches@freebsd.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Commits to the stable branches of the FreeBSD src repository List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 04 Sep 2021 07:45:57 -0000 The branch stable/11 has been updated by kevans: URL: https://cgit.FreeBSD.org/src/commit/?id=c0674b71720bb333cbc2da823e2f67412dc00e78 commit c0674b71720bb333cbc2da823e2f67412dc00e78 Author: Kyle Evans AuthorDate: 2021-08-12 02:49:17 +0000 Commit: Kyle Evans CommitDate: 2021-09-04 07:45:19 +0000 pxeboot: improve and simplify rx handling This pushes the bulk of the rx servicing into a single loop that's only slightly convoluted, and it addresses a problem with rx handling in the process. If we hit a tx interrupt while we're processing, we'd previously drop the frame on the floor completely and ultimately timeout, increasing boot time on particularly busy hosts as we keep having to backoff and resend. After this patch, we don't seem to hit timeouts at all on zoo anymore though loading a 27M kernel is still relatively slow (~1m20s). Sponsored By: National Bureau of Economic Research Sponsored by: Klara, Inc. (cherry picked from commit 3daa8e165c661c1b45e759f4997f447384c15446) --- stand/i386/libi386/pxe.c | 154 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 106 insertions(+), 48 deletions(-) diff --git a/stand/i386/libi386/pxe.c b/stand/i386/libi386/pxe.c index 217692b515f0..e80a1961e191 100644 --- a/stand/i386/libi386/pxe.c +++ b/stand/i386/libi386/pxe.c @@ -30,6 +30,8 @@ __FBSDID("$FreeBSD$"); #include +#include +#include #include #include #include @@ -432,91 +434,144 @@ pxe_netif_init(struct iodesc *desc, void *machdep_hint) } static int -pxe_netif_receive(void **pkt) +pxe_netif_receive_isr(t_PXENV_UNDI_ISR *isr, void **pkt, ssize_t *retsize) { - t_PXENV_UNDI_ISR *isr; + static bool data_pending; char *buf, *ptr, *frame; size_t size, rsize; - isr = bio_alloc(sizeof(*isr)); - if (isr == NULL) - return (-1); + buf = NULL; + size = rsize = 0; + + /* + * We can save ourselves the next two pxe calls because we already know + * we weren't done grabbing everything. + */ + if (data_pending) { + data_pending = false; + goto nextbuf; + } + /* + * We explicitly don't check for OURS/NOT_OURS as a result of START; + * it's been reported that some cards are known to mishandle these. + */ bzero(isr, sizeof(*isr)); isr->FuncFlag = PXENV_UNDI_ISR_IN_START; pxe_call(PXENV_UNDI_ISR, isr); + /* We could translate Status... */ if (isr->Status != 0) { - bio_free(isr, sizeof(*isr)); - return (-1); + return (ENXIO); } bzero(isr, sizeof(*isr)); isr->FuncFlag = PXENV_UNDI_ISR_IN_PROCESS; pxe_call(PXENV_UNDI_ISR, isr); if (isr->Status != 0) { - bio_free(isr, sizeof(*isr)); - return (-1); + return (ENXIO); } - - while (isr->FuncFlag == PXENV_UNDI_ISR_OUT_TRANSMIT) { + if (isr->FuncFlag == PXENV_UNDI_ISR_OUT_BUSY) { /* - * Wait till transmit is done. + * Let the caller decide if we need to be restarted. It will + * currently blindly restart us, but it could check timeout in + * the future. */ - bzero(isr, sizeof(*isr)); - isr->FuncFlag = PXENV_UNDI_ISR_IN_GET_NEXT; - pxe_call(PXENV_UNDI_ISR, isr); - if (isr->Status != 0 || - isr->FuncFlag == PXENV_UNDI_ISR_OUT_DONE) { - bio_free(isr, sizeof(*isr)); - return (-1); - } + return (ERESTART); } - while (isr->FuncFlag != PXENV_UNDI_ISR_OUT_RECEIVE) { - if (isr->Status != 0 || - isr->FuncFlag == PXENV_UNDI_ISR_OUT_DONE) { - bio_free(isr, sizeof(*isr)); - return (-1); + /* + * By design, we'll hardly ever hit this terminal condition unless we + * pick up nothing but tx interrupts here. More frequently, we will + * process rx buffers until we hit the terminal condition in the middle. + */ + while (isr->FuncFlag != PXENV_UNDI_ISR_OUT_DONE) { + /* + * This might have given us PXENV_UNDI_ISR_OUT_TRANSMIT, in + * which case we can just disregard and move on to the next + * buffer/frame. + */ + if (isr->FuncFlag != PXENV_UNDI_ISR_OUT_RECEIVE) + goto nextbuf; + + if (buf == NULL) { + /* + * Grab size from the first Frame that we picked up, + * allocate an rx buf to hold. Careful here, as we may + * see a fragmented frame that's spread out across + * multiple GET_NEXT calls. + */ + size = isr->FrameLength; + buf = malloc(size + ETHER_ALIGN); + if (buf == NULL) + return (ENOMEM); + + ptr = buf + ETHER_ALIGN; } - bzero(isr, sizeof(*isr)); - isr->FuncFlag = PXENV_UNDI_ISR_IN_GET_NEXT; - pxe_call(PXENV_UNDI_ISR, isr); - } - - size = isr->FrameLength; - buf = malloc(size + ETHER_ALIGN); - if (buf == NULL) { - bio_free(isr, sizeof(*isr)); - return (-1); - } - ptr = buf + ETHER_ALIGN; - rsize = 0; - while (rsize < size) { frame = (char *)((uintptr_t)isr->Frame.segment << 4); frame += isr->Frame.offset; bcopy(PTOV(frame), ptr, isr->BufferLength); ptr += isr->BufferLength; rsize += isr->BufferLength; + /* + * Stop here before we risk catching the start of another frame. + * It would be nice to continue reading until we actually get a + * PXENV_UNDI_ISR_OUT_DONE, but our network stack in libsa isn't + * suitable for reading more than one packet at a time. + */ + if (rsize >= size) { + data_pending = true; + break; + } + +nextbuf: bzero(isr, sizeof(*isr)); isr->FuncFlag = PXENV_UNDI_ISR_IN_GET_NEXT; pxe_call(PXENV_UNDI_ISR, isr); if (isr->Status != 0) { - bio_free(isr, sizeof(*isr)); free(buf); - return (-1); + return (ENXIO); } + } - /* Did we got another update? */ - if (isr->FuncFlag == PXENV_UNDI_ISR_OUT_RECEIVE) - continue; - break; + /* + * We may have never picked up a frame at all (all tx), in which case + * the caller should restart us. + */ + if (rsize == 0) { + return (ERESTART); } *pkt = buf; + *retsize = rsize; + return (0); +} + +static int +pxe_netif_receive(void **pkt, ssize_t *size) +{ + t_PXENV_UNDI_ISR *isr; + int ret; + + isr = bio_alloc(sizeof(*isr)); + if (isr == NULL) + return (ENOMEM); + + /* + * This completely ignores the timeout specified in pxe_netif_get(), but + * we shouldn't be running long enough here for that to make a + * difference. + */ + for (;;) { + /* We'll only really re-enter for PXENV_UNDI_ISR_OUT_BUSY. */ + ret = pxe_netif_receive_isr(isr, pkt, size); + if (ret != ERESTART) + break; + } + bio_free(isr, sizeof(*isr)); - return (rsize); + return (ret); } static ssize_t @@ -525,16 +580,19 @@ pxe_netif_get(struct iodesc *desc, void **pkt, time_t timeout) time_t t; void *ptr; int ret = -1; + ssize_t size; t = getsecs(); + size = 0; while ((getsecs() - t) < timeout) { - ret = pxe_netif_receive(&ptr); + ret = pxe_netif_receive(&ptr, &size); if (ret != -1) { *pkt = ptr; break; } } - return (ret); + + return (ret == 0 ? size : -1); } static ssize_t