Date: Tue, 29 Jan 2019 19:26:27 +0100 From: Marko Zec <zec@fer.hr> To: Mark Johnston <markj@freebsd.org> Cc: <src-committers@freebsd.org>, <svn-src-all@freebsd.org> Subject: Re: svn commit: r343527 - in stable/12/sys/riscv: include riscv Message-ID: <20190129192627.51acc721@x23> In-Reply-To: <201901281614.x0SGErMq078921@repo.freebsd.org> References: <201901281614.x0SGErMq078921@repo.freebsd.org>
next in thread | previous in thread | raw e-mail | index | archive | help
On Mon, 28 Jan 2019 16:14:53 +0000 Mark Johnston <markj@freebsd.org> wrote: > Author: markj > Date: Mon Jan 28 16:14:53 2019 > New Revision: 343527 > URL: https://svnweb.freebsd.org/changeset/base/343527 > > Log: > MFC r343274, r343275: > Optimize RISC-V copyin(9)/copyout(9) routines. Was this subjected to any benchmarks? I'd bet that placing addi a2, a2, -XLEN_BYTES before sd a4, 0(a1) instead of being scheduled after (the same goes for the byte copy loop) would make the loops run faster on most in-order RV cores out there... Marko > > Modified: > stable/12/sys/riscv/include/riscvreg.h > stable/12/sys/riscv/riscv/copyinout.S > Directory Properties: > stable/12/ (props changed) > > Modified: stable/12/sys/riscv/include/riscvreg.h > ============================================================================== > --- stable/12/sys/riscv/include/riscvreg.h Mon Jan 28 14:34:59 > 2019 (r343526) +++ > stable/12/sys/riscv/include/riscvreg.h Mon Jan 28 16:14:53 > 2019 (r343527) @@ -155,7 +155,8 @@ #define > SATP_MODE_SV39 (8ULL << SATP_MODE_S) #define > SATP_MODE_SV48 (9ULL << SATP_MODE_S) > -#define XLEN 8 > +#define XLEN __riscv_xlen > +#define XLEN_BYTES (XLEN / 8) > #define INSN_SIZE 4 > #define INSN_C_SIZE 2 > > > Modified: stable/12/sys/riscv/riscv/copyinout.S > ============================================================================== > --- stable/12/sys/riscv/riscv/copyinout.S Mon Jan 28 14:34:59 > 2019 (r343526) +++ > stable/12/sys/riscv/riscv/copyinout.S Mon Jan 28 16:14:53 > 2019 (r343527) @@ -1,5 +1,6 @@ /*- > * Copyright (c) 2015-2018 Ruslan Bukin <br@bsdpad.com> > + * Copyright (c) 2019 Mitchell Horne > * All rights reserved. > * > * Portions of this software were developed by SRI International and > the @@ -52,60 +53,94 @@ copyio_fault_nopcb: > END(copyio_fault) > > /* > + * copycommon - common copy routine > + * > + * a0 - Source address > + * a1 - Destination address > + * a2 - Size of copy > + */ > + .macro copycommon > + la a6, copyio_fault /* Get the handler address > */ > + SET_FAULT_HANDLER(a6, a7) /* Set the handler */ > + ENTER_USER_ACCESS(a7) > + > + li t2, XLEN_BYTES > + blt a2, t2, 3f /* Byte-copy if len < > XLEN_BYTES */ + > + /* > + * Compare lower bits of src and dest. > + * If they are aligned with each other, we can do word copy. > + */ > + andi t0, a0, (XLEN_BYTES-1) /* Low bits of src > */ > + andi t1, a1, (XLEN_BYTES-1) /* Low bits of > dest */ > + bne t0, t1, 3f /* Misaligned. Go to > byte copy */ > + beqz t0, 2f /* Already > word-aligned, skip ahead */ + > + /* Byte copy until the first word-aligned address */ > +1: lb a4, 0(a0) /* Load byte from src */ > + addi a0, a0, 1 > + sb a4, 0(a1) /* Store byte in dest */ > + addi a1, a1, 1 > + addi a2, a2, -1 /* len-- */ > + andi t0, a0, (XLEN_BYTES-1) > + bnez t0, 1b > + > + /* Copy words */ > +2: ld a4, 0(a0) /* Load word from src */ > + addi a0, a0, XLEN_BYTES > + sd a4, 0(a1) /* Store word in dest */ > + addi a1, a1, XLEN_BYTES > + addi a2, a2, -XLEN_BYTES /* len -= XLEN_BYTES > */ > + bgeu a2, t2, 2b /* Again if len >= > XLEN_BYTES */ + > + /* Check if we're finished */ > + beqz a2, 4f > + > + /* Copy any remaining bytes */ > +3: lb a4, 0(a0) /* Load byte from src */ > + addi a0, a0, 1 > + sb a4, 0(a1) /* Store byte in dest */ > + addi a1, a1, 1 > + addi a2, a2, -1 /* len-- */ > + bnez a2, 3b > + > +4: EXIT_USER_ACCESS(a7) > + SET_FAULT_HANDLER(x0, a7) /* Clear the handler */ > + .endm > + > +/* > * Copies from a kernel to user address > * > * int copyout(const void *kaddr, void *udaddr, size_t len) > */ > ENTRY(copyout) > - beqz a2, 2f /* If len == 0 then skip > loop */ > + beqz a2, copyout_end /* If len == 0 then skip > loop */ add a3, a1, a2 > li a4, VM_MAXUSER_ADDRESS > bgt a3, a4, copyio_fault_nopcb > > - la a6, copyio_fault /* Get the handler address */ > - SET_FAULT_HANDLER(a6, a7) /* Set the handler */ > - ENTER_USER_ACCESS(a7) > + copycommon > > -1: lb a4, 0(a0) /* Load from kaddr */ > - addi a0, a0, 1 > - sb a4, 0(a1) /* Store in uaddr */ > - addi a1, a1, 1 > - addi a2, a2, -1 /* len-- */ > - bnez a2, 1b > - > - EXIT_USER_ACCESS(a7) > - SET_FAULT_HANDLER(x0, a7) /* Clear the handler */ > - > -2: li a0, 0 /* return 0 */ > +copyout_end: > + li a0, 0 /* return 0 */ > ret > END(copyout) > > /* > * Copies from a user to kernel address > * > - * int copyin(const void *uaddr, void *kdaddr, size_t len) > + * int copyin(const void *uaddr, void *kaddr, size_t len) > */ > ENTRY(copyin) > - beqz a2, 2f /* If len == 0 then skip > loop */ > + beqz a2, copyin_end /* If len == 0 then skip > loop */ add a3, a0, a2 > li a4, VM_MAXUSER_ADDRESS > bgt a3, a4, copyio_fault_nopcb > > - la a6, copyio_fault /* Get the handler address */ > - SET_FAULT_HANDLER(a6, a7) /* Set the handler */ > - ENTER_USER_ACCESS(a7) > + copycommon > > -1: lb a4, 0(a0) /* Load from uaddr */ > - addi a0, a0, 1 > - sb a4, 0(a1) /* Store in kaddr */ > - addi a1, a1, 1 > - addi a2, a2, -1 /* len-- */ > - bnez a2, 1b > - > - EXIT_USER_ACCESS(a7) > - SET_FAULT_HANDLER(x0, a7) /* Clear the handler */ > - > -2: li a0, 0 /* return 0 */ > +copyin_end: > + li a0, 0 /* return 0 */ > ret > END(copyin) > >
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20190129192627.51acc721>