Message-Id: <202011291644.0ATGiMfn074322@repo.freebsd.org>
From: Michal Meloun
Date: Sun, 29 Nov 2020 16:44:22 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r368158 - head/sys/arm/arm

Author: mmel
Date: Sun Nov 29 16:44:22 2020
New Revision: 368158

URL: https://svnweb.freebsd.org/changeset/base/368158

Log:
  _ARM_ARCH_5E is always defined; we no longer support older CPUs.

Modified:
  head/sys/arm/arm/bcopy_page.S
  head/sys/arm/arm/bcopyinout.S
  head/sys/arm/arm/in_cksum_arm.S
  head/sys/arm/arm/machdep.c
  head/sys/arm/arm/support.S

Modified: head/sys/arm/arm/bcopy_page.S ============================================================================== --- head/sys/arm/arm/bcopy_page.S Sun Nov 29 16:29:40 2020 (r368157) +++ head/sys/arm/arm/bcopy_page.S Sun Nov 29 16:44:22 2020 (r368158) @@ -44,147 +44,8 @@ __FBSDID("$FreeBSD$"); #include "assym.inc" -#ifndef _ARM_ARCH_5E -/* #define BIG_LOOPS */ - /* - * bcopy_page(src, dest) - * - * Optimised copy page routine. - * - * On entry: - * r0 - src address - * r1 - dest address - * - * Requires: - * number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS), 128 - * otherwise.
- */ - -#define CHUNK_SIZE 32 - -#define PREFETCH_FIRST_CHUNK /* nothing */ -#define PREFETCH_NEXT_CHUNK /* nothing */ - -#ifndef COPY_CHUNK -#define COPY_CHUNK \ - PREFETCH_NEXT_CHUNK ; \ - ldmia r0!, {r3-r8,ip,lr} ; \ - stmia r1!, {r3-r8,ip,lr} -#endif /* ! COPY_CHUNK */ - -#ifndef SAVE_REGS -#define SAVE_REGS stmfd sp!, {r4-r8, lr}; _SAVE({r4-r8, lr}) -#define RESTORE_REGS ldmfd sp!, {r4-r8, pc} -#endif - -ENTRY(bcopy_page) - PREFETCH_FIRST_CHUNK - SAVE_REGS -#ifdef BIG_LOOPS - mov r2, #(PAGE_SIZE >> 9) -#else - mov r2, #(PAGE_SIZE >> 7) -#endif - -1: - COPY_CHUNK - COPY_CHUNK - COPY_CHUNK - COPY_CHUNK - -#ifdef BIG_LOOPS - /* There is little point making the loop any larger; unless we are - running with the cache off, the load/store overheads will - completely dominate this loop. */ - COPY_CHUNK - COPY_CHUNK - COPY_CHUNK - COPY_CHUNK - - COPY_CHUNK - COPY_CHUNK - COPY_CHUNK - COPY_CHUNK - - COPY_CHUNK - COPY_CHUNK - COPY_CHUNK - COPY_CHUNK -#endif - subs r2, r2, #1 - bne 1b - - RESTORE_REGS /* ...and return. */ -END(bcopy_page) - -/* - * bzero_page(dest) - * - * Optimised zero page routine. - * - * On entry: - * r0 - dest address - * - * Requires: - * number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS), 128 - * otherwise - */ - -ENTRY(bzero_page) - stmfd sp!, {r4-r8, lr} - _SAVE({r4-r8, lr}) -#ifdef BIG_LOOPS - mov r2, #(PAGE_SIZE >> 9) -#else - mov r2, #(PAGE_SIZE >> 7) -#endif - mov r3, #0 - mov r4, #0 - mov r5, #0 - mov r6, #0 - mov r7, #0 - mov r8, #0 - mov ip, #0 - mov lr, #0 - -1: - stmia r0!, {r3-r8,ip,lr} - stmia r0!, {r3-r8,ip,lr} - stmia r0!, {r3-r8,ip,lr} - stmia r0!, {r3-r8,ip,lr} - -#ifdef BIG_LOOPS - /* There is little point making the loop any larger; unless we are - running with the cache off, the load/store overheads will - completely dominate this loop. 
*/ - stmia r0!, {r3-r8,ip,lr} - stmia r0!, {r3-r8,ip,lr} - stmia r0!, {r3-r8,ip,lr} - stmia r0!, {r3-r8,ip,lr} - - stmia r0!, {r3-r8,ip,lr} - stmia r0!, {r3-r8,ip,lr} - stmia r0!, {r3-r8,ip,lr} - stmia r0!, {r3-r8,ip,lr} - - stmia r0!, {r3-r8,ip,lr} - stmia r0!, {r3-r8,ip,lr} - stmia r0!, {r3-r8,ip,lr} - stmia r0!, {r3-r8,ip,lr} - -#endif - - subs r2, r2, #1 - bne 1b - - ldmfd sp!, {r4-r8, pc} -END(bzero_page) - -#else /* _ARM_ARCH_5E */ - -/* * armv5e version of bcopy_page */ ENTRY(bcopy_page) @@ -279,4 +140,3 @@ ENTRY(bzero_page) bne 1b RET END(bzero_page) -#endif /* _ARM_ARCH_5E */ Modified: head/sys/arm/arm/bcopyinout.S ============================================================================== --- head/sys/arm/arm/bcopyinout.S Sun Nov 29 16:29:40 2020 (r368157) +++ head/sys/arm/arm/bcopyinout.S Sun Nov 29 16:44:22 2020 (r368158) @@ -47,510 +47,7 @@ .word _C_LABEL(_min_memcpy_size) __FBSDID("$FreeBSD$"); -#ifdef _ARM_ARCH_5E #include -#else - - .text - .align 2 - -#define GET_PCB(tmp) \ - mrc p15, 0, tmp, c13, c0, 4; \ - add tmp, tmp, #(TD_PCB) - -#define SAVE_REGS stmfd sp!, {r4-r11}; _SAVE({r4-r11}) -#define RESTORE_REGS ldmfd sp!, {r4-r11} - -#if defined(_ARM_ARCH_5E) -#define HELLOCPP # -#define PREFETCH(rx,o) pld [ rx , HELLOCPP (o) ] -#else -#define PREFETCH(rx,o) -#endif - -/* - * r0 = user space address - * r1 = kernel space address - * r2 = length - * - * Copies bytes from user space to kernel space - * - * We save/restore r4-r11: - * r4-r11 are scratch - */ -ENTRY(copyin) - /* Quick exit if length is zero */ - teq r2, #0 - moveq r0, #0 - RETeq - - adds r3, r0, r2 - movcs r0, #EFAULT - RETc(cs) - - ldr r12, =(VM_MAXUSER_ADDRESS + 1) - cmp r3, r12 - movcs r0, #EFAULT - RETc(cs) - - ldr r3, .L_arm_memcpy - ldr r3, [r3] - cmp r3, #0 - beq .Lnormal - ldr r3, .L_min_memcpy_size - ldr r3, [r3] - cmp r2, r3 - blt .Lnormal - stmfd sp!, {r0-r2, r4, lr} - mov r3, r0 - mov r0, r1 - mov r1, r3 - mov r3, #2 /* SRC_IS_USER */ - ldr r4, .L_arm_memcpy - mov lr, pc - ldr pc, [r4] - cmp r0, #0 - ldmfd sp!, {r0-r2, r4, lr} - moveq r0, #0 - RETeq - -.Lnormal: - SAVE_REGS - GET_PCB(r4) - ldr r4, [r4] - - - ldr r5, [r4, #PCB_ONFAULT] - adr r3, .Lcopyfault - str r3, [r4, #PCB_ONFAULT] - - PREFETCH(r0, 0) - PREFETCH(r1, 0) - - /* - * If not too many bytes, take the slow path. - */ - cmp r2, #0x08 - blt .Licleanup - - /* - * Align destination to word boundary. - */ - and r6, r1, #0x3 - ldr pc, [pc, r6, lsl #2] - b .Lialend - .word .Lialend - .word .Lial3 - .word .Lial2 - .word .Lial1 -.Lial3: ldrbt r6, [r0], #1 - sub r2, r2, #1 - strb r6, [r1], #1 -.Lial2: ldrbt r7, [r0], #1 - sub r2, r2, #1 - strb r7, [r1], #1 -.Lial1: ldrbt r6, [r0], #1 - sub r2, r2, #1 - strb r6, [r1], #1 -.Lialend: - - /* - * If few bytes left, finish slow. - */ - cmp r2, #0x08 - blt .Licleanup - - /* - * If source is not aligned, finish slow. - */ - ands r3, r0, #0x03 - bne .Licleanup - - cmp r2, #0x60 /* Must be > 0x5f for unrolled cacheline */ - blt .Licleanup8 - - /* - * Align destination to cacheline boundary. - * If source and destination are nicely aligned, this can be a big - * win. If not, it's still cheaper to copy in groups of 32 even if - * we don't get the nice cacheline alignment. 
- */ - and r6, r1, #0x1f - ldr pc, [pc, r6] - b .Licaligned - .word .Licaligned - .word .Lical28 - .word .Lical24 - .word .Lical20 - .word .Lical16 - .word .Lical12 - .word .Lical8 - .word .Lical4 -.Lical28:ldrt r6, [r0], #4 - sub r2, r2, #4 - str r6, [r1], #4 -.Lical24:ldrt r7, [r0], #4 - sub r2, r2, #4 - str r7, [r1], #4 -.Lical20:ldrt r6, [r0], #4 - sub r2, r2, #4 - str r6, [r1], #4 -.Lical16:ldrt r7, [r0], #4 - sub r2, r2, #4 - str r7, [r1], #4 -.Lical12:ldrt r6, [r0], #4 - sub r2, r2, #4 - str r6, [r1], #4 -.Lical8:ldrt r7, [r0], #4 - sub r2, r2, #4 - str r7, [r1], #4 -.Lical4:ldrt r6, [r0], #4 - sub r2, r2, #4 - str r6, [r1], #4 - - /* - * We start with > 0x40 bytes to copy (>= 0x60 got us into this - * part of the code, and we may have knocked that down by as much - * as 0x1c getting aligned). - * - * This loop basically works out to: - * do { - * prefetch-next-cacheline(s) - * bytes -= 0x20; - * copy cacheline - * } while (bytes >= 0x40); - * bytes -= 0x20; - * copy cacheline - */ -.Licaligned: - PREFETCH(r0, 32) - PREFETCH(r1, 32) - - sub r2, r2, #0x20 - - /* Copy a cacheline */ - ldrt r10, [r0], #4 - ldrt r11, [r0], #4 - ldrt r6, [r0], #4 - ldrt r7, [r0], #4 - ldrt r8, [r0], #4 - ldrt r9, [r0], #4 - stmia r1!, {r10-r11} - ldrt r10, [r0], #4 - ldrt r11, [r0], #4 - stmia r1!, {r6-r11} - - cmp r2, #0x40 - bge .Licaligned - - sub r2, r2, #0x20 - - /* Copy a cacheline */ - ldrt r10, [r0], #4 - ldrt r11, [r0], #4 - ldrt r6, [r0], #4 - ldrt r7, [r0], #4 - ldrt r8, [r0], #4 - ldrt r9, [r0], #4 - stmia r1!, {r10-r11} - ldrt r10, [r0], #4 - ldrt r11, [r0], #4 - stmia r1!, {r6-r11} - - cmp r2, #0x08 - blt .Liprecleanup - -.Licleanup8: - ldrt r8, [r0], #4 - ldrt r9, [r0], #4 - sub r2, r2, #8 - stmia r1!, {r8, r9} - cmp r2, #8 - bge .Licleanup8 - -.Liprecleanup: - /* - * If we're done, bail. - */ - cmp r2, #0 - beq .Lout - -.Licleanup: - and r6, r2, #0x3 - ldr pc, [pc, r6, lsl #2] - b .Licend - .word .Lic4 - .word .Lic1 - .word .Lic2 - .word .Lic3 -.Lic4: ldrbt r6, [r0], #1 - sub r2, r2, #1 - strb r6, [r1], #1 -.Lic3: ldrbt r7, [r0], #1 - sub r2, r2, #1 - strb r7, [r1], #1 -.Lic2: ldrbt r6, [r0], #1 - sub r2, r2, #1 - strb r6, [r1], #1 -.Lic1: ldrbt r7, [r0], #1 - subs r2, r2, #1 - strb r7, [r1], #1 -.Licend: - bne .Licleanup - -.Liout: - mov r0, #0 - - str r5, [r4, #PCB_ONFAULT] - RESTORE_REGS - - RET - -.Lcopyfault: - ldr r0, =EFAULT - str r5, [r4, #PCB_ONFAULT] - RESTORE_REGS - - RET -END(copyin) - -/* - * r0 = kernel space address - * r1 = user space address - * r2 = length - * - * Copies bytes from kernel space to user space - * - * We save/restore r4-r11: - * r4-r11 are scratch - */ - -ENTRY(copyout) - /* Quick exit if length is zero */ - teq r2, #0 - moveq r0, #0 - RETeq - - adds r3, r1, r2 - movcs r0, #EFAULT - RETc(cs) - - ldr r12, =(VM_MAXUSER_ADDRESS + 1) - cmp r3, r12 - movcs r0, #EFAULT - RETc(cs) - - ldr r3, .L_arm_memcpy - ldr r3, [r3] - cmp r3, #0 - beq .Lnormale - ldr r3, .L_min_memcpy_size - ldr r3, [r3] - cmp r2, r3 - blt .Lnormale - stmfd sp!, {r0-r2, r4, lr} - _SAVE({r0-r2, r4, lr}) - mov r3, r0 - mov r0, r1 - mov r1, r3 - mov r3, #1 /* DST_IS_USER */ - ldr r4, .L_arm_memcpy - mov lr, pc - ldr pc, [r4] - cmp r0, #0 - ldmfd sp!, {r0-r2, r4, lr} - moveq r0, #0 - RETeq - -.Lnormale: - SAVE_REGS - GET_PCB(r4) - ldr r4, [r4] - - ldr r5, [r4, #PCB_ONFAULT] - adr r3, .Lcopyfault - str r3, [r4, #PCB_ONFAULT] - - PREFETCH(r0, 0) - PREFETCH(r1, 0) - - /* - * If not too many bytes, take the slow path. 
- */ - cmp r2, #0x08 - blt .Lcleanup - - /* - * Align destination to word boundary. - */ - and r6, r1, #0x3 - ldr pc, [pc, r6, lsl #2] - b .Lalend - .word .Lalend - .word .Lal3 - .word .Lal2 - .word .Lal1 -.Lal3: ldrb r6, [r0], #1 - sub r2, r2, #1 - strbt r6, [r1], #1 -.Lal2: ldrb r7, [r0], #1 - sub r2, r2, #1 - strbt r7, [r1], #1 -.Lal1: ldrb r6, [r0], #1 - sub r2, r2, #1 - strbt r6, [r1], #1 -.Lalend: - - /* - * If few bytes left, finish slow. - */ - cmp r2, #0x08 - blt .Lcleanup - - /* - * If source is not aligned, finish slow. - */ - ands r3, r0, #0x03 - bne .Lcleanup - - cmp r2, #0x60 /* Must be > 0x5f for unrolled cacheline */ - blt .Lcleanup8 - - /* - * Align source & destination to cacheline boundary. - */ - and r6, r1, #0x1f - ldr pc, [pc, r6] - b .Lcaligned - .word .Lcaligned - .word .Lcal28 - .word .Lcal24 - .word .Lcal20 - .word .Lcal16 - .word .Lcal12 - .word .Lcal8 - .word .Lcal4 -.Lcal28:ldr r6, [r0], #4 - sub r2, r2, #4 - strt r6, [r1], #4 -.Lcal24:ldr r7, [r0], #4 - sub r2, r2, #4 - strt r7, [r1], #4 -.Lcal20:ldr r6, [r0], #4 - sub r2, r2, #4 - strt r6, [r1], #4 -.Lcal16:ldr r7, [r0], #4 - sub r2, r2, #4 - strt r7, [r1], #4 -.Lcal12:ldr r6, [r0], #4 - sub r2, r2, #4 - strt r6, [r1], #4 -.Lcal8: ldr r7, [r0], #4 - sub r2, r2, #4 - strt r7, [r1], #4 -.Lcal4: ldr r6, [r0], #4 - sub r2, r2, #4 - strt r6, [r1], #4 - - /* - * We start with > 0x40 bytes to copy (>= 0x60 got us into this - * part of the code, and we may have knocked that down by as much - * as 0x1c getting aligned). - * - * This loop basically works out to: - * do { - * prefetch-next-cacheline(s) - * bytes -= 0x20; - * copy cacheline - * } while (bytes >= 0x40); - * bytes -= 0x20; - * copy cacheline - */ -.Lcaligned: - PREFETCH(r0, 32) - PREFETCH(r1, 32) - - sub r2, r2, #0x20 - - /* Copy a cacheline */ - ldmia r0!, {r6-r11} - strt r6, [r1], #4 - strt r7, [r1], #4 - ldmia r0!, {r6-r7} - strt r8, [r1], #4 - strt r9, [r1], #4 - strt r10, [r1], #4 - strt r11, [r1], #4 - strt r6, [r1], #4 - strt r7, [r1], #4 - - cmp r2, #0x40 - bge .Lcaligned - - sub r2, r2, #0x20 - - /* Copy a cacheline */ - ldmia r0!, {r6-r11} - strt r6, [r1], #4 - strt r7, [r1], #4 - ldmia r0!, {r6-r7} - strt r8, [r1], #4 - strt r9, [r1], #4 - strt r10, [r1], #4 - strt r11, [r1], #4 - strt r6, [r1], #4 - strt r7, [r1], #4 - - cmp r2, #0x08 - blt .Lprecleanup - -.Lcleanup8: - ldmia r0!, {r8-r9} - sub r2, r2, #8 - strt r8, [r1], #4 - strt r9, [r1], #4 - cmp r2, #8 - bge .Lcleanup8 - -.Lprecleanup: - /* - * If we're done, bail. 
- */ - cmp r2, #0 - beq .Lout - -.Lcleanup: - and r6, r2, #0x3 - ldr pc, [pc, r6, lsl #2] - b .Lcend - .word .Lc4 - .word .Lc1 - .word .Lc2 - .word .Lc3 -.Lc4: ldrb r6, [r0], #1 - sub r2, r2, #1 - strbt r6, [r1], #1 -.Lc3: ldrb r7, [r0], #1 - sub r2, r2, #1 - strbt r7, [r1], #1 -.Lc2: ldrb r6, [r0], #1 - sub r2, r2, #1 - strbt r6, [r1], #1 -.Lc1: ldrb r7, [r0], #1 - subs r2, r2, #1 - strbt r7, [r1], #1 -.Lcend: - bne .Lcleanup - -.Lout: - mov r0, #0 - - str r5, [r4, #PCB_ONFAULT] - RESTORE_REGS - - RET -END(copyout) -#endif /* * int badaddr_read_1(const uint8_t *src, uint8_t *dest) Modified: head/sys/arm/arm/in_cksum_arm.S ============================================================================== --- head/sys/arm/arm/in_cksum_arm.S Sun Nov 29 16:29:40 2020 (r368157) +++ head/sys/arm/arm/in_cksum_arm.S Sun Nov 29 16:44:22 2020 (r368158) @@ -116,9 +116,7 @@ END(do_cksum) */ /* LINTSTUB: Ignore */ ASENTRY_NP(L_cksumdata) -#ifdef _ARM_ARCH_5E pld [r0] /* Pre-fetch the start of the buffer */ -#endif mov r2, #0 /* We first have to word-align the buffer. */ @@ -144,7 +142,6 @@ ASENTRY_NP(L_cksumdata) /* Buffer is now word aligned */ .Lcksumdata_wordaligned: -#ifdef _ARM_ARCH_5E cmp r1, #0x04 /* Less than 4 bytes left? */ blt .Lcksumdata_endgame /* Yup */ @@ -199,43 +196,10 @@ ASENTRY_NP(L_cksumdata) adcs r2, r2, r7 adc r2, r2, #0x00 -#else /* !_ARM_ARCH_5E */ - - subs r1, r1, #0x40 - blt .Lcksumdata_bigloop_end - -.Lcksumdata_bigloop: - ldmia r0!, {r3, r4, r5, r6} - adds r2, r2, r3 - adcs r2, r2, r4 - adcs r2, r2, r5 - ldmia r0!, {r3, r4, r5, r7} - adcs r2, r2, r6 - adcs r2, r2, r3 - adcs r2, r2, r4 - adcs r2, r2, r5 - ldmia r0!, {r3, r4, r5, r6} - adcs r2, r2, r7 - adcs r2, r2, r3 - adcs r2, r2, r4 - adcs r2, r2, r5 - ldmia r0!, {r3, r4, r5, r7} - adcs r2, r2, r6 - adcs r2, r2, r3 - adcs r2, r2, r4 - adcs r2, r2, r5 - adcs r2, r2, r7 - adc r2, r2, #0x00 - subs r1, r1, #0x40 - bge .Lcksumdata_bigloop -.Lcksumdata_bigloop_end: -#endif - adds r1, r1, #0x40 RETeq cmp r1, #0x20 -#ifdef _ARM_ARCH_5E ldrdge r4, [r0], #0x08 /* Avoid stalling pld and result */ blt .Lcksumdata_less_than_32 pld [r0, #0x18] @@ -250,19 +214,6 @@ ASENTRY_NP(L_cksumdata) adcs r2, r2, r5 adcs r2, r2, r6 /* XXX: Unavoidable result stall */ adcs r2, r2, r7 -#else - blt .Lcksumdata_less_than_32 - ldmia r0!, {r3, r4, r5, r6} - adds r2, r2, r3 - adcs r2, r2, r4 - adcs r2, r2, r5 - ldmia r0!, {r3, r4, r5, r7} - adcs r2, r2, r6 - adcs r2, r2, r3 - adcs r2, r2, r4 - adcs r2, r2, r5 - adcs r2, r2, r7 -#endif adc r2, r2, #0x00 subs r1, r1, #0x20 RETeq Modified: head/sys/arm/arm/machdep.c ============================================================================== --- head/sys/arm/arm/machdep.c Sun Nov 29 16:29:40 2020 (r368157) +++ head/sys/arm/arm/machdep.c Sun Nov 29 16:44:22 2020 (r368158) @@ -107,8 +107,8 @@ __FBSDID("$FreeBSD$"); #endif -#ifndef _ARM_ARCH_5E -#error FreeBSD requires ARMv5 or later +#ifndef _ARM_ARCH_6 +#error FreeBSD requires ARMv6 or later #endif struct pcpu __pcpu[MAXCPU]; Modified: head/sys/arm/arm/support.S ============================================================================== --- head/sys/arm/arm/support.S Sun Nov 29 16:29:40 2020 (r368157) +++ head/sys/arm/arm/support.S Sun Nov 29 16:44:22 2020 (r368158) @@ -149,17 +149,11 @@ do_memset: /* We are now word aligned */ .Lmemset_wordaligned: orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */ -#ifdef _ARM_ARCH_5E tst ip, #0x04 /* Quad-align for armv5e */ -#else - cmp r1, #0x10 -#endif orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */ -#ifdef 
_ARM_ARCH_5E subne r1, r1, #0x04 /* Quad-align if necessary */ strne r3, [ip], #0x04 cmp r1, #0x10 -#endif blt .Lmemset_loop4 /* If less than 16 then use words */ mov r2, r3 /* Duplicate data */ cmp r1, #0x80 /* If < 128 then skip the big loop */ @@ -168,7 +162,6 @@ do_memset: /* Do 128 bytes at a time */ .Lmemset_loop128: subs r1, r1, #0x80 -#ifdef _ARM_ARCH_5E strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 @@ -185,24 +178,6 @@ do_memset: strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 -#else - stmiage ip!, {r2-r3} - stmiage ip!, {r2-r3} - stmiage ip!, {r2-r3} - stmiage ip!, {r2-r3} - stmiage ip!, {r2-r3} - stmiage ip!, {r2-r3} - stmiage ip!, {r2-r3} - stmiage ip!, {r2-r3} - stmiage ip!, {r2-r3} - stmiage ip!, {r2-r3} - stmiage ip!, {r2-r3} - stmiage ip!, {r2-r3} - stmiage ip!, {r2-r3} - stmiage ip!, {r2-r3} - stmiage ip!, {r2-r3} - stmiage ip!, {r2-r3} -#endif bgt .Lmemset_loop128 RETeq /* Zero length so just exit */ @@ -211,30 +186,18 @@ do_memset: /* Do 32 bytes at a time */ .Lmemset_loop32: subs r1, r1, #0x20 -#ifdef _ARM_ARCH_5E strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 -#else - stmiage ip!, {r2-r3} - stmiage ip!, {r2-r3} - stmiage ip!, {r2-r3} - stmiage ip!, {r2-r3} -#endif bgt .Lmemset_loop32 RETeq /* Zero length so just exit */ adds r1, r1, #0x10 /* Partially adjust for extra sub */ /* Deal with 16 bytes or more */ -#ifdef _ARM_ARCH_5E strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 -#else - stmiage ip!, {r2-r3} - stmiage ip!, {r2-r3} -#endif RETeq /* Zero length so just exit */ addlt r1, r1, #0x10 /* Possibly adjust for extra sub */ @@ -246,14 +209,10 @@ do_memset: bgt .Lmemset_loop4 RETeq /* Zero length so just exit */ -#ifdef _ARM_ARCH_5E /* Compensate for 64-bit alignment check */ adds r1, r1, #0x04 RETeq cmp r1, #2 -#else - cmp r1, #-2 -#endif strb r3, [ip], #0x01 /* Set 1 byte */ strbge r3, [ip], #0x01 /* Set another byte */ @@ -804,243 +763,6 @@ EENTRY(memmove) EEND(memmove) END(bcopy) -#if !defined(_ARM_ARCH_5E) -ENTRY(memcpy) - /* save leaf functions having to store this away */ - /* Do not check arm_memcpy if we're running from flash */ -#if defined(FLASHADDR) && defined(PHYSADDR) -#if FLASHADDR > PHYSADDR - ldr r3, =FLASHADDR - cmp r3, pc - bls .Lnormal -#else - ldr r3, =FLASHADDR - cmp r3, pc - bhi .Lnormal -#endif -#endif - ldr r3, .L_arm_memcpy - ldr r3, [r3] - cmp r3, #0 - beq .Lnormal - ldr r3, .L_min_memcpy_size - ldr r3, [r3] - cmp r2, r3 - blt .Lnormal - stmfd sp!, {r0-r2, r4, lr} - mov r3, #0 - ldr r4, .L_arm_memcpy - mov lr, pc - ldr pc, [r4] - cmp r0, #0 - ldmfd sp!, {r0-r2, r4, lr} - RETeq - -.Lnormal: - stmdb sp!, {r0, lr} /* memcpy() returns dest addr */ - - subs r2, r2, #4 - blt .Lmemcpy_l4 /* less than 4 bytes */ - ands r12, r0, #3 - bne .Lmemcpy_destul /* oh unaligned destination addr */ - ands r12, r1, #3 - bne .Lmemcpy_srcul /* oh unaligned source addr */ - -.Lmemcpy_t8: - /* We have aligned source and destination */ - subs r2, r2, #8 - blt .Lmemcpy_l12 /* less than 12 bytes (4 from above) */ - subs r2, r2, #0x14 - blt .Lmemcpy_l32 /* less than 32 bytes (12 from above) */ - stmdb sp!, {r4} /* borrow r4 */ - - /* blat 32 bytes at a time */ - /* XXX for really big copies perhaps we should use more registers */ -.Lmemcpy_loop32: - ldmia r1!, {r3, r4, r12, lr} - stmia r0!, {r3, r4, r12, lr} - ldmia r1!, {r3, r4, r12, lr} - stmia r0!, {r3, r4, r12, lr} - subs r2, r2, #0x20 - bge .Lmemcpy_loop32 - - cmn r2, #0x10 - ldmiage r1!, {r3, r4, r12, lr} /* blat a remaining 16 
bytes */ - stmiage r0!, {r3, r4, r12, lr} - subge r2, r2, #0x10 - ldmia sp!, {r4} /* return r4 */ - -.Lmemcpy_l32: - adds r2, r2, #0x14 - - /* blat 12 bytes at a time */ -.Lmemcpy_loop12: - ldmiage r1!, {r3, r12, lr} - stmiage r0!, {r3, r12, lr} - subsge r2, r2, #0x0c - bge .Lmemcpy_loop12 - -.Lmemcpy_l12: - adds r2, r2, #8 - blt .Lmemcpy_l4 - - subs r2, r2, #4 - ldrlt r3, [r1], #4 - strlt r3, [r0], #4 - ldmiage r1!, {r3, r12} - stmiage r0!, {r3, r12} - subge r2, r2, #4 - -.Lmemcpy_l4: - /* less than 4 bytes to go */ - adds r2, r2, #4 -#ifdef __APCS_26_ - ldmiaeq sp!, {r0, pc}^ /* done */ -#else - ldmiaeq sp!, {r0, pc} /* done */ -#endif - /* copy the crud byte at a time */ - cmp r2, #2 - ldrb r3, [r1], #1 - strb r3, [r0], #1 - ldrbge r3, [r1], #1 - strbge r3, [r0], #1 - ldrbgt r3, [r1], #1 - strbgt r3, [r0], #1 - ldmia sp!, {r0, pc} - - /* erg - unaligned destination */ -.Lmemcpy_destul: - rsb r12, r12, #4 - cmp r12, #2 - - /* align destination with byte copies */ - ldrb r3, [r1], #1 - strb r3, [r0], #1 - ldrbge r3, [r1], #1 - strbge r3, [r0], #1 - ldrbgt r3, [r1], #1 - strbgt r3, [r0], #1 - subs r2, r2, r12 - blt .Lmemcpy_l4 /* less the 4 bytes */ - - ands r12, r1, #3 - beq .Lmemcpy_t8 /* we have an aligned source */ - - /* erg - unaligned source */ - /* This is where it gets nasty ... */ -.Lmemcpy_srcul: - bic r1, r1, #3 - ldr lr, [r1], #4 - cmp r12, #2 - bgt .Lmemcpy_srcul3 - beq .Lmemcpy_srcul2 - cmp r2, #0x0c *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
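
The same pattern repeats in every touched file: the kernel now requires ARMv6, so the compiler always defines _ARM_ARCH_5E, the #ifdef/#ifndef _ARM_ARCH_5E conditionals can be dropped along with their pre-ARMv5E fallback branches, and only the build-time check in machdep.c moves from ARMv5 to ARMv6. A minimal sketch of that shape follows (illustrative only; the comments are placeholders, not code taken from the tree):

/*
 * Illustrative sketch of the cleanup pattern in this commit.
 */

/* Before: two code paths selected by the architecture macro. */
#ifdef _ARM_ARCH_5E
	/* ARMv5E+ path: pld prefetch, ldrd/strd 64-bit accesses, ... */
#else
	/* pre-ARMv5E fallback: plain ldmia/stmia loops */
#endif

/* After: the ARMv5E+ path is kept unconditionally, and the minimum
 * architecture is enforced once, in machdep.c. */
#ifndef _ARM_ARCH_6
#error FreeBSD requires ARMv6 or later
#endif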