From owner-freebsd-arm@FreeBSD.ORG Wed Nov 28 15:03:39 2007 Return-Path: Delivered-To: freebsd-arm@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 9AE5B16A418 for ; Wed, 28 Nov 2007 15:03:39 +0000 (UTC) (envelope-from raj@semihalf.com) Received: from mail.semihalf.com (mail.semihalf.com [83.15.139.206]) by mx1.freebsd.org (Postfix) with ESMTP id D9F0B13C4CE for ; Wed, 28 Nov 2007 15:03:38 +0000 (UTC) (envelope-from raj@semihalf.com) Received: from localhost (unknown [127.0.0.1]) by mail.semihalf.com (Postfix) with ESMTP id CD180143E7; Wed, 28 Nov 2007 16:05:32 +0100 (CET) Received: from mail.semihalf.com ([127.0.0.1]) by localhost (mail.semihalf.com [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id 22632-02; Wed, 28 Nov 2007 16:05:29 +0100 (CET) Message-ID: <474D8345.30505@semihalf.com> Date: Wed, 28 Nov 2007 16:03:33 +0100 From: Rafal Jaworowski MIME-Version: 1.0 To: freebsd-arm@freebsd.org Content-Type: multipart/mixed; boundary="------------030700030908050405090503" X-Virus-Scanned: by amavisd-new at semihalf.com Subject: ARM9E issues X-BeenThere: freebsd-arm@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: Porting FreeBSD to the StrongARM Processor List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 28 Nov 2007 15:03:39 -0000 This is a multi-part message in MIME format. --------------030700030908050405090503 Content-Type: text/plain; charset=ISO-8859-2 Content-Transfer-Encoding: 7bit Hi, When trying to produce code for ARM9E variant I encountered the following issues with building kernel: 1. Per earlier discussion with Warner, the CPU_ARM9E is supposed to be mutually exclusive with CPU_ARM9 or other cpu idents, but there are some fixes needed to build with CPU_ARM9E alone -- see attached arm9e_cpufuncs-fix.diff. 2. 
When the kernel is built with -mcpu=arm9e, the low-level routines (bcopy_page(), bzero_page() and friends) fail to assemble: src/sys/arm/arm/bcopy_page.S:190: Error: selected processor does not support `pld [r0]' src/sys/arm/arm/bcopy_page.S:195: Error: selected processor does not support `pld [r0,#0x18]' ... ... Those problematic instructions appear within _ARM_ARCH_5E conditionals, which confused me a lot, since -mcpu=arm9e should be compliant with v5te, right? Upon further investigation and comparison with the equivalent NetBSD code, I noticed that all the sections we now guard with _ARM_ARCH_5E were originally __XSCALE__ conditionals... When I revert _ARM_ARCH_5E -> __XSCALE__ (see attached xscale_v5e-low_level.diff), the kernel builds with -mcpu=arm9e (no assembly errors) and works. I assume we have imported the NetBSD base, so what was the reason for this change of conditionals? Were all those sections of code considered to work on any v5te and not only XScale? Maybe what I'm seeing is some FreeBSD/arm toolchain problem. I'll look into this further, but any comments or hints are appreciated. 
Rafal --------------030700030908050405090503 Content-Type: text/plain; name="arm9e_cpufuncs-fix.diff" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="arm9e_cpufuncs-fix.diff" diff --git a/src/sys/arm/arm/cpufunc.c b/src/sys/arm/arm/cpufunc.c index 1c51140..b3f4829 100644 --- a/src/sys/arm/arm/cpufunc.c +++ b/src/sys/arm/arm/cpufunc.c @@ -737,10 +737,10 @@ struct cpu_functions cpufuncs; u_int cputype; u_int cpu_reset_needs_v4_MMU_disable; /* flag used in locore.s */ -#if defined(CPU_ARM7TDMI) || defined(CPU_ARM8) || defined(CPU_ARM9) || \ - defined (CPU_ARM10) || \ - defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \ - defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) || \ +#if defined(CPU_ARM7TDMI) || defined(CPU_ARM8) || defined(CPU_ARM9) || \ + defined(CPU_ARM9E) || defined (CPU_ARM10) || \ + defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \ + defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) || \ defined(CPU_XSCALE_80219) || defined(CPU_XSCALE_81342) static void get_cachetype_cp15(void); @@ -938,7 +938,7 @@ #if defined(CPU_ARM9E) || defined(CPU_AR cpu_reset_needs_v4_MMU_disable = 1; /* V4 or higher */ get_cachetype_cp15(); pmap_pte_init_generic(); - return 0; + goto out; } #endif /* CPU_ARM9E || CPU_ARM10 */ #ifdef CPU_ARM10 @@ -1798,6 +1798,9 @@ #endif /* Now really make sure they are clean. 
*/ __asm __volatile ("mcr\tp15, 0, r0, c7, c7, 0" : : ); + if (vector_page == ARM_VECTORS_HIGH) + cpuctrl |= CPU_CONTROL_VECRELOC; + /* Set the control register */ ctrl = cpuctrl; cpu_control(0xffffffff, cpuctrl); --------------030700030908050405090503 Content-Type: text/plain; name="xscale_v5e-low_level.diff" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="xscale_v5e-low_level.diff" diff --git a/src/sys/arm/arm/bcopy_page.S b/src/sys/arm/arm/bcopy_page.S index ebbd1dd..c50e152 100644 --- a/src/sys/arm/arm/bcopy_page.S +++ b/src/sys/arm/arm/bcopy_page.S @@ -44,7 +44,7 @@ __FBSDID("$FreeBSD: src/sys/arm/arm/bcop #include "assym.s" -#ifndef _ARM_ARCH_5E +#ifndef __XSCALE__ /* #define BIG_LOOPS */ @@ -179,10 +179,10 @@ #endif ldmfd sp!, {r4-r8, pc} -#else /* _ARM_ARCH_5E */ +#else /* __XSCALE__ */ /* - * armv5e version of bcopy_page + * XSCALE version of bcopy_page */ ENTRY(bcopy_page) pld [r0] @@ -248,7 +248,7 @@ ENTRY(bcopy_page) RET /* - * armv5e version of bzero_page + * XSCALE version of bzero_page */ ENTRY(bzero_page) mov r1, #PAGE_SIZE @@ -273,4 +273,4 @@ ENTRY(bzero_page) subs r1, r1, #128 bne 1b RET -#endif /* _ARM_ARCH_5E */ +#endif /* __XSCALE__ */ diff --git a/src/sys/arm/arm/bcopyinout.S b/src/sys/arm/arm/bcopyinout.S index 1c695e6..7476d39 100644 --- a/src/sys/arm/arm/bcopyinout.S +++ b/src/sys/arm/arm/bcopyinout.S @@ -46,7 +46,7 @@ #include .word _C_LABEL(_min_memcpy_size) __FBSDID("$FreeBSD: src/sys/arm/arm/bcopyinout.S,v 1.9 2007/10/13 12:05:03 cognet Exp $"); -#ifdef _ARM_ARCH_5E +#ifdef __XSCALE__ #include #else @@ -64,7 +64,7 @@ #endif #define SAVE_REGS stmfd sp!, {r4-r11} #define RESTORE_REGS ldmfd sp!, {r4-r11} -#if defined(_ARM_ARCH_5E) +#if defined(__XSCALE__) #define HELLOCPP # #define PREFETCH(rx,o) pld [ rx , HELLOCPP (o) ] #else diff --git a/src/sys/arm/arm/in_cksum_arm.S b/src/sys/arm/arm/in_cksum_arm.S index 879a0e9..e021c7c 100644 --- a/src/sys/arm/arm/in_cksum_arm.S +++ b/src/sys/arm/arm/in_cksum_arm.S @@ -113,7 
+113,7 @@ ENTRY(do_cksum) */ /* LINTSTUB: Ignore */ ASENTRY_NP(L_cksumdata) -#ifdef _ARM_ARCH_5E +#ifdef __XSCALE__ pld [r0] /* Pre-fetch the start of the buffer */ #endif mov r2, #0 @@ -147,7 +147,7 @@ #endif /* Buffer is now word aligned */ .Lcksumdata_wordaligned: -#ifdef _ARM_ARCH_5E +#ifdef __XSCALE__ cmp r1, #0x04 /* Less than 4 bytes left? */ blt .Lcksumdata_endgame /* Yup */ @@ -202,7 +202,7 @@ #ifdef _ARM_ARCH_5E adcs r2, r2, r7 adc r2, r2, #0x00 -#else /* !_ARM_ARCH_5E */ +#else /* !__XSCALE__ */ subs r1, r1, #0x40 blt .Lcksumdata_bigloop_end @@ -238,7 +238,7 @@ #endif RETeq cmp r1, #0x20 -#ifdef _ARM_ARCH_5E +#ifdef __XSCALE__ ldrged r4, [r0], #0x08 /* Avoid stalling pld and result */ blt .Lcksumdata_less_than_32 pld [r0, #0x18] diff --git a/src/sys/arm/arm/support.S b/src/sys/arm/arm/support.S index 37bd317..882888c 100644 --- a/src/sys/arm/arm/support.S +++ b/src/sys/arm/arm/support.S @@ -86,13 +86,13 @@ do_memset: /* We are now word aligned */ .Lmemset_wordaligned: orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */ -#ifdef _ARM_ARCH_5E +#ifdef __XSCALE__ tst ip, #0x04 /* Quad-align for armv5e */ #else cmp r1, #0x10 #endif orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */ -#ifdef _ARM_ARCH_5E +#ifdef __XSCALE__ subne r1, r1, #0x04 /* Quad-align if necessary */ strne r3, [ip], #0x04 cmp r1, #0x10 @@ -105,7 +105,7 @@ #endif /* Do 128 bytes at a time */ .Lmemset_loop128: subs r1, r1, #0x80 -#ifdef _ARM_ARCH_5E +#ifdef __XSCALE__ strged r2, [ip], #0x08 strged r2, [ip], #0x08 strged r2, [ip], #0x08 @@ -148,7 +148,7 @@ #endif /* Do 32 bytes at a time */ .Lmemset_loop32: subs r1, r1, #0x20 -#ifdef _ARM_ARCH_5E +#ifdef __XSCALE__ strged r2, [ip], #0x08 strged r2, [ip], #0x08 strged r2, [ip], #0x08 @@ -165,7 +165,7 @@ #endif adds r1, r1, #0x10 /* Partially adjust for extra sub */ /* Deal with 16 bytes or more */ -#ifdef _ARM_ARCH_5E +#ifdef __XSCALE__ strged r2, [ip], #0x08 strged r2, [ip], #0x08 #else @@ -183,7 +183,7 @@ #endif bgt .Lmemset_loop4 RETeq 
/* Zero length so just exit */ -#ifdef _ARM_ARCH_5E +#ifdef __XSCALE__ /* Compensate for 64-bit alignment check */ adds r1, r1, #0x04 RETeq @@ -868,7 +868,7 @@ #endif add r1, r1, #1 b .Lmemmove_bl4 -#if !defined(_ARM_ARCH_5E) +#if !defined(__XSCALE__) ENTRY(memcpy) /* save leaf functions having to store this away */ /* Do not check arm_memcpy if we're running from flash */ @@ -2870,7 +2870,7 @@ #endif strh r2, [r0, #0x09] strb r1, [r0, #0x0b] RET -#endif /* _ARM_ARCH_5E */ +#endif /* __XSCALE__ */ #ifdef GPROF diff --git a/src/sys/arm/arm/swtch.S b/src/sys/arm/arm/swtch.S index 03fabfb..3e46904 100644 --- a/src/sys/arm/arm/swtch.S +++ b/src/sys/arm/arm/swtch.S @@ -187,7 +187,7 @@ #endif /* PMAP_INCLUDE_PTE_SYNC */ ldr pc, [r9, #CF_CONTEXT_SWITCH] /* Restore all the save registers */ -#ifndef _ARM_ARCH_5E +#ifndef __XSCALE__ add r1, r7, #PCB_R8 ldmia r1, {r8-r13} #else @@ -249,7 +249,7 @@ #endif mov r4, r0 /* Save the old thread. */ /* Save all the registers in the old thread's pcb */ -#ifndef _ARM_ARCH_5E +#ifndef __XSCALE__ add r7, r2, #(PCB_R8) stmia r7, {r8-r13} #else @@ -419,7 +419,7 @@ #endif /* PMAP_INCLUDE_PTE_SYNC */ msr cpsr_c, r3 /* Restore the old mode */ /* Restore all the save registers */ -#ifndef _ARM_ARCH_5E +#ifndef __XSCALE__ add r7, r9, #PCB_R8 ldmia r7, {r8-r13} sub r7, r7, #PCB_R8 /* restore PCB pointer */ --------------030700030908050405090503--