Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 6 Jun 2020 03:09:12 +0000 (UTC)
From:      Justin Hibbits <jhibbits@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r361861 - in head/sys: conf powerpc/aim powerpc/booke powerpc/include powerpc/powerpc
Message-ID:  <202006060309.05639Cwd016143@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jhibbits
Date: Sat Jun  6 03:09:12 2020
New Revision: 361861
URL: https://svnweb.freebsd.org/changeset/base/361861

Log:
  powerpc: Use IFUNCs for copyin/copyout/etc
  
  Summary:
  Radix on AIM, and all of Book-E (currently), can do direct addressing of
  user space, instead of needing to map user addresses into kernel space.
  Take advantage of this to optimize the copy(9) functions for this
  behavior, and avoid effectively NOP translations.
  
  Test Plan: Tested on powerpcspe, powerpc64/booke, powerpc64/AIM
  
  Reviewed by:	bdragon
  Differential Revision:	https://reviews.freebsd.org/D25129

Added:
  head/sys/powerpc/powerpc/support.S   (contents, props changed)
Modified:
  head/sys/conf/files.powerpc
  head/sys/powerpc/aim/mmu_radix.c
  head/sys/powerpc/booke/pmap.c
  head/sys/powerpc/include/asm.h
  head/sys/powerpc/include/mmuvar.h
  head/sys/powerpc/include/param.h
  head/sys/powerpc/powerpc/copyinout.c
  head/sys/powerpc/powerpc/pmap_dispatch.c
  head/sys/powerpc/powerpc/trap.c

Modified: head/sys/conf/files.powerpc
==============================================================================
--- head/sys/conf/files.powerpc	Sat Jun  6 02:45:57 2020	(r361860)
+++ head/sys/conf/files.powerpc	Sat Jun  6 03:09:12 2020	(r361861)
@@ -239,7 +239,7 @@ powerpc/powerpc/autoconf.c	standard
 powerpc/powerpc/bus_machdep.c	standard
 powerpc/powerpc/busdma_machdep.c standard
 powerpc/powerpc/clock.c		standard
-powerpc/powerpc/copyinout.c	standard
+powerpc/powerpc/copyinout.c	optional	aim
 powerpc/powerpc/cpu.c		standard
 powerpc/powerpc/cpu_subr64.S	optional	powerpc64
 powerpc/powerpc/db_disasm.c	optional	ddb
@@ -274,6 +274,7 @@ powerpc/powerpc/sigcode64.S	optional	powerpc64
 powerpc/powerpc/swtch32.S	optional	powerpc | powerpcspe
 powerpc/powerpc/swtch64.S	optional	powerpc64
 powerpc/powerpc/stack_machdep.c	optional	ddb | stack
+powerpc/powerpc/support.S	optional	powerpc64 | booke
 powerpc/powerpc/syncicache.c	standard
 powerpc/powerpc/sys_machdep.c	standard
 powerpc/powerpc/trap.c		standard

Modified: head/sys/powerpc/aim/mmu_radix.c
==============================================================================
--- head/sys/powerpc/aim/mmu_radix.c	Sat Jun  6 02:45:57 2020	(r361860)
+++ head/sys/powerpc/aim/mmu_radix.c	Sat Jun  6 03:09:12 2020	(r361861)
@@ -412,8 +412,6 @@ void mmu_radix_align_superpage(vm_object_t, vm_ooffset
     vm_size_t);
 void mmu_radix_clear_modify(vm_page_t);
 void mmu_radix_copy(pmap_t, pmap_t, vm_offset_t, vm_size_t, vm_offset_t);
-int mmu_radix_map_user_ptr(pmap_t pm,
-    volatile const void *uaddr, void **kaddr, size_t ulen, size_t *klen);
 int mmu_radix_decode_kernel_ptr(vm_offset_t, int *, vm_offset_t *);
 int mmu_radix_enter(pmap_t, vm_offset_t, vm_page_t, vm_prot_t, u_int, int8_t);
 void mmu_radix_enter_object(pmap_t, vm_offset_t, vm_offset_t, vm_page_t,
@@ -538,7 +536,6 @@ static struct pmap_funcs mmu_radix_methods = {
 	.kextract = mmu_radix_kextract,
 	.kremove = mmu_radix_kremove,
 	.change_attr = mmu_radix_change_attr,
-	.map_user_ptr = mmu_radix_map_user_ptr,
 	.decode_kernel_ptr =  mmu_radix_decode_kernel_ptr,
 
 	.tlbie_all = mmu_radix_tlbie_all,
@@ -5996,20 +5993,6 @@ mmu_radix_kremove(vm_offset_t va)
 
 	pte = kvtopte(va);
 	pte_clear(pte);
-}
-
-int mmu_radix_map_user_ptr(pmap_t pm,
-    volatile const void *uaddr, void **kaddr, size_t ulen, size_t *klen)
-{
-	if ((uintptr_t)uaddr + ulen >= VM_MAXUSER_ADDRESS ||
-	    (uintptr_t)uaddr + ulen < (uintptr_t)uaddr)
-		return (EFAULT);
-
-	*kaddr = (void *)(uintptr_t)uaddr;
-	if (klen)
-		*klen = ulen;
-
-	return (0);
 }
 
 int

Modified: head/sys/powerpc/booke/pmap.c
==============================================================================
--- head/sys/powerpc/booke/pmap.c	Sat Jun  6 02:45:57 2020	(r361860)
+++ head/sys/powerpc/booke/pmap.c	Sat Jun  6 03:09:12 2020	(r361861)
@@ -349,8 +349,6 @@ static vm_offset_t	mmu_booke_quick_enter_page(vm_page_
 static void		mmu_booke_quick_remove_page(vm_offset_t addr);
 static int		mmu_booke_change_attr(vm_offset_t addr,
     vm_size_t sz, vm_memattr_t mode);
-static int		mmu_booke_map_user_ptr(pmap_t pm,
-    volatile const void *uaddr, void **kaddr, size_t ulen, size_t *klen);
 static int		mmu_booke_decode_kernel_ptr(vm_offset_t addr,
     int *is_user, vm_offset_t *decoded_addr);
 static void		mmu_booke_page_array_startup(long);
@@ -410,7 +408,6 @@ static struct pmap_funcs mmu_booke_methods = {
 	.kremove = mmu_booke_kremove,
 	.unmapdev = mmu_booke_unmapdev,
 	.change_attr = mmu_booke_change_attr,
-	.map_user_ptr = mmu_booke_map_user_ptr,
 	.decode_kernel_ptr =  mmu_booke_decode_kernel_ptr,
 
 	/* dumpsys() support */
@@ -1206,26 +1203,6 @@ mmu_booke_kremove(vm_offset_t va)
 
 	tlb_miss_unlock();
 	mtx_unlock_spin(&tlbivax_mutex);
-}
-
-/*
- * Provide a kernel pointer corresponding to a given userland pointer.
- * The returned pointer is valid until the next time this function is
- * called in this thread. This is used internally in copyin/copyout.
- */
-int
-mmu_booke_map_user_ptr(pmap_t pm, volatile const void *uaddr,
-    void **kaddr, size_t ulen, size_t *klen)
-{
-
-	if (trunc_page((uintptr_t)uaddr + ulen) > VM_MAXUSER_ADDRESS)
-		return (EFAULT);
-
-	*kaddr = (void *)(uintptr_t)uaddr;
-	if (klen)
-		*klen = ulen;
-
-	return (0);
 }
 
 /*

Modified: head/sys/powerpc/include/asm.h
==============================================================================
--- head/sys/powerpc/include/asm.h	Sat Jun  6 02:45:57 2020	(r361860)
+++ head/sys/powerpc/include/asm.h	Sat Jun  6 03:09:12 2020	(r361861)
@@ -114,6 +114,7 @@
 	.p2align 4; \
 	TYPE_ENTRY(name) \
 DOT_LABEL(name):
+#define	_NAKED_ENTRY(name)	_ENTRY(name)
 #else
 #define	_ENTRY(name) \
 	.text; \
@@ -124,6 +125,15 @@ name: \
 	addis	%r2, %r12, (.TOC.-name)@ha; \
 	addi	%r2, %r2, (.TOC.-name)@l; \
 	.localentry name, .-name;
+
+/* "Naked" function entry.  No TOC prologue for ELFv2. */
+#define	_NAKED_ENTRY(name) \
+	.text; \
+	.p2align 4; \
+	.globl	name; \
+	.type	name,@function; \
+name: \
+	.localentry name, .-name;
 #endif
 
 #define	_END(name) \
@@ -145,6 +155,8 @@ name: \
 	.type	name,@function; \
 	name:
 #define	_END(name)
+
+#define _NAKED_ENTRY(name)	_ENTRY(name)
 
 #define	LOAD_ADDR(reg, var) \
 	lis	reg, var@ha; \

Modified: head/sys/powerpc/include/mmuvar.h
==============================================================================
--- head/sys/powerpc/include/mmuvar.h	Sat Jun  6 02:45:57 2020	(r361860)
+++ head/sys/powerpc/include/mmuvar.h	Sat Jun  6 03:09:12 2020	(r361861)
@@ -182,6 +182,25 @@ struct mmu_kobj {
 
 typedef struct mmu_kobj		*mmu_t;
 
+/* The currently installed pmap object. */
+extern mmu_t	mmu_obj;
+
+/*
+ * Resolve a given pmap function.
+ * 'func' is the function name less the 'pmap_' prefix.
+ */
+#define PMAP_RESOLVE_FUNC(func) 		\
+	({					\
+	 pmap_##func##_t f;			\
+	 const struct mmu_kobj	*mmu = mmu_obj;	\
+	 do {					\
+	    f = mmu->funcs->func;		\
+	    if (f != NULL) break;		\
+	    mmu = mmu->base;			\
+	} while (mmu != NULL);			\
+	f;})
+
+
 #define MMU_DEF(name, ident, methods)		\
 						\
 const struct mmu_kobj name = {		\

Modified: head/sys/powerpc/include/param.h
==============================================================================
--- head/sys/powerpc/include/param.h	Sat Jun  6 02:45:57 2020	(r361860)
+++ head/sys/powerpc/include/param.h	Sat Jun  6 03:09:12 2020	(r361861)
@@ -134,6 +134,9 @@
 #define	KSTACK_GUARD_PAGES	1	/* pages of kstack guard; 0 disables */
 #define	USPACE		(kstack_pages * PAGE_SIZE)	/* total size of pcb */
 
+#define	COPYFAULT		0x1
+#define	FUSUFAULT		0x2
+
 /*
  * Mach derived conversion macros
  */

Modified: head/sys/powerpc/powerpc/copyinout.c
==============================================================================
--- head/sys/powerpc/powerpc/copyinout.c	Sat Jun  6 02:45:57 2020	(r361860)
+++ head/sys/powerpc/powerpc/copyinout.c	Sat Jun  6 03:09:12 2020	(r361861)
@@ -66,13 +66,102 @@ __FBSDID("$FreeBSD$");
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
+#include <vm/vm_extern.h>
 #include <vm/vm_map.h>
 
+#include <machine/mmuvar.h>
 #include <machine/pcb.h>
 #include <machine/vmparam.h>
+#include <machine/ifunc.h>
 
+/*
+ * On powerpc64 (AIM only) the copy functions are IFUNCs, selecting the best
+ * option based on the PMAP in use.
+ *
+ * There are two options for copy functions on powerpc64:
+ * - 'remap' copies, which remap userspace segments into kernel space for
+ *   copying.  This is used by the 'oea64' pmap.
+ * - 'direct' copies, which copy directly from userspace.  This does not require
+ *   remapping user segments into kernel.  This is used by the 'radix' pmap for
+ *   performance.
+ *
+ * Book-E does not use the C functions, opting instead to use the 'direct'
+ * copies, directly, avoiding the IFUNC overhead.
+ *
+ * On 32-bit AIM these functions are direct, not IFUNCs, for performance.
+ */
+#ifdef __powerpc64__
+int subyte_remap(volatile void *addr, int byte);
+int subyte_direct(volatile void *addr, int byte);
+int copyinstr_remap(const void *udaddr, void *kaddr, size_t len, size_t *done);
+int copyinstr_direct(const void *udaddr, void *kaddr, size_t len, size_t *done);
+int copyout_remap(const void *kaddr, void *udaddr, size_t len);
+int copyout_direct(const void *kaddr, void *udaddr, size_t len);
+int copyin_remap(const void *uaddr, void *kaddr, size_t len);
+int copyin_direct(const void *uaddr, void *kaddr, size_t len);
+int suword32_remap(volatile void *addr, int word);
+int suword32_direct(volatile void *addr, int word);
+int suword_remap(volatile void *addr, long word);
+int suword_direct(volatile void *addr, long word);
+int suword64_remap(volatile void *addr, int64_t word);
+int suword64_direct(volatile void *addr, int64_t word);
+int fubyte_remap(volatile const void *addr);
+int fubyte_direct(volatile const void *addr);
+int fuword16_remap(volatile const void *addr);
+int fuword16_direct(volatile const void *addr);
+int fueword32_remap(volatile const void *addr, int32_t *val);
+int fueword32_direct(volatile const void *addr, int32_t *val);
+int fueword64_remap(volatile const void *addr, int64_t *val);
+int fueword64_direct(volatile const void *addr, int64_t *val);
+int fueword_remap(volatile const void *addr, long *val);
+int fueword_direct(volatile const void *addr, long *val);
+int casueword32_remap(volatile uint32_t *addr, uint32_t old, uint32_t *oldvalp,
+	uint32_t new);
+int casueword32_direct(volatile uint32_t *addr, uint32_t old, uint32_t *oldvalp,
+	uint32_t new);
+int casueword_remap(volatile u_long *addr, u_long old, u_long *oldvalp,
+	u_long new);
+int casueword_direct(volatile u_long *addr, u_long old, u_long *oldvalp,
+	u_long new);
+
+/*
+ * The IFUNC resolver picks the 'remap' variant if the PMAP implementation
+ * provides a pmap_map_user_ptr function, and the 'direct' variant otherwise.
+ */
+#define DEFINE_COPY_FUNC(ret, func, args)			\
+	DEFINE_IFUNC(, ret, func, args)				\
+	{							\
+		return (PMAP_RESOLVE_FUNC(map_user_ptr) ?	\
+		    func##_remap : func##_direct);		\
+	}
+DEFINE_COPY_FUNC(int, subyte, (volatile void *, int))
+DEFINE_COPY_FUNC(int, copyinstr, (const void *, void *, size_t, size_t *))
+DEFINE_COPY_FUNC(int, copyin, (const void *, void *, size_t))
+DEFINE_COPY_FUNC(int, copyout, (const void *, void *, size_t))
+DEFINE_COPY_FUNC(int, suword, (volatile void *, long))
+DEFINE_COPY_FUNC(int, suword32, (volatile void *, int))
+#ifdef __powerpc64__
+DEFINE_COPY_FUNC(int, suword64, (volatile void *, int64_t))
+#endif
+DEFINE_COPY_FUNC(int, fubyte, (volatile const void *))
+DEFINE_COPY_FUNC(int, fuword16, (volatile const void *))
+DEFINE_COPY_FUNC(int, fueword32, (volatile const void *, int32_t *))
+#ifdef __powerpc64__
+DEFINE_COPY_FUNC(int, fueword64, (volatile const void *, int64_t *))
+#endif
+DEFINE_COPY_FUNC(int, fueword, (volatile const void *, long *))
+DEFINE_COPY_FUNC(int, casueword32,
+    (volatile uint32_t *, uint32_t, uint32_t *, uint32_t))
+DEFINE_COPY_FUNC(int, casueword, (volatile u_long *, u_long, u_long *, u_long))
+
+#define REMAP(x)	x##_remap
+#else
+#define	REMAP(x)	x
+#endif
+
+
 int
-copyout(const void *kaddr, void *udaddr, size_t len)
+REMAP(copyout)(const void *kaddr, void *udaddr, size_t len)
 {
 	struct		thread *td;
 	pmap_t		pm;
@@ -111,7 +200,7 @@ copyout(const void *kaddr, void *udaddr, size_t len)
 }
 
 int
-copyin(const void *udaddr, void *kaddr, size_t len)
+REMAP(copyin)(const void *udaddr, void *kaddr, size_t len)
 {
 	struct		thread *td;
 	pmap_t		pm;
@@ -150,7 +239,7 @@ copyin(const void *udaddr, void *kaddr, size_t len)
 }
 
 int
-copyinstr(const void *udaddr, void *kaddr, size_t len, size_t *done)
+REMAP(copyinstr)(const void *udaddr, void *kaddr, size_t len, size_t *done)
 {
 	const char	*up;
 	char		*kp;
@@ -183,7 +272,7 @@ copyinstr(const void *udaddr, void *kaddr, size_t len,
 }
 
 int
-subyte(volatile void *addr, int byte)
+REMAP(subyte)(volatile void *addr, int byte)
 {
 	struct		thread *td;
 	pmap_t		pm;
@@ -212,7 +301,7 @@ subyte(volatile void *addr, int byte)
 
 #ifdef __powerpc64__
 int
-suword32(volatile void *addr, int word)
+REMAP(suword32)(volatile void *addr, int word)
 {
 	struct		thread *td;
 	pmap_t		pm;
@@ -238,10 +327,16 @@ suword32(volatile void *addr, int word)
 	td->td_pcb->pcb_onfault = NULL;
 	return (0);
 }
+#else
+int
+REMAP(suword32)(volatile void *addr, int32_t word)
+{	/* Non-powerpc64 variant: forwards the store to suword(). */
+	return (REMAP(suword)(addr, (long)word));
+}
 #endif
 
 int
-suword(volatile void *addr, long word)
+REMAP(suword)(volatile void *addr, long word)
 {
 	struct		thread *td;
 	pmap_t		pm;
@@ -270,20 +365,14 @@ suword(volatile void *addr, long word)
 
 #ifdef __powerpc64__
 int
-suword64(volatile void *addr, int64_t word)
+REMAP(suword64)(volatile void *addr, int64_t word)
 {
-	return (suword(addr, (long)word));
+	return (REMAP(suword)(addr, (long)word));
 }
-#else
-int
-suword32(volatile void *addr, int32_t word)
-{
-	return (suword(addr, (long)word));
-}
 #endif
 
 int
-fubyte(volatile const void *addr)
+REMAP(fubyte)(volatile const void *addr)
 {
 	struct		thread *td;
 	pmap_t		pm;
@@ -312,7 +401,7 @@ fubyte(volatile const void *addr)
 }
 
 int
-fuword16(volatile const void *addr)
+REMAP(fuword16)(volatile const void *addr)
 {
 	struct		thread *td;
 	pmap_t		pm;
@@ -340,7 +429,7 @@ fuword16(volatile const void *addr)
 }
 
 int
-fueword32(volatile const void *addr, int32_t *val)
+REMAP(fueword32)(volatile const void *addr, int32_t *val)
 {
 	struct		thread *td;
 	pmap_t		pm;
@@ -369,7 +458,7 @@ fueword32(volatile const void *addr, int32_t *val)
 
 #ifdef __powerpc64__
 int
-fueword64(volatile const void *addr, int64_t *val)
+REMAP(fueword64)(volatile const void *addr, int64_t *val)
 {
 	struct		thread *td;
 	pmap_t		pm;
@@ -398,7 +487,7 @@ fueword64(volatile const void *addr, int64_t *val)
 #endif
 
 int
-fueword(volatile const void *addr, long *val)
+REMAP(fueword)(volatile const void *addr, long *val)
 {
 	struct		thread *td;
 	pmap_t		pm;
@@ -426,7 +515,7 @@ fueword(volatile const void *addr, long *val)
 }
 
 int
-casueword32(volatile uint32_t *addr, uint32_t old, uint32_t *oldvalp,
+REMAP(casueword32)(volatile uint32_t *addr, uint32_t old, uint32_t *oldvalp,
     uint32_t new)
 {
 	struct thread *td;
@@ -474,7 +563,7 @@ casueword32(volatile uint32_t *addr, uint32_t old, uin
 
 #ifndef __powerpc64__
 int
-casueword(volatile u_long *addr, u_long old, u_long *oldvalp, u_long new)
+REMAP(casueword)(volatile u_long *addr, u_long old, u_long *oldvalp, u_long new)
 {
 
 	return (casueword32((volatile uint32_t *)addr, old,
@@ -482,7 +571,7 @@ casueword(volatile u_long *addr, u_long old, u_long *o
 }
 #else
 int
-casueword(volatile u_long *addr, u_long old, u_long *oldvalp, u_long new)
+REMAP(casueword)(volatile u_long *addr, u_long old, u_long *oldvalp, u_long new)
 {
 	struct thread *td;
 	pmap_t pm;

Modified: head/sys/powerpc/powerpc/pmap_dispatch.c
==============================================================================
--- head/sys/powerpc/powerpc/pmap_dispatch.c	Sat Jun  6 02:45:57 2020	(r361860)
+++ head/sys/powerpc/powerpc/pmap_dispatch.c	Sat Jun  6 03:09:12 2020	(r361861)
@@ -62,7 +62,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/mmuvar.h>
 #include <machine/smp.h>
 
-static mmu_t		mmu_obj;
+mmu_t		mmu_obj;
 
 /*
  * pmap globals
@@ -99,24 +99,14 @@ pmap_nomethod(void)
 
 #define DEFINE_PMAP_IFUNC(ret, func, args) 				\
 	DEFINE_IFUNC(, ret, pmap_##func, args) {			\
-		const struct mmu_kobj *mmu = mmu_obj;			\
 		pmap_##func##_t f;					\
-		do {							\
-			f = mmu->funcs->func;				\
-			if (f != NULL) break;				\
-			mmu = mmu->base;				\
-		} while (mmu != NULL);					\
+		f = PMAP_RESOLVE_FUNC(func);				\
 		return (f != NULL ? f : (pmap_##func##_t)pmap_nomethod);\
 	}
 #define DEFINE_DUMPSYS_IFUNC(ret, func, args) 				\
 	DEFINE_IFUNC(, ret, dumpsys_##func, args) {			\
-		const struct mmu_kobj *mmu = mmu_obj;			\
 		pmap_dumpsys_##func##_t f;				\
-		do {							\
-			f = mmu->funcs->dumpsys_##func;			\
-			if (f != NULL) break;				\
-			mmu = mmu->base;				\
-		} while (mmu != NULL);					\
+		f = PMAP_RESOLVE_FUNC(dumpsys_##func);			\
 		return (f != NULL ? f : (pmap_dumpsys_##func##_t)pmap_nomethod);\
 	}
 

Added: head/sys/powerpc/powerpc/support.S
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/powerpc/powerpc/support.S	Sat Jun  6 03:09:12 2020	(r361861)
@@ -0,0 +1,539 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2018, Matthew Macy <mmacy@freebsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$	
+ */
+
+/*
+ * Assembly variants of various functions, for those that don't need generic C
+ * implementations.  Currently this includes:
+ *
+ * - Direct-access versions of copyin/copyout methods.
+ *   - These are used by Radix AIM pmap (ISA 3.0), and Book-E, to avoid
+ *     unnecessary pmap_map_user_ptr() calls.
+ */
+
+#include "assym.inc"
+#include "opt_sched.h"
+
+#include <sys/syscall.h>
+#include <sys/errno.h>
+	
+#include <machine/param.h>
+#include <machine/asm.h>
+#include <machine/spr.h>
+#include <machine/trap.h>
+#include <machine/vmparam.h>
+
+#ifdef _CALL_ELF
+.abiversion _CALL_ELF
+#endif
+
+#ifdef __powerpc64__
+#define	LOAD	ld
+#define	STORE	std
+#define	WORD	8
+#define	CMPI	cmpdi
+#define	CMPLI	cmpldi
+/* log_2(8 * WORD) */
+#define	LOOP_LOG	6
+#define	LOG_WORD	3
+#else
+#define	LOAD	lwz
+#define	STORE	stw
+#define	WORD	4
+#define	CMPI	cmpwi
+#define	CMPLI	cmplwi
+/* log_2(8 * WORD) */
+#define	LOOP_LOG	5
+#define	LOG_WORD	2
+#endif
+
+#ifdef AIM
+#define ENTRY_DIRECT(x) ENTRY(x ## _direct)
+#else
+#define	ENTRY_DIRECT(x)	ENTRY(x)
+#endif
+	
+#ifdef __powerpc64__
+#define	PROLOGUE		;\
+	mflr	%r0 		;\
+	std	%r0, 16(%r1)	;\
+
+#define	EPILOGUE		;\
+	ld	%r0, 16(%r1)	;\
+	mtlr	%r0		;\
+	blr			;\
+	nop
+
+#define	VALIDATE_TRUNCATE_ADDR_COPY	VALIDATE_ADDR_COPY
+#define	VALIDATE_ADDR_COPY(raddr, len)	\
+	srdi  %r0, raddr, 52		;\
+	cmpwi %r0, 1			;\
+	bge-	copy_fault		;\
+	nop
+
+#define	VALIDATE_ADDR_FUSU(raddr)	;\
+	srdi  %r0, raddr, 52		;\
+	cmpwi %r0, 1			;\
+	bge-	fusufault		;\
+	nop
+
+#else
+#define	PROLOGUE		;\
+	mflr	%r0 		;\
+	stw	%r0, 4(%r1)	;\
+
+#define	EPILOGUE		;\
+	lwz	%r0, 4(%r1)	;\
+	mtlr	%r0		;\
+	blr			;\
+	nop
+
+/* %r0 is temporary */
+/*
+ * Validate that the address and length are valid.
+ * VALIDATE_ADDR_COPY() has to account for wraparound.
+ */
+#define	VALIDATE_ADDR_COPY(raddr, len)		\
+	lis	%r0, VM_MAXUSER_ADDRESS@h	;\
+	ori	%r0, %r0, VM_MAXUSER_ADDRESS@l	;\
+	cmplw	%r0, raddr			;\
+	blt-	copy_fault			;\
+	add	%r0, raddr, len			;\
+	cmplw	7, %r0, raddr			;\
+	blt-	7, copy_fault			;\
+	mtcrf	0x80, %r0			;\
+	bt-	0, copy_fault			;\
+	nop
+
+#define	VALIDATE_TRUNCATE_ADDR_COPY(raddr, len)		\
+	lis	%r0, VM_MAXUSER_ADDRESS@h	;\
+	ori	%r0, %r0, VM_MAXUSER_ADDRESS@l	;\
+	cmplw	%r0, raddr			;\
+	blt-	copy_fault			;\
+	sub	%r0, %r0, raddr			;\
+	cmplw	len, %r0			;\
+	isel	len, len, %r0, 0		;\
+
+#define	VALIDATE_ADDR_FUSU(raddr)		\
+	lis	%r0, VM_MAXUSER_ADDRESS@h	;\
+	ori	%r0, %r0, VM_MAXUSER_ADDRESS@l	;\
+	cmplw	%r0, raddr			;\
+	ble-	fusufault
+
+#endif
+
+#define PCPU(reg) mfsprg  reg, 0
+
+#define	SET_COPYFAULT(raddr, rpcb, len)	\
+	VALIDATE_ADDR_COPY(raddr, len)	;\
+	PCPU(%r9)			;\
+	li	%r0, COPYFAULT		;\
+	LOAD	rpcb, PC_CURPCB(%r9)	;\
+	STORE	%r0, PCB_ONFAULT(rpcb)	;\
+
+#define	SET_COPYFAULT_TRUNCATE(raddr, rpcb, len)\
+	VALIDATE_TRUNCATE_ADDR_COPY(raddr, len)	;\
+	PCPU(%r9)				;\
+	li	%r0, COPYFAULT			;\
+	LOAD	rpcb, PC_CURPCB(%r9)		;\
+	STORE	%r0, PCB_ONFAULT(rpcb)
+
+#define	SET_FUSUFAULT(raddr, rpcb)	\
+	VALIDATE_ADDR_FUSU(raddr)	;\
+	PCPU(%r9)			;\
+	li	%r0, FUSUFAULT		;\
+	LOAD	rpcb, PC_CURPCB(%r9)	;\
+	STORE	%r0, PCB_ONFAULT(rpcb)
+
+#define	CLEAR_FAULT_NO_CLOBBER(rpcb)	\
+	PCPU(%r9)			;\
+	LOAD	rpcb, PC_CURPCB(%r9)	;\
+	li	%r0, 0			;\
+	STORE	%r0, PCB_ONFAULT(rpcb)
+
+#define	CLEAR_FAULT(rpcb)		\
+	CLEAR_FAULT_NO_CLOBBER(rpcb)	;\
+	li	%r3, 0
+
+/*
+ *  bcopy(src, dst, len)
+ *        %r3  %r4  %r5
+ * 
+ *  %r7 is the pcb pointer
+ * 
+ *  %r0 and %r8-%r10 are volatile
+ *  %r11 and %r12 are generally volatile, used in linking and exception
+ *  handling.  Can be clobbered here.
+ *
+ * Does not allocate or use stack space, but clobbers all volatile registers.
+ */
+
+#define	rs	%r3
+#define	rd	%r4
+#define	rl	%r5
+
+#define	t1	%r6
+#define	t2	%r7
+#define	t3	%r8
+#define	t4	%r9
+#define	t5	%r10
+#define	t6	%r11
+#define	t7	%r12
+#define	t8	%r0
+
+#define Thresh	WORD * 8
+#define	W4	3
+#define	W2	2
+#define	W1	1
+#define	WORDS(n)	(32 - LOG_WORD - W##n)
+.text
+ENTRY(bcopy_generic)
+	CMPLI	0, %r5, 0
+	beq	.Lend
+	dcbtst	0, rd
+	dcbt	0, rs
+	CMPLI	rl, Thresh
+	blt	.Lsmall
+	b	.Llarge
+/* memcpy */
+/* ... */
+.Lsmall: 				/* < 8 words remaining */
+	mtcrf	0x3, rl
+.Lsmall_start:
+	bf	WORDS(4), 0f
+	LOAD	t1, 0(rs)
+	LOAD	t2, WORD*1(rs)
+	LOAD	t3, WORD*2(rs)
+	LOAD	t4, WORD*3(rs)
+	addi	rs, rs, WORD*4
+	STORE	t1, 0(rd)
+	STORE	t2, WORD*1(rd)
+	STORE	t3, WORD*2(rd)
+	STORE	t4, WORD*3(rd)
+	addi	rd, rd, WORD*4
+0:					/* < 4 words remaining */
+	bf	WORDS(2), 1f
+	LOAD	t1, 0(rs)
+	LOAD	t2, WORD*1(rs)
+	addi	rs, rs, WORD*2
+	STORE	t1, 0(rd)
+	STORE	t2, WORD*1(rd)
+	addi	rd, rd, WORD*2
+1:					/* < 2 words remaining */
+	bf	WORDS(1), 2f
+	LOAD	t1, 0(rs)
+	addi	rs, rs, WORD
+	STORE	t1, 0(rd)
+	addi	rd, rd, WORD
+2:					/* < 1 word remaining */
+#ifdef __powerpc64__
+	bf	29, 3f
+	lwz	t1, 0(rs)
+	addi	rs, rs, 4
+	stw	t1, 0(rd)
+	addi	rd, rd, 4
+3:					/* < 4 bytes remaining */
+#endif
+	bf	30, 4f
+	lhz	t1, 0(rs)
+	addi	rs, rs, 2
+	sth	t1, 0(rd)
+	addi	rd, rd, 2
+4:					/* < 2 bytes remaining */
+	bf	31, .Lout
+	lbz	t1, 0(rs)
+	addi	rs, rs, 1
+	stb	t1, 0(rd)
+	addi	rd, rd, 1
+	b	.Lout
+
+	.align 4
+.Llarge:
+	neg	t3, rd
+	andi.	t6, t3, WORD-1		/* Align rd to word size */
+	mtctr	t6
+	sub	rl, rl, t6
+	beq+	.Llargealigned
+1:
+	lbz	t1, 0(rs)
+	addi	rs, rs, 1
+	stb	t1, 0(rd)
+	addi	rd, rd, 1
+	bdnz	1b
+
+.Llargealigned:
+	srwi.	t2, rl, LOOP_LOG  /* length >> log_2(loop_size) => 8W iterations */
+	mtcrf	0x3, rl
+	beq	.Lsmall_start
+	mtctr	t2
+	b	1f
+
+	.align 5
+1:
+	LOAD	t1, 0(rs)
+	LOAD	t2, WORD(rs)
+	LOAD	t3, WORD*2(rs)
+	LOAD	t4, WORD*3(rs)
+	LOAD	t5, WORD*4(rs)
+	LOAD	t6, WORD*5(rs)
+	LOAD	t7, WORD*6(rs)
+	LOAD	t8, WORD*7(rs)
+	addi	rs, rs, WORD*8
+	STORE	t1, 0(rd)
+	STORE	t2, WORD*1(rd)
+	STORE	t3, WORD*2(rd)
+	STORE	t4, WORD*3(rd)
+	STORE	t5, WORD*4(rd)
+	STORE	t6, WORD*5(rd)
+	STORE	t7, WORD*6(rd)
+	STORE	t8, WORD*7(rd)
+	addi	rd, rd, WORD*8
+	bdnz	1b
+
+	b	.Lsmall_start
+.Lout:
+/* done */	
+.Lend:	
+	blr
+
+/*
+ * copyout(from_kernel, to_user, len)
+ *         %r3,        %r4,    %r5
+ */
+ENTRY_DIRECT(copyout)
+	PROLOGUE
+	SET_COPYFAULT(%r4, %r7, %r5)
+	bl bcopy_generic 
+	nop
+	CLEAR_FAULT(%r7)
+	EPILOGUE
+
+/*
+ * copyin(from_user, to_kernel, len)
+ *        %r3,        %r4,    %r5
+ */
+ENTRY_DIRECT(copyin)
+	PROLOGUE
+	SET_COPYFAULT(%r3, %r7, %r5)
+	bl bcopy_generic
+	nop
+	CLEAR_FAULT(%r7)
+	EPILOGUE
+/*
+ * copyinstr(const void *udaddr, void *kaddr, size_t len, size_t *done)
+ *			%r3          %r4         %r5        %r6 
+ */
+	
+ENTRY_DIRECT(copyinstr)
+	PROLOGUE
+	SET_COPYFAULT_TRUNCATE(%r3, %r7, %r5)
+	addi	%r9, %r5, 1		/* %r9 = %r5 + 1, loaded into CTR */
+	mtctr	%r9
+	mr	%r8, %r3		/* %r8 = source; %r3 becomes the result */
+	addi	%r8, %r8, -1		/* bias cursors for lbzu/stbu below */
+	addi	%r4, %r4, -1
+	li	%r3, ENAMETOOLONG	/* result if CTR runs out first */
+0:
+	bdz-	2f
+	lbzu	%r0, 1(%r8)
+	stbu	%r0, 1(%r4)
+
+	// Zero (terminating) byte reached?
+	CMPI	%r0, 0
+	beq-	1f
+	b	0b
+1:
+	li	%r3, 0			/* zero byte copied: result is 0 */
+2:
+	/* skip storing length if done is NULL */
+	CMPI	%r6, 0
+	beq-	3f
+	mfctr	%r0
+	sub	%r0, %r9, %r0		/* *done = %r9 - CTR */
+	STORE	%r0, 0(%r6)
+3:
+	CLEAR_FAULT_NO_CLOBBER(%r7)	/* zero pcb_onfault; %r3 is kept */
+	EPILOGUE
+
+ENTRY_DIRECT(subyte)
+	PROLOGUE
+	SET_FUSUFAULT(%r3, %r7)
+	stb  %r4, 0(%r3)
+	CLEAR_FAULT(%r7)
+	EPILOGUE
+
+#ifndef __powerpc64__
+ENTRY_DIRECT(suword)
+	PROLOGUE
+	SET_FUSUFAULT(%r3, %r7)
+	stw  %r4, 0(%r3)
+	CLEAR_FAULT(%r7)
+	EPILOGUE
+#endif	
+
+ENTRY_DIRECT(suword32)
+	PROLOGUE
+	SET_FUSUFAULT(%r3, %r7)
+	stw  %r4, 0(%r3)
+	CLEAR_FAULT(%r7)
+	EPILOGUE
+
+#ifdef __powerpc64__	
+ENTRY_DIRECT(suword64)
+	PROLOGUE
+	SET_FUSUFAULT(%r3, %r7)
+	std  %r4, 0(%r3) 
+	CLEAR_FAULT(%r7)
+	EPILOGUE
+ENTRY_DIRECT(suword)
+	PROLOGUE
+	SET_FUSUFAULT(%r3, %r7)
+	std  %r4, 0(%r3) 
+	CLEAR_FAULT(%r7)
+	EPILOGUE
+#endif	
+	
+ENTRY_DIRECT(fubyte)
+	PROLOGUE
+	SET_FUSUFAULT(%r3, %r7)
+	lbz %r3, 0(%r3)
+	CLEAR_FAULT_NO_CLOBBER(%r7)
+	EPILOGUE
+
+ENTRY_DIRECT(fuword16)
+	PROLOGUE
+	SET_FUSUFAULT(%r3, %r7)
+	lhz %r3, 0(%r3)
+	CLEAR_FAULT_NO_CLOBBER(%r7)
+	EPILOGUE
+
+#ifndef __powerpc64__
+ENTRY_DIRECT(fueword)	
+	PROLOGUE
+	SET_FUSUFAULT(%r3, %r7)
+	lwz  %r0, 0(%r3)
+	stw  %r0,  0(%r4)
+	CLEAR_FAULT(%r7)
+	EPILOGUE
+#endif	
+ENTRY_DIRECT(fueword32)
+	PROLOGUE
+	SET_FUSUFAULT(%r3, %r7)
+	lwz  %r0, 0(%r3)
+	stw  %r0,  0(%r4)
+	CLEAR_FAULT(%r7)
+	EPILOGUE
+
+#ifdef __powerpc64__
+ENTRY_DIRECT(fueword)	
+	PROLOGUE
+	SET_FUSUFAULT(%r3, %r7)
+	ld  %r0, 0(%r3)
+	std %r0, 0(%r4)
+	CLEAR_FAULT(%r7)
+	EPILOGUE
+
+ENTRY_DIRECT(fueword64)
+	PROLOGUE
+	SET_FUSUFAULT(%r3, %r7)
+	ld  %r0, 0(%r3)
+	std %r0, 0(%r4)
+	CLEAR_FAULT(%r7)
+	EPILOGUE
+#endif
+
+/*
+ * casueword(volatile u_long *base, u_long old, u_long *oldp, u_long new)
+ *			      %r3          %r4           %r5         %r6 
+ */
+
+#define	CASUEWORD32(raddr, rpcb)					;\
+	PROLOGUE							;\
+	SET_FUSUFAULT(raddr, rpcb)					;\
+	li	%r8, 0							;\
+1:									;\
+	lwarx	%r0, 0, %r3						;\
+	cmplw	%r4, %r0						;\
+	bne	2f							;\
+	stwcx.	%r6, 0, %r3						;\
+	bne-	3f							;\
+	b	4f							;\
+2:									;\
+	stwcx.	%r0, 0, %r3       	/* clear reservation (74xx) */	;\
+3:									;\
+	li	%r8, 1							;\
+4:									;\
+	stw	%r0, 0(%r5)						;\
+	CLEAR_FAULT_NO_CLOBBER(rpcb)					;\
+	mr	%r3, %r8						;\
+	EPILOGUE	
+	
+ENTRY_DIRECT(casueword32)
+	CASUEWORD32(%r3, %r7)
+
+#ifdef __powerpc64__
+#define	CASUEWORD64(raddr, rpcb)					;\
+	PROLOGUE							;\
+	SET_FUSUFAULT(raddr, rpcb)					;\
+	li	%r8, 0							;\
+1:									;\
+	ldarx	%r0, 0, %r3						;\
+	cmpld	%r4, %r0						;\
+	bne	2f							;\
+	stdcx.	%r6, 0, %r3						;\
+	bne-	3f							;\
+	b	4f							;\
+2:									;\
+	stdcx.	%r0, 0, %r3       	/* clear reservation (74xx) */	;\
+3:									;\
+	li	%r8, 1							;\
+4:									;\
+	std	%r0, 0(%r5)						;\
+	CLEAR_FAULT_NO_CLOBBER(rpcb)					;\
+	mr	%r3, %r8						;\
+	EPILOGUE
+
+ENTRY_DIRECT(casueword)
+	CASUEWORD64(%r3, %r7)
+
+ENTRY_DIRECT(casueword64)
+	CASUEWORD64(%r3, %r7)
+#else
+ENTRY_DIRECT(casueword)
+	CASUEWORD32(%r3, %r7)
+#endif
+	
+_NAKED_ENTRY(fusufault)
+	CLEAR_FAULT_NO_CLOBBER(%r7)

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202006060309.05639Cwd016143>