Date: Wed, 28 Jul 2010 02:04:14 +0530 From: "Jayachandran C." <c.jayachandran@gmail.com> To: freebsd-mips@freebsd.org Subject: Support for 64bit userspace. Message-ID: <AANLkTikAjRTMn0m1DemdmUXKMydmyQPxij%2BN=1d9L2=y@mail.gmail.com>
next in thread | raw e-mail | index | archive | help
[-- Attachment #1 --] Here's my initial work to get 64bit user space. With this set of changes, I can boot to the single user shell with n64 /sbin/init and /bin/sh. There are still issues to fix, and the dynamic loader is still not working, but I think this would be a good time to get some initial feedback. The changes mainly add a 3 level page table for both userspace and kernel. The PTE entries are 32bit still (this will be changed to 64bit once I get the current code more stable) and the PDE entries are 64 bit, which would give us 40bit (9+9+10+12) virtual address space. Managing both 32bit and 64bit page tables in one pmap.c was tough, so I have moved all the 64bit pmap code to a new file pmap64.c, and added a config option for 64bit. But otherwise the 32bit code is almost completely untouched. But once things are stable, we can see if some part of pmap.c and pmap64.c can be moved to a common file. The changes have been grouped into (files are also at http://people.freebsd.org/~jchandra/mips-n64/ ) mips-n64-other.patch - header file and other minor changes - param.h - add PDRSHIFT/MASK for mid-level, rename SEGOFSET to SEGMASK for consistency. - _stdint.h/profile.h - 64 bit defs I missed in the last pass - vm_machdep.c/pm_machdep.c - add UX bit to status register where needed. mips-n64-asm.patch - changes for assembly files - exception.S - add second level page table lookup - exception.S/locore.S - enable UX in status - exception.S - the kernel stack is in XKSEG, so we need to set KX before saving registers - support.S - fuword should be 64bit in n64 - swtch.S - kstack in XKSEG mips-n64-pmap.patch - move 64bit code to pmap64.c - revert the 64bit additions to pmap.c - create pmap64.c which will be used in 64 bit compilation, (this has some debugging code still in there, and there are still a few issues I'm looking at) mips-n64-config.patch - add config option for 64bit compile, so that we pick pmap.c in 32bit and pmap64.c in 64bit compile. 
I'm not sure if there is a better way to do this,. - options.mips, mips/conf/DEFAULTS - adds options KERNEL_32 and KERNEL_64, make KERNEL_32 default option - files.mips - use pmap.c or pmap64.c depending on kernel_32 or kernel_64 rmi-n64-rge.patch - support for 64 bit in RMI gmac driver. Let me know your comments (buildworld needs -DWITHOUT_DYNAMICROOT -DWITHOUT_CDDL -DWITHOUT_USB -DWITHOUT_BLUETOOTH, for now) Thanks, JC. [-- Attachment #2 --] Index: sys/mips/include/param.h =================================================================== --- sys/mips/include/param.h (revision 210534) +++ sys/mips/include/param.h (working copy) @@ -103,9 +103,22 @@ #define PAGE_SHIFT 12 /* LOG2(PAGE_SIZE) */ #define PAGE_SIZE (1<<PAGE_SHIFT) /* bytes/page */ #define PAGE_MASK (PAGE_SIZE-1) + #define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t))) #define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t))) +#if defined(__mips_n64) +#define SEGSHIFT 31 /* LOG2(NBSEG) */ +#define NBSEG (1ul << SEGSHIFT) /* bytes/segment */ +#else +#define SEGSHIFT 22 /* LOG2(NBSEG) */ +#define NBSEG (1 << SEGSHIFT) /* bytes/segment */ +#endif +#define PDRSHIFT 22 /* only used in n64 */ +#define PDRMASK ((1 << PDRSHIFT) - 1) +#define NBPDR (1 << PDRSHIFT) /* bytes/pagedir */ +#define SEGMASK (NBSEG-1) /* byte offset into segment */ + #define MAXPAGESIZES 1 /* maximum number of supported page sizes */ #define BLKDEV_IOSIZE 2048 /* xxx: Why is this 1/2 page? 
*/ Index: sys/mips/include/_stdint.h =================================================================== --- sys/mips/include/_stdint.h (revision 210534) +++ sys/mips/include/_stdint.h (working copy) @@ -46,15 +46,28 @@ #define INT8_C(c) (c) #define INT16_C(c) (c) #define INT32_C(c) (c) +#if defined(__mips_n64) +#define INT64_C(c) (c ## L) +#else #define INT64_C(c) (c ## LL) +#endif #define UINT8_C(c) (c) #define UINT16_C(c) (c) #define UINT32_C(c) (c ## U) +#if defined(__mips_n64) +#define UINT64_C(c) (c ## UL) +#else #define UINT64_C(c) (c ## ULL) +#endif +#if defined(__mips_n64) +#define INTMAX_C(c) (c ## L) +#define UINTMAX_C(c) (c ## UL) +#else #define INTMAX_C(c) (c ## LL) #define UINTMAX_C(c) (c ## ULL) +#endif #endif /* !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) */ Index: sys/mips/include/profile.h =================================================================== --- sys/mips/include/profile.h (revision 210534) +++ sys/mips/include/profile.h (working copy) @@ -115,7 +115,11 @@ #define FUNCTION_ALIGNMENT 4 +#ifdef __mips_n64 +typedef unsigned long uintfptr_t; +#else typedef unsigned int uintfptr_t; +#endif #endif /* _KERNEL */ @@ -123,7 +127,11 @@ * An unsigned integral type that can hold non-negative difference between * function pointers. */ +#ifdef __mips_n64 +typedef u_long fptrdiff_t; +#else typedef u_int fptrdiff_t; +#endif #ifdef _KERNEL Index: sys/mips/include/runq.h =================================================================== --- sys/mips/include/runq.h (revision 210534) +++ sys/mips/include/runq.h (working copy) @@ -42,7 +42,11 @@ #define RQB_BIT(pri) (1ul << ((pri) & (RQB_BPW - 1))) #define RQB_WORD(pri) ((pri) >> RQB_L2BPW) +#if defined(__mips_n64) +#define RQB_FFS(word) (ffsl(word) - 1) +#else #define RQB_FFS(word) (ffs(word) - 1) +#endif /* * Type of run queue status word. 
Index: sys/mips/include/pmap.h =================================================================== --- sys/mips/include/pmap.h (revision 210534) +++ sys/mips/include/pmap.h (working copy) @@ -49,7 +49,8 @@ #include <machine/vmparam.h> #include <machine/pte.h> -#define NKPT 120 /* actual number of kernel page tables */ +#define NKPT 120 /* actual number of kernel page tables */ +#define NUPDE (NPDEPG * NPDEPG) /* # of userspace page table pages */ #ifndef LOCORE Index: sys/mips/include/vmparam.h =================================================================== --- sys/mips/include/vmparam.h (revision 210534) +++ sys/mips/include/vmparam.h (working copy) @@ -51,12 +51,6 @@ * is the top (end) of the user stack. */ #define USRTEXT (1*PAGE_SIZE) -/* - * USRSTACK needs to start a little below 0x8000000 because the R8000 - * and some QED CPUs perform some virtual address checks before the - * offset is calculated. - */ -#define USRSTACK 0x7ffff000 /* Start of user stack */ /* * Virtual memory related constants, all in bytes @@ -103,19 +97,22 @@ #define VM_MAX_MMAP_ADDR VM_MAXUSER_ADDRESS #if defined(__mips_n64) -#define VM_MAXUSER_ADDRESS (VM_MINUSER_ADDRESS + (NPDEPG * NPTEPG * PAGE_SIZE)) +#define VM_MAXUSER_ADDRESS (VM_MINUSER_ADDRESS + (NPDEPG * NPDEPG * NPTEPG * PAGE_SIZE)) #define VM_MIN_KERNEL_ADDRESS ((vm_offset_t)0xc000000000000000) -#define VM_MAX_KERNEL_ADDRESS (VM_MIN_KERNEL_ADDRESS + (NPDEPG * NPTEPG * PAGE_SIZE)) +#define VM_MAX_KERNEL_ADDRESS (VM_MIN_KERNEL_ADDRESS + (NPDEPG * NPDEPG * NPTEPG * PAGE_SIZE)) #else #define VM_MAXUSER_ADDRESS ((vm_offset_t)0x80000000) #define VM_MIN_KERNEL_ADDRESS ((vm_offset_t)0xC0000000) #define VM_MAX_KERNEL_ADDRESS ((vm_offset_t)0xFFFFC000) #endif -#if 0 -#define KERNBASE (VM_MIN_KERNEL_ADDRESS) -#else + #define KERNBASE ((vm_offset_t)(intptr_t)(int32_t)0x80000000) -#endif +/* + * USRSTACK needs to start a little below 0x8000000 because the R8000 + * and some QED CPUs perform some virtual address checks before the + * 
offset is calculated. + */ +#define USRSTACK (VM_MAXUSER_ADDRESS - PAGE_SIZE) /* Start of user stack */ /* * Disable superpage reservations. (not sure if this is right @@ -198,8 +195,4 @@ */ #define VM_NFREEORDER 9 -#define SEGSHIFT 22 /* LOG2(NBSEG) */ -#define NBSEG (1 << SEGSHIFT) /* bytes/segment */ -#define SEGOFSET (NBSEG-1) /* byte offset into segment */ - #endif /* !_MACHINE_VMPARAM_H_ */ Index: sys/mips/mips/vm_machdep.c =================================================================== --- sys/mips/mips/vm_machdep.c (revision 210534) +++ sys/mips/mips/vm_machdep.c (working copy) @@ -148,8 +148,8 @@ pcb2->pcb_context[PCB_REG_S0] = (register_t)(intptr_t)fork_return; pcb2->pcb_context[PCB_REG_S1] = (register_t)(intptr_t)td2; pcb2->pcb_context[PCB_REG_S2] = (register_t)(intptr_t)td2->td_frame; - pcb2->pcb_context[PCB_REG_SR] = (MIPS_SR_KX | MIPS_SR_INT_MASK) & - mips_rd_status(); + pcb2->pcb_context[PCB_REG_SR] = mips_rd_status() & + (MIPS_SR_KX | MIPS_SR_UX | MIPS_SR_INT_MASK); /* * FREEBSD_DEVELOPERS_FIXME: * Setup any other CPU-Specific registers (Not MIPS Standard) @@ -351,8 +351,8 @@ pcb2->pcb_context[PCB_REG_S1] = (register_t)(intptr_t)td; pcb2->pcb_context[PCB_REG_S2] = (register_t)(intptr_t)td->td_frame; /* Dont set IE bit in SR. 
sched lock release will take care of it */ - pcb2->pcb_context[PCB_REG_SR] = (MIPS_SR_KX | MIPS_SR_INT_MASK) & - mips_rd_status(); + pcb2->pcb_context[PCB_REG_SR] = mips_rd_status() & + (MIPS_SR_KX | MIPS_SR_UX | MIPS_SR_INT_MASK); #ifdef CPU_CNMIPS pcb2->pcb_context[PCB_REG_SR] |= MIPS_SR_COP_2_BIT | MIPS_SR_COP_0_BIT | Index: sys/mips/mips/pm_machdep.c =================================================================== --- sys/mips/mips/pm_machdep.c (revision 210534) +++ sys/mips/mips/pm_machdep.c (working copy) @@ -486,8 +486,10 @@ td->td_frame->t9 = imgp->entry_addr & ~3; /* abicall req */ td->td_frame->sr = MIPS_SR_KSU_USER | MIPS_SR_EXL | MIPS_SR_INT_IE | (mips_rd_status() & MIPS_SR_INT_MASK); -#if defined(__mips_n32) || defined(__mips_n64) +#if defined(__mips_n32) td->td_frame->sr |= MIPS_SR_PX; +#elif defined(__mips_n64) + td->td_frame->sr |= MIPS_SR_PX | MIPS_SR_UX; #endif #ifdef CPU_CNMIPS td->td_frame->sr |= MIPS_SR_COP_2_BIT | MIPS_SR_PX | MIPS_SR_UX | [-- Attachment #3 --] Index: sys/mips/mips/exception.S =================================================================== --- sys/mips/mips/exception.S (revision 210534) +++ sys/mips/mips/exception.S (working copy) @@ -137,7 +137,15 @@ PTR_L k1, 0(k1) #08: k1=seg entry MFC0 k0, MIPS_COP_0_BAD_VADDR #09: k0=bad address (again) beq k1, zero, 2f #0a: ==0 -- no page table - srl k0, PAGE_SHIFT - 2 #0b: k0=VPN (aka va>>10) +#ifdef __mips_n64 + PTR_SRL k0, PDRSHIFT - PTRSHIFT # k0=VPN + andi k0, k0, PTRMASK # k0=pde offset + PTR_ADDU k1, k0, k1 # k1=pde entry address + PTR_L k1, 0(k1) # k1=pde entry + MFC0 k0, MIPS_COP_0_BAD_VADDR # k0=bad address (again) + beq k1, zero, 2f # ==0 -- no page table +#endif + PTR_SRL k0, PAGE_SHIFT - 2 #0b: k0=VPN (aka va>>10) andi k0, k0, 0xff8 #0c: k0=page tab offset PTR_ADDU k1, k1, k0 #0d: k1=pte address lw k0, 0(k1) #0e: k0=lo0 pte @@ -238,7 +246,7 @@ #elif defined(TARGET_XLR_XLS) #define CLEAR_STATUS \ mfc0 a0, MIPS_COP_0_STATUS ;\ - li a2, (MIPS_SR_KX | MIPS_SR_COP_2_BIT) ; 
\ + li a2, (MIPS_SR_KX | MIPS_SR_UX | MIPS_SR_COP_2_BIT) ; \ or a0, a0, a2 ; \ li a2, ~(MIPS_SR_INT_IE | MIPS_SR_EXL | SR_KSU_USER) ; \ and a0, a0, a2 ; \ @@ -426,6 +434,12 @@ /* * Save all of the registers except for the kernel temporaries in u.u_pcb. */ + mfc0 k0, MIPS_COP_0_STATUS + HAZARD_DELAY +#ifdef __mips_n64 + ori k1, k0, MIPS_SR_KX + mtc0 k1, MIPS_COP_0_STATUS +#endif GET_CPU_PCPU(k1) PTR_L k1, PC_CURPCB(k1) SAVE_U_PCB_REG(AT, AST, k1) @@ -443,7 +457,7 @@ SAVE_U_PCB_REG(t2, T2, k1) SAVE_U_PCB_REG(t3, T3, k1) SAVE_U_PCB_REG(ta0, TA0, k1) - mfc0 a0, MIPS_COP_0_STATUS # First arg is the status reg. + move a0, k0 # First arg is the status reg. SAVE_U_PCB_REG(ta1, TA1, k1) SAVE_U_PCB_REG(ta2, TA2, k1) SAVE_U_PCB_REG(ta3, TA3, k1) @@ -478,7 +492,7 @@ #if defined(CPU_CNMIPS) or t0, t0, (MIPS_SR_KX | MIPS_SR_SX | MIPS_SR_UX | MIPS_SR_PX) #elif defined(TARGET_XLR_XLS) - or t0, t0, (MIPS_SR_KX | MIPS_SR_COP_2_BIT) + or t0, t0, (MIPS_SR_KX | MIPS_SR_UX | MIPS_SR_COP_2_BIT) #endif mtc0 t0, MIPS_COP_0_STATUS PTR_ADDU a0, k1, U_PCB_REGS @@ -642,6 +656,12 @@ * Save the relevant user registers into the u.u_pcb struct. * We don't need to save s0 - s8 because the compiler does it for us. 
*/ + mfc0 k0, MIPS_COP_0_STATUS + HAZARD_DELAY +#ifdef __mips_n64 + ori k1, k0, MIPS_SR_KX + mtc0 k1, MIPS_COP_0_STATUS +#endif GET_CPU_PCPU(k1) PTR_L k1, PC_CURPCB(k1) SAVE_U_PCB_REG(AT, AST, k1) @@ -680,7 +700,7 @@ mflo v0 # get lo/hi late to avoid stall mfhi v1 - mfc0 a0, MIPS_COP_0_STATUS + move a0, k0 mfc0 a1, MIPS_COP_0_CAUSE MFC0 a3, MIPS_COP_0_EXC_PC SAVE_U_PCB_REG(v0, MULLO, k1) @@ -696,7 +716,7 @@ #ifdef CPU_CNMIPS or t0, t0, (MIPS_SR_KX | MIPS_SR_SX | MIPS_SR_UX | MIPS_SR_PX) #elif defined(TARGET_XLR_XLS) - or t0, t0, (MIPS_SR_KX | MIPS_SR_COP_2_BIT) + or t0, t0, (MIPS_SR_KX | MIPS_SR_UX | MIPS_SR_COP_2_BIT) #endif mtc0 t0, MIPS_COP_0_STATUS ITLBNOPFIX @@ -824,6 +844,18 @@ beqz k1, 3f nop +#ifdef __mips_n64 + MFC0 k0, MIPS_COP_0_BAD_VADDR + PTR_SRL k0, PDRSHIFT - PTRSHIFT # k0=pde offset (almost) + beq k1, zero, MipsKernGenException # ==0 -- no pde tab + andi k0, k0, PTRMASK # k0=pde offset + PTR_ADDU k1, k0, k1 # k1=pde entry address + PTR_L k1, 0(k1) # k1=pde entry + + /* Validate pde table pointer. 
*/ + beqz k1, 3f + nop +#endif MFC0 k0, MIPS_COP_0_BAD_VADDR # k0=bad address (again) PTR_SRL k0, PAGE_SHIFT - 2 # k0=VPN andi k0, k0, 0xffc # k0=page tab offset @@ -984,6 +1016,14 @@ PTR_L k1, 0(k1) # k1=seg entry MFC0 k0, MIPS_COP_0_BAD_VADDR # k0=bad address (again) beq k1, zero, MipsKernGenException # ==0 -- no page table +#ifdef __mips_n64 + PTR_SRL k0, PDRSHIFT - PTRSHIFT # k0=VPN + andi k0, k0, PTRMASK # k0=pde offset + PTR_ADDU k1, k0, k1 # k1=pde entry address + PTR_L k1, 0(k1) # k1=pde entry + MFC0 k0, MIPS_COP_0_BAD_VADDR # k0=bad address (again) + beq k1, zero, MipsKernGenException # ==0 -- no page table +#endif PTR_SRL k0, PAGE_SHIFT - 2 # k0=VPN andi k0, k0, 0xff8 # k0=page tab offset PTR_ADDU k1, k1, k0 # k1=pte address Index: sys/mips/mips/genassym.c =================================================================== --- sys/mips/mips/genassym.c (revision 210534) +++ sys/mips/mips/genassym.c (working copy) @@ -94,6 +94,7 @@ ASSYM(PAGE_SIZE, PAGE_SIZE); ASSYM(PAGE_MASK, PAGE_MASK); ASSYM(SEGSHIFT, SEGSHIFT); +ASSYM(PDRSHIFT, PDRSHIFT); ASSYM(NPTEPG, NPTEPG); ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED); ASSYM(TDF_ASTPENDING, TDF_ASTPENDING); @@ -103,3 +104,4 @@ ASSYM(MIPS_KSEG0_START, MIPS_KSEG0_START); ASSYM(MIPS_KSEG1_START, MIPS_KSEG1_START); ASSYM(MIPS_KSEG2_START, MIPS_KSEG2_START); +ASSYM(MIPS_XKSEG_START, MIPS_XKSEG_START); Index: sys/mips/mips/locore.S =================================================================== --- sys/mips/mips/locore.S (revision 210534) +++ sys/mips/mips/locore.S (working copy) @@ -101,7 +101,7 @@ li t0, ~(MIPS_SR_DE | MIPS_SR_SOFT_RESET | MIPS_SR_ERL | MIPS_SR_EXL | MIPS_SR_INT_IE) #elif defined (TARGET_XLR_XLS) /* Set these bits */ - li t1, (MIPS_SR_COP_2_BIT | MIPS_SR_COP_0_BIT | MIPS_SR_KX) + li t1, (MIPS_SR_COP_2_BIT | MIPS_SR_COP_0_BIT | MIPS_SR_KX | MIPS_SR_UX ) /* Reset these bits */ li t0, ~(MIPS_SR_BEV | MIPS_SR_SOFT_RESET | MIPS_SR_INT_IE) Index: sys/mips/mips/swtch.S 
=================================================================== --- sys/mips/mips/swtch.S (revision 210534) +++ sys/mips/mips/swtch.S (working copy) @@ -278,10 +278,12 @@ PTR_L a2, TD_PCB(a1) PTR_S a2, PC_CURPCB(a3) PTR_L v0, TD_KSTACK(a1) -#if !defined(__mips_n64) +#if defined(__mips_n64) + PTR_LI s0, MIPS_XKSEG_START +#else PTR_LI s0, MIPS_KSEG2_START # If Uarea addr is below kseg2, +#endif bltu v0, s0, sw2 # no need to insert in TLB. -#endif lw a1, TD_UPTE + 0(s7) # a1 = u. pte #0 lw a2, TD_UPTE + 4(s7) # a2 = u. pte #1 /* Index: sys/mips/mips/support.S =================================================================== --- sys/mips/mips/support.S (revision 210534) +++ sys/mips/mips/support.S (working copy) @@ -429,8 +429,9 @@ * {fu,su},{byte,sword,word}, fetch or store a byte, short or word to * user data space. */ -LEAF(fuword) -ALEAF(fuword32) +#if defined(__mips_n64) +LEAF(fuword64) +ALEAF(fuword) ALEAF(fuiword) PTR_LA v0, fswberr blt a0, zero, fswberr # make sure address is in user space @@ -438,10 +439,26 @@ GET_CPU_PCPU(v1) PTR_L v1, PC_CURPCB(v1) PTR_S v0, U_PCB_ONFAULT(v1) + ld v0, 0(a0) # fetch word + j ra + PTR_S zero, U_PCB_ONFAULT(v1) +END(fuword64) +#endif + +LEAF(fuword32) +#if !defined(__mips_n64) +ALEAF(fuword) +#endif + PTR_LA v0, fswberr + blt a0, zero, fswberr # make sure address is in user space + nop + GET_CPU_PCPU(v1) + PTR_L v1, PC_CURPCB(v1) + PTR_S v0, U_PCB_ONFAULT(v1) lw v0, 0(a0) # fetch word j ra PTR_S zero, U_PCB_ONFAULT(v1) -END(fuword) +END(fuword32) LEAF(fusword) ALEAF(fuisword) [-- Attachment #4 --] Index: sys/mips/mips/pmap64.c =================================================================== --- sys/mips/mips/pmap64.c (revision 0) +++ sys/mips/mips/pmap64.c (revision 0) @@ -0,0 +1,3008 @@ +/* + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * Copyright (c) 1994 John S. Dyson + * All rights reserved. + * Copyright (c) 1994 David Greenman + * All rights reserved. 
+ * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department and William Jolitz of UUNET Technologies Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 + * from: src/sys/i386/i386/pmap.c,v 1.250.2.8 2000/11/21 00:09:14 ps + * JNPR: pmap.c,v 1.11.2.1 2007/08/16 11:51:06 girish + */ + +/* + * Manages physical address maps. 
+ * + * In addition to hardware address maps, this + * module is called upon to provide software-use-only + * maps which may or may not be stored in the same + * form as hardware maps. These pseudo-maps are + * used to store intermediate results from copy + * operations to and from address spaces. + * + * Since the information managed by this module is + * also stored by the logical address mapping module, + * this module may throw away valid virtual-to-physical + * mappings at almost any time. However, invalidations + * of virtual-to-physical mappings must be done as + * requested. + * + * In order to cope with hardware architectures which + * make virtual-to-physical map invalidates expensive, + * this module may delay invalidate or reduced protection + * operations until such time as they are actually + * necessary. This module is given full information as + * to which processors are currently using which maps, + * and to when physical maps must be made correct. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: head/sys/mips/mips/pmap.c 210327 2010-07-21 09:27:00Z jchandra $"); + +#include "opt_msgbuf.h" +#include "opt_ddb.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/msgbuf.h> +#include <sys/vmmeter.h> +#include <sys/mman.h> +#include <sys/smp.h> +#ifdef DDB +#include <ddb/ddb.h> +#endif + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_phys.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <vm/vm_kern.h> +#include <vm/vm_page.h> +#include <vm/vm_map.h> +#include <vm/vm_object.h> +#include <vm/vm_extern.h> +#include <vm/vm_pageout.h> +#include <vm/vm_pager.h> +#include <vm/uma.h> +#include <sys/pcpu.h> +#include <sys/sched.h> +#ifdef SMP +#include <sys/smp.h> +#endif + +#include <machine/cache.h> +#include <machine/md_var.h> +#include <machine/tlb.h> + +#if defined(DIAGNOSTIC) +#define PMAP_DIAGNOSTIC +#endif + +#undef PMAP_DEBUG + +#ifndef PMAP_SHPGPERPROC +#define PMAP_SHPGPERPROC 200 
+#endif + +#if !defined(PMAP_DIAGNOSTIC) +#define PMAP_INLINE __inline +#else +#define PMAP_INLINE +#endif + +/* + * Get PDEs and PTEs for user/kernel address space + * + * XXX The & for pmap_segshift() is wrong, as is the fact that it doesn't + * trim off gratuitous bits of the address space. By having the & + * there, we break defining NUSERPGTBLS below because the address space + * is defined such that it ends immediately after NPDEPG*NPTEPG*PAGE_SIZE, + * so we end up getting NUSERPGTBLS of 0. + */ + +#define pmap_seg_index(v) (((v) >> SEGSHIFT) & (NPDEPG - 1)) +#define pmap_pde_index(v) (((v) >> PDRSHIFT) & (NPDEPG - 1)) +#define pmap_pte_index(v) (((v) >> PAGE_SHIFT) & (NPTEPG - 1)) +#define pmap_pde_pindex(v) ((v) >> PDRSHIFT) + +#define NUSERPGTBLS (pmap_pde_pindex(VM_MAXUSER_ADDRESS)) +#define is_kernel_pmap(x) ((x) == kernel_pmap) + +struct pmap kernel_pmap_store; +pd_entry_t *kernel_segmap; + +vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ +vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ + +static int nkpt; +unsigned pmap_max_asid; /* max ASID supported by the system */ + +#define PMAP_ASID_RESERVED 0 + +vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS; + +static void pmap_asid_alloc(pmap_t pmap); + +/* + * Data for the pv entry allocation mechanism + */ +static uma_zone_t pvzone; +static struct vm_object pvzone_obj; +static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; + +static PMAP_INLINE void free_pv_entry(pv_entry_t pv); +static pv_entry_t get_pv_entry(pmap_t locked_pmap); +static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); +static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, + vm_offset_t va); +static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem); + +static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, + vm_page_t m, vm_prot_t prot, vm_page_t mpte); +static int pmap_remove_pte(struct pmap *pmap, 
pt_entry_t *ptq, vm_offset_t va); +static void pmap_remove_page(struct pmap *pmap, vm_offset_t va); +static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va); +static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte, + vm_offset_t va, vm_page_t m); +static __inline void pmap_invalidate_page(pmap_t pmap, vm_offset_t va); +static int _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m); + +static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags); +static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags); +static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t); +static int init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot); +static vm_page_t pmap_alloc_pte_page(unsigned int index, int req); +static void pmap_grow_pte_page_cache(void); + +#ifdef SMP +static void pmap_invalidate_page_action(void *arg); +static void pmap_invalidate_all_action(void *arg); +static void pmap_update_page_action(void *arg); +#endif + +/* + * Top level page table entry for a va + */ +static inline pd_entry_t * +pmap_segmap(pmap_t pmap, vm_offset_t va) +{ + return (&pmap->pm_segtab[pmap_seg_index(va)]); +} + +static inline pd_entry_t * +pmap_pdpe_to_pde(pd_entry_t *pdpe, vm_offset_t va) +{ + pd_entry_t *pde; + + pde = (pd_entry_t *)*pdpe; + return (&pde[pmap_pde_index(va)]); +} + +/* + * Return page directory entry for a va + */ +static __inline pd_entry_t * +pmap_pde(pmap_t pmap, vm_offset_t va) +{ + pd_entry_t *pdpe, *pde; + + pdpe = pmap_segmap(pmap, va); + if (pdpe == NULL || *pdpe == NULL) + return (NULL); + + pde = (pd_entry_t *)*pdpe; + return (&pde[pmap_pde_index(va)]); +} + +static inline pt_entry_t * +pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va) +{ + pt_entry_t *pte; + + pte = (pt_entry_t *)*pde; + return (&pte[pmap_pte_index(va)]); +} + +/* + * Routine: pmap_pte + * Function: + * Extract the page table entry associated + * with the given map/virtual_address pair. 
+ */ +pt_entry_t * +pmap_pte(pmap_t pmap, vm_offset_t va) +{ + pd_entry_t *pde; + pt_entry_t *pte; + + pde = pmap_pde(pmap, va); + if (pde == NULL || *pde == NULL) + return (NULL); + + pte = (pt_entry_t *)*pde; + return (&pte[pmap_pte_index(va)]); +} + + +vm_offset_t +pmap_steal_memory(vm_size_t size) +{ + vm_size_t bank_size; + vm_offset_t pa, va; + + size = round_page(size); + + bank_size = phys_avail[1] - phys_avail[0]; + while (size > bank_size) { + int i; + + for (i = 0; phys_avail[i + 2]; i += 2) { + phys_avail[i] = phys_avail[i + 2]; + phys_avail[i + 1] = phys_avail[i + 3]; + } + phys_avail[i] = 0; + phys_avail[i + 1] = 0; + if (!phys_avail[0]) + panic("pmap_steal_memory: out of memory"); + bank_size = phys_avail[1] - phys_avail[0]; + } + + pa = phys_avail[0]; + phys_avail[0] += size; + if (pa >= MIPS_KSEG0_LARGEST_PHYS) { + panic("Out of memory below 512Meg?"); + } + va = MIPS_PHYS_TO_KSEG0(pa); + bzero((caddr_t)va, size); + return va; +} + +/* + * Bootstrap the system enough to run with virtual memory. This + * assumes that the phys_avail array has been initialized. + */ +static void +create_kern_page_table(void) +{ + int npde; + int i, j, npt; + vm_offset_t pdaddr, ptaddr; + pd_entry_t *pde; + pt_entry_t *pte; + + kernel_segmap = (pd_entry_t *)pmap_steal_memory(PAGE_SIZE); + + /* + * Allocate second level page tables for the kernel + */ + npde = howmany(NKPT, NPDEPG); + nkpt = NKPT; + pdaddr = pmap_steal_memory(PAGE_SIZE * npde); + ptaddr = pmap_steal_memory(PAGE_SIZE * nkpt); + printf("NPDE %lx NPKPT %lx \n", (u_long)npde, (u_long)nkpt); + /* + * The R[4-7]?00 stores only one copy of the Global bit in the + * translation lookaside buffer for each 2 page entry. Thus invalid + * entrys must have the Global bit set so when Entry LO and Entry HI + * G bits are anded together they will produce a global bit to store + * in the tlb. 
+ */ + for (i = 0, pte = (pt_entry_t *)ptaddr; i < (nkpt * NPTEPG); i++, pte++) + *pte = PTE_G; + + for (i = 0, npt = nkpt; npt > 0; i++) { + kernel_segmap[i] = (pd_entry_t)(pdaddr + i * PAGE_SIZE); + pde = (pd_entry_t *)kernel_segmap[i]; + + for (j = 0; j < NPDEPG && npt > 0; j++, npt--) + pde[j] = (pd_entry_t)(ptaddr + (i * NPDEPG + j) * PAGE_SIZE); + } + + PMAP_LOCK_INIT(kernel_pmap); + kernel_pmap->pm_segtab = kernel_segmap; + kernel_pmap->pm_active = ~0; + TAILQ_INIT(&kernel_pmap->pm_pvlist); + kernel_pmap->pm_asid[0].asid = PMAP_ASID_RESERVED; + kernel_pmap->pm_asid[0].gen = 0; +} + +void +pmap_bootstrap(void) +{ + int i; + + /* Sort. */ +again: + for (i = 0; phys_avail[i + 1] != 0; i += 2) { + /* + * Keep the memory aligned on page boundary. + */ + phys_avail[i] = round_page(phys_avail[i]); + phys_avail[i + 1] = trunc_page(phys_avail[i + 1]); + + if (i < 2) + continue; + if (phys_avail[i - 2] > phys_avail[i]) { + vm_paddr_t ptemp[2]; + + ptemp[0] = phys_avail[i + 0]; + ptemp[1] = phys_avail[i + 1]; + + phys_avail[i + 0] = phys_avail[i - 2]; + phys_avail[i + 1] = phys_avail[i - 1]; + + phys_avail[i - 2] = ptemp[0]; + phys_avail[i - 1] = ptemp[1]; + goto again; + } + } + + /* + * Copy the phys_avail[] array before we start stealing memory from it. + */ + for (i = 0; phys_avail[i + 1] != 0; i += 2) { + physmem_desc[i] = phys_avail[i]; + physmem_desc[i + 1] = phys_avail[i + 1]; + } + + Maxmem = atop(phys_avail[i - 1]); + + if (bootverbose) { + printf("Physical memory chunk(s):\n"); + for (i = 0; phys_avail[i + 1] != 0; i += 2) { + vm_paddr_t size; + + size = phys_avail[i + 1] - phys_avail[i]; + printf("%#08jx - %#08jx, %ju bytes (%ju pages)\n", + (uintmax_t) phys_avail[i], + (uintmax_t) phys_avail[i + 1] - 1, + (uintmax_t) size, (uintmax_t) size / PAGE_SIZE); + } + printf("Maxmem is 0x%0lx\n", ptoa(Maxmem)); + } + /* + * Steal the message buffer from the beginning of memory. 
+ */ + msgbufp = (struct msgbuf *)pmap_steal_memory(MSGBUF_SIZE); + msgbufinit(msgbufp, MSGBUF_SIZE); + + /* + * Steal thread0 kstack. + */ + kstack0 = pmap_steal_memory(KSTACK_PAGES << PAGE_SHIFT); + + virtual_avail = VM_MIN_KERNEL_ADDRESS; + virtual_end = VM_MAX_KERNEL_ADDRESS; + +#ifdef SMP + /* + * Steal some virtual address space to map the pcpu area. + */ + virtual_avail = roundup2(virtual_avail, PAGE_SIZE * 2); + pcpup = (struct pcpu *)virtual_avail; + virtual_avail += PAGE_SIZE * 2; + + /* + * Initialize the wired TLB entry mapping the pcpu region for + * the BSP at 'pcpup'. Up until this point we were operating + * with the 'pcpup' for the BSP pointing to a virtual address + * in KSEG0 so there was no need for a TLB mapping. + */ + mips_pcpu_tlb_init(PCPU_ADDR(0)); + + if (bootverbose) + printf("pcpu is available at virtual address %p.\n", pcpup); +#endif + + /* + * Allocate segment table for the kernel + */ + create_kern_page_table(); + pmap_max_asid = VMNUM_PIDS; + mips_wr_entryhi(0); + mips_wr_pagemask(0); +} + +/* + * Initialize a vm_page's machine-dependent fields. + */ +void +pmap_page_init(vm_page_t m) +{ + + TAILQ_INIT(&m->md.pv_list); + m->md.pv_list_count = 0; + m->md.pv_flags = 0; +} + +/* + * Initialize the pmap module. + * Called by vm_init, to initialize any structures that the pmap + * system needs to map virtual memory. + * pmap_init has been enhanced to support in a fairly consistant + * way, discontiguous physical memory. + */ +void +pmap_init(void) +{ + + /* + * Initialize the address space (zone) for the pv entries. Set a + * high water mark so that the system can recover from excessive + * numbers of pv entries. 
+ */ + pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL, + NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); + pv_entry_max = PMAP_SHPGPERPROC * maxproc + cnt.v_page_count; + pv_entry_high_water = 9 * (pv_entry_max / 10); + uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max); +} + +/*************************************************** + * Low level helper routines..... + ***************************************************/ + +#if defined(PMAP_DIAGNOSTIC) + +/* + * This code checks for non-writeable/modified pages. + * This should be an invalid condition. + */ +static int +pmap_nw_modified(pt_entry_t pte) +{ + if ((pte & (PTE_D | PTE_RO)) == (PTE_D | PTE_RO)) + return (1); + else + return (0); +} + +#endif + +static void +pmap_invalidate_all(pmap_t pmap) +{ +#ifdef SMP + smp_rendezvous(0, pmap_invalidate_all_action, 0, (void *)pmap); +} + +static void +pmap_invalidate_all_action(void *arg) +{ + pmap_t pmap = (pmap_t)arg; + +#endif + + if (pmap == kernel_pmap) { + tlb_invalidate_all(); + return; + } + + if (pmap->pm_active & PCPU_GET(cpumask)) + tlb_invalidate_all_user(pmap); + else + pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; +} + +struct pmap_invalidate_page_arg { + pmap_t pmap; + vm_offset_t va; +}; + +static __inline void +pmap_invalidate_page(pmap_t pmap, vm_offset_t va) +{ +#ifdef SMP + struct pmap_invalidate_page_arg arg; + + arg.pmap = pmap; + arg.va = va; + + smp_rendezvous(0, pmap_invalidate_page_action, 0, (void *)&arg); +} + +static void +pmap_invalidate_page_action(void *arg) +{ + pmap_t pmap = ((struct pmap_invalidate_page_arg *)arg)->pmap; + vm_offset_t va = ((struct pmap_invalidate_page_arg *)arg)->va; + +#endif + + if (is_kernel_pmap(pmap)) { + tlb_invalidate_address(pmap, va); + return; + } + if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) + return; + else if (!(pmap->pm_active & PCPU_GET(cpumask))) { + pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; + return; + } + tlb_invalidate_address(pmap, va); +} + +struct 
pmap_update_page_arg { + pmap_t pmap; + vm_offset_t va; + pt_entry_t pte; +}; + +void +pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte) +{ +#ifdef SMP + struct pmap_update_page_arg arg; + + arg.pmap = pmap; + arg.va = va; + arg.pte = pte; + + smp_rendezvous(0, pmap_update_page_action, 0, (void *)&arg); +} + +static void +pmap_update_page_action(void *arg) +{ + pmap_t pmap = ((struct pmap_update_page_arg *)arg)->pmap; + vm_offset_t va = ((struct pmap_update_page_arg *)arg)->va; + pt_entry_t pte = ((struct pmap_update_page_arg *)arg)->pte; + +#endif + if (is_kernel_pmap(pmap)) { + tlb_update(pmap, va, pte); + return; + } + if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) + return; + else if (!(pmap->pm_active & PCPU_GET(cpumask))) { + pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; + return; + } + tlb_update(pmap, va, pte); +} + +/* + * Routine: pmap_extract + * Function: + * Extract the physical page address associated + * with the given map/virtual_address pair. + */ +vm_paddr_t +pmap_extract(pmap_t pmap, vm_offset_t va) +{ + pt_entry_t *pte; + vm_offset_t retval = 0; + + PMAP_LOCK(pmap); + pte = pmap_pte(pmap, va); + if (pte) { + retval = TLBLO_PTE_TO_PA(*pte) | (va & PAGE_MASK); + } + PMAP_UNLOCK(pmap); + return retval; +} + +/* + * Routine: pmap_extract_and_hold + * Function: + * Atomically extract and hold the physical page + * with the given pmap and virtual address pair + * if that mapping permits the given protection. 
+ */ +vm_page_t +pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) +{ + pt_entry_t pte; + vm_page_t m; + vm_paddr_t pa; + + m = NULL; + pa = 0; + PMAP_LOCK(pmap); +retry: + pte = *pmap_pte(pmap, va); + if (pte != 0 && pte_test(&pte, PTE_V) && + (pte_test(&pte, PTE_D) || (prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pmap, TLBLO_PTE_TO_PA(pte), &pa)) + goto retry; + + m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(pte)); + vm_page_hold(m); + } + PA_UNLOCK_COND(pa); + PMAP_UNLOCK(pmap); + return (m); +} + +/*************************************************** + * Low level mapping routines..... + ***************************************************/ + +/* + * add a wired page to the kva + */ + /* PMAP_INLINE */ void +pmap_kenter(vm_offset_t va, vm_paddr_t pa) +{ + pt_entry_t *pte; + pt_entry_t opte, npte; + +#ifdef PMAP_DEBUG + printf("pmap_kenter: va: %p -> pa: %p\n", (void *)va, (void *)pa); +#endif + npte = TLBLO_PA_TO_PFN(pa) | PTE_D | PTE_V | PTE_G | PTE_W; + + if (is_cacheable_mem(pa)) + npte |= PTE_C_CACHE; + else + npte |= PTE_C_UNCACHED; + + pte = pmap_pte(kernel_pmap, va); + opte = *pte; + *pte = npte; + + pmap_update_page(kernel_pmap, va, npte); +} + +/* + * remove a page from the kernel pagetables + */ + /* PMAP_INLINE */ void +pmap_kremove(vm_offset_t va) +{ + pt_entry_t *pte; + + /* + * Write back all caches from the page being destroyed + */ + mips_dcache_wbinv_range_index(va, PAGE_SIZE); + + pte = pmap_pte(kernel_pmap, va); + *pte = PTE_G; + pmap_invalidate_page(kernel_pmap, va); +} + +/* + * Used to map a range of physical addresses into kernel + * virtual address space. + * + * The value passed in '*virt' is a suggested virtual address for + * the mapping. Architectures which can support a direct-mapped + * physical to virtual region can return the appropriate address + * within that region, leaving '*virt' unchanged. 
Other + * architectures should map the pages starting at '*virt' and + * update '*virt' with the first usable address after the mapped + * region. + * + * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit. + */ +vm_offset_t +pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) +{ + return (MIPS_PHYS_TO_XKPHYS_CACHED(start)); +} + +/* + * Add a list of wired pages to the kva + * this routine is only used for temporary + * kernel mappings that do not need to have + * page modification or references recorded. + * Note that old mappings are simply written + * over. The page *must* be wired. + */ +void +pmap_qenter(vm_offset_t va, vm_page_t *m, int count) +{ + int i; + vm_offset_t origva = va; + + for (i = 0; i < count; i++) { + pmap_flush_pvcache(m[i]); + pmap_kenter(va, VM_PAGE_TO_PHYS(m[i])); + va += PAGE_SIZE; + } + + mips_dcache_wbinv_range_index(origva, PAGE_SIZE*count); +} + +/* + * this routine jerks page mappings from the + * kernel -- it is meant only for temporary mappings. + */ +void +pmap_qremove(vm_offset_t va, int count) +{ + /* + * No need to wb/inv caches here, + * pmap_kremove will do it for us + */ + + while (count-- > 0) { + pmap_kremove(va); + va += PAGE_SIZE; + } +} + +/*************************************************** + * Page table page management routines..... + ***************************************************/ + +/* Revision 1.507 + * + * Simplify the reference counting of page table pages. Specifically, use + * the page table page's wired count rather than its hold count to contain + * the reference count. + */ + +/* + * This routine unholds page table pages, and if the hold count + * drops to zero, then it decrements the wire count. 
+ */ +static PMAP_INLINE int +pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m) +{ + --m->wire_count; + if (m->wire_count == 0) + return (_pmap_unwire_pte_hold(pmap, va, m)); + else + return (0); +} + +static int +_pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m) +{ + pd_entry_t *pde, *pdp; + vm_page_t pdpg; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + /* + * unmap the page table page + */ + + if (m->pindex < NUPDE) { + pde = pmap_pde(pmap, va); + *pde = 0; + } else { + pde = pmap_segmap(pmap, va); + *pde = 0; + } + + pmap->pm_stats.resident_count--; + + if (m->pindex < NUPDE) { + /* + * Recursively decrement next level pagetable refcount + */ + if (pmap->pm_ptphint == m) + pmap->pm_ptphint = NULL; + + pdp = (pd_entry_t *)*pmap_segmap(pmap, va); + pdpg = PHYS_TO_VM_PAGE(MIPS_KSEG0_TO_PHYS(pdp)); + pmap_unwire_pte_hold(pmap, va, pdpg); + } + /* + * If the page is finally unwired, simply free it. + */ + printf("[%d] free pindex[%x] m %p, va %p\n", + curthread->td_proc->p_pid, (int)m->pindex, m, (void *)va); + + vm_page_free_zero(m); + atomic_subtract_int(&cnt.v_wire_count, 1); + return (1); +} + +/* + * After removing a page table entry, this routine is used to + * conditionally free the page, and manage the hold/wire counts. 
+ */ +static int +pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) +{ + unsigned ptepindex; + pd_entry_t pteva; + + if (va >= VM_MAXUSER_ADDRESS) + return (0); + + if (mpte == NULL) { + ptepindex = pmap_pde_pindex(va); + if (pmap->pm_ptphint && + (pmap->pm_ptphint->pindex == ptepindex)) { + mpte = pmap->pm_ptphint; + } else { + pteva = *pmap_pde(pmap, va); + mpte = PHYS_TO_VM_PAGE(MIPS_KSEG0_TO_PHYS(pteva)); + pmap->pm_ptphint = mpte; + } + } + return pmap_unwire_pte_hold(pmap, va, mpte); +} + +void +pmap_pinit0(pmap_t pmap) +{ + int i; + + PMAP_LOCK_INIT(pmap); + pmap->pm_segtab = kernel_segmap; + pmap->pm_active = 0; + pmap->pm_ptphint = NULL; + for (i = 0; i < MAXCPU; i++) { + pmap->pm_asid[i].asid = PMAP_ASID_RESERVED; + pmap->pm_asid[i].gen = 0; + } + PCPU_SET(curpmap, pmap); + TAILQ_INIT(&pmap->pm_pvlist); + bzero(&pmap->pm_stats, sizeof pmap->pm_stats); +} + +static void +pmap_grow_pte_page_cache() +{ + + vm_contig_grow_cache(3, 0, MIPS_KSEG0_LARGEST_PHYS); +} + +static vm_page_t +pmap_alloc_pte_page(unsigned int index, int req) +{ + vm_page_t m; + + m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, 0, req); + if (m == NULL) + return (NULL); + + if ((m->flags & PG_ZERO) == 0) + pmap_zero_page(m); + + m->pindex = index; + atomic_add_int(&cnt.v_wire_count, 1); + m->wire_count = 1; + return (m); +} + +/* + * Initialize a preallocated and zeroed pmap structure, + * such as one in a vmspace structure. 
+ */ +int +pmap_pinit(pmap_t pmap) +{ + vm_offset_t ptdva; + vm_page_t ptdpg; + int i; + + PMAP_LOCK_INIT(pmap); + + /* + * allocate the page directory page + */ + while ((ptdpg = pmap_alloc_pte_page(NUSERPGTBLS, VM_ALLOC_NORMAL)) == NULL) + pmap_grow_pte_page_cache(); + + ptdva = MIPS_PHYS_TO_KSEG0(VM_PAGE_TO_PHYS(ptdpg)); + pmap->pm_segtab = (pd_entry_t *)ptdva; + pmap->pm_active = 0; + pmap->pm_ptphint = NULL; + for (i = 0; i < MAXCPU; i++) { + pmap->pm_asid[i].asid = PMAP_ASID_RESERVED; + pmap->pm_asid[i].gen = 0; + } + TAILQ_INIT(&pmap->pm_pvlist); + bzero(&pmap->pm_stats, sizeof pmap->pm_stats); + + return (1); +} + +/* + * this routine is called if the page table page is not + * mapped correctly. + */ +static vm_page_t +_pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags) +{ + vm_offset_t pageva; + vm_page_t m; + int pid = curthread->td_proc->p_pid; + + KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || + (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, + ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); + + /* + * Find or fabricate a new pagetable page + */ + if ((m = pmap_alloc_pte_page(ptepindex, VM_ALLOC_NORMAL)) == NULL) { + if (flags & M_WAITOK) { + PMAP_UNLOCK(pmap); + vm_page_unlock_queues(); + pmap_grow_pte_page_cache(); + vm_page_lock_queues(); + PMAP_LOCK(pmap); + } + + /* + * Indicate the need to retry. While waiting, the page + * table page may have been allocated. + */ + return (NULL); + } + + pageva = MIPS_PHYS_TO_KSEG0(VM_PAGE_TO_PHYS(m)); + printf("_pmap_allocpte: index %x (%lx), flags %x pageva %lx\n", + ptepindex, NUPDE, flags, (u_long)pageva); + /* + * Map the pagetable page into the process address space, if it + * isn't already there. 
+ */ + if (ptepindex >= NUPDE) { + pmap->pm_segtab[ptepindex - NUPDE] = (pd_entry_t)pageva; + if(0)printf("[%d]segtab[%lu] = %p\n", pid, ptepindex - NUPDE, (void *)pageva); + } else { + pd_entry_t *pdep, *pde; + int segindex = ptepindex >> (SEGSHIFT - PDRSHIFT); + int pdeindex = ptepindex & (NPDEPG - 1); + vm_page_t pg; + + pdep = &pmap->pm_segtab[segindex]; + if (*pdep == NULL) { + /* recurse for allocating page dir */ + if (_pmap_allocpte(pmap, NUPDE + segindex, + flags) == NULL) { + /* alloc failed, release current */ + --m->wire_count; + atomic_subtract_int(&cnt.v_wire_count, 1); + vm_page_free_zero(m); + return (NULL); + } + } else { + pg = PHYS_TO_VM_PAGE(MIPS_KSEG0_TO_PHYS(*pdep)); + pg->wire_count++; + } + /* Next level entry */ + pde = (pd_entry_t *)*pdep; + pde[pdeindex] = (pd_entry_t)pageva; + pmap->pm_ptphint = m; + if(0)printf("[%d] pde(%p)[%d] = %p\n", pid, pde, pdeindex, (void *)pageva); + } + pmap->pm_stats.resident_count++; + + /* + * Set the page table hint + */ + pmap->pm_ptphint = m; + return (m); +} + +static vm_page_t +pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags) +{ + unsigned ptepindex; + pd_entry_t *pde; + vm_page_t m; + + KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || + (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, + ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); + + /* + * Calculate pagetable page index + */ + ptepindex = pmap_pde_pindex(va); +retry: + /* + * Get the page directory entry + */ + pde = pmap_pde(pmap, va); + + /* + * If the page table page is mapped, we just increment the hold + * count, and activate it. + */ + if (pde != NULL && *pde != NULL) { + /* + * In order to get the page table page, try the hint first. 
+ */ + if (pmap->pm_ptphint && + (pmap->pm_ptphint->pindex == ptepindex)) { + m = pmap->pm_ptphint; + } else { + m = PHYS_TO_VM_PAGE(MIPS_KSEG0_TO_PHYS(*pde)); + pmap->pm_ptphint = m; + } + m->wire_count++; + } else { + /* + * Here if the pte page isn't mapped, or if it has been + * deallocated. + */ + m = _pmap_allocpte(pmap, ptepindex, flags); + if (m == NULL && (flags & M_WAITOK)) + goto retry; + } + return (m); +} + + +/*************************************************** +* Pmap allocation/deallocation routines. + ***************************************************/ +/* + * Revision 1.397 + * - Merged pmap_release and pmap_release_free_page. When pmap_release is + * called only the page directory page(s) can be left in the pmap pte + * object, since all page table pages will have been freed by + * pmap_remove_pages and pmap_remove. In addition, there can only be one + * reference to the pmap and the page directory is wired, so the page(s) + * can never be busy. So all there is to do is clear the magic mappings + * from the page directory and free the page(s). + */ + + +/* + * Release any resources held by the given physical map. + * Called when a pmap initialized by pmap_pinit is being released. + * Should only be called if the map contains no valid mappings. 
+ */ +void +pmap_release(pmap_t pmap) +{ + vm_offset_t ptdva; + vm_page_t ptdpg; + + KASSERT(pmap->pm_stats.resident_count == 0, + ("pmap_release: pmap resident count %ld != 0", + pmap->pm_stats.resident_count)); + + ptdva = (vm_offset_t)pmap->pm_segtab; + ptdpg = PHYS_TO_VM_PAGE(MIPS_KSEG0_TO_PHYS(ptdva)); + + ptdpg->wire_count--; + atomic_subtract_int(&cnt.v_wire_count, 1); + vm_page_free_zero(ptdpg); + PMAP_LOCK_DESTROY(pmap); +} + +/* + * grow the number of kernel page table entries, if needed + */ +void +pmap_growkernel(vm_offset_t addr) +{ + vm_page_t nkpg; + pt_entry_t *pte; + pd_entry_t *pdpe, *pde; + int i; + + if (0) printf("pmap_growkernel, addr %lx max_offset %lx, kernel_vm_end %lx, nkpt %d\n", + (u_long)addr, (u_long)kernel_map->max_offset, (u_long)kernel_vm_end, nkpt); + mtx_assert(&kernel_map->system_mtx, MA_OWNED); + addr = roundup2(addr, PAGE_SIZE * NPTEPG); + if (addr - 1 >= kernel_map->max_offset) + addr = kernel_map->max_offset; + while (kernel_vm_end < addr) { + pdpe = pmap_segmap(kernel_pmap, kernel_vm_end); + if (*pdpe == 0) { + /* new intermediate page table entry */ + nkpg = pmap_alloc_pte_page(nkpt, VM_ALLOC_INTERRUPT); + if (nkpg == NULL) + panic("pmap_growkernel: no memory to grow kernel"); + *pdpe = (pd_entry_t)MIPS_PHYS_TO_KSEG0(VM_PAGE_TO_PHYS(nkpg)); + continue; /* try again */ + } + + pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end); + if (*pde != 0) { + kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; + if (kernel_vm_end - 1 >= kernel_map->max_offset) { + kernel_vm_end = kernel_map->max_offset; + break; + } + continue; + } + + /* + * This index is bogus, but out of the way + */ + nkpg = pmap_alloc_pte_page(nkpt, VM_ALLOC_INTERRUPT); + if (!nkpg) + panic("pmap_growkernel: no memory to grow kernel"); + nkpt++; + *pde = (pd_entry_t)MIPS_PHYS_TO_KSEG0(VM_PAGE_TO_PHYS(nkpg)); + + /* + * The R[4-7]?00 stores only one copy of the Global bit in + * the translation lookaside buffer for each 2 page entry. 
+ * Thus invalid entrys must have the Global bit set so when + * Entry LO and Entry HI G bits are anded together they will + * produce a global bit to store in the tlb. + */ + pte = (pt_entry_t *)*pde; + for (i = 0; i < NPTEPG; i++) + pte[i] = PTE_G; + + kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; + if (kernel_vm_end - 1 >= kernel_map->max_offset) { + kernel_vm_end = kernel_map->max_offset; + break; + } + } +} + +/*************************************************** +* page management routines. + ***************************************************/ + +/* + * free the pv_entry back to the free list + */ +static PMAP_INLINE void +free_pv_entry(pv_entry_t pv) +{ + + pv_entry_count--; + uma_zfree(pvzone, pv); +} + +/* + * get a new pv_entry, allocating a block from the system + * when needed. + * the memory allocation is performed bypassing the malloc code + * because of the possibility of allocations at interrupt time. + */ +static pv_entry_t +get_pv_entry(pmap_t locked_pmap) +{ + static const struct timeval printinterval = { 60, 0 }; + static struct timeval lastprint; + struct vpgqueues *vpq; + pt_entry_t *pte, oldpte; + pmap_t pmap; + pv_entry_t allocated_pv, next_pv, pv; + vm_offset_t va; + vm_page_t m; + + PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + allocated_pv = uma_zalloc(pvzone, M_NOWAIT); + if (allocated_pv != NULL) { + pv_entry_count++; + if (pv_entry_count > pv_entry_high_water) + pagedaemon_wakeup(); + else + return (allocated_pv); + } + /* + * Reclaim pv entries: At first, destroy mappings to inactive + * pages. After that, if a pv entry is still needed, destroy + * mappings to active pages. 
+ */ + if (ratecheck(&lastprint, &printinterval)) + printf("Approaching the limit on PV entries, " + "increase the vm.pmap.shpgperproc tunable.\n"); + vpq = &vm_page_queues[PQ_INACTIVE]; +retry: + TAILQ_FOREACH(m, &vpq->pl, pageq) { + if (m->hold_count || m->busy) + continue; + TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) { + va = pv->pv_va; + pmap = pv->pv_pmap; + /* Avoid deadlock and lock recursion. */ + if (pmap > locked_pmap) + PMAP_LOCK(pmap); + else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) + continue; + pmap->pm_stats.resident_count--; + pte = pmap_pte(pmap, va); + KASSERT(pte != NULL, ("pte")); + oldpte = loadandclear((u_int *)pte); + if (is_kernel_pmap(pmap)) + *pte = PTE_G; + KASSERT(!pte_test(&oldpte, PTE_W), + ("wired pte for unwired page")); + if (m->md.pv_flags & PV_TABLE_REF) + vm_page_flag_set(m, PG_REFERENCED); + if (pte_test(&oldpte, PTE_D)) + vm_page_dirty(m); + pmap_invalidate_page(pmap, va); + TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); + m->md.pv_list_count--; + TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + pmap_unuse_pt(pmap, va, pv->pv_ptem); + if (pmap != locked_pmap) + PMAP_UNLOCK(pmap); + if (allocated_pv == NULL) + allocated_pv = pv; + else + free_pv_entry(pv); + } + if (TAILQ_EMPTY(&m->md.pv_list)) { + vm_page_flag_clear(m, PG_WRITEABLE); + m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD); + } + } + if (allocated_pv == NULL) { + if (vpq == &vm_page_queues[PQ_INACTIVE]) { + vpq = &vm_page_queues[PQ_ACTIVE]; + goto retry; + } + panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable"); + } + return (allocated_pv); +} + +/* + * Revision 1.370 + * + * Move pmap_collect() out of the machine-dependent code, rename it + * to reflect its new location, and add page queue and flag locking. + * + * Notes: (1) alpha, i386, and ia64 had identical implementations + * of pmap_collect() in terms of machine-independent interfaces; + * (2) sparc64 doesn't require it; (3) powerpc had it as a TODO. 
+ * + * MIPS implementation was identical to alpha [Junos 8.2] + */ + +/* + * If it is the first entry on the list, it is actually + * in the header and we must copy the following entry up + * to the header. Otherwise we must search the list for + * the entry. In either case we free the now unused entry. + */ + +static pv_entry_t +pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) +{ + pv_entry_t pv; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + if (pvh->pv_list_count < pmap->pm_stats.resident_count) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + if (pmap == pv->pv_pmap && va == pv->pv_va) + break; + } + } else { + TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) { + if (va == pv->pv_va) + break; + } + } + if (pv != NULL) { + TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); + pvh->pv_list_count--; + TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); + } + return (pv); +} + +static void +pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) +{ + pv_entry_t pv; + + pv = pmap_pvh_remove(pvh, pmap, va); + KASSERT(pv != NULL, ("pmap_pvh_free: pv not found, pa %lx va %lx", + (u_long)VM_PAGE_TO_PHYS(member2struct(vm_page, md, pvh)), + (u_long)va)); + free_pv_entry(pv); +} + +static void +pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) +{ + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + pmap_pvh_free(&m->md, pmap, va); + if (TAILQ_EMPTY(&m->md.pv_list)) + vm_page_flag_clear(m, PG_WRITEABLE); +} + +/* + * Conditionally create a pv entry. 
+ */ +static boolean_t +pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte, vm_offset_t va, + vm_page_t m) +{ + pv_entry_t pv; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + if (pv_entry_count < pv_entry_high_water && + (pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) { + pv_entry_count++; + pv->pv_va = va; + pv->pv_pmap = pmap; + pv->pv_ptem = mpte; + pv->pv_wired = FALSE; + TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + m->md.pv_list_count++; + return (TRUE); + } else + return (FALSE); +} + +/* + * pmap_remove_pte: do the things to unmap a page in a process + */ +static int +pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va) +{ + pt_entry_t oldpte; + vm_page_t m; + vm_offset_t pa; + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + oldpte = loadandclear((u_int *)ptq); + if (is_kernel_pmap(pmap)) + *ptq = PTE_G; + + if (pte_test(&oldpte, PTE_W)) + pmap->pm_stats.wired_count -= 1; + + pmap->pm_stats.resident_count -= 1; + pa = TLBLO_PTE_TO_PA(oldpte); + + if (page_is_managed(pa)) { + m = PHYS_TO_VM_PAGE(pa); + if (pte_test(&oldpte, PTE_D)) { +#if defined(PMAP_DIAGNOSTIC) + if (pmap_nw_modified(oldpte)) { + printf( + "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n", + va, oldpte); + } +#endif + vm_page_dirty(m); + } + if (m->md.pv_flags & PV_TABLE_REF) + vm_page_flag_set(m, PG_REFERENCED); + m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD); + + pmap_remove_entry(pmap, m, va); + } + return pmap_unuse_pt(pmap, va, NULL); +} + +/* + * Remove a single page from a process address space + */ +static void +pmap_remove_page(struct pmap *pmap, vm_offset_t va) +{ + pt_entry_t *ptq; + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + ptq = pmap_pte(pmap, va); + + /* + * if there is no pte for this address, just skip it!!! 
+ */ + if (!ptq || !pte_test(ptq, PTE_V)) { + return; + } + + /* + * Write back all caches from the page being destroyed + */ + mips_dcache_wbinv_range_index(va, PAGE_SIZE); + + /* + * get a local va for mappings for this pmap. + */ + (void)pmap_remove_pte(pmap, ptq, va); + pmap_invalidate_page(pmap, va); + + return; +} + +/* + * Remove the given range of addresses from the specified map. + * + * It is assumed that the start and end are properly + * rounded to the page size. + */ +void +pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) +{ + vm_offset_t va, va_next; + pd_entry_t *pdpe, *pde; + pt_entry_t *pte; + + if (pmap == NULL) + return; + + if (pmap->pm_stats.resident_count == 0) + return; + + vm_page_lock_queues(); + PMAP_LOCK(pmap); + + /* + * special handling of removing one page. a very common operation + * and easy to short circuit some code. + */ + if ((sva + PAGE_SIZE) == eva) { + pmap_remove_page(pmap, sva); + goto out; + } + for (va = sva; va < eva; va = va_next) { + pdpe = pmap_segmap(pmap, sva); + if (*pdpe == 0) { + va_next = (sva + NBSEG) & ~SEGMASK; + if (va_next < sva) + va_next = eva; + continue; + } + + va_next = (sva + NBPDR) & ~PDRMASK; + if (va_next < sva) + va_next = eva; + + pde = pmap_pdpe_to_pde(pdpe, sva); + if (*pde == 0) + continue; + pte = pmap_pde_to_pte(pde, sva); + + for (; sva != va_next; pte++, sva += PAGE_SIZE) { + if (*pte == 0) + continue; + pmap_remove_page(pmap, sva); + } + + } +out: + vm_page_unlock_queues(); + PMAP_UNLOCK(pmap); +} + +/* + * Routine: pmap_remove_all + * Function: + * Removes this physical page from + * all physical maps in which it resides. + * Reflects back modify bits to the pager. + * + * Notes: + * Original versions of this routine were very + * inefficient because they iteratively called + * pmap_remove (slow...) 
+ */ + +void +pmap_remove_all(vm_page_t m) +{ + pv_entry_t pv; + pt_entry_t *pte, tpte; + + KASSERT((m->flags & PG_FICTITIOUS) == 0, + ("pmap_remove_all: page %p is fictitious", m)); + vm_page_lock_queues(); + + if (m->md.pv_flags & PV_TABLE_REF) + vm_page_flag_set(m, PG_REFERENCED); + + while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { + PMAP_LOCK(pv->pv_pmap); + + /* + * If it's last mapping writeback all caches from + * the page being destroyed + */ + if (m->md.pv_list_count == 1) + mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE); + + pv->pv_pmap->pm_stats.resident_count--; + + pte = pmap_pte(pv->pv_pmap, pv->pv_va); + + tpte = loadandclear((u_int *)pte); + if (is_kernel_pmap(pv->pv_pmap)) + *pte = PTE_G; + + if (pte_test(&tpte, PTE_W)) + pv->pv_pmap->pm_stats.wired_count--; + + /* + * Update the vm_page_t clean and reference bits. + */ + if (pte_test(&tpte, PTE_D)) { +#if defined(PMAP_DIAGNOSTIC) + if (pmap_nw_modified(tpte)) { + printf( + "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n", + pv->pv_va, tpte); + } +#endif + vm_page_dirty(m); + } + pmap_invalidate_page(pv->pv_pmap, pv->pv_va); + + TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + m->md.pv_list_count--; + pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); + PMAP_UNLOCK(pv->pv_pmap); + free_pv_entry(pv); + } + + vm_page_flag_clear(m, PG_WRITEABLE); + m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD); + vm_page_unlock_queues(); +} + +/* + * Set the physical protection on the + * specified range of this map as requested. 
+ */ +void +pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) +{ + pt_entry_t *ptpaddr, *pte; + pd_entry_t *pdpe, *pde; + vm_offset_t va_next; + + if (pmap == NULL) + return; + + printf("PMAP prot sva %p eva %p %x\n",(void *)sva,(void *)eva, (int)prot); + if ((prot & VM_PROT_READ) == VM_PROT_NONE) { + pmap_remove(pmap, sva, eva); + return; + } + if (prot & VM_PROT_WRITE) + return; + + vm_page_lock_queues(); + PMAP_LOCK(pmap); + while (sva < eva) { + /* + * If segment table entry is empty, skip this segment. + */ + pdpe = pmap_segmap(pmap, sva); + if (*pdpe == 0) { + va_next = (sva + NBSEG) & ~SEGMASK; + if (va_next < sva) + va_next = eva; + continue; + } + + va_next = (sva + NBPDR) & ~PDRMASK; + if (va_next < sva) + va_next = eva; + + pde = pmap_pdpe_to_pde(pdpe, sva); + ptpaddr = *pde; + + /* + * Weed out invalid mappings. + */ + if (ptpaddr == 0) + continue; + + if (va_next > eva) + va_next = eva; + + for (pte = ptpaddr; sva != va_next; pte++, + sva += PAGE_SIZE) { + pt_entry_t obits, pbits; + vm_page_t m; + vm_paddr_t pa; + +retry: + obits = pbits = *pte; + pa = TLBLO_PTE_TO_PA(pbits); + + if (page_is_managed(pa) && pte_test(&pbits, PTE_D)) { + m = PHYS_TO_VM_PAGE(pa); + vm_page_dirty(m); + m->md.pv_flags &= ~PV_TABLE_MOD; + } + pte_clear(&pbits, PTE_D); + pte_set(&pbits, PTE_RO); + + if (pbits != *pte) { + if (!atomic_cmpset_int((u_int *)pte, obits, pbits)) + goto retry; + pmap_update_page(pmap, sva, pbits); + } + } + } + vm_page_unlock_queues(); + PMAP_UNLOCK(pmap); +} + +/* + * Insert the given physical page (p) at + * the specified virtual address (v) in the + * target physical map with the protection requested. + * + * If specified, the page will be wired down, meaning + * that the related pte can not be reclaimed. + * + * NB: This is the only routine which MAY NOT lazy-evaluate + * or lose information. That is, this routine must actually + * insert this page into the given map NOW. 
+ */ +void +pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, + vm_prot_t prot, boolean_t wired) +{ + vm_offset_t pa, opa; + pt_entry_t *pte; + pt_entry_t origpte, newpte; + pv_entry_t pv; + vm_page_t mpte, om; + int rw = 0; + int debug = 0; /*va < VM_MAXUSER_ADDRESS;*/ + int pid = curthread->td_proc->p_pid; + + if (pmap == NULL) { + printf("Null pmp\n"); + return; + } + if (debug) + printf("[%d]VA %lx page %p\n", pid, (u_long)va, m); + + va &= ~PAGE_MASK; + KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || + (m->oflags & VPO_BUSY) != 0, + ("pmap_enter: page %p is not busy", m)); + + mpte = NULL; + + vm_page_lock_queues(); + PMAP_LOCK(pmap); + + /* + * In the case that a page table page is not resident, we are + * creating it here. + */ + if (va < VM_MAXUSER_ADDRESS) { + mpte = pmap_allocpte(pmap, va, M_WAITOK); + } + pte = pmap_pte(pmap, va); + + /* + * Page Directory table entry not valid, we need a new PT page + */ + if (pte == NULL) { + panic("pmap_enter: invalid page directory, pdir=%p, va=%p\n", + (void *)pmap->pm_segtab, (void *)va); + } + pa = VM_PAGE_TO_PHYS(m); + om = NULL; + origpte = *pte; + opa = TLBLO_PTE_TO_PA(origpte); + + /* + * Mapping has not changed, must be protection or wiring change. + */ + if (pte_test(&origpte, PTE_V) && opa == pa) { + /* + * Wiring change, just update stats. We don't worry about + * wiring PT pages as they remain resident as long as there + * are valid mappings in them. Hence, if a user page is + * wired, the PT page will be also. 
+ */ + if (wired && !pte_test(&origpte, PTE_W)) + pmap->pm_stats.wired_count++; + else if (!wired && pte_test(&origpte, PTE_W)) + pmap->pm_stats.wired_count--; + +#if defined(PMAP_DIAGNOSTIC) + if (pmap_nw_modified(origpte)) { + printf( + "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n", + va, origpte); + } +#endif + /* + * Remove extra pte reference + */ + if (mpte) + mpte->wire_count--; + + if (page_is_managed(opa)) { + om = m; + } + goto validate; + } + + pv = NULL; + + /* + * Mapping has changed, invalidate old range and fall through to + * handle validating new mapping. + */ + if (opa) { + if (pte_test(&origpte, PTE_W)) + pmap->pm_stats.wired_count--; + + if (page_is_managed(opa)) { + om = PHYS_TO_VM_PAGE(opa); + pv = pmap_pvh_remove(&om->md, pmap, va); + } + if (mpte != NULL) { + mpte->wire_count--; + KASSERT(mpte->wire_count > 0, + ("pmap_enter: missing reference to page table page," + " va: %p", (void *)va)); + } + } else + pmap->pm_stats.resident_count++; + + /* + * Enter on the PV list if part of our managed memory. Note that we + * raise IPL while manipulating pv_table since pmap_enter can be + * called at interrupt time. 
+ */ + if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) { + KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, + ("pmap_enter: managed mapping within the clean submap")); + if (pv == NULL) + pv = get_pv_entry(pmap); + pv->pv_va = va; + pv->pv_pmap = pmap; + pv->pv_ptem = mpte; + pv->pv_wired = wired; + TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + m->md.pv_list_count++; + } else if (pv != NULL) + free_pv_entry(pv); + + /* + * Increment counters + */ + if (wired) + pmap->pm_stats.wired_count++; + +validate: + if ((access & VM_PROT_WRITE) != 0) + m->md.pv_flags |= PV_TABLE_MOD | PV_TABLE_REF; + rw = init_pte_prot(va, m, prot); + +#ifdef PMAP_DEBUG + printf("[%d] pmap_enter: va: %p -> pa: %p\n", pid, (void *)va, (void *)pa); +#endif + /* + * Now validate mapping with desired protection/wiring. + */ + newpte = TLBLO_PA_TO_PFN(pa) | rw | PTE_V; + + if (is_cacheable_mem(pa)) + newpte |= PTE_C_CACHE; + else + newpte |= PTE_C_UNCACHED; + + if (wired) + newpte |= PTE_W; + + if (is_kernel_pmap(pmap)) + newpte |= PTE_G; + + /* + * if the mapping or permission bits are different, we need to + * update the pte. + */ + if (origpte != newpte) { + if (pte_test(&origpte, PTE_V)) { + *pte = newpte; + if (page_is_managed(opa) && (opa != pa)) { + if (om->md.pv_flags & PV_TABLE_REF) + vm_page_flag_set(om, PG_REFERENCED); + om->md.pv_flags &= + ~(PV_TABLE_REF | PV_TABLE_MOD); + } + if (pte_test(&origpte, PTE_D)) { + KASSERT(!pte_test(&origpte, PTE_RO), + ("pmap_enter: modified page not writable:" + " va: %p, pte: 0x%x", (void *)va, origpte)); + if (page_is_managed(opa)) + vm_page_dirty(om); + } + if (page_is_managed(opa) && + TAILQ_EMPTY(&om->md.pv_list)) + vm_page_flag_clear(om, PG_WRITEABLE); + } else { + *pte = newpte; + } + } + pmap_update_page(pmap, va, newpte); + + /* + * Sync I & D caches for executable pages. Do this only if the the + * target pmap belongs to the current process. 
Otherwise, an + * unresolvable TLB miss may occur. + */ + if (!is_kernel_pmap(pmap) && (pmap == &curproc->p_vmspace->vm_pmap) && + (prot & VM_PROT_EXECUTE)) { + mips_icache_sync_range(va, PAGE_SIZE); + mips_dcache_wbinv_range(va, PAGE_SIZE); + } + vm_page_unlock_queues(); + PMAP_UNLOCK(pmap); + if (debug) { + int i, j, k; + pd_entry_t *p1; + pt_entry_t *p2; + + i = pmap_seg_index(va); + p1 = (pd_entry_t *)pmap->pm_segtab[i]; + j = pmap_pde_index(va); + p2 = (pt_entry_t *)p1[j]; + k = pmap_pte_index(va); + + printf("segmap[%d] = %p, pde[%d] = %p pte[%d] = %x\n", + i, p1, j, p2, k , p2[k]); + printf("pmap enter - %p %x -> %x done\n", pte, origpte, newpte); + } +} + +/* + * this code makes some *MAJOR* assumptions: + * 1. Current pmap & pmap exists. + * 2. Not wired. + * 3. Read access. + * 4. No page table pages. + * but is *MUCH* faster than pmap_enter... + */ + +void +pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) +{ + + vm_page_lock_queues(); + PMAP_LOCK(pmap); + (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL); + vm_page_unlock_queues(); + PMAP_UNLOCK(pmap); +} + +static vm_page_t +pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, + vm_prot_t prot, vm_page_t mpte) +{ + pt_entry_t *pte; + vm_offset_t pa; + + KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || + (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0, + ("pmap_enter_quick_locked: managed mapping within the clean submap")); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + /* + * In the case that a page table page is not resident, we are + * creating it here. 
+ */ + if (va < VM_MAXUSER_ADDRESS) { + pd_entry_t *pde; + unsigned ptepindex; + + /* + * Calculate pagetable page index + */ + ptepindex = pmap_pde_pindex(va); + if (0) printf("pmap_enter_quick: m %p index %x va: %p \n", m, (int)ptepindex, (void *)va); + if (mpte && (mpte->pindex == ptepindex)) { + mpte->wire_count++; + } else { + /* + * Get the page directory entry + */ + pde = pmap_pde(pmap, va); + + /* + * If the page table page is mapped, we just + * increment the hold count, and activate it. + */ + if (pde && *pde != 0) { + if (pmap->pm_ptphint && + (pmap->pm_ptphint->pindex == ptepindex)) { + mpte = pmap->pm_ptphint; + } else { + mpte = PHYS_TO_VM_PAGE( + MIPS_KSEG0_TO_PHYS(*pde)); + pmap->pm_ptphint = mpte; + } + mpte->wire_count++; + } else { + mpte = _pmap_allocpte(pmap, ptepindex, + M_NOWAIT); + if (mpte == NULL) + return (mpte); + } + } + } else { + mpte = NULL; + } + + pte = pmap_pte(pmap, va); + if (pte_test(pte, PTE_V)) { + if (mpte != NULL) { + mpte->wire_count--; + mpte = NULL; + } + return (mpte); + } + + /* + * Enter on the PV list if part of our managed memory. + */ + if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 && + !pmap_try_insert_pv_entry(pmap, mpte, va, m)) { + if (mpte != NULL) { + pmap_unwire_pte_hold(pmap, va, mpte); + mpte = NULL; + } + return (mpte); + } + + /* + * Increment counters + */ + pmap->pm_stats.resident_count++; + + pa = VM_PAGE_TO_PHYS(m); + + /* + * Now validate mapping with RO protection + */ + *pte = TLBLO_PA_TO_PFN(pa) | PTE_V; + + if (is_cacheable_mem(pa)) + *pte |= PTE_C_CACHE; + else + *pte |= PTE_C_UNCACHED; + + if (is_kernel_pmap(pmap)) + *pte |= PTE_G; + else { + *pte |= PTE_RO; + /* + * Sync I & D caches. Do this only if the the target pmap + * belongs to the current process. Otherwise, an + * unresolvable TLB miss may occur. 
*/ + if (pmap == &curproc->p_vmspace->vm_pmap) { + va &= ~PAGE_MASK; + mips_icache_sync_range(va, PAGE_SIZE); + mips_dcache_wbinv_range(va, PAGE_SIZE); + } + } + return (mpte); +} + +/* + * Make a temporary mapping for a physical address. This is only intended + * to be used for panic dumps. + * + * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit. + */ +void * +pmap_kenter_temporary(vm_paddr_t pa, int i) +{ + return ((void *)MIPS_PHYS_TO_XKPHYS_CACHED(pa)); +} +void +pmap_kenter_temporary_free(vm_paddr_t pa) +{ +} + +/* + * Moved the code to Machine Independent + * vm_map_pmap_enter() + */ + +/* + * Maps a sequence of resident pages belonging to the same object. + * The sequence begins with the given page m_start. This page is + * mapped at the given virtual address start. Each subsequent page is + * mapped at a virtual address that is offset from start by the same + * amount as the page is offset from m_start within the object. The + * last page in the sequence is the page with the largest offset from + * m_start that can be mapped at a virtual address less than the given + * virtual address end. Not every virtual page between start and end + * is mapped; only those for which a resident page exists with the + * corresponding offset from m_start are mapped. + */ +void +pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, + vm_page_t m_start, vm_prot_t prot) +{ + vm_page_t m, mpte; + vm_pindex_t diff, psize; + + VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED); + psize = atop(end - start); + mpte = NULL; + m = m_start; + vm_page_lock_queues(); + PMAP_LOCK(pmap); + while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { + mpte = pmap_enter_quick_locked(pmap, start + ptoa(diff), m, + prot, mpte); + m = TAILQ_NEXT(m, listq); + } + vm_page_unlock_queues(); + PMAP_UNLOCK(pmap); +} + +/* + * pmap_object_init_pt preloads the ptes for a given object + * into the specified pmap. 
This eliminates the blast of soft + * faults on process startup and immediately after an mmap. + */ +void +pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, + vm_object_t object, vm_pindex_t pindex, vm_size_t size) +{ + VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); + KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, + ("pmap_object_init_pt: non-device object")); +} + +/* + * Routine: pmap_change_wiring + * Function: Change the wiring attribute for a map/virtual-address + * pair. + * In/out conditions: + * The mapping must already exist in the pmap. + */ +void +pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) +{ + pt_entry_t *pte; + + if (pmap == NULL) + return; + + PMAP_LOCK(pmap); + pte = pmap_pte(pmap, va); + + if (wired && !pte_test(pte, PTE_W)) + pmap->pm_stats.wired_count++; + else if (!wired && pte_test(pte, PTE_W)) + pmap->pm_stats.wired_count--; + + /* + * Wiring is not a hardware characteristic so there is no need to + * invalidate TLB. + */ + if (wired) + pte_set(pte, PTE_W); + else + pte_clear(pte, PTE_W); + PMAP_UNLOCK(pmap); +} + +/* + * Copy the range specified by src_addr/len + * from the source map to the range dst_addr/len + * in the destination map. + * + * This routine is only advisory and need not do anything. + */ + +void +pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, + vm_size_t len, vm_offset_t src_addr) +{ +} + +/* + * pmap_zero_page zeros the specified hardware page by mapping + * the page into KVM and using bzero to clear its contents. + * + * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit. + */ +void +pmap_zero_page(vm_page_t m) +{ + vm_offset_t va; + vm_paddr_t phys = VM_PAGE_TO_PHYS(m); + + va = MIPS_PHYS_TO_XKPHYS_CACHED(phys); + bzero((caddr_t)va, PAGE_SIZE); + mips_dcache_wbinv_range(va, PAGE_SIZE); +} + +/* + * pmap_zero_page_area zeros the specified hardware page by mapping + * the page into KVM and using bzero to clear its contents. 
+ * + * off and size may not cover an area beyond a single hardware page. + */ +void +pmap_zero_page_area(vm_page_t m, int off, int size) +{ + vm_offset_t va; + vm_paddr_t phys = VM_PAGE_TO_PHYS(m); + + va = MIPS_PHYS_TO_XKPHYS_CACHED(phys); + bzero((char *)(caddr_t)va + off, size); + mips_dcache_wbinv_range(va + off, size); +} + +void +pmap_zero_page_idle(vm_page_t m) +{ + vm_offset_t va; + vm_paddr_t phys = VM_PAGE_TO_PHYS(m); + + va = MIPS_PHYS_TO_XKPHYS_CACHED(phys); + bzero((caddr_t)va, PAGE_SIZE); + mips_dcache_wbinv_range(va, PAGE_SIZE); +} + +/* + * pmap_copy_page copies the specified (machine independent) + * page by mapping the page into virtual memory and using + * bcopy to copy the page, one machine dependent page at a + * time. + * + * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit. + */ +void +pmap_copy_page(vm_page_t src, vm_page_t dst) +{ + vm_offset_t va_src, va_dst; + vm_paddr_t phy_src = VM_PAGE_TO_PHYS(src); + vm_paddr_t phy_dst = VM_PAGE_TO_PHYS(dst); + + pmap_flush_pvcache(src); + mips_dcache_wbinv_range_index(MIPS_PHYS_TO_XKPHYS_CACHED(phy_dst), PAGE_SIZE); + va_src = MIPS_PHYS_TO_XKPHYS_CACHED(phy_src); + va_dst = MIPS_PHYS_TO_XKPHYS_CACHED(phy_dst); + bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE); + mips_dcache_wbinv_range(va_dst, PAGE_SIZE); +} + +/* + * Returns true if the pmap's pv is one of the first + * 16 pvs linked to from this page. This count may + * be changed upwards or downwards in the future; it + * is only necessary that true be returned for a small + * subset of pmaps for proper page aging. 
+ */ +boolean_t +pmap_page_exists_quick(pmap_t pmap, vm_page_t m) +{ + pv_entry_t pv; + int loops = 0; + boolean_t rv; + + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_page_exists_quick: page %p is not managed", m)); + rv = FALSE; + vm_page_lock_queues(); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + if (pv->pv_pmap == pmap) { + rv = TRUE; + break; + } + loops++; + if (loops >= 16) + break; + } + vm_page_unlock_queues(); + return (rv); +} + +/* + * Remove all pages from specified address space + * this aids process exit speeds. Also, this code + * is special cased for current process only, but + * can have the more generic (and slightly slower) + * mode enabled. This is much faster than pmap_remove + * in the case of running down an entire address space. + * + * A pmap other than the current process's only gets a warning and is + * left untouched. Mappings with PTE_W set are skipped. Panics if an + * entry on pm_pvlist has no pte or the pte is not valid. + */ +void +pmap_remove_pages(pmap_t pmap) +{ + pt_entry_t *pte, tpte; + pv_entry_t pv, npv; + vm_page_t m; + + if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) { + printf("warning: pmap_remove_pages called with non-current pmap\n"); + return; + } + vm_page_lock_queues(); + PMAP_LOCK(pmap); + sched_pin(); + //XXX need to be TAILQ_FOREACH_SAFE ? + for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { + pte = pmap_pte(pv->pv_pmap, pv->pv_va); + /* + * pte is dereferenced right below, so a failed lookup has to + * stop here; report the pmap and va the lookup failed for. + */ + if (pte == NULL) + panic("pmap_remove_pages: pte fail - %p va %p\n", + pv->pv_pmap, (void *)pv->pv_va); + if (!pte_test(pte, PTE_V)) + panic("pmap_remove_pages: page on pm_pvlist has no pte\n"); + tpte = *pte; + +/* + * We cannot remove wired pages from a process' mapping at this time + */ + if (pte_test(&tpte, PTE_W)) { + npv = TAILQ_NEXT(pv, pv_plist); + continue; + } + *pte = is_kernel_pmap(pmap) ? PTE_G : 0; + + m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(tpte)); + KASSERT(m != NULL, + ("pmap_remove_pages: bad tpte %x", tpte)); + + pv->pv_pmap->pm_stats.resident_count--; + + /* + * Update the vm_page_t clean and reference bits.
+ */ + if (pte_test(&tpte, PTE_D)) { + vm_page_dirty(m); + } + npv = TAILQ_NEXT(pv, pv_plist); + TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); + + m->md.pv_list_count--; + TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + if (TAILQ_FIRST(&m->md.pv_list) == NULL) { + vm_page_flag_clear(m, PG_WRITEABLE); + } + pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); + free_pv_entry(pv); + } + sched_unpin(); + pmap_invalidate_all(pmap); + PMAP_UNLOCK(pmap); + vm_page_unlock_queues(); +} + +/* + * pmap_testbit tests bits in pte's + * note that the testbit/changebit routines are inline, + * and a lot of things compile-time evaluate. + */ +static boolean_t +pmap_testbit(vm_page_t m, int bit) +{ + pv_entry_t pv; + pt_entry_t *pte; + boolean_t rv = FALSE; + + if (m->flags & PG_FICTITIOUS) + return rv; + + if (TAILQ_FIRST(&m->md.pv_list) == NULL) + return rv; + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { +#if defined(PMAP_DIAGNOSTIC) + if (!pv->pv_pmap) { + printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va); + continue; + } +#endif + PMAP_LOCK(pv->pv_pmap); + pte = pmap_pte(pv->pv_pmap, pv->pv_va); + rv = pte_test(pte, bit); + PMAP_UNLOCK(pv->pv_pmap); + if (rv) + break; + } + return (rv); +} + +/* + * this routine is used to clear dirty bits in ptes + */ +static __inline void +pmap_changebit(vm_page_t m, int bit, boolean_t setem) +{ + pv_entry_t pv; + pt_entry_t *pte; + + if (m->flags & PG_FICTITIOUS) + return; + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + /* + * Loop over all current mappings setting/clearing as appropos If + * setting RO do we need to clear the VAC? 
+ */ + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { +#if defined(PMAP_DIAGNOSTIC) + if (!pv->pv_pmap) { + printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va); + continue; + } +#endif + + PMAP_LOCK(pv->pv_pmap); + pte = pmap_pte(pv->pv_pmap, pv->pv_va); + if (setem) { + *pte |= bit; + pmap_update_page(pv->pv_pmap, pv->pv_va, *pte); + } else { + pt_entry_t pbits = *pte; + + if (pbits & bit) { + if (bit == PTE_D) { + if (pbits & PTE_D) { + vm_page_dirty(m); + } + *pte = (pbits & ~PTE_D) | PTE_RO; + } else { + *pte = pbits & ~bit; + } + pmap_update_page(pv->pv_pmap, pv->pv_va, *pte); + } + } + PMAP_UNLOCK(pv->pv_pmap); + } + if (!setem && bit == PTE_D) + vm_page_flag_clear(m, PG_WRITEABLE); +} + +/* + * pmap_page_wired_mappings: + * + * Return the number of managed mappings to the given physical page + * that are wired. + */ +int +pmap_page_wired_mappings(vm_page_t m) +{ + pv_entry_t pv; + int count; + + count = 0; + if ((m->flags & PG_FICTITIOUS) != 0) + return (count); + vm_page_lock_queues(); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) + if (pv->pv_wired) + count++; + vm_page_unlock_queues(); + return (count); +} + +/* + * Clear the write and modified bits in each of the given page's mappings. + * + * Every mapping on the page's pv_list is reduced to read/execute with + * pmap_protect(), then PG_WRITEABLE is cleared on the page. Panics if + * a pv entry has no pte or the pte is not valid. + */ +void +pmap_remove_write(vm_page_t m) +{ + pv_entry_t pv, npv; + vm_offset_t va; + pt_entry_t *pte; + + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_remove_write: page %p is not managed", m)); + + /* + * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by + * another thread while the object is locked. Thus, if PG_WRITEABLE + * is clear, no page table entries need updating. + */ + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if ((m->oflags & VPO_BUSY) == 0 && + (m->flags & PG_WRITEABLE) == 0) + return; + + /* + * Loop over all current mappings and write-protect each one.
+ * The successor is taken from the page's own linkage (pv_list); + * pv_plist chains the entries of a pmap, not those of a page. + */ + vm_page_lock_queues(); + for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = npv) { + npv = TAILQ_NEXT(pv, pv_list); + pte = pmap_pte(pv->pv_pmap, pv->pv_va); + if (pte == NULL || !pte_test(pte, PTE_V)) + panic("page on pm_pvlist has no pte\n"); + + va = pv->pv_va; + pmap_protect(pv->pv_pmap, va, va + PAGE_SIZE, + VM_PROT_READ | VM_PROT_EXECUTE); + } + vm_page_flag_clear(m, PG_WRITEABLE); + vm_page_unlock_queues(); +} + +/* + * pmap_ts_referenced: + * + * Return the count of reference bits for a page, clearing all of them. + */ +int +pmap_ts_referenced(vm_page_t m) +{ + + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_ts_referenced: page %p is not managed", m)); + if (m->md.pv_flags & PV_TABLE_REF) { + vm_page_lock_queues(); + m->md.pv_flags &= ~PV_TABLE_REF; + vm_page_unlock_queues(); + return (1); + } + return (0); +} + +/* + * pmap_is_modified: + * + * Return whether or not the specified physical page was modified + * in any physical maps. + */ +boolean_t +pmap_is_modified(vm_page_t m) +{ + boolean_t rv; + + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_is_modified: page %p is not managed", m)); + + /* + * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be + * concurrently set while the object is locked. Thus, if PG_WRITEABLE + * is clear, no PTEs can have PTE_D set. + */ + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if ((m->oflags & VPO_BUSY) == 0 && + (m->flags & PG_WRITEABLE) == 0) + return (FALSE); + vm_page_lock_queues(); + if (m->md.pv_flags & PV_TABLE_MOD) + rv = TRUE; + else + rv = pmap_testbit(m, PTE_D); + vm_page_unlock_queues(); + return (rv); +} + +/* N/C */ + +/* + * pmap_is_prefaultable: + * + * Return whether or not the specified virtual address is elgible + * for prefault.
+ */ +boolean_t +pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) +{ + pt_entry_t *pte; + boolean_t rv; + + rv = FALSE; + PMAP_LOCK(pmap); + if (pmap_segmap(pmap, addr) != NULL) { + /* + * Other callers in this file (pmap_mincore, pmap_kextract, + * pmap_remove_write) treat a NULL return from pmap_pte() as + * possible; do the same here and report "not prefaultable" + * instead of dereferencing it. + */ + pte = pmap_pte(pmap, addr); + rv = (pte != NULL && *pte == 0); + } + PMAP_UNLOCK(pmap); + return (rv); +} + +/* + * Clear the modify bits on the specified physical page. + */ +void +pmap_clear_modify(vm_page_t m) +{ + + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_clear_modify: page %p is not managed", m)); + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + KASSERT((m->oflags & VPO_BUSY) == 0, + ("pmap_clear_modify: page %p is busy", m)); + + /* + * If the page is not PG_WRITEABLE, then no PTEs can have PTE_D set. + * If the object containing the page is locked and the page is not + * VPO_BUSY, then PG_WRITEABLE cannot be concurrently set. + */ + if ((m->flags & PG_WRITEABLE) == 0) + return; + vm_page_lock_queues(); + if (m->md.pv_flags & PV_TABLE_MOD) { + pmap_changebit(m, PTE_D, FALSE); + m->md.pv_flags &= ~PV_TABLE_MOD; + } + vm_page_unlock_queues(); +} + +/* + * pmap_is_referenced: + * + * Return whether or not the specified physical page was referenced + * in any physical maps. + */ +boolean_t +pmap_is_referenced(vm_page_t m) +{ + + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_is_referenced: page %p is not managed", m)); + return ((m->md.pv_flags & PV_TABLE_REF) != 0); +} + +/* + * pmap_clear_reference: + * + * Clear the reference bit on the specified physical page. + */ +void +pmap_clear_reference(vm_page_t m) +{ + + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_clear_reference: page %p is not managed", m)); + vm_page_lock_queues(); + if (m->md.pv_flags & PV_TABLE_REF) { + m->md.pv_flags &= ~PV_TABLE_REF; + } + vm_page_unlock_queues(); +} + +/* + * Miscellaneous support routines follow + */ + +/* + * Map a set of physical memory pages into the kernel virtual + * address space. Return a pointer to where it is mapped.
This + * routine is intended to be used for mapping device memory, + * NOT real memory. + */ + +/* + * Map a set of physical memory pages into the kernel virtual + * address space. Return a pointer to where it is mapped. This + * routine is intended to be used for mapping device memory, + * NOT real memory. + * + * Use XKPHYS uncached for 64 bit, and KSEG1 where possible for 32 bit. + */ +void * +pmap_mapdev(vm_offset_t pa, vm_size_t size) +{ + return ((void *)MIPS_PHYS_TO_XKPHYS_UNCACHED(pa)); +} + +void +pmap_unmapdev(vm_offset_t va, vm_size_t size) +{ +} + +/* + * perform the pmap work for mincore + */ +int +pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) +{ + pt_entry_t *ptep, pte; + vm_offset_t pa; + vm_page_t m; + int val; + boolean_t managed; + + PMAP_LOCK(pmap); +retry: + ptep = pmap_pte(pmap, addr); + pte = (ptep != NULL) ? *ptep : 0; + if (!pte_test(&pte, PTE_V)) { + val = 0; + goto out; + } + val = MINCORE_INCORE; + if (pte_test(&pte, PTE_D)) + val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; + pa = TLBLO_PTE_TO_PA(pte); + managed = page_is_managed(pa); + if (managed) { + /* + * This may falsely report the given address as + * MINCORE_REFERENCED. Unfortunately, due to the lack of + * per-PTE reference information, it is impossible to + * determine if the address is MINCORE_REFERENCED. + */ + m = PHYS_TO_VM_PAGE(pa); + if ((m->flags & PG_REFERENCED) != 0) + val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; + } + if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != + (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) { + /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. 
*/ + if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) + goto retry; + } else +out: + PA_UNLOCK_COND(*locked_pa); + PMAP_UNLOCK(pmap); + return (val); +} + +void +pmap_activate(struct thread *td) +{ + pmap_t pmap, oldpmap; + struct proc *p = td->td_proc; + + critical_enter(); + + pmap = vmspace_pmap(p->p_vmspace); + oldpmap = PCPU_GET(curpmap); + + if (oldpmap) + atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask)); + atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask)); + pmap_asid_alloc(pmap); + if (td == curthread) { + PCPU_SET(segbase, pmap->pm_segtab); + mips_wr_entryhi(pmap->pm_asid[PCPU_GET(cpuid)].asid); + } + + PCPU_SET(curpmap, pmap); + critical_exit(); +} + +void +pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) +{ +} + +/* + * Increase the starting virtual address of the given mapping if a + * different alignment might result in more superpage mappings. + */ +void +pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, + vm_offset_t *addr, vm_size_t size) +{ + vm_offset_t superpage_offset; + + if (size < NBSEG) + return; + if (object != NULL && (object->flags & OBJ_COLORED) != 0) + offset += ptoa(object->pg_color); + superpage_offset = offset & SEGMASK; + if (size - ((NBSEG - superpage_offset) & SEGMASK) < NBSEG || + (*addr & SEGMASK) == superpage_offset) + return; + if ((*addr & SEGMASK) < superpage_offset) + *addr = (*addr & ~SEGMASK) + superpage_offset; + else + *addr = ((*addr + SEGMASK) & ~SEGMASK) + superpage_offset; +} + +/* + * Increase the starting virtual address of the given mapping so + * that it is aligned to not be the second page in a TLB entry. + * This routine assumes that the length is appropriately-sized so + * that the allocation does not share a TLB entry at all if required. 
+ */ +void +pmap_align_tlb(vm_offset_t *addr) +{ + if ((*addr & PAGE_SIZE) == 0) + return; + *addr += PAGE_SIZE; + return; +} + +DB_SHOW_COMMAND(ptable, ddb_pid_dump) +{ + pmap_t pmap; + struct thread *td = NULL; + struct proc *p; + int i, j, k; + vm_paddr_t pa; + vm_offset_t va; + + if (have_addr) { + td = db_lookup_thread(addr, TRUE); + if (td == NULL) { + db_printf("Invalid pid or tid"); + return; + } + p = td->td_proc; + if (p->p_vmspace == NULL) { + db_printf("No vmspace for process"); + return; + } + pmap = vmspace_pmap(p->p_vmspace); + } else + pmap = kernel_pmap; + + printf("pmap:%p segtab:%p asid:%x generation:%x\n", + pmap, pmap->pm_segtab, + pmap->pm_asid[0].asid, + pmap->pm_asid[0].gen); + for (i = 0; i < NPDEPG; i++) { + pd_entry_t *pdpe; + pt_entry_t *pde; + pt_entry_t pte; + + pdpe = (pd_entry_t *)pmap->pm_segtab[i]; + if (pdpe == NULL) + continue; + db_printf("[%4d] %p\n", i, pdpe); + for (j = 0; j < NPDEPG; j++) { + pde = (pt_entry_t *)pdpe[j]; + if (pde == NULL) + continue; + db_printf("\t[%4d] %p\n", j, pde); + for (k = 0; k < NPTEPG; k++) { + pte = pde[k]; + if (pte == 0 || !pte_test(&pte, PTE_V)) + continue; + pa = TLBLO_PTE_TO_PA(pte); + va = ((u_long)i << SEGSHIFT) | (j << PDRSHIFT) | (k << PAGE_SHIFT); + printf("\t\t[%04d] va: %p pte: %8x pa:%lx\n", + k, (void *)va, pte, (u_long)pa); + +#if 0 + m = PHYS_TO_VM_PAGE(pa); + printf("\t\t[%04d] va: %p, pt: %p, h: %d, w: %d, f: 0x%x\n", + k, (void *)va, (void *)pa, m->hold_count, + m->wire_count, + m->flags); +#endif + } + } + } +} + + +#if defined(DEBUG) + +static void pads(pmap_t pm); +void pmap_pvdump(vm_offset_t pa); + +/* print address space of pmap*/ +static void +pads(pmap_t pm) +{ + unsigned va, i, j; + pt_entry_t *ptep; + + if (pm == kernel_pmap) + return; + for (i = 0; i < NPTEPG; i++) + if (pm->pm_segtab[i]) + for (j = 0; j < NPTEPG; j++) { + va = (i << SEGSHIFT) + (j << PAGE_SHIFT); + if (pm == kernel_pmap && va < KERNBASE) + continue; + if (pm != kernel_pmap && + va >= 
VM_MAXUSER_ADDRESS) + continue; + ptep = pmap_pte(pm, va); + if (pmap_pte_v(ptep)) + printf("%x:%x ", va, *(int *)ptep); + } + +} + +void +pmap_pvdump(vm_offset_t pa) +{ + register pv_entry_t pv; + vm_page_t m; + + printf("pa %x", pa); + m = PHYS_TO_VM_PAGE(pa); + for (pv = TAILQ_FIRST(&m->md.pv_list); pv; + pv = TAILQ_NEXT(pv, pv_list)) { + printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va); + pads(pv->pv_pmap); + } + printf(" "); +} + +/* N/C */ +#endif + + +/* + * Allocate TLB address space tag (called ASID or TLBPID) and return it. + * It takes almost as much or more time to search the TLB for a + * specific ASID and flush those entries as it does to flush the entire TLB. + * Therefore, when we allocate a new ASID, we just take the next number. When + * we run out of numbers, we flush the TLB, increment the generation count + * and start over. ASID zero is reserved for kernel use. + */ +static void +pmap_asid_alloc(pmap) + pmap_t pmap; +{ + if (pmap->pm_asid[PCPU_GET(cpuid)].asid != PMAP_ASID_RESERVED && + pmap->pm_asid[PCPU_GET(cpuid)].gen == PCPU_GET(asid_generation)); + else { + if (PCPU_GET(next_asid) == pmap_max_asid) { + tlb_invalidate_all_user(NULL); + PCPU_SET(asid_generation, + (PCPU_GET(asid_generation) + 1) & ASIDGEN_MASK); + if (PCPU_GET(asid_generation) == 0) { + PCPU_SET(asid_generation, 1); + } + PCPU_SET(next_asid, 1); /* 0 means invalid */ + } + pmap->pm_asid[PCPU_GET(cpuid)].asid = PCPU_GET(next_asid); + pmap->pm_asid[PCPU_GET(cpuid)].gen = PCPU_GET(asid_generation); + PCPU_SET(next_asid, PCPU_GET(next_asid) + 1); + } +} + +int +page_is_managed(vm_offset_t pa) +{ + vm_offset_t pgnum = mips_btop(pa); + + if (pgnum >= first_page) { + vm_page_t m; + + m = PHYS_TO_VM_PAGE(pa); + if (m == NULL) + return 0; + if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) + return 1; + } + return 0; +} + +static int +init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot) +{ + int rw; + + if (!(prot & VM_PROT_WRITE)) + rw = PTE_V | PTE_RO | 
PTE_C_CACHE; + else if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) { + if ((m->md.pv_flags & PV_TABLE_MOD) != 0) + rw = PTE_V | PTE_D | PTE_C_CACHE; + else + rw = PTE_V | PTE_C_CACHE; + vm_page_flag_set(m, PG_WRITEABLE); + } else + /* Needn't emulate a modified bit for unmanaged pages. */ + rw = PTE_V | PTE_D | PTE_C_CACHE; + return (rw); +} + +/* + * pmap_set_modified: + * + * Sets the page modified and reference bits for the specified page. + */ +void +pmap_set_modified(vm_offset_t pa) +{ + + PHYS_TO_VM_PAGE(pa)->md.pv_flags |= (PV_TABLE_REF | PV_TABLE_MOD); +} + + +/* + * Routine: pmap_kextract + * Function: + * Extract the physical address associated with the given + * virtual address. + * + * XKPHYS, KSEG0 and KSEG1 addresses are converted directly. + * Addresses below VM_MAXUSER_ADDRESS are looked up in the + * current process's pmap (when there is one), KSEG2 and XKSEG + * addresses in kernel_pmap; 0 is returned when no pte is + * found. Any other address panics. + */ + /* PMAP_INLINE */ vm_offset_t +pmap_kextract(vm_offset_t va) +{ + int mapped; + + /* + * First, the direct-mapped regions. + */ + if (va >= MIPS_XKPHYS_START && va < MIPS_XKPHYS_END) + return (MIPS_XKPHYS_TO_PHYS(va)); + + if (va >= MIPS_KSEG0_START && va < MIPS_KSEG0_END) + return (MIPS_KSEG0_TO_PHYS(va)); + + if (va >= MIPS_KSEG1_START && va < MIPS_KSEG1_END) + return (MIPS_KSEG1_TO_PHYS(va)); + + /* + * User virtual addresses. + */ + if (va < VM_MAXUSER_ADDRESS) { + pt_entry_t *ptep; + + if (curproc && curproc->p_vmspace) { + ptep = pmap_pte(&curproc->p_vmspace->vm_pmap, va); + if (ptep) { + return (TLBLO_PTE_TO_PA(*ptep) | + (va & PAGE_MASK)); + } + return (0); + } + } + + /* + * Should be kernel virtual here, otherwise fail. Each range test + * needs &&: written with ||, the test holds for every va and the + * panic at the end of this function can never be reached. + * NOTE(review): assumes the *_END constants bound the mapped kernel + * ranges the same way the direct-mapped checks above use them - + * confirm against the segment definitions. + */ + mapped = (va >= MIPS_KSEG2_START && va < MIPS_KSEG2_END); + mapped = mapped || (va >= MIPS_XKSEG_START && va < MIPS_XKSEG_END); + /* + * Kernel virtual. + */ + + if (mapped) { + pt_entry_t *ptep; + + /* Is the kernel pmap initialized?
*/ + if (kernel_pmap->pm_active) { + /* It's inside the virtual address range */ + ptep = pmap_pte(kernel_pmap, va); + if (ptep) { + return (TLBLO_PTE_TO_PA(*ptep) | + (va & PAGE_MASK)); + } + } + return (0); + } + + panic("%s for unknown address space %p.", __func__, (void *)va); +} + + +void +pmap_flush_pvcache(vm_page_t m) +{ + pv_entry_t pv; + + if (m != NULL) { + for (pv = TAILQ_FIRST(&m->md.pv_list); pv; + pv = TAILQ_NEXT(pv, pv_list)) { + mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE); + } + } +} Index: sys/mips/mips/pmap.c =================================================================== --- sys/mips/mips/pmap.c (revision 210534) +++ sys/mips/mips/pmap.c (working copy) @@ -128,12 +128,8 @@ #define pmap_segshift(v) (((v) >> SEGSHIFT) & (NPDEPG - 1)) #define segtab_pde(m, v) ((m)[pmap_segshift((v))]) -#if defined(__mips_n64) -#define NUSERPGTBLS (NPDEPG) -#else #define NUSERPGTBLS (pmap_segshift(VM_MAXUSER_ADDRESS)) -#endif -#define mips_segtrunc(va) ((va) & ~SEGOFSET) +#define mips_segtrunc(va) ((va) & ~SEGMASK) #define is_kernel_pmap(x) ((x) == kernel_pmap) /* @@ -196,7 +192,6 @@ static void pmap_update_page_action(void *arg); #endif -#if !defined(__mips_n64) struct local_sysmaps { vm_offset_t base; uint16_t valid1, valid2; @@ -257,7 +252,6 @@ tlb_invalidate_address(kernel_pmap, sysm->base + PAGE_SIZE); \ sysm->valid2 = 0; \ intr_restore(intr) -#endif static inline pt_entry_t * pmap_segmap(pmap_t pmap, vm_offset_t va) @@ -332,9 +326,7 @@ pt_entry_t *pgtab; pt_entry_t *pte; int i, j; -#if !defined(__mips_n64) int memory_larger_than_512meg = 0; -#endif /* Sort. */ again: @@ -362,10 +354,8 @@ } } -#if !defined(__mips_n64) if (phys_avail[i - 1] >= MIPS_KSEG0_LARGEST_PHYS) memory_larger_than_512meg = 1; -#endif /* * Copy the phys_avail[] array before we start stealing memory from it. 
@@ -424,7 +414,6 @@ printf("pcpu is available at virtual address %p.\n", pcpup); #endif -#if !defined(__mips_n64) /* * Steal some virtual space that will not be in kernel_segmap. This * va memory space will be used to map in kernel pages that are @@ -439,7 +428,6 @@ sysmap_lmem[i].valid1 = sysmap_lmem[i].valid2 = 0; } } -#endif /* * Allocate segment table for the kernel @@ -450,7 +438,6 @@ * Allocate second level page tables for the kernel */ nkpt = NKPT; -#if !defined(__mips_n64) if (memory_larger_than_512meg) { /* * If we have a large memory system we CANNOT afford to hit @@ -467,7 +454,6 @@ */ nkpt = (PAGE_SIZE / sizeof(pd_entry_t)) - (virtual_avail >> SEGSHIFT); } -#endif pgtab = (pt_entry_t *)pmap_steal_memory(PAGE_SIZE * nkpt); /* @@ -776,16 +762,9 @@ * * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit. */ -#if defined(__mips_n64) vm_offset_t pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) { - return (MIPS_PHYS_TO_XKPHYS_CACHED(start)); -} -#else -vm_offset_t -pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) -{ vm_offset_t va, sva; if (end <= MIPS_KSEG0_LARGEST_PHYS) @@ -800,7 +779,6 @@ *virt = va; return (sva); } -#endif /* * Add a list of wired pages to the kva @@ -2000,20 +1978,9 @@ * * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit. */ -#if defined(__mips_n64) void * pmap_kenter_temporary(vm_paddr_t pa, int i) { - return ((void *)MIPS_PHYS_TO_XKPHYS_CACHED(pa)); -} -void -pmap_kenter_temporary_free(vm_paddr_t pa) -{ -} -#else -void * -pmap_kenter_temporary(vm_paddr_t pa, int i) -{ vm_offset_t va; register_t intr; if (i != 0) @@ -2071,7 +2038,6 @@ sysm->valid1 = 0; } } -#endif /* * Moved the code to Machine Independent @@ -2177,26 +2143,12 @@ /* * pmap_zero_page zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. - * - * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit. 
*/ -#if defined (__mips_n64) void pmap_zero_page(vm_page_t m) { vm_offset_t va; vm_paddr_t phys = VM_PAGE_TO_PHYS(m); - - va = MIPS_PHYS_TO_XKPHYS_CACHED(phys); - bzero((caddr_t)va, PAGE_SIZE); - mips_dcache_wbinv_range(va, PAGE_SIZE); -} -#else -void -pmap_zero_page(vm_page_t m) -{ - vm_offset_t va; - vm_paddr_t phys = VM_PAGE_TO_PHYS(m); register_t intr; if (phys < MIPS_KSEG0_LARGEST_PHYS) { @@ -2213,30 +2165,18 @@ PMAP_LMEM_UNMAP(); } } -#endif + /* * pmap_zero_page_area zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. * * off and size may not cover an area beyond a single hardware page. */ -#if defined (__mips_n64) void pmap_zero_page_area(vm_page_t m, int off, int size) { vm_offset_t va; vm_paddr_t phys = VM_PAGE_TO_PHYS(m); - - va = MIPS_PHYS_TO_XKPHYS_CACHED(phys); - bzero((char *)(caddr_t)va + off, size); - mips_dcache_wbinv_range(va + off, size); -} -#else -void -pmap_zero_page_area(vm_page_t m, int off, int size) -{ - vm_offset_t va; - vm_paddr_t phys = VM_PAGE_TO_PHYS(m); register_t intr; if (phys < MIPS_KSEG0_LARGEST_PHYS) { @@ -2252,25 +2192,12 @@ PMAP_LMEM_UNMAP(); } } -#endif -#if defined (__mips_n64) void pmap_zero_page_idle(vm_page_t m) { vm_offset_t va; vm_paddr_t phys = VM_PAGE_TO_PHYS(m); - - va = MIPS_PHYS_TO_XKPHYS_CACHED(phys); - bzero((caddr_t)va, PAGE_SIZE); - mips_dcache_wbinv_range(va, PAGE_SIZE); -} -#else -void -pmap_zero_page_idle(vm_page_t m) -{ - vm_offset_t va; - vm_paddr_t phys = VM_PAGE_TO_PHYS(m); register_t intr; if (phys < MIPS_KSEG0_LARGEST_PHYS) { @@ -2286,38 +2213,19 @@ PMAP_LMEM_UNMAP(); } } -#endif /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. - * - * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit. 
*/ -#if defined (__mips_n64) void pmap_copy_page(vm_page_t src, vm_page_t dst) { vm_offset_t va_src, va_dst; vm_paddr_t phy_src = VM_PAGE_TO_PHYS(src); vm_paddr_t phy_dst = VM_PAGE_TO_PHYS(dst); - - pmap_flush_pvcache(src); - mips_dcache_wbinv_range_index(MIPS_PHYS_TO_XKPHYS_CACHED(phy_dst), PAGE_SIZE); - va_src = MIPS_PHYS_TO_XKPHYS_CACHED(phy_src); - va_dst = MIPS_PHYS_TO_XKPHYS_CACHED(phy_dst); - bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE); - mips_dcache_wbinv_range(va_dst, PAGE_SIZE); -} -#else -void -pmap_copy_page(vm_page_t src, vm_page_t dst) -{ - vm_offset_t va_src, va_dst; - vm_paddr_t phy_src = VM_PAGE_TO_PHYS(src); - vm_paddr_t phy_dst = VM_PAGE_TO_PHYS(dst); register_t intr; if ((phy_src < MIPS_KSEG0_LARGEST_PHYS) && (phy_dst < MIPS_KSEG0_LARGEST_PHYS)) { @@ -2342,7 +2250,6 @@ PMAP_LMEM_UNMAP(); } } -#endif /* * Returns true if the pmap's pv is one of the first @@ -2748,24 +2655,10 @@ * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. - * - * Use XKPHYS uncached for 64 bit, and KSEG1 where possible for 32 bit. 
*/ -#if defined(__mips_n64) void * pmap_mapdev(vm_offset_t pa, vm_size_t size) { - return ((void *)MIPS_PHYS_TO_XKPHYS_UNCACHED(pa)); -} - -void -pmap_unmapdev(vm_offset_t va, vm_size_t size) -{ -} -#else -void * -pmap_mapdev(vm_offset_t pa, vm_size_t size) -{ vm_offset_t va, tmpva, offset; /* @@ -2809,7 +2702,6 @@ pmap_kremove(tmpva); kmem_free(kernel_map, base, size); } -#endif /* * perform the pmap work for mincore @@ -2902,14 +2794,14 @@ return; if (object != NULL && (object->flags & OBJ_COLORED) != 0) offset += ptoa(object->pg_color); - superpage_offset = offset & SEGOFSET; - if (size - ((NBSEG - superpage_offset) & SEGOFSET) < NBSEG || - (*addr & SEGOFSET) == superpage_offset) + superpage_offset = offset & SEGMASK; + if (size - ((NBSEG - superpage_offset) & SEGMASK) < NBSEG || + (*addr & SEGMASK) == superpage_offset) return; - if ((*addr & SEGOFSET) < superpage_offset) - *addr = (*addr & ~SEGOFSET) + superpage_offset; + if ((*addr & SEGMASK) < superpage_offset) + *addr = (*addr & ~SEGMASK) + superpage_offset; else - *addr = ((*addr + SEGOFSET) & ~SEGOFSET) + superpage_offset; + *addr = ((*addr + SEGMASK) & ~SEGMASK) + superpage_offset; } /* @@ -3141,11 +3033,6 @@ /* * First, the direct-mapped regions. */ -#if defined(__mips_n64) - if (va >= MIPS_XKPHYS_START && va < MIPS_XKPHYS_END) - return (MIPS_XKPHYS_TO_PHYS(va)); -#endif - if (va >= MIPS_KSEG0_START && va < MIPS_KSEG0_END) return (MIPS_KSEG0_TO_PHYS(va)); @@ -3172,9 +3059,6 @@ * Should be kernel virtual here, otherwise fail */ mapped = (va >= MIPS_KSEG2_START || va < MIPS_KSEG2_END); -#if defined(__mips_n64) - mapped = mapped || (va >= MIPS_XKSEG_START || va < MIPS_XKSEG_END); -#endif /* * Kernel virtual. 
*/ [-- Attachment #5 --] Index: sys/conf/options.mips =================================================================== --- sys/conf/options.mips (revision 210534) +++ sys/conf/options.mips (working copy) @@ -53,6 +53,9 @@ TARGET_EMULATOR opt_ddb.h TARGET_XLR_XLS opt_global.h +KERNEL_64 opt_global.h +KERNEL_32 opt_global.h + TICK_USE_YAMON_FREQ opt_global.h TICK_USE_MALTA_RTC opt_global.h Index: sys/conf/files.mips =================================================================== --- sys/conf/files.mips (revision 210534) +++ sys/conf/files.mips (working copy) @@ -32,7 +32,8 @@ mips/mips/exception.S standard mips/mips/gdb_machdep.c standard # mips/mips/mainbus.c standard -mips/mips/pmap.c standard +mips/mips/pmap.c optional kernel_32 +mips/mips/pmap64.c optional kernel_64 mips/mips/trap.c standard mips/mips/vm_machdep.c standard # ---------------------------------------------------------------------- Index: sys/mips/conf/XLR64 =================================================================== --- sys/mips/conf/XLR64 (revision 210534) +++ sys/mips/conf/XLR64 (working copy) @@ -25,6 +25,8 @@ makeoptions TARGET_BIG_ENDIAN include "../rmi/std.xlr" +nooption KERNEL_32 +options KERNEL_64 makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols makeoptions ARCH_FLAGS="-march=mips64 -mabi=64" Index: sys/mips/conf/DEFAULTS =================================================================== --- sys/mips/conf/DEFAULTS (revision 210534) +++ sys/mips/conf/DEFAULTS (working copy) @@ -11,3 +11,4 @@ options GEOM_PART_BSD options GEOM_PART_MBR +options KERNEL_32 [-- Attachment #6 --] Index: sys/mips/rmi/dev/xlr/rge.c =================================================================== --- sys/mips/rmi/dev/xlr/rge.c (revision 210534) +++ sys/mips/rmi/dev/xlr/rge.c (working copy) @@ -567,8 +567,8 @@ paddr = vtophys((vm_offset_t)tx_desc); tx_desc->frag[nfrag] = (1ULL << 63) | (fr_stid << 54) | paddr; nfrag++; - tx_desc->frag[XLR_MAX_TX_FRAGS] = (uint64_t) (vm_offset_t)tx_desc; 
- tx_desc->frag[XLR_MAX_TX_FRAGS + 1] = (uint64_t) (vm_offset_t)m_head; + tx_desc->frag[XLR_MAX_TX_FRAGS] = (uint64_t)(intptr_t)tx_desc; + tx_desc->frag[XLR_MAX_TX_FRAGS + 1] = (uint64_t)(intptr_t)m_head; p2d_len = (nfrag * 8); p2p_msg->msg0 = (1ULL << 63) | (1ULL << 62) | (127ULL << 54) | @@ -614,19 +614,27 @@ free_buf(vm_paddr_t paddr) { struct mbuf *m; - uint32_t mag, um, sr; + uint32_t mag; +#ifdef __mips_n64 + uint64_t *vaddr; + vaddr = (uint64_t *)MIPS_PHYS_TO_XKPHYS_CACHED(paddr - XLR_CACHELINE_SIZE); /* back pointer and magic sit one cache line before the buffer, as in the 32-bit path and get_buf() */ + m = (struct mbuf *)vaddr[0]; + mag = (uint32_t)vaddr[1]; +#else + uint32_t sr; + sr = xlr_enable_kx(); - um = xlr_paddr_lw(paddr - XLR_CACHELINE_SIZE); - mag = xlr_paddr_lw(paddr - XLR_CACHELINE_SIZE + sizeof(uint32_t)); + m = (struct mbuf *)(intptr_t)xlr_paddr_lw(paddr - XLR_CACHELINE_SIZE + sizeof(uint32_t)); + mag = xlr_paddr_lw(paddr - XLR_CACHELINE_SIZE + 3 * sizeof(uint32_t)); mips_wr_status(sr); +#endif if (mag != 0xf00bad) { printf("Something is wrong kseg:%lx found mag:%x not 0xf00bad\n", (u_long)paddr, mag); return; } - m = (struct mbuf *)(intptr_t)um; if (m != NULL) m_freem(m); } @@ -635,7 +643,7 @@ get_buf(void) { struct mbuf *m_new = NULL; - unsigned int *md; + uint64_t *md; #ifdef INVARIANTS vm_paddr_t temp1, temp2; #endif @@ -645,7 +653,7 @@ return NULL; m_adj(m_new, XLR_CACHELINE_SIZE - ((uintptr_t)m_new->m_data & 0x1f)); - md = (unsigned int *)m_new->m_data; + md = (uint64_t *)m_new->m_data; md[0] = (uintptr_t)m_new; /* Back Ptr */ md[1] = 0xf00bad; m_adj(m_new, XLR_CACHELINE_SIZE); @@ -2033,18 +2041,30 @@ rge_rx(struct rge_softc *sc, vm_paddr_t paddr, int len) { struct mbuf *m; - uint32_t tm, mag, sr; + uint32_t mag; struct ifnet *ifp = sc->rge_ifp; +#ifdef __mips_n64 + uint64_t *vaddr; + vaddr = (uint64_t *)MIPS_PHYS_TO_XKPHYS_CACHED(paddr - XLR_CACHELINE_SIZE); + m = (struct mbuf *)vaddr[0]; + mag = (uint32_t)vaddr[1]; +#else + uint32_t sr; + /* + * On 32 bit machines we use XKPHYS to get the values stored with + * the mbuf, need to explicitly enable KX. 
Disable interrupts while + * KX is enabled to prevent this setting leaking to other code. + */ sr = xlr_enable_kx(); - tm = xlr_paddr_lw(paddr - XLR_CACHELINE_SIZE); - mag = xlr_paddr_lw(paddr - XLR_CACHELINE_SIZE + sizeof(uint32_t)); + m = (struct mbuf *)(intptr_t)xlr_paddr_lw(paddr - XLR_CACHELINE_SIZE + sizeof(uint32_t)); + mag = xlr_paddr_lw(paddr - XLR_CACHELINE_SIZE + 3 * sizeof(uint32_t)); mips_wr_status(sr); - - m = (struct mbuf *)(intptr_t)tm; +#endif if (mag != 0xf00bad) { /* somebody else packet Error - FIXME in intialization */ - printf("cpu %d: *ERROR* Not my packet paddr %p\n", xlr_cpu_id(), (void *)paddr); + printf("cpu %d: *ERROR* Not my packet paddr %p\n", + xlr_cpu_id(), (void *)paddr); return; } /* align the data */
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?AANLkTikAjRTMn0m1DemdmUXKMydmyQPxij%2BN=1d9L2=y>
