From owner-freebsd-hackers@FreeBSD.ORG Mon Jul 15 07:36:50 2013 Return-Path: Delivered-To: freebsd-hackers@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [8.8.178.115]) by hub.freebsd.org (Postfix) with ESMTP id 730F2B6E for ; Mon, 15 Jul 2013 07:36:50 +0000 (UTC) (envelope-from torek@torek.net) Received: from elf.torek.net (50-73-42-1-utah.hfc.comcastbusiness.net [50.73.42.1]) by mx1.freebsd.org (Postfix) with ESMTP id 4BCC4DB3 for ; Mon, 15 Jul 2013 07:36:50 +0000 (UTC) Received: from elf.torek.net (localhost [127.0.0.1]) by elf.torek.net (8.14.5/8.14.5) with ESMTP id r6F7aoaw039955 for ; Mon, 15 Jul 2013 01:36:50 -0600 (MDT) (envelope-from torek@torek.net) Message-Id: <201307150736.r6F7aoaw039955@elf.torek.net> From: Chris Torek To: freebsd-hackers@freebsd.org Subject: Re: expanding amd64 past the 1TB limit In-reply-to: Your message of "Thu, 11 Jul 2013 00:41:38 -0700." Date: Mon, 15 Jul 2013 01:36:50 -0600 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.4.3 (elf.torek.net [127.0.0.1]); Mon, 15 Jul 2013 01:36:50 -0600 (MDT) X-BeenThere: freebsd-hackers@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: Technical Discussions relating to FreeBSD List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 15 Jul 2013 07:36:50 -0000 (Durn mailing list software, eating attachments... there are just the two so I will just send them one at a time here. I took the individual people off the to/cc since presumably you all got the attachments already.) Date: Thu, 27 Jun 2013 18:49:29 -0600 Subject: [PATCH 2/2] increase physical and virtual memory limits Increase kernel VM space: go from .5 TB of KVA and 1 TB of direct map, to 8 TB of KVA and 16 TB of direct map. However, we allocate less direct map space for small physical-memory systems. Also, if Maxmem is so large that there is not enough direct map space, reduce Maxmem to fit, so that the system can boot unassisted. 
--- amd64/amd64/pmap.c | 44 +++++++++++++++++++++++++++++++++----------- amd64/include/pmap.h | 36 +++++++++++++++++++++++++++++------- amd64/include/vmparam.h | 13 +++++++------ 3 files changed, 69 insertions(+), 24 deletions(-) diff --git a/amd64/amd64/pmap.c b/amd64/amd64/pmap.c index 46f6940..5e43c93 100644 --- a/amd64/amd64/pmap.c +++ b/amd64/amd64/pmap.c @@ -232,6 +232,7 @@ u_int64_t KPML4phys; /* phys addr of kernel level 4 */ static u_int64_t DMPDphys; /* phys addr of direct mapped level 2 */ static u_int64_t DMPDPphys; /* phys addr of direct mapped level 3 */ +static int ndmpdpphys; /* number of DMPDPphys pages */ static struct rwlock_padalign pvh_global_lock; @@ -540,7 +541,18 @@ create_pagetables(vm_paddr_t *firstaddr) ndmpdp = (ptoa(Maxmem) + NBPDP - 1) >> PDPSHIFT; if (ndmpdp < 4) /* Minimum 4GB of dirmap */ ndmpdp = 4; - DMPDPphys = allocpages(firstaddr, NDMPML4E); + ndmpdpphys = howmany(ndmpdp, NPDPEPG); + if (ndmpdpphys > NDMPML4E) { + /* + * Each NDMPML4E allows 512 GB, so limit to that, + * and then readjust ndmpdp and ndmpdpphys. + */ + printf("NDMPML4E limits system to %d GB\n", NDMPML4E * 512); + Maxmem = atop(NDMPML4E * NBPML4); + ndmpdpphys = NDMPML4E; + ndmpdp = NDMPML4E * NPDEPG; + } + DMPDPphys = allocpages(firstaddr, ndmpdpphys); ndm1g = 0; if ((amd_feature & AMDID_PAGE1GB) != 0) ndm1g = ptoa(Maxmem) >> PDPSHIFT; @@ -557,6 +569,10 @@ create_pagetables(vm_paddr_t *firstaddr) * bootstrap. We defer this until after all memory-size dependent * allocations are done (e.g. direct map), so that we don't have to * build in too much slop in our estimate. + * + * Note that when NKPML4E > 1, we have an empty page underneath + * all but the KPML4I'th one, so we need NKPML4E-1 extra (zeroed) + * pages. (pmap_enter requires a PD page to exist for each KPML4E.) 
*/ nkpt_init(*firstaddr); nkpdpe = NKPDPE(nkpt); @@ -581,8 +597,8 @@ create_pagetables(vm_paddr_t *firstaddr) for (i = 0; (i << PDRSHIFT) < *firstaddr; i++) pd_p[i] = (i << PDRSHIFT) | PG_RW | PG_V | PG_PS | PG_G; - /* And connect up the PD to the PDP */ - pdp_p = (pdp_entry_t *)KPDPphys; + /* And connect up the PD to the PDP (leaving room for L4 pages) */ + pdp_p = (pdp_entry_t *)(KPDPphys + ptoa(KPML4I - KPML4BASE)); for (i = 0; i < nkpdpe; i++) pdp_p[i + KPDPI] = (KPDphys + ptoa(i)) | PG_RW | PG_V | PG_U; @@ -619,14 +635,16 @@ create_pagetables(vm_paddr_t *firstaddr) p4_p[PML4PML4I] |= PG_RW | PG_V | PG_U; /* Connect the Direct Map slot(s) up to the PML4. */ - for (i = 0; i < NDMPML4E; i++) { + for (i = 0; i < ndmpdpphys; i++) { p4_p[DMPML4I + i] = DMPDPphys + ptoa(i); p4_p[DMPML4I + i] |= PG_RW | PG_V | PG_U; } - /* Connect the KVA slot up to the PML4 */ - p4_p[KPML4I] = KPDPphys; - p4_p[KPML4I] |= PG_RW | PG_V | PG_U; + /* Connect the KVA slots up to the PML4 */ + for (i = 0; i < NKPML4E; i++) { + p4_p[KPML4BASE + i] = KPDPphys + ptoa(i); + p4_p[KPML4BASE + i] |= PG_RW | PG_V | PG_U; + } } /* @@ -1685,8 +1703,11 @@ pmap_pinit(pmap_t pmap) pagezero(pmap->pm_pml4); /* Wire in kernel global address entries. 
*/ - pmap->pm_pml4[KPML4I] = KPDPphys | PG_RW | PG_V | PG_U; - for (i = 0; i < NDMPML4E; i++) { + for (i = 0; i < NKPML4E; i++) { + pmap->pm_pml4[KPML4BASE + i] = (KPDPphys + (i << PAGE_SHIFT)) | + PG_RW | PG_V | PG_U; + } + for (i = 0; i < ndmpdpphys; i++) { pmap->pm_pml4[DMPML4I + i] = (DMPDPphys + (i << PAGE_SHIFT)) | PG_RW | PG_V | PG_U; } @@ -1941,8 +1962,9 @@ pmap_release(pmap_t pmap) m = PHYS_TO_VM_PAGE(pmap->pm_pml4[PML4PML4I] & PG_FRAME); - pmap->pm_pml4[KPML4I] = 0; /* KVA */ - for (i = 0; i < NDMPML4E; i++) /* Direct Map */ + for (i = 0; i < NKPML4E; i++) /* KVA */ + pmap->pm_pml4[KPML4BASE + i] = 0; + for (i = 0; i < ndmpdpphys; i++)/* Direct Map */ pmap->pm_pml4[DMPML4I + i] = 0; pmap->pm_pml4[PML4PML4I] = 0; /* Recursive Mapping */ diff --git a/amd64/include/pmap.h b/amd64/include/pmap.h index dc02e49..eda0295 100644 --- a/amd64/include/pmap.h +++ b/amd64/include/pmap.h @@ -113,28 +113,50 @@ ((unsigned long)(l2) << PDRSHIFT) | \ ((unsigned long)(l1) << PAGE_SHIFT)) -#define NKPML4E 1 /* number of kernel PML4 slots */ +/* + * Number of kernel PML4 slots. Can be anywhere from 1 to 64 or so, + * but setting it larger than NDMPML4E makes no sense. + * + * Each slot provides .5 TB of kernel virtual space. + */ +#define NKPML4E 16 #define NUPML4E (NPML4EPG/2) /* number of userland PML4 pages */ #define NUPDPE (NUPML4E*NPDPEPG)/* number of userland PDP pages */ #define NUPDE (NUPDPE*NPDEPG) /* number of userland PD entries */ /* - * NDMPML4E is the number of PML4 entries that are used to implement the - * direct map. It must be a power of two. + * NDMPML4E is the maximum number of PML4 entries that will be + * used to implement the direct map. It must be a power of two, + * and should generally exceed NKPML4E. The maximum possible + * value is 64; using 128 will make the direct map intrude into + * the recursive page table map. */ -#define NDMPML4E 2 +#define NDMPML4E 32 /* - * The *PDI values control the layout of virtual memory. 
The starting address + * These values control the layout of virtual memory. The starting address + * of the direct map, which is controlled by DMPML4I, must be a multiple of * its size. (See the PHYS_TO_DMAP() and DMAP_TO_PHYS() macros.) + * + * Note: KPML4I is the index of the (single) level 4 page that maps + * the KVA that holds KERNBASE, while KPML4BASE is the index of the + * first level 4 page that maps VM_MIN_KERNEL_ADDRESS. If NKPML4E + * is 1, these are the same, otherwise KPML4BASE < KPML4I and extra + * level 4 PDEs are needed to map from VM_MIN_KERNEL_ADDRESS up to + * KERNBASE. Similarly, if KPML4I < (base+N-1), extra level 2 PDEs are + * needed to map from somewhere-above-KERNBASE to VM_MAX_KERNEL_ADDRESS. + * + * (KPML4I combines with KPDPI to choose where KERNBASE starts. + * Or, in other words, KPML4I provides bits 39..46 of KERNBASE, + * and KPDPI provides bits 30..38.) */ #define PML4PML4I (NPML4EPG/2) /* Index of recursive pml4 mapping */ -#define KPML4I (NPML4EPG-1) /* Top 512GB for KVM */ -#define DMPML4I rounddown(KPML4I - NDMPML4E, NDMPML4E) /* Below KVM */ +#define KPML4BASE (NPML4EPG-NKPML4E) /* KVM at highest addresses */ +#define DMPML4I rounddown(KPML4BASE-NDMPML4E, NDMPML4E) /* Below KVM */ +#define KPML4I (NPML4EPG-1) #define KPDPI (NPDPEPG-2) /* kernbase at -2GB */ /* diff --git a/amd64/include/vmparam.h b/amd64/include/vmparam.h index 33f62bd..cff2558 100644 --- a/amd64/include/vmparam.h +++ b/amd64/include/vmparam.h @@ -145,18 +145,19 @@ * 0x0000000000000000 - 0x00007fffffffffff user map * 0x0000800000000000 - 0xffff7fffffffffff does not exist (hole) * 0xffff800000000000 - 0xffff804020100fff recursive page table (512GB slot) - 0xffff804020101000 - 0xfffffdffffffffff unused - 0xfffffe0000000000 - 0xfffffeffffffffff 1TB direct map - 0xffffff0000000000 - 0xffffff7fffffffff unused - 0xffffff8000000000 - 0xffffffffffffffff 512GB kernel map + 0xffff804020101000 - 0xffffdfffffffffff unused + 0xffffe00000000000 - 
0xffffefffffffffff 16TB direct map + * 0xfffff00000000000 - 0xfffff7ffffffffff unused + * 0xfffff80000000000 - 0xffffffffffffffff 8TB kernel map * * Within the kernel map: * * 0xffffffff80000000 KERNBASE */ -#define VM_MAX_KERNEL_ADDRESS KVADDR(KPML4I, NPDPEPG-1, NPDEPG-1, NPTEPG-1) -#define VM_MIN_KERNEL_ADDRESS KVADDR(KPML4I, NPDPEPG-512, 0, 0) +#define VM_MIN_KERNEL_ADDRESS KVADDR(KPML4BASE, 0, 0, 0) +#define VM_MAX_KERNEL_ADDRESS KVADDR(KPML4BASE + NKPML4E - 1, \ + NPDPEPG-1, NPDEPG-1, NPTEPG-1) #define DMAP_MIN_ADDRESS KVADDR(DMPML4I, 0, 0, 0) #define DMAP_MAX_ADDRESS KVADDR(DMPML4I + NDMPML4E, 0, 0, 0) -- 1.8.2.1