Date: Sat, 17 Aug 2013 19:49:08 +0000 (UTC) From: Neel Natu <neel@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r254466 - in head/sys/amd64: amd64 include Message-ID: <201308171949.r7HJn8sR091046@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: neel Date: Sat Aug 17 19:49:08 2013 New Revision: 254466 URL: http://svnweb.freebsd.org/changeset/base/254466 Log: Bump up the maximum addressable memory on amd64 systems from 1TB to 4TB. Bump up the KVA size proportionally from 512GB to 2TB. The number of page table pages used by the direct map is now calculated at run time based on 'Maxmem'. This means the small memory systems will not see any additional tax in terms of page table pages for the direct map. However, all amd64 systems, regardless of the memory size, will use 3 more pages to accommodate the bump in the KVA size. More details available here: http://lists.freebsd.org/pipermail/freebsd-hackers/2013-June/043015.html http://lists.freebsd.org/pipermail/freebsd-current/2013-July/043143.html Tested with the following configurations: - Sandybridge server with 64GB of memory. - bhyve VM with 64MB of memory. - bhyve VM with 8GB of memory with the memory segment above 4GB cuddling right up against the 4TB maximum memory limit. 
Discussed on: hackers@, current@ Submitted by: Chris Torek (torek@torek.net) Modified: head/sys/amd64/amd64/pmap.c head/sys/amd64/include/pmap.h head/sys/amd64/include/vmparam.h Modified: head/sys/amd64/amd64/pmap.c ============================================================================== --- head/sys/amd64/amd64/pmap.c Sat Aug 17 19:34:41 2013 (r254465) +++ head/sys/amd64/amd64/pmap.c Sat Aug 17 19:49:08 2013 (r254466) @@ -232,6 +232,7 @@ u_int64_t KPML4phys; /* phys addr of ke static u_int64_t DMPDphys; /* phys addr of direct mapped level 2 */ static u_int64_t DMPDPphys; /* phys addr of direct mapped level 3 */ +static int ndmpdpphys; /* number of DMPDPphys pages */ static struct rwlock_padalign pvh_global_lock; @@ -539,7 +540,18 @@ create_pagetables(vm_paddr_t *firstaddr) ndmpdp = (ptoa(Maxmem) + NBPDP - 1) >> PDPSHIFT; if (ndmpdp < 4) /* Minimum 4GB of dirmap */ ndmpdp = 4; - DMPDPphys = allocpages(firstaddr, NDMPML4E); + ndmpdpphys = howmany(ndmpdp, NPDPEPG); + if (ndmpdpphys > NDMPML4E) { + /* + * Each NDMPML4E allows 512 GB, so limit to that, + * and then readjust ndmpdp and ndmpdpphys. + */ + printf("NDMPML4E limits system to %d GB\n", NDMPML4E * 512); + Maxmem = atop(NDMPML4E * NBPML4); + ndmpdpphys = NDMPML4E; + ndmpdp = NDMPML4E * NPDEPG; + } + DMPDPphys = allocpages(firstaddr, ndmpdpphys); ndm1g = 0; if ((amd_feature & AMDID_PAGE1GB) != 0) ndm1g = ptoa(Maxmem) >> PDPSHIFT; @@ -556,6 +568,10 @@ create_pagetables(vm_paddr_t *firstaddr) * bootstrap. We defer this until after all memory-size dependent * allocations are done (e.g. direct map), so that we don't have to * build in too much slop in our estimate. + * + * Note that when NKPML4E > 1, we have an empty page underneath + * all but the KPML4I'th one, so we need NKPML4E-1 extra (zeroed) + * pages. (pmap_enter requires a PD page to exist for each KPML4E.) 
*/ nkpt_init(*firstaddr); nkpdpe = NKPDPE(nkpt); @@ -580,8 +596,8 @@ create_pagetables(vm_paddr_t *firstaddr) for (i = 0; (i << PDRSHIFT) < *firstaddr; i++) pd_p[i] = (i << PDRSHIFT) | PG_RW | PG_V | PG_PS | PG_G; - /* And connect up the PD to the PDP */ - pdp_p = (pdp_entry_t *)KPDPphys; + /* And connect up the PD to the PDP (leaving room for L4 pages) */ + pdp_p = (pdp_entry_t *)(KPDPphys + ptoa(KPML4I - KPML4BASE)); for (i = 0; i < nkpdpe; i++) pdp_p[i + KPDPI] = (KPDphys + ptoa(i)) | PG_RW | PG_V | PG_U; @@ -618,14 +634,16 @@ create_pagetables(vm_paddr_t *firstaddr) p4_p[PML4PML4I] |= PG_RW | PG_V | PG_U; /* Connect the Direct Map slot(s) up to the PML4. */ - for (i = 0; i < NDMPML4E; i++) { + for (i = 0; i < ndmpdpphys; i++) { p4_p[DMPML4I + i] = DMPDPphys + ptoa(i); p4_p[DMPML4I + i] |= PG_RW | PG_V | PG_U; } - /* Connect the KVA slot up to the PML4 */ - p4_p[KPML4I] = KPDPphys; - p4_p[KPML4I] |= PG_RW | PG_V | PG_U; + /* Connect the KVA slots up to the PML4 */ + for (i = 0; i < NKPML4E; i++) { + p4_p[KPML4BASE + i] = KPDPphys + ptoa(i); + p4_p[KPML4BASE + i] |= PG_RW | PG_V | PG_U; + } } /* @@ -1684,8 +1702,11 @@ pmap_pinit(pmap_t pmap) pagezero(pmap->pm_pml4); /* Wire in kernel global address entries. 
*/ - pmap->pm_pml4[KPML4I] = KPDPphys | PG_RW | PG_V | PG_U; - for (i = 0; i < NDMPML4E; i++) { + for (i = 0; i < NKPML4E; i++) { + pmap->pm_pml4[KPML4BASE + i] = (KPDPphys + (i << PAGE_SHIFT)) | + PG_RW | PG_V | PG_U; + } + for (i = 0; i < ndmpdpphys; i++) { pmap->pm_pml4[DMPML4I + i] = (DMPDPphys + (i << PAGE_SHIFT)) | PG_RW | PG_V | PG_U; } @@ -1940,8 +1961,9 @@ pmap_release(pmap_t pmap) m = PHYS_TO_VM_PAGE(pmap->pm_pml4[PML4PML4I] & PG_FRAME); - pmap->pm_pml4[KPML4I] = 0; /* KVA */ - for (i = 0; i < NDMPML4E; i++) /* Direct Map */ + for (i = 0; i < NKPML4E; i++) /* KVA */ + pmap->pm_pml4[KPML4BASE + i] = 0; + for (i = 0; i < ndmpdpphys; i++)/* Direct Map */ pmap->pm_pml4[DMPML4I + i] = 0; pmap->pm_pml4[PML4PML4I] = 0; /* Recursive Mapping */ Modified: head/sys/amd64/include/pmap.h ============================================================================== --- head/sys/amd64/include/pmap.h Sat Aug 17 19:34:41 2013 (r254465) +++ head/sys/amd64/include/pmap.h Sat Aug 17 19:49:08 2013 (r254466) @@ -113,28 +113,49 @@ ((unsigned long)(l2) << PDRSHIFT) | \ ((unsigned long)(l1) << PAGE_SHIFT)) -#define NKPML4E 1 /* number of kernel PML4 slots */ +/* + * Number of kernel PML4 slots. Can be anywhere from 1 to 64 or so, + * but setting it larger than NDMPML4E makes no sense. + * + * Each slot provides .5 TB of kernel virtual space. + */ +#define NKPML4E 4 #define NUPML4E (NPML4EPG/2) /* number of userland PML4 pages */ #define NUPDPE (NUPML4E*NPDPEPG)/* number of userland PDP pages */ #define NUPDE (NUPDPE*NPDEPG) /* number of userland PD entries */ /* - * NDMPML4E is the number of PML4 entries that are used to implement the - * direct map. It must be a power of two. + * NDMPML4E is the maximum number of PML4 entries that will be + * used to implement the direct map. It must be a power of two, + * and should generally exceed NKPML4E. The maximum possible + * value is 64; using 128 will make the direct map intrude into + * the recursive page table map. 
*/ -#define NDMPML4E 2 +#define NDMPML4E 8 /* - * The *PDI values control the layout of virtual memory. The starting address + * These values control the layout of virtual memory. The starting address * of the direct map, which is controlled by DMPML4I, must be a multiple of * its size. (See the PHYS_TO_DMAP() and DMAP_TO_PHYS() macros.) + * + * Note: KPML4I is the index of the (single) level 4 page that maps + * the KVA that holds KERNBASE, while KPML4BASE is the index of the + * first level 4 page that maps VM_MIN_KERNEL_ADDRESS. If NKPML4E + * is 1, these are the same, otherwise KPML4BASE < KPML4I and extra + * level 4 PDEs are needed to map from VM_MIN_KERNEL_ADDRESS up to + * KERNBASE. + * + * (KPML4I combines with KPDPI to choose where KERNBASE starts. + * Or, in other words, KPML4I provides bits 39..47 of KERNBASE, + * and KPDPI provides bits 30..38.) */ #define PML4PML4I (NPML4EPG/2) /* Index of recursive pml4 mapping */ -#define KPML4I (NPML4EPG-1) /* Top 512GB for KVM */ -#define DMPML4I rounddown(KPML4I - NDMPML4E, NDMPML4E) /* Below KVM */ +#define KPML4BASE (NPML4EPG-NKPML4E) /* KVM at highest addresses */ +#define DMPML4I rounddown(KPML4BASE-NDMPML4E, NDMPML4E) /* Below KVM */ +#define KPML4I (NPML4EPG-1) #define KPDPI (NPDPEPG-2) /* kernbase at -2GB */ /* Modified: head/sys/amd64/include/vmparam.h ============================================================================== --- head/sys/amd64/include/vmparam.h Sat Aug 17 19:34:41 2013 (r254465) +++ head/sys/amd64/include/vmparam.h Sat Aug 17 19:49:08 2013 (r254466) @@ -145,18 +145,19 @@ * 0x0000000000000000 - 0x00007fffffffffff user map * 0x0000800000000000 - 0xffff7fffffffffff does not exist (hole) * 0xffff800000000000 - 0xffff804020100fff recursive page table (512GB slot) - * 0xffff804020101000 - 0xfffffdffffffffff unused - * 0xfffffe0000000000 - 0xfffffeffffffffff 1TB direct map - * 0xffffff0000000000 - 0xffffff7fffffffff unused - * 0xffffff8000000000 - 0xffffffffffffffff 512GB kernel map + * 
0xffff804020101000 - 0xfffff7ffffffffff unused + * 0xfffff80000000000 - 0xfffffbffffffffff 4TB direct map + * 0xfffffc0000000000 - 0xfffffdffffffffff unused + * 0xfffffe0000000000 - 0xffffffffffffffff 2TB kernel map * * Within the kernel map: * * 0xffffffff80000000 KERNBASE */ -#define VM_MAX_KERNEL_ADDRESS KVADDR(KPML4I, NPDPEPG-1, NPDEPG-1, NPTEPG-1) -#define VM_MIN_KERNEL_ADDRESS KVADDR(KPML4I, NPDPEPG-512, 0, 0) +#define VM_MIN_KERNEL_ADDRESS KVADDR(KPML4BASE, 0, 0, 0) +#define VM_MAX_KERNEL_ADDRESS KVADDR(KPML4BASE + NKPML4E - 1, \ + NPDPEPG-1, NPDEPG-1, NPTEPG-1) #define DMAP_MIN_ADDRESS KVADDR(DMPML4I, 0, 0, 0) #define DMAP_MAX_ADDRESS KVADDR(DMPML4I + NDMPML4E, 0, 0, 0)
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201308171949.r7HJn8sR091046>