Date: Thu, 4 May 2023 15:40:14 GMT From: Konstantin Belousov <kib@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org Subject: git: af1c6d3f3013 - main - amd64: do not leak pcpu pages Message-ID: <202305041540.344FeETu059928@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch main has been updated by kib: URL: https://cgit.FreeBSD.org/src/commit/?id=af1c6d3f3013062370692c8e1e9c87bb138fbbd9 commit af1c6d3f3013062370692c8e1e9c87bb138fbbd9 Author: Konstantin Belousov <kib@FreeBSD.org> AuthorDate: 2023-05-03 09:41:46 +0000 Commit: Konstantin Belousov <kib@FreeBSD.org> CommitDate: 2023-05-04 15:39:22 +0000 amd64: do not leak pcpu pages Do not preallocate pcpu area backing pages on early startup, only allocate enough of KVA for pcpu[MAXCPU] and the page for BSP. Other pages are allocated after we know the number of cpus and their assignments to the domains. PCPUs are not accessed until they are initialized, which happens on AP startup. Reviewed by: markj Sponsored by: The FreeBSD Foundation Differential revision: https://reviews.freebsd.org/D39945 --- sys/amd64/amd64/mp_machdep.c | 52 ++++++++++++++++++++------------------------ sys/amd64/amd64/pmap.c | 17 ++++++++++----- 2 files changed, 34 insertions(+), 35 deletions(-) diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index f6c3446e9981..5fdde0bb887d 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -290,29 +290,32 @@ init_secondary(void) init_secondary_tail(); } -/******************************************************************* - * local functions and data - */ - -#ifdef NUMA static void -mp_realloc_pcpu(int cpuid, int domain) +amd64_mp_alloc_pcpu(void) { vm_page_t m; - vm_offset_t oa, na; - - oa = (vm_offset_t)&__pcpu[cpuid]; - if (vm_phys_domain(pmap_kextract(oa)) == domain) - return; - m = vm_page_alloc_noobj_domain(domain, 0); - if (m == NULL) - return; - na = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); - pagecopy((void *)oa, (void *)na); - pmap_qenter((vm_offset_t)&__pcpu[cpuid], &m, 1); - /* XXX old pcpu page leaked. */ -} + int cpu; + + /* Allocate pcpu areas to the correct domain. */ + for (cpu = 1; cpu < mp_ncpus; cpu++) { +#ifdef NUMA + m = NULL; + if (vm_ndomains > 1) { + m = vm_page_alloc_noobj_domain( + acpi_pxm_get_cpu_locality(cpu_apic_ids[cpu]), 0); + } + if (m == NULL) #endif + m = vm_page_alloc_noobj(0); + if (m == NULL) + panic("cannot alloc pcpu page for cpu %d", cpu); + pmap_qenter((vm_offset_t)&__pcpu[cpu], &m, 1); + } +} + +/******************************************************************* + * local functions and data + */ /* * start each AP in our list @@ -330,6 +333,7 @@ start_all_aps(void) int apic_id, cpu, domain, i; u_char mpbiosreason; + amd64_mp_alloc_pcpu(); mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); MPASS(bootMP_size <= PAGE_SIZE); @@ -403,16 +407,6 @@ start_all_aps(void) outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ - /* Relocate pcpu areas to the correct domain. */ -#ifdef NUMA - if (vm_ndomains > 1) - for (cpu = 1; cpu < mp_ncpus; cpu++) { - apic_id = cpu_apic_ids[cpu]; - domain = acpi_pxm_get_cpu_locality(apic_id); - mp_realloc_pcpu(cpu, domain); - } -#endif - /* start each AP */ domain = 0; for (cpu = 1; cpu < mp_ncpus; cpu++) { diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 55086125fbb9..1009736472dc 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -1902,7 +1902,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr) vm_offset_t va; pt_entry_t *pte, *pcpu_pte; struct region_descriptor r_gdt; - uint64_t cr4, pcpu_phys; + uint64_t cr4, pcpu0_phys; u_long res; int i; @@ -1917,7 +1917,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr) */ create_pagetables(firstaddr); - pcpu_phys = allocpages(firstaddr, MAXCPU); + pcpu0_phys = allocpages(firstaddr, 1); /* * Add a physical memory segment (vm_phys_seg) corresponding to the @@ -1995,10 +1995,15 @@ pmap_bootstrap(vm_paddr_t *firstaddr) SYSMAP(struct pcpu *, pcpu_pte, __pcpu, MAXCPU); virtual_avail = va; - for (i = 0; i < MAXCPU; i++) { - pcpu_pte[i] = (pcpu_phys + ptoa(i)) | X86_PG_V | X86_PG_RW | - pg_g | pg_nx | X86_PG_M | X86_PG_A; - } + /* + * Map the BSP PCPU now, the rest of the PCPUs are mapped by + * amd64_mp_alloc_pcpu()/start_all_aps() when we know the + * number of CPUs and NUMA affinity. + */ + pcpu_pte[0] = pcpu0_phys | X86_PG_V | X86_PG_RW | pg_g | pg_nx | + X86_PG_M | X86_PG_A; + for (i = 1; i < MAXCPU; i++) + pcpu_pte[i] = 0; /* * Re-initialize PCPU area for BSP after switching.
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202305041540.344FeETu059928>