Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 24 Aug 2019 15:31:31 +0000 (UTC)
From:      Konstantin Belousov <kib@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r351457 - in head/sys/amd64: amd64 include
Message-ID:  <201908241531.x7OFVVOs028533@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kib
Date: Sat Aug 24 15:31:31 2019
New Revision: 351457
URL: https://svnweb.freebsd.org/changeset/base/351457

Log:
  amd64: rework PCPU allocation
  
  Move pcpu KVA out of .bss into dynamically allocated VA at
  pmap_bootstrap().  This avoids demoting the superpage mapping of
  .data/.bss.  It also makes it possible to use pmap_qenter() for
  installation of the domain-local pcpu page on NUMA configs.
  
  Refactor pcpu and IST initialization by moving it to helper functions.
  
  Reviewed by:	markj
  Tested by:	pho
  Discussed with:	jeff
  Sponsored by:	The FreeBSD Foundation
  Differential revision:	https://reviews.freebsd.org/D21320

Modified:
  head/sys/amd64/amd64/machdep.c
  head/sys/amd64/amd64/mp_machdep.c
  head/sys/amd64/amd64/pmap.c
  head/sys/amd64/include/counter.h
  head/sys/amd64/include/md_var.h

Modified: head/sys/amd64/amd64/machdep.c
==============================================================================
--- head/sys/amd64/amd64/machdep.c	Sat Aug 24 15:28:40 2019	(r351456)
+++ head/sys/amd64/amd64/machdep.c	Sat Aug 24 15:31:31 2019	(r351457)
@@ -215,7 +215,8 @@ struct kva_md_info kmi;
 static struct trapframe proc0_tf;
 struct region_descriptor r_gdt, r_idt;
 
-struct pcpu __pcpu[MAXCPU];
+struct pcpu *__pcpu;
+struct pcpu temp_bsp_pcpu;
 
 struct mtx icu_lock;
 
@@ -1543,13 +1544,68 @@ amd64_conf_fast_syscall(void)
 	wrmsr(MSR_SF_MASK, PSL_NT | PSL_T | PSL_I | PSL_C | PSL_D | PSL_AC);
 }
 
+void
+amd64_bsp_pcpu_init1(struct pcpu *pc)
+{
+
+	PCPU_SET(prvspace, pc);
+	PCPU_SET(curthread, &thread0);
+	PCPU_SET(tssp, &common_tss[0]);
+	PCPU_SET(commontssp, &common_tss[0]);
+	PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);
+	PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]);
+	PCPU_SET(fs32p, &gdt[GUFS32_SEL]);
+	PCPU_SET(gs32p, &gdt[GUGS32_SEL]);
+}
+
+void
+amd64_bsp_pcpu_init2(uint64_t rsp0)
+{
+
+	PCPU_SET(rsp0, rsp0);
+	PCPU_SET(pti_rsp0, ((vm_offset_t)PCPU_PTR(pti_stack) +
+	    PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful);
+	PCPU_SET(curpcb, thread0.td_pcb);
+}
+
+void
+amd64_bsp_ist_init(struct pcpu *pc)
+{
+	struct nmi_pcpu *np;
+
+	/* doublefault stack space, runs on ist1 */
+	common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)];
+
+	/*
+	 * NMI stack, runs on ist2.  The pcpu pointer is stored just
+	 * above the start of the ist2 stack.
+	 */
+	np = ((struct nmi_pcpu *)&nmi0_stack[sizeof(nmi0_stack)]) - 1;
+	np->np_pcpu = (register_t)pc;
+	common_tss[0].tss_ist2 = (long)np;
+
+	/*
+	 * MC# stack, runs on ist3.  The pcpu pointer is stored just
+	 * above the start of the ist3 stack.
+	 */
+	np = ((struct nmi_pcpu *)&mce0_stack[sizeof(mce0_stack)]) - 1;
+	np->np_pcpu = (register_t)pc;
+	common_tss[0].tss_ist3 = (long)np;
+
+	/*
+	 * DB# stack, runs on ist4.
+	 */
+	np = ((struct nmi_pcpu *)&dbg0_stack[sizeof(dbg0_stack)]) - 1;
+	np->np_pcpu = (register_t)pc;
+	common_tss[0].tss_ist4 = (long)np;
+}
+
 u_int64_t
 hammer_time(u_int64_t modulep, u_int64_t physfree)
 {
 	caddr_t kmdp;
 	int gsel_tss, x;
 	struct pcpu *pc;
-	struct nmi_pcpu *np;
 	struct xstate_hdr *xhdr;
 	u_int64_t rsp0;
 	char *env;
@@ -1623,7 +1679,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
 	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
 	r_gdt.rd_base =  (long) gdt;
 	lgdt(&r_gdt);
-	pc = &__pcpu[0];
+	pc = &temp_bsp_pcpu;
 
 	wrmsr(MSR_FSBASE, 0);		/* User value */
 	wrmsr(MSR_GSBASE, (u_int64_t)pc);
@@ -1632,15 +1688,8 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
 	pcpu_init(pc, 0, sizeof(struct pcpu));
 	dpcpu_init((void *)(physfree + KERNBASE), 0);
 	physfree += DPCPU_SIZE;
-	PCPU_SET(prvspace, pc);
-	PCPU_SET(curthread, &thread0);
+	amd64_bsp_pcpu_init1(pc);
 	/* Non-late cninit() and printf() can be moved up to here. */
-	PCPU_SET(tssp, &common_tss[0]);
-	PCPU_SET(commontssp, &common_tss[0]);
-	PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);
-	PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]);
-	PCPU_SET(fs32p, &gdt[GUFS32_SEL]);
-	PCPU_SET(gs32p, &gdt[GUGS32_SEL]);
 
 	/*
 	 * Initialize mutexes.
@@ -1729,31 +1778,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
 	finishidentcpu();	/* Final stage of CPU initialization */
 	initializecpu();	/* Initialize CPU registers */
 
-	/* doublefault stack space, runs on ist1 */
-	common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)];
-
-	/*
-	 * NMI stack, runs on ist2.  The pcpu pointer is stored just
-	 * above the start of the ist2 stack.
-	 */
-	np = ((struct nmi_pcpu *) &nmi0_stack[sizeof(nmi0_stack)]) - 1;
-	np->np_pcpu = (register_t) pc;
-	common_tss[0].tss_ist2 = (long) np;
-
-	/*
-	 * MC# stack, runs on ist3.  The pcpu pointer is stored just
-	 * above the start of the ist3 stack.
-	 */
-	np = ((struct nmi_pcpu *) &mce0_stack[sizeof(mce0_stack)]) - 1;
-	np->np_pcpu = (register_t) pc;
-	common_tss[0].tss_ist3 = (long) np;
-
-	/*
-	 * DB# stack, runs on ist4.
-	 */
-	np = ((struct nmi_pcpu *) &dbg0_stack[sizeof(dbg0_stack)]) - 1;
-	np->np_pcpu = (register_t) pc;
-	common_tss[0].tss_ist4 = (long) np;
+	amd64_bsp_ist_init(pc);
 	
 	/* Set the IO permission bitmap (empty due to tss seg limit) */
 	common_tss[0].tss_iobase = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE;
@@ -1842,10 +1867,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
 	/* Ensure the stack is aligned to 16 bytes */
 	rsp0 &= ~0xFul;
 	common_tss[0].tss_rsp0 = rsp0;
-	PCPU_SET(rsp0, rsp0);
-	PCPU_SET(pti_rsp0, ((vm_offset_t)PCPU_PTR(pti_stack) +
-	    PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful);
-	PCPU_SET(curpcb, thread0.td_pcb);
+	amd64_bsp_pcpu_init2(rsp0);
 
 	/* transfer to user mode */
 

Modified: head/sys/amd64/amd64/mp_machdep.c
==============================================================================
--- head/sys/amd64/amd64/mp_machdep.c	Sat Aug 24 15:28:40 2019	(r351456)
+++ head/sys/amd64/amd64/mp_machdep.c	Sat Aug 24 15:31:31 2019	(r351457)
@@ -94,7 +94,7 @@ __FBSDID("$FreeBSD$");
 
 #define	AP_BOOTPT_SZ		(PAGE_SIZE * 3)
 
-extern	struct pcpu __pcpu[];
+extern	struct pcpu *__pcpu;
 
 /* Temporary variables for init_secondary()  */
 char *doublefault_stack;
@@ -404,7 +404,7 @@ mp_realloc_pcpu(int cpuid, int domain)
 	    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ);
 	na = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
 	pagecopy((void *)oa, (void *)na);
-	pmap_enter(kernel_pmap, oa, m, VM_PROT_READ | VM_PROT_WRITE, 0, 0);
+	pmap_qenter((vm_offset_t)&__pcpu[cpuid], &m, 1);
 	/* XXX old pcpu page leaked. */
 }
 #endif

Modified: head/sys/amd64/amd64/pmap.c
==============================================================================
--- head/sys/amd64/amd64/pmap.c	Sat Aug 24 15:28:40 2019	(r351456)
+++ head/sys/amd64/amd64/pmap.c	Sat Aug 24 15:31:31 2019	(r351457)
@@ -443,6 +443,10 @@ static pml4_entry_t *pti_pml4;
 static vm_pindex_t pti_pg_idx;
 static bool pti_finalized;
 
+extern struct pcpu *__pcpu;
+extern struct pcpu temp_bsp_pcpu;
+extern pt_entry_t *pcpu_pte;
+
 struct pmap_pkru_range {
 	struct rs_el	pkru_rs_el;
 	u_int		pkru_keyidx;
@@ -1608,8 +1612,8 @@ void
 pmap_bootstrap(vm_paddr_t *firstaddr)
 {
 	vm_offset_t va;
-	pt_entry_t *pte;
-	uint64_t cr4;
+	pt_entry_t *pte, *pcpu_pte;
+	uint64_t cr4, pcpu_phys;
 	u_long res;
 	int i;
 
@@ -1624,6 +1628,8 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 	 */
 	create_pagetables(firstaddr);
 
+	pcpu_phys = allocpages(firstaddr, MAXCPU);
+
 	/*
 	 * Add a physical memory segment (vm_phys_seg) corresponding to the
 	 * preallocated kernel page table pages so that vm_page structures
@@ -1691,7 +1697,20 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 	SYSMAP(caddr_t, CMAP1, crashdumpmap, MAXDUMPPGS)
 	CADDR1 = crashdumpmap;
 
+	SYSMAP(struct pcpu *, pcpu_pte, __pcpu, MAXCPU);
 	virtual_avail = va;
+
+	for (i = 0; i < MAXCPU; i++) {
+		pcpu_pte[i] = (pcpu_phys + ptoa(i)) | X86_PG_V | X86_PG_RW |
+		    pg_g | pg_nx | X86_PG_M | X86_PG_A;
+	}
+	STAILQ_INIT(&cpuhead);
+	wrmsr(MSR_GSBASE, (uint64_t)&__pcpu[0]);
+	pcpu_init(&__pcpu[0], 0, sizeof(struct pcpu));
+	amd64_bsp_pcpu_init1(&__pcpu[0]);
+	amd64_bsp_ist_init(&__pcpu[0]);
+	__pcpu[0].pc_dynamic = temp_bsp_pcpu.pc_dynamic;
+	__pcpu[0].pc_acpi_id = temp_bsp_pcpu.pc_acpi_id;
 
 	/*
 	 * Initialize the PAT MSR.

Modified: head/sys/amd64/include/counter.h
==============================================================================
--- head/sys/amd64/include/counter.h	Sat Aug 24 15:28:40 2019	(r351456)
+++ head/sys/amd64/include/counter.h	Sat Aug 24 15:31:31 2019	(r351457)
@@ -33,9 +33,10 @@
 
 #include <sys/pcpu.h>
 
-extern struct pcpu __pcpu[];
+extern struct pcpu *__pcpu;
+extern struct pcpu temp_bsp_pcpu;
 
-#define	EARLY_COUNTER	&__pcpu[0].pc_early_dummy_counter
+#define	EARLY_COUNTER	&temp_bsp_pcpu.pc_early_dummy_counter
 
 #define	counter_enter()	do {} while (0)
 #define	counter_exit()	do {} while (0)

Modified: head/sys/amd64/include/md_var.h
==============================================================================
--- head/sys/amd64/include/md_var.h	Sat Aug 24 15:28:40 2019	(r351456)
+++ head/sys/amd64/include/md_var.h	Sat Aug 24 15:31:31 2019	(r351457)
@@ -58,6 +58,9 @@ struct	sysentvec;
 void	amd64_conf_fast_syscall(void);
 void	amd64_db_resume_dbreg(void);
 void	amd64_lower_shared_page(struct sysentvec *);
+void	amd64_bsp_pcpu_init1(struct pcpu *pc);
+void	amd64_bsp_pcpu_init2(uint64_t rsp0);
+void	amd64_bsp_ist_init(struct pcpu *pc);
 void	amd64_syscall(struct thread *td, int traced);
 void	amd64_syscall_ret_flush_l1d(int error);
 void	amd64_syscall_ret_flush_l1d_recalc(void);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201908241531.x7OFVVOs028533>