Date: Fri, 25 Oct 2019 20:09:42 +0000 (UTC) From: Konstantin Belousov <kib@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r354095 - in head/sys: amd64/amd64 amd64/include kern Message-ID: <201910252009.x9PK9gIV092660@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: kib Date: Fri Oct 25 20:09:42 2019 New Revision: 354095 URL: https://svnweb.freebsd.org/changeset/base/354095 Log: amd64: move pcb out of kstack to struct thread. This saves 320 bytes of the precious stack space. The only negative aspect of the change I can think of is that the struct thread increased by 320 bytes obviously, and that 320 bytes are not swapped out anymore. I believe the freed stack space is much more important than that. Also, current struct thread size is 1392 bytes on amd64, so UMA will allocate two thread structures per (4KB) slab, which leaves a space for pcb without increasing zone memory use. Reviewed by: alc, markj Tested by: pho Sponsored by: The FreeBSD Foundation MFC after: 2 weeks Differential revision: https://reviews.freebsd.org/D22138 Modified: head/sys/amd64/amd64/cpu_switch.S head/sys/amd64/amd64/genassym.c head/sys/amd64/amd64/machdep.c head/sys/amd64/amd64/vm_machdep.c head/sys/amd64/include/md_var.h head/sys/amd64/include/pcpu_aux.h head/sys/amd64/include/proc.h head/sys/kern/kern_thread.c Modified: head/sys/amd64/amd64/cpu_switch.S ============================================================================== --- head/sys/amd64/amd64/cpu_switch.S Fri Oct 25 19:38:02 2019 (r354094) +++ head/sys/amd64/amd64/cpu_switch.S Fri Oct 25 20:09:42 2019 (r354095) @@ -74,7 +74,7 @@ END(cpu_throw) */ ENTRY(cpu_switch) /* Switch to new thread. First, save context. */ - movq TD_PCB(%rdi),%r8 + leaq TD_MD_PCB(%rdi),%r8 movq (%rsp),%rax /* Hardware registers */ movq %r15,PCB_R15(%r8) @@ -140,7 +140,7 @@ ctx_switch_xsave: callq pmap_activate_sw movq %r15,TD_LOCK(%r13) /* Release the old thread */ sw1: - movq TD_PCB(%r12),%r8 + leaq TD_MD_PCB(%r12),%r8 #if defined(SCHED_ULE) && defined(SMP) movq $blocked_lock, %rdx movq TD_LOCK(%r12),%rcx @@ -193,11 +193,12 @@ do_kthread: cmpq %rax,%rdx jne do_tss done_tss: - movq %r8,PCPU(RSP0) + movq TD_MD_STACK_BASE(%r12),%r9 + movq %r9,PCPU(RSP0) movq %r8,PCPU(CURPCB) movq PCPU(PTI_RSP0),%rax cmpq $~0,PCPU(UCR3) - cmove %r8,%rax + cmove %r9,%rax movq %rax,TSS_RSP0(%rdx) movq %r12,PCPU(CURTHREAD) /* into next thread */ Modified: head/sys/amd64/amd64/genassym.c ============================================================================== --- head/sys/amd64/amd64/genassym.c Fri Oct 25 19:38:02 2019 (r354094) +++ head/sys/amd64/amd64/genassym.c Fri Oct 25 20:09:42 2019 (r354095) @@ -87,6 +87,8 @@ ASSYM(TD_PFLAGS, offsetof(struct thread, td_pflags)); ASSYM(TD_PROC, offsetof(struct thread, td_proc)); ASSYM(TD_FRAME, offsetof(struct thread, td_frame)); ASSYM(TD_MD, offsetof(struct thread, td_md)); +ASSYM(TD_MD_PCB, offsetof(struct thread, td_md.md_pcb)); +ASSYM(TD_MD_STACK_BASE, offsetof(struct thread, td_md.md_stack_base)); ASSYM(TDF_ASTPENDING, TDF_ASTPENDING); ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED); Modified: head/sys/amd64/amd64/machdep.c ============================================================================== --- head/sys/amd64/amd64/machdep.c Fri Oct 25 19:38:02 2019 (r354094) +++ head/sys/amd64/amd64/machdep.c Fri Oct 25 20:09:42 2019 (r354095) @@ -1789,12 +1789,12 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) amd64_conf_fast_syscall(); /* - * Temporary forge some valid pointer to PCB, for exception - * handlers. It is reinitialized properly below after FPU is - * set up. Also set up td_critnest to short-cut the page - * fault handler. + * We initialize the PCB pointer early so that exception + * handlers will work. Also set up td_critnest to short-cut + * the page fault handler. */ cpu_max_ext_state_size = sizeof(struct savefpu); + set_top_of_stack_td(&thread0); thread0.td_pcb = get_pcb_td(&thread0); thread0.td_critnest = 1; @@ -1850,11 +1850,10 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) fpuinit(); /* - * Set up thread0 pcb after fpuinit calculated pcb + fpu save + * Set up thread0 pcb save area after fpuinit calculated fpu save * area size. Zero out the extended state header in fpu save * area. */ - thread0.td_pcb = get_pcb_td(&thread0); thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0); bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size); if (use_xsave) { @@ -1863,7 +1862,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) xhdr->xstate_bv = xsave_mask; } /* make an initial tss so cpu can get interrupt stack on syscall! */ - rsp0 = (vm_offset_t)thread0.td_pcb; + rsp0 = thread0.td_md.md_stack_base; /* Ensure the stack is aligned to 16 bytes */ rsp0 &= ~0xFul; common_tss[0].tss_rsp0 = rsp0; @@ -1899,7 +1898,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) TSEXIT(); /* Location of kernel stack for locore */ - return ((u_int64_t)thread0.td_pcb); + return (thread0.td_md.md_stack_base); } void Modified: head/sys/amd64/amd64/vm_machdep.c ============================================================================== --- head/sys/amd64/amd64/vm_machdep.c Fri Oct 25 19:38:02 2019 (r354094) +++ head/sys/amd64/amd64/vm_machdep.c Fri Oct 25 20:09:42 2019 (r354095) @@ -87,37 +87,41 @@ __FBSDID("$FreeBSD$"); _Static_assert(OFFSETOF_MONITORBUF == offsetof(struct pcpu, pc_monitorbuf), "OFFSETOF_MONITORBUF does not correspond with offset of pc_monitorbuf."); -struct savefpu * -get_pcb_user_save_td(struct thread *td) +void +set_top_of_stack_td(struct thread *td) { - vm_offset_t p; - - p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE - + td->td_md.md_stack_base = td->td_kstack + + td->td_kstack_pages * PAGE_SIZE - roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN); - KASSERT((p % XSAVE_AREA_ALIGN) == 0, ("Unaligned pcb_user_save area")); - return ((struct savefpu *)p); } struct savefpu * -get_pcb_user_save_pcb(struct pcb *pcb) +get_pcb_user_save_td(struct thread *td) { vm_offset_t p; - p = (vm_offset_t)(pcb + 1); + p = td->td_md.md_stack_base; + KASSERT((p % XSAVE_AREA_ALIGN) == 0, + ("Unaligned pcb_user_save area ptr %#lx td %p", p, td)); return ((struct savefpu *)p); } struct pcb * get_pcb_td(struct thread *td) { - vm_offset_t p; - p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE - - roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN) - - sizeof(struct pcb); - return ((struct pcb *)p); + return (&td->td_md.md_pcb); } +struct savefpu * +get_pcb_user_save_pcb(struct pcb *pcb) +{ + struct thread *td; + + td = __containerof(pcb, struct thread, td_md.md_pcb); + return (get_pcb_user_save_td(td)); +} + void * alloc_fpusave(int flags) { @@ -165,9 +169,9 @@ cpu_fork(struct thread *td1, struct proc *p2, struct t fpuexit(td1); update_pcb_bases(td1->td_pcb); - /* Point the pcb to the top of the stack */ - pcb2 = get_pcb_td(td2); - td2->td_pcb = pcb2; + /* Point the stack and pcb to the actual location */ + set_top_of_stack_td(td2); + td2->td_pcb = pcb2 = get_pcb_td(td2); /* Copy td1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); @@ -186,7 +190,7 @@ cpu_fork(struct thread *td1, struct proc *p2, struct t * Copy the trap frame for the return to user mode as if from a * syscall. This copies most of the user mode register values. */ - td2->td_frame = (struct trapframe *)td2->td_pcb - 1; + td2->td_frame = (struct trapframe *)td2->td_md.md_stack_base - 1; bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe)); td2->td_frame->tf_rax = 0; /* Child returns zero */ @@ -351,8 +355,9 @@ cpu_thread_alloc(struct thread *td) struct pcb *pcb; struct xstate_hdr *xhdr; + set_top_of_stack_td(td); td->td_pcb = pcb = get_pcb_td(td); - td->td_frame = (struct trapframe *)pcb - 1; + td->td_frame = (struct trapframe *)td->td_md.md_stack_base - 1; pcb->pcb_save = get_pcb_user_save_pcb(pcb); if (use_xsave) { xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1); @@ -490,7 +495,6 @@ cpu_copy_thread(struct thread *td, struct thread *td0) { struct pcb *pcb2; - /* Point the pcb to the top of the stack. */ pcb2 = td->td_pcb; /* Modified: head/sys/amd64/include/md_var.h ============================================================================== --- head/sys/amd64/include/md_var.h Fri Oct 25 19:38:02 2019 (r354094) +++ head/sys/amd64/include/md_var.h Fri Oct 25 20:09:42 2019 (r354095) @@ -83,6 +83,7 @@ void fpstate_drop(struct thread *td); void pagezero(void *addr); void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int ist); void sse2_pagezero(void *addr); +void set_top_of_stack_td(struct thread *td); struct savefpu *get_pcb_user_save_td(struct thread *td); struct savefpu *get_pcb_user_save_pcb(struct pcb *pcb); void pci_early_quirks(void); Modified: head/sys/amd64/include/pcpu_aux.h ============================================================================== --- head/sys/amd64/include/pcpu_aux.h Fri Oct 25 19:38:02 2019 (r354094) +++ head/sys/amd64/include/pcpu_aux.h Fri Oct 25 20:09:42 2019 (r354095) @@ -57,16 +57,6 @@ __curthread(void) return (td); } #define curthread (__curthread()) - -static __inline __pure2 struct pcb * -__curpcb(void) -{ - struct pcb *pcb; - - __asm("movq %%gs:%P1,%0" : "=r" (pcb) : "n" (offsetof(struct pcpu, - pc_curpcb))); - return (pcb); -} -#define curpcb (__curpcb()) +#define curpcb (&curthread->td_md.md_pcb) #endif /* _MACHINE_PCPU_AUX_H_ */ Modified: head/sys/amd64/include/proc.h ============================================================================== --- head/sys/amd64/include/proc.h Fri Oct 25 19:38:02 2019 (r354094) +++ head/sys/amd64/include/proc.h Fri Oct 25 20:09:42 2019 (r354095) @@ -36,6 +36,7 @@ #define _MACHINE_PROC_H_ #include <sys/queue.h> +#include <machine/pcb.h> #include <machine/segments.h> /* @@ -72,6 +73,8 @@ struct mdthread { struct pmap_invl_gen md_invl_gen; register_t md_efirt_tmp; /* (k) */ int md_efirt_dis_pf; /* (k) */ + struct pcb md_pcb; + vm_offset_t md_stack_base; }; struct mdproc { Modified: head/sys/kern/kern_thread.c ============================================================================== --- head/sys/kern/kern_thread.c Fri Oct 25 19:38:02 2019 (r354094) +++ head/sys/kern/kern_thread.c Fri Oct 25 20:09:42 2019 (r354095) @@ -84,7 +84,7 @@ _Static_assert(offsetof(struct thread, td_pflags) == 0 "struct thread KBI td_pflags"); _Static_assert(offsetof(struct thread, td_frame) == 0x478, "struct thread KBI td_frame"); -_Static_assert(offsetof(struct thread, td_emuldata) == 0x540, +_Static_assert(offsetof(struct thread, td_emuldata) == 0x690, "struct thread KBI td_emuldata"); _Static_assert(offsetof(struct proc, p_flag) == 0xb0, "struct proc KBI p_flag");
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201910252009.x9PK9gIV092660>