Date: Tue, 21 Sep 2021 13:53:14 -0700 From: Cy Schubert <Cy.Schubert@cschubert.com> To: Konstantin Belousov <kib@FreeBSD.org> Cc: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org Subject: Re: git: bd9e0f5df681 - main - amd64: eliminate td_md.md_fpu_scratch Message-ID: <202109212053.18LKrErT005572@slippy.cwsent.com> In-Reply-To: <202109211721.18LHLGn6028952@gitrepo.freebsd.org> References: <202109211721.18LHLGn6028952@gitrepo.freebsd.org>
next in thread | previous in thread | raw e-mail | index | archive | help
In message <202109211721.18LHLGn6028952@gitrepo.freebsd.org>, Konstantin Belous ov writes: > The branch main has been updated by kib: > > URL: https://cgit.FreeBSD.org/src/commit/?id=bd9e0f5df681da8b5ef05a587b4b5b07 > 572d3fc2 > > commit bd9e0f5df681da8b5ef05a587b4b5b07572d3fc2 > Author: Konstantin Belousov <kib@FreeBSD.org> > AuthorDate: 2021-09-15 18:37:47 +0000 > Commit: Konstantin Belousov <kib@FreeBSD.org> > CommitDate: 2021-09-21 17:20:15 +0000 > > amd64: eliminate td_md.md_fpu_scratch > > For signal send, copyout from the user FPU save area directly. > > For sigreturn, we are in sleepable context and can do temporal > allocation of the transient save area. We cannot copying from userspace > directly to user save area because XSAVE state needs to be validated, > also partial copyins can corrupt it. > > Requested by: jhb > Reviewed by: jhb, markj > Tested by: pho > Sponsored by: The FreeBSD Foundation > MFC after: 1 week > Differential revision: https://reviews.freebsd.org/D31954 > --- > sys/amd64/amd64/exec_machdep.c | 47 +++++++++++++++++++--------------------- > -- > sys/amd64/amd64/vm_machdep.c | 3 --- > sys/amd64/ia32/ia32_signal.c | 43 ++++++++++++++++---------------------- > sys/amd64/include/proc.h | 1 - > sys/kern/kern_thread.c | 2 +- > 5 files changed, 40 insertions(+), 56 deletions(-) > > diff --git a/sys/amd64/amd64/exec_machdep.c b/sys/amd64/amd64/exec_machdep.c > index 48bda05f9685..168c24cbb65b 100644 > --- a/sys/amd64/amd64/exec_machdep.c > +++ b/sys/amd64/amd64/exec_machdep.c > @@ -96,7 +96,7 @@ __FBSDID("$FreeBSD$"); > #include <machine/trap.h> > > static void get_fpcontext(struct thread *td, mcontext_t *mcp, > - char *xfpusave, size_t xfpusave_len); > + char **xfpusave, size_t *xfpusave_len); > static int set_fpcontext(struct thread *td, mcontext_t *mcp, > char *xfpustate, size_t xfpustate_len); > > @@ -133,14 +133,6 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) > regs = td->td_frame; > oonstack = sigonstack(regs->tf_rsp); > > - if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) { > - xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu); > - xfpusave = (char *)td->td_md.md_fpu_scratch; > - } else { > - xfpusave_len = 0; > - xfpusave = NULL; > - } > - > /* Save user context. */ > bzero(&sf, sizeof(sf)); > sf.sf_uc.uc_sigmask = *mask; > @@ -150,7 +142,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) > sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; > bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs)); > sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ > - get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); > + get_fpcontext(td, &sf.sf_uc.uc_mcontext, &xfpusave, &xfpusave_len); > fpstate_drop(td); > update_pcb_bases(pcb); > sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase; > @@ -301,10 +293,11 @@ sys_sigreturn(struct thread *td, struct sigreturn_args > *uap) > p->p_pid, td->td_name, xfpustate_len); > return (EINVAL); > } > - xfpustate = __builtin_alloca(xfpustate_len); > + xfpustate = (char *)fpu_save_area_alloc(); > error = copyin((const void *)uc.uc_mcontext.mc_xfpustate, > xfpustate, xfpustate_len); > if (error != 0) { > + fpu_save_area_free((struct savefpu *)xfpustate); > uprintf( > "pid %d (%s): sigreturn copying xfpustate failed\n", > p->p_pid, td->td_name); > @@ -315,6 +308,7 @@ sys_sigreturn(struct thread *td, struct sigreturn_args *u > ap) > xfpustate_len = 0; > } > ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len); > + fpu_save_area_free((struct savefpu *)xfpustate); > if (ret != 0) { > uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n", > p->p_pid, td->td_name, ret); > @@ -674,14 +668,17 @@ set_mcontext(struct thread *td, mcontext_t *mcp) > if (mcp->mc_xfpustate_len > cpu_max_ext_state_size - > sizeof(struct savefpu)) > return (EINVAL); > - xfpustate = (char *)td->td_md.md_fpu_scratch; > + xfpustate = (char *)fpu_save_area_alloc(); > ret = copyin((void *)mcp->mc_xfpustate, xfpustate, > mcp->mc_xfpustate_len); > - if (ret != 0) > + if (ret != 0) { > + fpu_save_area_free((struct savefpu *)xfpustate); > return (ret); > + } > } else > xfpustate = NULL; > ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len); > + fpu_save_area_free((struct savefpu *)xfpustate); > if (ret != 0) > return (ret); > tp->tf_r15 = mcp->mc_r15; > @@ -719,26 +716,24 @@ set_mcontext(struct thread *td, mcontext_t *mcp) > } > > static void > -get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, > - size_t xfpusave_len) > +get_fpcontext(struct thread *td, mcontext_t *mcp, char **xfpusave, > + size_t *xfpusave_len) > { > - size_t max_len, len; > - > mcp->mc_ownedfp = fpugetregs(td); > bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0], > sizeof(mcp->mc_fpstate)); > mcp->mc_fpformat = fpuformat(); > - if (!use_xsave || xfpusave_len == 0) > + if (xfpusave == NULL) > return; > - max_len = cpu_max_ext_state_size - sizeof(struct savefpu); > - len = xfpusave_len; > - if (len > max_len) { > - len = max_len; > - bzero(xfpusave + max_len, len - max_len); > + if (!use_xsave || cpu_max_ext_state_size <= sizeof(struct savefpu)) { > + *xfpusave_len = 0; > + *xfpusave = NULL; > + } else { > + mcp->mc_flags |= _MC_HASFPXSTATE; > + *xfpusave_len = mcp->mc_xfpustate_len = > + cpu_max_ext_state_size - sizeof(struct savefpu); > + *xfpusave = (char *)(get_pcb_user_save_td(td) + 1); > } > - mcp->mc_flags |= _MC_HASFPXSTATE; > - mcp->mc_xfpustate_len = len; > - bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len); > } > > static int > diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c > index e42d16d61b3a..0bfcd03d6655 100644 > --- a/sys/amd64/amd64/vm_machdep.c > +++ b/sys/amd64/amd64/vm_machdep.c > @@ -392,7 +392,6 @@ cpu_thread_alloc(struct thread *td) > td->td_pcb = pcb = get_pcb_td(td); > td->td_frame = (struct trapframe *)td->td_md.md_stack_base - 1; > td->td_md.md_usr_fpu_save = fpu_save_area_alloc(); > - td->td_md.md_fpu_scratch = fpu_save_area_alloc(); > pcb->pcb_save = get_pcb_user_save_pcb(pcb); > if (use_xsave) { > xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1); > @@ -408,8 +407,6 @@ cpu_thread_free(struct thread *td) > > fpu_save_area_free(td->td_md.md_usr_fpu_save); > td->td_md.md_usr_fpu_save = NULL; > - fpu_save_area_free(td->td_md.md_fpu_scratch); > - td->td_md.md_fpu_scratch = NULL; > } > > bool > diff --git a/sys/amd64/ia32/ia32_signal.c b/sys/amd64/ia32/ia32_signal.c > index 9b67c7001a87..1ca19072a1dc 100644 > --- a/sys/amd64/ia32/ia32_signal.c > +++ b/sys/amd64/ia32/ia32_signal.c > @@ -87,10 +87,8 @@ static void freebsd4_ia32_sendsig(sig_t, ksiginfo_t *, sig > set_t *); > > static void > ia32_get_fpcontext(struct thread *td, struct ia32_mcontext *mcp, > - char *xfpusave, size_t xfpusave_len) > + char **xfpusave, size_t *xfpusave_len) > { > - size_t max_len, len; > - > /* > * XXX Format of 64bit and 32bit FXSAVE areas differs. FXSAVE > * in 32bit mode saves %cs and %ds, while on 64bit it saves > @@ -101,17 +99,15 @@ ia32_get_fpcontext(struct thread *td, struct ia32_mconte > xt *mcp, > bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0], > sizeof(mcp->mc_fpstate)); > mcp->mc_fpformat = fpuformat(); > - if (!use_xsave || xfpusave_len == 0) > - return; > - max_len = cpu_max_ext_state_size - sizeof(struct savefpu); > - len = xfpusave_len; > - if (len > max_len) { > - len = max_len; > - bzero(xfpusave + max_len, len - max_len); > + if (!use_xsave || cpu_max_ext_state_size <= sizeof(struct savefpu)) { > + *xfpusave_len = 0; > + *xfpusave = NULL; > + } else { > + mcp->mc_flags |= _MC_IA32_HASFPXSTATE; > + *xfpusave_len = mcp->mc_xfpustate_len = This commit causes a panic running and i386 poudriere on an amd64. Notice the ud2 machine instruction at line 107 in the disassemble portion of this reply. Unread portion of the kernel message buffer: interrupt enabled, resume, IOPL = 0 current process = 5200 (cc) trap number = 1 panic: privileged instruction fault cpuid = 3 time = 1632253591 KDB: stack backtrace: db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame 0xfffffe00e52377d0 vpanic() at vpanic+0x187/frame 0xfffffe00e5237830 panic() at panic+0x43/frame 0xfffffe00e5237890 trap_fatal() at trap_fatal+0x387/frame 0xfffffe00e52378f0 trap() at trap+0x8b/frame 0xfffffe00e5237a00 calltrap() at calltrap+0x8/frame 0xfffffe00e5237a00 --- trap 0x1, rip = 0xffffffff80a5fef1, rsp = 0xfffffe00e5237ad0, rbp = 0xfffffe00e5237af0 --- ia32_get_mcontext() at ia32_get_mcontext+0x181/frame 0xfffffe00e5237af0 freebsd32_getcontext() at freebsd32_getcontext+0x52/frame 0xfffffe00e5237df0 ia32_syscall() at ia32_syscall+0x126/frame 0xfffffe00e5237f30 int0x80_syscall_common() at int0x80_syscall_common+0x9c/frame 0xffffca38 Uptime: 8m48s Dumping 526 out of 8161 MB: (CTRL-C to abort) ..4%..13%..22%..31%..43%..52%. .61%..73%..83%..92% __curthread () at /opt/src/git-src/sys/amd64/include/pcpu_aux.h:55 55 __asm("movq %%gs:%P1,%0" : "=r" (td) : "n" (offsetof(struct pcpu, (kgdb) bt #0 __curthread () at /opt/src/git-src/sys/amd64/include/pcpu_aux.h:55 #1 doadump (textdump=textdump@entry=1) at /opt/src/git-src/sys/kern/kern_shutdown.c:399 #2 0xffffffff806d417b in kern_reboot (howto=260) at /opt/src/git-src/sys/kern/kern_shutdown.c:486 #3 0xffffffff806d45f6 in vpanic (fmt=0xffffffff80a9b26c "%s", ap=<optimized out>) at /opt/src/git-src/sys/kern/kern_shutdown.c:919 #4 0xffffffff806d43f3 in panic (fmt=<unavailable>) at /opt/src/git-src/sys/kern/kern_shutdown.c:843 #5 0xffffffff80a3a587 in trap_fatal (frame=0xfffffe00e5237a10, eva=0) at /opt/src/git-src/sys/amd64/amd64/trap.c:946 #6 0xffffffff80a39a7b in trap (frame=0xfffffe00e5237a10) at /opt/src/git-src/sys/amd64/amd64/trap.c:251 #7 <signal handler called> #8 0xffffffff80a5fef1 in ia32_get_fpcontext (td=0xfffffe00e468f3a0, mcp=0xfffffe00e5237b10, xfpusave=0x0, xfpusave_len=0x0) at /opt/src/git-src/sys/amd64/ia32/ia32_signal.c:107 #9 ia32_get_mcontext (td=td@entry=0xfffffe00e468f3a0, mcp=mcp@entry=0xfffffe00e5237b10, flags=1) at /opt/src/git-src/sys/amd64/ia32/ia32_signal.c:177 #10 0xffffffff80a5fca2 in freebsd32_getcontext (td=0xfffffe00e468f3a0, uap=0xfffffe00e468f790) at /opt/src/git-src/sys/amd64/ia32/ia32_signal.c:260 #11 0xffffffff80a61a16 in syscallenter (td=0xfffffe00e468f3a0) at /opt/src/git-src/sys/amd64/ia32/../../kern/subr_syscall.c:189 #12 ia32_syscall (frame=0xfffffe00e5237f40) at /opt/src/git-src/sys/amd64/ia32/ia32_syscall.c:218 #13 0xffffffff80a12bcf in int0x80_syscall_common () at /opt/src/git-src/sys/amd64/ia32/ia32_exception.S:77 #14 0x00000000ffffc710 in ?? () #15 0x00000000ffffc9d0 in ?? () #16 0x000000002540b9ad in ?? () #17 0x000000002526104f in ?? () #18 0x0000000000000000 in ?? () (kgdb) frame 8 #8 0xffffffff80a5fef1 in ia32_get_fpcontext (td=0xfffffe00e468f3a0, mcp=0xfffffe00e5237b10, xfpusave=0x0, xfpusave_len=0x0) at /opt/src/git-src/sys/amd64/ia32/ia32_signal.c:107 107 *xfpusave_len = mcp->mc_xfpustate_len = (kgdb) l 102 if (!use_xsave || cpu_max_ext_state_size <= sizeof(struct savefpu)) { 103 *xfpusave_len = 0; 104 *xfpusave = NULL; 105 } else { 106 mcp->mc_flags |= _MC_IA32_HASFPXSTATE; 107 *xfpusave_len = mcp->mc_xfpustate_len = 108 cpu_max_ext_state_size - sizeof(struct savefpu); 109 *xfpusave = (char *)(get_pcb_user_save_td(td) + 1); 110 } 111 } (kgdb) disassemble /m Dump of assembler code for function ia32_get_mcontext: 98 mcp->mc_ownedfp = fpugetregs(td); 0xffffffff80a5fe9d <+301>: mov %r14,%rdi 0xffffffff80a5fea0 <+304>: call 0xffffffff80a16ca0 <fpugetregs> 0xffffffff80a5fea5 <+309>: mov %eax,0x58(%rbx) 99 bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0], 0xffffffff80a5fea8 <+312>: lea 0x60(%rbx),%r15 0xffffffff80a5feac <+316>: mov %r14,%rdi 0xffffffff80a5feaf <+319>: call 0xffffffff80a3bbc0 <get_pcb_user_save_td> 0xffffffff80a5feb4 <+324>: mov $0x200,%edx 0xffffffff80a5feb9 <+329>: mov %r15,%rdi 0xffffffff80a5febc <+332>: mov %rax,%rsi 0xffffffff80a5febf <+335>: call 0xffffffff80a35d20 <memmove_std> 100 sizeof(mcp->mc_fpstate)); 101 mcp->mc_fpformat = fpuformat(); 0xffffffff80a5fec4 <+340>: call 0xffffffff80a169e0 <fpuformat> 0xffffffff80a5fec9 <+345>: mov %eax,0x54(%rbx) 102 if (!use_xsave || cpu_max_ext_state_size <= sizeof(struct savefpu)) { 0xffffffff80a5fecc <+348>: cmpl $0x0,0x53c73d(%rip) # 0xffffffff80f9c610 <use_xsave> 0xffffffff80a5fed3 <+355>: je 0xffffffff80a5fef1 <ia32_get_mcontext+385> 0xffffffff80a5fed5 <+357>: mov 0x5274bd(%rip),%eax # 0xffffffff80f87398 <cpu_max_ext_state_size> 0xffffffff80a5fedb <+363>: cmp $0x200,%eax 0xffffffff80a5fee0 <+368>: jbe 0xffffffff80a5fef1 <ia32_get_mcontext+385> 103 *xfpusave_len = 0; 104 *xfpusave = NULL; 105 } else { 106 mcp->mc_flags |= _MC_IA32_HASFPXSTATE; 0xffffffff80a5fee2 <+370>: orb $0x4,0x5c(%rbx) 107 *xfpusave_len = mcp->mc_xfpustate_len = 0xffffffff80a5feeb <+379>: mov %eax,0x26c(%rbx) => 0xffffffff80a5fef1 <+385>: ud2 0xffffffff80a5fef3 <+387>: xor %eax,%eax 0xffffffff80a5fef5 <+389>: mov $0x140,%edi 108 cpu_max_ext_state_size - sizeof(struct savefpu); 0xffffffff80a5fee6 <+374>: add $0xfffffe00,%eax 109 *xfpusave = (char *)(get_pcb_user_save_td(td) + 1); 110 } 111 } 112 113 static int --Type <RET> for more, q to quit, c to continue without paging--q > + cpu_max_ext_state_size - sizeof(struct savefpu); > + *xfpusave = (char *)(get_pcb_user_save_td(td) + 1); > } > - mcp->mc_flags |= _MC_IA32_HASFPXSTATE; > - mcp->mc_xfpustate_len = len; > - bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len); > } > > static int > @@ -210,14 +206,17 @@ ia32_set_mcontext(struct thread *td, struct ia32_mconte > xt *mcp) > if (mcp->mc_xfpustate_len > cpu_max_ext_state_size - > sizeof(struct savefpu)) > return (EINVAL); > - xfpustate = (char *)td->td_md.md_fpu_scratch; > + xfpustate = (char *)fpu_save_area_alloc(); > ret = copyin(PTRIN(mcp->mc_xfpustate), xfpustate, > mcp->mc_xfpustate_len); > - if (ret != 0) > + if (ret != 0) { > + fpu_save_area_free((struct savefpu *)xfpustate); > return (ret); > + } > } else > xfpustate = NULL; > ret = ia32_set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len); > + fpu_save_area_free((struct savefpu *)xfpustate); > if (ret != 0) > return (ret); > tp->tf_gs = mcp->mc_gs; > @@ -577,14 +576,6 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *m > ask) > regs = td->td_frame; > oonstack = sigonstack(regs->tf_rsp); > > - if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) { > - xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu); > - xfpusave = (char *)td->td_md.md_fpu_scratch; > - } else { > - xfpusave_len = 0; > - xfpusave = NULL; > - } > - > /* Save user context. */ > bzero(&sf, sizeof(sf)); > sf.sf_uc.uc_sigmask = *mask; > @@ -613,7 +604,7 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *ma > sk) > sf.sf_uc.uc_mcontext.mc_fs = regs->tf_fs; > sf.sf_uc.uc_mcontext.mc_gs = regs->tf_gs; > sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ > - ia32_get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); > + ia32_get_fpcontext(td, &sf.sf_uc.uc_mcontext, &xfpusave, &xfpusave_len) > ; > fpstate_drop(td); > sf.sf_uc.uc_mcontext.mc_fsbase = td->td_pcb->pcb_fsbase; > sf.sf_uc.uc_mcontext.mc_gsbase = td->td_pcb->pcb_gsbase; > @@ -882,10 +873,11 @@ freebsd32_sigreturn(td, uap) > td->td_proc->p_pid, td->td_name, xfpustate_len); > return (EINVAL); > } > - xfpustate = (char *)td->td_md.md_fpu_scratch; > + xfpustate = (char *)fpu_save_area_alloc(); > error = copyin(PTRIN(ucp->uc_mcontext.mc_xfpustate), > xfpustate, xfpustate_len); > if (error != 0) { > + fpu_save_area_free((struct savefpu *)xfpustate); > uprintf( > "pid %d (%s): sigreturn copying xfpustate failed\n", > td->td_proc->p_pid, td->td_name); > @@ -897,6 +889,7 @@ freebsd32_sigreturn(td, uap) > } > ret = ia32_set_fpcontext(td, &ucp->uc_mcontext, xfpustate, > xfpustate_len); > + fpu_save_area_free((struct savefpu *)xfpustate); > if (ret != 0) { > uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n", > td->td_proc->p_pid, td->td_name, ret); > diff --git a/sys/amd64/include/proc.h b/sys/amd64/include/proc.h > index bd07f70f8d44..94ac69d31e65 100644 > --- a/sys/amd64/include/proc.h > +++ b/sys/amd64/include/proc.h > @@ -76,7 +76,6 @@ struct mdthread { > struct pcb md_pcb; > vm_offset_t md_stack_base; > struct savefpu *md_usr_fpu_save; > - struct savefpu *md_fpu_scratch; > }; > > struct mdproc { > diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c > index 62f939406374..65c5cc65c87e 100644 > --- a/sys/kern/kern_thread.c > +++ b/sys/kern/kern_thread.c > @@ -91,7 +91,7 @@ _Static_assert(offsetof(struct thread, td_pflags) == 0x110, > "struct thread KBI td_pflags"); > _Static_assert(offsetof(struct thread, td_frame) == 0x4a8, > "struct thread KBI td_frame"); > -_Static_assert(offsetof(struct thread, td_emuldata) == 0x6c0, > +_Static_assert(offsetof(struct thread, td_emuldata) == 0x6b0, > "struct thread KBI td_emuldata"); > _Static_assert(offsetof(struct proc, p_flag) == 0xb8, > "struct proc KBI p_flag"); > -- Cheers, Cy Schubert <Cy.Schubert@cschubert.com> FreeBSD UNIX: <cy@FreeBSD.org> Web: https://FreeBSD.org NTP: <cy@nwtime.org> Web: https://nwtime.org The need of the many outweighs the greed of the few.
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202109212053.18LKrErT005572>