Date: Fri, 30 Nov 2018 00:45:10 +0000 (UTC)
From: Mateusz Guzik <mjg@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r341272 - in head: lib/libc/amd64/string sys/amd64/amd64
Message-ID: <201811300045.wAU0jA5t087056@repo.freebsd.org>
Author: mjg
Date: Fri Nov 30 00:45:10 2018
New Revision: 341272
URL: https://svnweb.freebsd.org/changeset/base/341272

Log:
  amd64: tidy up copying backwards in memmove

  For the non-ERMS case the code used to handle possible trailing bytes
  with movsb first and then followed it up with movsq. This also
  happened to alter how calculations were done for other cases.

  Handle the tail with regular movs, just like when copying forward.

  Use leaq to calculate the right offset from the get go, instead of
  doing a separate add and sub. This adjusts the offsets for the
  non-rep cases so that they can be used to handle the tail.

  The routine is still a work in progress.

  Sponsored by:	The FreeBSD Foundation

Modified:
  head/lib/libc/amd64/string/memmove.S
  head/sys/amd64/amd64/support.S

Modified: head/lib/libc/amd64/string/memmove.S
==============================================================================
--- head/lib/libc/amd64/string/memmove.S	Fri Nov 30 00:00:51 2018	(r341271)
+++ head/lib/libc/amd64/string/memmove.S	Fri Nov 30 00:45:10 2018	(r341272)
@@ -150,24 +150,24 @@ __FBSDID("$FreeBSD$");
 	 */
 	ALIGN_TEXT
 2:
-	addq	%rcx,%rdi
-	addq	%rcx,%rsi
+	cmpq	$256,%rcx
+	ja	2256f
 
+	leaq	-8(%rdi,%rcx),%rdi
+	leaq	-8(%rsi,%rcx),%rsi
+
 	cmpq	$32,%rcx
 	jb	2016f
 
-	cmpq	$256,%rcx
-	ja	2256f
-
 2032:
+	movq	(%rsi),%rdx
+	movq	%rdx,(%rdi)
 	movq	-8(%rsi),%rdx
 	movq	%rdx,-8(%rdi)
 	movq	-16(%rsi),%rdx
 	movq	%rdx,-16(%rdi)
 	movq	-24(%rsi),%rdx
 	movq	%rdx,-24(%rdi)
-	movq	-32(%rsi),%rdx
-	movq	%rdx,-32(%rdi)
 	leaq	-32(%rsi),%rsi
 	leaq	-32(%rdi),%rdi
 	subq	$32,%rcx
@@ -181,10 +181,10 @@ __FBSDID("$FreeBSD$");
 2016:
 	cmpb	$16,%cl
 	jl	2008f
+	movq	(%rsi),%rdx
+	movq	%rdx,(%rdi)
 	movq	-8(%rsi),%rdx
 	movq	%rdx,-8(%rdi)
-	movq	-16(%rsi),%rdx
-	movq	%rdx,-16(%rdi)
 	subb	$16,%cl
 	jz	2000f
 	leaq	-16(%rsi),%rsi
@@ -192,8 +192,8 @@ __FBSDID("$FreeBSD$");
 2008:
 	cmpb	$8,%cl
 	jl	2004f
-	movq	-8(%rsi),%rdx
-	movq	%rdx,-8(%rdi)
+	movq	(%rsi),%rdx
+	movq	%rdx,(%rdi)
 	subb	$8,%cl
 	jz	2000f
 	leaq	-8(%rsi),%rsi
@@ -201,8 +201,8 @@ __FBSDID("$FreeBSD$");
 2004:
 	cmpb	$4,%cl
 	jl	2002f
-	movl	-4(%rsi),%edx
-	movl	%edx,-4(%rdi)
+	movl	4(%rsi),%edx
+	movl	%edx,4(%rdi)
 	subb	$4,%cl
 	jz	2000f
 	leaq	-4(%rsi),%rsi
@@ -210,8 +210,8 @@ __FBSDID("$FreeBSD$");
 2002:
 	cmpb	$2,%cl
 	jl	2001f
-	movw	-2(%rsi),%dx
-	movw	%dx,-2(%rdi)
+	movw	6(%rsi),%dx
+	movw	%dx,6(%rdi)
 	subb	$2,%cl
 	jz	2000f
 	leaq	-2(%rsi),%rsi
@@ -219,33 +219,31 @@ __FBSDID("$FreeBSD$");
 2001:
 	cmpb	$1,%cl
 	jl	2000f
-	movb	-1(%rsi),%dl
-	movb	%dl,-1(%rdi)
+	movb	7(%rsi),%dl
+	movb	%dl,7(%rdi)
 2000:
 	\end
 	ret
 	ALIGN_TEXT
 2256:
-	decq	%rdi
-	decq	%rsi
 	std
 .if \erms == 1
+	leaq	-1(%rdi,%rcx),%rdi
+	leaq	-1(%rsi,%rcx),%rsi
 	rep
 	movsb
+	cld
 .else
-	andq	$7,%rcx			/* any fractional bytes? */
-	je	3f
-	rep
-	movsb
-3:
-	movq	%rdx,%rcx		/* copy remainder by 32-bit words */
+	leaq	-8(%rdi,%rcx),%rdi
+	leaq	-8(%rsi,%rcx),%rsi
 	shrq	$3,%rcx
-	subq	$7,%rsi
-	subq	$7,%rdi
 	rep
 	movsq
-.endif
 	cld
+	movq	%rdx,%rcx
+	andb	$7,%cl
+	jne	2004b
+.endif
 	\end
 	ret
 .endif

Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S	Fri Nov 30 00:00:51 2018	(r341271)
+++ head/sys/amd64/amd64/support.S	Fri Nov 30 00:45:10 2018	(r341272)
@@ -313,24 +313,24 @@ END(memcmp)
 	 */
 	ALIGN_TEXT
 2:
-	addq	%rcx,%rdi
-	addq	%rcx,%rsi
+	cmpq	$256,%rcx
+	ja	2256f
 
+	leaq	-8(%rdi,%rcx),%rdi
+	leaq	-8(%rsi,%rcx),%rsi
+
 	cmpq	$32,%rcx
 	jb	2016f
 
-	cmpq	$256,%rcx
-	ja	2256f
-
 2032:
+	movq	(%rsi),%rdx
+	movq	%rdx,(%rdi)
 	movq	-8(%rsi),%rdx
 	movq	%rdx,-8(%rdi)
 	movq	-16(%rsi),%rdx
 	movq	%rdx,-16(%rdi)
 	movq	-24(%rsi),%rdx
 	movq	%rdx,-24(%rdi)
-	movq	-32(%rsi),%rdx
-	movq	%rdx,-32(%rdi)
 	leaq	-32(%rsi),%rsi
 	leaq	-32(%rdi),%rdi
 	subq	$32,%rcx
@@ -344,10 +344,10 @@ END(memcmp)
 2016:
 	cmpb	$16,%cl
 	jl	2008f
+	movq	(%rsi),%rdx
+	movq	%rdx,(%rdi)
 	movq	-8(%rsi),%rdx
 	movq	%rdx,-8(%rdi)
-	movq	-16(%rsi),%rdx
-	movq	%rdx,-16(%rdi)
 	subb	$16,%cl
 	jz	2000f
 	leaq	-16(%rsi),%rsi
@@ -355,8 +355,8 @@ END(memcmp)
 2008:
 	cmpb	$8,%cl
 	jl	2004f
-	movq	-8(%rsi),%rdx
-	movq	%rdx,-8(%rdi)
+	movq	(%rsi),%rdx
+	movq	%rdx,(%rdi)
 	subb	$8,%cl
 	jz	2000f
 	leaq	-8(%rsi),%rsi
@@ -364,8 +364,8 @@ END(memcmp)
 2004:
 	cmpb	$4,%cl
 	jl	2002f
-	movl	-4(%rsi),%edx
-	movl	%edx,-4(%rdi)
+	movl	4(%rsi),%edx
+	movl	%edx,4(%rdi)
 	subb	$4,%cl
 	jz	2000f
 	leaq	-4(%rsi),%rsi
@@ -373,8 +373,8 @@ END(memcmp)
 2002:
 	cmpb	$2,%cl
 	jl	2001f
-	movw	-2(%rsi),%dx
-	movw	%dx,-2(%rdi)
+	movw	6(%rsi),%dx
+	movw	%dx,6(%rdi)
 	subb	$2,%cl
 	jz	2000f
 	leaq	-2(%rsi),%rsi
@@ -382,33 +382,31 @@ END(memcmp)
 2001:
 	cmpb	$1,%cl
 	jl	2000f
-	movb	-1(%rsi),%dl
-	movb	%dl,-1(%rdi)
+	movb	7(%rsi),%dl
+	movb	%dl,7(%rdi)
 2000:
 	\end
 	ret
 	ALIGN_TEXT
 2256:
-	decq	%rdi
-	decq	%rsi
 	std
 .if \erms == 1
+	leaq	-1(%rdi,%rcx),%rdi
+	leaq	-1(%rsi,%rcx),%rsi
 	rep
 	movsb
+	cld
 .else
-	andq	$7,%rcx			/* any fractional bytes? */
-	je	3f
-	rep
-	movsb
-3:
-	movq	%rdx,%rcx		/* copy remainder by 32-bit words */
+	leaq	-8(%rdi,%rcx),%rdi
+	leaq	-8(%rsi,%rcx),%rsi
	shrq	$3,%rcx
-	subq	$7,%rsi
-	subq	$7,%rdi
 	rep
 	movsq
-.endif
 	cld
+	movq	%rdx,%rcx
+	andb	$7,%cl
+	jne	2004b
+.endif
 	\end
 	ret
 .endif
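The log describes the new ordering only in prose, so what follows is a minimal C model of it, an illustration rather than the committed assembly, and the name memmove_backward_sketch is made up for this sketch: the bulk of the region is copied as 8-byte words from the highest addresses downwards (the job rep movsq does after the leaq adjustments), and the leftover len & 7 bytes, which in this scheme sit at the low end of the region, are then finished with plain loads and stores, the same kind of scalar tail the forward path already used.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/*
 * Illustration only, not the committed routine: copy backwards so an
 * overlapping dst > src still reads every source byte before it can be
 * overwritten.
 */
void *
memmove_backward_sketch(void *dst, const void *src, size_t len)
{
	unsigned char *d = dst;
	const unsigned char *s = src;
	size_t words = len >> 3;	/* full 8-byte words, like shrq $3 */
	size_t tail = len & 7;		/* leftover bytes, like andb $7 */

	/* Whole words, highest addresses first. */
	while (words-- > 0) {
		uint64_t tmp;

		/* memcpy stands in for an unaligned 8-byte load/store. */
		memcpy(&tmp, s + tail + words * 8, sizeof(tmp));
		memcpy(d + tail + words * 8, &tmp, sizeof(tmp));
	}
	/* Scalar tail, still highest addresses first. */
	while (tail-- > 0)
		d[tail] = s[tail];
	return (dst);
}

Copying strictly from high to low addresses is what makes the overlapping dst > src case safe: by the time a destination store lands on a byte of the source, that byte has already been read.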