Date: Wed, 19 Dec 2018 21:25:43 +0000 (UTC)
From: Mateusz Guzik <mjg@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org
Subject: svn commit: r342240 - in stable/12: lib/libc/amd64/string sys/amd64/amd64
Message-ID: <201812192125.wBJLPhYA006744@repo.freebsd.org>
Author: mjg
Date: Wed Dec 19 21:25:43 2018
New Revision: 342240
URL: https://svnweb.freebsd.org/changeset/base/342240

Log:
  MFC r341272,r341273,r341351

  amd64: tidy up copying backwards in memmove

  amd64: remove stale attribution for memmove work

  amd64: handle small memmove buffers with overlapping stores

Modified:
  stable/12/lib/libc/amd64/string/memmove.S
  stable/12/sys/amd64/amd64/support.S

Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/lib/libc/amd64/string/memmove.S
==============================================================================
--- stable/12/lib/libc/amd64/string/memmove.S	Wed Dec 19 21:04:06 2018	(r342239)
+++ stable/12/lib/libc/amd64/string/memmove.S	Wed Dec 19 21:25:43 2018	(r342240)
@@ -34,8 +34,6 @@ __FBSDID("$FreeBSD$");
 /*
  * memmove(dst, src, cnt)
  *         rdi, rsi, rdx
- * Contains parts of bcopy written by:
- *  ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
  */
 
 /*
@@ -44,11 +42,19 @@ __FBSDID("$FreeBSD$");
  * rsi - source
  * rdx - count
  *
- * The macro possibly clobbers the above and: rcx, r8.
- * It does not clobber rax, r10 nor r11.
+ * The macro possibly clobbers the above and: rcx, r8, r9, r10
+ * It does not clobber rax nor r11.
  */
 .macro MEMMOVE erms overlap begin end
 	\begin
+
+	/*
+	 * For sizes 0..32 all data is read before it is written, so there
+	 * is no correctness issue with direction of copying.
+	 */
+	cmpq	$32,%rcx
+	jbe	101632f
+
 .if \overlap == 1
 	movq	%rdi,%r8
 	subq	%rsi,%r8
@@ -56,13 +62,10 @@ __FBSDID("$FreeBSD$");
 	jb	2f
 .endif
 
-	cmpq	$32,%rcx
-	jb	1016f
-
 	cmpq	$256,%rcx
 	ja	1256f
 
-1032:
+103200:
 	movq	(%rsi),%rdx
 	movq	%rdx,(%rdi)
 	movq	8(%rsi),%rdx
@@ -75,56 +78,62 @@ __FBSDID("$FreeBSD$");
 	leaq	32(%rdi),%rdi
 	subq	$32,%rcx
 	cmpq	$32,%rcx
-	jae	1032b
+	jae	103200b
 	cmpb	$0,%cl
-	jne	1016f
+	jne	101632f
 	\end
 	ret
 	ALIGN_TEXT
-1016:
+101632:
 	cmpb	$16,%cl
-	jl	1008f
+	jl	100816f
 	movq	(%rsi),%rdx
+	movq	8(%rsi),%r8
+	movq	-16(%rsi,%rcx),%r9
+	movq	-8(%rsi,%rcx),%r10
 	movq	%rdx,(%rdi)
-	movq	8(%rsi),%rdx
-	movq	%rdx,8(%rdi)
-	subb	$16,%cl
-	jz	1000f
-	leaq	16(%rsi),%rsi
-	leaq	16(%rdi),%rdi
-1008:
+	movq	%r8,8(%rdi)
+	movq	%r9,-16(%rdi,%rcx)
+	movq	%r10,-8(%rdi,%rcx)
+	\end
+	ret
+	ALIGN_TEXT
+100816:
 	cmpb	$8,%cl
-	jl	1004f
+	jl	100408f
 	movq	(%rsi),%rdx
+	movq	-8(%rsi,%rcx),%r8
 	movq	%rdx,(%rdi)
-	subb	$8,%cl
-	jz	1000f
-	leaq	8(%rsi),%rsi
-	leaq	8(%rdi),%rdi
-1004:
+	movq	%r8,-8(%rdi,%rcx)
+	\end
+	ret
+	ALIGN_TEXT
+100408:
 	cmpb	$4,%cl
-	jl	1002f
+	jl	100204f
 	movl	(%rsi),%edx
+	movl	-4(%rsi,%rcx),%r8d
 	movl	%edx,(%rdi)
-	subb	$4,%cl
-	jz	1000f
-	leaq	4(%rsi),%rsi
-	leaq	4(%rdi),%rdi
-1002:
+	movl	%r8d,-4(%rdi,%rcx)
+	\end
+	ret
+	ALIGN_TEXT
+100204:
 	cmpb	$2,%cl
-	jl	1001f
-	movw	(%rsi),%dx
+	jl	100001f
+	movzwl	(%rsi),%edx
+	movzwl	-2(%rsi,%rcx),%r8d
 	movw	%dx,(%rdi)
-	subb	$2,%cl
-	jz	1000f
-	leaq	2(%rsi),%rsi
-	leaq	2(%rdi),%rdi
-1001:
+	movw	%r8w,-2(%rdi,%rcx)
+	\end
+	ret
+	ALIGN_TEXT
+100001:
 	cmpb	$1,%cl
-	jl	1000f
+	jl	100000f
 	movb	(%rsi),%dl
 	movb	%dl,(%rdi)
-1000:
+100000:
 	\end
 	ret
 
@@ -140,8 +149,8 @@ __FBSDID("$FreeBSD$");
 	rep
 	movsq
 	movq	%rdx,%rcx
-	andb	$7,%cl		/* any bytes left? */
-	jne	1004b
+	andl	$7,%ecx		/* any bytes left? */
+	jne	100408b
 .endif
 	\end
 	ret
 
@@ -180,24 +189,24 @@ __FBSDID("$FreeBSD$");
 	 */
 	ALIGN_TEXT
 2:
-	addq	%rcx,%rdi
-	addq	%rcx,%rsi
+	cmpq	$256,%rcx
+	ja	2256f
+
+	leaq	-8(%rdi,%rcx),%rdi
+	leaq	-8(%rsi,%rcx),%rsi
+
 	cmpq	$32,%rcx
 	jb	2016f
 
-	cmpq	$256,%rcx
-	ja	2256f
-
 2032:
+	movq	(%rsi),%rdx
+	movq	%rdx,(%rdi)
 	movq	-8(%rsi),%rdx
 	movq	%rdx,-8(%rdi)
 	movq	-16(%rsi),%rdx
 	movq	%rdx,-16(%rdi)
 	movq	-24(%rsi),%rdx
 	movq	%rdx,-24(%rdi)
-	movq	-32(%rsi),%rdx
-	movq	%rdx,-32(%rdi)
 	leaq	-32(%rsi),%rsi
 	leaq	-32(%rdi),%rdi
 	subq	$32,%rcx
@@ -211,10 +220,10 @@ __FBSDID("$FreeBSD$");
 2016:
 	cmpb	$16,%cl
 	jl	2008f
+	movq	(%rsi),%rdx
+	movq	%rdx,(%rdi)
 	movq	-8(%rsi),%rdx
 	movq	%rdx,-8(%rdi)
-	movq	-16(%rsi),%rdx
-	movq	%rdx,-16(%rdi)
 	subb	$16,%cl
 	jz	2000f
 	leaq	-16(%rsi),%rsi
@@ -222,8 +231,8 @@ __FBSDID("$FreeBSD$");
 2008:
 	cmpb	$8,%cl
 	jl	2004f
-	movq	-8(%rsi),%rdx
-	movq	%rdx,-8(%rdi)
+	movq	(%rsi),%rdx
+	movq	%rdx,(%rdi)
 	subb	$8,%cl
 	jz	2000f
 	leaq	-8(%rsi),%rsi
@@ -231,8 +240,8 @@ __FBSDID("$FreeBSD$");
 2004:
 	cmpb	$4,%cl
 	jl	2002f
-	movl	-4(%rsi),%edx
-	movl	%edx,-4(%rdi)
+	movl	4(%rsi),%edx
+	movl	%edx,4(%rdi)
 	subb	$4,%cl
 	jz	2000f
 	leaq	-4(%rsi),%rsi
@@ -240,8 +249,8 @@ __FBSDID("$FreeBSD$");
 2002:
 	cmpb	$2,%cl
 	jl	2001f
-	movw	-2(%rsi),%dx
-	movw	%dx,-2(%rdi)
+	movw	6(%rsi),%dx
+	movw	%dx,6(%rdi)
 	subb	$2,%cl
 	jz	2000f
 	leaq	-2(%rsi),%rsi
@@ -249,37 +258,36 @@ __FBSDID("$FreeBSD$");
 2001:
 	cmpb	$1,%cl
 	jl	2000f
-	movb	-1(%rsi),%dl
-	movb	%dl,-1(%rdi)
+	movb	7(%rsi),%dl
+	movb	%dl,7(%rdi)
 2000:
 	\end
 	ret
 	ALIGN_TEXT
 2256:
-	decq	%rdi
-	decq	%rsi
 	std
 .if \erms == 1
+	leaq	-1(%rdi,%rcx),%rdi
+	leaq	-1(%rsi,%rcx),%rsi
 	rep
 	movsb
+	cld
 .else
-	andq	$7,%rcx		/* any fractional bytes? */
-	je	3f
-	rep
-	movsb
-3:
-	movq	%rdx,%rcx	/* copy remainder by 32-bit words */
+	leaq	-8(%rdi,%rcx),%rdi
+	leaq	-8(%rsi,%rcx),%rsi
 	shrq	$3,%rcx
-	subq	$7,%rsi
-	subq	$7,%rdi
 	rep
 	movsq
-.endif
 	cld
+	movq	%rdx,%rcx
+	andb	$7,%cl
+	jne	2004b
+.endif
 	\end
 	ret
 .endif
 .endm
 
+
 .macro MEMMOVE_BEGIN
 	movq	%rdi,%rax

Modified: stable/12/sys/amd64/amd64/support.S
==============================================================================
--- stable/12/sys/amd64/amd64/support.S	Wed Dec 19 21:04:06 2018	(r342239)
+++ stable/12/sys/amd64/amd64/support.S	Wed Dec 19 21:25:43 2018	(r342240)
@@ -197,8 +197,6 @@ END(memcmp)
 /*
  * memmove(dst, src, cnt)
  *         rdi, rsi, rdx
- * Adapted from bcopy written by:
- *  ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
  */
 
 /*
@@ -207,11 +205,19 @@ END(memcmp)
  * rsi - source
  * rdx - count
  *
- * The macro possibly clobbers the above and: rcx, r8.
- * It does not clobber rax, r10 nor r11.
+ * The macro possibly clobbers the above and: rcx, r8, r9, r10
+ * It does not clobber rax nor r11.
  */
 .macro MEMMOVE erms overlap begin end
 	\begin
+
+	/*
+	 * For sizes 0..32 all data is read before it is written, so there
+	 * is no correctness issue with direction of copying.
+	 */
+	cmpq	$32,%rcx
+	jbe	101632f
+
 .if \overlap == 1
 	movq	%rdi,%r8
 	subq	%rsi,%r8
@@ -219,13 +225,10 @@ END(memcmp)
 	jb	2f
 .endif
 
-	cmpq	$32,%rcx
-	jb	1016f
-
 	cmpq	$256,%rcx
 	ja	1256f
 
-1032:
+103200:
 	movq	(%rsi),%rdx
 	movq	%rdx,(%rdi)
 	movq	8(%rsi),%rdx
@@ -238,56 +241,62 @@ END(memcmp)
 	leaq	32(%rdi),%rdi
 	subq	$32,%rcx
 	cmpq	$32,%rcx
-	jae	1032b
+	jae	103200b
 	cmpb	$0,%cl
-	jne	1016f
+	jne	101632f
 	\end
 	ret
 	ALIGN_TEXT
-1016:
+101632:
 	cmpb	$16,%cl
-	jl	1008f
+	jl	100816f
 	movq	(%rsi),%rdx
+	movq	8(%rsi),%r8
+	movq	-16(%rsi,%rcx),%r9
+	movq	-8(%rsi,%rcx),%r10
 	movq	%rdx,(%rdi)
-	movq	8(%rsi),%rdx
-	movq	%rdx,8(%rdi)
-	subb	$16,%cl
-	jz	1000f
-	leaq	16(%rsi),%rsi
-	leaq	16(%rdi),%rdi
-1008:
+	movq	%r8,8(%rdi)
+	movq	%r9,-16(%rdi,%rcx)
+	movq	%r10,-8(%rdi,%rcx)
+	\end
+	ret
+	ALIGN_TEXT
+100816:
 	cmpb	$8,%cl
-	jl	1004f
+	jl	100408f
 	movq	(%rsi),%rdx
+	movq	-8(%rsi,%rcx),%r8
 	movq	%rdx,(%rdi)
-	subb	$8,%cl
-	jz	1000f
-	leaq	8(%rsi),%rsi
-	leaq	8(%rdi),%rdi
-1004:
+	movq	%r8,-8(%rdi,%rcx)
+	\end
+	ret
+	ALIGN_TEXT
+100408:
 	cmpb	$4,%cl
-	jl	1002f
+	jl	100204f
 	movl	(%rsi),%edx
+	movl	-4(%rsi,%rcx),%r8d
 	movl	%edx,(%rdi)
-	subb	$4,%cl
-	jz	1000f
-	leaq	4(%rsi),%rsi
-	leaq	4(%rdi),%rdi
-1002:
+	movl	%r8d,-4(%rdi,%rcx)
+	\end
+	ret
+	ALIGN_TEXT
+100204:
 	cmpb	$2,%cl
-	jl	1001f
-	movw	(%rsi),%dx
+	jl	100001f
+	movzwl	(%rsi),%edx
+	movzwl	-2(%rsi,%rcx),%r8d
 	movw	%dx,(%rdi)
-	subb	$2,%cl
-	jz	1000f
-	leaq	2(%rsi),%rsi
-	leaq	2(%rdi),%rdi
-1001:
+	movw	%r8w,-2(%rdi,%rcx)
+	\end
+	ret
+	ALIGN_TEXT
+100001:
 	cmpb	$1,%cl
-	jl	1000f
+	jl	100000f
 	movb	(%rsi),%dl
 	movb	%dl,(%rdi)
-1000:
+100000:
 	\end
 	ret
 
@@ -303,8 +312,8 @@ END(memcmp)
 	rep
 	movsq
 	movq	%rdx,%rcx
-	andb	$7,%cl		/* any bytes left? */
-	jne	1004b
+	andl	$7,%ecx		/* any bytes left? */
+	jne	100408b
 .endif
 	\end
 	ret
@@ -343,24 +352,24 @@ END(memcmp)
 	 */
 	ALIGN_TEXT
 2:
-	addq	%rcx,%rdi
-	addq	%rcx,%rsi
+	cmpq	$256,%rcx
+	ja	2256f
+
+	leaq	-8(%rdi,%rcx),%rdi
+	leaq	-8(%rsi,%rcx),%rsi
+
 	cmpq	$32,%rcx
 	jb	2016f
 
-	cmpq	$256,%rcx
-	ja	2256f
-
 2032:
+	movq	(%rsi),%rdx
+	movq	%rdx,(%rdi)
 	movq	-8(%rsi),%rdx
 	movq	%rdx,-8(%rdi)
 	movq	-16(%rsi),%rdx
 	movq	%rdx,-16(%rdi)
 	movq	-24(%rsi),%rdx
 	movq	%rdx,-24(%rdi)
-	movq	-32(%rsi),%rdx
-	movq	%rdx,-32(%rdi)
 	leaq	-32(%rsi),%rsi
 	leaq	-32(%rdi),%rdi
 	subq	$32,%rcx
@@ -374,10 +383,10 @@ END(memcmp)
 2016:
 	cmpb	$16,%cl
 	jl	2008f
+	movq	(%rsi),%rdx
+	movq	%rdx,(%rdi)
 	movq	-8(%rsi),%rdx
 	movq	%rdx,-8(%rdi)
-	movq	-16(%rsi),%rdx
-	movq	%rdx,-16(%rdi)
 	subb	$16,%cl
 	jz	2000f
 	leaq	-16(%rsi),%rsi
@@ -385,8 +394,8 @@ END(memcmp)
 2008:
 	cmpb	$8,%cl
 	jl	2004f
-	movq	-8(%rsi),%rdx
-	movq	%rdx,-8(%rdi)
+	movq	(%rsi),%rdx
+	movq	%rdx,(%rdi)
 	subb	$8,%cl
 	jz	2000f
 	leaq	-8(%rsi),%rsi
@@ -394,8 +403,8 @@ END(memcmp)
 2004:
 	cmpb	$4,%cl
 	jl	2002f
-	movl	-4(%rsi),%edx
-	movl	%edx,-4(%rdi)
+	movl	4(%rsi),%edx
+	movl	%edx,4(%rdi)
 	subb	$4,%cl
 	jz	2000f
 	leaq	-4(%rsi),%rsi
@@ -403,8 +412,8 @@ END(memcmp)
 2002:
 	cmpb	$2,%cl
 	jl	2001f
-	movw	-2(%rsi),%dx
-	movw	%dx,-2(%rdi)
+	movw	6(%rsi),%dx
+	movw	%dx,6(%rdi)
 	subb	$2,%cl
 	jz	2000f
 	leaq	-2(%rsi),%rsi
@@ -412,33 +421,31 @@ END(memcmp)
 2001:
 	cmpb	$1,%cl
 	jl	2000f
-	movb	-1(%rsi),%dl
-	movb	%dl,-1(%rdi)
+	movb	7(%rsi),%dl
+	movb	%dl,7(%rdi)
 2000:
 	\end
 	ret
 	ALIGN_TEXT
 2256:
-	decq	%rdi
-	decq	%rsi
 	std
 .if \erms == 1
+	leaq	-1(%rdi,%rcx),%rdi
+	leaq	-1(%rsi,%rcx),%rsi
 	rep
 	movsb
+	cld
 .else
-	andq	$7,%rcx		/* any fractional bytes? */
-	je	3f
-	rep
-	movsb
-3:
-	movq	%rdx,%rcx	/* copy remainder by 32-bit words */
+	leaq	-8(%rdi,%rcx),%rdi
+	leaq	-8(%rsi,%rcx),%rsi
 	shrq	$3,%rcx
-	subq	$7,%rsi
-	subq	$7,%rdi
 	rep
 	movsq
-.endif
 	cld
+	movq	%rdx,%rcx
+	andb	$7,%cl
+	jne	2004b
+.endif
 	\end
 	ret
 .endif
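For readers following the small-size path: the overlapping-stores technique this change introduces is easiest to see in C. The sketch below is illustrative only, not the committed code; the helper name small_copy_8_16 is invented for the example and covers just the 8..16 byte case handled at label 100816 above.

#include <stdint.h>
#include <string.h>

/*
 * Illustrative sketch (not the committed code): copy len bytes for
 * 8 <= len <= 16 using two 8-byte accesses that may overlap in the
 * middle.  Both loads are performed before either store, so the
 * result is correct for any overlap of src and dst, in either
 * direction -- the same reason the assembly's 0..32 path needs no
 * direction check.
 */
static void
small_copy_8_16(char *dst, const char *src, size_t len)
{
	uint64_t head, tail;

	memcpy(&head, src, sizeof(head));		/* first 8 bytes */
	memcpy(&tail, src + len - 8, sizeof(tail));	/* last 8 bytes */
	memcpy(dst, &head, sizeof(head));
	memcpy(dst + len - 8, &tail, sizeof(tail));
}

The 16..32 byte case at label 101632 extends the same idea to four loads, from (%rsi), 8(%rsi), -16(%rsi,%rcx) and -8(%rsi,%rcx), followed by four stores.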