Date: Tue, 20 Nov 2018 17:10:44 +0000 (UTC) From: Mateusz Guzik <mjg@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org Subject: svn commit: r340684 - in stable/12: . lib/libc/amd64/string sys/amd64/amd64 Message-ID: <201811201710.wAKHAimp000800@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: mjg Date: Tue Nov 20 17:10:44 2018 New Revision: 340684 URL: https://svnweb.freebsd.org/changeset/base/340684 Log: MFC r339531,r339579,r340252,r340463,r340464,340472,r340587 amd64: tidy up memset to have rax set earlier for small sizes amd64: finish the tail in memset with an overlapping store amd64: align memset buffers to 16 bytes before using rep stos amd64: convert libc bzero to a C func to avoid future bloat amd64: sync up libc memset with the kernel version amd64: handle small memset buffers with overlapping stores Fix -DNO_CLEAN amd64 build after r340463 Added: stable/12/lib/libc/amd64/string/bzero.c - copied unchanged from r340463, head/lib/libc/amd64/string/bzero.c Deleted: stable/12/lib/libc/amd64/string/bzero.S Modified: stable/12/Makefile.inc1 stable/12/lib/libc/amd64/string/Makefile.inc stable/12/lib/libc/amd64/string/memset.S stable/12/sys/amd64/amd64/support.S Directory Properties: stable/12/ (props changed) Modified: stable/12/Makefile.inc1 ============================================================================== --- stable/12/Makefile.inc1 Tue Nov 20 17:05:32 2018 (r340683) +++ stable/12/Makefile.inc1 Tue Nov 20 17:10:44 2018 (r340684) @@ -948,6 +948,13 @@ _cleanobj_fast_depend_hack: .PHONY ${LIBCOMPAT:D${LIBCOMPAT_OBJTOP}/lib/libc/.depend.${f}.*}; \ fi .endfor +# 20181115 r340463 bzero reimplemented as .c + @if [ -e "${OBJTOP}/lib/libc/.depend.bzero.o" ] && \ + egrep -qw 'bzero\.[sS]' ${OBJTOP}/lib/libc/.depend.bzero.o; then \ + echo "Removing stale dependencies for bzero"; \ + rm -f ${OBJTOP}/lib/libc/.depend.bzero.* \ + ${LIBCOMPAT:D${LIBCOMPAT_OBJTOP}/lib/libc/.depend.bzero.*}; \ + fi # 20181009 track migration from ntp's embedded libevent to updated one @if [ -e "${OBJTOP}/usr.sbin/ntp/libntpevent/.depend.bufferevent_openssl.o" ] && \ egrep -q 'contrib/ntp/sntp/libevent/bufferevent_openssl.c' \ Modified: stable/12/lib/libc/amd64/string/Makefile.inc ============================================================================== --- stable/12/lib/libc/amd64/string/Makefile.inc Tue Nov 20 17:05:32 2018 (r340683) +++ stable/12/lib/libc/amd64/string/Makefile.inc Tue Nov 20 17:10:44 2018 (r340684) @@ -2,7 +2,6 @@ MDSRCS+= \ bcmp.S \ - bzero.S \ memcmp.S \ memcpy.S \ memmove.S \ Copied: stable/12/lib/libc/amd64/string/bzero.c (from r340463, head/lib/libc/amd64/string/bzero.c) ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ stable/12/lib/libc/amd64/string/bzero.c Tue Nov 20 17:10:44 2018 (r340684, copy of r340463, head/lib/libc/amd64/string/bzero.c) @@ -0,0 +1,15 @@ +/*- + * Public domain. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <string.h> + +void +bzero(void *b, size_t len) +{ + + memset(b, 0, len); +} Modified: stable/12/lib/libc/amd64/string/memset.S ============================================================================== --- stable/12/lib/libc/amd64/string/memset.S Tue Nov 20 17:05:32 2018 (r340683) +++ stable/12/lib/libc/amd64/string/memset.S Tue Nov 20 17:10:44 2018 (r340684) @@ -31,101 +31,112 @@ #include <machine/asm.h> __FBSDID("$FreeBSD$"); -.macro MEMSET bzero erms -.if \bzero == 1 - movq %rsi,%rcx - movq %rsi,%rdx - xorl %eax,%eax -.else - movq %rdi,%r9 +#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */ + +.macro MEMSET erms + movq %rdi,%rax movq %rdx,%rcx movzbq %sil,%r8 - movabs $0x0101010101010101,%rax - imulq %r8,%rax -.endif + movabs $0x0101010101010101,%r10 + imulq %r8,%r10 cmpq $32,%rcx - jb 1016f + jbe 101632f cmpq $256,%rcx ja 1256f -1032: - movq %rax,(%rdi) - movq %rax,8(%rdi) - movq %rax,16(%rdi) - movq %rax,24(%rdi) +103200: + movq %r10,(%rdi) + movq %r10,8(%rdi) + movq %r10,16(%rdi) + movq %r10,24(%rdi) leaq 32(%rdi),%rdi subq $32,%rcx cmpq $32,%rcx - jae 1032b - cmpb $0,%cl - je 1000f -1016: + ja 103200b cmpb $16,%cl - jl 1008f - movq %rax,(%rdi) - movq %rax,8(%rdi) - subb $16,%cl - jz 1000f - leaq 16(%rdi),%rdi -1008: + ja 201632f + movq %r10,-16(%rdi,%rcx) + movq %r10,-8(%rdi,%rcx) + ret + ALIGN_TEXT +101632: + cmpb $16,%cl + jl 100816f +201632: + movq %r10,(%rdi) + movq %r10,8(%rdi) + movq %r10,-16(%rdi,%rcx) + movq %r10,-8(%rdi,%rcx) + ret + ALIGN_TEXT +100816: cmpb $8,%cl - jl 1004f - movq %rax,(%rdi) - subb $8,%cl - jz 1000f - leaq 8(%rdi),%rdi -1004: + jl 100408f + movq %r10,(%rdi) + movq %r10,-8(%rdi,%rcx) + ret + ALIGN_TEXT +100408: cmpb $4,%cl - jl 1002f - movl %eax,(%rdi) - subb $4,%cl - jz 1000f - leaq 4(%rdi),%rdi -1002: + jl 100204f + movl %r10d,(%rdi) + movl %r10d,-4(%rdi,%rcx) + ret + ALIGN_TEXT +100204: cmpb $2,%cl - jl 1001f - movw %ax,(%rdi) - subb $2,%cl - jz 1000f - leaq 2(%rdi),%rdi -1001: - cmpb $1,%cl - jl 1000f - movb %al,(%rdi) -1000: -.if \bzero == 0 - movq %r9,%rax -.endif + jl 100001f + movw %r10w,(%rdi) + movw %r10w,-2(%rdi,%rcx) ret - + ALIGN_TEXT +100001: + cmpb $0,%cl + je 100000f + movb %r10b,(%rdi) +100000: + ret + ALIGN_TEXT 1256: + movq %rdi,%r9 + movq %r10,%rax + testl $15,%edi + jnz 3f +1: .if \erms == 1 rep stosb + movq %r9,%rax .else + movq %rcx,%rdx shrq $3,%rcx rep stosq - movq %rdx,%rcx - andb $7,%cl - jne 1004b -.endif -.if \bzero == 0 movq %r9,%rax + andl $7,%edx + jnz 2f + ret +2: + movq %r10,-8(%rdi,%rdx) .endif ret + ALIGN_TEXT +3: + movq %r10,(%rdi) + movq %r10,8(%rdi) + movq %rdi,%r8 + andq $15,%r8 + leaq -16(%rcx,%r8),%rcx + neg %r8 + leaq 16(%rdi,%r8),%rdi + jmp 1b .endm -#ifndef BZERO + ENTRY(memset) - MEMSET bzero=0 erms=0 + MEMSET erms=0 END(memset) -#else -ENTRY(bzero) - MEMSET bzero=1 erms=0 -END(bzero) -#endif .section .note.GNU-stack,"",%progbits Modified: stable/12/sys/amd64/amd64/support.S ============================================================================== --- stable/12/sys/amd64/amd64/support.S Tue Nov 20 17:05:32 2018 (r340683) +++ stable/12/sys/amd64/amd64/support.S Tue Nov 20 17:10:44 2018 (r340684) @@ -452,82 +452,112 @@ END(memcpy_erms) */ .macro MEMSET erms PUSH_FRAME_POINTER - movq %rdi,%r9 + movq %rdi,%rax movq %rdx,%rcx movzbq %sil,%r8 - movabs $0x0101010101010101,%rax - imulq %r8,%rax + movabs $0x0101010101010101,%r10 + imulq %r8,%r10 cmpq $32,%rcx - jb 1016f + jbe 101632f cmpq $256,%rcx ja 1256f -1032: - movq %rax,(%rdi) - movq %rax,8(%rdi) - movq %rax,16(%rdi) - movq %rax,24(%rdi) +103200: + movq %r10,(%rdi) + movq %r10,8(%rdi) + movq %r10,16(%rdi) + movq %r10,24(%rdi) leaq 32(%rdi),%rdi subq $32,%rcx cmpq $32,%rcx - jae 1032b - cmpb $0,%cl - je 1000f -1016: + ja 103200b cmpb $16,%cl - jl 1008f - movq %rax,(%rdi) - movq %rax,8(%rdi) - subb $16,%cl - jz 1000f - leaq 16(%rdi),%rdi -1008: + ja 201632f + movq %r10,-16(%rdi,%rcx) + movq %r10,-8(%rdi,%rcx) + POP_FRAME_POINTER + ret + ALIGN_TEXT +101632: + cmpb $16,%cl + jl 100816f +201632: + movq %r10,(%rdi) + movq %r10,8(%rdi) + movq %r10,-16(%rdi,%rcx) + movq %r10,-8(%rdi,%rcx) + POP_FRAME_POINTER + ret + ALIGN_TEXT +100816: cmpb $8,%cl - jl 1004f - movq %rax,(%rdi) - subb $8,%cl - jz 1000f - leaq 8(%rdi),%rdi -1004: + jl 100408f + movq %r10,(%rdi) + movq %r10,-8(%rdi,%rcx) + POP_FRAME_POINTER + ret + ALIGN_TEXT +100408: cmpb $4,%cl - jl 1002f - movl %eax,(%rdi) - subb $4,%cl - jz 1000f - leaq 4(%rdi),%rdi -1002: + jl 100204f + movl %r10d,(%rdi) + movl %r10d,-4(%rdi,%rcx) + POP_FRAME_POINTER + ret + ALIGN_TEXT +100204: cmpb $2,%cl - jl 1001f - movw %ax,(%rdi) - subb $2,%cl - jz 1000f - leaq 2(%rdi),%rdi -1001: - cmpb $1,%cl - jl 1000f - movb %al,(%rdi) -1000: - movq %r9,%rax + jl 100001f + movw %r10w,(%rdi) + movw %r10w,-2(%rdi,%rcx) POP_FRAME_POINTER ret ALIGN_TEXT +100001: + cmpb $0,%cl + je 100000f + movb %r10b,(%rdi) +100000: + POP_FRAME_POINTER + ret + ALIGN_TEXT 1256: + movq %rdi,%r9 + movq %r10,%rax + testl $15,%edi + jnz 3f +1: .if \erms == 1 rep stosb + movq %r9,%rax .else + movq %rcx,%rdx shrq $3,%rcx rep stosq - movq %rdx,%rcx - andb $7,%cl - jne 1004b -.endif movq %r9,%rax + andl $7,%edx + jnz 2f POP_FRAME_POINTER ret +2: + movq %r10,-8(%rdi,%rdx) +.endif + POP_FRAME_POINTER + ret + ALIGN_TEXT +3: + movq %r10,(%rdi) + movq %r10,8(%rdi) + movq %rdi,%r8 + andq $15,%r8 + leaq -16(%rcx,%r8),%rcx + neg %r8 + leaq 16(%rdi,%r8),%rdi + jmp 1b .endm ENTRY(memset_std)
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201811201710.wAKHAimp000800>