Date: Fri, 16 Nov 2018 00:44:23 +0000 (UTC)
From: Mateusz Guzik <mjg@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r340472 - in head: lib/libc/amd64/string sys/amd64/amd64
Message-ID: <201811160044.wAG0iNjM011630@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: mjg
Date: Fri Nov 16 00:44:22 2018
New Revision: 340472
URL: https://svnweb.freebsd.org/changeset/base/340472

Log:
  amd64: handle small memset buffers with overlapping stores

  Instead of jumping to locations which store the exact number of bytes,
  use displacement to move the destination.

  In particular the following clears an area between 8-16 (inclusive)
  branch-free:
    movq %r10,(%rdi)
    movq %r10,-8(%rdi,%rcx)

  For instance for rcx of 10 the second line is rdi + 10 - 8 = rdi + 2.
  Writing 8 bytes starting at that offset overlaps with 6 bytes written
  previously and writes 2 new, giving 10 in total.

  Provides a nice win for smaller stores. Other ones are erratic depending
  on the microarchitecture.

  General idea taken from NetBSD (restricted use of the trick) and bionic
  string functions (use for various ranges like in this patch).

  Reviewed by: kib (previous version)
  Sponsored by: The FreeBSD Foundation
  Differential Revision: https://reviews.freebsd.org/D17660

Modified:
  head/lib/libc/amd64/string/memset.S
  head/sys/amd64/amd64/support.S

Modified: head/lib/libc/amd64/string/memset.S ============================================================================== --- head/lib/libc/amd64/string/memset.S Fri Nov 16 00:03:31 2018 (r340471) +++ head/lib/libc/amd64/string/memset.S Fri Nov 16 00:44:22 2018 (r340472) @@ -41,12 +41,12 @@ __FBSDID("$FreeBSD$"); imulq %r8,%r10 cmpq $32,%rcx - jb 1016f + jbe 101632f cmpq $256,%rcx ja 1256f -1032: +103200: movq %r10,(%rdi) movq %r10,8(%rdi) movq %r10,16(%rdi) @@ -54,43 +54,49 @@ __FBSDID("$FreeBSD$"); leaq 32(%rdi),%rdi subq $32,%rcx cmpq $32,%rcx - jae 1032b - cmpb $0,%cl - je 1000f -1016: + ja 103200b cmpb $16,%cl - jl 1008f + ja 201632f + movq %r10,-16(%rdi,%rcx) + movq %r10,-8(%rdi,%rcx) + ret + ALIGN_TEXT +101632: + cmpb $16,%cl + jl 100816f +201632: movq %r10,(%rdi) movq %r10,8(%rdi) - subb $16,%cl - jz 1000f - leaq 16(%rdi),%rdi -1008: + movq %r10,-16(%rdi,%rcx) + movq %r10,-8(%rdi,%rcx) + ret + ALIGN_TEXT +100816: cmpb $8,%cl - 
jl 1004f + jl 100408f movq %r10,(%rdi) - subb $8,%cl - jz 1000f - leaq 8(%rdi),%rdi -1004: + movq %r10,-8(%rdi,%rcx) + ret + ALIGN_TEXT +100408: cmpb $4,%cl - jl 1002f + jl 100204f movl %r10d,(%rdi) - subb $4,%cl - jz 1000f - leaq 4(%rdi),%rdi -1002: + movl %r10d,-4(%rdi,%rcx) + ret + ALIGN_TEXT +100204: cmpb $2,%cl - jl 1001f + jl 100001f movw %r10w,(%rdi) - subb $2,%cl - jz 1000f - leaq 2(%rdi),%rdi -1001: - cmpb $1,%cl - jl 1000f + movw %r10w,-2(%rdi,%rcx) + ret + ALIGN_TEXT +100001: + cmpb $0,%cl + je 100000f movb %r10b,(%rdi) -1000: +100000: ret ALIGN_TEXT 1256: @@ -127,6 +133,7 @@ __FBSDID("$FreeBSD$"); leaq 16(%rdi,%r8),%rdi jmp 1b .endm + ENTRY(memset) MEMSET erms=0 Modified: head/sys/amd64/amd64/support.S ============================================================================== --- head/sys/amd64/amd64/support.S Fri Nov 16 00:03:31 2018 (r340471) +++ head/sys/amd64/amd64/support.S Fri Nov 16 00:44:22 2018 (r340472) @@ -459,12 +459,12 @@ END(memcpy_erms) imulq %r8,%r10 cmpq $32,%rcx - jb 1016f + jbe 101632f cmpq $256,%rcx ja 1256f -1032: +103200: movq %r10,(%rdi) movq %r10,8(%rdi) movq %r10,16(%rdi) @@ -472,43 +472,54 @@ END(memcpy_erms) leaq 32(%rdi),%rdi subq $32,%rcx cmpq $32,%rcx - jae 1032b - cmpb $0,%cl - je 1000f -1016: + ja 103200b cmpb $16,%cl - jl 1008f + ja 201632f + movq %r10,-16(%rdi,%rcx) + movq %r10,-8(%rdi,%rcx) + POP_FRAME_POINTER + ret + ALIGN_TEXT +101632: + cmpb $16,%cl + jl 100816f +201632: movq %r10,(%rdi) movq %r10,8(%rdi) - subb $16,%cl - jz 1000f - leaq 16(%rdi),%rdi -1008: + movq %r10,-16(%rdi,%rcx) + movq %r10,-8(%rdi,%rcx) + POP_FRAME_POINTER + ret + ALIGN_TEXT +100816: cmpb $8,%cl - jl 1004f + jl 100408f movq %r10,(%rdi) - subb $8,%cl - jz 1000f - leaq 8(%rdi),%rdi -1004: + movq %r10,-8(%rdi,%rcx) + POP_FRAME_POINTER + ret + ALIGN_TEXT +100408: cmpb $4,%cl - jl 1002f + jl 100204f movl %r10d,(%rdi) - subb $4,%cl - jz 1000f - leaq 4(%rdi),%rdi -1002: + movl %r10d,-4(%rdi,%rcx) + POP_FRAME_POINTER + ret + ALIGN_TEXT +100204: 
cmpb $2,%cl - jl 1001f + jl 100001f movw %r10w,(%rdi) - subb $2,%cl - jz 1000f - leaq 2(%rdi),%rdi -1001: - cmpb $1,%cl - jl 1000f + movw %r10w,-2(%rdi,%rcx) + POP_FRAME_POINTER + ret + ALIGN_TEXT +100001: + cmpb $0,%cl + je 100000f movb %r10b,(%rdi) -1000: +100000: POP_FRAME_POINTER ret ALIGN_TEXT
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201811160044.wAG0iNjM011630>