From owner-svn-src-all@freebsd.org Fri Nov 30 20:58:10 2018 Return-Path: Delivered-To: svn-src-all@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 0F5A411503CD; Fri, 30 Nov 2018 20:58:10 +0000 (UTC) (envelope-from mjg@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id A48DF775C2; Fri, 30 Nov 2018 20:58:09 +0000 (UTC) (envelope-from mjg@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 80E695F8A; Fri, 30 Nov 2018 20:58:09 +0000 (UTC) (envelope-from mjg@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id wAUKw9d7025565; Fri, 30 Nov 2018 20:58:09 GMT (envelope-from mjg@FreeBSD.org) Received: (from mjg@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id wAUKw9UE025564; Fri, 30 Nov 2018 20:58:09 GMT (envelope-from mjg@FreeBSD.org) Message-Id: <201811302058.wAUKw9UE025564@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: mjg set sender to mjg@FreeBSD.org using -f From: Mateusz Guzik Date: Fri, 30 Nov 2018 20:58:09 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r341351 - in head: lib/libc/amd64/string sys/amd64/amd64 X-SVN-Group: head X-SVN-Commit-Author: mjg X-SVN-Commit-Paths: in head: lib/libc/amd64/string sys/amd64/amd64 X-SVN-Commit-Revision: 341351 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
X-Rspamd-Queue-Id: A48DF775C2 X-Spamd-Result: default: False [1.02 / 15.00]; local_wl_from(0.00)[FreeBSD.org]; NEURAL_SPAM_SHORT(0.53)[0.534,0]; NEURAL_SPAM_MEDIUM(0.15)[0.151,0]; NEURAL_SPAM_LONG(0.34)[0.335,0]; ASN(0.00)[asn:11403, ipnet:2610:1c1:1::/48, country:US] X-Rspamd-Server: mx1.freebsd.org X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 30 Nov 2018 20:58:10 -0000 Author: mjg Date: Fri Nov 30 20:58:08 2018 New Revision: 341351 URL: https://svnweb.freebsd.org/changeset/base/341351 Log: amd64: handle small memmove buffers with overlapping stores Handling sizes of > 32 backwards will be updated later. Reviewed by: kib (kernel part) Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D18387 Modified: head/lib/libc/amd64/string/memmove.S head/sys/amd64/amd64/support.S Modified: head/lib/libc/amd64/string/memmove.S ============================================================================== --- head/lib/libc/amd64/string/memmove.S Fri Nov 30 19:59:28 2018 (r341350) +++ head/lib/libc/amd64/string/memmove.S Fri Nov 30 20:58:08 2018 (r341351) @@ -42,11 +42,19 @@ __FBSDID("$FreeBSD$"); * rsi - source * rdx - count * - * The macro possibly clobbers the above and: rcx, r8. - * It does not clobber rax, r10 nor r11. + * The macro possibly clobbers the above and: rcx, r8, r9, r10 + * It does not clobber rax nor r11. */ .macro MEMMOVE erms overlap begin end \begin + + /* + * For sizes 0..32 all data is read before it is written, so there + * is no correctness issue with direction of copying. 
+ */ + cmpq $32,%rcx + jbe 101632f + .if \overlap == 1 movq %rdi,%r8 subq %rsi,%r8 @@ -54,13 +62,10 @@ __FBSDID("$FreeBSD$"); jb 2f .endif - cmpq $32,%rcx - jb 1016f - cmpq $256,%rcx ja 1256f -1032: +103200: movq (%rsi),%rdx movq %rdx,(%rdi) movq 8(%rsi),%rdx @@ -73,56 +78,62 @@ __FBSDID("$FreeBSD$"); leaq 32(%rdi),%rdi subq $32,%rcx cmpq $32,%rcx - jae 1032b + jae 103200b cmpb $0,%cl - jne 1016f + jne 101632f \end ret ALIGN_TEXT -1016: +101632: cmpb $16,%cl - jl 1008f + jl 100816f movq (%rsi),%rdx + movq 8(%rsi),%r8 + movq -16(%rsi,%rcx),%r9 + movq -8(%rsi,%rcx),%r10 movq %rdx,(%rdi) - movq 8(%rsi),%rdx - movq %rdx,8(%rdi) - subb $16,%cl - jz 1000f - leaq 16(%rsi),%rsi - leaq 16(%rdi),%rdi -1008: + movq %r8,8(%rdi) + movq %r9,-16(%rdi,%rcx) + movq %r10,-8(%rdi,%rcx) + \end + ret + ALIGN_TEXT +100816: cmpb $8,%cl - jl 1004f + jl 100408f movq (%rsi),%rdx + movq -8(%rsi,%rcx),%r8 movq %rdx,(%rdi) - subb $8,%cl - jz 1000f - leaq 8(%rsi),%rsi - leaq 8(%rdi),%rdi -1004: + movq %r8,-8(%rdi,%rcx,) + \end + ret + ALIGN_TEXT +100408: cmpb $4,%cl - jl 1002f + jl 100204f movl (%rsi),%edx + movl -4(%rsi,%rcx),%r8d movl %edx,(%rdi) - subb $4,%cl - jz 1000f - leaq 4(%rsi),%rsi - leaq 4(%rdi),%rdi -1002: + movl %r8d,-4(%rdi,%rcx) + \end + ret + ALIGN_TEXT +100204: cmpb $2,%cl - jl 1001f - movw (%rsi),%dx + jl 100001f + movzwl (%rsi),%edx + movzwl -2(%rsi,%rcx),%r8d movw %dx,(%rdi) - subb $2,%cl - jz 1000f - leaq 2(%rsi),%rsi - leaq 2(%rdi),%rdi -1001: + movw %r8w,-2(%rdi,%rcx) + \end + ret + ALIGN_TEXT +100001: cmpb $1,%cl - jl 1000f + jl 100000f movb (%rsi),%dl movb %dl,(%rdi) -1000: +100000: \end ret @@ -136,8 +147,8 @@ __FBSDID("$FreeBSD$"); rep movsq movq %rdx,%rcx - andb $7,%cl /* any bytes left? */ - jne 1004b + andl $7,%ecx /* any bytes left? 
*/ + jne 100408b .endif \end ret @@ -246,6 +257,7 @@ __FBSDID("$FreeBSD$"); ret .endif .endm + .macro MEMMOVE_BEGIN movq %rdi,%rax Modified: head/sys/amd64/amd64/support.S ============================================================================== --- head/sys/amd64/amd64/support.S Fri Nov 30 19:59:28 2018 (r341350) +++ head/sys/amd64/amd64/support.S Fri Nov 30 20:58:08 2018 (r341351) @@ -205,11 +205,19 @@ END(memcmp) * rsi - source * rdx - count * - * The macro possibly clobbers the above and: rcx, r8. - * It does not clobber rax, r10 nor r11. + * The macro possibly clobbers the above and: rcx, r8, r9, r10 + * It does not clobber rax nor r11. */ .macro MEMMOVE erms overlap begin end \begin + + /* + * For sizes 0..32 all data is read before it is written, so there + * is no correctness issue with direction of copying. + */ + cmpq $32,%rcx + jbe 101632f + .if \overlap == 1 movq %rdi,%r8 subq %rsi,%r8 @@ -217,13 +225,10 @@ END(memcmp) jb 2f .endif - cmpq $32,%rcx - jb 1016f - cmpq $256,%rcx ja 1256f -1032: +103200: movq (%rsi),%rdx movq %rdx,(%rdi) movq 8(%rsi),%rdx @@ -236,56 +241,62 @@ END(memcmp) leaq 32(%rdi),%rdi subq $32,%rcx cmpq $32,%rcx - jae 1032b + jae 103200b cmpb $0,%cl - jne 1016f + jne 101632f \end ret ALIGN_TEXT -1016: +101632: cmpb $16,%cl - jl 1008f + jl 100816f movq (%rsi),%rdx + movq 8(%rsi),%r8 + movq -16(%rsi,%rcx),%r9 + movq -8(%rsi,%rcx),%r10 movq %rdx,(%rdi) - movq 8(%rsi),%rdx - movq %rdx,8(%rdi) - subb $16,%cl - jz 1000f - leaq 16(%rsi),%rsi - leaq 16(%rdi),%rdi -1008: + movq %r8,8(%rdi) + movq %r9,-16(%rdi,%rcx) + movq %r10,-8(%rdi,%rcx) + \end + ret + ALIGN_TEXT +100816: cmpb $8,%cl - jl 1004f + jl 100408f movq (%rsi),%rdx + movq -8(%rsi,%rcx),%r8 movq %rdx,(%rdi) - subb $8,%cl - jz 1000f - leaq 8(%rsi),%rsi - leaq 8(%rdi),%rdi -1004: + movq %r8,-8(%rdi,%rcx,) + \end + ret + ALIGN_TEXT +100408: cmpb $4,%cl - jl 1002f + jl 100204f movl (%rsi),%edx + movl -4(%rsi,%rcx),%r8d movl %edx,(%rdi) - subb $4,%cl - jz 1000f - leaq 4(%rsi),%rsi - 
leaq 4(%rdi),%rdi -1002: + movl %r8d,-4(%rdi,%rcx) + \end + ret + ALIGN_TEXT +100204: cmpb $2,%cl - jl 1001f - movw (%rsi),%dx + jl 100001f + movzwl (%rsi),%edx + movzwl -2(%rsi,%rcx),%r8d movw %dx,(%rdi) - subb $2,%cl - jz 1000f - leaq 2(%rsi),%rsi - leaq 2(%rdi),%rdi -1001: + movw %r8w,-2(%rdi,%rcx) + \end + ret + ALIGN_TEXT +100001: cmpb $1,%cl - jl 1000f + jl 100000f movb (%rsi),%dl movb %dl,(%rdi) -1000: +100000: \end ret @@ -299,8 +310,8 @@ END(memcmp) rep movsq movq %rdx,%rcx - andb $7,%cl /* any bytes left? */ - jne 1004b + andl $7,%ecx /* any bytes left? */ + jne 100408b .endif \end ret