From owner-svn-src-all@freebsd.org  Fri Nov 30 00:45:11 2018
Message-Id: <201811300045.wAU0jA5t087056@repo.freebsd.org>
From: Mateusz Guzik
Date: Fri, 30 Nov 2018 00:45:10 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r341272 - in head: lib/libc/amd64/string sys/amd64/amd64

Author: mjg
Date: Fri Nov 30 00:45:10 2018
New Revision: 341272
URL: https://svnweb.freebsd.org/changeset/base/341272

Log:
  amd64: tidy up copying backwards in memmove

  In the non-ERMS case the code used to handle possible trailing bytes with
  movsb first and then followed it up with movsq.  This also happened to
  alter how calculations were done for other cases.

  Handle the tail with regular movs, just like when copying forward.

  Use leaq to calculate the right offset from the get go, instead of doing
  separate add and sub.  This adjusts the offset for non-rep cases so that
  it can be used to handle the tail.

  The routine is still a work in progress.

  Sponsored by:	The FreeBSD Foundation
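[Editor's note] For readers who would rather not trace the assembly below, here is a
rough C sketch of the backwards-copy shape the routine now follows.  It is purely
illustrative and not part of the commit: the function name copy_backwards is made up,
and the sketch only mirrors the overall structure (end pointers computed once, 8-byte
words copied from the end, plain moves for the tail).  The committed code additionally
unrolls 32-byte chunks, switches to std; rep movsb (ERMS) or rep movsq above 256
bytes, and finishes the 1-16 byte tail with fixed-offset moves from the leaq-biased
pointers.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Illustrative sketch only -- not the committed assembly. */
static void
copy_backwards(void *dst, const void *src, size_t len)
{
	unsigned char *d = (unsigned char *)dst + len;
	const unsigned char *s = (const unsigned char *)src + len;
	uint64_t word;

	/* Whole 8-byte words, highest addresses first. */
	while (len >= 8) {
		d -= 8;
		s -= 8;
		memcpy(&word, s, sizeof(word));	/* unaligned 8-byte load */
		memcpy(d, &word, sizeof(word));	/* unaligned 8-byte store */
		len -= 8;
	}

	/* Remaining 0-7 bytes, still copied back to front with plain moves. */
	while (len > 0) {
		len--;
		d--;
		s--;
		*d = *s;
	}
}

Copying backwards is what memmove needs when the destination overlaps the source at a
higher address.  Handling the tail with regular moves also lets the non-ERMS rep movsq
branch reuse the same tail labels, which is what the jne 2004b at its end does in the
diff below.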
Modified:
  head/lib/libc/amd64/string/memmove.S
  head/sys/amd64/amd64/support.S

Modified: head/lib/libc/amd64/string/memmove.S
==============================================================================
--- head/lib/libc/amd64/string/memmove.S	Fri Nov 30 00:00:51 2018	(r341271)
+++ head/lib/libc/amd64/string/memmove.S	Fri Nov 30 00:45:10 2018	(r341272)
@@ -150,24 +150,24 @@ __FBSDID("$FreeBSD$");
 	 */
 	ALIGN_TEXT
 2:
-	addq	%rcx,%rdi
-	addq	%rcx,%rsi
+	cmpq	$256,%rcx
+	ja	2256f
+	leaq	-8(%rdi,%rcx),%rdi
+	leaq	-8(%rsi,%rcx),%rsi
+
 	cmpq	$32,%rcx
 	jb	2016f
 
-	cmpq	$256,%rcx
-	ja	2256f
-
 2032:
+	movq	(%rsi),%rdx
+	movq	%rdx,(%rdi)
 	movq	-8(%rsi),%rdx
 	movq	%rdx,-8(%rdi)
 	movq	-16(%rsi),%rdx
 	movq	%rdx,-16(%rdi)
 	movq	-24(%rsi),%rdx
 	movq	%rdx,-24(%rdi)
-	movq	-32(%rsi),%rdx
-	movq	%rdx,-32(%rdi)
 	leaq	-32(%rsi),%rsi
 	leaq	-32(%rdi),%rdi
 	subq	$32,%rcx
@@ -181,10 +181,10 @@ __FBSDID("$FreeBSD$");
 2016:
 	cmpb	$16,%cl
 	jl	2008f
+	movq	(%rsi),%rdx
+	movq	%rdx,(%rdi)
 	movq	-8(%rsi),%rdx
 	movq	%rdx,-8(%rdi)
-	movq	-16(%rsi),%rdx
-	movq	%rdx,-16(%rdi)
 	subb	$16,%cl
 	jz	2000f
 	leaq	-16(%rsi),%rsi
@@ -192,8 +192,8 @@ __FBSDID("$FreeBSD$");
 2008:
 	cmpb	$8,%cl
 	jl	2004f
-	movq	-8(%rsi),%rdx
-	movq	%rdx,-8(%rdi)
+	movq	(%rsi),%rdx
+	movq	%rdx,(%rdi)
 	subb	$8,%cl
 	jz	2000f
 	leaq	-8(%rsi),%rsi
@@ -201,8 +201,8 @@ __FBSDID("$FreeBSD$");
 2004:
 	cmpb	$4,%cl
 	jl	2002f
-	movl	-4(%rsi),%edx
-	movl	%edx,-4(%rdi)
+	movl	4(%rsi),%edx
+	movl	%edx,4(%rdi)
 	subb	$4,%cl
 	jz	2000f
 	leaq	-4(%rsi),%rsi
@@ -210,8 +210,8 @@ __FBSDID("$FreeBSD$");
 2002:
 	cmpb	$2,%cl
 	jl	2001f
-	movw	-2(%rsi),%dx
-	movw	%dx,-2(%rdi)
+	movw	6(%rsi),%dx
+	movw	%dx,6(%rdi)
 	subb	$2,%cl
 	jz	2000f
 	leaq	-2(%rsi),%rsi
@@ -219,33 +219,31 @@ __FBSDID("$FreeBSD$");
 2001:
 	cmpb	$1,%cl
 	jl	2000f
-	movb	-1(%rsi),%dl
-	movb	%dl,-1(%rdi)
+	movb	7(%rsi),%dl
+	movb	%dl,7(%rdi)
 2000:
 	\end
 	ret
 	ALIGN_TEXT
 2256:
-	decq	%rdi
-	decq	%rsi
 	std
 .if \erms == 1
+	leaq	-1(%rdi,%rcx),%rdi
+	leaq	-1(%rsi,%rcx),%rsi
 	rep
 	movsb
+	cld
 .else
-	andq	$7,%rcx	/* any fractional bytes? */
-	je	3f
-	rep
-	movsb
-3:
-	movq	%rdx,%rcx	/* copy remainder by 32-bit words */
+	leaq	-8(%rdi,%rcx),%rdi
+	leaq	-8(%rsi,%rcx),%rsi
 	shrq	$3,%rcx
-	subq	$7,%rsi
-	subq	$7,%rdi
 	rep
 	movsq
-.endif
 	cld
+	movq	%rdx,%rcx
+	andb	$7,%cl
+	jne	2004b
+.endif
 	\end
 	ret
 .endif

Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S	Fri Nov 30 00:00:51 2018	(r341271)
+++ head/sys/amd64/amd64/support.S	Fri Nov 30 00:45:10 2018	(r341272)
@@ -313,24 +313,24 @@ END(memcmp)
 	 */
 	ALIGN_TEXT
 2:
-	addq	%rcx,%rdi
-	addq	%rcx,%rsi
+	cmpq	$256,%rcx
+	ja	2256f
+	leaq	-8(%rdi,%rcx),%rdi
+	leaq	-8(%rsi,%rcx),%rsi
+
 	cmpq	$32,%rcx
 	jb	2016f
 
-	cmpq	$256,%rcx
-	ja	2256f
-
 2032:
+	movq	(%rsi),%rdx
+	movq	%rdx,(%rdi)
 	movq	-8(%rsi),%rdx
 	movq	%rdx,-8(%rdi)
 	movq	-16(%rsi),%rdx
 	movq	%rdx,-16(%rdi)
 	movq	-24(%rsi),%rdx
 	movq	%rdx,-24(%rdi)
-	movq	-32(%rsi),%rdx
-	movq	%rdx,-32(%rdi)
 	leaq	-32(%rsi),%rsi
 	leaq	-32(%rdi),%rdi
 	subq	$32,%rcx
@@ -344,10 +344,10 @@ END(memcmp)
 2016:
 	cmpb	$16,%cl
 	jl	2008f
+	movq	(%rsi),%rdx
+	movq	%rdx,(%rdi)
 	movq	-8(%rsi),%rdx
 	movq	%rdx,-8(%rdi)
-	movq	-16(%rsi),%rdx
-	movq	%rdx,-16(%rdi)
 	subb	$16,%cl
 	jz	2000f
 	leaq	-16(%rsi),%rsi
@@ -355,8 +355,8 @@ END(memcmp)
 2008:
 	cmpb	$8,%cl
 	jl	2004f
-	movq	-8(%rsi),%rdx
-	movq	%rdx,-8(%rdi)
+	movq	(%rsi),%rdx
+	movq	%rdx,(%rdi)
 	subb	$8,%cl
 	jz	2000f
 	leaq	-8(%rsi),%rsi
@@ -364,8 +364,8 @@ END(memcmp)
 2004:
 	cmpb	$4,%cl
 	jl	2002f
-	movl	-4(%rsi),%edx
-	movl	%edx,-4(%rdi)
+	movl	4(%rsi),%edx
+	movl	%edx,4(%rdi)
 	subb	$4,%cl
 	jz	2000f
 	leaq	-4(%rsi),%rsi
@@ -373,8 +373,8 @@ END(memcmp)
 2002:
 	cmpb	$2,%cl
 	jl	2001f
-	movw	-2(%rsi),%dx
-	movw	%dx,-2(%rdi)
+	movw	6(%rsi),%dx
+	movw	%dx,6(%rdi)
 	subb	$2,%cl
 	jz	2000f
 	leaq	-2(%rsi),%rsi
@@ -382,33 +382,31 @@ END(memcmp)
 2001:
 	cmpb	$1,%cl
 	jl	2000f
-	movb	-1(%rsi),%dl
-	movb	%dl,-1(%rdi)
+	movb	7(%rsi),%dl
+	movb	%dl,7(%rdi)
 2000:
 	\end
 	ret
 	ALIGN_TEXT
 2256:
-	decq	%rdi
-	decq	%rsi
 	std
 .if \erms == 1
+	leaq	-1(%rdi,%rcx),%rdi
+	leaq	-1(%rsi,%rcx),%rsi
 	rep
 	movsb
+	cld
 .else
-	andq	$7,%rcx	/* any fractional bytes? */
-	je	3f
-	rep
-	movsb
-3:
-	movq	%rdx,%rcx	/* copy remainder by 32-bit words */
+	leaq	-8(%rdi,%rcx),%rdi
+	leaq	-8(%rsi,%rcx),%rsi
 	shrq	$3,%rcx
-	subq	$7,%rsi
-	subq	$7,%rdi
 	rep
 	movsq
-.endif
 	cld
+	movq	%rdx,%rcx
+	andb	$7,%cl
+	jne	2004b
+.endif
 	\end
 	ret
 .endif
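[Editor's note] A small usage example in plain ISO C, added here only for illustration
and not part of the commit: the backwards path in the routine above is what makes a
call like this safe, where the destination overlaps the source at a higher address.

#include <stdio.h>
#include <string.h>

int
main(void)
{
	char buf[] = "0123456789";

	/*
	 * dst (buf + 2) overlaps src (buf) at a higher address, so the copy
	 * must proceed from the end; a naive forward copy would overwrite
	 * source bytes before they are read.
	 */
	memmove(buf + 2, buf, 7);

	printf("%s\n", buf);	/* prints 0101234569 */
	return (0);
}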