From: Mateusz Guzik <mjg@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org
Date: Wed, 19 Dec 2018 21:25:43 +0000 (UTC)
Subject: svn commit: r342240 - in stable/12: lib/libc/amd64/string sys/amd64/amd64
Message-Id: <201812192125.wBJLPhYA006744@repo.freebsd.org>

Author: mjg
Date: Wed Dec 19 21:25:43 2018
New Revision: 342240
URL: https://svnweb.freebsd.org/changeset/base/342240

Log:
  MFC r341272,r341273,r341351

  amd64: tidy up copying backwards in memmove
  amd64: remove stale attribution for memmove work
  amd64: handle small memmove buffers with overlapping stores
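The key idea in the last log item: for copies of 32 bytes or less, all
of the source is loaded into registers before anything is stored, so
the copy is correct for overlapping buffers in either direction and no
direction check is needed.  A minimal C sketch of the 16..32 byte case
(illustrative only, not the committed code; the helper name is made up,
and the fixed-size memcpy() calls stand in for the single load/store
instructions compilers reduce them to):

	#include <stdint.h>
	#include <string.h>

	/* Handle 16 <= len <= 32 for any overlap. */
	static void
	small_memmove_16_32(char *dst, const char *src, size_t len)
	{
		uint64_t h0, h1, t0, t1;

		/* Load both ends of the source first... */
		memcpy(&h0, src, 8);
		memcpy(&h1, src + 8, 8);
		memcpy(&t0, src + len - 16, 8);
		memcpy(&t1, src + len - 8, 8);
		/*
		 * ...then store both ends; for len < 32 the stores
		 * overlap in the middle, which is harmless.
		 */
		memcpy(dst, &h0, 8);
		memcpy(dst + 8, &h1, 8);
		memcpy(dst + len - 16, &t0, 8);
		memcpy(dst + len - 8, &t1, 8);
	}

The assembly below does this at label 101632, with analogous
load-all-then-store-all ladders for the 8..15, 4..7 and 2..3 byte
ranges.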
Modified:
  stable/12/lib/libc/amd64/string/memmove.S
  stable/12/sys/amd64/amd64/support.S

Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/lib/libc/amd64/string/memmove.S
==============================================================================
--- stable/12/lib/libc/amd64/string/memmove.S	Wed Dec 19 21:04:06 2018	(r342239)
+++ stable/12/lib/libc/amd64/string/memmove.S	Wed Dec 19 21:25:43 2018	(r342240)
@@ -34,8 +34,6 @@ __FBSDID("$FreeBSD$");
 /*
  * memmove(dst, src, cnt)
  *         rdi, rsi, rdx
- * Contains parts of bcopy written by:
- *  ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
  */
 
 /*
@@ -44,11 +42,19 @@ __FBSDID("$FreeBSD$");
  * rsi - source
  * rdx - count
  *
- * The macro possibly clobbers the above and: rcx, r8.
- * It does not clobber rax, r10 nor r11.
+ * The macro possibly clobbers the above and: rcx, r8, r9, r10
+ * It does not clobber rax nor r11.
  */
 .macro MEMMOVE erms overlap begin end
 	\begin
+
+	/*
+	 * For sizes 0..32 all data is read before it is written, so there
+	 * is no correctness issue with direction of copying.
+	 */
+	cmpq	$32,%rcx
+	jbe	101632f
+
 .if \overlap == 1
 	movq	%rdi,%r8
 	subq	%rsi,%r8
@@ -56,13 +62,10 @@ __FBSDID("$FreeBSD$");
 	jb	2f
 .endif
 
-	cmpq	$32,%rcx
-	jb	1016f
-
 	cmpq	$256,%rcx
 	ja	1256f
 
-1032:
+103200:
 	movq	(%rsi),%rdx
 	movq	%rdx,(%rdi)
 	movq	8(%rsi),%rdx
@@ -75,56 +78,62 @@ __FBSDID("$FreeBSD$");
 	leaq	32(%rdi),%rdi
 	subq	$32,%rcx
 	cmpq	$32,%rcx
-	jae	1032b
+	jae	103200b
 	cmpb	$0,%cl
-	jne	1016f
+	jne	101632f
 	\end
 	ret
 	ALIGN_TEXT
-1016:
+101632:
 	cmpb	$16,%cl
-	jl	1008f
+	jl	100816f
 	movq	(%rsi),%rdx
+	movq	8(%rsi),%r8
+	movq	-16(%rsi,%rcx),%r9
+	movq	-8(%rsi,%rcx),%r10
 	movq	%rdx,(%rdi)
-	movq	8(%rsi),%rdx
-	movq	%rdx,8(%rdi)
-	subb	$16,%cl
-	jz	1000f
-	leaq	16(%rsi),%rsi
-	leaq	16(%rdi),%rdi
-1008:
+	movq	%r8,8(%rdi)
+	movq	%r9,-16(%rdi,%rcx)
+	movq	%r10,-8(%rdi,%rcx)
+	\end
+	ret
+	ALIGN_TEXT
+100816:
 	cmpb	$8,%cl
-	jl	1004f
+	jl	100408f
 	movq	(%rsi),%rdx
+	movq	-8(%rsi,%rcx),%r8
 	movq	%rdx,(%rdi)
-	subb	$8,%cl
-	jz	1000f
-	leaq	8(%rsi),%rsi
-	leaq	8(%rdi),%rdi
-1004:
+	movq	%r8,-8(%rdi,%rcx)
+	\end
+	ret
+	ALIGN_TEXT
+100408:
 	cmpb	$4,%cl
-	jl	1002f
+	jl	100204f
 	movl	(%rsi),%edx
+	movl	-4(%rsi,%rcx),%r8d
 	movl	%edx,(%rdi)
-	subb	$4,%cl
-	jz	1000f
-	leaq	4(%rsi),%rsi
-	leaq	4(%rdi),%rdi
-1002:
+	movl	%r8d,-4(%rdi,%rcx)
+	\end
+	ret
+	ALIGN_TEXT
+100204:
 	cmpb	$2,%cl
-	jl	1001f
-	movw	(%rsi),%dx
+	jl	100001f
+	movzwl	(%rsi),%edx
+	movzwl	-2(%rsi,%rcx),%r8d
 	movw	%dx,(%rdi)
-	subb	$2,%cl
-	jz	1000f
-	leaq	2(%rsi),%rsi
-	leaq	2(%rdi),%rdi
-1001:
+	movw	%r8w,-2(%rdi,%rcx)
+	\end
+	ret
+	ALIGN_TEXT
+100001:
 	cmpb	$1,%cl
-	jl	1000f
+	jl	100000f
 	movb	(%rsi),%dl
 	movb	%dl,(%rdi)
-1000:
+100000:
 	\end
 	ret
 
@@ -140,8 +149,8 @@ __FBSDID("$FreeBSD$");
 	rep
 	movsq
 	movq	%rdx,%rcx
-	andb	$7,%cl			/* any bytes left? */
-	jne	1004b
+	andl	$7,%ecx			/* any bytes left? */
+	jne	100408b
 .endif
 	\end
 	ret
@@ -180,24 +189,24 @@ __FBSDID("$FreeBSD$");
 	 */
 	ALIGN_TEXT
 2:
-	addq	%rcx,%rdi
-	addq	%rcx,%rsi
+	cmpq	$256,%rcx
+	ja	2256f
 
+	leaq	-8(%rdi,%rcx),%rdi
+	leaq	-8(%rsi,%rcx),%rsi
+
 	cmpq	$32,%rcx
 	jb	2016f
 
-	cmpq	$256,%rcx
-	ja	2256f
-
 2032:
+	movq	(%rsi),%rdx
+	movq	%rdx,(%rdi)
 	movq	-8(%rsi),%rdx
 	movq	%rdx,-8(%rdi)
 	movq	-16(%rsi),%rdx
 	movq	%rdx,-16(%rdi)
 	movq	-24(%rsi),%rdx
 	movq	%rdx,-24(%rdi)
-	movq	-32(%rsi),%rdx
-	movq	%rdx,-32(%rdi)
 	leaq	-32(%rsi),%rsi
 	leaq	-32(%rdi),%rdi
 	subq	$32,%rcx
@@ -211,10 +220,10 @@ __FBSDID("$FreeBSD$");
 2016:
 	cmpb	$16,%cl
 	jl	2008f
+	movq	(%rsi),%rdx
+	movq	%rdx,(%rdi)
 	movq	-8(%rsi),%rdx
 	movq	%rdx,-8(%rdi)
-	movq	-16(%rsi),%rdx
-	movq	%rdx,-16(%rdi)
 	subb	$16,%cl
 	jz	2000f
 	leaq	-16(%rsi),%rsi
@@ -222,8 +231,8 @@ __FBSDID("$FreeBSD$");
 2008:
 	cmpb	$8,%cl
 	jl	2004f
-	movq	-8(%rsi),%rdx
-	movq	%rdx,-8(%rdi)
+	movq	(%rsi),%rdx
+	movq	%rdx,(%rdi)
 	subb	$8,%cl
 	jz	2000f
 	leaq	-8(%rsi),%rsi
@@ -231,8 +240,8 @@ __FBSDID("$FreeBSD$");
 2004:
 	cmpb	$4,%cl
 	jl	2002f
-	movl	-4(%rsi),%edx
-	movl	%edx,-4(%rdi)
+	movl	4(%rsi),%edx
+	movl	%edx,4(%rdi)
 	subb	$4,%cl
 	jz	2000f
 	leaq	-4(%rsi),%rsi
@@ -240,8 +249,8 @@ __FBSDID("$FreeBSD$");
 2002:
 	cmpb	$2,%cl
 	jl	2001f
-	movw	-2(%rsi),%dx
-	movw	%dx,-2(%rdi)
+	movw	6(%rsi),%dx
+	movw	%dx,6(%rdi)
 	subb	$2,%cl
 	jz	2000f
 	leaq	-2(%rsi),%rsi
@@ -249,37 +258,36 @@ __FBSDID("$FreeBSD$");
 2001:
 	cmpb	$1,%cl
 	jl	2000f
-	movb	-1(%rsi),%dl
-	movb	%dl,-1(%rdi)
+	movb	7(%rsi),%dl
+	movb	%dl,7(%rdi)
 2000:
 	\end
 	ret
 	ALIGN_TEXT
 2256:
-	decq	%rdi
-	decq	%rsi
 	std
 .if \erms == 1
+	leaq	-1(%rdi,%rcx),%rdi
+	leaq	-1(%rsi,%rcx),%rsi
 	rep
 	movsb
+	cld
 .else
-	andq	$7,%rcx			/* any fractional bytes? */
-	je	3f
-	rep
-	movsb
-3:
-	movq	%rdx,%rcx		/* copy remainder by 32-bit words */
+	leaq	-8(%rdi,%rcx),%rdi
+	leaq	-8(%rsi,%rcx),%rsi
 	shrq	$3,%rcx
-	subq	$7,%rsi
-	subq	$7,%rdi
 	rep
 	movsq
-.endif
 	cld
+	movq	%rdx,%rcx
+	andb	$7,%cl
+	jne	2004b
+.endif
 	\end
 	ret
 .endif
 .endm
+
 .macro MEMMOVE_BEGIN
 	movq	%rdi,%rax
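The backwards path (label 2:, taken when the destination overlaps the
source from above) is what the first log item tidies up: both pointers
are now biased by -8 up front with leaq, so the 32-byte loop and the
trailing ladder share one addressing scheme, and the direction flag set
with std for the rep movs fallback is cleared immediately afterwards.
The invariant being preserved is simply that an upward-overlapping copy
must run from the high address down, so source bytes are read before
the destination overwrites them; roughly, in C (a sketch of the idea,
not the committed code):

	#include <stdint.h>
	#include <string.h>

	/*
	 * Copy backwards for the case where dst > src and the
	 * regions overlap.
	 */
	static void
	copy_backwards(char *dst, const char *src, size_t len)
	{
		uint64_t tmp;

		while (len >= 8) {
			len -= 8;
			memcpy(&tmp, src + len, 8);	/* read high... */
			memcpy(dst + len, &tmp, 8);	/* ...write high */
		}
		while (len > 0) {
			len--;
			dst[len] = src[len];
		}
	}

The same macro change is applied to the kernel copy of the code below.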
Modified: stable/12/sys/amd64/amd64/support.S
==============================================================================
--- stable/12/sys/amd64/amd64/support.S	Wed Dec 19 21:04:06 2018	(r342239)
+++ stable/12/sys/amd64/amd64/support.S	Wed Dec 19 21:25:43 2018	(r342240)
@@ -197,8 +197,6 @@ END(memcmp)
 /*
  * memmove(dst, src, cnt)
  *         rdi, rsi, rdx
- * Adapted from bcopy written by:
- *  ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
  */
 
 /*
@@ -207,11 +205,19 @@ END(memcmp)
  * rsi - source
  * rdx - count
  *
- * The macro possibly clobbers the above and: rcx, r8.
- * It does not clobber rax, r10 nor r11.
+ * The macro possibly clobbers the above and: rcx, r8, r9, r10
+ * It does not clobber rax nor r11.
  */
 .macro MEMMOVE erms overlap begin end
 	\begin
+
+	/*
+	 * For sizes 0..32 all data is read before it is written, so there
+	 * is no correctness issue with direction of copying.
+	 */
+	cmpq	$32,%rcx
+	jbe	101632f
+
 .if \overlap == 1
 	movq	%rdi,%r8
 	subq	%rsi,%r8
@@ -219,13 +225,10 @@ END(memcmp)
 	jb	2f
 .endif
 
-	cmpq	$32,%rcx
-	jb	1016f
-
 	cmpq	$256,%rcx
 	ja	1256f
 
-1032:
+103200:
 	movq	(%rsi),%rdx
 	movq	%rdx,(%rdi)
 	movq	8(%rsi),%rdx
@@ -238,56 +241,62 @@ END(memcmp)
 	leaq	32(%rdi),%rdi
 	subq	$32,%rcx
 	cmpq	$32,%rcx
-	jae	1032b
+	jae	103200b
 	cmpb	$0,%cl
-	jne	1016f
+	jne	101632f
 	\end
 	ret
 	ALIGN_TEXT
-1016:
+101632:
 	cmpb	$16,%cl
-	jl	1008f
+	jl	100816f
 	movq	(%rsi),%rdx
+	movq	8(%rsi),%r8
+	movq	-16(%rsi,%rcx),%r9
+	movq	-8(%rsi,%rcx),%r10
 	movq	%rdx,(%rdi)
-	movq	8(%rsi),%rdx
-	movq	%rdx,8(%rdi)
-	subb	$16,%cl
-	jz	1000f
-	leaq	16(%rsi),%rsi
-	leaq	16(%rdi),%rdi
-1008:
+	movq	%r8,8(%rdi)
+	movq	%r9,-16(%rdi,%rcx)
+	movq	%r10,-8(%rdi,%rcx)
+	\end
+	ret
+	ALIGN_TEXT
+100816:
 	cmpb	$8,%cl
-	jl	1004f
+	jl	100408f
 	movq	(%rsi),%rdx
+	movq	-8(%rsi,%rcx),%r8
 	movq	%rdx,(%rdi)
-	subb	$8,%cl
-	jz	1000f
-	leaq	8(%rsi),%rsi
-	leaq	8(%rdi),%rdi
-1004:
+	movq	%r8,-8(%rdi,%rcx)
+	\end
+	ret
+	ALIGN_TEXT
+100408:
 	cmpb	$4,%cl
-	jl	1002f
+	jl	100204f
 	movl	(%rsi),%edx
+	movl	-4(%rsi,%rcx),%r8d
 	movl	%edx,(%rdi)
-	subb	$4,%cl
-	jz	1000f
-	leaq	4(%rsi),%rsi
-	leaq	4(%rdi),%rdi
-1002:
+	movl	%r8d,-4(%rdi,%rcx)
+	\end
+	ret
+	ALIGN_TEXT
+100204:
 	cmpb	$2,%cl
-	jl	1001f
-	movw	(%rsi),%dx
+	jl	100001f
+	movzwl	(%rsi),%edx
+	movzwl	-2(%rsi,%rcx),%r8d
 	movw	%dx,(%rdi)
-	subb	$2,%cl
-	jz	1000f
-	leaq	2(%rsi),%rsi
-	leaq	2(%rdi),%rdi
-1001:
+	movw	%r8w,-2(%rdi,%rcx)
+	\end
+	ret
+	ALIGN_TEXT
+100001:
 	cmpb	$1,%cl
-	jl	1000f
+	jl	100000f
 	movb	(%rsi),%dl
 	movb	%dl,(%rdi)
-1000:
+100000:
 	\end
 	ret
 
@@ -303,8 +312,8 @@ END(memcmp)
 	rep
 	movsq
 	movq	%rdx,%rcx
-	andb	$7,%cl			/* any bytes left? */
-	jne	1004b
+	andl	$7,%ecx			/* any bytes left? */
+	jne	100408b
 .endif
 	\end
 	ret
@@ -343,24 +352,24 @@ END(memcmp)
 	 */
 	ALIGN_TEXT
 2:
-	addq	%rcx,%rdi
-	addq	%rcx,%rsi
+	cmpq	$256,%rcx
+	ja	2256f
 
+	leaq	-8(%rdi,%rcx),%rdi
+	leaq	-8(%rsi,%rcx),%rsi
+
 	cmpq	$32,%rcx
 	jb	2016f
 
-	cmpq	$256,%rcx
-	ja	2256f
-
 2032:
+	movq	(%rsi),%rdx
+	movq	%rdx,(%rdi)
 	movq	-8(%rsi),%rdx
 	movq	%rdx,-8(%rdi)
 	movq	-16(%rsi),%rdx
 	movq	%rdx,-16(%rdi)
 	movq	-24(%rsi),%rdx
 	movq	%rdx,-24(%rdi)
-	movq	-32(%rsi),%rdx
-	movq	%rdx,-32(%rdi)
 	leaq	-32(%rsi),%rsi
 	leaq	-32(%rdi),%rdi
 	subq	$32,%rcx
@@ -374,10 +383,10 @@ END(memcmp)
 2016:
 	cmpb	$16,%cl
 	jl	2008f
+	movq	(%rsi),%rdx
+	movq	%rdx,(%rdi)
 	movq	-8(%rsi),%rdx
 	movq	%rdx,-8(%rdi)
-	movq	-16(%rsi),%rdx
-	movq	%rdx,-16(%rdi)
 	subb	$16,%cl
 	jz	2000f
 	leaq	-16(%rsi),%rsi
@@ -385,8 +394,8 @@ END(memcmp)
 2008:
 	cmpb	$8,%cl
 	jl	2004f
-	movq	-8(%rsi),%rdx
-	movq	%rdx,-8(%rdi)
+	movq	(%rsi),%rdx
+	movq	%rdx,(%rdi)
 	subb	$8,%cl
 	jz	2000f
 	leaq	-8(%rsi),%rsi
@@ -394,8 +403,8 @@ END(memcmp)
 2004:
 	cmpb	$4,%cl
 	jl	2002f
-	movl	-4(%rsi),%edx
-	movl	%edx,-4(%rdi)
+	movl	4(%rsi),%edx
+	movl	%edx,4(%rdi)
 	subb	$4,%cl
 	jz	2000f
 	leaq	-4(%rsi),%rsi
@@ -403,8 +412,8 @@ END(memcmp)
 2002:
 	cmpb	$2,%cl
 	jl	2001f
-	movw	-2(%rsi),%dx
-	movw	%dx,-2(%rdi)
+	movw	6(%rsi),%dx
+	movw	%dx,6(%rdi)
 	subb	$2,%cl
 	jz	2000f
 	leaq	-2(%rsi),%rsi
@@ -412,33 +421,31 @@ END(memcmp)
 2001:
 	cmpb	$1,%cl
 	jl	2000f
-	movb	-1(%rsi),%dl
-	movb	%dl,-1(%rdi)
+	movb	7(%rsi),%dl
+	movb	%dl,7(%rdi)
 2000:
 	\end
 	ret
 	ALIGN_TEXT
 2256:
-	decq	%rdi
-	decq	%rsi
 	std
 .if \erms == 1
+	leaq	-1(%rdi,%rcx),%rdi
+	leaq	-1(%rsi,%rcx),%rsi
 	rep
 	movsb
+	cld
 .else
-	andq	$7,%rcx			/* any fractional bytes? */
-	je	3f
-	rep
-	movsb
-3:
-	movq	%rdx,%rcx		/* copy remainder by 32-bit words */
+	leaq	-8(%rdi,%rcx),%rdi
+	leaq	-8(%rsi,%rcx),%rsi
 	shrq	$3,%rcx
-	subq	$7,%rsi
-	subq	$7,%rdi
 	rep
 	movsq
-.endif
 	cld
+	movq	%rdx,%rcx
+	andb	$7,%cl
+	jne	2004b
+.endif
 	\end
 	ret
 .endif
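Overlap bugs in rewrites like this are cheap to smoke-test from
userland against libc's memmove(); a throwaway harness along these
lines (illustrative only, not part of the commit) exercises both copy
directions and all of the new 0..32 special cases:

	#include <assert.h>
	#include <string.h>

	int
	main(void)
	{
		char buf[64], ref[64], tmp[64];

		for (size_t d = 0; d < 16; d++)
			for (size_t s = 0; s < 16; s++)
				for (size_t n = 0; n <= 33; n++) {
					/* Fresh, recognizable contents. */
					for (size_t i = 0; i < sizeof(buf); i++)
						buf[i] = ref[i] = (char)(i * 7 + 1);
					/* Reference result via a bounce buffer. */
					memcpy(tmp, ref + s, n);
					memcpy(ref + d, tmp, n);
					/* Routine under test. */
					memmove(buf + d, buf + s, n);
					assert(memcmp(buf, ref, sizeof(buf)) == 0);
				}
		return (0);
	}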