Skip site navigation (1) | Skip section navigation (2)
Date:      Thu, 8 Nov 2018 15:12:37 +0000 (UTC)
From:      Mateusz Guzik <mjg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r340252 - head/sys/amd64/amd64
Message-ID:  <201811081512.wA8FCbqG086293@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mjg
Date: Thu Nov  8 15:12:36 2018
New Revision: 340252
URL: https://svnweb.freebsd.org/changeset/base/340252

Log:
  amd64: align memset buffers to 16 bytes before using rep stos
  
  Both the Intel manual and Agner Fog's docs suggest aligning to 16 bytes.
  
  See the review for benchmark results.
  
  Reviewed by:	kib (previous version)
  Sponsored by:	The FreeBSD Foundation
  Differential Revision:	https://reviews.freebsd.org/D17661

Modified:
  head/sys/amd64/amd64/support.S

Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S	Thu Nov  8 14:46:21 2018	(r340251)
+++ head/sys/amd64/amd64/support.S	Thu Nov  8 15:12:36 2018	(r340252)
@@ -515,24 +515,38 @@ END(memcpy_erms)
 1256:
 	movq	%rdi,%r9
 	movq	%r10,%rax
+	testl	$15,%edi
+	jnz	3f
+1:
 .if \erms == 1
 	rep
 	stosb
 	movq	%r9,%rax
 .else
+	movq	%rcx,%rdx
 	shrq	$3,%rcx
 	rep
 	stosq
 	movq	%r9,%rax
 	andl	$7,%edx
-	jnz	1f
+	jnz	2f
 	POP_FRAME_POINTER
 	ret
-1:
+2:
 	movq	%r10,-8(%rdi,%rdx)
 .endif
 	POP_FRAME_POINTER
 	ret
+	ALIGN_TEXT
+3:
+	movq	%r10,(%rdi)
+	movq	%r10,8(%rdi)
+	movq	%rdi,%r8
+	andq	$15,%r8
+	leaq	-16(%rcx,%r8),%rcx
+	neg	%r8
+	leaq	16(%rdi,%r8),%rdi
+	jmp	1b
 .endm
 
 ENTRY(memset_std)



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201811081512.wA8FCbqG086293>