Skip site navigation (1) Skip section navigation (2)
Date:      Wed, 9 May 2018 15:16:25 +0000 (UTC)
From:      Mateusz Guzik <mjg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r333413 - head/sys/amd64/amd64
Message-ID:  <201805091516.w49FGPhQ007531@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mjg
Date: Wed May  9 15:16:25 2018
New Revision: 333413
URL: https://svnweb.freebsd.org/changeset/base/333413

Log:
  amd64: depessimize bcmp for small buffers
  
  Adapt the assembly generated by clang for memcmp and use it for compares
  of at most 64 bytes (which are the vast majority).
  
  Sample result of doing stats on Broadwell (% of samples):
  before: 4.0 kernel     bcmp                 cache_lookup
  after : 0.7 kernel     bcmp                 cache_lookup
  
  The routine is most definitely still not optimal. Anyone interested in
  spending time improving it is welcome to take over.
  
  Reviewed by:	kib

Modified:
  head/sys/amd64/amd64/support.S

Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S	Wed May  9 14:50:32 2018	(r333412)
+++ head/sys/amd64/amd64/support.S	Wed May  9 15:16:25 2018	(r333413)
@@ -98,17 +98,40 @@ END(sse2_pagezero)
 
 ENTRY(bcmp)
 	PUSH_FRAME_POINTER
+	test	%rdx,%rdx
+	je	1f
+	cmpq	$64,%rdx
+	jg	4f
+
+	xor	%ecx,%ecx
+2:
+	movzbl	(%rdi,%rcx,1),%eax
+	movzbl	(%rsi,%rcx,1),%r8d
+	cmp	%r8b,%al
+	jne	3f
+	add	$0x1,%rcx
+	cmp	%rcx,%rdx
+	jne	2b
+1:
+	xor	%eax,%eax
+	POP_FRAME_POINTER
+	retq
+3:
+	mov	$1,%eax
+	POP_FRAME_POINTER
+	retq
+4:
 	movq	%rdx,%rcx
 	shrq	$3,%rcx
 	repe
 	cmpsq
-	jne	1f
+	jne	5f
 
 	movq	%rdx,%rcx
 	andq	$7,%rcx
 	repe
 	cmpsb
-1:
+5:
 	setne	%al
 	movsbl	%al,%eax
 	POP_FRAME_POINTER



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201805091516.w49FGPhQ007531>