From owner-svn-src-head@freebsd.org Thu Jan 30 19:56:22 2020 Return-Path: Delivered-To: svn-src-head@mailman.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.nyi.freebsd.org (Postfix) with ESMTP id BC4E41FC73C; Thu, 30 Jan 2020 19:56:22 +0000 (UTC) (envelope-from mjg@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) server-signature RSA-PSS (4096 bits) client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 487rhQ4Ym9z4Z3N; Thu, 30 Jan 2020 19:56:22 +0000 (UTC) (envelope-from mjg@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 97BE02318; Thu, 30 Jan 2020 19:56:22 +0000 (UTC) (envelope-from mjg@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id 00UJuMTb004093; Thu, 30 Jan 2020 19:56:22 GMT (envelope-from mjg@FreeBSD.org) Received: (from mjg@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id 00UJuMC3004092; Thu, 30 Jan 2020 19:56:22 GMT (envelope-from mjg@FreeBSD.org) Message-Id: <202001301956.00UJuMC3004092@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: mjg set sender to mjg@FreeBSD.org using -f From: Mateusz Guzik Date: Thu, 30 Jan 2020 19:56:22 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r357309 - head/sys/amd64/amd64 X-SVN-Group: head X-SVN-Commit-Author: mjg X-SVN-Commit-Paths: head/sys/amd64/amd64 X-SVN-Commit-Revision: 357309 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 30 Jan 2020 19:56:22 -0000 Author: mjg Date: Thu Jan 30 19:56:22 2020 New Revision: 357309 URL: https://svnweb.freebsd.org/changeset/base/357309 Log: amd64: speed up failing case for memcmp Instead of branching on up to 8 bytes, drop the size to 4. Assorted clean ups while here. Validated with glibc test suite. Modified: head/sys/amd64/amd64/support.S Modified: head/sys/amd64/amd64/support.S ============================================================================== --- head/sys/amd64/amd64/support.S Thu Jan 30 19:38:51 2020 (r357308) +++ head/sys/amd64/amd64/support.S Thu Jan 30 19:56:22 2020 (r357309) @@ -107,7 +107,7 @@ END(sse2_pagezero) /* * memcmpy(b1, b2, len) - * rdi,rsi,len + * rdi,rsi,rdx */ ENTRY(memcmp) PUSH_FRAME_POINTER @@ -123,7 +123,7 @@ ENTRY(memcmp) movq (%rdi),%r8 movq (%rsi),%r9 cmpq %r8,%r9 - jne 1f + jne 80f movq -8(%rdi,%rdx),%r8 movq -8(%rsi,%rdx),%r9 cmpq %r8,%r9 @@ -133,25 +133,25 @@ ENTRY(memcmp) 100408: cmpb $4,%dl jl 100204f - movl (%rsi),%r8d - movl (%rdi),%r9d + movl (%rdi),%r8d + movl (%rsi),%r9d cmpl %r8d,%r9d - jne 1f - movl -4(%rsi,%rdx),%r8d - movl -4(%rdi,%rdx),%r9d + jne 80f + movl -4(%rdi,%rdx),%r8d + movl -4(%rsi,%rdx),%r9d cmpl %r8d,%r9d - jne 1f + jne 10040804f POP_FRAME_POINTER ret 100204: cmpb $2,%dl jl 100001f - movzwl (%rsi),%r8d - movzwl (%rdi),%r9d + movzwl (%rdi),%r8d + movzwl (%rsi),%r9d cmpl %r8d,%r9d jne 1f - movzwl -2(%rsi,%rdx),%r8d - movzwl -2(%rdi,%rdx),%r9d + movzwl -2(%rdi,%rdx),%r8d + movzwl -2(%rsi,%rdx),%r9d cmpl %r8d,%r9d jne 1f POP_FRAME_POINTER @@ -159,10 +159,9 @@ ENTRY(memcmp) 100001: cmpb $1,%dl jl 100000f - movzbl (%rdi),%r8d - movzbl (%rsi),%r9d - cmpb %r8b,%r9b - jne 1f + movzbl (%rdi),%eax + movzbl (%rsi),%r8d + subl %r8d,%eax 100000: POP_FRAME_POINTER ret @@ -173,11 +172,11 @@ ALIGN_TEXT movq (%rdi),%r8 movq (%rsi),%r9 cmpq %r8,%r9 - jne 1f + jne 80f movq 8(%rdi),%r8 movq 8(%rsi),%r9 cmpq %r8,%r9 - jne 10163208f + jne 10163208f movq -16(%rdi,%rdx),%r8 movq -16(%rsi,%rdx),%r9 cmpq %r8,%r9 @@ -194,14 +193,14 @@ ALIGN_TEXT movq 8(%rdi),%r9 subq (%rsi),%r8 subq 8(%rsi),%r9 - or %r8,%r9 + orq %r8,%r9 jnz 10320000f movq 16(%rdi),%r8 movq 24(%rdi),%r9 subq 16(%rsi),%r8 subq 24(%rsi),%r9 - or %r8,%r9 + orq %r8,%r9 jnz 10320016f leaq 32(%rdi),%rdi @@ -214,40 +213,57 @@ ALIGN_TEXT POP_FRAME_POINTER ret +/* + * Mismatch was found. + * + * Before we compute it we narrow down the range (16 -> 8 -> 4 bytes). + */ +ALIGN_TEXT 10320016: leaq 16(%rdi),%rdi leaq 16(%rsi),%rsi 10320000: -/* - * Mismatch was found within a 16 bytes range. The part of the routine - * which calculates it only operates on sizes up to 8 bytes. Find the - * right part. - */ movq (%rdi),%r8 movq (%rsi),%r9 cmpq %r8,%r9 - jne 1f + jne 80f leaq 8(%rdi),%rdi leaq 8(%rsi),%rsi - jmp 1f + jmp 80f +ALIGN_TEXT +10081608: 10163224: leaq -8(%rdi,%rdx),%rdi leaq -8(%rsi,%rdx),%rsi - jmp 1f + jmp 80f +ALIGN_TEXT 10163216: leaq -16(%rdi,%rdx),%rdi leaq -16(%rsi,%rdx),%rsi - jmp 1f + jmp 80f +ALIGN_TEXT 10163208: -10081608: leaq 8(%rdi),%rdi leaq 8(%rsi),%rsi + jmp 80f +ALIGN_TEXT +10040804: + leaq -4(%rdi,%rdx),%rdi + leaq -4(%rsi,%rdx),%rsi jmp 1f +ALIGN_TEXT +80: + movl (%rdi),%r8d + movl (%rsi),%r9d + cmpl %r8d,%r9d + jne 1f + leaq 4(%rdi),%rdi + leaq 4(%rsi),%rsi + /* - * Mismatch was found. We have no more than 8 bytes to inspect. + * We have up to 4 bytes to inspect. */ -ALIGN_TEXT 1: movzbl (%rdi),%eax movzbl (%rsi),%r8d @@ -266,32 +282,6 @@ ALIGN_TEXT movzbl 3(%rdi),%eax movzbl 3(%rsi),%r8d - cmpb %r8b,%al - jne 2f - - movzbl 4(%rdi),%eax - movzbl 4(%rsi),%r8d - cmpb %r8b,%al - jne 2f - - movzbl 5(%rdi),%eax - movzbl 5(%rsi),%r8d - cmpb %r8b,%al - jne 2f - - movzbl 6(%rdi),%eax - movzbl 6(%rsi),%r8d - cmpb %r8b,%al - jne 2f - - movzbl 7(%rdi),%eax - movzbl 7(%rsi),%r8d - cmpb %r8b,%al - jne 2f - - xorl %eax,%eax - POP_FRAME_POINTER - ret 2: subl %r8d,%eax POP_FRAME_POINTER