From owner-svn-src-all@freebsd.org Mon Feb 3 18:23:35 2020 Return-Path: Delivered-To: svn-src-all@mailman.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.nyi.freebsd.org (Postfix) with ESMTP id 99C6622A51E; Mon, 3 Feb 2020 18:23:35 +0000 (UTC) (envelope-from markj@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) server-signature RSA-PSS (4096 bits) client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 48BGRW3d7Tz3P3Q; Mon, 3 Feb 2020 18:23:35 +0000 (UTC) (envelope-from markj@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 77A6A4F74; Mon, 3 Feb 2020 18:23:35 +0000 (UTC) (envelope-from markj@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id 013INZiq024762; Mon, 3 Feb 2020 18:23:35 GMT (envelope-from markj@FreeBSD.org) Received: (from markj@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id 013INZD5024761; Mon, 3 Feb 2020 18:23:35 GMT (envelope-from markj@FreeBSD.org) Message-Id: <202002031823.013INZD5024761@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: markj set sender to markj@FreeBSD.org using -f From: Mark Johnston Date: Mon, 3 Feb 2020 18:23:35 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r357459 - head/sys/arm64/include X-SVN-Group: head X-SVN-Commit-Author: markj X-SVN-Commit-Paths: head/sys/arm64/include X-SVN-Commit-Revision: 357459 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 03 Feb 2020 18:23:35 -0000 Author: markj Date: Mon Feb 3 18:23:35 2020 New Revision: 357459 URL: https://svnweb.freebsd.org/changeset/base/357459 Log: Add LSE-based atomic(9) implementations. These make use of the cas*, ld* and swp instructions added in ARMv8.1. Testing shows them to be significantly more performant than LL/SC-based implementations. No functional change here since the wrappers still unconditionally select the _llsc variants. Reviewed by: andrew, kib MFC after: 1 month Submitted by: Ali Saidi (original version) Differential Revision: https://reviews.freebsd.org/D23324 Modified: head/sys/arm64/include/atomic.h Modified: head/sys/arm64/include/atomic.h ============================================================================== --- head/sys/arm64/include/atomic.h Mon Feb 3 18:23:14 2020 (r357458) +++ head/sys/arm64/include/atomic.h Mon Feb 3 18:23:35 2020 (r357459) @@ -63,15 +63,16 @@ static __inline void \ atomic_##op##_##bar##t##flav(volatile uint##t##_t *p, uint##t##_t val) -#define _ATOMIC_OP_IMPL(t, w, s, op, asm_op, bar, a, l) \ +#define _ATOMIC_OP_IMPL(t, w, s, op, llsc_asm_op, lse_asm_op, pre, bar, a, l) \ _ATOMIC_OP_PROTO(t, op, bar, _llsc) \ { \ uint##t##_t tmp; \ int res; \ \ + pre; \ __asm __volatile( \ "1: ld"#a"xr"#s" %"#w"0, [%2]\n" \ - " "#asm_op" %"#w"0, %"#w"0, %"#w"3\n" \ + " "#llsc_asm_op" %"#w"0, %"#w"0, %"#w"3\n" \ " st"#l"xr"#s" %w1, %"#w"0, [%2]\n" \ " cbnz %w1, 1b\n" \ : "=&r"(tmp), "=&r"(res) \ @@ -80,26 +81,45 @@ _ATOMIC_OP_PROTO(t, op, bar, _llsc) \ ); \ } \ \ +_ATOMIC_OP_PROTO(t, op, bar, _lse) \ +{ \ + uint##t##_t tmp; \ + \ + pre; \ + __asm __volatile( \ + ".arch_extension lse\n" \ + "ld"#lse_asm_op#a#l#s" %"#w"2, %"#w"0, [%1]\n" \ + ".arch_extension nolse\n" \ + : "=r" (tmp) \ + : "r" (p), "r" (val) \ + : "memory" \ + ); \ +} \ + \ _ATOMIC_OP_PROTO(t, op, bar, ) \ { \ atomic_##op##_##bar##t##_llsc(p, val); \ } -#define __ATOMIC_OP(op, asm_op, bar, a, l) \ - _ATOMIC_OP_IMPL(8, w, b, op, asm_op, bar, a, l) \ - _ATOMIC_OP_IMPL(16, w, h, op, asm_op, bar, a, l) \ - _ATOMIC_OP_IMPL(32, w, , op, asm_op, bar, a, l) \ - _ATOMIC_OP_IMPL(64, , , op, asm_op, bar, a, l) +#define __ATOMIC_OP(op, llsc_asm_op, lse_asm_op, pre, bar, a, l) \ + _ATOMIC_OP_IMPL(8, w, b, op, llsc_asm_op, lse_asm_op, pre, \ + bar, a, l) \ + _ATOMIC_OP_IMPL(16, w, h, op, llsc_asm_op, lse_asm_op, pre, \ + bar, a, l) \ + _ATOMIC_OP_IMPL(32, w, , op, llsc_asm_op, lse_asm_op, pre, \ + bar, a, l) \ + _ATOMIC_OP_IMPL(64, , , op, llsc_asm_op, lse_asm_op, pre, \ + bar, a, l) -#define _ATOMIC_OP(op, asm_op) \ - __ATOMIC_OP(op, asm_op, , , ) \ - __ATOMIC_OP(op, asm_op, acq_, a, ) \ - __ATOMIC_OP(op, asm_op, rel_, , l) +#define _ATOMIC_OP(op, llsc_asm_op, lse_asm_op, pre) \ + __ATOMIC_OP(op, llsc_asm_op, lse_asm_op, pre, , , ) \ + __ATOMIC_OP(op, llsc_asm_op, lse_asm_op, pre, acq_, a, ) \ + __ATOMIC_OP(op, llsc_asm_op, lse_asm_op, pre, rel_, , l) -_ATOMIC_OP(add, add) -_ATOMIC_OP(clear, bic) -_ATOMIC_OP(set, orr) -_ATOMIC_OP(subtract, sub) +_ATOMIC_OP(add, add, add, ) +_ATOMIC_OP(clear, bic, clr, ) +_ATOMIC_OP(set, orr, set, ) +_ATOMIC_OP(subtract, add, add, val = -val) #define _ATOMIC_CMPSET_PROTO(t, bar, flav) \ static __inline int \ @@ -133,6 +153,26 @@ _ATOMIC_CMPSET_PROTO(t, bar, _llsc) \ return (!res); \ } \ \ +_ATOMIC_CMPSET_PROTO(t, bar, _lse) \ +{ \ + uint##t##_t oldval; \ + int res; \ + \ + oldval = cmpval; \ + __asm __volatile( \ + ".arch_extension lse\n" \ + "cas"#a#l#s" %"#w"1, %"#w"4, [%3]\n" \ + "cmp %"#w"1, %"#w"2\n" \ + "cset %w0, eq\n" \ + ".arch_extension nolse\n" \ + : "=r" (res), "+&r" (cmpval) \ + : "r" (oldval), "r" (p), "r" (newval) \ + : "cc", "memory" \ + ); \ + \ + return (res); \ +} \ + \ _ATOMIC_CMPSET_PROTO(t, bar, ) \ { \ return (atomic_cmpset_##bar##t##_llsc(p, cmpval, newval)); \ @@ -160,6 +200,27 @@ _ATOMIC_FCMPSET_PROTO(t, bar, _llsc) \ return (!res); \ } \ \ +_ATOMIC_FCMPSET_PROTO(t, bar, _lse) \ +{ \ + uint##t##_t _cmpval, tmp; \ + int res; \ + \ + _cmpval = tmp = *cmpval; \ + __asm __volatile( \ + ".arch_extension lse\n" \ + "cas"#a#l#s" %"#w"1, %"#w"4, [%3]\n" \ + "cmp %"#w"1, %"#w"2\n" \ + "cset %w0, eq\n" \ + ".arch_extension nolse\n" \ + : "=r" (res), "+&r" (tmp) \ + : "r" (_cmpval), "r" (p), "r" (newval) \ + : "cc", "memory" \ + ); \ + *cmpval = tmp; \ + \ + return (res); \ +} \ + \ _ATOMIC_FCMPSET_PROTO(t, bar, ) \ { \ return (atomic_fcmpset_##bar##t##_llsc(p, cmpval, newval)); \ @@ -182,7 +243,7 @@ atomic_fetchadd_##t##flav(volatile uint##t##_t *p, uin #define _ATOMIC_FETCHADD_IMPL(t, w) \ _ATOMIC_FETCHADD_PROTO(t, _llsc) \ { \ - uint##t##_t tmp, ret; \ + uint##t##_t ret, tmp; \ int res; \ \ __asm __volatile( \ @@ -198,6 +259,22 @@ _ATOMIC_FETCHADD_PROTO(t, _llsc) \ return (ret); \ } \ \ +_ATOMIC_FETCHADD_PROTO(t, _lse) \ +{ \ + uint##t##_t ret; \ + \ + __asm __volatile( \ + ".arch_extension lse\n" \ + "ldadd %"#w"2, %"#w"0, [%1]\n" \ + ".arch_extension nolse\n" \ + : "=r" (ret) \ + : "r" (p), "r" (val) \ + : "memory" \ + ); \ + \ + return (ret); \ +} \ + \ _ATOMIC_FETCHADD_PROTO(t, ) \ { \ return (atomic_fetchadd_##t##_llsc(p, val)); \ @@ -232,6 +309,22 @@ _ATOMIC_SWAP_PROTO(t, _llsc) \ return (ret); \ } \ \ +_ATOMIC_SWAP_PROTO(t, _lse) \ +{ \ + uint##t##_t ret; \ + \ + __asm __volatile( \ + ".arch_extension lse\n" \ + "swp %"#w"2, %"#w"0, [%1]\n" \ + ".arch_extension nolse\n" \ + : "=r" (ret) \ + : "r" (p), "r" (val) \ + : "memory" \ + ); \ + \ + return (ret); \ +} \ + \ _ATOMIC_SWAP_PROTO(t, ) \ { \ return (atomic_swap_##t##_llsc(p, val)); \ @@ -254,6 +347,11 @@ _ATOMIC_READANDCLEAR_PROTO(t, _llsc) \ return (ret); \ } \ \ +_ATOMIC_READANDCLEAR_PROTO(t, _lse) \ +{ \ + return (atomic_swap_##t##_lse(p, 0)); \ +} \ + \ _ATOMIC_READANDCLEAR_PROTO(t, ) \ { \ return (atomic_readandclear_##t##_llsc(p)); \ @@ -266,7 +364,7 @@ _ATOMIC_SWAP_IMPL(64, , xzr) static __inline int \ atomic_testand##op##_##t##flav(volatile uint##t##_t *p, u_int val) -#define _ATOMIC_TEST_OP_IMPL(t, w, op, asm_op) \ +#define _ATOMIC_TEST_OP_IMPL(t, w, op, llsc_asm_op, lse_asm_op) \ _ATOMIC_TEST_OP_PROTO(t, op, _llsc) \ { \ uint##t##_t mask, old, tmp; \ @@ -275,7 +373,7 @@ _ATOMIC_TEST_OP_PROTO(t, op, _llsc) \ mask = 1u << (val & 0x1f); \ __asm __volatile( \ "1: ldxr %"#w"2, [%3]\n" \ - " "#asm_op" %"#w"0, %"#w"2, %"#w"4\n" \ + " "#llsc_asm_op" %"#w"0, %"#w"2, %"#w"4\n" \ " stxr %w1, %"#w"0, [%3]\n" \ " cbnz %w1, 1b\n" \ : "=&r" (tmp), "=&r" (res), "=&r" (old) \ @@ -286,17 +384,34 @@ _ATOMIC_TEST_OP_PROTO(t, op, _llsc) \ return ((old & mask) != 0); \ } \ \ +_ATOMIC_TEST_OP_PROTO(t, op, _lse) \ +{ \ + uint##t##_t mask, old; \ + \ + mask = 1u << (val & 0x1f); \ + __asm __volatile( \ + ".arch_extension lse\n" \ + "ld"#lse_asm_op" %"#w"2, %"#w"0, [%1]\n" \ + ".arch_extension nolse\n" \ + : "=r" (old) \ + : "r" (p), "r" (mask) \ + : "memory" \ + ); \ + \ + return ((old & mask) != 0); \ +} \ + \ _ATOMIC_TEST_OP_PROTO(t, op, ) \ { \ return (atomic_testand##op##_##t##_llsc(p, val)); \ } -#define _ATOMIC_TEST_OP(op, asm_op) \ - _ATOMIC_TEST_OP_IMPL(32, w, op, asm_op) \ - _ATOMIC_TEST_OP_IMPL(64, , op, asm_op) +#define _ATOMIC_TEST_OP(op, llsc_asm_op, lse_asm_op) \ + _ATOMIC_TEST_OP_IMPL(32, w, op, llsc_asm_op, lse_asm_op) \ + _ATOMIC_TEST_OP_IMPL(64, , op, llsc_asm_op, lse_asm_op) -_ATOMIC_TEST_OP(clear, bic) -_ATOMIC_TEST_OP(set, orr) +_ATOMIC_TEST_OP(clear, bic, clr) +_ATOMIC_TEST_OP(set, orr, set) #define _ATOMIC_LOAD_ACQ_IMPL(t, w, s) \ static __inline uint##t##_t \