Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 15 Jul 2024 12:37:51 GMT
From:      Andrew Turner <andrew@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org
Subject:   git: 98768d6870cc - stable/14 - ossl: Rebuild the openssl asm
Message-ID:  <202407151237.46FCbpW9058307@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch stable/14 has been updated by andrew:

URL: https://cgit.FreeBSD.org/src/commit/?id=98768d6870cc30303a80d2d8c888f0daa3f1ad4a

commit 98768d6870cc30303a80d2d8c888f0daa3f1ad4a
Author:     Andrew Turner <andrew@FreeBSD.org>
AuthorDate: 2023-09-21 13:06:54 +0000
Commit:     Andrew Turner <andrew@FreeBSD.org>
CommitDate: 2024-07-15 08:09:43 +0000

    ossl: Rebuild the openssl asm
    
    This adds the new BTI instructions when needed to the arm64 assembly.
    
    Sponsored by:   Arm Ltd
    
    Reviewed by:    Pierre Pronchery <pierre@freebsdfoundation.org> (earlier version)
    Sponsored by:   Arm Ltd
    Differential Revision:  https://reviews.freebsd.org/D41941
    
    (cherry picked from commit bd9588bca05f5cbdeac6e5f9f426b2589301d7c6)
---
 sys/crypto/openssl/aarch64/aes-gcm-armv8_64.S   |  6 +++
 sys/crypto/openssl/aarch64/aesv8-armx.S         | 15 +++++-
 sys/crypto/openssl/aarch64/arm64cpuid.S         | 10 ++++
 sys/crypto/openssl/aarch64/armv8-mont.S         | 19 ++++++--
 sys/crypto/openssl/aarch64/chacha-armv8.S       | 18 +++----
 sys/crypto/openssl/aarch64/ecp_nistz256-armv8.S | 64 +++++++++++++++----------
 sys/crypto/openssl/aarch64/ghashv8-armx.S       |  3 ++
 sys/crypto/openssl/aarch64/keccak1600-armv8.S   | 30 ++++++------
 sys/crypto/openssl/aarch64/poly1305-armv8.S     | 17 ++++++-
 sys/crypto/openssl/aarch64/sha1-armv8.S         |  5 +-
 sys/crypto/openssl/aarch64/sha256-armv8.S       | 10 ++--
 sys/crypto/openssl/aarch64/sha512-armv8.S       |  8 ++--
 sys/crypto/openssl/aarch64/vpaes-armv8.S        | 39 ++++++++-------
 sys/crypto/openssl/arm_arch.h                   | 58 ++++++++++++++++++++++
 14 files changed, 219 insertions(+), 83 deletions(-)

diff --git a/sys/crypto/openssl/aarch64/aes-gcm-armv8_64.S b/sys/crypto/openssl/aarch64/aes-gcm-armv8_64.S
index eb85dbc9f996..55856548fa6f 100644
--- a/sys/crypto/openssl/aarch64/aes-gcm-armv8_64.S
+++ b/sys/crypto/openssl/aarch64/aes-gcm-armv8_64.S
@@ -8,6 +8,7 @@
 .type	aes_gcm_enc_128_kernel,%function
 .align	4
 aes_gcm_enc_128_kernel:
+	AARCH64_VALID_CALL_TARGET
 	cbz	x1, .L128_enc_ret
 	stp	x19, x20, [sp, #-112]!
 	mov	x16, x4
@@ -990,6 +991,7 @@ aes_gcm_enc_128_kernel:
 .type	aes_gcm_dec_128_kernel,%function
 .align	4
 aes_gcm_dec_128_kernel:
+	AARCH64_VALID_CALL_TARGET
 	cbz	x1, .L128_dec_ret
 	stp	x19, x20, [sp, #-112]!
 	mov	x16, x4
@@ -1982,6 +1984,7 @@ aes_gcm_dec_128_kernel:
 .type	aes_gcm_enc_192_kernel,%function
 .align	4
 aes_gcm_enc_192_kernel:
+	AARCH64_VALID_CALL_TARGET
 	cbz	x1, .L192_enc_ret
 	stp	x19, x20, [sp, #-112]!
 	mov	x16, x4
@@ -3039,6 +3042,7 @@ aes_gcm_enc_192_kernel:
 .type	aes_gcm_dec_192_kernel,%function
 .align	4
 aes_gcm_dec_192_kernel:
+	AARCH64_VALID_CALL_TARGET
 	cbz	x1, .L192_dec_ret
 	stp	x19, x20, [sp, #-112]!
 	mov	x16, x4
@@ -4106,6 +4110,7 @@ aes_gcm_dec_192_kernel:
 .type	aes_gcm_enc_256_kernel,%function
 .align	4
 aes_gcm_enc_256_kernel:
+	AARCH64_VALID_CALL_TARGET
 	cbz	x1, .L256_enc_ret
 	stp	x19, x20, [sp, #-112]!
 	mov	x16, x4
@@ -5230,6 +5235,7 @@ aes_gcm_enc_256_kernel:
 .type	aes_gcm_dec_256_kernel,%function
 .align	4
 aes_gcm_dec_256_kernel:
+	AARCH64_VALID_CALL_TARGET
 	cbz	x1, .L256_dec_ret
 	stp	x19, x20, [sp, #-112]!
 	mov	x16, x4
diff --git a/sys/crypto/openssl/aarch64/aesv8-armx.S b/sys/crypto/openssl/aarch64/aesv8-armx.S
index 7666667e4782..015c2eea6dbb 100644
--- a/sys/crypto/openssl/aarch64/aesv8-armx.S
+++ b/sys/crypto/openssl/aarch64/aesv8-armx.S
@@ -15,6 +15,8 @@
 .align	5
 aes_v8_set_encrypt_key:
 .Lenc_key:
+	AARCH64_VALID_CALL_TARGET
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	mov	x3,#-1
@@ -186,7 +188,7 @@ aes_v8_set_encrypt_key:
 .type	aes_v8_set_decrypt_key,%function
 .align	5
 aes_v8_set_decrypt_key:
-.inst	0xd503233f		// paciasp
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	bl	.Lenc_key
@@ -220,13 +222,14 @@ aes_v8_set_decrypt_key:
 	eor	x0,x0,x0		// return value
 .Ldec_key_abort:
 	ldp	x29,x30,[sp],#16
-.inst	0xd50323bf		// autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
 .globl	aes_v8_encrypt
 .type	aes_v8_encrypt,%function
 .align	5
 aes_v8_encrypt:
+	AARCH64_VALID_CALL_TARGET
 	ldr	w3,[x2,#240]
 	ld1	{v0.4s},[x2],#16
 	ld1	{v2.16b},[x0]
@@ -256,6 +259,7 @@ aes_v8_encrypt:
 .type	aes_v8_decrypt,%function
 .align	5
 aes_v8_decrypt:
+	AARCH64_VALID_CALL_TARGET
 	ldr	w3,[x2,#240]
 	ld1	{v0.4s},[x2],#16
 	ld1	{v2.16b},[x0]
@@ -285,6 +289,7 @@ aes_v8_decrypt:
 .type	aes_v8_ecb_encrypt,%function
 .align	5
 aes_v8_ecb_encrypt:
+	AARCH64_VALID_CALL_TARGET
 	subs	x2,x2,#16
 	// Original input data size bigger than 16, jump to big size processing.
 	b.ne	.Lecb_big_size
@@ -1031,6 +1036,8 @@ aes_v8_ecb_encrypt:
 .type	aes_v8_cbc_encrypt,%function
 .align	5
 aes_v8_cbc_encrypt:
+	AARCH64_VALID_CALL_TARGET
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	subs	x2,x2,#16
@@ -1506,6 +1513,8 @@ aes_v8_cbc_encrypt:
 .type	aes_v8_ctr32_encrypt_blocks,%function
 .align	5
 aes_v8_ctr32_encrypt_blocks:
+	AARCH64_VALID_CALL_TARGET
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	ldr	w5,[x3,#240]
@@ -1855,6 +1864,7 @@ aes_v8_ctr32_encrypt_blocks:
 .type	aes_v8_xts_encrypt,%function
 .align	5
 aes_v8_xts_encrypt:
+	AARCH64_VALID_CALL_TARGET
 	cmp	x2,#16
 	// Original input data size bigger than 16, jump to big size processing.
 	b.ne	.Lxts_enc_big_size
@@ -2496,6 +2506,7 @@ aes_v8_xts_encrypt:
 .type	aes_v8_xts_decrypt,%function
 .align	5
 aes_v8_xts_decrypt:
+	AARCH64_VALID_CALL_TARGET
 	cmp	x2,#16
 	// Original input data size bigger than 16, jump to big size processing.
 	b.ne	.Lxts_dec_big_size
diff --git a/sys/crypto/openssl/aarch64/arm64cpuid.S b/sys/crypto/openssl/aarch64/arm64cpuid.S
index 5ee467c7f129..52c6ee5b65d3 100644
--- a/sys/crypto/openssl/aarch64/arm64cpuid.S
+++ b/sys/crypto/openssl/aarch64/arm64cpuid.S
@@ -8,6 +8,7 @@
 .globl	_armv7_neon_probe
 .type	_armv7_neon_probe,%function
 _armv7_neon_probe:
+	AARCH64_VALID_CALL_TARGET
 	orr	v15.16b, v15.16b, v15.16b
 	ret
 .size	_armv7_neon_probe,.-_armv7_neon_probe
@@ -15,6 +16,7 @@ _armv7_neon_probe:
 .globl	_armv7_tick
 .type	_armv7_tick,%function
 _armv7_tick:
+	AARCH64_VALID_CALL_TARGET
 #ifdef	__APPLE__
 	mrs	x0, CNTPCT_EL0
 #else
@@ -26,6 +28,7 @@ _armv7_tick:
 .globl	_armv8_aes_probe
 .type	_armv8_aes_probe,%function
 _armv8_aes_probe:
+	AARCH64_VALID_CALL_TARGET
 	aese	v0.16b, v0.16b
 	ret
 .size	_armv8_aes_probe,.-_armv8_aes_probe
@@ -33,6 +36,7 @@ _armv8_aes_probe:
 .globl	_armv8_sha1_probe
 .type	_armv8_sha1_probe,%function
 _armv8_sha1_probe:
+	AARCH64_VALID_CALL_TARGET
 	sha1h	s0, s0
 	ret
 .size	_armv8_sha1_probe,.-_armv8_sha1_probe
@@ -40,6 +44,7 @@ _armv8_sha1_probe:
 .globl	_armv8_sha256_probe
 .type	_armv8_sha256_probe,%function
 _armv8_sha256_probe:
+	AARCH64_VALID_CALL_TARGET
 	sha256su0	v0.4s, v0.4s
 	ret
 .size	_armv8_sha256_probe,.-_armv8_sha256_probe
@@ -47,6 +52,7 @@ _armv8_sha256_probe:
 .globl	_armv8_pmull_probe
 .type	_armv8_pmull_probe,%function
 _armv8_pmull_probe:
+	AARCH64_VALID_CALL_TARGET
 	pmull	v0.1q, v0.1d, v0.1d
 	ret
 .size	_armv8_pmull_probe,.-_armv8_pmull_probe
@@ -54,6 +60,7 @@ _armv8_pmull_probe:
 .globl	_armv8_sha512_probe
 .type	_armv8_sha512_probe,%function
 _armv8_sha512_probe:
+	AARCH64_VALID_CALL_TARGET
 .long	0xcec08000	// sha512su0	v0.2d,v0.2d
 	ret
 .size	_armv8_sha512_probe,.-_armv8_sha512_probe
@@ -61,6 +68,7 @@ _armv8_sha512_probe:
 .globl	_armv8_cpuid_probe
 .type	_armv8_cpuid_probe,%function
 _armv8_cpuid_probe:
+	AARCH64_VALID_CALL_TARGET
 	mrs	x0, midr_el1
 	ret
 .size	_armv8_cpuid_probe,.-_armv8_cpuid_probe
@@ -69,6 +77,7 @@ _armv8_cpuid_probe:
 .type	OPENSSL_cleanse,%function
 .align	5
 OPENSSL_cleanse:
+	AARCH64_VALID_CALL_TARGET
 	cbz	x1,.Lret	// len==0?
 	cmp	x1,#15
 	b.hi	.Lot		// len>15
@@ -100,6 +109,7 @@ OPENSSL_cleanse:
 .type	CRYPTO_memcmp,%function
 .align	4
 CRYPTO_memcmp:
+	AARCH64_VALID_CALL_TARGET
 	eor	w3,w3,w3
 	cbz	x2,.Lno_data	// len==0?
 	cmp	x2,#16
diff --git a/sys/crypto/openssl/aarch64/armv8-mont.S b/sys/crypto/openssl/aarch64/armv8-mont.S
index 4bdba95f80f8..8b85fb080aba 100644
--- a/sys/crypto/openssl/aarch64/armv8-mont.S
+++ b/sys/crypto/openssl/aarch64/armv8-mont.S
@@ -1,6 +1,6 @@
 /* Do not modify. This file is auto-generated from armv8-mont.pl. */
+#include "arm_arch.h"
 #ifndef	__KERNEL__
-# include "arm_arch.h"
 
 .hidden	OPENSSL_armv8_rsa_neonized
 #endif
@@ -10,6 +10,7 @@
 .type	bn_mul_mont,%function
 .align	5
 bn_mul_mont:
+	AARCH64_SIGN_LINK_REGISTER
 .Lbn_mul_mont:
 	tst	x5,#3
 	b.ne	.Lmul_mont
@@ -220,11 +221,14 @@ bn_mul_mont:
 	mov	x0,#1
 	ldp	x23,x24,[x29,#48]
 	ldr	x29,[sp],#64
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	bn_mul_mont,.-bn_mul_mont
 .type	bn_mul8x_mont_neon,%function
 .align	5
 bn_mul8x_mont_neon:
+	// Not adding AARCH64_SIGN_LINK_REGISTER here because bn_mul8x_mont_neon is jumped to
+	// only from bn_mul_mont which has already signed the return address.
 	stp	x29,x30,[sp,#-80]!
 	mov	x16,sp
 	stp	d8,d9,[sp,#16]
@@ -917,6 +921,7 @@ bn_mul8x_mont_neon:
 	ldp	d10,d11,[sp,#32]
 	ldp	d8,d9,[sp,#16]
 	ldr	x29,[sp],#80
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret	// bx lr
 
 .size	bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
@@ -926,7 +931,8 @@ __bn_sqr8x_mont:
 	cmp	x1,x2
 	b.ne	__bn_mul4x_mont
 .Lsqr8x_mont:
-.inst	0xd503233f		// paciasp
+	// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
+	// only from bn_mul_mont which has already signed the return address.
 	stp	x29,x30,[sp,#-128]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -1677,13 +1683,15 @@ __bn_sqr8x_mont:
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldr	x29,[sp],#128
-.inst	0xd50323bf		// autiasp
+	// x30 is loaded earlier
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	__bn_sqr8x_mont,.-__bn_sqr8x_mont
 .type	__bn_mul4x_mont,%function
 .align	5
 __bn_mul4x_mont:
-.inst	0xd503233f		// paciasp
+	// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
+	// only from bn_mul_mont (or __bn_sqr8x_mont from bn_mul_mont) which has already signed the return address.
 	stp	x29,x30,[sp,#-128]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -2117,7 +2125,8 @@ __bn_mul4x_mont:
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldr	x29,[sp],#128
-.inst	0xd50323bf		// autiasp
+	// x30 loaded earlier
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	__bn_mul4x_mont,.-__bn_mul4x_mont
 .byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
diff --git a/sys/crypto/openssl/aarch64/chacha-armv8.S b/sys/crypto/openssl/aarch64/chacha-armv8.S
index 609e34f422a2..4f9d6bd372f7 100644
--- a/sys/crypto/openssl/aarch64/chacha-armv8.S
+++ b/sys/crypto/openssl/aarch64/chacha-armv8.S
@@ -1,6 +1,6 @@
 /* Do not modify. This file is auto-generated from chacha-armv8.pl. */
+#include "arm_arch.h"
 #ifndef	__KERNEL__
-# include "arm_arch.h"
 
 .hidden	OPENSSL_armcap_P
 #endif
@@ -21,6 +21,7 @@
 .type	ChaCha20_ctr32,%function
 .align	5
 ChaCha20_ctr32:
+	AARCH64_SIGN_LINK_REGISTER
 	cbz	x2,.Labort
 	cmp	x2,#192
 	b.lo	.Lshort
@@ -33,7 +34,6 @@ ChaCha20_ctr32:
 #endif
 
 .Lshort:
-.inst	0xd503233f			// paciasp
 	stp	x29,x30,[sp,#-96]!
 	add	x29,sp,#0
 
@@ -245,8 +245,8 @@ ChaCha20_ctr32:
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#96
-.inst	0xd50323bf			// autiasp
 .Labort:
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 
 .align	4
@@ -302,7 +302,7 @@ ChaCha20_ctr32:
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#96
-.inst	0xd50323bf			// autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	ChaCha20_ctr32,.-ChaCha20_ctr32
 
@@ -312,8 +312,8 @@ ChaCha20_ctr32:
 .type	ChaCha20_neon,%function
 .align	5
 ChaCha20_neon:
+	AARCH64_SIGN_LINK_REGISTER
 .LChaCha20_neon:
-.inst	0xd503233f			// paciasp
 	stp	x29,x30,[sp,#-96]!
 	add	x29,sp,#0
 
@@ -748,7 +748,7 @@ ChaCha20_neon:
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#96
-.inst	0xd50323bf			// autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 
 .align	4
@@ -880,13 +880,13 @@ ChaCha20_neon:
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#96
-.inst	0xd50323bf			// autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	ChaCha20_neon,.-ChaCha20_neon
 .type	ChaCha20_512_neon,%function
 .align	5
 ChaCha20_512_neon:
-.inst	0xd503233f			// paciasp
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-96]!
 	add	x29,sp,#0
 
@@ -2030,6 +2030,6 @@ ChaCha20_512_neon:
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#96
-.inst	0xd50323bf			// autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	ChaCha20_512_neon,.-ChaCha20_512_neon
diff --git a/sys/crypto/openssl/aarch64/ecp_nistz256-armv8.S b/sys/crypto/openssl/aarch64/ecp_nistz256-armv8.S
index d297ac15605c..73c367bcf1fc 100644
--- a/sys/crypto/openssl/aarch64/ecp_nistz256-armv8.S
+++ b/sys/crypto/openssl/aarch64/ecp_nistz256-armv8.S
@@ -2396,7 +2396,7 @@ ecp_nistz256_precomputed:
 .type	ecp_nistz256_to_mont,%function
 .align	6
 ecp_nistz256_to_mont:
-.inst	0xd503233f		// paciasp
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-32]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -2412,7 +2412,7 @@ ecp_nistz256_to_mont:
 
 	ldp	x19,x20,[sp,#16]
 	ldp	x29,x30,[sp],#32
-.inst	0xd50323bf		// autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	ecp_nistz256_to_mont,.-ecp_nistz256_to_mont
 
@@ -2421,7 +2421,7 @@ ecp_nistz256_to_mont:
 .type	ecp_nistz256_from_mont,%function
 .align	4
 ecp_nistz256_from_mont:
-.inst	0xd503233f		// paciasp
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-32]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -2437,7 +2437,7 @@ ecp_nistz256_from_mont:
 
 	ldp	x19,x20,[sp,#16]
 	ldp	x29,x30,[sp],#32
-.inst	0xd50323bf		// autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	ecp_nistz256_from_mont,.-ecp_nistz256_from_mont
 
@@ -2447,7 +2447,7 @@ ecp_nistz256_from_mont:
 .type	ecp_nistz256_mul_mont,%function
 .align	4
 ecp_nistz256_mul_mont:
-.inst	0xd503233f		// paciasp
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-32]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -2462,7 +2462,7 @@ ecp_nistz256_mul_mont:
 
 	ldp	x19,x20,[sp,#16]
 	ldp	x29,x30,[sp],#32
-.inst	0xd50323bf		// autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont
 
@@ -2471,7 +2471,7 @@ ecp_nistz256_mul_mont:
 .type	ecp_nistz256_sqr_mont,%function
 .align	4
 ecp_nistz256_sqr_mont:
-.inst	0xd503233f		// paciasp
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-32]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -2485,7 +2485,7 @@ ecp_nistz256_sqr_mont:
 
 	ldp	x19,x20,[sp,#16]
 	ldp	x29,x30,[sp],#32
-.inst	0xd50323bf		// autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
 
@@ -2495,7 +2495,7 @@ ecp_nistz256_sqr_mont:
 .type	ecp_nistz256_add,%function
 .align	4
 ecp_nistz256_add:
-.inst	0xd503233f		// paciasp
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
@@ -2509,7 +2509,7 @@ ecp_nistz256_add:
 	bl	__ecp_nistz256_add
 
 	ldp	x29,x30,[sp],#16
-.inst	0xd50323bf		// autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	ecp_nistz256_add,.-ecp_nistz256_add
 
@@ -2518,7 +2518,7 @@ ecp_nistz256_add:
 .type	ecp_nistz256_div_by_2,%function
 .align	4
 ecp_nistz256_div_by_2:
-.inst	0xd503233f		// paciasp
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
@@ -2530,7 +2530,7 @@ ecp_nistz256_div_by_2:
 	bl	__ecp_nistz256_div_by_2
 
 	ldp	x29,x30,[sp],#16
-.inst	0xd50323bf		//  autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	ecp_nistz256_div_by_2,.-ecp_nistz256_div_by_2
 
@@ -2539,7 +2539,7 @@ ecp_nistz256_div_by_2:
 .type	ecp_nistz256_mul_by_2,%function
 .align	4
 ecp_nistz256_mul_by_2:
-.inst	0xd503233f		// paciasp
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
@@ -2555,7 +2555,7 @@ ecp_nistz256_mul_by_2:
 	bl	__ecp_nistz256_add	// ret = a+a	// 2*a
 
 	ldp	x29,x30,[sp],#16
-.inst	0xd50323bf		// autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2
 
@@ -2564,7 +2564,7 @@ ecp_nistz256_mul_by_2:
 .type	ecp_nistz256_mul_by_3,%function
 .align	4
 ecp_nistz256_mul_by_3:
-.inst	0xd503233f		// paciasp
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
@@ -2591,7 +2591,7 @@ ecp_nistz256_mul_by_3:
 	bl	__ecp_nistz256_add	// ret += a	// 2*a+a=3*a
 
 	ldp	x29,x30,[sp],#16
-.inst	0xd50323bf		// autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	ecp_nistz256_mul_by_3,.-ecp_nistz256_mul_by_3
 
@@ -2601,7 +2601,7 @@ ecp_nistz256_mul_by_3:
 .type	ecp_nistz256_sub,%function
 .align	4
 ecp_nistz256_sub:
-.inst	0xd503233f		// paciasp
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
@@ -2613,7 +2613,7 @@ ecp_nistz256_sub:
 	bl	__ecp_nistz256_sub_from
 
 	ldp	x29,x30,[sp],#16
-.inst	0xd50323bf		// autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	ecp_nistz256_sub,.-ecp_nistz256_sub
 
@@ -2622,7 +2622,7 @@ ecp_nistz256_sub:
 .type	ecp_nistz256_neg,%function
 .align	4
 ecp_nistz256_neg:
-.inst	0xd503233f		// paciasp
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
@@ -2637,7 +2637,7 @@ ecp_nistz256_neg:
 	bl	__ecp_nistz256_sub_from
 
 	ldp	x29,x30,[sp],#16
-.inst	0xd50323bf		// autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	ecp_nistz256_neg,.-ecp_nistz256_neg
 
@@ -3015,7 +3015,7 @@ __ecp_nistz256_div_by_2:
 .type	ecp_nistz256_point_double,%function
 .align	5
 ecp_nistz256_point_double:
-.inst	0xd503233f		// paciasp
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-96]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -3150,14 +3150,14 @@ ecp_nistz256_point_double:
 	ldp	x19,x20,[x29,#16]
 	ldp	x21,x22,[x29,#32]
 	ldp	x29,x30,[sp],#96
-.inst	0xd50323bf		// autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	ecp_nistz256_point_double,.-ecp_nistz256_point_double
 .globl	ecp_nistz256_point_add
 .type	ecp_nistz256_point_add,%function
 .align	5
 ecp_nistz256_point_add:
-.inst	0xd503233f		// paciasp
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-96]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -3404,14 +3404,14 @@ ecp_nistz256_point_add:
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#96
-.inst	0xd50323bf		// autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	ecp_nistz256_point_add,.-ecp_nistz256_point_add
 .globl	ecp_nistz256_point_add_affine
 .type	ecp_nistz256_point_add_affine,%function
 .align	5
 ecp_nistz256_point_add_affine:
-.inst	0xd503233f		// paciasp
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-80]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -3610,7 +3610,7 @@ ecp_nistz256_point_add_affine:
 	ldp	x23,x24,[x29,#48]
 	ldp	x25,x26,[x29,#64]
 	ldp	x29,x30,[sp],#80
-.inst	0xd50323bf		// autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
 ////////////////////////////////////////////////////////////////////////
@@ -3620,6 +3620,8 @@ ecp_nistz256_point_add_affine:
 .type	ecp_nistz256_ord_mul_mont,%function
 .align	4
 ecp_nistz256_ord_mul_mont:
+	AARCH64_VALID_CALL_TARGET
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
 	stp	x29,x30,[sp,#-64]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -3828,6 +3830,8 @@ ecp_nistz256_ord_mul_mont:
 .type	ecp_nistz256_ord_sqr_mont,%function
 .align	4
 ecp_nistz256_ord_sqr_mont:
+	AARCH64_VALID_CALL_TARGET
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
 	stp	x29,x30,[sp,#-64]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -4016,6 +4020,8 @@ ecp_nistz256_ord_sqr_mont:
 .type	ecp_nistz256_scatter_w5,%function
 .align	4
 ecp_nistz256_scatter_w5:
+	AARCH64_VALID_CALL_TARGET
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
@@ -4078,6 +4084,8 @@ ecp_nistz256_scatter_w5:
 .type	ecp_nistz256_gather_w5,%function
 .align	4
 ecp_nistz256_gather_w5:
+	AARCH64_VALID_CALL_TARGET
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
@@ -4155,6 +4163,8 @@ ecp_nistz256_gather_w5:
 .type	ecp_nistz256_scatter_w7,%function
 .align	4
 ecp_nistz256_scatter_w7:
+	AARCH64_VALID_CALL_TARGET
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
@@ -4199,6 +4209,8 @@ ecp_nistz256_scatter_w7:
 .type	ecp_nistz256_gather_w7,%function
 .align	4
 ecp_nistz256_gather_w7:
+	AARCH64_VALID_CALL_TARGET
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
diff --git a/sys/crypto/openssl/aarch64/ghashv8-armx.S b/sys/crypto/openssl/aarch64/ghashv8-armx.S
index cb7aa9a3908b..42f053d664ef 100644
--- a/sys/crypto/openssl/aarch64/ghashv8-armx.S
+++ b/sys/crypto/openssl/aarch64/ghashv8-armx.S
@@ -8,6 +8,7 @@
 .type	gcm_init_v8,%function
 .align	4
 gcm_init_v8:
+	AARCH64_VALID_CALL_TARGET
 	ld1	{v17.2d},[x1]		//load input H
 	movi	v19.16b,#0xe1
 	shl	v19.2d,v19.2d,#57		//0xc2.0
@@ -98,6 +99,7 @@ gcm_init_v8:
 .type	gcm_gmult_v8,%function
 .align	4
 gcm_gmult_v8:
+	AARCH64_VALID_CALL_TARGET
 	ld1	{v17.2d},[x0]		//load Xi
 	movi	v19.16b,#0xe1
 	ld1	{v20.2d,v21.2d},[x1]	//load twisted H, ...
@@ -139,6 +141,7 @@ gcm_gmult_v8:
 .type	gcm_ghash_v8,%function
 .align	4
 gcm_ghash_v8:
+	AARCH64_VALID_CALL_TARGET
 	cmp	x3,#64
 	b.hs	.Lgcm_ghash_v8_4x
 	ld1	{v0.2d},[x0]		//load [rotated] Xi
diff --git a/sys/crypto/openssl/aarch64/keccak1600-armv8.S b/sys/crypto/openssl/aarch64/keccak1600-armv8.S
index cf63318a8d08..08b3cc351213 100644
--- a/sys/crypto/openssl/aarch64/keccak1600-armv8.S
+++ b/sys/crypto/openssl/aarch64/keccak1600-armv8.S
@@ -1,4 +1,6 @@
 /* Do not modify. This file is auto-generated from keccak1600-armv8.pl. */
+#include "arm_arch.h"
+
 .text
 
 .align	8	// strategic alignment and padding that allows to use
@@ -34,8 +36,8 @@ iotas:
 .type	KeccakF1600_int,%function
 .align	5
 KeccakF1600_int:
+	AARCH64_SIGN_LINK_REGISTER
 	adr	x28,iotas
-.inst	0xd503233f			// paciasp
 	stp	x28,x30,[sp,#16]		// 32 bytes on top are mine
 	b	.Loop
 .align	4
@@ -199,14 +201,14 @@ KeccakF1600_int:
 	bne	.Loop
 
 	ldr	x30,[sp,#24]
-.inst	0xd50323bf			// autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	KeccakF1600_int,.-KeccakF1600_int
 
 .type	KeccakF1600,%function
 .align	5
 KeccakF1600:
-.inst	0xd503233f			// paciasp
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-128]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -256,7 +258,7 @@ KeccakF1600:
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#128
-.inst	0xd50323bf			// autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	KeccakF1600,.-KeccakF1600
 
@@ -264,7 +266,7 @@ KeccakF1600:
 .type	SHA3_absorb,%function
 .align	5
 SHA3_absorb:
-.inst	0xd503233f			// paciasp
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-128]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -498,14 +500,14 @@ SHA3_absorb:
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#128
-.inst	0xd50323bf			// autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	SHA3_absorb,.-SHA3_absorb
 .globl	SHA3_squeeze
 .type	SHA3_squeeze,%function
 .align	5
 SHA3_squeeze:
-.inst	0xd503233f			// paciasp
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-48]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -568,7 +570,7 @@ SHA3_squeeze:
 	ldp	x19,x20,[sp,#16]
 	ldp	x21,x22,[sp,#32]
 	ldp	x29,x30,[sp],#48
-.inst	0xd50323bf			// autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	SHA3_squeeze,.-SHA3_squeeze
 .type	KeccakF1600_ce,%function
@@ -676,7 +678,7 @@ KeccakF1600_ce:
 .type	KeccakF1600_cext,%function
 .align	5
 KeccakF1600_cext:
-.inst	0xd503233f		// paciasp
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-80]!
 	add	x29,sp,#0
 	stp	d8,d9,[sp,#16]		// per ABI requirement
@@ -717,14 +719,14 @@ KeccakF1600_cext:
 	ldp	d12,d13,[sp,#48]
 	ldp	d14,d15,[sp,#64]
 	ldr	x29,[sp],#80
-.inst	0xd50323bf		// autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	KeccakF1600_cext,.-KeccakF1600_cext
 .globl	SHA3_absorb_cext
 .type	SHA3_absorb_cext,%function
 .align	5
 SHA3_absorb_cext:
-.inst	0xd503233f		// paciasp
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-80]!
 	add	x29,sp,#0
 	stp	d8,d9,[sp,#16]		// per ABI requirement
@@ -940,14 +942,14 @@ SHA3_absorb_cext:
 	ldp	d12,d13,[sp,#48]
 	ldp	d14,d15,[sp,#64]
 	ldp	x29,x30,[sp],#80
-.inst	0xd50323bf		// autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	SHA3_absorb_cext,.-SHA3_absorb_cext
 .globl	SHA3_squeeze_cext
 .type	SHA3_squeeze_cext,%function
 .align	5
 SHA3_squeeze_cext:
-.inst	0xd503233f		// paciasp
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	mov	x9,x0
@@ -1003,7 +1005,7 @@ SHA3_squeeze_cext:
 
 .Lsqueeze_done_ce:
 	ldr	x29,[sp],#16
-.inst	0xd50323bf		// autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	SHA3_squeeze_cext,.-SHA3_squeeze_cext
 .byte	75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
diff --git a/sys/crypto/openssl/aarch64/poly1305-armv8.S b/sys/crypto/openssl/aarch64/poly1305-armv8.S
index bf1daa3adaff..8925984c3ee0 100644
--- a/sys/crypto/openssl/aarch64/poly1305-armv8.S
+++ b/sys/crypto/openssl/aarch64/poly1305-armv8.S
@@ -16,6 +16,7 @@
 .type	poly1305_init,%function
 .align	5
 poly1305_init:
+	AARCH64_VALID_CALL_TARGET
 	cmp	x1,xzr
 	stp	xzr,xzr,[x0]		// zero hash value
 	stp	xzr,xzr,[x0,#16]	// [along with is_base2_26]
@@ -63,6 +64,9 @@ poly1305_init:
 .align	5
 poly1305_blocks:
 .Lpoly1305_blocks:
+	// The symbol .Lpoly1305_blocks is not a .globl symbol
+	// but a pointer to it is returned by poly1305_init
+	AARCH64_VALID_CALL_TARGET
 	ands	x2,x2,#-16
 	b.eq	.Lno_data
 
@@ -128,6 +132,9 @@ poly1305_blocks:
 .align	5
 poly1305_emit:
 .Lpoly1305_emit:
+	// The symbol .poly1305_emit is not a .globl symbol
+	// but a pointer to it is returned by poly1305_init
+	AARCH64_VALID_CALL_TARGET
 	ldp	x4,x5,[x0]		// load hash base 2^64
 	ldr	x6,[x0,#16]
 	ldp	x10,x11,[x2]	// load nonce
@@ -223,13 +230,16 @@ poly1305_splat:
 .align	5
 poly1305_blocks_neon:
 .Lpoly1305_blocks_neon:
+	// The symbol .Lpoly1305_blocks_neon is not a .globl symbol
+	// but a pointer to it is returned by poly1305_init
+	AARCH64_VALID_CALL_TARGET
 	ldr	x17,[x0,#24]
 	cmp	x2,#128
 	b.hs	.Lblocks_neon
 	cbz	x17,.Lpoly1305_blocks
 
 .Lblocks_neon:
-.inst	0xd503233f		// paciasp
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-80]!
 	add	x29,sp,#0
 
@@ -799,7 +809,7 @@ poly1305_blocks_neon:
 
 .Lno_data_neon:
 	ldr	x29,[sp],#80
-.inst	0xd50323bf		// autiasp
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	poly1305_blocks_neon,.-poly1305_blocks_neon
 
@@ -807,6 +817,9 @@ poly1305_blocks_neon:
 .align	5
 poly1305_emit_neon:
 .Lpoly1305_emit_neon:
+	// The symbol .Lpoly1305_emit_neon is not a .globl symbol
+	// but a pointer to it is returned by poly1305_init
+	AARCH64_VALID_CALL_TARGET
 	ldr	x17,[x0,#24]
 	cbz	x17,poly1305_emit
 
diff --git a/sys/crypto/openssl/aarch64/sha1-armv8.S b/sys/crypto/openssl/aarch64/sha1-armv8.S
index a9e1d81d5817..9e2d86072394 100644
--- a/sys/crypto/openssl/aarch64/sha1-armv8.S
+++ b/sys/crypto/openssl/aarch64/sha1-armv8.S
@@ -1,6 +1,6 @@
 /* Do not modify. This file is auto-generated from sha1-armv8.pl. */
+#include "arm_arch.h"
 #ifndef	__KERNEL__
-# include "arm_arch.h"
 
 .hidden	OPENSSL_armcap_P
 #endif
@@ -11,11 +11,13 @@
 .type	sha1_block_data_order,%function
 .align	6
 sha1_block_data_order:
+	AARCH64_VALID_CALL_TARGET
 	adrp	x16,OPENSSL_armcap_P
 	ldr	w16,[x16,#:lo12:OPENSSL_armcap_P]
 	tst	w16,#ARMV8_SHA1
 	b.ne	.Lv8_entry
 
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
 	stp	x29,x30,[sp,#-96]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -1075,6 +1077,7 @@ sha1_block_data_order:
 .align	6
 sha1_block_armv8:
 .Lv8_entry:
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
 	stp	x29,x30,[sp,#-16]!
*** 349 LINES SKIPPED ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202407151237.46FCbpW9058307>