From owner-svn-src-all@freebsd.org Fri Jun 5 13:54:13 2020 Return-Path: Delivered-To: svn-src-all@mailman.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.nyi.freebsd.org (Postfix) with ESMTP id BF77532B86C; Fri, 5 Jun 2020 13:54:13 +0000 (UTC) (envelope-from arichardson@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 49dkdx54XPz3b76; Fri, 5 Jun 2020 13:54:13 +0000 (UTC) (envelope-from arichardson@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 9098E11CCF; Fri, 5 Jun 2020 13:54:13 +0000 (UTC) (envelope-from arichardson@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id 055DsDZt021080; Fri, 5 Jun 2020 13:54:13 GMT (envelope-from arichardson@FreeBSD.org) Received: (from arichardson@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id 055DsDOb021079; Fri, 5 Jun 2020 13:54:13 GMT (envelope-from arichardson@FreeBSD.org) Message-Id: <202006051354.055DsDOb021079@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: arichardson set sender to arichardson@FreeBSD.org using -f From: Alex Richardson Date: Fri, 5 Jun 2020 13:54:13 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r361833 - head/sys/crypto/skein/amd64 X-SVN-Group: head X-SVN-Commit-Author: arichardson X-SVN-Commit-Paths: 
head/sys/crypto/skein/amd64 X-SVN-Commit-Revision: 361833 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.33 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 05 Jun 2020 13:54:13 -0000 Author: arichardson Date: Fri Jun 5 13:54:13 2020 New Revision: 361833 URL: https://svnweb.freebsd.org/changeset/base/361833 Log: Allow assembling skein_block_asm.s with clang. GNU as seems to allow macro arguments without the '\' but clang is more strict in that regard. This change makes the source code compatible with LLVM's assembler but does not yet change the build system or rename it to .S. The new code assembles identically with GNU as 2.17.50. Reviewed By: emaste Differential Revision: https://reviews.freebsd.org/D25143 Modified: head/sys/crypto/skein/amd64/skein_block_asm.s Modified: head/sys/crypto/skein/amd64/skein_block_asm.s ============================================================================== --- head/sys/crypto/skein/amd64/skein_block_asm.s Fri Jun 5 09:58:59 2020 (r361832) +++ head/sys/crypto/skein/amd64/skein_block_asm.s Fri Jun 5 13:54:13 2020 (r361833) @@ -10,7 +10,9 @@ # .text .altmacro +#ifndef __clang__ .psize 0,128 #list file has no page boundaries +#endif # _MASK_ALL_ = (256+512+1024) #all three algorithm bits _MAX_FRAME_ = 240 @@ -240,9 +242,8 @@ RC_1024_7_7 = 20 # Output: <<< RC_BlkSize_roundNum_mixNum, BlkSize=256/512/1024 # .macro RotL64 reg,BLK_SIZE,ROUND_NUM,MIX_NUM -_RCNT_ = RC_\BLK_SIZE&_\ROUND_NUM&_\MIX_NUM - .if _RCNT_ #is there anything to do? - rolq $_RCNT_,%\reg + .if RC_\BLK_SIZE\()_\ROUND_NUM\()_\MIX_NUM #is there anything to do? 
+ rolq $RC_\BLK_SIZE\()_\ROUND_NUM\()_\MIX_NUM,%\reg .endif .endm # @@ -334,7 +335,7 @@ __STK_FRM_OFFS_\BLK_BITS = FRAME_OFFS #---------------------------------------------------------------- # .macro Reset_Stack - addq $LOCAL_SIZE,%rsp #get rid of locals (wipe??) + addq $LOCAL_SIZE,%rsp #get rid of locals (wipe?) .irp _reg_,r15,r14,r13,r12,rbx,rbp popq %\_reg_ #restore caller's regs _PushCnt_ = _PushCnt_ - 1 @@ -724,22 +725,22 @@ C_label Skein_256_Unroll_Cnt .macro R_512_OneRound rn0,rn1,rn2,rn3,rn4,rn5,rn6,rn7,_Rn_,op1,op2,op3,op4 # addReg r\rn0, r\rn1 - RotL64 r\rn1, 512,%((_Rn_) % 8),0 + RotL64 r\rn1, 512,%((\_Rn_) % 8),0 xorReg r\rn1, r\rn0 - op1 + \op1 addReg r\rn2, r\rn3 - RotL64 r\rn3, 512,%((_Rn_) % 8),1 + RotL64 r\rn3, 512,%((\_Rn_) % 8),1 xorReg r\rn3, r\rn2 - op2 + \op2 addReg r\rn4, r\rn5 - RotL64 r\rn5, 512,%((_Rn_) % 8),2 + RotL64 r\rn5, 512,%((\_Rn_) % 8),2 xorReg r\rn5, r\rn4 - op3 + \op3 addReg r\rn6, r\rn7 - RotL64 r\rn7, 512,%((_Rn_) % 8),3 + RotL64 r\rn7, 512,%((\_Rn_) % 8),3 xorReg r\rn7, r\rn6 - op4 - Skein_Debug_Round 512,%(_Rn_+1),-4 + \op4 + Skein_Debug_Round 512,%(\_Rn_+1),-4 # .endm #R_512_OneRound # @@ -749,11 +750,11 @@ C_label Skein_256_Unroll_Cnt .macro R_512_FourRounds _RR_ #RR = base round number (0 % 8) .if (SKEIN_ASM_UNROLL && 512) # here for fully unrolled case. 
- _II_ = ((_RR_)/4) + 1 #key injection counter - R_512_OneRound 8, 9,10,11,12,13,14,15,%((_RR_)+0),,, - R_512_OneRound 10, 9,12,15,14,13, 8,11,%((_RR_)+1),,, - R_512_OneRound 12, 9,14,11, 8,13,10,15,%((_RR_)+2),,, - R_512_OneRound 14, 9, 8,15,10,13,12,11,%((_RR_)+3),, + _II_ = ((\_RR_)/4) + 1 #key injection counter + R_512_OneRound 8, 9,10,11,12,13,14,15,%((\_RR_)+0),,, + R_512_OneRound 10, 9,12,15,14,13, 8,11,%((\_RR_)+1),,, + R_512_OneRound 12, 9,14,11, 8,13,10,15,%((\_RR_)+2),,, + R_512_OneRound 14, 9, 8,15,10,13,12,11,%((\_RR_)+3),, # inject the key schedule addq ksKey+8*(((_II_)+0)%9)+F_O(%rbp),%r8 addReg r11, rax @@ -766,10 +767,10 @@ C_label Skein_256_Unroll_Cnt .else # here for looping case #"rotate" key/tweak schedule (move up on stack) incq %rdi #bump key injection counter - R_512_OneRound 8, 9,10,11,12,13,14,15,%((_RR_)+0),,, - R_512_OneRound 10, 9,12,15,14,13, 8,11,%((_RR_)+1),,, - R_512_OneRound 12, 9,14,11, 8,13,10,15,%((_RR_)+2),,, - R_512_OneRound 14, 9, 8,15,10,13,12,11,%((_RR_)+3),, + R_512_OneRound 8, 9,10,11,12,13,14,15,%((\_RR_)+0),,, + R_512_OneRound 10, 9,12,15,14,13, 8,11,%((\_RR_)+1),,, + R_512_OneRound 12, 9,14,11, 8,13,10,15,%((\_RR_)+2),,, + R_512_OneRound 14, 9, 8,15,10,13,12,11,%((\_RR_)+3),, # inject the key schedule addq ksKey+8*0+F_O(%rbp,%rdi,8),%r8 addReg r11, rax @@ -814,9 +815,9 @@ Skein_512_block_loop: movq %rbx,ksTwk+ 8+F_O(%rbp) movq %rcx,ksTwk+16+F_O(%rbp) .irp _Rn_,8,9,10,11,12,13,14,15 - movq X_VARS+8*(_Rn_-8)(%rdi),%r\_Rn_ + movq X_VARS+8*(\_Rn_-8)(%rdi),%r\_Rn_ xorq %r\_Rn_,%rdx #compute overall parity - movq %r\_Rn_,ksKey+8*(_Rn_-8)+F_O(%rbp) + movq %r\_Rn_,ksKey+8*(\_Rn_-8)+F_O(%rbp) .endr #load state into %r8 ..%r15, compute parity movq %rdx,ksKey+8*(8)+F_O(%rbp)#save key schedule parity @@ -853,7 +854,7 @@ Skein_512_block_loop: .if _SKEIN_DEBUG .irp _Rn_,8,9,10,11,12,13,14,15 #save values on stack for debug output - movq %r\_Rn_,X_stk+8*(_Rn_-8)(%rsp) + movq %r\_Rn_,X_stk+8*(\_Rn_-8)(%rsp) .endr Skein_Debug_Block 512 
#debug dump @@ -891,12 +892,12 @@ _Rbase_ = _Rbase_+1 ################# # feedforward: ctx->X[i] = X[i] ^ w[i], {i=0..7} .irp _Rn_,8,9,10,11,12,13,14,15 - .if (_Rn_ == 8) + .if (\_Rn_ == 8) movq $FIRST_MASK64,%rbx .endif - xorq Wcopy+8*(_Rn_-8)+F_O(%rbp),%r\_Rn_ #feedforward XOR - movq %r\_Rn_,X_VARS+8*(_Rn_-8)(%rdi) #and store result - .if (_Rn_ == 14) + xorq Wcopy+8*(\_Rn_-8)+F_O(%rbp),%r\_Rn_ #feedforward XOR + movq %r\_Rn_,X_VARS+8*(\_Rn_-8)(%rdi) #and store result + .if (\_Rn_ == 14) andq TWEAK+ 8(%rdi),%rbx .endif .endr @@ -917,7 +918,7 @@ Skein_Debug_Round_512: pushq %rsi #save two regs for BLK_BITS-specific parms pushq %rdi .irp _Rn_,8,9,10,11,12,13,14,15 #save X[] state on stack so debug routines can access it - movq %r\_Rn_,X_stk+8*(_Rn_-8)+F_O(%rbp) + movq %r\_Rn_,X_stk+8*(\_Rn_-8)+F_O(%rbp) .endr movq ctxPtr+F_O(%rbp),%rsi #ctx_hdr_ptr movq $512,%rdi #now are set for the call @@ -968,50 +969,50 @@ rIdx_offs = tmpStk_1024 # .macro r1024_Mix w0,w1,reg0,reg1,_RN0_,_Rn1_,op1 addReg \reg0 , \reg1 #perform the MIX - RotL64 \reg1 , 1024,%((_RN0_) % 8),_Rn1_ + RotL64 \reg1 , 1024,%((\_RN0_) % 8),\_Rn1_ xorReg \reg1 , \reg0 -.if ((_RN0_) && 3) == 3 #time to do key injection? +.if ((\_RN0_) && 3) == 3 #time to do key injection? 
.if _SKEIN_DEBUG - movq %\reg0 , xDebug_1024+8*w0(%rsp) #save intermediate values for Debug_Round - movq %\reg1 , xDebug_1024+8*w1(%rsp) # (before inline key injection) + movq %\reg0 , xDebug_1024+8*\w0(%rsp) #save intermediate values for Debug_Round + movq %\reg1 , xDebug_1024+8*\w1(%rsp) # (before inline key injection) .endif -_II_ = ((_RN0_)/4)+1 #injection count +_II_ = ((\_RN0_)/4)+1 #injection count .if SKEIN_ASM_UNROLL && 1024 #here to do fully unrolled key injection - addq ksKey+ 8*((_II_+w0) % 17)(%rsp),%\reg0 - addq ksKey+ 8*((_II_+w1) % 17)(%rsp),%\reg1 - .if w1 == 13 #tweak injection + addq ksKey+ 8*((_II_+\w0) % 17)(%rsp),%\reg0 + addq ksKey+ 8*((_II_+\w1) % 17)(%rsp),%\reg1 + .if \w1 == 13 #tweak injection addq ksTwk+ 8*((_II_+ 0) % 3)(%rsp),%\reg1 - .elseif w0 == 14 + .elseif \w0 == 14 addq ksTwk+ 8*((_II_+ 1) % 3)(%rsp),%\reg0 - .elseif w1 == 15 + .elseif \w1 == 15 addq $_II_, %\reg1 #(injection counter) .endif .else #here to do looping key injection - .if (w0 == 0) - movq %rdi, X_stk+8*w0(%rsp) #if so, store N0 so we can use reg as index + .if (\w0 == 0) + movq %rdi, X_stk+8*\w0(%rsp) #if so, store N0 so we can use reg as index movq rIdx_offs(%rsp),%rdi #get the injection counter index into rdi .else - addq ksKey+8+8*w0(%rsp,%rdi,8),%\reg0 #even key injection + addq ksKey+8+8*\w0(%rsp,%rdi,8),%\reg0 #even key injection .endif - .if w1 == 13 #tweak injection + .if \w1 == 13 #tweak injection addq ksTwk+8+8* 0(%rsp,%rdi,8),%\reg1 - .elseif w0 == 14 + .elseif \w0 == 14 addq ksTwk+8+8* 1(%rsp,%rdi,8),%\reg0 - .elseif w1 == 15 + .elseif \w1 == 15 addReg \reg1,rdi,,,1 #(injection counter) .endif - addq ksKey+8+8*w1(%rsp,%rdi,8),%\reg1 #odd key injection + addq ksKey+8+8*\w1(%rsp,%rdi,8),%\reg1 #odd key injection .endif .endif # insert the op provided, .if any - op1 + \op1 .endm ################# # MACRO: four rounds for 1024-bit blocks # .macro r1024_FourRounds _RR_ #RR = base round number (0 mod 4) # should be here with X4 set properly, X6 stored on 
stack -_Rn_ = (_RR_) + 0 +_Rn_ = (\_RR_) + 0 r1024_Mix 0, 1,rdi,rsi,_Rn_,0 r1024_Mix 2, 3,rbp,rax,_Rn_,1 r1024_Mix 4, 5,rcx,rbx,_Rn_,2, #save X4 on stack (x4/x6 alternate) @@ -1023,7 +1024,7 @@ _Rn_ = (_RR_) + 0 .if _SKEIN_DEBUG Skein_Debug_Round 1024,%(_Rn_+1) .endif -_Rn_ = (_RR_) + 1 +_Rn_ = (\_RR_) + 1 r1024_Mix 0, 9,rdi,r9 ,_Rn_,0 r1024_Mix 2,13,rbp,r13,_Rn_,1 r1024_Mix 6,11,rcx,r11,_Rn_,2, #save X6 on stack (x4/x6 alternate) @@ -1035,7 +1036,7 @@ _Rn_ = (_RR_) + 1 .if _SKEIN_DEBUG Skein_Debug_Round 1024,%(_Rn_+1) .endif -_Rn_ = (_RR_) + 2 +_Rn_ = (\_RR_) + 2 r1024_Mix 0, 7,rdi,rdx,_Rn_,0 r1024_Mix 2, 5,rbp,rbx,_Rn_,1 r1024_Mix 4, 3,rcx,rax,_Rn_,2, #save X4 on stack (x4/x6 alternate) @@ -1047,7 +1048,7 @@ _Rn_ = (_RR_) + 2 .if _SKEIN_DEBUG Skein_Debug_Round 1024,%(_Rn_+1) .endif -_Rn_ = (_RR_) + 3 +_Rn_ = (\_RR_) + 3 r1024_Mix 0,15,rdi,r15,_Rn_,0 r1024_Mix 2,11,rbp,r11,_Rn_,1 r1024_Mix 6,13,rcx,r13,_Rn_,2, #save X6 on stack (x4/x6 alternate) @@ -1116,13 +1117,13 @@ Skein1024_block_loop: # the logic here assumes the set {rdi,rsi,rbp,rax} = X[0,1,2,3] .irp _rN_,0,1,2,3,4,6 #process the "initial" words, using r14/r15 as temps - movq X_VARS+8*_rN_(%rdi),%r14 #get state word - movq 8*_rN_(%rsi),%r15 #get msg word + movq X_VARS+8*\_rN_(%rdi),%r14 #get state word + movq 8*\_rN_(%rsi),%r15 #get msg word xorq %r14,%rax #update key schedule overall parity - movq %r14,ksKey +8*_rN_+F_O(%rbp) #save key schedule word on stack - movq %r15,Wcopy +8*_rN_+F_O(%rbp) #save local msg Wcopy + movq %r14,ksKey +8*\_rN_+F_O(%rbp) #save key schedule word on stack + movq %r15,Wcopy +8*\_rN_+F_O(%rbp) #save local msg Wcopy addq %r15,%r14 #do the initial key injection - movq %r14,X_stk +8*_rN_ (%rsp) #save initial state var on stack + movq %r14,X_stk +8*\_rN_ (%rsp) #save initial state var on stack .endr # now process the rest, using the "real" registers # (MUST do it in reverse order to inject tweaks r8/r9 first) @@ -1135,9 +1136,9 @@ _oo_ = o1K_\_rr_ #offset ass movq 
%rcx,Wcopy+8*_oo_+F_O(%rbp) #save copy of msg word for feedforward addq %rcx,%\_rr_ #do the initial key injection .if _oo_ == 13 #do the initial tweak injection - addReg _rr_,r8 # (only in words 13/14) + addReg \_rr_,r8 # (only in words 13/14) .elseif _oo_ == 14 - addReg _rr_,r9 + addReg \_rr_,r9 .endif .endr movq %rax,ksKey+8*WCNT+F_O(%rbp) #save key schedule parity