Date: Fri, 5 Jun 2020 13:54:13 +0000 (UTC) From: Alex Richardson <arichardson@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r361833 - head/sys/crypto/skein/amd64 Message-ID: <202006051354.055DsDOb021079@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: arichardson Date: Fri Jun 5 13:54:13 2020 New Revision: 361833 URL: https://svnweb.freebsd.org/changeset/base/361833 Log: Allow assembling skein_block_asm.s with clang GNU as seems to allow macro arguments without the '\' but clang is more strict in that regard. This change makes the source code compatible with LLVM's but does not yet change the build system or rename it to .S. The new code assembles identically with GNU as 2.17.50. Reviewed By: emaste Differential Revision: https://reviews.freebsd.org/D25143 Modified: head/sys/crypto/skein/amd64/skein_block_asm.s Modified: head/sys/crypto/skein/amd64/skein_block_asm.s ============================================================================== --- head/sys/crypto/skein/amd64/skein_block_asm.s Fri Jun 5 09:58:59 2020 (r361832) +++ head/sys/crypto/skein/amd64/skein_block_asm.s Fri Jun 5 13:54:13 2020 (r361833) @@ -10,7 +10,9 @@ # .text .altmacro +#ifndef __clang__ .psize 0,128 #list file has no page boundaries +#endif # _MASK_ALL_ = (256+512+1024) #all three algorithm bits _MAX_FRAME_ = 240 @@ -240,9 +242,8 @@ RC_1024_7_7 = 20 # Output: <reg> <<< RC_BlkSize_roundNum_mixNum, BlkSize=256/512/1024 # .macro RotL64 reg,BLK_SIZE,ROUND_NUM,MIX_NUM -_RCNT_ = RC_\BLK_SIZE&_\ROUND_NUM&_\MIX_NUM - .if _RCNT_ #is there anything to do? - rolq $_RCNT_,%\reg + .if RC_\BLK_SIZE\()_\ROUND_NUM\()_\MIX_NUM #is there anything to do? + rolq $RC_\BLK_SIZE\()_\ROUND_NUM\()_\MIX_NUM,%\reg .endif .endm # @@ -334,7 +335,7 @@ __STK_FRM_OFFS_\BLK_BITS = FRAME_OFFS #---------------------------------------------------------------- # .macro Reset_Stack - addq $LOCAL_SIZE,%rsp #get rid of locals (wipe??) + addq $LOCAL_SIZE,%rsp #get rid of locals (wipe?) .irp _reg_,r15,r14,r13,r12,rbx,rbp popq %\_reg_ #restore caller's regs _PushCnt_ = _PushCnt_ - 1 @@ -724,22 +725,22 @@ C_label Skein_256_Unroll_Cnt .macro R_512_OneRound rn0,rn1,rn2,rn3,rn4,rn5,rn6,rn7,_Rn_,op1,op2,op3,op4 # addReg r\rn0, r\rn1 - RotL64 r\rn1, 512,%((_Rn_) % 8),0 + RotL64 r\rn1, 512,%((\_Rn_) % 8),0 xorReg r\rn1, r\rn0 - op1 + \op1 addReg r\rn2, r\rn3 - RotL64 r\rn3, 512,%((_Rn_) % 8),1 + RotL64 r\rn3, 512,%((\_Rn_) % 8),1 xorReg r\rn3, r\rn2 - op2 + \op2 addReg r\rn4, r\rn5 - RotL64 r\rn5, 512,%((_Rn_) % 8),2 + RotL64 r\rn5, 512,%((\_Rn_) % 8),2 xorReg r\rn5, r\rn4 - op3 + \op3 addReg r\rn6, r\rn7 - RotL64 r\rn7, 512,%((_Rn_) % 8),3 + RotL64 r\rn7, 512,%((\_Rn_) % 8),3 xorReg r\rn7, r\rn6 - op4 - Skein_Debug_Round 512,%(_Rn_+1),-4 + \op4 + Skein_Debug_Round 512,%(\_Rn_+1),-4 # .endm #R_512_OneRound # @@ -749,11 +750,11 @@ C_label Skein_256_Unroll_Cnt .macro R_512_FourRounds _RR_ #RR = base round number (0 % 8) .if (SKEIN_ASM_UNROLL && 512) # here for fully unrolled case. - _II_ = ((_RR_)/4) + 1 #key injection counter - R_512_OneRound 8, 9,10,11,12,13,14,15,%((_RR_)+0),<movq ksKey+8*(((_II_)+3) % 9)+F_O(%rbp),%rax>,,<movq ksKey+8*(((_II_)+4) % 9)+F_O(%rbp),%rbx> - R_512_OneRound 10, 9,12,15,14,13, 8,11,%((_RR_)+1),<movq ksKey+8*(((_II_)+5) % 9)+F_O(%rbp),%rcx>,,<movq ksKey+8*(((_II_)+6) % 9)+F_O(%rbp),%rdx> - R_512_OneRound 12, 9,14,11, 8,13,10,15,%((_RR_)+2),<movq ksKey+8*(((_II_)+7) % 9)+F_O(%rbp),%rsi>,,<addq ksTwk+8*(((_II_)+0) % 3)+F_O(%rbp),%rcx> - R_512_OneRound 14, 9, 8,15,10,13,12,11,%((_RR_)+3),<addq ksTwk+8*(((_II_)+1) % 3)+F_O(%rbp),%rdx>, + _II_ = ((\_RR_)/4) + 1 #key injection counter + R_512_OneRound 8, 9,10,11,12,13,14,15,%((\_RR_)+0),<movq ksKey+8*(((_II_)+3) % 9)+F_O(%rbp),%rax>,,<movq ksKey+8*(((_II_)+4) % 9)+F_O(%rbp),%rbx> + R_512_OneRound 10, 9,12,15,14,13, 8,11,%((\_RR_)+1),<movq ksKey+8*(((_II_)+5) % 9)+F_O(%rbp),%rcx>,,<movq ksKey+8*(((_II_)+6) % 9)+F_O(%rbp),%rdx> + R_512_OneRound 12, 9,14,11, 8,13,10,15,%((\_RR_)+2),<movq ksKey+8*(((_II_)+7) % 9)+F_O(%rbp),%rsi>,,<addq ksTwk+8*(((_II_)+0) % 3)+F_O(%rbp),%rcx> + R_512_OneRound 14, 9, 8,15,10,13,12,11,%((\_RR_)+3),<addq ksTwk+8*(((_II_)+1) % 3)+F_O(%rbp),%rdx>, # inject the key schedule addq ksKey+8*(((_II_)+0)%9)+F_O(%rbp),%r8 addReg r11, rax @@ -766,10 +767,10 @@ C_label Skein_256_Unroll_Cnt .else # here for looping case #"rotate" key/tweak schedule (move up on stack) incq %rdi #bump key injection counter - R_512_OneRound 8, 9,10,11,12,13,14,15,%((_RR_)+0),<movq ksKey+8*6+F_O(%rbp,%rdi,8),%rdx>,<movq ksTwk-8*1+F_O(%rbp,%rdi,8),%rax>,<movq ksKey-8*1+F_O(%rbp,%rdi,8),%rsi> - R_512_OneRound 10, 9,12,15,14,13, 8,11,%((_RR_)+1),<movq ksKey+8*5+F_O(%rbp,%rdi,8),%rcx>,<movq %rax,ksTwk+8*2+F_O(%rbp,%rdi,8) >,<movq %rsi,ksKey+8*8+F_O(%rbp,%rdi,8)> - R_512_OneRound 12, 9,14,11, 8,13,10,15,%((_RR_)+2),<movq ksKey+8*4+F_O(%rbp,%rdi,8),%rbx>,<addq ksTwk+8*1+F_O(%rbp,%rdi,8),%rdx>,<movq ksKey+8*7+F_O(%rbp,%rdi,8),%rsi> - R_512_OneRound 14, 9, 8,15,10,13,12,11,%((_RR_)+3),<movq ksKey+8*3+F_O(%rbp,%rdi,8),%rax>,<addq ksTwk+8*0+F_O(%rbp,%rdi,8),%rcx> + R_512_OneRound 8, 9,10,11,12,13,14,15,%((\_RR_)+0),<movq ksKey+8*6+F_O(%rbp,%rdi,8),%rdx>,<movq ksTwk-8*1+F_O(%rbp,%rdi,8),%rax>,<movq ksKey-8*1+F_O(%rbp,%rdi,8),%rsi> + R_512_OneRound 10, 9,12,15,14,13, 8,11,%((\_RR_)+1),<movq ksKey+8*5+F_O(%rbp,%rdi,8),%rcx>,<movq %rax,ksTwk+8*2+F_O(%rbp,%rdi,8) >,<movq %rsi,ksKey+8*8+F_O(%rbp,%rdi,8)> + R_512_OneRound 12, 9,14,11, 8,13,10,15,%((\_RR_)+2),<movq ksKey+8*4+F_O(%rbp,%rdi,8),%rbx>,<addq ksTwk+8*1+F_O(%rbp,%rdi,8),%rdx>,<movq ksKey+8*7+F_O(%rbp,%rdi,8),%rsi> + R_512_OneRound 14, 9, 8,15,10,13,12,11,%((\_RR_)+3),<movq ksKey+8*3+F_O(%rbp,%rdi,8),%rax>,<addq ksTwk+8*0+F_O(%rbp,%rdi,8),%rcx> # inject the key schedule addq ksKey+8*0+F_O(%rbp,%rdi,8),%r8 addReg r11, rax @@ -814,9 +815,9 @@ Skein_512_block_loop: movq %rbx,ksTwk+ 8+F_O(%rbp) movq %rcx,ksTwk+16+F_O(%rbp) .irp _Rn_,8,9,10,11,12,13,14,15 - movq X_VARS+8*(_Rn_-8)(%rdi),%r\_Rn_ + movq X_VARS+8*(\_Rn_-8)(%rdi),%r\_Rn_ xorq %r\_Rn_,%rdx #compute overall parity - movq %r\_Rn_,ksKey+8*(_Rn_-8)+F_O(%rbp) + movq %r\_Rn_,ksKey+8*(\_Rn_-8)+F_O(%rbp) .endr #load state into %r8 ..%r15, compute parity movq %rdx,ksKey+8*(8)+F_O(%rbp)#save key schedule parity @@ -853,7 +854,7 @@ Skein_512_block_loop: .if _SKEIN_DEBUG .irp _Rn_,8,9,10,11,12,13,14,15 #save values on stack for debug output - movq %r\_Rn_,X_stk+8*(_Rn_-8)(%rsp) + movq %r\_Rn_,X_stk+8*(\_Rn_-8)(%rsp) .endr Skein_Debug_Block 512 #debug dump @@ -891,12 +892,12 @@ _Rbase_ = _Rbase_+1 ################# # feedforward: ctx->X[i] = X[i] ^ w[i], {i=0..7} .irp _Rn_,8,9,10,11,12,13,14,15 - .if (_Rn_ == 8) + .if (\_Rn_ == 8) movq $FIRST_MASK64,%rbx .endif - xorq Wcopy+8*(_Rn_-8)+F_O(%rbp),%r\_Rn_ #feedforward XOR - movq %r\_Rn_,X_VARS+8*(_Rn_-8)(%rdi) #and store result - .if (_Rn_ == 14) + xorq Wcopy+8*(\_Rn_-8)+F_O(%rbp),%r\_Rn_ #feedforward XOR + movq %r\_Rn_,X_VARS+8*(\_Rn_-8)(%rdi) #and store result + .if (\_Rn_ == 14) andq TWEAK+ 8(%rdi),%rbx .endif .endr @@ -917,7 +918,7 @@ Skein_Debug_Round_512: pushq %rsi #save two regs for BLK_BITS-specific parms pushq %rdi .irp _Rn_,8,9,10,11,12,13,14,15 #save X[] state on stack so debug routines can access it - movq %r\_Rn_,X_stk+8*(_Rn_-8)+F_O(%rbp) + movq %r\_Rn_,X_stk+8*(\_Rn_-8)+F_O(%rbp) .endr movq ctxPtr+F_O(%rbp),%rsi #ctx_hdr_ptr movq $512,%rdi #now <rdi,rsi,rdx> are set for the call @@ -968,50 +969,50 @@ rIdx_offs = tmpStk_1024 # .macro r1024_Mix w0,w1,reg0,reg1,_RN0_,_Rn1_,op1 addReg \reg0 , \reg1 #perform the MIX - RotL64 \reg1 , 1024,%((_RN0_) % 8),_Rn1_ + RotL64 \reg1 , 1024,%((\_RN0_) % 8),\_Rn1_ xorReg \reg1 , \reg0 -.if ((_RN0_) && 3) == 3 #time to do key injection? +.if ((\_RN0_) && 3) == 3 #time to do key injection? .if _SKEIN_DEBUG - movq %\reg0 , xDebug_1024+8*w0(%rsp) #save intermediate values for Debug_Round - movq %\reg1 , xDebug_1024+8*w1(%rsp) # (before inline key injection) + movq %\reg0 , xDebug_1024+8*\w0(%rsp) #save intermediate values for Debug_Round + movq %\reg1 , xDebug_1024+8*\w1(%rsp) # (before inline key injection) .endif -_II_ = ((_RN0_)/4)+1 #injection count +_II_ = ((\_RN0_)/4)+1 #injection count .if SKEIN_ASM_UNROLL && 1024 #here to do fully unrolled key injection - addq ksKey+ 8*((_II_+w0) % 17)(%rsp),%\reg0 - addq ksKey+ 8*((_II_+w1) % 17)(%rsp),%\reg1 - .if w1 == 13 #tweak injection + addq ksKey+ 8*((_II_+\w0) % 17)(%rsp),%\reg0 + addq ksKey+ 8*((_II_+\w1) % 17)(%rsp),%\reg1 + .if \w1 == 13 #tweak injection addq ksTwk+ 8*((_II_+ 0) % 3)(%rsp),%\reg1 - .elseif w0 == 14 + .elseif \w0 == 14 addq ksTwk+ 8*((_II_+ 1) % 3)(%rsp),%\reg0 - .elseif w1 == 15 + .elseif \w1 == 15 addq $_II_, %\reg1 #(injection counter) .endif .else #here to do looping key injection - .if (w0 == 0) - movq %rdi, X_stk+8*w0(%rsp) #if so, store N0 so we can use reg as index + .if (\w0 == 0) + movq %rdi, X_stk+8*\w0(%rsp) #if so, store N0 so we can use reg as index movq rIdx_offs(%rsp),%rdi #get the injection counter index into rdi .else - addq ksKey+8+8*w0(%rsp,%rdi,8),%\reg0 #even key injection + addq ksKey+8+8*\w0(%rsp,%rdi,8),%\reg0 #even key injection .endif - .if w1 == 13 #tweak injection + .if \w1 == 13 #tweak injection addq ksTwk+8+8* 0(%rsp,%rdi,8),%\reg1 - .elseif w0 == 14 + .elseif \w0 == 14 addq ksTwk+8+8* 1(%rsp,%rdi,8),%\reg0 - .elseif w1 == 15 + .elseif \w1 == 15 addReg \reg1,rdi,,,1 #(injection counter) .endif - addq ksKey+8+8*w1(%rsp,%rdi,8),%\reg1 #odd key injection + addq ksKey+8+8*\w1(%rsp,%rdi,8),%\reg1 #odd key injection .endif .endif # insert the op provided, .if any - op1 + \op1 .endm ################# # MACRO: four rounds for 1024-bit blocks # .macro r1024_FourRounds _RR_ #RR = base round number (0 mod 4) # should be here with X4 set properly, X6 stored on stack -_Rn_ = (_RR_) + 0 +_Rn_ = (\_RR_) + 0 r1024_Mix 0, 1,rdi,rsi,_Rn_,0 r1024_Mix 2, 3,rbp,rax,_Rn_,1 r1024_Mix 4, 5,rcx,rbx,_Rn_,2,<movq %rcx,X_stk+8*4(%rsp)> #save X4 on stack (x4/x6 alternate) @@ -1023,7 +1024,7 @@ _Rn_ = (_RR_) + 0 .if _SKEIN_DEBUG Skein_Debug_Round 1024,%(_Rn_+1) .endif -_Rn_ = (_RR_) + 1 +_Rn_ = (\_RR_) + 1 r1024_Mix 0, 9,rdi,r9 ,_Rn_,0 r1024_Mix 2,13,rbp,r13,_Rn_,1 r1024_Mix 6,11,rcx,r11,_Rn_,2,<movq %rcx,X_stk+8*6(%rsp)> #save X6 on stack (x4/x6 alternate) @@ -1035,7 +1036,7 @@ _Rn_ = (_RR_) + 1 .if _SKEIN_DEBUG Skein_Debug_Round 1024,%(_Rn_+1) .endif -_Rn_ = (_RR_) + 2 +_Rn_ = (\_RR_) + 2 r1024_Mix 0, 7,rdi,rdx,_Rn_,0 r1024_Mix 2, 5,rbp,rbx,_Rn_,1 r1024_Mix 4, 3,rcx,rax,_Rn_,2,<movq %rcx,X_stk+8*4(%rsp)> #save X4 on stack (x4/x6 alternate) @@ -1047,7 +1048,7 @@ _Rn_ = (_RR_) + 2 .if _SKEIN_DEBUG Skein_Debug_Round 1024,%(_Rn_+1) .endif -_Rn_ = (_RR_) + 3 +_Rn_ = (\_RR_) + 3 r1024_Mix 0,15,rdi,r15,_Rn_,0 r1024_Mix 2,11,rbp,r11,_Rn_,1 r1024_Mix 6,13,rcx,r13,_Rn_,2,<movq %rcx,X_stk+8*6(%rsp)> #save X6 on stack (x4/x6 alternate) @@ -1116,13 +1117,13 @@ Skein1024_block_loop: # the logic here assumes the set {rdi,rsi,rbp,rax} = X[0,1,2,3] .irp _rN_,0,1,2,3,4,6 #process the "initial" words, using r14/r15 as temps - movq X_VARS+8*_rN_(%rdi),%r14 #get state word - movq 8*_rN_(%rsi),%r15 #get msg word + movq X_VARS+8*\_rN_(%rdi),%r14 #get state word + movq 8*\_rN_(%rsi),%r15 #get msg word xorq %r14,%rax #update key schedule overall parity - movq %r14,ksKey +8*_rN_+F_O(%rbp) #save key schedule word on stack - movq %r15,Wcopy +8*_rN_+F_O(%rbp) #save local msg Wcopy + movq %r14,ksKey +8*\_rN_+F_O(%rbp) #save key schedule word on stack + movq %r15,Wcopy +8*\_rN_+F_O(%rbp) #save local msg Wcopy addq %r15,%r14 #do the initial key injection - movq %r14,X_stk +8*_rN_ (%rsp) #save initial state var on stack + movq %r14,X_stk +8*\_rN_ (%rsp) #save initial state var on stack .endr # now process the rest, using the "real" registers # (MUST do it in reverse order to inject tweaks r8/r9 first) @@ -1135,9 +1136,9 @@ _oo_ = o1K_\_rr_ #offset ass movq %rcx,Wcopy+8*_oo_+F_O(%rbp) #save copy of msg word for feedforward addq %rcx,%\_rr_ #do the initial key injection .if _oo_ == 13 #do the initial tweak injection - addReg _rr_,r8 # (only in words 13/14) + addReg \_rr_,r8 # (only in words 13/14) .elseif _oo_ == 14 - addReg _rr_,r9 + addReg \_rr_,r9 .endif .endr movq %rax,ksKey+8*WCNT+F_O(%rbp) #save key schedule parity
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202006051354.055DsDOb021079>