Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 5 Jun 2020 13:54:13 +0000 (UTC)
From:      Alex Richardson <arichardson@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r361833 - head/sys/crypto/skein/amd64
Message-ID:  <202006051354.055DsDOb021079@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: arichardson
Date: Fri Jun  5 13:54:13 2020
New Revision: 361833
URL: https://svnweb.freebsd.org/changeset/base/361833

Log:
  Allow assembling skein_block_asm.s with clang
  
  GNU as seems to allow macro arguments without the '\' but clang is more
  strict in that regard.
  This change makes the source code compatible with LLVM's but does not yet
  change the build system or rename it to .S.
  
  The new code assembles identically with GNU as 2.17.50.
  
  Reviewed By:	emaste
  Differential Revision: https://reviews.freebsd.org/D25143

Modified:
  head/sys/crypto/skein/amd64/skein_block_asm.s

Modified: head/sys/crypto/skein/amd64/skein_block_asm.s
==============================================================================
--- head/sys/crypto/skein/amd64/skein_block_asm.s	Fri Jun  5 09:58:59 2020	(r361832)
+++ head/sys/crypto/skein/amd64/skein_block_asm.s	Fri Jun  5 13:54:13 2020	(r361833)
@@ -10,7 +10,9 @@
 #
     .text
     .altmacro
+#ifndef __clang__
     .psize 0,128                            #list file has no page boundaries
+#endif
 #
 _MASK_ALL_  =  (256+512+1024)               #all three algorithm bits
 _MAX_FRAME_ =  240
@@ -240,9 +242,8 @@ RC_1024_7_7 = 20
 # Output: <reg> <<< RC_BlkSize_roundNum_mixNum, BlkSize=256/512/1024
 #
 .macro RotL64   reg,BLK_SIZE,ROUND_NUM,MIX_NUM
-_RCNT_ = RC_\BLK_SIZE&_\ROUND_NUM&_\MIX_NUM
-  .if _RCNT_  #is there anything to do?
-    rolq    $_RCNT_,%\reg
+  .if RC_\BLK_SIZE\()_\ROUND_NUM\()_\MIX_NUM  #is there anything to do?
+    rolq    $RC_\BLK_SIZE\()_\ROUND_NUM\()_\MIX_NUM,%\reg
   .endif
 .endm
 #
@@ -334,7 +335,7 @@ __STK_FRM_OFFS_\BLK_BITS = FRAME_OFFS
 #----------------------------------------------------------------
 #
 .macro Reset_Stack
-    addq    $LOCAL_SIZE,%rsp        #get rid of locals (wipe??)
+    addq    $LOCAL_SIZE,%rsp        #get rid of locals (wipe?)
   .irp _reg_,r15,r14,r13,r12,rbx,rbp
     popq    %\_reg_                 #restore caller's regs
 _PushCnt_ = _PushCnt_ - 1
@@ -724,22 +725,22 @@ C_label Skein_256_Unroll_Cnt
 .macro R_512_OneRound rn0,rn1,rn2,rn3,rn4,rn5,rn6,rn7,_Rn_,op1,op2,op3,op4
 #
     addReg      r\rn0, r\rn1
-    RotL64      r\rn1, 512,%((_Rn_) % 8),0
+    RotL64      r\rn1, 512,%((\_Rn_) % 8),0
     xorReg      r\rn1, r\rn0
-            op1
+            \op1
     addReg      r\rn2, r\rn3
-    RotL64      r\rn3, 512,%((_Rn_) % 8),1
+    RotL64      r\rn3, 512,%((\_Rn_) % 8),1
     xorReg      r\rn3, r\rn2
-            op2
+            \op2
     addReg      r\rn4, r\rn5
-    RotL64      r\rn5, 512,%((_Rn_) % 8),2
+    RotL64      r\rn5, 512,%((\_Rn_) % 8),2
     xorReg      r\rn5, r\rn4
-            op3
+            \op3
     addReg      r\rn6, r\rn7
-    RotL64      r\rn7, 512,%((_Rn_) % 8),3
+    RotL64      r\rn7, 512,%((\_Rn_) % 8),3
     xorReg      r\rn7, r\rn6
-            op4
-    Skein_Debug_Round 512,%(_Rn_+1),-4
+            \op4
+    Skein_Debug_Round 512,%(\_Rn_+1),-4
 #
 .endm #R_512_OneRound
 #
@@ -749,11 +750,11 @@ C_label Skein_256_Unroll_Cnt
 .macro R_512_FourRounds _RR_    #RR = base round number (0 % 8)
   .if (SKEIN_ASM_UNROLL && 512)
     # here for fully unrolled case.
-    _II_ = ((_RR_)/4) + 1       #key injection counter
-    R_512_OneRound  8, 9,10,11,12,13,14,15,%((_RR_)+0),<movq ksKey+8*(((_II_)+3) % 9)+F_O(%rbp),%rax>,,<movq ksKey+8*(((_II_)+4) % 9)+F_O(%rbp),%rbx>
-    R_512_OneRound 10, 9,12,15,14,13, 8,11,%((_RR_)+1),<movq ksKey+8*(((_II_)+5) % 9)+F_O(%rbp),%rcx>,,<movq ksKey+8*(((_II_)+6) % 9)+F_O(%rbp),%rdx>
-    R_512_OneRound 12, 9,14,11, 8,13,10,15,%((_RR_)+2),<movq ksKey+8*(((_II_)+7) % 9)+F_O(%rbp),%rsi>,,<addq ksTwk+8*(((_II_)+0) % 3)+F_O(%rbp),%rcx>
-    R_512_OneRound 14, 9, 8,15,10,13,12,11,%((_RR_)+3),<addq ksTwk+8*(((_II_)+1) % 3)+F_O(%rbp),%rdx>,
+    _II_ = ((\_RR_)/4) + 1       #key injection counter
+    R_512_OneRound  8, 9,10,11,12,13,14,15,%((\_RR_)+0),<movq ksKey+8*(((_II_)+3) % 9)+F_O(%rbp),%rax>,,<movq ksKey+8*(((_II_)+4) % 9)+F_O(%rbp),%rbx>
+    R_512_OneRound 10, 9,12,15,14,13, 8,11,%((\_RR_)+1),<movq ksKey+8*(((_II_)+5) % 9)+F_O(%rbp),%rcx>,,<movq ksKey+8*(((_II_)+6) % 9)+F_O(%rbp),%rdx>
+    R_512_OneRound 12, 9,14,11, 8,13,10,15,%((\_RR_)+2),<movq ksKey+8*(((_II_)+7) % 9)+F_O(%rbp),%rsi>,,<addq ksTwk+8*(((_II_)+0) % 3)+F_O(%rbp),%rcx>
+    R_512_OneRound 14, 9, 8,15,10,13,12,11,%((\_RR_)+3),<addq ksTwk+8*(((_II_)+1) % 3)+F_O(%rbp),%rdx>,
     # inject the key schedule
     addq    ksKey+8*(((_II_)+0)%9)+F_O(%rbp),%r8
     addReg   r11, rax
@@ -766,10 +767,10 @@ C_label Skein_256_Unroll_Cnt
   .else
     # here for looping case                                                    #"rotate" key/tweak schedule (move up on stack)
     incq    %rdi                 #bump key injection counter
-    R_512_OneRound  8, 9,10,11,12,13,14,15,%((_RR_)+0),<movq ksKey+8*6+F_O(%rbp,%rdi,8),%rdx>,<movq      ksTwk-8*1+F_O(%rbp,%rdi,8),%rax>,<movq      ksKey-8*1+F_O(%rbp,%rdi,8),%rsi>
-    R_512_OneRound 10, 9,12,15,14,13, 8,11,%((_RR_)+1),<movq ksKey+8*5+F_O(%rbp,%rdi,8),%rcx>,<movq %rax,ksTwk+8*2+F_O(%rbp,%rdi,8)     >,<movq %rsi,ksKey+8*8+F_O(%rbp,%rdi,8)>
-    R_512_OneRound 12, 9,14,11, 8,13,10,15,%((_RR_)+2),<movq ksKey+8*4+F_O(%rbp,%rdi,8),%rbx>,<addq      ksTwk+8*1+F_O(%rbp,%rdi,8),%rdx>,<movq      ksKey+8*7+F_O(%rbp,%rdi,8),%rsi>    
-    R_512_OneRound 14, 9, 8,15,10,13,12,11,%((_RR_)+3),<movq ksKey+8*3+F_O(%rbp,%rdi,8),%rax>,<addq      ksTwk+8*0+F_O(%rbp,%rdi,8),%rcx>
+    R_512_OneRound  8, 9,10,11,12,13,14,15,%((\_RR_)+0),<movq ksKey+8*6+F_O(%rbp,%rdi,8),%rdx>,<movq      ksTwk-8*1+F_O(%rbp,%rdi,8),%rax>,<movq      ksKey-8*1+F_O(%rbp,%rdi,8),%rsi>
+    R_512_OneRound 10, 9,12,15,14,13, 8,11,%((\_RR_)+1),<movq ksKey+8*5+F_O(%rbp,%rdi,8),%rcx>,<movq %rax,ksTwk+8*2+F_O(%rbp,%rdi,8)     >,<movq %rsi,ksKey+8*8+F_O(%rbp,%rdi,8)>
+    R_512_OneRound 12, 9,14,11, 8,13,10,15,%((\_RR_)+2),<movq ksKey+8*4+F_O(%rbp,%rdi,8),%rbx>,<addq      ksTwk+8*1+F_O(%rbp,%rdi,8),%rdx>,<movq      ksKey+8*7+F_O(%rbp,%rdi,8),%rsi>
+    R_512_OneRound 14, 9, 8,15,10,13,12,11,%((\_RR_)+3),<movq ksKey+8*3+F_O(%rbp,%rdi,8),%rax>,<addq      ksTwk+8*0+F_O(%rbp,%rdi,8),%rcx>
     # inject the key schedule
     addq    ksKey+8*0+F_O(%rbp,%rdi,8),%r8
     addReg   r11, rax
@@ -814,9 +815,9 @@ Skein_512_block_loop:
     movq    %rbx,ksTwk+ 8+F_O(%rbp)
     movq    %rcx,ksTwk+16+F_O(%rbp)
     .irp _Rn_,8,9,10,11,12,13,14,15
-      movq  X_VARS+8*(_Rn_-8)(%rdi),%r\_Rn_
+      movq  X_VARS+8*(\_Rn_-8)(%rdi),%r\_Rn_
       xorq  %r\_Rn_,%rdx              #compute overall parity
-      movq  %r\_Rn_,ksKey+8*(_Rn_-8)+F_O(%rbp)
+      movq  %r\_Rn_,ksKey+8*(\_Rn_-8)+F_O(%rbp)
     .endr                             #load state into %r8 ..%r15, compute parity
       movq  %rdx,ksKey+8*(8)+F_O(%rbp)#save key schedule parity
 
@@ -853,7 +854,7 @@ Skein_512_block_loop:
 
 .if _SKEIN_DEBUG
     .irp _Rn_,8,9,10,11,12,13,14,15   #save values on stack for debug output
-      movq  %r\_Rn_,X_stk+8*(_Rn_-8)(%rsp)
+      movq  %r\_Rn_,X_stk+8*(\_Rn_-8)(%rsp)
     .endr
 
     Skein_Debug_Block 512             #debug dump
@@ -891,12 +892,12 @@ _Rbase_ = _Rbase_+1
     #################
     # feedforward:   ctx->X[i] = X[i] ^ w[i], {i=0..7}
     .irp _Rn_,8,9,10,11,12,13,14,15
-  .if (_Rn_ == 8)
+  .if (\_Rn_ == 8)
     movq    $FIRST_MASK64,%rbx
   .endif
-      xorq  Wcopy+8*(_Rn_-8)+F_O(%rbp),%r\_Rn_  #feedforward XOR
-      movq  %r\_Rn_,X_VARS+8*(_Rn_-8)(%rdi)     #and store result
-  .if (_Rn_ == 14)
+      xorq  Wcopy+8*(\_Rn_-8)+F_O(%rbp),%r\_Rn_  #feedforward XOR
+      movq  %r\_Rn_,X_VARS+8*(\_Rn_-8)(%rdi)     #and store result
+  .if (\_Rn_ == 14)
     andq    TWEAK+ 8(%rdi),%rbx
   .endif
     .endr
@@ -917,7 +918,7 @@ Skein_Debug_Round_512:
     pushq   %rsi                     #save two regs for BLK_BITS-specific parms
     pushq   %rdi
   .irp _Rn_,8,9,10,11,12,13,14,15    #save X[] state on stack so debug routines can access it
-    movq    %r\_Rn_,X_stk+8*(_Rn_-8)+F_O(%rbp)
+    movq    %r\_Rn_,X_stk+8*(\_Rn_-8)+F_O(%rbp)
   .endr
     movq    ctxPtr+F_O(%rbp),%rsi    #ctx_hdr_ptr
     movq    $512,%rdi                #now <rdi,rsi,rdx> are set for the call
@@ -968,50 +969,50 @@ rIdx_offs = tmpStk_1024
 #
 .macro r1024_Mix w0,w1,reg0,reg1,_RN0_,_Rn1_,op1
     addReg      \reg0 , \reg1                      #perform the MIX
-    RotL64      \reg1 , 1024,%((_RN0_) % 8),_Rn1_
+    RotL64      \reg1 , 1024,%((\_RN0_) % 8),\_Rn1_
     xorReg      \reg1 , \reg0
-.if ((_RN0_) && 3) == 3         #time to do key injection?
+.if ((\_RN0_) && 3) == 3        #time to do key injection?
  .if _SKEIN_DEBUG
-    movq       %\reg0 , xDebug_1024+8*w0(%rsp)     #save intermediate values for Debug_Round
-    movq       %\reg1 , xDebug_1024+8*w1(%rsp)     # (before inline key injection)
+    movq       %\reg0 , xDebug_1024+8*\w0(%rsp)    #save intermediate values for Debug_Round
+    movq       %\reg1 , xDebug_1024+8*\w1(%rsp)    # (before inline key injection)
  .endif
-_II_ = ((_RN0_)/4)+1            #injection count
+_II_ = ((\_RN0_)/4)+1           #injection count
  .if SKEIN_ASM_UNROLL && 1024   #here to do fully unrolled key injection
-    addq        ksKey+ 8*((_II_+w0) % 17)(%rsp),%\reg0
-    addq        ksKey+ 8*((_II_+w1) % 17)(%rsp),%\reg1
-  .if     w1 == 13                                 #tweak injection
+    addq        ksKey+ 8*((_II_+\w0) % 17)(%rsp),%\reg0
+    addq        ksKey+ 8*((_II_+\w1) % 17)(%rsp),%\reg1
+  .if     \w1 == 13                                #tweak injection
     addq        ksTwk+ 8*((_II_+ 0) %  3)(%rsp),%\reg1
-  .elseif w0 == 14
+  .elseif \w0 == 14
     addq        ksTwk+ 8*((_II_+ 1) %  3)(%rsp),%\reg0
-  .elseif w1 == 15
+  .elseif \w1 == 15
     addq        $_II_, %\reg1                      #(injection counter)
   .endif
  .else                          #here to do looping  key injection
-  .if  (w0 == 0)
-    movq        %rdi, X_stk+8*w0(%rsp)             #if so, store N0 so we can use reg as index
+  .if  (\w0 == 0)
+    movq        %rdi, X_stk+8*\w0(%rsp)            #if so, store N0 so we can use reg as index
     movq         rIdx_offs(%rsp),%rdi              #get the injection counter index into rdi
   .else
-    addq         ksKey+8+8*w0(%rsp,%rdi,8),%\reg0  #even key injection
+    addq         ksKey+8+8*\w0(%rsp,%rdi,8),%\reg0 #even key injection
   .endif
-  .if     w1 == 13                                 #tweak injection
+  .if     \w1 == 13                                #tweak injection
     addq         ksTwk+8+8* 0(%rsp,%rdi,8),%\reg1  
-  .elseif w0 == 14
+  .elseif \w0 == 14
     addq         ksTwk+8+8* 1(%rsp,%rdi,8),%\reg0  
-  .elseif w1 == 15
+  .elseif \w1 == 15
     addReg      \reg1,rdi,,,1                      #(injection counter)
   .endif
-    addq         ksKey+8+8*w1(%rsp,%rdi,8),%\reg1  #odd key injection
+    addq         ksKey+8+8*\w1(%rsp,%rdi,8),%\reg1 #odd key injection
  .endif
 .endif
     # insert the op provided, .if any
-    op1
+    \op1
 .endm
 #################
 # MACRO: four rounds for 1024-bit blocks
 #
 .macro r1024_FourRounds _RR_    #RR = base round number (0 mod 4)
     # should be here with X4 set properly, X6 stored on stack
-_Rn_ = (_RR_) + 0
+_Rn_ = (\_RR_) + 0
         r1024_Mix  0, 1,rdi,rsi,_Rn_,0
         r1024_Mix  2, 3,rbp,rax,_Rn_,1
         r1024_Mix  4, 5,rcx,rbx,_Rn_,2,<movq %rcx,X_stk+8*4(%rsp)>       #save X4  on  stack (x4/x6 alternate)
@@ -1023,7 +1024,7 @@ _Rn_ = (_RR_) + 0
     .if _SKEIN_DEBUG
       Skein_Debug_Round 1024,%(_Rn_+1)
     .endif
-_Rn_ = (_RR_) + 1
+_Rn_ = (\_RR_) + 1
         r1024_Mix  0, 9,rdi,r9 ,_Rn_,0
         r1024_Mix  2,13,rbp,r13,_Rn_,1
         r1024_Mix  6,11,rcx,r11,_Rn_,2,<movq %rcx,X_stk+8*6(%rsp)>       #save X6  on  stack (x4/x6 alternate)
@@ -1035,7 +1036,7 @@ _Rn_ = (_RR_) + 1
     .if _SKEIN_DEBUG
       Skein_Debug_Round 1024,%(_Rn_+1)
     .endif
-_Rn_ = (_RR_) + 2
+_Rn_ = (\_RR_) + 2
         r1024_Mix  0, 7,rdi,rdx,_Rn_,0
         r1024_Mix  2, 5,rbp,rbx,_Rn_,1
         r1024_Mix  4, 3,rcx,rax,_Rn_,2,<movq %rcx,X_stk+8*4(%rsp)>       #save X4  on  stack (x4/x6 alternate)
@@ -1047,7 +1048,7 @@ _Rn_ = (_RR_) + 2
     .if _SKEIN_DEBUG
       Skein_Debug_Round 1024,%(_Rn_+1)
     .endif
-_Rn_ = (_RR_) + 3
+_Rn_ = (\_RR_) + 3
         r1024_Mix  0,15,rdi,r15,_Rn_,0
         r1024_Mix  2,11,rbp,r11,_Rn_,1
         r1024_Mix  6,13,rcx,r13,_Rn_,2,<movq %rcx,X_stk+8*6(%rsp)>       #save X6  on  stack (x4/x6 alternate)
@@ -1116,13 +1117,13 @@ Skein1024_block_loop:
 
     # the logic here assumes the set {rdi,rsi,rbp,rax} = X[0,1,2,3]
     .irp _rN_,0,1,2,3,4,6                  #process the "initial" words, using r14/r15 as temps
-      movq       X_VARS+8*_rN_(%rdi),%r14  #get state word
-      movq              8*_rN_(%rsi),%r15  #get msg   word
+      movq       X_VARS+8*\_rN_(%rdi),%r14 #get state word
+      movq              8*\_rN_(%rsi),%r15 #get msg   word
       xorq  %r14,%rax                      #update key schedule overall parity
-      movq  %r14,ksKey +8*_rN_+F_O(%rbp)   #save key schedule word on stack
-      movq  %r15,Wcopy +8*_rN_+F_O(%rbp)   #save local msg Wcopy 
+      movq  %r14,ksKey +8*\_rN_+F_O(%rbp)  #save key schedule word on stack
+      movq  %r15,Wcopy +8*\_rN_+F_O(%rbp)  #save local msg Wcopy
       addq  %r15,%r14                      #do the initial key injection
-      movq  %r14,X_stk +8*_rN_    (%rsp)   #save initial state var on stack
+      movq  %r14,X_stk +8*\_rN_    (%rsp)  #save initial state var on stack
     .endr
     # now process the rest, using the "real" registers 
     #     (MUST do it in reverse order to inject tweaks r8/r9 first)
@@ -1135,9 +1136,9 @@ _oo_ = o1K_\_rr_                           #offset ass
       movq  %rcx,Wcopy+8*_oo_+F_O(%rbp)    #save copy of msg word for feedforward
       addq  %rcx,%\_rr_                    #do the initial  key  injection
       .if    _oo_ == 13                    #do the initial tweak injection
-        addReg _rr_,r8                     #          (only in words 13/14)
+        addReg \_rr_,r8                    #          (only in words 13/14)
       .elseif _oo_ == 14
-        addReg _rr_,r9 
+        addReg \_rr_,r9
       .endif
     .endr
     movq    %rax,ksKey+8*WCNT+F_O(%rbp)    #save key schedule parity



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202006051354.055DsDOb021079>