3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
10 # SHA1 block procedure for PA-RISC.
14 # On PA-7100LC performance is >30% better than gcc 3.2 generated code
15 # for aligned input and >50% better for unaligned. Compared to the vendor
16 # compiler on PA-8600 it's almost 60% faster in a 64-bit build and just
17 # a few percent faster in a 32-bit one (this is for aligned input; data
18 # for unaligned input is not available).
20 # Special thanks to polarhome.com for providing HP-UX account.
# Redirect everything printed to STDOUT into the file named by $output.
# The redirect is attempted only when an output name is actually present,
# so running the script without one keeps writing to the inherited STDOUT.
# A named file that cannot be opened is reported and aborts the run rather
# than being ignored.
if (defined $output && length $output) {
    open STDOUT, '>', $output
        or die "can't open $output for writing: $!";
}
26 if ($flavour =~ /64/) {
# Stack frame size: the frame marker plus room for 14 saved registers
# [+ argument transfer].
$FRAME = $FRAME_MARKER + 14 * $SIZE_T;
# @X: sixteen register names %r1..%r16 followed by $t0 as a 17th entry.
@X = ((map { "%r$_" } 1 .. 16), $t0);

# $A..$E name registers %r19..%r23; @V holds the same five names in order.
($A, $B, $C, $D, $E) = map { "%r$_" } 19 .. 23;
@V = ($A, $B, $C, $D, $E);
62 my ($i,$a,$b,$c,$d,$e)=@_;
64 $code.=<<___
if ($i<15);
75 $code.=<<___
if ($i>=15); # with forward Xupdate
78 xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
81 xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
86 xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
88 shd
@X[$j%16],@X[$j%16],31,@X[$j%16]
93 my ($i,$a,$b,$c,$d,$e)=@_;
95 $code.=<<___
if ($i<79);
96 xor @X[($j+2)%16],@X[$j%16],@X[$j%16] ; $i
99 xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
102 xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
106 shd
@X[$j%16],@X[$j%16],31,@X[$j%16]
109 $code.=<<___
if ($i==79); # with context load
110 ldw
0($ctx),@X[0] ; $i
127 my ($i,$a,$b,$c,$d,$e)=@_;
130 shd
$a,$a,27,$t1 ; $i
132 xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
135 xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
139 xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
142 shd
@X[$j%16],@X[$j%16],31,@X[$j%16]
150 .SUBSPA \
$CODE\
$,QUAD
=0,ALIGN
=8,ACCESS
=0x2C,CODE_ONLY
152 .EXPORT sha1_block_data_order
,ENTRY
,ARGW0
=GR
,ARGW1
=GR
,ARGW2
=GR
153 sha1_block_data_order
155 .CALLINFO FRAME
=`$FRAME-14*$SIZE_T`,NO_CALLS
,SAVE_RP
,ENTRY_GR
=16
157 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
158 $PUSHMA %r3,$FRAME(%sp)
159 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
160 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
161 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
162 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
163 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
164 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
165 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
166 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
167 $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp)
168 $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp)
169 $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp)
170 $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp)
171 $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp)
179 extru
$inp,31,2,$t0 ; t0
=inp
&3;
180 sh3addl
$t0,%r0,$t0 ; t0
*=8;
181 subi
32,$t0,$t0 ; t0
=32-t0
;
182 mtctl
$t0,%cr11 ; %sar=t0
;
186 andcm
$inp,$t0,$t0 ; 64-bit neutral
# Load input block: append one ldw line per index 0..14, pairing the
# offset expression `4*$i` (relative to $t0) with @X[$i].
$i = 0;
while ($i < 15) {
    $code .= "\tldw `4*$i`($t0),@X[$i]\n";
    $i++;
}
191 cmpb
,*= $inp,$t0,L\
$aligned
# Align input: append one vshd line per index 0..15, naming @X[$i],
# its successor @X[$i+1], and @X[$i] again as the operands.
$i = 0;
while ($i < 16) {
    $code .= "\tvshd @X[$i],@X[$i+1],@X[$i]\n";
    $i++;
}
199 ldil L
'0x5a827000,$K ; K_00_19
# Rounds 0..19: call BODY_00_19 for each round index, then rotate @V by
# moving its last element to the front before the next round.
$i = 0;
while ($i < 20) {
    BODY_00_19($i, @V);
    unshift @V, pop @V;
    $i++;
}
204 ldil L'0x6ed9e000,$K ; K_20_39
# Rounds up to 39: continue from the current value of $i, calling
# BODY_20_39 and rotating @V (last element to the front) after each round.
while ($i < 40) {
    BODY_20_39($i, @V);
    unshift @V, pop @V;
    $i++;
}
210 ldil L
'0x8f1bb000,$K ; K_40_59
# Rounds up to 59: continue from the current value of $i, calling
# BODY_40_59 and rotating @V (last element to the front) after each round.
while ($i < 60) {
    BODY_40_59($i, @V);
    unshift @V, pop @V;
    $i++;
}
216 ldil L'0xca62c000,$K ; K_60_79
# Rounds up to 79: continue from the current value of $i. These rounds
# reuse BODY_20_39; @V is rotated (last element to the front) after each.
while ($i < 80) {
    BODY_20_39($i, @V);
    unshift @V, pop @V;
    $i++;
}
232 addib
,*<> -1,$num,L\
$oop
235 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
236 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
237 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
238 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
239 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
240 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
241 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
242 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
243 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
244 $POP `-$FRAME+9*$SIZE_T`(%sp),%r12
245 $POP `-$FRAME+10*$SIZE_T`(%sp),%r13
246 $POP `-$FRAME+11*$SIZE_T`(%sp),%r14
247 $POP `-$FRAME+12*$SIZE_T`(%sp),%r15
248 $POP `-$FRAME+13*$SIZE_T`(%sp),%r16
251 $POPMB -$FRAME(%sp),%r3
253 .STRINGZ
"SHA1 block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
# Replace every `...` span in the generated text with the result of
# evaluating its contents as a Perl expression.
$code =~ s/\`([^\`]*)\`/eval $1/gem;

# When $SIZE_T is 4, strip the "*" that directly follows a comma.
if ($SIZE_T == 4) {
    $code =~ s/,\*/,/gm;
}