3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
10 # I let hardware handle unaligned input(*), except on page boundaries
11 # (see below for details). Otherwise straightforward implementation
12 # with X vector in register bank. The module is big-endian [which is
13 # not a big deal as there are no little-endian targets left around].
15 # (*) this means that this module is inappropriate for PPC403? Does
16 # anybody know if pre-POWER3 can sustain unaligned load?
19 # ----------------------------------
20 # PPC970,gcc-4.0.0 +76% +59%
21 # Power6,xlc-7 +68% +33%
25 if ($flavour =~ /64/) {
32 } elsif ($flavour =~ /32/) {
39 } else { die "nonsense $flavour"; }
# Locate the ppc-xlate.pl translator relative to this script: first in the
# directory the script itself lives in, then in ../../perlasm/ below it.
# $dir keeps the trailing path separator captured by the regex.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Re-open STDOUT as a pipe into the translator, passing it the flavour and
# the next command-line argument, so everything printed afterwards is fed
# through ppc-xlate.pl.
#
# The low-precedence "or" is deliberate: with "||" the die() attached to the
# command string (which is always true) instead of to open(), so a failure
# to start the translator went unreported.
open(STDOUT,"| $^X $xlate $flavour ".shift) or die "can't call $xlate: $!";
# Working-variable registers handed to the BODY_* round generators.
@V = ($A, $B, $C, $D, $E, $T);

# Register bank holding the 16-entry X vector: r16 through r31, in order.
@X = map("r$_", 16 .. 31);
72 my ($i,$a,$b,$c,$d,$e,$f)=@_;
74 $code.=<<___
if ($i==0);
75 lwz
@X[$i],`$i*4`($inp)
77 $code.=<<___
if ($i<15);
78 lwz
@X[$j],`$j*4`($inp)
89 $code.=<<___
if ($i>=15);
92 xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
95 xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
100 xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
102 rotlwi
@X[$j%16],@X[$j%16],1
107 my ($i,$a,$b,$c,$d,$e,$f)=@_;
109 $code.=<<___
if ($i<79);
112 xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
115 xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
119 xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
121 rotlwi
@X[$j%16],@X[$j%16],1
123 $code.=<<___
if ($i==79);
141 my ($i,$a,$b,$c,$d,$e,$f)=@_;
146 xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
149 xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
153 xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
156 rotlwi
@X[$j%16],@X[$j%16],1
165 .globl
.sha1_block_data_order
167 .sha1_block_data_order
:
168 $STU $sp,-$FRAME($sp)
170 $PUSH r15
,`$FRAME-$SIZE_T*17`($sp)
171 $PUSH r16
,`$FRAME-$SIZE_T*16`($sp)
172 $PUSH r17
,`$FRAME-$SIZE_T*15`($sp)
173 $PUSH r18
,`$FRAME-$SIZE_T*14`($sp)
174 $PUSH r19
,`$FRAME-$SIZE_T*13`($sp)
175 $PUSH r20
,`$FRAME-$SIZE_T*12`($sp)
176 $PUSH r21
,`$FRAME-$SIZE_T*11`($sp)
177 $PUSH r22
,`$FRAME-$SIZE_T*10`($sp)
178 $PUSH r23
,`$FRAME-$SIZE_T*9`($sp)
179 $PUSH r24
,`$FRAME-$SIZE_T*8`($sp)
180 $PUSH r25
,`$FRAME-$SIZE_T*7`($sp)
181 $PUSH r26
,`$FRAME-$SIZE_T*6`($sp)
182 $PUSH r27
,`$FRAME-$SIZE_T*5`($sp)
183 $PUSH r28
,`$FRAME-$SIZE_T*4`($sp)
184 $PUSH r29
,`$FRAME-$SIZE_T*3`($sp)
185 $PUSH r30
,`$FRAME-$SIZE_T*2`($sp)
186 $PUSH r31
,`$FRAME-$SIZE_T*1`($sp)
187 $PUSH r0
,`$FRAME+$LRSAVE`($sp)
197 bl Lsha1_block_private
200 ; PowerPC specification allows an implementation to be ill
-behaved
201 ; upon unaligned access which crosses page boundary
. "Better safe
202 ; than sorry" principle makes me treat it specially
. But I don
't
203 ; look for particular offending word, but rather for 64-byte input
204 ; block which crosses the boundary. Once found that block is aligned
205 ; and hashed separately...
209 andi. $t1,$t1,4095 ; distance to closest page boundary
210 srwi. $t1,$t1,6 ; t1/=64
213 ble- Laligned ; didn't cross the page boundary
216 bl Lsha1_block_private
220 addi r20
,$sp,$LOCALS ; spot within the frame
234 $PUSH $inp,`$FRAME-$SIZE_T*18`($sp)
236 addi
$inp,$sp,$LOCALS
238 bl Lsha1_block_private
239 $POP $inp,`$FRAME-$SIZE_T*18`($sp)
244 $POP r0
,`$FRAME+$LRSAVE`($sp)
245 $POP r15
,`$FRAME-$SIZE_T*17`($sp)
246 $POP r16
,`$FRAME-$SIZE_T*16`($sp)
247 $POP r17
,`$FRAME-$SIZE_T*15`($sp)
248 $POP r18
,`$FRAME-$SIZE_T*14`($sp)
249 $POP r19
,`$FRAME-$SIZE_T*13`($sp)
250 $POP r20
,`$FRAME-$SIZE_T*12`($sp)
251 $POP r21
,`$FRAME-$SIZE_T*11`($sp)
252 $POP r22
,`$FRAME-$SIZE_T*10`($sp)
253 $POP r23
,`$FRAME-$SIZE_T*9`($sp)
254 $POP r24
,`$FRAME-$SIZE_T*8`($sp)
255 $POP r25
,`$FRAME-$SIZE_T*7`($sp)
256 $POP r26
,`$FRAME-$SIZE_T*6`($sp)
257 $POP r27
,`$FRAME-$SIZE_T*5`($sp)
258 $POP r28
,`$FRAME-$SIZE_T*4`($sp)
259 $POP r29
,`$FRAME-$SIZE_T*3`($sp)
260 $POP r30
,`$FRAME-$SIZE_T*2`($sp)
261 $POP r31
,`$FRAME-$SIZE_T*1`($sp)
266 .byte
0,12,4,1,0x80,18,3,0
270 # This is a private block function, which uses a tailored calling
271 # interface: upon entry SHA_CTX is pre-loaded into the given
272 # registers and the counter register contains the number of chunks to
278 $code.=<<___
; # load K_00_19
282 for($i=0;$i<20;$i++) { &BODY_00_19
($i,@V); unshift(@V,pop(@V)); }
283 $code.=<<___
; # load K_20_39
287 for(;$i<40;$i++) { &BODY_20_39
($i,@V); unshift(@V,pop(@V)); }
288 $code.=<<___
; # load K_40_59
292 for(;$i<60;$i++) { &BODY_40_59
($i,@V); unshift(@V,pop(@V)); }
293 $code.=<<___
; # load K_60_79
297 for(;$i<80;$i++) { &BODY_20_39
($i,@V); unshift(@V,pop(@V)); }
314 addi
$inp,$inp,`16*4`
315 bdnz
- Lsha1_block_private
318 .byte
0,12,0x14,0,0,0,0,0
321 .asciz
"SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
# Post-process the accumulated $code text: every backtick-quoted span is
# replaced by the result of eval-ing its contents as Perl (the /e flag),
# so an operand written as `16*4` in the assembler template becomes 64.
# /g applies this to all occurrences in $code.
324 $code =~ s/\`([^\`]*)\`/eval $1/gem;