Allow IPv6 address entry in tools>ping - Loosens valid character check
[tomato/davidwu.git] / release / src / router / openssl / crypto / rc4 / asm / rc4-parisc.pl
blob9165067080eff92155ddb6a404f547268fb774fb
#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# RC4 for PA-RISC.

# June 2009.
#
# Performance is 33% better than gcc 3.2 generated code on PA-7100LC.
# For reference, [4x] unrolled loop is >40% faster than folded one.
# It's possible to unroll loop 8 times on PA-RISC 2.0, but improvement
# is believed to be not sufficient to justify the effort...
#
# Special thanks to polarhome.com for providing HP-UX account.

# Directory part of this script's path (including the trailing separator),
# used below to locate opensslconf.h.  Empty when $0 has no directory
# component, so a stale capture from an earlier match is never picked up.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# Command line: <flavour> <output file>.
$flavour = shift;
$output = shift;

# Send the generated assembly to the requested file.  Three-argument open
# keeps characters in the file name from being parsed as an open mode, and
# a failure is reported instead of silently losing the output.  When no
# output file is given, STDOUT is left as inherited.
if (defined $output && length $output) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}
# ABI-dependent parameters.  A flavour containing "64" selects the
# .LEVEL 2.0W settings with 8-byte registers; anything else selects the
# .LEVEL 1.0 settings with 4-byte registers.  $PUSH/$POP (and their
# stack-pointer-modifying variants $PUSHMA/$POPMB) are the store/load
# mnemonics used for the prologue and epilogue.
if ($flavour =~ /64/) {
	$LEVEL		="2.0W";
	$SIZE_T		=8;
	$FRAME_MARKER	=80;
	$SAVED_RP	=16;
	$PUSH		="std";
	$PUSHMA		="std,ma";
	$POP		="ldd";
	$POPMB		="ldd,mb";
} else {
	$LEVEL		="1.0";
	$SIZE_T		=4;
	$FRAME_MARKER	=48;
	$SAVED_RP	=20;
	$PUSH		="stw";
	$PUSHMA		="stwm";
	$POP		="ldw";
	$POPMB		="ldwm";
}

$FRAME=4*$SIZE_T+$FRAME_MARKER;	# 4 saved regs + frame marker
				#                 [+ argument transfer]
# Size in bytes of one RC4 state element.  opensslconf.h (two directories
# above this script) is scanned for "#define RC4_INT <type>": a type
# ending in "char" gives 1, any other definition gives 4.  If the header
# cannot be opened or has no such define, the default of 1 is kept.
$SZ=1;				# defaults to RC4_CHAR
if (open my $conf, '<', "${dir}../../opensslconf.h") {
    # A lexical line variable keeps the caller's $_ intact.
    while (my $line = <$conf>) {
	if ($line =~ m/#\s*define\s+RC4_INT\s+(.*)/) {
	    $SZ = ($1=~/char$/) ? 1 : 4;
	    last;
	}
    }
    close $conf;
}
# Mnemonics matching the state element size: $LD/$ST are the plain load
# and store, $LDX the indexed load, and $MKX the add that turns an index
# into an element address (a shift-by-two add for 4-byte elements).
if ($SZ==1) {	# RC4_CHAR
    $LD="ldb";
    $LDX="ldbx";
    $MKX="addl";
    $ST="stb";
} else {	# RC4_INT (~5% faster than RC4_CHAR on PA-7100LC)
    $LD="ldw";
    $LDX="ldwx,s";
    $MKX="sh2addl";
    $ST="stw";
}
# Register allocation.  The first four names are the incoming arguments
# of RC4 (declared as ARGW0..ARGW3 in its .EXPORT line).
$key="%r26";
$len="%r25";
$inp="%r24";
$out="%r23";

# Two-element register pairs that the loop generator rotates between
# iterations, plus the y index and its table value.
@XX=("%r19","%r20");
@TX=("%r21","%r22");
$YY="%r28";
$TY="%r29";

# Scratch registers; %r3..%r6 are the ones the RC4 prologue saves.
$acc="%r1";
$ix="%r2";
$iy="%r3";
$dat0="%r4";
$dat1="%r5";
$rem="%r6";
$mask="%r31";
# Append one pass of the 4x unrolled loop body to $code.
#
# Four copies of the step are emitted.  After each copy @TX and @XX are
# rotated, so consecutive copies alternate between the two registers of
# each pair; four rotations of a two-element array leave both arrays in
# their starting order.  In every copy but the first, two extra
# instructions are generated through `...` expressions (evaluated later
# by the final substitution pass): an indexed load into $dat1 and a
# dep/zdep that inserts $dat1 into $acc at bit position 8*($i-1)+7.
#
# Takes no arguments and returns nothing useful; reads the mnemonic and
# register globals and modifies $code, @TX and @XX.
sub unrolledloopbody {
# $i is lexical so the generator does not clobber a global $i; its value
# is interpolated into the heredoc text before the `...` parts are run.
for (my $i=0;$i<4;$i++) {
$code.=<<___;
	ldo	1($XX[0]),$XX[1]
	`sprintf("$LDX	%$TY(%$key),%$dat1") if ($i>0)`
	and	$mask,$XX[1],$XX[1]
	$LDX	$YY($key),$TY
	$MKX	$YY,$key,$ix
	$LDX	$XX[1]($key),$TX[1]
	$MKX	$XX[0],$key,$iy
	$ST	$TX[0],0($ix)
	comclr,<> $XX[1],$YY,%r0	; conditional
	copy	$TX[0],$TX[1]		; move
	`sprintf("%sdep %$dat1,%d,8,%$acc",$i==1?"z":"",8*($i-1)+7) if ($i>0)`
	$ST	$TY,0($iy)
	addl	$TX[0],$TY,$TY
	addl	$TX[1],$YY,$YY
	and	$mask,$TY,$TY
	and	$mask,$YY,$YY
___
push(@TX,shift(@TX)); push(@XX,shift(@XX));	# "rotate" registers
}
}
# Append a one-byte-per-iteration loop to $code.
#
#   $label - text emitted as the loop's label and used as branch target
#   $count - register name decremented by the closing addib; the loop
#            repeats until it reaches zero
#
# Each iteration loads one byte through "ldbx $inp($out)", xors it with a
# byte loaded from the table at $key and stores the result at -1($out)
# after $out has been advanced by one.  Returns nothing useful; only
# $code is modified.
sub foldedloop {
my ($label,$count)=@_;
$code.=<<___;
$label
	$MKX	$YY,$key,$iy
	$LDX	$YY($key),$TY
	$MKX	$XX[0],$key,$ix
	$ST	$TX[0],0($iy)
	ldo	1($XX[0]),$XX[0]
	$ST	$TY,0($ix)
	addl	$TX[0],$TY,$TY
	ldbx	$inp($out),$dat1
	and	$mask,$TY,$TY
	and	$mask,$XX[0],$XX[0]
	$LDX	$TY($key),$acc
	$LDX	$XX[0]($key),$TX[0]
	ldo	1($out),$out
	xor	$dat1,$acc,$acc
	addl	$TX[0],$YY,$YY
	stb	$acc,-1($out)
	addib,<> -1,$count,$label	; $count is always small
	and	$mask,$YY,$YY
___
}
# Start of the generated RC4 routine: directives, prologue (saves %r2 and
# %r3..%r6), early exit when $len is zero, load of the two leading state
# elements, and the "warm up" of the first table lookup.  Short inputs
# (the "6 >>= $len" test) go straight to the byte loop at L$oop1;
# otherwise a byte loop runs until $out is 4-byte aligned.
#
# NOTE(review): the bare "RC4" entry label after .EXPORT is missing from
# the text under review (very short lines appear to have been dropped in
# extraction); it is restored here -- confirm against the pristine file.
$code=<<___;
	.LEVEL	$LEVEL
	.SPACE	\$TEXT\$
	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY

	.EXPORT	RC4,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
RC4
	.PROC
	.CALLINFO	FRAME=`$FRAME-4*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=6
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)

	cmpib,*= 0,$len,L\$abort
	sub	$inp,$out,$inp		; distance between $inp and $out

	$LD	`0*$SZ`($key),$XX[0]
	$LD	`1*$SZ`($key),$YY
	ldo	`2*$SZ`($key),$key

	ldi	0xff,$mask
	ldi	3,$dat0

	ldo	1($XX[0]),$XX[0]	; warm up loop
	and	$mask,$XX[0],$XX[0]
	$LDX	$XX[0]($key),$TX[0]
	addl	$TX[0],$YY,$YY
	cmpib,*>>= 6,$len,L\$oop1	; is $len large enough to bother?
	and	$mask,$YY,$YY

	and,<>	$out,$dat0,$rem		; is $out aligned?
	b	L\$alignedout
	subi	4,$rem,$rem
	sub	$len,$rem,$len
___
foldedloop("L\$alignout",$rem);	# process till $out is aligned
# $out is now aligned.  If $inp is aligned too, branch to the aligned
# loop L$oop4; otherwise load %sar (%cr11) with the shift amount for
# vshd, preload the first input word and fall into the misaligned-input
# variant of the unrolled loop.
$code.=<<___;
L\$alignedout				; $len is at least 4 here
	and,<>	$inp,$dat0,$acc		; is $inp aligned?
	b	L\$oop4
	sub	$inp,$acc,$rem		; align $inp

	sh3addl	$acc,%r0,$acc
	subi	32,$acc,$acc
	mtctl	$acc,%cr11		; load %sar with vshd align factor
	ldwx	$rem($out),$dat0
	ldo	4($rem),$rem
L\$oop4misalignedinp
___
unrolledloopbody();
# Tail of the misaligned-input loop: complete the 32-bit value in $acc,
# combine two input words with vshd, xor and store one output word, then
# loop while more than 3 bytes remain.  Afterwards either finish or hand
# the remaining bytes to L$oop1.  The aligned loop L$oop4 starts here.
#
# NOTE(review): the two "nop" lines filling the delay slots of
# "cmpib ... L$done" and "b L$oop1" are missing from the text under
# review (very short lines appear to have been dropped in extraction);
# they are restored here -- confirm against the pristine file.
$code.=<<___;
	$LDX	$TY($key),$ix
	ldwx	$rem($out),$dat1
	ldo	-4($len),$len
	or	$ix,$acc,$acc		; last piece, no need to dep
	vshd	$dat0,$dat1,$iy		; align data
	copy	$dat1,$dat0
	xor	$iy,$acc,$acc
	stw	$acc,0($out)
	cmpib,*<< 3,$len,L\$oop4misalignedinp
	ldo	4($out),$out
	cmpib,*= 0,$len,L\$done
	nop
	b	L\$oop1
	nop

	.ALIGN	8
L\$oop4
___
unrolledloopbody();
# Tail of the aligned loop: complete the 32-bit value in $acc, xor it with
# one input word, store it, and loop while more than 3 bytes remain.  Any
# remaining bytes are handled by the byte loop L$oop1 emitted right after.
#
# NOTE(review): the "nop" filling the delay slot of "cmpib ... L$done" is
# missing from the text under review (very short lines appear to have
# been dropped in extraction); it is restored here -- confirm against
# the pristine file.
$code.=<<___;
	$LDX	$TY($key),$ix
	ldwx	$inp($out),$dat0
	ldo	-4($len),$len
	or	$ix,$acc,$acc		; last piece, no need to dep
	xor	$dat0,$acc,$acc
	stw	$acc,0($out)
	cmpib,*<< 3,$len,L\$oop4
	ldo	4($out),$out
	cmpib,*= 0,$len,L\$done
	nop
___
foldedloop("L\$oop1",$len);
# RC4 epilogue: undo the loop's look-ahead on the x and y indices, store
# them back into the two slots in front of the table, restore the saved
# registers and return.  L$abort is the early-exit target for $len == 0;
# the final $POPMB executes in the delay slot of "bv".
$code.=<<___;
L\$done
	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2
	ldo	-1($XX[0]),$XX[0]	; chill out loop
	sub	$YY,$TX[0],$YY
	and	$mask,$XX[0],$XX[0]
	and	$mask,$YY,$YY
	$ST	$XX[0],`-2*$SZ`($key)
	$ST	$YY,`-1*$SZ`($key)
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
L\$abort
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND
___
# private_RC4_set_key and RC4_options.
#
# private_RC4_set_key zeroes the two leading state slots, fills the
# 256-entry table with 0..255 (L$1st), then runs 256 swap steps driven by
# the key bytes (L$2nd); %r23 is a negative index into the key that is
# reset to -$len each time it reaches zero.  RC4_options returns the
# address of the L$opts string, computed relative to L$pic.
#
# Registers are written as $XX[0]/$TX[0] rather than one-element slices
# (@XX[0]); both interpolate to the same text.
#
# NOTE(review): the "nop" after the .EXIT of private_RC4_set_key is
# missing from the text under review (very short lines appear to have
# been dropped in extraction); it is restored here -- confirm against
# the pristine file.
$code.=<<___;

	.EXPORT	private_RC4_set_key,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
	.ALIGN	8
private_RC4_set_key
	.PROC
	.CALLINFO	NO_CALLS
	.ENTRY
	$ST	%r0,`0*$SZ`($key)
	$ST	%r0,`1*$SZ`($key)
	ldo	`2*$SZ`($key),$key
	copy	%r0,$XX[0]
L\$1st
	$ST	$XX[0],0($key)
	ldo	1($XX[0]),$XX[0]
	bb,>=	$XX[0],`31-8`,L\$1st	; $XX[0]<256
	ldo	$SZ($key),$key

	ldo	`-256*$SZ`($key),$key	; rewind $key
	addl	$len,$inp,$inp		; $inp to point at the end
	sub	%r0,$len,%r23		; inverse index
	copy	%r0,$XX[0]
	copy	%r0,$XX[1]
	ldi	0xff,$mask

L\$2nd
	$LDX	$XX[0]($key),$TX[0]
	ldbx	%r23($inp),$TX[1]
	addi,nuv 1,%r23,%r23		; increment and conditional
	sub	%r0,$len,%r23		; inverse index
	addl	$TX[0],$XX[1],$XX[1]
	addl	$TX[1],$XX[1],$XX[1]
	and	$mask,$XX[1],$XX[1]
	$MKX	$XX[0],$key,$TY
	$LDX	$XX[1]($key),$TX[1]
	$MKX	$XX[1],$key,$YY
	ldo	1($XX[0]),$XX[0]
	$ST	$TX[0],0($YY)
	bb,>=	$XX[0],`31-8`,L\$2nd	; $XX[0]<256
	$ST	$TX[1],0($TY)

	bv,n	(%r2)
	.EXIT
	nop
	.PROCEND

	.EXPORT	RC4_options,ENTRY
	.ALIGN	8
RC4_options
	.PROC
	.CALLINFO	NO_CALLS
	.ENTRY
	blr	%r0,%r28
	ldi	3,%r1
L\$pic
	andcm	%r28,%r1,%r28
	bv	(%r2)
	.EXIT
	ldo	L\$opts-L\$pic(%r28),%r28
	.PROCEND
	.ALIGN	8
L\$opts
	.STRINGZ "rc4(4x,`$SZ==1?"char":"int"`)"
	.STRINGZ "RC4 for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
___
# Replace every `...` span in the generated text with the result of
# evaluating it as Perl (frame offsets, state element offsets, the
# conditional instructions produced by the loop generator).
$code =~ s/\`([^\`]*)\`/eval $1/gem;
# With 4-byte registers, rewrite each "cmpib,*<cond>" as "comib,<cond>".
$code =~ s/cmpib,\*/comib,/gm if ($SIZE_T==4);

print $code;
# Buffered write errors (e.g. a full disk) only surface at close, so a
# failure here must not go unnoticed and leave a truncated output file.
close STDOUT or die "error closing STDOUT: $!";