Remove building with NOCRYPTO option
[minix.git] / crypto / external / bsd / openssl / lib / libcrypto / arch / arm / armv4-gf2m.S
blobb25b2550fb84b24ec5093480625c8d8c58062450
1 #include "arm_arch.h"
2 #include "arm_asm.h"
@ Everything below is placed in the .text section and assembled as
@ 32-bit ARM (not Thumb) instructions.
4 .text
5 .code   32
@-----------------------------------------------------------------------
@ mul_1x1_ialu: integer-only 32x32 -> 64-bit carry-less multiply
@ (partial products are combined with XOR, never with ADD).  File-local
@ helper; it is reached with bl from bn_GF2m_mul_2x2.
@
@ In:    r1  = a
@        r0  = b
@        r12 = 7<<2, mask that turns a shifted copy of b into the byte
@              offset of a table entry (the caller sets it up)
@        sp  = base of a 32-byte scratch area used as tab[0..7]
@              (the caller allocates it)
@ Out:   r5  = low  32 bits of the product
@        r4  = high 32 bits of the product
@ Clobb: r4-r9, condition flags, the 32 bytes at sp.
@        r0-r3, r10, r12 and lr are not written.
@
@ Method: a1 = a with its two top bits cleared, so that a1<<2 still fits
@ in 32 bits.  tab[i] holds the XOR-combination of a1, a1<<1 and a1<<2
@ selected by the three bits of i.  b is then consumed in eleven windows
@ (ten of 3 bits, the last of 2 bits); each table entry is XORed into
@ r4:r5 at the shift of its window.  The two bits of a that were masked
@ off are accounted for at the end by conditionally XORing in b<<30 and
@ b<<31.
@-----------------------------------------------------------------------
6 .type   mul_1x1_ialu,%function
7 .align  5
8 mul_1x1_ialu:
@ --- build tab[0..7] on the stack ---
9         mov     r4,#0
10         bic     r5,r1,#3<<30            @ a1=a&0x3fffffff
11         str     r4,[sp,#0]              @ tab[0]=0
12         add     r6,r5,r5                @ a2=a1<<1
13         str     r5,[sp,#4]              @ tab[1]=a1
14         eor     r7,r5,r6                @ a1^a2
15         str     r6,[sp,#8]              @ tab[2]=a2
16         mov     r8,r5,lsl#2             @ a4=a1<<2
17         str     r7,[sp,#12]             @ tab[3]=a1^a2
18         eor     r9,r5,r8                @ a1^a4
19         str     r8,[sp,#16]             @ tab[4]=a4
20         eor     r4,r6,r8                @ a2^a4
21         str     r9,[sp,#20]             @ tab[5]=a1^a4
22         eor     r7,r7,r8                @ a1^a2^a4
23         str     r4,[sp,#24]             @ tab[6]=a2^a4
@ r8 = (b & 7) * 4: byte offset of the entry for the lowest window.
@ Every later index is formed the same way: shift b so that the window
@ lands on bits 4:2, then mask with r12.
24         and     r8,r12,r0,lsl#2
25         str     r7,[sp,#28]             @ tab[7]=a1^a2^a4
@ --- table lookups; r6 and r7 alternate as the loaded entry, and the
@ index for the next lookup is computed before the previous entry is
@ folded into r4:r5 ---
27         and     r9,r12,r0,lsr#1
28         ldr     r5,[sp,r8]              @ tab[b       & 0x7]
29         and     r8,r12,r0,lsr#4
30         ldr     r7,[sp,r9]              @ tab[b >>  3 & 0x7]
31         and     r9,r12,r0,lsr#7
32         ldr     r6,[sp,r8]              @ tab[b >>  6 & 0x7]
33         eor     r5,r5,r7,lsl#3  @ stall
@ r4 starts as the bits of the second entry that were shifted out of r5.
34         mov     r4,r7,lsr#29
35         ldr     r7,[sp,r9]              @ tab[b >>  9 & 0x7]
37         and     r8,r12,r0,lsr#10
38         eor     r5,r5,r6,lsl#6
39         eor     r4,r4,r6,lsr#26
40         ldr     r6,[sp,r8]              @ tab[b >> 12 & 0x7]
42         and     r9,r12,r0,lsr#13
43         eor     r5,r5,r7,lsl#9
44         eor     r4,r4,r7,lsr#23
45         ldr     r7,[sp,r9]              @ tab[b >> 15 & 0x7]
47         and     r8,r12,r0,lsr#16
48         eor     r5,r5,r6,lsl#12
49         eor     r4,r4,r6,lsr#20
50         ldr     r6,[sp,r8]              @ tab[b >> 18 & 0x7]
52         and     r9,r12,r0,lsr#19
53         eor     r5,r5,r7,lsl#15
54         eor     r4,r4,r7,lsr#17
55         ldr     r7,[sp,r9]              @ tab[b >> 21 & 0x7]
57         and     r8,r12,r0,lsr#22
58         eor     r5,r5,r6,lsl#18
59         eor     r4,r4,r6,lsr#14
60         ldr     r6,[sp,r8]              @ tab[b >> 24 & 0x7]
62         and     r9,r12,r0,lsr#25
63         eor     r5,r5,r7,lsl#21
64         eor     r4,r4,r7,lsr#11
65         ldr     r7,[sp,r9]              @ tab[b >> 27 & 0x7]
@ --- fix-up for bit 30 of a: if it is set, XOR b<<30 into r4:r5.  The
@ flags set here are consumed by the two eorne below; the instructions
@ in between do not update flags. ---
67         tst     r1,#1<<30
68         and     r8,r12,r0,lsr#28
69         eor     r5,r5,r6,lsl#24
70         eor     r4,r4,r6,lsr#8
71         ldr     r6,[sp,r8]              @ tab[b >> 30      ]
73         eorne   r5,r5,r0,lsl#30
74         eorne   r4,r4,r0,lsr#2
@ --- fix-up for bit 31 of a: if it is set, XOR b<<31 into r4:r5 ---
75         tst     r1,#1<<31
76         eor     r5,r5,r7,lsl#27
77         eor     r4,r4,r7,lsr#5
78         eorne   r5,r5,r0,lsl#31
79         eorne   r4,r4,r0,lsr#1
@ Fold in the last (2-bit) window.
80         eor     r5,r5,r6,lsl#30
81         eor     r4,r4,r6,lsr#2
@ Plain ARM-mode return; there is no interworking here.
83         mov     pc,lr
84 .size   mul_1x1_ialu,.-mul_1x1_ialu
@-----------------------------------------------------------------------
@ bn_GF2m_mul_2x2: 64x64 -> 128-bit carry-less multiply.  The product is
@ written as four 32-bit words to the buffer addressed by r0.
@
@ In:    r0   = result pointer (words at offsets 0, 4, 8, 12 are written)
@        r1   = a1  (high word of a)
@        r2   = a0  (low  word of a)
@        r3   = b1  (high word of b)
@        [sp] = b0  (low  word of b, passed as the 5th argument)
@ NOTE(review): presumably called from C as
@        bn_GF2m_mul_2x2(r, a1, a0, b1, b0) -- confirm against the C
@        prototype, which is not visible in this file.
@
@ Integer path: pushes and restores r4-r10 and lr; modifies r0-r3, r12
@        and flags; uses 32 further bytes of stack as the lookup table
@        for mul_1x1_ialu.
@ NEON path:    modifies r12, d0-d7, d16-d17 and d26-d30; touches no
@        stack apart from reading the 5th argument.
@-----------------------------------------------------------------------
85 .global bn_GF2m_mul_2x2
86 .type   bn_GF2m_mul_2x2,%function
87 .align  5
88 bn_GF2m_mul_2x2:
@ Run-time dispatch (only built when the ARMv7 code is compiled in).
@ The literal at .LOPENSSL_armcap holds the distance from .Lpic+8 to
@ OPENSSL_armcap_P, so the second ldr fetches that capability word
@ pc-relatively.  If bit 0 is set, the NEON implementation is used.
@ NOTE(review): bit 0 is presumably the NEON capability flag declared
@ in arm_arch.h -- confirm.
89 #if __ARM_MAX_ARCH__>=7
90         ldr     r12,.LOPENSSL_armcap
91 .Lpic:  ldr     r12,[pc,r12]
92         tst     r12,#1
93         bne     .LNEON
94 #endif
@ ---------------- integer path ----------------
@ Three 32x32 carry-less multiplies (a1*b1, a0*b0 and
@ (a1^a0)*(b1^b0)) are combined into the 128-bit result.
95         stmdb   sp!,{r4-r10,lr}
96         mov     r10,r0                  @ reassign 1st argument
97         mov     r0,r3                   @ r0=b1
@ Eight registers were just pushed, so the caller-provided 5th argument
@ now sits 32 bytes above sp.
98         ldr     r3,[sp,#32]             @ load b0
99         mov     r12,#7<<2
100         sub     sp,sp,#32               @ allocate tab[8]
102         bl      mul_1x1_ialu            @ a1·b1
@ High half of the result: words 2 and 3.
103         str     r5,[r10,#8]
104         str     r4,[r10,#12]
@ Swap r0 with r3 and r1 with r2 using three XORs each (no scratch
@ register needed).  Afterwards r0=b0, r3=b1, r1=a0, r2=a1.
106         eor     r0,r0,r3                @ flip b0 and b1
107          eor    r1,r1,r2                @ flip a0 and a1
108         eor     r3,r3,r0
109          eor    r2,r2,r1
110         eor     r0,r0,r3
111          eor    r1,r1,r2
112         bl      mul_1x1_ialu            @ a0·b0
@ Low half of the result: words 0 and 1.
113         str     r5,[r10]
114         str     r4,[r10,#4]
@ r1 = a0^a1, r0 = b0^b1: operands of the middle product.
116         eor     r1,r1,r2
117         eor     r0,r0,r3
118         bl      mul_1x1_ialu            @ (a1+a0)·(b1+b0)
@ Reload the four words stored so far (r6..r9 = words 0..3) and fold
@ the middle product r4:r5 into words 1 and 2:
@   word 2 = mid_hi ^ word1 ^ word2 ^ word3
@   word 1 = mid_lo ^ word0 ^ word1 ^ word2   (old values on the right)
119         ldmia   r10,{r6-r9}
120         eor     r5,r5,r4
121         eor     r4,r4,r7
122         eor     r5,r5,r6
123         eor     r4,r4,r8
124         eor     r5,r5,r9
125         eor     r4,r4,r9
126         str     r4,[r10,#8]
@ XORing in the new word 2 cancels mid_hi and word3 out of r5.
127         eor     r5,r5,r4
128         add     sp,sp,#32               @ destroy tab[8]
129         str     r5,[r10,#4]
@ Return.  For ARMv5 and later the saved lr is popped straight into pc.
@ Otherwise lr is restored and mov pc,lr is used when bit 0 of lr is
@ clear; if it is set, execution falls into the hand-encoded word
@ 0xe12fff1e, which is the encoding of bx lr, so the object remains
@ ARMv4 binary compatible yet can still return to a Thumb caller.
131 #if __ARM_ARCH__>=5
132         ldmia   sp!,{r4-r10,pc}
133 #else
134         ldmia   sp!,{r4-r10,lr}
135         tst     lr,#1
136         moveq   pc,lr                   @ be binary compatible with V4, yet
137         .word   0xe12fff1e                      @ interoperable with Thumb ISA:-)
138 #endif
@ ---------------- NEON path ----------------
@ Entered by branch from the capability test above, before anything has
@ been pushed, so [sp] is still the 5th argument.  The 64x64 product is
@ assembled from 8x8-bit polynomial multiplies (vmull.p8) applied to
@ byte-rotated copies of the operands (A1..A3, B1..B4); the partial
@ results are masked, byte-shifted with vext and XORed together.
139 #if __ARM_MAX_ARCH__>=7
140 .arch   armv7-a
141 .fpu    neon
143 .align  5
144 .LNEON:
145         ldr             r12, [sp]               @ 5th argument
@ d26 = A (a0 in the low word, a1 in the high word)
@ d27 = B (b0 in the low word, b1 in the high word)
146         vmov            d26, r2, r1
147         vmov            d27, r12, r3
@ d28, d29, d30 = masks keeping the low 48, 32 and 16 bits.
148         vmov.i64        d28, #0x0000ffffffffffff
149         vmov.i64        d29, #0x00000000ffffffff
150         vmov.i64        d30, #0x000000000000ffff
152         vext.8          d2, d26, d26, #1        @ A1
153         vmull.p8        q1, d2, d27             @ F = A1*B
154         vext.8          d0, d27, d27, #1        @ B1
155         vmull.p8        q0, d26, d0             @ E = A*B1
156         vext.8          d4, d26, d26, #2        @ A2
157         vmull.p8        q2, d4, d27             @ H = A2*B
158         vext.8          d16, d27, d27, #2       @ B2
159         vmull.p8        q8, d26, d16            @ G = A*B2
160         vext.8          d6, d26, d26, #3        @ A3
161         veor            q1, q1, q0              @ L = E + F
162         vmull.p8        q3, d6, d27             @ J = A3*B
163         vext.8          d0, d27, d27, #3        @ B3
164         veor            q2, q2, q8              @ M = G + H
165         vmull.p8        q0, d26, d0             @ I = A*B3
166         veor            d2, d2, d3      @ t0 = (L) (P0 + P1) << 8
167         vand            d3, d3, d28
168         vext.8          d16, d27, d27, #4       @ B4
169         veor            d4, d4, d5      @ t1 = (M) (P2 + P3) << 16
170         vand            d5, d5, d29
171         vmull.p8        q8, d26, d16            @ K = A*B4
172         veor            q3, q3, q0              @ N = I + J
173         veor            d2, d2, d3
174         veor            d4, d4, d5
175         veor            d6, d6, d7      @ t2 = (N) (P4 + P5) << 24
176         vand            d7, d7, d30
@ The vext on a q register with itself rotates it by bytes; counts 15,
@ 14, 13 and 12 move t0..t3 up by 1, 2, 3 and 4 bytes respectively.
177         vext.8          q1, q1, q1, #15
178         veor            d16, d16, d17   @ t3 = (K) (P6 + P7) << 32
179         vmov.i64        d17, #0
180         vext.8          q2, q2, q2, #14
181         veor            d6, d6, d7
182         vmull.p8        q0, d26, d27            @ D = A*B
183         vext.8          q8, q8, q8, #12
184         vext.8          q3, q3, q3, #13
185         veor            q1, q1, q2
186         veor            q3, q3, q8
187         veor            q0, q0, q1
188         veor            q0, q0, q3
@ Store the 128-bit product to the result buffer.
190         vst1.32         {q0}, [r0]
191         RET             @ bx lr
192 #endif
193 .size   bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
@ Literal read by the capability test at the top of bn_GF2m_mul_2x2:
@ the distance from .Lpic+8 to OPENSSL_armcap_P.  In ARM state the pc
@ value used by the load at .Lpic is the address of that instruction
@ plus 8, so adding this word to pc yields the address of the variable
@ without an absolute relocation.
194 #if __ARM_MAX_ARCH__>=7
195 .align  5
196 .LOPENSSL_armcap:
197 .word   OPENSSL_armcap_P-(.Lpic+8)
198 #endif
@ Identification string embedded in the object file.
199 .asciz  "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
200 .align  5
@ Declares OPENSSL_armcap_P as a common symbol, 4 bytes in size with
@ 4-byte alignment, so this object links whether or not another object
@ also defines it.
202 #if __ARM_MAX_ARCH__>=7
203 .comm   OPENSSL_armcap_P,4,4
204 #endif