unify {de,}mangle_poll(), get rid of kernel-side POLL...
[cris-mirror.git] / arch / powerpc / crypto / md5-asm.S
blob10cdf5bceebbaf6eb468f75b98f2144502d99efe
1 /*
2  * Fast MD5 implementation for PPC
3  *
4  * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License as published by the Free
8  * Software Foundation; either version 2 of the License, or (at your option)
9  * any later version.
10  *
11  */
12 #include <asm/ppc_asm.h>
13 #include <asm/asm-offsets.h>
/*
 * Register aliases used throughout this file.
 *
 *   rHP, rWP     base registers: rHP addresses the four 32-bit state words
 *                that are loaded at entry and stored back after each block,
 *                rWP addresses the input data read by LOAD_DATA.
 *   rH0..rH3     working copies of the four state words.
 *   rW00..rW15   the sixteen 32-bit words of the block being processed.
 *   rT0, rT1     scratch registers for the per-step boolean function.
 *
 * rH3 shares r5 with the incoming block count; the count is moved to CTR
 * before rH3 is first written.  rH0 lives in r0, which is only ever used
 * as a plain source/destination operand here, never as a base register
 * or as the source operand of addi/addis.  rW05..rT1 occupy r14..r26,
 * the same register numbers that INITIALIZE/FINALIZE pass to the
 * SAVE_*/REST_* helpers.
 */
15 #define rHP     r3
16 #define rWP     r4
18 #define rH0     r0
19 #define rH1     r6
20 #define rH2     r7
21 #define rH3     r5
23 #define rW00    r8
24 #define rW01    r9
25 #define rW02    r10
26 #define rW03    r11
27 #define rW04    r12
28 #define rW05    r14
29 #define rW06    r15
30 #define rW07    r16
31 #define rW08    r17
32 #define rW09    r18
33 #define rW10    r19
34 #define rW11    r20
35 #define rW12    r21
36 #define rW13    r22
37 #define rW14    r23
38 #define rW15    r24
40 #define rT0     r25
41 #define rT1     r26
/*
 * INITIALIZE - function prologue.
 *
 * Moves r1 down by INT_FRAME_SIZE and stores GPRs into the new frame via
 * the SAVE_* helpers (defined outside this file).  The helper arguments
 * start at registers 14, 22 and 26; presumably that covers r14..r26,
 * i.e. every alias from rW05 through rT1 - confirm against ppc_asm.h.
 * FINALIZE must undo exactly what is done here.
 */
43 #define INITIALIZE \
44         PPC_STLU r1,-INT_FRAME_SIZE(r1); \
45         SAVE_8GPRS(14, r1);             /* push registers onto stack    */ \
46         SAVE_4GPRS(22, r1);                                                \
47         SAVE_GPR(26, r1)
/*
 * FINALIZE - function epilogue, the mirror image of INITIALIZE.
 *
 * Reloads the GPRs through the REST_* helpers using the same register
 * numbers (14, 22, 26) that INITIALIZE passed to SAVE_*, then moves r1
 * back up by INT_FRAME_SIZE to release the frame.
 */
49 #define FINALIZE \
50         REST_8GPRS(14, r1);             /* pop registers from stack     */ \
51         REST_4GPRS(22, r1);                                                \
52         REST_GPR(26, r1);                                                  \
53         addi    r1,r1,INT_FRAME_SIZE;
/*
 * Input access helpers, selected by endianness.
 *
 *   LOAD_DATA(reg, off)  load one 32-bit input word into reg.
 *   INC_PTR              advance rWP after a word has been loaded.
 *   NEXT_BLOCK           advance rWP after a whole block.
 *
 * Big endian: the word is fetched with lwbrx (byte-reversed load).  That
 * instruction only has an indexed form, so `off` is ignored and rWP
 * itself is stepped by 4 after every word; NEXT_BLOCK is then empty
 * because the sixteen INC_PTR expansions per block already add up to 64.
 *
 * Little endian: the word is fetched with a plain lwz at displacement
 * `off` from rWP, INC_PTR is empty, and NEXT_BLOCK steps rWP by 64 once
 * per block.
 */
55 #ifdef __BIG_ENDIAN__
56 #define LOAD_DATA(reg, off) \
57         lwbrx           reg,0,rWP;      /* load data                    */
58 #define INC_PTR \
59         addi            rWP,rWP,4;      /* increment per word           */
60 #define NEXT_BLOCK                      /* nothing to do                */
61 #else
62 #define LOAD_DATA(reg, off) \
63         lwz             reg,off(rWP);   /* load data                    */
64 #define INC_PTR                         /* nothing to do                */
65 #define NEXT_BLOCK \
66         addi            rWP,rWP,64;     /* increment per block          */
67 #endif
/*
 * R_00_15 - two consecutive steps using f(b, c, d) = (b & c) | (~b & d),
 * with both input words loaded from memory.
 *
 *   a, b, c, d   state registers as seen by the first step.  The first
 *                step updates a; the second step updates d and sees the
 *                roles rotated, i.e. (a, b, c, d) := (d, a, b, c).
 *                Comments tagged "2:" use the rotated role names.
 *   w0, w1       registers receiving the input words for step 1 / step 2.
 *   p, q         right-rotate amounts for step 1 / step 2.  rotrwi by p
 *                is a left rotate by 32 - p on a 32-bit word.
 *   off          byte offset of w0 in the block (w1 is at off + 4); only
 *                used by the little-endian LOAD_DATA.
 *   k0h, k0l     halves of the constant added to w0: addis adds
 *   k1h, k1l     (kNh << 16) and addi adds the sign-extended kNl, so a
 *                negative kNl is paired with a kNh that is one higher.
 *
 * Side effects: w0 and w1 are left holding word + constant, not the raw
 * word; rT0 and rT1 are clobbered; rWP moves via INC_PTR on big endian.
 * Instructions of the two steps are interleaved; the "1:"/"2:" tags in
 * the comments tell which step an instruction belongs to.
 */
69 #define R_00_15(a, b, c, d, w0, w1, p, q, off, k0h, k0l, k1h, k1l) \
70         LOAD_DATA(w0, off)              /*    W                         */ \
71         and             rT0,b,c;        /* 1: f = b and c               */ \
72         INC_PTR                         /*    ptr++                     */ \
73         andc            rT1,d,b;        /* 1: f' = ~b and d             */ \
74         LOAD_DATA(w1, off+4)            /*    W                         */ \
75         or              rT0,rT0,rT1;    /* 1: f = f or f'               */ \
76         addi            w0,w0,k0l;      /* 1: wk = w + k                */ \
77         add             a,a,rT0;        /* 1: a = a + f                 */ \
78         addis           w0,w0,k0h;      /* 1: wk = w + k'               */ \
79         addis           w1,w1,k1h;      /* 2: wk = w + k                */ \
80         add             a,a,w0;         /* 1: a = a + wk                */ \
81         addi            w1,w1,k1l;      /* 2: wk = w + k'               */ \
82         rotrwi          a,a,p;          /* 1: a = a rotl x              */ \
83         add             d,d,w1;         /* 2: a = a + wk                */ \
84         add             a,a,b;          /* 1: a = a + b                 */ \
85         and             rT0,a,b;        /* 2: f = b and c               */ \
86         andc            rT1,c,a;        /* 2: f' = ~b and d             */ \
87         or              rT0,rT0,rT1;    /* 2: f = f or f'               */ \
88         add             d,d,rT0;        /* 2: a = a + f                 */ \
89         INC_PTR                         /*    ptr++                     */ \
90         rotrwi          d,d,q;          /* 2: a = a rotl x              */ \
91         add             d,d,a;          /* 2: a = a + b                 */
/*
 * R_16_31 - two consecutive steps using f(b, c, d) = (c & ~d) | (b & d).
 *
 *   a, b, c, d   state registers as seen by the first step.  The first
 *                step updates a; the second step updates d with the
 *                roles rotated, i.e. (a, b, c, d) := (d, a, b, c).
 *                Comments tagged "2:" use the rotated role names.
 *   w0, w1       word registers for step 1 / step 2.  Nothing is loaded
 *                here: each register still holds whatever an earlier
 *                macro left in it (word plus constants added so far),
 *                and the new constant is added on top, in place.
 *   p, q         right-rotate amounts (left rotate by 32 - p / 32 - q).
 *   k0h, k0l     halves of the value added to w0 / w1: (kNh << 16) via
 *   k1h, k1l     addis plus sign-extended kNl via addi.
 *
 * Clobbers rT0 and rT1.
 */
93 #define R_16_31(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
94         andc            rT0,c,d;        /* 1: f = c and ~d              */ \
95         and             rT1,b,d;        /* 1: f' = b and d              */ \
96         addi            w0,w0,k0l;      /* 1: wk = w + k                */ \
97         or              rT0,rT0,rT1;    /* 1: f = f or f'               */ \
98         addis           w0,w0,k0h;      /* 1: wk = w + k'               */ \
99         add             a,a,rT0;        /* 1: a = a + f                 */ \
100         addi            w1,w1,k1l;      /* 2: wk = w + k                */ \
101         add             a,a,w0;         /* 1: a = a + wk                */ \
102         addis           w1,w1,k1h;      /* 2: wk = w + k'               */ \
103         andc            rT0,b,c;        /* 2: f = c and ~d              */ \
104         rotrwi          a,a,p;          /* 1: a = a rotl x              */ \
105         add             a,a,b;          /* 1: a = a + b                 */ \
106         add             d,d,w1;         /* 2: a = a + wk                */ \
107         and             rT1,a,c;        /* 2: f' = b and d              */ \
108         or              rT0,rT0,rT1;    /* 2: f = f or f'               */ \
109         add             d,d,rT0;        /* 2: a = a + f                 */ \
110         rotrwi          d,d,q;          /* 2: a = a rotl x              */ \
111         add             d,d,a;          /* 2: a = a + b                 */
/*
 * R_32_47 - two consecutive steps using f(b, c, d) = b ^ c ^ d.
 *
 *   a, b, c, d   state registers as seen by the first step.  The first
 *                step updates a; the second step updates d with the
 *                roles rotated, i.e. (a, b, c, d) := (d, a, b, c).
 *   w0, w1       word registers for step 1 / step 2, updated in place on
 *                top of whatever earlier macros already added to them.
 *   p, q         right-rotate amounts (left rotate by 32 - p / 32 - q).
 *   k0h, k0l     halves of the value added to w0 / w1: (kNh << 16) via
 *   k1h, k1l     addis plus sign-extended kNl via addi.
 *
 * rT0 keeps b ^ c across both steps: the second step needs
 * (new a) ^ b ^ c, so it only has to xor the updated a into rT0.
 * Clobbers rT0 and rT1.
 */
113 #define R_32_47(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
114         xor             rT0,b,c;        /* 1: f' = b xor c              */ \
115         addi            w0,w0,k0l;      /* 1: wk = w + k                */ \
116         xor             rT1,rT0,d;      /* 1: f = f' xor d              */ \
117         addis           w0,w0,k0h;      /* 1: wk = w + k'               */ \
118         add             a,a,rT1;        /* 1: a = a + f                 */ \
119         addi            w1,w1,k1l;      /* 2: wk = w + k                */ \
120         add             a,a,w0;         /* 1: a = a + wk                */ \
121         addis           w1,w1,k1h;      /* 2: wk = w + k'               */ \
122         rotrwi          a,a,p;          /* 1: a = a rotl x              */ \
123         add             d,d,w1;         /* 2: a = a + wk                */ \
124         add             a,a,b;          /* 1: a = a + b                 */ \
125         xor             rT1,rT0,a;      /* 2: f = b xor f'              */ \
126         add             d,d,rT1;        /* 2: a = a + f                 */ \
127         rotrwi          d,d,q;          /* 2: a = a rotl x              */ \
128         add             d,d,a;          /* 2: a = a + b                 */
/*
 * R_48_63 - two consecutive steps using f(b, c, d) = (b | ~d) ^ c.
 *
 *   a, b, c, d   state registers as seen by the first step.  The first
 *                step updates a; the second step updates d with the
 *                roles rotated, i.e. (a, b, c, d) := (d, a, b, c).
 *                Comments tagged "2:" use the rotated role names.
 *   w0, w1       word registers for step 1 / step 2, updated in place on
 *                top of whatever earlier macros already added to them.
 *   p, q         right-rotate amounts (left rotate by 32 - p / 32 - q).
 *   k0h, k0l     halves of the value added to w0 / w1: (kNh << 16) via
 *   k1h, k1l     addis plus sign-extended kNl via addi.
 *
 * Only rT0 is used as scratch here; rT1 is left untouched.
 */
130 #define R_48_63(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
131         addi            w0,w0,k0l;      /* 1: w = w + k                 */ \
132         orc             rT0,b,d;        /* 1: f = b or ~d               */ \
133         addis           w0,w0,k0h;      /* 1: w = w + k'                */ \
134         xor             rT0,rT0,c;      /* 1: f = f xor c               */ \
135         add             a,a,w0;         /* 1: a = a + wk                */ \
136         addi            w1,w1,k1l;      /* 2: w = w + k                 */ \
137         add             a,a,rT0;        /* 1: a = a + f                 */ \
138         addis           w1,w1,k1h;      /* 2: w = w + k'                */ \
139         rotrwi          a,a,p;          /* 1: a = a rotl x              */ \
140         add             a,a,b;          /* 1: a = a + b                 */ \
141         orc             rT0,a,c;        /* 2: f = b or ~d               */ \
142         add             d,d,w1;         /* 2: a = a + wk                */ \
143         xor             rT0,rT0,b;      /* 2: f = f xor c               */ \
144         add             d,d,rT0;        /* 2: a = a + f                 */ \
145         rotrwi          d,d,q;          /* 2: a = a rotl x              */ \
146         add             d,d,a;          /* 2: a = a + b                 */
/*
 * ppc_md5_transform - run the MD5 block function over consecutive blocks.
 *
 * Registers read on entry:
 *   r3 (rHP)  address of four consecutive 32-bit state words; read once
 *             at entry and written back after every block.
 *   r4 (rWP)  address of the input data; 64 bytes are consumed per loop
 *             pass.
 *   r5        number of loop passes (blocks); moved into CTR.
 *
 * NOTE(review): bdnz decrements CTR before testing it, so a count of 0
 * would not leave the loop after zero passes.  Presumably callers never
 * pass 0 - confirm.
 *
 * Each of the 32 macro invocations below performs two steps and
 * alternates between the (rH0, rH1, rH2, rH3) and (rH2, rH3, rH0, rH1)
 * argument orders, so the state registers never have to be moved.
 */
148 _GLOBAL(ppc_md5_transform)
149         INITIALIZE
        /* r5 doubles as rH3, so the count must reach CTR before rH3 is loaded */
151         mtctr           r5
152         lwz             rH0,0(rHP)
153         lwz             rH1,4(rHP)
154         lwz             rH2,8(rHP)
155         lwz             rH3,12(rHP)
        /* loop head: one pass per 64-byte block */
157 ppc_md5_main:
        /*
         * Steps 0-15: words are loaded from the block in order.  Rotate
         * arguments 25/20/15/10 are right rotates, i.e. left rotates by
         * 7/12/17/22.  Each constant is passed as a high half for addis
         * and a signed low half for addi, e.g. 0xd76b, -23432 adds up to
         * 0xd76aa478.
         */
158         R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0,
159                 0xd76b, -23432, 0xe8c8, -18602)
160         R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8,
161                 0x2420, 0x70db, 0xc1be, -12562)
162         R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16,
163                 0xf57c, 0x0faf, 0x4788, -14806)
164         R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24,
165                 0xa830, 0x4613, 0xfd47, -27391)
166         R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32,
167                 0x6981, -26408, 0x8b45,  -2129)
168         R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40,
169                 0xffff, 0x5bb1, 0x895d, -10306)
170         R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48,
171                 0x6b90, 0x1122, 0xfd98, 0x7193)
172         R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56,
173                 0xa679, 0x438e, 0x49b4, 0x0821)
        /*
         * Steps 16-31 (left rotates by 5/9/14/20).  From here on the word
         * registers are reused without reloading, so they still contain
         * the constants added earlier.  The immediates are therefore the
         * difference to the previously added constant, not the constant
         * itself: rW01 already holds W1 + 0xe8c7b756, and adding
         * 0x0d566e0c brings it to W1 + 0xf61e2562.
         */
175         R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23,
176                 0x0d56, 0x6e0c, 0x1810, 0x6d2d)
177         R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12,
178                 0x9d02, -32109, 0x124c, 0x2332)
179         R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23,
180                 0x8ea7, 0x4a33, 0x0245, -18270)
181         R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12,
182                 0x8eee,  -8608, 0xf258,  -5095)
183         R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23,
184                 0x969d, -10697, 0x1cbe, -15288)
185         R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12,
186                 0x3317, 0x3e99, 0xdbd9, 0x7c15)
187         R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23,
188                 0xac4b, 0x7772, 0xd8cf, 0x331d)
189         R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12,
190                 0x6a28, 0x6dd8, 0x219a, 0x3b68)
        /* Steps 32-47 (left rotates by 4/11/16/23); immediates are again deltas */
192         R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21,
193                 0x29cb, 0x28e5, 0x4218,  -7788)
194         R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16,  9,
195                 0x473f, 0x06d1, 0x3aae, 0x3036)
196         R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21,
197                 0xaea1, -15134, 0x640b, -11295)
198         R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16,  9,
199                 0x8f4c, 0x4887, 0xbc7c, -22499)
200         R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21,
201                 0x7eb8, -27199, 0x00ea, 0x6050)
202         R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16,  9,
203                 0xe01a, 0x22fe, 0x4447, 0x69c5)
204         R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21,
205                 0xb7f3, 0x0253, 0x59b1, 0x4d5b)
206         R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16,  9,
207                 0x4701, -27017, 0xc7bd, -19859)
        /*
         * Steps 48-63 (left rotates by 6/10/15/21); immediates are again
         * deltas.  rW00, rW14, rW12 and rW10 are not used as word
         * registers after the first four invocations, so the lwz
         * instructions slotted between the last four reuse them to fetch
         * the state words from memory for the final additions.
         */
209         R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22,
210                 0x0988,  -1462, 0x4c70, -19401)
211         R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11,
212                 0xadaf,  -5221, 0xfc99, 0x66f7)
213         R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22,
214                 0x7e80, -16418, 0xba1e, -25587)
215         R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11,
216                 0x4130, 0x380d, 0xe0c5, 0x738d)
217         lwz             rW00,0(rHP)
218         R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22,
219                 0xe837, -30770, 0xde8a, 0x69e8)
220         lwz             rW14,4(rHP)
221         R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11,
222                 0x9e79, 0x260f, 0x256d, -27941)
223         lwz             rW12,8(rHP)
224         R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22,
225                 0xab75, -20775, 0x4f9e, -28397)
226         lwz             rW10,12(rHP)
227         R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11,
228                 0x662b, 0x7c56, 0x11b2, 0x0358)
        /*
         * Add the state words fetched above to the working registers and
         * store the sums back; rH0..rH3 stay live as the starting state
         * of the next pass.
         */
230         add             rH0,rH0,rW00
231         stw             rH0,0(rHP)
232         add             rH1,rH1,rW14
233         stw             rH1,4(rHP)
234         add             rH2,rH2,rW12
235         stw             rH2,8(rHP)
236         add             rH3,rH3,rW10
237         stw             rH3,12(rHP)
        /* little endian only: step rWP to the next block (empty on big endian) */
238         NEXT_BLOCK
        /* decrement CTR and loop while it is non-zero */
240         bdnz            ppc_md5_main
242         FINALIZE
243         blr