Merge tag 'locks-v3.16-2' of git://git.samba.org/jlayton/linux
[linux/fpc-iii.git] / arch / powerpc / crypto / sha1-powerpc-asm.S
blob125e16520061289aff815417fd7aa18ec41e3df7
1 /*
2  * SHA-1 implementation for PowerPC.
3  *
4  * Copyright (C) 2005 Paul Mackerras <paulus@samba.org>
5  */
7 #include <asm/ppc_asm.h>
8 #include <asm/asm-offsets.h>
10 /*
11  * We roll the registers for T, A, B, C, D, E around on each
12  * iteration; T on iteration t is A on iteration t+1, and so on.
13  * We use registers 7 - 12 for this.
14  */
/*
 * Map a step number t to the GPR number (7..12) that holds each SHA-1
 * working variable during that step.  The offsets are consecutive, so
 * RT(t) == RA(t+1), RA(t) == RB(t+1), RB(t) == RC(t+1), RC(t) == RD(t+1),
 * RD(t) == RE(t+1) and RE(t) == RT(t+1): the variables shift from one
 * step to the next by renaming, without any register moves.
 */
15 #define RT(t)   ((((t)+5)%6)+7)
16 #define RA(t)   ((((t)+4)%6)+7)
17 #define RB(t)   ((((t)+3)%6)+7)
18 #define RC(t)   ((((t)+2)%6)+7)
19 #define RD(t)   ((((t)+1)%6)+7)
20 #define RE(t)   ((((t)+0)%6)+7)
22 /* We use registers 16 - 31 for the W values */
/*
 * GPR number holding schedule word t.  The index wraps modulo 16, so W(t)
 * and W(t+16) name the same register.
 */
23 #define W(t)    (((t)%16)+16)
/*
 * SHA-1 treats its input as big-endian 32-bit words, so the block at r4
 * has to be read big-endian whatever the CPU's byte order.  LWZ(rt, d, ra)
 * loads the word at d(ra) into rt that way: a plain lwz on big-endian
 * builds, a byte-reversed load on little-endian ones.  lwbrx only exists
 * in indexed form, so the displacement is first placed in rt; rt must
 * therefore not be r0, which would be read as zero in the RA position.
 */
#ifdef __BIG_ENDIAN__
#define LWZ(rt, d, ra)                          \
        lwz     rt,d(ra)
#else
#define LWZ(rt, d, ra)                          \
        li      rt,d;                           \
        lwbrx   rt,rt,ra
#endif

/* Load input word t (byte offset t*4 from r4) into its W register. */
25 #define LOADW(t)                                \
26         LWZ(W(t),(t)*4,r4)
/*
 * Step t with f = (B & C) | (~B & D), for steps whose W(t) came straight
 * from the input block.  Computes T = rol(A,5) + f + E + r15 + W(t) and
 * B = rol(B,30), and also loads input word t+4 into W(t+4) ahead of use.
 * r15 is the round constant set up by the caller.  Clobbers r0, r6, r14.
 */
28 #define STEPD0_LOAD(t)                          \
29         andc    r0,RD(t),RB(t);         /* r0 = D & ~B */ \
30         and     r6,RB(t),RC(t);         /* r6 = B & C */ \
31         rotlwi  RT(t),RA(t),5;                  /* T = rol(A,5) */ \
32         or      r6,r6,r0;                       /* r6 = f(B,C,D) */ \
33         add     r0,RE(t),r15;                   /* r0 = E + K */ \
34         add     RT(t),RT(t),r6;         /* T += f */ \
35         add     r14,r0,W(t);                    /* r14 = E + K + W(t) */ \
36         LWZ(W((t)+4),((t)+4)*4,r4);     /* input word t+4, big-endian */ \
37         rotlwi  RB(t),RB(t),30;                 /* B = rol(B,30), after f used B */ \
38         add     RT(t),RT(t),r14
/*
 * Step t with f = (B & C) | (~B & D), interleaved with the computation of
 * schedule word t+4:
 *   T = rol(A,5) + f + E + r15 + W(t),  B = rol(B,30)
 *   W(t+4) = rol(W(t+1) ^ W(t-4) ^ W(t-10) ^ W(t-12), 1)
 * W(t+4) and W(t+4-16) are the same register, so the oldest schedule word
 * is replaced in place.  r15 is the round constant set up by the caller.
 * Clobbers r0, r5 and r6.
 */
40 #define STEPD0_UPDATE(t)                        \
41         and     r6,RB(t),RC(t);         /* r6 = B & C */ \
42         andc    r0,RD(t),RB(t);         /* r0 = D & ~B */ \
43         rotlwi  RT(t),RA(t),5;                  /* T = rol(A,5) */ \
44         rotlwi  RB(t),RB(t),30;                 /* B = rol(B,30), after both reads of B */ \
45         or      r6,r6,r0;                       /* r6 = f(B,C,D) */ \
46         add     r0,RE(t),r15;                   /* r0 = E + K */ \
47         xor     r5,W((t)+4-3),W((t)+4-8);               /* r5 = W(t+1) ^ W(t-4) */ \
48         add     RT(t),RT(t),r6;         /* T += f */ \
49         xor     W((t)+4),W((t)+4-16),W((t)+4-14);       /* W(t+4) = W(t-12) ^ W(t-10) */ \
50         add     r0,r0,W(t);                     /* r0 = E + K + W(t) */ \
51         xor     W((t)+4),W((t)+4),r5;                   /* fold in the other two terms */ \
52         add     RT(t),RT(t),r0;         /* T += E + K + W(t) */ \
53         rotlwi  W((t)+4),W((t)+4),1
/*
 * Step t with f = B ^ C ^ D and no schedule update:
 *   T = rol(A,5) + f + E + r15 + W(t),  B = rol(B,30)
 * No W register is written.  r15 is the round constant set up by the
 * caller.  Clobbers r0 and r6.
 */
55 #define STEPD1(t)                               \
56         xor     r6,RB(t),RC(t);         /* r6 = B ^ C, before B is rotated */ \
57         rotlwi  RT(t),RA(t),5;                  /* T = rol(A,5) */ \
58         rotlwi  RB(t),RB(t),30;                 /* B = rol(B,30) */ \
59         xor     r6,r6,RD(t);                    /* r6 = B ^ C ^ D */ \
60         add     r0,RE(t),r15;                   /* r0 = E + K */ \
61         add     RT(t),RT(t),r6;         /* T += f */ \
62         add     r0,r0,W(t);                     /* r0 = E + K + W(t) */ \
63         add     RT(t),RT(t),r0
/*
 * Step t with f = B ^ C ^ D, interleaved with the computation of schedule
 * word t+4:
 *   T = rol(A,5) + f + E + r15 + W(t),  B = rol(B,30)
 *   W(t+4) = rol(W(t+1) ^ W(t-4) ^ W(t-10) ^ W(t-12), 1)
 * W(t+4) and W(t+4-16) are the same register, so the oldest schedule word
 * is replaced in place.  r15 is the round constant set up by the caller.
 * Clobbers r0, r5 and r6.
 */
65 #define STEPD1_UPDATE(t)                                \
66         xor     r6,RB(t),RC(t);         /* r6 = B ^ C, before B is rotated */ \
67         rotlwi  RT(t),RA(t),5;                  /* T = rol(A,5) */ \
68         rotlwi  RB(t),RB(t),30;                 /* B = rol(B,30) */ \
69         xor     r6,r6,RD(t);                    /* r6 = B ^ C ^ D */ \
70         add     r0,RE(t),r15;                   /* r0 = E + K */ \
71         xor     r5,W((t)+4-3),W((t)+4-8);               /* r5 = W(t+1) ^ W(t-4) */ \
72         add     RT(t),RT(t),r6;         /* T += f */ \
73         xor     W((t)+4),W((t)+4-16),W((t)+4-14);       /* W(t+4) = W(t-12) ^ W(t-10) */ \
74         add     r0,r0,W(t);                     /* r0 = E + K + W(t) */ \
75         xor     W((t)+4),W((t)+4),r5;                   /* fold in the other two terms */ \
76         add     RT(t),RT(t),r0;         /* T += E + K + W(t) */ \
77         rotlwi  W((t)+4),W((t)+4),1
/*
 * Step t with f = (B & C) | (B & D) | (C & D), interleaved with the
 * computation of schedule word t+4:
 *   T = rol(A,5) + f + E + r15 + W(t),  B = rol(B,30)
 *   W(t+4) = rol(W(t+1) ^ W(t-4) ^ W(t-10) ^ W(t-12), 1)
 * W(t+4) and W(t+4-16) are the same register, so the oldest schedule word
 * is replaced in place.  r15 is the round constant set up by the caller.
 * Clobbers r0, r5 and r6.
 */
79 #define STEPD2_UPDATE(t)                        \
80         and     r6,RB(t),RC(t);         /* r6 = B & C */ \
81         and     r0,RB(t),RD(t);         /* r0 = B & D */ \
82         rotlwi  RT(t),RA(t),5;                  /* T = rol(A,5) */ \
83         or      r6,r6,r0;                       /* r6 = (B & C) | (B & D) */ \
84         rotlwi  RB(t),RB(t),30;                 /* B = rol(B,30), after both reads of B */ \
85         and     r0,RC(t),RD(t);         /* r0 = C & D */ \
86         xor     r5,W((t)+4-3),W((t)+4-8);       /* r5 = W(t+1) ^ W(t-4) */ \
87         or      r6,r6,r0;                       /* r6 = f(B,C,D) */ \
88         xor     W((t)+4),W((t)+4-16),W((t)+4-14);       /* W(t+4) = W(t-12) ^ W(t-10) */ \
89         add     r0,RE(t),r15;                   /* r0 = E + K */ \
90         add     RT(t),RT(t),r6;         /* T += f */ \
91         add     r0,r0,W(t);                     /* r0 = E + K + W(t) */ \
92         xor     W((t)+4),W((t)+4),r5;           /* fold in the other two terms */ \
93         add     RT(t),RT(t),r0;         /* T += E + K + W(t) */ \
94         rotlwi  W((t)+4),W((t)+4),1
/* Four consecutive STEPD0_LOAD steps: t, t+1, t+2 and t+3. */
96 #define STEP0LD4(t)                             \
97         STEPD0_LOAD(t);                         \
98         STEPD0_LOAD((t)+1);                     \
99         STEPD0_LOAD((t)+2);                     \
100         STEPD0_LOAD((t)+3)
/*
 * Four consecutive schedule-updating steps: t, t+1, t+2 and t+3.
 * fn is token-pasted into the macro name STEP<fn>_UPDATE, so it selects
 * the round function: D0, D1 or D2 for the macros defined in this file.
 */
102 #define STEPUP4(t, fn)                          \
103         STEP##fn##_UPDATE(t);                   \
104         STEP##fn##_UPDATE((t)+1);               \
105         STEP##fn##_UPDATE((t)+2);               \
106         STEP##fn##_UPDATE((t)+3)
/*
 * Twenty consecutive schedule-updating steps, t .. t+19, built from five
 * STEPUP4 groups.  fn is passed through to STEPUP4 unchanged.
 */
108 #define STEPUP20(t, fn)                         \
109         STEPUP4(t, fn);                         \
110         STEPUP4((t)+4, fn);                     \
111         STEPUP4((t)+8, fn);                     \
112         STEPUP4((t)+12, fn);                    \
113         STEPUP4((t)+16, fn)
/*
 * powerpc_sha_transform: run the 80 SHA-1 steps once and fold the result
 * into the state.
 *
 * r3: pointer to the five 32-bit state words A..E (byte offsets 0, 4, 8,
 *     12, 16); read on entry and overwritten with old state + step result.
 * r4: pointer to the input; sixteen 32-bit words are read, at byte
 *     offsets 0..60.
 * Uses r0, r5-r12 and r14-r31.
 * NOTE(review): the C-side prototype is not visible in this file; confirm
 * the argument types against the caller.
 */
115 _GLOBAL(powerpc_sha_transform)
116         PPC_STLU r1,-INT_FRAME_SIZE(r1)
        /*
         * r14-r31 are all written below.  NOTE(review): presumably these
         * two macros save r14-r21 and r22-r31 in the new frame -- confirm
         * against asm/ppc_asm.h.
         */
117         SAVE_8GPRS(14, r1)
118         SAVE_10GPRS(22, r1)
120         /* Load up A - E */
121         lwz     RA(0),0(r3)     /* A */
122         lwz     RB(0),4(r3)     /* B */
123         lwz     RC(0),8(r3)     /* C */
124         lwz     RD(0),12(r3)    /* D */
125         lwz     RE(0),16(r3)    /* E */
        /* Input words 0-3; each STEPD0_LOAD step below fetches word t+4. */
127         LOADW(0)
128         LOADW(1)
129         LOADW(2)
130         LOADW(3)
132         lis     r15,0x5a82      /* K0-19 */
133         ori     r15,r15,0x7999
        /* Steps 0-11 also load input words 4-15; steps 12-19 extend W. */
134         STEP0LD4(0)
135         STEP0LD4(4)
136         STEP0LD4(8)
137         STEPUP4(12, D0)
138         STEPUP4(16, D0)
140         lis     r15,0x6ed9      /* K20-39 */
141         ori     r15,r15,0xeba1
142         STEPUP20(20, D1)
144         lis     r15,0x8f1b      /* K40-59 */
145         ori     r15,r15,0xbcdc
146         STEPUP20(40, D2)
148         lis     r15,0xca62      /* K60-79 */
149         ori     r15,r15,0xc1d6
150         STEPUP4(60, D1)
151         STEPUP4(64, D1)
152         STEPUP4(68, D1)
153         STEPUP4(72, D1)
        /*
         * Steps 76-79 use STEPD1, which writes no W register, so r16-r20
         * (the slots for W(80)..W(84)) are free.  The old state words are
         * reloaded into them, interleaved with the last four steps.
         */
154         lwz     r20,16(r3)
155         STEPD1(76)
156         lwz     r19,12(r3)
157         STEPD1(77)
158         lwz     r18,8(r3)
159         STEPD1(78)
160         lwz     r17,4(r3)
161         STEPD1(79)
163         lwz     r16,0(r3)
        /*
         * Add the old state to the step-80 values.  The t=80 and t=0
         * register names overlap (RC(0) is RE(80), RB(0) is RD(80),
         * RA(0) is RC(80), RE(0) is RA(80)), so each source is consumed
         * before its register is reused as a destination: E is summed
         * into r20 first and only moved into RE(0) after RA(80) was read.
         */
164         add     r20,RE(80),r20
165         add     RD(0),RD(80),r19
166         add     RC(0),RC(80),r18
167         add     RB(0),RB(80),r17
168         add     RA(0),RA(80),r16
169         mr      RE(0),r20
        /* Write the updated state back through r3. */
170         stw     RA(0),0(r3)
171         stw     RB(0),4(r3)
172         stw     RC(0),8(r3)
173         stw     RD(0),12(r3)
174         stw     RE(0),16(r3)
        /* Undo the prologue: restore the saved GPRs and pop the frame. */
176         REST_8GPRS(14, r1)
177         REST_10GPRS(22, r1)
178         addi    r1,r1,INT_FRAME_SIZE
179         blr