WIP FPC-III support
[linux/fpc-iii.git] / arch / c6x / lib / csum_64plus.S
blob57148866d8d31702aa11cbf3a7e8f410fc28d644
1 ; SPDX-License-Identifier: GPL-2.0-only
3 ;  linux/arch/c6x/lib/csum_64plus.s
5 ;  Port on Texas Instruments TMS320C6x architecture
7 ;  Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated
8 ;  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
10 #include <linux/linkage.h>
13 ;unsigned int csum_partial_copy_nocheck(const char *src, char * dst,
14 ;                                       int len, int sum)
16 ; A4:   src
17 ; B4:   dst
18 ; A6:   len
19 ; B6:   sum (not read by the code below; both accumulators start from zero)
20 ; return csum in A4
23         .text
24 ENTRY(csum_partial_copy_nocheck)
25         MVC     .S2     ILC,B30
27         ZERO    .D1     A9              ; csum (a side)
28 ||      ZERO    .D2     B9              ; csum (b side)
29 ||      SHRU    .S2X    A6,2,B5         ; len / 4
31         ;; Check alignment and size
32         AND     .S1     3,A4,A1
33 ||      AND     .S2     3,B4,B0
34         OR      .L2X    B0,A1,B0        ; non aligned condition
35 ||      MVC     .S2     B5,ILC
36 ||      MVK     .D2     1,B2
37 ||      MV      .D1X    B5,A1           ; words condition
38   [!A1] B       .S1     L8
39    [B0] BNOP    .S1     L6,5
41         SPLOOP          1
43         ;; Main loop for aligned words
44         LDW     .D1T1   *A4++,A7
45         NOP     4
46         MV      .S2X    A7,B7
47 ||      EXTU    .S1     A7,0,16,A16
48         STW     .D2T2   B7,*B4++
49 ||      MPYU    .M2     B7,B2,B8
50 ||      ADD     .L1     A16,A9,A9
51         NOP
52         SPKERNEL        8,0
53 ||      ADD     .L2     B8,B9,B9
55         ZERO    .D1     A1
56 ||      ADD     .L1X    A9,B9,A9        ;  add csum from a and b sides
58 L6:
59   [!A1] BNOP    .S1     L8,5
61         ;; Main loop for non-aligned words
62         SPLOOP          2
63  ||     MVK     .L1     1,A2
65         LDNW    .D1T1   *A4++,A7
66         NOP             3
68         NOP
69         MV      .S2X    A7,B7
70  ||     EXTU    .S1     A7,0,16,A16
71  ||     MPYU    .M1     A7,A2,A8
73         ADD     .L1     A16,A9,A9
74         SPKERNEL        6,0
75  ||     STNW    .D2T2   B7,*B4++
76  ||     ADD     .L1     A8,A9,A9
78 L8:     AND     .S2X    2,A6,B5
79         CMPGT   .L2     B5,0,B0
80   [!B0] BNOP    .S1     L82,4
82         ;; Manage half-word
83         ZERO    .L1     A7
84 ||      ZERO    .D1     A8
86 #ifdef CONFIG_CPU_BIG_ENDIAN
88         LDBU    .D1T1   *A4++,A7
89         LDBU    .D1T1   *A4++,A8
90         NOP             3
91         SHL     .S1     A7,8,A0
92         ADD     .S1     A8,A9,A9
93         STB     .D2T1   A7,*B4++
94 ||      ADD     .S1     A0,A9,A9
95         STB     .D2T1   A8,*B4++
97 #else
99         LDBU    .D1T1   *A4++,A7
100         LDBU    .D1T1   *A4++,A8
101         NOP             3
102         ADD     .S1     A7,A9,A9
103         SHL     .S1     A8,8,A0
105         STB     .D2T1   A7,*B4++
106 ||      ADD     .S1     A0,A9,A9
107         STB     .D2T1   A8,*B4++
109 #endif
111         ;; Manage eventually the last byte
112 L82:    AND     .S2X    1,A6,B0
113   [!B0] BNOP    .S1     L9,5
115 ||      ZERO    .L1     A7
117 L83:    LDBU    .D1T1   *A4++,A7
118         NOP             4
120         MV      .L2X    A7,B7
122 #ifdef CONFIG_CPU_BIG_ENDIAN
124         STB     .D2T2   B7,*B4++
125 ||      SHL     .S1     A7,8,A7
126         ADD     .S1     A7,A9,A9
128 #else
130         STB     .D2T2   B7,*B4++
131 ||      ADD     .S1     A7,A9,A9
133 #endif
135         ;; Fold the csum
136 L9:     SHRU    .S2X    A9,16,B0
137   [!B0] BNOP    .S1     L10,5
139 L91:    SHRU    .S2X    A9,16,B4
140 ||      EXTU    .S1     A9,16,16,A3
141         ADD     .D1X    A3,B4,A9
143         SHRU    .S1     A9,16,A0
144    [A0] BNOP    .S1     L91,5
146 L10:    MV      .D1     A9,A4
148         BNOP    .S2     B3,4
149         MVC     .S2     B30,ILC
150 ENDPROC(csum_partial_copy_nocheck)
153 ;unsigned short
154 ;ip_fast_csum(unsigned char *iph, unsigned int ihl)
156 ;       unsigned int checksum = 0;
157 ;       unsigned short *tosum = (unsigned short *) iph;
158 ;       int len;
160 ;       len = ihl*4;
162 ;       if (len <= 0)
163 ;               return 0;
165 ;       while(len) {
166 ;               len -= 2;
167 ;               checksum += *tosum++;
168 ;       }
169 ;       if (len & 1)
170 ;               checksum += *(unsigned char*) tosum;
172 ;       while(checksum >> 16)
173 ;               checksum = (checksum & 0xffff) + (checksum >> 16);
175 ;       return ~checksum;
178 ; A4:   iph
179 ; B4:   ihl
180 ; return checksum in A4
182         .text
184 ENTRY(ip_fast_csum)
185         ZERO    .D1     A5
186  ||     MVC     .S2     ILC,B30
187         SHL     .S2     B4,2,B0
188         CMPGT   .L2     B0,0,B1
189   [!B1] BNOP    .S1     L15,4
190   [!B1] ZERO    .D1     A3
192   [!B0] B       .S1     L12
193         SHRU    .S2     B0,1,B0
194         MVC     .S2     B0,ILC
195         NOP     3
197         SPLOOP  1
198         LDHU    .D1T1   *A4++,A3
199         NOP     3
200         NOP
201         SPKERNEL        5,0
202  ||     ADD     .L1     A3,A5,A5
204 L12:    SHRU    .S1     A5,16,A0
205   [!A0] BNOP    .S1     L14,5
207 L13:    SHRU    .S2X    A5,16,B4
208         EXTU    .S1     A5,16,16,A3
209         ADD     .D1X    A3,B4,A5
210         SHRU    .S1     A5,16,A0
211   [A0]  BNOP    .S1     L13,5
213 L14:    NOT     .D1     A5,A3
214         EXTU    .S1     A3,16,16,A3
216 L15:    BNOP    .S2     B3,3
217         MVC     .S2     B30,ILC
218         MV      .D1     A3,A4
219 ENDPROC(ip_fast_csum)
222 ;unsigned short
223 ;do_csum(unsigned char *buff, unsigned int len)
225 ;       int odd, count;
226 ;       unsigned int result = 0;
228 ;       if (len <= 0)
229 ;               goto out;
230 ;       odd = 1 & (unsigned long) buff;
231 ;       if (odd) {
232 ;#ifdef __LITTLE_ENDIAN
233 ;               result += (*buff << 8);
234 ;#else
235 ;               result = *buff;
236 ;#endif
237 ;               len--;
238 ;               buff++;
239 ;       }
240 ;       count = len >> 1;               /* nr of 16-bit words.. */
241 ;       if (count) {
242 ;               if (2 & (unsigned long) buff) {
243 ;                       result += *(unsigned short *) buff;
244 ;                       count--;
245 ;                       len -= 2;
246 ;                       buff += 2;
247 ;               }
248 ;               count >>= 1;            /* nr of 32-bit words.. */
249 ;               if (count) {
250 ;                       unsigned int carry = 0;
251 ;                       do {
252 ;                               unsigned int w = *(unsigned int *) buff;
253 ;                               count--;
254 ;                               buff += 4;
255 ;                               result += carry;
256 ;                               result += w;
257 ;                               carry = (w > result);
258 ;                       } while (count);
259 ;                       result += carry;
260 ;                       result = (result & 0xffff) + (result >> 16);
261 ;               }
262 ;               if (len & 2) {
263 ;                       result += *(unsigned short *) buff;
264 ;                       buff += 2;
265 ;               }
266 ;       }
267 ;       if (len & 1)
268 ;#ifdef __LITTLE_ENDIAN
269 ;               result += *buff;
270 ;#else
271 ;               result += (*buff << 8);
272 ;#endif
273 ;       result = (result & 0xffff) + (result >> 16);
274 ;       /* add up carry.. */
275 ;       result = (result & 0xffff) + (result >> 16);
276 ;       if (odd)
277 ;               result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
278 ;out:
279 ;       return result;
282 ; A4:   buff
283 ; B4:   len
284 ; return checksum in A4
287 ENTRY(do_csum)
288            CMPGT   .L2     B4,0,B0
289    [!B0]   BNOP    .S1     L26,3
290            EXTU    .S1     A4,31,31,A0
292            MV      .L1     A0,A3
293 ||         MV      .S1X    B3,A5
294 ||         MV      .L2     B4,B3
295 ||         ZERO    .D1     A1
297 #ifdef CONFIG_CPU_BIG_ENDIAN
298    [A0]    SUB     .L2     B3,1,B3
299 || [A0]    LDBU    .D1T1   *A4++,A1
300 #else
301    [!A0]   BNOP    .S1     L21,5
302 || [A0]    LDBU    .D1T1   *A4++,A0
303            SUB     .L2     B3,1,B3
304 ||         SHL     .S1     A0,8,A1
305 L21:
306 #endif
307            SHR     .S2     B3,1,B0
308    [!B0]   BNOP    .S1     L24,3
309            MVK     .L1     2,A0
310            AND     .L1     A4,A0,A0
312    [!A0]   BNOP    .S1     L22,5
313 || [A0]    LDHU    .D1T1   *A4++,A0
314            SUB     .L2     B0,1,B0
315 ||         SUB     .S2     B3,2,B3
316 ||         ADD     .L1     A0,A1,A1
317 L22:
318            SHR     .S2     B0,1,B0
319 ||         ZERO    .L1     A0
321    [!B0]   BNOP    .S1     L23,5
322 || [B0]    MVC     .S2     B0,ILC
324            SPLOOP  3
325            SPMASK  L1
326 ||         MV      .L1     A1,A2
327 ||         LDW     .D1T1   *A4++,A1
329            NOP     4
330            ADD     .L1     A0,A1,A0
331            ADD     .L1     A2,A0,A2
333            SPKERNEL 1,2
334 ||         CMPGTU  .L1     A1,A2,A0
336            ADD     .L1     A0,A2,A6
337            EXTU    .S1     A6,16,16,A7
338            SHRU    .S2X    A6,16,B0
339            NOP             1
340            ADD     .L1X    A7,B0,A1
341 L23:
342            MVK     .L2     2,B0
343            AND     .L2     B3,B0,B0
344    [B0]    LDHU    .D1T1   *A4++,A0
345            NOP     4
346    [B0]    ADD     .L1     A0,A1,A1
347 L24:
348            EXTU    .S2     B3,31,31,B0
349 #ifdef CONFIG_CPU_BIG_ENDIAN
350    [!B0]   BNOP    .S1     L25,4
351 || [B0]    LDBU    .D1T1   *A4,A0
352            SHL     .S1     A0,8,A0
353            ADD     .L1     A0,A1,A1
354 L25:
355 #else
356    [B0]    LDBU    .D1T1   *A4,A0
357            NOP     4
358    [B0]    ADD     .L1     A0,A1,A1
359 #endif
360            EXTU    .S1     A1,16,16,A0
361            SHRU    .S2X    A1,16,B0
362            NOP     1
363            ADD     .L1X    A0,B0,A0
364            SHRU    .S1     A0,16,A1
365            ADD     .L1     A0,A1,A0
366            EXTU    .S1     A0,16,16,A1
367            EXTU    .S1     A1,16,24,A2
369            EXTU    .S1     A1,24,16,A0
370 ||         MV      .L2X    A3,B0
372    [B0]    OR      .L1     A0,A2,A1
373 L26:
374            NOP     1
375            BNOP    .S2X    A5,4
376            MV      .L1     A1,A4
377 ENDPROC(do_csum)
379 ;__wsum csum_partial(const void *buff, int len, __wsum wsum)
381 ;       unsigned int sum = (__force unsigned int)wsum;
382 ;       unsigned int result = do_csum(buff, len);
384 ;       /* add in old sum, and carry.. */
385 ;       result += sum;
386 ;       if (sum > result)
387 ;               result += 1;
388 ;       return (__force __wsum)result;
391 ENTRY(csum_partial)
392            MV      .L1X    B3,A9
393 ||         CALLP   .S2     do_csum,B3
394 ||         MV      .S1     A6,A8
395            BNOP    .S2X    A9,2
396            ADD     .L1     A8,A4,A1
397            CMPGTU  .L1     A8,A1,A0
398            ADD     .L1     A1,A0,A4
399 ENDPROC(csum_partial)
401 ;unsigned short
402 ;ip_compute_csum(unsigned char *buff, unsigned int len)
404 ; A4:   buff
405 ; B4:   len
406 ; return checksum in A4
408 ENTRY(ip_compute_csum)
409            MV      .L1X    B3,A9
410 ||         CALLP   .S2     do_csum,B3
411            BNOP    .S2X    A9,3
412            NOT     .S1     A4,A4
413            CLR     .S1     A4,16,31,A4
414 ENDPROC(ip_compute_csum)