Merge tag 'iommu-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
[linux/fpc-iii.git] / include / asm-generic / xor.h
blobb62a2a56a4d4976a383c30a57cafd8246e784df1
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * include/asm-generic/xor.h
 *
 * Generic optimized RAID-5 checksumming functions.
 */
8 #include <linux/prefetch.h>
/*
 * xor_8regs_2 - XOR one source buffer into the destination.
 * @bytes: size of the buffers in bytes
 * @p1:    destination; also the first XOR operand
 * @p2:    source buffer
 *
 * Works on "lines" of eight longs, bytes / sizeof(long) / 8 of them, doing
 * p1[i] ^= p2[i] directly on memory. The do/while form means one line is
 * always processed, even when @bytes is smaller than a line, so callers
 * must supply buffers of at least 8 * sizeof(long) bytes.
 */
static void
xor_8regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	do {
		p1[0] ^= p2[0];
		p1[1] ^= p2[1];
		p1[2] ^= p2[2];
		p1[3] ^= p2[3];
		p1[4] ^= p2[4];
		p1[5] ^= p2[5];
		p1[6] ^= p2[6];
		p1[7] ^= p2[7];
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
}
/*
 * xor_8regs_3 - XOR two source buffers into the destination.
 * @bytes: size of the buffers in bytes
 * @p1:    destination; also the first XOR operand
 * @p2:    first source buffer
 * @p3:    second source buffer
 *
 * Computes p1[i] ^= p2[i] ^ p3[i] over bytes / sizeof(long) / 8 lines of
 * eight longs. One line is always processed because of the do/while loop,
 * so the buffers must hold at least 8 * sizeof(long) bytes.
 */
static void
xor_8regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	do {
		p1[0] ^= p2[0] ^ p3[0];
		p1[1] ^= p2[1] ^ p3[1];
		p1[2] ^= p2[2] ^ p3[2];
		p1[3] ^= p2[3] ^ p3[3];
		p1[4] ^= p2[4] ^ p3[4];
		p1[5] ^= p2[5] ^ p3[5];
		p1[6] ^= p2[6] ^ p3[6];
		p1[7] ^= p2[7] ^ p3[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
}
/*
 * xor_8regs_4 - XOR three source buffers into the destination.
 * @bytes: size of the buffers in bytes
 * @p1:    destination; also the first XOR operand
 * @p2:    first source buffer
 * @p3:    second source buffer
 * @p4:    third source buffer
 *
 * Computes p1[i] ^= p2[i] ^ p3[i] ^ p4[i] over bytes / sizeof(long) / 8
 * lines of eight longs. One line is always processed because of the
 * do/while loop, so the buffers must hold at least 8 * sizeof(long) bytes.
 */
static void
xor_8regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	do {
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
}
/*
 * xor_8regs_5 - XOR four source buffers into the destination.
 * @bytes: size of the buffers in bytes
 * @p1:    destination; also the first XOR operand
 * @p2:    first source buffer
 * @p3:    second source buffer
 * @p4:    third source buffer
 * @p5:    fourth source buffer
 *
 * Computes p1[i] ^= p2[i] ^ p3[i] ^ p4[i] ^ p5[i] over
 * bytes / sizeof(long) / 8 lines of eight longs. One line is always
 * processed because of the do/while loop, so the buffers must hold at
 * least 8 * sizeof(long) bytes.
 */
static void
xor_8regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	do {
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
}
/*
 * xor_32regs_2 - XOR one source buffer into the destination via locals.
 * @bytes: size of the buffers in bytes
 * @p1:    destination; also the first XOR operand
 * @p2:    source buffer
 *
 * Same result as p1[i] ^= p2[i], but each line of eight longs is first
 * loaded into eight local temporaries, XORed there, and then stored back
 * as a group. bytes / sizeof(long) / 8 lines are handled; the do/while
 * loop always processes one line, so the buffers must hold at least
 * 8 * sizeof(long) bytes.
 *
 * The temporaries are unsigned long, matching the buffer element type, so
 * no signed/unsigned conversion happens on load or store.
 */
static void
xor_32regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	do {
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
}
/*
 * xor_32regs_3 - XOR two source buffers into the destination via locals.
 * @bytes: size of the buffers in bytes
 * @p1:    destination; also the first XOR operand
 * @p2:    first source buffer
 * @p3:    second source buffer
 *
 * Same result as p1[i] ^= p2[i] ^ p3[i]. Each line of eight longs is
 * loaded into eight local temporaries, each source is folded in one after
 * the other, and the line is stored back as a group.
 * bytes / sizeof(long) / 8 lines are handled; the do/while loop always
 * processes one line, so the buffers must hold at least 8 * sizeof(long)
 * bytes.
 *
 * The temporaries are unsigned long, matching the buffer element type, so
 * no signed/unsigned conversion happens on load or store.
 */
static void
xor_32regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	     unsigned long *p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	do {
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
}
/*
 * xor_32regs_4 - XOR three source buffers into the destination via locals.
 * @bytes: size of the buffers in bytes
 * @p1:    destination; also the first XOR operand
 * @p2:    first source buffer
 * @p3:    second source buffer
 * @p4:    third source buffer
 *
 * Same result as p1[i] ^= p2[i] ^ p3[i] ^ p4[i]. Each line of eight longs
 * is loaded into eight local temporaries, each source is folded in one
 * after the other, and the line is stored back as a group.
 * bytes / sizeof(long) / 8 lines are handled; the do/while loop always
 * processes one line, so the buffers must hold at least 8 * sizeof(long)
 * bytes.
 *
 * The temporaries are unsigned long, matching the buffer element type, so
 * no signed/unsigned conversion happens on load or store.
 */
static void
xor_32regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	     unsigned long *p3, unsigned long *p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	do {
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
}
/*
 * xor_32regs_5 - XOR four source buffers into the destination via locals.
 * @bytes: size of the buffers in bytes
 * @p1:    destination; also the first XOR operand
 * @p2:    first source buffer
 * @p3:    second source buffer
 * @p4:    third source buffer
 * @p5:    fourth source buffer
 *
 * Same result as p1[i] ^= p2[i] ^ p3[i] ^ p4[i] ^ p5[i]. Each line of
 * eight longs is loaded into eight local temporaries, each source is
 * folded in one after the other, and the line is stored back as a group.
 * bytes / sizeof(long) / 8 lines are handled; the do/while loop always
 * processes one line, so the buffers must hold at least 8 * sizeof(long)
 * bytes.
 *
 * The temporaries are unsigned long, matching the buffer element type, so
 * no signed/unsigned conversion happens on load or store.
 */
static void
xor_32regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	     unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	do {
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		d0 ^= p5[0];
		d1 ^= p5[1];
		d2 ^= p5[2];
		d3 ^= p5[3];
		d4 ^= p5[4];
		d5 ^= p5[5];
		d6 ^= p5[6];
		d7 ^= p5[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
}
/*
 * xor_8regs_p_2 - XOR one source buffer into the destination, prefetching
 *                 the next line while the current one is processed.
 * @bytes: size of the buffers in bytes
 * @p1:    destination; also the first XOR operand
 * @p2:    source buffer
 *
 * Computes p1[i] ^= p2[i] over bytes / sizeof(long) / 8 lines of eight
 * longs. prefetchw() is issued on the buffer that is written (p1) and
 * prefetch() on the one that is only read.
 *
 * "lines" starts one below the line count. The do/while loop, which
 * prefetches one line ahead, therefore covers every line except the last.
 * When it exits with lines == 0 the goto re-enters the loop body *below*
 * the prefetch calls, so the final line is processed without prefetching
 * past it. With exactly one line the loop body (prefetches included) runs
 * once and the goto is not taken. At least one line is always processed.
 */
static void
xor_8regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	long lines = bytes / (sizeof (long)) / 8 - 1;

	prefetchw(p1);
	prefetch(p2);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
 once_more:
		p1[0] ^= p2[0];
		p1[1] ^= p2[1];
		p1[2] ^= p2[2];
		p1[3] ^= p2[3];
		p1[4] ^= p2[4];
		p1[5] ^= p2[5];
		p1[6] ^= p2[6];
		p1[7] ^= p2[7];
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
	if (lines == 0)
		goto once_more;
}
/*
 * xor_8regs_p_3 - XOR two source buffers into the destination, prefetching
 *                 the next line while the current one is processed.
 * @bytes: size of the buffers in bytes
 * @p1:    destination; also the first XOR operand
 * @p2:    first source buffer
 * @p3:    second source buffer
 *
 * Computes p1[i] ^= p2[i] ^ p3[i] over bytes / sizeof(long) / 8 lines of
 * eight longs. prefetchw() is issued on the buffer that is written (p1)
 * and prefetch() on the ones that are only read.
 *
 * "lines" starts one below the line count. The do/while loop, which
 * prefetches one line ahead, therefore covers every line except the last.
 * When it exits with lines == 0 the goto re-enters the loop body *below*
 * the prefetch calls, so the final line is processed without prefetching
 * past it. With exactly one line the loop body (prefetches included) runs
 * once and the goto is not taken. At least one line is always processed.
 */
static void
xor_8regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	      unsigned long *p3)
{
	long lines = bytes / (sizeof (long)) / 8 - 1;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
 once_more:
		p1[0] ^= p2[0] ^ p3[0];
		p1[1] ^= p2[1] ^ p3[1];
		p1[2] ^= p2[2] ^ p3[2];
		p1[3] ^= p2[3] ^ p3[3];
		p1[4] ^= p2[4] ^ p3[4];
		p1[5] ^= p2[5] ^ p3[5];
		p1[6] ^= p2[6] ^ p3[6];
		p1[7] ^= p2[7] ^ p3[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
	if (lines == 0)
		goto once_more;
}
/*
 * xor_8regs_p_4 - XOR three source buffers into the destination,
 *                 prefetching the next line while the current one is
 *                 processed.
 * @bytes: size of the buffers in bytes
 * @p1:    destination; also the first XOR operand
 * @p2:    first source buffer
 * @p3:    second source buffer
 * @p4:    third source buffer
 *
 * Computes p1[i] ^= p2[i] ^ p3[i] ^ p4[i] over bytes / sizeof(long) / 8
 * lines of eight longs. prefetchw() is issued on the buffer that is
 * written (p1) and prefetch() on the ones that are only read.
 *
 * "lines" starts one below the line count. The do/while loop, which
 * prefetches one line ahead, therefore covers every line except the last.
 * When it exits with lines == 0 the goto re-enters the loop body *below*
 * the prefetch calls, so the final line is processed without prefetching
 * past it. With exactly one line the loop body (prefetches included) runs
 * once and the goto is not taken. At least one line is always processed.
 */
static void
xor_8regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	      unsigned long *p3, unsigned long *p4)
{
	long lines = bytes / (sizeof (long)) / 8 - 1;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
 once_more:
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
	if (lines == 0)
		goto once_more;
}
/*
 * xor_8regs_p_5 - XOR four source buffers into the destination,
 *                 prefetching the next line while the current one is
 *                 processed.
 * @bytes: size of the buffers in bytes
 * @p1:    destination; also the first XOR operand
 * @p2:    first source buffer
 * @p3:    second source buffer
 * @p4:    third source buffer
 * @p5:    fourth source buffer
 *
 * Computes p1[i] ^= p2[i] ^ p3[i] ^ p4[i] ^ p5[i] over
 * bytes / sizeof(long) / 8 lines of eight longs. prefetchw() is issued on
 * the buffer that is written (p1) and prefetch() on the ones that are only
 * read.
 *
 * "lines" starts one below the line count. The do/while loop, which
 * prefetches one line ahead, therefore covers every line except the last.
 * When it exits with lines == 0 the goto re-enters the loop body *below*
 * the prefetch calls, so the final line is processed without prefetching
 * past it. With exactly one line the loop body (prefetches included) runs
 * once and the goto is not taken. At least one line is always processed.
 */
static void
xor_8regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	      unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	long lines = bytes / (sizeof (long)) / 8 - 1;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);
	prefetch(p5);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
		prefetch(p5+8);
 once_more:
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
	if (lines == 0)
		goto once_more;
}
/*
 * xor_32regs_p_2 - XOR one source buffer into the destination via locals,
 *                  prefetching the next line while the current one is
 *                  processed.
 * @bytes: size of the buffers in bytes
 * @p1:    destination; also the first XOR operand
 * @p2:    source buffer
 *
 * Same result as p1[i] ^= p2[i] over bytes / sizeof(long) / 8 lines of
 * eight longs; each line is loaded into eight local temporaries, XORed
 * there, and stored back as a group. prefetchw() is issued on the buffer
 * that is written (p1) and prefetch() on the one that is only read.
 *
 * "lines" starts one below the line count. The do/while loop, which
 * prefetches one line ahead, therefore covers every line except the last.
 * When it exits with lines == 0 the goto re-enters the loop body *below*
 * the prefetch calls, so the final line is processed without prefetching
 * past it. Jumping into the block is fine for the temporaries: they carry
 * no initializers and are assigned before use. With exactly one line the
 * loop body (prefetches included) runs once and the goto is not taken.
 * At least one line is always processed.
 *
 * The temporaries are unsigned long, matching the buffer element type, so
 * no signed/unsigned conversion happens on load or store.
 */
static void
xor_32regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	long lines = bytes / (sizeof (long)) / 8 - 1;

	prefetchw(p1);
	prefetch(p2);

	do {
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		prefetchw(p1+8);
		prefetch(p2+8);
 once_more:
		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
	if (lines == 0)
		goto once_more;
}
/*
 * xor_32regs_p_3 - XOR two source buffers into the destination via locals,
 *                  prefetching the next line while the current one is
 *                  processed.
 * @bytes: size of the buffers in bytes
 * @p1:    destination; also the first XOR operand
 * @p2:    first source buffer
 * @p3:    second source buffer
 *
 * Same result as p1[i] ^= p2[i] ^ p3[i] over bytes / sizeof(long) / 8
 * lines of eight longs; each line is loaded into eight local temporaries,
 * the sources are folded in one after the other, and the line is stored
 * back as a group. prefetchw() is issued on the buffer that is written
 * (p1) and prefetch() on the ones that are only read.
 *
 * "lines" starts one below the line count. The do/while loop, which
 * prefetches one line ahead, therefore covers every line except the last.
 * When it exits with lines == 0 the goto re-enters the loop body *below*
 * the prefetch calls, so the final line is processed without prefetching
 * past it. Jumping into the block is fine for the temporaries: they carry
 * no initializers and are assigned before use. With exactly one line the
 * loop body (prefetches included) runs once and the goto is not taken.
 * At least one line is always processed.
 *
 * The temporaries are unsigned long, matching the buffer element type, so
 * no signed/unsigned conversion happens on load or store.
 */
static void
xor_32regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	       unsigned long *p3)
{
	long lines = bytes / (sizeof (long)) / 8 - 1;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);

	do {
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
 once_more:
		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
	if (lines == 0)
		goto once_more;
}
/*
 * xor_32regs_p_4 - XOR three source buffers into the destination via
 *                  locals, prefetching the next line while the current one
 *                  is processed.
 * @bytes: size of the buffers in bytes
 * @p1:    destination; also the first XOR operand
 * @p2:    first source buffer
 * @p3:    second source buffer
 * @p4:    third source buffer
 *
 * Same result as p1[i] ^= p2[i] ^ p3[i] ^ p4[i] over
 * bytes / sizeof(long) / 8 lines of eight longs; each line is loaded into
 * eight local temporaries, the sources are folded in one after the other,
 * and the line is stored back as a group. prefetchw() is issued on the
 * buffer that is written (p1) and prefetch() on the ones that are only
 * read.
 *
 * "lines" starts one below the line count. The do/while loop, which
 * prefetches one line ahead, therefore covers every line except the last.
 * When it exits with lines == 0 the goto re-enters the loop body *below*
 * the prefetch calls, so the final line is processed without prefetching
 * past it. Jumping into the block is fine for the temporaries: they carry
 * no initializers and are assigned before use. With exactly one line the
 * loop body (prefetches included) runs once and the goto is not taken.
 * At least one line is always processed.
 *
 * The temporaries are unsigned long, matching the buffer element type, so
 * no signed/unsigned conversion happens on load or store.
 */
static void
xor_32regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	       unsigned long *p3, unsigned long *p4)
{
	long lines = bytes / (sizeof (long)) / 8 - 1;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);

	do {
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
 once_more:
		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
	if (lines == 0)
		goto once_more;
}
/*
 * xor_32regs_p_5 - XOR four source buffers into the destination via
 *                  locals, prefetching the next line while the current one
 *                  is processed.
 * @bytes: size of the buffers in bytes
 * @p1:    destination; also the first XOR operand
 * @p2:    first source buffer
 * @p3:    second source buffer
 * @p4:    third source buffer
 * @p5:    fourth source buffer
 *
 * Same result as p1[i] ^= p2[i] ^ p3[i] ^ p4[i] ^ p5[i] over
 * bytes / sizeof(long) / 8 lines of eight longs; each line is loaded into
 * eight local temporaries, the sources are folded in one after the other,
 * and the line is stored back as a group. prefetchw() is issued on the
 * buffer that is written (p1) and prefetch() on the ones that are only
 * read.
 *
 * "lines" starts one below the line count. The do/while loop, which
 * prefetches one line ahead, therefore covers every line except the last.
 * When it exits with lines == 0 the goto re-enters the loop body *below*
 * the prefetch calls, so the final line is processed without prefetching
 * past it. Jumping into the block is fine for the temporaries: they carry
 * no initializers and are assigned before use. With exactly one line the
 * loop body (prefetches included) runs once and the goto is not taken.
 * At least one line is always processed.
 *
 * The temporaries are unsigned long, matching the buffer element type, so
 * no signed/unsigned conversion happens on load or store.
 */
static void
xor_32regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	       unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	long lines = bytes / (sizeof (long)) / 8 - 1;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);
	prefetch(p5);

	do {
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
		prefetch(p5+8);
 once_more:
		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		d0 ^= p5[0];
		d1 ^= p5[1];
		d2 ^= p5[2];
		d3 ^= p5[3];
		d4 ^= p5[4];
		d5 ^= p5[5];
		d6 ^= p5[6];
		d7 ^= p5[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
	if (lines == 0)
		goto once_more;
}
672 static struct xor_block_template xor_block_8regs = {
673 .name = "8regs",
674 .do_2 = xor_8regs_2,
675 .do_3 = xor_8regs_3,
676 .do_4 = xor_8regs_4,
677 .do_5 = xor_8regs_5,
680 static struct xor_block_template xor_block_32regs = {
681 .name = "32regs",
682 .do_2 = xor_32regs_2,
683 .do_3 = xor_32regs_3,
684 .do_4 = xor_32regs_4,
685 .do_5 = xor_32regs_5,
688 static struct xor_block_template xor_block_8regs_p __maybe_unused = {
689 .name = "8regs_prefetch",
690 .do_2 = xor_8regs_p_2,
691 .do_3 = xor_8regs_p_3,
692 .do_4 = xor_8regs_p_4,
693 .do_5 = xor_8regs_p_5,
696 static struct xor_block_template xor_block_32regs_p __maybe_unused = {
697 .name = "32regs_prefetch",
698 .do_2 = xor_32regs_p_2,
699 .do_3 = xor_32regs_p_3,
700 .do_4 = xor_32regs_p_4,
701 .do_5 = xor_32regs_p_5,
/*
 * Hands each of the four generic templates to xor_speed(), which is
 * provided by the code that includes this header. Wrapped in
 * do { } while (0) so the macro behaves as a single statement.
 */
#define XOR_TRY_TEMPLATES			\
	do {					\
		xor_speed(&xor_block_8regs);	\
		xor_speed(&xor_block_8regs_p);	\
		xor_speed(&xor_block_32regs);	\
		xor_speed(&xor_block_32regs_p);	\
	} while (0)