Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
[cris-mirror.git] / arch / powerpc / lib / copyuser_64.S
blob08da06e1bd729c3374493cf570ce053fb5137e79
1 /*
2  * Copyright (C) 2002 Paul Mackerras, IBM Corp.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version
7  * 2 of the License, or (at your option) any later version.
8  */
9 #include <asm/processor.h>
10 #include <asm/ppc_asm.h>
11 #include <asm/export.h>
/*
 * Endian-neutral doubleword shifts, used by the .Lsrc_unaligned path to
 * splice an output doubleword together from two adjacent aligned loads.
 */
13 #ifdef __BIG_ENDIAN__
14 #define sLd sld         /* Shift towards low-numbered address. */
15 #define sHd srd         /* Shift towards high-numbered address. */
16 #else
17 #define sLd srd         /* Shift towards low-numbered address. */
18 #define sHd sld         /* Shift towards high-numbered address. */
19 #endif
/*
 * __copy_tofrom_user: entry point of the copy routine.
 * As used by the code below: r3 = destination, r4 = source, r5 = byte count.
 * Returns r3 = 0 on success, otherwise the number of bytes not copied.
 */
21         .align  7                       /* 2^7 = 128-byte alignment */
22 _GLOBAL_TOC(__copy_tofrom_user)
/*
 * Patched feature section. Going by the macro names, the nop is kept when
 * CPU_FTR_VMX_COPY is clear and the branch to the POWER7 variant is used
 * otherwise (the macros are defined outside this file -- TODO confirm).
 * With the nop in place execution falls into __copy_tofrom_user_base.
 */
23 BEGIN_FTR_SECTION
24         nop
25 FTR_SECTION_ELSE
26         b       __copy_tofrom_user_power7
27 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
/*
 * __copy_tofrom_user_base: generic copy and fall-through target of
 * __copy_tofrom_user. Saves the original arguments just below the stack
 * pointer for the fault handlers, then dispatches on length and alignment:
 *   length == 4096 and both pointers 4K aligned -> .Lcopy_page_4K
 *   length < 16                                 -> .Lshort_copy
 *   destination not 8-byte aligned              -> .Ldst_unaligned
 *                                                  (unless patched to a nop)
 *   otherwise                                   -> falls into .Ldst_aligned
 */
28 _GLOBAL(__copy_tofrom_user_base)
29         /* first check for a whole page copy on a page boundary */
30         cmpldi  cr1,r5,16               /* cr1: length vs 16, unsigned */
31         cmpdi   cr6,r5,4096             /* cr6.eq: length == 4096 */
32         or      r0,r3,r4                /* merge low bits of both pointers */
33         neg     r6,r3           /* LS 3 bits = # bytes to 8-byte dest bdry */
34         andi.   r0,r0,4095              /* cr0.eq: both pointers 4K aligned */
35         std     r3,-24(r1)              /* keep original dest for the fault handlers */
36         crand   cr0*4+2,cr0*4+2,cr6*4+2 /* cr0.eq &= cr6.eq */
37         std     r4,-16(r1)              /* keep original source */
38         std     r5,-8(r1)               /* keep original length */
39         dcbt    0,r4                    /* cache-touch hint on the source */
40         beq     .Lcopy_page_4K          /* aligned whole-page copy */
41         andi.   r6,r6,7                 /* r6 = bytes up to the 8-byte dest boundary */
42         PPC_MTOCRF(0x01,r5)             /* low 4 bits of the length into cr7 */
43         blt     cr1,.Lshort_copy        /* fewer than 16 bytes */
44 /* Below we want to nop out the bne if we're on a CPU that has the
45  * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
46  * cleared.
47  * At the time of writing the only CPU that has this combination of bits
48  * set is Power6.
49  */
50 BEGIN_FTR_SECTION
51         nop
52 FTR_SECTION_ELSE
53         bne     .Ldst_unaligned         /* cr0 from "andi. r6": dest not 8-byte aligned */
54 ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
55                     CPU_FTR_UNALIGNED_LD_STD)
/*
 * .Ldst_aligned: bulk copy, 32 bytes per loop iteration, with the loads
 * running one 16-byte step ahead of the stores. Bit 0x10 of the length
 * selects where the pipelined loop is entered; the last 16 bytes of the
 * bulk are always flushed at labels 72/272. Falls into .Ldo_tail when
 * 1-15 bytes remain.
 * Numeric labels on loads and stores are the fault sites named by the
 * EX_TABLE entries further down.
 */
56 .Ldst_aligned:
57         addi    r3,r3,-16               /* bias dest: stores at 71/271 use offsets 16/24 */
58 BEGIN_FTR_SECTION
59         andi.   r0,r4,7                 /* r0 = source offset within its doubleword */
60         bne     .Lsrc_unaligned
61 END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
62         blt     cr1,.Ldo_tail           /* if < 16 bytes to copy */
63         srdi    r0,r5,5                 /* r0 = number of 32-byte iterations */
64         cmpdi   cr1,r0,0                /* cr1.eq: no full 32-byte iteration */
65 20:     ld      r7,0(r4)
66 220:    ld      r6,8(r4)
67         addi    r4,r4,16
68         mtctr   r0
69         andi.   r0,r5,0x10              /* odd 16-byte chunk present? */
70         beq     22f                     /* no: enter the loop half way, r7/r6 pending */
71         addi    r3,r3,16                /* yes: undo the dest bias ... */
72         addi    r4,r4,-16               /* ... and the source advance */
73         mr      r9,r7                   /* first 16 bytes now pending in r9/r8 */
74         mr      r8,r6
75         beq     cr1,72f                 /* only this one chunk: just store it */
76 21:     ld      r7,16(r4)
77 221:    ld      r6,24(r4)
78         addi    r4,r4,32
79 70:     std     r9,0(r3)
80 270:    std     r8,8(r3)
81 22:     ld      r9,0(r4)
82 222:    ld      r8,8(r4)
83 71:     std     r7,16(r3)
84 271:    std     r6,24(r3)
85         addi    r3,r3,32
86         bdnz    21b
87 72:     std     r9,0(r3)                /* flush the 16 bytes still pending */
88 272:    std     r8,8(r3)
89         andi.   r5,r5,0xf               /* r5 = bytes left for the tail */
90         beq+    3f                      /* none: return 0 via label 3 in .Ldo_tail */
91         addi    r4,r4,16                /* step source past the chunk just stored */
/*
 * .Ldo_tail: copy the final 0-15 bytes. cr7 holds the low four bits of the
 * length (loaded by PPC_MTOCRF(0x01,r5)); each bit selects one 8-, 4-, 2-
 * or 1-byte move. r3 arrives 16 bytes below the next byte to write.
 * Returns r3 = 0.
 */
92 .Ldo_tail:
93         addi    r3,r3,16                /* r3 -> next destination byte */
94         bf      cr7*4+0,246f            /* length bit 8 clear: no doubleword */
95 244:    ld      r9,0(r4)
96         addi    r4,r4,8
97 245:    std     r9,0(r3)
98         addi    r3,r3,8
99 246:    bf      cr7*4+1,1f              /* length bit 4 clear: no word */
100 23:     lwz     r9,0(r4)
101         addi    r4,r4,4
102 73:     stw     r9,0(r3)
103         addi    r3,r3,4
104 1:      bf      cr7*4+2,2f              /* length bit 2 clear: no halfword */
105 44:     lhz     r9,0(r4)
106         addi    r4,r4,2
107 74:     sth     r9,0(r3)
108         addi    r3,r3,2
109 2:      bf      cr7*4+3,3f              /* length bit 1 clear: no byte */
110 45:     lbz     r9,0(r4)
111 75:     stb     r9,0(r3)
112 3:      li      r3,0                    /* success: nothing left uncopied */
113         blr
/*
 * .Lsrc_unaligned: the source is not 8-byte aligned (r0 = source & 7,
 * non-zero). The source pointer is rounded down and every output doubleword
 * is assembled from two neighbouring aligned loads using sLd/sHd.
 * On entry r3 = dest - 16 (bias applied in .Ldst_aligned) and cr7 holds the
 * low four bits of the length.
 *
 * Register use: r10 = source offset in bits, r11 = 64 - r10,
 * ctr = (length - 16) / 16 loop count, r5 = trailing bytes + source offset.
 * Returns r3 = 0.
 */
115 .Lsrc_unaligned:
116         srdi    r6,r5,3                 /* r6 = length in doublewords */
117         addi    r5,r5,-16
118         subf    r4,r0,r4                /* round source down to 8 bytes */
119         srdi    r7,r5,4                 /* r7 = (length - 16) / 16 */
120         sldi    r10,r0,3                /* r10 = source offset in bits */
121         cmpldi  cr6,r6,3                /* cr6: doubleword count vs 3 */
122         andi.   r5,r5,7                 /* trailing bytes; cr0 is consumed at "bne 6f" */
123         mtctr   r7
124         subfic  r11,r10,64              /* r11 = complementary shift count */
125         add     r5,r5,r0                /* trailing bytes + source offset */
126         bt      cr7*4+0,28f             /* length bit 8 set: odd doubleword count */
128 24:     ld      r9,0(r4)        /* 3+2n loads, 2+2n stores */
129 25:     ld      r0,8(r4)
130         sLd     r6,r9,r10
131 26:     ldu     r9,16(r4)
132         sHd     r7,r0,r11
133         sLd     r8,r0,r10
134         or      r7,r7,r6
135         blt     cr6,79f                 /* fewer than 3 doublewords: store and finish */
136 27:     ld      r0,8(r4)
137         b       2f                      /* join the main loop half way */
139 28:     ld      r0,0(r4)        /* 4+2n loads, 3+2n stores */
140 29:     ldu     r9,8(r4)
141         sLd     r8,r0,r10
142         addi    r3,r3,-8                /* extra bias for the odd doubleword */
143         blt     cr6,5f                  /* fewer than 3 doublewords: one store left */
144 30:     ld      r0,8(r4)
145         sHd     r12,r9,r11
146         sLd     r6,r9,r10
147 31:     ldu     r9,16(r4)
148         or      r12,r8,r12
149         sHd     r7,r0,r11
150         sLd     r8,r0,r10
151         addi    r3,r3,16
152         beq     cr6,78f                 /* exactly 3 doublewords: skip the loop */
154 1:      or      r7,r7,r6                /* main loop: two doublewords per pass */
155 32:     ld      r0,8(r4)
156 76:     std     r12,8(r3)
157 2:      sHd     r12,r9,r11
158         sLd     r6,r9,r10
159 33:     ldu     r9,16(r4)
160         or      r12,r8,r12
161 77:     stdu    r7,16(r3)
162         sHd     r7,r0,r11
163         sLd     r8,r0,r10
164         bdnz    1b
166 78:     std     r12,8(r3)               /* drain the doublewords still in flight */
167         or      r7,r7,r6
168 79:     std     r7,16(r3)
169 5:      sHd     r12,r9,r11
170         or      r12,r8,r12
171 80:     std     r12,24(r3)              /* last full doubleword */
172         bne     6f                      /* cr0 from the andi. above: trailing bytes exist */
173         li      r3,0                    /* none: success */
174         blr
175 6:      cmpwi   cr1,r5,8                /* do the trailing bytes reach past r9? */
176         addi    r3,r3,32                /* r3 -> first trailing destination byte */
177         sLd     r9,r9,r10               /* drop the bytes of r9 already stored */
178         ble     cr1,7f                  /* no: r9 already holds all of them */
179 34:     ld      r0,8(r4)                /* yes: fetch one more doubleword and merge */
180         sHd     r7,r0,r11
181         or      r9,r7,r9
/* Target of "ble cr1,7f": trailing-byte stores, r9 holds the bytes to write. */
182 7:
183         bf      cr7*4+1,1f              /* length bit 4 clear: no word */
184 #ifdef __BIG_ENDIAN__
185         rotldi  r9,r9,32                /* bring the next 4 bytes into the low word */
186 #endif
187 94:     stw     r9,0(r3)
188 #ifdef __LITTLE_ENDIAN__
189         rotrdi  r9,r9,32                /* move past the 4 bytes just stored */
190 #endif
191         addi    r3,r3,4
192 1:      bf      cr7*4+2,2f              /* length bit 2 clear: no halfword */
193 #ifdef __BIG_ENDIAN__
194         rotldi  r9,r9,16
195 #endif
196 95:     sth     r9,0(r3)
197 #ifdef __LITTLE_ENDIAN__
198         rotrdi  r9,r9,16
199 #endif
200         addi    r3,r3,2
201 2:      bf      cr7*4+3,3f              /* length bit 1 clear: no byte */
202 #ifdef __BIG_ENDIAN__
203         rotldi  r9,r9,8
204 #endif
205 96:     stb     r9,0(r3)
206 #ifdef __LITTLE_ENDIAN__
207         rotrdi  r9,r9,8
208 #endif
209 3:      li      r3,0                    /* success */
210         blr
/*
 * .Ldst_unaligned: copy the 1-7 bytes needed to bring the destination up to
 * an 8-byte boundary (r6 = that byte count), then rejoin .Ldst_aligned.
 * r3/r4 are not advanced while copying; r7 tracks the running offset, which
 * is why the fault handlers for labels 36/37/82/83 add r7 to r3.
 */
212 .Ldst_unaligned:
213         PPC_MTOCRF(0x01,r6)             /* put #bytes to 8B bdry into cr7 */
214         subf    r5,r6,r5                /* r5 = length left after aligning */
215         li      r7,0                    /* r7 = offset copied so far */
216         cmpldi  cr1,r5,16               /* refresh cr1 for the reduced length */
217         bf      cr7*4+3,1f              /* bit 1 clear: no single byte */
218 35:     lbz     r0,0(r4)
219 81:     stb     r0,0(r3)
220         addi    r7,r7,1
221 1:      bf      cr7*4+2,2f              /* bit 2 clear: no halfword */
222 36:     lhzx    r0,r7,r4
223 82:     sthx    r0,r7,r3
224         addi    r7,r7,2
225 2:      bf      cr7*4+1,3f              /* bit 4 clear: no word */
226 37:     lwzx    r0,r7,r4
227 83:     stwx    r0,r7,r3
228 3:      PPC_MTOCRF(0x01,r5)             /* cr7 = low 4 bits of the remaining length */
229         add     r4,r6,r4                /* advance source past the prologue bytes */
230         add     r3,r6,r3                /* dest is now 8-byte aligned */
231         b       .Ldst_aligned
/*
 * .Lshort_copy: lengths below 16. cr7 holds the low four bits of the length;
 * each bit selects one 8-, 4-, 2- or 1-byte move (the 8-byte move is done as
 * two word accesses). Returns r3 = 0.
 */
233 .Lshort_copy:
234         bf      cr7*4+0,1f              /* length bit 8 clear: skip 8 bytes */
235 38:     lwz     r0,0(r4)
236 39:     lwz     r9,4(r4)
237         addi    r4,r4,8
238 84:     stw     r0,0(r3)
239 85:     stw     r9,4(r3)
240         addi    r3,r3,8
241 1:      bf      cr7*4+1,2f              /* length bit 4 clear: skip word */
242 40:     lwz     r0,0(r4)
243         addi    r4,r4,4
244 86:     stw     r0,0(r3)
245         addi    r3,r3,4
246 2:      bf      cr7*4+2,3f              /* length bit 2 clear: skip halfword */
247 41:     lhz     r0,0(r4)
248         addi    r4,r4,2
249 87:     sth     r0,0(r3)
250         addi    r3,r3,2
251 3:      bf      cr7*4+3,4f              /* length bit 1 clear: skip byte */
252 42:     lbz     r0,0(r4)
253 88:     stb     r0,0(r3)
254 4:      li      r3,0                    /* success */
255         blr
257 /*
258  * exception handlers follow
259  * we have to return the number of bytes not copied
260  * for an exception on a load, we retry byte by byte (no zeroing is done here)
261  */
/*
 * Load-fault landing pads. A faulting load at local label N resumes at
 * label 1NN (3NN for the 2NN loads) through the EX_TABLE entries below.
 * The labels are ordered so that falling through the addi chain advances r3
 * by the offset of the doubleword that was about to be written:
 *   136/137       add r7 (offset reached in .Ldst_unaligned), then skip ahead
 *   130/131       add 24
 *   120 ... 133   add 16
 *   132           add 8
 *   121 ... 145   add nothing
 */
263 136:
264 137:
265         add     r3,r3,r7                /* r7 = bytes already done in .Ldst_unaligned */
266         b       1f                      /* skip the fixed adjustments */
267 130:
268 131:
269         addi    r3,r3,8
270 120:
271 320:
272 122:
273 322:
274 124:
275 125:
276 126:
277 127:
278 128:
279 129:
280 133:
281         addi    r3,r3,8
282 132:
283         addi    r3,r3,8
284 121:
285 321:
286 344:
287 134:
288 135:
289 138:
290 139:
291 140:
292 141:
293 142:
294 123:
295 144:
296 145:
298 /*
299  * here we have had a fault on a load and r3 points to the first
300  * unmodified byte of the destination
301  */
/* Rebuild source pointer and remaining length from the values saved at entry. */
302 1:      ld      r6,-24(r1)              /* original dest */
303         ld      r4,-16(r1)              /* original source */
304         ld      r5,-8(r1)               /* original length */
305         subf    r6,r6,r3                /* r6 = bytes already copied */
306         add     r4,r4,r6                /* source position to resume from */
307         subf    r5,r6,r5        /* #bytes left to go */
309 /*
310  * first see if we can copy any more bytes before hitting another exception
311  */
/* Byte-at-a-time retry; faults here are routed to labels 143 (load) and 189 (store). */
312         mtctr   r5                      /* ctr = bytes still to copy */
313 43:     lbz     r0,0(r4)
314         addi    r4,r4,1
315 89:     stb     r0,0(r3)
316         addi    r3,r3,1
317         bdnz    43b
318         li      r3,0            /* huh? all copied successfully this time? */
319         blr
321 /*
322  * here we have trapped again, amount remaining is in ctr.
323  */
324 143:    mfctr   r3                      /* return value = bytes not copied */
325         blr
327 /*
328  * exception handlers for stores: we just need to work
329  * out how many bytes weren't copied
330  */
/*
 * Store-fault landing pads. A faulting store at local label N resumes at
 * label 1NN (3NN for the 2NN stores) through the EX_TABLE entries below.
 * Falling through the addi chain advances r3 by the displacement of the
 * store that faulted:
 *   182/183          add r7 (offset reached in .Ldst_unaligned), then skip
 *   371/180          add 24
 *   171/177/179      add 16
 *   370/372/176/178  add 8
 *   185              add 4
 *   170 ... 196      add nothing
 * The common tail at "1:" returns r3 = (original dest + original length) - r3,
 * i.e. the number of bytes not copied.
 */
331 182:
332 183:
333         add     r3,r3,r7                /* r7 = bytes already done in .Ldst_unaligned */
334         b       1f                      /* skip the fixed adjustments */
335 371:
336 180:
337         addi    r3,r3,8
338 171:
339 177:
340 179:
341         addi    r3,r3,8
342 370:
343 372:
344 176:
345 178:
346         addi    r3,r3,4
347 185:
348         addi    r3,r3,4
349 170:
350 172:
351 345:
352 173:
353 174:
354 175:
355 181:
356 184:
357 186:
358 187:
359 188:
360 189:
361 194:
362 195:
363 196:
/* Common tail; also the target of "b 1f" from labels 182/183 above. */
364 1:
365         ld      r6,-24(r1)              /* original dest */
366         ld      r5,-8(r1)               /* original length */
367         add     r6,r6,r5                /* r6 = one past the end of the destination */
368         subf    r3,r3,r6        /* #bytes not copied */
369         blr
/*
 * Fault fixup table for the generic copy paths. Each EX_TABLE(Nb, Mb) entry
 * names a load/store site (local label N) and the handler label M, defined
 * above, that is meant to run if that access faults (EX_TABLE itself is
 * defined outside this file). The entries use backward references ("b"), so
 * they have to appear before .Lcopy_page_4K, which reuses labels 20-91.
 */
371         EX_TABLE(20b,120b)
372         EX_TABLE(220b,320b)
373         EX_TABLE(21b,121b)
374         EX_TABLE(221b,321b)
375         EX_TABLE(70b,170b)
376         EX_TABLE(270b,370b)
377         EX_TABLE(22b,122b)
378         EX_TABLE(222b,322b)
379         EX_TABLE(71b,171b)
380         EX_TABLE(271b,371b)
381         EX_TABLE(72b,172b)
382         EX_TABLE(272b,372b)
383         EX_TABLE(244b,344b)
384         EX_TABLE(245b,345b)
385         EX_TABLE(23b,123b)
386         EX_TABLE(73b,173b)
387         EX_TABLE(44b,144b)
388         EX_TABLE(74b,174b)
389         EX_TABLE(45b,145b)
390         EX_TABLE(75b,175b)
391         EX_TABLE(24b,124b)
392         EX_TABLE(25b,125b)
393         EX_TABLE(26b,126b)
394         EX_TABLE(27b,127b)
395         EX_TABLE(28b,128b)
396         EX_TABLE(29b,129b)
397         EX_TABLE(30b,130b)
398         EX_TABLE(31b,131b)
399         EX_TABLE(32b,132b)
400         EX_TABLE(76b,176b)
401         EX_TABLE(33b,133b)
402         EX_TABLE(77b,177b)
403         EX_TABLE(78b,178b)
404         EX_TABLE(79b,179b)
405         EX_TABLE(80b,180b)
406         EX_TABLE(34b,134b)
407         EX_TABLE(94b,194b)
408         EX_TABLE(95b,195b)
409         EX_TABLE(96b,196b)
410         EX_TABLE(35b,135b)
411         EX_TABLE(81b,181b)
412         EX_TABLE(36b,136b)
413         EX_TABLE(82b,182b)
414         EX_TABLE(37b,137b)
415         EX_TABLE(83b,183b)
416         EX_TABLE(38b,138b)
417         EX_TABLE(39b,139b)
418         EX_TABLE(84b,184b)
419         EX_TABLE(85b,185b)
420         EX_TABLE(40b,140b)
421         EX_TABLE(86b,186b)
422         EX_TABLE(41b,141b)
423         EX_TABLE(87b,187b)
424         EX_TABLE(42b,142b)
425         EX_TABLE(88b,188b)
426         EX_TABLE(43b,143b)
427         EX_TABLE(89b,189b)
429 /*
430  * Routine to copy a whole page of data, optimized for POWER4.
431  * On POWER4 it is more than 50% faster than the simple loop
432  * above (following the .Ldst_aligned label).
433  */
/*
 * .Lcopy_page_4K: taken when the length is exactly 4096 and both pointers
 * are 4K aligned. Saves r20-r31 below the stack pointer, then copies in
 * five outer passes of 768 bytes (six load/store streams 128 bytes apart),
 * followed by a 32-byte-per-iteration loop for the last 256 bytes.
 * Registers are written with bare numbers here: (1) = r1, (3) = r3, (4) = r4.
 *
 * Every labelled instruction has an EX_TABLE entry pointing at label 100,
 * which restores the saved registers and redoes the copy via .Ldst_aligned.
 * Returns r3 = 0.
 */
434 .Lcopy_page_4K:
435         std     r31,-32(1)              /* save r20-r31; restored at label 9 / 100 */
436         std     r30,-40(1)
437         std     r29,-48(1)
438         std     r28,-56(1)
439         std     r27,-64(1)
440         std     r26,-72(1)
441         std     r25,-80(1)
442         std     r24,-88(1)
443         std     r23,-96(1)
444         std     r22,-104(1)
445         std     r21,-112(1)
446         std     r20,-120(1)
447         li      r5,4096/32 - 1          /* 32-byte units in the page, less one */
448         addi    r3,r3,-8                /* bias dest: stores below start at offset 8 */
449         li      r0,5                    /* inner-loop trip count */
450 0:      addi    r5,r5,-24               /* one outer pass moves 24 units (768 bytes) */
451         mtctr   r0
452 20:     ld      r22,640(4)
453 21:     ld      r21,512(4)
454 22:     ld      r20,384(4)
455 23:     ld      r11,256(4)
456 24:     ld      r9,128(4)
457 25:     ld      r7,0(4)
458 26:     ld      r25,648(4)
459 27:     ld      r24,520(4)
460 28:     ld      r23,392(4)
461 29:     ld      r10,264(4)
462 30:     ld      r8,136(4)
463 31:     ldu     r6,8(4)                 /* r4 += 8 */
464         cmpwi   r5,24                   /* cr0 is consumed by "bge 0b" after the inner loop */
/* Inner loop head (target of "bdnz 1b"): 24 bytes per stream per iteration. */
465 1:
466 32:     std     r22,648(3)
467 33:     std     r21,520(3)
468 34:     std     r20,392(3)
469 35:     std     r11,264(3)
470 36:     std     r9,136(3)
471 37:     std     r7,8(3)
472 38:     ld      r28,648(4)
473 39:     ld      r27,520(4)
474 40:     ld      r26,392(4)
475 41:     ld      r31,264(4)
476 42:     ld      r30,136(4)
477 43:     ld      r29,8(4)
478 44:     std     r25,656(3)
479 45:     std     r24,528(3)
480 46:     std     r23,400(3)
481 47:     std     r10,272(3)
482 48:     std     r8,144(3)
483 49:     std     r6,16(3)
484 50:     ld      r22,656(4)
485 51:     ld      r21,528(4)
486 52:     ld      r20,400(4)
487 53:     ld      r11,272(4)
488 54:     ld      r9,144(4)
489 55:     ld      r7,16(4)
490 56:     std     r28,664(3)
491 57:     std     r27,536(3)
492 58:     std     r26,408(3)
493 59:     std     r31,280(3)
494 60:     std     r30,152(3)
495 61:     stdu    r29,24(3)               /* r3 += 24 */
496 62:     ld      r25,664(4)
497 63:     ld      r24,536(4)
498 64:     ld      r23,408(4)
499 65:     ld      r10,280(4)
500 66:     ld      r8,152(4)
501 67:     ldu     r6,24(4)                /* r4 += 24 */
502         bdnz    1b
503 68:     std     r22,648(3)              /* flush the last doubleword of each stream */
504 69:     std     r21,520(3)
505 70:     std     r20,392(3)
506 71:     std     r11,264(3)
507 72:     std     r9,136(3)
508 73:     std     r7,8(3)
509 74:     addi    r4,r4,640               /* step both pointers to the next 768-byte group */
510 75:     addi    r3,r3,648
511         bge     0b                      /* repeat while at least 24 units remained */
512         mtctr   r5                      /* remaining units drive the tail loop */
513 76:     ld      r7,0(4)
514 77:     ld      r8,8(4)
515 78:     ldu     r9,16(4)
/* Tail loop head (target of "bdnz 3b"): 32 bytes per iteration. */
516 3:
517 79:     ld      r10,8(4)
518 80:     std     r7,8(3)
519 81:     ld      r7,16(4)
520 82:     std     r8,16(3)
521 83:     ld      r8,24(4)
522 84:     std     r9,24(3)
523 85:     ldu     r9,32(4)
524 86:     stdu    r10,32(3)
525         bdnz    3b
527 87:     ld      r10,8(4)                /* final 32 bytes */
528 88:     std     r7,8(3)
529 89:     std     r8,16(3)
530 90:     std     r9,24(3)
531 91:     std     r10,32(3)
532 9:      ld      r20,-120(1)             /* restore the registers saved on entry */
533         ld      r21,-112(1)
534         ld      r22,-104(1)
535         ld      r23,-96(1)
536         ld      r24,-88(1)
537         ld      r25,-80(1)
538         ld      r26,-72(1)
539         ld      r27,-64(1)
540         ld      r28,-56(1)
541         ld      r29,-48(1)
542         ld      r30,-40(1)
543         ld      r31,-32(1)
544         li      r3,0                    /* whole page copied */
545         blr
547 /*
548  * on an exception, reset to the beginning and jump back into the
549  * standard __copy_tofrom_user
550  */
/*
 * Label 100: fixup target for every labelled site in .Lcopy_page_4K.
 * Restores r20-r31, reloads the original dest/source saved at entry, sets
 * the length back to 4096 and redoes the copy through .Ldst_aligned.
 */
551 100:    ld      r20,-120(1)             /* undo the register saves of .Lcopy_page_4K */
552         ld      r21,-112(1)
553         ld      r22,-104(1)
554         ld      r23,-96(1)
555         ld      r24,-88(1)
556         ld      r25,-80(1)
557         ld      r26,-72(1)
558         ld      r27,-64(1)
559         ld      r28,-56(1)
560         ld      r29,-48(1)
561         ld      r30,-40(1)
562         ld      r31,-32(1)
563         ld      r3,-24(r1)              /* original dest */
564         ld      r4,-16(r1)              /* original source */
565         li      r5,4096                 /* this path is only entered for length 4096 */
566         b       .Ldst_aligned
/*
 * Fault fixup table for .Lcopy_page_4K: every labelled site 20-91 in the
 * page-copy code maps to the single restart handler at label 100.
 */
568         EX_TABLE(20b,100b)
569         EX_TABLE(21b,100b)
570         EX_TABLE(22b,100b)
571         EX_TABLE(23b,100b)
572         EX_TABLE(24b,100b)
573         EX_TABLE(25b,100b)
574         EX_TABLE(26b,100b)
575         EX_TABLE(27b,100b)
576         EX_TABLE(28b,100b)
577         EX_TABLE(29b,100b)
578         EX_TABLE(30b,100b)
579         EX_TABLE(31b,100b)
580         EX_TABLE(32b,100b)
581         EX_TABLE(33b,100b)
582         EX_TABLE(34b,100b)
583         EX_TABLE(35b,100b)
584         EX_TABLE(36b,100b)
585         EX_TABLE(37b,100b)
586         EX_TABLE(38b,100b)
587         EX_TABLE(39b,100b)
588         EX_TABLE(40b,100b)
589         EX_TABLE(41b,100b)
590         EX_TABLE(42b,100b)
591         EX_TABLE(43b,100b)
592         EX_TABLE(44b,100b)
593         EX_TABLE(45b,100b)
594         EX_TABLE(46b,100b)
595         EX_TABLE(47b,100b)
596         EX_TABLE(48b,100b)
597         EX_TABLE(49b,100b)
598         EX_TABLE(50b,100b)
599         EX_TABLE(51b,100b)
600         EX_TABLE(52b,100b)
601         EX_TABLE(53b,100b)
602         EX_TABLE(54b,100b)
603         EX_TABLE(55b,100b)
604         EX_TABLE(56b,100b)
605         EX_TABLE(57b,100b)
606         EX_TABLE(58b,100b)
607         EX_TABLE(59b,100b)
608         EX_TABLE(60b,100b)
609         EX_TABLE(61b,100b)
610         EX_TABLE(62b,100b)
611         EX_TABLE(63b,100b)
612         EX_TABLE(64b,100b)
613         EX_TABLE(65b,100b)
614         EX_TABLE(66b,100b)
615         EX_TABLE(67b,100b)
616         EX_TABLE(68b,100b)
617         EX_TABLE(69b,100b)
618         EX_TABLE(70b,100b)
619         EX_TABLE(71b,100b)
620         EX_TABLE(72b,100b)
621         EX_TABLE(73b,100b)
622         EX_TABLE(74b,100b)
623         EX_TABLE(75b,100b)
624         EX_TABLE(76b,100b)
625         EX_TABLE(77b,100b)
626         EX_TABLE(78b,100b)
627         EX_TABLE(79b,100b)
628         EX_TABLE(80b,100b)
629         EX_TABLE(81b,100b)
630         EX_TABLE(82b,100b)
631         EX_TABLE(83b,100b)
632         EX_TABLE(84b,100b)
633         EX_TABLE(85b,100b)
634         EX_TABLE(86b,100b)
635         EX_TABLE(87b,100b)
636         EX_TABLE(88b,100b)
637         EX_TABLE(89b,100b)
638         EX_TABLE(90b,100b)
639         EX_TABLE(91b,100b)
641 EXPORT_SYMBOL(__copy_tofrom_user)