Merge tag 'powerpc-4.6-4' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc...
[linux/fpc-iii.git] / arch / powerpc / lib / copyuser_64.S
blobf09899e35991711d0a57e74519af662b59e59f15
1 /*
2  * Copyright (C) 2002 Paul Mackerras, IBM Corp.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version
7  * 2 of the License, or (at your option) any later version.
8  */
9 #include <asm/processor.h>
10 #include <asm/ppc_asm.h>
12 #ifdef __BIG_ENDIAN__
13 #define sLd sld         /* Shift towards low-numbered address: sld when __BIG_ENDIAN__ is defined. */
14 #define sHd srd         /* Shift towards high-numbered address: srd when __BIG_ENDIAN__ is defined. */
15 #else /* !__BIG_ENDIAN__ */
16 #define sLd srd         /* Shift towards low-numbered address: the two directions swap here. */
17 #define sHd sld         /* Shift towards high-numbered address: the two directions swap here. */
18 #endif /* __BIG_ENDIAN__ */
20         .align  7
21 _GLOBAL_TOC(__copy_tofrom_user)         /* r3 = dest, r4 = src, r5 = byte count; r3 returns #bytes not copied (0 on success) */
22 BEGIN_FTR_SECTION
23         nop                             /* NOTE(review): feature macros are defined outside this file; presumably the nop is kept when CPU_FTR_VMX_COPY is clear - confirm */
24 FTR_SECTION_ELSE
25         b       __copy_tofrom_user_power7       /* alternative: hand the whole copy to the power7 routine */
26 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
27 _GLOBAL(__copy_tofrom_user_base)
28         /* first check for a whole page copy on a page boundary */
29         cmpldi  cr1,r5,16               /* cr1 = unsigned compare of length with 16 (used by the blt cr1 tests) */
30         cmpdi   cr6,r5,4096             /* cr6.eq = length is exactly 4096 */
31         or      r0,r3,r4                /* merge both pointers so one mask tests both alignments */
32         neg     r6,r3           /* LS 3 bits = # bytes to 8-byte dest bdry */
33         andi.   r0,r0,4095              /* cr0.eq = dest and src both have their low 12 bits clear */
34         std     r3,-24(r1)              /* stash original dest below r1; the fault handlers reload it */
35         crand   cr0*4+2,cr0*4+2,cr6*4+2 /* cr0.eq = (both pointers 4096-aligned) AND (length == 4096) */
36         std     r4,-16(r1)              /* stash original src */
37         std     r5,-8(r1)               /* stash original length */
38         dcbt    0,r4                    /* cache-touch hint on the start of the source */
39         beq     .Lcopy_page_4K          /* aligned whole-page copy takes the dedicated loop */
40         andi.   r6,r6,7                 /* r6 = bytes up to the next 8-byte dest boundary; cr0.eq if already aligned */
41         PPC_MTOCRF(0x01,r5)             /* low 4 bits of the length -> cr7 (tested bit-by-bit by the tail copies) */
42         blt     cr1,.Lshort_copy        /* fewer than 16 bytes: simple path */
43 /* Below we want to nop out the bne if we're on a CPU that has the
44  * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
45  * cleared.
46  * At the time of writing the only CPU that has this combination of bits
47  * set is Power6.
48  */
49 BEGIN_FTR_SECTION
50         nop
51 FTR_SECTION_ELSE
52         bne     .Ldst_unaligned         /* cr0 still from "andi. r6,r6,7": dest is not 8-byte aligned */
53 ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
54                     CPU_FTR_UNALIGNED_LD_STD)
55 .Ldst_aligned:                          /* bulk copy with doubleword accesses, then a 0-15 byte tail */
56         addi    r3,r3,-16               /* bias dest by -16; undone at "addi r3,r3,16" below or at .Ldo_tail */
57 BEGIN_FTR_SECTION
58         andi.   r0,r4,7                 /* r0 = src offset within its doubleword (consumed by .Lsrc_unaligned) */
59         bne     .Lsrc_unaligned         /* src not 8-byte aligned */
60 END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
61         blt     cr1,.Ldo_tail           /* if < 16 bytes to copy */
62         srdi    r0,r5,5                 /* r0 = length / 32 */
63         cmpdi   cr1,r0,0                /* cr1.eq = no whole 32-byte chunk */
64 20:     ld      r7,0(r4)                /* preload 16 bytes; numeric labels tag accesses listed in __ex_table */
65 220:    ld      r6,8(r4)
66         addi    r4,r4,16
67         mtctr   r0                      /* ctr = number of 32-byte loop passes */
68         andi.   r0,r5,0x10              /* is there an odd 16-byte chunk? */
69         beq     22f                     /* no: enter the loop at its midpoint */
70         addi    r3,r3,16                /* yes: undo the dest bias ... */
71         addi    r4,r4,-16               /* ... and the src advance, */
72         mr      r9,r7                   /* and move the preloaded data into the r9/r8 pair */
73         mr      r8,r6
74         beq     cr1,72f                 /* no 32-byte passes: just store these 16 bytes */
75 21:     ld      r7,16(r4)               /* loop: 32 bytes per pass, loads interleaved with stores */
76 221:    ld      r6,24(r4)
77         addi    r4,r4,32
78 70:     std     r9,0(r3)
79 270:    std     r8,8(r3)
80 22:     ld      r9,0(r4)
81 222:    ld      r8,8(r4)
82 71:     std     r7,16(r3)
83 271:    std     r6,24(r3)
84         addi    r3,r3,32
85         bdnz    21b
86 72:     std     r9,0(r3)                /* store the last 16 bytes still held in r9/r8 */
87 272:    std     r8,8(r3)
88         andi.   r5,r5,0xf               /* r5 = remaining bytes (< 16) */
89         beq+    3f                      /* none left: return success */
90         addi    r4,r4,16                /* step src past the 16 bytes just stored */
91 .Ldo_tail:                              /* copy 8/4/2/1 bytes according to the length bits held in cr7 */
92         addi    r3,r3,16                /* dest += 16: removes the entry bias, or skips the 16 bytes stored at 72/272 */
93         bf      cr7*4+0,246f            /* length bit 8 clear: no doubleword */
94 244:    ld      r9,0(r4)
95         addi    r4,r4,8
96 245:    std     r9,0(r3)
97         addi    r3,r3,8
98 246:    bf      cr7*4+1,1f              /* length bit 4 clear: no word */
99 23:     lwz     r9,0(r4)
100         addi    r4,r4,4
101 73:     stw     r9,0(r3)
102         addi    r3,r3,4
103 1:      bf      cr7*4+2,2f              /* length bit 2 clear: no halfword */
104 44:     lhz     r9,0(r4)
105         addi    r4,r4,2
106 74:     sth     r9,0(r3)
107         addi    r3,r3,2
108 2:      bf      cr7*4+3,3f              /* length bit 1 clear: no byte */
109 45:     lbz     r9,0(r4)
110 75:     stb     r9,0(r3)
111 3:      li      r3,0                    /* success: 0 bytes not copied */
112         blr
114 .Lsrc_unaligned:                        /* dest is 8-byte aligned, src is not: load aligned doublewords and merge them with shifts */
115         srdi    r6,r5,3                 /* r6 = length / 8 */
116         addi    r5,r5,-16
117         subf    r4,r0,r4                /* round src down to a doubleword boundary (r0 = src & 7 from .Ldst_aligned) */
118         srdi    r7,r5,4                 /* r7 = (length - 16) / 16, the loop count */
119         sldi    r10,r0,3                /* r10 = src misalignment in bits */
120         cmpldi  cr6,r6,3                /* cr6 = doubleword count compared with 3 */
121         andi.   r5,r5,7                 /* r5 = length & 7; cr0 stays live until "bne 6f" below */
122         mtctr   r7
123         subfic  r11,r10,64              /* r11 = 64 - r10, the complementary shift count */
124         add     r5,r5,r0                /* r5 = tail bytes + src offset (compared with 8 at 6:) */
125         bt      cr7*4+0,28f             /* length bit 8 set: take the second entry sequence */
127 24:     ld      r9,0(r4)        /* 3+2n loads, 2+2n stores */
128 25:     ld      r0,8(r4)
129         sLd     r6,r9,r10
130 26:     ldu     r9,16(r4)
131         sHd     r7,r0,r11
132         sLd     r8,r0,r10
133         or      r7,r7,r6                /* r7 = first merged doubleword */
134         blt     cr6,79f                 /* fewer than 3 doublewords: skip the loop */
135 27:     ld      r0,8(r4)
136         b       2f                      /* join the loop at its midpoint */
138 28:     ld      r0,0(r4)        /* 4+2n loads, 3+2n stores */
139 29:     ldu     r9,8(r4)
140         sLd     r8,r0,r10
141         addi    r3,r3,-8
142         blt     cr6,5f                  /* fewer than 3 doublewords: only one merged store needed */
143 30:     ld      r0,8(r4)
144         sHd     r12,r9,r11
145         sLd     r6,r9,r10
146 31:     ldu     r9,16(r4)
147         or      r12,r8,r12
148         sHd     r7,r0,r11
149         sLd     r8,r0,r10
150         addi    r3,r3,16
151         beq     cr6,78f                 /* exactly 3 doublewords: skip the loop */
153 1:      or      r7,r7,r6                /* loop: two merged doublewords (r12 and r7) stored per pass */
154 32:     ld      r0,8(r4)
155 76:     std     r12,8(r3)
156 2:      sHd     r12,r9,r11
157         sLd     r6,r9,r10
158 33:     ldu     r9,16(r4)
159         or      r12,r8,r12
160 77:     stdu    r7,16(r3)
161         sHd     r7,r0,r11
162         sLd     r8,r0,r10
163         bdnz    1b
165 78:     std     r12,8(r3)               /* store the merged doublewords still pending */
166         or      r7,r7,r6
167 79:     std     r7,16(r3)
168 5:      sHd     r12,r9,r11
169         or      r12,r8,r12
170 80:     std     r12,24(r3)
171         bne     6f                      /* cr0 from "andi. r5,r5,7": sub-doubleword tail remains */
172         li      r3,0                    /* success: 0 bytes not copied */
173         blr
174 6:      cmpwi   cr1,r5,8                /* r5 <= 8: no further source load is performed */
175         addi    r3,r3,32
176         sLd     r9,r9,r10
177         ble     cr1,7f
178 34:     ld      r0,8(r4)                /* otherwise fetch one more doubleword and merge it into r9 */
179         sHd     r7,r0,r11
180         or      r9,r7,r9
181 7:
182         bf      cr7*4+1,1f              /* length bit 4 clear: no word */
183 #ifdef __BIG_ENDIAN__
184         rotldi  r9,r9,32                /* rotate the next 4 bytes into the low word before storing */
185 #endif
186 94:     stw     r9,0(r3)
187 #ifdef __LITTLE_ENDIAN__
188         rotrdi  r9,r9,32                /* rotate the stored 4 bytes out of the low word */
189 #endif
190         addi    r3,r3,4
191 1:      bf      cr7*4+2,2f              /* length bit 2 clear: no halfword */
192 #ifdef __BIG_ENDIAN__
193         rotldi  r9,r9,16
194 #endif
195 95:     sth     r9,0(r3)
196 #ifdef __LITTLE_ENDIAN__
197         rotrdi  r9,r9,16
198 #endif
199         addi    r3,r3,2
200 2:      bf      cr7*4+3,3f              /* length bit 1 clear: no byte */
201 #ifdef __BIG_ENDIAN__
202         rotldi  r9,r9,8
203 #endif
204 96:     stb     r9,0(r3)
205 #ifdef __LITTLE_ENDIAN__
206         rotrdi  r9,r9,8
207 #endif
208 3:      li      r3,0                    /* success: 0 bytes not copied */
209         blr
211 .Ldst_unaligned:                        /* copy r6 (1-7) bytes so dest reaches an 8-byte boundary, then rejoin .Ldst_aligned */
212         PPC_MTOCRF(0x01,r6)             /* put #bytes to 8B bdry into cr7 */
213         subf    r5,r6,r5                /* length -= alignment bytes */
214         li      r7,0                    /* r7 = running offset for the indexed accesses; the fault fixups add it to r3 */
215         cmpldi  cr1,r5,16               /* refresh cr1 for the "< 16 bytes" test in .Ldst_aligned */
216         bf      cr7*4+3,1f              /* r6 bit 1 clear: no single byte */
217 35:     lbz     r0,0(r4)
218 81:     stb     r0,0(r3)
219         addi    r7,r7,1
220 1:      bf      cr7*4+2,2f              /* r6 bit 2 clear: no halfword */
221 36:     lhzx    r0,r7,r4
222 82:     sthx    r0,r7,r3
223         addi    r7,r7,2
224 2:      bf      cr7*4+1,3f              /* r6 bit 4 clear: no word */
225 37:     lwzx    r0,r7,r4
226 83:     stwx    r0,r7,r3
227 3:      PPC_MTOCRF(0x01,r5)             /* reload cr7 with the low 4 bits of the reduced length */
228         add     r4,r6,r4                /* advance src past the alignment bytes */
229         add     r3,r6,r3                /* advance dest past the alignment bytes */
230         b       .Ldst_aligned
232 .Lshort_copy:                           /* length < 16: copy 8/4/2/1 bytes according to the length bits held in cr7 */
233         bf      cr7*4+0,1f              /* length bit 8 clear: skip the 8-byte step */
234 38:     lwz     r0,0(r4)                /* the 8 bytes are moved as two words */
235 39:     lwz     r9,4(r4)
236         addi    r4,r4,8
237 84:     stw     r0,0(r3)
238 85:     stw     r9,4(r3)
239         addi    r3,r3,8
240 1:      bf      cr7*4+1,2f              /* length bit 4 clear: no word */
241 40:     lwz     r0,0(r4)
242         addi    r4,r4,4
243 86:     stw     r0,0(r3)
244         addi    r3,r3,4
245 2:      bf      cr7*4+2,3f              /* length bit 2 clear: no halfword */
246 41:     lhz     r0,0(r4)
247         addi    r4,r4,2
248 87:     sth     r0,0(r3)
249         addi    r3,r3,2
250 3:      bf      cr7*4+3,4f              /* length bit 1 clear: no byte */
251 42:     lbz     r0,0(r4)
252 88:     stb     r0,0(r3)
253 4:      li      r3,0                    /* success: 0 bytes not copied */
254         blr
256 /*
257  * exception handlers follow
258  * we have to return the number of bytes not copied
259  * for an exception on a load, we set the rest of the destination to 0
260  */
262 136:                                    /* load-fault fixups: per __ex_table, label 1NN (or 3NN) handles a fault at access NN (or 2NN) */
263 137:
264         add     r3,r3,r7                /* faults at 36/37 in .Ldst_unaligned: add the r7 byte offset used there */
265         b       1f
266 130:
267 131:
268         addi    r3,r3,8                 /* falls through the two addi below: r3 += 24 in total */
269 120:
270 320:
271 122:
272 322:
273 124:
274 125:
275 126:
276 127:
277 128:
278 129:
279 133:
280         addi    r3,r3,8                 /* from here r3 += 16 in total */
281 132:
282         addi    r3,r3,8                 /* from here r3 += 8 */
283 121:                                    /* remaining entries: r3 is used as it stands */
284 321:
285 344:
286 134:
287 135:
288 138:
289 139:
290 140:
291 141:
292 142:
293 123:
294 144:
295 145:
297 /*
298  * here we have had a fault on a load and r3 points to the first
299  * unmodified byte of the destination
300  */
301 1:      ld      r6,-24(r1)              /* r6 = original dest saved at entry */
302         ld      r4,-16(r1)              /* r4 = original src */
303         ld      r5,-8(r1)               /* r5 = original length */
304         subf    r6,r6,r3                /* r6 = bytes already copied */
305         add     r4,r4,r6                /* r4 = source address matching r3 */
306         subf    r5,r6,r5        /* #bytes left to go */
308 /*
309  * first see if we can copy any more bytes before hitting another exception
310  */
311         mtctr   r5
312 43:     lbz     r0,0(r4)                /* byte-at-a-time retry; a fault here goes to 143 */
313         addi    r4,r4,1
314 89:     stb     r0,0(r3)
315         addi    r3,r3,1
316         bdnz    43b
317         li      r3,0            /* huh? all copied successfully this time? */
318         blr
320 /*
321  * here we have trapped again, need to clear ctr bytes starting at r3
322  */
323 143:    mfctr   r5                      /* r5 = bytes still not copied */
324         li      r0,0                    /* r0 = zero used as fill value */
325         mr      r4,r3                   /* r4 = address to clear from */
326         mr      r3,r5           /* return the number of bytes not copied */
327 1:      andi.   r9,r4,7                 /* clear single bytes until r4 is 8-byte aligned */
328         beq     3f
329 90:     stb     r0,0(r4)
330         addic.  r5,r5,-1
331         addi    r4,r4,1
332         bne     1b
333         blr                             /* count reached zero before r4 became aligned */
334 3:      cmpldi  cr1,r5,8
335         srdi    r9,r5,3                 /* r9 = whole doublewords to clear */
336         andi.   r5,r5,7                 /* r5 = bytes after them; cr0.eq is consumed by beqlr at 93 */
337         blt     cr1,93f                 /* fewer than 8 bytes: skip the doubleword loop */
338         mtctr   r9
339 91:     std     r0,0(r4)
340         addi    r4,r4,8
341         bdnz    91b
342 93:     beqlr                           /* no trailing bytes: return */
343         mtctr   r5
344 92:     stb     r0,0(r4)                /* clear the trailing bytes */
345         addi    r4,r4,1
346         bdnz    92b
347         blr
349 /*
350  * exception handlers for stores: we just need to work
351  * out how many bytes weren't copied
352  */
353 182:                                    /* store-fault fixups: per __ex_table, label 1NN (or 3NN) handles a fault at access NN (or 2NN) */
354 183:
355         add     r3,r3,r7                /* faults at 82/83 in .Ldst_unaligned: add the r7 byte offset used there */
356         b       1f
357 371:
358 180:
359         addi    r3,r3,8                 /* falls through the addi lines below: r3 += 24 in total */
360 171:
361 177:
362         addi    r3,r3,8                 /* from here r3 += 16 in total */
363 370:
364 372:
365 176:
366 178:
367         addi    r3,r3,4                 /* from here r3 += 8 in total */
368 185:
369         addi    r3,r3,4                 /* from here r3 += 4 */
370 170:                                    /* remaining entries: r3 is used as it stands */
371 172:
372 345:
373 173:
374 174:
375 175:
376 179:
377 181:
378 184:
379 186:
380 187:
381 188:
382 189:
383 194:
384 195:
385 196:
386 1:
387         ld      r6,-24(r1)              /* r6 = original dest saved at entry */
388         ld      r5,-8(r1)               /* r5 = original length */
389         add     r6,r6,r5                /* r6 = one past the end of the destination */
390         subf    r3,r3,r6        /* #bytes not copied */
391 190:                                    /* faults in the zero-fill stores 90/91/92 land here: r3 was set before they ran */
392 191:
393 192:
394         blr                     /* #bytes not copied in r3 */
396         .section __ex_table,"a"         /* pairs of (tagged access, fixup label) for the code above */
397         .align  3
398         .llong  20b,120b                /* .Ldst_aligned / .Ldo_tail accesses */
399         .llong  220b,320b
400         .llong  21b,121b
401         .llong  221b,321b
402         .llong  70b,170b
403         .llong  270b,370b
404         .llong  22b,122b
405         .llong  222b,322b
406         .llong  71b,171b
407         .llong  271b,371b
408         .llong  72b,172b
409         .llong  272b,372b
410         .llong  244b,344b
411         .llong  245b,345b
412         .llong  23b,123b
413         .llong  73b,173b
414         .llong  44b,144b
415         .llong  74b,174b
416         .llong  45b,145b
417         .llong  75b,175b
418         .llong  24b,124b                /* .Lsrc_unaligned accesses */
419         .llong  25b,125b
420         .llong  26b,126b
421         .llong  27b,127b
422         .llong  28b,128b
423         .llong  29b,129b
424         .llong  30b,130b
425         .llong  31b,131b
426         .llong  32b,132b
427         .llong  76b,176b
428         .llong  33b,133b
429         .llong  77b,177b
430         .llong  78b,178b
431         .llong  79b,179b
432         .llong  80b,180b
433         .llong  34b,134b
434         .llong  94b,194b
435         .llong  95b,195b
436         .llong  96b,196b
437         .llong  35b,135b                /* .Ldst_unaligned accesses */
438         .llong  81b,181b
439         .llong  36b,136b
440         .llong  82b,182b
441         .llong  37b,137b
442         .llong  83b,183b
443         .llong  38b,138b                /* .Lshort_copy accesses */
444         .llong  39b,139b
445         .llong  84b,184b
446         .llong  85b,185b
447         .llong  40b,140b
448         .llong  86b,186b
449         .llong  41b,141b
450         .llong  87b,187b
451         .llong  42b,142b
452         .llong  88b,188b
453         .llong  43b,143b                /* byte-wise retry loop in the load-fault handler */
454         .llong  89b,189b
455         .llong  90b,190b                /* zero-fill stores in the load-fault handler */
456         .llong  91b,191b
457         .llong  92b,192b
458         
459         .text                           /* back to the code section */
461 /*
462  * Routine to copy a whole page of data, optimized for POWER4.
463  * On POWER4 it is more than 50% faster than the simple loop
464  * above (following the .Ldst_aligned label).
465  */
466 .Lcopy_page_4K:                         /* reached from the entry code when both pointers are 4096-aligned and length == 4096 */
467         std     r31,-32(1)              /* save r20-r31 below r1; restored at 9: and in the fault fixup */
468         std     r30,-40(1)
469         std     r29,-48(1)
470         std     r28,-56(1)
471         std     r27,-64(1)
472         std     r26,-72(1)
473         std     r25,-80(1)
474         std     r24,-88(1)
475         std     r23,-96(1)
476         std     r22,-104(1)
477         std     r21,-112(1)
478         std     r20,-120(1)
479         li      r5,4096/32 - 1          /* r5 = 127 */
480         addi    r3,r3,-8                /* bias dest by -8: every store below uses offset +8 or more */
481         li      r0,5                    /* r0 = inner-loop pass count */
482 0:      addi    r5,r5,-24               /* outer loop: r5 drops by 24 per pass */
483         mtctr   r0
484 20:     ld      r22,640(4)              /* loads come from six offsets 128 bytes apart: 0,128,...,640 */
485 21:     ld      r21,512(4)
486 22:     ld      r20,384(4)
487 23:     ld      r11,256(4)
488 24:     ld      r9,128(4)
489 25:     ld      r7,0(4)
490 26:     ld      r25,648(4)
491 27:     ld      r24,520(4)
492 28:     ld      r23,392(4)
493 29:     ld      r10,264(4)
494 30:     ld      r8,136(4)
495 31:     ldu     r6,8(4)
496         cmpwi   r5,24                   /* cr0 for "bge 0b" below; nothing in between writes cr0 */
497 1:
498 32:     std     r22,648(3)              /* inner loop: stores of earlier loads interleaved with the next loads */
499 33:     std     r21,520(3)
500 34:     std     r20,392(3)
501 35:     std     r11,264(3)
502 36:     std     r9,136(3)
503 37:     std     r7,8(3)
504 38:     ld      r28,648(4)
505 39:     ld      r27,520(4)
506 40:     ld      r26,392(4)
507 41:     ld      r31,264(4)
508 42:     ld      r30,136(4)
509 43:     ld      r29,8(4)
510 44:     std     r25,656(3)
511 45:     std     r24,528(3)
512 46:     std     r23,400(3)
513 47:     std     r10,272(3)
514 48:     std     r8,144(3)
515 49:     std     r6,16(3)
516 50:     ld      r22,656(4)
517 51:     ld      r21,528(4)
518 52:     ld      r20,400(4)
519 53:     ld      r11,272(4)
520 54:     ld      r9,144(4)
521 55:     ld      r7,16(4)
522 56:     std     r28,664(3)
523 57:     std     r27,536(3)
524 58:     std     r26,408(3)
525 59:     std     r31,280(3)
526 60:     std     r30,152(3)
527 61:     stdu    r29,24(3)               /* dest += 24 per inner pass */
528 62:     ld      r25,664(4)
529 63:     ld      r24,536(4)
530 64:     ld      r23,408(4)
531 65:     ld      r10,280(4)
532 66:     ld      r8,152(4)
533 67:     ldu     r6,24(4)                /* src += 24 per inner pass */
534         bdnz    1b
535 68:     std     r22,648(3)              /* store the doublewords loaded by the last inner pass */
536 69:     std     r21,520(3)
537 70:     std     r20,392(3)
538 71:     std     r11,264(3)
539 72:     std     r9,136(3)
540 73:     std     r7,8(3)
541 74:     addi    r4,r4,640               /* advance src for the next outer pass */
542 75:     addi    r3,r3,648               /* advance dest for the next outer pass */
543         bge     0b                      /* repeat while r5 was >= 24 at the cmpwi above */
544         mtctr   r5                      /* remaining r5 passes of the 32-byte loop below */
545 76:     ld      r7,0(4)
546 77:     ld      r8,8(4)
547 78:     ldu     r9,16(4)
548 3:
549 79:     ld      r10,8(4)                /* final loop: 32 bytes per pass */
550 80:     std     r7,8(3)
551 81:     ld      r7,16(4)
552 82:     std     r8,16(3)
553 83:     ld      r8,24(4)
554 84:     std     r9,24(3)
555 85:     ldu     r9,32(4)
556 86:     stdu    r10,32(3)
557         bdnz    3b
559 87:     ld      r10,8(4)                /* last 32 bytes: one more load, then store the four held registers */
560 88:     std     r7,8(3)
561 89:     std     r8,16(3)
562 90:     std     r9,24(3)
563 91:     std     r10,32(3)
564 9:      ld      r20,-120(1)             /* restore r20-r31 */
565         ld      r21,-112(1)
566         ld      r22,-104(1)
567         ld      r23,-96(1)
568         ld      r24,-88(1)
569         ld      r25,-80(1)
570         ld      r26,-72(1)
571         ld      r27,-64(1)
572         ld      r28,-56(1)
573         ld      r29,-48(1)
574         ld      r30,-40(1)
575         ld      r31,-32(1)
576         li      r3,0                    /* success: 0 bytes not copied */
577         blr
579 /*
580  * on an exception, reset to the beginning and jump back into the
581  * standard __copy_tofrom_user
582  */
583 100:    ld      r20,-120(1)             /* shared fixup for every tagged access in .Lcopy_page_4K: restore r20-r31 */
584         ld      r21,-112(1)
585         ld      r22,-104(1)
586         ld      r23,-96(1)
587         ld      r24,-88(1)
588         ld      r25,-80(1)
589         ld      r26,-72(1)
590         ld      r27,-64(1)
591         ld      r28,-56(1)
592         ld      r29,-48(1)
593         ld      r30,-40(1)
594         ld      r31,-32(1)
595         ld      r3,-24(r1)              /* reload the original dest saved at entry */
596         ld      r4,-16(r1)              /* reload the original src saved at entry */
597         li      r5,4096                 /* length is known to be 4096 on this path */
598         b       .Ldst_aligned           /* redo the whole copy with the generic loop */
600         .section __ex_table,"a"         /* every tagged access in .Lcopy_page_4K maps to the single fixup at 100 */
601         .align  3
602         .llong  20b,100b
603         .llong  21b,100b
604         .llong  22b,100b
605         .llong  23b,100b
606         .llong  24b,100b
607         .llong  25b,100b
608         .llong  26b,100b
609         .llong  27b,100b
610         .llong  28b,100b
611         .llong  29b,100b
612         .llong  30b,100b
613         .llong  31b,100b
614         .llong  32b,100b
615         .llong  33b,100b
616         .llong  34b,100b
617         .llong  35b,100b
618         .llong  36b,100b
619         .llong  37b,100b
620         .llong  38b,100b
621         .llong  39b,100b
622         .llong  40b,100b
623         .llong  41b,100b
624         .llong  42b,100b
625         .llong  43b,100b
626         .llong  44b,100b
627         .llong  45b,100b
628         .llong  46b,100b
629         .llong  47b,100b
630         .llong  48b,100b
631         .llong  49b,100b
632         .llong  50b,100b
633         .llong  51b,100b
634         .llong  52b,100b
635         .llong  53b,100b
636         .llong  54b,100b
637         .llong  55b,100b
638         .llong  56b,100b
639         .llong  57b,100b
640         .llong  58b,100b
641         .llong  59b,100b
642         .llong  60b,100b
643         .llong  61b,100b
644         .llong  62b,100b
645         .llong  63b,100b
646         .llong  64b,100b
647         .llong  65b,100b
648         .llong  66b,100b
649         .llong  67b,100b
650         .llong  68b,100b
651         .llong  69b,100b
652         .llong  70b,100b
653         .llong  71b,100b
654         .llong  72b,100b
655         .llong  73b,100b
656         .llong  74b,100b                /* NOTE(review): 74 and 75 tag addi instructions, which make no memory access */
657         .llong  75b,100b
658         .llong  76b,100b
659         .llong  77b,100b
660         .llong  78b,100b
661         .llong  79b,100b
662         .llong  80b,100b
663         .llong  81b,100b
664         .llong  82b,100b
665         .llong  83b,100b
666         .llong  84b,100b
667         .llong  85b,100b
668         .llong  86b,100b
669         .llong  87b,100b
670         .llong  88b,100b
671         .llong  89b,100b
672         .llong  90b,100b
673         .llong  91b,100b