/*
 * arch/powerpc/lib/copyuser_64.S  (zen-stable.git, tag v3.3.7)
 *
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
12         .align  7
13 _GLOBAL(__copy_tofrom_user)
14 BEGIN_FTR_SECTION
15         nop
16 FTR_SECTION_ELSE
17         b       __copy_tofrom_user_power7
18 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
19 _GLOBAL(__copy_tofrom_user_base)
20         /* first check for a whole page copy on a page boundary */
21         cmpldi  cr1,r5,16
22         cmpdi   cr6,r5,4096
23         or      r0,r3,r4
24         neg     r6,r3           /* LS 3 bits = # bytes to 8-byte dest bdry */
25         andi.   r0,r0,4095
26         std     r3,-24(r1)
27         crand   cr0*4+2,cr0*4+2,cr6*4+2
28         std     r4,-16(r1)
29         std     r5,-8(r1)
30         dcbt    0,r4
31         beq     .Lcopy_page_4K
32         andi.   r6,r6,7
33         PPC_MTOCRF      0x01,r5
34         blt     cr1,.Lshort_copy
35 /* Below we want to nop out the bne if we're on a CPU that has the
36  * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
37  * cleared.
38  * At the time of writing the only CPU that has this combination of bits
39  * set is Power6.
40  */
41 BEGIN_FTR_SECTION
42         nop
43 FTR_SECTION_ELSE
44         bne     .Ldst_unaligned
45 ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
46                     CPU_FTR_UNALIGNED_LD_STD)
47 .Ldst_aligned:
48         addi    r3,r3,-16
49 BEGIN_FTR_SECTION
50         andi.   r0,r4,7
51         bne     .Lsrc_unaligned
52 END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
53         blt     cr1,.Ldo_tail           /* if < 16 bytes to copy */
54         srdi    r0,r5,5
55         cmpdi   cr1,r0,0
56 20:     ld      r7,0(r4)
57 220:    ld      r6,8(r4)
58         addi    r4,r4,16
59         mtctr   r0
60         andi.   r0,r5,0x10
61         beq     22f
62         addi    r3,r3,16
63         addi    r4,r4,-16
64         mr      r9,r7
65         mr      r8,r6
66         beq     cr1,72f
67 21:     ld      r7,16(r4)
68 221:    ld      r6,24(r4)
69         addi    r4,r4,32
70 70:     std     r9,0(r3)
71 270:    std     r8,8(r3)
72 22:     ld      r9,0(r4)
73 222:    ld      r8,8(r4)
74 71:     std     r7,16(r3)
75 271:    std     r6,24(r3)
76         addi    r3,r3,32
77         bdnz    21b
78 72:     std     r9,0(r3)
79 272:    std     r8,8(r3)
80         andi.   r5,r5,0xf
81         beq+    3f
82         addi    r4,r4,16
83 .Ldo_tail:
84         addi    r3,r3,16
85         bf      cr7*4+0,246f
86 244:    ld      r9,0(r4)
87         addi    r4,r4,8
88 245:    std     r9,0(r3)
89         addi    r3,r3,8
90 246:    bf      cr7*4+1,1f
91 23:     lwz     r9,0(r4)
92         addi    r4,r4,4
93 73:     stw     r9,0(r3)
94         addi    r3,r3,4
95 1:      bf      cr7*4+2,2f
96 44:     lhz     r9,0(r4)
97         addi    r4,r4,2
98 74:     sth     r9,0(r3)
99         addi    r3,r3,2
100 2:      bf      cr7*4+3,3f
101 45:     lbz     r9,0(r4)
102 75:     stb     r9,0(r3)
103 3:      li      r3,0
104         blr
106 .Lsrc_unaligned:
107         srdi    r6,r5,3
108         addi    r5,r5,-16
109         subf    r4,r0,r4
110         srdi    r7,r5,4
111         sldi    r10,r0,3
112         cmpldi  cr6,r6,3
113         andi.   r5,r5,7
114         mtctr   r7
115         subfic  r11,r10,64
116         add     r5,r5,r0
117         bt      cr7*4+0,28f
119 24:     ld      r9,0(r4)        /* 3+2n loads, 2+2n stores */
120 25:     ld      r0,8(r4)
121         sld     r6,r9,r10
122 26:     ldu     r9,16(r4)
123         srd     r7,r0,r11
124         sld     r8,r0,r10
125         or      r7,r7,r6
126         blt     cr6,79f
127 27:     ld      r0,8(r4)
128         b       2f
130 28:     ld      r0,0(r4)        /* 4+2n loads, 3+2n stores */
131 29:     ldu     r9,8(r4)
132         sld     r8,r0,r10
133         addi    r3,r3,-8
134         blt     cr6,5f
135 30:     ld      r0,8(r4)
136         srd     r12,r9,r11
137         sld     r6,r9,r10
138 31:     ldu     r9,16(r4)
139         or      r12,r8,r12
140         srd     r7,r0,r11
141         sld     r8,r0,r10
142         addi    r3,r3,16
143         beq     cr6,78f
145 1:      or      r7,r7,r6
146 32:     ld      r0,8(r4)
147 76:     std     r12,8(r3)
148 2:      srd     r12,r9,r11
149         sld     r6,r9,r10
150 33:     ldu     r9,16(r4)
151         or      r12,r8,r12
152 77:     stdu    r7,16(r3)
153         srd     r7,r0,r11
154         sld     r8,r0,r10
155         bdnz    1b
157 78:     std     r12,8(r3)
158         or      r7,r7,r6
159 79:     std     r7,16(r3)
160 5:      srd     r12,r9,r11
161         or      r12,r8,r12
162 80:     std     r12,24(r3)
163         bne     6f
164         li      r3,0
165         blr
166 6:      cmpwi   cr1,r5,8
167         addi    r3,r3,32
168         sld     r9,r9,r10
169         ble     cr1,7f
170 34:     ld      r0,8(r4)
171         srd     r7,r0,r11
172         or      r9,r7,r9
174         bf      cr7*4+1,1f
175         rotldi  r9,r9,32
176 94:     stw     r9,0(r3)
177         addi    r3,r3,4
178 1:      bf      cr7*4+2,2f
179         rotldi  r9,r9,16
180 95:     sth     r9,0(r3)
181         addi    r3,r3,2
182 2:      bf      cr7*4+3,3f
183         rotldi  r9,r9,8
184 96:     stb     r9,0(r3)
185 3:      li      r3,0
186         blr
188 .Ldst_unaligned:
189         PPC_MTOCRF      0x01,r6         /* put #bytes to 8B bdry into cr7 */
190         subf    r5,r6,r5
191         li      r7,0
192         cmpldi  cr1,r5,16
193         bf      cr7*4+3,1f
194 35:     lbz     r0,0(r4)
195 81:     stb     r0,0(r3)
196         addi    r7,r7,1
197 1:      bf      cr7*4+2,2f
198 36:     lhzx    r0,r7,r4
199 82:     sthx    r0,r7,r3
200         addi    r7,r7,2
201 2:      bf      cr7*4+1,3f
202 37:     lwzx    r0,r7,r4
203 83:     stwx    r0,r7,r3
204 3:      PPC_MTOCRF      0x01,r5
205         add     r4,r6,r4
206         add     r3,r6,r3
207         b       .Ldst_aligned
209 .Lshort_copy:
210         bf      cr7*4+0,1f
211 38:     lwz     r0,0(r4)
212 39:     lwz     r9,4(r4)
213         addi    r4,r4,8
214 84:     stw     r0,0(r3)
215 85:     stw     r9,4(r3)
216         addi    r3,r3,8
217 1:      bf      cr7*4+1,2f
218 40:     lwz     r0,0(r4)
219         addi    r4,r4,4
220 86:     stw     r0,0(r3)
221         addi    r3,r3,4
222 2:      bf      cr7*4+2,3f
223 41:     lhz     r0,0(r4)
224         addi    r4,r4,2
225 87:     sth     r0,0(r3)
226         addi    r3,r3,2
227 3:      bf      cr7*4+3,4f
228 42:     lbz     r0,0(r4)
229 88:     stb     r0,0(r3)
230 4:      li      r3,0
231         blr
234  * exception handlers follow
235  * we have to return the number of bytes not copied
236  * for an exception on a load, we set the rest of the destination to 0
237  */
239 136:
240 137:
241         add     r3,r3,r7
242         b       1f
243 130:
244 131:
245         addi    r3,r3,8
246 120:
247 320:
248 122:
249 322:
250 124:
251 125:
252 126:
253 127:
254 128:
255 129:
256 133:
257         addi    r3,r3,8
258 132:
259         addi    r3,r3,8
260 121:
261 321:
262 344:
263 134:
264 135:
265 138:
266 139:
267 140:
268 141:
269 142:
270 123:
271 144:
272 145:
275  * here we have had a fault on a load and r3 points to the first
276  * unmodified byte of the destination
277  */
278 1:      ld      r6,-24(r1)
279         ld      r4,-16(r1)
280         ld      r5,-8(r1)
281         subf    r6,r6,r3
282         add     r4,r4,r6
283         subf    r5,r6,r5        /* #bytes left to go */
286  * first see if we can copy any more bytes before hitting another exception
287  */
288         mtctr   r5
289 43:     lbz     r0,0(r4)
290         addi    r4,r4,1
291 89:     stb     r0,0(r3)
292         addi    r3,r3,1
293         bdnz    43b
294         li      r3,0            /* huh? all copied successfully this time? */
295         blr
298  * here we have trapped again, need to clear ctr bytes starting at r3
299  */
300 143:    mfctr   r5
301         li      r0,0
302         mr      r4,r3
303         mr      r3,r5           /* return the number of bytes not copied */
304 1:      andi.   r9,r4,7
305         beq     3f
306 90:     stb     r0,0(r4)
307         addic.  r5,r5,-1
308         addi    r4,r4,1
309         bne     1b
310         blr
311 3:      cmpldi  cr1,r5,8
312         srdi    r9,r5,3
313         andi.   r5,r5,7
314         blt     cr1,93f
315         mtctr   r9
316 91:     std     r0,0(r4)
317         addi    r4,r4,8
318         bdnz    91b
319 93:     beqlr
320         mtctr   r5      
321 92:     stb     r0,0(r4)
322         addi    r4,r4,1
323         bdnz    92b
324         blr
327  * exception handlers for stores: we just need to work
328  * out how many bytes weren't copied
329  */
330 182:
331 183:
332         add     r3,r3,r7
333         b       1f
334 371:
335 180:
336         addi    r3,r3,8
337 171:
338 177:
339         addi    r3,r3,8
340 370:
341 372:
342 176:
343 178:
344         addi    r3,r3,4
345 185:
346         addi    r3,r3,4
347 170:
348 172:
349 345:
350 173:
351 174:
352 175:
353 179:
354 181:
355 184:
356 186:
357 187:
358 188:
359 189:    
360 194:
361 195:
362 196:
364         ld      r6,-24(r1)
365         ld      r5,-8(r1)
366         add     r6,r6,r5
367         subf    r3,r3,r6        /* #bytes not copied */
368 190:
369 191:
370 192:
371         blr                     /* #bytes not copied in r3 */
373         .section __ex_table,"a"
374         .align  3
375         .llong  20b,120b
376         .llong  220b,320b
377         .llong  21b,121b
378         .llong  221b,321b
379         .llong  70b,170b
380         .llong  270b,370b
381         .llong  22b,122b
382         .llong  222b,322b
383         .llong  71b,171b
384         .llong  271b,371b
385         .llong  72b,172b
386         .llong  272b,372b
387         .llong  244b,344b
388         .llong  245b,345b
389         .llong  23b,123b
390         .llong  73b,173b
391         .llong  44b,144b
392         .llong  74b,174b
393         .llong  45b,145b
394         .llong  75b,175b
395         .llong  24b,124b
396         .llong  25b,125b
397         .llong  26b,126b
398         .llong  27b,127b
399         .llong  28b,128b
400         .llong  29b,129b
401         .llong  30b,130b
402         .llong  31b,131b
403         .llong  32b,132b
404         .llong  76b,176b
405         .llong  33b,133b
406         .llong  77b,177b
407         .llong  78b,178b
408         .llong  79b,179b
409         .llong  80b,180b
410         .llong  34b,134b
411         .llong  94b,194b
412         .llong  95b,195b
413         .llong  96b,196b
414         .llong  35b,135b
415         .llong  81b,181b
416         .llong  36b,136b
417         .llong  82b,182b
418         .llong  37b,137b
419         .llong  83b,183b
420         .llong  38b,138b
421         .llong  39b,139b
422         .llong  84b,184b
423         .llong  85b,185b
424         .llong  40b,140b
425         .llong  86b,186b
426         .llong  41b,141b
427         .llong  87b,187b
428         .llong  42b,142b
429         .llong  88b,188b
430         .llong  43b,143b
431         .llong  89b,189b
432         .llong  90b,190b
433         .llong  91b,191b
434         .llong  92b,192b
435         
436         .text
439  * Routine to copy a whole page of data, optimized for POWER4.
440  * On POWER4 it is more than 50% faster than the simple loop
441  * above (following the .Ldst_aligned label) but it runs slightly
442  * slower on POWER3.
443  */
444 .Lcopy_page_4K:
445         std     r31,-32(1)
446         std     r30,-40(1)
447         std     r29,-48(1)
448         std     r28,-56(1)
449         std     r27,-64(1)
450         std     r26,-72(1)
451         std     r25,-80(1)
452         std     r24,-88(1)
453         std     r23,-96(1)
454         std     r22,-104(1)
455         std     r21,-112(1)
456         std     r20,-120(1)
457         li      r5,4096/32 - 1
458         addi    r3,r3,-8
459         li      r0,5
460 0:      addi    r5,r5,-24
461         mtctr   r0
462 20:     ld      r22,640(4)
463 21:     ld      r21,512(4)
464 22:     ld      r20,384(4)
465 23:     ld      r11,256(4)
466 24:     ld      r9,128(4)
467 25:     ld      r7,0(4)
468 26:     ld      r25,648(4)
469 27:     ld      r24,520(4)
470 28:     ld      r23,392(4)
471 29:     ld      r10,264(4)
472 30:     ld      r8,136(4)
473 31:     ldu     r6,8(4)
474         cmpwi   r5,24
476 32:     std     r22,648(3)
477 33:     std     r21,520(3)
478 34:     std     r20,392(3)
479 35:     std     r11,264(3)
480 36:     std     r9,136(3)
481 37:     std     r7,8(3)
482 38:     ld      r28,648(4)
483 39:     ld      r27,520(4)
484 40:     ld      r26,392(4)
485 41:     ld      r31,264(4)
486 42:     ld      r30,136(4)
487 43:     ld      r29,8(4)
488 44:     std     r25,656(3)
489 45:     std     r24,528(3)
490 46:     std     r23,400(3)
491 47:     std     r10,272(3)
492 48:     std     r8,144(3)
493 49:     std     r6,16(3)
494 50:     ld      r22,656(4)
495 51:     ld      r21,528(4)
496 52:     ld      r20,400(4)
497 53:     ld      r11,272(4)
498 54:     ld      r9,144(4)
499 55:     ld      r7,16(4)
500 56:     std     r28,664(3)
501 57:     std     r27,536(3)
502 58:     std     r26,408(3)
503 59:     std     r31,280(3)
504 60:     std     r30,152(3)
505 61:     stdu    r29,24(3)
506 62:     ld      r25,664(4)
507 63:     ld      r24,536(4)
508 64:     ld      r23,408(4)
509 65:     ld      r10,280(4)
510 66:     ld      r8,152(4)
511 67:     ldu     r6,24(4)
512         bdnz    1b
513 68:     std     r22,648(3)
514 69:     std     r21,520(3)
515 70:     std     r20,392(3)
516 71:     std     r11,264(3)
517 72:     std     r9,136(3)
518 73:     std     r7,8(3)
519 74:     addi    r4,r4,640
520 75:     addi    r3,r3,648
521         bge     0b
522         mtctr   r5
523 76:     ld      r7,0(4)
524 77:     ld      r8,8(4)
525 78:     ldu     r9,16(4)
527 79:     ld      r10,8(4)
528 80:     std     r7,8(3)
529 81:     ld      r7,16(4)
530 82:     std     r8,16(3)
531 83:     ld      r8,24(4)
532 84:     std     r9,24(3)
533 85:     ldu     r9,32(4)
534 86:     stdu    r10,32(3)
535         bdnz    3b
537 87:     ld      r10,8(4)
538 88:     std     r7,8(3)
539 89:     std     r8,16(3)
540 90:     std     r9,24(3)
541 91:     std     r10,32(3)
542 9:      ld      r20,-120(1)
543         ld      r21,-112(1)
544         ld      r22,-104(1)
545         ld      r23,-96(1)
546         ld      r24,-88(1)
547         ld      r25,-80(1)
548         ld      r26,-72(1)
549         ld      r27,-64(1)
550         ld      r28,-56(1)
551         ld      r29,-48(1)
552         ld      r30,-40(1)
553         ld      r31,-32(1)
554         li      r3,0
555         blr
558  * on an exception, reset to the beginning and jump back into the
559  * standard __copy_tofrom_user
560  */
561 100:    ld      r20,-120(1)
562         ld      r21,-112(1)
563         ld      r22,-104(1)
564         ld      r23,-96(1)
565         ld      r24,-88(1)
566         ld      r25,-80(1)
567         ld      r26,-72(1)
568         ld      r27,-64(1)
569         ld      r28,-56(1)
570         ld      r29,-48(1)
571         ld      r30,-40(1)
572         ld      r31,-32(1)
573         ld      r3,-24(r1)
574         ld      r4,-16(r1)
575         li      r5,4096
576         b       .Ldst_aligned
578         .section __ex_table,"a"
579         .align  3
580         .llong  20b,100b
581         .llong  21b,100b
582         .llong  22b,100b
583         .llong  23b,100b
584         .llong  24b,100b
585         .llong  25b,100b
586         .llong  26b,100b
587         .llong  27b,100b
588         .llong  28b,100b
589         .llong  29b,100b
590         .llong  30b,100b
591         .llong  31b,100b
592         .llong  32b,100b
593         .llong  33b,100b
594         .llong  34b,100b
595         .llong  35b,100b
596         .llong  36b,100b
597         .llong  37b,100b
598         .llong  38b,100b
599         .llong  39b,100b
600         .llong  40b,100b
601         .llong  41b,100b
602         .llong  42b,100b
603         .llong  43b,100b
604         .llong  44b,100b
605         .llong  45b,100b
606         .llong  46b,100b
607         .llong  47b,100b
608         .llong  48b,100b
609         .llong  49b,100b
610         .llong  50b,100b
611         .llong  51b,100b
612         .llong  52b,100b
613         .llong  53b,100b
614         .llong  54b,100b
615         .llong  55b,100b
616         .llong  56b,100b
617         .llong  57b,100b
618         .llong  58b,100b
619         .llong  59b,100b
620         .llong  60b,100b
621         .llong  61b,100b
622         .llong  62b,100b
623         .llong  63b,100b
624         .llong  64b,100b
625         .llong  65b,100b
626         .llong  66b,100b
627         .llong  67b,100b
628         .llong  68b,100b
629         .llong  69b,100b
630         .llong  70b,100b
631         .llong  71b,100b
632         .llong  72b,100b
633         .llong  73b,100b
634         .llong  74b,100b
635         .llong  75b,100b
636         .llong  76b,100b
637         .llong  77b,100b
638         .llong  78b,100b
639         .llong  79b,100b
640         .llong  80b,100b
641         .llong  81b,100b
642         .llong  82b,100b
643         .llong  83b,100b
644         .llong  84b,100b
645         .llong  85b,100b
646         .llong  86b,100b
647         .llong  87b,100b
648         .llong  88b,100b
649         .llong  89b,100b
650         .llong  90b,100b
651         .llong  91b,100b