On Tue, Nov 06, 2007 at 02:33:53AM -0800, akpm@linux-foundation.org wrote:
[mmotm.git] / arch / powerpc / lib / copyuser_64.S
blob693b14a778fa152f4c595b22e30ca52bc724bee5
/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>

        .align  7
/*
 * __copy_tofrom_user
 *
 * In:   r3 = destination, r4 = source, r5 = number of bytes
 * Out:  r3 = 0 if every byte was copied, otherwise the number of bytes
 *       that were not copied (worked out by the exception handlers).
 *
 * The three arguments are saved at -24/-16/-8(r1) so that the exception
 * handlers can work out how far the copy got.
 *
 * Dispatch performed here:
 *   - exactly 4096 bytes with both pointers 4K-aligned -> .Lcopy_page_4K
 *   - fewer than 16 bytes                              -> .Lshort_copy
 *   - destination not 8-byte aligned                   -> .Ldst_unaligned
 *     (unless that branch has been patched to a nop, see below)
 *   - otherwise fall through into .Ldst_aligned
 */
_GLOBAL(__copy_tofrom_user)
        /* first check for a whole page copy on a page boundary */
        cmpldi  cr1,r5,16               /* cr1: count vs 16, unsigned */
        cmpdi   cr6,r5,4096             /* cr6.eq <=> count == 4096 */
        or      r0,r3,r4
        neg     r6,r3           /* LS 3 bits = # bytes to 8-byte dest bdry */
        andi.   r0,r0,4095              /* cr0.eq <=> both pointers 4K-aligned */
        std     r3,-24(r1)              /* save dest ... */
        crand   cr0*4+2,cr0*4+2,cr6*4+2 /* cr0.eq &= cr6.eq */
        std     r4,-16(r1)              /* ... source ... */
        std     r5,-8(r1)               /* ... and count for the handlers */
        dcbt    0,r4
        beq     .Lcopy_page_4K
        andi.   r6,r6,7                 /* r6 = bytes to 8-byte dest boundary */
        PPC_MTOCRF      0x01,r5         /* low 4 bits of count -> cr7 */
        blt     cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.
 * At the time of writing the only CPU that has this combination of bits
 * set is Power6.
 */
BEGIN_FTR_SECTION
        nop
FTR_SECTION_ELSE
        bne     .Ldst_unaligned         /* cr0 from 'andi. r6,r6,7' above */
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
                    CPU_FTR_UNALIGNED_LD_STD)
/*
 * Main copy loop: 16 bytes (two doublewords) per iteration.
 *
 * On entry r3/r4/r5 are dest/source/count, cr1 holds the comparison of
 * the count with 16 and cr7 holds the low four bits of the count
 * (cr7*4+0 = 8s bit, +1 = 4s bit, +2 = 2s bit, +3 = 1s bit).
 * r3 is biased by -16 and r4 by -8 so that the update forms ldu/stdu
 * can be used inside the loop.
 */
.Ldst_aligned:
        addi    r3,r3,-16
BEGIN_FTR_SECTION
        andi.   r0,r4,7                 /* r0 = source misalignment within 8 bytes */
        bne     .Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
        srdi    r7,r5,4                 /* r7 = number of 16-byte chunks */
20:     ld      r9,0(r4)
        addi    r4,r4,-8
        mtctr   r7
        andi.   r5,r5,7                 /* cr0.eq <=> no 1..7 byte tail; used by beq+ below */
        bf      cr7*4+0,22f             /* 8s bit clear: enter the loop at 22 */
        addi    r3,r3,8                 /* 8s bit set: one extra doubleword to move */
        addi    r4,r4,8
        mr      r8,r9
        blt     cr1,72f                 /* count < 16: only that one doubleword */
21:     ld      r9,8(r4)
70:     std     r8,8(r3)
22:     ldu     r8,16(r4)
71:     stdu    r9,16(r3)
        bdnz    21b
72:     std     r8,8(r3)
        beq+    3f                      /* no tail bytes: done */
        addi    r3,r3,16                /* r3 -> next destination byte */
/*
 * Copy the final 0..7 bytes as word / halfword / byte according to the
 * 4s, 2s and 1s bits of the count held in cr7.  The loads use 8(r4)
 * because r4 still points 8 bytes behind the next unread source byte.
 */
.Ldo_tail:
        bf      cr7*4+1,1f
23:     lwz     r9,8(r4)
        addi    r4,r4,4
73:     stw     r9,0(r3)
        addi    r3,r3,4
1:      bf      cr7*4+2,2f
44:     lhz     r9,8(r4)
        addi    r4,r4,2
74:     sth     r9,0(r3)
        addi    r3,r3,2
2:      bf      cr7*4+3,3f
45:     lbz     r9,8(r4)
75:     stb     r9,0(r3)
3:      li      r3,0                    /* success: nothing left uncopied */
        blr
/*
 * Destination is 8-byte aligned but the source is not.  r0 holds the
 * source misalignment (source & 7), left there by the andi. that guards
 * the branch to this label.
 *
 * The source pointer is rounded down to an 8-byte boundary and every
 * destination doubleword is assembled from two neighbouring source
 * doublewords:   (earlier << r10) | (later >> r11)
 * with r10 = 8 * misalignment and r11 = 64 - r10.
 */
.Lsrc_unaligned:
        srdi    r6,r5,3                 /* r6 = count / 8 */
        addi    r5,r5,-16
        subf    r4,r0,r4                /* round source down to 8 bytes */
        srdi    r7,r5,4                 /* r7 = (count - 16) / 16 -> loop count */
        sldi    r10,r0,3                /* r10 = left shift, in bits */
        cmpldi  cr6,r6,3                /* cr6: doubleword count vs 3 (short cases skip the loop) */
        andi.   r5,r5,7                 /* cr0.eq <=> no tail bytes; tested by 'bne 6f' */
        mtctr   r7
        subfic  r11,r10,64              /* r11 = right shift, in bits */
        add     r5,r5,r0                /* r5 = tail bytes + misalignment */
        bt      cr7*4+0,28f             /* 8s bit of count set: odd doubleword count */

24:     ld      r9,0(r4)        /* 3+2n loads, 2+2n stores */
25:     ld      r0,8(r4)
        sld     r6,r9,r10
26:     ldu     r9,16(r4)
        srd     r7,r0,r11
        sld     r8,r0,r10
        or      r7,r7,r6
        blt     cr6,79f
27:     ld      r0,8(r4)
        b       2f

28:     ld      r0,0(r4)        /* 4+2n loads, 3+2n stores */
29:     ldu     r9,8(r4)
        sld     r8,r0,r10
        addi    r3,r3,-8
        blt     cr6,5f
30:     ld      r0,8(r4)
        srd     r12,r9,r11
        sld     r6,r9,r10
31:     ldu     r9,16(r4)
        or      r12,r8,r12
        srd     r7,r0,r11
        sld     r8,r0,r10
        addi    r3,r3,16
        beq     cr6,78f

        /* main loop: two destination doublewords per iteration */
1:      or      r7,r7,r6
32:     ld      r0,8(r4)
76:     std     r12,8(r3)
2:      srd     r12,r9,r11
        sld     r6,r9,r10
33:     ldu     r9,16(r4)
        or      r12,r8,r12
77:     stdu    r7,16(r3)
        srd     r7,r0,r11
        sld     r8,r0,r10
        bdnz    1b

78:     std     r12,8(r3)
        or      r7,r7,r6
79:     std     r7,16(r3)
5:      srd     r12,r9,r11
        or      r12,r8,r12
80:     std     r12,24(r3)
        bne     6f                      /* tail bytes remain */
        li      r3,0
        blr
        /*
         * Tail: the leftover source bytes sit in r9.  When tail bytes plus
         * misalignment exceed 8, part of the tail lives in the next source
         * doubleword, which is merged in at 34.
         */
6:      cmpwi   cr1,r5,8
        addi    r3,r3,32
        sld     r9,r9,r10
        ble     cr1,7f
34:     ld      r0,8(r4)
        srd     r7,r0,r11
        or      r9,r7,r9
7:
        /* rotate the next 4/2/1 bytes to the low end of r9 and store them */
        bf      cr7*4+1,1f
        rotldi  r9,r9,32
94:     stw     r9,0(r3)
        addi    r3,r3,4
1:      bf      cr7*4+2,2f
        rotldi  r9,r9,16
95:     sth     r9,0(r3)
        addi    r3,r3,2
2:      bf      cr7*4+3,3f
        rotldi  r9,r9,8
96:     stb     r9,0(r3)
3:      li      r3,0
        blr
/*
 * Destination is not 8-byte aligned.  r6 = number of bytes (1..7) up to
 * the next 8-byte destination boundary.  Copy those bytes as
 * byte / halfword / word pieces, then rejoin .Ldst_aligned with both
 * pointers advanced by r6 and the count reduced by r6.
 */
.Ldst_unaligned:
        PPC_MTOCRF      0x01,r6         /* put #bytes to 8B bdry into cr7 */
        subf    r5,r6,r5                /* r5 = bytes left after aligning */
        li      r7,0                    /* r7 = offset of the next byte to move */
        cmpldi  cr1,r5,16               /* refresh cr1 for .Ldst_aligned */
        bf      cr7*4+3,1f
35:     lbz     r0,0(r4)
81:     stb     r0,0(r3)
        addi    r7,r7,1
1:      bf      cr7*4+2,2f
36:     lhzx    r0,r7,r4
82:     sthx    r0,r7,r3
        addi    r7,r7,2
2:      bf      cr7*4+1,3f
37:     lwzx    r0,r7,r4
83:     stwx    r0,r7,r3
3:      PPC_MTOCRF      0x01,r5         /* low 4 bits of the new count -> cr7 */
        add     r4,r6,r4
        add     r3,r6,r3
        b       .Ldst_aligned
/*
 * Fewer than 16 bytes to copy.  cr7 holds the low four bits of the
 * count; move 8 (as two words), 4, 2 and 1 bytes according to the
 * corresponding bit, advancing r3/r4 as we go.
 */
.Lshort_copy:
        bf      cr7*4+0,1f              /* 8s bit */
38:     lwz     r0,0(r4)
39:     lwz     r9,4(r4)
        addi    r4,r4,8
84:     stw     r0,0(r3)
85:     stw     r9,4(r3)
        addi    r3,r3,8
1:      bf      cr7*4+1,2f              /* 4s bit */
40:     lwz     r0,0(r4)
        addi    r4,r4,4
86:     stw     r0,0(r3)
        addi    r3,r3,4
2:      bf      cr7*4+2,3f              /* 2s bit */
41:     lhz     r0,0(r4)
        addi    r4,r4,2
87:     sth     r0,0(r3)
        addi    r3,r3,2
3:      bf      cr7*4+3,4f              /* 1s bit */
42:     lbz     r0,0(r4)
88:     stb     r0,0(r3)
4:      li      r3,0                    /* success: nothing left uncopied */
        blr
/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we set the rest of the destination to 0
 */

/*
 * Label N+100 is the fixup for a fault on the load labelled N (the
 * pairing is made in the __ex_table section).  The entry points fall
 * through one another; each add/addi on the way corrects r3 so that it
 * ends up pointing at the first unmodified destination byte.
 */
136:
137:
        add     r3,r3,r7                /* the faulting loads index by r7 */
        b       1f
130:
131:
        addi    r3,r3,8
120:
122:
124:
125:
126:
127:
128:
129:
133:
        addi    r3,r3,8
121:
132:
        addi    r3,r3,8
134:
135:
138:
139:
140:
141:
142:
123:
144:
145:

/*
 * here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination
 */
1:      ld      r6,-24(r1)              /* original dest */
        ld      r4,-16(r1)              /* original source */
        ld      r5,-8(r1)               /* original count */
        subf    r6,r6,r3                /* r6 = bytes already copied */
        add     r4,r4,r6                /* r4 = matching source position */
        subf    r5,r6,r5        /* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
        mtctr   r5
43:     lbz     r0,0(r4)
        addi    r4,r4,1
89:     stb     r0,0(r3)
        addi    r3,r3,1
        bdnz    43b
        li      r3,0            /* huh? all copied successfully this time? */
        blr

/*
 * here we have trapped again, need to clear ctr bytes starting at r3
 */
143:    mfctr   r5                      /* r5 = bytes still outstanding */
        li      r0,0
        mr      r4,r3                   /* r4 = where to start clearing */
        mr      r3,r5           /* return the number of bytes not copied */
        /* clear single bytes until r4 is 8-byte aligned */
1:      andi.   r9,r4,7
        beq     3f
90:     stb     r0,0(r4)
        addic.  r5,r5,-1
        addi    r4,r4,1
        bne     1b
        blr                             /* ran out of bytes before aligning */
        /* clear whole doublewords, then the remaining 0..7 bytes */
3:      cmpldi  cr1,r5,8
        srdi    r9,r5,3
        andi.   r5,r5,7
        blt     cr1,93f
        mtctr   r9
91:     std     r0,0(r4)
        addi    r4,r4,8
        bdnz    91b
93:     beqlr                           /* cr0 from 'andi. r5,r5,7': nothing left */
        mtctr   r5
92:     stb     r0,0(r4)
        addi    r4,r4,1
        bdnz    92b
        blr
/*
 * exception handlers for stores: we just need to work
 * out how many bytes weren't copied
 */
/*
 * Label N+100 is the fixup for a fault on the store labelled N (the
 * pairing is made in the __ex_table section).  The entry points fall
 * through one another; each add/addi corrects r3 before the common
 * code at 1: turns it into a byte count.
 */
182:
183:
        add     r3,r3,r7                /* the faulting stores index by r7 */
        b       1f
180:
        addi    r3,r3,8
171:
177:
        addi    r3,r3,8
170:
172:
176:
178:
        addi    r3,r3,4
185:
        addi    r3,r3,4
173:
174:
175:
179:
181:
184:
186:
187:
188:
189:
194:
195:
196:
1:
        ld      r6,-24(r1)              /* original dest */
        ld      r5,-8(r1)               /* original count */
        add     r6,r6,r5                /* r6 = one past the end of the dest */
        subf    r3,r3,r6        /* #bytes not copied */
/*
 * 190-192 are the fixups for the zero-filling stores (90/91/92); r3
 * already holds the return value there, so they just return.
 */
190:
191:
192:
        blr                     /* #bytes not copied in r3 */
/*
 * Exception table for the code above.  Each entry pairs the address of
 * an instruction that may fault on a user access with the address of
 * its fixup: loads 20-45 map to 120-145, stores 70-96 map to 170-196.
 * The numeric labels are backward references, so this table has to stay
 * after the code it describes and before the labels are reused by the
 * page copy routine.
 */
        .section __ex_table,"a"
        .align  3
        .llong  20b,120b
        .llong  21b,121b
        .llong  70b,170b
        .llong  22b,122b
        .llong  71b,171b
        .llong  72b,172b
        .llong  23b,123b
        .llong  73b,173b
        .llong  44b,144b
        .llong  74b,174b
        .llong  45b,145b
        .llong  75b,175b
        .llong  24b,124b
        .llong  25b,125b
        .llong  26b,126b
        .llong  27b,127b
        .llong  28b,128b
        .llong  29b,129b
        .llong  30b,130b
        .llong  31b,131b
        .llong  32b,132b
        .llong  76b,176b
        .llong  33b,133b
        .llong  77b,177b
        .llong  78b,178b
        .llong  79b,179b
        .llong  80b,180b
        .llong  34b,134b
        .llong  94b,194b
        .llong  95b,195b
        .llong  96b,196b
        .llong  35b,135b
        .llong  81b,181b
        .llong  36b,136b
        .llong  82b,182b
        .llong  37b,137b
        .llong  83b,183b
        .llong  38b,138b
        .llong  39b,139b
        .llong  84b,184b
        .llong  85b,185b
        .llong  40b,140b
        .llong  86b,186b
        .llong  41b,141b
        .llong  87b,187b
        .llong  42b,142b
        .llong  88b,188b
        .llong  43b,143b
        .llong  89b,189b
        .llong  90b,190b
        .llong  91b,191b
        .llong  92b,192b

        .text
/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label) but it runs slightly
 * slower on POWER3.
 */
/*
 * Entered with r3 = dest and r4 = source, both 4K-aligned, for a copy of
 * exactly 4096 bytes.  r20-r31 are saved below the stack pointer and
 * reloaded before returning.
 *
 * r5 counts the remaining work in 32-byte units.  Each pass of the outer
 * loop at 0: moves 768 bytes as six interleaved streams spaced 128 bytes
 * apart (five trips round the inner loop at 1:).  The final 256 bytes
 * are moved by the simpler loop at 3:.
 */
.Lcopy_page_4K:
        std     r31,-32(1)
        std     r30,-40(1)
        std     r29,-48(1)
        std     r28,-56(1)
        std     r27,-64(1)
        std     r26,-72(1)
        std     r25,-80(1)
        std     r24,-88(1)
        std     r23,-96(1)
        std     r22,-104(1)
        std     r21,-112(1)
        std     r20,-120(1)
        li      r5,4096/32 - 1
        addi    r3,r3,-8                /* bias dest for the 8(3)-based stores */
        li      r0,5                    /* inner loop trip count */
0:      addi    r5,r5,-24               /* this pass accounts for 24 units */
        mtctr   r0
20:     ld      r22,640(4)
21:     ld      r21,512(4)
22:     ld      r20,384(4)
23:     ld      r11,256(4)
24:     ld      r9,128(4)
25:     ld      r7,0(4)
26:     ld      r25,648(4)
27:     ld      r24,520(4)
28:     ld      r23,392(4)
29:     ld      r10,264(4)
30:     ld      r8,136(4)
31:     ldu     r6,8(4)
        cmpwi   r5,24                   /* cr0 for the 'bge 0b' at the end of the pass */
1:
32:     std     r22,648(3)
33:     std     r21,520(3)
34:     std     r20,392(3)
35:     std     r11,264(3)
36:     std     r9,136(3)
37:     std     r7,8(3)
38:     ld      r28,648(4)
39:     ld      r27,520(4)
40:     ld      r26,392(4)
41:     ld      r31,264(4)
42:     ld      r30,136(4)
43:     ld      r29,8(4)
44:     std     r25,656(3)
45:     std     r24,528(3)
46:     std     r23,400(3)
47:     std     r10,272(3)
48:     std     r8,144(3)
49:     std     r6,16(3)
50:     ld      r22,656(4)
51:     ld      r21,528(4)
52:     ld      r20,400(4)
53:     ld      r11,272(4)
54:     ld      r9,144(4)
55:     ld      r7,16(4)
56:     std     r28,664(3)
57:     std     r27,536(3)
58:     std     r26,408(3)
59:     std     r31,280(3)
60:     std     r30,152(3)
61:     stdu    r29,24(3)
62:     ld      r25,664(4)
63:     ld      r24,536(4)
64:     ld      r23,408(4)
65:     ld      r10,280(4)
66:     ld      r8,152(4)
67:     ldu     r6,24(4)
        bdnz    1b
68:     std     r22,648(3)
69:     std     r21,520(3)
70:     std     r20,392(3)
71:     std     r11,264(3)
72:     std     r9,136(3)
73:     std     r7,8(3)
74:     addi    r4,r4,640               /* step both pointers to the next 768-byte group */
75:     addi    r3,r3,648
        bge     0b                      /* at least 24 units left: another pass */
        mtctr   r5                      /* remaining units drive the tail loop */
76:     ld      r7,0(4)
77:     ld      r8,8(4)
78:     ldu     r9,16(4)
3:
79:     ld      r10,8(4)
80:     std     r7,8(3)
81:     ld      r7,16(4)
82:     std     r8,16(3)
83:     ld      r8,24(4)
84:     std     r9,24(3)
85:     ldu     r9,32(4)
86:     stdu    r10,32(3)
        bdnz    3b
87:     ld      r10,8(4)
88:     std     r7,8(3)
89:     std     r8,16(3)
90:     std     r9,24(3)
91:     std     r10,32(3)
9:      ld      r20,-120(1)
        ld      r21,-112(1)
        ld      r22,-104(1)
        ld      r23,-96(1)
        ld      r24,-88(1)
        ld      r25,-80(1)
        ld      r26,-72(1)
        ld      r27,-64(1)
        ld      r28,-56(1)
        ld      r29,-48(1)
        ld      r30,-40(1)
        ld      r31,-32(1)
        li      r3,0                    /* success: nothing left uncopied */
        blr
/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
/*
 * Reloads r20-r31 and the original dest/source from their save slots,
 * sets the count back to 4096 and restarts the copy at .Ldst_aligned,
 * whose own exception handlers then work out the return value.
 *
 * NOTE(review): .Ldst_aligned tests cr7*4+0, but on the page-copy path
 * cr7 is never loaded from the count (the entry code branches here
 * before its PPC_MTOCRF).  Confirm whether cr7 should be set from r5
 * before the branch below.
 */
100:    ld      r20,-120(1)
        ld      r21,-112(1)
        ld      r22,-104(1)
        ld      r23,-96(1)
        ld      r24,-88(1)
        ld      r25,-80(1)
        ld      r26,-72(1)
        ld      r27,-64(1)
        ld      r28,-56(1)
        ld      r29,-48(1)
        ld      r30,-40(1)
        ld      r31,-32(1)
        ld      r3,-24(r1)              /* original dest */
        ld      r4,-16(r1)              /* original source */
        li      r5,4096
        b       .Ldst_aligned
/*
 * Exception table for .Lcopy_page_4K: every labelled instruction in the
 * page copy (20-91) shares the single fixup at 100.  The numeric labels
 * are backward references and resolve to the definitions in the page
 * copy routine, the most recent ones preceding this table.
 */
        .section __ex_table,"a"
        .align  3
        .llong  20b,100b
        .llong  21b,100b
        .llong  22b,100b
        .llong  23b,100b
        .llong  24b,100b
        .llong  25b,100b
        .llong  26b,100b
        .llong  27b,100b
        .llong  28b,100b
        .llong  29b,100b
        .llong  30b,100b
        .llong  31b,100b
        .llong  32b,100b
        .llong  33b,100b
        .llong  34b,100b
        .llong  35b,100b
        .llong  36b,100b
        .llong  37b,100b
        .llong  38b,100b
        .llong  39b,100b
        .llong  40b,100b
        .llong  41b,100b
        .llong  42b,100b
        .llong  43b,100b
        .llong  44b,100b
        .llong  45b,100b
        .llong  46b,100b
        .llong  47b,100b
        .llong  48b,100b
        .llong  49b,100b
        .llong  50b,100b
        .llong  51b,100b
        .llong  52b,100b
        .llong  53b,100b
        .llong  54b,100b
        .llong  55b,100b
        .llong  56b,100b
        .llong  57b,100b
        .llong  58b,100b
        .llong  59b,100b
        .llong  60b,100b
        .llong  61b,100b
        .llong  62b,100b
        .llong  63b,100b
        .llong  64b,100b
        .llong  65b,100b
        .llong  66b,100b
        .llong  67b,100b
        .llong  68b,100b
        .llong  69b,100b
        .llong  70b,100b
        .llong  71b,100b
        .llong  72b,100b
        .llong  73b,100b
        .llong  74b,100b
        .llong  75b,100b
        .llong  76b,100b
        .llong  77b,100b
        .llong  78b,100b
        .llong  79b,100b
        .llong  80b,100b
        .llong  81b,100b
        .llong  82b,100b
        .llong  83b,100b
        .llong  84b,100b
        .llong  85b,100b
        .llong  86b,100b
        .llong  87b,100b
        .llong  88b,100b
        .llong  89b,100b
        .llong  90b,100b
        .llong  91b,100b