Merge tag 'sched-urgent-2020-12-27' of git://git.kernel.org/pub/scm/linux/kernel...
[linux/fpc-iii.git] / arch / powerpc / lib / copyuser_power7.S
blob28f0be523c06627fa1b0f447268b8a894c4e4bbd
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  *
4  * Copyright (C) IBM Corporation, 2011
5  *
6  * Author: Anton Blanchard <anton@au.ibm.com>
7  */
8 #include <asm/ppc_asm.h>
/*
 * Default SELFTEST_CASE for builds that do not define it themselves. The
 * value is assigned to test_feature just before the ALTIVEC feature section
 * in __copy_tofrom_user_power7.
 */
10 #ifndef SELFTEST_CASE
11 /* 0 == don't use VMX, 1 == use VMX */
12 #define SELFTEST_CASE   0
13 #endif
/*
 * Endian-dependent wrappers used by the unaligned VMX copy.
 *   big endian: LVS expands to lvsl and VPERM hands VRA,VRB to vperm as given.
 *   otherwise:  LVS expands to lvsr and VPERM swaps VRA and VRB for vperm.
 */
15 #ifdef __BIG_ENDIAN__
16 #define LVS(VRT,RA,RB)          lvsl    VRT,RA,RB
17 #define VPERM(VRT,VRA,VRB,VRC)  vperm   VRT,VRA,VRB,VRC
18 #else
19 #define LVS(VRT,RA,RB)          lvsr    VRT,RA,RB
20 #define VPERM(VRT,VRA,VRB,VRC)  vperm   VRT,VRB,VRA,VRC
21 #endif
/*
 * err1/err2 are written as a prefix on a memory access, e.g.
 * "err1; lbz r0,0(r4)". Each one drops a numeric local label at the current
 * location and emits an EX_TABLE entry pairing that label with a handler:
 * err1 -> .Ldo_err1, err2 -> .Ldo_err2.
 * NOTE(review): EX_TABLE is defined outside this file; presumably it records
 * a fault fixup for the labelled instruction - confirm.
 */
23         .macro err1
24 100:
25         EX_TABLE(100b,.Ldo_err1)
26         .endm
28         .macro err2
29 200:
30         EX_TABLE(200b,.Ldo_err2)
31         .endm
/*
 * VMX-only fault plumbing. err3/err4 have the same shape as err1/err2 but
 * pair their labels with .Ldo_err3/.Ldo_err4.
 */
33 #ifdef CONFIG_ALTIVEC
34         .macro err3
35 300:
36         EX_TABLE(300b,.Ldo_err3)
37         .endm
39         .macro err4
40 400:
41         EX_TABLE(400b,.Ldo_err4)
42         .endm
        /*
         * err4 target. err4 is used inside the VMX cacheline loops, which
         * load constants into r14-r16; reload those registers from the stack
         * frame and fall through into .Ldo_err3.
         */
45 .Ldo_err4:
46         ld      r16,STK_REG(R16)(r1)
47         ld      r15,STK_REG(R15)(r1)
48         ld      r14,STK_REG(R14)(r1)
        /*
         * err3 target. Call exit_vmx_usercopy, then reload LR from the save
         * slot at STACKFRAMESIZE+16(r1) (the bl overwrote it) and continue at
         * .Lexit, which pops the stack frame.
         */
49 .Ldo_err3:
50         bl      exit_vmx_usercopy
51         ld      r0,STACKFRAMESIZE+16(r1)
52         mtlr    r0
53         b       .Lexit
54 #endif /* CONFIG_ALTIVEC */
/*
 * err2 target (non-VMX cacheline loop): reload r14-r22 from the stack frame
 * and fall through into .Lexit.
 */
56 .Ldo_err2:
57         ld      r22,STK_REG(R22)(r1)
58         ld      r21,STK_REG(R21)(r1)
59         ld      r20,STK_REG(R20)(r1)
60         ld      r19,STK_REG(R19)(r1)
61         ld      r18,STK_REG(R18)(r1)
62         ld      r17,STK_REG(R17)(r1)
63         ld      r16,STK_REG(R16)(r1)
64         ld      r15,STK_REG(R15)(r1)
65         ld      r14,STK_REG(R14)(r1)
        /* Pop the STACKFRAMESIZE frame, then fall through into .Ldo_err1. */
66 .Lexit:
67         addi    r1,r1,STACKFRAMESIZE
        /*
         * err1 target, and common tail for every handler above. r1 is the
         * entry stack pointer here, so the negative offsets address the slots
         * where __copy_tofrom_user_power7 stored r3, r4 and r5 on entry.
         * Reload those original arguments and branch to
         * __copy_tofrom_user_base (defined outside this file).
         */
68 .Ldo_err1:
69         ld      r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
70         ld      r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
71         ld      r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
72         b       __copy_tofrom_user_base
/*
 * __copy_tofrom_user_power7
 *
 * Register use as seen in the code below: r3 = destination (all stores),
 * r4 = source (all loads), r5 = byte count.
 * The scalar path ends with "li r3,0; blr"; the VMX paths end with a tail
 * call to exit_vmx_usercopy.
 *
 * Entry: keep the three arguments in slots below the stack pointer so the
 * fault handlers can reload them, then pick a strategy:
 *   r5 < 16     -> .Lshort_copy
 *   r5 > 3328   -> .Lvmx_copy (CONFIG_ALTIVEC builds, inside the
 *                  CPU_FTR_ALTIVEC feature section)
 *   otherwise   -> fall through to .Lnonvmx_copy
 * NOTE(review): BEGIN_FTR_SECTION/END_FTR_SECTION_IFSET and the use of
 * test_feature are defined outside this file; presumably the bgt is only
 * live when the CPU has ALTIVEC - confirm.
 */
75 _GLOBAL(__copy_tofrom_user_power7)
76         cmpldi  r5,16
77         cmpldi  cr1,r5,3328
79         std     r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
80         std     r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
81         std     r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
83         blt     .Lshort_copy
85 #ifdef CONFIG_ALTIVEC
86 test_feature = SELFTEST_CASE
87 BEGIN_FTR_SECTION
88         bgt     cr1,.Lvmx_copy
89 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
90 #endif
/*
 * Scalar (non-VMX) copy, first stage: copy 1, 2 and/or 4 bytes so that the
 * source pointer r4 becomes 8-byte aligned.
 *
 * r6 = -r4; its low three bits are the byte count needed to reach alignment.
 * mtocrf 0x01 places the low four bits of r6 in CR field 7, so the bf tests
 * on cr7*4+3, +2 and +1 check the 1, 2 and 4 bits respectively.
 */
92 .Lnonvmx_copy:
93         /* Get the source 8B aligned */
94         neg     r6,r4
95         mtocrf  0x01,r6
96         clrldi  r6,r6,(64-3)
98         bf      cr7*4+3,1f
99 err1;   lbz     r0,0(r4)
100         addi    r4,r4,1
101 err1;   stb     r0,0(r3)
102         addi    r3,r3,1
104 1:      bf      cr7*4+2,2f
105 err1;   lhz     r0,0(r4)
106         addi    r4,r4,2
107 err1;   sth     r0,0(r3)
108         addi    r3,r3,2
110 2:      bf      cr7*4+1,3f
111 err1;   lwz     r0,0(r4)
112         addi    r4,r4,4
113 err1;   stw     r0,0(r3)
114         addi    r3,r3,4
        /*
         * Account for the alignment bytes; with fewer than 128 bytes left
         * skip the cacheline loop and go straight to the tail at 5:.
         */
116 3:      sub     r5,r5,r6
117         cmpldi  r5,128
118         blt     5f
120         mflr    r0
121         stdu    r1,-STACKFRAMESIZE(r1)
122         std     r14,STK_REG(R14)(r1)
123         std     r15,STK_REG(R15)(r1)
124         std     r16,STK_REG(R16)(r1)
125         std     r17,STK_REG(R17)(r1)
126         std     r18,STK_REG(R18)(r1)
127         std     r19,STK_REG(R19)(r1)
128         std     r20,STK_REG(R20)(r1)
129         std     r21,STK_REG(R21)(r1)
130         std     r22,STK_REG(R22)(r1)
131         std     r0,STACKFRAMESIZE+16(r1)
133         srdi    r6,r5,7
134         mtctr   r6
136         /* Now do cacheline (128B) sized loads and stores. */
137         .align  5
139 err2;   ld      r0,0(r4)
140 err2;   ld      r6,8(r4)
141 err2;   ld      r7,16(r4)
142 err2;   ld      r8,24(r4)
143 err2;   ld      r9,32(r4)
144 err2;   ld      r10,40(r4)
145 err2;   ld      r11,48(r4)
146 err2;   ld      r12,56(r4)
147 err2;   ld      r14,64(r4)
148 err2;   ld      r15,72(r4)
149 err2;   ld      r16,80(r4)
150 err2;   ld      r17,88(r4)
151 err2;   ld      r18,96(r4)
152 err2;   ld      r19,104(r4)
153 err2;   ld      r20,112(r4)
154 err2;   ld      r21,120(r4)
155         addi    r4,r4,128
156 err2;   std     r0,0(r3)
157 err2;   std     r6,8(r3)
158 err2;   std     r7,16(r3)
159 err2;   std     r8,24(r3)
160 err2;   std     r9,32(r3)
161 err2;   std     r10,40(r3)
162 err2;   std     r11,48(r3)
163 err2;   std     r12,56(r3)
164 err2;   std     r14,64(r3)
165 err2;   std     r15,72(r3)
166 err2;   std     r16,80(r3)
167 err2;   std     r17,88(r3)
168 err2;   std     r18,96(r3)
169 err2;   std     r19,104(r3)
170 err2;   std     r20,112(r3)
171 err2;   std     r21,120(r3)
172         addi    r3,r3,128
173         bdnz    4b
175         clrldi  r5,r5,(64-7)
177         ld      r14,STK_REG(R14)(r1)
178         ld      r15,STK_REG(R15)(r1)
179         ld      r16,STK_REG(R16)(r1)
180         ld      r17,STK_REG(R17)(r1)
181         ld      r18,STK_REG(R18)(r1)
182         ld      r19,STK_REG(R19)(r1)
183         ld      r20,STK_REG(R20)(r1)
184         ld      r21,STK_REG(R21)(r1)
185         ld      r22,STK_REG(R22)(r1)
186         addi    r1,r1,STACKFRAMESIZE
        /*
         * Scalar tail: r5 < 128 here. (r5 >> 4) goes into CR field 7, so
         * cr7*4+1, +2 and +3 test the 64, 32 and 16 bits of the remaining
         * length; each set bit copies one block of that size.
         */
188         /* Up to 127B to go */
189 5:      srdi    r6,r5,4
190         mtocrf  0x01,r6
192 6:      bf      cr7*4+1,7f
193 err1;   ld      r0,0(r4)
194 err1;   ld      r6,8(r4)
195 err1;   ld      r7,16(r4)
196 err1;   ld      r8,24(r4)
197 err1;   ld      r9,32(r4)
198 err1;   ld      r10,40(r4)
199 err1;   ld      r11,48(r4)
200 err1;   ld      r12,56(r4)
201         addi    r4,r4,64
202 err1;   std     r0,0(r3)
203 err1;   std     r6,8(r3)
204 err1;   std     r7,16(r3)
205 err1;   std     r8,24(r3)
206 err1;   std     r9,32(r3)
207 err1;   std     r10,40(r3)
208 err1;   std     r11,48(r3)
209 err1;   std     r12,56(r3)
210         addi    r3,r3,64
212         /* Up to 63B to go */
213 7:      bf      cr7*4+2,8f
214 err1;   ld      r0,0(r4)
215 err1;   ld      r6,8(r4)
216 err1;   ld      r7,16(r4)
217 err1;   ld      r8,24(r4)
218         addi    r4,r4,32
219 err1;   std     r0,0(r3)
220 err1;   std     r6,8(r3)
221 err1;   std     r7,16(r3)
222 err1;   std     r8,24(r3)
223         addi    r3,r3,32
225         /* Up to 31B to go */
226 8:      bf      cr7*4+3,9f
227 err1;   ld      r0,0(r4)
228 err1;   ld      r6,8(r4)
229         addi    r4,r4,16
230 err1;   std     r0,0(r3)
231 err1;   std     r6,8(r3)
232         addi    r3,r3,16
        /* Keep only the low four bits of the length for .Lshort_copy. */
234 9:      clrldi  r5,r5,(64-4)
        /*
         * Final 0-15 bytes; also the direct target from the entry code when
         * the requested length is below 16. The low four bits of r5 go into
         * CR field 7 and each set bit (8, 4, 2, 1) copies that many bytes.
         * Ends by returning 0 in r3.
         */
236         /* Up to 15B to go */
237 .Lshort_copy:
238         mtocrf  0x01,r5
239         bf      cr7*4+0,12f
240 err1;   lwz     r0,0(r4)        /* Less chance of a reject with word ops */
241 err1;   lwz     r6,4(r4)
242         addi    r4,r4,8
243 err1;   stw     r0,0(r3)
244 err1;   stw     r6,4(r3)
245         addi    r3,r3,8
247 12:     bf      cr7*4+1,13f
248 err1;   lwz     r0,0(r4)
249         addi    r4,r4,4
250 err1;   stw     r0,0(r3)
251         addi    r3,r3,4
253 13:     bf      cr7*4+2,14f
254 err1;   lhz     r0,0(r4)
255         addi    r4,r4,2
256 err1;   sth     r0,0(r3)
257         addi    r3,r3,2
259 14:     bf      cr7*4+3,15f
260 err1;   lbz     r0,0(r4)
261 err1;   stb     r0,0(r3)
263 15:     li      r3,0
264         blr
/*
 * Reached from .Lvmx_copy when enter_vmx_usercopy returned 0: pop the frame
 * that .Lvmx_copy pushed and do the copy with the scalar path instead.
 */
266 .Lunwind_stack_nonvmx_copy:
267         addi    r1,r1,STACKFRAMESIZE
268         b       .Lnonvmx_copy
/*
 * VMX copy setup. Save LR at 16(r1), push a STACKFRAMESIZE frame and call
 * enter_vmx_usercopy. Its result is compared against 0 into cr1 and only
 * acted on after the prefetch streams have been started. The call may have
 * changed r3-r5, so they are reloaded from the slots written at function
 * entry (now at positive offsets from the new r1), and LR is restored.
 */
270 .Lvmx_copy:
271 #ifdef CONFIG_ALTIVEC
272         mflr    r0
273         std     r0,16(r1)
274         stdu    r1,-STACKFRAMESIZE(r1)
275         bl      enter_vmx_usercopy
276         cmpwi   cr1,r3,0
277         ld      r0,STACKFRAMESIZE+16(r1)
278         ld      r3,STK_REG(R31)(r1)
279         ld      r4,STK_REG(R30)(r1)
280         ld      r5,STK_REG(R29)(r1)
281         mtlr    r0
283         /*
284          * We prefetch both the source and destination using enhanced touch
285          * instructions. We use a stream ID of 0 for the load side and
286          * 1 for the store side.
287          */
        /* r6/r9 = source/destination rounded down to a 128-byte boundary */
288         clrrdi  r6,r4,7
289         clrrdi  r9,r3,7
290         ori     r9,r9,1         /* stream=1 */
292         srdi    r7,r5,7         /* length in cachelines, capped at 0x3FF */
293         cmpldi  r7,0x3FF
294         ble     1f
295         li      r7,0x3FF
296 1:      lis     r0,0x0E00       /* depth=7 */
297         sldi    r7,r7,7
298         or      r7,r7,r0
299         ori     r10,r7,1        /* stream=1 */
        /* r8 = 0x80000000: lis sign-extends, clrldi clears the upper 32 bits */
301         lis     r8,0x8000       /* GO=1 */
302         clrldi  r8,r8,32
304         /* setup read stream 0 */
305         dcbt    0,r6,0b01000   /* addr from */
306         dcbt    0,r7,0b01010   /* length and depth from */
307         /* setup write stream 1 */
308         dcbtst  0,r9,0b01000   /* addr to */
309         dcbtst  0,r10,0b01010  /* length and depth to */
310         eieio
311         dcbt    0,r8,0b01010    /* all streams GO */
        /* enter_vmx_usercopy returned 0: drop the frame and copy without VMX */
313         beq     cr1,.Lunwind_stack_nonvmx_copy
315         /*
316          * If source and destination are not relatively aligned we use a
317          * slower permute loop.
318          */
        /* Non-zero low four bits of (src ^ dst) means they differ mod 16. */
319         xor     r6,r4,r3
320         rldicl. r6,r6,0,(64-4)
321         bne     .Lvmx_unaligned_copy
        /*
         * Aligned VMX path: source and destination share the same low four
         * address bits here. First copy 1/2/4/8 bytes with scalar ops so the
         * destination r3 reaches a 16-byte boundary; r6 holds that byte
         * count and the CR field 7 bits select which sizes are needed.
         */
323         /* Get the destination 16B aligned */
324         neg     r6,r3
325         mtocrf  0x01,r6
326         clrldi  r6,r6,(64-4)
328         bf      cr7*4+3,1f
329 err3;   lbz     r0,0(r4)
330         addi    r4,r4,1
331 err3;   stb     r0,0(r3)
332         addi    r3,r3,1
334 1:      bf      cr7*4+2,2f
335 err3;   lhz     r0,0(r4)
336         addi    r4,r4,2
337 err3;   sth     r0,0(r3)
338         addi    r3,r3,2
340 2:      bf      cr7*4+1,3f
341 err3;   lwz     r0,0(r4)
342         addi    r4,r4,4
343 err3;   stw     r0,0(r3)
344         addi    r3,r3,4
346 3:      bf      cr7*4+0,4f
347 err3;   ld      r0,0(r4)
348         addi    r4,r4,8
349 err3;   std     r0,0(r3)
350         addi    r3,r3,8
352 4:      sub     r5,r5,r6
        /*
         * Now copy 16/32/64 bytes with vector ops until r3 is 128-byte
         * aligned. r6 = bytes to the next 128-byte boundary; CR field 7
         * gets (-r3 >> 4), so cr7*4+3, +2, +1 test the 16, 32 and 64 bits.
         * r9-r11 are the 16/32/48 index offsets for lvx/stvx.
         */
354         /* Get the destination 128B aligned */
355         neg     r6,r3
356         srdi    r7,r6,4
357         mtocrf  0x01,r7
358         clrldi  r6,r6,(64-7)
360         li      r9,16
361         li      r10,32
362         li      r11,48
364         bf      cr7*4+3,5f
365 err3;   lvx     v1,0,r4
366         addi    r4,r4,16
367 err3;   stvx    v1,0,r3
368         addi    r3,r3,16
370 5:      bf      cr7*4+2,6f
371 err3;   lvx     v1,0,r4
372 err3;   lvx     v0,r4,r9
373         addi    r4,r4,32
374 err3;   stvx    v1,0,r3
375 err3;   stvx    v0,r3,r9
376         addi    r3,r3,32
378 6:      bf      cr7*4+1,7f
379 err3;   lvx     v3,0,r4
380 err3;   lvx     v2,r4,r9
381 err3;   lvx     v1,r4,r10
382 err3;   lvx     v0,r4,r11
383         addi    r4,r4,64
384 err3;   stvx    v3,0,r3
385 err3;   stvx    v2,r3,r9
386 err3;   stvx    v1,r3,r10
387 err3;   stvx    v0,r3,r11
388         addi    r3,r3,64
390 7:      sub     r5,r5,r6
391         srdi    r6,r5,7
393         std     r14,STK_REG(R14)(r1)
394         std     r15,STK_REG(R15)(r1)
395         std     r16,STK_REG(R16)(r1)
397         li      r12,64
398         li      r14,80
399         li      r15,96
400         li      r16,112
402         mtctr   r6
404         /*
405          * Now do cacheline sized loads and stores. By this stage the
406          * cacheline stores are also cacheline aligned.
407          */
408         .align  5
410 err4;   lvx     v7,0,r4
411 err4;   lvx     v6,r4,r9
412 err4;   lvx     v5,r4,r10
413 err4;   lvx     v4,r4,r11
414 err4;   lvx     v3,r4,r12
415 err4;   lvx     v2,r4,r14
416 err4;   lvx     v1,r4,r15
417 err4;   lvx     v0,r4,r16
418         addi    r4,r4,128
419 err4;   stvx    v7,0,r3
420 err4;   stvx    v6,r3,r9
421 err4;   stvx    v5,r3,r10
422 err4;   stvx    v4,r3,r11
423 err4;   stvx    v3,r3,r12
424 err4;   stvx    v2,r3,r14
425 err4;   stvx    v1,r3,r15
426 err4;   stvx    v0,r3,r16
427         addi    r3,r3,128
428         bdnz    8b
430         ld      r14,STK_REG(R14)(r1)
431         ld      r15,STK_REG(R15)(r1)
432         ld      r16,STK_REG(R16)(r1)
        /*
         * Aligned VMX tail. r5 is reduced to the remainder below 128 and
         * (r5 >> 4) goes into CR field 7: cr7*4+1, +2, +3 select vector
         * copies of 64, 32 and 16 bytes.
         */
434         /* Up to 127B to go */
435         clrldi  r5,r5,(64-7)
436         srdi    r6,r5,4
437         mtocrf  0x01,r6
439         bf      cr7*4+1,9f
440 err3;   lvx     v3,0,r4
441 err3;   lvx     v2,r4,r9
442 err3;   lvx     v1,r4,r10
443 err3;   lvx     v0,r4,r11
444         addi    r4,r4,64
445 err3;   stvx    v3,0,r3
446 err3;   stvx    v2,r3,r9
447 err3;   stvx    v1,r3,r10
448 err3;   stvx    v0,r3,r11
449         addi    r3,r3,64
451 9:      bf      cr7*4+2,10f
452 err3;   lvx     v1,0,r4
453 err3;   lvx     v0,r4,r9
454         addi    r4,r4,32
455 err3;   stvx    v1,0,r3
456 err3;   stvx    v0,r3,r9
457         addi    r3,r3,32
459 10:     bf      cr7*4+3,11f
460 err3;   lvx     v1,0,r4
461         addi    r4,r4,16
462 err3;   stvx    v1,0,r3
463         addi    r3,r3,16
        /* Last 0-15 bytes with scalar ops, selected by the low bits of r5. */
465         /* Up to 15B to go */
466 11:     clrldi  r5,r5,(64-4)
467         mtocrf  0x01,r5
468         bf      cr7*4+0,12f
469 err3;   ld      r0,0(r4)
470         addi    r4,r4,8
471 err3;   std     r0,0(r3)
472         addi    r3,r3,8
474 12:     bf      cr7*4+1,13f
475 err3;   lwz     r0,0(r4)
476         addi    r4,r4,4
477 err3;   stw     r0,0(r3)
478         addi    r3,r3,4
480 13:     bf      cr7*4+2,14f
481 err3;   lhz     r0,0(r4)
482         addi    r4,r4,2
483 err3;   sth     r0,0(r3)
484         addi    r3,r3,2
486 14:     bf      cr7*4+3,15f
487 err3;   lbz     r0,0(r4)
488 err3;   stb     r0,0(r3)
        /*
         * Pop the frame and branch (not bl) to exit_vmx_usercopy. LR already
         * holds our caller's return address, so that routine returns
         * directly to the caller.
         */
490 15:     addi    r1,r1,STACKFRAMESIZE
491         b       exit_vmx_usercopy       /* tail call optimise */
/*
 * Unaligned VMX path: taken when source and destination differ in their low
 * four address bits. The destination is aligned exactly as in the aligned
 * path; source quadwords are then loaded with lvx and each output quadword
 * is built by VPERM from two neighbouring loads.
 */
493 .Lvmx_unaligned_copy:
494         /* Get the destination 16B aligned */
495         neg     r6,r3
496         mtocrf  0x01,r6
497         clrldi  r6,r6,(64-4)
499         bf      cr7*4+3,1f
500 err3;   lbz     r0,0(r4)
501         addi    r4,r4,1
502 err3;   stb     r0,0(r3)
503         addi    r3,r3,1
505 1:      bf      cr7*4+2,2f
506 err3;   lhz     r0,0(r4)
507         addi    r4,r4,2
508 err3;   sth     r0,0(r3)
509         addi    r3,r3,2
511 2:      bf      cr7*4+1,3f
512 err3;   lwz     r0,0(r4)
513         addi    r4,r4,4
514 err3;   stw     r0,0(r3)
515         addi    r3,r3,4
517 3:      bf      cr7*4+0,4f
518 err3;   lwz     r0,0(r4)        /* Less chance of a reject with word ops */
519 err3;   lwz     r7,4(r4)
520         addi    r4,r4,8
521 err3;   stw     r0,0(r3)
522 err3;   stw     r7,4(r3)
523         addi    r3,r3,8
525 4:      sub     r5,r5,r6
527         /* Get the destination 128B aligned */
528         neg     r6,r3
529         srdi    r7,r6,4
530         mtocrf  0x01,r7
531         clrldi  r6,r6,(64-7)
533         li      r9,16
534         li      r10,32
535         li      r11,48
        /*
         * v16 = permute control derived from the source address. v0 is
         * primed with the first source quadword and r4 moves on by 16, so
         * from here r4 runs 16 bytes ahead of the data actually consumed.
         * Invariant for every step below: v0 holds the most recently loaded
         * source quadword.
         */
537         LVS(v16,0,r4)           /* Setup permute control vector */
538 err3;   lvx     v0,0,r4
539         addi    r4,r4,16
541         bf      cr7*4+3,5f
542 err3;   lvx     v1,0,r4
543         VPERM(v8,v0,v1,v16)
544         addi    r4,r4,16
545 err3;   stvx    v8,0,r3
546         addi    r3,r3,16
547         vor     v0,v1,v1        /* v0 = v1: carry the last load forward */
549 5:      bf      cr7*4+2,6f
550 err3;   lvx     v1,0,r4
551         VPERM(v8,v0,v1,v16)
552 err3;   lvx     v0,r4,r9
553         VPERM(v9,v1,v0,v16)
554         addi    r4,r4,32
555 err3;   stvx    v8,0,r3
556 err3;   stvx    v9,r3,r9
557         addi    r3,r3,32
559 6:      bf      cr7*4+1,7f
560 err3;   lvx     v3,0,r4
561         VPERM(v8,v0,v3,v16)
562 err3;   lvx     v2,r4,r9
563         VPERM(v9,v3,v2,v16)
564 err3;   lvx     v1,r4,r10
565         VPERM(v10,v2,v1,v16)
566 err3;   lvx     v0,r4,r11
567         VPERM(v11,v1,v0,v16)
568         addi    r4,r4,64
569 err3;   stvx    v8,0,r3
570 err3;   stvx    v9,r3,r9
571 err3;   stvx    v10,r3,r10
572 err3;   stvx    v11,r3,r11
573         addi    r3,r3,64
575 7:      sub     r5,r5,r6
576         srdi    r6,r5,7
578         std     r14,STK_REG(R14)(r1)
579         std     r15,STK_REG(R15)(r1)
580         std     r16,STK_REG(R16)(r1)
582         li      r12,64
583         li      r14,80
584         li      r15,96
585         li      r16,112
587         mtctr   r6
589         /*
590          * Now do cacheline sized loads and stores. By this stage the
591          * cacheline stores are also cacheline aligned.
592          */
593         .align  5
595 err4;   lvx     v7,0,r4
596         VPERM(v8,v0,v7,v16)
597 err4;   lvx     v6,r4,r9
598         VPERM(v9,v7,v6,v16)
599 err4;   lvx     v5,r4,r10
600         VPERM(v10,v6,v5,v16)
601 err4;   lvx     v4,r4,r11
602         VPERM(v11,v5,v4,v16)
603 err4;   lvx     v3,r4,r12
604         VPERM(v12,v4,v3,v16)
605 err4;   lvx     v2,r4,r14
606         VPERM(v13,v3,v2,v16)
607 err4;   lvx     v1,r4,r15
608         VPERM(v14,v2,v1,v16)
609 err4;   lvx     v0,r4,r16
610         VPERM(v15,v1,v0,v16)
611         addi    r4,r4,128
612 err4;   stvx    v8,0,r3
613 err4;   stvx    v9,r3,r9
614 err4;   stvx    v10,r3,r10
615 err4;   stvx    v11,r3,r11
616 err4;   stvx    v12,r3,r12
617 err4;   stvx    v13,r3,r14
618 err4;   stvx    v14,r3,r15
619 err4;   stvx    v15,r3,r16
620         addi    r3,r3,128
621         bdnz    8b
623         ld      r14,STK_REG(R14)(r1)
624         ld      r15,STK_REG(R15)(r1)
625         ld      r16,STK_REG(R16)(r1)
        /*
         * Unaligned VMX tail. r5 is reduced to the remainder below 128 and
         * (r5 >> 4) goes into CR field 7: cr7*4+1, +2, +3 select permuted
         * vector copies of 64, 32 and 16 bytes. v0 still carries the last
         * loaded source quadword from the preceding code.
         */
627         /* Up to 127B to go */
628         clrldi  r5,r5,(64-7)
629         srdi    r6,r5,4
630         mtocrf  0x01,r6
632         bf      cr7*4+1,9f
633 err3;   lvx     v3,0,r4
634         VPERM(v8,v0,v3,v16)
635 err3;   lvx     v2,r4,r9
636         VPERM(v9,v3,v2,v16)
637 err3;   lvx     v1,r4,r10
638         VPERM(v10,v2,v1,v16)
639 err3;   lvx     v0,r4,r11
640         VPERM(v11,v1,v0,v16)
641         addi    r4,r4,64
642 err3;   stvx    v8,0,r3
643 err3;   stvx    v9,r3,r9
644 err3;   stvx    v10,r3,r10
645 err3;   stvx    v11,r3,r11
646         addi    r3,r3,64
648 9:      bf      cr7*4+2,10f
649 err3;   lvx     v1,0,r4
650         VPERM(v8,v0,v1,v16)
651 err3;   lvx     v0,r4,r9
652         VPERM(v9,v1,v0,v16)
653         addi    r4,r4,32
654 err3;   stvx    v8,0,r3
655 err3;   stvx    v9,r3,r9
656         addi    r3,r3,32
658 10:     bf      cr7*4+3,11f
659 err3;   lvx     v1,0,r4
660         VPERM(v8,v0,v1,v16)
661         addi    r4,r4,16
662 err3;   stvx    v8,0,r3
663         addi    r3,r3,16
        /*
         * Last 0-15 bytes with scalar ops. r4 was kept 16 bytes ahead for
         * the permute scheme, so step it back before using it as a plain
         * source pointer.
         */
665         /* Up to 15B to go */
666 11:     clrldi  r5,r5,(64-4)
667         addi    r4,r4,-16       /* Unwind the +16 load offset */
668         mtocrf  0x01,r5
669         bf      cr7*4+0,12f
670 err3;   lwz     r0,0(r4)        /* Less chance of a reject with word ops */
671 err3;   lwz     r6,4(r4)
672         addi    r4,r4,8
673 err3;   stw     r0,0(r3)
674 err3;   stw     r6,4(r3)
675         addi    r3,r3,8
677 12:     bf      cr7*4+1,13f
678 err3;   lwz     r0,0(r4)
679         addi    r4,r4,4
680 err3;   stw     r0,0(r3)
681         addi    r3,r3,4
683 13:     bf      cr7*4+2,14f
684 err3;   lhz     r0,0(r4)
685         addi    r4,r4,2
686 err3;   sth     r0,0(r3)
687         addi    r3,r3,2
689 14:     bf      cr7*4+3,15f
690 err3;   lbz     r0,0(r4)
691 err3;   stb     r0,0(r3)
        /*
         * Pop the frame and branch (not bl) to exit_vmx_usercopy. LR already
         * holds our caller's return address, so that routine returns
         * directly to the caller.
         */
693 15:     addi    r1,r1,STACKFRAMESIZE
694         b       exit_vmx_usercopy       /* tail call optimise */
695 #endif /* CONFIG_ALTIVEC */