Linux 3.16-rc2
[linux/fpc-iii.git] / arch / powerpc / lib / copypage_power7.S
blobd7dafb3777acc6badda7b391ebc48580f37710a8
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 2 of the License, or
5  * (at your option) any later version.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software
14  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15  *
16  * Copyright (C) IBM Corporation, 2012
17  *
18  * Author: Anton Blanchard <anton@au.ibm.com>
19  */
20 #include <asm/page.h>
21 #include <asm/ppc_asm.h>
/*
 * copypage_power7: copy one page (PAGE_SIZE bytes).
 *
 * In:  r3 = destination ("to") address
 *      r4 = source ("from") address
 *      Both are page aligned (see the prefetch comment below).
 *
 * Flow:
 *  1. Describe and start prefetch streams for the source and destination.
 *  2. With CONFIG_ALTIVEC, call enter_vmx_copy. A non-zero return selects
 *     the lvx/stvx loop, which ends by tail-calling exit_vmx_copy.
 *  3. Without CONFIG_ALTIVEC, or when enter_vmx_copy returns 0, copy with
 *     the 64-bit ld/std loop at .Lnonvmx_copy.
 *
 * Both loops move 128 bytes per iteration and run PAGE_SIZE/128 times,
 * counted down in CTR.
 *
 * Registers written directly by this routine: r0, r3-r12, CTR, cr0, LR
 * (ALTIVEC build only, saved and restored around the call) and vr0-vr7
 * (VMX loop only). r14-r20 are used by the integer loop but are saved to
 * the stack frame first and restored before returning.
 */
23 _GLOBAL(copypage_power7)
24         /*
25          * We prefetch both the source and destination using enhanced touch
26          * instructions. We use a stream ID of 0 for the load side and
27          * 1 for the store side. Since source and destination are page
28          * aligned we don't need to clear the bottom 7 bits of either
29          * address.
30          */
31         ori     r9,r3,1         /* stream=1 => to */
           /*
            * r7 = length/depth descriptor for stream 0, r10 = the same
            * descriptor tagged for stream 1.
            * NOTE(review): the unit counts (512 for 64K pages, 32 otherwise)
            * presumably are 128-byte cache lines, i.e. exactly one page -
            * confirm against the Power ISA description of dcbt.
            */
33 #ifdef CONFIG_PPC_64K_PAGES
34         lis     r7,0x0E01       /* depth=7
35                                  * units/cachelines=512 */
36 #else
37         lis     r7,0x0E00       /* depth=7 */
38         ori     r7,r7,0x1000    /* units/cachelines=32 */
39 #endif
40         ori     r10,r7,1        /* stream=1 */
           /*
            * lis sign-extends its result on a 64-bit CPU, so clear the upper
            * 32 bits afterwards to leave r8 = 0x0000000080000000.
            */
42         lis     r8,0x8000       /* GO=1 */
43         clrldi  r8,r8,32
           /*
            * NOTE(review): the .machine override presumably exists so the
            * assembler accepts the dcbt/dcbtst forms that carry a third (TH)
            * operand - confirm against the GNU as PowerPC documentation.
            * In the first operand slot of dcbt/dcbtst, r0 stands for the
            * constant 0, so the effective address is just the second register.
            */
45 .machine push
46 .machine "power4"
47         /* setup read stream 0  */
48         dcbt    r0,r4,0b01000   /* addr from */
49         dcbt    r0,r7,0b01010   /* length and depth from */
50         /* setup write stream 1 */
51         dcbtst  r0,r9,0b01000   /* addr to */
52         dcbtst  r0,r10,0b01010  /* length and depth to */
           /*
            * NOTE(review): eieio presumably orders the four stream
            * descriptions above ahead of the GO touch below - confirm.
            */
53         eieio
54         dcbt    r0,r8,0b01010   /* all streams GO */
55 .machine pop
57 #ifdef CONFIG_ALTIVEC
           /*
            * About to make a call, so save LR at 16(r1) relative to the
            * incoming stack pointer, and park r3/r4 in what become the
            * STK_REG(R31)/STK_REG(R30) slots of the new frame once the stdu
            * below has moved r1 down by STACKFRAMESIZE.
            */
58         mflr    r0
59         std     r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
60         std     r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
61         std     r0,16(r1)
62         stdu    r1,-STACKFRAMESIZE(r1)
63         bl      enter_vmx_copy
           /*
            * r3 now holds enter_vmx_copy's return value. Test it before the
            * reload of r3 below overwrites it; the result is consumed by the
            * beq further down.
            */
64         cmpwi   r3,0
           /* Recover LR and the original destination/source pointers. */
65         ld      r0,STACKFRAMESIZE+16(r1)
66         ld      r3,STK_REG(R31)(r1)
67         ld      r4,STK_REG(R30)(r1)
68         mtlr    r0
           /* CTR = number of 128-byte chunks in a page, for whichever loop runs. */
70         li      r0,(PAGE_SIZE/128)
71         mtctr   r0
           /*
            * enter_vmx_copy returned 0: use the integer loop instead. The
            * stack frame is deliberately still allocated; .Lnonvmx_copy uses
            * it and pops it.
            */
73         beq     .Lnonvmx_copy
           /*
            * VMX path: nothing below touches the stack, so pop the frame now.
            * That leaves r1 and LR as they were on entry, which is what makes
            * the tail call to exit_vmx_copy at the end possible.
            */
75         addi    r1,r1,STACKFRAMESIZE
           /* Index registers: byte offsets of vectors 2..8 within a 128-byte chunk. */
77         li      r6,16
78         li      r7,32
79         li      r8,48
80         li      r9,64
81         li      r10,80
82         li      r11,96
83         li      r12,112
85         .align  5
           /*
            * Load eight 16-byte vectors (128 bytes) from the source, then
            * store them to the destination. In the middle operand of lvx/stvx,
            * r0 denotes the constant 0 rather than the contents of GPR0
            * (which still holds the loop count), so those two accesses are at
            * offset 0 from r4/r3.
            */
86 1:      lvx     vr7,r0,r4
87         lvx     vr6,r4,r6
88         lvx     vr5,r4,r7
89         lvx     vr4,r4,r8
90         lvx     vr3,r4,r9
91         lvx     vr2,r4,r10
92         lvx     vr1,r4,r11
93         lvx     vr0,r4,r12
94         addi    r4,r4,128
95         stvx    vr7,r0,r3
96         stvx    vr6,r3,r6
97         stvx    vr5,r3,r7
98         stvx    vr4,r3,r8
99         stvx    vr3,r3,r9
100         stvx    vr2,r3,r10
101         stvx    vr1,r3,r11
102         stvx    vr0,r3,r12
103         addi    r3,r3,128
104         bdnz    1b
106         b       exit_vmx_copy           /* tail call optimise */
108 #else
           /*
            * Built without VMX support: set up the loop count and allocate
            * the stack frame that .Lnonvmx_copy expects to find.
            */
109         li      r0,(PAGE_SIZE/128)
110         mtctr   r0
112         stdu    r1,-STACKFRAMESIZE(r1)
113 #endif
115 .Lnonvmx_copy:
           /*
            * Integer copy. Reached with CTR = PAGE_SIZE/128 and a
            * STACKFRAMESIZE frame already allocated by either #ifdef arm
            * above. r14-r20 are saved into that frame so they can serve as
            * extra copy registers; they are restored after the loop.
            */
116         std     r14,STK_REG(R14)(r1)
117         std     r15,STK_REG(R15)(r1)
118         std     r16,STK_REG(R16)(r1)
119         std     r17,STK_REG(R17)(r1)
120         std     r18,STK_REG(R18)(r1)
121         std     r19,STK_REG(R19)(r1)
122         std     r20,STK_REG(R20)(r1)
           /*
            * Load sixteen doublewords (128 bytes) from the source, then store
            * them to the destination. Here r0 is an ordinary data register.
            */
124 1:      ld      r0,0(r4)
125         ld      r5,8(r4)
126         ld      r6,16(r4)
127         ld      r7,24(r4)
128         ld      r8,32(r4)
129         ld      r9,40(r4)
130         ld      r10,48(r4)
131         ld      r11,56(r4)
132         ld      r12,64(r4)
133         ld      r14,72(r4)
134         ld      r15,80(r4)
135         ld      r16,88(r4)
136         ld      r17,96(r4)
137         ld      r18,104(r4)
138         ld      r19,112(r4)
139         ld      r20,120(r4)
140         addi    r4,r4,128
141         std     r0,0(r3)
142         std     r5,8(r3)
143         std     r6,16(r3)
144         std     r7,24(r3)
145         std     r8,32(r3)
146         std     r9,40(r3)
147         std     r10,48(r3)
148         std     r11,56(r3)
149         std     r12,64(r3)
150         std     r14,72(r3)
151         std     r15,80(r3)
152         std     r16,88(r3)
153         std     r17,96(r3)
154         std     r18,104(r3)
155         std     r19,112(r3)
156         std     r20,120(r3)
157         addi    r3,r3,128
158         bdnz    1b
           /*
            * Restore r14-r20, pop the frame and return. LR needs no work
            * here: the ALTIVEC arm already restored it after the call, and
            * the non-ALTIVEC arm never changed it.
            */
160         ld      r14,STK_REG(R14)(r1)
161         ld      r15,STK_REG(R15)(r1)
162         ld      r16,STK_REG(R16)(r1)
163         ld      r17,STK_REG(R17)(r1)
164         ld      r18,STK_REG(R18)(r1)
165         ld      r19,STK_REG(R19)(r1)
166         ld      r20,STK_REG(R20)(r1)
167         addi    r1,r1,STACKFRAMESIZE
168         blr