initial commit with v3.6.7
[linux-3.6.7-moxart.git] / arch / powerpc / lib / copypage_power7.S
blob0ef75bf0695cee25185842625526e1d46a4e7026
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 2 of the License, or
5  * (at your option) any later version.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software
14  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15  *
16  * Copyright (C) IBM Corporation, 2012
17  *
18  * Author: Anton Blanchard <anton@au.ibm.com>
19  */
20 #include <asm/page.h>
21 #include <asm/ppc_asm.h>
23 _GLOBAL(copypage_power7)
24         /*
25          * Copy one page from r4 (source) to r3 (destination). Both sides
26          * are prefetched with enhanced touch instructions: stream ID 0 for
27          * the load side and 1 for the store side. Since source and
28          * destination are page aligned we don't need to clear the bottom
29          * 7 bits of either address.
30          */
31         ori     r9,r3,1         /* r9 = dest address | stream=1 */
33 #ifdef CONFIG_PPC_64K_PAGES
34         lis     r7,0x0E01       /* depth=7, units=512 */
35 #else
36         lis     r7,0x0E00       /* depth=7 */
37         ori     r7,r7,0x1000    /* units=32 */
38 #endif
39         ori     r10,r7,1        /* same depth/units word, stream=1 */
41         lis     r8,0x8000       /* GO=1 */
42         clrldi  r8,r8,32        /* keep only the low 32 bits of r8 */
44 .machine push                   /* save the assembler's CPU selection */
45 .machine "power4"               /* NOTE(review): presumably needed for the dcbt/dcbtst form with a TH operand -- confirm */
46         dcbt    r0,r4,0b01000   /* load stream 0: source address */
47         dcbt    r0,r7,0b01010   /* load stream 0: depth/units */
48         dcbtst  r0,r9,0b01000   /* store stream 1: dest address */
49         dcbtst  r0,r10,0b01010  /* store stream 1: depth/units */
50         eieio                   /* barrier between stream setup and GO */
51         dcbt    r0,r8,0b01010   /* GO */
52 .machine pop                    /* restore the assembler's CPU selection */
54 #ifdef CONFIG_ALTIVEC
55         mflr    r0              /* LR is clobbered by the bl below */
56         std     r3,48(r1)       /* stash dest; reloaded after the call */
57         std     r4,56(r1)       /* stash source; reloaded after the call */
58         std     r0,16(r1)       /* stash LR; reloaded after the call */
59         stdu    r1,-STACKFRAMESIZE(r1)  /* allocate a frame for the call */
60         bl      .enter_vmx_copy /* NOTE(review): presumably returns 0 in r3 when VMX must not be used -- confirm */
61         cmpwi   r3,0            /* CR0 is consumed by the beq further down */
62         ld      r0,STACKFRAMESIZE+16(r1)        /* saved LR */
63         ld      r3,STACKFRAMESIZE+48(r1)        /* dest */
64         ld      r4,STACKFRAMESIZE+56(r1)        /* source */
65         mtlr    r0              /* LR restored: both paths return to our caller */
67         li      r0,(PAGE_SIZE/128)      /* 128-byte chunks per page */
68         mtctr   r0              /* CTR = loop count for either copy loop */
70         beq     .Lnonvmx_copy   /* r3 was 0: GPR copy, frame still allocated */
72         addi    r1,r1,STACKFRAMESIZE    /* VMX loop needs no frame: release it */
74         li      r6,16           /* r6-r12: byte offsets of vectors 1..7 */
75         li      r7,32           /*   within each 128-byte chunk          */
76         li      r8,48
77         li      r9,64
78         li      r10,80
79         li      r11,96
80         li      r12,112
82         .align  5               /* align the loop start to 2^5 = 32 bytes */
83 1:      lvx     vr7,r0,r4       /* RA=r0 reads as 0 here: load from r4+0 */
84         lvx     vr6,r4,r6
85         lvx     vr5,r4,r7
86         lvx     vr4,r4,r8
87         lvx     vr3,r4,r9
88         lvx     vr2,r4,r10
89         lvx     vr1,r4,r11
90         lvx     vr0,r4,r12
91         addi    r4,r4,128       /* source += 128 */
92         stvx    vr7,r0,r3       /* RA=r0 reads as 0 here: store to r3+0 */
93         stvx    vr6,r3,r6
94         stvx    vr5,r3,r7
95         stvx    vr4,r3,r8
96         stvx    vr3,r3,r9
97         stvx    vr2,r3,r10
98         stvx    vr1,r3,r11
99         stvx    vr0,r3,r12
100         addi    r3,r3,128       /* dest += 128 */
101         bdnz    1b              /* loop until CTR reaches 0 */
103         b       .exit_vmx_copy          /* tail call optimise: LR already points at our caller */
105 #else /* !CONFIG_ALTIVEC */
106         li      r0,(PAGE_SIZE/128)      /* 128-byte chunks per page */
107         mtctr   r0              /* CTR = loop count for the GPR loop */
109         stdu    r1,-STACKFRAMESIZE(r1)  /* frame for the r14-r20 save area */
110 #endif /* CONFIG_ALTIVEC */
112 .Lnonvmx_copy:                  /* GPR copy; entered with the frame allocated and CTR set */
113         std     r14,STK_REG(R14)(r1)    /* save r14-r20: the loop below uses */
114         std     r15,STK_REG(R15)(r1)    /*   them as data registers; they    */
115         std     r16,STK_REG(R16)(r1)    /*   are restored before returning   */
116         std     r17,STK_REG(R17)(r1)
117         std     r18,STK_REG(R18)(r1)
118         std     r19,STK_REG(R19)(r1)
119         std     r20,STK_REG(R20)(r1)
121 1:      ld      r0,0(r4)        /* load 128 bytes: 16 x 8-byte ld */
122         ld      r5,8(r4)
123         ld      r6,16(r4)
124         ld      r7,24(r4)
125         ld      r8,32(r4)
126         ld      r9,40(r4)
127         ld      r10,48(r4)
128         ld      r11,56(r4)
129         ld      r12,64(r4)
130         ld      r14,72(r4)
131         ld      r15,80(r4)
132         ld      r16,88(r4)
133         ld      r17,96(r4)
134         ld      r18,104(r4)
135         ld      r19,112(r4)
136         ld      r20,120(r4)
137         addi    r4,r4,128       /* source += 128 */
138         std     r0,0(r3)        /* store the same 128 bytes */
139         std     r5,8(r3)
140         std     r6,16(r3)
141         std     r7,24(r3)
142         std     r8,32(r3)
143         std     r9,40(r3)
144         std     r10,48(r3)
145         std     r11,56(r3)
146         std     r12,64(r3)
147         std     r14,72(r3)
148         std     r15,80(r3)
149         std     r16,88(r3)
150         std     r17,96(r3)
151         std     r18,104(r3)
152         std     r19,112(r3)
153         std     r20,120(r3)
154         addi    r3,r3,128       /* dest += 128 */
155         bdnz    1b              /* loop until CTR reaches 0 */
157         ld      r14,STK_REG(R14)(r1)    /* restore r14-r20 */
158         ld      r15,STK_REG(R15)(r1)
159         ld      r16,STK_REG(R16)(r1)
160         ld      r17,STK_REG(R17)(r1)
161         ld      r18,STK_REG(R18)(r1)
162         ld      r19,STK_REG(R19)(r1)
163         ld      r20,STK_REG(R20)(r1)
164         addi    r1,r1,STACKFRAMESIZE    /* release the frame */
165         blr                     /* LR is valid here on both build variants */