/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 *   This program is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU General Public License
 *   as published by the Free Software Foundation, version 2.
 *
 *   This program is distributed in the hope that it will be useful, but
 *   WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 *   NON INFRINGEMENT.  See the GNU General Public License for
 *   more details.
 */
15 #include <linux/string.h>
16 #include <linux/smp.h>
17 #include <linux/module.h>
18 #include <linux/uaccess.h>
19 #include <asm/fixmap.h>
20 #include <asm/kmap_types.h>
21 #include <asm/tlbflush.h>
22 #include <hv/hypervisor.h>
23 #include <arch/chip.h>
26 #if !CHIP_HAS_COHERENT_LOCAL_CACHE()
28 /* Defined in memcpy.S */
29 extern unsigned long __memcpy_asm(void *to
, const void *from
, unsigned long n
);
30 extern unsigned long __copy_to_user_inatomic_asm(
31 void __user
*to
, const void *from
, unsigned long n
);
32 extern unsigned long __copy_from_user_inatomic_asm(
33 void *to
, const void __user
*from
, unsigned long n
);
34 extern unsigned long __copy_from_user_zeroing_asm(
35 void *to
, const void __user
*from
, unsigned long n
);
37 typedef unsigned long (*memcpy_t
)(void *, const void *, unsigned long);
/* Size above which to consider TLB games for performance */
#define LARGE_COPY_CUTOFF 2048

/*
 * Communicate to the simulator what we are trying to do.
 *
 * Writes SPR_SIM_CONTROL with SIM_CONTROL_ALLOW_MULTIPLE_CACHING in the
 * low bits and the argument b shifted up by _SIM_CONTROL_OPERATOR_BITS.
 * Callers in this file pass 1 before the copy and 0 afterwards.
 */
#define sim_allow_multiple_caching(b) \
	__insn_mtspr(SPR_SIM_CONTROL, \
		     SIM_CONTROL_ALLOW_MULTIPLE_CACHING | \
		     ((b) << _SIM_CONTROL_OPERATOR_BITS))
/*
 * Copy memory by briefly enabling incoherent cacheline-at-a-time mode.
 *
 * We set up our own source and destination PTEs that we fully control.
 * This is the only way to guarantee that we don't race with another
 * thread that is modifying the PTE; we can't afford to try the
 * copy_{to,from}_user() technique of catching the interrupt, since
 * we must run with interrupts disabled to avoid the risk of some
 * other code seeing the incoherent data in our cache.  (Recall that
 * our cache is indexed by PA, so even if the other code doesn't use
 * our kmap_atomic virtual addresses, they'll still hit in cache using
 * the normal VAs that aren't supposed to hit in cache.)
 */
/*
 * memcpy_multicache() - copy len bytes from source to dest through two
 * temporary fixmap mappings selected by cpu and kmap slot index.
 *
 * dest, source: only their offsets within a page are used here, to
 *	form the addresses inside the temporary mappings.
 * dst_pte: PTE installed for the destination slot; the install and TLB
 *	flush are skipped when the slot already holds the same value.
 * src_pte: PTE for the source; it is passed through hv_pte_set_nc()
 *	and hv_pte_clear_writable() before being installed.
 * len: byte count handed to __memcpy_asm() and __inv_buffer().
 *
 * Sequence: save and disable interrupts, notify the simulator, map the
 * destination, map the source, copy, remap the source with
 * HV_PTE_MODE_CACHE_NO_L3 and writable, invalidate the source range,
 * pop both kmap slots, notify the simulator, restore interrupts.
 *
 * NOTE(review): this excerpt does not show the function braces or the
 * declarations of idx, type0, type1, cpu, pmdp and ptep; confirm
 * against the complete file before relying on this description.
 */
60 static void memcpy_multicache(void *dest
, const void *source
,
61 pte_t dst_pte
, pte_t src_pte
, int len
)
64 unsigned long flags
, newsrc
, newdst
;
71 * Disable interrupts so that we don't recurse into memcpy()
72 * in an interrupt handler, nor accidentally reference
73 * the PA of the source from an interrupt routine. Also
74 * notify the simulator that we're playing games so we don't
75 * generate spurious coherency warnings.
77 local_irq_save(flags
);
78 sim_allow_multiple_caching(1);
80 /* Set up the new dest mapping */
81 type0
= kmap_atomic_idx_push();
82 idx
= FIX_KMAP_BEGIN
+ (KM_TYPE_NR
* cpu
) + type0
;
83 newdst
= __fix_to_virt(idx
) + ((unsigned long)dest
& (PAGE_SIZE
-1));
84 pmdp
= pmd_offset(pud_offset(pgd_offset_k(newdst
), newdst
), newdst
);
85 ptep
= pte_offset_kernel(pmdp
, newdst
);
86 if (pte_val(*ptep
) != pte_val(dst_pte
)) {
87 set_pte(ptep
, dst_pte
);
88 local_flush_tlb_page(NULL
, newdst
, PAGE_SIZE
);
91 /* Set up the new source mapping */
92 type1
= kmap_atomic_idx_push();
93 idx
+= (type0
- type1
);
94 src_pte
= hv_pte_set_nc(src_pte
);
95 src_pte
= hv_pte_clear_writable(src_pte
); /* be paranoid */
96 newsrc
= __fix_to_virt(idx
) + ((unsigned long)source
& (PAGE_SIZE
-1));
97 pmdp
= pmd_offset(pud_offset(pgd_offset_k(newsrc
), newsrc
), newsrc
);
98 ptep
= pte_offset_kernel(pmdp
, newsrc
);
99 __set_pte(ptep
, src_pte
); /* set_pte() would be confused by this */
100 local_flush_tlb_page(NULL
, newsrc
, PAGE_SIZE
);
102 /* Actually move the data. */
103 __memcpy_asm((void *)newdst
, (const void *)newsrc
, len
);
106 * Remap the source as locally-cached and not OLOC'ed so that
107 * we can inval without also invaling the remote cpu's cache.
108 * This also avoids known errata with inv'ing cacheable oloc data.
110 src_pte
= hv_pte_set_mode(src_pte
, HV_PTE_MODE_CACHE_NO_L3
);
111 src_pte
= hv_pte_set_writable(src_pte
); /* need write access for inv */
112 __set_pte(ptep
, src_pte
); /* set_pte() would be confused by this */
113 local_flush_tlb_page(NULL
, newsrc
, PAGE_SIZE
);
116 * Do the actual invalidation, covering the full L2 cache line
117 * at the end since __memcpy_asm() is somewhat aggressive.
119 __inv_buffer((void *)newsrc
, len
);
122 * We're done: notify the simulator that all is back to normal,
123 * and re-enable interrupts and pre-emption.
125 kmap_atomic_idx_pop();
126 kmap_atomic_idx_pop();
127 sim_allow_multiple_caching(0);
128 local_irq_restore(flags
);
/*
 * Identify large copies from remotely-cached memory, and copy them
 * via memcpy_multicache() if they look good, otherwise fall back
 * to the particular kind of copying passed as the memcpy_t function.
 */
/*
 * fast_copy() - copy len bytes, using memcpy_multicache() one page at a
 * time while the copy stays large, then hand dest, source and len to
 * func and return its result.
 *
 * dest, source: destination and source addresses; both are looked up
 *	with virt_to_pte() against current->mm.
 * len: number of bytes to copy.
 * func: fallback copy routine called in the final return statement
 *	(its declaration is not visible in this excerpt).
 *
 * Each loop pass requires len >= LARGE_COPY_CUTOFF.  It examines the
 * source PTE (present, readable, HV_PTE_MODE_CACHE_TILE_L3, remote
 * cache cpu compared with smp_processor_id()) and the destination PTE
 * (present, writable, destination page compared with the source page),
 * converts huge-page PTEs to small-page ones, and clamps copy_size to
 * the bytes left on both the source and the destination page before
 * calling memcpy_multicache().
 *
 * NOTE(review): many lines of this function are missing from the
 * excerpt (the end of the parameter list, braces, the bodies of most
 * if statements, the page release and pointer-advance code).  What
 * happens when a check fails is therefore not shown; presumably the
 * loop is left and func handles the rest -- confirm against the
 * complete file.
 */
137 static unsigned long fast_copy(void *dest
, const void *source
, int len
,
141 * Check if it's big enough to bother with. We may end up doing a
142 * small copy via TLB manipulation if we're near a page boundary,
143 * but presumably we'll make it up when we hit the second page.
145 while (len
>= LARGE_COPY_CUTOFF
) {
146 int copy_size
, bytes_left_on_page
;
147 pte_t
*src_ptep
, *dst_ptep
;
148 pte_t src_pte
, dst_pte
;
149 struct page
*src_page
, *dst_page
;
151 /* Is the source page oloc'ed to a remote cpu? */
153 src_ptep
= virt_to_pte(current
->mm
, (unsigned long)source
);
154 if (src_ptep
== NULL
)
157 if (!hv_pte_get_present(src_pte
) ||
158 !hv_pte_get_readable(src_pte
) ||
159 hv_pte_get_mode(src_pte
) != HV_PTE_MODE_CACHE_TILE_L3
)
161 if (get_remote_cache_cpu(src_pte
) == smp_processor_id())
163 src_page
= pfn_to_page(pte_pfn(src_pte
));
165 if (pte_val(src_pte
) != pte_val(*src_ptep
)) {
169 if (pte_huge(src_pte
)) {
170 /* Adjust the PTE to correspond to a small page */
171 int pfn
= pte_pfn(src_pte
);
172 pfn
+= (((unsigned long)source
& (HPAGE_SIZE
-1))
174 src_pte
= pfn_pte(pfn
, src_pte
);
175 src_pte
= pte_mksmall(src_pte
);
178 /* Is the destination page writable? */
180 dst_ptep
= virt_to_pte(current
->mm
, (unsigned long)dest
);
181 if (dst_ptep
== NULL
) {
186 if (!hv_pte_get_present(dst_pte
) ||
187 !hv_pte_get_writable(dst_pte
)) {
191 dst_page
= pfn_to_page(pte_pfn(dst_pte
));
192 if (dst_page
== src_page
) {
194 * Source and dest are on the same page; this
195 * potentially exposes us to incoherence if any
196 * part of src and dest overlap on a cache line.
197 * Just give up rather than trying to be precise.
203 if (pte_val(dst_pte
) != pte_val(*dst_ptep
)) {
207 if (pte_huge(dst_pte
)) {
208 /* Adjust the PTE to correspond to a small page */
209 int pfn
= pte_pfn(dst_pte
);
210 pfn
+= (((unsigned long)dest
& (HPAGE_SIZE
-1))
212 dst_pte
= pfn_pte(pfn
, dst_pte
);
213 dst_pte
= pte_mksmall(dst_pte
);
216 /* All looks good: create a cachable PTE and copy from it */
219 PAGE_SIZE
- (((int)source
) & (PAGE_SIZE
-1));
220 if (copy_size
> bytes_left_on_page
)
221 copy_size
= bytes_left_on_page
;
223 PAGE_SIZE
- (((int)dest
) & (PAGE_SIZE
-1));
224 if (copy_size
> bytes_left_on_page
)
225 copy_size
= bytes_left_on_page
;
226 memcpy_multicache(dest
, source
, dst_pte
, src_pte
, copy_size
);
228 /* Release the pages */
232 /* Continue on the next page */
238 return func(dest
, source
, len
);
241 void *memcpy(void *to
, const void *from
, __kernel_size_t n
)
243 if (n
< LARGE_COPY_CUTOFF
)
244 return (void *)__memcpy_asm(to
, from
, n
);
246 return (void *)fast_copy(to
, from
, n
, __memcpy_asm
);
249 unsigned long __copy_to_user_inatomic(void __user
*to
, const void *from
,
252 if (n
< LARGE_COPY_CUTOFF
)
253 return __copy_to_user_inatomic_asm(to
, from
, n
);
255 return fast_copy(to
, from
, n
, __copy_to_user_inatomic_asm
);
258 unsigned long __copy_from_user_inatomic(void *to
, const void __user
*from
,
261 if (n
< LARGE_COPY_CUTOFF
)
262 return __copy_from_user_inatomic_asm(to
, from
, n
);
264 return fast_copy(to
, from
, n
, __copy_from_user_inatomic_asm
);
267 unsigned long __copy_from_user_zeroing(void *to
, const void __user
*from
,
270 if (n
< LARGE_COPY_CUTOFF
)
271 return __copy_from_user_zeroing_asm(to
, from
, n
);
273 return fast_copy(to
, from
, n
, __copy_from_user_zeroing_asm
);
276 #endif /* !CHIP_HAS_COHERENT_LOCAL_CACHE() */