/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2023 Linus Torvalds <torvalds@linux-foundation.org>
 */
#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/asm.h>
/*
 * copy_user_nocache - Uncached memory copy with exception handling
 *
 * This copies from user space into kernel space, but the kernel
 * space accesses can take a machine check exception, so they too
 * need exception handling.
 *
 * Note: only 32-bit and 64-bit stores have non-temporal versions,
 * and we only use aligned versions. Any unaligned parts at the
 * start or end of the copy will be done using normal cached stores.
 *
 * Input:
 * rdi destination
 * rsi source
 * edx count
 *
 * Output:
 * rax uncopied bytes or 0 if successful.
 */
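/*
 * Overall structure of the code below (the exception tables further
 * down handle a fault at any of the numbered accesses):
 *
 *   1) align the destination to 8 bytes, using cached 1- and 2-byte
 *      stores and one uncached 4-byte store as needed
 *   2) copy 64 bytes per iteration with grouped quadword loads and
 *      'movnti' stores while at least 64 bytes remain
 *   3) copy single aligned quadwords while at least 8 bytes remain
 *   4) copy one more 4-byte chunk uncached if needed, then 'sfence'
 *   5) finish the last 0-3 bytes with ordinary cached stores
 *   6) return the number of bytes left uncopied (0 on success)
 */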
SYM_FUNC_START(__copy_user_nocache)
	/* If destination is not 8-byte aligned, we'll have to align it */
	movl %edi,%ecx
	andl $7,%ecx
	jnz .Lalign

.Lis_aligned:
	/* Use the unrolled loop while at least 64 bytes remain */
	cmp $64,%edx
	jb .Lquadwords

.Lunrolled:
10:	movq (%rsi),%r8
11:	movq 8(%rsi),%r9
12:	movq 16(%rsi),%r10
13:	movq 24(%rsi),%r11
20:	movnti %r8,(%rdi)
21:	movnti %r9,8(%rdi)
22:	movnti %r10,16(%rdi)
23:	movnti %r11,24(%rdi)
30:	movq 32(%rsi),%r8
31:	movq 40(%rsi),%r9
32:	movq 48(%rsi),%r10
33:	movq 56(%rsi),%r11
40:	movnti %r8,32(%rdi)
41:	movnti %r9,40(%rdi)
42:	movnti %r10,48(%rdi)
43:	movnti %r11,56(%rdi)

	addq $64,%rsi
	addq $64,%rdi
	sub $64,%edx
	cmp $64,%edx
	jae .Lunrolled
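/*
 * Note that 'movnti' does a weakly ordered, write-combining store
 * that avoids polluting the cache. All the destinations above are
 * naturally aligned, and the 'sfence' on the regular exit paths
 * below orders these stores ahead of anything the caller does next.
 */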
/*
 * First set of user mode loads have been done
 * without any stores, so if they fail, we can
 * just try the non-unrolled loop.
 */
_ASM_EXTABLE_UA(10b, .Lquadwords)
_ASM_EXTABLE_UA(11b, .Lquadwords)
_ASM_EXTABLE_UA(12b, .Lquadwords)
_ASM_EXTABLE_UA(13b, .Lquadwords)
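/*
 * Nothing has been added to %rsi/%rdi/%edx yet at these points, so
 * .Lquadwords simply resumes the copy from the start of the failed
 * iteration.
 */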
/*
 * The second set of user mode loads have been
 * done with 32 bytes stored to the destination,
 * so we need to take that into account before
 * falling back to the non-unrolled loop.
 */
_ASM_EXTABLE_UA(30b, .Lfixup32)
_ASM_EXTABLE_UA(31b, .Lfixup32)
_ASM_EXTABLE_UA(32b, .Lfixup32)
_ASM_EXTABLE_UA(33b, .Lfixup32)
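/*
 * .Lfixup32 (near the end of the file) advances %rsi/%rdi past the
 * 32 bytes that were already stored and drops them from %edx before
 * re-entering the quadword-at-a-time loop.
 */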
/*
 * An exception on a write means that we're
 * done, but we need to update the count
 * depending on where in the unrolled loop
 * we were.
 */
_ASM_EXTABLE_UA(20b, .Ldone0)
_ASM_EXTABLE_UA(21b, .Ldone8)
_ASM_EXTABLE_UA(22b, .Ldone16)
_ASM_EXTABLE_UA(23b, .Ldone24)
_ASM_EXTABLE_UA(40b, .Ldone32)
_ASM_EXTABLE_UA(41b, .Ldone40)
_ASM_EXTABLE_UA(42b, .Ldone48)
_ASM_EXTABLE_UA(43b, .Ldone56)
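/*
 * The number in each .LdoneN target is how many bytes of the current
 * iteration had already been stored when the write faulted; the
 * fixup code below subtracts exactly that amount from the count.
 */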
.Lquadwords:
	cmp $8,%edx
	jb .Llong
50:	movq (%rsi),%rax
51:	movnti %rax,(%rdi)
	addq $8,%rsi
	addq $8,%rdi
	sub $8,%edx
	jmp .Lquadwords
/*
 * If we fail on the last full quadword, we will
 * not try to do any byte-wise cached accesses.
 * We will try to do one more 4-byte uncached
 * one, though.
 */
_ASM_EXTABLE_UA(50b, .Llast4)
_ASM_EXTABLE_UA(51b, .Ldone0)
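/*
 * A quadword load can fault because it runs into an unmapped page
 * even though its first four bytes are still readable, so .Llast4
 * below gets one more chance to copy just those four bytes before
 * giving up.
 */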
.Llong:
	test $4,%dl
	je .Lword
60:	movl (%rsi),%eax
61:	movnti %eax,(%rdi)
	addq $4,%rsi
	addq $4,%rdi
	sub $4,%edx
.Lword:
	sfence
	test $2,%dl
	je .Lbyte
70:	movw (%rsi),%ax
71:	movw %ax,(%rdi)
	addq $2,%rsi
	addq $2,%rdi
	sub $2,%edx
.Lbyte:
	test $1,%dl
	je .Ldone
80:	movb (%rsi),%al
81:	movb %al,(%rdi)
	dec %edx
.Ldone:
	mov %edx,%eax
	RET
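/*
 * The 'sfence' at .Lword above drains the write-combining buffers
 * and orders all of the earlier non-temporal stores ahead of any
 * later stores, before we finish up with the ordinary cached tail
 * and return.
 */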
/*
 * If we fail on the last four bytes, we won't
 * bother with any fixups. It's dead, Jim. Note
 * that there's no need for 'sfence' for any
 * of this, since the exception will have been
 * a serializing event anyway.
 */
_ASM_EXTABLE_UA(60b, .Ldone)
_ASM_EXTABLE_UA(61b, .Ldone)
_ASM_EXTABLE_UA(70b, .Ldone)
_ASM_EXTABLE_UA(71b, .Ldone)
_ASM_EXTABLE_UA(80b, .Ldone)
_ASM_EXTABLE_UA(81b, .Ldone)
/*
 * This is the "head needs aligning" case when
 * the destination isn't 8-byte aligned. The
 * 4-byte case can be done uncached, but any
 * smaller alignment is done with regular stores.
 */
.Lalign:
	test $1,%dil
	je .Lalign_word
	test %edx,%edx
	je .Ldone
90:	movb (%rsi),%al
91:	movb %al,(%rdi)
	inc %rsi
	inc %rdi
	dec %edx
.Lalign_word:
	test $2,%dil
	je .Lalign_long
	cmp $2,%edx
	jb .Lbyte
92:	movw (%rsi),%ax
93:	movw %ax,(%rdi)
	addq $2,%rsi
	addq $2,%rdi
	sub $2,%edx
.Lalign_long:
	test $4,%dil
	je .Lis_aligned
	cmp $4,%edx
	jb .Lword
94:	movl (%rsi),%eax
95:	movnti %eax,(%rdi)
	addq $4,%rsi
	addq $4,%rdi
	sub $4,%edx
	jmp .Lis_aligned
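/*
 * Once the low three bits of the destination have been dealt with,
 * the jump back to .Lis_aligned re-checks the remaining length and
 * continues with the aligned 8-byte paths above.
 */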
/*
 * If we fail on the initial alignment accesses,
 * we're all done. Again, no point in trying to
 * do byte-by-byte probing if the 4-byte load
 * fails - we're not doing any uncached accesses
 * that would require an 'sfence' at this point
 * anyway.
 */
_ASM_EXTABLE_UA(90b, .Ldone)
_ASM_EXTABLE_UA(91b, .Ldone)
_ASM_EXTABLE_UA(92b, .Ldone)
_ASM_EXTABLE_UA(93b, .Ldone)
_ASM_EXTABLE_UA(94b, .Ldone)
_ASM_EXTABLE_UA(95b, .Ldone)
/*
 * Exception table fixups for faults in the middle
 * of the unrolled loop.
 */
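/*
 * Each .LdoneN label falls through into the next one, so entering
 * the chain at .LdoneN executes N/8 'sub $8,%edx' steps before
 * .Ldone0 returns the updated count of uncopied bytes.
 */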
.Ldone56: sub $8,%edx
.Ldone48: sub $8,%edx
.Ldone40: sub $8,%edx
.Ldone32: sub $8,%edx
.Ldone24: sub $8,%edx
.Ldone16: sub $8,%edx
.Ldone8: sub $8,%edx
.Ldone0:
	mov %edx,%eax
	RET

.Lfixup32:
	addq $32,%rsi
	addq $32,%rdi
	sub $32,%edx
	jmp .Lquadwords
.Llast4:
52:	movl (%rsi),%eax
53:	movnti %eax,(%rdi)
	sfence
	sub $4,%edx
	mov %edx,%eax
	RET
_ASM_EXTABLE_UA(52b, .Ldone0)
_ASM_EXTABLE_UA(53b, .Ldone0)
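/*
 * On success, .Llast4 returns the count minus the four bytes just
 * copied; if even that 4-byte access faults, the fixups above land
 * on .Ldone0 and the count is returned untouched.
 */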
SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
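/*
 * Usage sketch, for illustration only (the calling code shown here
 * and handle_partial() are assumptions, not taken from this file):
 * a caller treats the return value like that of the other uaccess
 * copy helpers, e.g.
 *
 *	unsigned long left = __copy_user_nocache(dst, usrc, size);
 *	if (left)
 *		handle_partial(size - left);
 *
 * i.e. zero means the whole range was copied.
 */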