drm/panthor: Don't add write fences to the shared BOs
[drm/drm-misc.git] / arch / mips / include / asm / sync.h
blob44c04a82d0b7d4ef0a983ce97dbbae9fbe8db271
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 #ifndef __MIPS_ASM_SYNC_H__
3 #define __MIPS_ASM_SYNC_H__
/*
 * sync types are defined by the MIPS64 Instruction Set documentation in Volume
 * II-A of the MIPS Architecture Reference Manual, which can be found here:
 *
 *   https://www.mips.com/?do-download=the-mips64-instruction-set-v6-06
 *
 * Two types of barrier are provided:
 *
 *   1) Completion barriers, which ensure that a memory operation has actually
 *      completed & often involve stalling the CPU pipeline to do so.
 *
 *   2) Ordering barriers, which only ensure that affected memory operations
 *      won't be reordered in the CPU pipeline in a manner that violates the
 *      restrictions imposed by the barrier.
 *
 * Ordering barriers can be more efficient than completion barriers, since:
 *
 *   a) Ordering barriers only require memory access instructions which precede
 *      them in program order (older instructions) to reach a point in the
 *      load/store datapath beyond which reordering is not possible before
 *      allowing memory access instructions which follow them (younger
 *      instructions) to be performed.  That is, older instructions don't
 *      actually need to complete - they just need to get far enough that all
 *      other coherent CPUs will observe their completion before they observe
 *      the effects of younger instructions.
 *
 *   b) Multiple variants of ordering barrier are provided which allow the
 *      effects to be restricted to different combinations of older or younger
 *      loads or stores. By way of example, if we only care that stores older
 *      than a barrier are observed prior to stores that are younger than a
 *      barrier & don't care about the ordering of loads then the 'wmb'
 *      ordering barrier can be used. Limiting the barrier's effects to stores
 *      allows loads to continue unaffected & potentially allows the CPU to
 *      make progress faster than if younger loads had to wait for older stores
 *      to complete.
 */
/*
 * No sync instruction at all; used to allow code to nullify the effect of the
 * __SYNC() macro without needing lots of #ifdefery.
 *
 * The value -1 is the sentinel that ____SYNC() compares its type argument
 * against before emitting anything.
 */
#define __SYNC_none	-1
/*
 * A full completion barrier; all memory accesses appearing prior to this sync
 * instruction in program order must complete before any memory accesses
 * appearing after this sync instruction in program order.
 */
#define __SYNC_full	0x00
/*
 * For now we use a full completion barrier to implement all sync types, until
 * we're satisfied that lightweight ordering barriers defined by MIPSr6 are
 * sufficient to uphold our desired memory model.
 *
 * aq, rl & mb are therefore all plain aliases of __SYNC_full.
 */
#define __SYNC_aq	__SYNC_full
#define __SYNC_rl	__SYNC_full
#define __SYNC_mb	__SYNC_full
/*
 * Read & write barriers also default to the full completion barrier, except
 * on Cavium Octeon CPUs, which have been using the 'wmb' ordering barrier
 * since 2010 & omit 'rmb' barriers because the CPUs don't perform
 * speculative reads.
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define __SYNC_rmb	__SYNC_none
# define __SYNC_wmb	0x04
#else
# define __SYNC_rmb	__SYNC_full
# define __SYNC_wmb	__SYNC_full
#endif
/*
 * A GINV sync is a little different; it doesn't relate directly to loads or
 * stores, but instead causes synchronization of an icache or TLB global
 * invalidation operation triggered by the ginvi or ginvt instructions
 * respectively. In cases where we need to know that a ginvi or ginvt operation
 * has been performed by all coherent CPUs, we must issue a sync instruction of
 * this type. Once this instruction graduates all coherent CPUs will have
 * observed the invalidation.
 */
#define __SYNC_ginv	0x14
/* Trivial; indicate that we always need this sync instruction. */
#define __SYNC_always	(1 << 0)
/*
 * Indicate that we need this sync instruction only on systems with weakly
 * ordered memory access. In general this is most MIPS systems, but there are
 * exceptions which provide strongly ordered memory.
 *
 * Evaluates to zero (no sync needed for this reason) when
 * CONFIG_WEAK_ORDERING is not set.
 */
#ifdef CONFIG_WEAK_ORDERING
# define __SYNC_weak_ordering	(1 << 1)
#else
# define __SYNC_weak_ordering	0
#endif
/*
 * Indicate that we need this sync instruction only on systems where LL/SC
 * don't implicitly provide a memory barrier. In general this is most MIPS
 * systems.
 *
 * Evaluates to zero (no sync needed for this reason) when
 * CONFIG_WEAK_REORDERING_BEYOND_LLSC is not set.
 */
#ifdef CONFIG_WEAK_REORDERING_BEYOND_LLSC
# define __SYNC_weak_llsc	(1 << 2)
#else
# define __SYNC_weak_llsc	0
#endif
/*
 * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
 * store or prefetch) in between an LL & SC can cause the SC instruction to
 * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
 * containing such sequences, this bug bites harder than we might otherwise
 * expect due to reordering & speculation:
 *
 * 1) A memory access appearing prior to the LL in program order may actually
 *    be executed after the LL - this is the reordering case.
 *
 *    In order to avoid this we need to place a memory barrier (i.e. a SYNC
 *    instruction) prior to every LL instruction, in between it and any earlier
 *    memory access instructions.
 *
 *    This reordering case is fixed by 3A R2 CPUs, i.e. 3A2000 models and later.
 *
 * 2) If a conditional branch exists between an LL & SC with a target outside
 *    of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
 *    or similar, then misprediction of the branch may allow speculative
 *    execution of memory accesses from outside of the LL-SC loop.
 *
 *    In order to avoid this we need a memory barrier (i.e. a SYNC instruction)
 *    at each affected branch target.
 *
 *    This case affects all current Loongson 3 CPUs.
 *
 * The above described cases cause an error in the cache coherence protocol;
 * such that the Invalidate of a competing LL-SC goes 'missing' and SC
 * erroneously observes its core still has Exclusive state and lets the SC
 * proceed.
 *
 * Therefore the error only occurs on SMP systems.
 */
#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS
# define __SYNC_loongson3_war	(1 << 31)
#else
# define __SYNC_loongson3_war	0
#endif
/*
 * Some Cavium Octeon CPUs suffer from a bug that causes a single wmb ordering
 * barrier to be ineffective, requiring the use of 2 in sequence to provide an
 * effective barrier as noted by commit 6b07d38aaa52 ("MIPS: Octeon: Use
 * optimized memory barrier primitives."). Here we specify that the affected
 * sync instructions should be emitted twice.
 * Note that this expression is evaluated by the assembler (not the compiler),
 * and that the assembler evaluates '==' as 0 or -1, not 0 or 1.
 *
 * Hence on Octeon '1 - (type == __SYNC_wmb)' yields 2 for wmb and 1 for
 * every other type; on all other CPUs the repeat count is always 1.
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define __SYNC_rpt(type)	(1 - (type == __SYNC_wmb))
#else
# define __SYNC_rpt(type)	1
#endif
/*
 * The main event. Here we actually emit a sync instruction of a given type, if
 * reason is non-zero.
 *
 * In future we have the option of emitting entries in a fixups-style table
 * here that would allow us to opportunistically remove some sync instructions
 * when we detect at runtime that we're running on a CPU that doesn't need
 * them.
 *
 * _type:   sync type operand; -1 suppresses the instruction.
 * _reason: assembler expression; the sync is emitted only if it is non-zero.
 * _else:   assembly emitted instead when no sync is emitted.
 *
 * Without CONFIG_CPU_HAS_SYNC the macro expands to nothing at all, so
 * _else is dropped as well.
 */
#ifdef CONFIG_CPU_HAS_SYNC
# define ____SYNC(_type, _reason, _else)			\
	.if	(( _type ) != -1) && ( _reason );		\
	.set	push;						\
	.set	MIPS_ISA_LEVEL_RAW;				\
	.rept	__SYNC_rpt(_type);				\
	sync	_type;						\
	.endr;							\
	.set	pop;						\
	.else;							\
	_else;							\
	.endif
#else
# define ____SYNC(_type, _reason, _else)
#endif
/*
 * Preprocessor magic to expand macros used as arguments before we insert them
 * into assembly code.
 *
 * In assembly sources the expansion of ____SYNC() is used directly; elsewhere
 * it is passed through __stringify().
 */
#ifdef __ASSEMBLY__
# define ___SYNC(type, reason, else)				\
	____SYNC(type, reason, else)
#else
# define ___SYNC(type, reason, else)				\
	__stringify(____SYNC(type, reason, else))
#endif
/*
 * Public entry points. 'type' and 'reason' are the bare suffixes of the
 * __SYNC_* constants (they are token-pasted onto the __SYNC_ prefix here).
 * __SYNC() passes an empty else-clause; __SYNC_ELSE() forwards 'else' as the
 * else-clause of ____SYNC().
 */
#define __SYNC(type, reason)					\
	___SYNC(__SYNC_##type, __SYNC_##reason, )
#define __SYNC_ELSE(type, reason, else)				\
	___SYNC(__SYNC_##type, __SYNC_##reason, else)
209 #endif /* __MIPS_ASM_SYNC_H__ */