arch/mips/include/asm/sync.h

   1 /* SPDX-License-Identifier: GPL-2.0-only */
   2 #ifndef __MIPS_ASM_SYNC_H__
   3 #define __MIPS_ASM_SYNC_H__
   4
   5 /*
   6  * sync types are defined by the MIPS64 Instruction Set documentation in Volume
   7  * II-A of the MIPS Architecture Reference Manual, which can be found here:
   8  *
   9  *   https://www.mips.com/?do-download=the-mips64-instruction-set-v6-06
  10  *
  11  * Two types of barrier are provided:
  12  *
  13  *   1) Completion barriers, which ensure that a memory operation has actually
  14  *      completed & often involve stalling the CPU pipeline to do so.
  15  *
  16  *   2) Ordering barriers, which only ensure that affected memory operations
  17  *      won't be reordered in the CPU pipeline in a manner that violates the
  18  *      restrictions imposed by the barrier.
  19  *
  20  * Ordering barriers can be more efficient than completion barriers, since:
  21  *
  22  *   a) Ordering barriers only require memory access instructions which precede
  23  *      them in program order (older instructions) to reach a point in the
  24  *      load/store datapath beyond which reordering is not possible before
  25  *      allowing memory access instructions which follow them (younger
  26  *      instructions) to be performed.  That is, older instructions don't
  27  *      actually need to complete - they just need to get far enough that all
  28  *      other coherent CPUs will observe their completion before they observe
  29  *      the effects of younger instructions.
  30  *
  31  *   b) Multiple variants of ordering barrier are provided which allow the
  32  *      effects to be restricted to different combinations of older or younger
  33  *      loads or stores. By way of example, if we only care that stores older
  34  *      than a barrier are observed prior to stores that are younger than a
  35  *      barrier & don't care about the ordering of loads then the 'wmb'
  36  *      ordering barrier can be used. Limiting the barrier's effects to stores
  37  *      allows loads to continue unaffected & potentially allows the CPU to
  38  *      make progress faster than if younger loads had to wait for older stores
  39  *      to complete.
  40  */
  41
  42 /*
  43  * Sentinel type meaning "emit no sync instruction": ____SYNC() tests for this
  44  * value (-1), letting callers nullify __SYNC() without lots of #ifdefery.
  45  */
  46 #define __SYNC_none     -1
  47
  48 /*
  49  * A full completion barrier (type 0x00): every memory access preceding this
  50  * sync instruction in program order must complete before any memory access
  51  * following this sync instruction in program order is performed.
  52  */
  53 #define __SYNC_full     0x00
  54
  55 /*
  56  * The aq, rl & mb types are all aliases of the full completion barrier for
  57  * now, until we're satisfied that the lightweight ordering barriers defined
  58  * by MIPSr6 are sufficient to uphold our desired memory model.
  59  */
  60 #define __SYNC_aq       __SYNC_full
  61 #define __SYNC_rl       __SYNC_full
  62 #define __SYNC_mb       __SYNC_full
  63
  64 /*
  65  * rmb & wmb default to the full completion barrier as well. Cavium Octeon
  66  * CPUs differ: they have used the 'wmb' ordering barrier (0x04) since 2010 &
  67  * omit 'rmb' barriers because the CPUs don't perform speculative reads.
  68  */
  69 #ifndef CONFIG_CPU_CAVIUM_OCTEON
  70 # define __SYNC_rmb     __SYNC_full
  71 # define __SYNC_wmb     __SYNC_full
  72 #else /* CONFIG_CPU_CAVIUM_OCTEON */
  73 # define __SYNC_rmb     __SYNC_none
  74 # define __SYNC_wmb     0x04
  75 #endif /* CONFIG_CPU_CAVIUM_OCTEON */
  76
  77 /*
  78  * A GINV sync (type 0x14) is a little different; it doesn't relate directly to
  79  * loads or stores, but instead causes synchronization of an icache or TLB
  80  * global invalidation operation triggered by the ginvi or ginvt instructions
  81  * respectively. Whenever we need to know that a ginvi or ginvt operation has
  82  * been performed by all coherent CPUs, we must issue a sync instruction of
  83  * this type. Once that instruction graduates, all coherent CPUs will have
  84  * observed the invalidation.
  85  */
  86 #define __SYNC_ginv     0x14
  87
  88 /* Trivial reason flag: non-zero, so we always need this sync instruction. */
  89 #define __SYNC_always   (1 << 0)
  90
  91 /*
  92  * Reason flag for syncs needed only where memory accesses are weakly ordered.
  93  * Most MIPS systems are like that, but a few provide strongly ordered memory;
  94  * for those the flag is 0, which ____SYNC() treats as "no sync required".
  95  */
  96 #ifndef CONFIG_WEAK_ORDERING
  97 # define __SYNC_weak_ordering   0
  98 #else /* CONFIG_WEAK_ORDERING */
  99 # define __SYNC_weak_ordering   (1 << 1)
 100 #endif /* CONFIG_WEAK_ORDERING */
 101
 102 /*
 103  * Reason flag for syncs needed only where LL/SC don't implicitly act as a
 104  * memory barrier, which in general is most MIPS systems. Elsewhere the flag
 105  * is 0, which ____SYNC() treats as "no sync required".
 106  */
 107 #ifndef CONFIG_WEAK_REORDERING_BEYOND_LLSC
 108 # define __SYNC_weak_llsc       0
 109 #else /* CONFIG_WEAK_REORDERING_BEYOND_LLSC */
 110 # define __SYNC_weak_llsc       (1 << 2)
 111 #endif /* CONFIG_WEAK_REORDERING_BEYOND_LLSC */
 112
 113 /*
 114  * Reason flag for syncs that work around an LL/SC bug in some Loongson 3 CPUs,
 115  * wherein execution of a memory access (load, store or prefetch) in between
 116  * an LL & SC can cause the SC instruction to erroneously succeed, breaking
 117  * atomicity. Code containing such sequences is unusual, but reordering &
 118  * speculation make the bug bite harder than we might otherwise expect:
 119  *
 120  * 1) Reordering: a memory access appearing prior to the LL in program order
 121  *    may actually be executed after the LL.
 122  *
 123  *    To avoid this we need to place a memory barrier (ie. a SYNC instruction)
 124  *    prior to every LL instruction, in between it and any earlier memory
 125  *    access instructions.
 126  *
 127  *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
 128  *
 129  * 2) Speculation: if a conditional branch between an LL & SC has a target
 130  *    outside of the LL-SC loop, for example an exit upon value mismatch in
 131  *    cmpxchg() or similar, then misprediction of the branch may allow
 132  *    speculative execution of memory accesses from outside of the LL-SC loop.
 133  *
 134  *    To avoid this we need a memory barrier (ie. a SYNC instruction) at each
 135  *    affected branch target.
 136  *
 137  *    This case affects all current Loongson 3 CPUs.
 138  *
 139  * Both cases cause an error in the cache coherence protocol, such that the
 140  * Invalidate of a competing LL-SC goes 'missing'; the SC then erroneously
 141  * observes that its core still has Exclusive state, and so is allowed to
 142  * proceed.
 143  *
 144  * Therefore the error only occurs on SMP systems.
 145  */
 146 #ifndef CONFIG_CPU_LOONGSON3_WORKAROUNDS
 147 # define __SYNC_loongson3_war   0
 148 #else /* CONFIG_CPU_LOONGSON3_WORKAROUNDS */
 149 # define __SYNC_loongson3_war   (1 << 31)
 150 #endif /* CONFIG_CPU_LOONGSON3_WORKAROUNDS */
 151
 152 /*
 153  * Some Cavium Octeon CPUs suffer from a bug that causes a single wmb ordering
 154  * barrier to be ineffective, requiring the use of 2 in sequence to provide an
 155  * effective barrier as noted by commit 6b07d38aaa52 ("MIPS: Octeon: Use
 156  * optimized memory barrier primitives."). __SYNC_rpt() is the number of times
 157  * a sync of the given type is to be emitted: 2 for wmb on Octeon, else 1.
 158  * Note that this expression is evaluated by the assembler (not the compiler),
 159  * & that the assembler evaluates a true '==' as -1, hence 1 - (-1) gives 2.
 160  */
 161 #ifdef CONFIG_CPU_CAVIUM_OCTEON
 162 # define __SYNC_rpt(type)       (1 - ((type) == __SYNC_wmb))
 163 #else
 164 # define __SYNC_rpt(type)       1
 165 #endif
 166
 167 /*
 168  * The main event. If _type isn't __SYNC_none & _reason is non-zero, emit the
 169  * sync instruction __SYNC_rpt(_type) times; otherwise emit _else instead.
 170  * Without CONFIG_CPU_HAS_SYNC the macro expands to nothing at all.
 171  *
 172  * In future we could emit entries in a fixups-style table here, allowing us
 173  * to opportunistically remove sync instructions when we detect at runtime
 174  * that the CPU we're running on doesn't need them.
 175  */
 176 #ifdef CONFIG_CPU_HAS_SYNC
 177 # define ____SYNC(_type, _reason, _else)                        \
 178         .if     (( _type ) != __SYNC_none) && ( _reason );      \
 179         .set    push;                                           \
 180         .set    MIPS_ISA_LEVEL_RAW;                             \
 181         .rept   __SYNC_rpt(_type);                              \
 182         sync    _type;                                          \
 183         .endr;                                                  \
 184         .set    pop;                                            \
 185         .else;                                                  \
 186         _else;                                                  \
 187         .endif
 188 #else
 189 # define ____SYNC(_type, _reason, _else)
 190 #endif
 191
 192 /*
 193  * Extra level of indirection so that macros passed as arguments are expanded
 194  * first; non-assembly code then gets the result as a string, assembly as-is.
 195  */
 196 #ifndef __ASSEMBLY__
 197 # define ___SYNC(_type, _reason, _else)                         \
 198         __stringify(____SYNC(_type, _reason, _else))
 199 #else /* __ASSEMBLY__ */
 200 # define ___SYNC(_type, _reason, _else)                         \
 201         ____SYNC(_type, _reason, _else)
 202 #endif /* !__ASSEMBLY__ */
 203
 204 /* Sync of type __SYNC_<type> for reason __SYNC_<why>; no fallback code. */
 205 #define __SYNC(type, why) ___SYNC(__SYNC_##type, __SYNC_##why, )
 206 /* Same, but hands 'alt' to ____SYNC() as the code for its .else branch. */
 207 #define __SYNC_ELSE(type, why, alt) ___SYNC(__SYNC_##type, __SYNC_##why, alt)
 208
 209 #endif /* __MIPS_ASM_SYNC_H__ */