2 * Copyright (C) 2016 Red Hat, Inc.
3 * Author: Michael S. Tsirkin <mst@redhat.com>
4 * This work is licensed under the terms of the GNU GPL, version 2.
6 * Common macros and functions for ring benchmarking.
17 #if defined(__x86_64__) || defined(__i386__)
18 #include "x86intrin.h"
/*
 * wait_cycles() - x86 busy-wait driven by the time-stamp counter.
 * @cycles: number of TSC ticks to wait for.
 *
 * The loop below has an empty body and keeps re-reading __rdtsc() until the
 * difference to `t` is no longer smaller than @cycles.
 * NOTE(review): the declaration and initialisation of `t` are not visible in
 * this excerpt; presumably it is a TSC sample taken on entry - confirm.
 */
20 static inline void wait_cycles(unsigned long long cycles
)
25 while (__rdtsc() - t
< cycles
) {}
28 #define VMEXIT_CYCLES 500
29 #define VMENTRY_CYCLES 500
31 #elif defined(__s390x__)
/*
 * wait_cycles() - s390x busy-wait built on a BRCTG (branch on count) loop.
 * @cycles: number of loop iterations to spin for.
 *
 * BRCTG decrements its register operand and branches back to label 0 while
 * the result is non-zero.  Because the instruction writes the register, the
 * counter is passed as a read-write operand ("+d"): an input-only operand
 * would allow the compiler to assume the register still holds the original
 * value after the asm and to reuse it.
 *
 * A zero count returns immediately; the decrement would otherwise wrap
 * around and spin for 2^64 iterations.
 */
static inline void wait_cycles(unsigned long long cycles)
{
	if (!cycles)
		return;

	__asm__ __volatile__("0: brctg %0,0b" : "+d" (cycles));
}

/* Spin counts handed to wait_cycles() by vmexit() and vmentry() on s390x. */
#define VMEXIT_CYCLES 200
#define VMENTRY_CYCLES 200
/*
 * Fallback wait_cycles() for the remaining architectures.  Both cycle
 * constants below are 0 in this configuration.
 * NOTE(review): the function body is not visible in this excerpt - confirm
 * what it does before relying on it.
 */
42 static inline void wait_cycles(unsigned long long cycles
)
46 #define VMEXIT_CYCLES 0
47 #define VMENTRY_CYCLES 0
/*
 * vmexit() - spin for VMEXIT_CYCLES by calling wait_cycles().
 * NOTE(review): presumably models the cost of a VM exit; any statements that
 * precede the call are not visible in this excerpt - confirm.
 */
50 static inline void vmexit(void)
55 wait_cycles(VMEXIT_CYCLES
);
/*
 * vmentry() - spin for VMENTRY_CYCLES by calling wait_cycles().
 * NOTE(review): presumably models the cost of a VM entry; any statements that
 * precede the call are not visible in this excerpt - confirm.
 */
57 static inline void vmentry(void)
62 wait_cycles(VMENTRY_CYCLES
);
65 /* implemented by ring */
/*
 * Ring entry points.  Only declarations live here; each ring implementation
 * provides the definitions.
 */
void alloc_ring(void);
int add_inbuf(unsigned, void *, void *);
void *get_buf(unsigned *, void **);
/*
 * Spelled with an explicit (void) so this is a real prototype: before C23 an
 * empty parameter list leaves the arguments of a call unchecked.
 */
void kick_available(void);
bool use_buf(unsigned *, void **);
81 /* implemented by main */
/* Wait primitives; the definitions are supplied by the benchmark's main. */
extern void wait_for_kick(void);
extern void wait_for_call(void);

/* Ring size; declared here, defined elsewhere. */
extern unsigned int ring_size;
90 /* Compiler barrier - similar to what Linux uses */
/*
 * barrier() - compiler-only barrier.  The asm template is empty; the "memory"
 * clobber stops the compiler from caching or reordering memory accesses
 * across this point.
 */
#define barrier() __asm__ __volatile__("" : : : "memory")
93 /* Is there a portable way to do this? */
/*
 * cpu_relax() - hint issued from inside busy-wait loops.
 *   x86:    "rep; nop" with a memory clobber
 *   s390x:  a plain compiler barrier
 *   others: not implemented; trips an assertion if it is ever reached
 *
 * The three definitions are mutually exclusive, so the chain needs its
 * #else and closing #endif.
 */
#if defined(__x86_64__) || defined(__i386__)
#define cpu_relax() __asm__ __volatile__("rep; nop" ::: "memory")
#elif defined(__s390x__)
#define cpu_relax() barrier()
#else
#define cpu_relax() assert(0)
#endif
/*
 * Flag declared here and defined elsewhere.
 * NOTE(review): presumably selects whether busy_wait() issues cpu_relax() -
 * confirm against the function body, which is not visible in this excerpt.
 */
102 extern bool do_relax
;
/*
 * busy_wait() - helper for polling loops.
 * NOTE(review): only the signature and one inner comment are visible here.
 */
104 static inline void busy_wait(void)
109 /* prevent compiler from removing busy loops */
113 #if defined(__x86_64__) || defined(__i386__)
/*
 * smp_mb() - full memory barrier implemented as a locked no-op add to a
 * stack slot below the stack pointer.
 * NOTE(review): the -132 offset presumably keeps the access clear of the
 * 128-byte x86-64 red zone - confirm.
 *
 * The stack pointer is called %rsp only in 64-bit mode.  A 32-bit build must
 * name %esp instead, otherwise the instruction does not assemble.
 */
#if defined(__x86_64__)
#define smp_mb() __asm__ __volatile__("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
#else /* __i386__ */
#define smp_mb() __asm__ __volatile__("lock; addl $0,-132(%%esp)" ::: "memory", "cc")
#endif
117 * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
118 * with other __ATOMIC_SEQ_CST calls.
120 #define smp_mb() __sync_synchronize()
124 * This abuses the atomic builtins for thread fences, and
125 * adds a compiler barrier.
/*
 * smp_release() / smp_acquire(): each macro issues the GCC thread fence of
 * the matching order (__ATOMIC_RELEASE / __ATOMIC_ACQUIRE).
 * NOTE(review): the remaining lines of both macro bodies, including the
 * closing of each do { ... } block, are not visible in this excerpt.
 */
127 #define smp_release() do { \
129 __atomic_thread_fence(__ATOMIC_RELEASE); \
132 #define smp_acquire() do { \
133 __atomic_thread_fence(__ATOMIC_ACQUIRE); \
/*
 * smp_wmb(): on i386, x86-64 and s390x this is only a compiler barrier; the
 * other definition falls back to smp_release().
 *
 * smp_read_barrier_depends(): one variant maps to smp_acquire(), the other
 * is an empty statement.
 * NOTE(review): the preprocessor condition selecting between the two
 * smp_read_barrier_depends() variants is not visible in this excerpt.
 */
137 #if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)
138 #define smp_wmb() barrier()
140 #define smp_wmb() smp_release()
144 #define smp_read_barrier_depends() smp_acquire()
146 #define smp_read_barrier_depends() do {} while(0)
/*
 * __read_once_size() - copy @size bytes from @p into @res.
 * @p:    source object, read through a volatile-qualified pointer
 * @res:  destination buffer
 * @size: size of the object in bytes
 *
 * Sizes 1, 2, 4 and 8 are each read with one volatile access of the matching
 * unsigned integer type.  The __builtin_memcpy() line copies @size bytes
 * without the volatile qualifier.
 * NOTE(review): the switch/default scaffolding around the cases is not
 * visible in this excerpt; presumably the memcpy is the default case.
 */
149 static __always_inline
150 void __read_once_size(const volatile void *p
, void *res
, int size
)
153 case 1: *(unsigned char *)res
= *(volatile unsigned char *)p
; break; \
154 case 2: *(unsigned short *)res
= *(volatile unsigned short *)p
; break; \
155 case 4: *(unsigned int *)res
= *(volatile unsigned int *)p
; break; \
156 case 8: *(unsigned long long *)res
= *(volatile unsigned long long *)p
; break; \
159 __builtin_memcpy((void *)res
, (const void *)p
, size
); \
/*
 * __write_once_size() - copy @size bytes from @res into @p.
 * @p:    destination object, written through a volatile-qualified pointer
 * @res:  source buffer holding the value to store
 * @size: size of the object in bytes
 *
 * Sizes 1, 2, 4 and 8 are each written with one volatile access of the
 * matching unsigned integer type.  The __builtin_memcpy() line copies @size
 * bytes without the volatile qualifier.
 * NOTE(review): the switch/default scaffolding around the cases is not
 * visible in this excerpt; presumably the memcpy is the default case.
 */
164 static __always_inline
void __write_once_size(volatile void *p
, void *res
, int size
)
167 case 1: *(volatile unsigned char *)p
= *(unsigned char *)res
; break;
168 case 2: *(volatile unsigned short *)p
= *(unsigned short *)res
; break;
169 case 4: *(volatile unsigned int *)p
= *(unsigned int *)res
; break;
170 case 8: *(volatile unsigned long long *)p
= *(unsigned long long *)res
; break;
173 __builtin_memcpy((void *)p
, (const void *)res
, size
);
/*
 * READ_ONCE(x) - read @x through __read_once_size().
 * The value is read into a union whose char-array member serves as the byte
 * buffer passed to __read_once_size(); smp_read_barrier_depends() follows
 * the read.
 * NOTE(review): the opening and closing lines of the macro body are not
 * visible in this excerpt; presumably the macro yields __u.__val - confirm.
 */
178 #define READ_ONCE(x) \
180 union { typeof(x) __val; char __c[1]; } __u; \
181 __read_once_size(&(x), __u.__c, sizeof(x)); \
182 smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \
186 #define WRITE_ONCE(x, val) \
188 union { typeof(x) __val; char __c[1]; } __u = \
189 { .__val = (typeof(x)) (val) }; \
190 __write_once_size(&(x), __u.__c, sizeof(x)); \