//===---------- emutls.c - Implements __emutls_get_address ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#ifdef _WIN32
#include <windows.h>

#include <assert.h>
#include <malloc.h>
#include <stdio.h>
#else
#include <pthread.h>
#endif
// Number of pthread key destructor rounds to skip before the per-thread
// emutls array is actually freed.
#ifdef __BIONIC__
// There are 4 pthread key cleanup rounds on Bionic. Delay emutls deallocation
// to round 2. We need to delay deallocation because:
//  - Android versions older than M lack __cxa_thread_atexit_impl, so apps
//    use a pthread key destructor to call C++ destructors.
//  - Apps might use __thread/thread_local variables in pthread destructors.
// We can't wait until the final two rounds, because jemalloc needs two rounds
// after the final malloc/free call to free its thread-specific data (see
// https://reviews.llvm.org/D46978#1107507).
#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 1
#else
#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 0
#endif
#if defined(_MSC_VER) && !defined(__clang__)
// MSVC raises a warning about a nonstandard extension being used for the 0
// sized element in this array. Disable this for warn-as-error builds.
#pragma warning(push)
#pragma warning(disable : 4200)
#endif

// Per-thread table holding the addresses of this thread's emulated TLS
// objects. Allocated with room for 'size' trailing pointers.
typedef struct emutls_address_array {
  uintptr_t skip_destructor_rounds; // destructor rounds still to be skipped
  uintptr_t size;                   // number of elements in the 'data' array
  void *data[];                     // object addresses; NULL until allocated
} emutls_address_array;

#if defined(_MSC_VER) && !defined(__clang__)
#pragma warning(pop)
#endif
46 static void emutls_shutdown(emutls_address_array
*array
);
#ifndef _WIN32
// POSIX state: a mutex serializing index assignment, and the pthread key that
// holds each thread's emutls_address_array.
static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_key_t emutls_pthread_key;
static bool emutls_key_created = false; // true once the key exists

// Integer types with the width of a machine word / pointer, as GCC lays them
// out in its emutls control structure.
typedef unsigned int gcc_word __attribute__((mode(word)));
typedef unsigned int gcc_pointer __attribute__((mode(pointer)));
#endif // !_WIN32
// Default is not to use posix_memalign, so systems like Android
// can use thread local data without heavier POSIX memory allocators.
#ifndef EMUTLS_USE_POSIX_MEMALIGN
#define EMUTLS_USE_POSIX_MEMALIGN 0
#endif
#ifndef _WIN32
// Allocates 'size' bytes aligned to 'align' (a power of two, at least
// sizeof(void *)). Aborts on allocation failure, so the result is never NULL.
// Release the memory with emutls_memalign_free.
static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
  void *base;
#if EMUTLS_USE_POSIX_MEMALIGN
  if (posix_memalign(&base, align, size) != 0)
    abort();
#else
#define EXTRA_ALIGN_PTR_BYTES (align - 1 + sizeof(void *))
  char *object;
  // Over-allocate so there is room to align upwards and to stash the
  // original malloc pointer just below the aligned address.
  if ((object = (char *)malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL)
    abort();
  base = (void *)(((uintptr_t)(object + EXTRA_ALIGN_PTR_BYTES)) &
                  ~(uintptr_t)(align - 1));

  ((void **)base)[-1] = object;
#endif
  return base;
}
#endif // !_WIN32
#ifndef _WIN32
// Releases memory obtained from emutls_memalign_alloc. 'base' must not be
// NULL: in the malloc-based configuration the word below it is read.
static __inline void emutls_memalign_free(void *base) {
#if EMUTLS_USE_POSIX_MEMALIGN
  free(base);
#else
  // The mallocated address is in ((void**)base)[-1]
  free(((void **)base)[-1]);
#endif
}
#endif // !_WIN32
92 static __inline
void emutls_setspecific(emutls_address_array
*value
) {
93 pthread_setspecific(emutls_pthread_key
, (void *)value
);
96 static __inline emutls_address_array
*emutls_getspecific(void) {
97 return (emutls_address_array
*)pthread_getspecific(emutls_pthread_key
);
100 static void emutls_key_destructor(void *ptr
) {
101 emutls_address_array
*array
= (emutls_address_array
*)ptr
;
102 if (array
->skip_destructor_rounds
> 0) {
103 // emutls is deallocated using a pthread key destructor. These
104 // destructors are called in several rounds to accommodate destructor
105 // functions that (re)initialize key values with pthread_setspecific.
106 // Delay the emutls deallocation to accommodate other end-of-thread
107 // cleanup tasks like calling thread_local destructors (e.g. the
108 // __cxa_thread_atexit fallback in libc++abi).
109 array
->skip_destructor_rounds
--;
110 emutls_setspecific(array
);
112 emutls_shutdown(array
);
117 static __inline
void emutls_init(void) {
118 if (pthread_key_create(&emutls_pthread_key
, emutls_key_destructor
) != 0)
120 emutls_key_created
= true;
123 static __inline
void emutls_init_once(void) {
124 static pthread_once_t once
= PTHREAD_ONCE_INIT
;
125 pthread_once(&once
, emutls_init
);
128 static __inline
void emutls_lock(void) { pthread_mutex_lock(&emutls_mutex
); }
130 static __inline
void emutls_unlock(void) { pthread_mutex_unlock(&emutls_mutex
); }
#ifdef _WIN32
// Windows state: a heap-allocated critical section serializing index
// assignment, and the TLS slot holding each thread's emutls_address_array.
static LPCRITICAL_SECTION emutls_mutex;
static DWORD emutls_tls_index = TLS_OUT_OF_INDEXES;

// Windows stand-ins for GCC's word-sized and pointer-sized types.
typedef uintptr_t gcc_word;
typedef void *gcc_pointer;
#endif // _WIN32
#ifdef _WIN32
// Prints the system message for 'last_err' to stderr, or 'hint' when no
// message text can be obtained.
static void win_error(DWORD last_err, const char *hint) {
  char *buffer = NULL;
  if (FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER |
                         FORMAT_MESSAGE_FROM_SYSTEM |
                         FORMAT_MESSAGE_MAX_WIDTH_MASK,
                     NULL, last_err, 0, (LPSTR)&buffer, 1, NULL)) {
    fprintf(stderr, "Windows error: %s\n", buffer);
  } else {
    fprintf(stderr, "Unknown Windows error: %s\n", hint);
  }
  // FORMAT_MESSAGE_ALLOCATE_BUFFER makes the system allocate 'buffer';
  // LocalFree(NULL) is harmless if nothing was allocated.
  LocalFree(buffer);
}
#endif // _WIN32
#ifdef _WIN32
// Reports 'last_err' via win_error and terminates the process.
static __inline void win_abort(DWORD last_err, const char *hint) {
  win_error(last_err, hint);
  abort();
}
#endif // _WIN32
#ifdef _WIN32
// Allocates 'size' bytes aligned to 'align' with _aligned_malloc. Aborts on
// failure, so the result is never NULL.
static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
  void *base = _aligned_malloc(size, align);
  if (!base)
    win_abort(GetLastError(), "_aligned_malloc");
  return base;
}
#endif // _WIN32
#ifdef _WIN32
// Releases memory obtained from emutls_memalign_alloc.
static __inline void emutls_memalign_free(void *base) { _aligned_free(base); }
#endif // _WIN32
#ifdef _WIN32
// Releases the critical section and the TLS slot, freeing the calling
// thread's TLS objects. Each resource is reset after release, so a second
// call is a no-op.
static void emutls_exit(void) {
  if (emutls_mutex) {
    DeleteCriticalSection(emutls_mutex);
    _aligned_free(emutls_mutex);
    emutls_mutex = NULL;
  }
  if (emutls_tls_index != TLS_OUT_OF_INDEXES) {
    emutls_shutdown((emutls_address_array *)TlsGetValue(emutls_tls_index));
    TlsFree(emutls_tls_index);
    emutls_tls_index = TLS_OUT_OF_INDEXES;
  }
}
#endif // _WIN32
#ifdef _WIN32
// InitOnceExecuteOnce callback: allocates the critical section and the TLS
// slot and registers emutls_exit with atexit. The three parameters are
// unused. Returns TRUE on success and FALSE after reporting a failure.
static BOOL CALLBACK emutls_init(PINIT_ONCE p0, PVOID p1, PVOID *p2) {
  (void)p0;
  (void)p1;
  (void)p2;
  emutls_mutex =
      (LPCRITICAL_SECTION)_aligned_malloc(sizeof(CRITICAL_SECTION), 16);
  if (!emutls_mutex) {
    win_error(GetLastError(), "_aligned_malloc");
    return FALSE;
  }
  InitializeCriticalSection(emutls_mutex);

  emutls_tls_index = TlsAlloc();
  if (emutls_tls_index == TLS_OUT_OF_INDEXES) {
    // Capture the error before cleanup, which may overwrite it.
    const DWORD err = GetLastError();
    emutls_exit();
    win_error(err, "TlsAlloc");
    return FALSE;
  }
  atexit(&emutls_exit);
  return TRUE;
}
#endif // _WIN32
#ifdef _WIN32
// Runs emutls_init exactly once per process.
static __inline void emutls_init_once(void) {
  static INIT_ONCE once;
  InitOnceExecuteOnce(&once, emutls_init, NULL, NULL);
}
#endif // _WIN32
#ifdef _WIN32
// Enters the global emutls critical section.
static __inline void emutls_lock(void) { EnterCriticalSection(emutls_mutex); }
#endif // _WIN32
#ifdef _WIN32
// Leaves the global emutls critical section.
static __inline void emutls_unlock(void) {
  LeaveCriticalSection(emutls_mutex);
}
#endif // _WIN32
#ifdef _WIN32
// Stores 'value' as the calling thread's emutls array in the emutls TLS
// slot. Aborts if the slot cannot be written.
static __inline void emutls_setspecific(emutls_address_array *value) {
  if (TlsSetValue(emutls_tls_index, (LPVOID)value) == 0)
    win_abort(GetLastError(), "TlsSetValue");
}
#endif // _WIN32
#ifdef _WIN32
// Returns the calling thread's emutls array, or NULL if none has been set.
// Aborts if reading the TLS slot failed.
static __inline emutls_address_array *emutls_getspecific(void) {
  LPVOID value = TlsGetValue(emutls_tls_index);
  if (value == NULL) {
    // NULL is also a legitimate stored value; only the last-error code
    // distinguishes "nothing stored" from a real failure.
    const DWORD err = GetLastError();
    if (err != ERROR_SUCCESS)
      win_abort(err, "TlsGetValue");
  }
  return (emutls_address_array *)value;
}
#endif // _WIN32
// Provide atomic load/store functions for emutls_get_index if built with MSVC.
#if defined(_WIN32) && !defined(__ATOMIC_RELEASE)

enum { __ATOMIC_ACQUIRE = 2, __ATOMIC_RELEASE = 3 };

// Atomically loads the pointer-sized value at 'ptr'. Only __ATOMIC_ACQUIRE
// is supported as 'type'.
static __inline uintptr_t __atomic_load_n(void *ptr, unsigned type) {
  assert(type == __ATOMIC_ACQUIRE);
  // These return the previous value - but since we do an OR with 0,
  // it's equivalent to a plain load.
#ifdef _WIN64
  return InterlockedOr64(ptr, 0);
#else
  return InterlockedOr(ptr, 0);
#endif
}

// Atomically stores 'val' to the pointer-sized location at 'ptr'. Only
// __ATOMIC_RELEASE is supported as 'type'.
static __inline void __atomic_store_n(void *ptr, uintptr_t val, unsigned type) {
  assert(type == __ATOMIC_RELEASE);
  InterlockedExchangePointer((void *volatile *)ptr, (void *)val);
}

#endif // __ATOMIC_RELEASE
static size_t emutls_num_object = 0; // number of allocated TLS objects
259 // Free the allocated TLS data
260 static void emutls_shutdown(emutls_address_array
*array
) {
263 for (i
= 0; i
< array
->size
; ++i
) {
265 emutls_memalign_free(array
->data
[i
]);
270 // For every TLS variable xyz,
271 // there is one __emutls_control variable named __emutls_v.xyz.
272 // If xyz has non-zero initial value, __emutls_v.xyz's "value"
273 // will point to __emutls_t.xyz, which has the initial value.
274 typedef struct __emutls_control
{
275 // Must use gcc_word here, instead of size_t, to match GCC. When
276 // gcc_word is larger than size_t, the upper extra bits are all
277 // zeros. We can use variables of size_t to operate on size and
279 gcc_word size
; // size of the object in bytes
280 gcc_word align
; // alignment of the object in bytes
282 uintptr_t index
; // data[index-1] is the object address
283 void *address
; // object address, when in single thread env
285 void *value
; // null or non-zero initial value for the object
288 // Emulated TLS objects are always allocated at run-time.
289 static __inline
void *emutls_allocate_object(__emutls_control
*control
) {
290 // Use standard C types, check with gcc's emutls.o.
291 COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(gcc_pointer
));
292 COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(void *));
294 size_t size
= control
->size
;
295 size_t align
= control
->align
;
297 if (align
< sizeof(void *))
298 align
= sizeof(void *);
299 // Make sure that align is power of 2.
300 if ((align
& (align
- 1)) != 0)
303 base
= emutls_memalign_alloc(align
, size
);
305 memcpy(base
, control
->value
, size
);
307 memset(base
, 0, size
);
311 // Returns control->object.index; set index if not allocated yet.
312 static __inline
uintptr_t emutls_get_index(__emutls_control
*control
) {
313 uintptr_t index
= __atomic_load_n(&control
->object
.index
, __ATOMIC_ACQUIRE
);
317 index
= control
->object
.index
;
319 index
= ++emutls_num_object
;
320 __atomic_store_n(&control
->object
.index
, index
, __ATOMIC_RELEASE
);
327 // Updates newly allocated thread local emutls_address_array.
328 static __inline
void emutls_check_array_set_size(emutls_address_array
*array
,
333 emutls_setspecific(array
);
336 // Returns the new 'data' array size, number of elements,
337 // which must be no smaller than the given index.
338 static __inline
uintptr_t emutls_new_data_array_size(uintptr_t index
) {
339 // Need to allocate emutls_address_array with extra slots
340 // to store the header.
341 // Round up the emutls_address_array size to multiple of 16.
342 uintptr_t header_words
= sizeof(emutls_address_array
) / sizeof(void *);
343 return ((index
+ header_words
+ 15) & ~((uintptr_t)15)) - header_words
;
346 // Returns the size in bytes required for an emutls_address_array with
347 // N number of elements for data field.
348 static __inline
uintptr_t emutls_asize(uintptr_t N
) {
349 return N
* sizeof(void *) + sizeof(emutls_address_array
);
352 // Returns the thread local emutls_address_array.
353 // Extends its size if necessary to hold address at index.
354 static __inline emutls_address_array
*
355 emutls_get_address_array(uintptr_t index
) {
356 emutls_address_array
*array
= emutls_getspecific();
358 uintptr_t new_size
= emutls_new_data_array_size(index
);
359 array
= (emutls_address_array
*)malloc(emutls_asize(new_size
));
361 memset(array
->data
, 0, new_size
* sizeof(void *));
362 array
->skip_destructor_rounds
= EMUTLS_SKIP_DESTRUCTOR_ROUNDS
;
364 emutls_check_array_set_size(array
, new_size
);
365 } else if (index
> array
->size
) {
366 uintptr_t orig_size
= array
->size
;
367 uintptr_t new_size
= emutls_new_data_array_size(index
);
368 array
= (emutls_address_array
*)realloc(array
, emutls_asize(new_size
));
370 memset(array
->data
+ orig_size
, 0,
371 (new_size
- orig_size
) * sizeof(void *));
372 emutls_check_array_set_size(array
, new_size
);
378 // Our emulated TLS implementation relies on local state (e.g. for the pthread
379 // key), and if we duplicate this state across different shared libraries,
380 // accesses to the same TLS variable from different shared libraries will yield
381 // different results (see https://github.com/android/ndk/issues/1551 for an
382 // example). __emutls_get_address is the only external entry point for emulated
383 // TLS, and by making it default visibility and weak, we can rely on the dynamic
384 // linker to coalesce multiple copies at runtime and ensure a single unique copy
385 // of TLS state. This is a best effort; it won't work if the user is linking
386 // with -Bsymbolic or -Bsymbolic-functions, and it also won't work on Windows,
387 // where the dynamic linker has no notion of coalescing weak symbols at runtime.
388 // A more robust solution would be to create a separate shared library for
389 // emulated TLS, to ensure a single copy of its state.
390 __attribute__((visibility("default"), weak
))
392 void *__emutls_get_address(__emutls_control
*control
) {
393 uintptr_t index
= emutls_get_index(control
);
394 emutls_address_array
*array
= emutls_get_address_array(index
--);
395 if (array
->data
[index
] == NULL
)
396 array
->data
[index
] = emutls_allocate_object(control
);
397 return array
->data
[index
];
#ifdef __BIONIC__
// Called by Bionic on dlclose to delete the emutls pthread key.
// Does nothing if the key was never created or has already been deleted.
__attribute__((visibility("hidden"))) void __emutls_unregister_key(void) {
  if (emutls_key_created) {
    pthread_key_delete(emutls_pthread_key);
    emutls_key_created = false;
  }
}
#endif // __BIONIC__