usr/src/lib/libc/port/threads/tsd.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21
  22 /*
  23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26
  27 #pragma ident   "%Z%%M% %I%     %E% SMI"
  28
  29 #include "lint.h"
  30 #include "thr_uberdata.h"
  31 #include <stddef.h>
  32
  33 /*
  34  * These symbols should not be exported from libc, but
  35  * /lib/libm.so.2 references them.  libm needs to be fixed.
  36  * Also, some older versions of the Studio compiler/debugger
  37  * components reference them.  These need to be fixed, too.
  38  */
  39 #pragma weak _thr_getspecific = thr_getspecific
  40 #pragma weak _thr_keycreate = thr_keycreate
  41 #pragma weak _thr_setspecific = thr_setspecific
  42
  43 /*
  44  * 128 million keys should be enough for anyone.
  45  * This allocates half a gigabyte of memory for the keys themselves and
  46  * half a gigabyte of memory for each thread that uses the largest key.
  47  */
  48 #define MAX_KEYS        0x08000000U
  49
  50 int
  51 thr_keycreate(thread_key_t *pkey, void (*destructor)(void *))
  52 {
  53         tsd_metadata_t *tsdm = &curthread->ul_uberdata->tsd_metadata;
  54         void (**old_data)(void *) = NULL;
  55         void (**new_data)(void *);
  56         uint_t old_nkeys;
  57         uint_t new_nkeys;
  58
  59         lmutex_lock(&tsdm->tsdm_lock);
  60
  61         /*
  62          * Unfortunately, pthread_getspecific() specifies that a
  63          * pthread_getspecific() on an allocated key upon which the
  64          * calling thread has not performed a pthread_setspecifc()
  65          * must return NULL.  Consider the following sequence:
  66          *
  67          *      pthread_key_create(&key);
  68          *      pthread_setspecific(key, datum);
  69          *      pthread_key_delete(&key);
  70          *      pthread_key_create(&key);
  71          *      val = pthread_getspecific(key);
  72          *
  73          * According to POSIX, if the deleted key is reused for the new
  74          * key returned by the second pthread_key_create(), then the
  75          * pthread_getspecific() in the above example must return NULL
  76          * (and not the stale datum).  The implementation is thus left
  77          * with two alternatives:
  78          *
  79          *  (1) Reuse deleted keys.  If this is to be implemented optimally,
  80          *      it requires that pthread_key_create() somehow associate
  81          *      the value NULL with the new (reused) key for each thread.
  82          *      Keeping the hot path fast and lock-free induces substantial
  83          *      complexity on the implementation.
  84          *
  85          *  (2) Never reuse deleted keys. This allows the pthread_getspecific()
  86          *      implementation to simply perform a check against the number
  87          *      of keys set by the calling thread, returning NULL if the
  88          *      specified key is larger than the highest set key.  This has
  89          *      the disadvantage of wasting memory (a program which simply
  90          *      loops calling pthread_key_create()/pthread_key_delete()
  91          *      will ultimately run out of memory), but permits an optimal
  92          *      pthread_getspecific() while allowing for simple key creation
  93          *      and deletion.
  94          *
  95          * All Solaris implementations have opted for (2).  Given the
  96          * ~10 years that this has been in the field, it is safe to assume
  97          * that applications don't loop creating and destroying keys; we
  98          * stick with (2).
  99          */
 100         if (tsdm->tsdm_nused == (old_nkeys = tsdm->tsdm_nkeys)) {
 101                 /*
 102                  * We need to allocate or double the number of keys.
 103                  * tsdm->tsdm_nused must always be a power of two.
 104                  */
 105                 if ((new_nkeys = (old_nkeys << 1)) == 0)
 106                         new_nkeys = 8;
 107
 108                 if (new_nkeys > MAX_KEYS) {
 109                         lmutex_unlock(&tsdm->tsdm_lock);
 110                         return (EAGAIN);
 111                 }
 112                 if ((new_data = lmalloc(new_nkeys * sizeof (void *))) == NULL) {
 113                         lmutex_unlock(&tsdm->tsdm_lock);
 114                         return (ENOMEM);
 115                 }
 116                 if ((old_data = tsdm->tsdm_destro) == NULL) {
 117                         /* key == 0 is always invalid */
 118                         new_data[0] = TSD_UNALLOCATED;
 119                         tsdm->tsdm_nused = 1;
 120                 } else {
 121                         (void) memcpy(new_data, old_data,
 122                             old_nkeys * sizeof (void *));
 123                 }
 124                 tsdm->tsdm_destro = new_data;
 125                 tsdm->tsdm_nkeys = new_nkeys;
 126         }
 127
 128         *pkey = tsdm->tsdm_nused;
 129         tsdm->tsdm_destro[tsdm->tsdm_nused++] = destructor;
 130         lmutex_unlock(&tsdm->tsdm_lock);
 131
 132         if (old_data != NULL)
 133                 lfree(old_data, old_nkeys * sizeof (void *));
 134
 135         return (0);
 136 }
 137
 138 #pragma weak _pthread_key_create = pthread_key_create
 139 int
 140 pthread_key_create(pthread_key_t *pkey, void (*destructor)(void *))
 141 {
 142         return (thr_keycreate(pkey, destructor));
 143 }
 144
 145 /*
 146  * Same as thr_keycreate(), above, except that the key creation
 147  * is performed only once.  This relies upon the fact that a key
 148  * value of THR_ONCE_KEY is invalid, and requires that the key be
 149  * allocated with a value of THR_ONCE_KEY before calling here.
 150  * THR_ONCE_KEY and PTHREAD_ONCE_KEY_NP, defined in <thread.h>
 151  * and <pthread.h> respectively, must have the same value.
 152  * Example:
 153  *
 154  *      static pthread_key_t key = PTHREAD_ONCE_KEY_NP;
 155  *      ...
 156  *      pthread_key_create_once_np(&key, destructor);
 157  */
 158 #pragma weak pthread_key_create_once_np = thr_keycreate_once
 159 int
 160 thr_keycreate_once(thread_key_t *keyp, void (*destructor)(void *))
 161 {
 162         static mutex_t key_lock = DEFAULTMUTEX;
 163         thread_key_t key;
 164         int error;
 165
 166         if (*keyp == THR_ONCE_KEY) {
 167                 lmutex_lock(&key_lock);
 168                 if (*keyp == THR_ONCE_KEY) {
 169                         error = thr_keycreate(&key, destructor);
 170                         if (error) {
 171                                 lmutex_unlock(&key_lock);
 172                                 return (error);
 173                         }
 174                         membar_producer();
 175                         *keyp = key;
 176                 }
 177                 lmutex_unlock(&key_lock);
 178         }
 179         membar_consumer();
 180
 181         return (0);
 182 }
 183
 184 int
 185 pthread_key_delete(pthread_key_t key)
 186 {
 187         tsd_metadata_t *tsdm = &curthread->ul_uberdata->tsd_metadata;
 188
 189         lmutex_lock(&tsdm->tsdm_lock);
 190
 191         if (key >= tsdm->tsdm_nused ||
 192             tsdm->tsdm_destro[key] == TSD_UNALLOCATED) {
 193                 lmutex_unlock(&tsdm->tsdm_lock);
 194                 return (EINVAL);
 195         }
 196
 197         tsdm->tsdm_destro[key] = TSD_UNALLOCATED;
 198         lmutex_unlock(&tsdm->tsdm_lock);
 199
 200         return (0);
 201 }
 202
 203 /*
 204  * Blessedly, the pthread_getspecific() interface is much better than the
 205  * thr_getspecific() interface in that it cannot return an error status.
 206  * Thus, if the key specified is bogus, pthread_getspecific()'s behavior
  207  * is undefined.  As an added bonus (and as an artifact of not returning
 208  * an error code), the requested datum is returned rather than stored
 209  * through a parameter -- thereby avoiding the unnecessary store/load pair
 210  * incurred by thr_getspecific().  Every once in a while, the Standards
 211  * get it right -- but usually by accident.
 212  */
 213 void *
 214 pthread_getspecific(pthread_key_t key)
 215 {
 216         tsd_t *stsd;
 217
 218         /*
 219          * We are cycle-shaving in this function because some
 220          * applications make heavy use of it and one machine cycle
 221          * can make a measurable difference in performance.  This
 222          * is why we waste a little memory and allocate a NULL value
 223          * for the invalid key == 0 in curthread->ul_ftsd[0] rather
 224          * than adjusting the key by subtracting one.
 225          */
 226         if (key < TSD_NFAST)
 227                 return (curthread->ul_ftsd[key]);
 228
 229         if ((stsd = curthread->ul_stsd) != NULL && key < stsd->tsd_nalloc)
 230                 return (stsd->tsd_data[key]);
 231
 232         return (NULL);
 233 }
 234
 235 int
 236 thr_getspecific(thread_key_t key, void **valuep)
 237 {
 238         tsd_t *stsd;
 239
 240         /*
 241          * Amazingly, some application code (and worse, some particularly
 242          * fugly Solaris library code) _relies_ on the fact that 0 is always
 243          * an invalid key.  To preserve this semantic, 0 is never returned
 244          * as a key from thr_/pthread_key_create(); we explicitly check
 245          * for it here and return EINVAL.
 246          */
 247         if (key == 0)
 248                 return (EINVAL);
 249
 250         if (key < TSD_NFAST)
 251                 *valuep = curthread->ul_ftsd[key];
 252         else if ((stsd = curthread->ul_stsd) != NULL && key < stsd->tsd_nalloc)
 253                 *valuep = stsd->tsd_data[key];
 254         else
 255                 *valuep = NULL;
 256
 257         return (0);
 258 }
 259
 260 /*
 261  * We call thr_setspecific_slow() when the key specified
 262  * is beyond the current thread's currently allocated range.
 263  * This case is in a separate function because we want
 264  * the compiler to optimize for the common case.
 265  */
 266 static int
 267 thr_setspecific_slow(thread_key_t key, void *value)
 268 {
 269         ulwp_t *self = curthread;
 270         tsd_metadata_t *tsdm = &self->ul_uberdata->tsd_metadata;
 271         tsd_t *stsd;
 272         tsd_t *ntsd;
 273         uint_t nkeys;
 274
 275         /*
 276          * It isn't necessary to grab locks in this path;
 277          * tsdm->tsdm_nused can only increase.
 278          */
 279         if (key >= tsdm->tsdm_nused)
 280                 return (EINVAL);
 281
 282         /*
 283          * We would like to test (tsdm->tsdm_destro[key] == TSD_UNALLOCATED)
 284          * here but that would require acquiring tsdm->tsdm_lock and we
 285          * want to avoid locks in this path.
 286          *
 287          * We have a key which is (or at least _was_) valid.  If this key
 288          * is later deleted (or indeed, is deleted before we set the value),
 289          * we don't care; such a condition would indicate an application
 290          * race for which POSIX thankfully leaves the behavior unspecified.
 291          *
 292          * First, determine our new size.  To avoid allocating more than we
 293          * have to, continue doubling our size only until the new key fits.
 294          * stsd->tsd_nalloc must always be a power of two.
 295          */
 296         nkeys = ((stsd = self->ul_stsd) != NULL)? stsd->tsd_nalloc : 8;
 297         for (; key >= nkeys; nkeys <<= 1)
 298                 continue;
 299
 300         /*
 301          * Allocate the new TSD.
 302          */
 303         if ((ntsd = lmalloc(nkeys * sizeof (void *))) == NULL)
 304                 return (ENOMEM);
 305
 306         if (stsd != NULL) {
 307                 /*
 308                  * Copy the old TSD across to the new.
 309                  */
 310                 (void) memcpy(ntsd, stsd, stsd->tsd_nalloc * sizeof (void *));
 311                 lfree(stsd, stsd->tsd_nalloc * sizeof (void *));
 312         }
 313
 314         ntsd->tsd_nalloc = nkeys;
 315         ntsd->tsd_data[key] = value;
 316         self->ul_stsd = ntsd;
 317
 318         return (0);
 319 }
 320
 321 int
 322 thr_setspecific(thread_key_t key, void *value)
 323 {
 324         tsd_t *stsd;
 325         int ret;
 326         ulwp_t *self = curthread;
 327
 328         /*
 329          * See the comment in thr_getspecific(), above.
 330          */
 331         if (key == 0)
 332                 return (EINVAL);
 333
 334         if (key < TSD_NFAST) {
 335                 curthread->ul_ftsd[key] = value;
 336                 return (0);
 337         }
 338
 339         if ((stsd = curthread->ul_stsd) != NULL && key < stsd->tsd_nalloc) {
 340                 stsd->tsd_data[key] = value;
 341                 return (0);
 342         }
 343
 344         /*
 345          * This is a critical region since we are dealing with memory
 346          * allocation and free. Similar protection required in tsd_free().
 347          */
 348         enter_critical(self);
 349         ret = thr_setspecific_slow(key, value);
 350         exit_critical(self);
 351         return (ret);
 352 }
 353
 354 int
 355 pthread_setspecific(pthread_key_t key, const void *value)
 356 {
 357         return (thr_setspecific(key, (void *)value));
 358 }
 359
 360 /*
 361  * Contract-private interface for java.  See PSARC/2003/159
 362  *
 363  * If the key falls within the TSD_NFAST range, return a non-negative
 364  * offset that can be used by the caller to fetch the TSD data value
 365  * directly out of the thread structure using %g7 (sparc) or %gs (x86).
 366  * With the advent of TLS, %g7 and %gs are part of the ABI, even though
 367  * the definition of the thread structure itself (ulwp_t) is private.
 368  *
 369  * We guarantee that the offset returned on sparc will fit within
 370  * a SIMM13 field (that is, it is less than 2048).
 371  *
 372  * On failure (key is not in the TSD_NFAST range), return -1.
 373  */
 374 ptrdiff_t
 375 _thr_slot_offset(thread_key_t key)
 376 {
 377         if (key != 0 && key < TSD_NFAST)
 378                 return ((ptrdiff_t)offsetof(ulwp_t, ul_ftsd[key]));
 379         return (-1);
 380 }
 381
 382 /*
 383  * This is called by _thrp_exit() to apply destructors to the thread's tsd.
 384  */
 385 void
 386 tsd_exit()
 387 {
 388         ulwp_t *self = curthread;
 389         tsd_metadata_t *tsdm = &self->ul_uberdata->tsd_metadata;
 390         thread_key_t key;
 391         int recheck;
 392         void *val;
 393         void (*func)(void *);
 394
 395         lmutex_lock(&tsdm->tsdm_lock);
 396
 397         do {
 398                 recheck = 0;
 399
 400                 for (key = 1; key < TSD_NFAST &&
 401                     key < tsdm->tsdm_nused; key++) {
 402                         if ((func = tsdm->tsdm_destro[key]) != NULL &&
 403                             func != TSD_UNALLOCATED &&
 404                             (val = self->ul_ftsd[key]) != NULL) {
 405                                 self->ul_ftsd[key] = NULL;
 406                                 lmutex_unlock(&tsdm->tsdm_lock);
 407                                 (*func)(val);
 408                                 lmutex_lock(&tsdm->tsdm_lock);
 409                                 recheck = 1;
 410                         }
 411                 }
 412
 413                 if (self->ul_stsd == NULL)
 414                         continue;
 415
 416                 /*
 417                  * Any of these destructors could cause us to grow the number
 418                  * TSD keys in the slow TSD; we cannot cache the slow TSD
 419                  * pointer through this loop.
 420                  */
 421                 for (; key < self->ul_stsd->tsd_nalloc &&
 422                     key < tsdm->tsdm_nused; key++) {
 423                         if ((func = tsdm->tsdm_destro[key]) != NULL &&
 424                             func != TSD_UNALLOCATED &&
 425                             (val = self->ul_stsd->tsd_data[key]) != NULL) {
 426                                 self->ul_stsd->tsd_data[key] = NULL;
 427                                 lmutex_unlock(&tsdm->tsdm_lock);
 428                                 (*func)(val);
 429                                 lmutex_lock(&tsdm->tsdm_lock);
 430                                 recheck = 1;
 431                         }
 432                 }
 433         } while (recheck);
 434
 435         lmutex_unlock(&tsdm->tsdm_lock);
 436
 437         /*
 438          * We're done; if we have slow TSD, we need to free it.
 439          */
 440         tsd_free(self);
 441 }
 442
 443 void
 444 tsd_free(ulwp_t *ulwp)
 445 {
 446         tsd_t *stsd;
 447         ulwp_t *self = curthread;
 448
 449         enter_critical(self);
 450         if ((stsd = ulwp->ul_stsd) != NULL)
 451                 lfree(stsd, stsd->tsd_nalloc * sizeof (void *));
 452         ulwp->ul_stsd = NULL;
 453         exit_critical(self);
 454 }