/*
 * Silicon Graphics Computer Systems, Inc.
 *
 * Permission to use, copy, modify, distribute and sell this software
 * and its documentation for any purpose is hereby granted without fee,
 * provided that the above copyright notice appear in all copies and
 * that both that copyright notice and this permission notice appear
 * in supporting documentation.  Silicon Graphics makes no
 * representations about the suitability of this software for any
 * purpose.  It is provided "as is" without express or implied warranty.
 */
#ifndef __SGI_STL_PTHREAD_ALLOC
#define __SGI_STL_PTHREAD_ALLOC
// Pthread-specific node allocator.
// This is similar to the default allocator, except that free-list
// information is kept separately for each thread, avoiding locking.
// This should be reasonably fast even in the presence of threads.
// The down side is that storage may not be well-utilized.
// It is not an error to allocate memory in thread A and deallocate
// it in thread B.  But this effectively transfers ownership of the memory,
// so that it can only be reallocated by thread B.  Thus this can effectively
// result in a storage leak if it's done on a regular basis.
// It can also result in frequent sharing of
// cache lines among processors, with potentially serious performance
// consequences.
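//
// Usage sketch (illustrative only, not part of the interface): raw storage
// is obtained and returned through the static interface of pthread_alloc,
// the default instantiation defined below.  The size passed to deallocate
// must match the size passed to allocate:
//
//     void* __p = pthread_alloc::allocate(64);  // from this thread's free list
//     /* ... use the 64 bytes at __p ... */
//     pthread_alloc::deallocate(__p, 64);       // back to this thread's free list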
#include <pthread.h>
#include <stl_config.h>
#include <stl_alloc.h>

#ifndef __RESTRICT
#  define __RESTRICT
#endif

#define __STL_DATA_ALIGNMENT 8
union _Pthread_alloc_obj {
    union _Pthread_alloc_obj * __free_list_link;
    char __client_data[__STL_DATA_ALIGNMENT];    /* The client sees this. */
};
// Pthread allocators don't appear to the client to have meaningful
// instances.  We do in fact need to associate some state with each
// thread.  That state is represented by
// _Pthread_alloc_per_thread_state<_Max_size>.
template<size_t _Max_size>
struct _Pthread_alloc_per_thread_state {
  typedef _Pthread_alloc_obj __obj;
  enum { _S_NFREELISTS = _Max_size/__STL_DATA_ALIGNMENT };
  _Pthread_alloc_obj* volatile __free_list[_S_NFREELISTS];
  _Pthread_alloc_per_thread_state<_Max_size> * __next;
        // Free list link for list of available per thread structures.
        // When one of these becomes available for reuse due to thread
        // termination, any objects in its free list remain associated
        // with it.  The whole structure may then be used by a newly
        // created thread.
  _Pthread_alloc_per_thread_state() : __next(0)
  {
    memset((void *)__free_list, 0, _S_NFREELISTS * sizeof(__obj *));
  }
  // Returns an object of size __n, and possibly adds to size n free list.
  void *_M_refill(size_t __n);
};
// Pthread-specific allocator.
// The argument specifies the largest object size allocated from per-thread
// free lists.  Larger objects are allocated using malloc_alloc.
// Max_size must be a power of 2.
template <size_t _Max_size = 128>
class _Pthread_alloc_template {

public: // but only for internal use:

  typedef _Pthread_alloc_obj __obj;

  // Allocates a chunk for nobjs of size "size".  nobjs may be reduced
  // if it is inconvenient to allocate the requested number.
  static char *_S_chunk_alloc(size_t __size, int &__nobjs);

  enum {_S_ALIGN = __STL_DATA_ALIGNMENT};

  static size_t _S_round_up(size_t __bytes) {
    return (((__bytes) + _S_ALIGN-1) & ~(_S_ALIGN - 1));
  }
  static size_t _S_freelist_index(size_t __bytes) {
    return (((__bytes) + _S_ALIGN-1)/_S_ALIGN - 1);
  }
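  // Worked example (illustrative): with _S_ALIGN == 8, a request for 13
  // bytes is rounded up to 16 by _S_round_up ((13 + 7) & ~7 == 16), and
  // _S_freelist_index(13) == (13 + 7)/8 - 1 == 1, i.e. the free list that
  // holds 16-byte objects (index 0 holds 8-byte objects).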
private:
  // Chunk allocation state.  And other shared state.
  // Protected by _S_chunk_allocator_lock.
  static pthread_mutex_t _S_chunk_allocator_lock;
  static char *_S_start_free;
  static char *_S_end_free;
  static size_t _S_heap_size;
  static _Pthread_alloc_per_thread_state<_Max_size>* _S_free_per_thread_states;
  static pthread_key_t _S_key;
  static bool _S_key_initialized;
        // Pthread key under which per thread state is stored.
        // Allocator instances that are currently unclaimed by any thread.
  static void _S_destructor(void *instance);
        // Function to be called on thread exit to reclaim per thread
        // state.
  static _Pthread_alloc_per_thread_state<_Max_size> *_S_new_per_thread_state();
        // Return a recycled or new per thread state.
  static _Pthread_alloc_per_thread_state<_Max_size> *_S_get_per_thread_state();
        // Ensure that the current thread has an associated
        // per thread state.
  friend class _M_lock;
  class _M_lock {
    public:
      _M_lock () { pthread_mutex_lock(&_S_chunk_allocator_lock); }
      ~_M_lock () { pthread_mutex_unlock(&_S_chunk_allocator_lock); }
  };
public:

  /* __n must be > 0.  */
  static void * allocate(size_t __n)
  {
    __obj * volatile * __my_free_list;
    __obj * __RESTRICT __result;
    _Pthread_alloc_per_thread_state<_Max_size>* __a;

    if (__n > _Max_size) {
        return(malloc_alloc::allocate(__n));
    }
    if (!_S_key_initialized ||
        !(__a = (_Pthread_alloc_per_thread_state<_Max_size>*)
                                 pthread_getspecific(_S_key))) {
        __a = _S_get_per_thread_state();
    }
    __my_free_list = __a -> __free_list + _S_freelist_index(__n);
    __result = *__my_free_list;
    if (__result == 0) {
        void *__r = __a -> _M_refill(_S_round_up(__n));
        return __r;
    }
    *__my_free_list = __result -> __free_list_link;
    return (__result);
  }
  /* __p may not be 0.  */
  static void deallocate(void *__p, size_t __n)
  {
    __obj *__q = (__obj *)__p;
    __obj * volatile * __my_free_list;
    _Pthread_alloc_per_thread_state<_Max_size>* __a;

    if (__n > _Max_size) {
        malloc_alloc::deallocate(__p, __n);
        return;
    }
    if (!_S_key_initialized ||
        !(__a = (_Pthread_alloc_per_thread_state<_Max_size> *)
                pthread_getspecific(_S_key))) {
        __a = _S_get_per_thread_state();
    }
    __my_free_list = __a->__free_list + _S_freelist_index(__n);
    __q -> __free_list_link = *__my_free_list;
    *__my_free_list = __q;
  }

  static void * reallocate(void *__p, size_t __old_sz, size_t __new_sz);
};
typedef _Pthread_alloc_template<> pthread_alloc;
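// Typed allocations can go through the simple_alloc adaptor from
// <stl_alloc.h> (sketch, assuming that adaptor's usual static interface):
//
//     typedef simple_alloc<int, pthread_alloc> _Int_alloc;
//     int* __buf = _Int_alloc::allocate(10);   // space for 10 ints
//     _Int_alloc::deallocate(__buf, 10);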
template <size_t _Max_size>
void _Pthread_alloc_template<_Max_size>::_S_destructor(void * __instance)
{
    _M_lock __lock_instance;    // Need to acquire lock here.
    _Pthread_alloc_per_thread_state<_Max_size>* __s =
        (_Pthread_alloc_per_thread_state<_Max_size> *)__instance;
    __s -> __next = _S_free_per_thread_states;
    _S_free_per_thread_states = __s;
}
template <size_t _Max_size>
_Pthread_alloc_per_thread_state<_Max_size> *
_Pthread_alloc_template<_Max_size>::_S_new_per_thread_state()
{
    /* lock already held here. */
    if (0 != _S_free_per_thread_states) {
        _Pthread_alloc_per_thread_state<_Max_size> *__result =
            _S_free_per_thread_states;
        _S_free_per_thread_states = _S_free_per_thread_states -> __next;
        return __result;
    } else {
        return new _Pthread_alloc_per_thread_state<_Max_size>;
    }
}
template <size_t _Max_size>
_Pthread_alloc_per_thread_state<_Max_size> *
_Pthread_alloc_template<_Max_size>::_S_get_per_thread_state()
{
    _M_lock __lock_instance;    // Need to acquire lock here.
    _Pthread_alloc_per_thread_state<_Max_size> * __result;
    if (!_S_key_initialized) {
        if (pthread_key_create(&_S_key, _S_destructor)) {
            abort();
        }
        _S_key_initialized = true;
    }
    __result = _S_new_per_thread_state();
    if (pthread_setspecific(_S_key, __result)) abort();
    return __result;
}
/* We allocate memory in large chunks in order to avoid fragmenting     */
/* the malloc heap too much.                                            */
/* We assume that size is properly aligned.                             */
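/* Illustrative example of the growth heuristic below: with __size == 16,    */
/* __nobjs == 128 and _S_heap_size == 1000, __total_bytes == 2048 and        */
/* __bytes_to_get == 2 * 2048 + _S_round_up(1000 >> 4) == 4096 + 64 == 4160; */
/* the _SGI_SOURCE branch then masks this down to a cache-line multiple.     */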
template <size_t _Max_size>
char *_Pthread_alloc_template<_Max_size>
::_S_chunk_alloc(size_t __size, int &__nobjs)
{
  {
    char * __result;
    size_t __total_bytes;
    size_t __bytes_left;
    _M_lock __lock_instance;         // Acquire lock for this routine

    __total_bytes = __size * __nobjs;
    __bytes_left = _S_end_free - _S_start_free;
    if (__bytes_left >= __total_bytes) {
        __result = _S_start_free;
        _S_start_free += __total_bytes;
        return(__result);
    } else if (__bytes_left >= __size) {
        __nobjs = __bytes_left/__size;
        __total_bytes = __size * __nobjs;
        __result = _S_start_free;
        _S_start_free += __total_bytes;
        return(__result);
    } else {
        size_t __bytes_to_get =
          2 * __total_bytes + _S_round_up(_S_heap_size >> 4);
        // Try to make use of the left-over piece.
        if (__bytes_left > 0) {
            _Pthread_alloc_per_thread_state<_Max_size>* __a =
                (_Pthread_alloc_per_thread_state<_Max_size>*)
                        pthread_getspecific(_S_key);
            __obj * volatile * __my_free_list =
                        __a->__free_list + _S_freelist_index(__bytes_left);

            ((__obj *)_S_start_free) -> __free_list_link = *__my_free_list;
            *__my_free_list = (__obj *)_S_start_free;
        }
#       ifdef _SGI_SOURCE
          // Try to get memory that's aligned on something like a
          // cache line boundary, so as to avoid parceling out
          // parts of the same line to different threads and thus
          // possibly different processors.
          {
            const int __cache_line_size = 128;  // probable upper bound
            __bytes_to_get &= ~(__cache_line_size-1);
            _S_start_free = (char *)memalign(__cache_line_size, __bytes_to_get);
            if (0 == _S_start_free) {
              _S_start_free = (char *)malloc_alloc::allocate(__bytes_to_get);
            }
          }
#       else  /* !_SGI_SOURCE */
          _S_start_free = (char *)malloc_alloc::allocate(__bytes_to_get);
#       endif
        _S_heap_size += __bytes_to_get;
        _S_end_free = _S_start_free + __bytes_to_get;
    }
  }
  // lock is released here
  return(_S_chunk_alloc(__size, __nobjs));
}
/* Returns an object of size n, and optionally adds to size n free list.*/
/* We assume that n is properly aligned.                                */
/* We hold the allocation lock.                                         */
template <size_t _Max_size>
void *_Pthread_alloc_per_thread_state<_Max_size>
::_M_refill(size_t __n)
{
    int __nobjs = 128;
    char * __chunk =
        _Pthread_alloc_template<_Max_size>::_S_chunk_alloc(__n, __nobjs);
    __obj * volatile * __my_free_list;
    __obj * __result;
    __obj * __current_obj, * __next_obj;
    int __i;

    if (1 == __nobjs) {
        return(__chunk);
    }
    __my_free_list = __free_list
        + _Pthread_alloc_template<_Max_size>::_S_freelist_index(__n);

    /* Build free list in chunk */
    __result = (__obj *)__chunk;
    *__my_free_list = __next_obj = (__obj *)(__chunk + __n);
    for (__i = 1; ; __i++) {
        __current_obj = __next_obj;
        __next_obj = (__obj *)((char *)__next_obj + __n);
        if (__nobjs - 1 == __i) {
            __current_obj -> __free_list_link = 0;
            break;
        } else {
            __current_obj -> __free_list_link = __next_obj;
        }
    }
    return(__result);
}
template <size_t _Max_size>
void *_Pthread_alloc_template<_Max_size>
::reallocate(void *__p, size_t __old_sz, size_t __new_sz)
{
    void * __result;
    size_t __copy_sz;

    if (__old_sz > _Max_size
        && __new_sz > _Max_size) {
        return(realloc(__p, __new_sz));
    }
    if (_S_round_up(__old_sz) == _S_round_up(__new_sz)) return(__p);
    __result = allocate(__new_sz);
    __copy_sz = __new_sz > __old_sz? __old_sz : __new_sz;
    memcpy(__result, __p, __copy_sz);
    deallocate(__p, __old_sz);
    return(__result);
}
template <size_t _Max_size>
_Pthread_alloc_per_thread_state<_Max_size> *
_Pthread_alloc_template<_Max_size>::_S_free_per_thread_states = 0;

template <size_t _Max_size>
pthread_key_t _Pthread_alloc_template<_Max_size>::_S_key;

template <size_t _Max_size>
bool _Pthread_alloc_template<_Max_size>::_S_key_initialized = false;

template <size_t _Max_size>
pthread_mutex_t _Pthread_alloc_template<_Max_size>::_S_chunk_allocator_lock
= PTHREAD_MUTEX_INITIALIZER;
template <size_t _Max_size>
char *_Pthread_alloc_template<_Max_size>
::_S_start_free = 0;

template <size_t _Max_size>
char *_Pthread_alloc_template<_Max_size>
::_S_end_free = 0;

template <size_t _Max_size>
size_t _Pthread_alloc_template<_Max_size>
::_S_heap_size = 0;
#ifdef __STL_USE_STD_ALLOCATORS

template <class _Tp>
class pthread_allocator {
  typedef pthread_alloc _S_Alloc;          // The underlying allocator.
public:
  typedef size_t     size_type;
  typedef ptrdiff_t  difference_type;
  typedef _Tp*       pointer;
  typedef const _Tp* const_pointer;
  typedef _Tp&       reference;
  typedef const _Tp& const_reference;
  typedef _Tp        value_type;

  template <class _Up> struct rebind {
    typedef pthread_allocator<_Up> other;
  };

  pthread_allocator() __STL_NOTHROW {}
  pthread_allocator(const pthread_allocator& a) __STL_NOTHROW {}
  template <class _Up> pthread_allocator(const pthread_allocator<_Up>&)
    __STL_NOTHROW {}
  ~pthread_allocator() __STL_NOTHROW {}

  pointer address(reference __x) const { return &__x; }
  const_pointer address(const_reference __x) const { return &__x; }

  // __n is permitted to be 0.  The C++ standard says nothing about what
  // the return value is when __n == 0.
  _Tp* allocate(size_type __n, const void* = 0) {
    return __n != 0 ? static_cast<_Tp*>(_S_Alloc::allocate(__n * sizeof(_Tp)))
                    : 0;
  }

  // __p is not permitted to be a null pointer.
  void deallocate(pointer __p, size_type __n)
    { _S_Alloc::deallocate(__p, __n * sizeof(_Tp)); }

  size_type max_size() const __STL_NOTHROW
    { return size_t(-1) / sizeof(_Tp); }

  void construct(pointer __p, const _Tp& __val) { new(__p) _Tp(__val); }
  void destroy(pointer _p) { _p->~_Tp(); }
};
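// Usage sketch (illustrative only): a container can be parameterized with
// pthread_allocator so that its storage is drawn from the calling thread's
// free lists.  Assumes <vector> is available and the names are visible in
// the current namespace:
//
//     vector<int, pthread_allocator<int> > __v;
//     __v.push_back(17);   // element storage obtained via pthread_alloc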
template <>
class pthread_allocator<void> {
public:
  typedef size_t      size_type;
  typedef ptrdiff_t   difference_type;
  typedef void*       pointer;
  typedef const void* const_pointer;
  typedef void        value_type;

  template <class _Up> struct rebind {
    typedef pthread_allocator<_Up> other;
  };
};
template <size_t _Max_size>
inline bool operator==(const _Pthread_alloc_template<_Max_size>&,
                       const _Pthread_alloc_template<_Max_size>&)
{ return true; }

template <class _T1, class _T2>
inline bool operator==(const pthread_allocator<_T1>&,
                       const pthread_allocator<_T2>&)
{ return true; }

template <class _T1, class _T2>
inline bool operator!=(const pthread_allocator<_T1>&,
                       const pthread_allocator<_T2>&)
{ return false; }
template <class _Tp, size_t _Max_size>
struct _Alloc_traits<_Tp, _Pthread_alloc_template<_Max_size> >
{
  static const bool _S_instanceless = true;
  typedef simple_alloc<_Tp, _Pthread_alloc_template<_Max_size> > _Alloc_type;
  typedef __allocator<_Tp, _Pthread_alloc_template<_Max_size> >
          allocator_type;
};

template <class _Tp, class _Up, size_t _Max>
struct _Alloc_traits<_Tp, __allocator<_Up, _Pthread_alloc_template<_Max> > >
{
  static const bool _S_instanceless = true;
  typedef simple_alloc<_Tp, _Pthread_alloc_template<_Max> > _Alloc_type;
  typedef __allocator<_Tp, _Pthread_alloc_template<_Max> > allocator_type;
};

template <class _Tp, class _Up>
struct _Alloc_traits<_Tp, pthread_allocator<_Up> >
{
  static const bool _S_instanceless = true;
  typedef simple_alloc<_Tp, _Pthread_alloc_template<> > _Alloc_type;
  typedef pthread_allocator<_Tp> allocator_type;
};
#endif /* __STL_USE_STD_ALLOCATORS */

#endif /* __SGI_STL_PTHREAD_ALLOC */