3 // HigherOrderMessaging
5 // Created by Ofri Wolfus on 09/05/07.
6 // Copyright 2007 Ofri Wolfus. All rights reserved.
8 // Redistribution and use in source and binary forms, with or without modification,
9 // are permitted provided that the following conditions are met:
11 // 1. Redistributions of source code must retain the above copyright
12 // notice, this list of conditions and the following disclaimer.
13 // 2. Redistributions in binary form must reproduce the above copyright
14 // notice, this list of conditions and the following disclaimer in the
15 // documentation and/or other materials provided with the distribution.
16 // 3. Neither the name of Ofri Wolfus nor the names of his contributors
17 // may be used to endorse or promote products derived from this software
18 // without specific prior written permission.
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 // IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 #import "DPObjCRuntime.h"
33 #import <Foundation/Foundation.h>
34 #include <libkern/OSAtomic.h>
39 struct _dp_message_content content;
42 Class _dp_uninitialized_msg_cls = Nil;
43 Class _dp_msg_cls = Nil;
44 static Class _dp_refCounted_msg_cls = Nil;
46 @class _DPUninitializedMessage, DPRefCountedMessage;
48 #define MAX_CACHED_MESSAGES 50
49 static struct _dp_msg _messages_cache[MAX_CACHED_MESSAGES];
50 static id _unused_messages_cache[MAX_CACHED_MESSAGES] = { NULL };
51 static int32_t _messages_cache_size = MAX_CACHED_MESSAGES;
52 static OSSpinLock _messages_cache_lock = OS_SPINLOCK_INIT;
55 * Before the main() is entered (but after all libraries are loaded),
56 * we keep pointers to the DPMessage and _DPUninitializedMessage classes.
57 * This way we can skip the repeated calls to objc_getClass() in MSG(),
58 * MSGV() and -[_DPUninitializedMessage forward::], which gives us a nice
60 * Internally, objc_getClass() uses a pthread mutex to make sure the class
61 * table is thread safe, which slows us down for no good reason.
static void __attribute__((constructor)) _dp_init_DPMessage(void) {
    int slot;

    // Look the message classes up once and keep them in globals so that
    // later code can use the stored pointers instead of asking the runtime.
    _dp_uninitialized_msg_cls = (Class)objc_getClass("_DPUninitializedMessage");
    _dp_msg_cls = [DPMessage class];
    _dp_refCounted_msg_cls = [DPRefCountedMessage class];

    // Seed the free list: every entry points at the matching slot of the
    // statically allocated message storage.
    for (slot = 0; slot < MAX_CACHED_MESSAGES; ++slot)
        _unused_messages_cache[slot] = (id)(_messages_cache + slot);
}
74 // Returns a cached message from our pool of messages.
75 // If the pool is empty, nil is returned.
76 id _dp_getCachedMessage(void) {
79 // Try to get the lock.
80 // If we can't, don't waste any time and just return nil.
81 if (__builtin_expect(OSSpinLockTry(&_messages_cache_lock), true)) {
82 int32_t index = _messages_cache_size - 1;
84 // Be sure we got any objects
86 --_messages_cache_size;
87 // Grab the message from the cache
88 msg = _unused_messages_cache[index];
89 _unused_messages_cache[index] = nil;
93 OSSpinLockUnlock(&_messages_cache_lock);
95 // Initialize our message and autorelease it
97 // MSG() and MSGV() remember the class of the message
98 // and set it back when needed.
99 msg->isa = _dp_refCounted_msg_cls;
107 @implementation DPMessage
109 // Every base class must implement +initialize
// Builds an autoreleased message for `sel` from an existing argument frame.
+ (id)messageWithSelector:(SEL)sel frame:(void *)frame {
    // Create the object as an uninitialized message and let its -forward::
    // take in the selector and frame.
    id message = class_createInstance(_dp_uninitialized_msg_cls, 0);

    [message forward:sel :frame];

    // Hand the instance over to the ref-counted class before autoreleasing it.
    message->isa = _dp_refCounted_msg_cls;
    return [message autorelease];
}
120 - (id)forward:(SEL)sel :(marg_list)args {
121 #if defined(__APPLE__) // GNUStep doesn't support @throw
122 @throw [NSException exceptionWithName:@"Unknown message"
123 reason:[NSString stringWithFormat:@"Selector %@ not recognized",
124 NSStringFromSelector(sel)]
// Returns the argument frame stored in this message's content.
- (marg_list)arguments {
    marg_list frame = (marg_list)(content._frame);
    return frame;
}
// Returns the frame size recorded in this message's content.
- (unsigned)sizeOfArguments {
    unsigned frameSize = content._frameSize;
    return frameSize;
}
147 * This class is used to convert from the Objective-C
148 * runtime's marg_list structure to a structure compatible
149 * with GCC's __builtin_apply() function.
151 * The logic behind this class is very simple. If we have
152 * a marg_list we can use it to message our class, and if our
153 * class responds to it the result will be a method being invoked.
154 * Now ObjC methods are simply C functions under the hood, so calling
155 * __builtin_apply_args() from within this method will give us a new
156 * structure we can safely pass to __builtin_apply() and that contains
157 * all the information of our original marg_list.
159 * In order to use this class two steps are needed:
161 * 1) Invoke +[_DPMargListConvertor _prepareForSelector:] with the
162 * selector that matches the marg_list you wish to convert.
163 * This method will make sure _DPMargListConvertor can respond to a
164 * message with this selector and will register a new method if needed.
166 * 2) Use objc_msgSendv() to message _DPMargListConvertor with the
167 * marg_list you wish to convert. The result objc_msgSendv() will return
168 * is a newly allocated (using malloc) block with a GCC compatible frame.
169 * The returned block must be manually freed.
171 * This technique works *ONLY* with methods returning integral types.
172 * It might also work with floating points (depending on the architecture),
173 * but will definitely *NOT* work for struct return types.
175 * Warning: This class is *NOT* intended to be instantiated. Don't attempt
176 * to instantiate it as the first message sent to an instance will crash
179 @interface _DPMargListConvertor {
183 + (void)_prepareForSelector:(SEL)sel;
187 @implementation _DPMargListConvertor
189 // Every base class must implement +initialize
193 // We must also implement forward::
194 // Note that we implement it as a class method as this class
195 // is not designed to be instantiated. Any attempt to message
196 // an instance of this class will crash the process due to missing
197 // implementation of -forward::
198 + (id)forward:(SEL)sel :(marg_list)args {
// This is the implementation we put in newly added methods.
// It does exactly one thing: generates a GCC compatible arguments
// frame and copies it to a malloc() allocated block.
//
// Returns the newly allocated copy, which the caller must free(), or NULL
// if the allocation failed.
static void * _dp_captureArguments(id self, SEL sel) {
    // Since marg_list and GCC's frame structure are pretty much the same
    // (I have no idea why simply passing marg_list to __builtin_apply() doesn't
    // work. Apparently they are just structured differently), the same size
    // calculation applies to both:
    // frame size = dp_maxArgSizeForSelector() + dp_marg_prearg_size
    // Note that the structure of GCC's frame struct is documented
    // (http://developer.apple.com/documentation/DeveloperTools/gcc-4.0.1/gcc/Constructing-Calls.html#Constructing-Calls )
    // to contain the same info as marg_list, and the calculation is based on this assumption.
    // If this fails to work for some reason, we should just use a 1024 bytes size
    // like DPMessage uses.
    unsigned int s = dp_maxArgSizeForSelector(sel) + dp_marg_prearg_size;
    void *copy = malloc(s);

    // malloc() may fail, and handing NULL to memcpy() is undefined behaviour,
    // so report the failure to the caller instead of writing through NULL.
    if (copy == NULL)
        return NULL;

    return memcpy(copy, __builtin_apply_args(), s);
}
220 + (void)_prepareForSelector:(SEL)sel {
221 if (__builtin_expect(!class_getClassMethod(self, sel), 0))
222 class_addMethod(self, sel, (IMP)_dp_captureArguments,
223 // Since the types don't really matter, we'll just reuse the
224 // types of a well known existing method like -[NSObject self].
225 method_getTypeEncoding(class_getInstanceMethod([NSObject class],
232 @implementation DPMessage (Extensions)
// Tells whether the captured frame belongs to a struct-returning method.
- (BOOL)returnsStruct {
    // The first slot of a struct-returning frame is the address the result
    // gets written to; for any other method it is the receiver.
    // NOTE(review): the comparison is against self, so this presumably relies
    // on the frame having been captured with this object as receiver - confirm.
    void *firstSlot = marg_getValue([self arguments], 0, void *);
    return firstSlot != self;
}
// Returns the number of arguments the runtime reports for our selector.
- (unsigned)numberOfArguments {
    SEL selector = [self selector];
    return sel_getNumberOfArguments(selector);
}
// Delivers this message to `receiver` through objc_msgSendv() and returns
// whatever that call returns.
- (id)sendTo:(id)receiver {
    SEL selector = [self selector];
    unsigned frameSize = [self sizeOfArguments];
    marg_list frame = [self arguments];

    return objc_msgSendv(receiver, selector, frameSize, frame);
}
255 return [object_getClass(self) class];
// Describes the message as "<class name> <address> <<selector>>".
- (NSString *)description {
    NSString *className = NSStringFromClass([self class]);
    NSString *selectorName = NSStringFromSelector([self selector]);

    return [NSString stringWithFormat:@"%@ %p <%@>", className, self, selectorName];
}
// Answers YES when dp_getMethod() finds a method for `aSelector` on us.
- (BOOL)respondsToSelector:(SEL)aSelector {
    if (dp_getMethod(self, aSelector) == NULL)
        return NO;

    return YES;
}
// Returns the result of sending this message to the _DPMargListConvertor
// class, i.e. the frame produced by the method that class registers for our
// selector.
- (void *)gccArgumentsFrame {
    static id convertorClass = nil;
    SEL selector = [self selector];

    // Look the convertor class up only on the first call
    if (__builtin_expect(convertorClass == nil, 0))
        convertorClass = objc_getClass("_DPMargListConvertor");

    // The convertor needs a method for our selector before we message it
    [convertorClass _prepareForSelector:selector];

    // Messaging the convertor with our own frame yields the converted frame
    return [self sendTo:convertorClass];
}
// Returns the size of the arguments plus dp_marg_prearg_size.
- (unsigned)realFrameSize {
    unsigned argumentsSize = [self sizeOfArguments];
    return argumentsSize + dp_marg_prearg_size;
}
289 * A private subclass for ref-counted messages.
290 * DPMessage is designed to be allocated on stack
291 * and therefore completely ignores reference counting.
292 * The returned object from -[DPMessage copy] is actually
293 * a DPRefCountedMessage instance.
295 * IMPORTANT!! DPRefCountedMessage must not define any instance
296 * variables so that it can be interchangeable with DPMessage
297 * by simply swapping the isa pointer. If you must define new
298 * ivars, add them to the _dp_message_content struct and use from
301 @interface DPRefCountedMessage : DPMessage <NSCopying>
304 @implementation DPRefCountedMessage
// Returns a copy of the receiver made with NSCopyObject() in `zone`.
- (id)copyWithZone:(NSZone *)zone {
    id duplicate = NSCopyObject((id)self, 0, zone);
    return duplicate;
}
311 return [self copyWithZone:NULL];
315 NSIncrementExtraRefCount(self);
320 if (NSDecrementExtraRefCountWasZero(self)) {
321 // If we're a message from the pool of messages
322 // &(_messages_cache[0]) <= self <= &(_messages_cache[MAX_CACHED_MESSAGES - 1])
323 if ((void *)self >= (void *)&(_messages_cache[0]) &&
324 (void *)self <= (void *)&(_messages_cache[MAX_CACHED_MESSAGES - 1]))
327 OSSpinLockLock(&_messages_cache_lock);
328 // Put us back in the cache
329 _unused_messages_cache[_messages_cache_size] = self;
330 // We added a message, right?
331 ++_messages_cache_size;
333 OSSpinLockUnlock(&_messages_cache_lock);
335 // We're just a regular dynamically allocated instance
336 // so deallocate normally.
337 NSDeallocateObject((id)self);
343 [NSAutoreleasePool addObject:self];
350 @implementation DPMessage (MemoryManagement)
352 - (id)copyWithZone:(NSZone *)zone {
353 DPMessage *o = (id)NSCopyObject((id)self, 0, zone);
354 o->isa = [DPRefCountedMessage class];
359 return [self copyWithZone:NULL];
375 @interface NSObject (StopGCCFromWhining)
376 - (const char *)typeEncodingForMessage:(id)msg;
377 + (const char *)typeEncodingForMessage:(id)msg;
380 /************************************************************
381 * * * * * * * * * * * VERY IMPORTANT!! * * * * * * * * * * *
382 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
383 * _DPUninitializedMessage must be defined *EXACTLY* like
384 * DPMessage so both can be exchangeable.
385 ************************************************************/
386 @interface _DPUninitializedMessage {
388 struct _dp_message_content content;
393 @implementation _DPUninitializedMessage
398 - (id)forward:(SEL)sel :(marg_list)args {
402 // Get the frame's size
403 content._frameSize = dp_maxArgSizeForSelector(content._sel);
405 // Copy the marg list
406 // Note the size returned by dp_maxArgSizeForSelector() is the same
407 // as the size returned by method_getSizeOfArguments(), which is *NOT*
408 // the real size of the frame in PPC. The true size is always
409 // method_getSizeOfArguments() + dp_marg_prearg_size.
410 memcpy(content._frame, args, content._frameSize + dp_marg_prearg_size);
412 // We're good to go. Set our class back to DPMessage
413 self->isa = _dp_msg_cls;
415 // Get the frame's size
416 if (__builtin_expect(sel == @selector(initWithFormat:) ||
417 sel == @selector(stringByAppendingFormat:) ||
418 sel == @selector(initWithFormat:locale:) ||
419 sel == @selector(appendFormat:) ||
420 sel == @selector(stringWithFormat:), 0))
422 const char *t = [NSString typeEncodingForMessage:self];
423 unsigned size = dp_getSizeOfArguments(t);
424 if (size > content._frameSize) {
425 memcpy((void *)(content._frame) + content._frameSize + dp_marg_prearg_size,
426 args + content._frameSize + dp_marg_prearg_size,
427 size - content._frameSize);
428 content._frameSize = size;
435 + (id)forward:(SEL)sel :(marg_list)args {
442 //=========================================================//
443 //=========================================================//
445 #pragma mark Frame size lookup functions
448 // Thomas Wang's 64 bit Mix Function: http://www.cris.com/~Ttwang/tech/inthash.htm
449 CFHashCode DPIntHash(uint64_t key)
459 return (CFHashCode)key;
462 // Thomas Wang's 32 Bit Mix Function: http://www.cris.com/~Ttwang/tech/inthash.htm
463 CFHashCode DPIntHash(uint32_t key)
471 return (CFHashCode)key;
475 // Static variables (thread safe)
476 static CFMutableDictionaryRef _hash_cache = NULL;
477 static OSSpinLock _hash_cache_lock = OS_SPINLOCK_INIT;
480 * The first call to this function searches the entire class
481 * list, and caches the arguments size of the method with the
482 * largest arguments frame for every available selector.
483 * Subsequent calls simply return results from the cache.
484 * Thanks to Andre Pang for suggesting this approach, and the
485 * double-cache algorithm (see below).
487 * OSSpinLock is used to ensure thread-safety as contention is expected
488 * to be very very low.
490 * For the possibility of future porting to GNUStep, NSHashMap is used
493 unsigned int _dp_lookupMaxArgSizeForSelector(SEL sel, BOOL rebuildCache) {
494 unsigned int result = 0U;
496 // Like in the ObjC runtime, thread synchronization is always active
497 OSSpinLockLock(&_hash_cache_lock);
499 // Make sure our cache is initialized
500 if (__builtin_expect(_hash_cache == NULL, 0)) {
501 CFDictionaryKeyCallBacks keyCallbacks = { 0, NULL, NULL, NULL, NULL, (void *)DPIntHash };
504 // According to the runtime sources, libobjc, CoreText, Foundation, HIToolbox, CoreData,
505 // QuartzCore, AppKit and WebKit have 16371 selectors. The runtime also says that most
506 // apps use 2000..7000 extra sels, so we'll add ~3500 as a compromise.
507 _hash_cache = CFDictionaryCreateMutable(NULL, 19000, &keyCallbacks, NULL);
511 if (__builtin_expect(!rebuildCache, 1)) {
512 result = (unsigned int)CFDictionaryGetValue(_hash_cache, sel); // Returns NULL (== 0) if not found
514 // Rebuild the cache if needed
515 unsigned count = objc_getClassList(NULL, 0);
516 Class *classes = calloc(count, sizeof(Class));
519 // Get all available classes
520 objc_getClassList(classes, count);
522 // NOTE: Since CFDictionarySetValue() replaces existing values
523 // there's no need to call CFDictionaryRemoveAllValues(). The new
524 // values will override the existing ones.
526 // Loop through all classes
527 for (i = 0; i < count; i++) {
528 Class cls = classes[i];
530 // Get all methods of this class (and its super classes')
532 // Does this include super classes in ObjC 2?
533 Method *methods = class_copyMethodList(cls, &c);
535 // Loop through all instance methods and store them in our cache
536 for (j = 0; j < c; j++) {
537 unsigned int size = method_getSizeOfArguments(methods[j]);
538 SEL s = method_getName(methods[j]);
540 if (s == sel && size > result)
543 if (size > (unsigned int)CFDictionaryGetValue(_hash_cache, s))
544 CFDictionarySetValue(_hash_cache, s, (const void *)size);
550 // And now class methods
551 methods = class_copyMethodList(object_getClass(cls), &c);
552 for (j = 0; j < c; j++) {
553 unsigned int size = method_getSizeOfArguments(methods[j]);
554 SEL s = method_getName(methods[j]);
556 if (s == sel && size > result)
559 if (size > (unsigned int)CFDictionaryGetValue(_hash_cache, s))
560 CFDictionarySetValue(_hash_cache, s, (const void *)size);
570 OSSpinLockUnlock(&_hash_cache_lock);
// Returns the number of entries currently held in the selector hash cache,
// or 0 if the cache has not been created yet.
unsigned dp_getCacheSize(void) {
    unsigned count = 0U;

    OSSpinLockLock(&_hash_cache_lock);
    // The dictionary is only created by the first lookup. CFDictionaryGetCount()
    // must not be given NULL, so a cache that doesn't exist yet counts as empty.
    if (_hash_cache != NULL)
        count = (unsigned)CFDictionaryGetCount(_hash_cache);
    OSSpinLockUnlock(&_hash_cache_lock);

    return count;
}
584 // This is the internal class count of dp_maxArgSizeForSelector().
585 // It's not declared inside that function so that
586 // dp_flushArgSizeCache() can access it too.
587 static volatile int32_t _classCount = -1;
591 * dp_maxArgSizeForSelector() is the function that gets invoked by
592 * _DPUninitializedMessage in order to find a matching method for
595 * It builds on top of _dp_lookupMaxArgSizeForSelector() but implements
596 * a second cache. Real world experience shows only several tens of
597 * methods are actually being used with HOM, and searching a tiny
598 * cache of this size is going to be much faster than the cache used
599 * by _dp_lookupMaxArgSizeForSelector() (which may hold ~23,000 entries in
600 * a large Cocoa app). Again, thanks to Andre Pang for suggesting this
601 * two levels cache algorithm.
603 * Unlike the cache used by _dp_lookupMaxArgSizeForSelector() the cache
604 * used in this function is simply two fixed size C arrays. The selectors
605 * array is sorted by the selectors' pointer values, from the lowest to the
606 * highest. A binary search is then used to find the index of a selector
607 * and match it with its value in the values array. If at some later point
608 * selectors are not guaranteed to be unique, we can still store their names
609 * and search according to that.
610 * NOTE: The current implementation does not handle the case of the cache
611 * getting larger than its max size. When the cache is full selectors will
612 * no longer be cached.
614 * In order to keep up with classes being registered/unregistered,
615 * the total class count is also cached and is checked in each call.
616 * If the total count suddenly changes, the entire cache gets rebuilt.
617 * This catches most cases (currently all, as class unloading is not
618 * supported in 10.4), except if you load some classes, unload some
619 * others, but the total amount is left the same. Of course this applies
620 * only if no call to this function is made between loading and unloading.
621 * The dp_flushArgSizeCache() function is provided for forcing the cache(s)
622 * to be rebuilt the next time a lookup is done.
624 * OSSpinLock is used to ensure thread-safety as contention is expected
625 * to be very very low.
629 * When our array reaches 200 items, it takes ~7.5 steps
630 * to find a selector. Meanwhile, searching our hash-table cache
631 * with ~23000 items takes at most ~14 steps (and Apple says it'll
632 * usually be better). This means that when our array is full, there's
633 * no point in searching it before searching the hash table, so we
634 * simply skip it and ignore it.
636 #define MAX_ARRAY_CACHE_SIZE 200
638 // The binary search implementation
639 DP_STATIC_INLINE int _DPFindPtrInArray(void *ptr, void **arr, int count) {
640 register int low = 0;
641 register int high = count - 1;
643 while (low <= high) {
644 int mid = (low + high) / 2;
648 else if (arr[mid] < ptr)
657 // Inserts a given pointer to a pointers array, sorted from the lowest
659 DP_STATIC_INLINE int _DPInsertPtrToArray(void *ptr, void **arr, int count) {
663 // Find the place for our new pointer
664 for (i = 0; i < count; i++) {
672 // Now make space for it by pushing
673 // everything else down
674 for (i = count - 1; i >= j; i--)
681 unsigned int dp_maxArgSizeForSelector(SEL sel) {
682 // Static variables (thread safe)
683 static SEL cachedSels[MAX_ARRAY_CACHE_SIZE];
684 static unsigned int cachedSizes[MAX_ARRAY_CACHE_SIZE];
685 static unsigned cacheCount = 0;
686 static OSSpinLock cacheLock = OS_SPINLOCK_INIT;
690 #if DP_NSBUNEL_DETECTION_ONLY
693 int32_t count = objc_getClassList(NULL, 0);
695 BOOL insertToCache = NO;
697 // We're always thread-safe, just like the runtime
698 OSSpinLockLock(&cacheLock);
700 // The total number of classes has changed,
701 // let's rebuild our cache.
702 if (__builtin_expect(_classCount != count, 0)) {
703 #if DP_NSBUNEL_DETECTION_ONLY
704 // If we got here it means _classCount == 0 so we just set it to 1.
705 OSAtomicCompareAndSwap32Barrier(_classCount, 1, (int32_t *)&_classCount);
707 // Remember the class count.
708 // This is an equivalent to "_classCount = count;" but thread safe.
709 OSAtomicCompareAndSwap32Barrier(_classCount, count, (int32_t *)&_classCount);
715 // Let's find a method
716 r = _dp_lookupMaxArgSizeForSelector(sel, YES);
722 // Below MAX_ARRAY_CACHE_SIZE we try to search in our array
723 // cache before falling back to the dictionary cache, which
724 // assuming we have a method in the array cache, will be faster.
725 if (__builtin_expect(cacheCount < MAX_ARRAY_CACHE_SIZE, 1)) {
726 // First, attempt to search our internal cache for a method
727 int index = _DPFindPtrInArray(sel, (void **)cachedSels, cacheCount);
729 if (__builtin_expect(index > -1, 1))
730 r = cachedSizes[index];
732 // If the first search failed, use the larger cache
733 // containing all methods.
734 r = _dp_lookupMaxArgSizeForSelector(sel, NO);
739 // When we reached MAX_ARRAY_CACHE_SIZE, the time it takes
740 // to search the array cache is almost identical to the time
741 // needed for the dictionary lookup, so we just skip the array.
742 r = _dp_lookupMaxArgSizeForSelector(sel, NO);
746 // Should we cache the result of this method?
747 if (__builtin_expect((insertToCache && cacheCount < MAX_ARRAY_CACHE_SIZE), 0)) {
748 // Insert the new selector and remember its index
749 int index = _DPInsertPtrToArray(sel, (void **)cachedSels, cacheCount);
751 // Push all cached sizes and make space for the new size
752 for (j = cacheCount - 1; j >= index; j--)
753 cachedSizes[j + 1] = cachedSizes[j];
755 cachedSizes[index] = r;
760 OSSpinLockUnlock(&cacheLock);
// In order to reset the cache we just set the class count to 0.
// dp_maxArgSizeForSelector() compares _classCount with the current class
// count and rebuilds its cache when the two differ.
void dp_flushArgSizeCache(void) {
    int32_t observed;

    // A single compare-and-swap fails if another thread changes _classCount
    // between our read and the swap, which would silently drop the flush.
    // Retry until the store of 0 actually takes effect.
    do {
        observed = _classCount;
    } while (!OSAtomicCompareAndSwap32Barrier(observed, 0, (int32_t *)&_classCount));
}
770 #if DP_NSBUNEL_DETECTION_ONLY
771 // If NSBundle is the only way classes are added/removed
772 // we can use its NSBundleDidLoadNotification notification to flush
773 // our caches and save the locking overhead of objc_getClassList().
774 @interface DPMessage (_DPLookupCallback)
775 - (void)_bundleLoaded:(NSNotification *)notif;
778 @implementation DPMessage (_DPLookupCallback)
// NSBundleDidLoadNotification callback: flushes the argument size cache.
- (void)_bundleLoaded:(NSNotification *)notif {
    // Nothing to flush while the cache has never been built
    if (_classCount <= -1)
        return;

    dp_flushArgSizeCache();
}
787 static void __attribute__((constructor)) _dp_setUpNSBundleCallback(void) {
788 [[NSNotificationCenter defaultCenter] addObserver:[DPMessage class]
789 selector:@selector(_bundleLoaded:)
790 name:NSBundleDidLoadNotification