objc4-371.2.tar.gz

[apple/objc4.git] / runtime / objc-sync.m
diff --git a/runtime/objc-sync.m b/runtime/objc-sync.m

index fe8e3a20757dace245d291e653b0b5b11f4afbea..070d06a0fa6823356f2f8ca0a45298ca30bf1af4 100644 (file)
--- a/runtime/objc-sync.m
+++ b/runtime/objc-sync.m
@@ -1,9 +1,7 @@
  /*
- * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_LICENSE_HEADER_START@
+ * Copyright (c) 1999-2007 Apple Inc.  All Rights Reserved.
   * 
- * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
+ * @APPLE_LICENSE_HEADER_START@
   * 
   * This file contains Original Code and/or Modifications of Original Code
   * as defined in and that are subject to the Apple Public Source License
@@ -22,11 +20,6 @@
   * 
   * @APPLE_LICENSE_HEADER_END@
   */
-//
-//  objc_sync.m
-//
-//  Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
-//
  
  #include <stdbool.h>
  #include <stdlib.h>
@@ -34,14 +27,10 @@
  #include <pthread.h>
  #include <errno.h>
  #include <AssertMacros.h>
+#include <libkern/OSAtomic.h>
  
  #include "objc-sync.h"
-
-//
-// Code by Nick Kledzik
-//
-
-// revised comments by Blaine
+#include "objc-private.h"
  
  //
  // Allocate a lock only when needed.  Since few locks are needed at any point
@@ -68,43 +57,146 @@ done:
  }
  
  
-struct SyncData
+typedef uintptr_t spin_lock_t;
+extern void _spin_lock(spin_lock_t *lockp);
+extern int  _spin_lock_try(spin_lock_t *lockp);
+extern void _spin_unlock(spin_lock_t *lockp);
+
+typedef struct SyncData {
+    struct SyncData* nextData;
+    id               object;
+    int              threadCount;  // number of THREADS using this block
+    pthread_mutex_t  mutex;
+    pthread_cond_t   conditionVariable;
+} SyncData;
+
+typedef struct {
+    SyncData *data;
+    unsigned int lockCount;  // number of times THIS THREAD locked this block
+} SyncCacheItem;
+
+typedef struct SyncCache {
+    unsigned int allocated;
+    unsigned int used;
+    SyncCacheItem list[0];
+} SyncCache;
+
+typedef struct {
+    spin_lock_t lock;
+    SyncData *data;
+} SyncList __attribute__((aligned(64)));
+// aligned to put locks on separate cache lines
+
+// Use multiple parallel lists to decrease contention among unrelated objects.
+#define COUNT 16
+#define HASH(obj) ((((uintptr_t)(obj)) >> 5) & (COUNT - 1))
+#define LOCK_FOR_OBJ(obj) sDataLists[HASH(obj)].lock
+#define LIST_FOR_OBJ(obj) sDataLists[HASH(obj)].data
+static SyncList sDataLists[COUNT];
+
+
+enum usage { ACQUIRE, RELEASE, CHECK };
+
+static SyncCache *fetch_cache(BOOL create)
  {
-       struct SyncData*        nextData;       // only accessed while holding sTableLock
-       id                      object;         // only accessed while holding sTableLock
-       unsigned int            lockCount;      // only accessed while holding sTableLock
-       pthread_mutex_t         mutex;
-       pthread_cond_t          conditionVariable;
-};
-typedef struct SyncData SyncData;
+    _objc_pthread_data *data;
+    
+    data = _objc_fetch_pthread_data(create);
+    if (!data  &&  !create) return NULL;
+
+    if (!data->syncCache) {
+        if (!create) {
+            return NULL;
+        } else {
+            int count = 4;
+            data->syncCache = calloc(1, sizeof(SyncCache) + 
+                                     count*sizeof(SyncCacheItem));
+            data->syncCache->allocated = count;
+        }
+    }
  
+    // Make sure there's at least one open slot in the list.
+    if (data->syncCache->allocated == data->syncCache->used) {
+        data->syncCache->allocated *= 2;
+        data->syncCache = 
+            realloc(data->syncCache, sizeof(SyncCache) 
+                    + data->syncCache->allocated * sizeof(SyncCacheItem));
+    }
  
-static pthread_mutex_t         sTableLock = PTHREAD_MUTEX_INITIALIZER;
-static SyncData*               sDataList = NULL;
+    return data->syncCache;
+}
+
+
+__private_extern__ void _destroySyncCache(struct SyncCache *cache)
+{
+    if (cache) free(cache);
+}
  
-enum usage { ACQUIRE, RELEASE, CHECK };
  
  static SyncData* id2data(id object, enum usage why)
  {
+    spin_lock_t *lockp = &LOCK_FOR_OBJ(object);
+    SyncData **listp = &LIST_FOR_OBJ(object);
      SyncData* result = NULL;
      int err;
-    
-    pthread_mutex_lock(&sTableLock);
-    
+
+    // Check per-thread cache of already-owned locks for matching object
+    SyncCache *cache = fetch_cache(NO);
+    if (cache) {
+        int i;
+        for (i = 0; i < cache->used; i++) {
+            SyncCacheItem *item = &cache->list[i];
+            if (item->data->object != object) continue;
+
+            // Found a match.
+            result = item->data;
+            require_action_string(result->threadCount > 0, cache_done, 
+                                  result = NULL, "id2data cache is buggy");
+            require_action_string(item->lockCount > 0, cache_done, 
+                                  result = NULL, "id2data cache is buggy");
+                
+            switch(why) {
+            case ACQUIRE:
+                item->lockCount++;
+                break;
+            case RELEASE:
+                item->lockCount--;
+                if (item->lockCount == 0) {
+                    // remove from per-thread cache
+                    cache->list[i] = cache->list[--cache->used];
+                    // atomic because may collide with concurrent ACQUIRE
+                    OSAtomicDecrement32Barrier(&result->threadCount);
+                }
+                break;
+            case CHECK:
+                // do nothing
+                break;
+            }
+
+        cache_done:            
+            return result;
+        }
+    }
+
+    // Thread cache didn't find anything.
      // Walk in-use list looking for matching object
-    // sTableLock keeps other threads from winning an allocation race
-    // for the same new object.
+    // Spinlock prevents multiple threads from creating multiple 
+    // locks for the same new object.
      // We could keep the nodes in some hash table if we find that there are
      // more than 20 or so distinct locks active, but we don't do that now.
      
-    SyncData* firstUnused = NULL;
+    _spin_lock(lockp);
+
      SyncData* p;
-    for (p = sDataList; p != NULL; p = p->nextData) {
+    SyncData* firstUnused = NULL;
+    for (p = *listp; p != NULL; p = p->nextData) {
          if ( p->object == object ) {
              result = p;
+            // atomic because may collide with concurrent RELEASE
+            OSAtomicIncrement32Barrier(&result->threadCount);
              goto done;
          }
-        if ( (firstUnused == NULL) && (p->object == NULL) )
+        if ( (firstUnused == NULL) && (p->threadCount == 0) )
              firstUnused = p;
      }
      
@@ -116,7 +208,7 @@ static SyncData* id2data(id object, enum usage why)
      if ( firstUnused != NULL ) {
          result = firstUnused;
          result->object = object;
-        result->lockCount = 0; // sanity
+        result->threadCount = 1;
          goto done;
      }
                              
@@ -126,35 +218,42 @@ static SyncData* id2data(id object, enum usage why)
      // But since we never free these guys we won't be stuck in malloc very often.
      result = (SyncData*)malloc(sizeof(SyncData));
      result->object = object;
-    result->lockCount = 0;
+    result->threadCount = 1;
      err = pthread_mutex_init(&result->mutex, recursiveAttributes());
      require_noerr_string(err, done, "pthread_mutex_init failed");
      err = pthread_cond_init(&result->conditionVariable, NULL);
      require_noerr_string(err, done, "pthread_cond_init failed");
-    result->nextData = sDataList;
-    sDataList = result;
+    result->nextData = *listp;
+    *listp = result;
      
-done:
-    if ( result != NULL ) {
-        switch ( why ) {
-            case ACQUIRE:
-                result->lockCount++;
-                break;
-            case RELEASE:
-                result->lockCount--;
-                if ( result->lockCount == 0 )
-                    result->object = NULL;  // now recycled
-                break;
-            case CHECK:
-                // do nothing
-                break;
-        }
+ done:
+    _spin_unlock(lockp);
+    if (result) {
+        // Only new ACQUIRE should get here.
+        // All RELEASE and CHECK and recursive ACQUIRE are 
+        // handled by the per-thread cache above.
+        
+        require_string(result != NULL, really_done, "id2data is buggy");
+        require_action_string(why == ACQUIRE, really_done, 
+                              result = NULL, "id2data is buggy");
+        require_action_string(result->object == object, really_done, 
+                              result = NULL, "id2data is buggy");
+        
+        if (!cache) cache = fetch_cache(YES);
+        cache->list[cache->used++] = (SyncCacheItem){result, 1};
      }
-    pthread_mutex_unlock(&sTableLock);
+
+ really_done:
      return result;
  }
  
  
+__private_extern__ __attribute__((noinline))
+int objc_sync_nil(void)
+{
+    return OBJC_SYNC_SUCCESS;  // something to foil the optimizer
+}
+
  
  // Begin synchronizing on 'obj'.  
  // Allocates recursive pthread_mutex associated with 'obj' if needed.
@@ -171,6 +270,10 @@ int objc_sync_enter(id obj)
          require_noerr_string(result, done, "pthread_mutex_lock failed");
      } else {
          // @synchronized(nil) does nothing
+        if (DebugNilSync) {
+            _objc_inform("NIL SYNC DEBUG: @synchronized(nil); set a breakpoint on objc_sync_nil to debug");
+        }
+        result = objc_sync_nil();
      }
  
  done: 
@@ -219,8 +322,8 @@ int objc_sync_wait(id obj, long long milliSecondsMaxWait)
      }
      else {
                 struct timespec maxWait;
-        maxWait.tv_sec  = milliSecondsMaxWait / 1000;
-        maxWait.tv_nsec = (milliSecondsMaxWait - (maxWait.tv_sec * 1000)) * 1000000;
+        maxWait.tv_sec  = (time_t)(milliSecondsMaxWait / 1000);
+        maxWait.tv_nsec = (long)((milliSecondsMaxWait - (maxWait.tv_sec * 1000)) * 1000000);
          result = pthread_cond_timedwait_relative_np(&data->conditionVariable, &data->mutex, &maxWait);
         require_noerr_string(result, done, "pthread_cond_timedwait_relative_np failed");
      }