IODataQueueEntry * IOSharedDataQueue::peek()
{
- IODataQueueEntry *entry = 0;
+ IODataQueueEntry *entry = 0;
+ UInt32 headOffset;
+ UInt32 tailOffset;
- if (dataQueue && (dataQueue->head != dataQueue->tail)) {
+ if (!dataQueue) {
+ return NULL;
+ }
+
+ // Read head and tail with acquire barrier
+ // See rdar://problem/40780584 for an explanation of relaxed/acquire barriers
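+ // The acquire on tail pairs with the release store of tail in ::enqueue,
+ // so the entry header written before that store is visible when we read it below.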
+ headOffset = __c11_atomic_load((_Atomic UInt32 *)&dataQueue->head, __ATOMIC_RELAXED);
+ tailOffset = __c11_atomic_load((_Atomic UInt32 *)&dataQueue->tail, __ATOMIC_ACQUIRE);
+
+ if (headOffset != tailOffset) {
IODataQueueEntry * head = 0;
UInt32 headSize = 0;
- UInt32 headOffset = dataQueue->head;
UInt32 queueSize = getQueueSize();
-
+
if (headOffset >= queueSize) {
return NULL;
}
-
+
head = (IODataQueueEntry *)((char *)dataQueue->queue + headOffset);
headSize = head->size;
-
+
// Check if there's enough room before the end of the queue for a header.
// If there is room, check if there's enough room to hold the header and
// the data.
-
+
if ((headOffset > UINT32_MAX - DATA_QUEUE_ENTRY_HEADER_SIZE) ||
(headOffset + DATA_QUEUE_ENTRY_HEADER_SIZE > queueSize) ||
(headOffset + DATA_QUEUE_ENTRY_HEADER_SIZE > UINT32_MAX - headSize) ||
Boolean IOSharedDataQueue::enqueue(void * data, UInt32 dataSize)
{
- const UInt32 head = dataQueue->head; // volatile
- const UInt32 tail = dataQueue->tail;
+ UInt32 head;
+ UInt32 tail;
+ UInt32 newTail;
const UInt32 entrySize = dataSize + DATA_QUEUE_ENTRY_HEADER_SIZE;
IODataQueueEntry * entry;
+ // Force a single read of head and tail
+ // See rdar://problem/40780584 for an explanation of relaxed/acquire barriers
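+ // The acquire on head pairs with the release store of head in ::dequeue,
+ // so space freed by the dequeuer is only reused after its copy-out is complete.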
+ tail = __c11_atomic_load((_Atomic UInt32 *)&dataQueue->tail, __ATOMIC_RELAXED);
+ head = __c11_atomic_load((_Atomic UInt32 *)&dataQueue->head, __ATOMIC_ACQUIRE);
+
// Check for overflow of entrySize
if (dataSize > UINT32_MAX - DATA_QUEUE_ENTRY_HEADER_SIZE) {
return false;
// exactly matches the available space at the end of the queue.
// The tail can range from 0 to dataQueue->queueSize inclusive.
- OSAddAtomic(entrySize, (SInt32 *)&dataQueue->tail);
+ newTail = tail + entrySize;
}
else if ( head > entrySize ) // Is there enough room at the beginning?
{
}
memcpy(&dataQueue->queue->data, data, dataSize);
- OSCompareAndSwap(dataQueue->tail, entrySize, &dataQueue->tail);
+ newTail = entrySize;
}
else
{
entry->size = dataSize;
memcpy(&entry->data, data, dataSize);
- OSAddAtomic(entrySize, (SInt32 *)&dataQueue->tail);
+ newTail = tail + entrySize;
}
else
{
return false; // queue is full
}
}
-
- // Send notification (via mach message) that data is available.
-
- if ( ( head == tail ) /* queue was empty prior to enqueue() */
- || ( dataQueue->head == tail ) ) /* queue was emptied during enqueue() */
- {
- sendDataAvailableNotification();
- }
-
- return true;
+
+ // Publish the data we just enqueued
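+ // The release store pairs with the acquire loads of tail in ::dequeue and ::peek,
+ // so the entry contents written above become visible before the new tail value.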
+ __c11_atomic_store((_Atomic UInt32 *)&dataQueue->tail, newTail, __ATOMIC_RELEASE);
+
+ if (tail != head) {
+ //
+ // The memory barrier below pairs with the one in ::dequeue
+ // so that either our store to the tail cannot be missed by
+ // the next dequeue attempt, or we will observe the dequeuer
+ // making the queue empty.
+ //
+ // Of course, if we already think the queue is empty,
+ // there's no point paying this extra cost.
+ //
+ __c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
+ head = __c11_atomic_load((_Atomic UInt32 *)&dataQueue->head, __ATOMIC_RELAXED);
+ }
+
+ if (tail == head) {
+ // Send notification (via mach message) that data is now available.
+ sendDataAvailableNotification();
+ }
+ return true;
}
Boolean IOSharedDataQueue::dequeue(void *data, UInt32 *dataSize)
{
Boolean retVal = TRUE;
IODataQueueEntry * entry = 0;
UInt32 entrySize = 0;
+ UInt32 headOffset = 0;
+ UInt32 tailOffset = 0;
UInt32 newHeadOffset = 0;
- if (dataQueue) {
- if (dataQueue->head != dataQueue->tail) {
- IODataQueueEntry * head = 0;
- UInt32 headSize = 0;
- UInt32 headOffset = dataQueue->head;
- UInt32 queueSize = getQueueSize();
-
- if (headOffset > queueSize) {
- return false;
- }
-
- head = (IODataQueueEntry *)((char *)dataQueue->queue + headOffset);
- headSize = head->size;
-
- // we wrapped around to beginning, so read from there
- // either there was not even room for the header
- if ((headOffset > UINT32_MAX - DATA_QUEUE_ENTRY_HEADER_SIZE) ||
- (headOffset + DATA_QUEUE_ENTRY_HEADER_SIZE > queueSize) ||
- // or there was room for the header, but not for the data
- (headOffset + DATA_QUEUE_ENTRY_HEADER_SIZE > UINT32_MAX - headSize) ||
- (headOffset + headSize + DATA_QUEUE_ENTRY_HEADER_SIZE > queueSize)) {
- // Note: we have to wrap to the beginning even with the UINT32_MAX checks
- // because we have to support a queueSize of UINT32_MAX.
- entry = dataQueue->queue;
- entrySize = entry->size;
- if ((entrySize > UINT32_MAX - DATA_QUEUE_ENTRY_HEADER_SIZE) ||
- (entrySize + DATA_QUEUE_ENTRY_HEADER_SIZE > queueSize)) {
- return false;
- }
- newHeadOffset = entrySize + DATA_QUEUE_ENTRY_HEADER_SIZE;
- // else it is at the end
- } else {
- entry = head;
- entrySize = entry->size;
- if ((entrySize > UINT32_MAX - DATA_QUEUE_ENTRY_HEADER_SIZE) ||
- (entrySize + DATA_QUEUE_ENTRY_HEADER_SIZE > UINT32_MAX - headOffset) ||
- (entrySize + DATA_QUEUE_ENTRY_HEADER_SIZE + headOffset > queueSize)) {
- return false;
- }
- newHeadOffset = headOffset + entrySize + DATA_QUEUE_ENTRY_HEADER_SIZE;
- }
+ if (!dataQueue || (data && !dataSize)) {
+ return false;
+ }
+
+ // Read head and tail with acquire barrier
+ // See rdar://problem/40780584 for an explanation of relaxed/acquire barriers
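+ // The acquire on tail pairs with the release store of tail in ::enqueue,
+ // so the entry we are about to copy out is fully visible.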
+ headOffset = __c11_atomic_load((_Atomic UInt32 *)&dataQueue->head, __ATOMIC_RELAXED);
+ tailOffset = __c11_atomic_load((_Atomic UInt32 *)&dataQueue->tail, __ATOMIC_ACQUIRE);
+
+ if (headOffset != tailOffset) {
+ IODataQueueEntry * head = 0;
+ UInt32 headSize = 0;
+ UInt32 queueSize = getQueueSize();
+
+ if (headOffset > queueSize) {
+ return false;
}
-
- if (entry) {
- if (data) {
- if (dataSize) {
- if (entrySize <= *dataSize) {
- memcpy(data, &(entry->data), entrySize);
- OSCompareAndSwap( dataQueue->head, newHeadOffset, (SInt32 *)&dataQueue->head);
- } else {
- retVal = FALSE;
- }
- } else {
- retVal = FALSE;
- }
- } else {
- OSCompareAndSwap( dataQueue->head, newHeadOffset, (SInt32 *)&dataQueue->head);
- }
- if (dataSize) {
- *dataSize = entrySize;
+ head = (IODataQueueEntry *)((char *)dataQueue->queue + headOffset);
+ headSize = head->size;
+
+ // we wrapped around to beginning, so read from there
+ // either there was not even room for the header
+ if ((headOffset > UINT32_MAX - DATA_QUEUE_ENTRY_HEADER_SIZE) ||
+ (headOffset + DATA_QUEUE_ENTRY_HEADER_SIZE > queueSize) ||
+ // or there was room for the header, but not for the data
+ (headOffset + DATA_QUEUE_ENTRY_HEADER_SIZE > UINT32_MAX - headSize) ||
+ (headOffset + headSize + DATA_QUEUE_ENTRY_HEADER_SIZE > queueSize)) {
+ // Note: we have to wrap to the beginning even with the UINT32_MAX checks
+ // because we have to support a queueSize of UINT32_MAX.
+ entry = dataQueue->queue;
+ entrySize = entry->size;
+ if ((entrySize > UINT32_MAX - DATA_QUEUE_ENTRY_HEADER_SIZE) ||
+ (entrySize + DATA_QUEUE_ENTRY_HEADER_SIZE > queueSize)) {
+ return false;
}
+ newHeadOffset = entrySize + DATA_QUEUE_ENTRY_HEADER_SIZE;
+ // else it is at the end
} else {
- retVal = FALSE;
+ entry = head;
+ entrySize = entry->size;
+ if ((entrySize > UINT32_MAX - DATA_QUEUE_ENTRY_HEADER_SIZE) ||
+ (entrySize + DATA_QUEUE_ENTRY_HEADER_SIZE > UINT32_MAX - headOffset) ||
+ (entrySize + DATA_QUEUE_ENTRY_HEADER_SIZE + headOffset > queueSize)) {
+ return false;
+ }
+ newHeadOffset = headOffset + entrySize + DATA_QUEUE_ENTRY_HEADER_SIZE;
}
- } else {
- retVal = FALSE;
- }
+ } else {
+ // empty queue
+ return false;
+ }
+
+ if (data) {
+ if (entrySize > *dataSize) {
+ // not enough space
+ return false;
+ }
+ memcpy(data, &(entry->data), entrySize);
+ *dataSize = entrySize;
+ }
+
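+ // Publish the new head with release; it pairs with the acquire load of head
+ // in ::enqueue, so the copy-out above completes before the space is reused.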
+ __c11_atomic_store((_Atomic UInt32 *)&dataQueue->head, newHeadOffset, __ATOMIC_RELEASE);
+
+ if (newHeadOffset == tailOffset) {
+ //
+ // If we are making the queue empty, then we need to make sure
+ // that either the enqueuer notices, or we notice the enqueue
+ // that raced with our making of the queue empty.
+ //
+ __c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
+ }
return retVal;
}