/*
 * Copyright (c) 2008-2011 Apple Inc. All rights reserved.
 *
 * @APPLE_APACHE_LICENSE_HEADER_START@
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @APPLE_APACHE_LICENSE_HEADER_END@
 */
#include "internal.h"

#undef dispatch_once
#undef dispatch_once_f
struct _dispatch_once_waiter_s {
	volatile struct _dispatch_once_waiter_s *volatile dow_next;
	_dispatch_thread_semaphore_t dow_sema;
};

#define DISPATCH_ONCE_DONE ((struct _dispatch_once_waiter_s *)~0l)
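
// Note on the encoding used below: the once predicate (*val) is a single
// word with three states. It is NULL before the initializer has started,
// a pointer to the head of a chain of _dispatch_once_waiter_s nodes
// (stack-allocated by waiting threads and linked through dow_next) while
// the initializer is running, and DISPATCH_ONCE_DONE (~0l) once the
// initializer has completed.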
void
dispatch_once(dispatch_once_t *val, dispatch_block_t block)
{
	struct Block_basic *bb = (void *)block;

	dispatch_once_f(val, block, (void *)bb->Block_invoke);
}
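
// Usage sketch (illustrative only, not part of this file): a caller pairs
// dispatch_once() with a static predicate to perform one-time
// initialization; the names below are hypothetical and error handling is
// omitted.
//
//	static dispatch_once_t table_pred;
//	static int *shared_table;
//
//	static int *get_shared_table(void)
//	{
//		dispatch_once(&table_pred, ^{
//			shared_table = calloc(1024, sizeof(int));
//		});
//		return shared_table;
//	}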
DISPATCH_NOINLINE
void
dispatch_once_f(dispatch_once_t *val, void *ctxt, dispatch_function_t func)
{
	struct _dispatch_once_waiter_s *volatile *vval =
			(struct _dispatch_once_waiter_s **)val;
	struct _dispatch_once_waiter_s dow = { NULL, 0 };
	struct _dispatch_once_waiter_s *tail, *tmp;
	_dispatch_thread_semaphore_t sema;

	if (dispatch_atomic_cmpxchg(vval, NULL, &dow)) {
		dispatch_atomic_acquire_barrier();
		_dispatch_client_callout(ctxt, func);

		// The next barrier must be long and strong.
		//
		// The scenario: SMP systems with weakly ordered memory models
		// and aggressive out-of-order instruction execution.
		//
		// The dispatch_once*() wrapper macro causes the callee's
		// instruction stream to look like this (pseudo-RISC):
		//
		//      load r5, pred-addr
		//      cmpi r5, -1
		//      beq  1f
		//      call dispatch_once*()
		//      1f:
		//      load r6, data-addr
		//
		// May be re-ordered like so:
		//
		//      load r6, data-addr
		//      load r5, pred-addr
		//      cmpi r5, -1
		//      beq  1f
		//      call dispatch_once*()
		//      1f:
		//
		// Normally, a barrier on the read side is used to workaround
		// the weakly ordered memory model. But barriers are expensive
		// and we only need to synchronize once! After func(ctxt)
		// completes, the predicate will be marked as "done" and the
		// branch predictor will correctly skip the call to
		// dispatch_once*().
		//
		// A far faster alternative solution: Defeat the speculative
		// read-ahead of peer CPUs.
		//
		// Modern architectures will throw away speculative results
		// once a branch mis-prediction occurs. Therefore, if we can
		// ensure that the predicate is not marked as being complete
		// until long after the last store by func(ctxt), then we have
		// defeated the read-ahead of peer CPUs.
		//
		// In other words, the last "store" by func(ctxt) must complete
		// and then N cycles must elapse before ~0l is stored to *val.
		// The value of N is whatever is sufficient to defeat the
		// read-ahead mechanism of peer CPUs.
		//
		// On some CPUs, the most fully synchronizing instruction might
		// need to be issued.
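		//
		// (On Intel processors this barrier is typically realized with
		// a fully serializing instruction such as cpuid; other
		// architectures fall back to their strongest available memory
		// barrier. The exact expansion lives in the per-architecture
		// atomic shims.)
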
		dispatch_atomic_maximally_synchronizing_barrier();
		//dispatch_atomic_release_barrier(); // assumed contained in above
		tmp = dispatch_atomic_xchg(vval, DISPATCH_ONCE_DONE);
		tail = &dow;
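
		// Hand-off: threads that arrived while func(ctxt) was running
		// pushed stack-allocated waiters onto the chain that the xchg
		// above just detached. Walk that chain back to our own &dow
		// terminator and signal each waiter's semaphore.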
		while (tail != tmp) {
			while (!tmp->dow_next) {
				_dispatch_hardware_pause();
			}
			sema = tmp->dow_sema;
			tmp = (struct _dispatch_once_waiter_s *)tmp->dow_next;
			_dispatch_thread_semaphore_signal(sema);
		}
	} else {
		dow.dow_sema = _dispatch_get_thread_semaphore();

		for (;;) {
			tmp = *vval;
			if (tmp == DISPATCH_ONCE_DONE) {
				break;
			}
			dispatch_atomic_store_barrier();
			if (dispatch_atomic_cmpxchg(vval, tmp, &dow)) {
				dow.dow_next = tmp;
				_dispatch_thread_semaphore_wait(dow.dow_sema);
			}
		}
		_dispatch_put_thread_semaphore(dow.dow_sema);
	}
}
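
// Usage sketch for the function-pointer variant (illustrative only; the
// names below are hypothetical, not part of libdispatch):
//
//	static dispatch_once_t log_pred;
//
//	static void init_logging(void *ctxt)
//	{
//		/* one-time setup driven by ctxt */
//	}
//
//	void ensure_logging(void *config)
//	{
//		dispatch_once_f(&log_pred, config, init_logging);
//	}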