/*
 * Copyright (c) 2008-2011 Apple Inc. All rights reserved.
 *
 * @APPLE_APACHE_LICENSE_HEADER_START@
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @APPLE_APACHE_LICENSE_HEADER_END@
 */

#include "internal.h"

#undef dispatch_once
#undef dispatch_once_f

struct _dispatch_once_waiter_s {
	volatile struct _dispatch_once_waiter_s *volatile dow_next;
	_dispatch_thread_semaphore_t dow_sema;
};

#define DISPATCH_ONCE_DONE ((struct _dispatch_once_waiter_s *)~0l)
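
// Threads that lose the race to run the initializer park themselves on a
// singly linked list of stack-allocated _dispatch_once_waiter_s records
// rooted at *val. DISPATCH_ONCE_DONE (~0l) is the sentinel value stored
// into *val once the initializer has completed.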

void
dispatch_once(dispatch_once_t *val, dispatch_block_t block)
{
	struct Block_basic *bb = (void *)block;

	dispatch_once_f(val, block, (void *)bb->Block_invoke);
}
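
/*
 * Illustrative caller-side sketch (not part of this file): the predicate
 * must have static or global storage so that every thread observes the
 * same dispatch_once_t.
 *
 *     static dispatch_once_t pred;
 *     dispatch_once(&pred, ^{
 *         // one-time initialization; runs at most once process-wide
 *     });
 */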

void
dispatch_once_f(dispatch_once_t *val, void *ctxt, dispatch_function_t func)
{
	struct _dispatch_once_waiter_s *volatile *vval =
			(struct _dispatch_once_waiter_s **)val;
	struct _dispatch_once_waiter_s dow = { NULL, 0 };
	struct _dispatch_once_waiter_s *tail, *tmp;
	_dispatch_thread_semaphore_t sema;

	if (dispatch_atomic_cmpxchg(vval, NULL, &dow)) {
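		// This thread won the race: run the initializer exactly once, then
		// publish DISPATCH_ONCE_DONE and wake any waiters that queued up
		// behind &dow in the meantime.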
		dispatch_atomic_acquire_barrier();
		_dispatch_client_callout(ctxt, func);

		// The next barrier must be long and strong.
		//
		// The scenario: SMP systems with weakly ordered memory models
		// and aggressive out-of-order instruction execution.
		//
		// The problem:
		//
		// The dispatch_once*() wrapper macro causes the callee's
		// instruction stream to look like this (pseudo-RISC):
		//
		//      load r5, pred-addr
		//      cmpi r5, -1
		//      beq  1f
		//      call dispatch_once*()
		//      1f:
		//      load r6, data-addr
		//
		// May be re-ordered like so:
		//
		//      load r6, data-addr
		//      load r5, pred-addr
		//      cmpi r5, -1
		//      beq  1f
		//      call dispatch_once*()
		//      1f:
		//
		// Normally, a barrier on the read side is used to workaround
		// the weakly ordered memory model. But barriers are expensive
		// and we only need to synchronize once! After func(ctxt)
		// completes, the predicate will be marked as "done" and the
		// branch predictor will correctly skip the call to
		// dispatch_once*().
		//
		// A far faster alternative solution: Defeat the speculative
		// read-ahead of peer CPUs.
		//
		// Modern architectures will throw away speculative results
		// once a branch mis-prediction occurs. Therefore, if we can
		// ensure that the predicate is not marked as being complete
		// until long after the last store by func(ctxt), then we have
		// defeated the read-ahead of peer CPUs.
		//
		// In other words, the last "store" by func(ctxt) must complete
		// and then N cycles must elapse before ~0l is stored to *val.
		// The value of N is whatever is sufficient to defeat the
		// read-ahead mechanism of peer CPUs.
		//
		// On some CPUs, the most fully synchronizing instruction might
		// need to be issued.

		dispatch_atomic_maximally_synchronizing_barrier();
		//dispatch_atomic_release_barrier(); // assumed contained in above
		tmp = dispatch_atomic_xchg(vval, DISPATCH_ONCE_DONE);
		tail = &dow;
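		// Walk the chain of waiters that enqueued themselves while
		// func(ctxt) was running; the chain ends at &dow, the record
		// planted by the winning cmpxchg above.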
		while (tail != tmp) {
			while (!tmp->dow_next) {
				_dispatch_hardware_pause();
			}
			sema = tmp->dow_sema;
			tmp = (struct _dispatch_once_waiter_s *)tmp->dow_next;
			_dispatch_thread_semaphore_signal(sema);
		}
	} else {
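		// Contended path: this thread lost the race. Queue a waiter record
		// and block on a per-thread semaphore until the winner signals it,
		// unless the predicate is already marked DISPATCH_ONCE_DONE.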
		dow.dow_sema = _dispatch_get_thread_semaphore();

		for (;;) {
			tmp = *vval;
			if (tmp == DISPATCH_ONCE_DONE) {
				break;
			}
			dispatch_atomic_store_barrier();
			if (dispatch_atomic_cmpxchg(vval, tmp, &dow)) {
				dow.dow_next = tmp;
				_dispatch_thread_semaphore_wait(dow.dow_sema);
			}
		}
		_dispatch_put_thread_semaphore(dow.dow_sema);
	}
}