/*
 * Copyright (c) 2008-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_APACHE_LICENSE_HEADER_START@
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @APPLE_APACHE_LICENSE_HEADER_END@
 */
#include "internal.h"

#undef dispatch_once
#undef dispatch_once_f
typedef struct _dispatch_once_waiter_s {
	volatile struct _dispatch_once_waiter_s *volatile dow_next;
	dispatch_thread_event_s dow_event;
	mach_port_t dow_thread;
} *_dispatch_once_waiter_t;
#define DISPATCH_ONCE_DONE ((_dispatch_once_waiter_t)~0l)
#ifdef __BLOCKS__
void
dispatch_once(dispatch_once_t *val, dispatch_block_t block)
{
	dispatch_once_f(val, block, _dispatch_Block_invoke(block));
}
#endif
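/*
 * Illustrative usage (a sketch, not part of this file): callers pass a
 * zero-initialized dispatch_once_t and either a block or a plain function.
 * The names `pred` and `shared_init` below are hypothetical.
 *
 *	static dispatch_once_t pred;
 *	static void shared_init(void *ctxt) { ... }	// runs exactly once
 *
 *	dispatch_once_f(&pred, NULL, shared_init);	// function-pointer form
 *	dispatch_once(&pred, ^{ ... });			// block form (wraps the above)
 */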
DISPATCH_NOINLINE
void
dispatch_once_f(dispatch_once_t *val, void *ctxt, dispatch_function_t func)
{
#if DISPATCH_GATE_USE_FOR_DISPATCH_ONCE
	dispatch_once_gate_t l = (dispatch_once_gate_t)val;

	if (_dispatch_once_gate_tryenter(l)) {
		_dispatch_client_callout(ctxt, func);
		_dispatch_once_gate_broadcast(l);
	} else {
		_dispatch_once_gate_wait(l);
	}
#else
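	// Fallback when once-gates are unavailable: the predicate word itself
	// anchors an intrusive stack of on-stack waiter records. The first
	// thread to swing it from NULL runs func(); later arrivals push a
	// _dispatch_once_waiter_s and block on its thread event until the
	// winner publishes DISPATCH_ONCE_DONE and signals them.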
	_dispatch_once_waiter_t volatile *vval = (_dispatch_once_waiter_t*)val;
	struct _dispatch_once_waiter_s dow = { };
	_dispatch_once_waiter_t tail = &dow, next, tmp;
	dispatch_thread_event_t event;
	if (os_atomic_cmpxchg(vval, NULL, tail, acquire)) {
		dow.dow_thread = _dispatch_tid_self();
		_dispatch_client_callout(ctxt, func);
		// The next barrier must be long and strong.
		//
		// The scenario: SMP systems with weakly ordered memory models
		// and aggressive out-of-order instruction execution.
		//
		// The problem:
		//
		// The dispatch_once*() wrapper macro causes the callee's
		// instruction stream to look like this (pseudo-RISC):
		//
		//      load r5, pred-addr
		//      cmpi r5, -1
		//      beq  1f
		//      call dispatch_once*()
		//      1f:
		//      load r6, data-addr
		//
		// May be re-ordered like so:
		//
		//      load r6, data-addr
		//      load r5, pred-addr
		//      cmpi r5, -1
		//      beq  1f
		//      call dispatch_once*()
		//      1f:
		//
		// Normally, a barrier on the read side is used to work around
		// the weakly ordered memory model. But barriers are expensive
		// and we only need to synchronize once! After func(ctxt)
		// completes, the predicate will be marked as "done" and the
		// branch predictor will correctly skip the call to
		// dispatch_once*().
		//
		// A far faster alternative solution: Defeat the speculative
		// read-ahead of peer CPUs.
		//
		// Modern architectures will throw away speculative results
		// once a branch mis-prediction occurs. Therefore, if we can
		// ensure that the predicate is not marked as being complete
		// until long after the last store by func(ctxt), then we have
		// defeated the read-ahead of peer CPUs.
		//
		// In other words, the last "store" by func(ctxt) must complete
		// and then N cycles must elapse before ~0l is stored to *val.
		// The value of N is whatever is sufficient to defeat the
		// read-ahead mechanism of peer CPUs.
		//
		// On some CPUs, the most fully synchronizing instruction might
		// need to be issued.
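		// A sketch of what such a "maximally synchronizing" barrier can
		// look like on Intel, where cpuid serializes the instruction
		// stream (illustrative only; the real definition lives in the
		// platform atomics headers):
		//
		//	unsigned long _clbr;
		//	__asm__ __volatile__("cpuid" : "=a" (_clbr) : "0" (0)
		//			: "ebx", "ecx", "edx", "cc", "memory");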
		os_atomic_maximally_synchronizing_barrier();
		// above assumed to contain release barrier
		next = os_atomic_xchg(vval, DISPATCH_ONCE_DONE, relaxed);
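		// The xchg above also returned the head of the waiter list (or
		// tail, i.e. &dow, if nobody queued up); walk the list and wake
		// every blocked waiter.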
		while (next != tail) {
			_dispatch_wait_until(tmp = (_dispatch_once_waiter_t)next->dow_next);
			event = &next->dow_event;
			next = tmp;
			_dispatch_thread_event_signal(event);
		}
	} else {
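		// Lost the race to run func(): push our on-stack record onto the
		// waiter list and block until the winner signals us.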
		_dispatch_thread_event_init(&dow.dow_event);
		next = *vval;
		for (;;) {
			if (next == DISPATCH_ONCE_DONE) {
				break;
			}
			if (os_atomic_cmpxchgvw(vval, next, tail, &next, release)) {
				dow.dow_thread = next->dow_thread;
				dow.dow_next = next;
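				// If the winner's thread is known, apply a QoS override
				// so a higher-priority waiter does not cause a priority
				// inversion while func() runs.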
				if (dow.dow_thread) {
					pthread_priority_t pp = _dispatch_get_priority();
					_dispatch_thread_override_start(dow.dow_thread, pp, val);
				}
				_dispatch_thread_event_wait(&dow.dow_event);
				if (dow.dow_thread) {
					_dispatch_thread_override_end(dow.dow_thread, val);
				}
				break;
			}
		}
		_dispatch_thread_event_destroy(&dow.dow_event);
	}
#endif
}