/*
 * Copyright (c) 2008-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_APACHE_LICENSE_HEADER_START@
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @APPLE_APACHE_LICENSE_HEADER_END@
 */
#include "internal.h"

#undef dispatch_once
#undef dispatch_once_f
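// Each thread that loses the race to run the initializer parks on a
// _dispatch_once_waiter_s allocated on its own stack and pushed onto a
// lock-free list rooted at *val. The sentinel DISPATCH_ONCE_DONE (~0l)
// stored in *val marks the initializer as complete.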
typedef struct _dispatch_once_waiter_s {
	volatile struct _dispatch_once_waiter_s *volatile dow_next;
	_dispatch_thread_semaphore_t dow_sema;
	mach_port_t dow_thread;
} *_dispatch_once_waiter_t;
#define DISPATCH_ONCE_DONE ((_dispatch_once_waiter_t)~0l)
#ifdef __BLOCKS__
void
dispatch_once(dispatch_once_t *val, dispatch_block_t block)
{
	dispatch_once_f(val, block, _dispatch_Block_invoke(block));
}
#endif
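// Illustrative caller usage (not part of this file); the predicate must
// have static or global storage:
//
//	static dispatch_once_t pred;
//	dispatch_once(&pred, ^{
//		// one-time initialization
//	});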
DISPATCH_NOINLINE
void
dispatch_once_f(dispatch_once_t *val, void *ctxt, dispatch_function_t func)
{
	_dispatch_once_waiter_t volatile *vval = (_dispatch_once_waiter_t*)val;
	struct _dispatch_once_waiter_s dow = { NULL, 0, MACH_PORT_NULL };
	_dispatch_once_waiter_t tail = &dow, next, tmp;
	_dispatch_thread_semaphore_t sema;
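	// The first caller to CAS *val from NULL to its own waiter record
	// wins and runs the initializer; later callers either observe
	// DISPATCH_ONCE_DONE or push themselves onto the waiter list below.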
	if (dispatch_atomic_cmpxchg(vval, NULL, tail, acquire)) {
		dow.dow_thread = _dispatch_thread_port();
		_dispatch_client_callout(ctxt, func);

		// The next barrier must be long and strong.
		//
		// The scenario: SMP systems with weakly ordered memory models
		// and aggressive out-of-order instruction execution.
		//
		// The problem:
		//
		// The dispatch_once*() wrapper macro causes the callee's
		// instruction stream to look like this (pseudo-RISC):
		//
		//      load r5, pred-addr
		//      cmpi r5, -1
		//      beq  1f
		//      call dispatch_once*()
		//      1f:
		//      load r6, data-addr
		//
		// May be re-ordered like so:
		//
		//      load r6, data-addr
		//      load r5, pred-addr
		//      cmpi r5, -1
		//      beq  1f
		//      call dispatch_once*()
		//      1f:
		//
		// Normally, a barrier on the read side is used to workaround
		// the weakly ordered memory model. But barriers are expensive
		// and we only need to synchronize once! After func(ctxt)
		// completes, the predicate will be marked as "done" and the
		// branch predictor will correctly skip the call to
		// dispatch_once*().
		//
		// A far faster alternative solution: Defeat the speculative
		// read-ahead of peer CPUs.
		//
		// Modern architectures will throw away speculative results
		// once a branch mis-prediction occurs. Therefore, if we can
		// ensure that the predicate is not marked as being complete
		// until long after the last store by func(ctxt), then we have
		// defeated the read-ahead of peer CPUs.
		//
		// In other words, the last "store" by func(ctxt) must complete
		// and then N cycles must elapse before ~0l is stored to *val.
		// The value of N is whatever is sufficient to defeat the
		// read-ahead mechanism of peer CPUs.
		//
		// On some CPUs, the most fully synchronizing instruction might
		// need to be issued.
		dispatch_atomic_maximally_synchronizing_barrier();
		// above assumed to contain release barrier
		next = dispatch_atomic_xchg(vval, DISPATCH_ONCE_DONE, relaxed);
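		// The xchg both publishes DISPATCH_ONCE_DONE and hands us the
		// head of the waiter list that accumulated while func(ctxt)
		// ran; walk it and signal each parked thread.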
		while (next != tail) {
			_dispatch_wait_until(tmp = (_dispatch_once_waiter_t)next->dow_next);
			sema = next->dow_sema;
			next = tmp;
			_dispatch_thread_semaphore_signal(sema);
		}
	} else {
		dow.dow_sema = _dispatch_get_thread_semaphore();
		next = *vval;
		for (;;) {
			if (next == DISPATCH_ONCE_DONE) {
				break;
			}
			if (dispatch_atomic_cmpxchgvw(vval, next, tail, &next,
					release)) {
				// Successfully pushed our waiter record; remember the
				// initializing thread so we can apply a priority
				// override while blocked on it.
				dow.dow_thread = next->dow_thread;
				dow.dow_next = next;
				if (dow.dow_thread) {
					pthread_priority_t pp = _dispatch_get_priority();
					_dispatch_thread_override_start(dow.dow_thread, pp);
				}
				_dispatch_thread_semaphore_wait(dow.dow_sema);
				if (dow.dow_thread) {
					_dispatch_thread_override_end(dow.dow_thread);
				}
				break;
			}
		}
		_dispatch_put_thread_semaphore(dow.dow_sema);
	}
}
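// Illustrative dispatch_once_f caller with a hypothetical setup()
// initializer (not part of this file), for code that cannot use blocks:
//
//	static dispatch_once_t pred;
//	static void setup(void *ctxt) { /* one-time initialization */ }
//	...
//	dispatch_once_f(&pred, NULL, setup);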