/*
 * Copyright (c) 2008-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_APACHE_LICENSE_HEADER_START@
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @APPLE_APACHE_LICENSE_HEADER_END@
 */

#include "internal.h"

#undef dispatch_once
#undef dispatch_once_f

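// A stack-allocated node in the intrusive list of threads blocked on a
// given dispatch_once_t. dow_event is the per-waiter event that the
// initializing thread signals when the initializer completes, and
// dow_thread carries the initializing thread's port down the list so
// that waiters can apply a priority override to it while they block.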
typedef struct _dispatch_once_waiter_s {
	volatile struct _dispatch_once_waiter_s *volatile dow_next;
	dispatch_thread_event_s dow_event;
	mach_port_t dow_thread;
} *_dispatch_once_waiter_t;

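// Terminal predicate value stored once the initializer has run. The
// dispatch_once*() wrapper macro compares the predicate against this
// value (~0l) to skip the call on the fast path.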
#define DISPATCH_ONCE_DONE ((_dispatch_once_waiter_t)~0l)

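// Typical client usage, as an illustrative sketch only (the names
// `pred` and `compute_shared_state` are hypothetical, not part of
// this file):
//
//	static dispatch_once_t pred;
//	static void *shared;
//	dispatch_once(&pred, ^{
//		shared = compute_shared_state();	// runs exactly once
//	});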
#ifdef __BLOCKS__
void
dispatch_once(dispatch_once_t *val, dispatch_block_t block)
{
	dispatch_once_f(val, block, _dispatch_Block_invoke(block));
}
#endif

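// Slow path, reached only when the caller's inline fast path saw a
// predicate other than DISPATCH_ONCE_DONE. Two implementations follow:
// a gate-based one when DISPATCH_GATE_USE_FOR_DISPATCH_ONCE is set, and
// a portable one built on an atomic intrusive list of waiters.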
DISPATCH_NOINLINE
void
dispatch_once_f(dispatch_once_t *val, void *ctxt, dispatch_function_t func)
{
#if DISPATCH_GATE_USE_FOR_DISPATCH_ONCE
	dispatch_once_gate_t l = (dispatch_once_gate_t)val;

	if (_dispatch_once_gate_tryenter(l)) {
		// This thread won the race: run the initializer, then wake
		// every thread parked on the gate.
		_dispatch_client_callout(ctxt, func);
		_dispatch_once_gate_broadcast(l);
	} else {
		// Another thread is running (or has run) the initializer:
		// wait for the gate to be broadcast.
		_dispatch_once_gate_wait(l);
	}
#else
	_dispatch_once_waiter_t volatile *vval = (_dispatch_once_waiter_t*)val;
	struct _dispatch_once_waiter_s dow = { };
	_dispatch_once_waiter_t tail = &dow, next, tmp;
	dispatch_thread_event_t event;

	if (os_atomic_cmpxchg(vval, NULL, tail, acquire)) {
		// This thread won the race: it owns the once and runs the
		// initializer. Its own waiter node doubles as the list tail.
		dow.dow_thread = _dispatch_tid_self();
		_dispatch_client_callout(ctxt, func);

		// The next barrier must be long and strong.
		//
		// The scenario: SMP systems with weakly ordered memory models
		// and aggressive out-of-order instruction execution.
		//
		// The problem:
		//
		// The dispatch_once*() wrapper macro causes the callee's
		// instruction stream to look like this (pseudo-RISC):
		//
		//      load r5, pred-addr
		//      cmpi r5, -1
		//      beq  1f
		//      call dispatch_once*()
		//      1f:
		//      load r6, data-addr
		//
		// May be re-ordered like so:
		//
		//      load r6, data-addr
		//      load r5, pred-addr
		//      cmpi r5, -1
		//      beq  1f
		//      call dispatch_once*()
		//      1f:
		//
		// Normally, a barrier on the read side is used to work around
		// the weakly ordered memory model. But barriers are expensive
		// and we only need to synchronize once! After func(ctxt)
		// completes, the predicate will be marked as "done" and the
		// branch predictor will correctly skip the call to
		// dispatch_once*().
		//
		// A far faster alternative solution: Defeat the speculative
		// read-ahead of peer CPUs.
		//
		// Modern architectures will throw away speculative results
		// once a branch mis-prediction occurs. Therefore, if we can
		// ensure that the predicate is not marked as being complete
		// until long after the last store by func(ctxt), then we have
		// defeated the read-ahead of peer CPUs.
		//
		// In other words, the last "store" by func(ctxt) must complete
		// and then N cycles must elapse before ~0l is stored to *val.
		// The value of N is whatever is sufficient to defeat the
		// read-ahead mechanism of peer CPUs.
		//
		// On some CPUs, the most fully synchronizing instruction might
		// need to be issued.
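		//
		// For reference, the fast path alluded to above looks roughly
		// like the following sketch (an assumption about the public
		// header's inline wrapper, not its exact text):
		//
		//	#define dispatch_once_f(pred, ctxt, func) do { \
		//		if (DISPATCH_EXPECT(*(pred), ~0l) != ~0l) { \
		//			dispatch_once_f((pred), (ctxt), (func)); \
		//		} \
		//	} while (0)
		//
		// i.e. the only synchronization on the hot path is the plain
		// load of the predicate.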

		os_atomic_maximally_synchronizing_barrier();
		// the barrier above is assumed to contain a release barrier
		next = os_atomic_xchg(vval, DISPATCH_ONCE_DONE, relaxed);
		// Walk the list of waiters that queued up while the
		// initializer ran, signaling each in turn. The list ends at
		// this thread's own node (tail).
		while (next != tail) {
			// A waiter may have published itself via the cmpxchg
			// below before linking dow_next; spin until the link
			// becomes visible.
			_dispatch_wait_until(tmp = (_dispatch_once_waiter_t)next->dow_next);
			event = &next->dow_event;
			next = tmp;
			_dispatch_thread_event_signal(event);
		}
	} else {
		// Another thread owns the once: enqueue this thread as a
		// waiter and block until the initializer signals completion.
		_dispatch_thread_event_init(&dow.dow_event);
		next = *vval;
		for (;;) {
			if (next == DISPATCH_ONCE_DONE) {
				// The initializer finished while we were preparing
				// to wait; nothing to do.
				break;
			}
			if (os_atomic_cmpxchgvw(vval, next, tail, &next, release)) {
				// Pushed onto the head of the waiter list. Propagate
				// the initializing thread's port and boost it so it
				// cannot be starved below this thread's priority.
				dow.dow_thread = next->dow_thread;
				dow.dow_next = next;
				if (dow.dow_thread) {
					pthread_priority_t pp = _dispatch_get_priority();
					_dispatch_thread_override_start(dow.dow_thread, pp, val);
				}
				_dispatch_thread_event_wait(&dow.dow_event);
				if (dow.dow_thread) {
					_dispatch_thread_override_end(dow.dow_thread, val);
				}
				break;
			}
		}
		_dispatch_thread_event_destroy(&dow.dow_event);
	}
#endif
}