/*
 * Copyright (c) 2008-2011 Apple Inc. All rights reserved.
 *
 * @APPLE_APACHE_LICENSE_HEADER_START@
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @APPLE_APACHE_LICENSE_HEADER_END@
 */

#include "internal.h"

// The public header wraps these symbols in macros; undefine them so the
// real out-of-line implementations can be defined below.
#undef dispatch_once
#undef dispatch_once_f


// One record per thread blocked on a dispatch_once_t whose initializer is
// still running.  Records are stack-allocated by each waiter and linked
// into an intrusive list headed at *val; the executing thread walks the
// list and signals each dow_sema when the initializer completes.
struct _dispatch_once_waiter_s {
	// Next waiter in the list (or the executor's own record, which
	// terminates the walk).  volatile: published/read across threads
	// outside of any lock.
	volatile struct _dispatch_once_waiter_s *volatile dow_next;
	// Semaphore this waiter blocks on until the initializer finishes.
	_dispatch_thread_semaphore_t dow_sema;
};

// Sentinel stored into *val once the initializer has run; ~0l matches the
// "done" predicate value tested by the dispatch_once() fast-path macro.
#define DISPATCH_ONCE_DONE ((struct _dispatch_once_waiter_s *)~0l)
33
#ifdef __BLOCKS__
// Block-based front end: unwrap the Block's invoke pointer and forward to
// the function-based implementation, passing the Block itself as context.
void
dispatch_once(dispatch_once_t *val, dispatch_block_t block)
{
	struct Block_basic *basic = (void *)block;

	// NOTE(review): relies on the Blocks ABI placing Block_invoke in
	// Block_basic; the Block pointer doubles as the callout context.
	dispatch_once_f(val, block, (void *)basic->Block_invoke);
}
#endif
43
// Execute func(ctxt) exactly once for the lifetime of *val.
//
// Lock-free protocol: the first thread to CAS *val from NULL to its own
// stack-allocated waiter record becomes the executor and runs func(ctxt).
// Any thread arriving meanwhile pushes its own record onto the intrusive
// list at *val and blocks on a per-thread semaphore.  When the initializer
// returns, the executor atomically swaps in DISPATCH_ONCE_DONE and walks
// the captured list, signalling each waiter.  Statement order here is
// load-bearing; do not reorder around the atomics/barriers.
DISPATCH_NOINLINE
void
dispatch_once_f(dispatch_once_t *val, void *ctxt, dispatch_function_t func)
{
	// Treat the opaque predicate word as the head of the waiter list.
	struct _dispatch_once_waiter_s * volatile *vval =
			(struct _dispatch_once_waiter_s**)val;
	// This thread's on-stack record: list link for the waiter path, or
	// the list terminator for the executor path.
	struct _dispatch_once_waiter_s dow = { NULL, 0 };
	struct _dispatch_once_waiter_s *tail, *tmp;
	_dispatch_thread_semaphore_t sema;

	if (dispatch_atomic_cmpxchg(vval, NULL, &dow)) {
		// Won the race: this thread runs the initializer.
		dispatch_atomic_acquire_barrier();
		_dispatch_client_callout(ctxt, func);

		// The next barrier must be long and strong.
		//
		// The scenario: SMP systems with weakly ordered memory models
		// and aggressive out-of-order instruction execution.
		//
		// The problem:
		//
		// The dispatch_once*() wrapper macro causes the callee's
		// instruction stream to look like this (pseudo-RISC):
		//
		//      load r5, pred-addr
		//      cmpi r5, -1
		//      beq  1f
		//      call dispatch_once*()
		//      1f:
		//      load r6, data-addr
		//
		// May be re-ordered like so:
		//
		//      load r6, data-addr
		//      load r5, pred-addr
		//      cmpi r5, -1
		//      beq  1f
		//      call dispatch_once*()
		//      1f:
		//
		// Normally, a barrier on the read side is used to workaround
		// the weakly ordered memory model. But barriers are expensive
		// and we only need to synchronize once! After func(ctxt)
		// completes, the predicate will be marked as "done" and the
		// branch predictor will correctly skip the call to
		// dispatch_once*().
		//
		// A far faster alternative solution: Defeat the speculative
		// read-ahead of peer CPUs.
		//
		// Modern architectures will throw away speculative results
		// once a branch mis-prediction occurs. Therefore, if we can
		// ensure that the predicate is not marked as being complete
		// until long after the last store by func(ctxt), then we have
		// defeated the read-ahead of peer CPUs.
		//
		// In other words, the last "store" by func(ctxt) must complete
		// and then N cycles must elapse before ~0l is stored to *val.
		// The value of N is whatever is sufficient to defeat the
		// read-ahead mechanism of peer CPUs.
		//
		// On some CPUs, the most fully synchronizing instruction might
		// need to be issued.

		dispatch_atomic_maximally_synchronizing_barrier();
		//dispatch_atomic_release_barrier(); // assumed contained in above

		// Publish "done" and capture whatever waiter list accumulated
		// while the initializer ran.
		tmp = dispatch_atomic_xchg(vval, DISPATCH_ONCE_DONE);
		// Our own record terminates the list: the last waiter's
		// dow_next points back at &dow.
		tail = &dow;
		while (tail != tmp) {
			// A waiter publishes its record via cmpxchg before it
			// fills in dow_next; spin until the link is visible.
			while (!tmp->dow_next) {
				_dispatch_hardware_pause();
			}
			// Read dow_sema before signalling: the waiter's stack
			// frame (and record) may vanish once it wakes.
			sema = tmp->dow_sema;
			tmp = (struct _dispatch_once_waiter_s*)tmp->dow_next;
			_dispatch_thread_semaphore_signal(sema);
		}
	} else {
		// Lost the race: another thread is (or was) running the
		// initializer.  Prepare to block.
		dow.dow_sema = _dispatch_get_thread_semaphore();
		for (;;) {
			tmp = *vval;
			if (tmp == DISPATCH_ONCE_DONE) {
				// Initializer finished while we were setting up.
				break;
			}
			dispatch_atomic_store_barrier();
			// Push our record onto the head of the waiter list.
			// On failure (head changed under us) just re-examine
			// *vval; it may now be DONE.
			if (dispatch_atomic_cmpxchg(vval, tmp, &dow)) {
				// Link to the previous head only after winning
				// the push; the executor spins on this field.
				dow.dow_next = tmp;
				_dispatch_thread_semaphore_wait(dow.dow_sema);
			}
		}
		// Return the cached per-thread semaphore for reuse.
		_dispatch_put_thread_semaphore(dow.dow_sema);
	}
}
135 }