/*
 * Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/appleapiopts.h>
#include <machine/cpu_capabilities.h>
#include <machine/commpage.h>
#include <mach/i386/syscall_sw.h>


/* PREEMPTION FREE ZONE (PFZ)
 *
 * A portion of the commpage is special-cased by the kernel to be "preemption free",
 * i.e. as if we had disabled interrupts in user mode.  This facilitates writing
 * "nearly-lockless" code, for example code that must be serialized by a spinlock but
 * which we do not want to preempt while the spinlock is held.
 *
 * The PFZ is implemented by collecting all the "preemption-free" code into a single
 * contiguous region of the commpage.  Register %ebx is used as a flag register;
 * before entering the PFZ, %ebx is cleared.  If some event occurs that would normally
 * result in a preemption while in the PFZ, the kernel sets %ebx nonzero instead of
 * preempting.  Then, when the routine leaves the PFZ, we check %ebx and
 * if nonzero execute a special "pfz_exit" syscall to take the delayed preemption.
 *
 * PFZ code must bound the amount of time spent in the PFZ, in order to control
 * latency.  Backward branches are dangerous and must not be used in a way that
 * could inadvertently create a long-running loop.
 *
 * Because they cannot be implemented reasonably without a lock, we put the "atomic"
 * FIFO enqueue and dequeue in the PFZ.  As long as we don't take a page fault trying to
 * access queue elements, these implementations behave nearly-locklessly.
 * But we still must take a spinlock to serialize, and in case of page faults.
 */

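/* The protocol above, modeled in C for illustration only (a sketch, not part
 * of the commpage: "preempt_pending" stands in for register %ebx, and
 * "pfz_exit_trap" is a hypothetical stand-in for the pfz_exit syscall made by
 * the preempt routine below).
 *
 *	static volatile int preempt_pending;	// kernel sets this instead of preempting
 *	extern void pfz_exit_trap(void);	// hypothetical: take the delayed preemption
 *
 *	static void
 *	pfz_critical_section(void)
 *	{
 *		preempt_pending = 0;		// %ebx is cleared before entering the PFZ
 *		// ... bounded, preemption-free work (e.g. the FIFO ops below) ...
 *		if (preempt_pending)		// kernel asked us to preempt?
 *			pfz_exit_trap();	// yes - take the delayed preemption now
 *	}
 */
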
/* Work around 10062261 with a dummy non-local symbol */
fifo_queue_dummy_symbol:

/*
 * typedef volatile struct {
 *	void	*opaque1;	<-- ptr to first queue element or null
 *	void	*opaque2;	<-- ptr to last queue element or null
 *	int	 opaque3;	<-- spinlock
 * } OSFifoQueueHead;
 *
 * void  OSAtomicFifoEnqueue( OSFifoQueueHead *list, void *new, size_t offset);
 */

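/* A minimal user-level usage sketch (assuming the OSAtomicFifoEnqueue and
 * OSAtomicFifoDequeue declarations and the OS_ATOMIC_FIFO_QUEUE_INIT
 * initializer from <libkern/OSAtomic.h>; the "offset" argument is the byte
 * offset of the link field within each element):
 *
 *	#include <stddef.h>
 *	#include <libkern/OSAtomic.h>
 *
 *	typedef struct elem {
 *		int		value;
 *		struct elem	*link;		// the queue's link field
 *	} elem_t;
 *
 *	OSFifoQueueHead head = OS_ATOMIC_FIFO_QUEUE_INIT;
 *	elem_t a = { 1, NULL }, b = { 2, NULL };
 *
 *	OSAtomicFifoEnqueue(&head, &a, offsetof(elem_t, link));
 *	OSAtomicFifoEnqueue(&head, &b, offsetof(elem_t, link));
 *	elem_t *first = OSAtomicFifoDequeue(&head, offsetof(elem_t, link));
 *	// first == &a: elements dequeue in FIFO order
 */
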

/* Subroutine to make a preempt syscall.  Called when we notice %ebx is
 * nonzero after returning from a PFZ subroutine.
 * When we enter kernel:
 *	%edx = return address
 *	%ecx = stack ptr
 * Destroys %eax, %ecx, and %edx.
 */
COMMPAGE_FUNCTION_START(preempt, 32, 4)
	popl	%edx		// get return address
	movl	%esp,%ecx	// save stack ptr here
	movl	$(-58),%eax	/* 58 = pfz_exit */
	xorl	%ebx,%ebx	// clear "preemption pending" flag
	sysenter
COMMPAGE_DESCRIPTOR(preempt,_COMM_PAGE_PREEMPT)


/* Subroutine to back off if we cannot get the spinlock.  Called
 * after a few attempts inline in the PFZ subroutines.  This code is
 * not in the PFZ.
 *	%edi = ptr to queue head structure
 *	%ebx = preemption flag (nonzero if preemption pending)
 * Destroys %eax.
 */
COMMPAGE_FUNCTION_START(backoff, 32, 4)
	testl	%ebx,%ebx	// does kernel want to preempt us?
	jz	1f		// no
	xorl	%ebx,%ebx	// yes, clear flag
	pushl	%edx		// preserve regs used by preempt syscall
	pushl	%ecx
	COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_BACKOFF,backoff)
	popl	%ecx
	popl	%edx
1:
	pause			// SMT-friendly backoff
	cmpl	$0,8(%edi)	// sniff the lockword
	jnz	1b		// loop if still taken
	ret			// lockword is free, so reenter PFZ
COMMPAGE_DESCRIPTOR(backoff,_COMM_PAGE_BACKOFF)


/* Preemption-free-zone routine to FIFO Enqueue:
 *	%edi = ptr to queue head structure
 *	%esi = ptr to element to enqueue
 *	%edx = offset of link field in elements
 *	%ebx = preemption flag (kernel sets nonzero if we should preempt)
 */

COMMPAGE_FUNCTION_START(pfz_enqueue, 32, 4)
	movl	$0,(%edx,%esi)	// zero forward link in new element
1:
	xorl	%eax, %eax
	orl	$-1, %ecx
	lock
	cmpxchgl %ecx, 8(%edi)	// try to take the spinlock
	jz	2f		// got it

	pause
	xorl	%eax, %eax
	lock
	cmpxchgl %ecx, 8(%edi)	// try 2nd time to take the spinlock
	jz	2f		// got it

	pause
	xorl	%eax, %eax
	lock
	cmpxchgl %ecx, 8(%edi)	// try 3rd time to take the spinlock
	jz	2f		// got it

	COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_ENQUEUE,pfz_enqueue)
	jmp	1b		// loop to try again
2:
	movl	4(%edi),%ecx	// get ptr to last element in q
	testl	%ecx,%ecx	// q null?
	jnz	3f		// no
	movl	%esi,(%edi)	// q empty so this is first element
	jmp	4f
3:
	movl	%esi,(%edx,%ecx) // point to new element from last
4:
	movl	%esi,4(%edi)	// new element becomes last in q
	movl	$0,8(%edi)	// unlock spinlock
	ret
COMMPAGE_DESCRIPTOR(pfz_enqueue,_COMM_PAGE_PFZ_ENQUEUE)
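
/* Illustrative C rendering of pfz_enqueue above (a sketch only: the real code
 * is the assembly, which runs inside the PFZ and calls _COMM_PAGE_BACKOFF
 * after three failed lock attempts; the field names below are assumptions for
 * the opaque1/opaque2/opaque3 members).
 *
 *	typedef volatile struct {
 *		void	*first;		// opaque1
 *		void	*last;		// opaque2
 *		int	 lock;		// opaque3: 0 = free, -1 = taken
 *	} fifo_head_t;
 *
 *	static void
 *	pfz_enqueue_c(fifo_head_t *q, void *elem, size_t offset)
 *	{
 *		*(void **)((char *)elem + offset) = NULL;	// zero forward link
 *		while (!__sync_bool_compare_and_swap(&q->lock, 0, -1))
 *			;			// spin (the asm backs off instead)
 *		if (q->last == NULL)
 *			q->first = elem;	// empty q: new element is first
 *		else				// else chain from old last element
 *			*(void **)((char *)q->last + offset) = elem;
 *		q->last = elem;			// new element becomes last
 *		q->lock = 0;			// unlock spinlock
 *	}
 */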


/* Preemption-free-zone routine to FIFO Dequeue:
 *	%edi = ptr to queue head structure
 *	%edx = offset of link field in elements
 *	%ebx = preemption flag (kernel sets nonzero if we should preempt)
 *
 * Returns with next element (or 0) in %eax.
 */

COMMPAGE_FUNCTION_START(pfz_dequeue, 32, 4)
1:
	xorl	%eax, %eax
	orl	$-1, %ecx
	lock
	cmpxchgl %ecx, 8(%edi)	// try to take the spinlock
	jz	2f		// got it

	pause
	xorl	%eax, %eax
	lock
	cmpxchgl %ecx, 8(%edi)	// try 2nd time to take the spinlock
	jz	2f		// got it

	pause
	xorl	%eax, %eax
	lock
	cmpxchgl %ecx, 8(%edi)	// try 3rd time to take the spinlock
	jz	2f		// got it

	COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_DEQUEUE,pfz_dequeue)
	jmp	1b		// loop to try again
2:
	movl	(%edi),%eax	// get ptr to first element in q
	testl	%eax,%eax	// q null?
	jz	4f		// yes
	movl	(%edx,%eax),%esi // get ptr to 2nd element in q
	testl	%esi,%esi	// is there a 2nd element?
	jnz	3f		// yes
	movl	%esi,4(%edi)	// no - clear "last" field of q head
3:
	movl	%esi,(%edi)	// update "first" field of q head
4:
	movl	$0,8(%edi)	// unlock spinlock
	ret
COMMPAGE_DESCRIPTOR(pfz_dequeue,_COMM_PAGE_PFZ_DEQUEUE)
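
/* Illustrative C rendering of pfz_dequeue above (same sketch conventions and
 * fifo_head_t as the enqueue sketch; returns the removed element or NULL):
 *
 *	static void *
 *	pfz_dequeue_c(fifo_head_t *q, size_t offset)
 *	{
 *		while (!__sync_bool_compare_and_swap(&q->lock, 0, -1))
 *			;			// spin (the asm backs off instead)
 *		void *elem = q->first;		// first element, or NULL
 *		if (elem != NULL) {
 *			void *next = *(void **)((char *)elem + offset);
 *			if (next == NULL)	// removing the only element,
 *				q->last = NULL;	// so q is now empty
 *			q->first = next;	// unlink first element
 *		}
 *		q->lock = 0;			// unlock spinlock
 *		return elem;
 *	}
 */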



/************************* x86_64 versions follow **************************/


/*
 * typedef volatile struct {
 *	void	*opaque1;	<-- ptr to first queue element or null
 *	void	*opaque2;	<-- ptr to last queue element or null
 *	int	 opaque3;	<-- spinlock
 * } OSFifoQueueHead;
 *
 * void  OSAtomicFifoEnqueue( OSFifoQueueHead *list, void *new, size_t offset);
 */


/* Subroutine to make a preempt syscall.  Called when we notice %ebx is
 * nonzero after returning from a PFZ subroutine.  Not in PFZ.
 *
 * All registers preserved (but does clear the %ebx preemption flag).
 */
COMMPAGE_FUNCTION_START(preempt_64, 64, 4)
	pushq	%rax
	pushq	%rcx
	pushq	%r11
	movl	$(SYSCALL_CONSTRUCT_MACH(58)),%eax	/* 58 = pfz_exit */
	xorl	%ebx,%ebx
	syscall
	popq	%r11
	popq	%rcx
	popq	%rax
	ret
COMMPAGE_DESCRIPTOR(preempt_64,_COMM_PAGE_PREEMPT)
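
/* For illustration, how the trap number above is encoded (simplified from
 * xnu's mach/i386/syscall_sw.h; the real macro also masks the number):
 *
 *	#define SYSCALL_CLASS_SHIFT	24
 *	#define SYSCALL_CLASS_MACH	1
 *	#define SYSCALL_CONSTRUCT_MACH(n) \
 *		((SYSCALL_CLASS_MACH << SYSCALL_CLASS_SHIFT) | (n))
 *
 * So SYSCALL_CONSTRUCT_MACH(58) == 0x100003a, Mach trap 58 (pfz_exit) in the
 * 64-bit syscall encoding; the 32-bit sysenter path above passes -58 instead.
 */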


/* Subroutine to back off if we cannot get the spinlock.  Called
 * after a few attempts inline in the PFZ subroutines.  This code is
 * not in the PFZ.
 *	%rdi = ptr to queue head structure
 *	%ebx = preemption flag (nonzero if preemption pending)
 * Uses: %rax.
 */
COMMPAGE_FUNCTION_START(backoff_64, 64, 4)
	testl	%ebx,%ebx	// does kernel want to preempt us?
	jz	1f		// no
	COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_BACKOFF,backoff_64)
1:
	pause			// SMT-friendly backoff
	cmpl	$0,16(%rdi)	// sniff the lockword
	jnz	1b		// loop if still taken
	ret			// lockword is free, so reenter PFZ
COMMPAGE_DESCRIPTOR(backoff_64,_COMM_PAGE_BACKOFF)


/* Preemption-free-zone routine to FIFO Enqueue:
 *	%rdi = ptr to queue head structure
 *	%rsi = ptr to new element to enqueue
 *	%rdx = offset of link field in elements
 *	%ebx = preemption flag (kernel sets nonzero if we should preempt)
 */

COMMPAGE_FUNCTION_START(pfz_enqueue_64, 64, 4)
	movq	$0,(%rdx,%rsi)	// zero forward link in new element
1:
	xorl	%eax, %eax
	orl	$-1, %ecx
	lock
	cmpxchgl %ecx,16(%rdi)	// try to take the spinlock
	jz	2f		// got it

	pause
	xorl	%eax, %eax
	lock
	cmpxchgl %ecx,16(%rdi)	// try 2nd time to take the spinlock
	jz	2f		// got it

	pause
	xorl	%eax, %eax
	lock
	cmpxchgl %ecx,16(%rdi)	// try 3rd time to take the spinlock
	jz	2f		// got it

	COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_ENQUEUE,pfz_enqueue_64)
	jmp	1b		// loop to try again
2:
	movq	8(%rdi),%rcx	// get ptr to last element in q
	testq	%rcx,%rcx	// q null?
	jnz	3f		// no
	movq	%rsi,(%rdi)	// q empty so this is first element
	jmp	4f
3:
	movq	%rsi,(%rdx,%rcx) // point to new element from last
4:
	movq	%rsi,8(%rdi)	// new element becomes last in q
	movl	$0,16(%rdi)	// unlock spinlock
	ret
COMMPAGE_DESCRIPTOR(pfz_enqueue_64,_COMM_PAGE_PFZ_ENQUEUE)



/* Preemption-free-zone routine to FIFO Dequeue:
 *	%rdi = ptr to queue head structure
 *	%rdx = offset of link field in elements
 *	%ebx = preemption flag (kernel sets nonzero if we should preempt)
 *
 * Returns with next element (or 0) in %rax.
 */

COMMPAGE_FUNCTION_START(pfz_dequeue_64, 64, 4)
1:
	xorl	%eax, %eax
	orl	$-1, %ecx
	lock
	cmpxchgl %ecx,16(%rdi)	// try to take the spinlock
	jz	2f		// got it

	pause
	xorl	%eax, %eax
	lock
	cmpxchgl %ecx,16(%rdi)	// try 2nd time to take the spinlock
	jz	2f		// got it

	pause
	xorl	%eax, %eax
	lock
	cmpxchgl %ecx,16(%rdi)	// try 3rd time to take the spinlock
	jz	2f		// got it

	COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_DEQUEUE,pfz_dequeue_64)
	jmp	1b		// loop to try again
2:
	movq	(%rdi),%rax	// get ptr to first element in q
	testq	%rax,%rax	// q null?
	jz	4f		// yes
	movq	(%rdx,%rax),%rsi // get ptr to 2nd element in q
	testq	%rsi,%rsi	// is there a 2nd element?
	jnz	3f		// yes
	movq	%rsi,8(%rdi)	// no - clear "last" field of q head
3:
	movq	%rsi,(%rdi)	// update "first" field of q head
4:
	movl	$0,16(%rdi)	// unlock spinlock
	ret
COMMPAGE_DESCRIPTOR(pfz_dequeue_64,_COMM_PAGE_PFZ_DEQUEUE)