/*
 * Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/appleapiopts.h>
#include <machine/cpu_capabilities.h>
#include <machine/commpage.h>
#include <mach/i386/syscall_sw.h>

/* PREEMPTION FREE ZONE (PFZ)
 *
 * A portion of the commpage is special-cased by the kernel to be "preemption free",
 * i.e. as if we had disabled interrupts in user mode. This facilitates writing
 * "nearly-lockless" code, for example code that must be serialized by a spinlock but
 * which we do not want to preempt while the spinlock is held.
 *
 * The PFZ is implemented by collecting all the "preemption-free" code into a single
 * contiguous region of the commpage. Register %ebx is used as a flag register;
 * before entering the PFZ, %ebx is cleared. If some event occurs that would normally
 * result in a preemption while in the PFZ, the kernel sets %ebx nonzero instead of
 * preempting. Then, when the routine leaves the PFZ, we check %ebx and,
 * if it is nonzero, execute a special "pfz_exit" syscall to take the delayed preemption.
 *
 * PFZ code must bound the amount of time spent in the PFZ, in order to control
 * latency. Backward branches are dangerous and must not be used in a way that
 * could inadvertently create a long-running loop.
 *
 * Because they cannot be implemented reasonably without a lock, we put the "atomic"
 * FIFO enqueue and dequeue in the PFZ. As long as we don't take a page fault trying to
 * access queue elements, these implementations behave nearly-locklessly.
 * But we still must take a spinlock to serialize, and in case of page faults.
 */
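
/* Illustrative note (not part of the original source): the user-visible
 * wrappers that follow all share one pattern around the PFZ, shown here as a
 * hedged C-style sketch. The variable preempt_pending stands in for %ebx,
 * and pfz_exit() for the trap issued by the "preempt" routine further below.
 *
 *	void wrapper(...)
 *	{
 *		int preempt_pending = 0;	// %ebx cleared before entering the PFZ
 *		pfz_routine(...);		// runs inside the PFZ
 *		if (preempt_pending)		// kernel set the flag instead of preempting
 *			pfz_exit();		// take the delayed preemption (trap 58)
 *	}
 */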

/*
 * typedef volatile struct {
 *	void	*opaque1;	<-- ptr to first queue element or null
 *	void	*opaque2;	<-- ptr to last queue element or null
 *	int	 opaque3;	<-- spinlock
 * } OSFifoQueueHead;
 *
 * void OSAtomicFifoEnqueue( OSFifoQueueHead *list, void *new, size_t offset);
 */
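
/* Minimal usage sketch (illustrative, not part of the original source). The
 * element type and field names below are assumptions; the only requirements
 * are a pointer-sized link field in each element and passing that field's
 * byte offset to both calls.
 *
 *	#include <libkern/OSAtomic.h>
 *	#include <stddef.h>
 *
 *	typedef struct elem {
 *		struct elem	*link;		// reserved for the queue while enqueued
 *		int		payload;	// hypothetical user data
 *	} elem_t;
 *
 *	static OSFifoQueueHead head = { NULL, NULL, 0 };
 *
 *	void example(elem_t *e)
 *	{
 *		OSAtomicFifoEnqueue(&head, e, offsetof(elem_t, link));
 *		elem_t *first = OSAtomicFifoDequeue(&head, offsetof(elem_t, link));
 *		(void)first;			// NULL if the queue was empty
 *	}
 */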

COMMPAGE_FUNCTION_START(AtomicFifoEnqueue, 32, 4)
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	xorl	%ebx,%ebx	// clear "preemption pending" flag
	movl	16(%esp),%edi	// %edi == ptr to list head
	movl	20(%esp),%esi	// %esi == new
	movl	24(%esp),%edx	// %edx == offset
	COMMPAGE_CALL(_COMM_PAGE_PFZ_ENQUEUE,_COMM_PAGE_FIFO_ENQUEUE,AtomicFifoEnqueue)
	testl	%ebx,%ebx	// pending preemption?
	jz	1f
	COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_FIFO_ENQUEUE,AtomicFifoEnqueue)
1:
	popl	%ebx
	popl	%esi
	popl	%edi
	ret
COMMPAGE_DESCRIPTOR(AtomicFifoEnqueue,_COMM_PAGE_FIFO_ENQUEUE,0,0)


/* void* OSAtomicFifoDequeue( OSFifoQueueHead *list, size_t offset); */

COMMPAGE_FUNCTION_START(AtomicFifoDequeue, 32, 4)
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	xorl	%ebx,%ebx	// clear "preemption pending" flag
	movl	16(%esp),%edi	// %edi == ptr to list head
	movl	20(%esp),%edx	// %edx == offset
	COMMPAGE_CALL(_COMM_PAGE_PFZ_DEQUEUE,_COMM_PAGE_FIFO_DEQUEUE,AtomicFifoDequeue)
	testl	%ebx,%ebx	// pending preemption?
	jz	1f
	pushl	%eax		// save return value across sysenter
	COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_FIFO_DEQUEUE,AtomicFifoDequeue)
	popl	%eax
1:
	popl	%ebx
	popl	%esi
	popl	%edi
	ret			// ptr to 1st element in Q still in %eax
COMMPAGE_DESCRIPTOR(AtomicFifoDequeue,_COMM_PAGE_FIFO_DEQUEUE,0,0)


/* Subroutine to make a preempt syscall. Called when we notice %ebx is
 * nonzero after returning from a PFZ subroutine.
 * When we enter kernel:
 *	%edx = return address
 *	%ecx = stack ptr
 * Destroys %eax, %ecx, and %edx.
 */
COMMPAGE_FUNCTION_START(preempt, 32, 4)
	popl	%edx		// get return address
	movl	%esp,%ecx	// save stack ptr here
	movl	$(-58),%eax	/* 58 = pfz_exit */
	xorl	%ebx,%ebx	// clear "preemption pending" flag
	sysenter
COMMPAGE_DESCRIPTOR(preempt,_COMM_PAGE_PREEMPT,0,0)
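
/* Note (added for clarity, not in the original source): on the 32-bit
 * sysenter path, Mach traps are selected with negative syscall numbers, so
 * $(-58) requests Mach trap 58 (pfz_exit). The 64-bit preempt_64 routine
 * below builds the equivalent number with SYSCALL_CONSTRUCT_MACH(58) and
 * uses the syscall instruction instead.
 */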


/* Subroutine to back off if we cannot get the spinlock. Called
 * after a few attempts inline in the PFZ subroutines. This code is
 * not in the PFZ.
 *	%edi = ptr to queue head structure
 *	%ebx = preemption flag (nonzero if preemption pending)
 * Destroys %eax.
 */
COMMPAGE_FUNCTION_START(backoff, 32, 4)
	testl	%ebx,%ebx	// does kernel want to preempt us?
	jz	1f		// no
	xorl	%ebx,%ebx	// yes, clear flag
	pushl	%edx		// preserve regs used by preempt syscall
	pushl	%ecx
	COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_BACKOFF,backoff)
	popl	%ecx
	popl	%edx
1:
	pause			// SMT-friendly backoff
	cmpl	$0,8(%edi)	// sniff the lockword
	jnz	1b		// loop if still taken
	ret			// lockword is free, so reenter PFZ
COMMPAGE_DESCRIPTOR(backoff,_COMM_PAGE_BACKOFF,0,0)
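
/* Hedged C-style sketch of the backoff logic above (illustrative only; the
 * helper names are made up). q->opaque3 is the lockword, at offset 8 here
 * and at offset 16 in the 64-bit variant.
 *
 *	extern int preempt_pending;		// stands in for register %ebx
 *
 *	void backoff(OSFifoQueueHead *q)
 *	{
 *		if (preempt_pending) {		// kernel wants to preempt us
 *			preempt_pending = 0;	// clear the flag
 *			preempt();		// pfz_exit trap, taken outside the PFZ
 *		}
 *		do {
 *			__asm__ volatile("pause");	// SMT-friendly spin
 *		} while (q->opaque3 != 0);	// sniff until the lock looks free
 *	}					// then reenter the PFZ and retry
 */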


/* Preemption-free-zone routine to FIFO Enqueue:
 *	%edi = ptr to queue head structure
 *	%esi = ptr to element to enqueue
 *	%edx = offset of link field in elements
 *	%ebx = preemption flag (kernel sets nonzero if we should preempt)
 */

COMMPAGE_FUNCTION_START(pfz_enqueue, 32, 4)
	movl	$0,(%edx,%esi)	// zero forward link in new element
1:
	xorl	%eax, %eax
	orl	$-1, %ecx
	lock
	cmpxchgl %ecx, 8(%edi)	// try to take the spinlock
	jz	2f		// got it

	pause
	xorl	%eax, %eax
	lock
	cmpxchgl %ecx, 8(%edi)	// try 2nd time to take the spinlock
	jz	2f		// got it

	pause
	xorl	%eax, %eax
	lock
	cmpxchgl %ecx, 8(%edi)	// try 3rd time to take the spinlock
	jz	2f		// got it

	COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_ENQUEUE,pfz_enqueue)
	jmp	1b		// loop to try again
2:
	movl	4(%edi),%ecx	// get ptr to last element in q
	testl	%ecx,%ecx	// q null?
	jnz	3f		// no
	movl	%esi,(%edi)	// q empty so this is first element
	jmp	4f
3:
	movl	%esi,(%edx,%ecx) // point to new element from last
4:
	movl	%esi,4(%edi)	// new element becomes last in q
	movl	$0,8(%edi)	// unlock spinlock
	ret
COMMPAGE_DESCRIPTOR(pfz_enqueue,_COMM_PAGE_PFZ_ENQUEUE,0,0)
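
/* Hedged C-style sketch of pfz_enqueue above (illustrative only). LINK(e)
 * stands for the pointer-sized link field at byte offset `offset` inside
 * element e, and try_spinlock_3_times() is a made-up name for the inline
 * lock attempts:
 *
 *	static int try_spinlock_3_times(volatile int *lock)
 *	{
 *		for (int i = 0; i < 3; i++) {
 *			if (__sync_bool_compare_and_swap(lock, 0, -1))
 *				return 1;		// lock acquired (cmpxchg 0 -> -1)
 *			__asm__ volatile("pause");
 *		}
 *		return 0;				// caller must back off
 *	}
 *
 *	void pfz_enqueue(OSFifoQueueHead *q, void *new, size_t offset)
 *	{
 *		LINK(new) = NULL;			// new element will be last
 *		while (!try_spinlock_3_times(&q->opaque3))
 *			backoff(q);			// leaves the PFZ, may take a preemption
 *		if (q->opaque2 == NULL)			// queue empty?
 *			q->opaque1 = new;		//   yes: new element is also first
 *		else
 *			LINK(q->opaque2) = new;		//   no: chain it after current last
 *		q->opaque2 = new;			// new element becomes last
 *		q->opaque3 = 0;				// release the spinlock
 *	}
 */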


/* Preemption-free-zone routine to FIFO Dequeue:
 *	%edi = ptr to queue head structure
 *	%edx = offset of link field in elements
 *	%ebx = preemption flag (kernel sets nonzero if we should preempt)
 *
 * Returns with next element (or 0) in %eax.
 */

COMMPAGE_FUNCTION_START(pfz_dequeue, 32, 4)
1:
	xorl	%eax, %eax
	orl	$-1, %ecx
	lock
	cmpxchgl %ecx, 8(%edi)	// try to take the spinlock
	jz	2f		// got it

	pause
	xorl	%eax, %eax
	lock
	cmpxchgl %ecx, 8(%edi)	// try 2nd time to take the spinlock
	jz	2f		// got it

	pause
	xorl	%eax, %eax
	lock
	cmpxchgl %ecx, 8(%edi)	// try 3rd time to take the spinlock
	jz	2f		// got it

	COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_DEQUEUE,pfz_dequeue)
	jmp	1b		// loop to try again
2:
	movl	(%edi),%eax	// get ptr to first element in q
	testl	%eax,%eax	// q null?
	jz	4f		// yes
	movl	(%edx,%eax),%esi // get ptr to 2nd element in q
	testl	%esi,%esi	// is there a 2nd element?
	jnz	3f		// yes
	movl	%esi,4(%edi)	// no - clear "last" field of q head
3:
	movl	%esi,(%edi)	// update "first" field of q head
4:
	movl	$0,8(%edi)	// unlock spinlock
	ret
COMMPAGE_DESCRIPTOR(pfz_dequeue,_COMM_PAGE_PFZ_DEQUEUE,0,0)
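
/* Hedged C-style sketch of pfz_dequeue above (illustrative only), reusing
 * the LINK() and try_spinlock_3_times() stand-ins from the enqueue sketch:
 *
 *	void *pfz_dequeue(OSFifoQueueHead *q, size_t offset)
 *	{
 *		void *first, *second;
 *		while (!try_spinlock_3_times(&q->opaque3))
 *			backoff(q);			// leaves the PFZ, may take a preemption
 *		first = q->opaque1;			// element to return (may be NULL)
 *		if (first != NULL) {
 *			second = LINK(first);		// 2nd element, or NULL
 *			if (second == NULL)
 *				q->opaque2 = NULL;	// queue now empty: clear "last"
 *			q->opaque1 = second;		// unlink the first element
 *		}
 *		q->opaque3 = 0;				// release the spinlock
 *		return first;
 *	}
 */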




/************************* x86_64 versions follow **************************/


/*
 * typedef volatile struct {
 *	void	*opaque1;	<-- ptr to first queue element or null
 *	void	*opaque2;	<-- ptr to last queue element or null
 *	int	 opaque3;	<-- spinlock
 * } OSFifoQueueHead;
 *
 * void OSAtomicFifoEnqueue( OSFifoQueueHead *list, void *new, size_t offset);
 */
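
/* Layout note (added for clarity, not in the original source): in 64-bit
 * processes the two opaque pointers are 8 bytes each, so the lockword lives
 * at offset 16 instead of offset 8 as in the 32-bit routines above. In C
 * terms, the assumption is roughly:
 *
 *	_Static_assert(offsetof(OSFifoQueueHead, opaque3) == 16,
 *		       "64-bit: lockword at offset 16");
 */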

// %rdi == list head, %rsi == new, %rdx == offset

COMMPAGE_FUNCTION_START(AtomicFifoEnqueue_64, 64, 4)
	pushq	%rbx
	xorl	%ebx,%ebx	// clear "preemption pending" flag
	COMMPAGE_CALL(_COMM_PAGE_PFZ_ENQUEUE,_COMM_PAGE_FIFO_ENQUEUE,AtomicFifoEnqueue_64)
	testl	%ebx,%ebx	// pending preemption?
	jz	1f
	COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_FIFO_ENQUEUE,AtomicFifoEnqueue_64)
1:
	popq	%rbx
	ret
COMMPAGE_DESCRIPTOR(AtomicFifoEnqueue_64,_COMM_PAGE_FIFO_ENQUEUE,0,0)

/* void* OSAtomicFifoDequeue( OSFifoQueueHead *list, size_t offset); */

// %rdi == list head, %rsi == offset

COMMPAGE_FUNCTION_START(AtomicFifoDequeue_64, 64, 4)
	pushq	%rbx
	xorl	%ebx,%ebx	// clear "preemption pending" flag
	movq	%rsi,%rdx	// move offset to %rdx to be like the Enqueue case
	COMMPAGE_CALL(_COMM_PAGE_PFZ_DEQUEUE,_COMM_PAGE_FIFO_DEQUEUE,AtomicFifoDequeue_64)
	testl	%ebx,%ebx	// pending preemption?
	jz	1f
	COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_FIFO_DEQUEUE,AtomicFifoDequeue_64)
1:
	popq	%rbx
	ret			// ptr to 1st element in Q in %rax
COMMPAGE_DESCRIPTOR(AtomicFifoDequeue_64,_COMM_PAGE_FIFO_DEQUEUE,0,0)


/* Subroutine to make a preempt syscall. Called when we notice %ebx is
 * nonzero after returning from a PFZ subroutine. Not in PFZ.
 *
 * All registers preserved (but does clear the %ebx preemption flag).
 */
COMMPAGE_FUNCTION_START(preempt_64, 64, 4)
	pushq	%rax
	pushq	%rcx
	pushq	%r11
	movl	$(SYSCALL_CONSTRUCT_MACH(58)),%eax	/* 58 = pfz_exit */
	xorl	%ebx,%ebx
	syscall
	popq	%r11
	popq	%rcx
	popq	%rax
	ret
COMMPAGE_DESCRIPTOR(preempt_64,_COMM_PAGE_PREEMPT,0,0)


/* Subroutine to back off if we cannot get the spinlock. Called
 * after a few attempts inline in the PFZ subroutines. This code is
 * not in the PFZ.
 *	%rdi = ptr to queue head structure
 *	%ebx = preemption flag (nonzero if preemption pending)
 * Uses: %rax.
 */
COMMPAGE_FUNCTION_START(backoff_64, 64, 4)
	testl	%ebx,%ebx	// does kernel want to preempt us?
	jz	1f		// no
	COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_BACKOFF,backoff_64)
1:
	pause			// SMT-friendly backoff
	cmpl	$0,16(%rdi)	// sniff the lockword
	jnz	1b		// loop if still taken
	ret			// lockword is free, so reenter PFZ
COMMPAGE_DESCRIPTOR(backoff_64,_COMM_PAGE_BACKOFF,0,0)


/* Preemption-free-zone routine to FIFO Enqueue:
 *	%rdi = ptr to queue head structure
 *	%rsi = ptr to new element to enqueue
 *	%rdx = offset of link field in elements
 *	%ebx = preemption flag (kernel sets nonzero if we should preempt)
 */

COMMPAGE_FUNCTION_START(pfz_enqueue_64, 64, 4)
	movq	$0,(%rdx,%rsi)	// zero forward link in new element
1:
	xorl	%eax, %eax
	orl	$-1, %ecx
	lock
	cmpxchgl %ecx,16(%rdi)	// try to take the spinlock
	jz	2f		// got it

	pause
	xorl	%eax, %eax
	lock
	cmpxchgl %ecx,16(%rdi)	// try 2nd time to take the spinlock
	jz	2f		// got it

	pause
	xorl	%eax, %eax
	lock
	cmpxchgl %ecx,16(%rdi)	// try 3rd time to take the spinlock
	jz	2f		// got it

	COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_ENQUEUE,pfz_enqueue_64)
	jmp	1b		// loop to try again
2:
	movq	8(%rdi),%rcx	// get ptr to last element in q
	testq	%rcx,%rcx	// q null?
	jnz	3f		// no
	movq	%rsi,(%rdi)	// q empty so this is first element
	jmp	4f
3:
	movq	%rsi,(%rdx,%rcx) // point to new element from last
4:
	movq	%rsi,8(%rdi)	// new element becomes last in q
	movl	$0,16(%rdi)	// unlock spinlock
	ret
COMMPAGE_DESCRIPTOR(pfz_enqueue_64,_COMM_PAGE_PFZ_ENQUEUE,0,0)



/* Preemption-free-zone routine to FIFO Dequeue:
 *	%rdi = ptr to queue head structure
 *	%rdx = offset of link field in elements
 *	%ebx = preemption flag (kernel sets nonzero if we should preempt)
 *
 * Returns with next element (or 0) in %rax.
 */

COMMPAGE_FUNCTION_START(pfz_dequeue_64, 64, 4)
1:
	xorl	%eax, %eax
	orl	$-1, %ecx
	lock
	cmpxchgl %ecx,16(%rdi)	// try to take the spinlock
	jz	2f		// got it

	pause
	xorl	%eax, %eax
	lock
	cmpxchgl %ecx,16(%rdi)	// try 2nd time to take the spinlock
	jz	2f		// got it

	pause
	xorl	%eax, %eax
	lock
	cmpxchgl %ecx,16(%rdi)	// try 3rd time to take the spinlock
	jz	2f		// got it

	COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_DEQUEUE,pfz_dequeue_64)
	jmp	1b		// loop to try again
2:
	movq	(%rdi),%rax	// get ptr to first element in q
	testq	%rax,%rax	// q null?
	jz	4f		// yes
	movq	(%rdx,%rax),%rsi // get ptr to 2nd element in q
	testq	%rsi,%rsi	// is there a 2nd element?
	jnz	3f		// yes
	movq	%rsi,8(%rdi)	// no - clear "last" field of q head
3:
	movq	%rsi,(%rdi)	// update "first" field of q head
4:
	movl	$0,16(%rdi)	// unlock spinlock
	ret
COMMPAGE_DESCRIPTOR(pfz_dequeue_64,_COMM_PAGE_PFZ_DEQUEUE,0,0)