]>
Commit | Line | Data |
---|---|---|
b0d623f7 A |
1 | /* |
2 | * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | #include <sys/appleapiopts.h> | |
30 | #include <machine/cpu_capabilities.h> | |
31 | #include <machine/commpage.h> | |
32 | #include <mach/i386/syscall_sw.h> | |
33 | ||
34 | ||
35 | /* PREEMPTION FREE ZONE (PFZ) | |
36 | * | |
37 | * A portion of the commpage is special-cased by the kernel to be "preemption free", | |
38 | * i.e. as if we had disabled interrupts in user mode. This facilitates writing | |
39 | * "nearly-lockless" code, for example code that must be serialized by a spinlock but | |
40 | * which we do not want to preempt while the spinlock is held. | |
41 | * | |
42 | * The PFZ is implemented by collecting all the "preemption-free" code into a single | |
43 | * contiguous region of the commpage. Register %ebx is used as a flag register; | |
44 | * before entering the PFZ, %ebx is cleared. If some event occurs that would normally | |
45 | * result in a preemption while in the PFZ, the kernel sets %ebx nonzero instead of | |
46 | * preempting. Then, when the routine leaves the PFZ we check %ebx and | |
47 | * if nonzero execute a special "pfz_exit" syscall to take the delayed preemption. | |
48 | * | |
49 | * PFZ code must bound the amount of time spent in the PFZ, in order to control | |
50 | * latency. Backward branches are dangerous and must not be used in a way that | |
51 | * could inadvertently create a long-running loop. | |
52 | * | |
53 | * Because they cannot be implemented reasonably without a lock, we put the "atomic" | |
54 | * FIFO enqueue and dequeue in the PFZ. As long as we don't take a page fault trying to | |
55 | * access queue elements, these implementations behave nearly-locklessly. | |
56 | * But we still must take a spinlock to serialize, and in case of page faults. | |
57 | */ | |
58 | ||
316670eb A |
59 | /* Work around 10062261 (presumably a bug-tracker ID -- TODO confirm) with a dummy non-local symbol */ |
60 | fifo_queue_dummy_symbol: | | // not referenced anywhere else in the visible part of this file
61 | ||
b0d623f7 A |
62 | /* |
63 | * typedef volatile struct { | |
64 | * void *opaque1; <-- ptr to first queue element or null | |
65 | * void *opaque2; <-- ptr to last queue element or null | |
66 | * int opaque3; <-- spinlock | |
67 | * } OSFifoQueueHead; | |
68 | * | |
69 | * void OSAtomicFifoEnqueue( OSFifoQueueHead *list, void *new, size_t offset); | |
70 | */ | |
71 | ||
b0d623f7 A |
72 | |
73 | /* 32-bit subroutine to make a preempt (pfz_exit) syscall. Called when we notice %ebx is | |
74 | * nonzero after returning from a PFZ subroutine. | |
75 | * When we enter kernel (via sysenter): | |
76 | * %edx = return address (popped off the stack below) | |
77 | * %ecx = stack ptr (value of %esp after that pop) | |
78 | * Destroys %eax, %ecx, and %edx; also zeroes %ebx. | |
79 | */ | |
80 | COMMPAGE_FUNCTION_START(preempt, 32, 4) | |
81 | popl %edx // pop our own return address into %edx | |
82 | movl %esp,%ecx // save stack ptr here (return address already removed) | |
83 | movl $(-58),%eax /* 58 = pfz_exit; note the number is loaded negated */ | |
84 | xorl %ebx,%ebx // clear "preemption pending" flag before entering the kernel | |
85 | sysenter | | // no ret follows; presumably the kernel resumes at the address in %edx -- TODO confirm
bd504ef0 | 86 | COMMPAGE_DESCRIPTOR(preempt,_COMM_PAGE_PREEMPT) |
b0d623f7 A |
87 | |
88 | ||
89 | /* Subroutine to back off if we cannot get the spinlock. Called | |
90 | * after a few attempts inline in the PFZ subroutines. This code is | |
91 | * not in the PFZ. It only waits until the lockword reads zero; it does not take the lock. | |
92 | * %edi = ptr to queue head structure (lockword at offset 8) | |
93 | * %ebx = preemption flag (nonzero if preemption pending) | |
94 | * Destroys %eax (clobbered by the preempt subroutine if it is called). | |
95 | */ | |
96 | COMMPAGE_FUNCTION_START(backoff, 32, 4) | |
97 | testl %ebx,%ebx // does kernel want to preempt us? | |
98 | jz 1f // no, go straight to waiting on the lockword | |
99 | xorl %ebx,%ebx // yes, clear flag | |
100 | pushl %edx // preserve regs used by preempt syscall | |
101 | pushl %ecx | | // (preempt overwrites both %ecx and %edx)
102 | COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_BACKOFF,backoff) | | // take the delayed preemption
103 | popl %ecx | | // restore in reverse order of the pushes
104 | popl %edx | |
105 | 1: | |
106 | pause // SMT-friendly backoff | |
107 | cmpl $0,8(%edi) // sniff the lockword with a plain read (no locked operation) | |
108 | jnz 1b // loop if still taken | |
109 | ret // lockword is free, so reenter PFZ; caller retries the cmpxchg | |
bd504ef0 | 110 | COMMPAGE_DESCRIPTOR(backoff,_COMM_PAGE_BACKOFF) |
b0d623f7 A |
111 | |
112 | ||
113 | /* Preemption-free-zone routine to FIFO Enqueue (32-bit). Overwrites %eax and %ecx. | |
114 | * %edi = ptr to queue head structure (first at 0, last at 4, spinlock at 8) | |
115 | * %esi = ptr to element to enqueue | |
116 | * %edx = offset of link field in elements | |
117 | * %ebx = preemption flag (kernel sets nonzero if we should preempt) | |
118 | */ | |
119 | ||
120 | COMMPAGE_FUNCTION_START(pfz_enqueue, 32, 4) | |
121 | movl $0,(%edx,%esi) // zero forward link in new element (done before taking the lock) | |
122 | 1: | |
123 | xorl %eax, %eax | | // %eax = 0: value cmpxchg expects to find in a free lockword
124 | orl $-1, %ecx | | // %ecx = -1: value stored into the lockword to mark it held
125 | lock | |
126 | cmpxchgl %ecx, 8(%edi) // try to take the spinlock | |
127 | jz 2f // got it | |
128 | ||
129 | pause | |
130 | xorl %eax, %eax | | // a failed cmpxchg loaded the lockword into %eax, so re-zero the comparand
131 | lock | |
132 | cmpxchgl %ecx, 8(%edi) // try 2nd time to take the spinlock | |
133 | jz 2f // got it | |
134 | ||
135 | pause | |
136 | xorl %eax, %eax | | // re-zero the comparand again
137 | lock | |
138 | cmpxchgl %ecx, 8(%edi) // try 3rd time to take the spinlock | |
139 | jz 2f // got it | |
140 | ||
141 | COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_ENQUEUE,pfz_enqueue) | | // three misses: wait in backoff until the lockword reads free
142 | jmp 1b // loop to try again | |
143 | 2: | |
00867663 A |
144 | movl 4(%edi),%eax // get ptr to last element in q (lock is held from here on) |
145 | testl %eax,%eax // q null? | |
b0d623f7 A |
146 | jnz 3f // no |
147 | movl %esi,(%edi) // q empty so this is first element | |
148 | jmp 4f | |
149 | 3: | |
00867663 | 150 | movl %esi,(%edx,%eax) // point to new element from last (store into old last's link field) |
b0d623f7 A |
151 | 4: |
152 | movl %esi,4(%edi) // new element becomes last in q | |
153 | movl $0,8(%edi) // unlock spinlock with a plain store of zero | |
00867663 | 154 | COMMPAGE_JMP(_COMM_PAGE_RET,_COMM_PAGE_PFZ_ENQUEUE,pfz_enqueue) |
bd504ef0 | 155 | COMMPAGE_DESCRIPTOR(pfz_enqueue,_COMM_PAGE_PFZ_ENQUEUE) |
b0d623f7 A |
156 | |
157 | ||
158 | /* Preemption-free-zone routine to FIFO Dequeue (32-bit): | |
159 | * %edi = ptr to queue head structure (first at 0, last at 4, spinlock at 8) | |
160 | * %edx = offset of link field in elements | |
161 | * %ebx = preemption flag (kernel sets nonzero if we should preempt) | |
162 | * | |
163 | * Returns the removed (former first) element, or 0 if the queue was empty, in %eax. Overwrites %ecx and %esi. | |
164 | */ | |
165 | ||
166 | COMMPAGE_FUNCTION_START(pfz_dequeue, 32, 4) | |
167 | 1: | |
168 | xorl %eax, %eax | | // %eax = 0: value cmpxchg expects to find in a free lockword
169 | orl $-1, %ecx | | // %ecx = -1: value stored into the lockword to mark it held
170 | lock | |
171 | cmpxchgl %ecx, 8(%edi) // try to take the spinlock | |
172 | jz 2f // got it | |
173 | ||
174 | pause | |
175 | xorl %eax, %eax | | // a failed cmpxchg loaded the lockword into %eax, so re-zero the comparand
176 | lock | |
177 | cmpxchgl %ecx, 8(%edi) // try 2nd time to take the spinlock | |
178 | jz 2f // got it | |
179 | ||
180 | pause | |
181 | xorl %eax, %eax | | // re-zero the comparand again
182 | lock | |
183 | cmpxchgl %ecx, 8(%edi) // try 3rd time to take the spinlock | |
184 | jz 2f // got it | |
185 | ||
186 | COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_DEQUEUE,pfz_dequeue) | | // three misses: wait in backoff until the lockword reads free
187 | jmp 1b // loop to try again | |
188 | 2: | |
189 | movl (%edi),%eax // get ptr to first element in q; this is the return value | |
190 | testl %eax,%eax // q null? | |
191 | jz 4f // yes, nothing to unlink; return 0 | |
192 | movl (%edx,%eax),%esi// get ptr to 2nd element in q (first element's link field) | |
193 | testl %esi,%esi // is there a 2nd element? | |
194 | jnz 3f // yes | |
195 | movl %esi,4(%edi) // no (%esi == 0): q becomes empty, so clear "last" field of q head | |
196 | 3: | |
197 | movl %esi,(%edi) // update "first" field of q head (2nd element, or 0 if none) | |
198 | 4: | |
199 | movl $0,8(%edi) // unlock spinlock with a plain store of zero | |
00867663 | 200 | COMMPAGE_JMP(_COMM_PAGE_RET,_COMM_PAGE_PFZ_DEQUEUE,pfz_dequeue) |
bd504ef0 | 201 | COMMPAGE_DESCRIPTOR(pfz_dequeue,_COMM_PAGE_PFZ_DEQUEUE) |
b0d623f7 A |
202 | |
203 | ||
00867663 A |
204 | COMMPAGE_FUNCTION_START(ret, 32, 4) | // 32-bit return stub, described under _COMM_PAGE_RET below
205 | ret | | // pfz_enqueue and pfz_dequeue end with COMMPAGE_JMP(_COMM_PAGE_RET, ...) rather than an inline ret
206 | COMMPAGE_DESCRIPTOR(ret,_COMM_PAGE_RET) | | // NOTE(review): presumably keeps the final ret outside the PFZ -- confirm
207 | ||
b0d623f7 A |
208 | |
209 | ||
210 | /************************* x86_64 versions follow **************************/ | |
211 | ||
212 | ||
213 | /* | |
214 | * typedef volatile struct { | |
215 | * void *opaque1; <-- ptr to first queue element or null | |
216 | * void *opaque2; <-- ptr to last queue element or null | |
217 | * int opaque3; <-- spinlock | |
218 | * } OSFifoQueueHead; | |
219 | * | |
220 | * void OSAtomicFifoEnqueue( OSFifoQueueHead *list, void *new, size_t offset); | |
221 | */ | |
222 | ||
b0d623f7 A |
223 | |
224 | /* 64-bit subroutine to make a preempt (pfz_exit) syscall. Called when we notice %ebx is | |
225 | * nonzero after returning from a PFZ subroutine. Not in PFZ. | |
226 | * | |
227 | * All registers preserved (but does clear the %ebx preemption flag). | |
228 | */ | |
229 | COMMPAGE_FUNCTION_START(preempt_64, 64, 4) | |
230 | pushq %rax | | // saved because %eax is loaded with the syscall number below
231 | pushq %rcx | | // saved because the syscall instruction overwrites %rcx ...
232 | pushq %r11 | | // ... and %r11
233 | movl $(SYSCALL_CONSTRUCT_MACH(58)),%eax /* 58 = pfz_exit */ | |
234 | xorl %ebx,%ebx | | // clear "preemption pending" flag before entering the kernel
235 | syscall | |
236 | popq %r11 | | // restore in reverse order of the pushes
237 | popq %rcx | |
238 | popq %rax | |
239 | ret | |
bd504ef0 | 240 | COMMPAGE_DESCRIPTOR(preempt_64,_COMM_PAGE_PREEMPT) |
b0d623f7 A |
241 | |
242 | ||
243 | /* Subroutine to back off if we cannot get the spinlock. Called | |
244 | * after a few attempts inline in the PFZ subroutines. This code is | |
245 | * not in the PFZ. It only waits until the lockword reads zero; it does not take the lock. | |
246 | * %rdi = ptr to queue head structure (lockword at offset 16) | |
247 | * %ebx = preemption flag (nonzero if preemption pending) | |
248 | * Uses: %rax. NOTE(review): no instruction below writes %rax and preempt_64 documents all registers preserved -- confirm whether this is stale. | |
249 | */ | |
250 | COMMPAGE_FUNCTION_START(backoff_64, 64, 4) | |
251 | testl %ebx,%ebx // does kernel want to preempt us? | |
252 | jz 1f // no, go straight to waiting on the lockword | |
253 | COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_BACKOFF,backoff_64) | | // yes: take the delayed preemption (preempt_64 clears %ebx)
254 | 1: | |
255 | pause // SMT-friendly backoff | |
256 | cmpl $0,16(%rdi) // sniff the lockword with a plain 32-bit read (no locked operation) | |
257 | jnz 1b // loop if still taken | |
258 | ret // lockword is free, so reenter PFZ; caller retries the cmpxchg | |
bd504ef0 | 259 | COMMPAGE_DESCRIPTOR(backoff_64,_COMM_PAGE_BACKOFF) |
b0d623f7 A |
260 | |
261 | ||
262 | /* Preemption-free-zone routine to FIFO Enqueue (64-bit). Overwrites %rax and %ecx. | |
263 | * %rdi = ptr to queue head structure (first at 0, last at 8, 32-bit spinlock at 16) | |
264 | * %rsi = ptr to new element to enqueue | |
265 | * %rdx = offset of link field in elements | |
266 | * %ebx = preemption flag (kernel sets nonzero if we should preempt) | |
267 | */ | |
268 | ||
269 | COMMPAGE_FUNCTION_START(pfz_enqueue_64, 64, 4) | |
270 | movq $0,(%rdx,%rsi) // zero forward link in new element (done before taking the lock) | |
271 | 1: | |
272 | xorl %eax, %eax | | // %eax = 0: value cmpxchg expects to find in a free lockword
273 | orl $-1, %ecx | | // %ecx = -1: value stored into the lockword to mark it held
274 | lock | |
275 | cmpxchgl %ecx,16(%rdi) // try to take the spinlock | |
276 | jz 2f // got it | |
277 | ||
278 | pause | |
279 | xorl %eax, %eax | | // a failed cmpxchg loaded the lockword into %eax, so re-zero the comparand
280 | lock | |
281 | cmpxchgl %ecx,16(%rdi) // try 2nd time to take the spinlock | |
282 | jz 2f // got it | |
283 | ||
284 | pause | |
285 | xorl %eax, %eax | | // re-zero the comparand again
286 | lock | |
287 | cmpxchgl %ecx,16(%rdi) // try 3rd time to take the spinlock | |
288 | jz 2f // got it | |
289 | ||
290 | COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_ENQUEUE,pfz_enqueue_64) | | // three misses: wait in backoff until the lockword reads free
291 | jmp 1b // loop to try again | |
292 | 2: | |
00867663 A |
293 | movq 8(%rdi),%rax // get ptr to last element in q (lock is held from here on) |
294 | testq %rax,%rax // q null? | |
b0d623f7 A |
295 | jnz 3f // no |
296 | movq %rsi,(%rdi) // q empty so this is first element | |
297 | jmp 4f | |
298 | 3: | |
00867663 | 299 | movq %rsi,(%rdx,%rax) // point to new element from last (store into old last's link field) |
b0d623f7 A |
300 | 4: |
301 | movq %rsi,8(%rdi) // new element becomes last in q | |
302 | movl $0,16(%rdi) // unlock spinlock with a plain 32-bit store of zero | |
00867663 | 303 | COMMPAGE_JMP(_COMM_PAGE_RET,_COMM_PAGE_PFZ_ENQUEUE,pfz_enqueue_64) |
bd504ef0 | 304 | COMMPAGE_DESCRIPTOR(pfz_enqueue_64,_COMM_PAGE_PFZ_ENQUEUE) |
b0d623f7 A |
305 | |
306 | ||
307 | ||
308 | /* Preemption-free-zone routine to FIFO Dequeue (64-bit): | |
309 | * %rdi = ptr to queue head structure (first at 0, last at 8, 32-bit spinlock at 16) | |
310 | * %rdx = offset of link field in elements | |
311 | * %ebx = preemption flag (kernel sets nonzero if we should preempt) | |
312 | * | |
313 | * Returns the removed (former first) element, or 0 if the queue was empty, in %rax. Overwrites %ecx and %rsi. | |
314 | */ | |
315 | ||
316 | COMMPAGE_FUNCTION_START(pfz_dequeue_64, 64, 4) | |
317 | 1: | |
318 | xorl %eax, %eax | | // %eax = 0: value cmpxchg expects to find in a free lockword
319 | orl $-1, %ecx | | // %ecx = -1: value stored into the lockword to mark it held
320 | lock | |
321 | cmpxchgl %ecx,16(%rdi) // try to take the spinlock | |
322 | jz 2f // got it | |
323 | ||
324 | pause | |
325 | xorl %eax, %eax | | // a failed cmpxchg loaded the lockword into %eax, so re-zero the comparand
326 | lock | |
327 | cmpxchgl %ecx,16(%rdi) // try 2nd time to take the spinlock | |
328 | jz 2f // got it | |
329 | ||
330 | pause | |
331 | xorl %eax, %eax | | // re-zero the comparand again
332 | lock | |
333 | cmpxchgl %ecx,16(%rdi) // try 3rd time to take the spinlock | |
334 | jz 2f // got it | |
335 | ||
336 | COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_DEQUEUE,pfz_dequeue_64) | | // three misses: wait in backoff until the lockword reads free
337 | jmp 1b // loop to try again | |
338 | 2: | |
339 | movq (%rdi),%rax // get ptr to first element in q; this is the return value | |
340 | testq %rax,%rax // q null? | |
341 | jz 4f // yes, nothing to unlink; return 0 | |
342 | movq (%rdx,%rax),%rsi// get ptr to 2nd element in q (first element's link field) | |
343 | testq %rsi,%rsi // is there a 2nd element? | |
344 | jnz 3f // yes | |
345 | movq %rsi,8(%rdi) // no (%rsi == 0): q becomes empty, so clear "last" field of q head | |
346 | 3: | |
347 | movq %rsi,(%rdi) // update "first" field of q head (2nd element, or 0 if none) | |
348 | 4: | |
349 | movl $0,16(%rdi) // unlock spinlock with a plain 32-bit store of zero | |
00867663 | 350 | COMMPAGE_JMP(_COMM_PAGE_RET,_COMM_PAGE_PFZ_DEQUEUE,pfz_dequeue_64) |
bd504ef0 | 351 | COMMPAGE_DESCRIPTOR(pfz_dequeue_64,_COMM_PAGE_PFZ_DEQUEUE) |
00867663 A |
352 | |
353 | COMMPAGE_FUNCTION_START(ret_64, 64, 4) | | // 64-bit return stub, described under _COMM_PAGE_RET below
354 | ret | | // pfz_enqueue_64 and pfz_dequeue_64 end with COMMPAGE_JMP(_COMM_PAGE_RET, ...) rather than an inline ret
355 | COMMPAGE_DESCRIPTOR(ret_64,_COMM_PAGE_RET) | // NOTE(review): presumably keeps the final ret outside the PFZ -- confirm