]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | #include <sys/appleapiopts.h> | |
30 | #include <machine/cpu_capabilities.h> | |
31 | #include <machine/commpage.h> | |
32 | #include <mach/i386/syscall_sw.h> | |
33 | ||
34 | ||
35 | /* PREEMPTION FREE ZONE (PFZ) | |
36 | * | |
37 | * A portion of the commpage is special-cased by the kernel to be "preemption free", | |
38 | * ie as if we had disabled interrupts in user mode. This facilitates writing | |
39 | * "nearly-lockless" code, for example code that must be serialized by a spinlock but | |
40 | * which we do not want to preempt while the spinlock is held. | |
41 | * | |
42 | * The PFZ is implemented by collecting all the "preemption-free" code into a single | |
43 | * contiguous region of the commpage. Register %ebx is used as a flag register; | |
44 | * before entering the PFZ, %ebx is cleared. If some event occurs that would normally | |
45 | * result in a preemption while in the PFZ, the kernel sets %ebx nonzero instead of | |
46 | * preempting. Then, when the routine leaves the PFZ we check %ebx and | |
47 | * if nonzero execute a special "pfz_exit" syscall to take the delayed preemption. | |
48 | * | |
49 | * PFZ code must bound the amount of time spent in the PFZ, in order to control | |
50 | * latency. Backward branches are dangerous and must not be used in a way that | |
51 | * could inadvertently create a long-running loop. | |
52 | * | |
53 | * Because they cannot be implemented reasonably without a lock, we put the "atomic" | |
54 | * FIFO enqueue and dequeue in the PFZ. As long as we don't take a page fault trying to | |
55 | * access queue elements, these implementations behave nearly-locklessly. | |
56 | * But we still must take a spinlock to serialize, and in case of page faults. | |
57 | */ | |
58 | ||
59 | /* | |
60 | * typedef volatile struct { | |
61 | * void *opaque1; <-- ptr to first queue element or null | |
62 | * void *opaque2; <-- ptr to last queue element or null | |
63 | * int opaque3; <-- spinlock | |
64 | * } OSFifoQueueHead; | |
65 | * | |
66 | * void OSAtomicFifoEnqueue( OSFifoQueueHead *list, void *new, size_t offset); | |
67 | */ | |
68 | ||
69 | ||
70 | /* Subroutine to make the preempt ("pfz_exit") syscall. Called when we notice %ebx is | |
71 | * nonzero after returning from a PFZ subroutine; the 32-bit backoff subroutine also calls it. | |
72 | * There is no "ret" here: the return address is popped and handed to the kernel. At kernel entry: | |
73 | * %edx = return address (presumably where the kernel resumes user code - confirm in kernel source) | |
74 | * %ecx = stack ptr (%esp as it is after the return address was popped) | |
75 | * Destroys %eax, %ecx, and %edx. Also clears %ebx, the preemption-pending flag. | |
76 | */ | |
77 | COMMPAGE_FUNCTION_START(preempt, 32, 4) | |
78 | popl %edx // get return address; it is passed to the kernel in %edx | |
79 | movl %esp,%ecx // save stack ptr here (taken after the pop, so the return address is already off the stack) | |
80 | movl $(-58),%eax /* 58 = pfz_exit; the trap number is loaded negated (-58) */ | |
81 | xorl %ebx,%ebx // clear "preemption pending" flag, since we are about to take the preemption | |
82 | sysenter | |
83 | COMMPAGE_DESCRIPTOR(preempt,_COMM_PAGE_PREEMPT,0,0) | |
84 | ||
85 | ||
86 | /* Subroutine to back off if we cannot get the spinlock. Called after the three failed | |
87 | * attempts made inline in the PFZ subroutines. This code is not in the PFZ. It first takes a | |
88 | * pending preemption (if any), then spins until the lockword reads 0. It never takes the lock itself. | |
89 | * %edi = ptr to queue head structure (lockword is at offset 8) | |
90 | * %ebx = preemption flag (nonzero if preemption pending); cleared here before the preempt call | |
91 | * Destroys %eax (on the path that makes the preempt syscall). | |
92 | */ | |
93 | COMMPAGE_FUNCTION_START(backoff, 32, 4) | |
94 | testl %ebx,%ebx // does kernel want to preempt us? | |
95 | jz 1f // no, go straight to waiting on the lockword | |
96 | xorl %ebx,%ebx // yes, clear flag (the preempt subroutine clears it again itself) | |
97 | pushl %edx // preserve regs used by preempt syscall: it destroys %ecx and %edx, which our callers still need | |
98 | pushl %ecx | |
99 | COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_BACKOFF,backoff) | |
100 | popl %ecx | |
101 | popl %edx | |
102 | 1: | |
103 | pause // SMT-friendly backoff | |
104 | cmpl $0,8(%edi) // sniff the lockword with a plain read; no locked instruction while waiting | |
105 | jnz 1b // loop if still taken (this loop has no iteration limit) | |
106 | ret // lockword is free, so reenter PFZ; the caller retries its cmpxchg | |
107 | COMMPAGE_DESCRIPTOR(backoff,_COMM_PAGE_BACKOFF,0,0) | |
108 | ||
109 | ||
110 | /* Preemption-free-zone routine to FIFO Enqueue (appends the element at the tail while holding the spinlock): | |
111 | * %edi = ptr to queue head structure (first ptr at offset 0, last ptr at 4, lockword at 8) | |
112 | * %esi = ptr to element to enqueue | |
113 | * %edx = offset of link field in elements | |
114 | * %ebx = preemption flag (kernel sets nonzero if we should preempt); not tested by this routine itself | |
115 | * Destroys %eax and %ecx. */ | |
116 | ||
117 | COMMPAGE_FUNCTION_START(pfz_enqueue, 32, 4) | |
118 | movl $0,(%edx,%esi) // zero forward link in new element (done before the lock is taken) | |
119 | 1: | |
120 | xorl %eax, %eax /* %eax = value cmpxchg expects in the lockword: 0 = unlocked */ | |
121 | orl $-1, %ecx /* %ecx = value cmpxchg stores on success: -1 = locked */ | |
122 | lock | |
123 | cmpxchgl %ecx, 8(%edi) // try to take the spinlock: if lockword == %eax, store %ecx and set ZF | |
124 | jz 2f // got it | |
125 | ||
126 | pause | |
127 | xorl %eax, %eax /* re-zero: a failed cmpxchg loads the observed lockword into %eax */ | |
128 | lock | |
129 | cmpxchgl %ecx, 8(%edi) // try 2nd time to take the spinlock | |
130 | jz 2f // got it | |
131 | ||
132 | pause | |
133 | xorl %eax, %eax /* re-zero the expected value again */ | |
134 | lock | |
135 | cmpxchgl %ecx, 8(%edi) // try 3rd time to take the spinlock | |
136 | jz 2f // got it | |
137 | ||
138 | COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_ENQUEUE,pfz_enqueue) | |
139 | jmp 1b // loop to try again, after backoff has seen the lockword read 0 | |
140 | 2: | |
141 | movl 4(%edi),%ecx // get ptr to last element in q (we hold the lock from here on) | |
142 | testl %ecx,%ecx // q null? | |
143 | jnz 3f // no | |
144 | movl %esi,(%edi) // q empty so this is first element | |
145 | jmp 4f | |
146 | 3: | |
147 | movl %esi,(%edx,%ecx) // point to new element from last: store into the old last element's link field | |
148 | 4: | |
149 | movl %esi,4(%edi) // new element becomes last in q | |
150 | movl $0,8(%edi) // unlock spinlock by storing 0 to the lockword | |
151 | ret | |
152 | COMMPAGE_DESCRIPTOR(pfz_enqueue,_COMM_PAGE_PFZ_ENQUEUE,0,0) | |
153 | ||
154 | ||
155 | /* Preemption-free-zone routine to FIFO Dequeue (unlinks the first element while holding the spinlock): | |
156 | * %edi = ptr to queue head structure (first ptr at offset 0, last ptr at 4, lockword at 8) | |
157 | * %edx = offset of link field in elements | |
158 | * %ebx = preemption flag (kernel sets nonzero if we should preempt); not tested by this routine itself | |
159 | * Destroys %ecx, and also %esi when the queue is not empty. | |
160 | * Returns with the removed first element (or 0 if the queue was empty) in %eax. | |
161 | */ | |
162 | ||
163 | COMMPAGE_FUNCTION_START(pfz_dequeue, 32, 4) | |
164 | 1: | |
165 | xorl %eax, %eax /* %eax = value cmpxchg expects in the lockword: 0 = unlocked */ | |
166 | orl $-1, %ecx /* %ecx = value cmpxchg stores on success: -1 = locked */ | |
167 | lock | |
168 | cmpxchgl %ecx, 8(%edi) // try to take the spinlock: if lockword == %eax, store %ecx and set ZF | |
169 | jz 2f // got it | |
170 | ||
171 | pause | |
172 | xorl %eax, %eax /* re-zero: a failed cmpxchg loads the observed lockword into %eax */ | |
173 | lock | |
174 | cmpxchgl %ecx, 8(%edi) // try 2nd time to take the spinlock | |
175 | jz 2f // got it | |
176 | ||
177 | pause | |
178 | xorl %eax, %eax /* re-zero the expected value again */ | |
179 | lock | |
180 | cmpxchgl %ecx, 8(%edi) // try 3rd time to take the spinlock | |
181 | jz 2f // got it | |
182 | ||
183 | COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_DEQUEUE,pfz_dequeue) | |
184 | jmp 1b // loop to try again, after backoff has seen the lockword read 0 | |
185 | 2: | |
186 | movl (%edi),%eax // get ptr to first element in q; this is also the return value | |
187 | testl %eax,%eax // q null? | |
188 | jz 4f // yes, so unlock and return 0 | |
189 | movl (%edx,%eax),%esi// get ptr to 2nd element in q (read from the first element's link field) | |
190 | testl %esi,%esi // is there a 2nd element? | |
191 | jnz 3f // yes | |
192 | movl %esi,4(%edi) // no - %esi is 0 here, so this clears "last" field of q head | |
193 | 3: | |
194 | movl %esi,(%edi) // update "first" field of q head (0 if the q is now empty) | |
195 | 4: | |
196 | movl $0,8(%edi) // unlock spinlock; %eax still holds the removed element or 0 | |
197 | ret | |
198 | COMMPAGE_DESCRIPTOR(pfz_dequeue,_COMM_PAGE_PFZ_DEQUEUE,0,0) | |
199 | ||
200 | ||
201 | ||
202 | ||
203 | /************************* x86_64 versions follow **************************/ | |
204 | ||
205 | ||
206 | /* | |
207 | * typedef volatile struct { | |
208 | * void *opaque1; <-- ptr to first queue element or null | |
209 | * void *opaque2; <-- ptr to last queue element or null | |
210 | * int opaque3; <-- spinlock | |
211 | * } OSFifoQueueHead; | |
212 | * | |
213 | * void OSAtomicFifoEnqueue( OSFifoQueueHead *list, void *new, size_t offset); | |
214 | */ | |
215 | ||
216 | ||
217 | /* Subroutine to make the preempt ("pfz_exit") syscall. Called when we notice %ebx is | |
218 | * nonzero after returning from a PFZ subroutine; backoff_64 also calls it. Not in PFZ. | |
219 | * Unlike the 32-bit version, this one returns to its caller with an ordinary "ret". | |
220 | * All registers preserved (but does clear the %ebx preemption flag). | |
221 | */ | |
222 | COMMPAGE_FUNCTION_START(preempt_64, 64, 4) | |
223 | pushq %rax /* saved because %eax is loaded with the syscall number below */ | |
224 | pushq %rcx /* saved because the syscall instruction overwrites %rcx (return address) */ | |
225 | pushq %r11 /* saved because the syscall instruction overwrites %r11 (saved flags) */ | |
226 | movl $(SYSCALL_CONSTRUCT_MACH(58)),%eax /* 58 = pfz_exit */ | |
227 | xorl %ebx,%ebx /* clear "preemption pending" flag before entering the kernel */ | |
228 | syscall | |
229 | popq %r11 /* restore in reverse order of the pushes */ | |
230 | popq %rcx | |
231 | popq %rax | |
232 | ret | |
233 | COMMPAGE_DESCRIPTOR(preempt_64,_COMM_PAGE_PREEMPT,0,0) | |
234 | ||
235 | ||
236 | /* Subroutine to back off if we cannot get the spinlock. Called after the three failed | |
237 | * attempts made inline in the PFZ subroutines. This code is not in the PFZ. It first takes a | |
238 | * pending preemption (if any), then spins until the lockword reads 0. It never takes the lock itself. | |
239 | * %rdi = ptr to queue head structure (lockword is at offset 16) | |
240 | * %ebx = preemption flag (nonzero if preemption pending) | |
241 | * Uses: %rax. NOTE(review): no write to %rax is visible here and preempt_64 restores it - confirm. | |
242 | */ | |
243 | COMMPAGE_FUNCTION_START(backoff_64, 64, 4) | |
244 | testl %ebx,%ebx // does kernel want to preempt us? | |
245 | jz 1f // no; if yes, fall into the call (preempt_64 preserves registers and clears %ebx itself) | |
246 | COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_BACKOFF,backoff_64) | |
247 | 1: | |
248 | pause // SMT-friendly backoff | |
249 | cmpl $0,16(%rdi) // sniff the lockword with a plain read; no locked instruction while waiting | |
250 | jnz 1b // loop if still taken (this loop has no iteration limit) | |
251 | ret // lockword is free, so reenter PFZ; the caller retries its cmpxchg | |
252 | COMMPAGE_DESCRIPTOR(backoff_64,_COMM_PAGE_BACKOFF,0,0) | |
253 | ||
254 | ||
255 | /* Preemption-free-zone routine to FIFO Enqueue (appends the element at the tail while holding the spinlock): | |
256 | * %rdi = ptr to queue head structure (first ptr at offset 0, last ptr at 8, 32-bit lockword at 16) | |
257 | * %rsi = ptr to new element to enqueue | |
258 | * %rdx = offset of link field in elements | |
259 | * %ebx = preemption flag (kernel sets nonzero if we should preempt); not tested by this routine itself | |
260 | * Destroys %rax and %rcx. */ | |
261 | ||
262 | COMMPAGE_FUNCTION_START(pfz_enqueue_64, 64, 4) | |
263 | movq $0,(%rdx,%rsi) // zero forward link in new element (done before the lock is taken) | |
264 | 1: | |
265 | xorl %eax, %eax /* %eax = value cmpxchg expects in the lockword: 0 = unlocked */ | |
266 | orl $-1, %ecx /* %ecx = value cmpxchg stores on success: -1 = locked */ | |
267 | lock | |
268 | cmpxchgl %ecx,16(%rdi) // try to take the spinlock: if lockword == %eax, store %ecx and set ZF | |
269 | jz 2f // got it | |
270 | ||
271 | pause | |
272 | xorl %eax, %eax /* re-zero: a failed cmpxchg loads the observed lockword into %eax */ | |
273 | lock | |
274 | cmpxchgl %ecx,16(%rdi) // try 2nd time to take the spinlock | |
275 | jz 2f // got it | |
276 | ||
277 | pause | |
278 | xorl %eax, %eax /* re-zero the expected value again */ | |
279 | lock | |
280 | cmpxchgl %ecx,16(%rdi) // try 3rd time to take the spinlock | |
281 | jz 2f // got it | |
282 | ||
283 | COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_ENQUEUE,pfz_enqueue_64) | |
284 | jmp 1b // loop to try again, after backoff_64 has seen the lockword read 0 | |
285 | 2: | |
286 | movq 8(%rdi),%rcx // get ptr to last element in q (we hold the lock from here on) | |
287 | testq %rcx,%rcx // q null? | |
288 | jnz 3f // no | |
289 | movq %rsi,(%rdi) // q empty so this is first element | |
290 | jmp 4f | |
291 | 3: | |
292 | movq %rsi,(%rdx,%rcx) // point to new element from last: store into the old last element's link field | |
293 | 4: | |
294 | movq %rsi,8(%rdi) // new element becomes last in q | |
295 | movl $0,16(%rdi) // unlock spinlock by storing 0 to the 32-bit lockword | |
296 | ret | |
297 | COMMPAGE_DESCRIPTOR(pfz_enqueue_64,_COMM_PAGE_PFZ_ENQUEUE,0,0) | |
298 | ||
299 | ||
300 | ||
301 | /* Preemption-free-zone routine to FIFO Dequeue (unlinks the first element while holding the spinlock): | |
302 | * %rdi = ptr to queue head structure (first ptr at offset 0, last ptr at 8, 32-bit lockword at 16) | |
303 | * %rdx = offset of link field in elements | |
304 | * %ebx = preemption flag (kernel sets nonzero if we should preempt); not tested by this routine itself | |
305 | * Destroys %rcx, and also %rsi when the queue is not empty. | |
306 | * Returns with the removed first element (or 0 if the queue was empty) in %rax. | |
307 | */ | |
308 | ||
309 | COMMPAGE_FUNCTION_START(pfz_dequeue_64, 64, 4) | |
310 | 1: | |
311 | xorl %eax, %eax /* %eax = value cmpxchg expects in the lockword: 0 = unlocked */ | |
312 | orl $-1, %ecx /* %ecx = value cmpxchg stores on success: -1 = locked */ | |
313 | lock | |
314 | cmpxchgl %ecx,16(%rdi) // try to take the spinlock: if lockword == %eax, store %ecx and set ZF | |
315 | jz 2f // got it | |
316 | ||
317 | pause | |
318 | xorl %eax, %eax /* re-zero: a failed cmpxchg loads the observed lockword into %eax */ | |
319 | lock | |
320 | cmpxchgl %ecx,16(%rdi) // try 2nd time to take the spinlock | |
321 | jz 2f // got it | |
322 | ||
323 | pause | |
324 | xorl %eax, %eax /* re-zero the expected value again */ | |
325 | lock | |
326 | cmpxchgl %ecx,16(%rdi) // try 3rd time to take the spinlock | |
327 | jz 2f // got it | |
328 | ||
329 | COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_DEQUEUE,pfz_dequeue_64) | |
330 | jmp 1b // loop to try again, after backoff_64 has seen the lockword read 0 | |
331 | 2: | |
332 | movq (%rdi),%rax // get ptr to first element in q; this is also the return value | |
333 | testq %rax,%rax // q null? | |
334 | jz 4f // yes, so unlock and return 0 | |
335 | movq (%rdx,%rax),%rsi// get ptr to 2nd element in q (read from the first element's link field) | |
336 | testq %rsi,%rsi // is there a 2nd element? | |
337 | jnz 3f // yes | |
338 | movq %rsi,8(%rdi) // no - %rsi is 0 here, so this clears "last" field of q head | |
339 | 3: | |
340 | movq %rsi,(%rdi) // update "first" field of q head (0 if the q is now empty) | |
341 | 4: | |
342 | movl $0,16(%rdi) // unlock spinlock; %rax still holds the removed element or 0 | |
343 | ret | |
344 | COMMPAGE_DESCRIPTOR(pfz_dequeue_64,_COMM_PAGE_PFZ_DEQUEUE,0,0) | |