/*
 * Copyright (c) 2004-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
28 | ||
29 | #include <sys/appleapiopts.h> | |
30 | #include <machine/cpu_capabilities.h> | |
31 | #include <machine/commpage.h> | |
32 | ||
33 | /* OSAtomic.h library native implementations. */ | |
34 | ||
35 | .text | |
36 | .align 2, 0x90 | |
37 | ||
38 | // This is a regparm(3) subroutine used by: | |
39 | ||
40 | // bool OSAtomicCompareAndSwap32( int32_t old, int32_t new, int32_t *value); | |
41 | // int32_t OSAtomicAnd32( int32_t mask, int32_t *value); | |
42 | // int32_t OSAtomicOr32( int32_t mask, int32_t *value); | |
43 | // int32_t OSAtomicXor32( int32_t mask, int32_t *value); | |
44 | ||
45 | // It assumes old -> %eax, new -> %edx, value -> %ecx | |
46 | // on success: returns with ZF set | |
47 | // on failure: returns with *value in %eax, ZF clear | |
48 | ||
49 | // The first word of the routine contains the address of the first instruction, | |
50 | // so callers can pass parameters in registers by using the absolute: | |
51 | ||
52 | // call *_COMPARE_AND_SWAP32 | |
53 | ||
54 | // TODO: move the .long onto a separate page to reduce icache pollution (?) | |
55 | ||
56 | Lcompare_and_swap32_mp: | |
57 | .long _COMM_PAGE_COMPARE_AND_SWAP32+4 | |
58 | lock | |
59 | cmpxchgl %edx, (%ecx) | |
60 | ret | |
61 | ||
62 | COMMPAGE_DESCRIPTOR(compare_and_swap32_mp,_COMM_PAGE_COMPARE_AND_SWAP32,0,kUP) | |
63 | ||
64 | Lcompare_and_swap32_up: | |
65 | .long _COMM_PAGE_COMPARE_AND_SWAP32+4 | |
66 | cmpxchgl %edx, (%ecx) | |
67 | ret | |
68 | ||
69 | COMMPAGE_DESCRIPTOR(compare_and_swap32_up,_COMM_PAGE_COMPARE_AND_SWAP32,kUP,0) | |
70 | ||
71 | // This is a subroutine used by: | |
72 | // bool OSAtomicCompareAndSwap64( int64_t old, int64_t new, int64_t *value); | |
73 | ||
74 | // It assumes old -> %eax/%edx, new -> %ebx/%ecx, value -> %esi | |
75 | // on success: returns with ZF set | |
76 | // on failure: returns with *value in %eax/%edx, ZF clear | |
77 | ||
78 | Lcompare_and_swap64_mp: | |
79 | .long _COMM_PAGE_COMPARE_AND_SWAP64+4 | |
80 | lock | |
81 | cmpxchg8b (%esi) | |
82 | ret | |
83 | ||
84 | COMMPAGE_DESCRIPTOR(compare_and_swap64_mp,_COMM_PAGE_COMPARE_AND_SWAP64,0,kUP) | |
85 | ||
86 | Lcompare_and_swap64_up: | |
87 | .long _COMM_PAGE_COMPARE_AND_SWAP64+4 | |
88 | cmpxchg8b (%esi) | |
89 | ret | |
90 | ||
91 | COMMPAGE_DESCRIPTOR(compare_and_swap64_up,_COMM_PAGE_COMPARE_AND_SWAP64,kUP,0) | |
92 | ||
93 | // This is a subroutine used by: | |
94 | // bool OSAtomicTestAndSet( uint32_t n, void *value ); | |
95 | // It assumes n -> %eax, value -> %edx | |
96 | ||
97 | // Returns: old value of bit in CF | |
98 | ||
99 | Lbit_test_and_set_mp: | |
100 | .long _COMM_PAGE_BTS+4 | |
101 | lock | |
0c530ab8 | 102 | btsl %eax, (%edx) |
91447636 A |
103 | ret |
104 | ||
105 | COMMPAGE_DESCRIPTOR(bit_test_and_set_mp,_COMM_PAGE_BTS,0,kUP) | |
106 | ||
107 | Lbit_test_and_set_up: | |
108 | .long _COMM_PAGE_BTS+4 | |
0c530ab8 | 109 | btsl %eax, (%edx) |
91447636 A |
110 | ret |
111 | ||
112 | COMMPAGE_DESCRIPTOR(bit_test_and_set_up,_COMM_PAGE_BTS,kUP,0) | |
113 | ||
114 | // This is a subroutine used by: | |
115 | // bool OSAtomicTestAndClear( uint32_t n, void *value ); | |
116 | // It assumes n -> %eax, value -> %edx | |
117 | ||
118 | // Returns: old value of bit in CF | |
119 | ||
120 | Lbit_test_and_clear_mp: | |
121 | .long _COMM_PAGE_BTC+4 | |
122 | lock | |
0c530ab8 | 123 | btrl %eax, (%edx) |
91447636 A |
124 | ret |
125 | ||
126 | COMMPAGE_DESCRIPTOR(bit_test_and_clear_mp,_COMM_PAGE_BTC,0,kUP) | |
127 | ||
128 | Lbit_test_and_clear_up: | |
129 | .long _COMM_PAGE_BTC+4 | |
0c530ab8 | 130 | btrl %eax, (%edx) |
91447636 A |
131 | ret |
132 | ||
133 | COMMPAGE_DESCRIPTOR(bit_test_and_clear_up,_COMM_PAGE_BTC,kUP,0) | |
134 | ||
135 | // This is a subroutine used by: | |
136 | // int32_t OSAtomicAdd32( int32_t amt, int32_t *value ); | |
137 | // It assumes amt -> %eax, value -> %edx | |
138 | ||
139 | // Returns: old value in %eax | |
140 | // NB: OSAtomicAdd32 returns the new value, so clients will add amt to %eax | |
141 | ||
142 | Latomic_add32_mp: | |
143 | .long _COMM_PAGE_ATOMIC_ADD32+4 | |
144 | lock | |
145 | xaddl %eax, (%edx) | |
146 | ret | |
147 | ||
148 | COMMPAGE_DESCRIPTOR(atomic_add32_mp,_COMM_PAGE_ATOMIC_ADD32,0,kUP) | |
149 | ||
150 | Latomic_add32_up: | |
151 | .long _COMM_PAGE_ATOMIC_ADD32+4 | |
152 | xaddl %eax, (%edx) | |
153 | ret | |
154 | ||
155 | COMMPAGE_DESCRIPTOR(atomic_add32_up,_COMM_PAGE_ATOMIC_ADD32,kUP,0) | |
2d21ac55 A |
156 | |
157 | ||
158 | // OSMemoryBarrier() | |
159 | // These are used both in 32 and 64-bit mode. We use a fence even on UP | |
160 | // machines, so this function can be used with nontemporal stores. | |
161 | ||
162 | Lmemory_barrier: | |
163 | lock | |
164 | addl $0,(%esp) | |
165 | ret | |
166 | ||
167 | COMMPAGE_DESCRIPTOR(memory_barrier,_COMM_PAGE_MEMORY_BARRIER,0,kHasSSE2); | |
168 | ||
169 | Lmemory_barrier_sse2: | |
170 | mfence | |
171 | ret | |
172 | ||
173 | COMMPAGE_DESCRIPTOR(memory_barrier_sse2,_COMM_PAGE_MEMORY_BARRIER,kHasSSE2,0); | |
174 | ||
175 | ||
176 | /* | |
177 | * typedef volatile struct { | |
178 | * void *opaque1; <-- ptr to 1st queue element or null | |
179 | * long opaque2; <-- generation count | |
180 | * } OSQueueHead; | |
181 | * | |
182 | * void OSAtomicEnqueue( OSQueueHead *list, void *new, size_t offset); | |
183 | */ | |
184 | ||
185 | LAtomicEnqueue: | |
186 | pushl %edi | |
187 | pushl %esi | |
188 | pushl %ebx | |
189 | movl 16(%esp),%edi // %edi == ptr to list head | |
190 | movl 20(%esp),%ebx // %ebx == new | |
191 | movl 24(%esp),%esi // %esi == offset | |
192 | movl (%edi),%eax // %eax == ptr to 1st element in Q | |
193 | movl 4(%edi),%edx // %edx == current generation count | |
194 | 1: | |
195 | movl %eax,(%ebx,%esi)// link to old list head from new element | |
196 | movl %edx,%ecx | |
197 | incl %ecx // increment generation count | |
198 | lock // always lock for now... | |
199 | cmpxchg8b (%edi) // ...push on new element | |
200 | jnz 1b | |
201 | popl %ebx | |
202 | popl %esi | |
203 | popl %edi | |
204 | ret | |
205 | ||
206 | COMMPAGE_DESCRIPTOR(AtomicEnqueue,_COMM_PAGE_ENQUEUE,0,0) | |
207 | ||
208 | ||
209 | /* void* OSAtomicDequeue( OSQueueHead *list, size_t offset); */ | |
210 | ||
211 | LAtomicDequeue: | |
212 | pushl %edi | |
213 | pushl %esi | |
214 | pushl %ebx | |
215 | movl 16(%esp),%edi // %edi == ptr to list head | |
216 | movl 20(%esp),%esi // %esi == offset | |
217 | movl (%edi),%eax // %eax == ptr to 1st element in Q | |
218 | movl 4(%edi),%edx // %edx == current generation count | |
219 | 1: | |
220 | testl %eax,%eax // list empty? | |
221 | jz 2f // yes | |
222 | movl (%eax,%esi),%ebx // point to 2nd in Q | |
223 | movl %edx,%ecx | |
224 | incl %ecx // increment generation count | |
225 | lock // always lock for now... | |
226 | cmpxchg8b (%edi) // ...pop off 1st element | |
227 | jnz 1b | |
228 | 2: | |
229 | popl %ebx | |
230 | popl %esi | |
231 | popl %edi | |
232 | ret // ptr to 1st element in Q still in %eax | |
233 | ||
234 | COMMPAGE_DESCRIPTOR(AtomicDequeue,_COMM_PAGE_DEQUEUE,0,0) | |
235 | ||
0c530ab8 A |
236 | |
237 | ||
238 | /************************* x86_64 versions follow **************************/ | |
239 | ||
240 | ||
241 | // This is a subroutine used by: | |
242 | ||
243 | // bool OSAtomicCompareAndSwap32( int32_t old, int32_t new, int32_t *value); | |
244 | // int32_t OSAtomicAnd32( int32_t mask, int32_t *value); | |
245 | // int32_t OSAtomicOr32( int32_t mask, int32_t *value); | |
246 | // int32_t OSAtomicXor32( int32_t mask, int32_t *value); | |
247 | ||
248 | // It assumes: old -> %rdi (ie, it follows the ABI parameter conventions) | |
249 | // new -> %rsi | |
250 | // value -> %rdx | |
251 | // on success: returns with ZF set | |
252 | // on failure: returns with *value in %eax, ZF clear | |
253 | ||
254 | .code64 | |
255 | Lcompare_and_swap32_mp_64: | |
256 | movl %edi,%eax // put old value where "cmpxchg" wants it | |
257 | lock | |
258 | cmpxchgl %esi, (%rdx) | |
259 | ret | |
260 | ||
261 | COMMPAGE_DESCRIPTOR(compare_and_swap32_mp_64,_COMM_PAGE_COMPARE_AND_SWAP32,0,kUP) | |
262 | ||
263 | .code64 | |
264 | Lcompare_and_swap32_up_64: | |
265 | movl %edi,%eax // put old value where "cmpxchg" wants it | |
266 | cmpxchgl %esi, (%rdx) | |
267 | ret | |
268 | ||
269 | COMMPAGE_DESCRIPTOR(compare_and_swap32_up_64,_COMM_PAGE_COMPARE_AND_SWAP32,kUP,0) | |
270 | ||
271 | // This is a subroutine used by: | |
272 | // bool OSAtomicCompareAndSwap64( int64_t old, int64_t new, int64_t *value); | |
273 | ||
274 | // It assumes: old -> %rdi (ie, it follows the ABI parameter conventions) | |
275 | // new -> %rsi | |
276 | // value -> %rdx | |
277 | // on success: returns with ZF set | |
278 | // on failure: returns with *value in %rax, ZF clear | |
279 | ||
280 | .code64 | |
281 | Lcompare_and_swap64_mp_64: | |
282 | movq %rdi,%rax // put old value where "cmpxchg" wants it | |
283 | lock | |
284 | cmpxchgq %rsi, (%rdx) | |
285 | ret | |
286 | ||
287 | COMMPAGE_DESCRIPTOR(compare_and_swap64_mp_64,_COMM_PAGE_COMPARE_AND_SWAP64,0,kUP) | |
288 | ||
289 | .code64 | |
290 | Lcompare_and_swap64_up_64: | |
291 | movq %rdi,%rax // put old value where "cmpxchg" wants it | |
292 | cmpxchgq %rsi, (%rdx) | |
293 | ret | |
294 | ||
295 | COMMPAGE_DESCRIPTOR(compare_and_swap64_up_64,_COMM_PAGE_COMPARE_AND_SWAP64,kUP,0) | |
296 | ||
297 | // This is a subroutine used by: | |
298 | // bool OSAtomicTestAndSet( uint32_t n, void *value ); | |
299 | // It is called with standard register conventions: | |
300 | // n = %rdi | |
301 | // value = %rsi | |
302 | // Returns: old value of bit in CF | |
303 | ||
304 | .code64 | |
305 | Lbit_test_and_set_mp_64: | |
306 | lock | |
307 | btsl %edi, (%rsi) | |
308 | ret | |
309 | ||
310 | COMMPAGE_DESCRIPTOR(bit_test_and_set_mp_64,_COMM_PAGE_BTS,0,kUP) | |
311 | ||
312 | .code64 | |
313 | Lbit_test_and_set_up_64: | |
314 | btsl %edi, (%rsi) | |
315 | ret | |
316 | ||
317 | COMMPAGE_DESCRIPTOR(bit_test_and_set_up_64,_COMM_PAGE_BTS,kUP,0) | |
318 | ||
319 | // This is a subroutine used by: | |
320 | // bool OSAtomicTestAndClear( uint32_t n, void *value ); | |
321 | // It is called with standard register conventions: | |
322 | // n = %rdi | |
323 | // value = %rsi | |
324 | // Returns: old value of bit in CF | |
325 | ||
326 | .code64 | |
327 | Lbit_test_and_clear_mp_64: | |
328 | lock | |
329 | btrl %edi, (%rsi) | |
330 | ret | |
331 | ||
332 | COMMPAGE_DESCRIPTOR(bit_test_and_clear_mp_64,_COMM_PAGE_BTC,0,kUP) | |
333 | ||
334 | .code64 | |
335 | Lbit_test_and_clear_up_64: | |
336 | btrl %edi, (%rsi) | |
337 | ret | |
338 | ||
339 | COMMPAGE_DESCRIPTOR(bit_test_and_clear_up_64,_COMM_PAGE_BTC,kUP,0) | |
340 | ||
341 | // This is a subroutine used by: | |
342 | // int32_t OSAtomicAdd32( int32_t amt, int32_t *value ); | |
343 | // It is called with standard register conventions: | |
344 | // amt = %rdi | |
345 | // value = %rsi | |
346 | // Returns: old value in %edi | |
347 | // NB: OSAtomicAdd32 returns the new value, so clients will add amt to %edi | |
348 | ||
349 | .code64 | |
350 | Latomic_add32_mp_64: | |
351 | lock | |
352 | xaddl %edi, (%rsi) | |
353 | ret | |
354 | ||
355 | COMMPAGE_DESCRIPTOR(atomic_add32_mp_64,_COMM_PAGE_ATOMIC_ADD32,0,kUP) | |
356 | ||
357 | .code64 | |
358 | Latomic_add32_up_64: | |
359 | xaddl %edi, (%rsi) | |
360 | ret | |
361 | ||
362 | COMMPAGE_DESCRIPTOR(atomic_add32_up_64,_COMM_PAGE_ATOMIC_ADD32,kUP,0) | |
363 | ||
364 | // This is a subroutine used by: | |
365 | // int64_t OSAtomicAdd64( int64_t amt, int64_t *value ); | |
366 | // It is called with standard register conventions: | |
367 | // amt = %rdi | |
368 | // value = %rsi | |
369 | // Returns: old value in %rdi | |
370 | // NB: OSAtomicAdd64 returns the new value, so clients will add amt to %rdi | |
371 | ||
372 | .code64 | |
373 | Latomic_add64_mp_64: | |
374 | lock | |
375 | xaddq %rdi, (%rsi) | |
376 | ret | |
377 | ||
378 | COMMPAGE_DESCRIPTOR(atomic_add64_mp_64,_COMM_PAGE_ATOMIC_ADD64,0,kUP) | |
379 | ||
380 | .code64 | |
381 | Latomic_add64_up_64: | |
382 | xaddq %rdi, (%rsi) | |
383 | ret | |
384 | ||
385 | COMMPAGE_DESCRIPTOR(atomic_add64_up_64,_COMM_PAGE_ATOMIC_ADD64,kUP,0) | |
2d21ac55 A |
386 | |
387 | ||
388 | /* | |
389 | * typedef volatile struct { | |
390 | * void *opaque1; <-- ptr to 1st queue element or null | |
391 | * long opaque2; <-- generation count | |
392 | * } OSQueueHead; | |
393 | * | |
394 | * void OSAtomicEnqueue( OSQueueHead *list, void *new, size_t offset); | |
395 | */ | |
396 | ||
397 | .code64 | |
398 | LAtomicEnqueue_64: // %rdi == list head, %rsi == new, %rdx == offset | |
399 | pushq %rbx | |
400 | movq %rsi,%rbx // %rbx == new | |
401 | movq %rdx,%rsi // %rsi == offset | |
402 | movq (%rdi),%rax // %rax == ptr to 1st element in Q | |
403 | movq 8(%rdi),%rdx // %rdx == current generation count | |
404 | 1: | |
405 | movq %rax,(%rbx,%rsi)// link to old list head from new element | |
406 | movq %rdx,%rcx | |
407 | incq %rcx // increment generation count | |
408 | lock // always lock for now... | |
409 | cmpxchg16b (%rdi) // ...push on new element | |
410 | jnz 1b | |
411 | popq %rbx | |
412 | ret | |
413 | ||
414 | COMMPAGE_DESCRIPTOR(AtomicEnqueue_64,_COMM_PAGE_ENQUEUE,0,0) | |
415 | ||
416 | ||
417 | /* void* OSAtomicDequeue( OSQueueHead *list, size_t offset); */ | |
418 | ||
419 | .code64 | |
420 | LAtomicDequeue_64: // %rdi == list head, %rsi == offset | |
421 | pushq %rbx | |
422 | movq (%rdi),%rax // %rax == ptr to 1st element in Q | |
423 | movq 8(%rdi),%rdx // %rdx == current generation count | |
424 | 1: | |
425 | testq %rax,%rax // list empty? | |
426 | jz 2f // yes | |
427 | movq (%rax,%rsi),%rbx // point to 2nd in Q | |
428 | movq %rdx,%rcx | |
429 | incq %rcx // increment generation count | |
430 | lock // always lock for now... | |
431 | cmpxchg16b (%rdi) // ...pop off 1st element | |
432 | jnz 1b | |
433 | 2: | |
434 | popq %rbx | |
435 | ret // ptr to 1st element in Q still in %rax | |
436 | ||
437 | COMMPAGE_DESCRIPTOR(AtomicDequeue_64,_COMM_PAGE_DEQUEUE,0,0) |