]> git.saurik.com Git - apple/dyld.git/blob - src/threadLocalHelpers.s
dyld-625.13.tar.gz
[apple/dyld.git] / src / threadLocalHelpers.s
1 /*
2 * Copyright (c) 2010-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 #include <System/machine/cpu_capabilities.h>
25
26 // bool save_xxm = (*((uint32_t*)_COMM_PAGE_CPU_CAPABILITIES) & kHasAVX1_0) != 0;
27
28 #if __x86_64__
29
// Frame slots used by the lazy-allocation path of the x86_64 _tlv_get_addr.
// Each value is a byte offset from %rbp at which one general-purpose register
// is stored before the C allocator is called and reloaded afterwards.
30 #define RDI_SAVE_RBP -8
31 #define RSI_SAVE_RBP -16
32 #define RDX_SAVE_RBP -24
33 #define RCX_SAVE_RBP -32
34 #define RBX_SAVE_RBP -40
35 #define R8_SAVE_RBP -48
36 #define R9_SAVE_RBP -56
37 #define R10_SAVE_RBP -64
38 #define R11_SAVE_RBP -72
// Bytes subtracted from %rsp right after the frame is set up; the nine slots
// above occupy the top 72 bytes of this area.
39 #define STATIC_STACK_SIZE 256 // extra padding to allow it to be 64-byte aligned
40
// Slots for %xmm0-%xmm7 on the non-xsave path. These are byte offsets from
// %rsp (not %rbp), 16 bytes per register, 128 bytes in total.
41 #define XMM0_SAVE_RSP 0x00
42 #define XMM1_SAVE_RSP 0x10
43 #define XMM2_SAVE_RSP 0x20
44 #define XMM3_SAVE_RSP 0x30
45 #define XMM4_SAVE_RSP 0x40
46 #define XMM5_SAVE_RSP 0x50
47 #define XMM6_SAVE_RSP 0x60
48 #define XMM7_SAVE_RSP 0x70
49
50
// _tlv_get_addr (x86_64)
//   In:   %rdi = pointer to the TLV descriptor; the key is read from offset 8
//         and the variable's offset from offset 16.
//   Out:  %rax = (per-thread value stored at %gs:key*8) + offset.
//   Fast path: the per-thread value is non-NULL; only %rax and flags change.
//   Slow path (LlazyAllocate): the per-thread value is NULL, so
//         _tlv_allocate_and_initialize_for_key(key) is called and its return
//         value is used as the base. Around that call rdi, rsi, rdx, rcx, rbx,
//         r8-r11 are saved in the frame, and vector state is saved either with
//         movdqa (xmm0-xmm7 only) or with xsave, depending on the cpuid result
//         cached in _hasXSave.
51 // returns address of TLV in %rax, all other registers preserved
52 .globl _tlv_get_addr
53 .private_extern _tlv_get_addr
54 _tlv_get_addr:
55 movq 8(%rdi),%rax // get key from descriptor
56 movq %gs:0x0(,%rax,8),%rax // get thread value
57 testq %rax,%rax // if NULL, lazily allocate
58 je LlazyAllocate
59 addq 16(%rdi),%rax // add offset from descriptor
60 ret
61 LlazyAllocate:
// Build a frame; every *_SAVE_RBP slot below lives inside the
// STATIC_STACK_SIZE bytes reserved here.
62 pushq %rbp
63 movq %rsp,%rbp
64 subq $STATIC_STACK_SIZE,%rsp
65 movq %rdi,RDI_SAVE_RBP(%rbp) # save registers that might be used as parameters
66 movq %rsi,RSI_SAVE_RBP(%rbp)
67 movq %rdx,RDX_SAVE_RBP(%rbp)
68 movq %rcx,RCX_SAVE_RBP(%rbp)
69 movq %rbx,RBX_SAVE_RBP(%rbp)
70 movq %r8, R8_SAVE_RBP(%rbp)
71 movq %r9, R9_SAVE_RBP(%rbp)
72 movq %r10,R10_SAVE_RBP(%rbp)
73 movq %r11,R11_SAVE_RBP(%rbp)
74
// One-time cpuid probe, cached in the .data words declared after this routine.
// cpuid overwrites %eax, %ebx, %ecx and %edx; %rbx, %rcx and %rdx are reloaded
// from the frame at Ldone.
75 cmpl $0, _inited(%rip)
76 jne Linited
77 movl $0x01,%eax
78 cpuid # get cpu features to check on xsave instruction support
79 andl $0x08000000,%ecx # check OSXSAVE bit
80 movl %ecx,_hasXSave(%rip)
81 cmpl $0, %ecx
82 jne LxsaveInfo
83 movl $1, _inited(%rip)
84 jmp Lsse
85
86 LxsaveInfo:
// cpuid leaf 0x0D, sub-leaf 0: %eax/%edx are kept as the feature mask later
// handed to xsave/xrstor, and %ecx as the size of the save buffer.
87 movl $0x0D,%eax
88 movl $0x00,%ecx
89 cpuid # get xsave parameter info
90 movl %eax,_features_lo32(%rip)
91 movl %edx,_features_hi32(%rip)
92 movl %ecx,_bufferSize32(%rip)
93 movl $1, _inited(%rip) // set only after the three values above are stored
94
95 Linited:
96 cmpl $0, _hasXSave(%rip)
97 jne Lxsave
98
99 Lsse:
// No xsave: spill xmm0-xmm7 into 128 fresh bytes at the bottom of the stack.
// movdqa requires 16-byte aligned addresses, so this relies on %rsp being
// 16-byte aligned at this point.
100 subq $128, %rsp
101 movdqa %xmm0, XMM0_SAVE_RSP(%rsp)
102 movdqa %xmm1, XMM1_SAVE_RSP(%rsp)
103 movdqa %xmm2, XMM2_SAVE_RSP(%rsp)
104 movdqa %xmm3, XMM3_SAVE_RSP(%rsp)
105 movdqa %xmm4, XMM4_SAVE_RSP(%rsp)
106 movdqa %xmm5, XMM5_SAVE_RSP(%rsp)
107 movdqa %xmm6, XMM6_SAVE_RSP(%rsp)
108 movdqa %xmm7, XMM7_SAVE_RSP(%rsp)
109 jmp Lalloc
110
111 Lxsave:
// Carve a _bufferSize32-byte, 64-byte aligned buffer off the stack.
// The 32-bit load zero-extends into %rax, so the 64-bit subtract is safe.
// %rdi is used as scratch here; the descriptor is reloaded from the frame at Lalloc.
112 movl _bufferSize32(%rip),%eax
113 movq %rsp, %rdi
114 subq %rax, %rdi # stack alloc buffer
115 andq $-64, %rdi # 64-byte align stack
116 movq %rdi, %rsp
117 # xsave requires buffer to be zero'ed out
// Zeroing loop: %r8 = cursor, %r9 = buffer start + size; one 8-byte store per
// iteration, repeated while %r8 < %r9 (unsigned compare).
118 movq $0, %rcx
119 movq %rdi, %r8
120 movq %rdi, %r9
121 addq %rax, %r9
122 Lz: movq %rcx, (%r8)
123 addq $8, %r8
124 cmpq %r8,%r9
125 ja Lz
126
127 movl _features_lo32(%rip),%eax
128 movl _features_hi32(%rip),%edx
129 # call xsave with buffer on stack and eax:edx flag bits
130 # note: do not use xsaveopt, it assumes you are using the same
131 # buffer as previous xsaves, and this thread is on the same cpu.
132 xsave (%rsp)
133
134 Lalloc:
// Both save paths join here with the saved vector state at (%rsp).
135 movq RDI_SAVE_RBP(%rbp),%rdi // reload descriptor (clobbered on the xsave path)
136 movq 8(%rdi),%rdi // get key from descriptor
137 call _tlv_allocate_and_initialize_for_key
138
// %rax now holds the allocator's return value; restore vector state using the
// same method that saved it.
139 cmpl $0, _hasXSave(%rip)
140 jne Lxrstror
141
142 movdqa XMM0_SAVE_RSP(%rsp),%xmm0
143 movdqa XMM1_SAVE_RSP(%rsp),%xmm1
144 movdqa XMM2_SAVE_RSP(%rsp),%xmm2
145 movdqa XMM3_SAVE_RSP(%rsp),%xmm3
146 movdqa XMM4_SAVE_RSP(%rsp),%xmm4
147 movdqa XMM5_SAVE_RSP(%rsp),%xmm5
148 movdqa XMM6_SAVE_RSP(%rsp),%xmm6
149 movdqa XMM7_SAVE_RSP(%rsp),%xmm7
150 jmp Ldone
151
152 Lxrstror:
// xrstor takes its feature mask in %edx:%eax, so park the allocator's result
// in %r11 meanwhile; %r11 itself is reloaded from the frame at Ldone.
153 movq %rax,%r11
154 movl _features_lo32(%rip),%eax
155 movl _features_hi32(%rip),%edx
156 # call xrstor with buffer on stack and eax:edx flag bits
157 xrstor (%rsp)
158 movq %r11,%rax
159
160 Ldone:
// Reload the general-purpose registers saved at LlazyAllocate.
161 movq RDI_SAVE_RBP(%rbp),%rdi
162 movq RSI_SAVE_RBP(%rbp),%rsi
163 movq RDX_SAVE_RBP(%rbp),%rdx
164 movq RCX_SAVE_RBP(%rbp),%rcx
165 movq RBX_SAVE_RBP(%rbp),%rbx
166 movq R8_SAVE_RBP(%rbp),%r8
167 movq R9_SAVE_RBP(%rbp),%r9
168 movq R10_SAVE_RBP(%rbp),%r10
169 movq R11_SAVE_RBP(%rbp),%r11
170 movq %rbp,%rsp // drops the static area and the xmm/xsave buffer in one step
171 popq %rbp
172 addq 16(%rdi),%rax // result = buffer + offset
173 ret
174
175 .data
176 # Cached info from cpuid.
// These words are written by the lazy-allocation path of the x86_64
// _tlv_get_addr the first time it runs and only read afterwards.
177 _inited: .long 0 // set to 1 once the cpuid probe has stored its results
178 _features_lo32: .long 0 // %eax from cpuid leaf 0x0D; loaded into %eax before xsave/xrstor
179 _features_hi32: .long 0 // %edx from cpuid leaf 0x0D; loaded into %edx before xsave/xrstor
180 _bufferSize32: .long 0 // %ecx from cpuid leaf 0x0D; bytes of stack allocated and zeroed for xsave
181 _hasXSave: .long 0 // cpuid leaf 1 %ecx masked with 0x08000000; non-zero selects the xsave path
182
183 #endif
184
185
186
187 #if __i386__
// _tlv_get_addr (i386)
//   In:   %eax = pointer to the TLV descriptor; the key is read from offset 4
//         and the variable's offset from offset 8.
//   Out:  %eax = (per-thread value stored at %gs:key*4) + offset.
//   %ecx is used as scratch on both paths and is not restored.
//   Slow path (LlazyAllocate): the per-thread value is NULL, so
//         _tlv_allocate_and_initialize_for_key(key) is called with the key
//         passed on the stack. %edx and the fxsave state are saved around it.
188 // returns address of TLV in %eax, all other registers (except %ecx) preserved
189 .globl _tlv_get_addr
190 .private_extern _tlv_get_addr
191 _tlv_get_addr:
192 movl 4(%eax),%ecx // get key from descriptor
193 movl %gs:0x0(,%ecx,4),%ecx // get thread value
194 testl %ecx,%ecx // if NULL, lazily allocate
195 je LlazyAllocate
196 movl 8(%eax),%eax // load offset from descriptor
197 addl %ecx,%eax // result = thread value + offset
198 ret
199 LlazyAllocate:
// Frame layout relative to %ebp:
//   -4    saved %edx
//   -8    saved descriptor pointer
//   the fxsave buffer starts at (%ebp-528) rounded down to a 16-byte boundary
//   %esp (= %ebp-552) holds the outgoing key argument
200 pushl %ebp
201 movl %esp,%ebp
202 pushl %edx // save edx
203 subl $548,%esp
204 movl %eax,-8(%ebp) // save descriptor
205 lea -528(%ebp),%ecx // get 512 byte buffer in frame
206 and $-16, %ecx // 16-byte align buffer for fxsave
207 fxsave (%ecx)
208 movl 4(%eax),%ecx // get key from descriptor
209 movl %ecx,(%esp) // push key parameter, also leaves stack aligned properly
210 call _tlv_allocate_and_initialize_for_key
// %eax = value returned by the allocator; finish the result before restoring state.
211 movl -8(%ebp),%ecx // get descriptor
212 movl 8(%ecx),%ecx // get offset from descriptor
213 addl %ecx,%eax // add offset to buffer
214 lea -528(%ebp),%ecx // recompute the same buffer address used for fxsave
215 and $-16, %ecx // 16-byte align buffer for fxrstor
216 fxrstor (%ecx)
217 addl $548,%esp // release the scratch area; %esp now points at the saved %edx
218 popl %edx // restore edx
219 popl %ebp
220 ret
221 #endif
222
223 #if __arm64__
// _tlv_get_addr (arm64)
// The descriptor layout depends on __LP64__: key at offset 8 and offset at
// offset 16 when set, key at offset 4 and offset at offset 8 otherwise.
// The per-thread value is loaded from a table indexed by the key, whose base
// comes from TPIDRRO_EL0. If that value is zero, the LlazyAllocate path calls
// _tlv_allocate_and_initialize_for_key(key) and uses its return value instead.
224 // Parameters: X0 = descriptor
225 // Result: X0 = address of TLV
226 // Note: all registers except X0, x16, and x17 are preserved
227 .align 2
228 .globl _tlv_get_addr
229 .private_extern _tlv_get_addr
230 _tlv_get_addr:
231 #if __LP64__
232 ldr x16, [x0, #8] // get key from descriptor
233 #else
234 ldr w16, [x0, #4] // get key from descriptor
235 #endif
236 mrs x17, TPIDRRO_EL0
237 and x17, x17, #-8 // clear low 3 bits before using the value as the table base; NOTE(review): original author was unsure why ("???") - presumably those bits carry non-address data, confirm
238 #if __LP64__
239 ldr x17, [x17, x16, lsl #3] // get thread allocation address for this key
240 #else
241 ldr w17, [x17, x16, lsl #2] // get thread allocation address for this key
242 #endif
243 cbz x17, LlazyAllocate // if NULL, lazily allocate
244 #if __LP64__
245 ldr x16, [x0, #16] // get offset from descriptor
246 #else
247 ldr w16, [x0, #8] // get offset from descriptor
248 #endif
249 add x0, x17, x16 // return allocation+offset
250 ret lr
251
252 LlazyAllocate:
// On entry: x0 = descriptor, x16 = key, x17 = 0.
253 stp fp, lr, [sp, #-16]!
254 mov fp, sp
// NOTE(review): every store below uses pre-decrement addressing and every load
// uses post-increment, so nothing in this routine touches these 288 bytes;
// confirm whether the reservation is still needed.
255 sub sp, sp, #288
256 stp x1, x2, [sp, #-16]! // save all registers that C function might trash
257 stp x3, x4, [sp, #-16]!
258 stp x5, x6, [sp, #-16]!
259 stp x7, x8, [sp, #-16]!
260 stp x9, x10, [sp, #-16]!
261 stp x11, x12, [sp, #-16]!
262 stp x13, x14, [sp, #-16]!
263 stp x15, x16, [sp, #-16]!
264 stp q0, q1, [sp, #-32]!
265 stp q2, q3, [sp, #-32]!
266 stp q4, q5, [sp, #-32]!
267 stp q6, q7, [sp, #-32]!
268 stp x0, x17, [sp, #-16]! // save descriptor (x17 is zero here and just fills the pair)
269
270 mov x0, x16 // use key from descriptor as parameter
271 bl _tlv_allocate_and_initialize_for_key
272 ldp x16, x17, [sp], #16 // pop descriptor
273 #if __LP64__
274 ldr x16, [x16, #16] // get offset from descriptor
275 #else
276 ldr w16, [x16, #8] // get offset from descriptor
277 #endif
278 add x0, x0, x16 // return allocation+offset
279
// Pop in exact reverse order of the pushes above.
280 ldp q6, q7, [sp], #32
281 ldp q4, q5, [sp], #32
282 ldp q2, q3, [sp], #32
283 ldp q0, q1, [sp], #32
284 ldp x15, x16, [sp], #16 // x16 is reloaded with the key saved above
285 ldp x13, x14, [sp], #16
286 ldp x11, x12, [sp], #16
287 ldp x9, x10, [sp], #16
288 ldp x7, x8, [sp], #16
289 ldp x5, x6, [sp], #16
290 ldp x3, x4, [sp], #16
291 ldp x1, x2, [sp], #16
292
293 mov sp, fp // discard the 288-byte reservation
294 ldp fp, lr, [sp], #16
295 ret lr
296
297 #endif
298
299 #if __arm__
// _tlv_get_addr (arm)
//   In:   r0 = pointer to the TLV descriptor; the key is read from offset 4
//         and the variable's offset from offset 8.
//   Out:  r0 = per-thread value for the key + offset.
//   The per-thread value comes from _pthread_getspecific(key). If that returns
//   0, _tlv_allocate_and_initialize_for_key(key) is called and its return
//   value is used instead.
//   NOTE(review): only r1-r3, r7 and lr are explicitly saved here (plus d0-d7
//   around the allocate call on __ARM_ARCH_7K__); any other register is
//   preserved only if both callees preserve it.
300 // returns address of TLV in r0, all other registers preserved
301 .align 2
302 .globl _tlv_get_addr
303 .private_extern _tlv_get_addr
304 _tlv_get_addr:
305 push {r1,r2,r3,r7,lr}
306 #if __ARM_ARCH_7K__
307 sub sp, sp, #12 // align stack to 16 bytes
308 #endif
309 mov r7, r0 // save descriptor in r7
310 ldr r0, [r7, #4] // get key from descriptor
311 bl _pthread_getspecific // get thread value
312 cmp r0, #0
313 bne L2 // non-NULL: skip allocation; NULL falls through to lazily allocate
314 #if __ARM_ARCH_7K__
315 vpush {d0, d1, d2, d3, d4, d5, d6, d7}
316 #endif
317 ldr r0, [r7, #4] // get key from descriptor
318 bl _tlv_allocate_and_initialize_for_key
319 #if __ARM_ARCH_7K__
320 vpop {d0, d1, d2, d3, d4, d5, d6, d7}
321 #endif
322 L2: ldr r1, [r7, #8] // get offset from descriptor
323 add r0, r1, r0 // add offset into allocation block
324 #if __ARM_ARCH_7K__
325 add sp, sp, #12 // undo the alignment padding added on entry
326 #endif
327 pop {r1,r2,r3,r7,pc} // restore saved registers and return by loading the saved lr into pc
328 #endif
329
330 .subsections_via_symbols
331
332