/*
 * Copyright (c) 2010-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

#include <System/machine/cpu_capabilities.h>

// bool save_xmm = (*((uint32_t*)_COMM_PAGE_CPU_CAPABILITIES) & kHasAVX1_0) != 0;

#if __x86_64__

#define RDI_SAVE_RBP		-8
#define RSI_SAVE_RBP		-16
#define RDX_SAVE_RBP		-24
#define RCX_SAVE_RBP		-32
#define RBX_SAVE_RBP		-40
#define R8_SAVE_RBP		-48
#define R9_SAVE_RBP		-56
#define R10_SAVE_RBP		-64
#define R11_SAVE_RBP		-72
#define STATIC_STACK_SIZE	256	// extra padding so the stack area can be 64-byte aligned for xsave

#define XMM0_SAVE_RSP		0x00
#define XMM1_SAVE_RSP		0x10
#define XMM2_SAVE_RSP		0x20
#define XMM3_SAVE_RSP		0x30
#define XMM4_SAVE_RSP		0x40
#define XMM5_SAVE_RSP		0x50
#define XMM6_SAVE_RSP		0x60
#define XMM7_SAVE_RSP		0x70

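// The loads below imply the TLV descriptor layout: thunk pointer at +0,
// pthread key at +8, and offset at +16 (pointer-sized fields, so +4/+8 on
// i386). A rough C sketch, with field names assumed for illustration:
//
//	struct TLVDescriptor
//	{
//		void*          (*thunk)(struct TLVDescriptor*);  // points at _tlv_get_addr
//		unsigned long  key;      // pthread key of this image's TLV block
//		unsigned long  offset;   // offset of the variable within that block
//	};
//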
// returns address of TLV in %rax, all other registers preserved
	.globl _tlv_get_addr
	.private_extern _tlv_get_addr
_tlv_get_addr:
	movq	8(%rdi),%rax			// get key from descriptor
	movq	%gs:0x0(,%rax,8),%rax		// get thread value
	testq	%rax,%rax			// if NULL, lazily allocate
	je	LlazyAllocate
	addq	16(%rdi),%rax			// add offset from descriptor
	ret
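
// In C terms the routine is roughly the following, where pthread_tsd is a
// stand-in name for the %gs-based thread-specific-data array:
//
//	void* _tlv_get_addr(struct TLVDescriptor* desc)   // descriptor arrives in %rdi
//	{
//		char* block = pthread_tsd[desc->key];     // %gs points at the TSD slots
//		if ( block == NULL )
//			block = tlv_allocate_and_initialize_for_key(desc->key);  // slow path
//		return block + desc->offset;
//	}
//
// The slow path must look like a leaf to compiler-generated code, which
// assumes only %rax is clobbered, so every other parameter register and the
// vector state are saved around the C call.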
LlazyAllocate:
	pushq	%rbp
	movq	%rsp,%rbp
	subq	$STATIC_STACK_SIZE,%rsp
	movq	%rdi,RDI_SAVE_RBP(%rbp)	# save registers that might be used as parameters
	movq	%rsi,RSI_SAVE_RBP(%rbp)
	movq	%rdx,RDX_SAVE_RBP(%rbp)
	movq	%rcx,RCX_SAVE_RBP(%rbp)
	movq	%rbx,RBX_SAVE_RBP(%rbp)
	movq	%r8, R8_SAVE_RBP(%rbp)
	movq	%r9, R9_SAVE_RBP(%rbp)
	movq	%r10,R10_SAVE_RBP(%rbp)
	movq	%r11,R11_SAVE_RBP(%rbp)

	cmpl	$0, _inited(%rip)
	jne	Linited
	movl	$0x01,%eax
	cpuid				# get cpu features to check for xsave instruction support
	andl	$0x08000000,%ecx	# isolate the OSXSAVE bit (CPUID.1:ECX bit 27)
	movl	%ecx,_hasXSave(%rip)
	cmpl	$0, %ecx
	jne	LxsaveInfo
	movl	$1, _inited(%rip)
	jmp	Lsse

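# OSXSAVE set means the OS has enabled XSAVE support (CR4.OSXSAVE), so
# xsave/xrstor may be used to save the full architectural vector state
# (AVX and beyond), not just the SSE state covered by the movdqa path.
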
LxsaveInfo:
	movl	$0x0D,%eax
	movl	$0x00,%ecx
	cpuid				# get xsave parameter info
	movl	%eax,_features_lo32(%rip)
	movl	%edx,_features_hi32(%rip)
	movl	%ecx,_bufferSize32(%rip)
	movl	$1, _inited(%rip)

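# CPUID leaf 0x0D, sub-leaf 0 reports the xsave parameters cached above:
# EDX:EAX is the mask of state components the processor supports in XCR0,
# and ECX is the buffer size needed if every supported component is saved.
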
Linited:
	cmpl	$0, _hasXSave(%rip)
	jne	Lxsave

Lsse:
	subq	$128, %rsp
	movdqa	%xmm0, XMM0_SAVE_RSP(%rsp)
	movdqa	%xmm1, XMM1_SAVE_RSP(%rsp)
	movdqa	%xmm2, XMM2_SAVE_RSP(%rsp)
	movdqa	%xmm3, XMM3_SAVE_RSP(%rsp)
	movdqa	%xmm4, XMM4_SAVE_RSP(%rsp)
	movdqa	%xmm5, XMM5_SAVE_RSP(%rsp)
	movdqa	%xmm6, XMM6_SAVE_RSP(%rsp)
	movdqa	%xmm7, XMM7_SAVE_RSP(%rsp)
	jmp	Lalloc

Lxsave:
	movl	_bufferSize32(%rip),%eax
	movq	%rsp, %rdi
	subq	%rax, %rdi		# allocate buffer on the stack
	andq	$-64, %rdi		# 64-byte align buffer, as xsave requires
	movq	%rdi, %rsp
	# xsave requires the buffer to be zeroed out
	movq	$0, %rcx
	movq	%rdi, %r8
	movq	%rdi, %r9
	addq	%rax, %r9
Lz:	movq	%rcx, (%r8)
	addq	$8, %r8
	cmpq	%r8,%r9
	ja	Lz

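	# In particular the 64-byte xsave header at offset 512 (XSTATE_BV etc.)
	# must not contain garbage bits, so the whole buffer is cleared the
	# simple way before the first xsave into it.
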
	movl	_features_lo32(%rip),%eax
	movl	_features_hi32(%rip),%edx
	# call xsave with buffer on stack and edx:eax mask bits
	# note: do not use xsaveopt, it assumes you are using the same
	# buffer as previous xsaves, and that this thread is on the same cpu.
	xsave	(%rsp)

Lalloc:
	movq	RDI_SAVE_RBP(%rbp),%rdi
	movq	8(%rdi),%rdi			// get key from descriptor
	call	_tlv_allocate_and_initialize_for_key

	cmpl	$0, _hasXSave(%rip)
	jne	Lxrstror

	movdqa	XMM0_SAVE_RSP(%rsp),%xmm0
	movdqa	XMM1_SAVE_RSP(%rsp),%xmm1
	movdqa	XMM2_SAVE_RSP(%rsp),%xmm2
	movdqa	XMM3_SAVE_RSP(%rsp),%xmm3
	movdqa	XMM4_SAVE_RSP(%rsp),%xmm4
	movdqa	XMM5_SAVE_RSP(%rsp),%xmm5
	movdqa	XMM6_SAVE_RSP(%rsp),%xmm6
	movdqa	XMM7_SAVE_RSP(%rsp),%xmm7
	jmp	Ldone

Lxrstror:
	movq	%rax,%r11		# stash result; xrstor needs edx:eax for the mask
	movl	_features_lo32(%rip),%eax
	movl	_features_hi32(%rip),%edx
	# call xrstor with buffer on stack and edx:eax mask bits
	xrstor	(%rsp)
	movq	%r11,%rax		# restore result

Ldone:
	movq	RDI_SAVE_RBP(%rbp),%rdi
	movq	RSI_SAVE_RBP(%rbp),%rsi
	movq	RDX_SAVE_RBP(%rbp),%rdx
	movq	RCX_SAVE_RBP(%rbp),%rcx
	movq	RBX_SAVE_RBP(%rbp),%rbx
	movq	R8_SAVE_RBP(%rbp),%r8
	movq	R9_SAVE_RBP(%rbp),%r9
	movq	R10_SAVE_RBP(%rbp),%r10
	movq	R11_SAVE_RBP(%rbp),%r11
	movq	%rbp,%rsp
	popq	%rbp
	addq	16(%rdi),%rax			// result = buffer + offset
	ret

	.data
# Cached info from cpuid.
_inited:		.long 0
_features_lo32:		.long 0
_features_hi32:		.long 0
_bufferSize32:		.long 0
_hasXSave:		.long 0

#endif


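// i386 variant. Note the non-standard calling convention: the compiler
// passes the descriptor address in %eax rather than on the stack, and an
// fxsave/fxrstor pair (512-byte, 16-byte-aligned buffer) preserves the
// x87/SSE state across the C call.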
#if __i386__
// returns address of TLV in %eax, all other registers (except %ecx) preserved
	.globl _tlv_get_addr
	.private_extern _tlv_get_addr
_tlv_get_addr:
	movl	4(%eax),%ecx			// get key from descriptor
	movl	%gs:0x0(,%ecx,4),%ecx		// get thread value
	testl	%ecx,%ecx			// if NULL, lazily allocate
	je	LlazyAllocate
	movl	8(%eax),%eax			// add offset from descriptor
	addl	%ecx,%eax
	ret
LlazyAllocate:
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%edx				// save edx
	subl	$548,%esp
	movl	%eax,-8(%ebp)			// save descriptor
	lea	-528(%ebp),%ecx			// get 512 byte buffer in frame
	and	$-16, %ecx			// 16-byte align buffer for fxsave
	fxsave	(%ecx)
	movl	4(%eax),%ecx			// get key from descriptor
	movl	%ecx,(%esp)			// push key parameter, also leaves stack aligned properly
	call	_tlv_allocate_and_initialize_for_key
	movl	-8(%ebp),%ecx			// get descriptor
	movl	8(%ecx),%ecx			// get offset from descriptor
	addl	%ecx,%eax			// add offset to buffer
	lea	-528(%ebp),%ecx
	and	$-16, %ecx			// 16-byte align buffer for fxrstor
	fxrstor	(%ecx)
	addl	$548,%esp
	popl	%edx				// restore edx
	popl	%ebp
	ret
#endif

#if __arm64__
// Parameters:	x0 = descriptor
// Result:	x0 = address of TLV
// Note:	all registers except x0, x16, and x17 are preserved
	.align 2
	.globl _tlv_get_addr
	.private_extern _tlv_get_addr
_tlv_get_addr:
	ldr	x16, [x0, #8]			// get key from descriptor
	mrs	x17, TPIDRRO_EL0
	and	x17, x17, #-8			// clear low 3 bits (Darwin keeps the cpu number there, not part of the TSD pointer)
	ldr	x17, [x17, x16, lsl #3]		// get thread allocation address for this key
	cbz	x17, LlazyAllocate		// if NULL, lazily allocate
	ldr	x16, [x0, #16]			// get offset from descriptor
	add	x0, x17, x16			// return allocation+offset
	ret	lr

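// The slow path saves x1-x16 and q0-q7 because compiler-generated TLV access
// treats _tlv_get_addr as preserving everything except x0 and the scratch
// registers x16/x17, while the C allocator called below follows the normal
// AAPCS64 convention and may clobber any caller-saved register.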
LlazyAllocate:
	stp	fp, lr, [sp, #-16]!
	mov	fp, sp
	sub	sp, sp, #288
	stp	x1, x2, [sp, #-16]!		// save all registers that C function might trash
	stp	x3, x4, [sp, #-16]!
	stp	x5, x6, [sp, #-16]!
	stp	x7, x8, [sp, #-16]!
	stp	x9, x10, [sp, #-16]!
	stp	x11, x12, [sp, #-16]!
	stp	x13, x14, [sp, #-16]!
	stp	x15, x16, [sp, #-16]!
	stp	q0, q1, [sp, #-32]!
	stp	q2, q3, [sp, #-32]!
	stp	q4, q5, [sp, #-32]!
	stp	q6, q7, [sp, #-32]!
	stp	x0, x17, [sp, #-16]!		// save descriptor

	mov	x0, x16				// use key from descriptor as parameter
	bl	_tlv_allocate_and_initialize_for_key
	ldp	x16, x17, [sp], #16		// pop descriptor
	ldr	x16, [x16, #16]			// get offset from descriptor
	add	x0, x0, x16			// return allocation+offset

	ldp	q6, q7, [sp], #32
	ldp	q4, q5, [sp], #32
	ldp	q2, q3, [sp], #32
	ldp	q0, q1, [sp], #32
	ldp	x15, x16, [sp], #16
	ldp	x13, x14, [sp], #16
	ldp	x11, x12, [sp], #16
	ldp	x9, x10, [sp], #16
	ldp	x7, x8, [sp], #16
	ldp	x5, x6, [sp], #16
	ldp	x3, x4, [sp], #16
	ldp	x1, x2, [sp], #16

	mov	sp, fp
	ldp	fp, lr, [sp], #16
	ret	lr

#endif

#if __arm__
// returns address of TLV in r0, all other registers preserved
	.globl _tlv_get_addr
	.private_extern _tlv_get_addr
_tlv_get_addr:
	push	{r1,r2,r3,r7,lr}
#if __ARM_ARCH_7K__
	sub	sp, sp, #12			// align stack to 16 bytes
#endif
	mov	r7, r0				// save descriptor in r7
	ldr	r0, [r7, #4]			// get key from descriptor
	bl	_pthread_getspecific		// get thread value
	cmp	r0, #0
	bne	L2				// if not NULL, skip lazy allocation
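// On armv7k (AAPCS-VFP), d0-d7 carry floating-point arguments and are
// caller-saved, so they are spilled around the C call below to honor the
// "all other registers preserved" contract.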
#if __ARM_ARCH_7K__
	vpush	{d0, d1, d2, d3, d4, d5, d6, d7}
#endif
	ldr	r0, [r7, #4]			// get key from descriptor
	bl	_tlv_allocate_and_initialize_for_key
#if __ARM_ARCH_7K__
	vpop	{d0, d1, d2, d3, d4, d5, d6, d7}
#endif
L2:	ldr	r1, [r7, #8]			// get offset from descriptor
	add	r0, r1, r0			// add offset into allocation block
#if __ARM_ARCH_7K__
	add	sp, sp, #12
#endif
	pop	{r1,r2,r3,r7,pc}
#endif

	.subsections_via_symbols