f1a1da6c | 1 | /* |
214d78a2 | 2 | * Copyright (c) 2000-2017 Apple Inc. All rights reserved. |
f1a1da6c A |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
a0619f9c | 5 | * |
f1a1da6c A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
a0619f9c | 14 | * |
f1a1da6c A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
a0619f9c | 17 | * |
f1a1da6c A |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
a0619f9c | 25 | * |
f1a1da6c A |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ | |
28 | /* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */ | |
29 | /* | |
30 | * pthread_synch.c | |
31 | */ | |
32 | ||
964d3577 A |
33 | #pragma mark - Front Matter |
34 | ||
214d78a2 A |
35 | #define _PTHREAD_CONDATTR_T |
36 | #define _PTHREAD_COND_T | |
f1a1da6c A |
37 | #define _PTHREAD_MUTEXATTR_T |
38 | #define _PTHREAD_MUTEX_T | |
39 | #define _PTHREAD_RWLOCKATTR_T | |
40 | #define _PTHREAD_RWLOCK_T | |
41 | ||
42 | #undef pthread_mutexattr_t | |
43 | #undef pthread_mutex_t | |
44 | #undef pthread_condattr_t | |
45 | #undef pthread_cond_t | |
46 | #undef pthread_rwlockattr_t | |
47 | #undef pthread_rwlock_t | |
48 | ||
2546420a | 49 | #include <sys/cdefs.h> |
0691f650 | 50 | #include <os/log.h> |
2546420a A |
51 | |
52 | // <rdar://problem/26158937> panic() should be marked noreturn | |
53 | extern void panic(const char *string, ...) __printflike(1,2) __dead2; | |
54 | ||
f1a1da6c A |
55 | #include <sys/param.h> |
56 | #include <sys/queue.h> | |
57 | #include <sys/resourcevar.h> | |
58 | //#include <sys/proc_internal.h> | |
59 | #include <sys/kauth.h> | |
60 | #include <sys/systm.h> | |
61 | #include <sys/timeb.h> | |
62 | #include <sys/times.h> | |
63 | #include <sys/acct.h> | |
64 | #include <sys/kernel.h> | |
65 | #include <sys/wait.h> | |
66 | #include <sys/signalvar.h> | |
67 | #include <sys/sysctl.h> | |
68 | #include <sys/syslog.h> | |
69 | #include <sys/stat.h> | |
70 | #include <sys/lock.h> | |
71 | #include <sys/kdebug.h> | |
72 | //#include <sys/sysproto.h> | |
73 | #include <sys/vm.h> | |
74 | #include <sys/user.h> /* for coredump */ | |
75 | #include <sys/proc_info.h> /* for fill_procworkqueue */ | |
76 | ||
f1a1da6c A |
77 | #include <mach/mach_port.h> |
78 | #include <mach/mach_types.h> | |
79 | #include <mach/semaphore.h> | |
80 | #include <mach/sync_policy.h> | |
81 | #include <mach/task.h> | |
82 | #include <mach/vm_prot.h> | |
83 | #include <kern/kern_types.h> | |
84 | #include <kern/task.h> | |
85 | #include <kern/clock.h> | |
86 | #include <mach/kern_return.h> | |
87 | #include <kern/thread.h> | |
a0619f9c | 88 | #include <kern/zalloc.h> |
f1a1da6c A |
89 | #include <kern/sched_prim.h> /* for thread_exception_return */ |
90 | #include <kern/processor.h> | |
91 | #include <kern/assert.h> | |
92 | #include <mach/mach_vm.h> | |
93 | #include <mach/mach_param.h> | |
94 | #include <mach/thread_status.h> | |
95 | #include <mach/thread_policy.h> | |
96 | #include <mach/message.h> | |
97 | #include <mach/port.h> | |
98 | //#include <vm/vm_protos.h> | |
99 | #include <vm/vm_fault.h> | |
100 | #include <vm/vm_map.h> | |
101 | #include <mach/thread_act.h> /* for thread_resume */ | |
102 | #include <machine/machine_routines.h> | |
964d3577 | 103 | #include <mach/shared_region.h> |
f1a1da6c | 104 | |
c1f56ec9 | 105 | #include "kern/kern_internal.h" |
f1a1da6c | 106 | |
214d78a2 A |
107 | #ifndef WQ_SETUP_EXIT_THREAD |
108 | #define WQ_SETUP_EXIT_THREAD 8 | |
109 | #endif | |
f1a1da6c | 110 | |
2546420a A |
111 | // XXX: Ditto for thread tags from kern/thread.h |
112 | #define THREAD_TAG_MAINTHREAD 0x1 | |
113 | #define THREAD_TAG_PTHREAD 0x10 | |
114 | #define THREAD_TAG_WORKQUEUE 0x20 | |
115 | ||
f1a1da6c A |
116 | lck_grp_attr_t *pthread_lck_grp_attr; |
117 | lck_grp_t *pthread_lck_grp; | |
118 | lck_attr_t *pthread_lck_attr; | |
119 | ||
f1a1da6c A |
120 | #define C_32_STK_ALIGN 16 |
121 | #define C_64_STK_ALIGN 16 | |
964d3577 | 122 | |
214d78a2 A |
123 | // WORKQ use the largest alignment any platform needs |
124 | #define C_WORKQ_STK_ALIGN 16 | |
125 | ||
c6e5f90c A |
126 | #if defined(__arm64__) |
127 | /* Pull the pthread_t into the same page as the top of the stack so we dirty one less page. | |
128 | * <rdar://problem/19941744> The _pthread struct at the top of the stack shouldn't be page-aligned | |
129 | */ | |
130 | #define PTHREAD_T_OFFSET (12*1024) | |
131 | #else | |
964d3577 | 132 | #define PTHREAD_T_OFFSET 0 |
c6e5f90c | 133 | #endif |
f1a1da6c A |
134 | |
135 | /* | |
a0619f9c | 136 | * Flags filed passed to bsdthread_create and back in pthread_start |
f1a1da6c A |
137 | 31 <---------------------------------> 0 |
138 | _________________________________________ | |
139 | | flags(8) | policy(8) | importance(16) | | |
140 | ----------------------------------------- | |
141 | */ | |
142 | ||
214d78a2 | 143 | #define PTHREAD_START_CUSTOM 0x01000000 // <rdar://problem/34501401> |
2546420a | 144 | #define PTHREAD_START_SETSCHED 0x02000000 |
214d78a2 | 145 | // was PTHREAD_START_DETACHED 0x04000000 |
2546420a A |
146 | #define PTHREAD_START_QOSCLASS 0x08000000 |
147 | #define PTHREAD_START_TSD_BASE_SET 0x10000000 | |
214d78a2 | 148 | #define PTHREAD_START_SUSPENDED 0x20000000 |
2546420a | 149 | #define PTHREAD_START_QOSCLASS_MASK 0x00ffffff |
f1a1da6c A |
150 | #define PTHREAD_START_POLICY_BITSHIFT 16 |
151 | #define PTHREAD_START_POLICY_MASK 0xff | |
152 | #define PTHREAD_START_IMPORTANCE_MASK 0xffff | |
153 | ||
154 | #define SCHED_OTHER POLICY_TIMESHARE | |
155 | #define SCHED_FIFO POLICY_FIFO | |
156 | #define SCHED_RR POLICY_RR | |
157 | ||
964d3577 A |
158 | #define BASEPRI_DEFAULT 31 |
159 | ||
2546420a A |
160 | uint32_t pthread_debug_tracing = 1; |
161 | ||
76b7b9a2 A |
162 | static uint32_t pthread_mutex_default_policy; |
163 | ||
164 | SYSCTL_INT(_kern, OID_AUTO, pthread_mutex_default_policy, CTLFLAG_RW | CTLFLAG_LOCKED, | |
165 | &pthread_mutex_default_policy, 0, ""); | |
166 | ||
964d3577 A |
167 | #pragma mark - Process/Thread Setup/Teardown syscalls |
168 | ||
/*
 * Compute the base-address hint handed to the VM when allocating pthread
 * stacks for this process.  The hint is randomized (stack ASLR) and chosen
 * per-architecture to avoid colliding with malloc/nano zones and the shared
 * region.  Returns the hint; callers treat it as advisory.
 */
static mach_vm_offset_t
stack_addr_hint(proc_t p, vm_map_t vmap)
{
	mach_vm_offset_t stackaddr;
	mach_vm_offset_t aslr_offset;
	bool proc64bit = proc_is64bit(p);
	bool proc64bit_data = proc_is64bit_data(p);

	// We can't safely take random values % something unless its a power-of-two
	_Static_assert(powerof2(PTH_DEFAULT_STACKSIZE), "PTH_DEFAULT_STACKSIZE is a power-of-two");

#if defined(__i386__) || defined(__x86_64__)
	(void)proc64bit_data;
	if (proc64bit) {
		// Matches vm_map_get_max_aslr_slide_pages's image shift in xnu
		aslr_offset = random() % (1 << 28); // about 512 stacks
	} else {
		// Actually bigger than the image shift, we've got ~256MB to work with
		aslr_offset = random() % (16 * PTH_DEFAULT_STACKSIZE);
	}
	// Page-align the slide so the resulting hint is mappable as-is.
	aslr_offset = vm_map_trunc_page_mask(aslr_offset, vm_map_page_mask(vmap));
	if (proc64bit) {
		// Above nanomalloc range (see NANOZONE_SIGNATURE)
		stackaddr = 0x700000000000 + aslr_offset;
	} else {
		stackaddr = SHARED_REGION_BASE_I386 + SHARED_REGION_SIZE_I386 + aslr_offset;
	}
#elif defined(__arm__) || defined(__arm64__)
	user_addr_t main_thread_stack_top = 0;
	if (pthread_kern->proc_get_user_stack) {
		main_thread_stack_top = pthread_kern->proc_get_user_stack(p);
	}
	if (proc64bit && main_thread_stack_top) {
		// The main thread stack position is randomly slid by xnu (c.f.
		// load_main() in mach_loader.c), so basing pthread stack allocations
		// where the main thread stack ends is already ASLRd and doing so
		// avoids creating a gap in the process address space that may cause
		// extra PTE memory usage. rdar://problem/33328206
		stackaddr = vm_map_trunc_page_mask((vm_map_offset_t)main_thread_stack_top,
				vm_map_page_mask(vmap));
	} else {
		// vm_map_get_max_aslr_slide_pages ensures 1MB of slide, we do better
		aslr_offset = random() % ((proc64bit ? 4 : 2) * PTH_DEFAULT_STACKSIZE);
		aslr_offset = vm_map_trunc_page_mask((vm_map_offset_t)aslr_offset,
				vm_map_page_mask(vmap));
		if (proc64bit) {
			// 64 stacks below shared region
			stackaddr = SHARED_REGION_BASE_ARM64 - 64 * PTH_DEFAULT_STACKSIZE - aslr_offset;
		} else {
			// If you try to slide down from this point, you risk ending up in memory consumed by malloc
			if (proc64bit_data) {
				stackaddr = SHARED_REGION_BASE_ARM64_32;
			} else {
				stackaddr = SHARED_REGION_BASE_ARM;
			}

			stackaddr -= 32 * PTH_DEFAULT_STACKSIZE + aslr_offset;
		}
	}
#else
#error Need to define a stack address hint for this architecture
#endif
	return stackaddr;
}
233 | ||
214d78a2 A |
234 | static bool |
235 | _pthread_priority_to_policy(pthread_priority_t priority, | |
236 | thread_qos_policy_data_t *data) | |
237 | { | |
238 | data->qos_tier = _pthread_priority_thread_qos(priority); | |
239 | data->tier_importance = _pthread_priority_relpri(priority); | |
240 | if (data->qos_tier == THREAD_QOS_UNSPECIFIED || data->tier_importance > 0 || | |
241 | data->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) { | |
242 | return false; | |
243 | } | |
244 | return true; | |
245 | } | |
246 | ||
/**
 * bsdthread_create system call.  Used by pthread_create.
 *
 * Creates a kernel thread in the calling task, publishes its Mach port into
 * the caller's IPC space (and into the new pthread's TSD if the port offset
 * is registered), loads its initial register state so it starts at the
 * process's registered thread-start trampoline, applies scheduling/QoS
 * requested through 'flags', and resumes it unless PTHREAD_START_SUSPENDED
 * was passed.  Returns 0 and stores user_pthread in *retval on success, or
 * an errno; on error the half-constructed thread and its port are torn down.
 */
int
_bsdthread_create(struct proc *p,
		__unused user_addr_t user_func, __unused user_addr_t user_funcarg,
		user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags,
		user_addr_t *retval)
{
	kern_return_t kret;
	void * sright;
	int error = 0;
	mach_vm_offset_t th_tsd_base;
	mach_port_name_t th_thport;
	thread_t th;
	task_t ctask = current_task();
	unsigned int policy, importance;
	uint32_t tsd_offset;
	bool start_suspended = (flags & PTHREAD_START_SUSPENDED);

	/* The process must have called bsdthread_register first. */
	if (pthread_kern->proc_get_register(p) == 0) {
		return EINVAL;
	}

	PTHREAD_TRACE(pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0);

	/* Create thread and make it immovable, do not pin control port yet */
	if (pthread_kern->thread_create_immovable) {
		kret = pthread_kern->thread_create_immovable(ctask, &th);
	} else {
		kret = pthread_kern->thread_create(ctask, &th);
	}

	if (kret != KERN_SUCCESS)
		return(ENOMEM);
	/* Hold a creator reference until we hand the thread off or fail. */
	thread_reference(th);

	pthread_kern->thread_set_tag(th, THREAD_TAG_PTHREAD);

	if (pthread_kern->convert_thread_to_port_pinned) {
		/* Convert to immovable/pinned thread port, but port is not pinned yet */
		sright = (void *)pthread_kern->convert_thread_to_port_pinned(th);
	} else {
		sright = (void *)pthread_kern->convert_thread_to_port(th);
	}

	if (pthread_kern->ipc_port_copyout_send_pinned) {
		/* Atomically, pin and copy out the port */
		th_thport = pthread_kern->ipc_port_copyout_send_pinned(sright, pthread_kern->task_get_ipcspace(ctask));
	} else {
		th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(ctask));
	}

	if (!MACH_PORT_VALID(th_thport)) {
		error = EMFILE; // userland will convert this into a crash
		goto out;
	}

	/* Only the "custom stack" path is supported nowadays. */
	if ((flags & PTHREAD_START_CUSTOM) == 0) {
		error = EINVAL;
		goto out;
	}

	PTHREAD_TRACE(pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3);

	tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
	if (tsd_offset) {
		/* TSD lives inside the user-visible pthread_t allocation. */
		th_tsd_base = user_pthread + tsd_offset;
		kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base);
		if (kret == KERN_SUCCESS) {
			flags |= PTHREAD_START_TSD_BASE_SET;
		}
	}
	/*
	 * Strip PTHREAD_START_SUSPENDED so that libpthread can observe the kernel
	 * supports this flag (after the fact).
	 */
	flags &= ~PTHREAD_START_SUSPENDED;

	/*
	 * Set up registers & function call.  The new thread enters at the
	 * process's registered threadstart trampoline; the extra argument
	 * registers carry func/funcarg/stack for runtimes (e.g. golang) that
	 * read them directly.
	 */
#if defined(__i386__) || defined(__x86_64__)
	if (proc_is64bit_data(p)) {
		x86_thread_state64_t state = {
			.rip = (uint64_t)pthread_kern->proc_get_threadstart(p),
			.rdi = (uint64_t)user_pthread,
			.rsi = (uint64_t)th_thport,
			.rdx = (uint64_t)user_func,     /* golang wants this */
			.rcx = (uint64_t)user_funcarg,  /* golang wants this */
			.r8  = (uint64_t)user_stack,    /* golang wants this */
			.r9  = (uint64_t)flags,

			.rsp = (uint64_t)user_stack,
		};

		(void)pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
	} else {
		x86_thread_state32_t state = {
			.eip = (uint32_t)pthread_kern->proc_get_threadstart(p),
			.eax = (uint32_t)user_pthread,
			.ebx = (uint32_t)th_thport,
			.ecx = (uint32_t)user_func,     /* golang wants this */
			.edx = (uint32_t)user_funcarg,  /* golang wants this */
			.edi = (uint32_t)user_stack,    /* golang wants this */
			.esi = (uint32_t)flags,

			.esp = (uint32_t)user_stack,
		};

		(void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
	}
#elif defined(__arm__) || defined(__arm64__)
	if (proc_is64bit_data(p)) {
#ifdef __arm64__
		arm_thread_state64_t state = {
			.pc   = (uint64_t)pthread_kern->proc_get_threadstart(p),
			.x[0] = (uint64_t)user_pthread,
			.x[1] = (uint64_t)th_thport,
			.x[2] = (uint64_t)user_func,    /* golang wants this */
			.x[3] = (uint64_t)user_funcarg, /* golang wants this */
			.x[4] = (uint64_t)user_stack,   /* golang wants this */
			.x[5] = (uint64_t)flags,

			.sp   = (uint64_t)user_stack,
		};

		(void)pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
#else
		panic("Shouldn't have a 64-bit thread on a 32-bit kernel...");
#endif // defined(__arm64__)
	} else {
		arm_thread_state_t state = {
			.pc   = (uint32_t)pthread_kern->proc_get_threadstart(p),
			.r[0] = (uint32_t)user_pthread,
			.r[1] = (uint32_t)th_thport,
			.r[2] = (uint32_t)user_func,    /* golang wants this */
			.r[3] = (uint32_t)user_funcarg, /* golang wants this */
			.r[4] = (uint32_t)user_stack,   /* golang wants this */
			.r[5] = (uint32_t)flags,

			.sp   = (uint32_t)user_stack,
		};

		(void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
	}
#else
#error bsdthread_create not defined for this architecture
#endif

	if (flags & PTHREAD_START_SETSCHED) {
		/* Set scheduling parameters if needed */
		thread_extended_policy_data_t extinfo;
		thread_precedence_policy_data_t precedinfo;

		importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
		policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;

		if (policy == SCHED_OTHER) {
			extinfo.timeshare = 1;
		} else {
			extinfo.timeshare = 0;
		}

		thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);

		/* Importance is expressed relative to the default base priority. */
		precedinfo.importance = (importance - BASEPRI_DEFAULT);
		thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
	} else if (flags & PTHREAD_START_QOSCLASS) {
		/* Set thread QoS class if requested. */
		thread_qos_policy_data_t qos;

		if (!_pthread_priority_to_policy(flags & PTHREAD_START_QOSCLASS_MASK, &qos)) {
			error = EINVAL;
			goto out;
		}
		pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY,
				(thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
	}

	if (pthread_kern->proc_get_mach_thread_self_tsd_offset) {
		uint64_t mach_thread_self_offset =
				pthread_kern->proc_get_mach_thread_self_tsd_offset(p);
		if (mach_thread_self_offset && tsd_offset) {
			/*
			 * Pre-seed the new pthread's TSD with its own port name so
			 * mach_thread_self() needn't trap; slot width follows the
			 * process's pointer size.
			 */
			bool proc64bit = proc_is64bit(p);
			if (proc64bit) {
				uint64_t th_thport_tsd = (uint64_t)th_thport;
				error = copyout(&th_thport_tsd, user_pthread + tsd_offset +
						mach_thread_self_offset, sizeof(th_thport_tsd));
			} else {
				uint32_t th_thport_tsd = (uint32_t)th_thport;
				error = copyout(&th_thport_tsd, user_pthread + tsd_offset +
						mach_thread_self_offset, sizeof(th_thport_tsd));
			}
			if (error) {
				goto out;
			}
		}
	}

	if (!start_suspended) {
		kret = pthread_kern->thread_resume(th);
		if (kret != KERN_SUCCESS) {
			error = EINVAL;
			goto out;
		}
	}
	thread_deallocate(th);	/* drop the creator reference */

	PTHREAD_TRACE(pthread_thread_create|DBG_FUNC_END, error, user_pthread, 0, 0);

	*retval = user_pthread;
	return(0);

out:
	/* Undo the copied-out send right, then destroy the unstarted thread. */
	(void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(ctask), th_thport);
	if (pthread_kern->thread_will_park_or_terminate) {
		pthread_kern->thread_will_park_or_terminate(th);
	}
	(void)thread_terminate(th);
	(void)thread_deallocate(th);
	return(error);
}
470 | ||
/**
 * bsdthread_terminate system call.  Used by pthread_terminate
 *
 * Final act of an exiting pthread: releases its stack (marked reusable and
 * protected for the main thread, deallocated otherwise), terminates the
 * calling thread, signals 'sem' so a joiner can proceed, and drops the
 * thread's kernel port 'kthport'.  Does not return to userspace.
 */
int
_bsdthread_terminate(__unused struct proc *p,
		user_addr_t stackaddr,
		size_t size,
		uint32_t kthport,
		uint32_t sem,
		__unused int32_t *retval)
{
	mach_vm_offset_t freeaddr;
	mach_vm_size_t freesize;
	kern_return_t kret;
	thread_t th = current_thread();

	freeaddr = (mach_vm_offset_t)stackaddr;
	freesize = size;

	PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_START, freeaddr, freesize, kthport, 0xff);

	if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
		if (pthread_kern->thread_get_tag(th) & THREAD_TAG_MAINTHREAD){
			/*
			 * The main thread's stack is kernel-owned: don't unmap it,
			 * just mark the pages reusable and inaccessible.
			 */
			vm_map_t user_map = pthread_kern->current_map();
			freesize = vm_map_trunc_page_mask((vm_map_offset_t)freesize - 1, vm_map_page_mask(user_map));
			kret = mach_vm_behavior_set(user_map, freeaddr, freesize, VM_BEHAVIOR_REUSABLE);
#if MACH_ASSERT
			if (kret != KERN_SUCCESS && kret != KERN_INVALID_ADDRESS) {
				os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kret);
			}
#endif
			/* Only protect if the behavior change succeeded. */
			kret = kret ? kret : mach_vm_protect(user_map, freeaddr, freesize, FALSE, VM_PROT_NONE);
			assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS);
		} else {
			kret = mach_vm_deallocate(pthread_kern->current_map(), freeaddr, freesize);
			if (kret != KERN_SUCCESS) {
				/* Best-effort: trace the failure and keep tearing down. */
				PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0);
			}
		}
	}

	if (pthread_kern->thread_will_park_or_terminate) {
		pthread_kern->thread_will_park_or_terminate(th);
	}
	if (pthread_kern->thread_terminate_pinned) {
		(void)pthread_kern->thread_terminate_pinned(th);
	} else {
		(void)thread_terminate(th);
	}
	if (sem != MACH_PORT_NULL) {
		/* Wake a pthread_join() waiter. */
		kret = pthread_kern->semaphore_signal_internal_trap(sem);
		if (kret != KERN_SUCCESS) {
			PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0);
		}
	}

	if (kthport != MACH_PORT_NULL) {
		pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(current_task()), kthport);
	}

	PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, 0, 0, 0, 0);

	/* Thread is already terminating; this does not come back. */
	pthread_kern->thread_exception_return();
	__builtin_unreachable();
}
536 | ||
/**
 * bsdthread_register system call.  Performs per-process setup.  Responsible for
 * returning capabilitiy bits to userspace and receiving userspace function addresses.
 *
 * Registers the thread-start and workqueue-thread trampolines and the
 * pthread_t size for the process, exchanges a _pthread_registration_data
 * struct with libpthread (copyin of offsets, copyout of kernel replies such
 * as main-thread QoS and the stack address hint), and clamps all TSD-relative
 * offsets so they cannot point outside the pthread_t allocation.  A process
 * may register only once.  Returns 0 with the supported feature set in
 * *retval, or an errno.
 */
int
_bsdthread_register(struct proc *p,
		user_addr_t threadstart,
		user_addr_t wqthread,
		int pthsize,
		user_addr_t pthread_init_data,
		user_addr_t pthread_init_data_size,
		uint64_t dispatchqueue_offset,
		int32_t *retval)
{
	struct _pthread_registration_data data = {};
	uint32_t max_tsd_offset;
	kern_return_t kr;
	size_t pthread_init_sz = 0;

	/* syscall randomizer test can pass bogus values */
	if (pthsize < 0 || pthsize > MAX_PTHREAD_SIZE) {
		return(EINVAL);
	}
	/*
	 * if we have pthread_init_data, then we use that and target_concptr
	 * (which is an offset) get data.
	 */
	if (pthread_init_data != 0) {
		if (pthread_init_data_size < sizeof(data.version)) {
			return EINVAL;
		}
		/* Copy at most what both sides understand (struct versioning). */
		pthread_init_sz = MIN(sizeof(data), (size_t)pthread_init_data_size);
		int ret = copyin(pthread_init_data, &data, pthread_init_sz);
		if (ret) {
			return ret;
		}
		if (data.version != (size_t)pthread_init_data_size) {
			return EINVAL;
		}
	} else {
		data.dispatch_queue_offset = dispatchqueue_offset;
	}

	/* We have to do this before proc_get_register so that it resets after fork */
	mach_vm_offset_t stackaddr = stack_addr_hint(p, pthread_kern->current_map());
	pthread_kern->proc_set_stack_addr_hint(p, (user_addr_t)stackaddr);

	/* prevent multiple registrations */
	if (pthread_kern->proc_get_register(p) != 0) {
		return(EINVAL);
	}

	pthread_kern->proc_set_threadstart(p, threadstart);
	pthread_kern->proc_set_wqthread(p, wqthread);
	pthread_kern->proc_set_pthsize(p, pthsize);
	pthread_kern->proc_set_register(p);

	/*
	 * Sanitize the TSD offset: a TSD slot (pointer-sized for the process)
	 * starting at tsd_offset must fit inside the pthread_t allocation.
	 * All other offsets below are validated against max_tsd_offset.
	 */
	uint32_t tsd_slot_sz = proc_is64bit(p) ? sizeof(uint64_t) : sizeof(uint32_t);
	if ((uint32_t)pthsize >= tsd_slot_sz &&
			data.tsd_offset <= (uint32_t)(pthsize - tsd_slot_sz)) {
		max_tsd_offset = ((uint32_t)pthsize - data.tsd_offset - tsd_slot_sz);
	} else {
		data.tsd_offset = 0;
		max_tsd_offset = 0;
	}
	pthread_kern->proc_set_pthread_tsd_offset(p, data.tsd_offset);

	if (data.dispatch_queue_offset > max_tsd_offset) {
		data.dispatch_queue_offset = 0;
	}
	pthread_kern->proc_set_dispatchqueue_offset(p, data.dispatch_queue_offset);

	if (pthread_kern->proc_set_return_to_kernel_offset) {
		if (data.return_to_kernel_offset > max_tsd_offset) {
			data.return_to_kernel_offset = 0;
		}
		pthread_kern->proc_set_return_to_kernel_offset(p,
				data.return_to_kernel_offset);
	}

	if (pthread_kern->proc_set_mach_thread_self_tsd_offset) {
		if (data.mach_thread_self_offset > max_tsd_offset) {
			data.mach_thread_self_offset = 0;
		}
		pthread_kern->proc_set_mach_thread_self_tsd_offset(p,
				data.mach_thread_self_offset);
	}

	if (pthread_init_data != 0) {
		/* Outgoing data that userspace expects as a reply */
		data.version = sizeof(struct _pthread_registration_data);
		data.main_qos = _pthread_unspecified_priority();

		if (pthread_kern->qos_main_thread_active()) {
			mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
			thread_qos_policy_data_t qos;
			boolean_t gd = FALSE;

			kr = pthread_kern->thread_policy_get(current_thread(),
					THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
			if (kr != KERN_SUCCESS || qos.qos_tier == THREAD_QOS_UNSPECIFIED) {
				/*
				 * Unspecified threads means the kernel wants us
				 * to impose legacy upon the thread.
				 */
				qos.qos_tier = THREAD_QOS_LEGACY;
				qos.tier_importance = 0;

				kr = pthread_kern->thread_policy_set_internal(current_thread(),
						THREAD_QOS_POLICY, (thread_policy_t)&qos,
						THREAD_QOS_POLICY_COUNT);
			}

			if (kr == KERN_SUCCESS) {
				data.main_qos = _pthread_priority_make_from_thread_qos(
						qos.qos_tier, 0, 0);
			}
		}

		data.stack_addr_hint = stackaddr;
		data.mutex_default_policy = pthread_mutex_default_policy;

		kr = copyout(&data, pthread_init_data, pthread_init_sz);
		if (kr != KERN_SUCCESS) {
			return EINVAL;
		}
	}

	/* return the supported feature set as the return value. */
	*retval = PTHREAD_FEATURE_SUPPORTED;

	return(0);
}
670 | ||
214d78a2 A |
671 | |
672 | #pragma mark - Workqueue Thread Support | |
673 | ||
674 | static mach_vm_size_t | |
675 | workq_thread_allocsize(proc_t p, vm_map_t wq_map, | |
676 | mach_vm_size_t *guardsize_out) | |
677 | { | |
678 | mach_vm_size_t guardsize = vm_map_page_size(wq_map); | |
679 | mach_vm_size_t pthread_size = vm_map_round_page_mask( | |
680 | pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, | |
681 | vm_map_page_mask(wq_map)); | |
682 | if (guardsize_out) *guardsize_out = guardsize; | |
683 | return guardsize + PTH_DEFAULT_STACKSIZE + pthread_size; | |
684 | } | |
964d3577 | 685 | |
/*
 * Allocate the VM reservation for a workqueue thread (guard page + stack +
 * pthread_t area) in 'vmap', preferring the process's stack address hint.
 * On success returns 0 and stores the base (guard page) address through
 * out_addr; on failure returns the kern_return_t from the VM layer.
 */
int
workq_create_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t *out_addr)
{
	mach_vm_offset_t stackaddr = pthread_kern->proc_get_stack_addr_hint(p);
	mach_vm_size_t guardsize, th_allocsize;
	kern_return_t kret;

	th_allocsize = workq_thread_allocsize(p, vmap, &guardsize);
	/* First try near the hint (mask page_size - 1 => page alignment). */
	kret = mach_vm_map(vmap, &stackaddr, th_allocsize, page_size - 1,
			VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE, NULL, 0, FALSE,
			VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);

	if (kret != KERN_SUCCESS) {
		/* Fall back to letting the VM pick any address. */
		kret = mach_vm_allocate(vmap, &stackaddr, th_allocsize,
				VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
	}

	if (kret != KERN_SUCCESS) {
		goto fail;
	}

	/*
	 * The guard page is at the lowest address
	 * The stack base is the highest address
	 */
	kret = mach_vm_protect(vmap, stackaddr, guardsize, FALSE, VM_PROT_NONE);
	if (kret != KERN_SUCCESS) {
		goto fail_vm_deallocate;
	}

	if (out_addr) {
		*out_addr = stackaddr;
	}
	return 0;

fail_vm_deallocate:
	(void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);
fail:
	return kret;
}
726 | ||
214d78a2 A |
727 | int |
728 | workq_destroy_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t stackaddr) | |
f1a1da6c | 729 | { |
214d78a2 A |
730 | return mach_vm_deallocate(vmap, stackaddr, |
731 | workq_thread_allocsize(p, vmap, NULL)); | |
f1a1da6c A |
732 | } |
733 | ||
/*
 * Mark a parked workqueue thread's stack pages as reusable so the VM can
 * reclaim their contents without unmapping the reservation.  The guard page
 * at the base is skipped; only the usable stack span is affected.  Failures
 * other than KERN_INVALID_ADDRESS are logged (debug kernels only).
 */
void
workq_markfree_threadstack(proc_t OS_UNUSED p, thread_t OS_UNUSED th,
		vm_map_t vmap, user_addr_t stackaddr)
{
	// Keep this in sync with workq_setup_thread()
	const vm_size_t guardsize = vm_map_page_size(vmap);
	const user_addr_t freeaddr = (user_addr_t)stackaddr + guardsize;
	const vm_map_offset_t freesize = vm_map_trunc_page_mask(
			(PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET) - 1,
			vm_map_page_mask(vmap)) - guardsize;

	__assert_only kern_return_t kr = mach_vm_behavior_set(vmap, freeaddr,
			freesize, VM_BEHAVIOR_REUSABLE);
#if MACH_ASSERT
	if (kr != KERN_SUCCESS && kr != KERN_INVALID_ADDRESS) {
		os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kr);
	}
#endif
}
a0619f9c | 753 | |
214d78a2 A |
/*
 * Key user-space addresses inside a workqueue thread's stack allocation.
 * Layout from low to high address: guard page, stack (grows down),
 * optional kevent data/list, pthread_t.
 */
struct workq_thread_addrs {
	user_addr_t self;         // address of the thread's pthread_t structure
	user_addr_t stack_bottom; // lowest usable stack address (just above the guard page)
	user_addr_t stack_top;    // initial stack pointer, aligned to C_WORKQ_STK_ALIGN
};
a0619f9c | 759 | |
214d78a2 A |
760 | static inline void |
761 | workq_thread_set_top_addr(struct workq_thread_addrs *th_addrs, user_addr_t addr) | |
762 | { | |
763 | th_addrs->stack_top = (addr & -C_WORKQ_STK_ALIGN); | |
764 | } | |
a0619f9c | 765 | |
214d78a2 A |
766 | static void |
767 | workq_thread_get_addrs(vm_map_t map, user_addr_t stackaddr, | |
768 | struct workq_thread_addrs *th_addrs) | |
769 | { | |
770 | const vm_size_t guardsize = vm_map_page_size(map); | |
a0619f9c | 771 | |
214d78a2 A |
772 | th_addrs->self = (user_addr_t)(stackaddr + PTH_DEFAULT_STACKSIZE + |
773 | guardsize + PTHREAD_T_OFFSET); | |
774 | workq_thread_set_top_addr(th_addrs, th_addrs->self); | |
775 | th_addrs->stack_bottom = (user_addr_t)(stackaddr + guardsize); | |
a0619f9c A |
776 | } |
777 | ||
214d78a2 A |
778 | static inline void |
779 | workq_set_register_state(proc_t p, thread_t th, | |
780 | struct workq_thread_addrs *addrs, mach_port_name_t kport, | |
781 | user_addr_t kevent_list, uint32_t upcall_flags, int kevent_count) | |
f1a1da6c | 782 | { |
214d78a2 A |
783 | user_addr_t wqstart_fnptr = pthread_kern->proc_get_wqthread(p); |
784 | if (!wqstart_fnptr) { | |
785 | panic("workqueue thread start function pointer is NULL"); | |
786 | } | |
f1a1da6c | 787 | |
214d78a2 A |
788 | #if defined(__i386__) || defined(__x86_64__) |
789 | if (proc_is64bit_data(p) == 0) { | |
790 | x86_thread_state32_t state = { | |
791 | .eip = (unsigned int)wqstart_fnptr, | |
792 | .eax = /* arg0 */ (unsigned int)addrs->self, | |
793 | .ebx = /* arg1 */ (unsigned int)kport, | |
794 | .ecx = /* arg2 */ (unsigned int)addrs->stack_bottom, | |
795 | .edx = /* arg3 */ (unsigned int)kevent_list, | |
796 | .edi = /* arg4 */ (unsigned int)upcall_flags, | |
797 | .esi = /* arg5 */ (unsigned int)kevent_count, | |
f1a1da6c | 798 | |
214d78a2 A |
799 | .esp = (int)((vm_offset_t)addrs->stack_top), |
800 | }; | |
801 | ||
802 | int error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state); | |
803 | if (error != KERN_SUCCESS) { | |
804 | panic(__func__ ": thread_set_wq_state failed: %d", error); | |
2546420a | 805 | } |
214d78a2 A |
806 | } else { |
807 | x86_thread_state64_t state64 = { | |
808 | // x86-64 already passes all the arguments in registers, so we just put them in their final place here | |
809 | .rip = (uint64_t)wqstart_fnptr, | |
810 | .rdi = (uint64_t)addrs->self, | |
811 | .rsi = (uint64_t)kport, | |
812 | .rdx = (uint64_t)addrs->stack_bottom, | |
813 | .rcx = (uint64_t)kevent_list, | |
814 | .r8 = (uint64_t)upcall_flags, | |
815 | .r9 = (uint64_t)kevent_count, | |
2546420a | 816 | |
214d78a2 A |
817 | .rsp = (uint64_t)(addrs->stack_top) |
818 | }; | |
2546420a | 819 | |
214d78a2 A |
820 | int error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64); |
821 | if (error != KERN_SUCCESS) { | |
822 | panic(__func__ ": thread_set_wq_state failed: %d", error); | |
2546420a A |
823 | } |
824 | } | |
c6e5f90c A |
825 | #elif defined(__arm__) || defined(__arm64__) |
826 | if (!proc_is64bit_data(p)) { | |
827 | arm_thread_state_t state = { | |
828 | .pc = (int)wqstart_fnptr, | |
829 | .r[0] = (unsigned int)addrs->self, | |
830 | .r[1] = (unsigned int)kport, | |
831 | .r[2] = (unsigned int)addrs->stack_bottom, | |
832 | .r[3] = (unsigned int)kevent_list, | |
833 | // will be pushed onto the stack as arg4/5 | |
834 | .r[4] = (unsigned int)upcall_flags, | |
835 | .r[5] = (unsigned int)kevent_count, | |
836 | ||
837 | .sp = (int)(addrs->stack_top) | |
838 | }; | |
839 | ||
840 | int error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state); | |
841 | if (error != KERN_SUCCESS) { | |
842 | panic(__func__ ": thread_set_wq_state failed: %d", error); | |
843 | } | |
844 | } else { | |
845 | #if defined(__arm64__) | |
846 | arm_thread_state64_t state = { | |
847 | .pc = (uint64_t)wqstart_fnptr, | |
848 | .x[0] = (uint64_t)addrs->self, | |
849 | .x[1] = (uint64_t)kport, | |
850 | .x[2] = (uint64_t)addrs->stack_bottom, | |
851 | .x[3] = (uint64_t)kevent_list, | |
852 | .x[4] = (uint64_t)upcall_flags, | |
853 | .x[5] = (uint64_t)kevent_count, | |
854 | ||
855 | .sp = (uint64_t)((vm_offset_t)addrs->stack_top), | |
856 | }; | |
857 | ||
858 | int error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state); | |
859 | if (error != KERN_SUCCESS) { | |
860 | panic(__func__ ": thread_set_wq_state failed: %d", error); | |
861 | } | |
862 | #else /* defined(__arm64__) */ | |
863 | panic("Shouldn't have a 64-bit thread on a 32-bit kernel..."); | |
864 | #endif /* defined(__arm64__) */ | |
865 | } | |
214d78a2 A |
866 | #else |
867 | #error setup_wqthread not defined for this architecture | |
868 | #endif | |
869 | } | |
2546420a | 870 | |
c6e5f90c A |
871 | static inline int |
872 | workq_kevent(proc_t p, struct workq_thread_addrs *th_addrs, | |
214d78a2 A |
873 | user_addr_t eventlist, int nevents, int kevent_flags, |
874 | user_addr_t *kevent_list_out, int *kevent_count_out) | |
875 | { | |
214d78a2 | 876 | int ret; |
f1a1da6c | 877 | |
c6e5f90c A |
878 | user_addr_t kevent_list = th_addrs->self - |
879 | WQ_KEVENT_LIST_LEN * sizeof(struct kevent_qos_s); | |
880 | user_addr_t data_buf = kevent_list - WQ_KEVENT_DATA_SIZE; | |
881 | user_size_t data_available = WQ_KEVENT_DATA_SIZE; | |
f1a1da6c | 882 | |
c6e5f90c A |
883 | ret = pthread_kern->kevent_workq_internal(p, eventlist, nevents, |
884 | kevent_list, WQ_KEVENT_LIST_LEN, | |
885 | data_buf, &data_available, | |
886 | kevent_flags, kevent_count_out); | |
f1a1da6c | 887 | |
214d78a2 | 888 | // squash any errors into just empty output |
c6e5f90c | 889 | if (ret != 0 || *kevent_count_out == -1) { |
214d78a2 A |
890 | *kevent_list_out = NULL; |
891 | *kevent_count_out = 0; | |
892 | return ret; | |
893 | } | |
f1a1da6c | 894 | |
c6e5f90c | 895 | workq_thread_set_top_addr(th_addrs, data_buf + data_available); |
214d78a2 A |
896 | *kevent_list_out = kevent_list; |
897 | return ret; | |
898 | } | |
f1a1da6c | 899 | |
964d3577 A |
900 | /** |
901 | * configures initial thread stack/registers to jump into: | |
2546420a | 902 | * _pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *keventlist, int upcall_flags, int nkevents); |
964d3577 A |
903 | * to get there we jump through assembly stubs in pthread_asm.s. Those |
904 | * routines setup a stack frame, using the current stack pointer, and marshall | |
905 | * arguments from registers to the stack as required by the ABI. | |
906 | * | |
907 | * One odd thing we do here is to start the pthread_t 4k below what would be the | |
908 | * top of the stack otherwise. This is because usually only the first 4k of the | |
909 | * pthread_t will be used and so we want to put it on the same 16k page as the | |
910 | * top of the stack to save memory. | |
911 | * | |
912 | * When we are done the stack will look like: | |
913 | * |-----------| th_stackaddr + th_allocsize | |
914 | * |pthread_t | th_stackaddr + DEFAULT_STACKSIZE + guardsize + PTHREAD_STACK_OFFSET | |
a0619f9c A |
915 | * |kevent list| optionally - at most WQ_KEVENT_LIST_LEN events |
916 | * |kevent data| optionally - at most WQ_KEVENT_DATA_SIZE bytes | |
c6e5f90c | 917 | * |stack gap | bottom aligned to 16 bytes |
964d3577 A |
918 | * | STACK | |
919 | * | ⇓ | | |
920 | * | | | |
921 | * |guard page | guardsize | |
922 | * |-----------| th_stackaddr | |
923 | */ | |
214d78a2 | 924 | __attribute__((noreturn,noinline)) |
2546420a | 925 | void |
214d78a2 A |
926 | workq_setup_thread(proc_t p, thread_t th, vm_map_t map, user_addr_t stackaddr, |
927 | mach_port_name_t kport, int th_qos __unused, int setup_flags, int upcall_flags) | |
f1a1da6c | 928 | { |
214d78a2 A |
929 | struct workq_thread_addrs th_addrs; |
930 | bool first_use = (setup_flags & WQ_SETUP_FIRST_USE); | |
931 | user_addr_t kevent_list = NULL; | |
932 | int kevent_count = 0; | |
f1a1da6c | 933 | |
214d78a2 | 934 | workq_thread_get_addrs(map, stackaddr, &th_addrs); |
2546420a | 935 | |
214d78a2 | 936 | if (first_use) { |
a0619f9c A |
937 | uint32_t tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p); |
938 | if (tsd_offset) { | |
214d78a2 A |
939 | mach_vm_offset_t th_tsd_base = th_addrs.self + tsd_offset; |
940 | kern_return_t kret = pthread_kern->thread_set_tsd_base(th, | |
941 | th_tsd_base); | |
a0619f9c A |
942 | if (kret == KERN_SUCCESS) { |
943 | upcall_flags |= WQ_FLAG_THREAD_TSD_BASE_SET; | |
944 | } | |
2546420a | 945 | } |
2546420a | 946 | |
2546420a | 947 | /* |
214d78a2 A |
948 | * Pre-fault the first page of the new thread's stack and the page that will |
949 | * contain the pthread_t structure. | |
950 | */ | |
951 | vm_map_offset_t mask = vm_map_page_mask(map); | |
952 | vm_map_offset_t th_page = vm_map_trunc_page_mask(th_addrs.self, mask); | |
953 | vm_map_offset_t stk_page = vm_map_trunc_page_mask(th_addrs.stack_top - 1, mask); | |
954 | if (th_page != stk_page) { | |
955 | vm_fault(map, stk_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0); | |
2546420a | 956 | } |
214d78a2 | 957 | vm_fault(map, th_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0); |
2546420a | 958 | } |
f1a1da6c | 959 | |
214d78a2 A |
960 | if (setup_flags & WQ_SETUP_EXIT_THREAD) { |
961 | kevent_count = WORKQ_EXIT_THREAD_NKEVENT; | |
962 | } else if (upcall_flags & WQ_FLAG_THREAD_KEVENT) { | |
a0619f9c | 963 | unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE; |
c6e5f90c | 964 | workq_kevent(p, &th_addrs, NULL, 0, flags, &kevent_list, &kevent_count); |
964d3577 A |
965 | } |
966 | ||
214d78a2 A |
967 | workq_set_register_state(p, th, &th_addrs, kport, |
968 | kevent_list, upcall_flags, kevent_count); | |
964d3577 | 969 | |
214d78a2 A |
970 | if (first_use) { |
971 | pthread_kern->thread_bootstrap_return(); | |
964d3577 | 972 | } else { |
214d78a2 | 973 | pthread_kern->unix_syscall_return(EJUSTRETURN); |
f1a1da6c | 974 | } |
214d78a2 | 975 | __builtin_unreachable(); |
964d3577 | 976 | } |
964d3577 | 977 | |
a0619f9c | 978 | int |
214d78a2 A |
979 | workq_handle_stack_events(proc_t p, thread_t th, vm_map_t map, |
980 | user_addr_t stackaddr, mach_port_name_t kport, | |
981 | user_addr_t events, int nevents, int upcall_flags) | |
f1a1da6c | 982 | { |
214d78a2 A |
983 | struct workq_thread_addrs th_addrs; |
984 | user_addr_t kevent_list = NULL; | |
985 | int kevent_count = 0, error; | |
986 | __assert_only kern_return_t kr; | |
f1a1da6c | 987 | |
214d78a2 | 988 | workq_thread_get_addrs(map, stackaddr, &th_addrs); |
2546420a | 989 | |
214d78a2 A |
990 | unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE | |
991 | KEVENT_FLAG_PARKING; | |
c6e5f90c | 992 | error = workq_kevent(p, &th_addrs, events, nevents, flags, |
214d78a2 | 993 | &kevent_list, &kevent_count); |
2546420a | 994 | |
214d78a2 A |
995 | if (error || kevent_count == 0) { |
996 | return error; | |
2546420a A |
997 | } |
998 | ||
214d78a2 A |
999 | kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL); |
1000 | assert(kr == KERN_SUCCESS); | |
2546420a | 1001 | |
214d78a2 A |
1002 | workq_set_register_state(p, th, &th_addrs, kport, |
1003 | kevent_list, upcall_flags, kevent_count); | |
2546420a | 1004 | |
214d78a2 A |
1005 | pthread_kern->unix_syscall_return(EJUSTRETURN); |
1006 | __builtin_unreachable(); | |
2546420a A |
1007 | } |
1008 | ||
a0619f9c | 1009 | int |
f1a1da6c A |
1010 | _thread_selfid(__unused struct proc *p, uint64_t *retval) |
1011 | { | |
1012 | thread_t thread = current_thread(); | |
1013 | *retval = thread_tid(thread); | |
1014 | return KERN_SUCCESS; | |
1015 | } | |
1016 | ||
/*
 * One-time initialization of the pthread kernel extension: lock groups and
 * attributes, the psynch hash/zone machinery, and the mutex-policy sysctl.
 * Must run before any pthread synchronizer syscall is serviced.
 */
void
_pthread_init(void)
{
	pthread_lck_grp_attr = lck_grp_attr_alloc_init();
	pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);

	/*
	 * allocate the lock attribute for pthread synchronizers
	 */
	pthread_lck_attr = lck_attr_alloc_init();
	pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);

	// psynch kwq hash table, cleanup thread-call, and zone allocator.
	pth_global_hashinit();
	psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);
	psynch_zoneinit();

	// Allow the default mutex policy to be overridden from the boot args.
	int policy_bootarg;
	if (PE_parse_boot_argn("pthread_mutex_default_policy", &policy_bootarg, sizeof(policy_bootarg))) {
		pthread_mutex_default_policy = policy_bootarg;
	}

	sysctl_register_oid(&sysctl__kern_pthread_mutex_default_policy);
}