]> git.saurik.com Git - apple/libpthread.git/blob - kern/kern_support.c
d33d6e31d375a2e61d43ee6695618c0d44a479f0
[apple/libpthread.git] / kern / kern_support.c
1 /*
2 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
29 /*
30 * pthread_synch.c
31 */
32
33 #pragma mark - Front Matter
34
35 #define _PTHREAD_CONDATTR_T
36 #define _PTHREAD_COND_T
37 #define _PTHREAD_MUTEXATTR_T
38 #define _PTHREAD_MUTEX_T
39 #define _PTHREAD_RWLOCKATTR_T
40 #define _PTHREAD_RWLOCK_T
41
42 #undef pthread_mutexattr_t
43 #undef pthread_mutex_t
44 #undef pthread_condattr_t
45 #undef pthread_cond_t
46 #undef pthread_rwlockattr_t
47 #undef pthread_rwlock_t
48
49 #include <sys/cdefs.h>
50 #include <os/log.h>
51
52 // <rdar://problem/26158937> panic() should be marked noreturn
53 extern void panic(const char *string, ...) __printflike(1,2) __dead2;
54
55 #include <sys/param.h>
56 #include <sys/queue.h>
57 #include <sys/resourcevar.h>
58 //#include <sys/proc_internal.h>
59 #include <sys/kauth.h>
60 #include <sys/systm.h>
61 #include <sys/timeb.h>
62 #include <sys/times.h>
63 #include <sys/acct.h>
64 #include <sys/kernel.h>
65 #include <sys/wait.h>
66 #include <sys/signalvar.h>
67 #include <sys/sysctl.h>
68 #include <sys/syslog.h>
69 #include <sys/stat.h>
70 #include <sys/lock.h>
71 #include <sys/kdebug.h>
72 //#include <sys/sysproto.h>
73 #include <sys/vm.h>
74 #include <sys/user.h> /* for coredump */
75 #include <sys/proc_info.h> /* for fill_procworkqueue */
76
77 #include <mach/mach_port.h>
78 #include <mach/mach_types.h>
79 #include <mach/semaphore.h>
80 #include <mach/sync_policy.h>
81 #include <mach/task.h>
82 #include <mach/vm_prot.h>
83 #include <kern/kern_types.h>
84 #include <kern/task.h>
85 #include <kern/clock.h>
86 #include <mach/kern_return.h>
87 #include <kern/thread.h>
88 #include <kern/zalloc.h>
89 #include <kern/sched_prim.h> /* for thread_exception_return */
90 #include <kern/processor.h>
91 #include <kern/assert.h>
92 #include <mach/mach_vm.h>
93 #include <mach/mach_param.h>
94 #include <mach/thread_status.h>
95 #include <mach/thread_policy.h>
96 #include <mach/message.h>
97 #include <mach/port.h>
98 //#include <vm/vm_protos.h>
99 #include <vm/vm_fault.h>
100 #include <vm/vm_map.h>
101 #include <mach/thread_act.h> /* for thread_resume */
102 #include <machine/machine_routines.h>
103 #include <mach/shared_region.h>
104
105 #include "kern/kern_internal.h"
106
/* Supply a local value when the included headers do not define this flag. */
#if !defined(WQ_SETUP_EXIT_THREAD)
#define WQ_SETUP_EXIT_THREAD	8
#endif

// XXX: Ditto for thread tags from kern/thread.h
#define THREAD_TAG_MAINTHREAD	0x1
#define THREAD_TAG_PTHREAD	0x10
#define THREAD_TAG_WORKQUEUE	0x20
115
116 lck_grp_attr_t *pthread_lck_grp_attr;
117 lck_grp_t *pthread_lck_grp;
118 lck_attr_t *pthread_lck_attr;
119
/* Stack alignment values (presumably in bytes) for 32-bit and 64-bit ABIs. */
#define C_32_STK_ALIGN		16
#define C_64_STK_ALIGN		16

// WORKQ use the largest alignment any platform needs
#define C_WORKQ_STK_ALIGN	16

/*
 * Offset added to the stack top to locate the pthread_t of a workqueue thread.
 *
 * On arm64, pull the pthread_t into the same page as the top of the stack so
 * we dirty one less page.
 * <rdar://problem/19941744> The _pthread struct at the top of the stack
 * shouldn't be page-aligned
 */
#if !defined(__arm64__)
#define PTHREAD_T_OFFSET	0
#else
#define PTHREAD_T_OFFSET	(12*1024)
#endif
134
/*
 * Flags field passed to bsdthread_create and back in pthread_start
 *
 *   31 <---------------------------------> 0
 *   _________________________________________
 *   | flags(8) | policy(8) | importance(16) |
 *   -----------------------------------------
 *
 * When PTHREAD_START_QOSCLASS is set (and PTHREAD_START_SETSCHED is not),
 * _bsdthread_create() instead interprets the low 24 bits
 * (PTHREAD_START_QOSCLASS_MASK) as a pthread_priority_t.
 */

/* Bits of the top "flags" byte. */
#define PTHREAD_START_CUSTOM		0x01000000 // <rdar://problem/34501401>
#define PTHREAD_START_SETSCHED		0x02000000
// was PTHREAD_START_DETACHED		0x04000000
#define PTHREAD_START_QOSCLASS		0x08000000
#define PTHREAD_START_TSD_BASE_SET	0x10000000
#define PTHREAD_START_SUSPENDED		0x20000000

/* Field extraction helpers. */
#define PTHREAD_START_QOSCLASS_MASK	0x00ffffff
#define PTHREAD_START_POLICY_BITSHIFT	16
#define PTHREAD_START_POLICY_MASK	0xff
#define PTHREAD_START_IMPORTANCE_MASK	0xffff

/* POSIX scheduling policy names mapped onto the Mach policy constants. */
#define SCHED_OTHER	POLICY_TIMESHARE
#define SCHED_FIFO	POLICY_FIFO
#define SCHED_RR	POLICY_RR

/* Subtracted from the requested importance to form the precedence value. */
#define BASEPRI_DEFAULT	31
159
160 uint32_t pthread_debug_tracing = 1;
161
162 static uint32_t pthread_mutex_default_policy;
163
164 SYSCTL_INT(_kern, OID_AUTO, pthread_mutex_default_policy, CTLFLAG_RW | CTLFLAG_LOCKED,
165 &pthread_mutex_default_policy, 0, "");
166
167 #pragma mark - Process/Thread Setup/Teardown syscalls
168
169 static mach_vm_offset_t
170 stack_addr_hint(proc_t p, vm_map_t vmap)
171 {
172 mach_vm_offset_t stackaddr;
173 mach_vm_offset_t aslr_offset;
174 bool proc64bit = proc_is64bit(p);
175 bool proc64bit_data = proc_is64bit_data(p);
176
177 // We can't safely take random values % something unless its a power-of-two
178 _Static_assert(powerof2(PTH_DEFAULT_STACKSIZE), "PTH_DEFAULT_STACKSIZE is a power-of-two");
179
180 #if defined(__i386__) || defined(__x86_64__)
181 (void)proc64bit_data;
182 if (proc64bit) {
183 // Matches vm_map_get_max_aslr_slide_pages's image shift in xnu
184 aslr_offset = random() % (1 << 28); // about 512 stacks
185 } else {
186 // Actually bigger than the image shift, we've got ~256MB to work with
187 aslr_offset = random() % (16 * PTH_DEFAULT_STACKSIZE);
188 }
189 aslr_offset = vm_map_trunc_page_mask(aslr_offset, vm_map_page_mask(vmap));
190 if (proc64bit) {
191 // Above nanomalloc range (see NANOZONE_SIGNATURE)
192 stackaddr = 0x700000000000 + aslr_offset;
193 } else {
194 stackaddr = SHARED_REGION_BASE_I386 + SHARED_REGION_SIZE_I386 + aslr_offset;
195 }
196 #elif defined(__arm__) || defined(__arm64__)
197 user_addr_t main_thread_stack_top = 0;
198 if (pthread_kern->proc_get_user_stack) {
199 main_thread_stack_top = pthread_kern->proc_get_user_stack(p);
200 }
201 if (proc64bit && main_thread_stack_top) {
202 // The main thread stack position is randomly slid by xnu (c.f.
203 // load_main() in mach_loader.c), so basing pthread stack allocations
204 // where the main thread stack ends is already ASLRd and doing so
205 // avoids creating a gap in the process address space that may cause
206 // extra PTE memory usage. rdar://problem/33328206
207 stackaddr = vm_map_trunc_page_mask((vm_map_offset_t)main_thread_stack_top,
208 vm_map_page_mask(vmap));
209 } else {
210 // vm_map_get_max_aslr_slide_pages ensures 1MB of slide, we do better
211 aslr_offset = random() % ((proc64bit ? 4 : 2) * PTH_DEFAULT_STACKSIZE);
212 aslr_offset = vm_map_trunc_page_mask((vm_map_offset_t)aslr_offset,
213 vm_map_page_mask(vmap));
214 if (proc64bit) {
215 // 64 stacks below shared region
216 stackaddr = SHARED_REGION_BASE_ARM64 - 64 * PTH_DEFAULT_STACKSIZE - aslr_offset;
217 } else {
218 // If you try to slide down from this point, you risk ending up in memory consumed by malloc
219 if (proc64bit_data) {
220 stackaddr = SHARED_REGION_BASE_ARM64_32;
221 } else {
222 stackaddr = SHARED_REGION_BASE_ARM;
223 }
224
225 stackaddr -= 32 * PTH_DEFAULT_STACKSIZE + aslr_offset;
226 }
227 }
228 #else
229 #error Need to define a stack address hint for this architecture
230 #endif
231 return stackaddr;
232 }
233
234 static bool
235 _pthread_priority_to_policy(pthread_priority_t priority,
236 thread_qos_policy_data_t *data)
237 {
238 data->qos_tier = _pthread_priority_thread_qos(priority);
239 data->tier_importance = _pthread_priority_relpri(priority);
240 if (data->qos_tier == THREAD_QOS_UNSPECIFIED || data->tier_importance > 0 ||
241 data->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
242 return false;
243 }
244 return true;
245 }
246
247 /**
248 * bsdthread_create system call. Used by pthread_create.
249 */
250 int
251 _bsdthread_create(struct proc *p,
252 __unused user_addr_t user_func, __unused user_addr_t user_funcarg,
253 user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags,
254 user_addr_t *retval)
255 {
256 kern_return_t kret;
257 void * sright;
258 int error = 0;
259 mach_vm_offset_t th_tsd_base;
260 mach_port_name_t th_thport;
261 thread_t th;
262 task_t ctask = current_task();
263 unsigned int policy, importance;
264 uint32_t tsd_offset;
265 bool start_suspended = (flags & PTHREAD_START_SUSPENDED);
266
267 if (pthread_kern->proc_get_register(p) == 0) {
268 return EINVAL;
269 }
270
271 PTHREAD_TRACE(pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0);
272
273 /* Create thread and make it immovable, do not pin control port yet */
274 if (pthread_kern->thread_create_immovable) {
275 kret = pthread_kern->thread_create_immovable(ctask, &th);
276 } else {
277 kret = pthread_kern->thread_create(ctask, &th);
278 }
279
280 if (kret != KERN_SUCCESS)
281 return(ENOMEM);
282 thread_reference(th);
283
284 pthread_kern->thread_set_tag(th, THREAD_TAG_PTHREAD);
285
286 if (pthread_kern->convert_thread_to_port_pinned) {
287 /* Convert to immovable/pinned thread port, but port is not pinned yet */
288 sright = (void *)pthread_kern->convert_thread_to_port_pinned(th);
289 } else {
290 sright = (void *)pthread_kern->convert_thread_to_port(th);
291 }
292
293 if (pthread_kern->ipc_port_copyout_send_pinned) {
294 /* Atomically, pin and copy out the port */
295 th_thport = pthread_kern->ipc_port_copyout_send_pinned(sright, pthread_kern->task_get_ipcspace(ctask));
296 } else {
297 th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(ctask));
298 }
299
300 if (!MACH_PORT_VALID(th_thport)) {
301 error = EMFILE; // userland will convert this into a crash
302 goto out;
303 }
304
305 if ((flags & PTHREAD_START_CUSTOM) == 0) {
306 error = EINVAL;
307 goto out;
308 }
309
310 PTHREAD_TRACE(pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3);
311
312 tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
313 if (tsd_offset) {
314 th_tsd_base = user_pthread + tsd_offset;
315 kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base);
316 if (kret == KERN_SUCCESS) {
317 flags |= PTHREAD_START_TSD_BASE_SET;
318 }
319 }
320 /*
321 * Strip PTHREAD_START_SUSPENDED so that libpthread can observe the kernel
322 * supports this flag (after the fact).
323 */
324 flags &= ~PTHREAD_START_SUSPENDED;
325
326 /*
327 * Set up registers & function call.
328 */
329 #if defined(__i386__) || defined(__x86_64__)
330 if (proc_is64bit_data(p)) {
331 x86_thread_state64_t state = {
332 .rip = (uint64_t)pthread_kern->proc_get_threadstart(p),
333 .rdi = (uint64_t)user_pthread,
334 .rsi = (uint64_t)th_thport,
335 .rdx = (uint64_t)user_func, /* golang wants this */
336 .rcx = (uint64_t)user_funcarg, /* golang wants this */
337 .r8 = (uint64_t)user_stack, /* golang wants this */
338 .r9 = (uint64_t)flags,
339
340 .rsp = (uint64_t)user_stack,
341 };
342
343 (void)pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
344 } else {
345 x86_thread_state32_t state = {
346 .eip = (uint32_t)pthread_kern->proc_get_threadstart(p),
347 .eax = (uint32_t)user_pthread,
348 .ebx = (uint32_t)th_thport,
349 .ecx = (uint32_t)user_func, /* golang wants this */
350 .edx = (uint32_t)user_funcarg, /* golang wants this */
351 .edi = (uint32_t)user_stack, /* golang wants this */
352 .esi = (uint32_t)flags,
353
354 .esp = (uint32_t)user_stack,
355 };
356
357 (void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
358 }
359 #elif defined(__arm__) || defined(__arm64__)
360 if (proc_is64bit_data(p)) {
361 #ifdef __arm64__
362 arm_thread_state64_t state = {
363 .pc = (uint64_t)pthread_kern->proc_get_threadstart(p),
364 .x[0] = (uint64_t)user_pthread,
365 .x[1] = (uint64_t)th_thport,
366 .x[2] = (uint64_t)user_func, /* golang wants this */
367 .x[3] = (uint64_t)user_funcarg, /* golang wants this */
368 .x[4] = (uint64_t)user_stack, /* golang wants this */
369 .x[5] = (uint64_t)flags,
370
371 .sp = (uint64_t)user_stack,
372 };
373
374 (void)pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
375 #else
376 panic("Shouldn't have a 64-bit thread on a 32-bit kernel...");
377 #endif // defined(__arm64__)
378 } else {
379 arm_thread_state_t state = {
380 .pc = (uint32_t)pthread_kern->proc_get_threadstart(p),
381 .r[0] = (uint32_t)user_pthread,
382 .r[1] = (uint32_t)th_thport,
383 .r[2] = (uint32_t)user_func, /* golang wants this */
384 .r[3] = (uint32_t)user_funcarg, /* golang wants this */
385 .r[4] = (uint32_t)user_stack, /* golang wants this */
386 .r[5] = (uint32_t)flags,
387
388 .sp = (uint32_t)user_stack,
389 };
390
391 (void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
392 }
393 #else
394 #error bsdthread_create not defined for this architecture
395 #endif
396
397 if (flags & PTHREAD_START_SETSCHED) {
398 /* Set scheduling parameters if needed */
399 thread_extended_policy_data_t extinfo;
400 thread_precedence_policy_data_t precedinfo;
401
402 importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
403 policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;
404
405 if (policy == SCHED_OTHER) {
406 extinfo.timeshare = 1;
407 } else {
408 extinfo.timeshare = 0;
409 }
410
411 thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);
412
413 precedinfo.importance = (importance - BASEPRI_DEFAULT);
414 thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
415 } else if (flags & PTHREAD_START_QOSCLASS) {
416 /* Set thread QoS class if requested. */
417 thread_qos_policy_data_t qos;
418
419 if (!_pthread_priority_to_policy(flags & PTHREAD_START_QOSCLASS_MASK, &qos)) {
420 error = EINVAL;
421 goto out;
422 }
423 pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY,
424 (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
425 }
426
427 if (pthread_kern->proc_get_mach_thread_self_tsd_offset) {
428 uint64_t mach_thread_self_offset =
429 pthread_kern->proc_get_mach_thread_self_tsd_offset(p);
430 if (mach_thread_self_offset && tsd_offset) {
431 bool proc64bit = proc_is64bit(p);
432 if (proc64bit) {
433 uint64_t th_thport_tsd = (uint64_t)th_thport;
434 error = copyout(&th_thport_tsd, user_pthread + tsd_offset +
435 mach_thread_self_offset, sizeof(th_thport_tsd));
436 } else {
437 uint32_t th_thport_tsd = (uint32_t)th_thport;
438 error = copyout(&th_thport_tsd, user_pthread + tsd_offset +
439 mach_thread_self_offset, sizeof(th_thport_tsd));
440 }
441 if (error) {
442 goto out;
443 }
444 }
445 }
446
447 if (!start_suspended) {
448 kret = pthread_kern->thread_resume(th);
449 if (kret != KERN_SUCCESS) {
450 error = EINVAL;
451 goto out;
452 }
453 }
454 thread_deallocate(th); /* drop the creator reference */
455
456 PTHREAD_TRACE(pthread_thread_create|DBG_FUNC_END, error, user_pthread, 0, 0);
457
458 *retval = user_pthread;
459 return(0);
460
461 out:
462 (void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(ctask), th_thport);
463 if (pthread_kern->thread_will_park_or_terminate) {
464 pthread_kern->thread_will_park_or_terminate(th);
465 }
466 (void)thread_terminate(th);
467 (void)thread_deallocate(th);
468 return(error);
469 }
470
471 /**
472 * bsdthread_terminate system call. Used by pthread_terminate
473 */
474 int
475 _bsdthread_terminate(__unused struct proc *p,
476 user_addr_t stackaddr,
477 size_t size,
478 uint32_t kthport,
479 uint32_t sem,
480 __unused int32_t *retval)
481 {
482 mach_vm_offset_t freeaddr;
483 mach_vm_size_t freesize;
484 kern_return_t kret;
485 thread_t th = current_thread();
486
487 freeaddr = (mach_vm_offset_t)stackaddr;
488 freesize = size;
489
490 PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_START, freeaddr, freesize, kthport, 0xff);
491
492 if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
493 if (pthread_kern->thread_get_tag(th) & THREAD_TAG_MAINTHREAD){
494 vm_map_t user_map = pthread_kern->current_map();
495 freesize = vm_map_trunc_page_mask((vm_map_offset_t)freesize - 1, vm_map_page_mask(user_map));
496 kret = mach_vm_behavior_set(user_map, freeaddr, freesize, VM_BEHAVIOR_REUSABLE);
497 #if MACH_ASSERT
498 if (kret != KERN_SUCCESS && kret != KERN_INVALID_ADDRESS) {
499 os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kret);
500 }
501 #endif
502 kret = kret ? kret : mach_vm_protect(user_map, freeaddr, freesize, FALSE, VM_PROT_NONE);
503 assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS);
504 } else {
505 kret = mach_vm_deallocate(pthread_kern->current_map(), freeaddr, freesize);
506 if (kret != KERN_SUCCESS) {
507 PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0);
508 }
509 }
510 }
511
512 if (pthread_kern->thread_will_park_or_terminate) {
513 pthread_kern->thread_will_park_or_terminate(th);
514 }
515 if (pthread_kern->thread_terminate_pinned) {
516 (void)pthread_kern->thread_terminate_pinned(th);
517 } else {
518 (void)thread_terminate(th);
519 }
520 if (sem != MACH_PORT_NULL) {
521 kret = pthread_kern->semaphore_signal_internal_trap(sem);
522 if (kret != KERN_SUCCESS) {
523 PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0);
524 }
525 }
526
527 if (kthport != MACH_PORT_NULL) {
528 pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(current_task()), kthport);
529 }
530
531 PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, 0, 0, 0, 0);
532
533 pthread_kern->thread_exception_return();
534 __builtin_unreachable();
535 }
536
537 /**
538 * bsdthread_register system call. Performs per-process setup. Responsible for
539 * returning capabilitiy bits to userspace and receiving userspace function addresses.
540 */
541 int
542 _bsdthread_register(struct proc *p,
543 user_addr_t threadstart,
544 user_addr_t wqthread,
545 int pthsize,
546 user_addr_t pthread_init_data,
547 user_addr_t pthread_init_data_size,
548 uint64_t dispatchqueue_offset,
549 int32_t *retval)
550 {
551 struct _pthread_registration_data data = {};
552 uint32_t max_tsd_offset;
553 kern_return_t kr;
554 size_t pthread_init_sz = 0;
555
556 /* syscall randomizer test can pass bogus values */
557 if (pthsize < 0 || pthsize > MAX_PTHREAD_SIZE) {
558 return(EINVAL);
559 }
560 /*
561 * if we have pthread_init_data, then we use that and target_concptr
562 * (which is an offset) get data.
563 */
564 if (pthread_init_data != 0) {
565 if (pthread_init_data_size < sizeof(data.version)) {
566 return EINVAL;
567 }
568 pthread_init_sz = MIN(sizeof(data), (size_t)pthread_init_data_size);
569 int ret = copyin(pthread_init_data, &data, pthread_init_sz);
570 if (ret) {
571 return ret;
572 }
573 if (data.version != (size_t)pthread_init_data_size) {
574 return EINVAL;
575 }
576 } else {
577 data.dispatch_queue_offset = dispatchqueue_offset;
578 }
579
580 /* We have to do this before proc_get_register so that it resets after fork */
581 mach_vm_offset_t stackaddr = stack_addr_hint(p, pthread_kern->current_map());
582 pthread_kern->proc_set_stack_addr_hint(p, (user_addr_t)stackaddr);
583
584 /* prevent multiple registrations */
585 if (pthread_kern->proc_get_register(p) != 0) {
586 return(EINVAL);
587 }
588
589 pthread_kern->proc_set_threadstart(p, threadstart);
590 pthread_kern->proc_set_wqthread(p, wqthread);
591 pthread_kern->proc_set_pthsize(p, pthsize);
592 pthread_kern->proc_set_register(p);
593
594 uint32_t tsd_slot_sz = proc_is64bit(p) ? sizeof(uint64_t) : sizeof(uint32_t);
595 if ((uint32_t)pthsize >= tsd_slot_sz &&
596 data.tsd_offset <= (uint32_t)(pthsize - tsd_slot_sz)) {
597 max_tsd_offset = ((uint32_t)pthsize - data.tsd_offset - tsd_slot_sz);
598 } else {
599 data.tsd_offset = 0;
600 max_tsd_offset = 0;
601 }
602 pthread_kern->proc_set_pthread_tsd_offset(p, data.tsd_offset);
603
604 if (data.dispatch_queue_offset > max_tsd_offset) {
605 data.dispatch_queue_offset = 0;
606 }
607 pthread_kern->proc_set_dispatchqueue_offset(p, data.dispatch_queue_offset);
608
609 if (pthread_kern->proc_set_return_to_kernel_offset) {
610 if (data.return_to_kernel_offset > max_tsd_offset) {
611 data.return_to_kernel_offset = 0;
612 }
613 pthread_kern->proc_set_return_to_kernel_offset(p,
614 data.return_to_kernel_offset);
615 }
616
617 if (pthread_kern->proc_set_mach_thread_self_tsd_offset) {
618 if (data.mach_thread_self_offset > max_tsd_offset) {
619 data.mach_thread_self_offset = 0;
620 }
621 pthread_kern->proc_set_mach_thread_self_tsd_offset(p,
622 data.mach_thread_self_offset);
623 }
624
625 if (pthread_init_data != 0) {
626 /* Outgoing data that userspace expects as a reply */
627 data.version = sizeof(struct _pthread_registration_data);
628 data.main_qos = _pthread_unspecified_priority();
629
630 if (pthread_kern->qos_main_thread_active()) {
631 mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
632 thread_qos_policy_data_t qos;
633 boolean_t gd = FALSE;
634
635 kr = pthread_kern->thread_policy_get(current_thread(),
636 THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
637 if (kr != KERN_SUCCESS || qos.qos_tier == THREAD_QOS_UNSPECIFIED) {
638 /*
639 * Unspecified threads means the kernel wants us
640 * to impose legacy upon the thread.
641 */
642 qos.qos_tier = THREAD_QOS_LEGACY;
643 qos.tier_importance = 0;
644
645 kr = pthread_kern->thread_policy_set_internal(current_thread(),
646 THREAD_QOS_POLICY, (thread_policy_t)&qos,
647 THREAD_QOS_POLICY_COUNT);
648 }
649
650 if (kr == KERN_SUCCESS) {
651 data.main_qos = _pthread_priority_make_from_thread_qos(
652 qos.qos_tier, 0, 0);
653 }
654 }
655
656 data.stack_addr_hint = stackaddr;
657 data.mutex_default_policy = pthread_mutex_default_policy;
658
659 kr = copyout(&data, pthread_init_data, pthread_init_sz);
660 if (kr != KERN_SUCCESS) {
661 return EINVAL;
662 }
663 }
664
665 /* return the supported feature set as the return value. */
666 *retval = PTHREAD_FEATURE_SUPPORTED;
667
668 return(0);
669 }
670
671
672 #pragma mark - Workqueue Thread Support
673
674 static mach_vm_size_t
675 workq_thread_allocsize(proc_t p, vm_map_t wq_map,
676 mach_vm_size_t *guardsize_out)
677 {
678 mach_vm_size_t guardsize = vm_map_page_size(wq_map);
679 mach_vm_size_t pthread_size = vm_map_round_page_mask(
680 pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET,
681 vm_map_page_mask(wq_map));
682 if (guardsize_out) *guardsize_out = guardsize;
683 return guardsize + PTH_DEFAULT_STACKSIZE + pthread_size;
684 }
685
686 int
687 workq_create_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t *out_addr)
688 {
689 mach_vm_offset_t stackaddr = pthread_kern->proc_get_stack_addr_hint(p);
690 mach_vm_size_t guardsize, th_allocsize;
691 kern_return_t kret;
692
693 th_allocsize = workq_thread_allocsize(p, vmap, &guardsize);
694 kret = mach_vm_map(vmap, &stackaddr, th_allocsize, page_size - 1,
695 VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE, NULL, 0, FALSE,
696 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
697
698 if (kret != KERN_SUCCESS) {
699 kret = mach_vm_allocate(vmap, &stackaddr, th_allocsize,
700 VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
701 }
702
703 if (kret != KERN_SUCCESS) {
704 goto fail;
705 }
706
707 /*
708 * The guard page is at the lowest address
709 * The stack base is the highest address
710 */
711 kret = mach_vm_protect(vmap, stackaddr, guardsize, FALSE, VM_PROT_NONE);
712 if (kret != KERN_SUCCESS) {
713 goto fail_vm_deallocate;
714 }
715
716 if (out_addr) {
717 *out_addr = stackaddr;
718 }
719 return 0;
720
721 fail_vm_deallocate:
722 (void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);
723 fail:
724 return kret;
725 }
726
727 int
728 workq_destroy_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t stackaddr)
729 {
730 return mach_vm_deallocate(vmap, stackaddr,
731 workq_thread_allocsize(p, vmap, NULL));
732 }
733
734 void
735 workq_markfree_threadstack(proc_t OS_UNUSED p, thread_t OS_UNUSED th,
736 vm_map_t vmap, user_addr_t stackaddr)
737 {
738 // Keep this in sync with workq_setup_thread()
739 const vm_size_t guardsize = vm_map_page_size(vmap);
740 const user_addr_t freeaddr = (user_addr_t)stackaddr + guardsize;
741 const vm_map_offset_t freesize = vm_map_trunc_page_mask(
742 (PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET) - 1,
743 vm_map_page_mask(vmap)) - guardsize;
744
745 __assert_only kern_return_t kr = mach_vm_behavior_set(vmap, freeaddr,
746 freesize, VM_BEHAVIOR_REUSABLE);
747 #if MACH_ASSERT
748 if (kr != KERN_SUCCESS && kr != KERN_INVALID_ADDRESS) {
749 os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kr);
750 }
751 #endif
752 }
753
754 struct workq_thread_addrs {
755 user_addr_t self;
756 user_addr_t stack_bottom;
757 user_addr_t stack_top;
758 };
759
760 static inline void
761 workq_thread_set_top_addr(struct workq_thread_addrs *th_addrs, user_addr_t addr)
762 {
763 th_addrs->stack_top = (addr & -C_WORKQ_STK_ALIGN);
764 }
765
766 static void
767 workq_thread_get_addrs(vm_map_t map, user_addr_t stackaddr,
768 struct workq_thread_addrs *th_addrs)
769 {
770 const vm_size_t guardsize = vm_map_page_size(map);
771
772 th_addrs->self = (user_addr_t)(stackaddr + PTH_DEFAULT_STACKSIZE +
773 guardsize + PTHREAD_T_OFFSET);
774 workq_thread_set_top_addr(th_addrs, th_addrs->self);
775 th_addrs->stack_bottom = (user_addr_t)(stackaddr + guardsize);
776 }
777
778 static inline void
779 workq_set_register_state(proc_t p, thread_t th,
780 struct workq_thread_addrs *addrs, mach_port_name_t kport,
781 user_addr_t kevent_list, uint32_t upcall_flags, int kevent_count)
782 {
783 user_addr_t wqstart_fnptr = pthread_kern->proc_get_wqthread(p);
784 if (!wqstart_fnptr) {
785 panic("workqueue thread start function pointer is NULL");
786 }
787
788 #if defined(__i386__) || defined(__x86_64__)
789 if (proc_is64bit_data(p) == 0) {
790 x86_thread_state32_t state = {
791 .eip = (unsigned int)wqstart_fnptr,
792 .eax = /* arg0 */ (unsigned int)addrs->self,
793 .ebx = /* arg1 */ (unsigned int)kport,
794 .ecx = /* arg2 */ (unsigned int)addrs->stack_bottom,
795 .edx = /* arg3 */ (unsigned int)kevent_list,
796 .edi = /* arg4 */ (unsigned int)upcall_flags,
797 .esi = /* arg5 */ (unsigned int)kevent_count,
798
799 .esp = (int)((vm_offset_t)addrs->stack_top),
800 };
801
802 int error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
803 if (error != KERN_SUCCESS) {
804 panic(__func__ ": thread_set_wq_state failed: %d", error);
805 }
806 } else {
807 x86_thread_state64_t state64 = {
808 // x86-64 already passes all the arguments in registers, so we just put them in their final place here
809 .rip = (uint64_t)wqstart_fnptr,
810 .rdi = (uint64_t)addrs->self,
811 .rsi = (uint64_t)kport,
812 .rdx = (uint64_t)addrs->stack_bottom,
813 .rcx = (uint64_t)kevent_list,
814 .r8 = (uint64_t)upcall_flags,
815 .r9 = (uint64_t)kevent_count,
816
817 .rsp = (uint64_t)(addrs->stack_top)
818 };
819
820 int error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64);
821 if (error != KERN_SUCCESS) {
822 panic(__func__ ": thread_set_wq_state failed: %d", error);
823 }
824 }
825 #elif defined(__arm__) || defined(__arm64__)
826 if (!proc_is64bit_data(p)) {
827 arm_thread_state_t state = {
828 .pc = (int)wqstart_fnptr,
829 .r[0] = (unsigned int)addrs->self,
830 .r[1] = (unsigned int)kport,
831 .r[2] = (unsigned int)addrs->stack_bottom,
832 .r[3] = (unsigned int)kevent_list,
833 // will be pushed onto the stack as arg4/5
834 .r[4] = (unsigned int)upcall_flags,
835 .r[5] = (unsigned int)kevent_count,
836
837 .sp = (int)(addrs->stack_top)
838 };
839
840 int error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
841 if (error != KERN_SUCCESS) {
842 panic(__func__ ": thread_set_wq_state failed: %d", error);
843 }
844 } else {
845 #if defined(__arm64__)
846 arm_thread_state64_t state = {
847 .pc = (uint64_t)wqstart_fnptr,
848 .x[0] = (uint64_t)addrs->self,
849 .x[1] = (uint64_t)kport,
850 .x[2] = (uint64_t)addrs->stack_bottom,
851 .x[3] = (uint64_t)kevent_list,
852 .x[4] = (uint64_t)upcall_flags,
853 .x[5] = (uint64_t)kevent_count,
854
855 .sp = (uint64_t)((vm_offset_t)addrs->stack_top),
856 };
857
858 int error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
859 if (error != KERN_SUCCESS) {
860 panic(__func__ ": thread_set_wq_state failed: %d", error);
861 }
862 #else /* defined(__arm64__) */
863 panic("Shouldn't have a 64-bit thread on a 32-bit kernel...");
864 #endif /* defined(__arm64__) */
865 }
866 #else
867 #error setup_wqthread not defined for this architecture
868 #endif
869 }
870
871 static inline int
872 workq_kevent(proc_t p, struct workq_thread_addrs *th_addrs,
873 user_addr_t eventlist, int nevents, int kevent_flags,
874 user_addr_t *kevent_list_out, int *kevent_count_out)
875 {
876 int ret;
877
878 user_addr_t kevent_list = th_addrs->self -
879 WQ_KEVENT_LIST_LEN * sizeof(struct kevent_qos_s);
880 user_addr_t data_buf = kevent_list - WQ_KEVENT_DATA_SIZE;
881 user_size_t data_available = WQ_KEVENT_DATA_SIZE;
882
883 ret = pthread_kern->kevent_workq_internal(p, eventlist, nevents,
884 kevent_list, WQ_KEVENT_LIST_LEN,
885 data_buf, &data_available,
886 kevent_flags, kevent_count_out);
887
888 // squash any errors into just empty output
889 if (ret != 0 || *kevent_count_out == -1) {
890 *kevent_list_out = NULL;
891 *kevent_count_out = 0;
892 return ret;
893 }
894
895 workq_thread_set_top_addr(th_addrs, data_buf + data_available);
896 *kevent_list_out = kevent_list;
897 return ret;
898 }
899
900 /**
901 * configures initial thread stack/registers to jump into:
902 * _pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *keventlist, int upcall_flags, int nkevents);
903 * to get there we jump through assembily stubs in pthread_asm.s. Those
904 * routines setup a stack frame, using the current stack pointer, and marshall
905 * arguments from registers to the stack as required by the ABI.
906 *
907 * One odd thing we do here is to start the pthread_t 4k below what would be the
908 * top of the stack otherwise. This is because usually only the first 4k of the
909 * pthread_t will be used and so we want to put it on the same 16k page as the
910 * top of the stack to save memory.
911 *
912 * When we are done the stack will look like:
913 * |-----------| th_stackaddr + th_allocsize
914 * |pthread_t | th_stackaddr + DEFAULT_STACKSIZE + guardsize + PTHREAD_STACK_OFFSET
915 * |kevent list| optionally - at most WQ_KEVENT_LIST_LEN events
916 * |kevent data| optionally - at most WQ_KEVENT_DATA_SIZE bytes
917 * |stack gap | bottom aligned to 16 bytes
918 * | STACK |
919 * | ⇓ |
920 * | |
921 * |guard page | guardsize
922 * |-----------| th_stackaddr
923 */
924 __attribute__((noreturn,noinline))
925 void
926 workq_setup_thread(proc_t p, thread_t th, vm_map_t map, user_addr_t stackaddr,
927 mach_port_name_t kport, int th_qos __unused, int setup_flags, int upcall_flags)
928 {
929 struct workq_thread_addrs th_addrs;
930 bool first_use = (setup_flags & WQ_SETUP_FIRST_USE);
931 user_addr_t kevent_list = NULL;
932 int kevent_count = 0;
933
934 workq_thread_get_addrs(map, stackaddr, &th_addrs);
935
936 if (first_use) {
937 uint32_t tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
938 if (tsd_offset) {
939 mach_vm_offset_t th_tsd_base = th_addrs.self + tsd_offset;
940 kern_return_t kret = pthread_kern->thread_set_tsd_base(th,
941 th_tsd_base);
942 if (kret == KERN_SUCCESS) {
943 upcall_flags |= WQ_FLAG_THREAD_TSD_BASE_SET;
944 }
945 }
946
947 /*
948 * Pre-fault the first page of the new thread's stack and the page that will
949 * contain the pthread_t structure.
950 */
951 vm_map_offset_t mask = vm_map_page_mask(map);
952 vm_map_offset_t th_page = vm_map_trunc_page_mask(th_addrs.self, mask);
953 vm_map_offset_t stk_page = vm_map_trunc_page_mask(th_addrs.stack_top - 1, mask);
954 if (th_page != stk_page) {
955 vm_fault(map, stk_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0);
956 }
957 vm_fault(map, th_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0);
958 }
959
960 if (setup_flags & WQ_SETUP_EXIT_THREAD) {
961 kevent_count = WORKQ_EXIT_THREAD_NKEVENT;
962 } else if (upcall_flags & WQ_FLAG_THREAD_KEVENT) {
963 unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE;
964 workq_kevent(p, &th_addrs, NULL, 0, flags, &kevent_list, &kevent_count);
965 }
966
967 workq_set_register_state(p, th, &th_addrs, kport,
968 kevent_list, upcall_flags, kevent_count);
969
970 if (first_use) {
971 pthread_kern->thread_bootstrap_return();
972 } else {
973 pthread_kern->unix_syscall_return(EJUSTRETURN);
974 }
975 __builtin_unreachable();
976 }
977
978 int
979 workq_handle_stack_events(proc_t p, thread_t th, vm_map_t map,
980 user_addr_t stackaddr, mach_port_name_t kport,
981 user_addr_t events, int nevents, int upcall_flags)
982 {
983 struct workq_thread_addrs th_addrs;
984 user_addr_t kevent_list = NULL;
985 int kevent_count = 0, error;
986 __assert_only kern_return_t kr;
987
988 workq_thread_get_addrs(map, stackaddr, &th_addrs);
989
990 unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE |
991 KEVENT_FLAG_PARKING;
992 error = workq_kevent(p, &th_addrs, events, nevents, flags,
993 &kevent_list, &kevent_count);
994
995 if (error || kevent_count == 0) {
996 return error;
997 }
998
999 kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL);
1000 assert(kr == KERN_SUCCESS);
1001
1002 workq_set_register_state(p, th, &th_addrs, kport,
1003 kevent_list, upcall_flags, kevent_count);
1004
1005 pthread_kern->unix_syscall_return(EJUSTRETURN);
1006 __builtin_unreachable();
1007 }
1008
1009 int
1010 _thread_selfid(__unused struct proc *p, uint64_t *retval)
1011 {
1012 thread_t thread = current_thread();
1013 *retval = thread_tid(thread);
1014 return KERN_SUCCESS;
1015 }
1016
1017 void
1018 _pthread_init(void)
1019 {
1020 pthread_lck_grp_attr = lck_grp_attr_alloc_init();
1021 pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);
1022
1023 /*
1024 * allocate the lock attribute for pthread synchronizers
1025 */
1026 pthread_lck_attr = lck_attr_alloc_init();
1027 pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
1028
1029 pth_global_hashinit();
1030 psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);
1031 psynch_zoneinit();
1032
1033 int policy_bootarg;
1034 if (PE_parse_boot_argn("pthread_mutex_default_policy", &policy_bootarg, sizeof(policy_bootarg))) {
1035 pthread_mutex_default_policy = policy_bootarg;
1036 }
1037
1038 sysctl_register_oid(&sysctl__kern_pthread_mutex_default_policy);
1039 }