/*
 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */

#pragma mark - Front Matter

#define _PTHREAD_CONDATTR_T
#define _PTHREAD_COND_T
#define _PTHREAD_MUTEXATTR_T
#define _PTHREAD_MUTEX_T
#define _PTHREAD_RWLOCKATTR_T
#define _PTHREAD_RWLOCK_T

#undef pthread_mutexattr_t
#undef pthread_mutex_t
#undef pthread_condattr_t
#undef pthread_cond_t
#undef pthread_rwlockattr_t
#undef pthread_rwlock_t
#include <sys/cdefs.h>
#include <os/log.h>

// <rdar://problem/26158937> panic() should be marked noreturn
extern void panic(const char *string, ...) __printflike(1,2) __dead2;

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
//#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/timeb.h>
#include <sys/times.h>
#include <sys/acct.h>
#include <sys/kernel.h>
#include <sys/wait.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/stat.h>
#include <sys/lock.h>
#include <sys/kdebug.h>
//#include <sys/sysproto.h>
#include <sys/vm.h>
#include <sys/user.h>		/* for coredump */
#include <sys/proc_info.h>	/* for fill_procworkqueue */

#include <mach/mach_port.h>
#include <mach/mach_types.h>
#include <mach/semaphore.h>
#include <mach/sync_policy.h>
#include <mach/task.h>
#include <mach/vm_prot.h>
#include <kern/kern_types.h>
#include <kern/task.h>
#include <kern/clock.h>
#include <mach/kern_return.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/sched_prim.h>	/* for thread_exception_return */
#include <kern/processor.h>
#include <kern/assert.h>
#include <mach/mach_vm.h>
#include <mach/mach_param.h>
#include <mach/thread_status.h>
#include <mach/thread_policy.h>
#include <mach/message.h>
#include <mach/port.h>
//#include <vm/vm_protos.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <mach/thread_act.h>	/* for thread_resume */
#include <machine/machine_routines.h>
#include <mach/shared_region.h>

#include "kern/kern_internal.h"

#ifndef WQ_SETUP_EXIT_THREAD
#define WQ_SETUP_EXIT_THREAD 8
#endif

// XXX: Ditto for thread tags from kern/thread.h
#define THREAD_TAG_MAINTHREAD 0x1
#define THREAD_TAG_PTHREAD 0x10
#define THREAD_TAG_WORKQUEUE 0x20

lck_grp_attr_t *pthread_lck_grp_attr;
lck_grp_t *pthread_lck_grp;
lck_attr_t *pthread_lck_attr;

#define C_32_STK_ALIGN 16
#define C_64_STK_ALIGN 16

// WORKQ uses the largest stack alignment any platform needs
#define C_WORKQ_STK_ALIGN 16

#if defined(__arm64__)
/* Pull the pthread_t into the same page as the top of the stack so we dirty one less page.
 * <rdar://problem/19941744> The _pthread struct at the top of the stack shouldn't be page-aligned
 */
#define PTHREAD_T_OFFSET (12*1024)
#else
#define PTHREAD_T_OFFSET 0
#endif

/*
 * Flags field passed to bsdthread_create and back in pthread_start
 * 31  <---------------------------------> 0
 * _________________________________________
 * | flags(8) | policy(8) | importance(16) |
 * -----------------------------------------
 */
#define PTHREAD_START_CUSTOM		0x01000000 // <rdar://problem/34501401>
#define PTHREAD_START_SETSCHED		0x02000000
// was PTHREAD_START_DETACHED	0x04000000
#define PTHREAD_START_QOSCLASS		0x08000000
#define PTHREAD_START_TSD_BASE_SET	0x10000000
#define PTHREAD_START_SUSPENDED		0x20000000
#define PTHREAD_START_QOSCLASS_MASK	0x00ffffff
#define PTHREAD_START_POLICY_BITSHIFT	16
#define PTHREAD_START_POLICY_MASK	0xff
#define PTHREAD_START_IMPORTANCE_MASK	0xffff
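
/*
 * Illustrative sketch (not part of this file): how a userspace caller might
 * pack the flags word per the layout above. `my_policy` and `my_importance`
 * are hypothetical values assumed to fit the 8-bit policy and 16-bit
 * importance fields.
 */
#if 0
uint32_t flags = PTHREAD_START_CUSTOM | PTHREAD_START_SETSCHED |
		((my_policy & PTHREAD_START_POLICY_MASK) << PTHREAD_START_POLICY_BITSHIFT) |
		(my_importance & PTHREAD_START_IMPORTANCE_MASK);
#endif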

#define SCHED_OTHER	POLICY_TIMESHARE
#define SCHED_FIFO	POLICY_FIFO
#define SCHED_RR	POLICY_RR

#define BASEPRI_DEFAULT 31

uint32_t pthread_debug_tracing = 1;

static uint32_t pthread_mutex_default_policy;

SYSCTL_INT(_kern, OID_AUTO, pthread_mutex_default_policy, CTLFLAG_RW | CTLFLAG_LOCKED,
		&pthread_mutex_default_policy, 0, "");
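
/*
 * The OID registered above surfaces to userspace as
 * "kern.pthread_mutex_default_policy". A minimal userland sketch, assuming a
 * context where sysctlbyname(3) is available:
 */
#if 0
#include <sys/sysctl.h>

uint32_t policy = 0;
size_t len = sizeof(policy);
if (sysctlbyname("kern.pthread_mutex_default_policy", &policy, &len, NULL, 0) == 0) {
	/* policy now holds the kernel's default pthread mutex policy */
}
#endif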

#pragma mark - Process/Thread Setup/Teardown syscalls

static mach_vm_offset_t
stack_addr_hint(proc_t p, vm_map_t vmap)
{
	mach_vm_offset_t stackaddr;
	mach_vm_offset_t aslr_offset;
	bool proc64bit = proc_is64bit(p);
	bool proc64bit_data = proc_is64bit_data(p);

	// We can't safely take random values % something unless it's a power-of-two
	_Static_assert(powerof2(PTH_DEFAULT_STACKSIZE), "PTH_DEFAULT_STACKSIZE is a power-of-two");

#if defined(__i386__) || defined(__x86_64__)
	(void)proc64bit_data;
	if (proc64bit) {
		// Matches vm_map_get_max_aslr_slide_pages's image shift in xnu
		aslr_offset = random() % (1 << 28); // about 512 stacks
	} else {
		// Actually bigger than the image shift, we've got ~256MB to work with
		aslr_offset = random() % (16 * PTH_DEFAULT_STACKSIZE);
	}
	aslr_offset = vm_map_trunc_page_mask(aslr_offset, vm_map_page_mask(vmap));
	if (proc64bit) {
		// Above nanomalloc range (see NANOZONE_SIGNATURE)
		stackaddr = 0x700000000000 + aslr_offset;
	} else {
		stackaddr = SHARED_REGION_BASE_I386 + SHARED_REGION_SIZE_I386 + aslr_offset;
	}
#elif defined(__arm__) || defined(__arm64__)
	user_addr_t main_thread_stack_top = 0;
	if (pthread_kern->proc_get_user_stack) {
		main_thread_stack_top = pthread_kern->proc_get_user_stack(p);
	}
	if (proc64bit && main_thread_stack_top) {
		// The main thread stack position is randomly slid by xnu (c.f.
		// load_main() in mach_loader.c), so basing pthread stack allocations
		// where the main thread stack ends is already ASLRd and doing so
		// avoids creating a gap in the process address space that may cause
		// extra PTE memory usage. rdar://problem/33328206
		stackaddr = vm_map_trunc_page_mask((vm_map_offset_t)main_thread_stack_top,
				vm_map_page_mask(vmap));
	} else {
		// vm_map_get_max_aslr_slide_pages ensures 1MB of slide, we do better
		aslr_offset = random() % ((proc64bit ? 4 : 2) * PTH_DEFAULT_STACKSIZE);
		aslr_offset = vm_map_trunc_page_mask((vm_map_offset_t)aslr_offset,
				vm_map_page_mask(vmap));
		if (proc64bit) {
			// 64 stacks below shared region
			stackaddr = SHARED_REGION_BASE_ARM64 - 64 * PTH_DEFAULT_STACKSIZE - aslr_offset;
		} else {
			// If you try to slide down from this point, you risk ending up in memory consumed by malloc
			if (proc64bit_data) {
				stackaddr = SHARED_REGION_BASE_ARM64_32;
			} else {
				stackaddr = SHARED_REGION_BASE_ARM;
			}
			stackaddr -= 32 * PTH_DEFAULT_STACKSIZE + aslr_offset;
		}
	}
#else
#error Need to define a stack address hint for this architecture
#endif

	return stackaddr;
}
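
/*
 * Worked example of the truncation used above: vm_map_trunc_page_mask()
 * rounds an address down to a page boundary by clearing the mask bits, i.e.
 * trunc(addr) = addr & ~mask with mask = page_size - 1. Hypothetical numbers:
 */
#if 0
uint64_t mask = 0x3fff;            /* 16K pages: page_size - 1 */
uint64_t slide = 0x12345;          /* raw random slide */
uint64_t aligned = slide & ~mask;  /* 0x10000, the page boundary below 0x12345 */
#endif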

static bool
_pthread_priority_to_policy(pthread_priority_t priority,
		thread_qos_policy_data_t *data)
{
	data->qos_tier = _pthread_priority_thread_qos(priority);
	data->tier_importance = _pthread_priority_relpri(priority);
	if (data->qos_tier == THREAD_QOS_UNSPECIFIED || data->tier_importance > 0 ||
			data->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
		return false;
	}
	return true;
}

/*
 * bsdthread_create system call. Used by pthread_create.
 */
int
_bsdthread_create(struct proc *p,
		__unused user_addr_t user_func, __unused user_addr_t user_funcarg,
		user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags,
		user_addr_t *retval)
{
	kern_return_t kret;
	void *sright;
	int error = 0;
	mach_vm_offset_t th_tsd_base;
	mach_port_name_t th_thport;
	thread_t th;
	task_t ctask = current_task();
	unsigned int policy, importance;
	uint32_t tsd_offset;
	bool start_suspended = (flags & PTHREAD_START_SUSPENDED);

	if (pthread_kern->proc_get_register(p) == 0) {
		return EINVAL;
	}

	PTHREAD_TRACE(pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0);

	/* Create thread and make it immovable, do not pin control port yet */
	if (pthread_kern->thread_create_immovable) {
		kret = pthread_kern->thread_create_immovable(ctask, &th);
	} else {
		kret = pthread_kern->thread_create(ctask, &th);
	}

	if (kret != KERN_SUCCESS) {
		return ENOMEM;
	}
	thread_reference(th);

	pthread_kern->thread_set_tag(th, THREAD_TAG_PTHREAD);

	if (pthread_kern->convert_thread_to_port_pinned) {
		/* Convert to immovable/pinned thread port, but port is not pinned yet */
		sright = (void *)pthread_kern->convert_thread_to_port_pinned(th);
	} else {
		sright = (void *)pthread_kern->convert_thread_to_port(th);
	}

	if (pthread_kern->ipc_port_copyout_send_pinned) {
		/* Atomically, pin and copy out the port */
		th_thport = pthread_kern->ipc_port_copyout_send_pinned(sright, pthread_kern->task_get_ipcspace(ctask));
	} else {
		th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(ctask));
	}

	if (!MACH_PORT_VALID(th_thport)) {
		error = EMFILE; // userland will convert this into a crash
		goto out;
	}

	if ((flags & PTHREAD_START_CUSTOM) == 0) {
		error = EINVAL;
		goto out;
	}

	PTHREAD_TRACE(pthread_thread_create | DBG_FUNC_NONE, 0, 0, 0, 3);

	tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
	if (tsd_offset) {
		th_tsd_base = user_pthread + tsd_offset;
		kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base);
		if (kret == KERN_SUCCESS) {
			flags |= PTHREAD_START_TSD_BASE_SET;
		}
	}

	/*
	 * Strip PTHREAD_START_SUSPENDED so that libpthread can observe the kernel
	 * supports this flag (after the fact).
	 */
	flags &= ~PTHREAD_START_SUSPENDED;

	/*
	 * Set up registers & function call.
	 */
#if defined(__i386__) || defined(__x86_64__)
	if (proc_is64bit_data(p)) {
		x86_thread_state64_t state = {
			.rip = (uint64_t)pthread_kern->proc_get_threadstart(p),
			.rdi = (uint64_t)user_pthread,
			.rsi = (uint64_t)th_thport,
			.rdx = (uint64_t)user_func,	/* golang wants this */
			.rcx = (uint64_t)user_funcarg,	/* golang wants this */
			.r8  = (uint64_t)user_stack,	/* golang wants this */
			.r9  = (uint64_t)flags,

			.rsp = (uint64_t)user_stack,
		};

		(void)pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
	} else {
		x86_thread_state32_t state = {
			.eip = (uint32_t)pthread_kern->proc_get_threadstart(p),
			.eax = (uint32_t)user_pthread,
			.ebx = (uint32_t)th_thport,
			.ecx = (uint32_t)user_func,	/* golang wants this */
			.edx = (uint32_t)user_funcarg,	/* golang wants this */
			.edi = (uint32_t)user_stack,	/* golang wants this */
			.esi = (uint32_t)flags,

			.esp = (uint32_t)user_stack,
		};

		(void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
	}
#elif defined(__arm__) || defined(__arm64__)
	if (proc_is64bit_data(p)) {
#if defined(__arm64__)
		arm_thread_state64_t state = {
			.pc   = (uint64_t)pthread_kern->proc_get_threadstart(p),
			.x[0] = (uint64_t)user_pthread,
			.x[1] = (uint64_t)th_thport,
			.x[2] = (uint64_t)user_func,	/* golang wants this */
			.x[3] = (uint64_t)user_funcarg,	/* golang wants this */
			.x[4] = (uint64_t)user_stack,	/* golang wants this */
			.x[5] = (uint64_t)flags,

			.sp   = (uint64_t)user_stack,
		};

		(void)pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
#else
		panic("Shouldn't have a 64-bit thread on a 32-bit kernel...");
#endif // defined(__arm64__)
	} else {
		arm_thread_state_t state = {
			.pc   = (uint32_t)pthread_kern->proc_get_threadstart(p),
			.r[0] = (uint32_t)user_pthread,
			.r[1] = (uint32_t)th_thport,
			.r[2] = (uint32_t)user_func,	/* golang wants this */
			.r[3] = (uint32_t)user_funcarg,	/* golang wants this */
			.r[4] = (uint32_t)user_stack,	/* golang wants this */
			.r[5] = (uint32_t)flags,

			.sp   = (uint32_t)user_stack,
		};

		(void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
	}
#else
#error bsdthread_create not defined for this architecture
#endif

	if (flags & PTHREAD_START_SETSCHED) {
		/* Set scheduling parameters if needed */
		thread_extended_policy_data_t extinfo;
		thread_precedence_policy_data_t precedinfo;

		importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
		policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;

		if (policy == SCHED_OTHER) {
			extinfo.timeshare = 1;
		} else {
			extinfo.timeshare = 0;
		}

		thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);

		precedinfo.importance = (importance - BASEPRI_DEFAULT);
		thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
	} else if (flags & PTHREAD_START_QOSCLASS) {
		/* Set thread QoS class if requested. */
		thread_qos_policy_data_t qos;

		if (!_pthread_priority_to_policy(flags & PTHREAD_START_QOSCLASS_MASK, &qos)) {
			error = EINVAL;
			goto out;
		}
		pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY,
				(thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
	}

	if (pthread_kern->proc_get_mach_thread_self_tsd_offset) {
		uint64_t mach_thread_self_offset =
				pthread_kern->proc_get_mach_thread_self_tsd_offset(p);
		if (mach_thread_self_offset && tsd_offset) {
			bool proc64bit = proc_is64bit(p);
			if (proc64bit) {
				uint64_t th_thport_tsd = (uint64_t)th_thport;
				error = copyout(&th_thport_tsd, user_pthread + tsd_offset +
						mach_thread_self_offset, sizeof(th_thport_tsd));
			} else {
				uint32_t th_thport_tsd = (uint32_t)th_thport;
				error = copyout(&th_thport_tsd, user_pthread + tsd_offset +
						mach_thread_self_offset, sizeof(th_thport_tsd));
			}
			if (error) {
				goto out;
			}
		}
	}

	if (!start_suspended) {
		kret = pthread_kern->thread_resume(th);
		if (kret != KERN_SUCCESS) {
			error = EINVAL;
			goto out;
		}
	}
	thread_deallocate(th);	/* drop the creator reference */

	PTHREAD_TRACE(pthread_thread_create | DBG_FUNC_END, error, user_pthread, 0, 0);

	*retval = user_pthread;
	return 0;

out:
	(void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(ctask), th_thport);
	if (pthread_kern->thread_will_park_or_terminate) {
		pthread_kern->thread_will_park_or_terminate(th);
	}
	(void)thread_terminate(th);
	(void)thread_deallocate(th);
	return error;
}

/*
 * bsdthread_terminate system call. Used by pthread_terminate
 */
int
_bsdthread_terminate(__unused struct proc *p,
		user_addr_t stackaddr,
		size_t size,
		uint32_t kthport,
		uint32_t sem,
		__unused int32_t *retval)
{
	mach_vm_offset_t freeaddr;
	mach_vm_size_t freesize;
	kern_return_t kret;
	thread_t th = current_thread();

	freeaddr = (mach_vm_offset_t)stackaddr;
	freesize = size;

	PTHREAD_TRACE(pthread_thread_terminate | DBG_FUNC_START, freeaddr, freesize, kthport, 0xff);

	if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
		if (pthread_kern->thread_get_tag(th) & THREAD_TAG_MAINTHREAD) {
			vm_map_t user_map = pthread_kern->current_map();
			freesize = vm_map_trunc_page_mask((vm_map_offset_t)freesize - 1, vm_map_page_mask(user_map));
			kret = mach_vm_behavior_set(user_map, freeaddr, freesize, VM_BEHAVIOR_REUSABLE);
#if MACH_ASSERT
			if (kret != KERN_SUCCESS && kret != KERN_INVALID_ADDRESS) {
				os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kret);
			}
#endif
			kret = kret ? kret : mach_vm_protect(user_map, freeaddr, freesize, FALSE, VM_PROT_NONE);
			assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS);
		} else {
			kret = mach_vm_deallocate(pthread_kern->current_map(), freeaddr, freesize);
			if (kret != KERN_SUCCESS) {
				PTHREAD_TRACE(pthread_thread_terminate | DBG_FUNC_END, kret, 0, 0, 0);
			}
		}
	}

	if (pthread_kern->thread_will_park_or_terminate) {
		pthread_kern->thread_will_park_or_terminate(th);
	}
	if (pthread_kern->thread_terminate_pinned) {
		(void)pthread_kern->thread_terminate_pinned(th);
	} else {
		(void)thread_terminate(th);
	}

	if (sem != MACH_PORT_NULL) {
		kret = pthread_kern->semaphore_signal_internal_trap(sem);
		if (kret != KERN_SUCCESS) {
			PTHREAD_TRACE(pthread_thread_terminate | DBG_FUNC_END, kret, 0, 0, 0);
		}
	}

	if (kthport != MACH_PORT_NULL) {
		pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(current_task()), kthport);
	}

	PTHREAD_TRACE(pthread_thread_terminate | DBG_FUNC_END, 0, 0, 0, 0);

	pthread_kern->thread_exception_return();
	__builtin_unreachable();
}

/*
 * bsdthread_register system call. Performs per-process setup. Responsible for
 * returning capability bits to userspace and receiving userspace function addresses.
 */
int
_bsdthread_register(struct proc *p,
		user_addr_t threadstart,
		user_addr_t wqthread,
		int pthsize,
		user_addr_t pthread_init_data,
		user_addr_t pthread_init_data_size,
		uint64_t dispatchqueue_offset,
		int32_t *retval)
{
	struct _pthread_registration_data data = {};
	uint32_t max_tsd_offset;
	kern_return_t kr;
	size_t pthread_init_sz = 0;

	/* syscall randomizer test can pass bogus values */
	if (pthsize < 0 || pthsize > MAX_PTHREAD_SIZE) {
		return EINVAL;
	}

	/*
	 * If we have pthread_init_data, then we use that and target_concptr
	 * (which is an offset) to get the data.
	 */
	if (pthread_init_data != 0) {
		if (pthread_init_data_size < sizeof(data.version)) {
			return EINVAL;
		}

		pthread_init_sz = MIN(sizeof(data), (size_t)pthread_init_data_size);
		int ret = copyin(pthread_init_data, &data, pthread_init_sz);
		if (ret) {
			return ret;
		}
		if (data.version != (size_t)pthread_init_data_size) {
			return EINVAL;
		}
	} else {
		data.dispatch_queue_offset = dispatchqueue_offset;
	}

	/* We have to do this before proc_get_register so that it resets after fork */
	mach_vm_offset_t stackaddr = stack_addr_hint(p, pthread_kern->current_map());
	pthread_kern->proc_set_stack_addr_hint(p, (user_addr_t)stackaddr);

	/* prevent multiple registrations */
	if (pthread_kern->proc_get_register(p) != 0) {
		return EINVAL;
	}

	pthread_kern->proc_set_threadstart(p, threadstart);
	pthread_kern->proc_set_wqthread(p, wqthread);
	pthread_kern->proc_set_pthsize(p, pthsize);
	pthread_kern->proc_set_register(p);

	uint32_t tsd_slot_sz = proc_is64bit(p) ? sizeof(uint64_t) : sizeof(uint32_t);
	if ((uint32_t)pthsize >= tsd_slot_sz &&
			data.tsd_offset <= (uint32_t)(pthsize - tsd_slot_sz)) {
		max_tsd_offset = ((uint32_t)pthsize - data.tsd_offset - tsd_slot_sz);
	} else {
		data.tsd_offset = 0;
		max_tsd_offset = 0;
	}
	pthread_kern->proc_set_pthread_tsd_offset(p, data.tsd_offset);

	if (data.dispatch_queue_offset > max_tsd_offset) {
		data.dispatch_queue_offset = 0;
	}
	pthread_kern->proc_set_dispatchqueue_offset(p, data.dispatch_queue_offset);

	if (pthread_kern->proc_set_return_to_kernel_offset) {
		if (data.return_to_kernel_offset > max_tsd_offset) {
			data.return_to_kernel_offset = 0;
		}
		pthread_kern->proc_set_return_to_kernel_offset(p,
				data.return_to_kernel_offset);
	}

	if (pthread_kern->proc_set_mach_thread_self_tsd_offset) {
		if (data.mach_thread_self_offset > max_tsd_offset) {
			data.mach_thread_self_offset = 0;
		}
		pthread_kern->proc_set_mach_thread_self_tsd_offset(p,
				data.mach_thread_self_offset);
	}

	if (pthread_init_data != 0) {
		/* Outgoing data that userspace expects as a reply */
		data.version = sizeof(struct _pthread_registration_data);
		data.main_qos = _pthread_unspecified_priority();

		if (pthread_kern->qos_main_thread_active()) {
			mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
			thread_qos_policy_data_t qos;
			boolean_t gd = FALSE;

			kr = pthread_kern->thread_policy_get(current_thread(),
					THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
			if (kr != KERN_SUCCESS || qos.qos_tier == THREAD_QOS_UNSPECIFIED) {
				/*
				 * An unspecified QoS means the kernel wants us
				 * to impose legacy QoS upon the thread.
				 */
				qos.qos_tier = THREAD_QOS_LEGACY;
				qos.tier_importance = 0;

				kr = pthread_kern->thread_policy_set_internal(current_thread(),
						THREAD_QOS_POLICY, (thread_policy_t)&qos,
						THREAD_QOS_POLICY_COUNT);
			}

			if (kr == KERN_SUCCESS) {
				data.main_qos = _pthread_priority_make_from_thread_qos(
						qos.qos_tier, 0, 0);
			}
		}

		data.stack_addr_hint = stackaddr;
		data.mutex_default_policy = pthread_mutex_default_policy;

		kr = copyout(&data, pthread_init_data, pthread_init_sz);
		if (kr != KERN_SUCCESS) {
			return EINVAL;
		}
	}

	/* return the supported feature set as the return value. */
	*retval = PTHREAD_FEATURE_SUPPORTED;

	return 0;
}
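
/*
 * Note on the version handshake above: userspace passes the byte size of its
 * _pthread_registration_data as both pthread_init_data_size and data.version,
 * so a mismatched libpthread and kernel can still agree on a common struct
 * prefix. A hypothetical userland caller sketch (field values illustrative):
 */
#if 0
struct _pthread_registration_data data = {
	.version = sizeof(data),	/* doubles as the struct size handshake */
};
/* The same buffer is copied in by the kernel, then overwritten with its reply. */
#endif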

#pragma mark - Workqueue Thread Support

static mach_vm_size_t
workq_thread_allocsize(proc_t p, vm_map_t wq_map,
		mach_vm_size_t *guardsize_out)
{
	mach_vm_size_t guardsize = vm_map_page_size(wq_map);
	mach_vm_size_t pthread_size = vm_map_round_page_mask(
			pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET,
			vm_map_page_mask(wq_map));
	if (guardsize_out) *guardsize_out = guardsize;
	return guardsize + PTH_DEFAULT_STACKSIZE + pthread_size;
}
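
/*
 * Worked example with hypothetical sizes: on a 16K-page system with a 4K
 * struct pthread, the pthread area rounds up to exactly one page thanks to
 * PTHREAD_T_OFFSET (12K), so:
 *
 *   guardsize    = 16K
 *   pthread_size = round_16K(4K + 12K) = 16K
 *   allocsize    = 16K + PTH_DEFAULT_STACKSIZE + 16K
 *
 * which is what lets the first 4K of the pthread_t share a page with the top
 * of the stack (see the PTHREAD_T_OFFSET comment above).
 */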

kern_return_t
workq_create_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t *out_addr)
{
	mach_vm_offset_t stackaddr = pthread_kern->proc_get_stack_addr_hint(p);
	mach_vm_size_t guardsize, th_allocsize;
	kern_return_t kret;

	th_allocsize = workq_thread_allocsize(p, vmap, &guardsize);
	kret = mach_vm_map(vmap, &stackaddr, th_allocsize, page_size - 1,
			VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE, NULL, 0, FALSE,
			VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
	if (kret != KERN_SUCCESS) {
		kret = mach_vm_allocate(vmap, &stackaddr, th_allocsize,
				VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
	}

	if (kret != KERN_SUCCESS) {
		goto fail;
	}

	/*
	 * The guard page is at the lowest address
	 * The stack base is the highest address
	 */
	kret = mach_vm_protect(vmap, stackaddr, guardsize, FALSE, VM_PROT_NONE);
	if (kret != KERN_SUCCESS) {
		goto fail_vm_deallocate;
	}

	if (out_addr) {
		*out_addr = stackaddr;
	}
	return 0;

fail_vm_deallocate:
	(void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);
fail:
	return kret;
}

kern_return_t
workq_destroy_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t stackaddr)
{
	return mach_vm_deallocate(vmap, stackaddr,
			workq_thread_allocsize(p, vmap, NULL));
}

void
workq_markfree_threadstack(proc_t OS_UNUSED p, thread_t OS_UNUSED th,
		vm_map_t vmap, user_addr_t stackaddr)
{
	// Keep this in sync with workq_setup_thread()
	const vm_size_t guardsize = vm_map_page_size(vmap);
	const user_addr_t freeaddr = (user_addr_t)stackaddr + guardsize;
	const vm_map_offset_t freesize = vm_map_trunc_page_mask(
			(PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET) - 1,
			vm_map_page_mask(vmap)) - guardsize;

	__assert_only kern_return_t kr = mach_vm_behavior_set(vmap, freeaddr,
			freesize, VM_BEHAVIOR_REUSABLE);
#if MACH_ASSERT
	if (kr != KERN_SUCCESS && kr != KERN_INVALID_ADDRESS) {
		os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kr);
	}
#endif
}

struct workq_thread_addrs {
	user_addr_t self;
	user_addr_t stack_bottom;
	user_addr_t stack_top;
};

static inline void
workq_thread_set_top_addr(struct workq_thread_addrs *th_addrs, user_addr_t addr)
{
	th_addrs->stack_top = (addr & -C_WORKQ_STK_ALIGN);
}
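
/*
 * The AND above relies on two's complement: -C_WORKQ_STK_ALIGN is a mask with
 * the low four bits clear, so it rounds the address down to a 16-byte
 * boundary. Sketch with hypothetical values:
 */
#if 0
user_addr_t addr = 0x1000f;
user_addr_t top = addr & -16;	/* 0x10000: rounded down to 16-byte alignment */
#endif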

static void
workq_thread_get_addrs(vm_map_t map, user_addr_t stackaddr,
		struct workq_thread_addrs *th_addrs)
{
	const vm_size_t guardsize = vm_map_page_size(map);

	th_addrs->self = (user_addr_t)(stackaddr + PTH_DEFAULT_STACKSIZE +
			guardsize + PTHREAD_T_OFFSET);
	workq_thread_set_top_addr(th_addrs, th_addrs->self);
	th_addrs->stack_bottom = (user_addr_t)(stackaddr + guardsize);
}

static void
workq_set_register_state(proc_t p, thread_t th,
		struct workq_thread_addrs *addrs, mach_port_name_t kport,
		user_addr_t kevent_list, uint32_t upcall_flags, int kevent_count)
{
	user_addr_t wqstart_fnptr = pthread_kern->proc_get_wqthread(p);
	if (!wqstart_fnptr) {
		panic("workqueue thread start function pointer is NULL");
	}

#if defined(__i386__) || defined(__x86_64__)
	if (proc_is64bit_data(p) == 0) {
		x86_thread_state32_t state = {
			.eip = (unsigned int)wqstart_fnptr,
			.eax = /* arg0 */ (unsigned int)addrs->self,
			.ebx = /* arg1 */ (unsigned int)kport,
			.ecx = /* arg2 */ (unsigned int)addrs->stack_bottom,
			.edx = /* arg3 */ (unsigned int)kevent_list,
			.edi = /* arg4 */ (unsigned int)upcall_flags,
			.esi = /* arg5 */ (unsigned int)kevent_count,

			.esp = (int)((vm_offset_t)addrs->stack_top),
		};

		int error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
		if (error != KERN_SUCCESS) {
			panic(__func__ ": thread_set_wq_state failed: %d", error);
		}
	} else {
		x86_thread_state64_t state64 = {
			// x86-64 already passes all the arguments in registers, so we just put them in their final place here
			.rip = (uint64_t)wqstart_fnptr,
			.rdi = (uint64_t)addrs->self,
			.rsi = (uint64_t)kport,
			.rdx = (uint64_t)addrs->stack_bottom,
			.rcx = (uint64_t)kevent_list,
			.r8  = (uint64_t)upcall_flags,
			.r9  = (uint64_t)kevent_count,

			.rsp = (uint64_t)(addrs->stack_top)
		};

		int error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64);
		if (error != KERN_SUCCESS) {
			panic(__func__ ": thread_set_wq_state failed: %d", error);
		}
	}
#elif defined(__arm__) || defined(__arm64__)
	if (!proc_is64bit_data(p)) {
		arm_thread_state_t state = {
			.pc   = (int)wqstart_fnptr,
			.r[0] = (unsigned int)addrs->self,
			.r[1] = (unsigned int)kport,
			.r[2] = (unsigned int)addrs->stack_bottom,
			.r[3] = (unsigned int)kevent_list,
			// will be pushed onto the stack as arg4/5
			.r[4] = (unsigned int)upcall_flags,
			.r[5] = (unsigned int)kevent_count,

			.sp   = (int)(addrs->stack_top)
		};

		int error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
		if (error != KERN_SUCCESS) {
			panic(__func__ ": thread_set_wq_state failed: %d", error);
		}
	} else {
#if defined(__arm64__)
		arm_thread_state64_t state = {
			.pc   = (uint64_t)wqstart_fnptr,
			.x[0] = (uint64_t)addrs->self,
			.x[1] = (uint64_t)kport,
			.x[2] = (uint64_t)addrs->stack_bottom,
			.x[3] = (uint64_t)kevent_list,
			.x[4] = (uint64_t)upcall_flags,
			.x[5] = (uint64_t)kevent_count,

			.sp   = (uint64_t)((vm_offset_t)addrs->stack_top),
		};

		int error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
		if (error != KERN_SUCCESS) {
			panic(__func__ ": thread_set_wq_state failed: %d", error);
		}
#else /* defined(__arm64__) */
		panic("Shouldn't have a 64-bit thread on a 32-bit kernel...");
#endif /* defined(__arm64__) */
	}
#else
#error setup_wqthread not defined for this architecture
#endif
}

static int
workq_kevent(proc_t p, struct workq_thread_addrs *th_addrs,
		user_addr_t eventlist, int nevents, int kevent_flags,
		user_addr_t *kevent_list_out, int *kevent_count_out)
{
	int ret;

	user_addr_t kevent_list = th_addrs->self -
			WQ_KEVENT_LIST_LEN * sizeof(struct kevent_qos_s);
	user_addr_t data_buf = kevent_list - WQ_KEVENT_DATA_SIZE;
	user_size_t data_available = WQ_KEVENT_DATA_SIZE;

	ret = pthread_kern->kevent_workq_internal(p, eventlist, nevents,
			kevent_list, WQ_KEVENT_LIST_LEN,
			data_buf, &data_available,
			kevent_flags, kevent_count_out);

	// squash any errors into just empty output
	if (ret != 0 || *kevent_count_out == -1) {
		*kevent_list_out = NULL;
		*kevent_count_out = 0;
		return ret;
	}

	workq_thread_set_top_addr(th_addrs, data_buf + data_available);
	*kevent_list_out = kevent_list;
	return ret;
}

/*
 * configures initial thread stack/registers to jump into:
 * _pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *keventlist, int upcall_flags, int nkevents);
 * to get there we jump through assembly stubs in pthread_asm.s. Those
 * routines set up a stack frame, using the current stack pointer, and marshal
 * arguments from registers to the stack as required by the ABI.
 *
 * One odd thing we do here is to start the pthread_t 4k below what would be the
 * top of the stack otherwise. This is because usually only the first 4k of the
 * pthread_t will be used and so we want to put it on the same 16k page as the
 * top of the stack to save memory.
 *
 * When we are done the stack will look like:
 * |-----------| th_stackaddr + th_allocsize
 * |pthread_t  | th_stackaddr + DEFAULT_STACKSIZE + guardsize + PTHREAD_STACK_OFFSET
 * |kevent list| optionally - at most WQ_KEVENT_LIST_LEN events
 * |kevent data| optionally - at most WQ_KEVENT_DATA_SIZE bytes
 * |stack gap  | bottom aligned to 16 bytes
 * |   stack   |
 * |     |     |
 * |     v     |
 * |guard page | guardsize
 * |-----------| th_stackaddr
 */
__attribute__((noreturn, noinline))
void
workq_setup_thread(proc_t p, thread_t th, vm_map_t map, user_addr_t stackaddr,
		mach_port_name_t kport, int th_qos __unused, int setup_flags, int upcall_flags)
{
	struct workq_thread_addrs th_addrs;
	bool first_use = (setup_flags & WQ_SETUP_FIRST_USE);
	user_addr_t kevent_list = NULL;
	int kevent_count = 0;

	workq_thread_get_addrs(map, stackaddr, &th_addrs);

	if (first_use) {
		uint32_t tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
		if (tsd_offset) {
			mach_vm_offset_t th_tsd_base = th_addrs.self + tsd_offset;
			kern_return_t kret = pthread_kern->thread_set_tsd_base(th,
					th_tsd_base);
			if (kret == KERN_SUCCESS) {
				upcall_flags |= WQ_FLAG_THREAD_TSD_BASE_SET;
			}
		}

		/*
		 * Pre-fault the first page of the new thread's stack and the page that will
		 * contain the pthread_t structure.
		 */
		vm_map_offset_t mask = vm_map_page_mask(map);
		vm_map_offset_t th_page = vm_map_trunc_page_mask(th_addrs.self, mask);
		vm_map_offset_t stk_page = vm_map_trunc_page_mask(th_addrs.stack_top - 1, mask);
		if (th_page != stk_page) {
			vm_fault(map, stk_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0);
		}
		vm_fault(map, th_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0);
	}

	if (setup_flags & WQ_SETUP_EXIT_THREAD) {
		kevent_count = WORKQ_EXIT_THREAD_NKEVENT;
	} else if (upcall_flags & WQ_FLAG_THREAD_KEVENT) {
		unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE;
		workq_kevent(p, &th_addrs, NULL, 0, flags, &kevent_list, &kevent_count);
	}

	workq_set_register_state(p, th, &th_addrs, kport,
			kevent_list, upcall_flags, kevent_count);

	if (first_use) {
		pthread_kern->thread_bootstrap_return();
	} else {
		pthread_kern->unix_syscall_return(EJUSTRETURN);
	}
	__builtin_unreachable();
}

int
workq_handle_stack_events(proc_t p, thread_t th, vm_map_t map,
		user_addr_t stackaddr, mach_port_name_t kport,
		user_addr_t events, int nevents, int upcall_flags)
{
	struct workq_thread_addrs th_addrs;
	user_addr_t kevent_list = NULL;
	int kevent_count = 0, error;
	__assert_only kern_return_t kr;

	workq_thread_get_addrs(map, stackaddr, &th_addrs);

	unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE |
			KEVENT_FLAG_PARKING;
	error = workq_kevent(p, &th_addrs, events, nevents, flags,
			&kevent_list, &kevent_count);

	if (error || kevent_count == 0) {
		return error;
	}

	kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL);
	assert(kr == KERN_SUCCESS);

	workq_set_register_state(p, th, &th_addrs, kport,
			kevent_list, upcall_flags, kevent_count);

	pthread_kern->unix_syscall_return(EJUSTRETURN);
	__builtin_unreachable();
}

int
_thread_selfid(__unused struct proc *p, uint64_t *retval)
{
	thread_t thread = current_thread();
	*retval = thread_tid(thread);
	return KERN_SUCCESS;
}
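
/*
 * Userspace reaches this syscall through pthread_threadid_np(3), which is
 * backed by the same thread_tid() value. A minimal userland usage sketch:
 */
#if 0
#include <pthread.h>

uint64_t tid;
pthread_threadid_np(NULL, &tid);	/* NULL means the calling thread */
#endif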

void
pthread_init(void)
{
	pthread_lck_grp_attr = lck_grp_attr_alloc_init();
	pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);

	/*
	 * allocate the lock attribute for pthread synchronizers
	 */
	pthread_lck_attr = lck_attr_alloc_init();
	pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);

	pth_global_hashinit();
	psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);
	psynch_zoneinit();

	int policy_bootarg;
	if (PE_parse_boot_argn("pthread_mutex_default_policy", &policy_bootarg, sizeof(policy_bootarg))) {
		pthread_mutex_default_policy = policy_bootarg;
	}

	sysctl_register_oid(&sysctl__kern_pthread_mutex_default_policy);
}