/*
 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */

#pragma mark - Front Matter

#define _PTHREAD_CONDATTR_T
#define _PTHREAD_COND_T
#define _PTHREAD_MUTEXATTR_T
#define _PTHREAD_MUTEX_T
#define _PTHREAD_RWLOCKATTR_T
#define _PTHREAD_RWLOCK_T

#undef pthread_mutexattr_t
#undef pthread_mutex_t
#undef pthread_condattr_t
#undef pthread_cond_t
#undef pthread_rwlockattr_t
#undef pthread_rwlock_t

#include <sys/cdefs.h>
#include <os/log.h>
52 // <rdar://problem/26158937> panic() should be marked noreturn
53 extern void panic(const char *string
, ...) __printflike(1,2) __dead2
;

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
//#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/timeb.h>
#include <sys/times.h>
#include <sys/kernel.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/kdebug.h>
//#include <sys/sysproto.h>
#include <sys/user.h>		/* for coredump */
#include <sys/proc_info.h>	/* for fill_procworkqueue */

#include <mach/mach_port.h>
#include <mach/mach_types.h>
#include <mach/semaphore.h>
#include <mach/sync_policy.h>
#include <mach/task.h>
#include <mach/vm_prot.h>
#include <kern/kern_types.h>
#include <kern/task.h>
#include <kern/clock.h>
#include <mach/kern_return.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/sched_prim.h>	/* for thread_exception_return */
#include <kern/processor.h>
#include <kern/assert.h>
#include <mach/mach_vm.h>
#include <mach/mach_param.h>
#include <mach/thread_status.h>
#include <mach/thread_policy.h>
#include <mach/message.h>
#include <mach/port.h>
//#include <vm/vm_protos.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <mach/thread_act.h>	/* for thread_resume */
#include <machine/machine_routines.h>
#include <mach/shared_region.h>

#include <libkern/OSAtomic.h>
#include <libkern/libkern.h>

#include "kern_internal.h"

#ifndef WQ_SETUP_EXIT_THREAD
#define WQ_SETUP_EXIT_THREAD 8
#endif

// XXX: Ditto for thread tags from kern/thread.h
#define THREAD_TAG_MAINTHREAD 0x1
#define THREAD_TAG_PTHREAD 0x10
#define THREAD_TAG_WORKQUEUE 0x20

lck_grp_attr_t *pthread_lck_grp_attr;
lck_grp_t *pthread_lck_grp;
lck_attr_t *pthread_lck_attr;

#define C_32_STK_ALIGN 16
#define C_64_STK_ALIGN 16

// WORKQ uses the largest alignment any platform needs
#define C_WORKQ_STK_ALIGN 16

#if defined(__arm64__)
/* Pull the pthread_t into the same page as the top of the stack so we dirty one less page.
 * <rdar://problem/19941744> The _pthread struct at the top of the stack shouldn't be page-aligned
 */
#define PTHREAD_T_OFFSET (12*1024)
#else
#define PTHREAD_T_OFFSET 0
#endif
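
/*
 * Editor's note: with 16k pages, placing the pthread_t PTHREAD_T_OFFSET (12k)
 * below the page-aligned stack top keeps its commonly-touched first 4k on the
 * same page as the top of the stack, so a new thread dirties one page rather
 * than two.
 */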

/*
 * Flags field passed to bsdthread_create and back in pthread_start
 * 31  <---------------------------------> 0
 * _________________________________________
 * |  flags(8) | policy(8) | importance(16) |
 * -----------------------------------------
 */
#define PTHREAD_START_CUSTOM		0x01000000 // <rdar://problem/34501401>
#define PTHREAD_START_SETSCHED		0x02000000
// was PTHREAD_START_DETACHED	0x04000000
#define PTHREAD_START_QOSCLASS		0x08000000
#define PTHREAD_START_TSD_BASE_SET	0x10000000
#define PTHREAD_START_SUSPENDED		0x20000000
#define PTHREAD_START_QOSCLASS_MASK	0x00ffffff
#define PTHREAD_START_POLICY_BITSHIFT	16
#define PTHREAD_START_POLICY_MASK	0xff
#define PTHREAD_START_IMPORTANCE_MASK	0xffff
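
/*
 * Illustrative sketch (editor's addition, not in the original source):
 * unpacking the fields above from a bsdthread_create flags word. For example,
 * 0x02020005 carries PTHREAD_START_SETSCHED in the flags byte, policy 0x02,
 * and importance 5. The helper below is hypothetical and compiled out.
 */
#if 0 /* example only */
static inline void
example_decode_bsdthread_flags(uint32_t flags,
		unsigned int *policy, unsigned int *importance)
{
	/* policy lives in bits 16..23, importance in bits 0..15 */
	*policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;
	*importance = flags & PTHREAD_START_IMPORTANCE_MASK;
}
#endif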

#define SCHED_OTHER POLICY_TIMESHARE
#define SCHED_FIFO POLICY_FIFO
#define SCHED_RR POLICY_RR

#define BASEPRI_DEFAULT 31

uint32_t pthread_debug_tracing = 1;

static uint32_t pthread_mutex_default_policy;

SYSCTL_INT(_kern, OID_AUTO, pthread_mutex_default_policy, CTLFLAG_RW | CTLFLAG_LOCKED,
		&pthread_mutex_default_policy, 0, "");
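
/*
 * Editor's note: once registered via sysctl_register_oid() in _pthread_init()
 * below, this is visible to userspace as the read-write sysctl
 * "kern.pthread_mutex_default_policy", e.g. (value semantics assumed):
 *   sysctl -w kern.pthread_mutex_default_policy=1
 */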

#pragma mark - Process/Thread Setup/Teardown syscalls

static mach_vm_offset_t
stack_addr_hint(proc_t p, vm_map_t vmap)
{
	mach_vm_offset_t stackaddr;
	mach_vm_offset_t aslr_offset;
	bool proc64bit = proc_is64bit(p);
	bool proc64bit_data = proc_is64bit_data(p);

	// We can't safely take random values % something unless it's a power-of-two
	_Static_assert(powerof2(PTH_DEFAULT_STACKSIZE), "PTH_DEFAULT_STACKSIZE is a power-of-two");

#if defined(__i386__) || defined(__x86_64__)
	(void)proc64bit_data;
	if (proc64bit) {
		// Matches vm_map_get_max_aslr_slide_pages's image shift in xnu
		aslr_offset = random() % (1 << 28); // about 512 stacks
	} else {
		// Actually bigger than the image shift, we've got ~256MB to work with
		aslr_offset = random() % (16 * PTH_DEFAULT_STACKSIZE);
	}
	aslr_offset = vm_map_trunc_page_mask(aslr_offset, vm_map_page_mask(vmap));
	if (proc64bit) {
		// Above nanomalloc range (see NANOZONE_SIGNATURE)
		stackaddr = 0x700000000000 + aslr_offset;
	} else {
		stackaddr = SHARED_REGION_BASE_I386 + SHARED_REGION_SIZE_I386 + aslr_offset;
	}
#elif defined(__arm__) || defined(__arm64__)
	user_addr_t main_thread_stack_top = 0;
	if (pthread_kern->proc_get_user_stack) {
		main_thread_stack_top = pthread_kern->proc_get_user_stack(p);
	}
	if (proc64bit && main_thread_stack_top) {
		// The main thread stack position is randomly slid by xnu (c.f.
		// load_main() in mach_loader.c), so basing pthread stack allocations
		// where the main thread stack ends is already ASLRd and doing so
		// avoids creating a gap in the process address space that may cause
		// extra PTE memory usage. rdar://problem/33328206
		stackaddr = vm_map_trunc_page_mask((vm_map_offset_t)main_thread_stack_top,
				vm_map_page_mask(vmap));
	} else {
		// vm_map_get_max_aslr_slide_pages ensures 1MB of slide, we do better
		aslr_offset = random() % ((proc64bit ? 4 : 2) * PTH_DEFAULT_STACKSIZE);
		aslr_offset = vm_map_trunc_page_mask((vm_map_offset_t)aslr_offset,
				vm_map_page_mask(vmap));
		if (proc64bit) {
			// 64 stacks below shared region
			stackaddr = SHARED_REGION_BASE_ARM64 - 64 * PTH_DEFAULT_STACKSIZE - aslr_offset;
		} else {
			// If you try to slide down from this point, you risk ending up in memory consumed by malloc
			if (proc64bit_data) {
				stackaddr = SHARED_REGION_BASE_ARM64_32;
			} else {
				stackaddr = SHARED_REGION_BASE_ARM;
			}

			stackaddr -= 32 * PTH_DEFAULT_STACKSIZE + aslr_offset;
		}
	}
#else
#error Need to define a stack address hint for this architecture
#endif
	return stackaddr;
}

static bool
_pthread_priority_to_policy(pthread_priority_t priority,
		thread_qos_policy_data_t *data)
{
	data->qos_tier = _pthread_priority_thread_qos(priority);
	data->tier_importance = _pthread_priority_relpri(priority);
	if (data->qos_tier == THREAD_QOS_UNSPECIFIED || data->tier_importance > 0 ||
			data->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
		return false;
	}
	return true;
}
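
/*
 * Illustrative example (editor's addition): a priority encoding utility QoS
 * with relative priority -2 decodes to qos_tier == THREAD_QOS_UTILITY and
 * tier_importance == -2 and passes the check above; a positive relative
 * priority is rejected because tier_importance > 0.
 */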

/**
 * bsdthread_create system call.  Used by pthread_create.
 */
int
_bsdthread_create(struct proc *p,
		__unused user_addr_t user_func, __unused user_addr_t user_funcarg,
		user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags,
		user_addr_t *retval)
{
	kern_return_t kret;
	void *sright;
	int error = 0;
	mach_vm_offset_t th_tsd_base;
	mach_port_name_t th_thport;
	thread_t th;
	task_t ctask = current_task();
	unsigned int policy, importance;
	uint32_t tsd_offset = 0;

	bool start_suspended = (flags & PTHREAD_START_SUSPENDED);

	if (pthread_kern->proc_get_register(p) == 0) {
		return EINVAL;
	}

	PTHREAD_TRACE(pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0);

	kret = pthread_kern->thread_create(ctask, &th);
	if (kret != KERN_SUCCESS)
		return ENOMEM;
	thread_reference(th);

	pthread_kern->thread_set_tag(th, THREAD_TAG_PTHREAD);

	sright = (void *)pthread_kern->convert_thread_to_port(th);
	th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(ctask));
	if (!MACH_PORT_VALID(th_thport)) {
		error = EMFILE; // userland will convert this into a crash
		goto out;
	}

	if ((flags & PTHREAD_START_CUSTOM) == 0) {
		error = EINVAL;
		goto out;
	}

	PTHREAD_TRACE(pthread_thread_create | DBG_FUNC_NONE, 0, 0, 0, 3);

	tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
	if (tsd_offset) {
		th_tsd_base = user_pthread + tsd_offset;
		kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base);
		if (kret == KERN_SUCCESS) {
			flags |= PTHREAD_START_TSD_BASE_SET;
		}
	}

	/*
	 * Strip PTHREAD_START_SUSPENDED so that libpthread can observe the kernel
	 * supports this flag (after the fact).
	 */
	flags &= ~PTHREAD_START_SUSPENDED;

	/*
	 * Set up registers & function call.
	 */
#if defined(__i386__) || defined(__x86_64__)
	if (proc_is64bit_data(p)) {
		x86_thread_state64_t state = {
			.rip = (uint64_t)pthread_kern->proc_get_threadstart(p),
			.rdi = (uint64_t)user_pthread,
			.rsi = (uint64_t)th_thport,
			.rdx = (uint64_t)user_func,	/* golang wants this */
			.rcx = (uint64_t)user_funcarg,	/* golang wants this */
			.r8 = (uint64_t)user_stack,	/* golang wants this */
			.r9 = (uint64_t)flags,

			.rsp = (uint64_t)user_stack,
		};

		(void)pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
	} else {
		x86_thread_state32_t state = {
			.eip = (uint32_t)pthread_kern->proc_get_threadstart(p),
			.eax = (uint32_t)user_pthread,
			.ebx = (uint32_t)th_thport,
			.ecx = (uint32_t)user_func,	/* golang wants this */
			.edx = (uint32_t)user_funcarg,	/* golang wants this */
			.edi = (uint32_t)user_stack,	/* golang wants this */
			.esi = (uint32_t)flags,

			.esp = (uint32_t)user_stack,
		};

		(void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
	}
#elif defined(__arm__) || defined(__arm64__)
	if (proc_is64bit_data(p)) {
#ifdef __arm64__
		arm_thread_state64_t state = {
			.pc = (uint64_t)pthread_kern->proc_get_threadstart(p),
			.x[0] = (uint64_t)user_pthread,
			.x[1] = (uint64_t)th_thport,
			.x[2] = (uint64_t)user_func,	/* golang wants this */
			.x[3] = (uint64_t)user_funcarg,	/* golang wants this */
			.x[4] = (uint64_t)user_stack,	/* golang wants this */
			.x[5] = (uint64_t)flags,

			.sp = (uint64_t)user_stack,
		};

		(void)pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
#else
		panic("Shouldn't have a 64-bit thread on a 32-bit kernel...");
#endif // defined(__arm64__)
	} else {
		arm_thread_state_t state = {
			.pc = (uint32_t)pthread_kern->proc_get_threadstart(p),
			.r[0] = (uint32_t)user_pthread,
			.r[1] = (uint32_t)th_thport,
			.r[2] = (uint32_t)user_func,	/* golang wants this */
			.r[3] = (uint32_t)user_funcarg,	/* golang wants this */
			.r[4] = (uint32_t)user_stack,	/* golang wants this */
			.r[5] = (uint32_t)flags,

			.sp = (uint32_t)user_stack,
		};

		(void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
	}
#else
#error bsdthread_create not defined for this architecture
#endif

	if (flags & PTHREAD_START_SETSCHED) {
		/* Set scheduling parameters if needed */
		thread_extended_policy_data_t extinfo;
		thread_precedence_policy_data_t precedinfo;

		importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
		policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;

		if (policy == SCHED_OTHER) {
			extinfo.timeshare = 1;
		} else {
			extinfo.timeshare = 0;
		}

		thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);

		precedinfo.importance = (importance - BASEPRI_DEFAULT);
		thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
	} else if (flags & PTHREAD_START_QOSCLASS) {
		/* Set thread QoS class if requested. */
		thread_qos_policy_data_t qos;

		if (!_pthread_priority_to_policy(flags & PTHREAD_START_QOSCLASS_MASK, &qos)) {
			error = EINVAL;
			goto out;
		}

		pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY,
				(thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
	}

	if (pthread_kern->proc_get_mach_thread_self_tsd_offset) {
		uint64_t mach_thread_self_offset =
				pthread_kern->proc_get_mach_thread_self_tsd_offset(p);
		if (mach_thread_self_offset && tsd_offset) {
			bool proc64bit = proc_is64bit(p);
			if (proc64bit) {
				uint64_t th_thport_tsd = (uint64_t)th_thport;
				error = copyout(&th_thport_tsd, user_pthread + tsd_offset +
						mach_thread_self_offset, sizeof(th_thport_tsd));
			} else {
				uint32_t th_thport_tsd = (uint32_t)th_thport;
				error = copyout(&th_thport_tsd, user_pthread + tsd_offset +
						mach_thread_self_offset, sizeof(th_thport_tsd));
			}
			if (error) {
				goto out;
			}
		}
	}

	if (!start_suspended) {
		kret = pthread_kern->thread_resume(th);
		if (kret != KERN_SUCCESS) {
			error = EINVAL;
			goto out;
		}
	}
	thread_deallocate(th);	/* drop the creator reference */

	PTHREAD_TRACE(pthread_thread_create | DBG_FUNC_END, error, user_pthread, 0, 0);

	*retval = user_pthread;
	return 0;

out:
	(void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(ctask), th_thport);
	if (pthread_kern->thread_will_park_or_terminate) {
		pthread_kern->thread_will_park_or_terminate(th);
	}
	(void)thread_terminate(th);
	(void)thread_deallocate(th);
	return error;
}

/**
 * bsdthread_terminate system call.  Used by pthread_terminate
 */
int
_bsdthread_terminate(__unused struct proc *p,
		user_addr_t stackaddr,
		size_t size,
		uint32_t kthport,
		uint32_t sem,
		__unused int32_t *retval)
{
	mach_vm_offset_t freeaddr;
	mach_vm_size_t freesize;
	kern_return_t kret;
	thread_t th = current_thread();

	freeaddr = (mach_vm_offset_t)stackaddr;
	freesize = size;

	PTHREAD_TRACE(pthread_thread_terminate | DBG_FUNC_START, freeaddr, freesize, kthport, 0xff);

	if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
		if (pthread_kern->thread_get_tag(th) & THREAD_TAG_MAINTHREAD) {
			vm_map_t user_map = pthread_kern->current_map();
			freesize = vm_map_trunc_page_mask((vm_map_offset_t)freesize - 1, vm_map_page_mask(user_map));
			kret = mach_vm_behavior_set(user_map, freeaddr, freesize, VM_BEHAVIOR_REUSABLE);
#if MACH_ASSERT
			if (kret != KERN_SUCCESS && kret != KERN_INVALID_ADDRESS) {
				os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kret);
			}
#endif
			kret = kret ? kret : mach_vm_protect(user_map, freeaddr, freesize, FALSE, VM_PROT_NONE);
			assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS);
		} else {
			kret = mach_vm_deallocate(pthread_kern->current_map(), freeaddr, freesize);
			if (kret != KERN_SUCCESS) {
				PTHREAD_TRACE(pthread_thread_terminate | DBG_FUNC_END, kret, 0, 0, 0);
			}
		}
	}

	if (pthread_kern->thread_will_park_or_terminate) {
		pthread_kern->thread_will_park_or_terminate(th);
	}
	(void)thread_terminate(th);
	if (sem != MACH_PORT_NULL) {
		kret = pthread_kern->semaphore_signal_internal_trap(sem);
		if (kret != KERN_SUCCESS) {
			PTHREAD_TRACE(pthread_thread_terminate | DBG_FUNC_END, kret, 0, 0, 0);
		}
	}

	if (kthport != MACH_PORT_NULL) {
		pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(current_task()), kthport);
	}

	PTHREAD_TRACE(pthread_thread_terminate | DBG_FUNC_END, 0, 0, 0, 0);

	pthread_kern->thread_exception_return();
	__builtin_unreachable();
}

/**
 * bsdthread_register system call.  Performs per-process setup.  Responsible
 * for returning capability bits to userspace and receiving userspace function
 * addresses.
 */
int
_bsdthread_register(struct proc *p,
		user_addr_t threadstart,
		user_addr_t wqthread,
		int pthsize,
		user_addr_t pthread_init_data,
		user_addr_t pthread_init_data_size,
		uint64_t dispatchqueue_offset,
		int32_t *retval)
{
	struct _pthread_registration_data data = {};
	uint32_t max_tsd_offset;
	kern_return_t kr;
	size_t pthread_init_sz = 0;

	/* syscall randomizer test can pass bogus values */
	if (pthsize < 0 || pthsize > MAX_PTHREAD_SIZE) {
		return EINVAL;
	}

	/*
	 * If we have pthread_init_data, then we use that and target_concptr
	 * (which is an offset) to get the data.
	 */
	if (pthread_init_data != 0) {
		if (pthread_init_data_size < sizeof(data.version)) {
			return EINVAL;
		}

		pthread_init_sz = MIN(sizeof(data), (size_t)pthread_init_data_size);
		int ret = copyin(pthread_init_data, &data, pthread_init_sz);
		if (ret) {
			return ret;
		}
		if (data.version != (size_t)pthread_init_data_size) {
			return EINVAL;
		}
	} else {
		data.dispatch_queue_offset = dispatchqueue_offset;
	}

	/* We have to do this before proc_get_register so that it resets after fork */
	mach_vm_offset_t stackaddr = stack_addr_hint(p, pthread_kern->current_map());
	pthread_kern->proc_set_stack_addr_hint(p, (user_addr_t)stackaddr);

	/* prevent multiple registrations */
	if (pthread_kern->proc_get_register(p) != 0) {
		return EINVAL;
	}

	pthread_kern->proc_set_threadstart(p, threadstart);
	pthread_kern->proc_set_wqthread(p, wqthread);
	pthread_kern->proc_set_pthsize(p, pthsize);
	pthread_kern->proc_set_register(p);

	uint32_t tsd_slot_sz = proc_is64bit(p) ? sizeof(uint64_t) : sizeof(uint32_t);
	if ((uint32_t)pthsize >= tsd_slot_sz &&
			data.tsd_offset <= (uint32_t)(pthsize - tsd_slot_sz)) {
		max_tsd_offset = ((uint32_t)pthsize - data.tsd_offset - tsd_slot_sz);
	} else {
		data.tsd_offset = 0;
		max_tsd_offset = 0;
	}
	pthread_kern->proc_set_pthread_tsd_offset(p, data.tsd_offset);

	if (data.dispatch_queue_offset > max_tsd_offset) {
		data.dispatch_queue_offset = 0;
	}
	pthread_kern->proc_set_dispatchqueue_offset(p, data.dispatch_queue_offset);

	if (pthread_kern->proc_set_return_to_kernel_offset) {
		if (data.return_to_kernel_offset > max_tsd_offset) {
			data.return_to_kernel_offset = 0;
		}
		pthread_kern->proc_set_return_to_kernel_offset(p,
				data.return_to_kernel_offset);
	}

	if (pthread_kern->proc_set_mach_thread_self_tsd_offset) {
		if (data.mach_thread_self_offset > max_tsd_offset) {
			data.mach_thread_self_offset = 0;
		}
		pthread_kern->proc_set_mach_thread_self_tsd_offset(p,
				data.mach_thread_self_offset);
	}

	if (pthread_init_data != 0) {
		/* Outgoing data that userspace expects as a reply */
		data.version = sizeof(struct _pthread_registration_data);
		data.main_qos = _pthread_unspecified_priority();

		if (pthread_kern->qos_main_thread_active()) {
			mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
			thread_qos_policy_data_t qos;
			boolean_t gd = FALSE;

			kr = pthread_kern->thread_policy_get(current_thread(),
					THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
			if (kr != KERN_SUCCESS || qos.qos_tier == THREAD_QOS_UNSPECIFIED) {
				/*
				 * An unspecified thread means the kernel wants us
				 * to impose legacy upon the thread.
				 */
				qos.qos_tier = THREAD_QOS_LEGACY;
				qos.tier_importance = 0;

				kr = pthread_kern->thread_policy_set_internal(current_thread(),
						THREAD_QOS_POLICY, (thread_policy_t)&qos,
						THREAD_QOS_POLICY_COUNT);
			}

			if (kr == KERN_SUCCESS) {
				data.main_qos = _pthread_priority_make_from_thread_qos(
						qos.qos_tier, 0, 0);
			}
		}

		data.stack_addr_hint = stackaddr;
		data.mutex_default_policy = pthread_mutex_default_policy;

		kr = copyout(&data, pthread_init_data, pthread_init_sz);
		if (kr != KERN_SUCCESS) {
			return EINVAL;
		}
	}

	/* return the supported feature set as the return value. */
	*retval = PTHREAD_FEATURE_SUPPORTED;

	return 0;
}

#pragma mark - Workqueue Thread Support

static mach_vm_size_t
workq_thread_allocsize(proc_t p, vm_map_t wq_map,
		mach_vm_size_t *guardsize_out)
{
	mach_vm_size_t guardsize = vm_map_page_size(wq_map);
	mach_vm_size_t pthread_size = vm_map_round_page_mask(
			pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET,
			vm_map_page_mask(wq_map));
	if (guardsize_out) *guardsize_out = guardsize;
	return guardsize + PTH_DEFAULT_STACKSIZE + pthread_size;
}
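
/*
 * Worked example (editor's addition, assuming 16k pages and a pthsize of
 * roughly 4k): guardsize is one page (16k), pthread_size rounds
 * 4k + PTHREAD_T_OFFSET up to 16k, so the total allocation is
 * 16k + PTH_DEFAULT_STACKSIZE + 16k.
 */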

int
workq_create_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t *out_addr)
{
	mach_vm_offset_t stackaddr = pthread_kern->proc_get_stack_addr_hint(p);
	mach_vm_size_t guardsize, th_allocsize;
	kern_return_t kret;

	th_allocsize = workq_thread_allocsize(p, vmap, &guardsize);
	kret = mach_vm_map(vmap, &stackaddr, th_allocsize, page_size - 1,
			VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE, NULL, 0, FALSE,
			VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
	if (kret != KERN_SUCCESS) {
		kret = mach_vm_allocate(vmap, &stackaddr, th_allocsize,
				VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
	}
	if (kret != KERN_SUCCESS) {
		goto fail;
	}

	/*
	 * The guard page is at the lowest address
	 * The stack base is the highest address
	 */
	kret = mach_vm_protect(vmap, stackaddr, guardsize, FALSE, VM_PROT_NONE);
	if (kret != KERN_SUCCESS) {
		goto fail_vm_deallocate;
	}

	if (out_addr) {
		*out_addr = stackaddr;
	}
	return 0;

fail_vm_deallocate:
	(void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);
fail:
	return kret;
}

int
workq_destroy_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t stackaddr)
{
	return mach_vm_deallocate(vmap, stackaddr,
			workq_thread_allocsize(p, vmap, NULL));
}

void
workq_markfree_threadstack(proc_t OS_UNUSED p, thread_t OS_UNUSED th,
		vm_map_t vmap, user_addr_t stackaddr)
{
	// Keep this in sync with workq_setup_thread()
	const vm_size_t guardsize = vm_map_page_size(vmap);
	const user_addr_t freeaddr = (user_addr_t)stackaddr + guardsize;
	const vm_map_offset_t freesize = vm_map_trunc_page_mask(
			(PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET) - 1,
			vm_map_page_mask(vmap)) - guardsize;

	__assert_only kern_return_t kr = mach_vm_behavior_set(vmap, freeaddr,
			freesize, VM_BEHAVIOR_REUSABLE);
#if MACH_ASSERT
	if (kr != KERN_SUCCESS && kr != KERN_INVALID_ADDRESS) {
		os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kr);
	}
#endif
}

struct workq_thread_addrs {
	user_addr_t self;
	user_addr_t stack_bottom;
	user_addr_t stack_top;
};

static inline void
workq_thread_set_top_addr(struct workq_thread_addrs *th_addrs, user_addr_t addr)
{
	th_addrs->stack_top = (addr & -C_WORKQ_STK_ALIGN);
}
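
/*
 * Editor's note: C_WORKQ_STK_ALIGN is 16, so the AND with -C_WORKQ_STK_ALIGN
 * (i.e. ~0xf) above rounds the address down to a 16-byte boundary, e.g.
 * 0x100f -> 0x1000.
 */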

static void
workq_thread_get_addrs(vm_map_t map, user_addr_t stackaddr,
		struct workq_thread_addrs *th_addrs)
{
	const vm_size_t guardsize = vm_map_page_size(map);

	th_addrs->self = (user_addr_t)(stackaddr + PTH_DEFAULT_STACKSIZE +
			guardsize + PTHREAD_T_OFFSET);
	workq_thread_set_top_addr(th_addrs, th_addrs->self);
	th_addrs->stack_bottom = (user_addr_t)(stackaddr + guardsize);
}
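
/*
 * Worked example (editor's addition, assuming 16k pages, a
 * PTH_DEFAULT_STACKSIZE of 512k, and PTHREAD_T_OFFSET of 12k):
 * self = stackaddr + 512k + 16k + 12k, stack_top is self rounded down to 16
 * bytes, and stack_bottom = stackaddr + 16k, matching the layout diagram
 * above workq_setup_thread() below.
 */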

static void
workq_set_register_state(proc_t p, thread_t th,
		struct workq_thread_addrs *addrs, mach_port_name_t kport,
		user_addr_t kevent_list, uint32_t upcall_flags, int kevent_count)
{
	user_addr_t wqstart_fnptr = pthread_kern->proc_get_wqthread(p);
	if (!wqstart_fnptr) {
		panic("workqueue thread start function pointer is NULL");
	}

#if defined(__i386__) || defined(__x86_64__)
	if (proc_is64bit_data(p) == 0) {
		x86_thread_state32_t state = {
			.eip = (unsigned int)wqstart_fnptr,
			.eax = /* arg0 */ (unsigned int)addrs->self,
			.ebx = /* arg1 */ (unsigned int)kport,
			.ecx = /* arg2 */ (unsigned int)addrs->stack_bottom,
			.edx = /* arg3 */ (unsigned int)kevent_list,
			.edi = /* arg4 */ (unsigned int)upcall_flags,
			.esi = /* arg5 */ (unsigned int)kevent_count,

			.esp = (int)((vm_offset_t)addrs->stack_top),
		};

		int error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
		if (error != KERN_SUCCESS) {
			panic(__func__ ": thread_set_wq_state failed: %d", error);
		}
	} else {
		x86_thread_state64_t state64 = {
			// x86-64 already passes all the arguments in registers, so we just put them in their final place here
			.rip = (uint64_t)wqstart_fnptr,
			.rdi = (uint64_t)addrs->self,
			.rsi = (uint64_t)kport,
			.rdx = (uint64_t)addrs->stack_bottom,
			.rcx = (uint64_t)kevent_list,
			.r8 = (uint64_t)upcall_flags,
			.r9 = (uint64_t)kevent_count,

			.rsp = (uint64_t)(addrs->stack_top)
		};

		int error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64);
		if (error != KERN_SUCCESS) {
			panic(__func__ ": thread_set_wq_state failed: %d", error);
		}
	}
#elif defined(__arm__) || defined(__arm64__)
	if (!proc_is64bit_data(p)) {
		arm_thread_state_t state = {
			.pc = (int)wqstart_fnptr,
			.r[0] = (unsigned int)addrs->self,
			.r[1] = (unsigned int)kport,
			.r[2] = (unsigned int)addrs->stack_bottom,
			.r[3] = (unsigned int)kevent_list,
			// will be pushed onto the stack as arg4/5
			.r[4] = (unsigned int)upcall_flags,
			.r[5] = (unsigned int)kevent_count,

			.sp = (int)(addrs->stack_top)
		};

		int error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
		if (error != KERN_SUCCESS) {
			panic(__func__ ": thread_set_wq_state failed: %d", error);
		}
	} else {
#if defined(__arm64__)
		arm_thread_state64_t state = {
			.pc = (uint64_t)wqstart_fnptr,
			.x[0] = (uint64_t)addrs->self,
			.x[1] = (uint64_t)kport,
			.x[2] = (uint64_t)addrs->stack_bottom,
			.x[3] = (uint64_t)kevent_list,
			.x[4] = (uint64_t)upcall_flags,
			.x[5] = (uint64_t)kevent_count,

			.sp = (uint64_t)((vm_offset_t)addrs->stack_top),
		};

		int error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
		if (error != KERN_SUCCESS) {
			panic(__func__ ": thread_set_wq_state failed: %d", error);
		}
#else /* defined(__arm64__) */
		panic("Shouldn't have a 64-bit thread on a 32-bit kernel...");
#endif /* defined(__arm64__) */
	}
#else
#error setup_wqthread not defined for this architecture
#endif
}

static int
workq_kevent(proc_t p, struct workq_thread_addrs *th_addrs,
		user_addr_t eventlist, int nevents, int kevent_flags,
		user_addr_t *kevent_list_out, int *kevent_count_out)
{
	int ret;

	user_addr_t kevent_list = th_addrs->self -
			WQ_KEVENT_LIST_LEN * sizeof(struct kevent_qos_s);
	user_addr_t data_buf = kevent_list - WQ_KEVENT_DATA_SIZE;
	user_size_t data_available = WQ_KEVENT_DATA_SIZE;

	ret = pthread_kern->kevent_workq_internal(p, eventlist, nevents,
			kevent_list, WQ_KEVENT_LIST_LEN,
			data_buf, &data_available,
			kevent_flags, kevent_count_out);

	// squash any errors into just empty output
	if (ret != 0 || *kevent_count_out == -1) {
		*kevent_list_out = NULL;
		*kevent_count_out = 0;
		return ret;
	}

	workq_thread_set_top_addr(th_addrs, data_buf + data_available);
	*kevent_list_out = kevent_list;
	return ret;
}
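
/*
 * Editor's note: on success, workq_kevent() lowers the thread's stack top to
 * data_buf + data_available (re-aligned by workq_thread_set_top_addr()), so
 * the kevent list and kevent data land between the pthread_t and the live
 * stack, as the layout diagram below describes.
 */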

/**
 * configures initial thread stack/registers to jump into:
 * _pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *keventlist, int upcall_flags, int nkevents);
 * to get there we jump through assembly stubs in pthread_asm.s.  Those
 * routines setup a stack frame, using the current stack pointer, and marshall
 * arguments from registers to the stack as required by the ABI.
 *
 * One odd thing we do here is to start the pthread_t 4k below what would be the
 * top of the stack otherwise.  This is because usually only the first 4k of the
 * pthread_t will be used and so we want to put it on the same 16k page as the
 * top of the stack to save memory.
 *
 * When we are done the stack will look like:
 * |-----------| th_stackaddr + th_allocsize
 * |pthread_t  | th_stackaddr + DEFAULT_STACKSIZE + guardsize + PTHREAD_STACK_OFFSET
 * |kevent list| optionally - at most WQ_KEVENT_LIST_LEN events
 * |kevent data| optionally - at most WQ_KEVENT_DATA_SIZE bytes
 * |stack gap  | bottom aligned to 16 bytes
 * |   stack   |
 * |     ⇓     |
 * |           |
 * |guard page | guardsize
 * |-----------| th_stackaddr
 */
__attribute__((noreturn,noinline))
void
workq_setup_thread(proc_t p, thread_t th, vm_map_t map, user_addr_t stackaddr,
		mach_port_name_t kport, int th_qos __unused, int setup_flags, int upcall_flags)
{
	struct workq_thread_addrs th_addrs;
	bool first_use = (setup_flags & WQ_SETUP_FIRST_USE);
	user_addr_t kevent_list = NULL;
	int kevent_count = 0;

	workq_thread_get_addrs(map, stackaddr, &th_addrs);

	if (first_use) {
		uint32_t tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
		if (tsd_offset) {
			mach_vm_offset_t th_tsd_base = th_addrs.self + tsd_offset;
			kern_return_t kret = pthread_kern->thread_set_tsd_base(th,
					th_tsd_base);
			if (kret == KERN_SUCCESS) {
				upcall_flags |= WQ_FLAG_THREAD_TSD_BASE_SET;
			}
		}

		/*
		 * Pre-fault the first page of the new thread's stack and the page that will
		 * contain the pthread_t structure.
		 */
		vm_map_offset_t mask = vm_map_page_mask(map);
		vm_map_offset_t th_page = vm_map_trunc_page_mask(th_addrs.self, mask);
		vm_map_offset_t stk_page = vm_map_trunc_page_mask(th_addrs.stack_top - 1, mask);
		if (th_page != stk_page) {
			vm_fault(map, stk_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0);
		}
		vm_fault(map, th_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0);
	}

	if (setup_flags & WQ_SETUP_EXIT_THREAD) {
		kevent_count = WORKQ_EXIT_THREAD_NKEVENT;
	} else if (upcall_flags & WQ_FLAG_THREAD_KEVENT) {
		unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE;
		workq_kevent(p, &th_addrs, NULL, 0, flags, &kevent_list, &kevent_count);
	}

	workq_set_register_state(p, th, &th_addrs, kport,
			kevent_list, upcall_flags, kevent_count);

	if (first_use) {
		pthread_kern->thread_bootstrap_return();
	} else {
		pthread_kern->unix_syscall_return(EJUSTRETURN);
	}
	__builtin_unreachable();
}

int
workq_handle_stack_events(proc_t p, thread_t th, vm_map_t map,
		user_addr_t stackaddr, mach_port_name_t kport,
		user_addr_t events, int nevents, int upcall_flags)
{
	struct workq_thread_addrs th_addrs;
	user_addr_t kevent_list = NULL;
	int kevent_count = 0, error;
	__assert_only kern_return_t kr;

	workq_thread_get_addrs(map, stackaddr, &th_addrs);

	unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE |
			KEVENT_FLAG_PARKING;
	error = workq_kevent(p, &th_addrs, events, nevents, flags,
			&kevent_list, &kevent_count);

	if (error || kevent_count == 0) {
		return error;
	}

	kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL);
	assert(kr == KERN_SUCCESS);

	workq_set_register_state(p, th, &th_addrs, kport,
			kevent_list, upcall_flags, kevent_count);

	pthread_kern->unix_syscall_return(EJUSTRETURN);
	__builtin_unreachable();
}

int
_thread_selfid(__unused struct proc *p, uint64_t *retval)
{
	thread_t thread = current_thread();
	*retval = thread_tid(thread);
	return KERN_SUCCESS;
}

void
_pthread_init(void)
{
	pthread_lck_grp_attr = lck_grp_attr_alloc_init();
	pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);

	/*
	 * allocate the lock attribute for pthread synchronizers
	 */
	pthread_lck_attr = lck_attr_alloc_init();
	pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);

	pth_global_hashinit();
	psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);
	psynch_zoneinit();

	int policy_bootarg;
	if (PE_parse_boot_argn("pthread_mutex_default_policy", &policy_bootarg, sizeof(policy_bootarg))) {
		pthread_mutex_default_policy = policy_bootarg;
	}

	sysctl_register_oid(&sysctl__kern_pthread_mutex_default_policy);
}