/*
 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
#pragma mark - Front Matter

#define _PTHREAD_CONDATTR_T
#define _PTHREAD_COND_T
#define _PTHREAD_MUTEXATTR_T
#define _PTHREAD_MUTEX_T
#define _PTHREAD_RWLOCKATTR_T
#define _PTHREAD_RWLOCK_T

#undef pthread_mutexattr_t
#undef pthread_mutex_t
#undef pthread_condattr_t
#undef pthread_cond_t
#undef pthread_rwlockattr_t
#undef pthread_rwlock_t
#include <sys/cdefs.h>

// <rdar://problem/26158937> panic() should be marked noreturn
extern void panic(const char *string, ...) __printflike(1,2) __dead2;

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
//#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/timeb.h>
#include <sys/times.h>
#include <sys/kernel.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/kdebug.h>
//#include <sys/sysproto.h>
#include <sys/user.h>		/* for coredump */
#include <sys/proc_info.h>	/* for fill_procworkqueue */
#include <mach/mach_port.h>
#include <mach/mach_types.h>
#include <mach/semaphore.h>
#include <mach/sync_policy.h>
#include <mach/task.h>
#include <mach/vm_prot.h>
#include <kern/kern_types.h>
#include <kern/task.h>
#include <kern/clock.h>
#include <mach/kern_return.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/sched_prim.h>	/* for thread_exception_return */
#include <kern/processor.h>
#include <kern/assert.h>
#include <mach/mach_vm.h>
#include <mach/mach_param.h>
#include <mach/thread_status.h>
#include <mach/thread_policy.h>
#include <mach/message.h>
#include <mach/port.h>
//#include <vm/vm_protos.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <mach/thread_act.h>	/* for thread_resume */
#include <machine/machine_routines.h>
#include <mach/shared_region.h>

#include "kern/kern_internal.h"
#ifndef WQ_SETUP_EXIT_THREAD
#define WQ_SETUP_EXIT_THREAD	8
#endif

// XXX: Ditto for thread tags from kern/thread.h
#define THREAD_TAG_MAINTHREAD 0x1
#define THREAD_TAG_PTHREAD 0x10
#define THREAD_TAG_WORKQUEUE 0x20
lck_grp_attr_t *pthread_lck_grp_attr;
lck_grp_t *pthread_lck_grp;
lck_attr_t *pthread_lck_attr;

#define C_32_STK_ALIGN		16
#define C_64_STK_ALIGN		16

// WORKQ uses the largest alignment any platform needs
#define C_WORKQ_STK_ALIGN	16
#if defined(__arm64__)
/* Pull the pthread_t into the same page as the top of the stack so we dirty one less page.
 * <rdar://problem/19941744> The _pthread struct at the top of the stack shouldn't be page-aligned
 */
#define PTHREAD_T_OFFSET (12*1024)
#else
#define PTHREAD_T_OFFSET 0
#endif
/*
 * Flags field passed to bsdthread_create and back in pthread_start
 * 31  <---------------------------------> 0
 * _________________________________________
 * |  flags(8) | policy(8) | importance(16) |
 * -----------------------------------------
 */
#define PTHREAD_START_CUSTOM		0x01000000 // <rdar://problem/34501401>
#define PTHREAD_START_SETSCHED		0x02000000
// was PTHREAD_START_DETACHED		0x04000000
#define PTHREAD_START_QOSCLASS		0x08000000
#define PTHREAD_START_TSD_BASE_SET	0x10000000
#define PTHREAD_START_SUSPENDED		0x20000000
#define PTHREAD_START_QOSCLASS_MASK	0x00ffffff
#define PTHREAD_START_POLICY_BITSHIFT	16
#define PTHREAD_START_POLICY_MASK	0xff
#define PTHREAD_START_IMPORTANCE_MASK	0xffff
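/*
 * [Illustrative sketch, not part of the original source.] One way a caller
 * could pack the word described above, given the layout
 * |flags(8)|policy(8)|importance(16)| and the masks defined here. The helper
 * name bsdthread_pack_flags is hypothetical.
 */
static inline uint32_t __unused
bsdthread_pack_flags(uint32_t flags, uint32_t policy, uint32_t importance)
{
	return flags |
			((policy & PTHREAD_START_POLICY_MASK) << PTHREAD_START_POLICY_BITSHIFT) |
			(importance & PTHREAD_START_IMPORTANCE_MASK);
}
// _bsdthread_create() below performs the inverse: it extracts importance with
// PTHREAD_START_IMPORTANCE_MASK and policy via PTHREAD_START_POLICY_BITSHIFT.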
#define SCHED_OTHER		POLICY_TIMESHARE
#define SCHED_FIFO		POLICY_FIFO
#define SCHED_RR		POLICY_RR

#define BASEPRI_DEFAULT 31

uint32_t pthread_debug_tracing = 1;

static uint32_t pthread_mutex_default_policy;

SYSCTL_INT(_kern, OID_AUTO, pthread_mutex_default_policy, CTLFLAG_RW | CTLFLAG_LOCKED,
		&pthread_mutex_default_policy, 0, "");
#pragma mark - Process/Thread Setup/Teardown syscalls
static mach_vm_offset_t
stack_addr_hint(proc_t p, vm_map_t vmap)
{
	mach_vm_offset_t stackaddr;
	mach_vm_offset_t aslr_offset;
	bool proc64bit = proc_is64bit(p);
	bool proc64bit_data = proc_is64bit_data(p);

	// We can't safely take random values % something unless it's a power-of-two
	_Static_assert(powerof2(PTH_DEFAULT_STACKSIZE), "PTH_DEFAULT_STACKSIZE is a power-of-two");
#if defined(__i386__) || defined(__x86_64__)
	(void)proc64bit_data;
	if (proc64bit) {
		// Matches vm_map_get_max_aslr_slide_pages's image shift in xnu
		aslr_offset = random() % (1 << 28); // about 512 stacks
	} else {
		// Actually bigger than the image shift, we've got ~256MB to work with
		aslr_offset = random() % (16 * PTH_DEFAULT_STACKSIZE);
	}
	aslr_offset = vm_map_trunc_page_mask(aslr_offset, vm_map_page_mask(vmap));
	if (proc64bit) {
		// Above nanomalloc range (see NANOZONE_SIGNATURE)
		stackaddr = 0x700000000000 + aslr_offset;
	} else {
		stackaddr = SHARED_REGION_BASE_I386 + SHARED_REGION_SIZE_I386 + aslr_offset;
	}
#elif defined(__arm__) || defined(__arm64__)
	user_addr_t main_thread_stack_top = 0;
	if (pthread_kern->proc_get_user_stack) {
		main_thread_stack_top = pthread_kern->proc_get_user_stack(p);
	}
	if (proc64bit && main_thread_stack_top) {
		// The main thread stack position is randomly slid by xnu (c.f.
		// load_main() in mach_loader.c), so basing pthread stack allocations
		// where the main thread stack ends is already ASLRd and doing so
		// avoids creating a gap in the process address space that may cause
		// extra PTE memory usage. rdar://problem/33328206
		stackaddr = vm_map_trunc_page_mask((vm_map_offset_t)main_thread_stack_top,
				vm_map_page_mask(vmap));
	} else {
		// vm_map_get_max_aslr_slide_pages ensures 1MB of slide, we do better
		aslr_offset = random() % ((proc64bit ? 4 : 2) * PTH_DEFAULT_STACKSIZE);
		aslr_offset = vm_map_trunc_page_mask((vm_map_offset_t)aslr_offset,
				vm_map_page_mask(vmap));
		if (proc64bit) {
			// 64 stacks below shared region
			stackaddr = SHARED_REGION_BASE_ARM64 - 64 * PTH_DEFAULT_STACKSIZE - aslr_offset;
		} else {
			// If you try to slide down from this point, you risk ending up in memory consumed by malloc
			if (proc64bit_data) {
				stackaddr = SHARED_REGION_BASE_ARM64_32;
			} else {
				stackaddr = SHARED_REGION_BASE_ARM;
			}

			stackaddr -= 32 * PTH_DEFAULT_STACKSIZE + aslr_offset;
		}
	}
#else
#error Need to define a stack address hint for this architecture
#endif

	return stackaddr;
}
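/*
 * [Illustrative sketch, not part of the original source.] Why the
 * _Static_assert in stack_addr_hint() insists on a power-of-two: when the
 * divisor of `random() % n` is a power of two, the modulo reduces to masking
 * the low bits, so the slide is uniform and cheap; the result is then rounded
 * down to a page boundary before use. The helper name is hypothetical.
 */
static inline mach_vm_offset_t __unused
stack_slide_example(vm_map_t vmap)
{
	// 16 * PTH_DEFAULT_STACKSIZE is a power of two, so this is equivalent
	// to (random() & (16 * PTH_DEFAULT_STACKSIZE - 1)).
	mach_vm_offset_t slide = random() % (16 * PTH_DEFAULT_STACKSIZE);
	return vm_map_trunc_page_mask(slide, vm_map_page_mask(vmap));
}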
boolean_t
_pthread_priority_to_policy(pthread_priority_t priority,
		thread_qos_policy_data_t *data)
{
	data->qos_tier = _pthread_priority_thread_qos(priority);
	data->tier_importance = _pthread_priority_relpri(priority);
	if (data->qos_tier == THREAD_QOS_UNSPECIFIED || data->tier_importance > 0 ||
			data->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
		return false;
	}
	return true;
}
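/*
 * [Illustrative sketch, not part of the original source.] Typical use of
 * _pthread_priority_to_policy(), mirroring the PTHREAD_START_QOSCLASS path
 * in _bsdthread_create() below: reject the priority if it does not encode a
 * valid QoS tier and relative priority, otherwise apply it. The helper name
 * example_apply_qos is hypothetical.
 */
static inline int __unused
example_apply_qos(thread_t th, pthread_priority_t priority)
{
	thread_qos_policy_data_t qos;
	if (!_pthread_priority_to_policy(priority, &qos)) {
		return EINVAL; // unspecified tier or out-of-range importance
	}
	pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY,
			(thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
	return 0;
}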
/*
 * bsdthread_create system call. Used by pthread_create.
 */
int
_bsdthread_create(struct proc *p,
		__unused user_addr_t user_func, __unused user_addr_t user_funcarg,
		user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags,
		user_addr_t *retval)
{
	kern_return_t kret;
	void *sright;
	int error = 0;
	mach_vm_offset_t th_tsd_base;
	mach_port_name_t th_thport;
	thread_t th;
	task_t ctask = current_task();
	unsigned int policy, importance;
	uint32_t tsd_offset = 0;
	bool start_suspended = (flags & PTHREAD_START_SUSPENDED);

	if (pthread_kern->proc_get_register(p) == 0) {
		return EINVAL;
	}

	PTHREAD_TRACE(pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0);

	kret = pthread_kern->thread_create(ctask, &th);
	if (kret != KERN_SUCCESS)
		return(ENOMEM);
	thread_reference(th);

	pthread_kern->thread_set_tag(th, THREAD_TAG_PTHREAD);

	sright = (void *)pthread_kern->convert_thread_to_port(th);
	th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(ctask));
	if (!MACH_PORT_VALID(th_thport)) {
		error = EMFILE; // userland will convert this into a crash
		goto out;
	}

	if ((flags & PTHREAD_START_CUSTOM) == 0) {
		error = EINVAL;
		goto out;
	}

	PTHREAD_TRACE(pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3);

	tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
	if (tsd_offset) {
		th_tsd_base = user_pthread + tsd_offset;
		kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base);
		if (kret == KERN_SUCCESS) {
			flags |= PTHREAD_START_TSD_BASE_SET;
		}
	}

	/*
	 * Strip PTHREAD_START_SUSPENDED so that libpthread can observe the kernel
	 * supports this flag (after the fact).
	 */
	flags &= ~PTHREAD_START_SUSPENDED;
	/*
	 * Set up registers & function call.
	 */
#if defined(__i386__) || defined(__x86_64__)
	if (proc_is64bit_data(p)) {
		x86_thread_state64_t state = {
			.rip = (uint64_t)pthread_kern->proc_get_threadstart(p),
			.rdi = (uint64_t)user_pthread,
			.rsi = (uint64_t)th_thport,
			.rdx = (uint64_t)user_func,    /* golang wants this */
			.rcx = (uint64_t)user_funcarg, /* golang wants this */
			.r8  = (uint64_t)user_stack,   /* golang wants this */
			.r9  = (uint64_t)flags,

			.rsp = (uint64_t)user_stack,
		};

		(void)pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
	} else {
		x86_thread_state32_t state = {
			.eip = (uint32_t)pthread_kern->proc_get_threadstart(p),
			.eax = (uint32_t)user_pthread,
			.ebx = (uint32_t)th_thport,
			.ecx = (uint32_t)user_func,    /* golang wants this */
			.edx = (uint32_t)user_funcarg, /* golang wants this */
			.edi = (uint32_t)user_stack,   /* golang wants this */
			.esi = (uint32_t)flags,

			.esp = (uint32_t)user_stack,
		};

		(void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
	}
#elif defined(__arm__) || defined(__arm64__)
	if (proc_is64bit_data(p)) {
#if defined(__arm64__)
		arm_thread_state64_t state = {
			.pc   = (uint64_t)pthread_kern->proc_get_threadstart(p),
			.x[0] = (uint64_t)user_pthread,
			.x[1] = (uint64_t)th_thport,
			.x[2] = (uint64_t)user_func,    /* golang wants this */
			.x[3] = (uint64_t)user_funcarg, /* golang wants this */
			.x[4] = (uint64_t)user_stack,   /* golang wants this */
			.x[5] = (uint64_t)flags,

			.sp   = (uint64_t)user_stack,
		};

		(void)pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
#else
		panic("Shouldn't have a 64-bit thread on a 32-bit kernel...");
#endif // defined(__arm64__)
	} else {
		arm_thread_state_t state = {
			.pc   = (uint32_t)pthread_kern->proc_get_threadstart(p),
			.r[0] = (uint32_t)user_pthread,
			.r[1] = (uint32_t)th_thport,
			.r[2] = (uint32_t)user_func,    /* golang wants this */
			.r[3] = (uint32_t)user_funcarg, /* golang wants this */
			.r[4] = (uint32_t)user_stack,   /* golang wants this */
			.r[5] = (uint32_t)flags,

			.sp   = (uint32_t)user_stack,
		};

		(void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
	}
#else
#error bsdthread_create not defined for this architecture
#endif
	if (flags & PTHREAD_START_SETSCHED) {
		/* Set scheduling parameters if needed */
		thread_extended_policy_data_t extinfo;
		thread_precedence_policy_data_t precedinfo;

		importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
		policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;

		if (policy == SCHED_OTHER) {
			extinfo.timeshare = 1;
		} else {
			extinfo.timeshare = 0;
		}

		thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);

		precedinfo.importance = (importance - BASEPRI_DEFAULT);
		thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
	} else if (flags & PTHREAD_START_QOSCLASS) {
		/* Set thread QoS class if requested. */
		thread_qos_policy_data_t qos;

		if (!_pthread_priority_to_policy(flags & PTHREAD_START_QOSCLASS_MASK, &qos)) {
			error = EINVAL;
			goto out;
		}

		pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY,
				(thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
	}
	if (pthread_kern->proc_get_mach_thread_self_tsd_offset) {
		uint64_t mach_thread_self_offset =
				pthread_kern->proc_get_mach_thread_self_tsd_offset(p);
		if (mach_thread_self_offset && tsd_offset) {
			bool proc64bit = proc_is64bit(p);
			if (proc64bit) {
				uint64_t th_thport_tsd = (uint64_t)th_thport;
				error = copyout(&th_thport_tsd, user_pthread + tsd_offset +
						mach_thread_self_offset, sizeof(th_thport_tsd));
			} else {
				uint32_t th_thport_tsd = (uint32_t)th_thport;
				error = copyout(&th_thport_tsd, user_pthread + tsd_offset +
						mach_thread_self_offset, sizeof(th_thport_tsd));
			}
			if (error) {
				goto out;
			}
		}
	}
	if (!start_suspended) {
		kret = pthread_kern->thread_resume(th);
		if (kret != KERN_SUCCESS) {
			error = EINVAL;
			goto out;
		}
	}
	thread_deallocate(th);	/* drop the creator reference */

	PTHREAD_TRACE(pthread_thread_create|DBG_FUNC_END, error, user_pthread, 0, 0);

	*retval = user_pthread;
	return(0);

out:
	(void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(ctask), th_thport);
	if (pthread_kern->thread_will_park_or_terminate) {
		pthread_kern->thread_will_park_or_terminate(th);
	}
	(void)thread_terminate(th);
	(void)thread_deallocate(th);
	return(error);
}
/*
 * bsdthread_terminate system call. Used by pthread_terminate
 */
int
_bsdthread_terminate(__unused struct proc *p,
		user_addr_t stackaddr,
		size_t size,
		uint32_t kthport,
		uint32_t sem,
		__unused int32_t *retval)
{
	mach_vm_offset_t freeaddr;
	mach_vm_size_t freesize;
	kern_return_t kret;
	thread_t th = current_thread();

	freeaddr = (mach_vm_offset_t)stackaddr;
	freesize = size;

	PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_START, freeaddr, freesize, kthport, 0xff);

	if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
		if (pthread_kern->thread_get_tag(th) & THREAD_TAG_MAINTHREAD) {
			vm_map_t user_map = pthread_kern->current_map();
			freesize = vm_map_trunc_page_mask((vm_map_offset_t)freesize - 1, vm_map_page_mask(user_map));
			kret = mach_vm_behavior_set(user_map, freeaddr, freesize, VM_BEHAVIOR_REUSABLE);
#if MACH_ASSERT
			if (kret != KERN_SUCCESS && kret != KERN_INVALID_ADDRESS) {
				os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kret);
			}
#endif
			kret = kret ? kret : mach_vm_protect(user_map, freeaddr, freesize, FALSE, VM_PROT_NONE);
			assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS);
		} else {
			kret = mach_vm_deallocate(pthread_kern->current_map(), freeaddr, freesize);
			if (kret != KERN_SUCCESS) {
				PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0);
			}
		}
	}

	if (pthread_kern->thread_will_park_or_terminate) {
		pthread_kern->thread_will_park_or_terminate(th);
	}
	(void)thread_terminate(th);
	if (sem != MACH_PORT_NULL) {
		kret = pthread_kern->semaphore_signal_internal_trap(sem);
		if (kret != KERN_SUCCESS) {
			PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0);
		}
	}

	if (kthport != MACH_PORT_NULL) {
		pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(current_task()), kthport);
	}

	PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, 0, 0, 0, 0);

	pthread_kern->thread_exception_return();
	__builtin_unreachable();
}
/*
 * bsdthread_register system call. Performs per-process setup. Responsible for
 * returning capability bits to userspace and receiving userspace function addresses.
 */
int
_bsdthread_register(struct proc *p,
		user_addr_t threadstart,
		user_addr_t wqthread,
		int pthsize,
		user_addr_t pthread_init_data,
		user_addr_t pthread_init_data_size,
		uint64_t dispatchqueue_offset,
		int32_t *retval)
{
	struct _pthread_registration_data data = {};
	uint32_t max_tsd_offset;
	kern_return_t kr;
	size_t pthread_init_sz = 0;

	/* syscall randomizer test can pass bogus values */
	if (pthsize < 0 || pthsize > MAX_PTHREAD_SIZE) {
		return(EINVAL);
	}
	/*
	 * if we have pthread_init_data, then we use that and target_concptr
	 * (which is an offset) to get data.
	 */
	if (pthread_init_data != 0) {
		if (pthread_init_data_size < sizeof(data.version)) {
			return EINVAL;
		}

		pthread_init_sz = MIN(sizeof(data), (size_t)pthread_init_data_size);
		int ret = copyin(pthread_init_data, &data, pthread_init_sz);
		if (ret) {
			return ret;
		}
		if (data.version != (size_t)pthread_init_data_size) {
			return EINVAL;
		}
	} else {
		data.dispatch_queue_offset = dispatchqueue_offset;
	}

	/* We have to do this before proc_get_register so that it resets after fork */
	mach_vm_offset_t stackaddr = stack_addr_hint(p, pthread_kern->current_map());
	pthread_kern->proc_set_stack_addr_hint(p, (user_addr_t)stackaddr);

	/* prevent multiple registrations */
	if (pthread_kern->proc_get_register(p) != 0) {
		return(EINVAL);
	}

	pthread_kern->proc_set_threadstart(p, threadstart);
	pthread_kern->proc_set_wqthread(p, wqthread);
	pthread_kern->proc_set_pthsize(p, pthsize);
	pthread_kern->proc_set_register(p);

	uint32_t tsd_slot_sz = proc_is64bit(p) ? sizeof(uint64_t) : sizeof(uint32_t);
	if ((uint32_t)pthsize >= tsd_slot_sz &&
			data.tsd_offset <= (uint32_t)(pthsize - tsd_slot_sz)) {
		max_tsd_offset = ((uint32_t)pthsize - data.tsd_offset - tsd_slot_sz);
	} else {
		data.tsd_offset = 0;
		max_tsd_offset = 0;
	}
	pthread_kern->proc_set_pthread_tsd_offset(p, data.tsd_offset);

	if (data.dispatch_queue_offset > max_tsd_offset) {
		data.dispatch_queue_offset = 0;
	}
	pthread_kern->proc_set_dispatchqueue_offset(p, data.dispatch_queue_offset);

	if (pthread_kern->proc_set_return_to_kernel_offset) {
		if (data.return_to_kernel_offset > max_tsd_offset) {
			data.return_to_kernel_offset = 0;
		}
		pthread_kern->proc_set_return_to_kernel_offset(p,
				data.return_to_kernel_offset);
	}

	if (pthread_kern->proc_set_mach_thread_self_tsd_offset) {
		if (data.mach_thread_self_offset > max_tsd_offset) {
			data.mach_thread_self_offset = 0;
		}
		pthread_kern->proc_set_mach_thread_self_tsd_offset(p,
				data.mach_thread_self_offset);
	}

	if (pthread_init_data != 0) {
		/* Outgoing data that userspace expects as a reply */
		data.version = sizeof(struct _pthread_registration_data);
		data.main_qos = _pthread_unspecified_priority();

		if (pthread_kern->qos_main_thread_active()) {
			mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
			thread_qos_policy_data_t qos;
			boolean_t gd = FALSE;

			kr = pthread_kern->thread_policy_get(current_thread(),
					THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
			if (kr != KERN_SUCCESS || qos.qos_tier == THREAD_QOS_UNSPECIFIED) {
				/*
				 * Unspecified threads means the kernel wants us
				 * to impose legacy upon the thread.
				 */
				qos.qos_tier = THREAD_QOS_LEGACY;
				qos.tier_importance = 0;

				kr = pthread_kern->thread_policy_set_internal(current_thread(),
						THREAD_QOS_POLICY, (thread_policy_t)&qos,
						THREAD_QOS_POLICY_COUNT);
			}

			if (kr == KERN_SUCCESS) {
				data.main_qos = _pthread_priority_make_from_thread_qos(
						qos.qos_tier, 0, 0);
			}
		}

		data.stack_addr_hint = stackaddr;
		data.mutex_default_policy = pthread_mutex_default_policy;

		kr = copyout(&data, pthread_init_data, pthread_init_sz);
		if (kr != KERN_SUCCESS) {
			return EINVAL;
		}
	}

	/* return the supported feature set as the return value. */
	*retval = PTHREAD_FEATURE_SUPPORTED;

	return(0);
}
#pragma mark - Workqueue Thread Support
static mach_vm_size_t
workq_thread_allocsize(proc_t p, vm_map_t wq_map,
		mach_vm_size_t *guardsize_out)
{
	mach_vm_size_t guardsize = vm_map_page_size(wq_map);
	mach_vm_size_t pthread_size = vm_map_round_page_mask(
			pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET,
			vm_map_page_mask(wq_map));
	if (guardsize_out) *guardsize_out = guardsize;
	return guardsize + PTH_DEFAULT_STACKSIZE + pthread_size;
}
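/*
 * [Illustrative worked example, not part of the original source.] On a
 * hypothetical configuration with 16K VM pages, a 512K PTH_DEFAULT_STACKSIZE,
 * a 4K pthread_t, and the arm64 PTHREAD_T_OFFSET of 12K:
 *   guardsize    = 16K                        (one VM page)
 *   pthread_size = round_page(4K + 12K) = 16K
 *   total        = 16K + 512K + 16K     = 544K per workqueue thread.
 */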
int
workq_create_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t *out_addr)
{
	mach_vm_offset_t stackaddr = pthread_kern->proc_get_stack_addr_hint(p);
	mach_vm_size_t guardsize, th_allocsize;
	kern_return_t kret;

	th_allocsize = workq_thread_allocsize(p, vmap, &guardsize);
	kret = mach_vm_map(vmap, &stackaddr, th_allocsize, page_size - 1,
			VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE, NULL, 0, FALSE,
			VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
	if (kret != KERN_SUCCESS) {
		kret = mach_vm_allocate(vmap, &stackaddr, th_allocsize,
				VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
	}
	if (kret != KERN_SUCCESS) {
		goto fail;
	}

	/*
	 * The guard page is at the lowest address
	 * The stack base is the highest address
	 */
	kret = mach_vm_protect(vmap, stackaddr, guardsize, FALSE, VM_PROT_NONE);
	if (kret != KERN_SUCCESS) {
		goto fail_vm_deallocate;
	}

	if (out_addr) {
		*out_addr = stackaddr;
	}
	return 0;

fail_vm_deallocate:
	(void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);
fail:
	return kret;
}
kern_return_t
workq_destroy_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t stackaddr)
{
	return mach_vm_deallocate(vmap, stackaddr,
			workq_thread_allocsize(p, vmap, NULL));
}
void
workq_markfree_threadstack(proc_t OS_UNUSED p, thread_t OS_UNUSED th,
		vm_map_t vmap, user_addr_t stackaddr)
{
	// Keep this in sync with workq_setup_thread()
	const vm_size_t guardsize = vm_map_page_size(vmap);
	const user_addr_t freeaddr = (user_addr_t)stackaddr + guardsize;
	const vm_map_offset_t freesize = vm_map_trunc_page_mask(
			(PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET) - 1,
			vm_map_page_mask(vmap)) - guardsize;

	__assert_only kern_return_t kr = mach_vm_behavior_set(vmap, freeaddr,
			freesize, VM_BEHAVIOR_REUSABLE);
#if MACH_ASSERT
	if (kr != KERN_SUCCESS && kr != KERN_INVALID_ADDRESS) {
		os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kr);
	}
#endif
}
struct workq_thread_addrs {
	user_addr_t self;
	user_addr_t stack_bottom;
	user_addr_t stack_top;
};

static inline void
workq_thread_set_top_addr(struct workq_thread_addrs *th_addrs, user_addr_t addr)
{
	th_addrs->stack_top = (addr & -C_WORKQ_STK_ALIGN);
}
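/*
 * [Illustrative note, not part of the original source.] The expression
 * (addr & -C_WORKQ_STK_ALIGN) rounds the stack top down to the required
 * alignment: with C_WORKQ_STK_ALIGN == 16, -16 is ...11110000 in two's
 * complement, so the low four bits are cleared, e.g. 0x1003F -> 0x10030.
 */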
static inline void
workq_thread_get_addrs(vm_map_t map, user_addr_t stackaddr,
		struct workq_thread_addrs *th_addrs)
{
	const vm_size_t guardsize = vm_map_page_size(map);

	th_addrs->self = (user_addr_t)(stackaddr + PTH_DEFAULT_STACKSIZE +
			guardsize + PTHREAD_T_OFFSET);
	workq_thread_set_top_addr(th_addrs, th_addrs->self);
	th_addrs->stack_bottom = (user_addr_t)(stackaddr + guardsize);
}
static void
workq_set_register_state(proc_t p, thread_t th,
		struct workq_thread_addrs *addrs, mach_port_name_t kport,
		user_addr_t kevent_list, uint32_t upcall_flags, int kevent_count)
{
	user_addr_t wqstart_fnptr = pthread_kern->proc_get_wqthread(p);
	if (!wqstart_fnptr) {
		panic("workqueue thread start function pointer is NULL");
	}

#if defined(__i386__) || defined(__x86_64__)
	if (proc_is64bit_data(p) == 0) {
		x86_thread_state32_t state = {
			.eip = (unsigned int)wqstart_fnptr,
			.eax = /* arg0 */ (unsigned int)addrs->self,
			.ebx = /* arg1 */ (unsigned int)kport,
			.ecx = /* arg2 */ (unsigned int)addrs->stack_bottom,
			.edx = /* arg3 */ (unsigned int)kevent_list,
			.edi = /* arg4 */ (unsigned int)upcall_flags,
			.esi = /* arg5 */ (unsigned int)kevent_count,

			.esp = (int)((vm_offset_t)addrs->stack_top),
		};

		int error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
		if (error != KERN_SUCCESS) {
			panic("%s: thread_set_wq_state failed: %d", __func__, error);
		}
	} else {
		x86_thread_state64_t state64 = {
			// x86-64 already passes all the arguments in registers, so we just put them in their final place here
			.rip = (uint64_t)wqstart_fnptr,
			.rdi = (uint64_t)addrs->self,
			.rsi = (uint64_t)kport,
			.rdx = (uint64_t)addrs->stack_bottom,
			.rcx = (uint64_t)kevent_list,
			.r8  = (uint64_t)upcall_flags,
			.r9  = (uint64_t)kevent_count,

			.rsp = (uint64_t)(addrs->stack_top)
		};

		int error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64);
		if (error != KERN_SUCCESS) {
			panic("%s: thread_set_wq_state failed: %d", __func__, error);
		}
	}
#elif defined(__arm__) || defined(__arm64__)
	if (!proc_is64bit_data(p)) {
		arm_thread_state_t state = {
			.pc   = (int)wqstart_fnptr,
			.r[0] = (unsigned int)addrs->self,
			.r[1] = (unsigned int)kport,
			.r[2] = (unsigned int)addrs->stack_bottom,
			.r[3] = (unsigned int)kevent_list,
			// will be pushed onto the stack as arg4/5
			.r[4] = (unsigned int)upcall_flags,
			.r[5] = (unsigned int)kevent_count,

			.sp   = (int)(addrs->stack_top)
		};

		int error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
		if (error != KERN_SUCCESS) {
			panic("%s: thread_set_wq_state failed: %d", __func__, error);
		}
	} else {
#if defined(__arm64__)
		arm_thread_state64_t state = {
			.pc   = (uint64_t)wqstart_fnptr,
			.x[0] = (uint64_t)addrs->self,
			.x[1] = (uint64_t)kport,
			.x[2] = (uint64_t)addrs->stack_bottom,
			.x[3] = (uint64_t)kevent_list,
			.x[4] = (uint64_t)upcall_flags,
			.x[5] = (uint64_t)kevent_count,

			.sp   = (uint64_t)((vm_offset_t)addrs->stack_top),
		};

		int error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
		if (error != KERN_SUCCESS) {
			panic("%s: thread_set_wq_state failed: %d", __func__, error);
		}
#else /* defined(__arm64__) */
		panic("Shouldn't have a 64-bit thread on a 32-bit kernel...");
#endif /* defined(__arm64__) */
	}
#else
#error setup_wqthread not defined for this architecture
#endif
}
static int
workq_kevent(proc_t p, struct workq_thread_addrs *th_addrs,
		user_addr_t eventlist, int nevents, int kevent_flags,
		user_addr_t *kevent_list_out, int *kevent_count_out)
{
	int ret;

	user_addr_t kevent_list = th_addrs->self -
			WQ_KEVENT_LIST_LEN * sizeof(struct kevent_qos_s);
	user_addr_t data_buf = kevent_list - WQ_KEVENT_DATA_SIZE;
	user_size_t data_available = WQ_KEVENT_DATA_SIZE;

	ret = pthread_kern->kevent_workq_internal(p, eventlist, nevents,
			kevent_list, WQ_KEVENT_LIST_LEN,
			data_buf, &data_available,
			kevent_flags, kevent_count_out);

	// squash any errors into just empty output
	if (ret != 0 || *kevent_count_out == -1) {
		*kevent_list_out = NULL;
		*kevent_count_out = 0;
		return ret;
	}

	workq_thread_set_top_addr(th_addrs, data_buf + data_available);
	*kevent_list_out = kevent_list;
	return ret;
}
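/*
 * [Illustrative worked example, not part of the original source.] The kevent
 * list and data buffer are carved out of the top of the thread's stack, just
 * below the pthread_t. Assuming WQ_KEVENT_LIST_LEN == 16 and
 * WQ_KEVENT_DATA_SIZE == 32K (both defined elsewhere in this project; the
 * exact values here are assumptions):
 *   kevent_list = self - 16 * sizeof(struct kevent_qos_s)
 *   data_buf    = kevent_list - 32K
 * and the new stack top becomes data_buf plus whatever data space the kevent
 * call left unused, re-aligned downward to C_WORKQ_STK_ALIGN.
 */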
/**
 * configures initial thread stack/registers to jump into:
 * _pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *keventlist, int upcall_flags, int nkevents);
 * to get there we jump through assembly stubs in pthread_asm.s. Those
 * routines set up a stack frame, using the current stack pointer, and marshal
 * arguments from registers to the stack as required by the ABI.
 *
 * One odd thing we do here is to start the pthread_t 4k below what would be the
 * top of the stack otherwise. This is because usually only the first 4k of the
 * pthread_t will be used and so we want to put it on the same 16k page as the
 * top of the stack to save memory.
 *
 * When we are done the stack will look like:
 * |-----------| th_stackaddr + th_allocsize
 * |pthread_t  | th_stackaddr + DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET
 * |kevent list| optionally - at most WQ_KEVENT_LIST_LEN events
 * |kevent data| optionally - at most WQ_KEVENT_DATA_SIZE bytes
 * |stack gap  | bottom aligned to 16 bytes
 * |   stack   |
 * |     |     |
 * |     V     |
 * |guard page | guardsize
 * |-----------| th_stackaddr
 */
__attribute__((noreturn,noinline))
void
workq_setup_thread(proc_t p, thread_t th, vm_map_t map, user_addr_t stackaddr,
		mach_port_name_t kport, int th_qos __unused, int setup_flags, int upcall_flags)
{
	struct workq_thread_addrs th_addrs;
	bool first_use = (setup_flags & WQ_SETUP_FIRST_USE);
	user_addr_t kevent_list = NULL;
	int kevent_count = 0;

	workq_thread_get_addrs(map, stackaddr, &th_addrs);

	if (first_use) {
		uint32_t tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
		if (tsd_offset) {
			mach_vm_offset_t th_tsd_base = th_addrs.self + tsd_offset;
			kern_return_t kret = pthread_kern->thread_set_tsd_base(th,
					th_tsd_base);
			if (kret == KERN_SUCCESS) {
				upcall_flags |= WQ_FLAG_THREAD_TSD_BASE_SET;
			}
		}

		/*
		 * Pre-fault the first page of the new thread's stack and the page that will
		 * contain the pthread_t structure.
		 */
		vm_map_offset_t mask = vm_map_page_mask(map);
		vm_map_offset_t th_page = vm_map_trunc_page_mask(th_addrs.self, mask);
		vm_map_offset_t stk_page = vm_map_trunc_page_mask(th_addrs.stack_top - 1, mask);
		if (th_page != stk_page) {
			vm_fault(map, stk_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0);
		}
		vm_fault(map, th_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0);
	}

	if (setup_flags & WQ_SETUP_EXIT_THREAD) {
		kevent_count = WORKQ_EXIT_THREAD_NKEVENT;
	} else if (upcall_flags & WQ_FLAG_THREAD_KEVENT) {
		unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE;
		workq_kevent(p, &th_addrs, NULL, 0, flags, &kevent_list, &kevent_count);
	}

	workq_set_register_state(p, th, &th_addrs, kport,
			kevent_list, upcall_flags, kevent_count);

	if (first_use) {
		pthread_kern->thread_bootstrap_return();
	} else {
		pthread_kern->unix_syscall_return(EJUSTRETURN);
	}
	__builtin_unreachable();
}
int
workq_handle_stack_events(proc_t p, thread_t th, vm_map_t map,
		user_addr_t stackaddr, mach_port_name_t kport,
		user_addr_t events, int nevents, int upcall_flags)
{
	struct workq_thread_addrs th_addrs;
	user_addr_t kevent_list = NULL;
	int kevent_count = 0, error;
	__assert_only kern_return_t kr;

	workq_thread_get_addrs(map, stackaddr, &th_addrs);

	unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE |
			KEVENT_FLAG_PARKING;
	error = workq_kevent(p, &th_addrs, events, nevents, flags,
			&kevent_list, &kevent_count);

	if (error || kevent_count == 0) {
		return error;
	}

	kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL);
	assert(kr == KERN_SUCCESS);

	workq_set_register_state(p, th, &th_addrs, kport,
			kevent_list, upcall_flags, kevent_count);

	pthread_kern->unix_syscall_return(EJUSTRETURN);
	__builtin_unreachable();
}
int
_thread_selfid(__unused struct proc *p, uint64_t *retval)
{
	thread_t thread = current_thread();
	*retval = thread_tid(thread);
	return KERN_SUCCESS;
}
void
pthread_init(void)
{
	pthread_lck_grp_attr = lck_grp_attr_alloc_init();
	pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);

	/*
	 * allocate the lock attribute for pthread synchronizers
	 */
	pthread_lck_attr = lck_attr_alloc_init();
	pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);

	pth_global_hashinit();
	psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);
	psynch_zoneinit();

	int policy_bootarg;
	if (PE_parse_boot_argn("pthread_mutex_default_policy", &policy_bootarg, sizeof(policy_bootarg))) {
		pthread_mutex_default_policy = policy_bootarg;
	}

	sysctl_register_oid(&sysctl__kern_pthread_mutex_default_policy);
}