/*
 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
#pragma mark - Front Matter

#define _PTHREAD_CONDATTR_T
#define _PTHREAD_COND_T
#define _PTHREAD_MUTEXATTR_T
#define _PTHREAD_MUTEX_T
#define _PTHREAD_RWLOCKATTR_T
#define _PTHREAD_RWLOCK_T

#undef pthread_mutexattr_t
#undef pthread_mutex_t
#undef pthread_condattr_t
#undef pthread_cond_t
#undef pthread_rwlockattr_t
#undef pthread_rwlock_t
#include <sys/cdefs.h>
#include <os/log.h>

// <rdar://problem/26158937> panic() should be marked noreturn
extern void panic(const char *string, ...) __printflike(1,2) __dead2;
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
//#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/timeb.h>
#include <sys/times.h>
#include <sys/kernel.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/kdebug.h>
//#include <sys/sysproto.h>
#include <sys/user.h>		/* for coredump */
#include <sys/proc_info.h>	/* for fill_procworkqueue */
#include <mach/mach_port.h>
#include <mach/mach_types.h>
#include <mach/semaphore.h>
#include <mach/sync_policy.h>
#include <mach/task.h>
#include <mach/vm_prot.h>
#include <kern/kern_types.h>
#include <kern/task.h>
#include <kern/clock.h>
#include <mach/kern_return.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/sched_prim.h>	/* for thread_exception_return */
#include <kern/processor.h>
#include <kern/assert.h>
#include <mach/mach_vm.h>
#include <mach/mach_param.h>
#include <mach/thread_status.h>
#include <mach/thread_policy.h>
#include <mach/message.h>
#include <mach/port.h>
//#include <vm/vm_protos.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <mach/thread_act.h>	/* for thread_resume */
#include <machine/machine_routines.h>
#include <mach/shared_region.h>

#include <libkern/OSAtomic.h>
#include <libkern/libkern.h>

#include "kern_internal.h"
#ifndef WQ_SETUP_EXIT_THREAD
#define WQ_SETUP_EXIT_THREAD 8
#endif

// XXX: Ditto for thread tags from kern/thread.h
#define THREAD_TAG_MAINTHREAD 0x1
#define THREAD_TAG_PTHREAD 0x10
#define THREAD_TAG_WORKQUEUE 0x20
lck_grp_attr_t *pthread_lck_grp_attr;
lck_grp_t *pthread_lck_grp;
lck_attr_t *pthread_lck_attr;
#define C_32_STK_ALIGN 16
#define C_64_STK_ALIGN 16
#define C_64_REDZONE_LEN 128

// WORKQ uses the largest stack alignment any platform needs
#define C_WORKQ_STK_ALIGN 16

#define PTHREAD_T_OFFSET 0
/*
 * Flags field passed to bsdthread_create and back in pthread_start
 * 31  <---------------------------------> 0
 * _________________________________________
 * | flags(8) | policy(8) | importance(16) |
 * -----------------------------------------
 */
#define PTHREAD_START_CUSTOM		0x01000000 // <rdar://problem/34501401>
#define PTHREAD_START_SETSCHED		0x02000000
// was PTHREAD_START_DETACHED	0x04000000
#define PTHREAD_START_QOSCLASS		0x08000000
#define PTHREAD_START_TSD_BASE_SET	0x10000000
#define PTHREAD_START_SUSPENDED		0x20000000
#define PTHREAD_START_QOSCLASS_MASK	0x00ffffff
#define PTHREAD_START_POLICY_BITSHIFT	16
#define PTHREAD_START_POLICY_MASK	0xff
#define PTHREAD_START_IMPORTANCE_MASK	0xffff
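
/*
 * Illustrative only: a minimal sketch of how a userspace caller might pack
 * the flags word diagrammed above. The helper name is hypothetical; only the
 * masks and shift come from this file.
 */
#if 0 /* not compiled; illustration of the bit layout */
static uint32_t
pack_setsched_flags(uint8_t policy, uint16_t importance)
{
	uint32_t flags = PTHREAD_START_CUSTOM | PTHREAD_START_SETSCHED;
	// policy occupies bits 16..23, importance bits 0..15 (see diagram above)
	flags |= ((uint32_t)policy & PTHREAD_START_POLICY_MASK) << PTHREAD_START_POLICY_BITSHIFT;
	flags |= (uint32_t)importance & PTHREAD_START_IMPORTANCE_MASK;
	return flags;
}
#endif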
#define SCHED_OTHER	POLICY_TIMESHARE
#define SCHED_FIFO	POLICY_FIFO
#define SCHED_RR	POLICY_RR

#define BASEPRI_DEFAULT 31
uint32_t pthread_debug_tracing = 1;

static uint32_t pthread_mutex_default_policy;

SYSCTL_INT(_kern, OID_AUTO, pthread_mutex_default_policy, CTLFLAG_RW | CTLFLAG_LOCKED,
		&pthread_mutex_default_policy, 0, "");
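
/*
 * Illustrative only: the policy above is visible to userspace through the
 * standard sysctl interface, e.g.:
 */
#if 0 /* not compiled; userspace-side sketch */
#include <sys/sysctl.h>
int policy;
size_t len = sizeof(policy);
if (sysctlbyname("kern.pthread_mutex_default_policy", &policy, &len, NULL, 0) == 0) {
	/* policy now holds the default mutex policy exported above */
}
#endif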
#pragma mark - Process/Thread Setup/Teardown syscalls
static mach_vm_offset_t
stack_addr_hint(proc_t p, vm_map_t vmap)
{
	mach_vm_offset_t stackaddr;
	mach_vm_offset_t aslr_offset;
	bool proc64bit = proc_is64bit(p);

	// We can't safely take random values % something unless it's a power-of-two
	_Static_assert(powerof2(PTH_DEFAULT_STACKSIZE), "PTH_DEFAULT_STACKSIZE is a power-of-two");
#if defined(__i386__) || defined(__x86_64__)
	if (proc64bit) {
		// Matches vm_map_get_max_aslr_slide_pages's image shift in xnu
		aslr_offset = random() % (1 << 28); // about 512 stacks
	} else {
		// Actually bigger than the image shift, we've got ~256MB to work with
		aslr_offset = random() % (16 * PTH_DEFAULT_STACKSIZE);
	}
	aslr_offset = vm_map_trunc_page_mask(aslr_offset, vm_map_page_mask(vmap));
	if (proc64bit) {
		// Above nanomalloc range (see NANOZONE_SIGNATURE)
		stackaddr = 0x700000000000 + aslr_offset;
	} else {
		stackaddr = SHARED_REGION_BASE_I386 + SHARED_REGION_SIZE_I386 + aslr_offset;
	}
#elif defined(__arm__) || defined(__arm64__)
	user_addr_t main_thread_stack_top = 0;
	if (pthread_kern->proc_get_user_stack) {
		main_thread_stack_top = pthread_kern->proc_get_user_stack(p);
	}
	if (proc64bit && main_thread_stack_top) {
		// The main thread stack position is randomly slid by xnu (c.f.
		// load_main() in mach_loader.c), so basing pthread stack allocations
		// where the main thread stack ends is already ASLRd and doing so
		// avoids creating a gap in the process address space that may cause
		// extra PTE memory usage. rdar://problem/33328206
		stackaddr = vm_map_trunc_page_mask((vm_map_offset_t)main_thread_stack_top,
				vm_map_page_mask(vmap));
	} else {
		// vm_map_get_max_aslr_slide_pages ensures 1MB of slide, we do better
		aslr_offset = random() % ((proc64bit ? 4 : 2) * PTH_DEFAULT_STACKSIZE);
		aslr_offset = vm_map_trunc_page_mask((vm_map_offset_t)aslr_offset,
				vm_map_page_mask(vmap));
		if (proc64bit) {
			// 64 stacks below shared region
			stackaddr = SHARED_REGION_BASE_ARM64 - 64 * PTH_DEFAULT_STACKSIZE - aslr_offset;
		} else {
			// If you try to slide down from this point, you risk ending up in memory consumed by malloc
			stackaddr = SHARED_REGION_BASE_ARM - 32 * PTH_DEFAULT_STACKSIZE + aslr_offset;
		}
	}
#else
#error Need to define a stack address hint for this architecture
#endif
	return stackaddr;
}
static bool
_pthread_priority_to_policy(pthread_priority_t priority,
		thread_qos_policy_data_t *data)
{
	data->qos_tier = _pthread_priority_thread_qos(priority);
	data->tier_importance = _pthread_priority_relpri(priority);
	if (data->qos_tier == THREAD_QOS_UNSPECIFIED || data->tier_importance > 0 ||
			data->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
		return false;
	}
	return true;
}

/*
 * bsdthread_create system call. Used by pthread_create.
 */
int
_bsdthread_create(struct proc *p,
		__unused user_addr_t user_func, __unused user_addr_t user_funcarg,
		user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags,
		user_addr_t *retval)
{
	kern_return_t kret;
	void *sright;
	int error = 0;
	mach_vm_offset_t th_tsd_base;
	mach_port_name_t th_thport;
	thread_t th;
	task_t ctask = current_task();
	unsigned int policy, importance;
	uint32_t tsd_offset;

	bool start_suspended = (flags & PTHREAD_START_SUSPENDED);
	if (pthread_kern->proc_get_register(p) == 0) {
		return EINVAL;
	}

	PTHREAD_TRACE(pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0);

	kret = pthread_kern->thread_create(ctask, &th);
	if (kret != KERN_SUCCESS)
		return(ENOMEM);
	thread_reference(th);

	pthread_kern->thread_set_tag(th, THREAD_TAG_PTHREAD);
	sright = (void *)pthread_kern->convert_thread_to_port(th);
	th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(ctask));
	if (!MACH_PORT_VALID(th_thport)) {
		error = EMFILE; // userland will convert this into a crash
		goto out;
	}

	if ((flags & PTHREAD_START_CUSTOM) == 0) {
		error = EINVAL;
		goto out;
	}

	PTHREAD_TRACE(pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3);
	tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
	if (tsd_offset) {
		th_tsd_base = user_pthread + tsd_offset;
		kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base);
		if (kret == KERN_SUCCESS) {
			flags |= PTHREAD_START_TSD_BASE_SET;
		}
	}

	if (start_suspended) {
		/*
		 * Strip PTHREAD_START_SUSPENDED so that libpthread can observe the kernel
		 * supports this flag (after the fact).
		 */
		flags &= ~PTHREAD_START_SUSPENDED;
	}

	/*
	 * Set up registers & function call.
	 */
#if defined(__i386__) || defined(__x86_64__)
	if (proc_is64bit_data(p)) {
		x86_thread_state64_t state = {
			.rip = (uint64_t)pthread_kern->proc_get_threadstart(p),
			.rdi = (uint64_t)user_pthread,
			.rsi = (uint64_t)th_thport,
			.rdx = (uint64_t)user_func,    /* golang wants this */
			.rcx = (uint64_t)user_funcarg, /* golang wants this */
			.r8  = (uint64_t)user_stack,   /* golang wants this */
			.r9  = (uint64_t)flags,

			.rsp = (uint64_t)(user_stack - C_64_REDZONE_LEN)
		};

		(void)pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
	} else {
		x86_thread_state32_t state = {
			.eip = (uint32_t)pthread_kern->proc_get_threadstart(p),
			.eax = (uint32_t)user_pthread,
			.ebx = (uint32_t)th_thport,
			.ecx = (uint32_t)user_func,    /* golang wants this */
			.edx = (uint32_t)user_funcarg, /* golang wants this */
			.edi = (uint32_t)user_stack,   /* golang wants this */
			.esi = (uint32_t)flags,

			.esp = (int)((vm_offset_t)(user_stack - C_32_STK_ALIGN))
		};

		(void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
	}
#else
#error bsdthread_create not defined for this architecture
#endif
	if (flags & PTHREAD_START_SETSCHED) {
		/* Set scheduling parameters if needed */
		thread_extended_policy_data_t extinfo;
		thread_precedence_policy_data_t precedinfo;

		importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
		policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;

		if (policy == SCHED_OTHER) {
			extinfo.timeshare = 1;
		} else {
			extinfo.timeshare = 0;
		}

		thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);

		precedinfo.importance = (importance - BASEPRI_DEFAULT);
		thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
	} else if (flags & PTHREAD_START_QOSCLASS) {
		/* Set thread QoS class if requested. */
		thread_qos_policy_data_t qos;

		if (!_pthread_priority_to_policy(flags & PTHREAD_START_QOSCLASS_MASK, &qos)) {
			error = EINVAL;
			goto out;
		}
		pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY,
				(thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
	}
	if (pthread_kern->proc_get_mach_thread_self_tsd_offset) {
		uint64_t mach_thread_self_offset =
				pthread_kern->proc_get_mach_thread_self_tsd_offset(p);
		if (mach_thread_self_offset && tsd_offset) {
			bool proc64bit = proc_is64bit(p);
			if (proc64bit) {
				uint64_t th_thport_tsd = (uint64_t)th_thport;
				error = copyout(&th_thport_tsd, user_pthread + tsd_offset +
						mach_thread_self_offset, sizeof(th_thport_tsd));
			} else {
				uint32_t th_thport_tsd = (uint32_t)th_thport;
				error = copyout(&th_thport_tsd, user_pthread + tsd_offset +
						mach_thread_self_offset, sizeof(th_thport_tsd));
			}
			if (error) {
				goto out;
			}
		}
	}
	if (!start_suspended) {
		kret = pthread_kern->thread_resume(th);
		if (kret != KERN_SUCCESS) {
			error = EINVAL;
			goto out;
		}
	}

	thread_deallocate(th);	/* drop the creator reference */

	PTHREAD_TRACE(pthread_thread_create|DBG_FUNC_END, error, user_pthread, 0, 0);

	*retval = user_pthread;
	return(0);

out:
	(void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(ctask), th_thport);
	if (pthread_kern->thread_will_park_or_terminate) {
		pthread_kern->thread_will_park_or_terminate(th);
	}
	(void)thread_terminate(th);
	(void)thread_deallocate(th);
	return(error);
}
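
/*
 * For context: the register state built above lands in libpthread's thread
 * entry point. A sketch of that signature, inferred from the register
 * comments above (the exact userspace prototype lives in libpthread, not
 * here):
 *
 *   void _pthread_start(pthread_t self, mach_port_t kport,
 *           void *(*fun)(void *), void *arg, size_t stacksize,
 *           unsigned int flags);
 */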
/*
 * bsdthread_terminate system call. Used by pthread_terminate.
 */
int
_bsdthread_terminate(__unused struct proc *p,
		user_addr_t stackaddr,
		size_t size,
		uint32_t kthport,
		uint32_t sem,
		__unused int32_t *retval)
{
	mach_vm_offset_t freeaddr;
	mach_vm_size_t freesize;
	kern_return_t kret;
	thread_t th = current_thread();

	freeaddr = (mach_vm_offset_t)stackaddr;
	freesize = size;

	PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_START, freeaddr, freesize, kthport, 0xff);
	if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
		if (pthread_kern->thread_get_tag(th) & THREAD_TAG_MAINTHREAD) {
			vm_map_t user_map = pthread_kern->current_map();
			freesize = vm_map_trunc_page_mask((vm_map_offset_t)freesize - 1, vm_map_page_mask(user_map));
			kret = mach_vm_behavior_set(user_map, freeaddr, freesize, VM_BEHAVIOR_REUSABLE);
			if (kret != KERN_SUCCESS && kret != KERN_INVALID_ADDRESS) {
				os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kret);
			}
			kret = kret ? kret : mach_vm_protect(user_map, freeaddr, freesize, FALSE, VM_PROT_NONE);
			assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS);
		} else {
			kret = mach_vm_deallocate(pthread_kern->current_map(), freeaddr, freesize);
			if (kret != KERN_SUCCESS) {
				PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0);
			}
		}
	}
	if (pthread_kern->thread_will_park_or_terminate) {
		pthread_kern->thread_will_park_or_terminate(th);
	}
	(void)thread_terminate(th);
	if (sem != MACH_PORT_NULL) {
		kret = pthread_kern->semaphore_signal_internal_trap(sem);
		if (kret != KERN_SUCCESS) {
			PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0);
		}
	}

	if (kthport != MACH_PORT_NULL) {
		pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(current_task()), kthport);
	}

	PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, 0, 0, 0, 0);

	pthread_kern->thread_exception_return();
	__builtin_unreachable();
}
/*
 * bsdthread_register system call. Performs per-process setup. Responsible for
 * returning capability bits to userspace and receiving userspace function addresses.
 */
int
_bsdthread_register(struct proc *p,
		user_addr_t threadstart,
		user_addr_t wqthread,
		int pthsize,
		user_addr_t pthread_init_data,
		user_addr_t pthread_init_data_size,
		uint64_t dispatchqueue_offset,
		int32_t *retval)
{
	struct _pthread_registration_data data = {};
	uint32_t max_tsd_offset;
	kern_return_t kr;
	size_t pthread_init_sz = 0;
	/* syscall randomizer test can pass bogus values */
	if (pthsize < 0 || pthsize > MAX_PTHREAD_SIZE) {
		return(EINVAL);
	}
	/*
	 * If we have pthread_init_data, then we use that and target_concptr
	 * (which is an offset) to get the data.
	 */
	if (pthread_init_data != 0) {
		if (pthread_init_data_size < sizeof(data.version)) {
			return EINVAL;
		}

		pthread_init_sz = MIN(sizeof(data), (size_t)pthread_init_data_size);
		int ret = copyin(pthread_init_data, &data, pthread_init_sz);
		if (ret) {
			return ret;
		}
		if (data.version != (size_t)pthread_init_data_size) {
			return EINVAL;
		}
	} else {
		data.dispatch_queue_offset = dispatchqueue_offset;
	}
	/* We have to do this before proc_get_register so that it resets after fork */
	mach_vm_offset_t stackaddr = stack_addr_hint(p, pthread_kern->current_map());
	pthread_kern->proc_set_stack_addr_hint(p, (user_addr_t)stackaddr);

	/* prevent multiple registrations */
	if (pthread_kern->proc_get_register(p) != 0) {
		return(EINVAL);
	}

	pthread_kern->proc_set_threadstart(p, threadstart);
	pthread_kern->proc_set_wqthread(p, wqthread);
	pthread_kern->proc_set_pthsize(p, pthsize);
	pthread_kern->proc_set_register(p);
	uint32_t tsd_slot_sz = proc_is64bit(p) ? sizeof(uint64_t) : sizeof(uint32_t);
	if ((uint32_t)pthsize >= tsd_slot_sz &&
			data.tsd_offset <= (uint32_t)(pthsize - tsd_slot_sz)) {
		max_tsd_offset = ((uint32_t)pthsize - data.tsd_offset - tsd_slot_sz);
	} else {
		data.tsd_offset = 0;
		max_tsd_offset = 0;
	}
	pthread_kern->proc_set_pthread_tsd_offset(p, data.tsd_offset);

	if (data.dispatch_queue_offset > max_tsd_offset) {
		data.dispatch_queue_offset = 0;
	}
	pthread_kern->proc_set_dispatchqueue_offset(p, data.dispatch_queue_offset);
	if (pthread_kern->proc_set_return_to_kernel_offset) {
		if (data.return_to_kernel_offset > max_tsd_offset) {
			data.return_to_kernel_offset = 0;
		}
		pthread_kern->proc_set_return_to_kernel_offset(p,
				data.return_to_kernel_offset);
	}

	if (pthread_kern->proc_set_mach_thread_self_tsd_offset) {
		if (data.mach_thread_self_offset > max_tsd_offset) {
			data.mach_thread_self_offset = 0;
		}
		pthread_kern->proc_set_mach_thread_self_tsd_offset(p,
				data.mach_thread_self_offset);
	}
	if (pthread_init_data != 0) {
		/* Outgoing data that userspace expects as a reply */
		data.version = sizeof(struct _pthread_registration_data);
		data.main_qos = _pthread_unspecified_priority();

		if (pthread_kern->qos_main_thread_active()) {
			mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
			thread_qos_policy_data_t qos;
			boolean_t gd = FALSE;

			kr = pthread_kern->thread_policy_get(current_thread(),
					THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
			if (kr != KERN_SUCCESS || qos.qos_tier == THREAD_QOS_UNSPECIFIED) {
				/*
				 * An unspecified QoS means the kernel wants us
				 * to impose legacy upon the thread.
				 */
				qos.qos_tier = THREAD_QOS_LEGACY;
				qos.tier_importance = 0;

				kr = pthread_kern->thread_policy_set_internal(current_thread(),
						THREAD_QOS_POLICY, (thread_policy_t)&qos,
						THREAD_QOS_POLICY_COUNT);
			}

			if (kr == KERN_SUCCESS) {
				data.main_qos = _pthread_priority_make_from_thread_qos(
						qos.qos_tier, 0, 0);
			}
		}
		data.stack_addr_hint = stackaddr;
		data.mutex_default_policy = pthread_mutex_default_policy;

		kr = copyout(&data, pthread_init_data, pthread_init_sz);
		if (kr != KERN_SUCCESS) {
			return(EINVAL);
		}
	}

	/* return the supported feature set as the return value. */
	*retval = PTHREAD_FEATURE_SUPPORTED;

	return(0);
}
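
/*
 * Illustrative only: a minimal sketch of the userspace half of this
 * handshake. The wrapper name and field values are assumptions; the struct
 * is the one declared in kern_internal.h.
 */
#if 0 /* not compiled; userspace-side sketch */
struct _pthread_registration_data data = {
	.version = sizeof(data), /* must equal the size argument, or EINVAL */
	.dispatch_queue_offset = DISPATCH_QUEUE_OFFSET, /* hypothetical constant */
};
/* On success the kernel fills in main_qos, stack_addr_hint and
 * mutex_default_policy, and the call returns PTHREAD_FEATURE_SUPPORTED. */
__bsdthread_register(thread_start, wqthread_start, sizeof(struct _pthread),
		&data, sizeof(data), 0);
#endif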
#pragma mark - Workqueue Thread Support
static mach_vm_size_t
workq_thread_allocsize(proc_t p, vm_map_t wq_map,
		mach_vm_size_t *guardsize_out)
{
	mach_vm_size_t guardsize = vm_map_page_size(wq_map);
	mach_vm_size_t pthread_size = vm_map_round_page_mask(
			pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET,
			vm_map_page_mask(wq_map));
	if (guardsize_out) *guardsize_out = guardsize;
	return guardsize + PTH_DEFAULT_STACKSIZE + pthread_size;
}
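
/*
 * Worked example (assumptions: 16K kernel pages, PTH_DEFAULT_STACKSIZE of
 * 512K, and a pthread_t that rounds up to one page): guardsize = 16K and
 * pthread_size = 16K, so each workqueue thread costs 16K + 512K + 16K = 544K
 * of address space.
 */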
int
workq_create_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t *out_addr)
{
	mach_vm_offset_t stackaddr = pthread_kern->proc_get_stack_addr_hint(p);
	mach_vm_size_t guardsize, th_allocsize;
	kern_return_t kret;

	th_allocsize = workq_thread_allocsize(p, vmap, &guardsize);
	kret = mach_vm_map(vmap, &stackaddr, th_allocsize, page_size - 1,
			VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE, NULL, 0, FALSE,
			VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
	if (kret != KERN_SUCCESS) {
		kret = mach_vm_allocate(vmap, &stackaddr, th_allocsize,
				VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
	}
	if (kret != KERN_SUCCESS) {
		goto fail;
	}

	/*
	 * The guard page is at the lowest address.
	 * The stack base is the highest address.
	 */
	kret = mach_vm_protect(vmap, stackaddr, guardsize, FALSE, VM_PROT_NONE);
	if (kret != KERN_SUCCESS) {
		goto fail_vm_deallocate;
	}

	if (out_addr) {
		*out_addr = stackaddr;
	}
	return 0;

fail_vm_deallocate:
	(void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);
fail:
	return kret;
}
kern_return_t
workq_destroy_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t stackaddr)
{
	return mach_vm_deallocate(vmap, stackaddr,
			workq_thread_allocsize(p, vmap, NULL));
}
void
workq_markfree_threadstack(proc_t OS_UNUSED p, thread_t OS_UNUSED th,
		vm_map_t vmap, user_addr_t stackaddr)
{
	// Keep this in sync with workq_setup_thread()
	const vm_size_t guardsize = vm_map_page_size(vmap);
	const user_addr_t freeaddr = (user_addr_t)stackaddr + guardsize;
	const vm_map_offset_t freesize = vm_map_trunc_page_mask(
			(PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET) - 1,
			vm_map_page_mask(vmap)) - guardsize;

	__assert_only kern_return_t kr = mach_vm_behavior_set(vmap, freeaddr,
			freesize, VM_BEHAVIOR_REUSABLE);
#if MACH_ASSERT
	if (kr != KERN_SUCCESS && kr != KERN_INVALID_ADDRESS) {
		os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kr);
	}
#endif
}
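
/*
 * Note: VM_BEHAVIOR_REUSABLE tells the VM layer that the page contents no
 * longer matter, so the dirty stack pages can be reclaimed without tearing
 * down the mapping; a later touch re-faults zero-fill pages in place.
 */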
struct workq_thread_addrs {
	user_addr_t self;
	user_addr_t stack_bottom;
	user_addr_t stack_top;
};
static inline void
workq_thread_set_top_addr(struct workq_thread_addrs *th_addrs, user_addr_t addr)
{
	th_addrs->stack_top = (addr & -C_WORKQ_STK_ALIGN);
}
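
/*
 * (addr & -C_WORKQ_STK_ALIGN) rounds down to the required alignment: with
 * C_WORKQ_STK_ALIGN == 16, -16 is ~0xf, so e.g. 0x1003f becomes 0x10030.
 */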
static void
workq_thread_get_addrs(vm_map_t map, user_addr_t stackaddr,
		struct workq_thread_addrs *th_addrs)
{
	const vm_size_t guardsize = vm_map_page_size(map);

	th_addrs->self = (user_addr_t)(stackaddr + PTH_DEFAULT_STACKSIZE +
			guardsize + PTHREAD_T_OFFSET);
	workq_thread_set_top_addr(th_addrs, th_addrs->self);
	th_addrs->stack_bottom = (user_addr_t)(stackaddr + guardsize);
}
static void
workq_set_register_state(proc_t p, thread_t th,
		struct workq_thread_addrs *addrs, mach_port_name_t kport,
		user_addr_t kevent_list, uint32_t upcall_flags, int kevent_count)
{
	user_addr_t wqstart_fnptr = pthread_kern->proc_get_wqthread(p);
	if (!wqstart_fnptr) {
		panic("workqueue thread start function pointer is NULL");
	}
#if defined(__i386__) || defined(__x86_64__)
	if (proc_is64bit_data(p) == 0) {
		x86_thread_state32_t state = {
			.eip = (unsigned int)wqstart_fnptr,
			.eax = /* arg0 */ (unsigned int)addrs->self,
			.ebx = /* arg1 */ (unsigned int)kport,
			.ecx = /* arg2 */ (unsigned int)addrs->stack_bottom,
			.edx = /* arg3 */ (unsigned int)kevent_list,
			.edi = /* arg4 */ (unsigned int)upcall_flags,
			.esi = /* arg5 */ (unsigned int)kevent_count,

			.esp = (int)((vm_offset_t)addrs->stack_top),
		};

		int error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
		if (error != KERN_SUCCESS) {
			panic("%s: thread_set_wq_state failed: %d", __func__, error);
		}
	} else {
		x86_thread_state64_t state64 = {
			// x86-64 already passes all the arguments in registers, so we just put them in their final place here
			.rip = (uint64_t)wqstart_fnptr,
			.rdi = (uint64_t)addrs->self,
			.rsi = (uint64_t)kport,
			.rdx = (uint64_t)addrs->stack_bottom,
			.rcx = (uint64_t)kevent_list,
			.r8  = (uint64_t)upcall_flags,
			.r9  = (uint64_t)kevent_count,

			.rsp = (uint64_t)(addrs->stack_top)
		};

		int error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64);
		if (error != KERN_SUCCESS) {
			panic("%s: thread_set_wq_state failed: %d", __func__, error);
		}
	}
#else
#error setup_wqthread not defined for this architecture
#endif
}
static int
workq_kevent(proc_t p, struct workq_thread_addrs *th_addrs, int upcall_flags,
		user_addr_t eventlist, int nevents, int kevent_flags,
		user_addr_t *kevent_list_out, int *kevent_count_out)
{
	bool workloop = upcall_flags & WQ_FLAG_THREAD_WORKLOOP;
	int kevent_count = WQ_KEVENT_LIST_LEN;
	user_addr_t kevent_list = th_addrs->self - WQ_KEVENT_LIST_LEN * sizeof(struct kevent_qos_s);
	user_addr_t kevent_id_addr = kevent_list;
	kqueue_id_t kevent_id = -1;
	int ret;

	if (workloop) {
		/*
		 * The kevent ID goes just below the kevent list. Sufficiently new
		 * userspace will know to look there. Old userspace will just
		 * ignore it.
		 */
		kevent_id_addr -= sizeof(kqueue_id_t);
	}
	user_addr_t kevent_data_buf = kevent_id_addr - WQ_KEVENT_DATA_SIZE;
	user_size_t kevent_data_available = WQ_KEVENT_DATA_SIZE;

	if (workloop) {
		kevent_flags |= KEVENT_FLAG_WORKLOOP;
		ret = kevent_id_internal(p, &kevent_id,
				eventlist, nevents, kevent_list, kevent_count,
				kevent_data_buf, &kevent_data_available,
				kevent_flags, &kevent_count);
		copyout(&kevent_id, kevent_id_addr, sizeof(kevent_id));
	} else {
		kevent_flags |= KEVENT_FLAG_WORKQ;
		ret = kevent_qos_internal(p, -1, eventlist, nevents, kevent_list,
				kevent_count, kevent_data_buf, &kevent_data_available,
				kevent_flags, &kevent_count);
	}
	// squash any errors into just empty output
	if (ret != 0 || kevent_count == -1) {
		*kevent_list_out = NULL;
		*kevent_count_out = 0;
		return ret;
	}

	if (kevent_data_available == WQ_KEVENT_DATA_SIZE) {
		workq_thread_set_top_addr(th_addrs, kevent_id_addr);
	} else {
		workq_thread_set_top_addr(th_addrs,
				kevent_data_buf + kevent_data_available);
	}
	*kevent_count_out = kevent_count;
	*kevent_list_out = kevent_list;
	return ret;
}
/*
 * configures initial thread stack/registers to jump into:
 * _pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *keventlist, int upcall_flags, int nkevents);
 * to get there we jump through assembly stubs in pthread_asm.s. Those
 * routines set up a stack frame, using the current stack pointer, and marshal
 * arguments from registers to the stack as required by the ABI.
 *
 * One odd thing we do here is to start the pthread_t 4k below what would be the
 * top of the stack otherwise. This is because usually only the first 4k of the
 * pthread_t will be used and so we want to put it on the same 16k page as the
 * top of the stack to save memory.
 *
 * When we are done the stack will look like:
 * |-----------| th_stackaddr + th_allocsize
 * |pthread_t  | th_stackaddr + DEFAULT_STACKSIZE + guardsize + PTHREAD_STACK_OFFSET
 * |kevent list| optionally - at most WQ_KEVENT_LIST_LEN events
 * |kevent data| optionally - at most WQ_KEVENT_DATA_SIZE bytes
 * |stack gap  | bottom aligned to 16 bytes, and at least as big as stack_gap_min
 * |   stack   |
 * |     ⇓     |
 * |           |
 * |guard page | guardsize
 * |-----------| th_stackaddr
 */
__attribute__((noreturn,noinline))
void
workq_setup_thread(proc_t p, thread_t th, vm_map_t map, user_addr_t stackaddr,
		mach_port_name_t kport, int th_qos __unused, int setup_flags, int upcall_flags)
{
	struct workq_thread_addrs th_addrs;
	bool first_use = (setup_flags & WQ_SETUP_FIRST_USE);
	user_addr_t kevent_list = NULL;
	int kevent_count = 0;

	workq_thread_get_addrs(map, stackaddr, &th_addrs);
	if (first_use) {
		uint32_t tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
		if (tsd_offset) {
			mach_vm_offset_t th_tsd_base = th_addrs.self + tsd_offset;
			kern_return_t kret = pthread_kern->thread_set_tsd_base(th,
					th_tsd_base);
			if (kret == KERN_SUCCESS) {
				upcall_flags |= WQ_FLAG_THREAD_TSD_BASE_SET;
			}
		}
		/*
		 * Pre-fault the first page of the new thread's stack and the page that will
		 * contain the pthread_t structure.
		 */
		vm_map_offset_t mask = vm_map_page_mask(map);
		vm_map_offset_t th_page = vm_map_trunc_page_mask(th_addrs.self, mask);
		vm_map_offset_t stk_page = vm_map_trunc_page_mask(th_addrs.stack_top - 1, mask);
		if (th_page != stk_page) {
			vm_fault(map, stk_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0);
		}
		vm_fault(map, th_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0);
	}
	if (setup_flags & WQ_SETUP_EXIT_THREAD) {
		kevent_count = WORKQ_EXIT_THREAD_NKEVENT;
	} else if (upcall_flags & WQ_FLAG_THREAD_KEVENT) {
		unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE;
		workq_kevent(p, &th_addrs, upcall_flags, NULL, 0, flags,
				&kevent_list, &kevent_count);
	}
	workq_set_register_state(p, th, &th_addrs, kport,
			kevent_list, upcall_flags, kevent_count);

	if (first_use) {
		pthread_kern->thread_bootstrap_return();
	} else {
		pthread_kern->unix_syscall_return(EJUSTRETURN);
	}
	__builtin_unreachable();
}
int
workq_handle_stack_events(proc_t p, thread_t th, vm_map_t map,
		user_addr_t stackaddr, mach_port_name_t kport,
		user_addr_t events, int nevents, int upcall_flags)
{
	struct workq_thread_addrs th_addrs;
	user_addr_t kevent_list = NULL;
	int kevent_count = 0, error;
	__assert_only kern_return_t kr;

	workq_thread_get_addrs(map, stackaddr, &th_addrs);

	unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE |
			KEVENT_FLAG_PARKING;
	error = workq_kevent(p, &th_addrs, upcall_flags, events, nevents, flags,
			&kevent_list, &kevent_count);

	if (error || kevent_count == 0) {
		return error;
	}

	kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL);
	assert(kr == KERN_SUCCESS);

	workq_set_register_state(p, th, &th_addrs, kport,
			kevent_list, upcall_flags, kevent_count);

	pthread_kern->unix_syscall_return(EJUSTRETURN);
	__builtin_unreachable();
}
int
_thread_selfid(__unused struct proc *p, uint64_t *retval)
{
	thread_t thread = current_thread();
	*retval = thread_tid(thread);
	return KERN_SUCCESS;
}
void
_pthread_init(void)
{
	pthread_lck_grp_attr = lck_grp_attr_alloc_init();
	pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);

	/*
	 * allocate the lock attribute for pthread synchronizers
	 */
	pthread_lck_attr = lck_attr_alloc_init();
	pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);

	pth_global_hashinit();
	psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);

	int policy_bootarg;
	if (PE_parse_boot_argn("pthread_mutex_default_policy", &policy_bootarg, sizeof(policy_bootarg))) {
		pthread_mutex_default_policy = policy_bootarg;
	}

	sysctl_register_oid(&sysctl__kern_pthread_mutex_default_policy);
}