/*
 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
/*
 * pthread_synch.c
 */

#pragma mark - Front Matter

#define _PTHREAD_CONDATTR_T
#define _PTHREAD_COND_T
#define _PTHREAD_MUTEXATTR_T
#define _PTHREAD_MUTEX_T
#define _PTHREAD_RWLOCKATTR_T
#define _PTHREAD_RWLOCK_T

#undef pthread_mutexattr_t
#undef pthread_mutex_t
#undef pthread_condattr_t
#undef pthread_cond_t
#undef pthread_rwlockattr_t
#undef pthread_rwlock_t

#include <sys/cdefs.h>
#include <os/log.h>

// <rdar://problem/26158937> panic() should be marked noreturn
extern void panic(const char *string, ...) __printflike(1,2) __dead2;

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
//#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/timeb.h>
#include <sys/times.h>
#include <sys/acct.h>
#include <sys/kernel.h>
#include <sys/wait.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/stat.h>
#include <sys/lock.h>
#include <sys/kdebug.h>
//#include <sys/sysproto.h>
#include <sys/vm.h>
#include <sys/user.h>      /* for coredump */
#include <sys/proc_info.h> /* for fill_procworkqueue */

#include <mach/mach_port.h>
#include <mach/mach_types.h>
#include <mach/semaphore.h>
#include <mach/sync_policy.h>
#include <mach/task.h>
#include <mach/vm_prot.h>
#include <kern/kern_types.h>
#include <kern/task.h>
#include <kern/clock.h>
#include <mach/kern_return.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/sched_prim.h> /* for thread_exception_return */
#include <kern/processor.h>
#include <kern/assert.h>
#include <mach/mach_vm.h>
#include <mach/mach_param.h>
#include <mach/thread_status.h>
#include <mach/thread_policy.h>
#include <mach/message.h>
#include <mach/port.h>
//#include <vm/vm_protos.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <mach/thread_act.h> /* for thread_resume */
#include <machine/machine_routines.h>
#include <mach/shared_region.h>

#include <libkern/OSAtomic.h>
#include <libkern/libkern.h>

#include "kern_internal.h"

#ifndef WQ_SETUP_EXIT_THREAD
#define WQ_SETUP_EXIT_THREAD 8
#endif

// XXX: Ditto for thread tags from kern/thread.h
#define THREAD_TAG_MAINTHREAD 0x1
#define THREAD_TAG_PTHREAD 0x10
#define THREAD_TAG_WORKQUEUE 0x20

lck_grp_attr_t *pthread_lck_grp_attr;
lck_grp_t *pthread_lck_grp;
lck_attr_t *pthread_lck_attr;

#define C_32_STK_ALIGN 16
#define C_64_STK_ALIGN 16
#define C_64_REDZONE_LEN 128

// WORKQ threads use the largest alignment any platform needs
#define C_WORKQ_STK_ALIGN 16

#define PTHREAD_T_OFFSET 0

/*
 * Flags field passed to bsdthread_create and back in pthread_start
 *
 * 31 <---------------------------------> 0
 * _________________________________________
 * | flags(8) | policy(8) | importance(16) |
 * -----------------------------------------
 */

#define PTHREAD_START_CUSTOM		0x01000000 // <rdar://problem/34501401>
#define PTHREAD_START_SETSCHED		0x02000000
// was PTHREAD_START_DETACHED	0x04000000
#define PTHREAD_START_QOSCLASS		0x08000000
#define PTHREAD_START_TSD_BASE_SET	0x10000000
#define PTHREAD_START_SUSPENDED		0x20000000
#define PTHREAD_START_QOSCLASS_MASK	0x00ffffff
#define PTHREAD_START_POLICY_BITSHIFT	16
#define PTHREAD_START_POLICY_MASK	0xff
#define PTHREAD_START_IMPORTANCE_MASK	0xffff

#define SCHED_OTHER	POLICY_TIMESHARE
#define SCHED_FIFO	POLICY_FIFO
#define SCHED_RR	POLICY_RR

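/*
 * Illustrative sketch (not part of the original source): packing a SCHED_RR
 * request at importance 37 into the word diagrammed above, and the unpacking
 * that _bsdthread_create() performs:
 *
 *	uint32_t flags = PTHREAD_START_CUSTOM | PTHREAD_START_SETSCHED |
 *		((SCHED_RR & PTHREAD_START_POLICY_MASK) << PTHREAD_START_POLICY_BITSHIFT) |
 *		(37 & PTHREAD_START_IMPORTANCE_MASK);
 *
 *	policy     = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;
 *	importance = flags & PTHREAD_START_IMPORTANCE_MASK;  // rebased on BASEPRI_DEFAULT below
 */
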
#define BASEPRI_DEFAULT 31

uint32_t pthread_debug_tracing = 1;

static uint32_t pthread_mutex_default_policy;

SYSCTL_INT(_kern, OID_AUTO, pthread_mutex_default_policy, CTLFLAG_RW | CTLFLAG_LOCKED,
		&pthread_mutex_default_policy, 0, "");

#pragma mark - Process/Thread Setup/Teardown syscalls

static mach_vm_offset_t
stack_addr_hint(proc_t p, vm_map_t vmap)
{
	mach_vm_offset_t stackaddr;
	mach_vm_offset_t aslr_offset;
	bool proc64bit = proc_is64bit(p);

	// We can't safely take random values % something unless it's a power-of-two
	_Static_assert(powerof2(PTH_DEFAULT_STACKSIZE), "PTH_DEFAULT_STACKSIZE is a power-of-two");

#if defined(__i386__) || defined(__x86_64__)
	if (proc64bit) {
		// Matches vm_map_get_max_aslr_slide_pages's image shift in xnu
		aslr_offset = random() % (1 << 28); // about 512 stacks
	} else {
		// Actually bigger than the image shift, we've got ~256MB to work with
		aslr_offset = random() % (16 * PTH_DEFAULT_STACKSIZE);
	}
	aslr_offset = vm_map_trunc_page_mask(aslr_offset, vm_map_page_mask(vmap));
	if (proc64bit) {
		// Above nanomalloc range (see NANOZONE_SIGNATURE)
		stackaddr = 0x700000000000 + aslr_offset;
	} else {
		stackaddr = SHARED_REGION_BASE_I386 + SHARED_REGION_SIZE_I386 + aslr_offset;
	}
#elif defined(__arm__) || defined(__arm64__)
	user_addr_t main_thread_stack_top = 0;
	if (pthread_kern->proc_get_user_stack) {
		main_thread_stack_top = pthread_kern->proc_get_user_stack(p);
	}
	if (proc64bit && main_thread_stack_top) {
		// The main thread stack position is randomly slid by xnu (c.f.
		// load_main() in mach_loader.c), so basing pthread stack allocations
		// where the main thread stack ends is already ASLRd and doing so
		// avoids creating a gap in the process address space that may cause
		// extra PTE memory usage. rdar://problem/33328206
		stackaddr = vm_map_trunc_page_mask((vm_map_offset_t)main_thread_stack_top,
				vm_map_page_mask(vmap));
	} else {
		// vm_map_get_max_aslr_slide_pages ensures 1MB of slide, we do better
		aslr_offset = random() % ((proc64bit ? 4 : 2) * PTH_DEFAULT_STACKSIZE);
		aslr_offset = vm_map_trunc_page_mask((vm_map_offset_t)aslr_offset,
				vm_map_page_mask(vmap));
		if (proc64bit) {
			// 64 stacks below shared region
			stackaddr = SHARED_REGION_BASE_ARM64 - 64 * PTH_DEFAULT_STACKSIZE - aslr_offset;
		} else {
			// If you try to slide down from this point, you risk ending up in memory consumed by malloc
			stackaddr = SHARED_REGION_BASE_ARM - 32 * PTH_DEFAULT_STACKSIZE + aslr_offset;
		}
	}
#else
#error Need to define a stack address hint for this architecture
#endif
	return stackaddr;
}

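/*
 * Convert a user-visible pthread_priority_t into the kernel's QoS policy
 * tuple. Fails (returns false) if the priority names no QoS tier, or if the
 * relative priority lies outside [THREAD_QOS_MIN_TIER_IMPORTANCE, 0].
 */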
static bool
_pthread_priority_to_policy(pthread_priority_t priority,
		thread_qos_policy_data_t *data)
{
	data->qos_tier = _pthread_priority_thread_qos(priority);
	data->tier_importance = _pthread_priority_relpri(priority);
	if (data->qos_tier == THREAD_QOS_UNSPECIFIED || data->tier_importance > 0 ||
			data->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
		return false;
	}
	return true;
}

/**
 * bsdthread_create system call. Used by pthread_create.
 */
int
_bsdthread_create(struct proc *p,
		__unused user_addr_t user_func, __unused user_addr_t user_funcarg,
		user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags,
		user_addr_t *retval)
{
	kern_return_t kret;
	void *sright;
	int error = 0;
	mach_vm_offset_t th_tsd_base;
	mach_port_name_t th_thport;
	thread_t th;
	task_t ctask = current_task();
	unsigned int policy, importance;
	uint32_t tsd_offset;
	bool start_suspended = (flags & PTHREAD_START_SUSPENDED);

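	/* Creating threads requires that bsdthread_register has run first. */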
	if (pthread_kern->proc_get_register(p) == 0) {
		return EINVAL;
	}

	PTHREAD_TRACE(pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0);

	kret = pthread_kern->thread_create(ctask, &th);
	if (kret != KERN_SUCCESS)
		return(ENOMEM);
	thread_reference(th);

	pthread_kern->thread_set_tag(th, THREAD_TAG_PTHREAD);

	sright = (void *)pthread_kern->convert_thread_to_port(th);
	th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(ctask));
	if (!MACH_PORT_VALID(th_thport)) {
		error = EMFILE; // userland will convert this into a crash
		goto out;
	}

	if ((flags & PTHREAD_START_CUSTOM) == 0) {
		error = EINVAL;
		goto out;
	}

	PTHREAD_TRACE(pthread_thread_create | DBG_FUNC_NONE, 0, 0, 0, 3);

	tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
	if (tsd_offset) {
		th_tsd_base = user_pthread + tsd_offset;
		kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base);
		if (kret == KERN_SUCCESS) {
			flags |= PTHREAD_START_TSD_BASE_SET;
		}
	}
	/*
	 * Strip PTHREAD_START_SUSPENDED so that libpthread can observe the kernel
	 * supports this flag (after the fact).
	 */
	flags &= ~PTHREAD_START_SUSPENDED;

	/*
	 * Set up registers & function call.
	 */
#if defined(__i386__) || defined(__x86_64__)
	if (proc_is64bit_data(p)) {
		x86_thread_state64_t state = {
			.rip = (uint64_t)pthread_kern->proc_get_threadstart(p),
			.rdi = (uint64_t)user_pthread,
			.rsi = (uint64_t)th_thport,
			.rdx = (uint64_t)user_func,    /* golang wants this */
			.rcx = (uint64_t)user_funcarg, /* golang wants this */
			.r8  = (uint64_t)user_stack,   /* golang wants this */
			.r9  = (uint64_t)flags,

			.rsp = (uint64_t)(user_stack - C_64_REDZONE_LEN)
		};

		(void)pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
	} else {
		x86_thread_state32_t state = {
			.eip = (uint32_t)pthread_kern->proc_get_threadstart(p),
			.eax = (uint32_t)user_pthread,
			.ebx = (uint32_t)th_thport,
			.ecx = (uint32_t)user_func,    /* golang wants this */
			.edx = (uint32_t)user_funcarg, /* golang wants this */
			.edi = (uint32_t)user_stack,   /* golang wants this */
			.esi = (uint32_t)flags,

			.esp = (int)((vm_offset_t)(user_stack - C_32_STK_ALIGN))
		};

		(void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
	}
#else
#error bsdthread_create not defined for this architecture
#endif

	if (flags & PTHREAD_START_SETSCHED) {
		/* Set scheduling parameters if needed */
		thread_extended_policy_data_t extinfo;
		thread_precedence_policy_data_t precedinfo;

		importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
		policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;

		if (policy == SCHED_OTHER) {
			extinfo.timeshare = 1;
		} else {
			extinfo.timeshare = 0;
		}

		thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);

		precedinfo.importance = (importance - BASEPRI_DEFAULT);
		thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
	} else if (flags & PTHREAD_START_QOSCLASS) {
		/* Set thread QoS class if requested. */
		thread_qos_policy_data_t qos;

		if (!_pthread_priority_to_policy(flags & PTHREAD_START_QOSCLASS_MASK, &qos)) {
			error = EINVAL;
			goto out;
		}
		pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY,
				(thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
	}

	if (pthread_kern->proc_get_mach_thread_self_tsd_offset) {
		uint64_t mach_thread_self_offset =
				pthread_kern->proc_get_mach_thread_self_tsd_offset(p);
		if (mach_thread_self_offset && tsd_offset) {
			bool proc64bit = proc_is64bit(p);
			if (proc64bit) {
				uint64_t th_thport_tsd = (uint64_t)th_thport;
				error = copyout(&th_thport_tsd, user_pthread + tsd_offset +
						mach_thread_self_offset, sizeof(th_thport_tsd));
			} else {
				uint32_t th_thport_tsd = (uint32_t)th_thport;
				error = copyout(&th_thport_tsd, user_pthread + tsd_offset +
						mach_thread_self_offset, sizeof(th_thport_tsd));
			}
			if (error) {
				goto out;
			}
		}
	}

	if (!start_suspended) {
		kret = pthread_kern->thread_resume(th);
		if (kret != KERN_SUCCESS) {
			error = EINVAL;
			goto out;
		}
	}
	thread_deallocate(th); /* drop the creator reference */

	PTHREAD_TRACE(pthread_thread_create | DBG_FUNC_END, error, user_pthread, 0, 0);

	*retval = user_pthread;
	return(0);

out:
	(void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(ctask), th_thport);
	if (pthread_kern->thread_will_park_or_terminate) {
		pthread_kern->thread_will_park_or_terminate(th);
	}
	(void)thread_terminate(th);
	(void)thread_deallocate(th);
	return(error);
}

/**
 * bsdthread_terminate system call. Used by pthread_terminate.
 */
int
_bsdthread_terminate(__unused struct proc *p,
		user_addr_t stackaddr,
		size_t size,
		uint32_t kthport,
		uint32_t sem,
		__unused int32_t *retval)
{
	mach_vm_offset_t freeaddr;
	mach_vm_size_t freesize;
	kern_return_t kret;
	thread_t th = current_thread();

	freeaddr = (mach_vm_offset_t)stackaddr;
	freesize = size;

	PTHREAD_TRACE(pthread_thread_terminate | DBG_FUNC_START, freeaddr, freesize, kthport, 0xff);

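	/*
	 * Free the thread's stack: ordinary pthread stacks are deallocated
	 * outright, but the main thread's stack was set up by the kernel at
	 * exec time, so it is instead marked reusable and made inaccessible.
	 */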
	if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
		if (pthread_kern->thread_get_tag(th) & THREAD_TAG_MAINTHREAD) {
			vm_map_t user_map = pthread_kern->current_map();
			freesize = vm_map_trunc_page_mask((vm_map_offset_t)freesize - 1, vm_map_page_mask(user_map));
			kret = mach_vm_behavior_set(user_map, freeaddr, freesize, VM_BEHAVIOR_REUSABLE);
#if MACH_ASSERT
			if (kret != KERN_SUCCESS && kret != KERN_INVALID_ADDRESS) {
				os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kret);
			}
#endif
			kret = kret ? kret : mach_vm_protect(user_map, freeaddr, freesize, FALSE, VM_PROT_NONE);
			assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS);
		} else {
			kret = mach_vm_deallocate(pthread_kern->current_map(), freeaddr, freesize);
			if (kret != KERN_SUCCESS) {
				PTHREAD_TRACE(pthread_thread_terminate | DBG_FUNC_END, kret, 0, 0, 0);
			}
		}
	}

	if (pthread_kern->thread_will_park_or_terminate) {
		pthread_kern->thread_will_park_or_terminate(th);
	}
	(void)thread_terminate(th);
	if (sem != MACH_PORT_NULL) {
		kret = pthread_kern->semaphore_signal_internal_trap(sem);
		if (kret != KERN_SUCCESS) {
			PTHREAD_TRACE(pthread_thread_terminate | DBG_FUNC_END, kret, 0, 0, 0);
		}
	}

	if (kthport != MACH_PORT_NULL) {
		pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(current_task()), kthport);
	}

	PTHREAD_TRACE(pthread_thread_terminate | DBG_FUNC_END, 0, 0, 0, 0);

	pthread_kern->thread_exception_return();
	__builtin_unreachable();
}

/**
 * bsdthread_register system call. Performs per-process setup. Responsible for
 * returning capability bits to userspace and receiving userspace function addresses.
 */
int
_bsdthread_register(struct proc *p,
		user_addr_t threadstart,
		user_addr_t wqthread,
		int pthsize,
		user_addr_t pthread_init_data,
		user_addr_t pthread_init_data_size,
		uint64_t dispatchqueue_offset,
		int32_t *retval)
{
	struct _pthread_registration_data data = {};
	uint32_t max_tsd_offset;
	kern_return_t kr;
	size_t pthread_init_sz = 0;

	/* syscall randomizer test can pass bogus values */
	if (pthsize < 0 || pthsize > MAX_PTHREAD_SIZE) {
		return(EINVAL);
	}
	/*
	 * if we have pthread_init_data, then we use that and target_concptr
	 * (which is an offset) to get the data.
	 */
	if (pthread_init_data != 0) {
		if (pthread_init_data_size < sizeof(data.version)) {
			return EINVAL;
		}
		pthread_init_sz = MIN(sizeof(data), (size_t)pthread_init_data_size);
		int ret = copyin(pthread_init_data, &data, pthread_init_sz);
		if (ret) {
			return ret;
		}
		if (data.version != (size_t)pthread_init_data_size) {
			return EINVAL;
		}
	} else {
		data.dispatch_queue_offset = dispatchqueue_offset;
	}

	/* We have to do this before proc_get_register so that it resets after fork */
	mach_vm_offset_t stackaddr = stack_addr_hint(p, pthread_kern->current_map());
	pthread_kern->proc_set_stack_addr_hint(p, (user_addr_t)stackaddr);

	/* prevent multiple registrations */
	if (pthread_kern->proc_get_register(p) != 0) {
		return(EINVAL);
	}

	pthread_kern->proc_set_threadstart(p, threadstart);
	pthread_kern->proc_set_wqthread(p, wqthread);
	pthread_kern->proc_set_pthsize(p, pthsize);
	pthread_kern->proc_set_register(p);

	uint32_t tsd_slot_sz = proc_is64bit(p) ? sizeof(uint64_t) : sizeof(uint32_t);
	if ((uint32_t)pthsize >= tsd_slot_sz &&
			data.tsd_offset <= (uint32_t)(pthsize - tsd_slot_sz)) {
		max_tsd_offset = ((uint32_t)pthsize - data.tsd_offset - tsd_slot_sz);
	} else {
		data.tsd_offset = 0;
		max_tsd_offset = 0;
	}
	pthread_kern->proc_set_pthread_tsd_offset(p, data.tsd_offset);

	if (data.dispatch_queue_offset > max_tsd_offset) {
		data.dispatch_queue_offset = 0;
	}
	pthread_kern->proc_set_dispatchqueue_offset(p, data.dispatch_queue_offset);

	if (pthread_kern->proc_set_return_to_kernel_offset) {
		if (data.return_to_kernel_offset > max_tsd_offset) {
			data.return_to_kernel_offset = 0;
		}
		pthread_kern->proc_set_return_to_kernel_offset(p,
				data.return_to_kernel_offset);
	}

	if (pthread_kern->proc_set_mach_thread_self_tsd_offset) {
		if (data.mach_thread_self_offset > max_tsd_offset) {
			data.mach_thread_self_offset = 0;
		}
		pthread_kern->proc_set_mach_thread_self_tsd_offset(p,
				data.mach_thread_self_offset);
	}

	if (pthread_init_data != 0) {
		/* Outgoing data that userspace expects as a reply */
		data.version = sizeof(struct _pthread_registration_data);
		data.main_qos = _pthread_unspecified_priority();

		if (pthread_kern->qos_main_thread_active()) {
			mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
			thread_qos_policy_data_t qos;
			boolean_t gd = FALSE;

			kr = pthread_kern->thread_policy_get(current_thread(),
					THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
			if (kr != KERN_SUCCESS || qos.qos_tier == THREAD_QOS_UNSPECIFIED) {
				/*
				 * An unspecified QoS tier means the kernel wants us
				 * to impose the legacy QoS on the thread.
				 */
				qos.qos_tier = THREAD_QOS_LEGACY;
				qos.tier_importance = 0;

				kr = pthread_kern->thread_policy_set_internal(current_thread(),
						THREAD_QOS_POLICY, (thread_policy_t)&qos,
						THREAD_QOS_POLICY_COUNT);
			}

			if (kr == KERN_SUCCESS) {
				data.main_qos = _pthread_priority_make_from_thread_qos(
						qos.qos_tier, 0, 0);
			}
		}

		data.stack_addr_hint = stackaddr;
		data.mutex_default_policy = pthread_mutex_default_policy;

		kr = copyout(&data, pthread_init_data, pthread_init_sz);
		if (kr != KERN_SUCCESS) {
			return EINVAL;
		}
	}

	/* return the supported feature set as the return value. */
	*retval = PTHREAD_FEATURE_SUPPORTED;

	return(0);
}

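/*
 * Illustrative sketch (assumption, not in the original source) of the
 * registration handshake as seen from userspace, mirroring the kernel-side
 * parameters above; the real libpthread wrapper may differ in detail:
 *
 *	struct _pthread_registration_data data = {
 *		.version = sizeof(data),
 *		.dispatch_queue_offset = ...,
 *		.tsd_offset = ...,
 *	};
 *	__bsdthread_register(thread_start, start_wqthread, pthread_obj_size,
 *			&data, sizeof(data), 0);
 *	// On return, data.main_qos, data.stack_addr_hint, and
 *	// data.mutex_default_policy carry the kernel's replies, and the
 *	// syscall's return value is the PTHREAD_FEATURE_SUPPORTED bits.
 */
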
#pragma mark - Workqueue Thread Support

static mach_vm_size_t
workq_thread_allocsize(proc_t p, vm_map_t wq_map,
		mach_vm_size_t *guardsize_out)
{
	mach_vm_size_t guardsize = vm_map_page_size(wq_map);
	mach_vm_size_t pthread_size = vm_map_round_page_mask(
			pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET,
			vm_map_page_mask(wq_map));
	if (guardsize_out) *guardsize_out = guardsize;
	return guardsize + PTH_DEFAULT_STACKSIZE + pthread_size;
}

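/*
 * Illustrative sizing (assumes 4K pages, the usual 512K PTH_DEFAULT_STACKSIZE,
 * and a pthread_t that rounds to one page): 0x1000 (guard) + 0x80000 (stack)
 * + 0x1000 (pthread_t) = 0x82000 bytes per workqueue thread allocation.
 */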
int
workq_create_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t *out_addr)
{
	mach_vm_offset_t stackaddr = pthread_kern->proc_get_stack_addr_hint(p);
	mach_vm_size_t guardsize, th_allocsize;
	kern_return_t kret;

	th_allocsize = workq_thread_allocsize(p, vmap, &guardsize);
	kret = mach_vm_map(vmap, &stackaddr, th_allocsize, page_size - 1,
			VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE, NULL, 0, FALSE,
			VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);

	if (kret != KERN_SUCCESS) {
		kret = mach_vm_allocate(vmap, &stackaddr, th_allocsize,
				VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
	}

	if (kret != KERN_SUCCESS) {
		goto fail;
	}

	/*
	 * The guard page is at the lowest address
	 * The stack base is the highest address
	 */
	kret = mach_vm_protect(vmap, stackaddr, guardsize, FALSE, VM_PROT_NONE);
	if (kret != KERN_SUCCESS) {
		goto fail_vm_deallocate;
	}

	if (out_addr) {
		*out_addr = stackaddr;
	}
	return 0;

fail_vm_deallocate:
	(void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);
fail:
	return kret;
}

int
workq_destroy_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t stackaddr)
{
	return mach_vm_deallocate(vmap, stackaddr,
			workq_thread_allocsize(p, vmap, NULL));
}

void
workq_markfree_threadstack(proc_t OS_UNUSED p, thread_t OS_UNUSED th,
		vm_map_t vmap, user_addr_t stackaddr)
{
	// Keep this in sync with workq_setup_thread()
	const vm_size_t guardsize = vm_map_page_size(vmap);
	const user_addr_t freeaddr = (user_addr_t)stackaddr + guardsize;
	const vm_map_offset_t freesize = vm_map_trunc_page_mask(
			(PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET) - 1,
			vm_map_page_mask(vmap)) - guardsize;

	__assert_only kern_return_t kr = mach_vm_behavior_set(vmap, freeaddr,
			freesize, VM_BEHAVIOR_REUSABLE);
#if MACH_ASSERT
	if (kr != KERN_SUCCESS && kr != KERN_INVALID_ADDRESS) {
		os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kr);
	}
#endif
}

struct workq_thread_addrs {
	user_addr_t self;
	user_addr_t stack_bottom;
	user_addr_t stack_top;
};

static inline void
workq_thread_set_top_addr(struct workq_thread_addrs *th_addrs, user_addr_t addr)
{
	th_addrs->stack_top = (addr & -C_WORKQ_STK_ALIGN);
}

static void
workq_thread_get_addrs(vm_map_t map, user_addr_t stackaddr,
		struct workq_thread_addrs *th_addrs)
{
	const vm_size_t guardsize = vm_map_page_size(map);

	th_addrs->self = (user_addr_t)(stackaddr + PTH_DEFAULT_STACKSIZE +
			guardsize + PTHREAD_T_OFFSET);
	workq_thread_set_top_addr(th_addrs, th_addrs->self);
	th_addrs->stack_bottom = (user_addr_t)(stackaddr + guardsize);
}

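/*
 * Worked example (illustrative; assumes 4K pages, a 512K
 * PTH_DEFAULT_STACKSIZE, and PTHREAD_T_OFFSET == 0) for an allocation
 * based at stackaddr:
 *
 *	stack_bottom = stackaddr + 0x1000    // just above the guard page
 *	self         = stackaddr + 0x81000   // pthread_t sits above the stack
 *	stack_top    = self, aligned down to C_WORKQ_STK_ALIGN
 *
 * matching the layout diagram above workq_setup_thread() below.
 */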
static inline void
workq_set_register_state(proc_t p, thread_t th,
		struct workq_thread_addrs *addrs, mach_port_name_t kport,
		user_addr_t kevent_list, uint32_t upcall_flags, int kevent_count)
{
	user_addr_t wqstart_fnptr = pthread_kern->proc_get_wqthread(p);
	if (!wqstart_fnptr) {
		panic("workqueue thread start function pointer is NULL");
	}

#if defined(__i386__) || defined(__x86_64__)
	if (proc_is64bit_data(p) == 0) {
		x86_thread_state32_t state = {
			.eip = (unsigned int)wqstart_fnptr,
			.eax = /* arg0 */ (unsigned int)addrs->self,
			.ebx = /* arg1 */ (unsigned int)kport,
			.ecx = /* arg2 */ (unsigned int)addrs->stack_bottom,
			.edx = /* arg3 */ (unsigned int)kevent_list,
			.edi = /* arg4 */ (unsigned int)upcall_flags,
			.esi = /* arg5 */ (unsigned int)kevent_count,

			.esp = (int)((vm_offset_t)addrs->stack_top),
		};

		int error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
		if (error != KERN_SUCCESS) {
			panic("%s: thread_set_wq_state failed: %d", __func__, error);
		}
	} else {
		x86_thread_state64_t state64 = {
			// x86-64 already passes all the arguments in registers, so we just put them in their final place here
			.rip = (uint64_t)wqstart_fnptr,
			.rdi = (uint64_t)addrs->self,
			.rsi = (uint64_t)kport,
			.rdx = (uint64_t)addrs->stack_bottom,
			.rcx = (uint64_t)kevent_list,
			.r8  = (uint64_t)upcall_flags,
			.r9  = (uint64_t)kevent_count,

			.rsp = (uint64_t)(addrs->stack_top)
		};

		int error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64);
		if (error != KERN_SUCCESS) {
			panic("%s: thread_set_wq_state failed: %d", __func__, error);
		}
	}
#else
#error setup_wqthread not defined for this architecture
#endif
}

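/*
 * Run the kevent call for a workqueue thread, staging the returned event
 * list (and, for workloops, the kqueue ID just below it) in the region
 * between the pthread_t and the initial stack top; on success the stack
 * top is lowered below whatever was written.
 */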
static int
workq_kevent(proc_t p, struct workq_thread_addrs *th_addrs, int upcall_flags,
		user_addr_t eventlist, int nevents, int kevent_flags,
		user_addr_t *kevent_list_out, int *kevent_count_out)
{
	bool workloop = upcall_flags & WQ_FLAG_THREAD_WORKLOOP;
	int kevent_count = WQ_KEVENT_LIST_LEN;
	user_addr_t kevent_list = th_addrs->self - WQ_KEVENT_LIST_LEN * sizeof(struct kevent_qos_s);
	user_addr_t kevent_id_addr = kevent_list;
	kqueue_id_t kevent_id = -1;
	int ret;

	if (workloop) {
		/*
		 * The kevent ID goes just below the kevent list. Sufficiently new
		 * userspace will know to look there. Old userspace will just
		 * ignore it.
		 */
		kevent_id_addr -= sizeof(kqueue_id_t);
	}

	user_addr_t kevent_data_buf = kevent_id_addr - WQ_KEVENT_DATA_SIZE;
	user_size_t kevent_data_available = WQ_KEVENT_DATA_SIZE;

	if (workloop) {
		kevent_flags |= KEVENT_FLAG_WORKLOOP;
		ret = kevent_id_internal(p, &kevent_id,
				eventlist, nevents, kevent_list, kevent_count,
				kevent_data_buf, &kevent_data_available,
				kevent_flags, &kevent_count);
		copyout(&kevent_id, kevent_id_addr, sizeof(kevent_id));
	} else {
		kevent_flags |= KEVENT_FLAG_WORKQ;
		ret = kevent_qos_internal(p, -1, eventlist, nevents, kevent_list,
				kevent_count, kevent_data_buf, &kevent_data_available,
				kevent_flags, &kevent_count);
	}

	// squash any errors into just empty output
	if (ret != 0 || kevent_count == -1) {
		*kevent_list_out = NULL;
		*kevent_count_out = 0;
		return ret;
	}

	if (kevent_data_available == WQ_KEVENT_DATA_SIZE) {
		workq_thread_set_top_addr(th_addrs, kevent_id_addr);
	} else {
		workq_thread_set_top_addr(th_addrs,
				kevent_data_buf + kevent_data_available);
	}
	*kevent_count_out = kevent_count;
	*kevent_list_out = kevent_list;
	return ret;
}

/**
 * configures initial thread stack/registers to jump into:
 * _pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *keventlist, int upcall_flags, int nkevents);
 * to get there we jump through assembly stubs in pthread_asm.s. Those
 * routines set up a stack frame, using the current stack pointer, and marshall
 * arguments from registers to the stack as required by the ABI.
 *
 * One odd thing we do here is to start the pthread_t 4k below what would be the
 * top of the stack otherwise. This is because usually only the first 4k of the
 * pthread_t will be used and so we want to put it on the same 16k page as the
 * top of the stack to save memory.
 *
 * When we are done the stack will look like:
 * |-----------| th_stackaddr + th_allocsize
 * |pthread_t  | th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET
 * |kevent list| optionally - at most WQ_KEVENT_LIST_LEN events
 * |kevent data| optionally - at most WQ_KEVENT_DATA_SIZE bytes
 * |stack gap  | bottom aligned to 16 bytes, and at least as big as stack_gap_min
 * |   STACK   |
 * |     ⇓     |
 * |           |
 * |guard page | guardsize
 * |-----------| th_stackaddr
 */
__attribute__((noreturn,noinline))
void
workq_setup_thread(proc_t p, thread_t th, vm_map_t map, user_addr_t stackaddr,
		mach_port_name_t kport, int th_qos __unused, int setup_flags, int upcall_flags)
{
	struct workq_thread_addrs th_addrs;
	bool first_use = (setup_flags & WQ_SETUP_FIRST_USE);
	user_addr_t kevent_list = NULL;
	int kevent_count = 0;

	workq_thread_get_addrs(map, stackaddr, &th_addrs);

	if (first_use) {
		uint32_t tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
		if (tsd_offset) {
			mach_vm_offset_t th_tsd_base = th_addrs.self + tsd_offset;
			kern_return_t kret = pthread_kern->thread_set_tsd_base(th,
					th_tsd_base);
			if (kret == KERN_SUCCESS) {
				upcall_flags |= WQ_FLAG_THREAD_TSD_BASE_SET;
			}
		}

		/*
		 * Pre-fault the first page of the new thread's stack and the page that will
		 * contain the pthread_t structure.
		 */
		vm_map_offset_t mask = vm_map_page_mask(map);
		vm_map_offset_t th_page = vm_map_trunc_page_mask(th_addrs.self, mask);
		vm_map_offset_t stk_page = vm_map_trunc_page_mask(th_addrs.stack_top - 1, mask);
		if (th_page != stk_page) {
			vm_fault(map, stk_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0);
		}
		vm_fault(map, th_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0);
	}

	if (setup_flags & WQ_SETUP_EXIT_THREAD) {
		kevent_count = WORKQ_EXIT_THREAD_NKEVENT;
	} else if (upcall_flags & WQ_FLAG_THREAD_KEVENT) {
		unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE;
		workq_kevent(p, &th_addrs, upcall_flags, NULL, 0, flags,
				&kevent_list, &kevent_count);
	}

	workq_set_register_state(p, th, &th_addrs, kport,
			kevent_list, upcall_flags, kevent_count);

	if (first_use) {
		pthread_kern->thread_bootstrap_return();
	} else {
		pthread_kern->unix_syscall_return(EJUSTRETURN);
	}
	__builtin_unreachable();
}

int
workq_handle_stack_events(proc_t p, thread_t th, vm_map_t map,
		user_addr_t stackaddr, mach_port_name_t kport,
		user_addr_t events, int nevents, int upcall_flags)
{
	struct workq_thread_addrs th_addrs;
	user_addr_t kevent_list = NULL;
	int kevent_count = 0, error;
	__assert_only kern_return_t kr;

	workq_thread_get_addrs(map, stackaddr, &th_addrs);

	unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE |
			KEVENT_FLAG_PARKING;
	error = workq_kevent(p, &th_addrs, upcall_flags, events, nevents, flags,
			&kevent_list, &kevent_count);

	if (error || kevent_count == 0) {
		return error;
	}

	kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL);
	assert(kr == KERN_SUCCESS);

	workq_set_register_state(p, th, &th_addrs, kport,
			kevent_list, upcall_flags, kevent_count);

	pthread_kern->unix_syscall_return(EJUSTRETURN);
	__builtin_unreachable();
}

int
_thread_selfid(__unused struct proc *p, uint64_t *retval)
{
	thread_t thread = current_thread();
	*retval = thread_tid(thread);
	return KERN_SUCCESS;
}

void
_pthread_init(void)
{
	pthread_lck_grp_attr = lck_grp_attr_alloc_init();
	pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);

	/*
	 * allocate the lock attribute for pthread synchronizers
	 */
	pthread_lck_attr = lck_attr_alloc_init();
	pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);

	pth_global_hashinit();
	psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);
	psynch_zoneinit();

	int policy_bootarg;
	if (PE_parse_boot_argn("pthread_mutex_default_policy", &policy_bootarg, sizeof(policy_bootarg))) {
		pthread_mutex_default_policy = policy_bootarg;
	}

	sysctl_register_oid(&sysctl__kern_pthread_mutex_default_policy);
}