apple/xnu.git (xnu-1699.26.8) - bsd/kern/kern_fork.c
1/*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1982, 1986, 1989, 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)kern_fork.c 8.8 (Berkeley) 2/14/95
67 */
68/*
69 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74/*
75 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
76 * support for mandatory and extensible security protections. This notice
77 * is included in support of clause 2.2 (b) of the Apple Public License,
78 * Version 2.0.
79 */
80
81#include <kern/assert.h>
82#include <sys/param.h>
83#include <sys/systm.h>
84#include <sys/filedesc.h>
85#include <sys/kernel.h>
86#include <sys/malloc.h>
87#include <sys/proc_internal.h>
88#include <sys/kauth.h>
89#include <sys/user.h>
90#include <sys/resourcevar.h>
91#include <sys/vnode_internal.h>
92#include <sys/file_internal.h>
93#include <sys/acct.h>
94#include <sys/codesign.h>
95#include <sys/sysproto.h>
96#if CONFIG_DTRACE
97/* Do not include dtrace.h, it redefines kmem_[alloc/free] */
98extern void dtrace_fasttrap_fork(proc_t, proc_t);
99extern void (*dtrace_helpers_fork)(proc_t, proc_t);
100extern void dtrace_lazy_dofs_duplicate(proc_t, proc_t);
101
102#include <sys/dtrace_ptss.h>
103#endif
104
105#include <security/audit/audit.h>
106
107#include <mach/mach_types.h>
108#include <kern/kern_types.h>
109#include <kern/kalloc.h>
110#include <kern/mach_param.h>
111#include <kern/task.h>
112#include <kern/thread_call.h>
113#include <kern/zalloc.h>
114
115#include <machine/spl.h>
116
117#if CONFIG_MACF
118#include <security/mac.h>
119#include <security/mac_mach_internal.h>
120#endif
121
122#include <vm/vm_map.h>
123#include <vm/vm_protos.h>
124#include <vm/vm_shared_region.h>
125
126#include <sys/shm_internal.h> /* for shmfork() */
127#include <mach/task.h> /* for thread_create() */
128#include <mach/thread_act.h> /* for thread_resume() */
129
130#include <sys/sdt.h>
131
132/* XXX routines which should have Mach prototypes, but don't */
133void thread_set_parent(thread_t parent, int pid);
134extern void act_thread_catt(void *ctx);
135void thread_set_child(thread_t child, int pid);
136void *act_thread_csave(void);
137
138
139thread_t cloneproc(task_t, proc_t, int);
140proc_t forkproc(proc_t);
141void forkproc_free(proc_t);
142thread_t fork_create_child(task_t parent_task, proc_t child, int inherit_memory, int is64bit);
143void proc_vfork_begin(proc_t parent_proc);
144void proc_vfork_end(proc_t parent_proc);
145
146#define DOFORK 0x1 /* fork() system call */
147#define DOVFORK 0x2 /* vfork() system call */
148
149/*
150 * proc_vfork_begin
151 *
152 * Description: start a vfork on a process
153 *
154 * Parameters: parent_proc process (re)entering vfork state
155 *
156 * Returns: (void)
157 *
158 * Notes: Although this function increments a count, a count in
159 * excess of 1 is not currently supported. According to the
160 * POSIX standard, calling anything other than execve() or
161 * _exit() following a vfork(), including calling vfork()
162 * itself again, will result in undefined behaviour
163 */
164void
165proc_vfork_begin(proc_t parent_proc)
166{
167 proc_lock(parent_proc);
168 parent_proc->p_lflag |= P_LVFORK;
169 parent_proc->p_vforkcnt++;
170 proc_unlock(parent_proc);
171}
172
173/*
174 * proc_vfork_end
175 *
176 * Description: stop a vfork on a process
177 *
178 * Parameters: parent_proc process leaving vfork state
179 *
180 * Returns: (void)
181 *
182 * Notes: Decrements the count; currently, reentrancy of vfork()
183 * is unsupported on the current process
184 */
185void
186proc_vfork_end(proc_t parent_proc)
187{
188 proc_lock(parent_proc);
189 parent_proc->p_vforkcnt--;
190 if (parent_proc->p_vforkcnt < 0)
191 panic("vfork cnt is -ve");
192 /* reduce the vfork count; clear the flag when it goes to 0 */
193 if (parent_proc->p_vforkcnt == 0)
194 parent_proc->p_lflag &= ~P_LVFORK;
195 proc_unlock(parent_proc);
196}
197
198
199/*
200 * vfork
201 *
202 * Description: vfork system call
203 *
204 * Parameters: void [no arguments]
205 *
206 * Retval: 0 (to child process)
207 * !0 pid of child (to parent process)
208 * -1 error (see "Returns:")
209 *
210 * Returns: EAGAIN Administrative limit reached
211 * EINVAL vfork() called during vfork()
212 * ENOMEM Failed to allocate new process
213 *
214 * Note: After a successful call to this function, the parent process
215 * has its task, thread, and uthread lent to the child process,
216 * and control is returned to the caller; if this function is
217 * invoked as a system call, the return is to user space, and
218 * is effectively running on the child process.
219 *
220 * Subsequent calls that operate on process state are permitted,
221 * though discouraged, and will operate on the child process; any
222 * operations on the task, thread, or uthread will result in
223 * changes in the parent state and, if inheritable, will also be
224 * reflected in the child state once a task, thread, and uthread
225 * are realized for the child process at execve() time. Given
226 * this, it's recommended that people use the posix_spawn() call
227 * instead.
228 *
229 * BLOCK DIAGRAM OF VFORK
230 *
231 * Before:
232 *
233 * ,----------------. ,-------------.
234 * | | task | |
235 * | parent_thread | ------> | parent_task |
236 * | | <.list. | |
237 * `----------------' `-------------'
238 * uthread | ^ bsd_info | ^
239 * v | vc_thread v | task
240 * ,----------------. ,-------------.
241 * | | | |
242 * | parent_uthread | <.list. | parent_proc | <-- current_proc()
243 * | | | |
244 * `----------------' `-------------'
245 * uu_proc |
246 * v
247 * NULL
248 *
249 * After:
250 *
251 * ,----------------. ,-------------.
252 * | | task | |
253 * ,----> | parent_thread | ------> | parent_task |
254 * | | | <.list. | |
255 * | `----------------' `-------------'
256 * | uthread | ^ bsd_info | ^
257 * | v | vc_thread v | task
258 * | ,----------------. ,-------------.
259 * | | | | |
260 * | | parent_uthread | <.list. | parent_proc |
261 * | | | | |
262 * | `----------------' `-------------'
263 * | uu_proc | . list
264 * | v v
265 * | ,----------------.
266 * `----- | |
267 * p_vforkact | child_proc | <-- current_proc()
268 * | |
269 * `----------------'
270 */
271int
272vfork(proc_t parent_proc, __unused struct vfork_args *uap, int32_t *retval)
273{
274 thread_t child_thread;
275 int err;
276
277 if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_VFORK)) != 0) {
278 retval[1] = 0;
279 } else {
280 /*
281 * kludge: rely on uu_proc being set in the vfork case,
282 * rather than returning the actual thread. We can remove
283 * this when we remove the uu_proc/current_proc() kludge.
284 */
285 proc_t child_proc = current_proc();
286
287 retval[0] = child_proc->p_pid;
288 retval[1] = 1; /* flag child return for user space */
289
290 /*
291 * Drop the signal lock on the child which was taken on our
292 * behalf by forkproc()/cloneproc() to prevent signals being
293 * received by the child in a partially constructed state.
294 */
295 proc_signalend(child_proc, 0);
296 proc_transend(child_proc, 0);
297
298 /* flag the fork has occurred */
299 proc_knote(parent_proc, NOTE_FORK | child_proc->p_pid);
300 DTRACE_PROC1(create, proc_t, child_proc);
301 }
302
303 return(err);
304}
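/*
 * Illustrative sketch (user space, not part of this file): the
 * posix_spawn() call recommended in the comment above replaces the
 * vfork()/execve() pattern; the path and argument vector below are
 * hypothetical.
 *
 *	#include <spawn.h>
 *	#include <sys/wait.h>
 *	extern char **environ;
 *
 *	pid_t pid;
 *	char *child_argv[] = { "/bin/ls", NULL };
 *	if (posix_spawn(&pid, "/bin/ls", NULL, NULL, child_argv, environ) == 0)
 *		(void)waitpid(pid, NULL, 0);
 */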
305
306
307/*
308 * fork1
309 *
310 * Description: common code used by all new process creation other than the
311 * bootstrap of the initial process on the system
312 *
313 * Parameters: parent_proc parent process of the process being
314 * created
315 * child_threadp pointer to location to receive the
316 * Mach thread_t of the child process
317 * kind kind of creation being requested
318 *
319 * Notes: Permissible values for 'kind':
320 *
321 * PROC_CREATE_FORK Create a complete process which will
322 * return actively running in both the
323 * parent and the child; the child copies
324 * the parent address space.
325 * PROC_CREATE_SPAWN Create a complete process which will
326 * return actively running in the parent
327 * only after returning actively running
328 * in the child; the child address space
329 * is newly created by an image activator,
330 * after which the child is run.
331 * PROC_CREATE_VFORK Creates a partial process which will
332 * borrow the parent task, thread, and
333 * uthread to return running in the child;
334 * the child address space and other parts
335 * are lazily created at execve() time, or
336 * the child is terminated, and the parent
337 * does not actively run until that
338 * happens.
339 *
340 * At first it may seem strange that we return the child thread
341 * address rather than process structure, since the process is
342 * the only part guaranteed to be "new"; however, since we do
343 * not actually adjust other references between Mach and BSD (see
344 * the block diagram above the implementation of vfork()), this
345 * is the only method which guarantees us the ability to get
346 * back to the other information.
347 */
348int
349fork1(proc_t parent_proc, thread_t *child_threadp, int kind)
350{
351 thread_t parent_thread = (thread_t)current_thread();
352 uthread_t parent_uthread = (uthread_t)get_bsdthread_info(parent_thread);
353 proc_t child_proc = NULL; /* set in switch, but compiler... */
354 thread_t child_thread = NULL;
355 uid_t uid;
356 int count;
357 int err = 0;
358 int spawn = 0;
359
360 /*
361 * Although process entries are dynamically created, we still keep
362 * a global limit on the maximum number we will create. Don't allow
363 * a nonprivileged user to use the last process; don't let root
364 * exceed the limit. The variable nprocs is the current number of
365 * processes, maxproc is the limit.
366 */
367 uid = kauth_getruid();
368 proc_list_lock();
369 if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) {
370 proc_list_unlock();
371 tablefull("proc");
372 return (EAGAIN);
373 }
374 proc_list_unlock();
375
376 /*
377 * Increment the count of procs running with this uid. Don't allow
378 * a nonprivileged user to exceed their current limit, which is
379 * always less than what an rlim_t can hold.
380 * (locking protection is provided by list lock held in chgproccnt)
381 */
382 count = chgproccnt(uid, 1);
383 if (uid != 0 &&
384 (rlim_t)count > parent_proc->p_rlimit[RLIMIT_NPROC].rlim_cur) {
385 err = EAGAIN;
386 goto bad;
387 }
388
389#if CONFIG_MACF
390 /*
391 * Determine if MAC policies applied to the process will allow
392 * it to fork. This is an advisory-only check.
393 */
394 err = mac_proc_check_fork(parent_proc);
395 if (err != 0) {
396 goto bad;
397 }
398#endif
399
400 switch(kind) {
401 case PROC_CREATE_VFORK:
402 /*
403 * Prevent a vfork while we are in vfork(); we should
404 * likely also prevent a fork here, and this
405 * check should then be outside the switch statement,
406 * since the proc struct contents will copy from the
407 * child and the task/thread/uthread from the parent in
408 * that case. We do not support vfork() in vfork()
409 * because we don't have to; the same non-requirement
410 * is true of both fork() and posix_spawn() and any
411 * call other than execve() and _exit(), but we've
412 * been historically lenient, so we continue to be so
413 * (for now).
414 *
415 * <rdar://6640521> Probably a source of random panics
416 */
417 if (parent_uthread->uu_flag & UT_VFORK) {
418 printf("fork1 called within vfork by %s\n", parent_proc->p_comm);
419 err = EINVAL;
420 goto bad;
421 }
422
423 /*
424 * Flag us in progress; if we chose to support vfork() in
425 * vfork(), we would chain our parent at this point (in
426 * effect, a stack push). We don't, since we actually want
427 * to disallow everything not specified in the standard
428 */
429 proc_vfork_begin(parent_proc);
430
431 /* The newly created process comes with signal lock held */
432 if ((child_proc = forkproc(parent_proc)) == NULL) {
433 /* Failed to allocate new process */
434 proc_vfork_end(parent_proc);
435 err = ENOMEM;
436 goto bad;
437 }
438
439// XXX BEGIN: wants to move to be common code (and safe)
440#if CONFIG_MACF
441 /*
442 * allow policies to associate the credential/label that
443 * we referenced from the parent ... with the child
444 * JMM - this really isn't safe, as we can drop that
445 * association without informing the policy in other
446 * situations (keep long enough to get policies changed)
447 */
448 mac_cred_label_associate_fork(child_proc->p_ucred, child_proc);
449#endif
450
451 /*
452 * Propagate change of PID - may get new cred if auditing.
453 *
454 * NOTE: This has no effect in the vfork case, since
455 * child_proc->task != current_task(), but we duplicate it
456 * because this is probably, ultimately, wrong, since we
457 * will be running in the "child" which is the parent task
458 * with the wrong token until we get to the execve() or
459 * _exit() call; a lot of "undefined" can happen before
460 * that.
461 *
462 * <rdar://6640530> disallow everything but execve()/_exit()?
463 */
464 set_security_token(child_proc);
465
466 AUDIT_ARG(pid, child_proc->p_pid);
467
468// XXX END: wants to move to be common code (and safe)
469
470 /*
471 * BORROW PARENT TASK, THREAD, UTHREAD FOR CHILD
472 *
473 * Note: this is where we would "push" state instead of setting
474 * it for nested vfork() support (see proc_vfork_end() for
475 * description of issues here).
476 */
477 child_proc->task = parent_proc->task;
478
479 child_proc->p_lflag |= P_LINVFORK;
480 child_proc->p_vforkact = parent_thread;
481 child_proc->p_stat = SRUN;
482
483 parent_uthread->uu_flag |= UT_VFORK;
484 parent_uthread->uu_proc = child_proc;
485 parent_uthread->uu_userstate = (void *)act_thread_csave();
486 parent_uthread->uu_vforkmask = parent_uthread->uu_sigmask;
487
488 /* temporarily drop thread-set-id state */
489 if (parent_uthread->uu_flag & UT_SETUID) {
490 parent_uthread->uu_flag |= UT_WASSETUID;
491 parent_uthread->uu_flag &= ~UT_SETUID;
492 }
493
494 /* blow thread state information */
495 /* XXX is this actually necessary, given syscall return? */
496 thread_set_child(parent_thread, child_proc->p_pid);
497
498 child_proc->p_acflag = AFORK; /* forked but not exec'ed */
499
500 /*
501 * Preserve synchronization semantics of vfork. If
502 * waiting for child to exec or exit, set P_PPWAIT
503 * on child, and sleep on our proc (in case of exit).
504 */
505 child_proc->p_lflag |= P_LPPWAIT;
506 pinsertchild(parent_proc, child_proc); /* set visible */
507
508 break;
509
510 case PROC_CREATE_SPAWN:
511 /*
512 * A spawned process differs from a forked process in that
513 * the spawned process does not carry around the parent's
514 * baggage with regard to address space copying, dtrace,
515 * and so on.
516 */
517 spawn = 1;
518
519 /* FALLTHROUGH */
520
521 case PROC_CREATE_FORK:
522 /*
523 * When we clone the parent process, we are going to inherit
524 * its task attributes and memory, since when we fork, we
525 * will, in effect, create a duplicate of it, with only minor
526 * differences. In contrast, spawned processes do not inherit.
527 */
528 if ((child_thread = cloneproc(parent_proc->task, parent_proc, spawn ? FALSE : TRUE)) == NULL) {
529 /* Failed to create thread */
530 err = EAGAIN;
531 goto bad;
532 }
533
534 /* copy current thread state into the child thread (only for fork) */
535 if (!spawn) {
536 thread_dup(child_thread);
537 }
538
539 /* child_proc = child_thread->task->proc; */
540 child_proc = (proc_t)(get_bsdtask_info(get_threadtask(child_thread)));
541
542// XXX BEGIN: wants to move to be common code (and safe)
543#if CONFIG_MACF
544 /*
545 * allow policies to associate the credential/label that
546 * we referenced from the parent ... with the child
547 * JMM - this really isn't safe, as we can drop that
548 * association without informing the policy in other
549 * situations (keep long enough to get policies changed)
550 */
551 mac_cred_label_associate_fork(child_proc->p_ucred, child_proc);
552#endif
553
554 /*
555 * Propagate change of PID - may get new cred if auditing.
556 *
557 * NOTE: This has no effect in the vfork case, since
558 * child_proc->task != current_task(), but we duplicate it
559 * because this is probably, ultimately, wrong, since we
560 * will be running in the "child" which is the parent task
561 * with the wrong token until we get to the execve() or
562 * _exit() call; a lot of "undefined" can happen before
563 * that.
564 *
565 * <rdar://6640530> disallow everything but execve()/_exit()?
566 */
567 set_security_token(child_proc);
568
569 AUDIT_ARG(pid, child_proc->p_pid);
570
571// XXX END: wants to move to be common code (and safe)
572
573 /*
574 * Blow thread state information; this is what gives the child
575 * process its "return" value from a fork() call.
576 *
577 * Note: this should probably move to fork() proper, since it
578 * is not relevant to spawn, and the value won't matter
579 * until we resume the child there. If you are in here
580 * refactoring code, consider doing this at the same time.
581 */
582 thread_set_child(child_thread, child_proc->p_pid);
583
584 child_proc->p_acflag = AFORK; /* forked but not exec'ed */
585
586// <rdar://6598155> dtrace code cleanup needed
587#if CONFIG_DTRACE
588 /*
589 * This code applies to new processes who are copying the task
590 * and thread state and address spaces of their parent process.
591 */
592 if (!spawn) {
593// <rdar://6598155> call dtrace specific function here instead of all this...
594 /*
595 * APPLE NOTE: Solaris does a sprlock() and drops the
596 * proc_lock here. We're cheating a bit and only taking
597 * the p_dtrace_sprlock lock. A full sprlock would
598 * task_suspend the parent.
599 */
600 lck_mtx_lock(&parent_proc->p_dtrace_sprlock);
601
602 /*
603 * Remove all DTrace tracepoints from the child process. We
604 * need to do this _before_ duplicating USDT providers since
605 * any associated probes may be immediately enabled.
606 */
607 if (parent_proc->p_dtrace_count > 0) {
608 dtrace_fasttrap_fork(parent_proc, child_proc);
609 }
610
611 lck_mtx_unlock(&parent_proc->p_dtrace_sprlock);
612
613 /*
614 * Duplicate any lazy dof(s). This must be done while NOT
615 * holding the parent sprlock! Lock ordering is
616 * dtrace_dof_mode_lock, then sprlock. It is imperative we
617 * always call dtrace_lazy_dofs_duplicate, rather than null
618 * check and call if !NULL. If we NULL test, during lazy dof
619 * faulting we can race with the faulting code and proceed
620 * from here to beyond the helpers copy. The lazy dof
621 * faulting will then fail to copy the helpers to the child
622 * process.
623 */
624 dtrace_lazy_dofs_duplicate(parent_proc, child_proc);
625
626 /*
627 * Duplicate any helper actions and providers. The SFORKING
628 * we set above informs the code to enable USDT probes that
629 * sprlock() may fail because the child is being forked.
630 */
631 /*
632 * APPLE NOTE: As best I can tell, Apple's sprlock() equivalent
633 * never fails to find the child. We do not set SFORKING.
634 */
635 if (parent_proc->p_dtrace_helpers != NULL && dtrace_helpers_fork) {
636 (*dtrace_helpers_fork)(parent_proc, child_proc);
637 }
638
639 }
640#endif /* CONFIG_DTRACE */
641
642 break;
643
644 default:
645 panic("fork1 called with unknown kind %d", kind);
646 break;
647 }
648
649
650 /* return the thread pointer to the caller */
651 *child_threadp = child_thread;
652
653bad:
654 /*
655 * In the error case, we return a 0 value for the returned pid (but
656 * it is ignored in the trampoline due to the error return); this
657 * is probably not necessary.
658 */
659 if (err) {
660 (void)chgproccnt(uid, -1);
661 }
662
663 return (err);
664}
665
666
667/*
668 * vfork_return
669 *
670 * Description: "Return" to the parent vfork thread following execve()/_exit();
671 * this is done by reassociating the parent process structure
672 * with the task, thread, and uthread.
673 *
674 * Parameters: child_proc Child process
675 * retval System call return value array
676 * rval Return value to present to parent
677 *
678 * Returns: void
679 *
680 * Note: The caller resumes or exits the parent, as appropriate, after
681 * calling this function.
682 */
683void
684vfork_return(proc_t child_proc, int32_t *retval, int rval)
685{
686 proc_t parent_proc = child_proc->p_pptr;
687 thread_t parent_thread = (thread_t)current_thread();
688 uthread_t parent_uthread = (uthread_t)get_bsdthread_info(parent_thread);
689
690 act_thread_catt(parent_uthread->uu_userstate);
691
692 /* end vfork in parent */
693 proc_vfork_end(parent_proc);
694
695 /* REPATRIATE PARENT TASK, THREAD, UTHREAD */
696 parent_uthread->uu_userstate = 0;
697 parent_uthread->uu_flag &= ~UT_VFORK;
698 /* restore thread-set-id state */
699 if (parent_uthread->uu_flag & UT_WASSETUID) {
700 parent_uthread->uu_flag |= UT_SETUID;
701 parent_uthread->uu_flag &= ~UT_WASSETUID;
702 }
703 parent_uthread->uu_proc = 0;
704 parent_uthread->uu_sigmask = parent_uthread->uu_vforkmask;
705 child_proc->p_lflag &= ~P_LINVFORK;
706 child_proc->p_vforkact = (void *)0;
707
708 thread_set_parent(parent_thread, rval);
709
710 if (retval) {
711 retval[0] = rval;
712 retval[1] = 0; /* mark parent */
713 }
714
715 return;
716}
717
718
719/*
720 * fork_create_child
721 *
722 * Description: Common operations associated with the creation of a child
723 * process
724 *
725 * Parameters: parent_task parent task
726 * child_proc child process
727 * inherit_memory TRUE, if the parent's address space is
728 * to be inherited by the child
729 * is64bit TRUE, if the child being created will
730 * be associated with a 64 bit process
731 * rather than a 32 bit process
732 *
733 * Note: This code is called in the fork() case, from the execve() call
734 * graph, if implementing an execve() following a vfork(), from
735 * the posix_spawn() call graph (which implicitly includes a
736 * vfork() equivalent call), and in the system bootstrap case.
737 *
738 * It creates a new task and thread (and as a side effect of the
739 * thread creation, a uthread), which is then associated with the
740 * process 'child'. If the parent process address space is to
741 * be inherited, then a flag indicates that the newly created
742 * task should inherit this from the parent task.
743 *
744 * As a special concession to bootstrapping the initial process
745 * in the system, it's possible for 'parent_task' to be TASK_NULL;
746 * in this case, 'inherit_memory' MUST be FALSE.
747 */
748thread_t
749fork_create_child(task_t parent_task, proc_t child_proc, int inherit_memory, int is64bit)
750{
751 thread_t child_thread = NULL;
752 task_t child_task;
753 kern_return_t result;
754
755 /* Create a new task for the child process */
756 result = task_create_internal(parent_task,
757 inherit_memory,
758 is64bit,
759 &child_task);
760 if (result != KERN_SUCCESS) {
761 printf("execve: task_create_internal failed. Code: %d\n", result);
762 goto bad;
763 }
764
765 /* Set the child process task to the new task */
766 child_proc->task = child_task;
767
768 /* Set child task process to child proc */
769 set_bsdtask_info(child_task, child_proc);
770
771 /* Propagate CPU limit timer from parent */
772 if (timerisset(&child_proc->p_rlim_cpu))
773 task_vtimer_set(child_task, TASK_VTIMER_RLIM);
774
775 /* Set/clear 64 bit vm_map flag */
776 if (is64bit)
777 vm_map_set_64bit(get_task_map(child_task));
778 else
779 vm_map_set_32bit(get_task_map(child_task));
780
781#if CONFIG_MACF
782 /* Update task for MAC framework */
783 /* valid to use p_ucred as child is still not running ... */
784 mac_task_label_update_cred(child_proc->p_ucred, child_task);
785#endif
786
787 /*
788 * Set child process BSD visible scheduler priority if nice value
789 * inherited from parent
790 */
791 if (child_proc->p_nice != 0)
792 resetpriority(child_proc);
793
794 /* Create a new thread for the child process */
795 result = thread_create(child_task, &child_thread);
796 if (result != KERN_SUCCESS) {
797 printf("execve: thread_create failed. Code: %d\n", result);
798 task_deallocate(child_task);
799 child_task = NULL;
800 }
801bad:
802 thread_yield_internal(1);
803
804 return(child_thread);
805}
806
807
808/*
809 * fork
810 *
811 * Description: fork system call.
812 *
813 * Parameters: parent Parent process to fork
814 * uap (void) [unused]
815 * retval Return value
816 *
817 * Returns: 0 Success
818 * EAGAIN Resource unavailable, try again
819 *
820 * Notes: Attempts to create a new child process which inherits state
821 * from the parent process. If successful, the call returns
822 * having created an initially suspended child process with an
823 * extra Mach task and thread reference, for which the thread
824 * is initially suspended. Until we resume the child process,
825 * it is not yet running.
826 *
827 * The return information to the child is contained in the
828 * thread state structure of the new child, and does not
829 * become visible to the child through a normal return process,
830 * since it never made the call into the kernel itself in the
831 * first place.
832 *
833 * After resuming the thread, this function returns directly to
834 * the parent process which invoked the fork() system call.
835 *
836 * Important: The child thread_resume occurs before the parent returns;
837 * depending on scheduling latency, this means that it is not
838 * deterministic as to whether the parent or child is scheduled
839 * to run first. It is entirely possible that the child could
840 * run to completion prior to the parent running.
841 */
842int
843fork(proc_t parent_proc, __unused struct fork_args *uap, int32_t *retval)
844{
845 thread_t child_thread;
846 int err;
847
848 retval[1] = 0; /* flag parent return for user space */
849
850 if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_FORK)) == 0) {
851 task_t child_task;
852 proc_t child_proc;
853
854 /* Return to the parent */
855 child_proc = (proc_t)get_bsdthreadtask_info(child_thread);
856 retval[0] = child_proc->p_pid;
857
858 /*
859 * Drop the signal lock on the child which was taken on our
860 * behalf by forkproc()/cloneproc() to prevent signals being
861 * received by the child in a partially constructed state.
862 */
863 proc_signalend(child_proc, 0);
864 proc_transend(child_proc, 0);
865
866 /* flag the fork has occurred */
867 proc_knote(parent_proc, NOTE_FORK | child_proc->p_pid);
868 DTRACE_PROC1(create, proc_t, child_proc);
869
870 /* "Return" to the child */
871 (void)thread_resume(child_thread);
872
873 /* drop the extra references we got during the creation */
874 if ((child_task = (task_t)get_threadtask(child_thread)) != NULL) {
875 task_deallocate(child_task);
876 }
877 thread_deallocate(child_thread);
878 }
879
880 return(err);
881}
882
883
884/*
885 * cloneproc
886 *
887 * Description: Create a new process from a specified process.
888 *
889 * Parameters: parent_task The parent task to be cloned, or
890 * TASK_NULL if task characteristics
891 * are not to be inherited; that is,
892 * TASK_NULL if the new task is not to
893 * inherit the VM characteristics of
894 * the parent
895 * parent_proc The parent process to be cloned
896 * inherit_memory TRUE if the child is to inherit
897 * memory from the parent; if this is
898 * TRUE, then the parent_task must
899 * also be non-NULL
900 *
901 * Returns: !NULL pointer to new child thread
902 * NULL Failure (unspecified)
903 *
904 * Note: On return, the newly created child process has the signal
905 * lock held to block delivery of signals to it if called with
906 * the lock set. The fork() code needs to explicitly remove
907 * this lock before signals can be delivered.
908 *
909 * In the case of bootstrap, this function can be called from
910 * bsd_utaskbootstrap() in order to bootstrap the first process;
911 * the net effect is to provide a uthread structure for the
912 * kernel process associated with the kernel task.
913 *
914 * XXX: Tristating using the value parent_task as the major key
915 * and inherit_memory as the minor key is something we should
916 * refactor later; we owe the current semantics, ultimately,
917 * to the semantics of task_create_internal. For now, we will
918 * live with this being somewhat awkward.
919 */
920thread_t
921cloneproc(task_t parent_task, proc_t parent_proc, int inherit_memory)
922{
923 task_t child_task;
924 proc_t child_proc;
925 thread_t child_thread = NULL;
926
927 if ((child_proc = forkproc(parent_proc)) == NULL) {
928 /* Failed to allocate new process */
929 goto bad;
930 }
931
932 child_thread = fork_create_child(parent_task, child_proc, inherit_memory, (parent_task == TASK_NULL) ? FALSE : (parent_proc->p_flag & P_LP64));
933
934 if (child_thread == NULL) {
935 /*
936 * Failed to create thread; now we must deconstruct the new
937 * process previously obtained from forkproc().
938 */
939 forkproc_free(child_proc);
940 goto bad;
941 }
942
943 child_task = get_threadtask(child_thread);
944 if (parent_proc->p_flag & P_LP64) {
945 task_set_64bit(child_task, TRUE);
946 OSBitOrAtomic(P_LP64, (UInt32 *)&child_proc->p_flag);
947 } else {
948 task_set_64bit(child_task, FALSE);
949 OSBitAndAtomic(~((uint32_t)P_LP64), (UInt32 *)&child_proc->p_flag);
950 }
951
952 /* make child visible */
953 pinsertchild(parent_proc, child_proc);
954
955 /*
956 * Make child runnable, set start time.
957 */
958 child_proc->p_stat = SRUN;
959bad:
960 return(child_thread);
961}
962
963
964/*
965 * Destroy a process structure that resulted from a call to forkproc(), but
966 * which must be returned to the system because of a subsequent failure
967 * preventing it from becoming active.
968 *
969 * Parameters: p The incomplete process from forkproc()
970 *
971 * Returns: (void)
972 *
973 * Note: This function should only be used in an error handler following
974 * a call to forkproc().
975 *
976 * Operations occur in reverse order of those in forkproc().
977 */
978void
979forkproc_free(proc_t p)
980{
981
982 /* We hold the signal and transition locks; drop them */
983 proc_signalend(p, 0);
984 proc_transend(p, 0);
985
986 /*
987 * If we have our own copy of the resource limits structure, we
988 * need to free it. If it's a shared copy, we need to drop our
989 * reference on it.
990 */
991 proc_limitdrop(p, 0);
992 p->p_limit = NULL;
993
994#if SYSV_SHM
995 /* Need to drop references to the shared memory segment(s), if any */
996 if (p->vm_shm) {
997 /*
998 * Use shmexec(): we have no address space, so no mappings
999 *
1000 * XXX Yes, the routine is badly named.
1001 */
1002 shmexec(p);
1003 }
1004#endif
1005
1006 /* Need to undo the effects of the fdcopy(), if any */
1007 fdfree(p);
1008
1009 /*
1010 * Drop the reference on a text vnode pointer, if any
1011 * XXX This code is broken in forkproc(); see <rdar://4256419>;
1012 * XXX if anyone ever uses this field, we will be extremely unhappy.
1013 */
1014 if (p->p_textvp) {
1015 vnode_rele(p->p_textvp);
1016 p->p_textvp = NULL;
1017 }
1018
1019 /* Stop the profiling clock */
1020 stopprofclock(p);
1021
1022 /* Update the audit session proc count */
1023 AUDIT_SESSION_PROCEXIT(p);
1024
1025 /* Release the credential reference */
1026 kauth_cred_unref(&p->p_ucred);
1027
1028 proc_list_lock();
1029 /* Decrement the count of processes in the system */
1030 nprocs--;
1031 proc_list_unlock();
1032
1033 thread_call_free(p->p_rcall);
1034
1035 /* Free allocated memory */
1036 FREE_ZONE(p->p_sigacts, sizeof *p->p_sigacts, M_SIGACTS);
1037 FREE_ZONE(p->p_stats, sizeof *p->p_stats, M_PSTATS);
1038 proc_checkdeadrefs(p);
1039 FREE_ZONE(p, sizeof *p, M_PROC);
1040}
1041
1042
1043/*
1044 * forkproc
1045 *
1046 * Description: Create a new process structure, given a parent process
1047 * structure.
1048 *
1049 * Parameters: parent_proc The parent process
1050 *
1051 * Returns: !NULL The new process structure
1052 * NULL Error (insufficient free memory)
1053 *
1054 * Note: When successful, the newly created process structure is
1055 * partially initialized; if a caller needs to deconstruct the
1056 * returned structure, they must call forkproc_free() to do so.
1057 */
1058proc_t
1059forkproc(proc_t parent_proc)
1060{
1061 proc_t child_proc; /* Our new process */
1062 static int nextpid = 0, pidwrap = 0, nextpidversion = 0;
1063 static uint64_t nextuniqueid = 0;
1064 int error = 0;
1065 struct session *sessp;
1066 uthread_t parent_uthread = (uthread_t)get_bsdthread_info(current_thread());
1067
1068 MALLOC_ZONE(child_proc, proc_t , sizeof *child_proc, M_PROC, M_WAITOK);
1069 if (child_proc == NULL) {
1070 printf("forkproc: M_PROC zone exhausted\n");
1071 goto bad;
1072 }
1073 /* zero it out as we need to insert in hash */
1074 bzero(child_proc, sizeof *child_proc);
1075
1076 MALLOC_ZONE(child_proc->p_stats, struct pstats *,
1077 sizeof *child_proc->p_stats, M_PSTATS, M_WAITOK);
1078 if (child_proc->p_stats == NULL) {
1079 printf("forkproc: M_PSTATS zone exhausted (p_stats)\n");
1080 FREE_ZONE(child_proc, sizeof *child_proc, M_PROC);
1081 child_proc = NULL;
1082 goto bad;
1083 }
1084 MALLOC_ZONE(child_proc->p_sigacts, struct sigacts *,
1085 sizeof *child_proc->p_sigacts, M_SIGACTS, M_WAITOK);
1086 if (child_proc->p_sigacts == NULL) {
1087 printf("forkproc: M_SIGACTS zone exhausted (p_sigacts)\n");
1088 FREE_ZONE(child_proc->p_stats, sizeof *child_proc->p_stats, M_PSTATS);
1089 FREE_ZONE(child_proc, sizeof *child_proc, M_PROC);
1090 child_proc = NULL;
1091 goto bad;
1092 }
1093
1094 /* allocate a callout for use by interval timers */
1095 child_proc->p_rcall = thread_call_allocate((thread_call_func_t)realitexpire, child_proc);
1096 if (child_proc->p_rcall == NULL) {
1097 FREE_ZONE(child_proc->p_sigacts, sizeof *child_proc->p_sigacts, M_SIGACTS);
1098 FREE_ZONE(child_proc->p_stats, sizeof *child_proc->p_stats, M_PSTATS);
1099 FREE_ZONE(child_proc, sizeof *child_proc, M_PROC);
1100 child_proc = NULL;
1101 goto bad;
1102 }
1103
1104
1105 /*
1106 * Find an unused PID.
1107 */
1108
1109 proc_list_lock();
1110
1111 nextpid++;
1112retry:
1113 /*
1114 * If the process ID prototype has wrapped around,
1115 * restart somewhat above 0, as the low-numbered procs
1116 * tend to include daemons that don't exit.
1117 */
1118 if (nextpid >= PID_MAX) {
1119 nextpid = 100;
1120 pidwrap = 1;
1121 }
1122 if (pidwrap != 0) {
1123
1124 /* the pid stays in the hash for both zombie and running states */
1125 if (pfind_locked(nextpid) != PROC_NULL) {
1126 nextpid++;
1127 goto retry;
1128 }
1129
1130 if (pgfind_internal(nextpid) != PGRP_NULL) {
1131 nextpid++;
1132 goto retry;
1133 }
1134 if (session_find_internal(nextpid) != SESSION_NULL) {
1135 nextpid++;
1136 goto retry;
1137 }
1138 }
1139 nprocs++;
1140 child_proc->p_pid = nextpid;
1141 child_proc->p_idversion = nextpidversion++;
1142 /* kernel process is handcrafted and not from fork, so start from 1 */
1143 child_proc->p_uniqueid = ++nextuniqueid;
1144#if 1
1145 if (child_proc->p_pid != 0) {
1146 if (pfind_locked(child_proc->p_pid) != PROC_NULL)
1147 panic("proc in the list already\n");
1148 }
1149#endif
1150 /* Insert in the hash */
1151 child_proc->p_listflag |= (P_LIST_INHASH | P_LIST_INCREATE);
1152 LIST_INSERT_HEAD(PIDHASH(child_proc->p_pid), child_proc, p_hash);
1153 proc_list_unlock();
1154
1155
1156 /*
1157 * We've identified the PID we are going to use; initialize the new
1158 * process structure.
1159 */
1160 child_proc->p_stat = SIDL;
1161 child_proc->p_pgrpid = PGRPID_DEAD;
1162
1163 /*
1164 * The zeroing of the proc was done at allocation time due to the
1165 * need for insertion into the hash. Copy the section that is to be copied
1166 * directly from the parent.
1167 */
1168 bcopy(&parent_proc->p_startcopy, &child_proc->p_startcopy,
1169 (unsigned) ((caddr_t)&child_proc->p_endcopy - (caddr_t)&child_proc->p_startcopy));
1170
1171 /*
1172 * Some flags are inherited from the parent.
1173 * Duplicate sub-structures as needed.
1174 * Increase reference counts on shared objects.
1175 * The p_stats and p_sigacts substructs are set in vm_fork.
1176 */
1177 child_proc->p_flag = (parent_proc->p_flag & (P_LP64 | P_TRANSLATED | P_AFFINITY | P_DISABLE_ASLR));
1178 if (parent_proc->p_flag & P_PROFIL)
1179 startprofclock(child_proc);
1180 /*
1181 * Note that if the current thread has an assumed identity, this
1182 * credential will be granted to the new process.
1183 */
1184 child_proc->p_ucred = kauth_cred_get_with_ref();
1185 /* update cred on proc */
1186 PROC_UPDATE_CREDS_ONPROC(child_proc);
1187 /* update audit session proc count */
1188 AUDIT_SESSION_PROCNEW(child_proc);
1189
1190#if CONFIG_FINE_LOCK_GROUPS
1191 lck_mtx_init(&child_proc->p_mlock, proc_mlock_grp, proc_lck_attr);
1192 lck_mtx_init(&child_proc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr);
1193#if CONFIG_DTRACE
1194 lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr);
1195#endif
1196 lck_spin_init(&child_proc->p_slock, proc_slock_grp, proc_lck_attr);
1197#else /* !CONFIG_FINE_LOCK_GROUPS */
1198 lck_mtx_init(&child_proc->p_mlock, proc_lck_grp, proc_lck_attr);
1199 lck_mtx_init(&child_proc->p_fdmlock, proc_lck_grp, proc_lck_attr);
1200#if CONFIG_DTRACE
1201 lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr);
1202#endif
1203 lck_spin_init(&child_proc->p_slock, proc_lck_grp, proc_lck_attr);
1204#endif /* !CONFIG_FINE_LOCK_GROUPS */
1205 klist_init(&child_proc->p_klist);
1206
1207 if (child_proc->p_textvp != NULLVP) {
1208 /* bump references to the text vnode */
1209 /* Need to hold iocount across the ref call */
1210 if (vnode_getwithref(child_proc->p_textvp) == 0) {
1211 error = vnode_ref(child_proc->p_textvp);
1212 vnode_put(child_proc->p_textvp);
1213 if (error != 0)
1214 child_proc->p_textvp = NULLVP;
1215 }
1216 }
1217
1218 /*
1219 * Copy the parent's per-process open file table to the child; if
1220 * there is a per-thread current working directory, set the child's
1221 * per-process current working directory to that instead of the
1222 * parent's.
1223 *
1224 * XXX may fail to copy descriptors to child
1225 */
1226 child_proc->p_fd = fdcopy(parent_proc, parent_uthread->uu_cdir);
1227
1228#if SYSV_SHM
1229 if (parent_proc->vm_shm) {
1230 /* XXX may fail to attach shm to child */
1231 (void)shmfork(parent_proc, child_proc);
1232 }
1233#endif
1234 /*
1235 * inherit the limit structure to child
1236 */
1237 proc_limitfork(parent_proc, child_proc);
1238
1239 if (child_proc->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
1240 uint64_t rlim_cur = child_proc->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur;
1241 child_proc->p_rlim_cpu.tv_sec = (rlim_cur > __INT_MAX__) ? __INT_MAX__ : rlim_cur;
1242 }
1243
1244 /* Initialize new process stats, including start time */
1245 /* <rdar://6640543> non-zeroed portion contains garbage AFAICT */
1246 bzero(&child_proc->p_stats->pstat_startzero,
1247 (unsigned) ((caddr_t)&child_proc->p_stats->pstat_endzero -
1248 (caddr_t)&child_proc->p_stats->pstat_startzero));
1249 bzero(&child_proc->p_stats->user_p_prof, sizeof(struct user_uprof));
1250 microtime(&child_proc->p_start);
1251 child_proc->p_stats->p_start = child_proc->p_start; /* for compat */
1252
1253 if (parent_proc->p_sigacts != NULL)
1254 (void)memcpy(child_proc->p_sigacts,
1255 parent_proc->p_sigacts, sizeof *child_proc->p_sigacts);
1256 else
1257 (void)memset(child_proc->p_sigacts, 0, sizeof *child_proc->p_sigacts);
1258
1259 sessp = proc_session(parent_proc);
1260 if (sessp->s_ttyvp != NULL && parent_proc->p_flag & P_CONTROLT)
1261 OSBitOrAtomic(P_CONTROLT, &child_proc->p_flag);
1262 session_rele(sessp);
1263
1264 /*
1265 * Block all signals from reaching the process.
1266 * No transition race should be occurring with the child yet,
1267 * but indicate that the process is in (the creation) transition.
1268 */
1269 proc_signalstart(child_proc, 0);
1270 proc_transstart(child_proc, 0);
1271
1272 child_proc->p_pcaction = (parent_proc->p_pcaction) & P_PCMAX;
1273 TAILQ_INIT(&child_proc->p_uthlist);
1274 TAILQ_INIT(&child_proc->p_aio_activeq);
1275 TAILQ_INIT(&child_proc->p_aio_doneq);
1276
1277 /* Inherit the parent flags for code sign */
1278 child_proc->p_csflags = (parent_proc->p_csflags & ~CS_KILLED);
1279
1280 /*
1281 * All processes have work queue locks; cleaned up by
1282 * reap_child_locked()
1283 */
1284 workqueue_init_lock(child_proc);
1285
1286 /*
1287 * Copy work queue information
1288 *
1289 * Note: This should probably only happen in the case where we are
1290 * creating a child that is a copy of the parent; since this
1291 * routine is called in the non-duplication case of vfork()
1292 * or posix_spawn(), then this information should likely not
1293 * be duplicated.
1294 *
1295 * <rdar://6640553> Work queue pointers that no longer point to code
1296 */
1297 child_proc->p_wqthread = parent_proc->p_wqthread;
1298 child_proc->p_threadstart = parent_proc->p_threadstart;
1299 child_proc->p_pthsize = parent_proc->p_pthsize;
1300 child_proc->p_targconc = parent_proc->p_targconc;
1301 if ((parent_proc->p_lflag & P_LREGISTER) != 0) {
1302 child_proc->p_lflag |= P_LREGISTER;
1303 }
1304 child_proc->p_dispatchqueue_offset = parent_proc->p_dispatchqueue_offset;
1305#if PSYNCH
1306 pth_proc_hashinit(child_proc);
1307#endif /* PSYNCH */
1308
1309#if CONFIG_LCTX
1310 child_proc->p_lctx = NULL;
1311 /* Add new process to login context (if any). */
1312 if (parent_proc->p_lctx != NULL) {
1313 /*
1314 * <rdar://6640564> This should probably be delayed in the
1315 * vfork() or posix_spawn() cases.
1316 */
1317 LCTX_LOCK(parent_proc->p_lctx);
1318 enterlctx(child_proc, parent_proc->p_lctx, 0);
1319 }
1320#endif
1321
1322bad:
1323 return(child_proc);
1324}
1325
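/*
 * Process locking primitives
 *
 * The routines below are thin wrappers around the underlying lock
 * primitives: proc_lock()/proc_unlock() operate on the per-process
 * mutex (p_mlock), proc_spinlock()/proc_spinunlock() on the per-process
 * spin lock (p_slock), and proc_list_lock()/proc_list_unlock() on the
 * global process list mutex (proc_list_mlock).
 */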
1326void
1327proc_lock(proc_t p)
1328{
1329 lck_mtx_lock(&p->p_mlock);
1330}
1331
1332void
1333proc_unlock(proc_t p)
1334{
1335 lck_mtx_unlock(&p->p_mlock);
1336}
1337
1338void
1339proc_spinlock(proc_t p)
1340{
1341 lck_spin_lock(&p->p_slock);
1342}
1343
1344void
1345proc_spinunlock(proc_t p)
1346{
1347 lck_spin_unlock(&p->p_slock);
1348}
1349
1350void
1351proc_list_lock(void)
1352{
1353 lck_mtx_lock(proc_list_mlock);
1354}
1355
1356void
1357proc_list_unlock(void)
1358{
1359 lck_mtx_unlock(proc_list_mlock);
1360}
1361
1362#include <kern/zalloc.h>
1363
1364struct zone *uthread_zone;
1365static int uthread_zone_inited = 0;
1366
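/*
 * uthread_zone_init
 *
 * Lazily create the zone from which uthread structures are allocated,
 * sized to hold up to thread_max uthreads; called from uthread_alloc()
 * on first use.
 */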
1367static void
1368uthread_zone_init(void)
1369{
1370 if (!uthread_zone_inited) {
1371 uthread_zone = zinit(sizeof(struct uthread),
1372 thread_max * sizeof(struct uthread),
1373 THREAD_CHUNK * sizeof(struct uthread),
1374 "uthreads");
1375 uthread_zone_inited = 1;
1376 }
1377}
1378
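/*
 * uthread_alloc
 *
 * Description: Allocate and initialize a zeroed uthread structure for a
 * new thread in the given task.
 *
 * Parameters: task task the thread is being created in
 * thread Mach thread to associate with the uthread
 * noinherit if non-zero, do not inherit the creating
 * thread's credential or signal mask (used
 * for workqueue threads)
 *
 * Returns: pointer to the new uthread structure
 */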
1379void *
1380uthread_alloc(task_t task, thread_t thread, int noinherit)
1381{
1382 proc_t p;
1383 uthread_t uth;
1384 uthread_t uth_parent;
1385 void *ut;
1386
1387 if (!uthread_zone_inited)
1388 uthread_zone_init();
1389
1390 ut = (void *)zalloc(uthread_zone);
1391 bzero(ut, sizeof(struct uthread));
1392
1393 p = (proc_t) get_bsdtask_info(task);
1394 uth = (uthread_t)ut;
1395 uth->uu_kwe.kwe_uth = uth;
1396
1397 /*
1398 * Thread inherits credential from the creating thread, if both
1399 * are in the same task.
1400 *
1401 * If the creating thread has no credential or is from another
1402 * task we can leave the new thread credential NULL. If it needs
1403 * one later, it will be lazily assigned from the task's process.
1404 */
1405 uth_parent = (uthread_t)get_bsdthread_info(current_thread());
1406 if ((noinherit == 0) && task == current_task() &&
1407 uth_parent != NULL &&
1408 IS_VALID_CRED(uth_parent->uu_ucred)) {
1409 /*
1410 * XXX The new thread is, in theory, being created in context
1411 * XXX of parent thread, so a direct reference to the parent
1412 * XXX is OK.
1413 */
1414 kauth_cred_ref(uth_parent->uu_ucred);
1415 uth->uu_ucred = uth_parent->uu_ucred;
1416 /* the credential we just inherited is an assumed credential */
1417 if (uth_parent->uu_flag & UT_SETUID)
1418 uth->uu_flag |= UT_SETUID;
1419 } else {
1420 /* sometimes workqueue threads are created outside of task context */
1421 if ((task != kernel_task) && (p != PROC_NULL))
1422 uth->uu_ucred = kauth_cred_proc_ref(p);
1423 else
1424 uth->uu_ucred = NOCRED;
1425 }
1426
1427
1428 if ((task != kernel_task) && p) {
1429
1430 proc_lock(p);
1431 if (noinherit != 0) {
1432 /* workq threads will not inherit masks */
1433 uth->uu_sigmask = ~workq_threadmask;
1434 } else if (uth_parent) {
1435 if (uth_parent->uu_flag & UT_SAS_OLDMASK)
1436 uth->uu_sigmask = uth_parent->uu_oldmask;
1437 else
1438 uth->uu_sigmask = uth_parent->uu_sigmask;
1439 }
1440 uth->uu_context.vc_thread = thread;
1441 TAILQ_INSERT_TAIL(&p->p_uthlist, uth, uu_list);
1442 proc_unlock(p);
1443
1444#if CONFIG_DTRACE
1445 if (p->p_dtrace_ptss_pages != NULL) {
1446 uth->t_dtrace_scratch = dtrace_ptss_claim_entry(p);
1447 }
1448#endif
1449 }
1450
1451 return (ut);
1452}
1453
1454
1455/*
1456 * This routine frees all the BSD context in the uthread except the credential.
1457 * It does not free the uthread structure itself.
1458 */
1459void
1460uthread_cleanup(task_t task, void *uthread, void * bsd_info)
1461{
1462 struct _select *sel;
1463 uthread_t uth = (uthread_t)uthread;
1464 proc_t p = (proc_t)bsd_info;
1465
1466
1467 if (uth->uu_lowpri_window || uth->uu_throttle_info) {
1468 /*
1469 * task is marked as a low priority I/O type
1470 * and we've somehow managed to not dismiss the throttle
1471 * through the normal exit paths back to user space...
1472 * no need to throttle this thread since it's going away,
1473 * but we do need to update our bookkeeping w.r.t. throttled threads
1474 *
1475 * Calling this routine will clean up any throttle info reference
1476 * still in use by the thread.
1477 */
1478 throttle_lowpri_io(FALSE);
1479 }
1480 /*
1481 * Per-thread audit state should never last beyond system
1482 * call return. Since we don't audit the thread creation/
1483 * removal, the thread state pointer should never be
1484 * non-NULL when we get here.
1485 */
1486 assert(uth->uu_ar == NULL);
1487
1488 sel = &uth->uu_select;
1489 /* cleanup the select bit space */
1490 if (sel->nbytes) {
1491 FREE(sel->ibits, M_TEMP);
1492 FREE(sel->obits, M_TEMP);
1493 sel->nbytes = 0;
1494 }
1495
1496 if (uth->uu_cdir) {
1497 vnode_rele(uth->uu_cdir);
1498 uth->uu_cdir = NULLVP;
1499 }
1500
1501 if (uth->uu_allocsize && uth->uu_wqset){
1502 kfree(uth->uu_wqset, uth->uu_allocsize);
1503 sel->count = 0;
1504 uth->uu_allocsize = 0;
1505 uth->uu_wqset = 0;
1506 sel->wql = 0;
1507 }
1508
1509 if(uth->pth_name != NULL)
1510 {
1511 kfree(uth->pth_name, MAXTHREADNAMESIZE);
1512 uth->pth_name = 0;
1513 }
1514 if ((task != kernel_task) && p) {
1515
1516 if (((uth->uu_flag & UT_VFORK) == UT_VFORK) && (uth->uu_proc != PROC_NULL)) {
1517 vfork_exit_internal(uth->uu_proc, 0, 1);
1518 }
1519 /*
1520 * Remove the thread from the process list and
1521 * transfer [appropriate] pending signals to the process.
1522 */
1523 if (get_bsdtask_info(task) == p) {
1524 proc_lock(p);
1525 TAILQ_REMOVE(&p->p_uthlist, uth, uu_list);
1526 p->p_siglist |= (uth->uu_siglist & execmask & (~p->p_sigignore | sigcantmask));
1527 proc_unlock(p);
1528 }
1529#if CONFIG_DTRACE
1530 struct dtrace_ptss_page_entry *tmpptr = uth->t_dtrace_scratch;
1531 uth->t_dtrace_scratch = NULL;
1532 if (tmpptr != NULL) {
1533 dtrace_ptss_release_entry(p, tmpptr);
1534 }
1535#endif
1536 }
1537}
1538
1539/* This routine releases the credential stored in uthread */
1540void
1541uthread_cred_free(void *uthread)
1542{
1543 uthread_t uth = (uthread_t)uthread;
1544
1545 /* drop the credential reference, if any */
1546 if (IS_VALID_CRED(uth->uu_ucred)) {
1547 kauth_cred_t oldcred = uth->uu_ucred;
1548 uth->uu_ucred = NOCRED;
1549 kauth_cred_unref(&oldcred);
1550 }
1551}
1552
1553/* This routine frees the uthread structure held in thread structure */
1554void
1555uthread_zone_free(void *uthread)
1556{
1557 /* and free the uthread itself */
1558 zfree(uthread_zone, uthread);
1559}