]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kern_fork.c
2f09ba8eed297bf3f50b492a6272e59df3d710a9
[apple/xnu.git] / bsd / kern / kern_fork.c
1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)kern_fork.c 8.8 (Berkeley) 2/14/95
67 */
68 /*
69 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74 /*
75 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
76 * support for mandatory and extensible security protections. This notice
77 * is included in support of clause 2.2 (b) of the Apple Public License,
78 * Version 2.0.
79 */
80
81 #include <kern/assert.h>
82 #include <sys/param.h>
83 #include <sys/systm.h>
84 #include <sys/filedesc.h>
85 #include <sys/kernel.h>
86 #include <sys/malloc.h>
87 #include <sys/proc_internal.h>
88 #include <sys/kauth.h>
89 #include <sys/user.h>
90 #include <sys/resourcevar.h>
91 #include <sys/vnode_internal.h>
92 #include <sys/file_internal.h>
93 #include <sys/acct.h>
94 #include <sys/codesign.h>
95 #include <sys/sysproto.h>
96 #if CONFIG_DTRACE
97 /* Do not include dtrace.h, it redefines kmem_[alloc/free] */
98 extern void dtrace_fasttrap_fork(proc_t, proc_t);
99 extern void (*dtrace_helpers_fork)(proc_t, proc_t);
100 extern void dtrace_lazy_dofs_duplicate(proc_t, proc_t);
101
102 #include <sys/dtrace_ptss.h>
103 #endif
104
105 #include <security/audit/audit.h>
106
107 #include <mach/mach_types.h>
108 #include <kern/kern_types.h>
109 #include <kern/kalloc.h>
110 #include <kern/mach_param.h>
111 #include <kern/task.h>
112 #include <kern/thread.h>
113 #include <kern/thread_call.h>
114 #include <kern/zalloc.h>
115
116 #include <machine/spl.h>
117
118 #if CONFIG_MACF
119 #include <security/mac.h>
120 #include <security/mac_mach_internal.h>
121 #endif
122
123 #include <vm/vm_map.h>
124 #include <vm/vm_protos.h>
125 #include <vm/vm_shared_region.h>
126
127 #include <sys/shm_internal.h> /* for shmfork() */
128 #include <mach/task.h> /* for thread_create() */
129 #include <mach/thread_act.h> /* for thread_resume() */
130
131 #include <sys/sdt.h>
132
133 #if CONFIG_MEMORYSTATUS
134 #include <sys/kern_memorystatus.h>
135 #endif
136
137 /* XXX routines which should have Mach prototypes, but don't */
138 void thread_set_parent(thread_t parent, int pid);
139 extern void act_thread_catt(void *ctx);
140 void thread_set_child(thread_t child, int pid);
141 void *act_thread_csave(void);
142
143
144 thread_t cloneproc(task_t, proc_t, int);
145 proc_t forkproc(proc_t);
146 void forkproc_free(proc_t);
147 thread_t fork_create_child(task_t parent_task, proc_t child, int inherit_memory, int is64bit);
148 void proc_vfork_begin(proc_t parent_proc);
149 void proc_vfork_end(proc_t parent_proc);
150
151 #define DOFORK 0x1 /* fork() system call */
152 #define DOVFORK 0x2 /* vfork() system call */
153
154 /*
155 * proc_vfork_begin
156 *
157 * Description: start a vfork on a process
158 *
159 * Parameters: parent_proc process (re)entering vfork state
160 *
161 * Returns: (void)
162 *
163 * Notes: Although this function increments a count, a count in
164 * excess of 1 is not currently supported. According to the
165 * POSIX standard, calling anything other than execve() or
166 * _exit() following a vfork(), including calling vfork()
167 * itself again, will result in undefined behaviour
168 */
169 void
170 proc_vfork_begin(proc_t parent_proc)
171 {
172 proc_lock(parent_proc);
173 parent_proc->p_lflag |= P_LVFORK;
174 parent_proc->p_vforkcnt++;
175 proc_unlock(parent_proc);
176 }
177
178 /*
179 * proc_vfork_end
180 *
181 * Description: stop a vfork on a process
182 *
183 * Parameters: parent_proc process leaving vfork state
184 *
185 * Returns: (void)
186 *
187 * Notes: Decrements the count; currently, reentrancy of vfork()
188 * is unsupported on the current process
189 */
190 void
191 proc_vfork_end(proc_t parent_proc)
192 {
193 proc_lock(parent_proc);
194 parent_proc->p_vforkcnt--;
195 if (parent_proc->p_vforkcnt < 0)
196 panic("vfork cnt is -ve");
197 if (parent_proc->p_vforkcnt == 0)
198 parent_proc->p_lflag &= ~P_LVFORK;
199 proc_unlock(parent_proc);
200 }
201
202
203 /*
204 * vfork
205 *
206 * Description: vfork system call
207 *
208 * Parameters: void [no arguments]
209 *
210 * Retval: 0 (to child process)
211 * !0 pid of child (to parent process)
212 * -1 error (see "Returns:")
213 *
214 * Returns: EAGAIN Administrative limit reached
215 * EINVAL vfork() called during vfork()
216 * ENOMEM Failed to allocate new process
217 *
218 * Note: After a successful call to this function, the parent process
219 * has its task, thread, and uthread lent to the child process,
220 * and control is returned to the caller; if this function is
221 * invoked as a system call, the return is to user space, and
222 * is effectively running on the child process.
223 *
224 * Subsequent calls that operate on process state are permitted,
225 * though discouraged, and will operate on the child process; any
226 * operations on the task, thread, or uthread will result in
227 * changes in the parent state, and, if inheritable, the child
228 * state, when a task, thread, and uthread are realized for the
229 * child process at execve() time, will also be affected. Given
230 * this, it's recommended that people use the posix_spawn() call
231 * instead.
232 *
233 * BLOCK DIAGRAM OF VFORK
234 *
235 * Before:
236 *
237 * ,----------------. ,-------------.
238 * | | task | |
239 * | parent_thread | ------> | parent_task |
240 * | | <.list. | |
241 * `----------------' `-------------'
242 * uthread | ^ bsd_info | ^
243 * v | vc_thread v | task
244 * ,----------------. ,-------------.
245 * | | | |
246 * | parent_uthread | <.list. | parent_proc | <-- current_proc()
247 * | | | |
248 * `----------------' `-------------'
249 * uu_proc |
250 * v
251 * NULL
252 *
253 * After:
254 *
255 * ,----------------. ,-------------.
256 * | | task | |
257 * ,----> | parent_thread | ------> | parent_task |
258 * | | | <.list. | |
259 * | `----------------' `-------------'
260 * | uthread | ^ bsd_info | ^
261 * | v | vc_thread v | task
262 * | ,----------------. ,-------------.
263 * | | | | |
264 * | | parent_uthread | <.list. | parent_proc |
265 * | | | | |
266 * | `----------------' `-------------'
267 * | uu_proc | . list
268 * | v v
269 * | ,----------------.
270 * `----- | |
271 * p_vforkact | child_proc | <-- current_proc()
272 * | |
273 * `----------------'
274 */
275 int
276 vfork(proc_t parent_proc, __unused struct vfork_args *uap, int32_t *retval)
277 {
278 thread_t child_thread;
279 int err;
280
281 if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_VFORK)) != 0) {
282 retval[1] = 0;
283 } else {
284 /*
285 * kludge: rely on uu_proc being set in the vfork case,
286 * rather than returning the actual thread. We can remove
287 * this when we remove the uu_proc/current_proc() kludge.
288 */
289 proc_t child_proc = current_proc();
290
291 retval[0] = child_proc->p_pid;
292 retval[1] = 1; /* flag child return for user space */
293
294 /*
295 * Drop the signal lock on the child which was taken on our
296 * behalf by forkproc()/cloneproc() to prevent signals being
297 * received by the child in a partially constructed state.
298 */
299 proc_signalend(child_proc, 0);
300 proc_transend(child_proc, 0);
301
302 /* flag the fork has occurred */
303 proc_knote(parent_proc, NOTE_FORK | child_proc->p_pid);
304 DTRACE_PROC1(create, proc_t, child_proc);
305 }
306
307 return(err);
308 }
309
310
311 /*
312 * fork1
313 *
314 * Description: common code used by all new process creation other than the
315 * bootstrap of the initial process on the system
316 *
317 * Parameters: parent_proc parent process of the process being
318 * created
319 * child_threadp pointer to location to receive the
320 * Mach thread_t of the child process
321 * kind kind of creation being requested
322 *
323 * Notes: Permissible values for 'kind':
324 *
325 * PROC_CREATE_FORK Create a complete process which will
326 * return actively running in both the
327 * parent and the child; the child copies
328 * the parent address space.
329 * PROC_CREATE_SPAWN Create a complete process which will
330 * return actively running in the parent
331 * only after returning actively running
332 * in the child; the child address space
333 * is newly created by an image activator,
334 * after which the child is run.
335 * PROC_CREATE_VFORK Creates a partial process which will
336 * borrow the parent task, thread, and
337 * uthread to return running in the child;
338 * the child address space and other parts
339 * are lazily created at execve() time, or
340 * the child is terminated, and the parent
341 * does not actively run until that
342 * happens.
343 *
344 * At first it may seem strange that we return the child thread
345 * address rather than process structure, since the process is
346 * the only part guaranteed to be "new"; however, since we do
347 * not actually adjust other references between Mach and BSD (see
348 * the block diagram above the implementation of vfork()), this
349 * is the only method which guarantees us the ability to get
350 * back to the other information.
351 */
352 int
353 fork1(proc_t parent_proc, thread_t *child_threadp, int kind)
354 {
355 thread_t parent_thread = (thread_t)current_thread();
356 uthread_t parent_uthread = (uthread_t)get_bsdthread_info(parent_thread);
357 proc_t child_proc = NULL; /* set in switch, but compiler... */
358 thread_t child_thread = NULL;
359 uid_t uid;
360 int count;
361 int err = 0;
362 int spawn = 0;
363
364 /*
365 * Although process entries are dynamically created, we still keep
366 * a global limit on the maximum number we will create. Don't allow
367 * a nonprivileged user to use the last process; don't let root
368 * exceed the limit. The variable nprocs is the current number of
369 * processes, maxproc is the limit.
370 */
371 uid = kauth_getruid();
372 proc_list_lock();
373 if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) {
374 proc_list_unlock();
375 tablefull("proc");
376 return (EAGAIN);
377 }
378 proc_list_unlock();
379
380 /*
381 * Increment the count of procs running with this uid. Don't allow
382 * a nonprivileged user to exceed their current limit, which is
383 * always less than what an rlim_t can hold.
384 * (locking protection is provided by list lock held in chgproccnt)
385 */
386 count = chgproccnt(uid, 1);
387 if (uid != 0 &&
388 (rlim_t)count > parent_proc->p_rlimit[RLIMIT_NPROC].rlim_cur) {
389 err = EAGAIN;
390 goto bad;
391 }
392
393 #if CONFIG_MACF
394 /*
395 * Determine if MAC policies applied to the process will allow
396 * it to fork. This is an advisory-only check.
397 */
398 err = mac_proc_check_fork(parent_proc);
399 if (err != 0) {
400 goto bad;
401 }
402 #endif
403
404 switch(kind) {
405 case PROC_CREATE_VFORK:
406 /*
407 * Prevent a vfork while we are in vfork(); we should
408 * also likely preventing a fork here as well, and this
409 * check should then be outside the switch statement,
410 * since the proc struct contents will copy from the
411 * child and the tash/thread/uthread from the parent in
412 * that case. We do not support vfork() in vfork()
413 * because we don't have to; the same non-requirement
414 * is true of both fork() and posix_spawn() and any
415 * call other than execve() amd _exit(), but we've
416 * been historically lenient, so we continue to be so
417 * (for now).
418 *
419 * <rdar://6640521> Probably a source of random panics
420 */
421 if (parent_uthread->uu_flag & UT_VFORK) {
422 printf("fork1 called within vfork by %s\n", parent_proc->p_comm);
423 err = EINVAL;
424 goto bad;
425 }
426
427 /*
428 * Flag us in progress; if we chose to support vfork() in
429 * vfork(), we would chain our parent at this point (in
430 * effect, a stack push). We don't, since we actually want
431 * to disallow everything not specified in the standard
432 */
433 proc_vfork_begin(parent_proc);
434
435 /* The newly created process comes with signal lock held */
436 if ((child_proc = forkproc(parent_proc)) == NULL) {
437 /* Failed to allocate new process */
438 proc_vfork_end(parent_proc);
439 err = ENOMEM;
440 goto bad;
441 }
442
443 // XXX BEGIN: wants to move to be common code (and safe)
444 #if CONFIG_MACF
445 /*
446 * allow policies to associate the credential/label that
447 * we referenced from the parent ... with the child
448 * JMM - this really isn't safe, as we can drop that
449 * association without informing the policy in other
450 * situations (keep long enough to get policies changed)
451 */
452 mac_cred_label_associate_fork(child_proc->p_ucred, child_proc);
453 #endif
454
455 /*
456 * Propogate change of PID - may get new cred if auditing.
457 *
458 * NOTE: This has no effect in the vfork case, since
459 * child_proc->task != current_task(), but we duplicate it
460 * because this is probably, ultimately, wrong, since we
461 * will be running in the "child" which is the parent task
462 * with the wrong token until we get to the execve() or
463 * _exit() call; a lot of "undefined" can happen before
464 * that.
465 *
466 * <rdar://6640530> disallow everything but exeve()/_exit()?
467 */
468 set_security_token(child_proc);
469
470 AUDIT_ARG(pid, child_proc->p_pid);
471
472 // XXX END: wants to move to be common code (and safe)
473
474 /*
475 * BORROW PARENT TASK, THREAD, UTHREAD FOR CHILD
476 *
477 * Note: this is where we would "push" state instead of setting
478 * it for nested vfork() support (see proc_vfork_end() for
479 * description if issues here).
480 */
481 child_proc->task = parent_proc->task;
482
483 child_proc->p_lflag |= P_LINVFORK;
484 child_proc->p_vforkact = parent_thread;
485 child_proc->p_stat = SRUN;
486
487 parent_uthread->uu_flag |= UT_VFORK;
488 parent_uthread->uu_proc = child_proc;
489 parent_uthread->uu_userstate = (void *)act_thread_csave();
490 parent_uthread->uu_vforkmask = parent_uthread->uu_sigmask;
491
492 /* temporarily drop thread-set-id state */
493 if (parent_uthread->uu_flag & UT_SETUID) {
494 parent_uthread->uu_flag |= UT_WASSETUID;
495 parent_uthread->uu_flag &= ~UT_SETUID;
496 }
497
498 /* blow thread state information */
499 /* XXX is this actually necessary, given syscall return? */
500 thread_set_child(parent_thread, child_proc->p_pid);
501
502 child_proc->p_acflag = AFORK; /* forked but not exec'ed */
503
504 /*
505 * Preserve synchronization semantics of vfork. If
506 * waiting for child to exec or exit, set P_PPWAIT
507 * on child, and sleep on our proc (in case of exit).
508 */
509 child_proc->p_lflag |= P_LPPWAIT;
510 pinsertchild(parent_proc, child_proc); /* set visible */
511
512 break;
513
514 case PROC_CREATE_SPAWN:
515 /*
516 * A spawned process differs from a forked process in that
517 * the spawned process does not carry around the parents
518 * baggage with regard to address space copying, dtrace,
519 * and so on.
520 */
521 spawn = 1;
522
523 /* FALLSTHROUGH */
524
525 case PROC_CREATE_FORK:
526 /*
527 * When we clone the parent process, we are going to inherit
528 * its task attributes and memory, since when we fork, we
529 * will, in effect, create a duplicate of it, with only minor
530 * differences. Contrarily, spawned processes do not inherit.
531 */
532 if ((child_thread = cloneproc(parent_proc->task, parent_proc, spawn ? FALSE : TRUE)) == NULL) {
533 /* Failed to create thread */
534 err = EAGAIN;
535 goto bad;
536 }
537
538 /* copy current thread state into the child thread (only for fork) */
539 if (!spawn) {
540 thread_dup(child_thread);
541 }
542
543 /* child_proc = child_thread->task->proc; */
544 child_proc = (proc_t)(get_bsdtask_info(get_threadtask(child_thread)));
545
546 // XXX BEGIN: wants to move to be common code (and safe)
547 #if CONFIG_MACF
548 /*
549 * allow policies to associate the credential/label that
550 * we referenced from the parent ... with the child
551 * JMM - this really isn't safe, as we can drop that
552 * association without informing the policy in other
553 * situations (keep long enough to get policies changed)
554 */
555 mac_cred_label_associate_fork(child_proc->p_ucred, child_proc);
556 #endif
557
558 /*
559 * Propogate change of PID - may get new cred if auditing.
560 *
561 * NOTE: This has no effect in the vfork case, since
562 * child_proc->task != current_task(), but we duplicate it
563 * because this is probably, ultimately, wrong, since we
564 * will be running in the "child" which is the parent task
565 * with the wrong token until we get to the execve() or
566 * _exit() call; a lot of "undefined" can happen before
567 * that.
568 *
569 * <rdar://6640530> disallow everything but exeve()/_exit()?
570 */
571 set_security_token(child_proc);
572
573 AUDIT_ARG(pid, child_proc->p_pid);
574
575 // XXX END: wants to move to be common code (and safe)
576
577 /*
578 * Blow thread state information; this is what gives the child
579 * process its "return" value from a fork() call.
580 *
581 * Note: this should probably move to fork() proper, since it
582 * is not relevent to spawn, and the value won't matter
583 * until we resume the child there. If you are in here
584 * refactoring code, consider doing this at the same time.
585 */
586 thread_set_child(child_thread, child_proc->p_pid);
587
588 child_proc->p_acflag = AFORK; /* forked but not exec'ed */
589
590 // <rdar://6598155> dtrace code cleanup needed
591 #if CONFIG_DTRACE
592 /*
593 * This code applies to new processes who are copying the task
594 * and thread state and address spaces of their parent process.
595 */
596 if (!spawn) {
597 // <rdar://6598155> call dtrace specific function here instead of all this...
598 /*
599 * APPLE NOTE: Solaris does a sprlock() and drops the
600 * proc_lock here. We're cheating a bit and only taking
601 * the p_dtrace_sprlock lock. A full sprlock would
602 * task_suspend the parent.
603 */
604 lck_mtx_lock(&parent_proc->p_dtrace_sprlock);
605
606 /*
607 * Remove all DTrace tracepoints from the child process. We
608 * need to do this _before_ duplicating USDT providers since
609 * any associated probes may be immediately enabled.
610 */
611 if (parent_proc->p_dtrace_count > 0) {
612 dtrace_fasttrap_fork(parent_proc, child_proc);
613 }
614
615 lck_mtx_unlock(&parent_proc->p_dtrace_sprlock);
616
617 /*
618 * Duplicate any lazy dof(s). This must be done while NOT
619 * holding the parent sprlock! Lock ordering is
620 * dtrace_dof_mode_lock, then sprlock. It is imperative we
621 * always call dtrace_lazy_dofs_duplicate, rather than null
622 * check and call if !NULL. If we NULL test, during lazy dof
623 * faulting we can race with the faulting code and proceed
624 * from here to beyond the helpers copy. The lazy dof
625 * faulting will then fail to copy the helpers to the child
626 * process.
627 */
628 dtrace_lazy_dofs_duplicate(parent_proc, child_proc);
629
630 /*
631 * Duplicate any helper actions and providers. The SFORKING
632 * we set above informs the code to enable USDT probes that
633 * sprlock() may fail because the child is being forked.
634 */
635 /*
636 * APPLE NOTE: As best I can tell, Apple's sprlock() equivalent
637 * never fails to find the child. We do not set SFORKING.
638 */
639 if (parent_proc->p_dtrace_helpers != NULL && dtrace_helpers_fork) {
640 (*dtrace_helpers_fork)(parent_proc, child_proc);
641 }
642
643 }
644 #endif /* CONFIG_DTRACE */
645
646 break;
647
648 default:
649 panic("fork1 called with unknown kind %d", kind);
650 break;
651 }
652
653
654 /* return the thread pointer to the caller */
655 *child_threadp = child_thread;
656
657 #if CONFIG_MEMORYSTATUS
658 if (!err) {
659 memorystatus_list_add(child_proc->p_pid, DEFAULT_JETSAM_PRIORITY, -1);
660 }
661 #endif
662
663 bad:
664 /*
665 * In the error case, we return a 0 value for the returned pid (but
666 * it is ignored in the trampoline due to the error return); this
667 * is probably not necessary.
668 */
669 if (err) {
670 (void)chgproccnt(uid, -1);
671 }
672
673 return (err);
674 }
675
676
677 /*
678 * vfork_return
679 *
680 * Description: "Return" to parent vfork thread() following execve/_exit;
681 * this is done by reassociating the parent process structure
682 * with the task, thread, and uthread.
683 *
684 * Refer to the ASCII art above vfork() to figure out the
685 * state we're undoing.
686 *
687 * Parameters: child_proc Child process
688 * retval System call return value array
689 * rval Return value to present to parent
690 *
691 * Returns: void
692 *
693 * Notes: The caller resumes or exits the parent, as appropriate, after
694 * calling this function.
695 */
696 void
697 vfork_return(proc_t child_proc, int32_t *retval, int rval)
698 {
699 task_t parent_task = get_threadtask(child_proc->p_vforkact);
700 proc_t parent_proc = get_bsdtask_info(parent_task);
701 thread_t th = current_thread();
702 uthread_t uth = get_bsdthread_info(th);
703
704 act_thread_catt(uth->uu_userstate);
705
706 /* clear vfork state in parent proc structure */
707 proc_vfork_end(parent_proc);
708
709 /* REPATRIATE PARENT TASK, THREAD, UTHREAD */
710 uth->uu_userstate = 0;
711 uth->uu_flag &= ~UT_VFORK;
712 /* restore thread-set-id state */
713 if (uth->uu_flag & UT_WASSETUID) {
714 uth->uu_flag |= UT_SETUID;
715 uth->uu_flag &= UT_WASSETUID;
716 }
717 uth->uu_proc = 0;
718 uth->uu_sigmask = uth->uu_vforkmask;
719
720 proc_lock(child_proc);
721 child_proc->p_lflag &= ~P_LINVFORK;
722 child_proc->p_vforkact = 0;
723 proc_unlock(child_proc);
724
725 thread_set_parent(th, rval);
726
727 if (retval) {
728 retval[0] = rval;
729 retval[1] = 0; /* mark parent */
730 }
731 }
732
733
734 /*
735 * fork_create_child
736 *
737 * Description: Common operations associated with the creation of a child
738 * process
739 *
740 * Parameters: parent_task parent task
741 * child_proc child process
742 * inherit_memory TRUE, if the parents address space is
743 * to be inherited by the child
744 * is64bit TRUE, if the child being created will
745 * be associated with a 64 bit process
746 * rather than a 32 bit process
747 *
748 * Note: This code is called in the fork() case, from the execve() call
749 * graph, if implementing an execve() following a vfork(), from
750 * the posix_spawn() call graph (which implicitly includes a
751 * vfork() equivalent call), and in the system bootstrap case.
752 *
753 * It creates a new task and thread (and as a side effect of the
754 * thread creation, a uthread), which is then associated with the
755 * process 'child'. If the parent process address space is to
756 * be inherited, then a flag indicates that the newly created
757 * task should inherit this from the parent task.
758 *
759 * As a special concession to bootstrapping the initial process
760 * in the system, it's possible for 'parent_task' to be TASK_NULL;
761 * in this case, 'inherit_memory' MUST be FALSE.
762 */
763 thread_t
764 fork_create_child(task_t parent_task, proc_t child_proc, int inherit_memory, int is64bit)
765 {
766 thread_t child_thread = NULL;
767 task_t child_task;
768 kern_return_t result;
769
770 /* Create a new task for the child process */
771 result = task_create_internal(parent_task,
772 inherit_memory,
773 is64bit,
774 &child_task);
775 if (result != KERN_SUCCESS) {
776 printf("execve: task_create_internal failed. Code: %d\n", result);
777 goto bad;
778 }
779
780 /* Set the child process task to the new task */
781 child_proc->task = child_task;
782
783 /* Set child task process to child proc */
784 set_bsdtask_info(child_task, child_proc);
785
786 /* Propagate CPU limit timer from parent */
787 if (timerisset(&child_proc->p_rlim_cpu))
788 task_vtimer_set(child_task, TASK_VTIMER_RLIM);
789
790 /* Set/clear 64 bit vm_map flag */
791 if (is64bit)
792 vm_map_set_64bit(get_task_map(child_task));
793 else
794 vm_map_set_32bit(get_task_map(child_task));
795
796 #if CONFIG_MACF
797 /* Update task for MAC framework */
798 /* valid to use p_ucred as child is still not running ... */
799 mac_task_label_update_cred(child_proc->p_ucred, child_task);
800 #endif
801
802 /*
803 * Set child process BSD visible scheduler priority if nice value
804 * inherited from parent
805 */
806 if (child_proc->p_nice != 0)
807 resetpriority(child_proc);
808
809 /* Create a new thread for the child process */
810 result = thread_create(child_task, &child_thread);
811 if (result != KERN_SUCCESS) {
812 printf("execve: thread_create failed. Code: %d\n", result);
813 task_deallocate(child_task);
814 child_task = NULL;
815 }
816
817 /*
818 * Tag thread as being the first thread in its task.
819 */
820 thread_set_tag(child_thread, THREAD_TAG_MAINTHREAD);
821
822 bad:
823 thread_yield_internal(1);
824
825 return(child_thread);
826 }
827
828
829 /*
830 * fork
831 *
832 * Description: fork system call.
833 *
834 * Parameters: parent Parent process to fork
835 * uap (void) [unused]
836 * retval Return value
837 *
838 * Returns: 0 Success
839 * EAGAIN Resource unavailable, try again
840 *
841 * Notes: Attempts to create a new child process which inherits state
842 * from the parent process. If successful, the call returns
843 * having created an initially suspended child process with an
844 * extra Mach task and thread reference, for which the thread
845 * is initially suspended. Until we resume the child process,
846 * it is not yet running.
847 *
848 * The return information to the child is contained in the
849 * thread state structure of the new child, and does not
850 * become visible to the child through a normal return process,
851 * since it never made the call into the kernel itself in the
852 * first place.
853 *
854 * After resuming the thread, this function returns directly to
855 * the parent process which invoked the fork() system call.
856 *
857 * Important: The child thread_resume occurs before the parent returns;
858 * depending on scheduling latency, this means that it is not
859 * deterministic as to whether the parent or child is scheduled
860 * to run first. It is entirely possible that the child could
861 * run to completion prior to the parent running.
862 */
863 int
864 fork(proc_t parent_proc, __unused struct fork_args *uap, int32_t *retval)
865 {
866 thread_t child_thread;
867 int err;
868
869 retval[1] = 0; /* flag parent return for user space */
870
871 if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_FORK)) == 0) {
872 task_t child_task;
873 proc_t child_proc;
874
875 /* Return to the parent */
876 child_proc = (proc_t)get_bsdthreadtask_info(child_thread);
877 retval[0] = child_proc->p_pid;
878
879 /*
880 * Drop the signal lock on the child which was taken on our
881 * behalf by forkproc()/cloneproc() to prevent signals being
882 * received by the child in a partially constructed state.
883 */
884 proc_signalend(child_proc, 0);
885 proc_transend(child_proc, 0);
886
887 /* flag the fork has occurred */
888 proc_knote(parent_proc, NOTE_FORK | child_proc->p_pid);
889 DTRACE_PROC1(create, proc_t, child_proc);
890
891 /* "Return" to the child */
892 (void)thread_resume(child_thread);
893
894 /* drop the extra references we got during the creation */
895 if ((child_task = (task_t)get_threadtask(child_thread)) != NULL) {
896 task_deallocate(child_task);
897 }
898 thread_deallocate(child_thread);
899 }
900
901 return(err);
902 }
903
904
905 /*
906 * cloneproc
907 *
908 * Description: Create a new process from a specified process.
909 *
910 * Parameters: parent_task The parent task to be cloned, or
911 * TASK_NULL if task characteristics
912 * are not to be inherited, i.e. if
913 * the new task is not to inherit the
914 * task or VM characteristics of the
915 * parent
916 * parent_proc The parent process to be cloned
917 * inherit_memory True if the child is to inherit
918 * memory from the parent; if this is
919 * TRUE, then the parent_task must
920 * not be TASK_NULL
921 *
922 * Returns: !NULL pointer to new child thread
923 * NULL Failure (unspecified)
924 *
925 * Note: On return newly created child process has signal lock held
926 * to block delivery of signal to it if called with lock set.
927 * fork() code needs to explicitly remove this lock before
928 * signals can be delivered
929 *
930 * In the case of bootstrap, this function can be called from
931 * bsd_utaskbootstrap() in order to bootstrap the first process;
932 * the net effect is to provide a uthread structure for the
933 * kernel process associated with the kernel task.
934 *
935 * XXX: Tristating using the value parent_task as the major key
936 * and inherit_memory as the minor key is something we should
937 * refactor later; we owe the current semantics, ultimately,
938 * to the semantics of task_create_internal. For now, we will
939 * live with this being somewhat awkward.
940 */
941 thread_t
942 cloneproc(task_t parent_task, proc_t parent_proc, int inherit_memory)
943 {
944 task_t child_task;
945 proc_t child_proc;
946 thread_t child_thread = NULL;
947
948 if ((child_proc = forkproc(parent_proc)) == NULL) {
949 /* Failed to allocate new process */
950 goto bad;
951 }
952
953 child_thread = fork_create_child(parent_task, child_proc, inherit_memory, (parent_task == TASK_NULL) ? FALSE : (parent_proc->p_flag & P_LP64));
954
955 if (child_thread == NULL) {
956 /*
957 * Failed to create thread; now we must deconstruct the new
958 * process previously obtained from forkproc().
959 */
960 forkproc_free(child_proc);
961 goto bad;
962 }
963
964 child_task = get_threadtask(child_thread);
965 if (parent_proc->p_flag & P_LP64) {
966 task_set_64bit(child_task, TRUE);
967 OSBitOrAtomic(P_LP64, (UInt32 *)&child_proc->p_flag);
968 } else {
969 task_set_64bit(child_task, FALSE);
970 OSBitAndAtomic(~((uint32_t)P_LP64), (UInt32 *)&child_proc->p_flag);
971 }
972
973 /* make child visible */
974 pinsertchild(parent_proc, child_proc);
975
976 /*
977 * Make child runnable, set start time.
978 */
979 child_proc->p_stat = SRUN;
980 bad:
981 return(child_thread);
982 }
983
984
985 /*
986 * Destroy a process structure that resulted from a call to forkproc(), but
987 * which must be returned to the system because of a subsequent failure
988 * preventing it from becoming active.
989 *
990 * Parameters: p The incomplete process from forkproc()
991 *
992 * Returns: (void)
993 *
994 * Note: This function should only be used in an error handler following
995 * a call to forkproc().
996 *
997 * Operations occur in reverse order of those in forkproc().
998 */
999 void
1000 forkproc_free(proc_t p)
1001 {
1002
1003 /* We held signal and a transition locks; drop them */
1004 proc_signalend(p, 0);
1005 proc_transend(p, 0);
1006
1007 /*
1008 * If we have our own copy of the resource limits structure, we
1009 * need to free it. If it's a shared copy, we need to drop our
1010 * reference on it.
1011 */
1012 proc_limitdrop(p, 0);
1013 p->p_limit = NULL;
1014
1015 #if SYSV_SHM
1016 /* Need to drop references to the shared memory segment(s), if any */
1017 if (p->vm_shm) {
1018 /*
1019 * Use shmexec(): we have no address space, so no mappings
1020 *
1021 * XXX Yes, the routine is badly named.
1022 */
1023 shmexec(p);
1024 }
1025 #endif
1026
1027 /* Need to undo the effects of the fdcopy(), if any */
1028 fdfree(p);
1029
1030 #if !CONFIG_EMBEDDED
1031 if (p->p_legacy_behavior & PROC_LEGACY_BEHAVIOR_IOTHROTTLE) {
1032 throttle_legacy_process_decr();
1033 }
1034 #endif
1035
1036 /*
1037 * Drop the reference on a text vnode pointer, if any
1038 * XXX This code is broken in forkproc(); see <rdar://4256419>;
1039 * XXX if anyone ever uses this field, we will be extremely unhappy.
1040 */
1041 if (p->p_textvp) {
1042 vnode_rele(p->p_textvp);
1043 p->p_textvp = NULL;
1044 }
1045
1046 /* Stop the profiling clock */
1047 stopprofclock(p);
1048
1049 /* Update the audit session proc count */
1050 AUDIT_SESSION_PROCEXIT(p);
1051
1052 /* Release the credential reference */
1053 kauth_cred_unref(&p->p_ucred);
1054
1055 proc_list_lock();
1056 /* Decrement the count of processes in the system */
1057 nprocs--;
1058 proc_list_unlock();
1059
1060 thread_call_free(p->p_rcall);
1061
1062 /* Free allocated memory */
1063 FREE_ZONE(p->p_sigacts, sizeof *p->p_sigacts, M_SIGACTS);
1064 FREE_ZONE(p->p_stats, sizeof *p->p_stats, M_PSTATS);
1065 proc_checkdeadrefs(p);
1066 FREE_ZONE(p, sizeof *p, M_PROC);
1067 }
1068
1069
1070 /*
1071 * forkproc
1072 *
1073 * Description: Create a new process structure, given a parent process
1074 * structure.
1075 *
1076 * Parameters: parent_proc The parent process
1077 *
1078 * Returns: !NULL The new process structure
1079 * NULL Error (insufficient free memory)
1080 *
1081 * Note: When successful, the newly created process structure is
1082 * partially initialized; if a caller needs to deconstruct the
1083 * returned structure, they must call forkproc_free() to do so.
1084 */
1085 proc_t
1086 forkproc(proc_t parent_proc)
1087 {
1088 proc_t child_proc; /* Our new process */
1089 static int nextpid = 0, pidwrap = 0, nextpidversion = 0;
1090 static uint64_t nextuniqueid = 0;
1091 int error = 0;
1092 struct session *sessp;
1093 uthread_t parent_uthread = (uthread_t)get_bsdthread_info(current_thread());
1094
1095 MALLOC_ZONE(child_proc, proc_t , sizeof *child_proc, M_PROC, M_WAITOK);
1096 if (child_proc == NULL) {
1097 printf("forkproc: M_PROC zone exhausted\n");
1098 goto bad;
1099 }
1100 /* zero it out as we need to insert in hash */
1101 bzero(child_proc, sizeof *child_proc);
1102
1103 MALLOC_ZONE(child_proc->p_stats, struct pstats *,
1104 sizeof *child_proc->p_stats, M_PSTATS, M_WAITOK);
1105 if (child_proc->p_stats == NULL) {
1106 printf("forkproc: M_SUBPROC zone exhausted (p_stats)\n");
1107 FREE_ZONE(child_proc, sizeof *child_proc, M_PROC);
1108 child_proc = NULL;
1109 goto bad;
1110 }
1111 MALLOC_ZONE(child_proc->p_sigacts, struct sigacts *,
1112 sizeof *child_proc->p_sigacts, M_SIGACTS, M_WAITOK);
1113 if (child_proc->p_sigacts == NULL) {
1114 printf("forkproc: M_SUBPROC zone exhausted (p_sigacts)\n");
1115 FREE_ZONE(child_proc->p_stats, sizeof *child_proc->p_stats, M_PSTATS);
1116 FREE_ZONE(child_proc, sizeof *child_proc, M_PROC);
1117 child_proc = NULL;
1118 goto bad;
1119 }
1120
1121 /* allocate a callout for use by interval timers */
1122 child_proc->p_rcall = thread_call_allocate((thread_call_func_t)realitexpire, child_proc);
1123 if (child_proc->p_rcall == NULL) {
1124 FREE_ZONE(child_proc->p_sigacts, sizeof *child_proc->p_sigacts, M_SIGACTS);
1125 FREE_ZONE(child_proc->p_stats, sizeof *child_proc->p_stats, M_PSTATS);
1126 FREE_ZONE(child_proc, sizeof *child_proc, M_PROC);
1127 child_proc = NULL;
1128 goto bad;
1129 }
1130
1131
1132 /*
1133 * Find an unused PID.
1134 */
1135
1136 proc_list_lock();
1137
1138 nextpid++;
1139 retry:
1140 /*
1141 * If the process ID prototype has wrapped around,
1142 * restart somewhat above 0, as the low-numbered procs
1143 * tend to include daemons that don't exit.
1144 */
1145 if (nextpid >= PID_MAX) {
1146 nextpid = 100;
1147 pidwrap = 1;
1148 }
1149 if (pidwrap != 0) {
1150
1151 /* if the pid stays in hash both for zombie and runniing state */
1152 if (pfind_locked(nextpid) != PROC_NULL) {
1153 nextpid++;
1154 goto retry;
1155 }
1156
1157 if (pgfind_internal(nextpid) != PGRP_NULL) {
1158 nextpid++;
1159 goto retry;
1160 }
1161 if (session_find_internal(nextpid) != SESSION_NULL) {
1162 nextpid++;
1163 goto retry;
1164 }
1165 }
1166 nprocs++;
1167 child_proc->p_pid = nextpid;
1168 child_proc->p_idversion = nextpidversion++;
1169 /* kernel process is handcrafted and not from fork, so start from 1 */
1170 child_proc->p_uniqueid = ++nextuniqueid;
1171 #if 1
1172 if (child_proc->p_pid != 0) {
1173 if (pfind_locked(child_proc->p_pid) != PROC_NULL)
1174 panic("proc in the list already\n");
1175 }
1176 #endif
1177 /* Insert in the hash */
1178 child_proc->p_listflag |= (P_LIST_INHASH | P_LIST_INCREATE);
1179 LIST_INSERT_HEAD(PIDHASH(child_proc->p_pid), child_proc, p_hash);
1180 proc_list_unlock();
1181
1182
1183 /*
1184 * We've identified the PID we are going to use; initialize the new
1185 * process structure.
1186 */
1187 child_proc->p_stat = SIDL;
1188 child_proc->p_pgrpid = PGRPID_DEAD;
1189
1190 /*
1191 * The zero'ing of the proc was at the allocation time due to need
1192 * for insertion to hash. Copy the section that is to be copied
1193 * directly from the parent.
1194 */
1195 bcopy(&parent_proc->p_startcopy, &child_proc->p_startcopy,
1196 (unsigned) ((caddr_t)&child_proc->p_endcopy - (caddr_t)&child_proc->p_startcopy));
1197
1198 /*
1199 * Some flags are inherited from the parent.
1200 * Duplicate sub-structures as needed.
1201 * Increase reference counts on shared objects.
1202 * The p_stats and p_sigacts substructs are set in vm_fork.
1203 */
1204 #if !CONFIG_EMBEDDED
1205 child_proc->p_flag = (parent_proc->p_flag & (P_LP64 | P_TRANSLATED | P_AFFINITY | P_DISABLE_ASLR | P_DELAYIDLESLEEP));
1206 #else /* !CONFIG_EMBEDDED */
1207 child_proc->p_flag = (parent_proc->p_flag & (P_LP64 | P_TRANSLATED | P_AFFINITY | P_DISABLE_ASLR));
1208 #endif /* !CONFIG_EMBEDDED */
1209 if (parent_proc->p_flag & P_PROFIL)
1210 startprofclock(child_proc);
1211
1212 #if !CONFIG_EMBEDDED
1213 if (child_proc->p_legacy_behavior & PROC_LEGACY_BEHAVIOR_IOTHROTTLE) {
1214 throttle_legacy_process_incr();
1215 }
1216 #endif
1217
1218 /*
1219 * Note that if the current thread has an assumed identity, this
1220 * credential will be granted to the new process.
1221 */
1222 child_proc->p_ucred = kauth_cred_get_with_ref();
1223 /* update cred on proc */
1224 PROC_UPDATE_CREDS_ONPROC(child_proc);
1225 /* update audit session proc count */
1226 AUDIT_SESSION_PROCNEW(child_proc);
1227
1228 #if CONFIG_FINE_LOCK_GROUPS
1229 lck_mtx_init(&child_proc->p_mlock, proc_mlock_grp, proc_lck_attr);
1230 lck_mtx_init(&child_proc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr);
1231 #if CONFIG_DTRACE
1232 lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr);
1233 #endif
1234 lck_spin_init(&child_proc->p_slock, proc_slock_grp, proc_lck_attr);
1235 #else /* !CONFIG_FINE_LOCK_GROUPS */
1236 lck_mtx_init(&child_proc->p_mlock, proc_lck_grp, proc_lck_attr);
1237 lck_mtx_init(&child_proc->p_fdmlock, proc_lck_grp, proc_lck_attr);
1238 #if CONFIG_DTRACE
1239 lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr);
1240 #endif
1241 lck_spin_init(&child_proc->p_slock, proc_lck_grp, proc_lck_attr);
1242 #endif /* !CONFIG_FINE_LOCK_GROUPS */
1243 klist_init(&child_proc->p_klist);
1244
1245 if (child_proc->p_textvp != NULLVP) {
1246 /* bump references to the text vnode */
1247 /* Need to hold iocount across the ref call */
1248 if (vnode_getwithref(child_proc->p_textvp) == 0) {
1249 error = vnode_ref(child_proc->p_textvp);
1250 vnode_put(child_proc->p_textvp);
1251 if (error != 0)
1252 child_proc->p_textvp = NULLVP;
1253 }
1254 }
1255
1256 /*
1257 * Copy the parents per process open file table to the child; if
1258 * there is a per-thread current working directory, set the childs
1259 * per-process current working directory to that instead of the
1260 * parents.
1261 *
1262 * XXX may fail to copy descriptors to child
1263 */
1264 child_proc->p_fd = fdcopy(parent_proc, parent_uthread->uu_cdir);
1265
1266 #if SYSV_SHM
1267 if (parent_proc->vm_shm) {
1268 /* XXX may fail to attach shm to child */
1269 (void)shmfork(parent_proc, child_proc);
1270 }
1271 #endif
1272 /*
1273 * inherit the limit structure to child
1274 */
1275 proc_limitfork(parent_proc, child_proc);
1276
1277 if (child_proc->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
1278 uint64_t rlim_cur = child_proc->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur;
1279 child_proc->p_rlim_cpu.tv_sec = (rlim_cur > __INT_MAX__) ? __INT_MAX__ : rlim_cur;
1280 }
1281
1282 /* Intialize new process stats, including start time */
1283 /* <rdar://6640543> non-zeroed portion contains garbage AFAICT */
1284 bzero(&child_proc->p_stats->pstat_startzero,
1285 (unsigned) ((caddr_t)&child_proc->p_stats->pstat_endzero -
1286 (caddr_t)&child_proc->p_stats->pstat_startzero));
1287 bzero(&child_proc->p_stats->user_p_prof, sizeof(struct user_uprof));
1288 microtime(&child_proc->p_start);
1289 child_proc->p_stats->p_start = child_proc->p_start; /* for compat */
1290
1291 if (parent_proc->p_sigacts != NULL)
1292 (void)memcpy(child_proc->p_sigacts,
1293 parent_proc->p_sigacts, sizeof *child_proc->p_sigacts);
1294 else
1295 (void)memset(child_proc->p_sigacts, 0, sizeof *child_proc->p_sigacts);
1296
1297 sessp = proc_session(parent_proc);
1298 if (sessp->s_ttyvp != NULL && parent_proc->p_flag & P_CONTROLT)
1299 OSBitOrAtomic(P_CONTROLT, &child_proc->p_flag);
1300 session_rele(sessp);
1301
1302 /*
1303 * block all signals to reach the process.
1304 * no transition race should be occuring with the child yet,
1305 * but indicate that the process is in (the creation) transition.
1306 */
1307 proc_signalstart(child_proc, 0);
1308 proc_transstart(child_proc, 0);
1309
1310 child_proc->p_pcaction = (parent_proc->p_pcaction) & P_PCMAX;
1311 TAILQ_INIT(&child_proc->p_uthlist);
1312 TAILQ_INIT(&child_proc->p_aio_activeq);
1313 TAILQ_INIT(&child_proc->p_aio_doneq);
1314
1315 /* Inherit the parent flags for code sign */
1316 child_proc->p_csflags = (parent_proc->p_csflags & ~CS_KILLED);
1317
1318 /*
1319 * All processes have work queue locks; cleaned up by
1320 * reap_child_locked()
1321 */
1322 workqueue_init_lock(child_proc);
1323
1324 /*
1325 * Copy work queue information
1326 *
1327 * Note: This should probably only happen in the case where we are
1328 * creating a child that is a copy of the parent; since this
1329 * routine is called in the non-duplication case of vfork()
1330 * or posix_spawn(), then this information should likely not
1331 * be duplicated.
1332 *
1333 * <rdar://6640553> Work queue pointers that no longer point to code
1334 */
1335 child_proc->p_wqthread = parent_proc->p_wqthread;
1336 child_proc->p_threadstart = parent_proc->p_threadstart;
1337 child_proc->p_pthsize = parent_proc->p_pthsize;
1338 child_proc->p_targconc = parent_proc->p_targconc;
1339 if ((parent_proc->p_lflag & P_LREGISTER) != 0) {
1340 child_proc->p_lflag |= P_LREGISTER;
1341 }
1342 child_proc->p_dispatchqueue_offset = parent_proc->p_dispatchqueue_offset;
1343 #if PSYNCH
1344 pth_proc_hashinit(child_proc);
1345 #endif /* PSYNCH */
1346
1347 #if CONFIG_LCTX
1348 child_proc->p_lctx = NULL;
1349 /* Add new process to login context (if any). */
1350 if (parent_proc->p_lctx != NULL) {
1351 /*
1352 * <rdar://6640564> This should probably be delayed in the
1353 * vfork() or posix_spawn() cases.
1354 */
1355 LCTX_LOCK(parent_proc->p_lctx);
1356 enterlctx(child_proc, parent_proc->p_lctx, 0);
1357 }
1358 #endif
1359
1360 /* Default to no tracking of dirty state */
1361 child_proc->p_dirty = 0;
1362
1363 bad:
1364 return(child_proc);
1365 }
1366
1367 void
1368 proc_lock(proc_t p)
1369 {
1370 lck_mtx_lock(&p->p_mlock);
1371 }
1372
1373 void
1374 proc_unlock(proc_t p)
1375 {
1376 lck_mtx_unlock(&p->p_mlock);
1377 }
1378
1379 void
1380 proc_spinlock(proc_t p)
1381 {
1382 lck_spin_lock(&p->p_slock);
1383 }
1384
1385 void
1386 proc_spinunlock(proc_t p)
1387 {
1388 lck_spin_unlock(&p->p_slock);
1389 }
1390
1391 void
1392 proc_list_lock(void)
1393 {
1394 lck_mtx_lock(proc_list_mlock);
1395 }
1396
1397 void
1398 proc_list_unlock(void)
1399 {
1400 lck_mtx_unlock(proc_list_mlock);
1401 }
1402
1403 #include <kern/zalloc.h>
1404
1405 struct zone *uthread_zone;
1406 static int uthread_zone_inited = 0;
1407
1408 static void
1409 uthread_zone_init(void)
1410 {
1411 if (!uthread_zone_inited) {
1412 uthread_zone = zinit(sizeof(struct uthread),
1413 thread_max * sizeof(struct uthread),
1414 THREAD_CHUNK * sizeof(struct uthread),
1415 "uthreads");
1416 uthread_zone_inited = 1;
1417 }
1418 }
1419
1420 void *
1421 uthread_alloc(task_t task, thread_t thread, int noinherit)
1422 {
1423 proc_t p;
1424 uthread_t uth;
1425 uthread_t uth_parent;
1426 void *ut;
1427
1428 if (!uthread_zone_inited)
1429 uthread_zone_init();
1430
1431 ut = (void *)zalloc(uthread_zone);
1432 bzero(ut, sizeof(struct uthread));
1433
1434 p = (proc_t) get_bsdtask_info(task);
1435 uth = (uthread_t)ut;
1436 uth->uu_kwe.kwe_uth = uth;
1437 uth->uu_thread = thread;
1438
1439 /*
1440 * Thread inherits credential from the creating thread, if both
1441 * are in the same task.
1442 *
1443 * If the creating thread has no credential or is from another
1444 * task we can leave the new thread credential NULL. If it needs
1445 * one later, it will be lazily assigned from the task's process.
1446 */
1447 uth_parent = (uthread_t)get_bsdthread_info(current_thread());
1448 if ((noinherit == 0) && task == current_task() &&
1449 uth_parent != NULL &&
1450 IS_VALID_CRED(uth_parent->uu_ucred)) {
1451 /*
1452 * XXX The new thread is, in theory, being created in context
1453 * XXX of parent thread, so a direct reference to the parent
1454 * XXX is OK.
1455 */
1456 kauth_cred_ref(uth_parent->uu_ucred);
1457 uth->uu_ucred = uth_parent->uu_ucred;
1458 /* the credential we just inherited is an assumed credential */
1459 if (uth_parent->uu_flag & UT_SETUID)
1460 uth->uu_flag |= UT_SETUID;
1461 } else {
1462 /* sometimes workqueue threads are created out task context */
1463 if ((task != kernel_task) && (p != PROC_NULL))
1464 uth->uu_ucred = kauth_cred_proc_ref(p);
1465 else
1466 uth->uu_ucred = NOCRED;
1467 }
1468
1469
1470 if ((task != kernel_task) && p) {
1471
1472 proc_lock(p);
1473 if (noinherit != 0) {
1474 /* workq threads will not inherit masks */
1475 uth->uu_sigmask = ~workq_threadmask;
1476 } else if (uth_parent) {
1477 if (uth_parent->uu_flag & UT_SAS_OLDMASK)
1478 uth->uu_sigmask = uth_parent->uu_oldmask;
1479 else
1480 uth->uu_sigmask = uth_parent->uu_sigmask;
1481 }
1482 uth->uu_context.vc_thread = thread;
1483 TAILQ_INSERT_TAIL(&p->p_uthlist, uth, uu_list);
1484 proc_unlock(p);
1485
1486 #if CONFIG_DTRACE
1487 if (p->p_dtrace_ptss_pages != NULL) {
1488 uth->t_dtrace_scratch = dtrace_ptss_claim_entry(p);
1489 }
1490 #endif
1491 #if CONFIG_MACF
1492 mac_thread_label_init(uth);
1493 #endif
1494 }
1495
1496 return (ut);
1497 }
1498
1499
1500 /*
1501 * This routine frees all the BSD context in uthread except the credential.
1502 * It does not free the uthread structure as well
1503 */
1504 void
1505 uthread_cleanup(task_t task, void *uthread, void * bsd_info)
1506 {
1507 struct _select *sel;
1508 uthread_t uth = (uthread_t)uthread;
1509 proc_t p = (proc_t)bsd_info;
1510
1511
1512 if (uth->uu_lowpri_window || uth->uu_throttle_info) {
1513 /*
1514 * task is marked as a low priority I/O type
1515 * and we've somehow managed to not dismiss the throttle
1516 * through the normal exit paths back to user space...
1517 * no need to throttle this thread since its going away
1518 * but we do need to update our bookeeping w/r to throttled threads
1519 *
1520 * Calling this routine will clean up any throttle info reference
1521 * still inuse by the thread.
1522 */
1523 throttle_lowpri_io(FALSE);
1524 }
1525 /*
1526 * Per-thread audit state should never last beyond system
1527 * call return. Since we don't audit the thread creation/
1528 * removal, the thread state pointer should never be
1529 * non-NULL when we get here.
1530 */
1531 assert(uth->uu_ar == NULL);
1532
1533 sel = &uth->uu_select;
1534 /* cleanup the select bit space */
1535 if (sel->nbytes) {
1536 FREE(sel->ibits, M_TEMP);
1537 FREE(sel->obits, M_TEMP);
1538 sel->nbytes = 0;
1539 }
1540
1541 if (uth->uu_cdir) {
1542 vnode_rele(uth->uu_cdir);
1543 uth->uu_cdir = NULLVP;
1544 }
1545
1546 if (uth->uu_allocsize && uth->uu_wqset){
1547 kfree(uth->uu_wqset, uth->uu_allocsize);
1548 sel->count = 0;
1549 uth->uu_allocsize = 0;
1550 uth->uu_wqset = 0;
1551 sel->wql = 0;
1552 }
1553
1554 if(uth->pth_name != NULL)
1555 {
1556 kfree(uth->pth_name, MAXTHREADNAMESIZE);
1557 uth->pth_name = 0;
1558 }
1559 if ((task != kernel_task) && p) {
1560
1561 if (((uth->uu_flag & UT_VFORK) == UT_VFORK) && (uth->uu_proc != PROC_NULL)) {
1562 vfork_exit_internal(uth->uu_proc, 0, 1);
1563 }
1564 /*
1565 * Remove the thread from the process list and
1566 * transfer [appropriate] pending signals to the process.
1567 */
1568 if (get_bsdtask_info(task) == p) {
1569 proc_lock(p);
1570 TAILQ_REMOVE(&p->p_uthlist, uth, uu_list);
1571 p->p_siglist |= (uth->uu_siglist & execmask & (~p->p_sigignore | sigcantmask));
1572 proc_unlock(p);
1573 }
1574 #if CONFIG_DTRACE
1575 struct dtrace_ptss_page_entry *tmpptr = uth->t_dtrace_scratch;
1576 uth->t_dtrace_scratch = NULL;
1577 if (tmpptr != NULL) {
1578 dtrace_ptss_release_entry(p, tmpptr);
1579 }
1580 #endif
1581 #if CONFIG_MACF
1582 mac_thread_label_destroy(uth);
1583 #endif
1584 }
1585 }
1586
1587 /* This routine releases the credential stored in uthread */
1588 void
1589 uthread_cred_free(void *uthread)
1590 {
1591 uthread_t uth = (uthread_t)uthread;
1592
1593 /* and free the uthread itself */
1594 if (IS_VALID_CRED(uth->uu_ucred)) {
1595 kauth_cred_t oldcred = uth->uu_ucred;
1596 uth->uu_ucred = NOCRED;
1597 kauth_cred_unref(&oldcred);
1598 }
1599 }
1600
1601 /* This routine frees the uthread structure held in thread structure */
1602 void
1603 uthread_zone_free(void *uthread)
1604 {
1605 /* and free the uthread itself */
1606 zfree(uthread_zone, uthread);
1607 }