2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 * @(#)init_main.c 8.16 (Berkeley) 5/14/95
70 * Mach Operating System
71 * Copyright (c) 1987 Carnegie-Mellon University
72 * All rights reserved. The CMU software License Agreement specifies
73 * the terms and conditions for use and redistribution.
76 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
77 * support for mandatory and extensible security protections. This notice
78 * is included in support of clause 2.2 (b) of the Apple Public License,
82 #include <sys/param.h>
83 #include <sys/filedesc.h>
84 #include <sys/kernel.h>
85 #include <sys/mount_internal.h>
86 #include <sys/proc_internal.h>
87 #include <sys/kauth.h>
88 #include <sys/systm.h>
89 #include <sys/vnode_internal.h>
91 #include <sys/buf_internal.h>
92 #include <sys/clist.h>
95 #include <sys/systm.h>
99 #include <security/audit/audit.h>
101 #include <sys/malloc.h>
102 #include <sys/dkstat.h>
103 #include <sys/codesign.h>
105 #include <kern/startup.h>
106 #include <kern/thread.h>
107 #include <kern/task.h>
108 #include <kern/ast.h>
109 #include <kern/kalloc.h>
110 #include <kern/ux_handler.h> /* for ux_handler_setup() */
112 #include <mach/vm_param.h>
114 #include <vm/vm_map.h>
115 #include <vm/vm_kern.h>
117 #include <sys/reboot.h>
118 #include <dev/busvar.h> /* for pseudo_inits */
119 #include <sys/kdebug.h>
120 #include <sys/monotonic.h>
121 #include <sys/reason.h>
123 #include <mach/mach_types.h>
124 #include <mach/vm_prot.h>
125 #include <mach/semaphore.h>
126 #include <mach/sync_policy.h>
127 #include <kern/clock.h>
128 #include <mach/kern_return.h>
129 #include <mach/thread_act.h> /* for thread_resume() */
130 #include <sys/ubc_internal.h> /* for ubc_init() */
131 #include <sys/mcache.h> /* for mcache_init() */
132 #include <sys/mbuf.h> /* for mbinit() */
133 #include <sys/event.h> /* for knote_init() */
134 #include <sys/eventhandler.h> /* for eventhandler_init() */
135 #include <sys/kern_memorystatus.h> /* for memorystatus_init() */
136 #include <sys/kern_memorystatus_freeze.h> /* for memorystatus_freeze_init() */
137 #include <sys/aio_kern.h> /* for aio_init() */
138 #include <sys/semaphore.h> /* for psem_cache_init() */
139 #include <net/dlil.h> /* for dlil_init() */
140 #include <net/kpi_protocol.h> /* for proto_kpi_init() */
141 #include <net/iptap.h> /* for iptap_init() */
142 #include <sys/pipe.h> /* for pipeinit() */
143 #include <sys/socketvar.h> /* for socketinit() */
144 #include <sys/protosw.h> /* for domaininit() */
145 #include <kern/sched_prim.h> /* for thread_wakeup() */
146 #include <net/if_ether.h> /* for ether_family_init() */
147 #include <net/if_gif.h> /* for gif_init() */
148 #include <vm/vm_protos.h> /* for vnode_pager_bootstrap() */
149 #include <miscfs/devfs/devfsdefs.h> /* for devfs_kernel_mount() */
150 #include <vm/vm_kern.h> /* for kmem_suballoc() */
151 #include <sys/semaphore.h> /* for psem_lock_init() */
152 #include <sys/msgbuf.h> /* for log_setsize() */
153 #include <sys/tty.h> /* for tty_init() */
154 #include <sys/proc_uuid_policy.h> /* proc_uuid_policy_init() */
155 #include <netinet/flow_divert.h> /* flow_divert_init() */
156 #include <net/content_filter.h> /* for cfil_init() */
157 #include <net/necp.h> /* for necp_init() */
158 #include <net/network_agent.h> /* for netagent_init() */
159 #include <net/packet_mangler.h> /* for pkt_mnglr_init() */
160 #include <net/if_utun.h> /* for utun_register_control() */
161 #include <net/if_ipsec.h> /* for ipsec_register_control() */
162 #include <net/net_str_id.h> /* for net_str_id_init() */
163 #include <net/netsrc.h> /* for netsrc_init() */
164 #include <net/ntstat.h> /* for nstat_init() */
165 #include <netinet/tcp_cc.h> /* for tcp_cc_init() */
166 #include <netinet/mptcp_var.h> /* for mptcp_control_register() */
167 #include <net/nwk_wq.h> /* for nwk_wq_init */
168 #include <net/restricted_in_port.h> /* for restricted_in_port_init() */
169 #include <kern/assert.h> /* for assert() */
170 #include <sys/kern_overrides.h> /* for init_system_override() */
172 #include <net/init.h>
175 #include <security/mac_framework.h>
176 #include <security/mac_internal.h> /* mac_init_bsd() */
177 #include <security/mac_mach_internal.h> /* mac_update_task_label() */
180 #include <machine/exec.h>
183 #include <sys/netboot.h>
187 #include <sys/imageboot.h>
191 #include <net/if_pflog.h>
195 #include <pexpert/pexpert.h>
196 #include <machine/pal_routines.h>
197 #include <console/video_console.h>
200 #include <tests/xnupost.h>
/*
 * Prototypes for routines that are not defined in the visible part of this
 * file and are declared by hand here (hence the XXX markers).
 */
void *get_user_regs(thread_t);          /* XXX kludge for <machine/thread.h> */
void IOKitInitializeTime(void);         /* XXX */
void IOSleep(unsigned int);             /* XXX */
void loopattach(void);                  /* XXX */

/* BSD copyright banner text. */
const char copyright[] =
    "Copyright (c) 1982, 1986, 1989, 1991, 1993\n\t"
    "The Regents of the University of California. "
    "All rights reserved.\n\n";
213 /* Components of the first process -- never freed. */
215 struct session session0
;
217 struct filedesc filedesc0
;
218 struct plimit limit0
;
219 struct pstats pstats0
;
220 struct sigacts sigacts0
;
230 /* Global variables to make pstat happy. We do swapping differently */
234 struct swdevt swdevt
[1];
236 dev_t rootdev
; /* device of the root */
237 dev_t dumpdev
; /* device to take dumps on */
238 long dumplo
; /* offset into dumpdev */
240 char hostname
[MAXHOSTNAMELEN
];
241 lck_mtx_t hostname_lock
;
242 lck_grp_t
*hostname_lck_grp
;
243 char domainname
[MAXDOMNAMELEN
];
244 lck_mtx_t domainname_lock
;
246 char rootdevice
[DEVMAXNAMESIZE
];
249 struct kmemstats kmemstats
[M_LAST
];
252 struct vnode
*rootvp
;
260 int legacy_footprint_entitlement_mode
= LEGACY_FOOTPRINT_ENTITLEMENT_IGNORE
;
261 #endif /* __arm64__ */
264 __private_extern__
int proc_ref_tracking_disabled
= 0; /* disable panics on leaked proc refs across syscall boundary */
268 __private_extern__
int os_reason_debug_disabled
= 0; /* disable asserts for when we fail to allocate OS reasons */
271 extern kern_return_t
IOFindBSDRoot(char *, unsigned int, dev_t
*, u_int32_t
*);
272 extern void IOSecureBSDRoot(const char * rootName
);
273 extern kern_return_t
IOKitBSDInit(void );
274 extern void kminit(void);
275 extern void file_lock_init(void);
276 extern void kmeminit(void);
277 extern void bsd_bufferinit(void);
278 extern void oslog_setsize(int size
);
279 extern void throttle_init(void);
280 extern void acct_init(void);
282 #if CONFIG_LOCKERBOOT
283 #define LOCKER_PROTOBOOT_MOUNT "/protoboot"
285 const char kernel_protoboot_mount
[] = LOCKER_PROTOBOOT_MOUNT
;
286 extern int mount_locker_protoboot(const char *fsname
, const char *mntpoint
,
287 const char *pbdevpath
);
290 extern int serverperfmode
;
292 #if DEVELOPMENT || DEBUG
293 extern int syscallfilter_disable
;
294 #endif // DEVELOPMENT || DEBUG
296 vm_map_t bsd_pageable_map
;
299 static int bsd_simul_execs
;
300 static int bsd_pageable_map_size
;
301 __private_extern__
int execargs_cache_size
= 0;
302 __private_extern__
int execargs_free_count
= 0;
303 __private_extern__ vm_offset_t
* execargs_cache
= NULL
;
305 void bsd_exec_setup(int);
307 __private_extern__
int bootarg_execfailurereports
= 0;
310 __private_extern__
int bootarg_no32exec
= 1;
312 __private_extern__
int bootarg_vnode_cache_defeat
= 0;
314 #if CONFIG_JETSAM && (DEVELOPMENT || DEBUG)
315 __private_extern__
int bootarg_no_vnode_jetsam
= 0;
316 #endif /* CONFIG_JETSAM && (DEVELOPMENT || DEBUG) */
319 * Prevent kernel-based ASLR from being used, for testing.
321 #if DEVELOPMENT || DEBUG
322 __private_extern__
int bootarg_disable_aslr
= 0;
326 * Allow an alternate dyld to be used for testing.
329 #if DEVELOPMENT || DEBUG
330 char dyld_alt_path
[MAXPATHLEN
];
331 int use_alt_dyld
= 0;
332 extern uint64_t dyld_flags
;
336 extern int customnbuf
;
338 kern_return_t
bsd_autoconf(void);
339 void bsd_utaskbootstrap(void);
341 static void parse_bsd_args(void);
343 extern void dev_kmem_init(void);
345 extern void time_zone_slock_init(void);
346 extern void select_waitq_init(void);
347 static void process_name(const char *, proc_t
);
349 static void setconf(void);
352 extern void sysv_shm_lock_init(void);
355 extern void sysv_sem_lock_init(void);
358 extern void sysv_msg_lock_init(void);
361 extern void ulock_initialize(void);
364 #if defined (__i386__) || defined (__x86_64__)
365 /* MACF policy_check configuration flags; see policy_check.c for details */
366 int policy_check_flags
= 0;
368 extern int check_policy_init(int);
370 #endif /* CONFIG_MACF */
372 /* If we are using CONFIG_DTRACE */
374 extern void dtrace_postinit(void);
378 * Initialization code.
379 * Called from cold start routine as
380 * soon as a stack and segmentation
381 * have been established.
384 * hand craft 0th process
385 * call all initialization routines
386 * hand craft 1st user process
390 * Sets the name for the given task.
393 process_name(const char *s
, proc_t p
)
395 strlcpy(p
->p_comm
, s
, sizeof(p
->p_comm
));
396 strlcpy(p
->p_name
, s
, sizeof(p
->p_name
));
399 /* To allow these values to be patched, they're globals here */
400 #include <machine/vmparam.h>
401 struct rlimit vm_initial_limit_stack
= { .rlim_cur
= DFLSSIZ
, .rlim_max
= MAXSSIZ
- PAGE_MAX_SIZE
};
402 struct rlimit vm_initial_limit_data
= { .rlim_cur
= DFLDSIZ
, .rlim_max
= MAXDSIZ
};
403 struct rlimit vm_initial_limit_core
= { .rlim_cur
= DFLCSIZ
, .rlim_max
= MAXCSIZ
};
405 extern thread_t
cloneproc(task_t
, coalition_t
, proc_t
, int, int);
406 extern int (*mountroot
)(void);
408 lck_grp_t
* proc_lck_grp
;
409 lck_grp_t
* proc_slock_grp
;
410 lck_grp_t
* proc_fdmlock_grp
;
411 lck_grp_t
* proc_kqhashlock_grp
;
412 lck_grp_t
* proc_knhashlock_grp
;
413 lck_grp_t
* proc_ucred_mlock_grp
;
414 lck_grp_t
* proc_mlock_grp
;
415 lck_grp_attr_t
* proc_lck_grp_attr
;
416 lck_attr_t
* proc_lck_attr
;
417 lck_mtx_t
* proc_list_mlock
;
418 lck_mtx_t
* proc_klist_mlock
;
421 lck_grp_t
* sysctl_debug_test_stackshot_owner_grp
;
422 lck_mtx_t
* sysctl_debug_test_stackshot_owner_init_mtx
;
423 #endif /* !CONFIG_XNUPOST */
425 extern lck_mtx_t
* execargs_cache_lock
;
427 /* hook called after root is mounted XXX temporary hack */
428 void (*mountroot_post_hook
)(void);
429 void (*unmountroot_pre_hook
)(void);
432 * This function is called before IOKit initialization, so that globals
433 * like the sysctl tree are initialized before kernel extensions
434 * are started (since they may want to register sysctls).
443 * This function is called very early on in the Mach startup, from the
444 * function start_kernel_threads() in osfmk/kern/startup.c. It's called
445 * in the context of the current (startup) task using a call to the
446 * function kernel_thread_create() to jump into start_kernel_threads().
447 * Internally, kernel_thread_create() calls thread_create_internal(),
448 * which calls uthread_alloc(). The function of uthread_alloc() is
449 * normally to allocate a uthread structure, and fill out the uu_sigmask,
450 * uu_context fields. It skips filling these out in the case of the "task"
451 * being "kernel_task", because the order of operation is inverted. To
452 * account for that, we need to manually fill in at least the contents
453 * of the uu_context.vc_ucred field so that the uthread structure can be
454 * used like any other.
462 struct vfs_context context
;
464 struct ucred temp_cred
;
465 struct posix_cred temp_pcred
;
466 #if CONFIG_NETBOOT || CONFIG_IMAGEBOOT
467 boolean_t netboot
= FALSE
;
469 #if CONFIG_LOCKERBOOT
470 vnode_t pbvn
= NULLVP
;
471 mount_t pbmnt
= NULL
;
474 char pbfsname
[MFSNAMELEN
];
475 char *slash_dev
= NULL
;
478 #define DEBUG_BSDINIT 0
481 #define bsd_init_kprintf(x, ...) kprintf("bsd_init: " x, ## __VA_ARGS__)
483 #define bsd_init_kprintf(x, ...)
490 bsd_init_kprintf("calling kmeminit\n");
493 bsd_init_kprintf("calling parse_bsd_args\n");
497 bsd_init_kprintf("calling dev_kmem_init\n");
501 /* Initialize kauth subsystem before instancing the first credential */
502 bsd_init_kprintf("calling kauth_init\n");
505 /* Initialize process and pgrp structures. */
506 bsd_init_kprintf("calling procinit\n");
509 /* Initialize the ttys (MUST be before kminit()/bsd_autoconf()!)*/
512 kernproc
= &proc0
; /* implicitly bzero'ed */
514 /* kernel_task->proc = kernproc; */
515 set_bsdtask_info(kernel_task
, (void *)kernproc
);
517 /* give kernproc a name */
518 bsd_init_kprintf("calling process_name\n");
519 process_name("kernel_task", kernproc
);
521 /* allocate proc lock group attribute and group */
522 bsd_init_kprintf("calling lck_grp_attr_alloc_init\n");
523 proc_lck_grp_attr
= lck_grp_attr_alloc_init();
525 proc_lck_grp
= lck_grp_alloc_init("proc", proc_lck_grp_attr
);
527 proc_slock_grp
= lck_grp_alloc_init("proc-slock", proc_lck_grp_attr
);
528 proc_ucred_mlock_grp
= lck_grp_alloc_init("proc-ucred-mlock", proc_lck_grp_attr
);
529 proc_mlock_grp
= lck_grp_alloc_init("proc-mlock", proc_lck_grp_attr
);
530 proc_fdmlock_grp
= lck_grp_alloc_init("proc-fdmlock", proc_lck_grp_attr
);
531 proc_kqhashlock_grp
= lck_grp_alloc_init("proc-kqhashlock", proc_lck_grp_attr
);
532 proc_knhashlock_grp
= lck_grp_alloc_init("proc-knhashlock", proc_lck_grp_attr
);
534 sysctl_debug_test_stackshot_owner_grp
= lck_grp_alloc_init("test-stackshot-owner-grp", LCK_GRP_ATTR_NULL
);
535 sysctl_debug_test_stackshot_owner_init_mtx
= lck_mtx_alloc_init(
536 sysctl_debug_test_stackshot_owner_grp
,
538 #endif /* !CONFIG_XNUPOST */
539 /* Allocate proc lock attribute */
540 proc_lck_attr
= lck_attr_alloc_init();
542 proc_list_mlock
= lck_mtx_alloc_init(proc_mlock_grp
, proc_lck_attr
);
543 proc_klist_mlock
= lck_mtx_alloc_init(proc_mlock_grp
, proc_lck_attr
);
544 lck_mtx_init(&kernproc
->p_mlock
, proc_mlock_grp
, proc_lck_attr
);
545 lck_mtx_init(&kernproc
->p_fdmlock
, proc_fdmlock_grp
, proc_lck_attr
);
546 lck_mtx_init(&kernproc
->p_ucred_mlock
, proc_ucred_mlock_grp
, proc_lck_attr
);
547 lck_spin_init(&kernproc
->p_slock
, proc_slock_grp
, proc_lck_attr
);
549 assert(bsd_simul_execs
!= 0);
550 execargs_cache_lock
= lck_mtx_alloc_init(proc_lck_grp
, proc_lck_attr
);
551 execargs_cache_size
= bsd_simul_execs
;
552 execargs_free_count
= bsd_simul_execs
;
553 execargs_cache
= (vm_offset_t
*)kalloc(bsd_simul_execs
* sizeof(vm_offset_t
));
554 bzero(execargs_cache
, bsd_simul_execs
* sizeof(vm_offset_t
));
556 if (current_task() != kernel_task
) {
557 printf("bsd_init: We have a problem, "
558 "current task is not kernel task\n");
561 bsd_init_kprintf("calling get_bsdthread_info\n");
562 ut
= (uthread_t
)get_bsdthread_info(current_thread());
566 * Initialize the MAC Framework
568 mac_policy_initbsd();
570 #if defined (__i386__) || defined (__x86_64__)
572 * We currently only support this on i386/x86_64, as that is the
573 * only lock code we have instrumented so far.
575 check_policy_init(policy_check_flags
);
581 hostname_lck_grp
= lck_grp_alloc_init("hostname", LCK_GRP_ATTR_NULL
);
582 lck_mtx_init(&hostname_lock
, hostname_lck_grp
, LCK_ATTR_NULL
);
583 lck_mtx_init(&domainname_lock
, hostname_lck_grp
, LCK_ATTR_NULL
);
589 LIST_INSERT_HEAD(&allproc
, kernproc
, p_list
);
590 kernproc
->p_pgrp
= &pgrp0
;
591 LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0
, pg_hash
);
592 LIST_INIT(&pgrp0
.pg_members
);
593 lck_mtx_init(&pgrp0
.pg_mlock
, proc_mlock_grp
, proc_lck_attr
);
594 /* There is no other bsd thread at this point, so this is safe without the pgrp lock */
595 LIST_INSERT_HEAD(&pgrp0
.pg_members
, kernproc
, p_pglist
);
596 kernproc
->p_listflag
|= P_LIST_INPGRP
;
597 kernproc
->p_pgrpid
= 0;
598 kernproc
->p_uniqueid
= 0;
600 pgrp0
.pg_session
= &session0
;
601 pgrp0
.pg_membercnt
= 1;
603 session0
.s_count
= 1;
604 session0
.s_leader
= kernproc
;
605 session0
.s_listflags
= 0;
606 lck_mtx_init(&session0
.s_mlock
, proc_mlock_grp
, proc_lck_attr
);
607 LIST_INSERT_HEAD(SESSHASH(0), &session0
, s_hash
);
611 kernproc
->p_persona
= NULL
;
614 kernproc
->task
= kernel_task
;
616 kernproc
->p_stat
= SRUN
;
617 kernproc
->p_flag
= P_SYSTEM
;
618 kernproc
->p_lflag
= 0;
619 kernproc
->p_ladvflag
= 0;
621 #if defined(__LP64__)
622 kernproc
->p_flag
|= P_LP64
;
625 #if DEVELOPMENT || DEBUG
626 if (bootarg_disable_aslr
) {
627 kernproc
->p_flag
|= P_DISABLE_ASLR
;
631 kernproc
->p_nice
= NZERO
;
632 kernproc
->p_pptr
= kernproc
;
634 TAILQ_INIT(&kernproc
->p_uthlist
);
635 TAILQ_INSERT_TAIL(&kernproc
->p_uthlist
, ut
, uu_list
);
637 kernproc
->sigwait
= FALSE
;
638 kernproc
->sigwait_thread
= THREAD_NULL
;
639 kernproc
->exit_thread
= THREAD_NULL
;
640 kernproc
->p_csflags
= CS_VALID
;
643 * Create credential. This also initializes the audit information.
645 bsd_init_kprintf("calling bzero\n");
646 bzero(&temp_cred
, sizeof(temp_cred
));
647 bzero(&temp_pcred
, sizeof(temp_pcred
));
648 temp_pcred
.cr_ngroups
= 1;
649 /* kern_proc, shouldn't call up to DS for group membership */
650 temp_pcred
.cr_flags
= CRF_NOMEMBERD
;
651 temp_cred
.cr_audit
.as_aia_p
= audit_default_aia_p
;
653 bsd_init_kprintf("calling kauth_cred_create\n");
655 * We have to label the temp cred before we create from it to
656 * properly set cr_ngroups, or the create will fail.
658 posix_cred_label(&temp_cred
, &temp_pcred
);
659 kernproc
->p_ucred
= kauth_cred_create(&temp_cred
);
661 /* update cred on proc */
662 PROC_UPDATE_CREDS_ONPROC(kernproc
);
664 /* give the (already existing) initial thread a reference on it */
665 bsd_init_kprintf("calling kauth_cred_ref\n");
666 kauth_cred_ref(kernproc
->p_ucred
);
667 ut
->uu_context
.vc_ucred
= kernproc
->p_ucred
;
668 ut
->uu_context
.vc_thread
= current_thread();
670 TAILQ_INIT(&kernproc
->p_aio_activeq
);
671 TAILQ_INIT(&kernproc
->p_aio_doneq
);
672 kernproc
->p_aio_total_count
= 0;
673 kernproc
->p_aio_active_count
= 0;
675 bsd_init_kprintf("calling file_lock_init\n");
679 mac_cred_label_associate_kernel(kernproc
->p_ucred
);
682 /* Create the file descriptor table. */
683 kernproc
->p_fd
= &filedesc0
;
684 filedesc0
.fd_cmask
= cmask
;
685 filedesc0
.fd_knlistsize
= 0;
686 filedesc0
.fd_knlist
= NULL
;
687 filedesc0
.fd_knhash
= NULL
;
688 filedesc0
.fd_knhashmask
= 0;
689 lck_mtx_init(&filedesc0
.fd_kqhashlock
, proc_kqhashlock_grp
, proc_lck_attr
);
690 lck_mtx_init(&filedesc0
.fd_knhashlock
, proc_knhashlock_grp
, proc_lck_attr
);
692 /* Create the limits structures. */
693 kernproc
->p_limit
= &limit0
;
694 for (i
= 0; i
< sizeof(kernproc
->p_rlimit
) / sizeof(kernproc
->p_rlimit
[0]); i
++) {
695 limit0
.pl_rlimit
[i
].rlim_cur
=
696 limit0
.pl_rlimit
[i
].rlim_max
= RLIM_INFINITY
;
698 limit0
.pl_rlimit
[RLIMIT_NOFILE
].rlim_cur
= NOFILE
;
699 limit0
.pl_rlimit
[RLIMIT_NPROC
].rlim_cur
= maxprocperuid
;
700 limit0
.pl_rlimit
[RLIMIT_NPROC
].rlim_max
= maxproc
;
701 limit0
.pl_rlimit
[RLIMIT_STACK
] = vm_initial_limit_stack
;
702 limit0
.pl_rlimit
[RLIMIT_DATA
] = vm_initial_limit_data
;
703 limit0
.pl_rlimit
[RLIMIT_CORE
] = vm_initial_limit_core
;
704 limit0
.pl_refcnt
= 1;
706 kernproc
->p_stats
= &pstats0
;
707 kernproc
->p_sigacts
= &sigacts0
;
710 * Charge root for one process: launchd.
712 bsd_init_kprintf("calling chgproccnt\n");
713 (void)chgproccnt(0, 1);
716 * Allocate a kernel submap for pageable memory
717 * for temporary copying (execve()).
722 bsd_init_kprintf("calling kmem_suballoc\n");
723 assert(bsd_pageable_map_size
!= 0);
724 ret
= kmem_suballoc(kernel_map
,
726 (vm_size_t
)bsd_pageable_map_size
,
729 VM_MAP_KERNEL_FLAGS_NONE
,
732 if (ret
!= KERN_SUCCESS
) {
733 panic("bsd_init: Failed to allocate bsd pageable map");
737 bsd_init_kprintf("calling fpxlog_init\n");
741 * Initialize buffers and hash links for buffers
743 * SIDE EFFECT: Starts a thread for bcleanbuf_thread(), so must
744 * happen after a credential has been associated with
747 bsd_init_kprintf("calling bsd_bufferinit\n");
751 * Initialize the calendar.
753 bsd_init_kprintf("calling IOKitInitializeTime\n");
754 IOKitInitializeTime();
756 bsd_init_kprintf("calling ubc_init\n");
759 /* Initialize the file systems. */
760 bsd_init_kprintf("calling vfsinit\n");
763 #if CONFIG_PROC_UUID_POLICY
764 /* Initialize the proc_uuid_policy subsystem */
765 bsd_init_kprintf("calling proc_uuid_policy_init()\n");
766 proc_uuid_policy_init();
770 /* Initialize per-CPU cache allocator */
773 /* Initialize mbuf's. */
774 bsd_init_kprintf("calling mbinit\n");
776 net_str_id_init(); /* for mbuf tags */
777 restricted_in_port_init();
781 * Initializes security event auditing.
782 * XXX: Should/could this occur later?
785 bsd_init_kprintf("calling audit_init\n");
789 /* Initialize kqueues */
790 bsd_init_kprintf("calling knote_init\n");
793 /* Initialize event handler */
794 bsd_init_kprintf("calling eventhandler_init\n");
797 /* Initialize for async IO */
798 bsd_init_kprintf("calling aio_init\n");
801 /* Initialize pipes */
802 bsd_init_kprintf("calling pipeinit\n");
805 /* Initialize SysV shm subsystem locks; the subsystem proper is
806 * initialized through a sysctl.
809 bsd_init_kprintf("calling sysv_shm_lock_init\n");
810 sysv_shm_lock_init();
813 bsd_init_kprintf("calling sysv_sem_lock_init\n");
814 sysv_sem_lock_init();
817 bsd_init_kprintf("sysv_msg_lock_init\n");
818 sysv_msg_lock_init();
820 bsd_init_kprintf("calling pshm_lock_init\n");
822 bsd_init_kprintf("calling psem_lock_init\n");
826 /* POSIX Shm and Sem */
827 bsd_init_kprintf("calling pshm_cache_init\n");
829 bsd_init_kprintf("calling psem_cache_init\n");
831 bsd_init_kprintf("calling time_zone_slock_init\n");
832 time_zone_slock_init();
833 bsd_init_kprintf("calling select_waitq_init\n");
837 * Initialize protocols. Block reception of incoming packets
838 * until everything is ready.
841 bsd_init_kprintf("calling nwk_wq_init\n");
843 bsd_init_kprintf("calling dlil_init\n");
845 bsd_init_kprintf("calling proto_kpi_init\n");
847 #endif /* NETWORKING */
849 bsd_init_kprintf("calling socketinit\n");
851 bsd_init_kprintf("calling domaininit\n");
856 #endif /* FLOW_DIVERT */
858 kernproc
->p_fd
->fd_cdir
= NULL
;
859 kernproc
->p_fd
->fd_rdir
= NULL
;
862 #ifndef CONFIG_MEMORYSTATUS
863 #error "CONFIG_FREEZE defined without matching CONFIG_MEMORYSTATUS"
865 /* Initialise background freezing */
866 bsd_init_kprintf("calling memorystatus_freeze_init\n");
867 memorystatus_freeze_init();
870 #if CONFIG_MEMORYSTATUS
871 /* Initialize kernel memory status notifications */
872 bsd_init_kprintf("calling memorystatus_init\n");
874 #endif /* CONFIG_MEMORYSTATUS */
876 bsd_init_kprintf("calling acct_init\n");
879 bsd_init_kprintf("calling sysctl_mib_init\n");
882 bsd_init_kprintf("calling bsd_autoconf\n");
885 bsd_init_kprintf("calling os_reason_init\n");
893 * We attach the loopback interface *way* down here to ensure
894 * it happens after autoconf(), otherwise it becomes the
895 * "primary" interface.
899 bsd_init_kprintf("calling loopattach\n");
900 loopattach(); /* XXX */
903 /* Initialize gif interface (after lo0) */
908 /* Initialize packet filter log interface */
913 /* Register the built-in dlil ethernet interface family */
914 bsd_init_kprintf("calling ether_family_init\n");
919 /* Call any kext code that wants to run just after network init */
920 bsd_init_kprintf("calling net_init_run\n");
932 /* Initialize Network Extension Control Policies */
938 /* register user tunnel kernel control handler */
939 utun_register_control();
941 ipsec_register_control();
947 mptcp_control_register();
949 #endif /* NETWORKING */
951 bsd_init_kprintf("calling vnode_pager_bootstrap\n");
952 vnode_pager_bootstrap();
954 bsd_init_kprintf("calling inittodr\n");
957 /* Mount the root file system. */
961 bsd_init_kprintf("calling setconf\n");
964 netboot
= (mountroot
== netboot_mountroot
);
967 bsd_init_kprintf("vfs_mountroot\n");
968 if (0 == (err
= vfs_mountroot())) {
971 rootdevice
[0] = '\0';
974 PE_display_icon( 0, "noroot"); /* XXX a netboot-specific icon would be nicer */
975 vc_progress_set(FALSE
, 0);
976 for (i
= 1; 1; i
*= 2) {
977 printf("bsd_init: failed to mount network root, error %d, %s\n",
978 err
, PE_boot_args());
979 printf("We are hanging here...\n");
980 IOSleep(i
* 60 * 1000);
985 printf("cannot mount root, errno = %d\n", err
);
986 boothowto
|= RB_ASKNAME
;
989 IOSecureBSDRoot(rootdevice
);
991 context
.vc_thread
= current_thread();
992 context
.vc_ucred
= kernproc
->p_ucred
;
993 mountlist
.tqh_first
->mnt_flag
|= MNT_ROOTFS
;
995 bsd_init_kprintf("calling VFS_ROOT\n");
996 /* Get the vnode for '/'. Set filedesc0.fd_cdir to reference it. */
997 if (VFS_ROOT(mountlist
.tqh_first
, &rootvnode
, &context
)) {
998 panic("bsd_init: cannot find root vnode: %s", PE_boot_args());
1000 rootvnode
->v_flag
|= VROOT
;
1001 (void)vnode_ref(rootvnode
);
1002 (void)vnode_put(rootvnode
);
1003 filedesc0
.fd_cdir
= rootvnode
;
1010 /* post mount setup */
1011 if ((err
= netboot_setup()) != 0) {
1012 PE_display_icon( 0, "noroot"); /* XXX a netboot-specific icon would be nicer */
1013 vc_progress_set(FALSE
, 0);
1014 for (i
= 1; 1; i
*= 2) {
1015 printf("bsd_init: NetBoot could not find root, error %d: %s\n",
1016 err
, PE_boot_args());
1017 printf("We are hanging here...\n");
1018 IOSleep(i
* 60 * 1000);
1026 #if CONFIG_IMAGEBOOT
1027 #if CONFIG_LOCKERBOOT
1029 * Stash the protoboot vnode, mount, filesystem name, and device name for
1030 * later use. Note that the mount-from name may not have the "/dev/"
1031 * component, so we must sniff out this condition and add it as needed.
1034 pbmnt
= pbvn
->v_mount
;
1035 pbdevp
= vfs_statfs(pbmnt
)->f_mntfromname
;
1036 slash_dev
= strnstr(pbdevp
, "/dev/", strlen(pbdevp
));
1039 * If the old root is a snapshot mount, it will have the form:
1041 * com.apple.os.update-<boot manifest hash>@<dev node path>
1043 * So we just search the mntfromname for any occurrence of "/dev/" and
1044 * grab that as the device path. The image boot code needs a dev node to
1045 * do the re-mount, so we cannot directly mount the snapshot as the
1046 * protoboot volume currently.
1048 strlcpy(pbdevpath
, slash_dev
, sizeof(pbdevpath
));
1050 snprintf(pbdevpath
, sizeof(pbdevpath
), "/dev/%s", pbdevp
);
1053 bsd_init_kprintf("protoboot mount-from: %s\n", pbdevp
);
1054 bsd_init_kprintf("protoboot dev path: %s\n", pbdevpath
);
1056 strlcpy(pbfsname
, pbmnt
->mnt_vtable
->vfc_name
, sizeof(pbfsname
));
1059 * See if a system disk image is present. If so, mount it and
1060 * switch the root vnode to point to it
1062 imageboot_type_t imageboot_type
= imageboot_needed();
1063 if (netboot
== FALSE
&& imageboot_type
) {
1065 * An image was found. No turning back: we're booted
1066 * with a kernel from the disk image.
1068 bsd_init_kprintf("doing image boot: type = %d\n", imageboot_type
);
1069 imageboot_setup(imageboot_type
);
1072 #if CONFIG_LOCKERBOOT
1073 if (imageboot_type
== IMAGEBOOT_LOCKER
) {
1074 bsd_init_kprintf("booting from locker\n");
1075 if (vnode_tag(rootvnode
) != VT_LOCKERFS
) {
1076 panic("root filesystem not a locker: fsname = %s",
1077 rootvnode
->v_mount
->mnt_vtable
->vfc_name
);
1080 #endif /* CONFIG_LOCKERBOOT */
1081 #endif /* CONFIG_IMAGEBOOT */
1083 /* set initial time; all other resource data is already zero'ed */
1084 microtime_with_abstime(&kernproc
->p_start
, &kernproc
->p_stats
->ps_start
);
1088 char mounthere
[] = "/dev"; /* !const because of internal casting */
1090 bsd_init_kprintf("calling devfs_kernel_mount\n");
1091 devfs_kernel_mount(mounthere
);
1095 if (vfs_mount_rosv_data()) {
1096 panic("failed to mount data volume!");
1099 if (vfs_mount_vm()) {
1100 printf("failed to mount vm volume!");
1103 #if CONFIG_LOCKERBOOT
1105 * We need to wait until devfs is up before remounting the protoboot volume
1106 * within the locker so that it can have a real devfs vnode backing it.
1108 if (imageboot_type
== IMAGEBOOT_LOCKER
) {
1109 bsd_init_kprintf("re-mounting protoboot volume\n");
1110 int error
= mount_locker_protoboot(pbfsname
, LOCKER_PROTOBOOT_MOUNT
,
1113 panic("failed to mount protoboot volume: dev path = %s, error = %d",
1117 #endif /* CONFIG_LOCKERBOOT */
1119 /* Initialize signal state for process 0. */
1120 bsd_init_kprintf("calling siginit\n");
1123 bsd_init_kprintf("calling bsd_utaskbootstrap\n");
1124 bsd_utaskbootstrap();
1126 pal_kernel_announce();
1128 bsd_init_kprintf("calling mountroot_post_hook\n");
1130 /* invoke post-root-mount hook */
1131 if (mountroot_post_hook
!= NULL
) {
1132 mountroot_post_hook();
1136 consider_zone_gc(FALSE
);
1139 /* Initialize System Override call */
1140 init_system_override();
1142 bsd_init_kprintf("done\n");
/*
 * NOTE(review): the function header for this run of statements is not part
 * of this extract, and the embedded source line numbers skip, so statements
 * and preprocessor guards are presumably missing between the visible lines.
 *
 * Visible steps: fetch the current process, call process_name() on it with
 * the string init, pass its p_ucred to mac_cred_label_associate_user(),
 * call vm_init_before_launchd(), run bsd_list_tests() and bsd_do_post(),
 * and finally call load_init_program() on the process.
 */
1148 proc_t p
= current_proc();
1150 process_name("init", p
);
1152 /* Set up exception-to-signal reflection */
1156 mac_cred_label_associate_user(p
->p_ucred
);
1159 vm_init_before_launchd();
/*
 * NOTE(review): the value returned by bsd_list_tests() is overwritten by
 * the bsd_do_post() result in the very next statement without being read.
 */
1162 int result
= bsd_list_tests();
1163 result
= bsd_do_post();
/* The condition guarding this panic is not visible in this extract. */
1165 panic("bsd_do_post: Tests failed with result = 0x%08x\n", result
);
/*
 * NOTE(review): unlike the other bsd_init_kprintf() messages in this file
 * section, this one has no trailing newline - confirm whether intended.
 */
1169 bsd_init_kprintf("bsd_do_post - done");
1171 load_init_program(p
);
/*
 * NOTE(review): the function header is not part of this extract; the name
 * bsd_autoconf is taken from the log message below - confirm. The call
 * that the log message announces is also not visible here.
 *
 * Visible steps: log a message, walk the pseudo_inits table, then return
 * whatever IOKitBSDInit() returns.
 */
1178 kprintf("bsd_autoconf: calling kminit\n");
1182 * Early startup for bsd pseudodevices.
1185 struct pseudo_init
*pi
;
/*
 * The table walk stops at the first entry whose ps_func is null; each
 * earlier entry has its ps_func invoked with that entry's ps_count.
 */
1187 for (pi
= pseudo_inits
; pi
->ps_func
; pi
++) {
1188 (*pi
->ps_func
)(pi
->ps_count
);
1192 return IOKitBSDInit();
1196 #include <sys/disklabel.h> /* for MAXPARTITIONS */
/*
 * NOTE(review): the function header, local declarations and the branch
 * conditions are not part of this extract; the name setconf is taken from
 * the log message below - confirm.
 *
 * Visible steps: ask IOFindBSDRoot() to fill in rootdevice, rootdev and
 * flags; on the error path log the error and fall back to device sd0a
 * with rootdev set to makedev( 6, 0 ); then pick netboot_mountroot as the
 * mountroot handler for the network device case.
 */
1204 err
= IOFindBSDRoot(rootdevice
, sizeof(rootdevice
), &rootdev
, &flags
);
/*
 * NOTE(review): the two adjacent string literals below are concatenated
 * with no separating space, so the logged text reads (%d);setting with
 * nothing after the semicolon - a space is probably intended.
 */
1206 printf("setconf: IOFindBSDRoot returned an error (%d);"
1207 "setting rootdevice to 'sd0a'.\n", err
); /* XXX DEBUG TEMP */
1208 rootdev
= makedev( 6, 0 );
/* strlcpy is bounded by the size of the rootdevice buffer. */
1209 strlcpy(rootdevice
, "sd0a", sizeof(rootdevice
));
1215 /* network device */
1216 mountroot
= netboot_mountroot
;
1219 /* otherwise have vfs determine root filesystem */
/*
 * bsd_utaskbootstrap: creates the bootstrap process by calling cloneproc()
 * on kernproc, looks up pid 1 with proc_find() and stores it in initproc,
 * ends the signal and transition states on initproc, then marks the new
 * thread with act_set_astbsd() and clears its return wait with
 * task_clear_return_wait().
 *
 * NOTE(review): the return type line, braces, local declarations (thread,
 * ut) and any use of ut after it is assigned are not part of this extract;
 * the embedded source line numbers skip, so lines are presumably missing.
 */
1227 bsd_utaskbootstrap(void)
1233 * Clone the bootstrap process from the kernel process, without
1234 * inheriting either task characteristics or memory from the kernel;
1236 thread
= cloneproc(TASK_NULL
, COALITION_NULL
, kernproc
, FALSE
, TRUE
);
1238 /* Hold the reference as it will be dropped during shutdown */
1239 initproc
= proc_find(1);
/*
 * NOTE(review): the PROC_NULL check on initproc is compiled only when
 * __PROC_INTERNAL_DEBUG is set; in the visible lines initproc is then
 * handed to proc_signalend() and proc_transend() with no other check.
 * Confirm that proc_find(1) cannot fail at this point.
 */
1240 #if __PROC_INTERNAL_DEBUG
1241 if (initproc
== PROC_NULL
) {
1242 panic("bsd_utaskbootstrap: initproc not set\n");
1246 * Since we aren't going back out the normal way to our parent,
1247 * we have to drop the transition locks explicitly.
1249 proc_signalend(initproc
, 0);
1250 proc_transend(initproc
, 0);
1252 ut
= (struct uthread
*)get_bsdthread_info(thread
);
1254 act_set_astbsd(thread
);
1255 task_clear_return_wait(get_threadtask(thread
), TCRW_CLEAR_ALL_WAIT
);
/*
 * parse_bsd_args: looks up BSD-related boot arguments with
 * PE_parse_boot_argn() and records the results in variables such as
 * boothowto, bootarg_no32exec, bootarg_vnode_cache_defeat, darkboot and
 * legacy_footprint_entitlement_mode.
 *
 * NOTE(review): the return type line, the braces and the local
 * declarations (for example namep and msgbuf) are not part of this
 * extract, and the embedded source line numbers skip, so some statements
 * and preprocessor guards are presumably missing between visible lines.
 */
1259 parse_bsd_args(void)
/*
 * For the dash-prefixed arguments only the return value of
 * PE_parse_boot_argn() is tested; whatever is written into namep is not
 * read anywhere in the visible lines.
 */
1264 if (PE_parse_boot_argn("-s", namep
, sizeof(namep
))) {
1265 boothowto
|= RB_SINGLE
;
1268 if (PE_parse_boot_argn("-x", namep
, sizeof(namep
))) { /* safe boot */
1269 boothowto
|= RB_SAFEBOOT
;
1272 if (PE_parse_boot_argn("-minimalboot", namep
, sizeof(namep
))) {
1274 * -minimalboot indicates that we want userspace to be bootstrapped to a
1275 * minimal environment. What constitutes minimal is up to the bootstrap
1284 /* disable 32 bit grading */
1285 if (PE_parse_boot_argn("no32exec", &no32exec
, sizeof(no32exec
))) {
/* The double negation normalises any nonzero argument value to 1. */
1286 bootarg_no32exec
= !!no32exec
;
1290 int execfailure_crashreports
;
1291 /* enable crash reports on various exec failures */
1292 if (PE_parse_boot_argn("execfailurecrashes", &execfailure_crashreports
, sizeof(execfailure_crashreports
))) {
1293 bootarg_execfailurereports
= !!execfailure_crashreports
;
1296 /* disable vnode_cache_is_authorized() by setting vnode_cache_defeat */
1297 if (PE_parse_boot_argn("-vnode_cache_defeat", namep
, sizeof(namep
))) {
1298 bootarg_vnode_cache_defeat
= 1;
1301 #if DEVELOPMENT || DEBUG
1302 if (PE_parse_boot_argn("-disable_aslr", namep
, sizeof(namep
))) {
1303 bootarg_disable_aslr
= 1;
/* The return value of the ncl lookup is deliberately not tested here. */
1307 PE_parse_boot_argn("ncl", &ncl
, sizeof(ncl
));
/* What happens when nbuf is present is not visible in this extract. */
1308 if (PE_parse_boot_argn("nbuf", &max_nbuf_headers
,
1309 sizeof(max_nbuf_headers
))) {
1314 #if defined (__i386__) || defined (__x86_64__)
1315 PE_parse_boot_argn("policy_check", &policy_check_flags
, sizeof(policy_check_flags
));
1317 #endif /* CONFIG_MACF */
/* The same msgbuf value is applied to both log_setsize and oslog_setsize. */
1319 if (PE_parse_boot_argn("msgbuf", &msgbuf
, sizeof(msgbuf
))) {
1320 log_setsize(msgbuf
);
1321 oslog_setsize(msgbuf
);
/* The statement run when -novfscache is present is not visible here. */
1324 if (PE_parse_boot_argn("-novfscache", namep
, sizeof(namep
))) {
1328 #if CONFIG_JETSAM && (DEVELOPMENT || DEBUG)
1329 if (PE_parse_boot_argn("-no_vnode_jetsam", namep
, sizeof(namep
))) {
1330 bootarg_no_vnode_jetsam
= 1;
1332 #endif /* CONFIG_JETSAM && (DEVELOPMENT || DEBUG) */
1337 * The darkboot flag is specified by the bootloader and is stored in
1338 * boot_args->bootFlags. This flag is available starting revision 2.
1340 boot_args
*args
= (boot_args
*) PE_state
.bootArgs
;
/* bootFlags is read only for a non-null args at revision 2 or later. */
1341 if ((args
!= NULL
) && (args
->Revision
>= kBootArgsRevision2
)) {
1342 darkboot
= (args
->bootFlags
& kBootFlagsDarkBoot
) ? 1 : 0;
1349 if (PE_parse_boot_argn("-disable_procref_tracking", namep
, sizeof(namep
))) {
1350 proc_ref_tracking_disabled
= 1;
1355 if (PE_parse_boot_argn("-disable_osreason_debug", namep
, sizeof(namep
))) {
1356 os_reason_debug_disabled
= 1;
1360 PE_parse_boot_argn("sigrestrict", &sigrestrict_arg
, sizeof(sigrestrict_arg
));
1362 #if DEVELOPMENT || DEBUG
1363 if (PE_parse_boot_argn("-no_sigsys", namep
, sizeof(namep
))) {
1364 send_sigsys
= false;
/* The statement run for a non-empty alt-dyld path is not visible here. */
1367 if (PE_parse_boot_argn("alt-dyld", dyld_alt_path
, sizeof(dyld_alt_path
))) {
1368 if (strlen(dyld_alt_path
) > 0) {
1372 PE_parse_boot_argn("dyld_flags", &dyld_flags
, sizeof(dyld_flags
));
/*
 * NOTE(review): this lookup passes &namep, whereas every other flag lookup
 * in this function passes namep. If namep is an array the address is the
 * same but the pointer type differs - confirm and make consistent.
 */
1374 if (PE_parse_boot_argn("-disable_syscallfilter", &namep
, sizeof(namep
))) {
1375 syscallfilter_disable
= 1;
/*
 * A parsed mode outside the range IGNORE .. LIMIT_INCREASE is replaced by
 * LEGACY_FOOTPRINT_ENTITLEMENT_LIMIT_INCREASE by the range check below.
 */
1379 if (PE_parse_boot_argn("legacy_footprint_entitlement_mode", &legacy_footprint_entitlement_mode
, sizeof(legacy_footprint_entitlement_mode
))) {
1381 * legacy_footprint_entitlement_mode specifies the behavior we want associated
1382 * with the entitlement. The supported modes are:
1384 * LEGACY_FOOTPRINT_ENTITLEMENT_IGNORE:
1385 * Indicates that we want every process to have the memory accounting
1386 * that is available in iOS 12.0 and beyond.
1388 * LEGACY_FOOTPRINT_ENTITLEMENT_IOS11_ACCT:
1389 * Indicates that for every process that has the 'legacy footprint entitlement',
1390 * we want to give it the old iOS 11.0 accounting behavior which accounted some
1391 * of the process's memory to the kernel.
1393 * LEGACY_FOOTPRINT_ENTITLEMENT_LIMIT_INCREASE:
1394 * Indicates that for every process that has the 'legacy footprint entitlement',
1395 * we want it to have a higher memory limit which will help them acclimate to the
1396 * iOS 12.0 (& beyond) accounting behavior that does the right accounting.
1397 * The bonus added to the system-wide task limit to calculate this higher memory limit
1398 * is available in legacy_footprint_bonus_mb.
1401 if (legacy_footprint_entitlement_mode
< LEGACY_FOOTPRINT_ENTITLEMENT_IGNORE
||
1402 legacy_footprint_entitlement_mode
> LEGACY_FOOTPRINT_ENTITLEMENT_LIMIT_INCREASE
) {
1403 legacy_footprint_entitlement_mode
= LEGACY_FOOTPRINT_ENTITLEMENT_LIMIT_INCREASE
;
1406 #endif /* __arm64__ */
1407 #endif /* DEVELOPMENT || DEBUG */
/*
 * bsd_exec_setup: assigns bsd_simul_execs one of five values
 * (BSD_SIMUL_EXECS, 65, 129, 257 or 513) and then derives
 * bsd_pageable_map_size from it.
 *
 * scale: selector for the bsd_simul_execs value.
 *
 * NOTE(review): the control flow that maps scale onto the five assignments
 * below is not part of this extract (the embedded source line numbers skip
 * between them) - presumably a switch on scale; confirm against the full
 * source before relying on any particular mapping.
 */
1411 bsd_exec_setup(int scale
)
1416 bsd_simul_execs
= BSD_SIMUL_EXECS
;
1420 bsd_simul_execs
= 65;
1424 bsd_simul_execs
= 129;
1428 bsd_simul_execs
= 257;
1431 bsd_simul_execs
= 513;
/* Map size is the exec count times BSD_PAGEABLE_SIZE_PER_EXEC. */
1434 bsd_pageable_map_size
= (bsd_simul_execs
* BSD_PAGEABLE_SIZE_PER_EXEC
);