]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/bsd_init.c
xnu-7195.81.3.tar.gz
[apple/xnu.git] / bsd / kern / bsd_init.c
1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 *
28 *
29 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)init_main.c 8.16 (Berkeley) 5/14/95
66 */
67
68 /*
69 *
70 * Mach Operating System
71 * Copyright (c) 1987 Carnegie-Mellon University
72 * All rights reserved. The CMU software License Agreement specifies
73 * the terms and conditions for use and redistribution.
74 */
75 /*
76 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
77 * support for mandatory and extensible security protections. This notice
78 * is included in support of clause 2.2 (b) of the Apple Public License,
79 * Version 2.0.
80 */
81
82 #include <sys/param.h>
83 #include <sys/filedesc.h>
84 #include <sys/kernel.h>
85 #include <sys/mount_internal.h>
86 #include <sys/proc_internal.h>
87 #include <sys/kauth.h>
88 #include <sys/systm.h>
89 #include <sys/vnode_internal.h>
90 #include <sys/conf.h>
91 #include <sys/buf_internal.h>
92 #include <sys/user.h>
93 #include <sys/time.h>
94 #include <sys/systm.h>
95 #include <sys/mman.h>
96 #include <sys/kasl.h>
97
98 #include <security/audit/audit.h>
99
100 #include <sys/malloc.h>
101 #include <sys/dkstat.h>
102 #include <sys/codesign.h>
103
104 #include <kern/startup.h>
105 #include <kern/thread.h>
106 #include <kern/task.h>
107 #include <kern/ast.h>
108 #include <kern/zalloc.h>
109 #include <kern/ux_handler.h> /* for ux_handler_setup() */
110
111 #include <mach/vm_param.h>
112
113 #include <vm/vm_map.h>
114 #include <vm/vm_kern.h>
115
116 #include <sys/reboot.h>
117 #include <dev/busvar.h> /* for pseudo_inits */
118 #include <sys/kdebug.h>
119 #include <sys/monotonic.h>
120 #include <sys/reason.h>
121
122 #include <mach/mach_types.h>
123 #include <mach/vm_prot.h>
124 #include <mach/semaphore.h>
125 #include <mach/sync_policy.h>
126 #include <kern/clock.h>
127 #include <mach/kern_return.h>
128 #include <mach/thread_act.h> /* for thread_resume() */
129 #include <sys/mcache.h> /* for mcache_init() */
130 #include <sys/mbuf.h> /* for mbinit() */
131 #include <sys/event.h> /* for knote_init() */
132 #include <sys/eventhandler.h> /* for eventhandler_init() */
133 #include <sys/kern_memorystatus.h> /* for memorystatus_init() */
134 #include <sys/kern_memorystatus_freeze.h> /* for memorystatus_freeze_init() */
135 #include <sys/aio_kern.h> /* for aio_init() */
136 #include <sys/semaphore.h> /* for psem_cache_init() */
137 #include <net/dlil.h> /* for dlil_init() */
138 #include <net/kpi_protocol.h> /* for proto_kpi_init() */
139 #include <net/iptap.h> /* for iptap_init() */
140 #include <sys/socketvar.h> /* for socketinit() */
141 #include <sys/protosw.h> /* for domaininit() */
142 #include <kern/sched_prim.h> /* for thread_wakeup() */
143 #include <net/if_ether.h> /* for ether_family_init() */
144 #include <net/if_gif.h> /* for gif_init() */
145 #include <miscfs/devfs/devfsdefs.h> /* for devfs_kernel_mount() */
146 #include <vm/vm_kern.h> /* for kmem_suballoc() */
147 #include <sys/semaphore.h> /* for psem_lock_init() */
148 #include <sys/msgbuf.h> /* for log_setsize() */
149 #include <sys/tty.h> /* for tty_init() */
150 #include <sys/proc_uuid_policy.h> /* proc_uuid_policy_init() */
151 #include <netinet/flow_divert.h> /* flow_divert_init() */
152 #include <net/content_filter.h> /* for cfil_init() */
153 #include <net/necp.h> /* for necp_init() */
154 #include <net/network_agent.h> /* for netagent_init() */
155 #include <net/packet_mangler.h> /* for pkt_mnglr_init() */
156 #include <net/if_utun.h> /* for utun_register_control() */
157 #include <net/if_ipsec.h> /* for ipsec_register_control() */
158 #include <net/net_str_id.h> /* for net_str_id_init() */
159 #include <net/netsrc.h> /* for netsrc_init() */
160 #include <net/ntstat.h> /* for nstat_init() */
161 #include <netinet/tcp_cc.h> /* for tcp_cc_init() */
162 #include <netinet/mptcp_var.h> /* for mptcp_control_register() */
163 #include <net/nwk_wq.h> /* for nwk_wq_init */
164 #include <net/restricted_in_port.h> /* for restricted_in_port_init() */
165 #include <kern/assert.h> /* for assert() */
166 #include <sys/kern_overrides.h> /* for init_system_override() */
167 #include <sys/lockf.h> /* for lf_init() */
168 #include <sys/fsctl.h>
169
170 #include <net/init.h>
171
172 #if CONFIG_MACF
173 #include <security/mac_framework.h>
174 #include <security/mac_internal.h> /* mac_init_bsd() */
175 #include <security/mac_mach_internal.h> /* mac_update_task_label() */
176 #endif
177
178 #include <machine/exec.h>
179
180 #if CONFIG_NETBOOT
181 #include <sys/netboot.h>
182 #endif
183
184 #if CONFIG_IMAGEBOOT
185 #include <sys/imageboot.h>
186 #endif
187
188 #if PFLOG
189 #include <net/if_pflog.h>
190 #endif
191
192
193 #include <pexpert/pexpert.h>
194 #include <machine/pal_routines.h>
195 #include <console/video_console.h>
196
197 #if CONFIG_XNUPOST
198 #include <tests/xnupost.h>
199 #endif
200
201 void * get_user_regs(thread_t); /* XXX kludge for <machine/thread.h> */
202 void IOKitInitializeTime(void); /* XXX local prototype; bsd_init() calls this to initialize the calendar */
203 void IOSleep(unsigned int); /* XXX local prototype; bsd_init() sleeps with it between messages when a network root fails to mount */
204 void loopattach(void); /* XXX local prototype; bsd_init() calls this when NLOOP > 0 */
205
206 const char *const copyright = /* Berkeley copyright banner; bsd_init() hands it to printf() */
207 "Copyright (c) 1982, 1986, 1989, 1991, 1993\n\t"
208 "The Regents of the University of California. "
209 "All rights reserved.\n\n";
210
211 /* Components of the first process (process 0) -- never freed. bsd_init() attaches them to kernproc. */
212 struct proc proc0 = { .p_comm = "kernel_task", .p_name = "kernel_task" };
213 struct session session0; /* session of pgrp0; its s_leader is set to kernproc */
214 struct pgrp pgrp0; /* becomes kernproc->p_pgrp */
215 struct filedesc filedesc0; /* becomes kernproc->p_fd */
216 struct plimit limit0; /* becomes kernproc->p_limit */
217 struct pstats pstats0; /* becomes kernproc->p_stats */
218 struct sigacts sigacts0; /* becomes kernproc->p_sigacts */
219 SECURITY_READ_ONLY_LATE(proc_t) kernproc = &proc0; /* bsd_init() binds this proc to kernel_task */
220 proc_t XNU_PTRAUTH_SIGNED_PTR("initproc") initproc;
221
222 long tk_cancc;
223 long tk_nin;
224 long tk_nout;
225 long tk_rawcc;
226
227 int lock_trace = 0;
228 /* Global variables to make pstat happy. We do swapping differently */
229 int nswdev, nswap;
230 int nswapmap;
231 void *swapmap;
232 struct swdevt swdevt[1];
233
234 dev_t rootdev; /* device of the root */
235 dev_t dumpdev; /* device to take dumps on */
236 long dumplo; /* offset into dumpdev */
237 long hostid;
238 char hostname[MAXHOSTNAMELEN];
239 lck_mtx_t hostname_lock;
240 lck_grp_t *hostname_lck_grp;
241 char domainname[MAXDOMNAMELEN];
242 lck_mtx_t domainname_lock;
243
244 char rootdevice[DEVMAXNAMESIZE];
245
246 struct vnode *rootvp;
247 bool rootvp_is_ssd = false;
248 int boothowto;
249 int minimalboot = 0;
250 #if CONFIG_DARKBOOT
251 int darkboot = 0;
252 #endif
253
254 #if __arm64__
255 int legacy_footprint_entitlement_mode = LEGACY_FOOTPRINT_ENTITLEMENT_IGNORE;
256 #endif /* __arm64__ */
257
258 #if PROC_REF_DEBUG
259 __private_extern__ int proc_ref_tracking_disabled = 0; /* disable panics on leaked proc refs across syscall boundary */
260 #endif
261
262 #if OS_REASON_DEBUG
263 __private_extern__ int os_reason_debug_disabled = 0; /* disable asserts for when we fail to allocate OS reasons */
264 #endif
265
266 extern kern_return_t IOFindBSDRoot(char *, unsigned int, dev_t *, u_int32_t *);
267 extern void IOSecureBSDRoot(const char * rootName);
268 extern kern_return_t IOKitBSDInit(void );
269 extern boolean_t IOSetRecoveryBoot(bsd_bootfail_mode_t, uuid_t, boolean_t);
270 extern void kminit(void);
271 extern void file_lock_init(void);
272 extern void bsd_bufferinit(void);
273 extern void oslog_setsize(int size);
274 extern void throttle_init(void);
275 extern void acct_init(void);
276
277 #if CONFIG_LOCKERBOOT
278 #define LOCKER_PROTOBOOT_MOUNT "/protoboot"
279
280 const char kernel_protoboot_mount[] = LOCKER_PROTOBOOT_MOUNT;
281 extern int mount_locker_protoboot(const char *fsname, const char *mntpoint,
282 const char *pbdevpath);
283 #endif
284
285 extern int ncl;
286 #if DEVELOPMENT || DEBUG
287 extern int syscallfilter_disable;
288 #endif // DEVELOPMENT || DEBUG
289
290 vm_map_t bsd_pageable_map;
291 vm_map_t mb_map;
292
293 static int bsd_simul_execs;
294 static int bsd_pageable_map_size;
295 __private_extern__ int execargs_cache_size = 0;
296 __private_extern__ int execargs_free_count = 0;
297 __private_extern__ vm_offset_t * execargs_cache = NULL;
298
299 void bsd_exec_setup(int);
300
301 __private_extern__ int bootarg_execfailurereports = 0;
302
303 #if __x86_64__
304 __private_extern__ int bootarg_no32exec = 1;
305 #endif
306 __private_extern__ int bootarg_vnode_cache_defeat = 0;
307
308 #if CONFIG_JETSAM && (DEVELOPMENT || DEBUG)
309 __private_extern__ int bootarg_no_vnode_jetsam = 0;
310 #endif /* CONFIG_JETSAM && (DEVELOPMENT || DEBUG) */
311
312 __private_extern__ int bootarg_no_vnode_drain = 0;
313
314 /*
315 * Prevent kernel-based ASLR from being used, for testing.
316 */
317 #if DEVELOPMENT || DEBUG
318 __private_extern__ int bootarg_disable_aslr = 0;
319 #endif
320
321
322 /*
323 * Allow an alternate dyld to be used for testing.
324 */
325
326 #if DEVELOPMENT || DEBUG
327 char dyld_alt_path[MAXPATHLEN];
328 int use_alt_dyld = 0;
329 extern uint64_t dyld_flags;
330 #endif
331
332 int cmask = CMASK;
333 extern int customnbuf;
334
335 kern_return_t bsd_autoconf(void);
336 void bsd_utaskbootstrap(void);
337
338 static void parse_bsd_args(void);
339 #if CONFIG_DEV_KMEM
340 extern void dev_kmem_init(void);
341 #endif
342 extern void time_zone_slock_init(void);
343 extern void select_waitq_init(void);
344 static void process_name(const char *, proc_t);
345
346 static void setconf(void);
347
348 #if CONFIG_BASESYSTEMROOT
349 static int bsd_find_basesystem_dmg(char *bsdmgpath_out, bool *rooted_dmg);
350 static boolean_t bsdmgroot_bootable(void);
351 #endif // CONFIG_BASESYSTEMROOT
352
353 static bool bsd_rooted_ramdisk(void);
354
355 #if SYSV_SHM
356 extern void sysv_shm_lock_init(void);
357 #endif
358 #if SYSV_SEM
359 extern void sysv_sem_lock_init(void);
360 #endif
361 #if SYSV_MSG
362 extern void sysv_msg_lock_init(void);
363 #endif
364
365 extern void ulock_initialize(void);
366
367 #if CONFIG_MACF
368 #if defined (__i386__) || defined (__x86_64__)
369 /* MACF policy_check configuration flags; see policy_check.c for details */
370 int policy_check_flags = 0;
371
372 extern int check_policy_init(int);
373 #endif
374 #endif /* CONFIG_MACF */
375
376 /* If we are using CONFIG_DTRACE */
377 #if CONFIG_DTRACE
378 extern void dtrace_postinit(void);
379 #endif
380
381 /*
382 * Initialization code.
383 * Called from cold start routine as
384 * soon as a stack and segmentation
385 * have been established.
386 * Functions:
387 * turn on clock
388 * hand craft 0th process
389 * call all initialization routines
390 * hand craft 1st user process
391 */
392
393 /*
394 * Sets the name for the given task.
395 */
396 static void
397 process_name(const char *s, proc_t p)
398 {
399 strlcpy(p->p_comm, s, sizeof(p->p_comm));
400 strlcpy(p->p_name, s, sizeof(p->p_name));
401 }
402
403 /* To allow these values to be patched, they're globals here; bsd_init() copies them into limit0.pl_rlimit[RLIMIT_STACK], [RLIMIT_DATA] and [RLIMIT_CORE]. */
404 #include <machine/vmparam.h>
405 struct rlimit vm_initial_limit_stack = { .rlim_cur = DFLSSIZ, .rlim_max = MAXSSIZ - PAGE_MAX_SIZE };
406 struct rlimit vm_initial_limit_data = { .rlim_cur = DFLDSIZ, .rlim_max = MAXDSIZ };
407 struct rlimit vm_initial_limit_core = { .rlim_cur = DFLCSIZ, .rlim_max = MAXCSIZ };
408
409 extern struct os_refgrp rlimit_refgrp;
410
411 extern thread_t cloneproc(task_t, coalition_t, proc_t, int, int);
412 extern int (*mountroot)(void);
413
414 lck_grp_t * proc_lck_grp;
415 lck_grp_t * proc_slock_grp;
416 lck_grp_t * proc_fdmlock_grp;
417 lck_grp_t * proc_kqhashlock_grp;
418 lck_grp_t * proc_knhashlock_grp;
419 lck_grp_t * proc_ucred_mlock_grp;
420 lck_grp_t * proc_mlock_grp;
421 lck_grp_t * proc_dirslock_grp;
422 lck_grp_attr_t * proc_lck_grp_attr;
423 lck_attr_t * proc_lck_attr;
424 lck_mtx_t * proc_list_mlock;
425 lck_mtx_t * proc_klist_mlock;
426
427 #if CONFIG_XNUPOST
428 lck_grp_t * sysctl_debug_test_stackshot_owner_grp; /* allocated in bsd_init() */
429 lck_mtx_t * sysctl_debug_test_stackshot_owner_init_mtx; /* allocated in bsd_init() from the group above */
430 #endif /* CONFIG_XNUPOST */
431
432 extern lck_mtx_t * execargs_cache_lock;
433
434 #if XNU_TARGET_OS_OSX
435 /* hook called after root is mounted XXX temporary hack */
436 void (*mountroot_post_hook)(void);
437 void (*unmountroot_pre_hook)(void);
438 #endif
439 void set_rootvnode(vnode_t);
440
441 extern lck_rw_t * rootvnode_rw_lock;
442
443 /* called with an iocount and usecount on new_rootvnode */
444 void
445 set_rootvnode(vnode_t new_rootvnode)
446 {
447 mount_t new_mount = (new_rootvnode != NULL) ? new_rootvnode->v_mount : NULL;
448 vnode_t new_devvp = (new_mount != NULL) ? new_mount->mnt_devvp : NULL;
449 vnode_t old_rootvnode = rootvnode;
450
451 new_rootvnode->v_flag |= VROOT;
452 rootvp = new_devvp;
453 rootvnode = new_rootvnode;
454 filedesc0.fd_cdir = new_rootvnode;
455 if (new_devvp != NULL) {
456 rootdev = vnode_specrdev(new_devvp);
457 } else if (new_mount != NULL) {
458 rootdev = vfs_statfs(new_mount)->f_fsid.val[0]; /* like ATTR_CMN_DEVID */
459 } else {
460 rootdev = NODEV;
461 }
462
463 if (old_rootvnode) {
464 vnode_rele(old_rootvnode);
465 }
466 }
467
468 #define RAMDEV "md0" /* prefix bsd_rooted_ramdisk() looks for at the start of the "rd" boot-arg */
469
470 bool
471 bsd_rooted_ramdisk(void)
472 {
473 bool is_ramdisk = false;
474 char *dev_path = zalloc(ZV_NAMEI);
475 if (dev_path == NULL) {
476 panic("failed to allocate devpath string! \n");
477 }
478
479 if (PE_parse_boot_argn("rd", dev_path, MAXPATHLEN)) {
480 if (strncmp(dev_path, RAMDEV, strlen(RAMDEV)) == 0) {
481 is_ramdisk = true;
482 }
483 }
484
485 zfree(ZV_NAMEI, dev_path);
486 return is_ramdisk;
487 }
488
489 /*
490 * This function is called before IOKit initialization, so that globals
491 * like the sysctl tree are initialized before kernel extensions
492 * are started (since they may want to register sysctls).
493 */
494 void
495 bsd_early_init(void)
496 {
497 sysctl_early_init(); /* currently the only early-init step performed here */
498 }
499
500 /*
501 * This function is called very early on in the Mach startup, from the
502 * function start_kernel_threads() in osfmk/kern/startup.c. It's called
503 * in the context of the current (startup) task using a call to the
504 * function kernel_thread_create() to jump into start_kernel_threads().
505 * Internally, kernel_thread_create() calls thread_create_internal(),
506 * which calls uthread_alloc(). The function of uthread_alloc() is
507 * normally to allocate a uthread structure, and fill out the uu_sigmask,
508 * uu_context fields. It skips filling these out in the case of the "task"
509 * being "kernel_task", because the order of operation is inverted. To
510 * account for that, we need to manually fill in at least the contents
511 * of the uu_context.vc_ucred field so that the uthread structure can be
512 * used like any other.
513 */
514
515 void
516 bsd_init(void)
517 {
518 struct uthread *ut;
519 unsigned int i;
520 struct vfs_context context;
521 kern_return_t ret;
522 struct ucred temp_cred;
523 struct posix_cred temp_pcred;
524 vnode_t init_rootvnode = NULLVP;
525 #if CONFIG_NETBOOT || CONFIG_IMAGEBOOT
526 boolean_t netboot = FALSE;
527 #endif
528 #if CONFIG_LOCKERBOOT
529 vnode_t pbvn = NULLVP;
530 mount_t pbmnt = NULL;
531 char *pbdevp = NULL;
532 char pbdevpath[64];
533 char pbfsname[MFSNAMELEN];
534 const char *slash_dev = NULL;
535 #endif
536
537 #define DEBUG_BSDINIT 0
538
539 #if DEBUG_BSDINIT
540 #define bsd_init_kprintf(x, ...) kprintf("bsd_init: " x, ## __VA_ARGS__)
541 #else
542 #define bsd_init_kprintf(x, ...)
543 #endif
544
545 throttle_init();
546
547 printf(copyright);
548
549 bsd_init_kprintf("calling parse_bsd_args\n");
550 parse_bsd_args();
551
552 #if CONFIG_DEV_KMEM
553 bsd_init_kprintf("calling dev_kmem_init\n");
554 dev_kmem_init();
555 #endif
556
557 /* Initialize kauth subsystem before instancing the first credential */
558 bsd_init_kprintf("calling kauth_init\n");
559 kauth_init();
560
561 /* Initialize process and pgrp structures. */
562 bsd_init_kprintf("calling procinit\n");
563 procinit();
564
565 /* Initialize the ttys (MUST be before kminit()/bsd_autoconf()!)*/
566 tty_init();
567
568 /* kernel_task->proc = kernproc; */
569 set_bsdtask_info(kernel_task, (void *)kernproc);
570
571 /* give kernproc a name */
572 bsd_init_kprintf("calling process_name\n");
573 process_name("kernel_task", kernproc);
574
575 /* allocate proc lock group attribute and group */
576 bsd_init_kprintf("calling lck_grp_attr_alloc_init\n");
577 proc_lck_grp_attr = lck_grp_attr_alloc_init();
578
579 proc_lck_grp = lck_grp_alloc_init("proc", proc_lck_grp_attr);
580
581 proc_slock_grp = lck_grp_alloc_init("proc-slock", proc_lck_grp_attr);
582 proc_ucred_mlock_grp = lck_grp_alloc_init("proc-ucred-mlock", proc_lck_grp_attr);
583 proc_mlock_grp = lck_grp_alloc_init("proc-mlock", proc_lck_grp_attr);
584 proc_fdmlock_grp = lck_grp_alloc_init("proc-fdmlock", proc_lck_grp_attr);
585 proc_kqhashlock_grp = lck_grp_alloc_init("proc-kqhashlock", proc_lck_grp_attr);
586 proc_knhashlock_grp = lck_grp_alloc_init("proc-knhashlock", proc_lck_grp_attr);
587 proc_dirslock_grp = lck_grp_alloc_init("proc-dirslock", proc_lck_grp_attr);
588 #if CONFIG_XNUPOST
589 sysctl_debug_test_stackshot_owner_grp = lck_grp_alloc_init("test-stackshot-owner-grp", LCK_GRP_ATTR_NULL);
590 sysctl_debug_test_stackshot_owner_init_mtx = lck_mtx_alloc_init(
591 sysctl_debug_test_stackshot_owner_grp,
592 LCK_ATTR_NULL);
593 #endif /* !CONFIG_XNUPOST */
594 /* Allocate proc lock attribute */
595 proc_lck_attr = lck_attr_alloc_init();
596
597 proc_list_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr);
598 proc_klist_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr);
599 lck_mtx_init(&kernproc->p_mlock, proc_mlock_grp, proc_lck_attr);
600 lck_mtx_init(&kernproc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr);
601 lck_mtx_init(&kernproc->p_ucred_mlock, proc_ucred_mlock_grp, proc_lck_attr);
602 lck_spin_init(&kernproc->p_slock, proc_slock_grp, proc_lck_attr);
603 lck_rw_init(&kernproc->p_dirs_lock, proc_dirslock_grp, proc_lck_attr);
604
605 assert(bsd_simul_execs != 0);
606 execargs_cache_lock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
607 execargs_cache_size = bsd_simul_execs;
608 execargs_free_count = bsd_simul_execs;
609 execargs_cache = zalloc_permanent(bsd_simul_execs * sizeof(vm_offset_t),
610 ZALIGN(vm_offset_t));
611
612 if (current_task() != kernel_task) {
613 printf("bsd_init: We have a problem, "
614 "current task is not kernel task\n");
615 }
616
617 bsd_init_kprintf("calling get_bsdthread_info\n");
618 ut = (uthread_t)get_bsdthread_info(current_thread());
619
620 #if CONFIG_MACF
621 /*
622 * Initialize the MAC Framework
623 */
624 mac_policy_initbsd();
625
626 #if defined (__i386__) || defined (__x86_64__)
627 /*
628 * We currently only support this on i386/x86_64, as that is the
629 * only lock code we have instrumented so far.
630 */
631 check_policy_init(policy_check_flags);
632 #endif
633 #endif /* MAC */
634
635 ulock_initialize();
636
637 hostname_lck_grp = lck_grp_alloc_init("hostname", LCK_GRP_ATTR_NULL);
638 lck_mtx_init(&hostname_lock, hostname_lck_grp, LCK_ATTR_NULL);
639 lck_mtx_init(&domainname_lock, hostname_lck_grp, LCK_ATTR_NULL);
640
641 /*
642 * Create process 0.
643 */
644 proc_list_lock();
645 LIST_INSERT_HEAD(&allproc, kernproc, p_list);
646 kernproc->p_pgrp = &pgrp0;
647 LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
648 LIST_INIT(&pgrp0.pg_members);
649 lck_mtx_init(&pgrp0.pg_mlock, proc_mlock_grp, proc_lck_attr);
650 /* There is no other bsd thread this point and is safe without pgrp lock */
651 LIST_INSERT_HEAD(&pgrp0.pg_members, kernproc, p_pglist);
652 kernproc->p_listflag |= P_LIST_INPGRP;
653 kernproc->p_pgrpid = 0;
654 kernproc->p_uniqueid = 0;
655
656 pgrp0.pg_session = &session0;
657 pgrp0.pg_membercnt = 1;
658
659 session0.s_count = 1;
660 session0.s_leader = kernproc;
661 session0.s_listflags = 0;
662 lck_mtx_init(&session0.s_mlock, proc_mlock_grp, proc_lck_attr);
663 LIST_INSERT_HEAD(SESSHASH(0), &session0, s_hash);
664 proc_list_unlock();
665
666 #if CONFIG_PERSONAS
667 kernproc->p_persona = NULL;
668 #endif
669
670 kernproc->task = kernel_task;
671
672 kernproc->p_stat = SRUN;
673 kernproc->p_flag = P_SYSTEM;
674 kernproc->p_lflag = 0;
675 kernproc->p_ladvflag = 0;
676
677 #if defined(__LP64__)
678 kernproc->p_flag |= P_LP64;
679 #endif
680
681 #if DEVELOPMENT || DEBUG
682 if (bootarg_disable_aslr) {
683 kernproc->p_flag |= P_DISABLE_ASLR;
684 }
685 #endif
686
687 kernproc->p_nice = NZERO;
688 kernproc->p_pptr = kernproc;
689
690 TAILQ_INIT(&kernproc->p_uthlist);
691 TAILQ_INSERT_TAIL(&kernproc->p_uthlist, ut, uu_list);
692
693 kernproc->sigwait = FALSE;
694 kernproc->sigwait_thread = THREAD_NULL;
695 kernproc->exit_thread = THREAD_NULL;
696 kernproc->p_csflags = CS_VALID;
697
698 /*
699 * Create credential. This also Initializes the audit information.
700 */
701 bsd_init_kprintf("calling bzero\n");
702 bzero(&temp_cred, sizeof(temp_cred));
703 bzero(&temp_pcred, sizeof(temp_pcred));
704 temp_pcred.cr_ngroups = 1;
705 /* kern_proc, shouldn't call up to DS for group membership */
706 temp_pcred.cr_flags = CRF_NOMEMBERD;
707 temp_cred.cr_audit.as_aia_p = audit_default_aia_p;
708
709 bsd_init_kprintf("calling kauth_cred_create\n");
710 /*
711 * We have to label the temp cred before we create from it to
712 * properly set cr_ngroups, or the create will fail.
713 */
714 posix_cred_label(&temp_cred, &temp_pcred);
715 kernproc->p_ucred = kauth_cred_create(&temp_cred);
716
717 /* update cred on proc */
718 PROC_UPDATE_CREDS_ONPROC(kernproc);
719
720 /* give the (already existing) initial thread a reference on it */
721 bsd_init_kprintf("calling kauth_cred_ref\n");
722 kauth_cred_ref(kernproc->p_ucred);
723 ut->uu_context.vc_ucred = kernproc->p_ucred;
724 ut->uu_context.vc_thread = current_thread();
725
726 vfs_set_context_kernel(&ut->uu_context);
727
728 TAILQ_INIT(&kernproc->p_aio_activeq);
729 TAILQ_INIT(&kernproc->p_aio_doneq);
730 kernproc->p_aio_total_count = 0;
731
732 bsd_init_kprintf("calling file_lock_init\n");
733 file_lock_init();
734
735 #if CONFIG_MACF
736 mac_cred_label_associate_kernel(kernproc->p_ucred);
737 #endif
738
739 /* Create the file descriptor table. */
740 kernproc->p_fd = &filedesc0;
741 filedesc0.fd_cmask = (mode_t)cmask;
742 filedesc0.fd_knlistsize = 0;
743 filedesc0.fd_knlist = NULL;
744 filedesc0.fd_knhash = NULL;
745 filedesc0.fd_knhashmask = 0;
746 lck_mtx_init(&filedesc0.fd_kqhashlock, proc_kqhashlock_grp, proc_lck_attr);
747 lck_mtx_init(&filedesc0.fd_knhashlock, proc_knhashlock_grp, proc_lck_attr);
748
749 /* Create the limits structures. */
750 kernproc->p_limit = &limit0;
751 for (i = 0; i < sizeof(kernproc->p_limit->pl_rlimit) / sizeof(kernproc->p_limit->pl_rlimit[0]); i++) {
752 limit0.pl_rlimit[i].rlim_cur =
753 limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
754 }
755 limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE;
756 limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = maxprocperuid;
757 limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
758 limit0.pl_rlimit[RLIMIT_STACK] = vm_initial_limit_stack;
759 limit0.pl_rlimit[RLIMIT_DATA] = vm_initial_limit_data;
760 limit0.pl_rlimit[RLIMIT_CORE] = vm_initial_limit_core;
761 os_ref_init_count(&limit0.pl_refcnt, &rlimit_refgrp, 1);
762
763 kernproc->p_stats = &pstats0;
764 kernproc->p_sigacts = &sigacts0;
765 kernproc->p_subsystem_root_path = NULL;
766
767 /*
768 * Charge root for one process: launchd.
769 */
770 bsd_init_kprintf("calling chgproccnt\n");
771 (void)chgproccnt(0, 1);
772
773 /*
774 * Allocate a kernel submap for pageable memory
775 * for temporary copying (execve()).
776 */
777 {
778 vm_offset_t minimum;
779
780 bsd_init_kprintf("calling kmem_suballoc\n");
781 assert(bsd_pageable_map_size != 0);
782 ret = kmem_suballoc(kernel_map,
783 &minimum,
784 (vm_size_t)bsd_pageable_map_size,
785 TRUE,
786 VM_FLAGS_ANYWHERE,
787 VM_MAP_KERNEL_FLAGS_NONE,
788 VM_KERN_MEMORY_BSD,
789 &bsd_pageable_map);
790 if (ret != KERN_SUCCESS) {
791 panic("bsd_init: Failed to allocate bsd pageable map");
792 }
793 }
794
795 bsd_init_kprintf("calling fpxlog_init\n");
796 fpxlog_init();
797
798 /*
799 * Initialize buffers and hash links for buffers
800 *
801 * SIDE EFFECT: Starts a thread for bcleanbuf_thread(), so must
802 * happen after a credential has been associated with
803 * the kernel task.
804 */
805 bsd_init_kprintf("calling bsd_bufferinit\n");
806 bsd_bufferinit();
807
808 /*
809 * Initialize the calendar.
810 */
811 bsd_init_kprintf("calling IOKitInitializeTime\n");
812 IOKitInitializeTime();
813
814 /* Initialize the file systems. */
815 bsd_init_kprintf("calling vfsinit\n");
816 vfsinit();
817
818 /* Initialize file locks. */
819 bsd_init_kprintf("calling lf_init\n");
820 lf_init();
821
822 #if CONFIG_PROC_UUID_POLICY
823 /* Initial proc_uuid_policy subsystem */
824 bsd_init_kprintf("calling proc_uuid_policy_init()\n");
825 proc_uuid_policy_init();
826 #endif
827
828 #if SOCKETS
829 /* Initialize per-CPU cache allocator */
830 mcache_init();
831
832 /* Initialize mbuf's. */
833 bsd_init_kprintf("calling mbinit\n");
834 mbinit();
835 net_str_id_init(); /* for mbuf tags */
836 restricted_in_port_init();
837 #endif /* SOCKETS */
838
839 /*
840 * Initializes security event auditing.
841 * XXX: Should/could this occur later?
842 */
843 #if CONFIG_AUDIT
844 bsd_init_kprintf("calling audit_init\n");
845 audit_init();
846 #endif
847
848 /* Initialize kqueues */
849 bsd_init_kprintf("calling knote_init\n");
850 knote_init();
851
852 /* Initialize event handler */
853 bsd_init_kprintf("calling eventhandler_init\n");
854 eventhandler_init();
855
856 /* Initialize for async IO */
857 bsd_init_kprintf("calling aio_init\n");
858 aio_init();
859
860 /* Initialize SysV shm subsystem locks; the subsystem proper is
861 * initialized through a sysctl.
862 */
863 #if SYSV_SHM
864 bsd_init_kprintf("calling sysv_shm_lock_init\n");
865 sysv_shm_lock_init();
866 #endif
867 #if SYSV_SEM
868 bsd_init_kprintf("calling sysv_sem_lock_init\n");
869 sysv_sem_lock_init();
870 #endif
871 #if SYSV_MSG
872 bsd_init_kprintf("sysv_msg_lock_init\n");
873 sysv_msg_lock_init();
874 #endif
875 bsd_init_kprintf("calling pshm_lock_init\n");
876 pshm_lock_init();
877 bsd_init_kprintf("calling psem_lock_init\n");
878 psem_lock_init();
879
880 pthread_init();
881 /* POSIX Shm and Sem */
882 bsd_init_kprintf("calling pshm_cache_init\n");
883 pshm_cache_init();
884 bsd_init_kprintf("calling psem_cache_init\n");
885 psem_cache_init();
886 bsd_init_kprintf("calling time_zone_slock_init\n");
887 time_zone_slock_init();
888 bsd_init_kprintf("calling select_waitq_init\n");
889 select_waitq_init();
890
891 /*
892 * Initialize protocols. Block reception of incoming packets
893 * until everything is ready.
894 */
895 #if NETWORKING
896 bsd_init_kprintf("calling nwk_wq_init\n");
897 nwk_wq_init();
898 bsd_init_kprintf("calling dlil_init\n");
899 dlil_init();
900 bsd_init_kprintf("calling proto_kpi_init\n");
901 proto_kpi_init();
902 #endif /* NETWORKING */
903 #if SOCKETS
904 bsd_init_kprintf("calling socketinit\n");
905 socketinit();
906 bsd_init_kprintf("calling domaininit\n");
907 domaininit();
908 iptap_init();
909 #if FLOW_DIVERT
910 flow_divert_init();
911 #endif /* FLOW_DIVERT */
912 #endif /* SOCKETS */
913 #if NETWORKING
914 #if NECP
915 /* Initialize Network Extension Control Policies */
916 necp_init();
917 #endif
918 netagent_init();
919 #endif /* NETWORKING */
920 kernproc->p_fd->fd_cdir = NULL;
921 kernproc->p_fd->fd_rdir = NULL;
922
923 #if CONFIG_FREEZE
924 #ifndef CONFIG_MEMORYSTATUS
925 #error "CONFIG_FREEZE defined without matching CONFIG_MEMORYSTATUS"
926 #endif
927 /* Initialise background freezing */
928 bsd_init_kprintf("calling memorystatus_freeze_init\n");
929 memorystatus_freeze_init();
930 #endif
931
932 #if CONFIG_MEMORYSTATUS
933 /* Initialize kernel memory status notifications */
934 bsd_init_kprintf("calling memorystatus_init\n");
935 memorystatus_init();
936 #endif /* CONFIG_MEMORYSTATUS */
937
938 bsd_init_kprintf("calling acct_init\n");
939 acct_init();
940
941 bsd_init_kprintf("calling sysctl_mib_init\n");
942 sysctl_mib_init();
943
944 bsd_init_kprintf("calling bsd_autoconf\n");
945 bsd_autoconf();
946
947 bsd_init_kprintf("calling os_reason_init\n");
948 os_reason_init();
949
950 #if CONFIG_DTRACE
951 dtrace_postinit();
952 #endif
953
954 /*
955 * We attach the loopback interface *way* down here to ensure
956 * it happens after autoconf(), otherwise it becomes the
957 * "primary" interface.
958 */
959 #include <loop.h>
960 #if NLOOP > 0
961 bsd_init_kprintf("calling loopattach\n");
962 loopattach(); /* XXX */
963 #endif
964 #if NGIF
965 /* Initialize gif interface (after lo0) */
966 gif_init();
967 #endif
968
969 #if PFLOG
970 /* Initialize packet filter log interface */
971 pfloginit();
972 #endif /* PFLOG */
973
974 #if NETHER > 0
975 /* Register the built-in dlil ethernet interface family */
976 bsd_init_kprintf("calling ether_family_init\n");
977 ether_family_init();
978 #endif /* ETHER */
979
980 #if NETWORKING
981 #if CONTENT_FILTER
982 cfil_init();
983 #endif
984
985 #if PACKET_MANGLER
986 pkt_mnglr_init();
987 #endif
988
989 /*
990 * Register subsystems with kernel control handlers
991 */
992 utun_register_control();
993 #if IPSEC
994 ipsec_register_control();
995 #endif /* IPSEC */
996 netsrc_init();
997 nstat_init();
998 tcp_cc_init();
999 #if MPTCP
1000 mptcp_control_register();
1001 #endif /* MPTCP */
1002
1003 /*
1004 * The networking stack is now initialized so it is a good time to call
1005 * the clients that are waiting for the networking stack to be usable.
1006 */
1007 bsd_init_kprintf("calling net_init_run\n");
1008 net_init_run();
1009 #endif /* NETWORKING */
1010
1011 bsd_init_kprintf("calling inittodr\n");
1012 inittodr(0);
1013
1014 /* Mount the root file system. */
1015 while (TRUE) {
1016 int err;
1017
1018 bsd_init_kprintf("calling setconf\n");
1019 setconf();
1020 #if CONFIG_NETBOOT
1021 netboot = (mountroot == netboot_mountroot);
1022 #endif
1023
1024 bsd_init_kprintf("vfs_mountroot\n");
1025 if (0 == (err = vfs_mountroot())) {
1026 break;
1027 }
1028 rootdevice[0] = '\0';
1029 #if CONFIG_NETBOOT
1030 if (netboot) {
1031 PE_display_icon( 0, "noroot"); /* XXX a netboot-specific icon would be nicer */
1032 vc_progress_set(FALSE, 0);
1033 for (i = 1; 1; i *= 2) {
1034 printf("bsd_init: failed to mount network root, error %d, %s\n",
1035 err, PE_boot_args());
1036 printf("We are hanging here...\n");
1037 IOSleep(i * 60 * 1000);
1038 }
1039 /*NOTREACHED*/
1040 }
1041 #endif
1042 printf("cannot mount root, errno = %d\n", err);
1043 boothowto |= RB_ASKNAME;
1044 }
1045
1046 IOSecureBSDRoot(rootdevice);
1047
1048 context.vc_thread = current_thread();
1049 context.vc_ucred = kernproc->p_ucred;
1050 mountlist.tqh_first->mnt_flag |= MNT_ROOTFS;
1051
1052 bsd_init_kprintf("calling VFS_ROOT\n");
1053 /* Get the vnode for '/'. Set fdp->fd_fd.fd_cdir to reference it. */
1054 if (VFS_ROOT(mountlist.tqh_first, &init_rootvnode, &context)) {
1055 panic("bsd_init: cannot find root vnode: %s", PE_boot_args());
1056 }
1057 (void)vnode_ref(init_rootvnode);
1058 (void)vnode_put(init_rootvnode);
1059
1060 lck_rw_lock_exclusive(rootvnode_rw_lock);
1061 set_rootvnode(init_rootvnode);
1062 lck_rw_unlock_exclusive(rootvnode_rw_lock);
1063 init_rootvnode = NULLVP; /* use rootvnode after this point */
1064
1065
1066 if (!bsd_rooted_ramdisk()) {
1067 #if CONFIG_IMAGEBOOT
1068 #if XNU_TARGET_OS_OSX && defined(__arm64__)
1069 /* Apple Silicon MacOS */
1070 if (!imageboot_desired()) {
1071 /* enforce sealedness */
1072 int autherr = VNOP_IOCTL(rootvnode, FSIOC_KERNEL_ROOTAUTH, NULL, 0, vfs_context_kernel());
1073 if (autherr) {
1074 panic("rootvp not authenticated after mounting \n");
1075 }
1076 }
1077 #endif // TARGET_OS_OSX && arm64
1078 #endif // config_imageboot
1079 /* Otherwise, noop */
1080 }
1081
1082
1083 #if CONFIG_NETBOOT
1084 if (netboot) {
1085 int err;
1086
1087 netboot = TRUE;
1088 /* post mount setup */
1089 if ((err = netboot_setup()) != 0) {
1090 PE_display_icon( 0, "noroot"); /* XXX a netboot-specific icon would be nicer */
1091 vc_progress_set(FALSE, 0);
1092 for (i = 1; 1; i *= 2) {
1093 printf("bsd_init: NetBoot could not find root, error %d: %s\n",
1094 err, PE_boot_args());
1095 printf("We are hanging here...\n");
1096 IOSleep(i * 60 * 1000);
1097 }
1098 /*NOTREACHED*/
1099 }
1100 }
1101 #endif
1102
1103
1104 #if CONFIG_IMAGEBOOT
1105 #if CONFIG_LOCKERBOOT
1106 /*
1107 * Stash the protoboot vnode, mount, filesystem name, and device name for
1108 * later use. Note that the mount-from name may not have the "/dev/"
1109 * component, so we must sniff out this condition and add it as needed.
1110 */
1111 pbvn = rootvnode;
1112 pbmnt = pbvn->v_mount;
1113 pbdevp = vfs_statfs(pbmnt)->f_mntfromname;
1114 slash_dev = strnstr(pbdevp, "/dev/", strlen(pbdevp));
1115 if (slash_dev) {
1116 /*
1117 * If the old root is a snapshot mount, it will have the form:
1118 *
1119 * com.apple.os.update-<boot manifest hash>@<dev node path>
1120 *
1121 * So we just search the mntfromname for any occurrence of "/dev/" and
1122 * grab that as the device path. The image boot code needs a dev node to
1123 * do the re-mount, so we cannot directly mount the snapshot as the
1124 * protoboot volume currently.
1125 */
1126 strlcpy(pbdevpath, slash_dev, sizeof(pbdevpath));
1127 } else {
1128 snprintf(pbdevpath, sizeof(pbdevpath), "/dev/%s", pbdevp);
1129 }
1130
1131 bsd_init_kprintf("protoboot mount-from: %s\n", pbdevp);
1132 bsd_init_kprintf("protoboot dev path: %s\n", pbdevpath);
1133
1134 strlcpy(pbfsname, pbmnt->mnt_vtable->vfc_name, sizeof(pbfsname));
1135 #endif
1136
1137 /*
1138 * See if a system disk image is present. If so, mount it and
1139 * switch the root vnode to point to it
1140 */
1141 imageboot_type_t imageboot_type = imageboot_needed();
1142 if (netboot == FALSE && imageboot_type) {
1143 /*
1144 * An image was found. No turning back: we're booted
1145 * with a kernel from the disk image.
1146 */
1147 bsd_init_kprintf("doing image boot: type = %d\n", imageboot_type);
1148 imageboot_setup(imageboot_type);
1149 }
1150
1151 #if CONFIG_LOCKERBOOT
1152 if (imageboot_type == IMAGEBOOT_LOCKER) {
1153 bsd_init_kprintf("booting from locker\n");
1154 if (vnode_tag(rootvnode) != VT_LOCKERFS) {
1155 panic("root filesystem not a locker: fsname = %s",
1156 rootvnode->v_mount->mnt_vtable->vfc_name);
1157 }
1158 }
1159 #endif /* CONFIG_LOCKERBOOT */
1160 #endif /* CONFIG_IMAGEBOOT */
1161
1162 /* set initial time; all other resource data is already zero'ed */
1163 microtime_with_abstime(&kernproc->p_start, &kernproc->p_stats->ps_start);
1164
1165 #if DEVFS
1166 {
1167 char mounthere[] = "/dev"; /* !const because of internal casting */
1168
1169 bsd_init_kprintf("calling devfs_kernel_mount\n");
1170 devfs_kernel_mount(mounthere);
1171 }
1172 #endif /* DEVFS */
1173
1174 #if CONFIG_BASESYSTEMROOT
1175 #if CONFIG_IMAGEBOOT
1176 if (bsdmgroot_bootable()) {
1177 int error;
1178 bool rooted_dmg = false;
1179
1180 printf("trying to find and mount BaseSystem dmg as root volume\n");
1181 #if DEVELOPMENT || DEBUG
1182 printf("(set boot-arg -nobsdmgroot to avoid this)\n");
1183 #endif // DEVELOPMENT || DEBUG
1184
1185 char *dmgpath = NULL;
1186 dmgpath = zalloc_flags(ZV_NAMEI, Z_ZERO | Z_WAITOK);
1187 if (dmgpath == NULL) {
1188 panic("%s: M_NAMEI zone exhausted", __FUNCTION__);
1189 }
1190
1191 error = bsd_find_basesystem_dmg(dmgpath, &rooted_dmg);
1192 if (error) {
1193 bsd_init_kprintf("failed to to find BaseSystem dmg: error = %d\n", error);
1194 } else {
1195 PE_parse_boot_argn("bsdmgpath", dmgpath, sizeof(dmgpath));
1196
1197 bsd_init_kprintf("found BaseSystem dmg at: %s\n", dmgpath);
1198
1199 error = imageboot_pivot_image(dmgpath, IMAGEBOOT_DMG, "/System/Volumes/BaseSystem", "System/Volumes/macOS", rooted_dmg);
1200 if (error) {
1201 bsd_init_kprintf("couldn't mount BaseSystem dmg: error = %d", error);
1202 }
1203 }
1204 zfree(ZV_NAMEI, dmgpath);
1205 }
1206 #else /* CONFIG_IMAGEBOOT */
1207 #error CONFIG_BASESYSTEMROOT requires CONFIG_IMAGEBOOT
1208 #endif /* CONFIG_IMAGEBOOT */
1209 #endif /* CONFIG_BASESYSTEMROOT */
1210
1211 #if CONFIG_LOCKERBOOT
1212 /*
1213 * We need to wait until devfs is up before remounting the protoboot volume
1214 * within the locker so that it can have a real devfs vnode backing it.
1215 */
1216 if (imageboot_type == IMAGEBOOT_LOCKER) {
1217 bsd_init_kprintf("re-mounting protoboot volume\n");
1218 int error = mount_locker_protoboot(pbfsname, LOCKER_PROTOBOOT_MOUNT,
1219 pbdevpath);
1220 if (error) {
1221 panic("failed to mount protoboot volume: dev path = %s, error = %d",
1222 pbdevpath, error);
1223 }
1224 }
1225 #endif /* CONFIG_LOCKERBOOT */
1226
1227 /* Initialize signal state for process 0. */
1228 bsd_init_kprintf("calling siginit\n");
1229 siginit(kernproc);
1230
1231 bsd_init_kprintf("calling bsd_utaskbootstrap\n");
1232 bsd_utaskbootstrap();
1233
1234 pal_kernel_announce();
1235
1236 bsd_init_kprintf("calling mountroot_post_hook\n");
1237
1238 #if XNU_TARGET_OS_OSX
1239 /* invoke post-root-mount hook */
1240 if (mountroot_post_hook != NULL) {
1241 mountroot_post_hook();
1242 }
1243 #endif
1244
1245 #if 0 /* not yet */
1246 consider_zone_gc(FALSE);
1247 #endif
1248
1249 /* Initialize System Override call */
1250 init_system_override();
1251
1252 bsd_init_kprintf("done\n");
1253 }
1254
1255 void
1256 bsdinit_task(void)
1257 {
1258 proc_t p = current_proc();
1259
1260 process_name("init", p);
1261
1262 /* Set up exception-to-signal reflection */
1263 ux_handler_setup();
1264
1265 #if CONFIG_MACF
1266 mac_cred_label_associate_user(p->p_ucred);
1267 #endif
1268
1269 vm_init_before_launchd();
1270
1271 #if CONFIG_XNUPOST
1272 int result = bsd_list_tests();
1273 result = bsd_do_post();
1274 if (result != 0) {
1275 panic("bsd_do_post: Tests failed with result = 0x%08x\n", result);
1276 }
1277 #endif
1278
1279 bsd_init_kprintf("bsd_do_post - done");
1280
1281 load_init_program(p);
1282 lock_trace = 1;
1283 }
1284
1285 kern_return_t
1286 bsd_autoconf(void)
1287 {
1288 kprintf("bsd_autoconf: calling kminit\n");
1289 kminit();
1290
1291 /*
1292 * Early startup for bsd pseudodevices.
1293 */
1294 {
1295 struct pseudo_init *pi;
1296
1297 for (pi = pseudo_inits; pi->ps_func; pi++) {
1298 (*pi->ps_func)(pi->ps_count);
1299 }
1300 }
1301
1302 return IOKitBSDInit();
1303 }
1304
1305
1306 #include <sys/disklabel.h> /* for MAXPARTITIONS */
1307
1308 static void
1309 setconf(void)
1310 {
1311 u_int32_t flags;
1312 kern_return_t err;
1313
1314 err = IOFindBSDRoot(rootdevice, sizeof(rootdevice), &rootdev, &flags);
1315 if (err) {
1316 printf("setconf: IOFindBSDRoot returned an error (%d);"
1317 "setting rootdevice to 'sd0a'.\n", err); /* XXX DEBUG TEMP */
1318 rootdev = makedev( 6, 0 );
1319 strlcpy(rootdevice, "sd0a", sizeof(rootdevice));
1320 flags = 0;
1321 }
1322
1323 #if CONFIG_NETBOOT
1324 if (flags & 1) {
1325 /* network device */
1326 mountroot = netboot_mountroot;
1327 } else {
1328 #endif
1329 /* otherwise have vfs determine root filesystem */
1330 mountroot = NULL;
1331 #if CONFIG_NETBOOT
1332 }
1333 #endif
1334 }
1335
1336 /*
1337 * Boot into the flavor of Recovery dictated by `mode`.
1338 */
1339 boolean_t
1340 bsd_boot_to_recovery(bsd_bootfail_mode_t mode, uuid_t volume_uuid, boolean_t reboot)
1341 {
1342 return IOSetRecoveryBoot(mode, volume_uuid, reboot);
1343 }
1344
1345 void
1346 bsd_utaskbootstrap(void)
1347 {
1348 thread_t thread;
1349 struct uthread *ut;
1350
1351 /*
1352 * Clone the bootstrap process from the kernel process, without
1353 * inheriting either task characteristics or memory from the kernel;
1354 */
1355 thread = cloneproc(TASK_NULL, COALITION_NULL, kernproc, FALSE, TRUE);
1356
1357 /* Hold the reference as it will be dropped during shutdown */
1358 initproc = proc_find(1);
1359 #if __PROC_INTERNAL_DEBUG
1360 if (initproc == PROC_NULL) {
1361 panic("bsd_utaskbootstrap: initproc not set\n");
1362 }
1363 #endif
1364 /*
1365 * Since we aren't going back out the normal way to our parent,
1366 * we have to drop the transition locks explicitly.
1367 */
1368 proc_signalend(initproc, 0);
1369 proc_transend(initproc, 0);
1370
1371 ut = (struct uthread *)get_bsdthread_info(thread);
1372 ut->uu_sigmask = 0;
1373 act_set_astbsd(thread);
1374 task_clear_return_wait(get_threadtask(thread), TCRW_CLEAR_ALL_WAIT);
1375 }
1376
1377 static void
1378 parse_bsd_args(void)
1379 {
1380 char namep[48];
1381 int msgbuf;
1382
1383 if (PE_parse_boot_argn("-s", namep, sizeof(namep))) {
1384 boothowto |= RB_SINGLE;
1385 }
1386
1387 if (PE_parse_boot_argn("-x", namep, sizeof(namep))) { /* safe boot */
1388 boothowto |= RB_SAFEBOOT;
1389 }
1390
1391 if (PE_parse_boot_argn("-minimalboot", namep, sizeof(namep))) {
1392 /*
1393 * -minimalboot indicates that we want userspace to be bootstrapped to a
1394 * minimal environment. What constitutes minimal is up to the bootstrap
1395 * process.
1396 */
1397 minimalboot = 1;
1398 }
1399
1400 #if __x86_64__
1401 int no32exec;
1402
1403 /* disable 32 bit grading */
1404 if (PE_parse_boot_argn("no32exec", &no32exec, sizeof(no32exec))) {
1405 bootarg_no32exec = !!no32exec;
1406 }
1407 #endif
1408
1409 int execfailure_crashreports;
1410 /* enable crash reports on various exec failures */
1411 if (PE_parse_boot_argn("execfailurecrashes", &execfailure_crashreports, sizeof(execfailure_crashreports))) {
1412 bootarg_execfailurereports = !!execfailure_crashreports;
1413 }
1414
1415 /* disable vnode_cache_is_authorized() by setting vnode_cache_defeat */
1416 if (PE_parse_boot_argn("-vnode_cache_defeat", namep, sizeof(namep))) {
1417 bootarg_vnode_cache_defeat = 1;
1418 }
1419
1420 #if DEVELOPMENT || DEBUG
1421 if (PE_parse_boot_argn("-disable_aslr", namep, sizeof(namep))) {
1422 bootarg_disable_aslr = 1;
1423 }
1424 #endif
1425
1426
1427
1428 PE_parse_boot_argn("ncl", &ncl, sizeof(ncl));
1429 if (PE_parse_boot_argn("nbuf", &max_nbuf_headers,
1430 sizeof(max_nbuf_headers))) {
1431 customnbuf = 1;
1432 }
1433
1434 #if CONFIG_MACF
1435 #if defined (__i386__) || defined (__x86_64__)
1436 PE_parse_boot_argn("policy_check", &policy_check_flags, sizeof(policy_check_flags));
1437 #endif
1438 #endif /* CONFIG_MACF */
1439
1440 if (PE_parse_boot_argn("msgbuf", &msgbuf, sizeof(msgbuf))) {
1441 log_setsize(msgbuf);
1442 oslog_setsize(msgbuf);
1443 }
1444
1445 if (PE_parse_boot_argn("-novfscache", namep, sizeof(namep))) {
1446 nc_disabled = 1;
1447 }
1448
1449 #if CONFIG_JETSAM && (DEVELOPMENT || DEBUG)
1450 if (PE_parse_boot_argn("-no_vnode_jetsam", namep, sizeof(namep))) {
1451 bootarg_no_vnode_jetsam = 1;
1452 }
1453 #endif /* CONFIG_JETSAM && (DEVELOPMENT || DEBUG) */
1454
1455 if (PE_parse_boot_argn("-no_vnode_drain", namep, sizeof(namep))) {
1456 bootarg_no_vnode_drain = 1;
1457 }
1458
1459 #if CONFIG_DARKBOOT
1460 /*
1461 * The darkboot flag is specified by the bootloader and is stored in
1462 * boot_args->bootFlags. This flag is available starting revision 2.
1463 */
1464 boot_args *args = (boot_args *) PE_state.bootArgs;
1465 if ((args != NULL) && (args->Revision >= kBootArgsRevision2)) {
1466 darkboot = (args->bootFlags & kBootFlagsDarkBoot) ? 1 : 0;
1467 } else {
1468 darkboot = 0;
1469 }
1470 #endif
1471
1472 #if PROC_REF_DEBUG
1473 if (PE_parse_boot_argn("-disable_procref_tracking", namep, sizeof(namep))) {
1474 proc_ref_tracking_disabled = 1;
1475 }
1476 #endif
1477
1478 #if OS_REASON_DEBUG
1479 if (PE_parse_boot_argn("-disable_osreason_debug", namep, sizeof(namep))) {
1480 os_reason_debug_disabled = 1;
1481 }
1482 #endif
1483
1484 PE_parse_boot_argn("sigrestrict", &sigrestrict_arg, sizeof(sigrestrict_arg));
1485
1486 #if DEVELOPMENT || DEBUG
1487 if (PE_parse_boot_argn("-no_sigsys", namep, sizeof(namep))) {
1488 send_sigsys = false;
1489 }
1490
1491 if (PE_parse_boot_argn("alt-dyld", dyld_alt_path, sizeof(dyld_alt_path))) {
1492 if (strlen(dyld_alt_path) > 0) {
1493 use_alt_dyld = 1;
1494 }
1495 }
1496 PE_parse_boot_argn("dyld_flags", &dyld_flags, sizeof(dyld_flags));
1497
1498 if (PE_parse_boot_argn("-disable_syscallfilter", &namep, sizeof(namep))) {
1499 syscallfilter_disable = 1;
1500 }
1501
1502 #if __arm64__
1503 if (PE_parse_boot_argn("legacy_footprint_entitlement_mode", &legacy_footprint_entitlement_mode, sizeof(legacy_footprint_entitlement_mode))) {
1504 /*
1505 * legacy_footprint_entitlement_mode specifies the behavior we want associated
1506 * with the entitlement. The supported modes are:
1507 *
1508 * LEGACY_FOOTPRINT_ENTITLEMENT_IGNORE:
1509 * Indicates that we want every process to have the memory accounting
1510 * that is available in iOS 12.0 and beyond.
1511 *
1512 * LEGACY_FOOTPRINT_ENTITLEMENT_IOS11_ACCT:
1513 * Indicates that for every process that has the 'legacy footprint entitlement',
1514 * we want to give it the old iOS 11.0 accounting behavior which accounted some
1515 * of the process's memory to the kernel.
1516 *
1517 * LEGACY_FOOTPRINT_ENTITLEMENT_LIMIT_INCREASE:
1518 * Indicates that for every process that has the 'legacy footprint entitlement',
1519 * we want it to have a higher memory limit which will help them acclimate to the
1520 * iOS 12.0 (& beyond) accounting behavior that does the right accounting.
1521 * The bonus added to the system-wide task limit to calculate this higher memory limit
1522 * is available in legacy_footprint_bonus_mb.
1523 */
1524
1525 if (legacy_footprint_entitlement_mode < LEGACY_FOOTPRINT_ENTITLEMENT_IGNORE ||
1526 legacy_footprint_entitlement_mode > LEGACY_FOOTPRINT_ENTITLEMENT_LIMIT_INCREASE) {
1527 legacy_footprint_entitlement_mode = LEGACY_FOOTPRINT_ENTITLEMENT_LIMIT_INCREASE;
1528 }
1529 }
1530 #endif /* __arm64__ */
1531 #endif /* DEVELOPMENT || DEBUG */
1532 }
1533
1534 #if CONFIG_BASESYSTEMROOT
1535
1536 extern const char* IOGetBootUUID(void);
1537 extern const char* IOGetApfsPrebootUUID(void);
1538
1539 // Get the UUID of the Preboot (and Recovery) folder associated with the
1540 // current boot volume, if applicable. The meaning of the UUID can be
1541 // filesystem-dependent and not all kinds of boots will have a UUID.
1542 // If available, the string will be returned. It does not need to be
1543 // deallocate. (Future: if we need to return the string as a copy that the
1544 // caller must free, we'll introduce a new functcion for that.)
1545 // NULL will be returned if the current boot has no applicable Preboot UUID.
1546 static
1547 const char *
1548 get_preboot_uuid(void)
1549 {
1550 const char *maybe_uuid_string;
1551
1552 // try IOGetApfsPrebootUUID
1553 maybe_uuid_string = IOGetApfsPrebootUUID();
1554 if (maybe_uuid_string) {
1555 uuid_t maybe_uuid;
1556 int error = uuid_parse(maybe_uuid_string, maybe_uuid);
1557 if (error == 0) {
1558 return maybe_uuid_string;
1559 }
1560 }
1561
1562 // try IOGetBootUUID
1563 maybe_uuid_string = IOGetBootUUID();
1564 if (maybe_uuid_string) {
1565 uuid_t maybe_uuid;
1566 int error = uuid_parse(maybe_uuid_string, maybe_uuid);
1567 if (error == 0) {
1568 return maybe_uuid_string;
1569 }
1570 }
1571
1572 // didn't find it
1573 return NULL;
1574 }
1575
1576 #if defined(__arm64__)
1577 extern const char *IOGetBootObjectsPath(void);
1578 #endif
1579
1580 // Find the BaseSystem.dmg to be used as the initial root volume during certain
1581 // kinds of boots.
1582 // This may mount volumes and lookup vnodes.
1583 // The DEVELOPMENT kernel will look for BaseSystem.rooted.dmg first.
1584 // If it returns 0 (no error), then it also writes the absolute path to the
1585 // BaseSystem.dmg into its argument (which must be a char[MAXPATHLEN]).
1586 static
1587 int
1588 bsd_find_basesystem_dmg(char *bsdmgpath_out, bool *rooted_dmg)
1589 {
1590 int error;
1591 size_t len;
1592 char *dmgbasepath;
1593 char *dmgpath;
1594
1595 dmgbasepath = zalloc_flags(ZV_NAMEI, Z_ZERO | Z_WAITOK);
1596 dmgpath = zalloc_flags(ZV_NAMEI, Z_ZERO | Z_WAITOK);
1597 vnode_t imagevp = NULLVP;
1598
1599 //must provide output bool
1600 if (rooted_dmg) {
1601 *rooted_dmg = false;
1602 } else {
1603 error = EINVAL;
1604 goto done;
1605 }
1606
1607 error = vfs_mount_recovery();
1608 if (error) {
1609 goto done;
1610 }
1611
1612 len = strlcpy(dmgbasepath, "/System/Volumes/Recovery/", MAXPATHLEN);
1613 if (len > MAXPATHLEN) {
1614 error = ENAMETOOLONG;
1615 goto done;
1616 }
1617
1618 #if defined(__arm64__)
1619 const char *boot_obj_path = IOGetBootObjectsPath();
1620 if (boot_obj_path) {
1621 if (boot_obj_path[0] == '/') {
1622 dmgbasepath[len - 1] = '\0';
1623 }
1624
1625 len = strlcat(dmgbasepath, boot_obj_path, MAXPATHLEN);
1626 if (len > MAXPATHLEN) {
1627 error = ENAMETOOLONG;
1628 goto done;
1629 }
1630
1631 len = strlcat(dmgbasepath, "/usr/standalone/firmware/", MAXPATHLEN);
1632 if (len > MAXPATHLEN) {
1633 error = ENAMETOOLONG;
1634 goto done;
1635 }
1636
1637 #if DEVELOPMENT || DEBUG
1638 len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
1639 if (len > MAXPATHLEN) {
1640 error = ENAMETOOLONG;
1641 goto done;
1642 }
1643
1644 len = strlcat(dmgpath, "arm64eBaseSystem.rooted.dmg", MAXPATHLEN);
1645 if (len > MAXPATHLEN) {
1646 error = ENAMETOOLONG;
1647 goto done;
1648 }
1649
1650 error = vnode_lookup(dmgpath, 0, &imagevp, vfs_context_kernel());
1651 if (error == 0) {
1652 *rooted_dmg = true;
1653 goto done;
1654 }
1655 memset(dmgpath, 0, MAXPATHLEN);
1656 #endif // DEVELOPMENT || DEBUG
1657
1658 len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
1659 if (len > MAXPATHLEN) {
1660 error = ENAMETOOLONG;
1661 goto done;
1662 }
1663
1664 len = strlcat(dmgpath, "arm64eBaseSystem.dmg", MAXPATHLEN);
1665 if (len > MAXPATHLEN) {
1666 error = ENAMETOOLONG;
1667 goto done;
1668 }
1669
1670 error = vnode_lookup(dmgpath, 0, &imagevp, vfs_context_kernel());
1671 if (error == 0) {
1672 goto done;
1673 }
1674 memset(dmgpath, 0, MAXPATHLEN);
1675 dmgbasepath[strlen("/System/Volumes/Recovery/")] = '\0';
1676 }
1677 #endif // __arm64__
1678
1679 const char *preboot_uuid = get_preboot_uuid();
1680 if (preboot_uuid == NULL) {
1681 // no preboot? bail out
1682 return EINVAL;
1683 }
1684
1685 len = strlcat(dmgbasepath, preboot_uuid, MAXPATHLEN);
1686 if (len > MAXPATHLEN) {
1687 error = ENAMETOOLONG;
1688 goto done;
1689 }
1690
1691 #if DEVELOPMENT || DEBUG
1692 // Try BaseSystem.rooted.dmg
1693 len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
1694 if (len > MAXPATHLEN) {
1695 error = ENAMETOOLONG;
1696 goto done;
1697 }
1698
1699 len = strlcat(dmgpath, "/BaseSystem.rooted.dmg", MAXPATHLEN);
1700 if (len > MAXPATHLEN) {
1701 error = ENAMETOOLONG;
1702 goto done;
1703 }
1704
1705 error = vnode_lookup(dmgpath, 0, &imagevp, vfs_context_kernel());
1706 if (error == 0) {
1707 // we found it! success!
1708 *rooted_dmg = true;
1709 goto done;
1710 }
1711 #endif // DEVELOPMENT || DEBUG
1712
1713 // Try BaseSystem.dmg
1714 len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
1715 if (len > MAXPATHLEN) {
1716 error = ENAMETOOLONG;
1717 goto done;
1718 }
1719
1720 len = strlcat(dmgpath, "/BaseSystem.dmg", MAXPATHLEN);
1721 if (len > MAXPATHLEN) {
1722 error = ENAMETOOLONG;
1723 goto done;
1724 }
1725
1726 error = vnode_lookup(dmgpath, 0, &imagevp, vfs_context_kernel());
1727 if (error == 0) {
1728 // success!
1729 goto done;
1730 }
1731
1732 done:
1733 if (error == 0) {
1734 strlcpy(bsdmgpath_out, dmgpath, MAXPATHLEN);
1735 } else {
1736 bsd_init_kprintf("%s: error %d\n", __func__, error);
1737 }
1738 if (imagevp != NULLVP) {
1739 vnode_put(imagevp);
1740 }
1741 zfree(ZV_NAMEI, dmgpath);
1742 zfree(ZV_NAMEI, dmgbasepath);
1743 return error;
1744 }
1745
1746 static boolean_t
1747 bsdmgroot_bootable(void)
1748 {
1749 #if defined(__arm64__)
1750 #define BSDMGROOT_DEFAULT true
1751 #else
1752 #define BSDMGROOT_DEFAULT false
1753 #endif
1754
1755 boolean_t resolved = BSDMGROOT_DEFAULT;
1756
1757 boolean_t boot_arg_bsdmgroot = false;
1758 boolean_t boot_arg_nobsdmgroot = false;
1759 int error;
1760 mount_t mp;
1761 boolean_t root_part_of_volume_group = false;
1762 struct vfs_attr vfsattr;
1763
1764 mp = rootvnode->v_mount;
1765 VFSATTR_INIT(&vfsattr);
1766 VFSATTR_WANTED(&vfsattr, f_capabilities);
1767
1768 boot_arg_bsdmgroot = PE_parse_boot_argn("-bsdmgroot", NULL, 0);
1769 boot_arg_nobsdmgroot = PE_parse_boot_argn("-nobsdmgroot", NULL, 0);
1770
1771 error = vfs_getattr(mp, &vfsattr, vfs_context_kernel());
1772 if (!error && VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
1773 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS) &&
1774 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS)) {
1775 root_part_of_volume_group = true;
1776 }
1777 }
1778
1779 boolean_t singleuser = (boothowto & RB_SINGLE) != 0;
1780
1781 // Start with the #defined default above.
1782 // If booting to single-user mode, default to false, because single-
1783 // user mode inside the BaseSystem is probably not what's wanted.
1784 // If the 'yes' boot-arg is set, we'll allow that even in single-user
1785 // mode, we'll assume you know what you're doing.
1786 // The 'no' boot-arg overpowers the 'yes' boot-arg.
1787 // In any case, we will not attempt to root from BaseSystem if the
1788 // original (booter-chosen) root volume isn't in a volume group.
1789 // This is just out of an abundance of caution: if the boot environment
1790 // seems to be "something other than a standard install",
1791 // we'll be conservative in messing with the root volume.
1792
1793 if (singleuser) {
1794 resolved = false;
1795 }
1796
1797 if (boot_arg_bsdmgroot) {
1798 resolved = true;
1799 }
1800
1801 if (boot_arg_nobsdmgroot) {
1802 resolved = false;
1803 }
1804
1805 if (!root_part_of_volume_group) {
1806 resolved = false;
1807 }
1808
1809 return resolved;
1810 }
1811 #endif // CONFIG_BASESYSTEMROOT
1812
1813 void
1814 bsd_exec_setup(int scale)
1815 {
1816 switch (scale) {
1817 case 0:
1818 case 1:
1819 bsd_simul_execs = BSD_SIMUL_EXECS;
1820 break;
1821 case 2:
1822 case 3:
1823 bsd_simul_execs = 65;
1824 break;
1825 case 4:
1826 case 5:
1827 bsd_simul_execs = 129;
1828 break;
1829 case 6:
1830 case 7:
1831 bsd_simul_execs = 257;
1832 break;
1833 default:
1834 bsd_simul_execs = 513;
1835 break;
1836 }
1837 bsd_pageable_map_size = (bsd_simul_execs * BSD_PAGEABLE_SIZE_PER_EXEC);
1838 }
1839
1840 #if !CONFIG_NETBOOT
/*
 * Stand-in used when the kernel is built without CONFIG_NETBOOT:
 * always returns 0.
 */
int netboot_root(void);

int
netboot_root(void)
{
	const int not_netbooted = 0;

	return not_netbooted;
}
1849 #endif