/*
- * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <sys/event.h> /* for knote_init() */
#include <sys/eventhandler.h> /* for eventhandler_init() */
#include <sys/kern_memorystatus.h> /* for memorystatus_init() */
+#include <sys/kern_memorystatus_freeze.h> /* for memorystatus_freeze_init() */
#include <sys/aio_kern.h> /* for aio_init() */
#include <sys/semaphore.h> /* for psem_cache_init() */
#include <net/dlil.h> /* for dlil_init() */
#include <netinet/tcp_cc.h> /* for tcp_cc_init() */
#include <netinet/mptcp_var.h> /* for mptcp_control_register() */
#include <net/nwk_wq.h> /* for nwk_wq_init */
+#include <net/restricted_in_port.h> /* for restricted_in_port_init() */
#include <kern/assert.h> /* for assert() */
#include <sys/kern_overrides.h> /* for init_system_override() */
+#include <sys/lockf.h> /* for lf_init() */
#include <net/init.h>
#include <machine/exec.h>
-#if NFSCLIENT
+#if CONFIG_NETBOOT
#include <sys/netboot.h>
#endif
long dumplo; /* offset into dumpdev */
long hostid;
char hostname[MAXHOSTNAMELEN];
-int hostnamelen;
+lck_mtx_t hostname_lock;
+lck_grp_t *hostname_lck_grp;
char domainname[MAXDOMNAMELEN];
-int domainnamelen;
+lck_mtx_t domainname_lock;
char rootdevice[DEVMAXNAMESIZE];
#endif
struct vnode *rootvp;
-int boothowto = RB_DEBUG;
+int boothowto;
int minimalboot = 0;
#if CONFIG_EMBEDDED
int darkboot = 0;
#endif
+#if __arm64__
+int legacy_footprint_entitlement_mode = LEGACY_FOOTPRINT_ENTITLEMENT_IGNORE;
+#endif /* __arm64__ */
+
#if PROC_REF_DEBUG
__private_extern__ int proc_ref_tracking_disabled = 0; /* disable panics on leaked proc refs across syscall boundary */
#endif
extern void throttle_init(void);
extern void acct_init(void);
+#if CONFIG_LOCKERBOOT
+#define LOCKER_PROTOBOOT_MOUNT "/protoboot"
+
+const char kernel_protoboot_mount[] = LOCKER_PROTOBOOT_MOUNT;
+extern int mount_locker_protoboot(const char *fsname, const char *mntpoint,
+ const char *pbdevpath);
+#endif
+
extern int serverperfmode;
extern int ncl;
+#if DEVELOPMENT || DEBUG
+extern int syscallfilter_disable;
+#endif // DEVELOPMENT || DEBUG
vm_map_t bsd_pageable_map;
vm_map_t mb_map;
void bsd_exec_setup(int);
-#if __arm64__
-__private_extern__ int bootarg_no64exec = 0;
-#endif
+__private_extern__ int bootarg_execfailurereports = 0;
+
#if __x86_64__
-__private_extern__ int bootarg_no32exec = 0;
+__private_extern__ int bootarg_no32exec = 1;
#endif
__private_extern__ int bootarg_vnode_cache_defeat = 0;
__private_extern__ int bootarg_no_vnode_jetsam = 0;
#endif /* CONFIG_JETSAM && (DEVELOPMENT || DEBUG) */
+__private_extern__ int bootarg_no_vnode_drain = 0;
+
/*
* Prevent kernel-based ASLR from being used, for testing.
*/
#if DEVELOPMENT || DEBUG
char dyld_alt_path[MAXPATHLEN];
int use_alt_dyld = 0;
+extern uint64_t dyld_flags;
#endif
int cmask = CMASK;
/* To allow these values to be patched, they're globals here */
#include <machine/vmparam.h>
-struct rlimit vm_initial_limit_stack = { DFLSSIZ, MAXSSIZ - PAGE_MAX_SIZE };
-struct rlimit vm_initial_limit_data = { DFLDSIZ, MAXDSIZ };
-struct rlimit vm_initial_limit_core = { DFLCSIZ, MAXCSIZ };
+struct rlimit vm_initial_limit_stack = { .rlim_cur = DFLSSIZ, .rlim_max = MAXSSIZ - PAGE_MAX_SIZE };
+struct rlimit vm_initial_limit_data = { .rlim_cur = DFLDSIZ, .rlim_max = MAXDSIZ };
+struct rlimit vm_initial_limit_core = { .rlim_cur = DFLCSIZ, .rlim_max = MAXCSIZ };
extern thread_t cloneproc(task_t, coalition_t, proc_t, int, int);
extern int (*mountroot)(void);
lck_grp_t * proc_knhashlock_grp;
lck_grp_t * proc_ucred_mlock_grp;
lck_grp_t * proc_mlock_grp;
+lck_grp_t * proc_dirslock_grp;
lck_grp_attr_t * proc_lck_grp_attr;
lck_attr_t * proc_lck_attr;
lck_mtx_t * proc_list_mlock;
kern_return_t ret;
struct ucred temp_cred;
struct posix_cred temp_pcred;
-#if NFSCLIENT || CONFIG_IMAGEBOOT
+#if CONFIG_NETBOOT || CONFIG_IMAGEBOOT
boolean_t netboot = FALSE;
#endif
+#if CONFIG_LOCKERBOOT
+ vnode_t pbvn = NULLVP;
+ mount_t pbmnt = NULL;
+ char *pbdevp = NULL;
+ char pbdevpath[64];
+ char pbfsname[MFSNAMELEN];
+ char *slash_dev = NULL;
+#endif
-#define bsd_init_kprintf(x...) /* kprintf("bsd_init: " x) */
+#define DEBUG_BSDINIT 0
+
+#if DEBUG_BSDINIT
+#define bsd_init_kprintf(x, ...) kprintf("bsd_init: " x, ## __VA_ARGS__)
+#else
+#define bsd_init_kprintf(x, ...)
+#endif
throttle_init();
proc_fdmlock_grp = lck_grp_alloc_init("proc-fdmlock", proc_lck_grp_attr);
proc_kqhashlock_grp = lck_grp_alloc_init("proc-kqhashlock", proc_lck_grp_attr);
proc_knhashlock_grp = lck_grp_alloc_init("proc-knhashlock", proc_lck_grp_attr);
+ proc_dirslock_grp = lck_grp_alloc_init("proc-dirslock", proc_lck_grp_attr);
#if CONFIG_XNUPOST
sysctl_debug_test_stackshot_owner_grp = lck_grp_alloc_init("test-stackshot-owner-grp", LCK_GRP_ATTR_NULL);
sysctl_debug_test_stackshot_owner_init_mtx = lck_mtx_alloc_init(
lck_mtx_init(&kernproc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr);
lck_mtx_init(&kernproc->p_ucred_mlock, proc_ucred_mlock_grp, proc_lck_attr);
lck_spin_init(&kernproc->p_slock, proc_slock_grp, proc_lck_attr);
+ lck_rw_init(&kernproc->p_dirs_lock, proc_dirslock_grp, proc_lck_attr);
assert(bsd_simul_execs != 0);
execargs_cache_lock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
ulock_initialize();
+ hostname_lck_grp = lck_grp_alloc_init("hostname", LCK_GRP_ATTR_NULL);
+ lck_mtx_init(&hostname_lock, hostname_lck_grp, LCK_ATTR_NULL);
+ lck_mtx_init(&domainname_lock, hostname_lck_grp, LCK_ATTR_NULL);
+
/*
* Create process 0.
*/
/* Create the file descriptor table. */
kernproc->p_fd = &filedesc0;
filedesc0.fd_cmask = cmask;
- filedesc0.fd_knlistsize = -1;
+ filedesc0.fd_knlistsize = 0;
filedesc0.fd_knlist = NULL;
filedesc0.fd_knhash = NULL;
filedesc0.fd_knhashmask = 0;
bsd_init_kprintf("calling vfsinit\n");
vfsinit();
+ /* Initialize file locks. */
+ bsd_init_kprintf("calling lf_init\n");
+ lf_init();
+
#if CONFIG_PROC_UUID_POLICY
/* Initialize the proc_uuid_policy subsystem */
bsd_init_kprintf("calling proc_uuid_policy_init()\n");
bsd_init_kprintf("calling mbinit\n");
mbinit();
net_str_id_init(); /* for mbuf tags */
+ restricted_in_port_init();
#endif /* SOCKETS */
/*
bsd_init_kprintf("calling acct_init\n");
acct_init();
-#ifdef GPROF
- /* Initialize kernel profiling. */
- kmstartup();
-#endif
-
bsd_init_kprintf("calling sysctl_mib_init\n");
- sysctl_mib_init()
+ sysctl_mib_init();
bsd_init_kprintf("calling bsd_autoconf\n");
bsd_autoconf();
bsd_init_kprintf("calling setconf\n");
setconf();
-#if NFSCLIENT
+#if CONFIG_NETBOOT
netboot = (mountroot == netboot_mountroot);
#endif
break;
}
rootdevice[0] = '\0';
-#if NFSCLIENT
+#if CONFIG_NETBOOT
if (netboot) {
PE_display_icon( 0, "noroot"); /* XXX a netboot-specific icon would be nicer */
vc_progress_set(FALSE, 0);
(void)vnode_put(rootvnode);
filedesc0.fd_cdir = rootvnode;
-#if NFSCLIENT
+#if CONFIG_NETBOOT
if (netboot) {
int err;
#if CONFIG_IMAGEBOOT
+#if CONFIG_LOCKERBOOT
+ /*
+ * Stash the protoboot vnode, mount, filesystem name, and device name for
+ * later use. Note that the mount-from name may not have the "/dev/"
+ * component, so we must sniff out this condition and add it as needed.
+ */
+ pbvn = rootvnode;
+ pbmnt = pbvn->v_mount;
+ pbdevp = vfs_statfs(pbmnt)->f_mntfromname;
+ slash_dev = strnstr(pbdevp, "/dev/", strlen(pbdevp));
+ if (slash_dev) {
+ /*
+ * If the old root is a snapshot mount, it will have the form:
+ *
+ * com.apple.os.update-<boot manifest hash>@<dev node path>
+ *
+ * So we just search the mntfromname for any occurrence of "/dev/" and
+ * grab that as the device path. The image boot code needs a dev node to
+ * do the re-mount, so we cannot directly mount the snapshot as the
+ * protoboot volume currently.
+ */
+ strlcpy(pbdevpath, slash_dev, sizeof(pbdevpath));
+ } else {
+ snprintf(pbdevpath, sizeof(pbdevpath), "/dev/%s", pbdevp);
+ }
+
+ bsd_init_kprintf("protoboot mount-from: %s\n", pbdevp);
+ bsd_init_kprintf("protoboot dev path: %s\n", pbdevpath);
+
+ strlcpy(pbfsname, pbmnt->mnt_vtable->vfc_name, sizeof(pbfsname));
+#endif
/*
* See if a system disk image is present. If so, mount it and
* switch the root vnode to point to it
*/
- if (netboot == FALSE && imageboot_needed()) {
+ imageboot_type_t imageboot_type = imageboot_needed();
+ if (netboot == FALSE && imageboot_type) {
/*
* An image was found. No turning back: we're booted
* with a kernel from the disk image.
*/
- imageboot_setup();
+ bsd_init_kprintf("doing image boot: type = %d\n", imageboot_type);
+ imageboot_setup(imageboot_type);
+ }
+
+#if CONFIG_LOCKERBOOT
+ if (imageboot_type == IMAGEBOOT_LOCKER) {
+ bsd_init_kprintf("booting from locker\n");
+ if (vnode_tag(rootvnode) != VT_LOCKERFS) {
+ panic("root filesystem not a locker: fsname = %s",
+ rootvnode->v_mount->mnt_vtable->vfc_name);
+ }
}
+#endif /* CONFIG_LOCKERBOOT */
#endif /* CONFIG_IMAGEBOOT */
/* set initial time; all other resource data is already zero'ed */
}
#endif /* DEVFS */
+ if (vfs_mount_rosv_data()) {
+ panic("failed to mount data volume!");
+ }
+
+ if (vfs_mount_vm()) {
+ printf("failed to mount vm volume!");
+ }
+
+#if CONFIG_LOCKERBOOT
+ /*
+ * We need to wait until devfs is up before remounting the protoboot volume
+ * within the locker so that it can have a real devfs vnode backing it.
+ */
+ if (imageboot_type == IMAGEBOOT_LOCKER) {
+ bsd_init_kprintf("re-mounting protoboot volume\n");
+ int error = mount_locker_protoboot(pbfsname, LOCKER_PROTOBOOT_MOUNT,
+ pbdevpath);
+ if (error) {
+ panic("failed to mount protoboot volume: dev path = %s, error = %d",
+ pbdevpath, error);
+ }
+ }
+#endif /* CONFIG_LOCKERBOOT */
+
/* Initialize signal state for process 0. */
bsd_init_kprintf("calling siginit\n");
siginit(kernproc);
flags = 0;
}
-#if NFSCLIENT
+#if CONFIG_NETBOOT
if (flags & 1) {
/* network device */
mountroot = netboot_mountroot;
#endif
/* otherwise have vfs determine root filesystem */
mountroot = NULL;
-#if NFSCLIENT
+#if CONFIG_NETBOOT
}
#endif
}
ut = (struct uthread *)get_bsdthread_info(thread);
ut->uu_sigmask = 0;
act_set_astbsd(thread);
- task_clear_return_wait(get_threadtask(thread));
+ task_clear_return_wait(get_threadtask(thread), TCRW_CLEAR_ALL_WAIT);
}
static void
parse_bsd_args(void)
{
- char namep[16];
+ char namep[48];
int msgbuf;
if (PE_parse_boot_argn("-s", namep, sizeof(namep))) {
boothowto |= RB_SINGLE;
}
- if (PE_parse_boot_argn("-b", namep, sizeof(namep))) {
- boothowto |= RB_NOBOOTRC;
- }
-
if (PE_parse_boot_argn("-x", namep, sizeof(namep))) { /* safe boot */
boothowto |= RB_SAFEBOOT;
}
minimalboot = 1;
}
-#if __arm64__
- /* disable 64 bit grading */
- if (PE_parse_boot_argn("-no64exec", namep, sizeof(namep))) {
- bootarg_no64exec = 1;
- }
-#endif
#if __x86_64__
+ int no32exec;
+
/* disable 32 bit grading (the default); the no32exec=<int> boot-arg overrides it, 0 clears the flag */
- if (PE_parse_boot_argn("-no32exec", namep, sizeof(namep))) {
- bootarg_no32exec = 1;
+ if (PE_parse_boot_argn("no32exec", &no32exec, sizeof(no32exec))) {
+ bootarg_no32exec = !!no32exec;
}
#endif
+ int execfailure_crashreports;
+ /* enable crash reports on various exec failures */
+ if (PE_parse_boot_argn("execfailurecrashes", &execfailure_crashreports, sizeof(execfailure_crashreports))) {
+ bootarg_execfailurereports = !!execfailure_crashreports;
+ }
+
/* disable vnode_cache_is_authorized() by setting vnode_cache_defeat */
if (PE_parse_boot_argn("-vnode_cache_defeat", namep, sizeof(namep))) {
bootarg_vnode_cache_defeat = 1;
}
#endif /* CONFIG_JETSAM && (DEVELOPMENT || DEBUG) */
+ if (PE_parse_boot_argn("-no_vnode_drain", namep, sizeof(namep))) {
+ bootarg_no_vnode_drain = 1;
+ }
#if CONFIG_EMBEDDED
/*
if (PE_parse_boot_argn("-no_sigsys", namep, sizeof(namep))) {
send_sigsys = false;
}
-#endif
-#if (DEVELOPMENT || DEBUG)
if (PE_parse_boot_argn("alt-dyld", dyld_alt_path, sizeof(dyld_alt_path))) {
if (strlen(dyld_alt_path) > 0) {
use_alt_dyld = 1;
}
}
-#endif
+ PE_parse_boot_argn("dyld_flags", &dyld_flags, sizeof(dyld_flags));
+
+ if (PE_parse_boot_argn("-disable_syscallfilter", &namep, sizeof(namep))) {
+ syscallfilter_disable = 1;
+ }
+
+#if __arm64__
+ if (PE_parse_boot_argn("legacy_footprint_entitlement_mode", &legacy_footprint_entitlement_mode, sizeof(legacy_footprint_entitlement_mode))) {
+ /*
+ * legacy_footprint_entitlement_mode specifies the behavior we want associated
+ * with the entitlement. The supported modes are:
+ *
+ * LEGACY_FOOTPRINT_ENTITLEMENT_IGNORE:
+ * Indicates that we want every process to have the memory accounting
+ * that is available in iOS 12.0 and beyond.
+ *
+ * LEGACY_FOOTPRINT_ENTITLEMENT_IOS11_ACCT:
+ * Indicates that for every process that has the 'legacy footprint entitlement',
+ * we want to give it the old iOS 11.0 accounting behavior which accounted some
+ * of the process's memory to the kernel.
+ *
+ * LEGACY_FOOTPRINT_ENTITLEMENT_LIMIT_INCREASE:
+ * Indicates that for every process that has the 'legacy footprint entitlement',
+ * we want it to have a higher memory limit which will help it acclimate to the
+ * iOS 12.0 (& beyond) accounting behavior that does the right accounting.
+ * The bonus added to the system-wide task limit to calculate this higher memory limit
+ * is available in legacy_footprint_bonus_mb.
+ */
+
+ if (legacy_footprint_entitlement_mode < LEGACY_FOOTPRINT_ENTITLEMENT_IGNORE ||
+ legacy_footprint_entitlement_mode > LEGACY_FOOTPRINT_ENTITLEMENT_LIMIT_INCREASE) {
+ legacy_footprint_entitlement_mode = LEGACY_FOOTPRINT_ENTITLEMENT_LIMIT_INCREASE;
+ }
+ }
+#endif /* __arm64__ */
+#endif /* DEVELOPMENT || DEBUG */
}
void
bsd_pageable_map_size = (bsd_simul_execs * BSD_PAGEABLE_SIZE_PER_EXEC);
}
-#if !NFSCLIENT
+#if !CONFIG_NETBOOT
int
netboot_root(void);