+/*
+ * Raise an EXC_GUARD exception against the calling task by handing it to
+ * task_enqueue_exception_with_corpse().
+ *
+ * code, subcode: placed in the first two exception code slots.
+ * reason:        forwarded unchanged to the enqueue routine.
+ *
+ * Returns KERN_NOT_SUPPORTED when the caller is pid 1 (MACH_BSD builds only);
+ * otherwise returns whatever task_enqueue_exception_with_corpse() returns.
+ */
+static kern_return_t __attribute__((noinline,not_tail_called))
+PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(
+	mach_exception_code_t code,
+	mach_exception_subcode_t subcode,
+	void *reason)
+{
+#ifdef MACH_BSD
+	/* initproc is immune */
+	if (proc_selfpid() == 1) {
+		return KERN_NOT_SUPPORTED;
+	}
+#endif
+	mach_exception_data_type_t exc_codes[EXCEPTION_CODE_MAX] = {
+		[0] = code,
+		[1] = subcode,
+	};
+	task_t self = current_task();
+	kern_return_t result;
+
+	/*
+	 * Bracket the delivery with proc_memstat_terminated(TRUE/FALSE);
+	 * see the jetsam-related comments further down in this file.
+	 */
+	proc_memstat_terminated(self->bsd_info, TRUE);
+	result = task_enqueue_exception_with_corpse(self, EXC_GUARD, exc_codes, 2, reason);
+	proc_memstat_terminated(self->bsd_info, FALSE);
+
+	return result;
+}
+
+extern kern_return_t
+task_violated_guard(mach_exception_code_t, mach_exception_subcode_t, void *);
+
+/*
+ * Externally visible entry point for reporting a guard violation.
+ * Forwards all three arguments to
+ * PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND() and returns its result.
+ */
+kern_return_t
+task_violated_guard(
+	mach_exception_code_t code,
+	mach_exception_subcode_t subcode,
+	void *reason)
+{
+	kern_return_t kr;
+
+	kr = PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(code, subcode, reason);
+	return kr;
+}
+
+
+#if CONFIG_MEMORYSTATUS
+
+/*
+ * Report whether the task's memlimit_is_active field is set.
+ * Returns TRUE when the field equals 1, FALSE otherwise.
+ */
+boolean_t
+task_get_memlimit_is_active(task_t task)
+{
+	assert (task != NULL);
+
+	return (task->memlimit_is_active == 1) ? TRUE : FALSE;
+}
+
+/*
+ * Record in the task whether its memory limit is the "active" one.
+ * Any non-zero memlimit_is_active is stored as 1, zero is stored as 0.
+ */
+void
+task_set_memlimit_is_active(task_t task, boolean_t memlimit_is_active)
+{
+	assert (task != NULL);
+
+	task->memlimit_is_active = memlimit_is_active ? 1 : 0;
+}
+
+/*
+ * Report whether the task's memlimit_is_fatal field is set.
+ * Returns TRUE when the field equals 1, FALSE otherwise.
+ */
+boolean_t
+task_get_memlimit_is_fatal(task_t task)
+{
+	assert(task != NULL);
+
+	return (task->memlimit_is_fatal == 1) ? TRUE : FALSE;
+}
+
+/*
+ * Record in the task whether its memory limit is fatal.
+ * Any non-zero memlimit_is_fatal is stored as 1, zero is stored as 0.
+ */
+void
+task_set_memlimit_is_fatal(task_t task, boolean_t memlimit_is_fatal)
+{
+	assert (task != NULL);
+
+	task->memlimit_is_fatal = memlimit_is_fatal ? 1 : 0;
+}
+
+/*
+ * Tell whether the current task has already triggered an exc_resource
+ * exception for the selected limit.
+ *
+ * memlimit_is_active selects which per-task marker is consulted: the
+ * active-limit marker when non-zero, the inactive-limit marker otherwise.
+ * The task must be the current task.
+ */
+boolean_t
+task_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
+{
+	assert(task == current_task());
+
+	if (memlimit_is_active) {
+		return (task->memlimit_active_exc_resource ? TRUE : FALSE);
+	}
+
+	return (task->memlimit_inactive_exc_resource ? TRUE : FALSE);
+}
+
+/*
+ * Remember that the current task has triggered an exc_resource exception.
+ *
+ * One exc_resource is allowed per process per active/inactive limit, so the
+ * marker that gets set is chosen by memlimit_is_active. Whether the limit is
+ * fatal plays no part here. The task must be the current task.
+ */
+void
+task_mark_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
+{
+	assert(task == current_task());
+
+	if (!memlimit_is_active) {
+		task->memlimit_inactive_exc_resource = 1;
+		return;
+	}
+
+	task->memlimit_active_exc_resource = 1;
+}
+
+#define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
+
+/*
+ * Handle the current task crossing its memory high watermark.
+ *
+ * max_footprint_mb: the limit that was crossed, in MB; encoded into the
+ *                   EXC_RESOURCE code and printed in log messages.
+ * is_fatal:         non-zero when the violated limit is fatal; a fatal
+ *                   violation never takes the corpse path.
+ *
+ * Steps, in order:
+ *   1. MACH_BSD: return immediately for pid 1.
+ *   2. CONFIG_COREDUMP: if hwm_user_cores is set, take a core dump and log
+ *      how long it took.
+ *   3. If disable_exc_resource is set, log and return without an exception.
+ *   4. Otherwise deliver EXC_RESOURCE, either synchronously with the task
+ *      suspended or through task_enqueue_exception_with_corpse(), with the
+ *      memstat "terminated" marking set for the duration.
+ */
+void __attribute__((noinline))
+PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal)
+{
+	task_t task = current_task();
+	int pid = 0;
+	const char *procname = "unknown";
+	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
+
+#ifdef MACH_BSD
+	pid = proc_selfpid();
+
+	if (pid == 1) {
+		/*
+		 * Cannot have ReportCrash analyzing
+		 * a suspended initproc.
+		 */
+		return;
+	}
+
+	if (task->bsd_info != NULL) {
+		procname = proc_name_address(current_task()->bsd_info);
+	}
+#endif
+#if CONFIG_COREDUMP
+	if (hwm_user_cores) {
+		int error;
+		uint64_t starttime, end;
+		clock_sec_t secs = 0;
+		uint32_t microsecs = 0;
+
+		starttime = mach_absolute_time();
+		/*
+		 * Trigger a coredump of this process. Don't proceed unless we know we won't
+		 * be filling up the disk; and ignore the core size resource limit for this
+		 * core file.
+		 */
+		if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
+			printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
+		}
+		/*
+		 * coredump() leaves the task suspended.
+		 */
+		task_resume_internal(current_task());
+
+		end = mach_absolute_time();
+		/* Pass the address of microsecs so the conversion can fill it in. */
+		absolutetime_to_microtime(end - starttime, &secs, &microsecs);
+		/* Reuse the already-resolved procname, as the failure message above does. */
+		printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
+		    procname, pid, (int)secs, (int)microsecs);
+	}
+#endif /* CONFIG_COREDUMP */
+
+	if (disable_exc_resource) {
+		printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
+		    "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
+		return;
+	}
+
+	/*
+	 * A task that has triggered an EXC_RESOURCE, should not be
+	 * jetsammed when the device is under memory pressure. Here
+	 * we set the P_MEMSTAT_TERMINATED flag so that the process
+	 * will be skipped if the memorystatus_thread wakes up.
+	 */
+	proc_memstat_terminated(current_task()->bsd_info, TRUE);
+
+	code[0] = code[1] = 0;
+	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
+	EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
+	EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
+
+	/* Do not generate a corpse fork if the violation is a fatal one */
+	if (is_fatal || exc_via_corpse_forking == 0) {
+		/* Do not send an EXC_RESOURCE if corpse_for_fatal_memkill is set */
+		if (corpse_for_fatal_memkill == 0) {
+			/*
+			 * Use the _internal_ variant so that no user-space
+			 * process can resume our task from under us.
+			 */
+			task_suspend_internal(task);
+			exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
+			task_resume_internal(task);
+		}
+	} else {
+		task_enqueue_exception_with_corpse(task, EXC_RESOURCE,
+		    code, EXCEPTION_CODE_MAX, NULL);
+	}
+
+	/*
+	 * After the EXC_RESOURCE has been handled, we must clear the
+	 * P_MEMSTAT_TERMINATED flag so that the process can again be
+	 * considered for jetsam if the memorystatus_thread wakes up.
+	 */
+	proc_memstat_terminated(current_task()->bsd_info, FALSE); /* clear the flag */
+}
+
+/*
+ * Ledger callback for the physical footprint entry of the current task.
+ *
+ * warning selects the event:
+ *   LEDGER_WARNING_DIPPED_BELOW - ignored; limits only warn on the way up.
+ *   LEDGER_WARNING_ROSE_ABOVE   - the task passed a percentage level of the
+ *                                 limit; treated as a warning.
+ *   anything else               - the limit itself was exceeded.
+ * param0 and param1 are unused.
+ */
+void
+task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
+{
+	ledger_amount_t limit_bytes, limit_mb;
+	task_t self;
+	boolean_t is_warning;
+	boolean_t limit_is_active;
+	boolean_t limit_is_fatal;
+
+	if (warning == LEDGER_WARNING_DIPPED_BELOW) {
+		/* Nothing to report when the footprint drops back under the level. */
+		return;
+	}
+
+	/* Rising above the warning level is a warning; everything else is a real violation. */
+	is_warning = (warning == LEDGER_WARNING_ROSE_ABOVE) ? TRUE : FALSE;
+
+	self = current_task();
+
+	ledger_get_limit(self->ledger, task_ledgers.phys_footprint, &limit_bytes);
+	limit_mb = limit_bytes >> 20;
+
+	limit_is_active = task_get_memlimit_is_active(self);
+	limit_is_fatal = task_get_memlimit_is_fatal(self);
+
+	/*
+	 * A true violation raises EXC_RESOURCE, but only once per process per
+	 * memlimit (active/inactive). The per-limit "triggered" marker is what
+	 * enforces this: it is checked here and set once the exception is sent.
+	 */
+	if (!is_warning && !task_has_triggered_exc_resource(self, limit_is_active)) {
+		PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)limit_mb, limit_is_fatal);
+		memorystatus_log_exception((int)limit_mb, limit_is_active, limit_is_fatal);
+		task_mark_has_triggered_exc_resource(self, limit_is_active);
+	}
+
+	memorystatus_on_ledger_footprint_exceeded(is_warning, limit_is_active, limit_is_fatal);
+}
+
+extern int proc_check_footprint_priv(void);
+
+/*
+ * Set a task's physical footprint limit, subject to a privilege check.
+ *
+ * task:         task whose limit is changed.
+ * new_limit_mb: new limit in MB.
+ * old_limit_mb: passed through to the internal routine.
+ *
+ * Returns KERN_NO_ACCESS when proc_check_footprint_priv() reports an error;
+ * otherwise the result of task_set_phys_footprint_limit_internal(), called
+ * with the task's current active/fatal attributes.
+ */
+kern_return_t
+task_set_phys_footprint_limit(
+	task_t task,
+	int new_limit_mb,
+	int *old_limit_mb)
+{
+	boolean_t is_active;
+	boolean_t is_fatal;
+
+	if (proc_check_footprint_priv() != 0) {
+		return (KERN_NO_ACCESS);
+	}
+
+	/*
+	 * This call should probably be obsoleted.
+	 * Until then, carry the task's current attributes forward.
+	 */
+	is_active = task_get_memlimit_is_active(task);
+	is_fatal = task_get_memlimit_is_fatal(task);
+
+	return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, is_active, is_fatal);
+}
+
+/*
+ * Translate a footprint limit in MB into the value actually in effect.
+ *
+ * limit_mb:           requested limit; -1 means "no limit".
+ * converted_limit_mb: out parameter. For -1 it receives max_task_footprint
+ *                     in MB when that is non-zero, else
+ *                     LEDGER_LIMIT_INFINITY >> 20. Any other limit_mb is
+ *                     copied through unchanged.
+ *
+ * Always returns KERN_SUCCESS.
+ */
+kern_return_t
+task_convert_phys_footprint_limit(
+	int limit_mb,
+	int *converted_limit_mb)
+{
+	if (limit_mb != -1) {
+		/* nothing to convert */
+		*converted_limit_mb = limit_mb;
+		return (KERN_SUCCESS);
+	}
+
+	/* No limit requested: fall back to the system-wide default, if any. */
+	if (max_task_footprint == 0) {
+		*converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
+	} else {
+		*converted_limit_mb = (int)(max_task_footprint / 1024 / 1024); /* bytes to MB */
+	}
+
+	return (KERN_SUCCESS);
+}
+
+
+/*
+ * Install a new physical footprint limit and memlimit attributes on a task.
+ *
+ * task:               task to update.
+ * new_limit_mb:       new limit in MB; -1 removes the per-task limit.
+ * old_limit_mb:       optional out parameter; receives the previous limit
+ *                     in MB when non-NULL.
+ * memlimit_is_active: active/inactive attribute to record.
+ * memlimit_is_fatal:  fatal/non-fatal attribute to record.
+ *
+ * Always returns KERN_SUCCESS. When CONFIG_NOMONITORS is defined, only the
+ * "remove the limit" request has any effect.
+ */
+kern_return_t
+task_set_phys_footprint_limit_internal(
+	task_t task,
+	int new_limit_mb,
+	int *old_limit_mb,
+	boolean_t memlimit_is_active,
+	boolean_t memlimit_is_fatal)
+{
+	ledger_amount_t prev_limit;
+	ledger_amount_t requested_limit;
+
+	ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &prev_limit);
+
+	/*
+	 * Make sure (limit >> 20) cannot yield an "unexpected" 32-bit value.
+	 * Note the implicit assumption that a -1 MB limit corresponds to
+	 * LEDGER_LIMIT_INFINITY.
+	 */
+	assert(((prev_limit & 0xFFF0000000000000LL) == 0) || (prev_limit == LEDGER_LIMIT_INFINITY));
+
+	if (old_limit_mb != NULL) {
+		*old_limit_mb = (int)(prev_limit >> 20);
+	}
+
+	if (new_limit_mb == -1) {
+		/*
+		 * The caller wants the limit removed: revert to the system-wide
+		 * maximum if one is configured, otherwise to no limit at all.
+		 */
+		ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
+		    max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
+		    max_task_footprint ? max_task_footprint_warning_level : 0);
+
+		task_lock(task);
+		task_set_memlimit_is_active(task, memlimit_is_active);
+		task_set_memlimit_is_fatal(task, memlimit_is_fatal);
+		task_unlock(task);
+
+		return (KERN_SUCCESS);
+	}
+
+#ifdef CONFIG_NOMONITORS
+	return (KERN_SUCCESS);
+#endif /* CONFIG_NOMONITORS */
+
+	requested_limit = (ledger_amount_t)new_limit_mb << 20;
+
+	task_lock(task);
+
+	if ((memlimit_is_active == task_get_memlimit_is_active(task)) &&
+	    (memlimit_is_fatal == task_get_memlimit_is_fatal(task)) &&
+	    (requested_limit == prev_limit)) {
+		/* Neither the attributes nor the limit change; nothing to do. */
+		task_unlock(task);
+		return (KERN_SUCCESS);
+	}
+
+	task_set_memlimit_is_active(task, memlimit_is_active);
+	task_set_memlimit_is_fatal(task, memlimit_is_fatal);
+
+	ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
+	    requested_limit, PHYS_FOOTPRINT_WARNING_LEVEL);
+
+	/* Only the current task has its balance re-checked against the new limit here. */
+	if (task == current_task()) {
+		ledger_check_new_balance(task->ledger, task_ledgers.phys_footprint);
+	}
+
+	task_unlock(task);
+
+	return (KERN_SUCCESS);
+}
+
+/*
+ * Fetch a task's physical footprint limit.
+ *
+ * task:     task to query.
+ * limit_mb: out parameter; receives the ledger limit shifted down to MB.
+ *
+ * Always returns KERN_SUCCESS.
+ */
+kern_return_t
+task_get_phys_footprint_limit(
+	task_t task,
+	int *limit_mb)
+{
+	ledger_amount_t footprint_limit;
+
+	ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &footprint_limit);
+
+	/*
+	 * Make sure (limit >> 20) cannot yield an "unexpected" signed, 32-bit
+	 * value. Note the implicit assumption that a -1 MB limit corresponds
+	 * to LEDGER_LIMIT_INFINITY.
+	 */
+	assert(((footprint_limit & 0xFFF0000000000000LL) == 0) || (footprint_limit == LEDGER_LIMIT_INFINITY));
+
+	*limit_mb = (int)(footprint_limit >> 20);
+	return (KERN_SUCCESS);
+}
+#else /* CONFIG_MEMORYSTATUS */
+/*
+ * Stub used when CONFIG_MEMORYSTATUS is not enabled: footprint limits
+ * cannot be set, so every argument is ignored and the call fails.
+ */
+kern_return_t
+task_set_phys_footprint_limit(
+	__unused task_t task,
+	__unused int new_limit_mb,
+	__unused int *old_limit_mb)
+{
+	return KERN_FAILURE;
+}
+
+/*
+ * Stub used when CONFIG_MEMORYSTATUS is not enabled: there is no footprint
+ * limit to report, so every argument is ignored and the call fails.
+ */
+kern_return_t
+task_get_phys_footprint_limit(
+	__unused task_t task,
+	__unused int *limit_mb)
+{
+	return KERN_FAILURE;
+}
+#endif /* CONFIG_MEMORYSTATUS */
+