+ } else {
+ if (m->vmp_pmapped == FALSE) {
+ ppnum_t phys_page = VM_PAGE_GET_PHYS_PAGE(m);
+
+ pmap_lock_phys_page(phys_page);
+ m->vmp_pmapped = TRUE;
+ pmap_unlock_phys_page(phys_page);
+ }
+ }
+
+ if (fault_type & VM_PROT_WRITE) {
+ if (m->vmp_wpmapped == FALSE) {
+ vm_object_lock_assert_exclusive(object);
+ if (!object->internal && object->pager) {
+ task_update_logical_writes(current_task(), PAGE_SIZE, TASK_WRITE_DEFERRED, vnode_pager_lookup_vnode(object->pager));
+ }
+ m->vmp_wpmapped = TRUE;
+ }
+ }
+ return page_needs_sync;
+}
+
+/*
+ * Try to enter the given page into the pmap.
+ * Will retry without execute permission iff PMAP_CS is enabled and we encounter
+ * a codesigning failure on a non-execute fault.
+ */
+static kern_return_t
+vm_fault_attempt_pmap_enter(
+ pmap_t pmap,
+ vm_map_offset_t vaddr,
+ vm_map_size_t fault_page_size,
+ vm_map_offset_t fault_phys_offset,
+ vm_page_t m,
+ vm_prot_t *prot,
+ vm_prot_t caller_prot,
+ vm_prot_t fault_type,
+ bool wired,
+ int pmap_options)
+{
+#if !PMAP_CS
+#pragma unused(caller_prot)
+#endif /* !PMAP_CS */
+ kern_return_t kr;
+ if (fault_page_size != PAGE_SIZE) {
+ DEBUG4K_FAULT("pmap %p va 0x%llx pa 0x%llx (0x%llx+0x%llx) prot 0x%x fault_type 0x%x\n", pmap, (uint64_t)vaddr, (uint64_t)((((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(m)) << PAGE_SHIFT) + fault_phys_offset), (uint64_t)(((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(m)) << PAGE_SHIFT), (uint64_t)fault_phys_offset, *prot, fault_type);
+ assertf((!(fault_phys_offset & FOURK_PAGE_MASK) &&
+ fault_phys_offset < PAGE_SIZE),
+ "0x%llx\n", (uint64_t)fault_phys_offset);
+ } else {
+ assertf(fault_phys_offset == 0,
+ "0x%llx\n", (uint64_t)fault_phys_offset);
+ }
+
+ PMAP_ENTER_OPTIONS(pmap, vaddr,
+ fault_phys_offset,
+ m, *prot, fault_type, 0,
+ wired,
+ pmap_options,
+ kr);
+ return kr;
+}
+
+/*
+ * Enter the given page into the pmap.
+ * The map must be locked shared.
+ * The vm object must NOT be locked.
+ *
+ * @param need_retry if not null, avoid making a (potentially) blocking call into
+ * the pmap layer. When such a call would be necessary, return true in this boolean instead.
+ */
+static kern_return_t
+vm_fault_pmap_enter(
+ pmap_t pmap,
+ vm_map_offset_t vaddr,
+ vm_map_size_t fault_page_size,
+ vm_map_offset_t fault_phys_offset,
+ vm_page_t m,
+ vm_prot_t *prot,
+ vm_prot_t caller_prot,
+ vm_prot_t fault_type,
+ bool wired,
+ int pmap_options,
+ boolean_t *need_retry)
+{
+ kern_return_t kr;
+ if (need_retry != NULL) {
+ /*
+ * Although we don't hold a lock on this object, we hold a lock
+ * on the top object in the chain. To prevent a deadlock, we
+ * can't allow the pmap layer to block.
+ */
+ pmap_options |= PMAP_OPTIONS_NOWAIT;
+ }
+ kr = vm_fault_attempt_pmap_enter(pmap, vaddr,
+ fault_page_size, fault_phys_offset,
+ m, prot, caller_prot, fault_type, wired, pmap_options);
+ if (kr == KERN_RESOURCE_SHORTAGE) {
+ if (need_retry) {
+ /*
+ * There's nothing we can do here since we hold the
+ * lock on the top object in the chain. The caller
+ * will need to deal with this by dropping that lock and retrying.
+ */
+ *need_retry = TRUE;
+ vm_pmap_enter_retried++;
+ }
+ }
+ return kr;
+}
+
+/*
+ * Enter the given page into the pmap.
+ * The vm map must be locked shared.
+ * The vm object must be locked exclusive, unless this is a soft fault.
+ * For a soft fault, the object must be locked shared or exclusive.
+ *
+ * @param need_retry if not null, avoid making a (potentially) blocking call into
+ * the pmap layer. When such a call would be necessary, return true in this boolean instead.
+ */
+static kern_return_t
+vm_fault_pmap_enter_with_object_lock(
+ vm_object_t object,
+ pmap_t pmap,
+ vm_map_offset_t vaddr,
+ vm_map_size_t fault_page_size,
+ vm_map_offset_t fault_phys_offset,
+ vm_page_t m,
+ vm_prot_t *prot,
+ vm_prot_t caller_prot,
+ vm_prot_t fault_type,
+ bool wired,
+ int pmap_options,
+ boolean_t *need_retry)
+{
+ kern_return_t kr;
+ /*
+ * Prevent a deadlock by not
+ * holding the object lock if we need to wait for a page in
+ * pmap_enter() - <rdar://problem/7138958>
+ */
+ kr = vm_fault_attempt_pmap_enter(pmap, vaddr,
+ fault_page_size, fault_phys_offset,
+ m, prot, caller_prot, fault_type, wired, pmap_options | PMAP_OPTIONS_NOWAIT);
+#if __x86_64__
+ if (kr == KERN_INVALID_ARGUMENT &&
+ pmap == PMAP_NULL &&
+ wired) {
+ /*
+ * Wiring a page in a pmap-less VM map:
+ * VMware's "vmmon" kernel extension does this
+ * to grab pages.
+ * Let it proceed even though the PMAP_ENTER() failed.
+ */
+ kr = KERN_SUCCESS;
+ }
+#endif /* __x86_64__ */
+
+ if (kr == KERN_RESOURCE_SHORTAGE) {
+ if (need_retry) {
+ /*
+ * this will be non-null in the case where we hold the lock
+ * on the top-object in this chain... we can't just drop
+ * the lock on the object we're inserting the page into
+ * and recall the PMAP_ENTER since we can still cause
+ * a deadlock if one of the critical paths tries to
+ * acquire the lock on the top-object and we're blocked
+ * in PMAP_ENTER waiting for memory... our only recourse
+ * is to deal with it at a higher level where we can
+ * drop both locks.
+ */
+ *need_retry = TRUE;
+ vm_pmap_enter_retried++;
+ goto done;
+ }
+ /*
+ * The nonblocking version of pmap_enter did not succeed.
+ * and we don't need to drop other locks and retry
+ * at the level above us, so
+ * use the blocking version instead. Requires marking
+ * the page busy and unlocking the object
+ */
+ boolean_t was_busy = m->vmp_busy;
+
+ vm_object_lock_assert_exclusive(object);
+
+ m->vmp_busy = TRUE;
+ vm_object_unlock(object);
+
+ PMAP_ENTER_OPTIONS(pmap, vaddr,
+ fault_phys_offset,
+ m, *prot, fault_type,
+ 0, wired,
+ pmap_options, kr);
+
+ assert(VM_PAGE_OBJECT(m) == object);
+
+ /* Take the object lock again. */
+ vm_object_lock(object);
+
+ /* If the page was busy, someone else will wake it up.
+ * Otherwise, we have to do it now. */
+ assert(m->vmp_busy);
+ if (!was_busy) {
+ PAGE_WAKEUP_DONE(m);
+ }
+ vm_pmap_enter_blocked++;
+ }
+
+done:
+ return kr;
+}
+
+/*
+ * Prepare to enter a page into the pmap by checking CS, protection bits,
+ * and setting mapped bits on the page_t.
+ * Does not modify the page's paging queue.
+ *
+ * page queue lock must NOT be held
+ * m->vmp_object must be locked
+ *
+ * NOTE: m->vmp_object could be locked "shared" only if we are called
+ * from vm_fault() as part of a soft fault.
+ */
+static kern_return_t
+vm_fault_enter_prepare(
+ vm_page_t m,
+ pmap_t pmap,
+ vm_map_offset_t vaddr,
+ vm_prot_t *prot,
+ vm_prot_t caller_prot,
+ vm_map_size_t fault_page_size,
+ vm_map_offset_t fault_phys_offset,
+ boolean_t change_wiring,
+ vm_prot_t fault_type,
+ vm_object_fault_info_t fault_info,
+ int *type_of_fault,
+ bool *page_needs_data_sync)
+{
+ kern_return_t kr;
+ bool is_tainted = false;
+ vm_object_t object;
+ boolean_t cs_bypass = fault_info->cs_bypass;
+
+ object = VM_PAGE_OBJECT(m);
+
+ vm_object_lock_assert_held(object);
+
+#if KASAN
+ if (pmap == kernel_pmap) {
+ kasan_notify_address(vaddr, PAGE_SIZE);
+ }
+#endif
+
+ LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
+
+ if (*type_of_fault == DBG_ZERO_FILL_FAULT) {
+ vm_object_lock_assert_exclusive(object);
+ } else if ((fault_type & VM_PROT_WRITE) == 0 &&
+ !change_wiring &&
+ (!m->vmp_wpmapped
+#if VM_OBJECT_ACCESS_TRACKING
+ || object->access_tracking
+#endif /* VM_OBJECT_ACCESS_TRACKING */
+ )) {
+ /*
+ * This is not a "write" fault, so we
+ * might not have taken the object lock
+ * exclusively and we might not be able
+ * to update the "wpmapped" bit in
+ * vm_fault_enter().
+ * Let's just grant read access to
+ * the page for now and we'll
+ * soft-fault again if we need write
+ * access later...
+ */
+
+ /* This had better not be a JIT page. */
+ if (!pmap_has_prot_policy(pmap, fault_info->pmap_options & PMAP_OPTIONS_TRANSLATED_ALLOW_EXECUTE, *prot)) {
+ *prot &= ~VM_PROT_WRITE;
+ } else {
+ assert(cs_bypass);
+ }
+ }
+ if (m->vmp_pmapped == FALSE) {
+ if (m->vmp_clustered) {
+ if (*type_of_fault == DBG_CACHE_HIT_FAULT) {
+ /*
+ * found it in the cache, but this
+ * is the first fault-in of the page (m->vmp_pmapped == FALSE)
+ * so it must have come in as part of
+ * a cluster... account 1 pagein against it
+ */
+ if (object->internal) {
+ *type_of_fault = DBG_PAGEIND_FAULT;
+ } else {
+ *type_of_fault = DBG_PAGEINV_FAULT;
+ }
+
+ VM_PAGE_COUNT_AS_PAGEIN(m);
+ }
+ VM_PAGE_CONSUME_CLUSTERED(m);
+ }
+ }
+
+ if (*type_of_fault != DBG_COW_FAULT) {
+ DTRACE_VM2(as_fault, int, 1, (uint64_t *), NULL);
+
+ if (pmap == kernel_pmap) {
+ DTRACE_VM2(kernel_asflt, int, 1, (uint64_t *), NULL);
+ }
+ }
+
+ kr = vm_fault_validate_cs(cs_bypass, object, m, pmap, vaddr,
+ *prot, caller_prot, fault_page_size, fault_phys_offset,
+ fault_info, &is_tainted);
+ if (kr == KERN_SUCCESS) {
+ /*
+ * We either have a good page, or a tainted page that has been accepted by the process.
+ * In both cases the page will be entered into the pmap.
+ */
+ *page_needs_data_sync = vm_fault_enter_set_mapped(object, m, *prot, fault_type);
+ if ((fault_type & VM_PROT_WRITE) && is_tainted) {
+ /*
+ * This page is tainted but we're inserting it anyways.
+ * Since it's writeable, we need to disconnect it from other pmaps
+ * now so those processes can take note.
+ */
+
+ /*
+ * We can only get here
+ * because of the CSE logic
+ */
+ assert(pmap_get_vm_map_cs_enforced(pmap));
+ pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
+ /*
+ * If we are faulting for a write, we can clear
+ * the execute bit - that will ensure the page is
+ * checked again before being executable, which
+ * protects against a map switch.
+ * This only happens the first time the page
+ * gets tainted, so we won't get stuck here
+ * to make an already writeable page executable.
+ */
+ if (!cs_bypass) {
+ assert(!pmap_has_prot_policy(pmap, fault_info->pmap_options & PMAP_OPTIONS_TRANSLATED_ALLOW_EXECUTE, *prot));
+ *prot &= ~VM_PROT_EXECUTE;
+ }
+ }
+ assert(VM_PAGE_OBJECT(m) == object);
+
+#if VM_OBJECT_ACCESS_TRACKING
+ if (object->access_tracking) {
+ DTRACE_VM2(access_tracking, vm_map_offset_t, vaddr, int, fault_type);
+ if (fault_type & VM_PROT_WRITE) {
+ object->access_tracking_writes++;
+ vm_object_access_tracking_writes++;
+ } else {
+ object->access_tracking_reads++;
+ vm_object_access_tracking_reads++;
+ }
+ }
+#endif /* VM_OBJECT_ACCESS_TRACKING */
+ }
+
+ return kr;
+}
+
+/*
+ * page queue lock must NOT be held
+ * m->vmp_object must be locked
+ *
+ * NOTE: m->vmp_object could be locked "shared" only if we are called
+ * from vm_fault() as part of a soft fault. If so, we must be
+ * careful not to modify the VM object in any way that is not
+ * legal under a shared lock...
+ */
+kern_return_t
+vm_fault_enter(
+ vm_page_t m,
+ pmap_t pmap,
+ vm_map_offset_t vaddr,
+ vm_map_size_t fault_page_size,
+ vm_map_offset_t fault_phys_offset,
+ vm_prot_t prot,
+ vm_prot_t caller_prot,
+ boolean_t wired,
+ boolean_t change_wiring,
+ vm_tag_t wire_tag,
+ vm_object_fault_info_t fault_info,
+ boolean_t *need_retry,
+ int *type_of_fault)
+{
+ kern_return_t kr;
+ vm_object_t object;
+ bool page_needs_data_sync;
+ vm_prot_t fault_type;
+ int pmap_options = fault_info->pmap_options;
+
+ if (VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr) {
+ assert(m->vmp_fictitious);
+ return KERN_SUCCESS;
+ }
+
+ fault_type = change_wiring ? VM_PROT_NONE : caller_prot;
+
+ kr = vm_fault_enter_prepare(m, pmap, vaddr, &prot, caller_prot,
+ fault_page_size, fault_phys_offset, change_wiring, fault_type,
+ fault_info, type_of_fault, &page_needs_data_sync);
+ object = VM_PAGE_OBJECT(m);
+
+ vm_fault_enqueue_page(object, m, wired, change_wiring, wire_tag, fault_info->no_cache, type_of_fault, kr);
+
+ if (kr == KERN_SUCCESS) {
+ if (page_needs_data_sync) {
+ pmap_sync_page_data_phys(VM_PAGE_GET_PHYS_PAGE(m));
+ }
+
+ kr = vm_fault_pmap_enter_with_object_lock(object, pmap, vaddr,
+ fault_page_size, fault_phys_offset, m,
+ &prot, caller_prot, fault_type, wired, pmap_options, need_retry);
+ }
+
+ return kr;
+}
+
+ /*
+ * Fault in "vaddr" of the current map with access "prot", but only when
+ * pmap_find_phys() reports no physical page for it in the current map's pmap.
+ * The fault is issued with THREAD_UNINT, without wiring, and its result is
+ * discarded.
+ */
+ void
+ vm_pre_fault(vm_map_offset_t vaddr, vm_prot_t prot)
+ {
+     if (pmap_find_phys(current_map()->pmap, vaddr) != 0) {
+         /* a physical page was found: nothing to do */
+         return;
+     }
+
+     (void)vm_fault(current_map(),   /* map */
+         vaddr,                      /* vaddr */
+         prot,                       /* fault_type */
+         FALSE,                      /* change_wiring */
+         VM_KERN_MEMORY_NONE,        /* tag - not wiring */
+         THREAD_UNINT,               /* interruptible */
+         NULL,                       /* caller_pmap */
+         0 /* caller_pmap_addr */);
+ }
+
+
+/*
+ * Routine: vm_fault
+ * Purpose:
+ * Handle page faults, including pseudo-faults
+ * used to change the wiring status of pages.
+ * Returns:
+ * Explicit continuations have been removed.
+ * Implementation:
+ * vm_fault and vm_fault_page save mucho state
+ * in the moral equivalent of a closure. The state
+ * structure is allocated when first entering vm_fault
+ * and deallocated when leaving vm_fault.
+ */
+
+ extern uint64_t get_current_unique_pid(void); /* defined elsewhere; used below as a trace argument in vm_fault_complete() */
+
+ unsigned long vm_fault_collapse_total = 0; /* NOTE(review): not updated in this part of the file; presumably counts collapse attempts made during faults - confirm */
+ unsigned long vm_fault_collapse_skipped = 0; /* NOTE(review): presumably counts collapse attempts that were skipped - confirm */
+
+
+ /*
+ * Same as vm_fault(), except that the caller does not supply a wire tag:
+ * when change_wiring is set the tag comes from vm_tag_bt(), otherwise
+ * VM_KERN_MEMORY_NONE is used. The final argument of vm_fault_internal()
+ * is passed as NULL.
+ */
+ kern_return_t
+ vm_fault_external(
+     vm_map_t map,
+     vm_map_offset_t vaddr,
+     vm_prot_t fault_type,
+     boolean_t change_wiring,
+     int interruptible,
+     pmap_t caller_pmap,
+     vm_map_offset_t caller_pmap_addr)
+ {
+     vm_tag_t wire_tag;
+
+     if (change_wiring) {
+         wire_tag = vm_tag_bt();
+     } else {
+         wire_tag = VM_KERN_MEMORY_NONE;
+     }
+
+     return vm_fault_internal(map, vaddr, fault_type, change_wiring,
+         wire_tag, interruptible, caller_pmap, caller_pmap_addr, NULL);
+ }
+
+ /*
+ * Entry point that forwards every argument unchanged to
+ * vm_fault_internal(), passing NULL for that routine's final argument, and
+ * returns its result.
+ */
+ kern_return_t
+ vm_fault(
+     vm_map_t map,
+     vm_map_offset_t vaddr,
+     vm_prot_t fault_type,
+     boolean_t change_wiring,
+     vm_tag_t wire_tag,               /* if wiring must pass tag != VM_KERN_MEMORY_NONE */
+     int interruptible,
+     pmap_t caller_pmap,
+     vm_map_offset_t caller_pmap_addr)
+ {
+     kern_return_t kr;
+
+     kr = vm_fault_internal(map, vaddr, fault_type, change_wiring, wire_tag,
+         interruptible, caller_pmap, caller_pmap_addr, NULL);
+
+     return kr;
+ }
+
+ /*
+ * Report whether the current process counts as privileged here, which is
+ * whatever csproc_get_platform_binary() returns for current_proc().
+ */
+ static boolean_t
+ current_proc_is_privileged(void)
+ {
+     boolean_t is_platform_binary = csproc_get_platform_binary(current_proc());
+
+     return is_platform_binary;
+ }
+
+ uint64_t vm_copied_on_read = 0; /* NOTE(review): not referenced in this part of the file; presumably a count of pages copied on a read fault - confirm */
+
+/*
+ * Cleanup after a vm_fault_enter.
+ * At this point, the fault should either have failed (kr != KERN_SUCCESS)
+ * or the page should be in the pmap and on the correct paging queue.
+ *
+ * Precondition:
+ * map must be locked shared.
+ * m_object must be locked.
+ * If top_object != VM_OBJECT_NULL, it must be locked.
+ * real_map must be locked.
+ *
+ * Postcondition:
+ * map will be unlocked
+ * m_object will be unlocked
+ * top_object will be unlocked
+ * If real_map != map, it will be unlocked
+ */
+static void
+vm_fault_complete(
+ vm_map_t map,
+ vm_map_t real_map,
+ vm_object_t object,
+ vm_object_t m_object,
+ vm_page_t m,
+ vm_map_offset_t offset,
+ vm_map_offset_t trace_real_vaddr,
+ vm_object_fault_info_t fault_info,
+ vm_prot_t caller_prot,
+#if CONFIG_DTRACE
+ vm_map_offset_t real_vaddr,
+#else
+ __unused vm_map_offset_t real_vaddr,
+#endif /* CONFIG_DTRACE */
+ int type_of_fault,
+ boolean_t need_retry,
+ kern_return_t kr,
+ ppnum_t *physpage_p,
+ vm_prot_t prot,
+ vm_object_t top_object,
+ boolean_t need_collapse,
+ vm_map_offset_t cur_offset,
+ vm_prot_t fault_type,
+ vm_object_t *written_on_object,
+ memory_object_t *written_on_pager,
+ vm_object_offset_t *written_on_offset)
+{
+ int event_code = 0;
+ vm_map_lock_assert_shared(map);
+ vm_object_lock_assert_held(m_object);
+ if (top_object != VM_OBJECT_NULL) {
+ vm_object_lock_assert_held(top_object);
+ }
+ vm_map_lock_assert_held(real_map);
+
+ if (m_object->internal) {
+ event_code = (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_REAL_FAULT_ADDR_INTERNAL));
+ } else if (m_object->object_is_shared_cache) {
+ event_code = (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_REAL_FAULT_ADDR_SHAREDCACHE));
+ } else {
+ event_code = (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_REAL_FAULT_ADDR_EXTERNAL));
+ }
+
+ KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, event_code, trace_real_vaddr, (fault_info->user_tag << 16) | (caller_prot << 8) | type_of_fault, m->vmp_offset, get_current_unique_pid(), 0);
+ if (need_retry == FALSE) {
+ KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_REAL_FAULT_FAST), get_current_unique_pid(), 0, 0, 0, 0);
+ }
+ DTRACE_VM6(real_fault, vm_map_offset_t, real_vaddr, vm_map_offset_t, m->vmp_offset, int, event_code, int, caller_prot, int, type_of_fault, int, fault_info->user_tag);
+ if (kr == KERN_SUCCESS &&
+ physpage_p != NULL) {
+ /* for vm_map_wire_and_extract() */
+ *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
+ if (prot & VM_PROT_WRITE) {
+ vm_object_lock_assert_exclusive(m_object);
+ m->vmp_dirty = TRUE;
+ }
+ }
+
+ if (top_object != VM_OBJECT_NULL) {
+ /*
+ * It's safe to drop the top object
+ * now that we've done our
+ * vm_fault_enter(). Any other fault
+ * in progress for that virtual
+ * address will either find our page
+ * and translation or put in a new page
+ * and translation.
+ */
+ vm_object_unlock(top_object);
+ top_object = VM_OBJECT_NULL;