+
+
+/*
+ * CODE SIGNING:
+ * When soft faulting a page, we have to validate the page if:
+ * 1. the page is being mapped in user space
+ * 2. the page hasn't already been found to be "tainted"
+ * 3. the page belongs to a code-signed object
+ * 4. the page has not been validated yet or has been mapped for write.
+ */
+#define VM_FAULT_NEED_CS_VALIDATION(pmap, page, page_obj) \
+ ((pmap) != kernel_pmap /*1*/ && \
+ !(page)->cs_tainted /*2*/ && \
+ (page_obj)->code_signed /*3*/ && \
+ (!(page)->cs_validated || (page)->wpmapped /*4*/))
+
+
+/*
+ * page queue lock must NOT be held
+ * m->object must be locked
+ *
+ * NOTE: m->object could be locked "shared" only if we are called
+ * from vm_fault() as part of a soft fault. If so, we must be
+ * careful not to modify the VM object in any way that is not
+ * legal under a shared lock...
+ */
+extern int panic_on_cs_killed;
+extern int proc_selfpid(void);
+extern char *proc_name_address(void *p);
+unsigned long cs_enter_tainted_rejected = 0;
+unsigned long cs_enter_tainted_accepted = 0;
+kern_return_t
+vm_fault_enter(vm_page_t m,
+ pmap_t pmap,
+ vm_map_offset_t vaddr,
+ vm_prot_t prot,
+ vm_prot_t caller_prot,
+ boolean_t wired,
+ boolean_t change_wiring,
+ vm_tag_t wire_tag,
+ boolean_t no_cache,
+ boolean_t cs_bypass,
+ __unused int user_tag,
+ int pmap_options,
+ boolean_t *need_retry,
+ int *type_of_fault)
+{
+ kern_return_t kr, pe_result;
+ boolean_t previously_pmapped = m->pmapped;
+ boolean_t must_disconnect = 0;
+ boolean_t map_is_switched, map_is_switch_protected;
+ int cs_enforcement_enabled;
+ vm_prot_t fault_type;
+ vm_object_t object;
+
+ fault_type = change_wiring ? VM_PROT_NONE : caller_prot;
+ object = VM_PAGE_OBJECT(m);
+
+ vm_object_lock_assert_held(object);
+
+#if KASAN
+ if (pmap == kernel_pmap) {
+ kasan_notify_address(vaddr, PAGE_SIZE);
+ }
+#endif
+
+ LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
+
+ if (VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr) {
+ assert(m->fictitious);
+ return KERN_SUCCESS;
+ }
+
+ if (*type_of_fault == DBG_ZERO_FILL_FAULT) {
+
+ vm_object_lock_assert_exclusive(object);
+
+ } else if ((fault_type & VM_PROT_WRITE) == 0 && !m->wpmapped) {
+ /*
+ * This is not a "write" fault, so we
+ * might not have taken the object lock
+ * exclusively and we might not be able
+ * to update the "wpmapped" bit in
+ * vm_fault_enter().
+ * Let's just grant read access to
+ * the page for now and we'll
+ * soft-fault again if we need write
+ * access later...
+ */
+
+ /* This had better not be a JIT page. */
+ if (!pmap_has_prot_policy(prot)) {
+ prot &= ~VM_PROT_WRITE;
+ } else {
+ assert(cs_bypass);
+ }
+ }
+ if (m->pmapped == FALSE) {
+
+ if (m->clustered) {
+ if (*type_of_fault == DBG_CACHE_HIT_FAULT) {
+ /*
+ * found it in the cache, but this
+ * is the first fault-in of the page (m->pmapped == FALSE)
+ * so it must have come in as part of
+ * a cluster... account 1 pagein against it
+ */
+ if (object->internal)
+ *type_of_fault = DBG_PAGEIND_FAULT;
+ else
+ *type_of_fault = DBG_PAGEINV_FAULT;
+
+ VM_PAGE_COUNT_AS_PAGEIN(m);
+ }
+ VM_PAGE_CONSUME_CLUSTERED(m);
+ }
+ }
+
+ if (*type_of_fault != DBG_COW_FAULT) {
+ DTRACE_VM2(as_fault, int, 1, (uint64_t *), NULL);
+
+ if (pmap == kernel_pmap) {
+ DTRACE_VM2(kernel_asflt, int, 1, (uint64_t *), NULL);
+ }
+ }
+
+ /* Validate code signature if necessary. */
+ if (VM_FAULT_NEED_CS_VALIDATION(pmap, m, object)) {
+ vm_object_lock_assert_exclusive(object);
+
+ if (m->cs_validated) {
+ vm_cs_revalidates++;
+ }
+
+ /* VM map is locked, so 1 ref will remain on VM object -
+ * so no harm if vm_page_validate_cs drops the object lock */
+ vm_page_validate_cs(m);
+ }
+
+#define page_immutable(m,prot) ((m)->cs_validated /*&& ((prot) & VM_PROT_EXECUTE)*/)
+#define page_nx(m) ((m)->cs_nx)
+
+ map_is_switched = ((pmap != vm_map_pmap(current_task()->map)) &&
+ (pmap == vm_map_pmap(current_thread()->map)));
+ map_is_switch_protected = current_thread()->map->switch_protect;
+
+ /* If the map is switched, and is switch-protected, we must protect
+ * some pages from being write-faulted: immutable pages because by
+ * definition they may not be written, and executable pages because that
+ * would provide a way to inject unsigned code.
+ * If the page is immutable, we can simply return. However, we can't
+ * immediately determine whether a page is executable anywhere. But,
+ * we can disconnect it everywhere and remove the executable protection
+ * from the current map. We do that below right before we do the
+ * PMAP_ENTER.
+ */
+ cs_enforcement_enabled = cs_enforcement(NULL);
+
+ if(cs_enforcement_enabled && map_is_switched &&
+ map_is_switch_protected && page_immutable(m, prot) &&
+ (prot & VM_PROT_WRITE))
+ {
+ return KERN_CODESIGN_ERROR;
+ }
+
+ if (cs_enforcement_enabled && page_nx(m) && (prot & VM_PROT_EXECUTE)) {
+ if (cs_debug)
+ printf("page marked to be NX, not letting it be mapped EXEC\n");
+ return KERN_CODESIGN_ERROR;
+ }
+
+ if (cs_enforcement_enabled &&
+ !m->cs_validated &&
+ (prot & VM_PROT_EXECUTE) &&
+ !(caller_prot & VM_PROT_EXECUTE)) {
+ /*
+ * FOURK PAGER:
+ * This page has not been validated and will not be
+ * allowed to be mapped for "execute".
+ * But the caller did not request "execute" access for this
+ * fault, so we should not raise a code-signing violation
+ * (and possibly kill the process) below.
+ * Instead, let's just remove the "execute" access request.
+ *
+ * This can happen on devices with a 4K page size if a 16K
+ * page contains a mix of signed&executable and
+ * unsigned&non-executable 4K pages, making the whole 16K
+ * mapping "executable".
+ */
+ if (!pmap_has_prot_policy(prot)) {
+ prot &= ~VM_PROT_EXECUTE;
+ } else {
+ assert(cs_bypass);
+ }
+ }
+
+ /* A page could be tainted, or pose a risk of being tainted later.
+ * Check whether the receiving process wants it, and make it feel
+ * the consequences (that hapens in cs_invalid_page()).
+ * For CS Enforcement, two other conditions will
+ * cause that page to be tainted as well:
+ * - pmapping an unsigned page executable - this means unsigned code;
+ * - writeable mapping of a validated page - the content of that page
+ * can be changed without the kernel noticing, therefore unsigned
+ * code can be created
+ */
+ if (!cs_bypass &&
+ (m->cs_tainted ||
+ (cs_enforcement_enabled &&
+ (/* The page is unsigned and wants to be executable */
+ (!m->cs_validated && (prot & VM_PROT_EXECUTE)) ||
+ /* The page should be immutable, but is in danger of being modified
+ * This is the case where we want policy from the code directory -
+ * is the page immutable or not? For now we have to assume that
+ * code pages will be immutable, data pages not.
+ * We'll assume a page is a code page if it has a code directory
+ * and we fault for execution.
+ * That is good enough since if we faulted the code page for
+ * writing in another map before, it is wpmapped; if we fault
+ * it for writing in this map later it will also be faulted for executing
+ * at the same time; and if we fault for writing in another map
+ * later, we will disconnect it from this pmap so we'll notice
+ * the change.
+ */
+ (page_immutable(m, prot) && ((prot & VM_PROT_WRITE) || m->wpmapped))
+ ))
+ ))
+ {
+ /* We will have a tainted page. Have to handle the special case
+ * of a switched map now. If the map is not switched, standard
+ * procedure applies - call cs_invalid_page().
+ * If the map is switched, the real owner is invalid already.
+ * There is no point in invalidating the switching process since
+ * it will not be executing from the map. So we don't call
+ * cs_invalid_page() in that case. */
+ boolean_t reject_page, cs_killed;
+ if(map_is_switched) {
+ assert(pmap==vm_map_pmap(current_thread()->map));
+ assert(!(prot & VM_PROT_WRITE) || (map_is_switch_protected == FALSE));
+ reject_page = FALSE;
+ } else {
+ if (cs_debug > 5)
+ printf("vm_fault: signed: %s validate: %s tainted: %s wpmapped: %s slid: %s prot: 0x%x\n",
+ object->code_signed ? "yes" : "no",
+ m->cs_validated ? "yes" : "no",
+ m->cs_tainted ? "yes" : "no",
+ m->wpmapped ? "yes" : "no",
+ m->slid ? "yes" : "no",
+ (int)prot);
+ reject_page = cs_invalid_page((addr64_t) vaddr, &cs_killed);
+ }
+
+ if (reject_page) {
+ /* reject the invalid page: abort the page fault */
+ int pid;
+ const char *procname;
+ task_t task;
+ vm_object_t file_object, shadow;
+ vm_object_offset_t file_offset;
+ char *pathname, *filename;
+ vm_size_t pathname_len, filename_len;
+ boolean_t truncated_path;
+#define __PATH_MAX 1024
+ struct timespec mtime, cs_mtime;
+ int shadow_depth;
+ os_reason_t codesigning_exit_reason = OS_REASON_NULL;
+
+ kr = KERN_CODESIGN_ERROR;
+ cs_enter_tainted_rejected++;
+
+ /* get process name and pid */
+ procname = "?";
+ task = current_task();
+ pid = proc_selfpid();
+ if (task->bsd_info != NULL)
+ procname = proc_name_address(task->bsd_info);
+
+ /* get file's VM object */
+ file_object = object;
+ file_offset = m->offset;
+ for (shadow = file_object->shadow,
+ shadow_depth = 0;
+ shadow != VM_OBJECT_NULL;
+ shadow = file_object->shadow,
+ shadow_depth++) {
+ vm_object_lock_shared(shadow);
+ if (file_object != object) {
+ vm_object_unlock(file_object);
+ }
+ file_offset += file_object->vo_shadow_offset;
+ file_object = shadow;
+ }
+
+ mtime.tv_sec = 0;
+ mtime.tv_nsec = 0;
+ cs_mtime.tv_sec = 0;
+ cs_mtime.tv_nsec = 0;
+
+ /* get file's pathname and/or filename */
+ pathname = NULL;
+ filename = NULL;
+ pathname_len = 0;
+ filename_len = 0;
+ truncated_path = FALSE;
+ /* no pager -> no file -> no pathname, use "<nil>" in that case */
+ if (file_object->pager != NULL) {
+ pathname = (char *)kalloc(__PATH_MAX * 2);
+ if (pathname) {
+ pathname[0] = '\0';
+ pathname_len = __PATH_MAX;
+ filename = pathname + pathname_len;
+ filename_len = __PATH_MAX;
+ }
+ vnode_pager_get_object_name(file_object->pager,
+ pathname,
+ pathname_len,
+ filename,
+ filename_len,
+ &truncated_path);
+ if (pathname) {
+ /* safety first... */
+ pathname[__PATH_MAX-1] = '\0';
+ filename[__PATH_MAX-1] = '\0';
+ }
+ vnode_pager_get_object_mtime(file_object->pager,
+ &mtime,
+ &cs_mtime);
+ }
+ printf("CODE SIGNING: process %d[%s]: "
+ "rejecting invalid page at address 0x%llx "
+ "from offset 0x%llx in file \"%s%s%s\" "
+ "(cs_mtime:%lu.%ld %s mtime:%lu.%ld) "
+ "(signed:%d validated:%d tainted:%d nx:%d "
+ "wpmapped:%d slid:%d dirty:%d depth:%d)\n",
+ pid, procname, (addr64_t) vaddr,
+ file_offset,
+ (pathname ? pathname : "<nil>"),
+ (truncated_path ? "/.../" : ""),
+ (truncated_path ? filename : ""),
+ cs_mtime.tv_sec, cs_mtime.tv_nsec,
+ ((cs_mtime.tv_sec == mtime.tv_sec &&
+ cs_mtime.tv_nsec == mtime.tv_nsec)
+ ? "=="
+ : "!="),
+ mtime.tv_sec, mtime.tv_nsec,
+ object->code_signed,
+ m->cs_validated,
+ m->cs_tainted,
+ m->cs_nx,
+ m->wpmapped,
+ m->slid,
+ m->dirty,
+ shadow_depth);
+
+ /*
+ * We currently only generate an exit reason if cs_invalid_page directly killed a process. If cs_invalid_page
+ * did not kill the process (more the case on desktop), vm_fault_enter will not satisfy the fault and whether the
+ * process dies is dependent on whether there is a signal handler registered for SIGSEGV and how that handler
+ * will deal with the segmentation fault.
+ */
+ if (cs_killed) {
+ KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
+ pid, OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_INVALID_PAGE, 0, 0);
+
+ codesigning_exit_reason = os_reason_create(OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_INVALID_PAGE);
+ if (codesigning_exit_reason == NULL) {
+ printf("vm_fault_enter: failed to allocate codesigning exit reason\n");
+ } else {
+ mach_vm_address_t data_addr = 0;
+ struct codesigning_exit_reason_info *ceri = NULL;
+ uint32_t reason_buffer_size_estimate = kcdata_estimate_required_buffer_size(1, sizeof(*ceri));
+
+ if (os_reason_alloc_buffer_noblock(codesigning_exit_reason, reason_buffer_size_estimate)) {
+ printf("vm_fault_enter: failed to allocate buffer for codesigning exit reason\n");
+ } else {
+ if (KERN_SUCCESS == kcdata_get_memory_addr(&codesigning_exit_reason->osr_kcd_descriptor,
+ EXIT_REASON_CODESIGNING_INFO, sizeof(*ceri), &data_addr)) {
+ ceri = (struct codesigning_exit_reason_info *)data_addr;
+ static_assert(__PATH_MAX == sizeof(ceri->ceri_pathname));
+
+ ceri->ceri_virt_addr = vaddr;
+ ceri->ceri_file_offset = file_offset;
+ if (pathname)
+ strncpy((char *)&ceri->ceri_pathname, pathname, sizeof(ceri->ceri_pathname));
+ else
+ ceri->ceri_pathname[0] = '\0';
+ if (filename)
+ strncpy((char *)&ceri->ceri_filename, filename, sizeof(ceri->ceri_filename));
+ else
+ ceri->ceri_filename[0] = '\0';
+ ceri->ceri_path_truncated = (truncated_path);
+ ceri->ceri_codesig_modtime_secs = cs_mtime.tv_sec;
+ ceri->ceri_codesig_modtime_nsecs = cs_mtime.tv_nsec;
+ ceri->ceri_page_modtime_secs = mtime.tv_sec;
+ ceri->ceri_page_modtime_nsecs = mtime.tv_nsec;
+ ceri->ceri_object_codesigned = (object->code_signed);
+ ceri->ceri_page_codesig_validated = (m->cs_validated);
+ ceri->ceri_page_codesig_tainted = (m->cs_tainted);
+ ceri->ceri_page_codesig_nx = (m->cs_nx);
+ ceri->ceri_page_wpmapped = (m->wpmapped);
+ ceri->ceri_page_slid = (m->slid);
+ ceri->ceri_page_dirty = (m->dirty);
+ ceri->ceri_page_shadow_depth = shadow_depth;
+ } else {
+#if DEBUG || DEVELOPMENT
+ panic("vm_fault_enter: failed to allocate kcdata for codesigning exit reason");
+#else
+ printf("vm_fault_enter: failed to allocate kcdata for codesigning exit reason\n");
+#endif /* DEBUG || DEVELOPMENT */
+ /* Free the buffer */
+ os_reason_alloc_buffer_noblock(codesigning_exit_reason, 0);
+ }
+ }
+ }
+
+ set_thread_exit_reason(current_thread(), codesigning_exit_reason, FALSE);
+ }
+ if (panic_on_cs_killed &&
+ object->object_slid) {
+ panic("CODE SIGNING: process %d[%s]: "
+ "rejecting invalid page at address 0x%llx "
+ "from offset 0x%llx in file \"%s%s%s\" "
+ "(cs_mtime:%lu.%ld %s mtime:%lu.%ld) "
+ "(signed:%d validated:%d tainted:%d nx:%d"
+ "wpmapped:%d slid:%d dirty:%d depth:%d)\n",
+ pid, procname, (addr64_t) vaddr,
+ file_offset,
+ (pathname ? pathname : "<nil>"),
+ (truncated_path ? "/.../" : ""),
+ (truncated_path ? filename : ""),
+ cs_mtime.tv_sec, cs_mtime.tv_nsec,
+ ((cs_mtime.tv_sec == mtime.tv_sec &&
+ cs_mtime.tv_nsec == mtime.tv_nsec)
+ ? "=="
+ : "!="),
+ mtime.tv_sec, mtime.tv_nsec,
+ object->code_signed,
+ m->cs_validated,
+ m->cs_tainted,
+ m->cs_nx,
+ m->wpmapped,
+ m->slid,
+ m->dirty,
+ shadow_depth);
+ }
+
+ if (file_object != object) {
+ vm_object_unlock(file_object);
+ }
+ if (pathname_len != 0) {
+ kfree(pathname, __PATH_MAX * 2);
+ pathname = NULL;
+ filename = NULL;
+ }
+ } else {
+ /* proceed with the invalid page */
+ kr = KERN_SUCCESS;
+ if (!m->cs_validated &&
+ !object->code_signed) {
+ /*
+ * This page has not been (fully) validated but
+ * does not belong to a code-signed object
+ * so it should not be forcefully considered
+ * as tainted.
+ * We're just concerned about it here because
+ * we've been asked to "execute" it but that
+ * does not mean that it should cause other
+ * accesses to fail.
+ * This happens when a debugger sets a
+ * breakpoint and we then execute code in
+ * that page. Marking the page as "tainted"
+ * would cause any inspection tool ("leaks",
+ * "vmmap", "CrashReporter", ...) to get killed
+ * due to code-signing violation on that page,
+ * even though they're just reading it and not
+ * executing from it.
+ */
+ } else {
+ /*
+ * Page might have been tainted before or not;
+ * now it definitively is. If the page wasn't
+ * tainted, we must disconnect it from all
+ * pmaps later, to force existing mappings
+ * through that code path for re-consideration
+ * of the validity of that page.
+ */
+ must_disconnect = !m->cs_tainted;
+ m->cs_tainted = TRUE;
+ }
+ cs_enter_tainted_accepted++;
+ }
+ if (kr != KERN_SUCCESS) {
+ if (cs_debug) {
+ printf("CODESIGNING: vm_fault_enter(0x%llx): "
+ "*** INVALID PAGE ***\n",
+ (long long)vaddr);
+ }
+#if !SECURE_KERNEL
+ if (cs_enforcement_panic) {
+ panic("CODESIGNING: panicking on invalid page\n");
+ }
+#endif
+ }
+
+ } else {
+ /* proceed with the valid page */
+ kr = KERN_SUCCESS;
+ }
+
+ boolean_t page_queues_locked = FALSE;
+#define __VM_PAGE_LOCKSPIN_QUEUES_IF_NEEDED() \
+MACRO_BEGIN \
+ if (! page_queues_locked) { \
+ page_queues_locked = TRUE; \
+ vm_page_lockspin_queues(); \
+ } \
+MACRO_END
+#define __VM_PAGE_UNLOCK_QUEUES_IF_NEEDED() \
+MACRO_BEGIN \
+ if (page_queues_locked) { \
+ page_queues_locked = FALSE; \
+ vm_page_unlock_queues(); \
+ } \
+MACRO_END
+
+ /*
+ * Hold queues lock to manipulate
+ * the page queues. Change wiring
+ * case is obvious.
+ */
+ assert((m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) || object != compressor_object);
+
+#if CONFIG_BACKGROUND_QUEUE
+ vm_page_update_background_state(m);
+#endif
+ if (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
+ /*
+ * Compressor pages are neither wired
+ * nor pageable and should never change.
+ */
+ assert(object == compressor_object);
+ } else if (change_wiring) {
+ __VM_PAGE_LOCKSPIN_QUEUES_IF_NEEDED();
+
+ if (wired) {
+ if (kr == KERN_SUCCESS) {
+ vm_page_wire(m, wire_tag, TRUE);
+ }
+ } else {
+ vm_page_unwire(m, TRUE);
+ }
+ /* we keep the page queues lock, if we need it later */
+
+ } else {
+ if (object->internal == TRUE) {
+ /*
+ * don't allow anonymous pages on
+ * the speculative queues
+ */
+ no_cache = FALSE;
+ }
+ if (kr != KERN_SUCCESS) {
+ __VM_PAGE_LOCKSPIN_QUEUES_IF_NEEDED();
+ vm_page_deactivate(m);
+ /* we keep the page queues lock, if we need it later */
+ } else if (((m->vm_page_q_state == VM_PAGE_NOT_ON_Q) ||
+ (m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q) ||
+ (m->vm_page_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) ||
+ ((m->vm_page_q_state != VM_PAGE_ON_THROTTLED_Q) && no_cache)) &&
+ !VM_PAGE_WIRED(m)) {
+
+ if (vm_page_local_q &&
+ (*type_of_fault == DBG_COW_FAULT ||
+ *type_of_fault == DBG_ZERO_FILL_FAULT) ) {
+ struct vpl *lq;
+ uint32_t lid;
+
+ assert(m->vm_page_q_state == VM_PAGE_NOT_ON_Q);
+
+ __VM_PAGE_UNLOCK_QUEUES_IF_NEEDED();
+ vm_object_lock_assert_exclusive(object);
+
+ /*
+ * we got a local queue to stuff this
+ * new page on...
+ * its safe to manipulate local and
+ * local_id at this point since we're
+ * behind an exclusive object lock and
+ * the page is not on any global queue.
+ *
+ * we'll use the current cpu number to
+ * select the queue note that we don't
+ * need to disable preemption... we're
+ * going to be behind the local queue's
+ * lock to do the real work
+ */
+ lid = cpu_number();
+
+ lq = &vm_page_local_q[lid].vpl_un.vpl;
+
+ VPL_LOCK(&lq->vpl_lock);
+
+ vm_page_check_pageable_safe(m);
+ vm_page_queue_enter(&lq->vpl_queue, m,
+ vm_page_t, pageq);
+ m->vm_page_q_state = VM_PAGE_ON_ACTIVE_LOCAL_Q;
+ m->local_id = lid;
+ lq->vpl_count++;
+
+ if (object->internal)
+ lq->vpl_internal_count++;
+ else
+ lq->vpl_external_count++;
+
+ VPL_UNLOCK(&lq->vpl_lock);
+
+ if (lq->vpl_count > vm_page_local_q_soft_limit)
+ {
+ /*
+ * we're beyond the soft limit
+ * for the local queue
+ * vm_page_reactivate_local will
+ * 'try' to take the global page
+ * queue lock... if it can't
+ * that's ok... we'll let the
+ * queue continue to grow up
+ * to the hard limit... at that
+ * point we'll wait for the
+ * lock... once we've got the
+ * lock, we'll transfer all of
+ * the pages from the local
+ * queue to the global active
+ * queue
+ */
+ vm_page_reactivate_local(lid, FALSE, FALSE);
+ }
+ } else {
+
+ __VM_PAGE_LOCKSPIN_QUEUES_IF_NEEDED();
+
+ /*
+ * test again now that we hold the
+ * page queue lock
+ */
+ if (!VM_PAGE_WIRED(m)) {
+ if (m->vm_page_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) {
+ vm_page_queues_remove(m, FALSE);
+
+ vm_pageout_cleaned_reactivated++;
+ vm_pageout_cleaned_fault_reactivated++;
+ }
+
+ if ( !VM_PAGE_ACTIVE_OR_INACTIVE(m) ||
+ no_cache) {
+ /*
+ * If this is a no_cache mapping
+ * and the page has never been
+ * mapped before or was
+ * previously a no_cache page,
+ * then we want to leave pages
+ * in the speculative state so
+ * that they can be readily
+ * recycled if free memory runs
+ * low. Otherwise the page is
+ * activated as normal.
+ */
+
+ if (no_cache &&
+ (!previously_pmapped ||
+ m->no_cache)) {
+ m->no_cache = TRUE;
+
+ if (m->vm_page_q_state != VM_PAGE_ON_SPECULATIVE_Q)
+ vm_page_speculate(m, FALSE);
+
+ } else if ( !VM_PAGE_ACTIVE_OR_INACTIVE(m)) {
+ vm_page_activate(m);
+ }
+ }
+ }
+ /* we keep the page queues lock, if we need it later */
+ }
+ }
+ }
+ /* we're done with the page queues lock, if we ever took it */
+ __VM_PAGE_UNLOCK_QUEUES_IF_NEEDED();
+
+
+ /* If we have a KERN_SUCCESS from the previous checks, we either have
+ * a good page, or a tainted page that has been accepted by the process.
+ * In both cases the page will be entered into the pmap.
+ * If the page is writeable, we need to disconnect it from other pmaps
+ * now so those processes can take note.
+ */
+ if (kr == KERN_SUCCESS) {
+ /*
+ * NOTE: we may only hold the vm_object lock SHARED
+ * at this point, so we need the phys_page lock to
+ * properly serialize updating the pmapped and
+ * xpmapped bits
+ */
+ if ((prot & VM_PROT_EXECUTE) && !m->xpmapped) {
+ ppnum_t phys_page = VM_PAGE_GET_PHYS_PAGE(m);
+
+ pmap_lock_phys_page(phys_page);
+ /*
+ * go ahead and take the opportunity
+ * to set 'pmapped' here so that we don't
+ * need to grab this lock a 2nd time
+ * just below
+ */
+ m->pmapped = TRUE;
+
+ if (!m->xpmapped) {
+
+ m->xpmapped = TRUE;
+
+ pmap_unlock_phys_page(phys_page);
+
+ if (!object->internal)
+ OSAddAtomic(1, &vm_page_xpmapped_external_count);
+
+#if defined(__arm__) || defined(__arm64__)
+ pmap_sync_page_data_phys(phys_page);
+#else
+ if (object->internal &&
+ object->pager != NULL) {
+ /*
+ * This page could have been
+ * uncompressed by the
+ * compressor pager and its
+ * contents might be only in
+ * the data cache.
+ * Since it's being mapped for
+ * "execute" for the fist time,
+ * make sure the icache is in
+ * sync.
+ */
+ assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
+ pmap_sync_page_data_phys(phys_page);
+ }
+#endif
+ } else
+ pmap_unlock_phys_page(phys_page);
+ } else {
+ if (m->pmapped == FALSE) {
+ ppnum_t phys_page = VM_PAGE_GET_PHYS_PAGE(m);
+
+ pmap_lock_phys_page(phys_page);
+ m->pmapped = TRUE;
+ pmap_unlock_phys_page(phys_page);
+ }
+ }
+ if (vm_page_is_slideable(m)) {
+ boolean_t was_busy = m->busy;
+
+ vm_object_lock_assert_exclusive(object);
+
+ m->busy = TRUE;
+ kr = vm_page_slide(m, 0);
+ assert(m->busy);
+ if(!was_busy) {
+ PAGE_WAKEUP_DONE(m);
+ }
+ if (kr != KERN_SUCCESS) {
+ /*
+ * This page has not been slid correctly,
+ * do not do the pmap_enter() !
+ * Let vm_fault_enter() return the error
+ * so the caller can fail the fault.
+ */
+ goto after_the_pmap_enter;
+ }
+ }
+
+ if (fault_type & VM_PROT_WRITE) {
+
+ if (m->wpmapped == FALSE) {
+ vm_object_lock_assert_exclusive(object);
+ if (!object->internal && object->pager) {
+ task_update_logical_writes(current_task(), PAGE_SIZE, TASK_WRITE_DEFERRED, vnode_pager_lookup_vnode(object->pager));
+ }
+ m->wpmapped = TRUE;
+ }
+ if (must_disconnect) {
+ /*
+ * We can only get here
+ * because of the CSE logic
+ */
+ assert(cs_enforcement_enabled);
+ pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
+ /*
+ * If we are faulting for a write, we can clear
+ * the execute bit - that will ensure the page is
+ * checked again before being executable, which
+ * protects against a map switch.
+ * This only happens the first time the page
+ * gets tainted, so we won't get stuck here
+ * to make an already writeable page executable.
+ */
+ if (!cs_bypass){
+ assert(!pmap_has_prot_policy(prot));
+ prot &= ~VM_PROT_EXECUTE;
+ }
+ }
+ }
+ assert(VM_PAGE_OBJECT(m) == object);
+
+ /* Prevent a deadlock by not
+ * holding the object lock if we need to wait for a page in
+ * pmap_enter() - <rdar://problem/7138958> */
+ PMAP_ENTER_OPTIONS(pmap, vaddr, m, prot, fault_type, 0,
+ wired,
+ pmap_options | PMAP_OPTIONS_NOWAIT,
+ pe_result);
+#if __x86_64__
+ if (pe_result == KERN_INVALID_ARGUMENT &&
+ pmap == PMAP_NULL &&
+ wired) {
+ /*
+ * Wiring a page in a pmap-less VM map:
+ * VMware's "vmmon" kernel extension does this
+ * to grab pages.
+ * Let it proceed even though the PMAP_ENTER() failed.
+ */
+ pe_result = KERN_SUCCESS;
+ }
+#endif /* __x86_64__ */
+
+ if(pe_result == KERN_RESOURCE_SHORTAGE) {
+
+ if (need_retry) {
+ /*
+ * this will be non-null in the case where we hold the lock
+ * on the top-object in this chain... we can't just drop
+ * the lock on the object we're inserting the page into
+ * and recall the PMAP_ENTER since we can still cause
+ * a deadlock if one of the critical paths tries to
+ * acquire the lock on the top-object and we're blocked
+ * in PMAP_ENTER waiting for memory... our only recourse
+ * is to deal with it at a higher level where we can
+ * drop both locks.
+ */
+ *need_retry = TRUE;
+ vm_pmap_enter_retried++;
+ goto after_the_pmap_enter;
+ }
+ /* The nonblocking version of pmap_enter did not succeed.
+ * and we don't need to drop other locks and retry
+ * at the level above us, so
+ * use the blocking version instead. Requires marking
+ * the page busy and unlocking the object */
+ boolean_t was_busy = m->busy;
+
+ vm_object_lock_assert_exclusive(object);
+
+ m->busy = TRUE;
+ vm_object_unlock(object);
+
+ PMAP_ENTER_OPTIONS(pmap, vaddr, m, prot, fault_type,
+ 0, wired,
+ pmap_options, pe_result);
+
+ assert(VM_PAGE_OBJECT(m) == object);
+
+ /* Take the object lock again. */
+ vm_object_lock(object);
+
+ /* If the page was busy, someone else will wake it up.
+ * Otherwise, we have to do it now. */
+ assert(m->busy);
+ if(!was_busy) {
+ PAGE_WAKEUP_DONE(m);
+ }
+ vm_pmap_enter_blocked++;
+ }
+
+ kr = pe_result;
+ }
+
+after_the_pmap_enter:
+ return kr;
+}
+
+void
+vm_pre_fault(vm_map_offset_t vaddr)
+{
+ if (pmap_find_phys(current_map()->pmap, vaddr) == 0) {
+
+ vm_fault(current_map(), /* map */
+ vaddr, /* vaddr */
+ VM_PROT_READ, /* fault_type */
+ FALSE, /* change_wiring */
+ VM_KERN_MEMORY_NONE, /* tag - not wiring */
+ THREAD_UNINT, /* interruptible */
+ NULL, /* caller_pmap */
+ 0 /* caller_pmap_addr */);
+ }
+}
+
+