+ if (pmap == kernel_pmap) {
+ DTRACE_VM2(kernel_asflt, int, 1, (uint64_t *), NULL);
+ }
+ }
+
+ /* Validate code signature if necessary. */
+ if (VM_FAULT_NEED_CS_VALIDATION(pmap, m)) {
+ vm_object_lock_assert_exclusive(m->object);
+
+ if (m->cs_validated) {
+ vm_cs_revalidates++;
+ }
+
+ /* VM map is locked, so 1 ref will remain on VM object -
+ * so no harm if vm_page_validate_cs drops the object lock */
+ vm_page_validate_cs(m);
+ }
+
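+ /* A code-signing-validated page is treated as immutable by the checks
+ * below; the execute-permission qualifier is currently commented out. */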
+#define page_immutable(m,prot) ((m)->cs_validated /*&& ((prot) & VM_PROT_EXECUTE)*/)
+
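+ /* map_is_switched is TRUE when we are faulting in a map the current
+ * thread has switched to but which is not the current task's own map;
+ * map_is_switch_protected says whether that map requested switch
+ * protection. */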
+ map_is_switched = ((pmap != vm_map_pmap(current_task()->map)) &&
+ (pmap == vm_map_pmap(current_thread()->map)));
+ map_is_switch_protected = current_thread()->map->switch_protect;
+
+ /* If the map is switched, and is switch-protected, we must protect
+ * some pages from being write-faulted: immutable pages because by
+ * definition they may not be written, and executable pages because that
+ * would provide a way to inject unsigned code.
+ * If the page is immutable, we can simply return. However, we can't
+ * immediately determine whether a page is executable anywhere. But,
+ * we can disconnect it everywhere and remove the executable protection
+ * from the current map. We do that below right before we do the
+ * PMAP_ENTER.
+ */
+ if(!cs_enforcement_disable && map_is_switched &&
+ map_is_switch_protected && page_immutable(m, prot) &&
+ (prot & VM_PROT_WRITE))
+ {
+ return KERN_CODESIGN_ERROR;
+ }
+
+ /* A page could be tainted, or pose a risk of being tainted later.
+ * Check whether the receiving process wants it, and make it feel
+ * the consequences (that happens in cs_invalid_page()).
+ * When CS enforcement is enabled, two other conditions will
+ * cause that page to be tainted as well:
+ * - pmapping an unsigned page executable - this means unsigned code;
+ * - a writeable mapping of a validated page - the contents of that
+ * page can be changed without the kernel noticing, therefore
+ * unsigned code can be created
+ */
+ if (m->cs_tainted ||
+ ( !cs_enforcement_disable &&
+ (/* The page is unsigned and wants to be executable */
+ (!m->cs_validated && (prot & VM_PROT_EXECUTE)) ||
+ /* The page should be immutable, but is in danger of being modified
+ * This is the case where we want policy from the code directory -
+ * is the page immutable or not? For now we have to assume that
+ * code pages will be immutable, data pages not.
+ * We'll assume a page is a code page if it has a code directory
+ * and we fault for execution.
+ * That is good enough since if we faulted the code page for
+ * writing in another map before, it is wpmapped; if we fault
+ * it for writing in this map later it will also be faulted for executing
+ * at the same time; and if we fault for writing in another map
+ * later, we will disconnect it from this pmap so we'll notice
+ * the change.
+ */
+ (page_immutable(m, prot) && ((prot & VM_PROT_WRITE) || m->wpmapped))
+ ))
+ )
+ {
+ /* We will have a tainted page. Have to handle the special case
+ * of a switched map now. If the map is not switched, standard
+ * procedure applies - call cs_invalid_page().
+ * If the map is switched, the real owner is invalid already.
+ * There is no point in invalidating the switching process since
+ * it will not be executing from the map. So we don't call
+ * cs_invalid_page() in that case. */
+ boolean_t reject_page;
+ if(map_is_switched) {
+ assert(pmap==vm_map_pmap(current_thread()->map));
+ assert(!(prot & VM_PROT_WRITE) || (map_is_switch_protected == FALSE));
+ reject_page = FALSE;
+ } else {
+ reject_page = cs_invalid_page((addr64_t) vaddr);
+ }
+
+ if (reject_page) {
+ /* reject the tainted page: abort the page fault */
+ kr = KERN_CODESIGN_ERROR;
+ cs_enter_tainted_rejected++;
+ } else {
+ /* proceed with the tainted page */
+ kr = KERN_SUCCESS;
+ /* Page might have been tainted before or not; now it
+ * definitively is. If the page wasn't tainted, we must
+ * disconnect it from all pmaps later. */
+ must_disconnect = !m->cs_tainted;
+ m->cs_tainted = TRUE;
+ cs_enter_tainted_accepted++;
+ }
+ if (cs_debug || kr != KERN_SUCCESS) {
+ printf("CODESIGNING: vm_fault_enter(0x%llx): "
+ "page %p obj %p off 0x%llx *** INVALID PAGE ***\n",
+ (long long)vaddr, m, m->object, m->offset);
+ }
+
+ } else {
+ /* proceed with the valid page */
+ kr = KERN_SUCCESS;
+ }
+
+ /* If we have a KERN_SUCCESS from the previous checks, we either have
+ * a good page, or a tainted page that has been accepted by the process.
+ * In both cases the page will be entered into the pmap.
+ * If the page is writeable, we need to disconnect it from other pmaps
+ * now so those processes can take note.
+ */
+ if (kr == KERN_SUCCESS) {
+ /*
+ * NOTE: we may only hold the vm_object lock SHARED
+ * at this point, but the update of pmapped is ok
+ * since this is the ONLY bit updated behind the SHARED
+ * lock... however, we need to figure out how to do an atomic
+ * update on a bit field to make this less fragile... right
+ * now I don't know how to coerce 'C' to give me the offset info
+ * that's needed for an AtomicCompareAndSwap
+ */
+ m->pmapped = TRUE;
+ if (prot & VM_PROT_WRITE) {
+ vm_object_lock_assert_exclusive(m->object);
+ m->wpmapped = TRUE;
+ if(must_disconnect) {
+ /* We can only get here
+ * because of the CSE logic */
+ assert(cs_enforcement_disable == FALSE);
+ pmap_disconnect(m->phys_page);
+ /* If we are faulting for a write, we can clear
+ * the execute bit - that will ensure the page is
+ * checked again before being executable, which
+ * protects against a map switch.
+ * This only happens the first time the page
+ * gets tainted, so we won't get stuck here
+ * to make an already writeable page executable. */
+ prot &= ~VM_PROT_EXECUTE;
+ }
+ }
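+ /* Enter the page into the pmap with the (possibly reduced)
+ * protection computed above. */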
+ PMAP_ENTER(pmap, vaddr, m, prot, cache_attr, wired);
+ }
+
+ /*
+ * Hold queues lock to manipulate
+ * the page queues. Change wiring
+ * case is obvious.
+ */
+ if (change_wiring) {
+ vm_page_lockspin_queues();
+
+ if (wired) {
+ if (kr == KERN_SUCCESS) {
+ vm_page_wire(m);
+ }
+ } else {
+ vm_page_unwire(m);
+ }
+ vm_page_unlock_queues();
+
+ } else {
+ if (kr != KERN_SUCCESS) {
+ vm_page_lockspin_queues();
+ vm_page_deactivate(m);
+ vm_page_unlock_queues();
+ } else {
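+ /* Only pages that are not already on the active/inactive queues
+ * (or that are no_cache mappings) and that are neither wired nor
+ * throttled need to be placed on a queue here. */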
+ if (((!m->active && !m->inactive) || no_cache) && !VM_PAGE_WIRED(m) && !m->throttled) {
+
+ if ( vm_page_local_q && !no_cache && (*type_of_fault == DBG_COW_FAULT || *type_of_fault == DBG_ZERO_FILL_FAULT) ) {
+ struct vpl *lq;
+ uint32_t lid;
+
+ /*
+ * we got a local queue to stuff this new page on...
+ * it's safe to manipulate local and local_id at this point
+ * since we're behind an exclusive object lock and the
+ * page is not on any global queue.
+ *
+ * we'll use the current cpu number to select the queue
+ * note that we don't need to disable preemption... we're
+ * going to be behind the local queue's lock to do the real
+ * work
+ */
+ lid = cpu_number();
+
+ lq = &vm_page_local_q[lid].vpl_un.vpl;
+
+ VPL_LOCK(&lq->vpl_lock);
+
+ queue_enter(&lq->vpl_queue, m, vm_page_t, pageq);
+ m->local = TRUE;
+ m->local_id = lid;
+ lq->vpl_count++;
+
+ VPL_UNLOCK(&lq->vpl_lock);
+
+ if (lq->vpl_count > vm_page_local_q_soft_limit) {
+ /*
+ * we're beyond the soft limit for the local queue
+ * vm_page_reactivate_local will 'try' to take
+ * the global page queue lock... if it can't that's
+ * ok... we'll let the queue continue to grow up
+ * to the hard limit... at that point we'll wait
+ * for the lock... once we've got the lock, we'll
+ * transfer all of the pages from the local queue
+ * to the global active queue
+ */
+ vm_page_reactivate_local(lid, FALSE, FALSE);
+ }
+ return kr;
+ }
+
+ vm_page_lockspin_queues();
+ /*
+ * test again now that we hold the page queue lock
+ */
+ if (((!m->active && !m->inactive) || no_cache) && !VM_PAGE_WIRED(m)) {
+
+ /*
+ * If this is a no_cache mapping and the page has never been
+ * mapped before or was previously a no_cache page, then we
+ * want to leave pages in the speculative state so that they
+ * can be readily recycled if free memory runs low. Otherwise
+ * the page is activated as normal.
+ */
+
+ if (no_cache && (!previously_pmapped || m->no_cache)) {
+ m->no_cache = TRUE;
+
+ if (m->active || m->inactive)
+ VM_PAGE_QUEUES_REMOVE(m);
+
+ if (!m->speculative)
+ vm_page_speculate(m, TRUE);
+
+ } else if (!m->active && !m->inactive)
+ vm_page_activate(m);
+
+ }
+
+ vm_page_unlock_queues();
+ }
+ }
+ }
+ return kr;
+}
+
+
+/*
+ * Routine: vm_fault
+ * Purpose:
+ * Handle page faults, including pseudo-faults
+ * used to change the wiring status of pages.
+ * Returns:
+ * Explicit continuations have been removed.
+ * Implementation:
+ * vm_fault and vm_fault_page save mucho state
+ * in the moral equivalent of a closure. The state
+ * structure is allocated when first entering vm_fault
+ * and deallocated when leaving vm_fault.
+ */
+
+extern int _map_enter_debug;
+
+unsigned long vm_fault_collapse_total = 0;
+unsigned long vm_fault_collapse_skipped = 0;
+
+kern_return_t
+vm_fault(
+ vm_map_t map,
+ vm_map_offset_t vaddr,
+ vm_prot_t fault_type,
+ boolean_t change_wiring,
+ int interruptible,
+ pmap_t caller_pmap,
+ vm_map_offset_t caller_pmap_addr)
+{
+ vm_map_version_t version; /* Map version for verification */
+ boolean_t wired; /* Should mapping be wired down? */
+ vm_object_t object; /* Top-level object */
+ vm_object_offset_t offset; /* Top-level offset */
+ vm_prot_t prot; /* Protection for mapping */
+ vm_object_t old_copy_object; /* Saved copy object */
+ vm_page_t result_page; /* Result of vm_fault_page */
+ vm_page_t top_page; /* Placeholder page */
+ kern_return_t kr;
+
+ vm_page_t m; /* Fast access to result_page */
+ kern_return_t error_code;
+ vm_object_t cur_object;
+ vm_object_offset_t cur_offset;
+ vm_page_t cur_m;
+ vm_object_t new_object;
+ int type_of_fault;
+ pmap_t pmap;
+ boolean_t interruptible_state;
+ vm_map_t real_map = map;
+ vm_map_t original_map = map;
+ vm_prot_t original_fault_type;
+ struct vm_object_fault_info fault_info;
+ boolean_t need_collapse = FALSE;
+ int object_lock_type = 0;
+ int cur_object_lock_type;
+ vm_object_t top_object = VM_OBJECT_NULL;
+
+
+ KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_START,
+ (int)((uint64_t)vaddr >> 32),
+ (int)vaddr,
+ 0,
+ 0,
+ 0);
+
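+ /* Page faults cannot be handled with preemption disabled;
+ * fail the fault immediately in that case. */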
+ if (get_preemption_level() != 0) {
+ KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_END,
+ (int)((uint64_t)vaddr >> 32),
+ (int)vaddr,
+ KERN_FAILURE,
+ 0,
+ 0);
+
+ return (KERN_FAILURE);