+ if (m->cs_tainted ||
+ ( !cs_enforcement_disable &&
+ (/* The page is unsigned and wants to be executable */
+ (!m->cs_validated && (prot & VM_PROT_EXECUTE)) ||
+ /* The page should be immutable, but is in danger of being modified.
+ * This is the case where we want policy from the code directory:
+ * is the page immutable or not? For now we have to assume that
+ * code pages are immutable and data pages are not.
+ * We'll assume a page is a code page if it has a code directory
+ * and we fault for execution.
+ * That is good enough because if we previously faulted the code
+ * page for writing in another map, it is wpmapped; if we fault it
+ * for writing in this map later, it will also be faulted for
+ * execution at the same time; and if we fault for writing in
+ * another map later, we will disconnect it from this pmap so
+ * we'll notice the change.
+ */
+ (page_immutable(m, prot) && ((prot & VM_PROT_WRITE) || m->wpmapped))
+ ))
+ )
+ {
+ /* We will have a tainted page. Have to handle the special case
+ * of a switched map now. If the map is not switched, standard
+ * procedure applies - call cs_invalid_page().
+ * If the map is switched, the real owner is invalid already.
+ * There is no point in invalidating the switching process since
+ * it will not be executing from the map. So we don't call
+ * cs_invalid_page() in that case. */
+ boolean_t reject_page;
+ if(map_is_switched) {
+ assert(pmap==vm_map_pmap(current_thread()->map));
+ assert(!(prot & VM_PROT_WRITE) || (map_is_switch_protected == FALSE));
+ reject_page = FALSE;
+ } else {
+ reject_page = cs_invalid_page((addr64_t) vaddr);
+ }
+
+ if (reject_page) {
+ /* reject the tainted page: abort the page fault */
+ kr = KERN_CODESIGN_ERROR;
+ cs_enter_tainted_rejected++;
+ } else {
+ /* proceed with the tainted page */
+ kr = KERN_SUCCESS;
+ /* The page might or might not have been tainted before;
+ * now it definitely is. If the page wasn't tainted, we must
+ * disconnect it from all pmaps later. */
+ must_disconnect = !m->cs_tainted;
+ m->cs_tainted = TRUE;
+ cs_enter_tainted_accepted++;
+ }
+ if (cs_debug || kr != KERN_SUCCESS) {
+ printf("CODESIGNING: vm_fault_enter(0x%llx): "
+ "page %p obj %p off 0x%llx *** INVALID PAGE ***\n",
+ (long long)vaddr, m, m->object, m->offset);
+ }
+
+ } else {
+ /* proceed with the valid page */
+ kr = KERN_SUCCESS;
+ }
+
+ /* If we have a KERN_SUCCESS from the previous checks, we either have
+ * a good page, or a tainted page that has been accepted by the process.
+ * In both cases the page will be entered into the pmap.
+ * If the page is writeable, we need to disconnect it from other pmaps
+ * now so those processes can take note.
+ */
+ if (kr == KERN_SUCCESS) {
+ /*
+ * NOTE: we may only hold the vm_object lock SHARED
+ * at this point, but the update of pmapped is ok
+ * since this is the ONLY bit updated behind the SHARED
+ * lock... however, we need to figure out how to do an atomic
+ * update on a bit field to make this less fragile... right
+ * now I don't know how to coerce 'C' to give me the offset info
+ * that's needed for an AtomicCompareAndSwap
+ */
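+ /* record that this page has been (or is about to be)
+ * entered into at least one physical map */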
+ m->pmapped = TRUE;
+ if (prot & VM_PROT_WRITE) {
+ vm_object_lock_assert_exclusive(m->object);
+ m->wpmapped = TRUE;
+ if(must_disconnect) {
+ /* We can only get here because of the
+ * code signing enforcement (CSE) logic */
+ assert(cs_enforcement_disable == FALSE);
+ pmap_disconnect(m->phys_page);
+ /* If we are faulting for a write, we can clear
+ * the execute bit - that will ensure the page is
+ * checked again before being executable, which
+ * protects against a map switch.
+ * This only happens the first time the page
+ * gets tainted, so we won't end up here again
+ * trying to make an already writeable page executable. */
+ prot &= ~VM_PROT_EXECUTE;
+ }
+ }
+
+ /* Prevent a deadlock by not
+ * holding the object lock if we need to wait for a page in
+ * pmap_enter() - <rdar://problem/7138958> */
+ PMAP_ENTER_OPTIONS(pmap, vaddr, m, prot, cache_attr,
+ wired, PMAP_OPTIONS_NOWAIT, pe_result);
+
+ if(pe_result == KERN_RESOURCE_SHORTAGE) {
+ /* The nonblocking version of pmap_enter did not succeed.
+ * Use the blocking version instead. Requires marking
+ * the page busy and unlocking the object */
+ boolean_t was_busy = m->busy;
+ m->busy = TRUE;
+ vm_object_unlock(m->object);
+
+ PMAP_ENTER(pmap, vaddr, m, prot, cache_attr, wired);
+
+ /* Take the object lock again. */
+ vm_object_lock(m->object);
+
+ /* If the page was busy, someone else will wake it up.
+ * Otherwise, we have to do it now. */
+ assert(m->busy);
+ if(!was_busy) {
+ PAGE_WAKEUP_DONE(m);
+ }
+ vm_pmap_enter_blocked++;
+ }
+ }
+
+ /*
+ * Hold the page queues lock while we
+ * manipulate the page queues. The
+ * change-wiring case is straightforward.
+ */
+ if (change_wiring) {
+ vm_page_lockspin_queues();
+
+ if (wired) {
+ if (kr == KERN_SUCCESS) {
+ vm_page_wire(m);
+ }
+ } else {
+ vm_page_unwire(m, TRUE);
+ }
+ vm_page_unlock_queues();
+
+ } else {
+ if (kr != KERN_SUCCESS) {
+ vm_page_lockspin_queues();
+ vm_page_deactivate(m);
+ vm_page_unlock_queues();
+ } else {
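+ /* If the page is not on the active or inactive queue (or this
+ * is a no_cache mapping) and it is neither wired nor throttled,
+ * pick an appropriate paging queue for it. */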
+ if (((!m->active && !m->inactive) || no_cache) && !VM_PAGE_WIRED(m) && !m->throttled) {
+
+ if ( vm_page_local_q && !no_cache && (*type_of_fault == DBG_COW_FAULT || *type_of_fault == DBG_ZERO_FILL_FAULT) ) {
+ struct vpl *lq;
+ uint32_t lid;
+
+ /*
+ * we got a local queue to stuff this new page on...
+ * it's safe to manipulate local and local_id at this point
+ * since we're behind an exclusive object lock and the
+ * page is not on any global queue.
+ *
+ * we'll use the current cpu number to select the queue;
+ * note that we don't need to disable preemption... we're
+ * going to be behind the local queue's lock to do the real
+ * work
+ */
+ lid = cpu_number();
+
+ lq = &vm_page_local_q[lid].vpl_un.vpl;
+
+ VPL_LOCK(&lq->vpl_lock);
+
+ queue_enter(&lq->vpl_queue, m, vm_page_t, pageq);
+ m->local = TRUE;
+ m->local_id = lid;
+ lq->vpl_count++;
+
+ VPL_UNLOCK(&lq->vpl_lock);
+
+ if (lq->vpl_count > vm_page_local_q_soft_limit) {
+ /*
+ * we're beyond the soft limit for the local queue
+ * vm_page_reactivate_local will 'try' to take
+ * the global page queue lock... if it can't that's
+ * ok... we'll let the queue continue to grow up
+ * to the hard limit... at that point we'll wait
+ * for the lock... once we've got the lock, we'll
+ * transfer all of the pages from the local queue
+ * to the global active queue
+ */
+ vm_page_reactivate_local(lid, FALSE, FALSE);
+ }
+ return kr;
+ }
+
+ vm_page_lockspin_queues();
+ /*
+ * test again now that we hold the page queue lock
+ */
+ if (((!m->active && !m->inactive) || no_cache) && !VM_PAGE_WIRED(m)) {
+
+ /*
+ * If this is a no_cache mapping and the page has never been
+ * mapped before or was previously a no_cache page, then we
+ * want to leave pages in the speculative state so that they
+ * can be readily recycled if free memory runs low. Otherwise
+ * the page is activated as normal.
+ */
+
+ if (no_cache && (!previously_pmapped || m->no_cache)) {
+ m->no_cache = TRUE;
+
+ if (m->active || m->inactive)
+ VM_PAGE_QUEUES_REMOVE(m);
+
+ if (!m->speculative)
+ vm_page_speculate(m, TRUE);
+
+ } else if (!m->active && !m->inactive)
+ vm_page_activate(m);
+
+ }
+
+ vm_page_unlock_queues();
+ }
+ }
+ }
+ return kr;
+}
+
+
+/*
+ * Routine: vm_fault
+ * Purpose:
+ * Handle page faults, including pseudo-faults
+ * used to change the wiring status of pages.
+ * Returns:
+ * Explicit continuations have been removed.
+ * Implementation:
+ * vm_fault and vm_fault_page save mucho state
+ * in the moral equivalent of a closure. The state
+ * structure is allocated when first entering vm_fault
+ * and deallocated when leaving vm_fault.
+ */
+
+extern int _map_enter_debug;
+
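+ /* counters for object collapses attempted (or skipped) during fault handling */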
+unsigned long vm_fault_collapse_total = 0;
+unsigned long vm_fault_collapse_skipped = 0;
+
+kern_return_t
+vm_fault(
+ vm_map_t map,
+ vm_map_offset_t vaddr,
+ vm_prot_t fault_type,
+ boolean_t change_wiring,
+ int interruptible,
+ pmap_t caller_pmap,
+ vm_map_offset_t caller_pmap_addr)
+{
+ vm_map_version_t version; /* Map version for verification */
+ boolean_t wired; /* Should mapping be wired down? */
+ vm_object_t object; /* Top-level object */
+ vm_object_offset_t offset; /* Top-level offset */
+ vm_prot_t prot; /* Protection for mapping */
+ vm_object_t old_copy_object; /* Saved copy object */
+ vm_page_t result_page; /* Result of vm_fault_page */
+ vm_page_t top_page; /* Placeholder page */
+ kern_return_t kr;
+
+ vm_page_t m; /* Fast access to result_page */
+ kern_return_t error_code;
+ vm_object_t cur_object;
+ vm_object_offset_t cur_offset;
+ vm_page_t cur_m;
+ vm_object_t new_object;
+ int type_of_fault;
+ pmap_t pmap;
+ boolean_t interruptible_state;
+ vm_map_t real_map = map;
+ vm_map_t original_map = map;
+ vm_prot_t original_fault_type;
+ struct vm_object_fault_info fault_info;
+ boolean_t need_collapse = FALSE;
+ int object_lock_type = 0;
+ int cur_object_lock_type;
+ vm_object_t top_object = VM_OBJECT_NULL;
+
+
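+ /* emit a kdebug tracepoint marking entry into the fault handler;
+ * the matching DBG_FUNC_END events below mark the exit paths */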
+ KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_START,
+ (int)((uint64_t)vaddr >> 32),
+ (int)vaddr,
+ 0,
+ 0,
+ 0);
+
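+ /* page faults cannot be serviced with preemption disabled
+ * (e.g. while holding a spin lock), so fail the fault outright */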
+ if (get_preemption_level() != 0) {
+ KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_END,
+ (int)((uint64_t)vaddr >> 32),
+ (int)vaddr,
+ KERN_FAILURE,
+ 0,
+ 0);
+
+ return (KERN_FAILURE);
+ }
+
+ interruptible_state = thread_interrupt_level(interruptible);
+
+ VM_STAT_INCR(faults);
+ current_task()->faults++;
+ original_fault_type = fault_type;
+
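+ /* a write fault will need to modify the top object, so take its
+ * lock exclusive from the start; read faults can begin with a
+ * shared lock and upgrade later if necessary */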
+ if (fault_type & VM_PROT_WRITE)
+ object_lock_type = OBJECT_LOCK_EXCLUSIVE;
+ else
+ object_lock_type = OBJECT_LOCK_SHARED;
+
+ cur_object_lock_type = OBJECT_LOCK_SHARED;
+
+RetryFault:
+ /*
+ * assume we will hit a page in the cache;
+ * otherwise, explicitly override this with
+ * the real fault type once we determine it
+ */
+ type_of_fault = DBG_CACHE_HIT_FAULT;
+
+ /*
+ * Find the backing store object and offset into
+ * it to begin the search.
+ */
+ fault_type = original_fault_type;
+ map = original_map;
+ vm_map_lock_read(map);
+
+ kr = vm_map_lookup_locked(&map, vaddr, fault_type,
+ object_lock_type, &version,
+ &object, &offset, &prot, &wired,
+ &fault_info,
+ &real_map);
+
+ if (kr != KERN_SUCCESS) {
+ vm_map_unlock_read(map);
+ goto done;
+ }
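+ /* real_map is the map that actually contains the mapping
+ * (possibly a submap of the original map); use its physical map */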
+ pmap = real_map->pmap;
+ fault_info.interruptible = interruptible;
+ fault_info.stealth = FALSE;
+ fault_info.mark_zf_absent = FALSE;
+
+ /*
+ * If the page is wired, we must fault for the current protection
+ * value, to avoid further faults.
+ */
+ if (wired) {
+ fault_type = prot | VM_PROT_WRITE;
+ /*
+ * since we're treating this fault as a 'write'
+ * we must hold the top object lock exclusively
+ */
+ if (object_lock_type == OBJECT_LOCK_SHARED) {
+
+ object_lock_type = OBJECT_LOCK_EXCLUSIVE;
+
+ if (vm_object_lock_upgrade(object) == FALSE) {
+ /*
+ * couldn't upgrade, so explicitly
+ * take the lock exclusively
+ */
+ vm_object_lock(object);
+ }
+ }
+ }
+
+#if VM_FAULT_CLASSIFY
+ /*
+ * Temporary data gathering code
+ */
+ vm_fault_classify(object, offset, fault_type);
+#endif
+ /*
+ * Fast fault code. The basic idea is to do as much as
+ * possible while holding the map lock and object locks.
+ * Busy pages are not used until the object lock has to