+ case VM_FAULT_SUCCESS:
+
+ if ( !dst_page->absent) {
+ PAGE_WAKEUP_DONE(dst_page);
+ } else {
+ /*
+ * we only get back an absent page if we
+ * requested that it not be zero-filled
+ * because we are about to fill it via I/O
+ *
+ * absent pages should be left BUSY
+ * to prevent them from being faulted
+ * into an address space before we've
+ * had a chance to complete the I/O on
+ * them since they may contain info that
+ * shouldn't be seen by the faulting task
+ */
+ }
+ /*
+ * Release paging references and
+ * top-level placeholder page, if any.
+ */
+ if (top_page != VM_PAGE_NULL) {
+ vm_object_t local_object;
+
+ local_object = top_page->object;
+
+ if (top_page->object != dst_page->object) {
+ vm_object_lock(local_object);
+ VM_PAGE_FREE(top_page);
+ vm_object_paging_end(local_object);
+ vm_object_unlock(local_object);
+ } else {
+ VM_PAGE_FREE(top_page);
+ vm_object_paging_end(local_object);
+ }
+ }
+ vm_object_paging_end(object);
+ break;
+
+ case VM_FAULT_RETRY:
+ vm_object_lock(object);
+ break;
+
+ case VM_FAULT_FICTITIOUS_SHORTAGE:
+ vm_page_more_fictitious();
+
+ vm_object_lock(object);
+ break;
+
+ case VM_FAULT_MEMORY_SHORTAGE:
+ if (vm_page_wait(interruptible)) {
+ vm_object_lock(object);
+ break;
+ }
+ /* fall thru */
+
+ case VM_FAULT_INTERRUPTED:
+ error_code = MACH_SEND_INTERRUPTED;
+ /* fall thru */
+ case VM_FAULT_MEMORY_ERROR:
+ memory_error:
+ ret = (error_code ? error_code : KERN_MEMORY_ERROR);
+
+ vm_object_lock(object);
+ goto return_err;
+
+ case VM_FAULT_SUCCESS_NO_VM_PAGE:
+ /* success but no page: fail */
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+ goto memory_error;
+
+ default:
+ panic("vm_object_iopl_request: unexpected error"
+ " 0x%x from vm_fault_page()\n", result);
+ }
+ } while (result != VM_FAULT_SUCCESS);
+
+ }
+
+ if (upl->flags & UPL_KERNEL_OBJECT)
+ goto record_phys_addr;
+
+ if (dst_page->cleaning) {
+ /*
+ * Someone else is cleaning this page in place.
+ * In theory, we should be able to proceed and use this
+ * page, but they'll probably end up clearing the "busy"
+ * bit on it in upl_commit_range() even though they didn't
+ * set it... that would clear our "busy" bit and open
+ * us up to race conditions.
+ * We'd better wait for the cleaning to complete and
+ * then try again.
+ */
+ vm_object_iopl_request_sleep_for_cleaning++;
+ PAGE_SLEEP(object, dst_page, THREAD_UNINT);
+ continue;
+ }
+ if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) &&
+ dst_page->phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) {
+ vm_page_t low_page;
+ int refmod;
+
+ /*
+ * support devices that can't DMA above 32 bits
+ * by substituting pages from a pool of low address
+ * memory for any pages we find above the 4G mark...
+ * we can't substitute if the page is already wired,
+ * because we don't know whether that physical address has been
+ * handed out to some other 64 bit capable DMA device to use
+ */
+ if (VM_PAGE_WIRED(dst_page)) {
+ ret = KERN_PROTECTION_FAILURE;
+ goto return_err;
+ }
+ low_page = vm_page_grablo();
+
+ if (low_page == VM_PAGE_NULL) {
+ ret = KERN_RESOURCE_SHORTAGE;
+ goto return_err;
+ }
+ /*
+ * from here until the vm_page_replace completes
+ * we mustn't drop the object lock... we don't
+ * want anyone refaulting this page in and using
+ * it after we disconnect it... we want the fault
+ * to find the new page being substituted.
+ */
+ if (dst_page->pmapped)
+ refmod = pmap_disconnect(dst_page->phys_page);
+ else
+ refmod = 0;
+
+ if ( !dst_page->absent)
+ vm_page_copy(dst_page, low_page);
+
+ low_page->reference = dst_page->reference;
+ low_page->dirty = dst_page->dirty;
+ low_page->absent = dst_page->absent;
+
+ if (refmod & VM_MEM_REFERENCED)
+ low_page->reference = TRUE;
+ if (refmod & VM_MEM_MODIFIED)
+ low_page->dirty = TRUE;
+
+ vm_page_replace(low_page, object, dst_offset);
+
+ dst_page = low_page;
+ /*
+ * vm_page_grablo returned the page marked
+ * BUSY... we don't need a PAGE_WAKEUP_DONE
+ * here, because we've never dropped the object lock
+ */
+ if ( !dst_page->absent)
+ dst_page->busy = FALSE;
+ }
+ if ( !dst_page->busy)
+ dwp->dw_mask |= DW_vm_page_wire;
+
+ if (cntrl_flags & UPL_BLOCK_ACCESS) {
+ /*
+ * Mark the page "busy" to block any future page fault
+ * on this page. We'll also remove the mapping
+ * of all these pages before leaving this routine.
+ */
+ assert(!dst_page->fictitious);
+ dst_page->busy = TRUE;
+ }
+ /*
+ * expect the page to be used
+ * page queues lock must be held to set 'reference'
+ */
+ dwp->dw_mask |= DW_set_reference;
+
+ if (!(cntrl_flags & UPL_COPYOUT_FROM))
+ dst_page->dirty = TRUE;
+record_phys_addr:
+ if (dst_page->busy)
+ upl->flags |= UPL_HAS_BUSY;
+
+ pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
+ assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
+ lite_list[pg_num>>5] |= 1 << (pg_num & 31);
+
+ if (dst_page->phys_page > upl->highest_page)
+ upl->highest_page = dst_page->phys_page;
+
+ if (user_page_list) {
+ user_page_list[entry].phys_addr = dst_page->phys_page;
+ user_page_list[entry].pageout = dst_page->pageout;
+ user_page_list[entry].absent = dst_page->absent;
+ user_page_list[entry].dirty = dst_page->dirty;
+ user_page_list[entry].precious = dst_page->precious;
+ user_page_list[entry].device = FALSE;
+ if (dst_page->clustered == TRUE)
+ user_page_list[entry].speculative = dst_page->speculative;
+ else
+ user_page_list[entry].speculative = FALSE;
+ user_page_list[entry].cs_validated = dst_page->cs_validated;
+ user_page_list[entry].cs_tainted = dst_page->cs_tainted;
+ }
+ if (object != kernel_object) {
+ /*
+ * someone is explicitly grabbing this page...
+ * update clustered and speculative state
+ */
+ VM_PAGE_CONSUME_CLUSTERED(dst_page);
+ }
+ entry++;
+ dst_offset += PAGE_SIZE_64;
+ xfer_size -= PAGE_SIZE;
+
+ if (dwp->dw_mask) {
+ if (dst_page->busy == FALSE) {
+ /*
+ * dw_do_work may need to drop the object lock...
+ * if it does, we need the pages it's looking at to
+ * be held stable via the busy bit.
+ */
+ dst_page->busy = TRUE;
+ dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
+ }
+ dwp->dw_m = dst_page;
+ dwp++;
+ dw_count++;
+
+ if (dw_count >= DELAYED_WORK_LIMIT) {
+ dw_do_work(object, &dw_array[0], dw_count);
+
+ dwp = &dw_array[0];
+ dw_count = 0;
+ }
+ }
+ }
+ if (dw_count)
+ dw_do_work(object, &dw_array[0], dw_count);
+
+ if (page_list_count != NULL) {
+ if (upl->flags & UPL_INTERNAL)
+ *page_list_count = 0;
+ else if (*page_list_count > entry)
+ *page_list_count = entry;
+ }
+ vm_object_unlock(object);
+
+ if (cntrl_flags & UPL_BLOCK_ACCESS) {
+ /*
+ * We've marked all the pages "busy" so that future
+ * page faults will block.
+ * Now remove the mapping for these pages, so that they
+ * can't be accessed without causing a page fault.
+ */
+ vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
+ PMAP_NULL, 0, VM_PROT_NONE);
+ assert(!object->blocked_access);
+ object->blocked_access = TRUE;
+ }
+ return KERN_SUCCESS;
+
+return_err:
+ dw_index = 0;
+
+ for (; offset < dst_offset; offset += PAGE_SIZE) {
+ boolean_t need_unwire;
+
+ dst_page = vm_page_lookup(object, offset);
+
+ if (dst_page == VM_PAGE_NULL)
+ panic("vm_object_iopl_request: Wired page missing. \n");
+
+ /*
+ * if we've already processed this page in an earlier
+ * dw_do_work, we need to undo the wiring... we will
+ * leave the dirty and reference bits on if they
+ * were set, since we don't have a good way of knowing
+ * what the previous state was and we won't get here
+ * under any normal circumstances... we will always
+ * clear BUSY and wake up any waiters via vm_page_free
+ * or PAGE_WAKEUP_DONE
+ */
+ need_unwire = TRUE;
+
+ if (dw_count) {
+ if (dw_array[dw_index].dw_m == dst_page) {
+ /*
+ * still in the deferred work list
+ * which means we haven't yet called
+ * vm_page_wire on this page
+ */
+ need_unwire = FALSE;
+
+ dw_index++;
+ dw_count--;
+ }
+ }
+ vm_page_lock_queues();
+
+ if (dst_page->absent) {
+ vm_page_free(dst_page);
+
+ need_unwire = FALSE;
+ } else {
+ if (need_unwire == TRUE)
+ vm_page_unwire(dst_page, TRUE);
+
+ PAGE_WAKEUP_DONE(dst_page);
+ }
+ vm_page_unlock_queues();
+
+ if (need_unwire == TRUE)
+ VM_STAT_INCR(reactivations);
+ }
+#if UPL_DEBUG
+ upl->upl_state = 2;
+#endif
+ if (! (upl->flags & UPL_KERNEL_OBJECT)) {
+ vm_object_activity_end(object);
+ }
+ vm_object_unlock(object);
+ upl_destroy(upl);
+
+ return ret;
+}
+
+kern_return_t
+upl_transpose(
+ upl_t upl1,
+ upl_t upl2)
+{
+ kern_return_t retval;
+ boolean_t upls_locked;
+ vm_object_t object1, object2;
+
+ if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2 || ((upl1->flags & UPL_VECTOR)==UPL_VECTOR) || ((upl2->flags & UPL_VECTOR)==UPL_VECTOR)) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ upls_locked = FALSE;
+
+ /*
+ * Since we need to lock both UPLs at the same time,
+ * avoid deadlocks by always taking locks in the same order.
+ */
+ if (upl1 < upl2) {
+ upl_lock(upl1);
+ upl_lock(upl2);
+ } else {
+ upl_lock(upl2);
+ upl_lock(upl1);
+ }
+ upls_locked = TRUE; /* the UPLs will need to be unlocked */
+
+ object1 = upl1->map_object;
+ object2 = upl2->map_object;
+
+ if (upl1->offset != 0 || upl2->offset != 0 ||
+ upl1->size != upl2->size) {
+ /*
+ * We deal only with full objects, not subsets.
+ * That's because we exchange the entire backing store info
+ * for the objects: pager, resident pages, etc... We can't do
+ * only part of it.
+ */
+ retval = KERN_INVALID_VALUE;
+ goto done;
+ }
+
+ /*
+ * Transpose the VM objects' backing store.
+ */
+ retval = vm_object_transpose(object1, object2,
+ (vm_object_size_t) upl1->size);
+
+ if (retval == KERN_SUCCESS) {
+ /*
+ * Make each UPL point to the correct VM object, i.e. the
+ * object holding the pages that the UPL refers to...
+ */
+#if UPL_DEBUG
+ queue_remove(&object1->uplq, upl1, upl_t, uplq);
+ queue_remove(&object2->uplq, upl2, upl_t, uplq);
+#endif
+ upl1->map_object = object2;
+ upl2->map_object = object1;
+#if UPL_DEBUG
+ queue_enter(&object1->uplq, upl2, upl_t, uplq);
+ queue_enter(&object2->uplq, upl1, upl_t, uplq);
+#endif
+ }
+
+done:
+ /*
+ * Cleanup.
+ */
+ if (upls_locked) {
+ upl_unlock(upl1);
+ upl_unlock(upl2);
+ upls_locked = FALSE;
+ }
+
+ return retval;
+}
+
+/*
+ * ENCRYPTED SWAP:
+ *
+ * Rationale: the user might have some encrypted data on disk (via
+ * FileVault or any other mechanism). That data is then decrypted in
+ * memory, which is safe as long as the machine is secure. But that
+ * decrypted data in memory could be paged out to disk by the default
+ * pager. The data would then be stored on disk in the clear (not encrypted)
+ * and it could be accessed by anyone who gets physical access to the
+ * disk (if the laptop or the disk gets stolen for example). This weakens
+ * the security offered by FileVault.
+ *
+ * Solution: the default pager will optionally request that all the
+ * pages it gathers for pageout be encrypted, via the UPL interfaces,
+ * before it sends this UPL to disk via the vnode_pageout() path.
+ *
+ * Notes:
+ *
+ * To avoid disrupting the VM LRU algorithms, we want to keep the
+ * clean-in-place mechanisms, which allow us to send some extra pages to
+ * swap (clustering) without actually removing them from the user's
+ * address space. We don't want the user to unknowingly access encrypted
+ * data, so we have to actually remove the encrypted pages from the page
+ * table. When the user accesses the data, the hardware will fail to
+ * locate the virtual page in its page table and will trigger a page
+ * fault. We can then decrypt the page and enter it in the page table
+ * again. Whenever we allow the user to access the contents of a page,
+ * we have to make sure it's not encrypted.
+ *
+ *
+ */
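+/*
+ * Illustrative sketch only (the real check lives in the fault path, not
+ * in this file): the decrypt-on-fault scheme described above amounts to
+ * testing the page's "encrypted" bit before the page is entered in any
+ * pmap.  Here "m" is just a stand-in for the faulting vm_page_t:
+ *
+ *	if (m->encrypted) {
+ *		// restore the cleartext contents before anyone maps it
+ *		vm_page_decrypt(m, 0);
+ *	}
+ *	ASSERT_PAGE_DECRYPTED(m);
+ *	// only now is it safe to enter "m" in the user's pmap
+ */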
+/*
+ * ENCRYPTED SWAP:
+ * Reserve of virtual addresses in the kernel address space.
+ * We need to map the physical pages in the kernel, so that we
+ * can call the encryption/decryption routines with a kernel
+ * virtual address. We keep this pool of pre-allocated kernel
+ * virtual addresses so that we don't have to scan the kernel's
+ * virtual address space each time we need to encrypt or decrypt
+ * a physical page.
+ * It would be nice to be able to encrypt and decrypt in physical
+ * mode but that might not always be more efficient...
+ */
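+/*
+ * Pool layout sketch (this just restates what vm_paging_map_object() and
+ * vm_paging_unmap_object() below do with the globals that follow): slot
+ * "i" of the pool covers one page of kernel virtual address space, so the
+ * two directions of the slot<->address mapping are simply
+ *
+ *	page_map_offset = vm_paging_base_address + (i * PAGE_SIZE);
+ *	i = (int) ((page_map_offset - vm_paging_base_address) >> PAGE_SHIFT);
+ *
+ * with vm_paging_page_inuse[i] recording whether slot "i" is taken and
+ * vm_paging_lock protecting that array.
+ */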
+decl_simple_lock_data(,vm_paging_lock)
+#define VM_PAGING_NUM_PAGES 64
+vm_map_offset_t vm_paging_base_address = 0;
+boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
+int vm_paging_max_index = 0;
+int vm_paging_page_waiter = 0;
+int vm_paging_page_waiter_total = 0;
+unsigned long vm_paging_no_kernel_page = 0;
+unsigned long vm_paging_objects_mapped = 0;
+unsigned long vm_paging_pages_mapped = 0;
+unsigned long vm_paging_objects_mapped_slow = 0;
+unsigned long vm_paging_pages_mapped_slow = 0;
+
+void
+vm_paging_map_init(void)
+{
+ kern_return_t kr;
+ vm_map_offset_t page_map_offset;
+ vm_map_entry_t map_entry;
+
+ assert(vm_paging_base_address == 0);
+
+ /*
+ * Initialize our pool of pre-allocated kernel
+ * virtual addresses.
+ */
+ page_map_offset = 0;
+ kr = vm_map_find_space(kernel_map,
+ &page_map_offset,
+ VM_PAGING_NUM_PAGES * PAGE_SIZE,
+ 0,
+ 0,
+ &map_entry);
+ if (kr != KERN_SUCCESS) {
+ panic("vm_paging_map_init: kernel_map full\n");
+ }
+ map_entry->object.vm_object = kernel_object;
+ map_entry->offset = page_map_offset;
+ vm_object_reference(kernel_object);
+ vm_map_unlock(kernel_map);
+
+ assert(vm_paging_base_address == 0);
+ vm_paging_base_address = page_map_offset;
+}
+
+/*
+ * ENCRYPTED SWAP:
+ * vm_paging_map_object:
+ * Maps part of a VM object's pages in the kernel
+ * virtual address space, using the pre-allocated
+ * kernel virtual addresses, if possible.
+ * Context:
+ * The VM object is locked. This lock will get
+ * dropped and re-acquired though, so the caller
+ * must make sure the VM object is kept alive
+ * (by holding a VM map that has a reference
+ * on it, for example, or taking an extra reference).
+ * The page should also be kept busy to prevent
+ * it from being reclaimed.
+ */
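+/*
+ * Usage sketch: this mirrors how vm_page_encrypt() and vm_page_decrypt()
+ * below drive the mapping, and is shown only to make the locking contract
+ * above concrete (the local names are theirs, not additional API):
+ *
+ *	vm_object_paging_begin(object);		keep the object alive
+ *	kernel_mapping_size = PAGE_SIZE;
+ *	kr = vm_paging_map_object(&kernel_mapping_offset, page, object,
+ *				  page->offset, &kernel_mapping_size,
+ *				  VM_PROT_READ | VM_PROT_WRITE, FALSE);
+ *	... access the page via kernel_mapping_offset ...
+ *	vm_paging_unmap_object(object, kernel_mapping_offset,
+ *			       kernel_mapping_offset + kernel_mapping_size);
+ *	vm_object_paging_end(object);
+ */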
+kern_return_t
+vm_paging_map_object(
+ vm_map_offset_t *address,
+ vm_page_t page,
+ vm_object_t object,
+ vm_object_offset_t offset,
+ vm_map_size_t *size,
+ vm_prot_t protection,
+ boolean_t can_unlock_object)
+{
+ kern_return_t kr;
+ vm_map_offset_t page_map_offset;
+ vm_map_size_t map_size;
+ vm_object_offset_t object_offset;
+ int i;
+
+
+ if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
+ assert(page->busy);
+ /*
+ * Use one of the pre-allocated kernel virtual addresses
+ * and just enter the VM page in the kernel address space
+ * at that virtual address.
+ */
+ simple_lock(&vm_paging_lock);
+
+ /*
+ * Try and find an available kernel virtual address
+ * from our pre-allocated pool.
+ */
+ page_map_offset = 0;
+ for (;;) {
+ for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
+ if (vm_paging_page_inuse[i] == FALSE) {
+ page_map_offset =
+ vm_paging_base_address +
+ (i * PAGE_SIZE);
+ break;
+ }
+ }
+ if (page_map_offset != 0) {
+ /* found a space to map our page! */
+ break;
+ }
+
+ if (can_unlock_object) {
+ /*
+ * If we can afford to unlock the VM object,
+ * let's take the slow path now...
+ */
+ break;
+ }
+ /*
+ * We can't afford to unlock the VM object, so
+ * let's wait for a space to become available...
+ */
+ vm_paging_page_waiter_total++;
+ vm_paging_page_waiter++;
+ thread_sleep_fast_usimple_lock(&vm_paging_page_waiter,
+ &vm_paging_lock,
+ THREAD_UNINT);
+ vm_paging_page_waiter--;
+ /* ... and try again */
+ }
+
+ if (page_map_offset != 0) {
+ /*
+ * We found a kernel virtual address;
+ * map the physical page to that virtual address.
+ */
+ if (i > vm_paging_max_index) {
+ vm_paging_max_index = i;
+ }
+ vm_paging_page_inuse[i] = TRUE;
+ simple_unlock(&vm_paging_lock);
+
+ if (page->pmapped == FALSE) {
+ pmap_sync_page_data_phys(page->phys_page);
+ }
+ page->pmapped = TRUE;
+
+ /*
+ * Keep the VM object locked over the PMAP_ENTER
+ * and the actual use of the page by the kernel,
+ * or this pmap mapping might get undone by a
+ * vm_object_pmap_protect() call...
+ */
+ PMAP_ENTER(kernel_pmap,
+ page_map_offset,
+ page,
+ protection,
+ ((int) page->object->wimg_bits &
+ VM_WIMG_MASK),
+ TRUE);
+ vm_paging_objects_mapped++;
+ vm_paging_pages_mapped++;
+ *address = page_map_offset;
+
+ /* all done and mapped, ready to use! */
+ return KERN_SUCCESS;
+ }
+
+ /*
+ * We ran out of pre-allocated kernel virtual
+ * addresses. Just map the page in the kernel
+ * the slow and regular way.
+ */
+ vm_paging_no_kernel_page++;
+ simple_unlock(&vm_paging_lock);
+ }
+
+ if (! can_unlock_object) {
+ return KERN_NOT_SUPPORTED;
+ }
+
+ object_offset = vm_object_trunc_page(offset);
+ map_size = vm_map_round_page(*size);
+
+ /*
+ * Try and map the required range of the object
+ * in the kernel_map
+ */
+
+ vm_object_reference_locked(object); /* for the map entry */
+ vm_object_unlock(object);
+
+ kr = vm_map_enter(kernel_map,
+ address,
+ map_size,
+ 0,
+ VM_FLAGS_ANYWHERE,
+ object,
+ object_offset,
+ FALSE,
+ protection,
+ VM_PROT_ALL,
+ VM_INHERIT_NONE);
+ if (kr != KERN_SUCCESS) {
+ *address = 0;
+ *size = 0;
+ vm_object_deallocate(object); /* for the map entry */
+ vm_object_lock(object);
+ return kr;
+ }
+
+ *size = map_size;
+
+ /*
+ * Enter the mapped pages in the page table now.
+ */
+ vm_object_lock(object);
+ /*
+ * VM object must be kept locked from before PMAP_ENTER()
+ * until after the kernel is done accessing the page(s).
+ * Otherwise, the pmap mappings in the kernel could be
+ * undone by a call to vm_object_pmap_protect().
+ */
+
+ for (page_map_offset = 0;
+ map_size != 0;
+ map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
+ unsigned int cache_attr;
+
+ page = vm_page_lookup(object, offset + page_map_offset);
+ if (page == VM_PAGE_NULL) {
+ printf("vm_paging_map_object: no page !?");
+ vm_object_unlock(object);
+ kr = vm_map_remove(kernel_map, *address, *size,
+ VM_MAP_NO_FLAGS);
+ assert(kr == KERN_SUCCESS);
+ *address = 0;
+ *size = 0;
+ vm_object_lock(object);
+ return KERN_MEMORY_ERROR;
+ }
+ if (page->pmapped == FALSE) {
+ pmap_sync_page_data_phys(page->phys_page);
+ }
+ page->pmapped = TRUE;
+ cache_attr = ((unsigned int) object->wimg_bits) & VM_WIMG_MASK;
+
+ //assert(pmap_verify_free(page->phys_page));
+ PMAP_ENTER(kernel_pmap,
+ *address + page_map_offset,
+ page,
+ protection,
+ cache_attr,
+ TRUE);
+ }
+
+ vm_paging_objects_mapped_slow++;
+ vm_paging_pages_mapped_slow += (unsigned long) (map_size / PAGE_SIZE_64);
+
+ return KERN_SUCCESS;
+}
+
+/*
+ * ENCRYPTED SWAP:
+ * vm_paging_unmap_object:
+ * Unmaps part of a VM object's pages from the kernel
+ * virtual address space.
+ * Context:
+ * The VM object is locked. This lock will get
+ * dropped and re-acquired though.
+ */
+void
+vm_paging_unmap_object(
+ vm_object_t object,
+ vm_map_offset_t start,
+ vm_map_offset_t end)
+{
+ kern_return_t kr;
+ int i;
+
+ if ((vm_paging_base_address == 0) ||
+ (start < vm_paging_base_address) ||
+ (end > (vm_paging_base_address
+ + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) {
+ /*
+ * We didn't use our pre-allocated pool of
+ * kernel virtual addresses.  Deallocate the
+ * virtual memory.
+ */
+ if (object != VM_OBJECT_NULL) {
+ vm_object_unlock(object);
+ }
+ kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS);
+ if (object != VM_OBJECT_NULL) {
+ vm_object_lock(object);
+ }
+ assert(kr == KERN_SUCCESS);
+ } else {
+ /*
+ * We used a kernel virtual address from our
+ * pre-allocated pool. Put it back in the pool
+ * for next time.
+ */
+ assert(end - start == PAGE_SIZE);
+ i = (int) ((start - vm_paging_base_address) >> PAGE_SHIFT);
+ assert(i >= 0 && i < VM_PAGING_NUM_PAGES);
+
+ /* undo the pmap mapping */
+ pmap_remove(kernel_pmap, start, end);
+
+ simple_lock(&vm_paging_lock);
+ vm_paging_page_inuse[i] = FALSE;
+ if (vm_paging_page_waiter) {
+ thread_wakeup(&vm_paging_page_waiter);
+ }
+ simple_unlock(&vm_paging_lock);
+ }
+}
+
+#if CRYPTO
+/*
+ * Encryption data.
+ * "iv" is the "initial vector". Ideally, we want to
+ * have a different one for each page we encrypt, so that
+ * crackers can't find encryption patterns too easily.
+ */
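+/*
+ * How the per-page IV is derived (summarized from vm_page_encrypt() and
+ * vm_page_decrypt() below; "iv" stands for their encrypt_iv/decrypt_iv
+ * union): the {pager, paging_offset} pair identifying the page is packed
+ * into a single AES block, and that block is itself encrypted with the
+ * null IV, yielding a distinct but reproducible IV for each page:
+ *
+ *	bzero(&iv.aes_iv[0], sizeof (iv.aes_iv));
+ *	iv.vm.pager_object = page->object->pager;
+ *	iv.vm.paging_offset = page->object->paging_offset + page->offset;
+ *	aes_encrypt_cbc(&iv.aes_iv[0], swap_crypt_null_iv, 1,
+ *			&iv.aes_iv[0], &swap_crypt_ctx.encrypt);
+ */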
+#define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */
+boolean_t swap_crypt_ctx_initialized = FALSE;
+aes_32t swap_crypt_key[8]; /* big enough for a 256-bit key */
+aes_ctx swap_crypt_ctx;
+const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE] = {0xa, };
+
+#if DEBUG
+boolean_t swap_crypt_ctx_tested = FALSE;
+unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
+unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
+unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
+#endif /* DEBUG */
+
+/*
+ * Initialize the encryption context: key and key size.
+ */
+void swap_crypt_ctx_initialize(void); /* forward */
+void
+swap_crypt_ctx_initialize(void)
+{
+ unsigned int i;
+
+ /*
+ * No need for locking to protect swap_crypt_ctx_initialized
+ * because the first use of encryption will come from the
+ * pageout thread (we won't pagein before there's been a pageout)
+ * and there's only one pageout thread.
+ */
+ if (swap_crypt_ctx_initialized == FALSE) {
+ for (i = 0;
+ i < (sizeof (swap_crypt_key) /
+ sizeof (swap_crypt_key[0]));
+ i++) {
+ swap_crypt_key[i] = random();
+ }
+ aes_encrypt_key((const unsigned char *) swap_crypt_key,
+ SWAP_CRYPT_AES_KEY_SIZE,
+ &swap_crypt_ctx.encrypt);
+ aes_decrypt_key((const unsigned char *) swap_crypt_key,
+ SWAP_CRYPT_AES_KEY_SIZE,
+ &swap_crypt_ctx.decrypt);
+ swap_crypt_ctx_initialized = TRUE;
+ }
+
+#if DEBUG
+ /*
+ * Validate the encryption algorithms.
+ */
+ if (swap_crypt_ctx_tested == FALSE) {
+ /* initialize */
+ for (i = 0; i < 4096; i++) {
+ swap_crypt_test_page_ref[i] = (char) i;
+ }
+ /* encrypt */
+ aes_encrypt_cbc(swap_crypt_test_page_ref,
+ swap_crypt_null_iv,
+ PAGE_SIZE / AES_BLOCK_SIZE,
+ swap_crypt_test_page_encrypt,
+ &swap_crypt_ctx.encrypt);
+ /* decrypt */
+ aes_decrypt_cbc(swap_crypt_test_page_encrypt,
+ swap_crypt_null_iv,
+ PAGE_SIZE / AES_BLOCK_SIZE,
+ swap_crypt_test_page_decrypt,
+ &swap_crypt_ctx.decrypt);
+ /* compare result with original */
+ for (i = 0; i < 4096; i ++) {
+ if (swap_crypt_test_page_decrypt[i] !=
+ swap_crypt_test_page_ref[i]) {
+ panic("encryption test failed");
+ }
+ }
+
+ /* encrypt again */
+ aes_encrypt_cbc(swap_crypt_test_page_decrypt,
+ swap_crypt_null_iv,
+ PAGE_SIZE / AES_BLOCK_SIZE,
+ swap_crypt_test_page_decrypt,
+ &swap_crypt_ctx.encrypt);
+ /* decrypt in place */
+ aes_decrypt_cbc(swap_crypt_test_page_decrypt,
+ swap_crypt_null_iv,
+ PAGE_SIZE / AES_BLOCK_SIZE,
+ swap_crypt_test_page_decrypt,
+ &swap_crypt_ctx.decrypt);
+ for (i = 0; i < 4096; i ++) {
+ if (swap_crypt_test_page_decrypt[i] !=
+ swap_crypt_test_page_ref[i]) {
+ panic("in place encryption test failed");
+ }
+ }
+
+ swap_crypt_ctx_tested = TRUE;
+ }
+#endif /* DEBUG */
+}
+
+/*
+ * ENCRYPTED SWAP:
+ * vm_page_encrypt:
+ * Encrypt the given page, for secure paging.
+ * The page might already be mapped at kernel virtual
+ * address "kernel_mapping_offset". Otherwise, we need
+ * to map it.
+ *
+ * Context:
+ * The page's object is locked, but this lock will be released
+ * and re-acquired.
+ * The page is busy and not accessible by users (not entered in any pmap).
+ */
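+/*
+ * Caller-side sketch (this is what upl_encrypt() below does; it is shown
+ * only to make the contract explicit, "page" being the vm_page_t handed
+ * in): the page must be busy and out of every pmap before encryption:
+ *
+ *	assert(page->busy);			the caller marked it busy
+ *	pmap_disconnect(page->phys_page);	no user mapping remains
+ *	vm_page_encrypt(page, 0);		0: map it in the kernel here
+ */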
+void
+vm_page_encrypt(
+ vm_page_t page,
+ vm_map_offset_t kernel_mapping_offset)
+{
+ kern_return_t kr;
+ vm_map_size_t kernel_mapping_size;
+ vm_offset_t kernel_vaddr;
+ union {
+ unsigned char aes_iv[AES_BLOCK_SIZE];
+ struct {
+ memory_object_t pager_object;
+ vm_object_offset_t paging_offset;
+ } vm;
+ } encrypt_iv;
+
+ if (! vm_pages_encrypted) {
+ vm_pages_encrypted = TRUE;
+ }
+
+ assert(page->busy);
+ assert(page->dirty || page->precious);
+
+ if (page->encrypted) {
+ /*
+ * Already encrypted: no need to do it again.
+ */
+ vm_page_encrypt_already_encrypted_counter++;
+ return;
+ }
+ ASSERT_PAGE_DECRYPTED(page);
+
+ /*
+ * Take a paging-in-progress reference to keep the object
+ * alive even if we have to unlock it (in vm_paging_map_object()
+ * for example)...
+ */
+ vm_object_paging_begin(page->object);
+
+ if (kernel_mapping_offset == 0) {
+ /*
+ * The page hasn't already been mapped in kernel space
+ * by the caller. Map it now, so that we can access
+ * its contents and encrypt them.
+ */
+ kernel_mapping_size = PAGE_SIZE;
+ kr = vm_paging_map_object(&kernel_mapping_offset,
+ page,
+ page->object,
+ page->offset,
+ &kernel_mapping_size,
+ VM_PROT_READ | VM_PROT_WRITE,
+ FALSE);
+ if (kr != KERN_SUCCESS) {
+ panic("vm_page_encrypt: "
+ "could not map page in kernel: 0x%x\n",
+ kr);
+ }
+ } else {
+ kernel_mapping_size = 0;
+ }
+ kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
+
+ if (swap_crypt_ctx_initialized == FALSE) {
+ swap_crypt_ctx_initialize();
+ }
+ assert(swap_crypt_ctx_initialized);
+
+ /*
+ * Prepare an "initial vector" for the encryption.
+ * We use the "pager" and the "paging_offset" for that
+ * page to obfuscate the encrypted data a bit more and
+ * prevent crackers from finding patterns that they could
+ * use to break the key.
+ */
+ bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));
+ encrypt_iv.vm.pager_object = page->object->pager;
+ encrypt_iv.vm.paging_offset =
+ page->object->paging_offset + page->offset;
+
+ /* encrypt the "initial vector" */
+ aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0],
+ swap_crypt_null_iv,
+ 1,
+ &encrypt_iv.aes_iv[0],
+ &swap_crypt_ctx.encrypt);
+
+ /*
+ * Encrypt the page.
+ */
+ aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
+ &encrypt_iv.aes_iv[0],
+ PAGE_SIZE / AES_BLOCK_SIZE,
+ (unsigned char *) kernel_vaddr,
+ &swap_crypt_ctx.encrypt);
+
+ vm_page_encrypt_counter++;
+
+ /*
+ * Unmap the page from the kernel's address space,
+ * if we had to map it ourselves. Otherwise, let
+ * the caller undo the mapping if needed.
+ */
+ if (kernel_mapping_size != 0) {
+ vm_paging_unmap_object(page->object,
+ kernel_mapping_offset,
+ kernel_mapping_offset + kernel_mapping_size);
+ }
+
+ /*
+ * Clear the "reference" and "modified" bits.
+ * This should clean up any impact the encryption had
+ * on them.
+ * The page was kept busy and disconnected from all pmaps,
+ * so it can't have been referenced or modified from user
+ * space.
+ * The software bits will be reset later after the I/O
+ * has completed (in upl_commit_range()).
+ */
+ pmap_clear_refmod(page->phys_page, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
+
+ page->encrypted = TRUE;
+
+ vm_object_paging_end(page->object);
+}
+
+/*
+ * ENCRYPTED SWAP:
+ * vm_page_decrypt:
+ * Decrypt the given page.
+ * The page might already be mapped at kernel virtual
+ * address "kernel_mapping_offset". Otherwise, we need
+ * to map it.
+ *
+ * Context:
+ * The page's VM object is locked but will be unlocked and relocked.
+ * The page is busy and not accessible by users (not entered in any pmap).
+ */
+void
+vm_page_decrypt(
+ vm_page_t page,
+ vm_map_offset_t kernel_mapping_offset)
+{
+ kern_return_t kr;
+ vm_map_size_t kernel_mapping_size;
+ vm_offset_t kernel_vaddr;
+ union {
+ unsigned char aes_iv[AES_BLOCK_SIZE];
+ struct {
+ memory_object_t pager_object;
+ vm_object_offset_t paging_offset;
+ } vm;
+ } decrypt_iv;
+
+ assert(page->busy);
+ assert(page->encrypted);
+
+ /*
+ * Take a paging-in-progress reference to keep the object
+ * alive even if we have to unlock it (in vm_paging_map_object()
+ * for example)...
+ */
+ vm_object_paging_begin(page->object);
+
+ if (kernel_mapping_offset == 0) {
+ /*
+ * The page hasn't already been mapped in kernel space
+ * by the caller. Map it now, so that we can access
+ * its contents and decrypt them.
+ */
+ kernel_mapping_size = PAGE_SIZE;
+ kr = vm_paging_map_object(&kernel_mapping_offset,
+ page,
+ page->object,
+ page->offset,
+ &kernel_mapping_size,
+ VM_PROT_READ | VM_PROT_WRITE,
+ FALSE);
+ if (kr != KERN_SUCCESS) {
+ panic("vm_page_decrypt: "
+ "could not map page in kernel: 0x%x\n",
+ kr);
+ }
+ } else {
+ kernel_mapping_size = 0;
+ }
+ kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
+
+ assert(swap_crypt_ctx_initialized);
+
+ /*
+ * Prepare an "initial vector" for the decryption.
+ * It has to be the same as the "initial vector" we
+ * used to encrypt that page.
+ */
+ bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv));
+ decrypt_iv.vm.pager_object = page->object->pager;
+ decrypt_iv.vm.paging_offset =
+ page->object->paging_offset + page->offset;
+
+ /* encrypt the "initial vector" */
+ aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0],
+ swap_crypt_null_iv,
+ 1,
+ &decrypt_iv.aes_iv[0],
+ &swap_crypt_ctx.encrypt);
+
+ /*
+ * Decrypt the page.
+ */
+ aes_decrypt_cbc((const unsigned char *) kernel_vaddr,
+ &decrypt_iv.aes_iv[0],
+ PAGE_SIZE / AES_BLOCK_SIZE,
+ (unsigned char *) kernel_vaddr,
+ &swap_crypt_ctx.decrypt);
+ vm_page_decrypt_counter++;
+
+ /*
+ * Unmap the page from the kernel's address space,
+ * if we had to map it ourselves. Otherwise, let
+ * the caller undo the mapping if needed.
+ */
+ if (kernel_mapping_size != 0) {
+ vm_paging_unmap_object(page->object,
+ kernel_vaddr,
+ kernel_vaddr + PAGE_SIZE);
+ }
+
+ /*
+ * After decryption, the page is actually clean.
+ * It was encrypted as part of paging, which "cleans"
+ * the "dirty" pages.
+ * No one could access it after it was encrypted
+ * and the decryption doesn't count.
+ */
+ page->dirty = FALSE;
+ assert (page->cs_validated == FALSE);
+ pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
+ page->encrypted = FALSE;
+
+ /*
+ * We've just modified the page's contents via the data cache and part
+ * of the new contents might still be in the cache and not yet in RAM.
+ * Since the page is now available and might get gathered in a UPL to
+ * be part of a DMA transfer from a driver that expects the memory to
+ * be coherent at this point, we have to flush the data cache.
+ */
+ pmap_sync_page_attributes_phys(page->phys_page);
+ /*
+ * Since the page is not mapped yet, some code might assume that it
+ * doesn't need to invalidate the instruction cache when writing to
+ * that page. That code relies on "pmapped" being FALSE, so that the
+ * caches get synchronized when the page is first mapped.
+ */
+ assert(pmap_verify_free(page->phys_page));
+ page->pmapped = FALSE;
+ page->wpmapped = FALSE;
+
+ vm_object_paging_end(page->object);
+}
+
+#if DEVELOPMENT || DEBUG
+unsigned long upl_encrypt_upls = 0;
+unsigned long upl_encrypt_pages = 0;
+#endif
+
+/*
+ * ENCRYPTED SWAP:
+ *
+ * upl_encrypt:
+ * Encrypts all the pages in the UPL, within the specified range.
+ *
+ */
+void
+upl_encrypt(
+ upl_t upl,
+ upl_offset_t crypt_offset,
+ upl_size_t crypt_size)
+{
+ upl_size_t upl_size, subupl_size=crypt_size;
+ upl_offset_t offset_in_upl, subupl_offset=crypt_offset;
+ vm_object_t upl_object;
+ vm_object_offset_t upl_offset;
+ vm_page_t page;
+ vm_object_t shadow_object;
+ vm_object_offset_t shadow_offset;
+ vm_object_offset_t paging_offset;
+ vm_object_offset_t base_offset;
+ int isVectorUPL = 0;
+ upl_t vector_upl = NULL;
+
+ if((isVectorUPL = vector_upl_is_valid(upl)))
+ vector_upl = upl;
+
+process_upl_to_encrypt:
+ if(isVectorUPL) {
+ crypt_size = subupl_size;
+ crypt_offset = subupl_offset;
+ upl = vector_upl_subupl_byoffset(vector_upl, &crypt_offset, &crypt_size);
+ if(upl == NULL)
+ panic("upl_encrypt: Accessing a sub-upl that doesn't exist\n");
+ subupl_size -= crypt_size;
+ subupl_offset += crypt_size;
+ }
+
+#if DEVELOPMENT || DEBUG
+ upl_encrypt_upls++;
+ upl_encrypt_pages += crypt_size / PAGE_SIZE;
+#endif
+ upl_object = upl->map_object;
+ upl_offset = upl->offset;
+ upl_size = upl->size;
+
+ vm_object_lock(upl_object);
+
+ /*
+ * Find the VM object that contains the actual pages.
+ */
+ if (upl_object->pageout) {
+ shadow_object = upl_object->shadow;
+ /*
+ * The offset in the shadow object is actually also
+ * accounted for in upl->offset. It possibly shouldn't be
+ * this way, but for now don't account for it twice.
+ */
+ shadow_offset = 0;
+ assert(upl_object->paging_offset == 0); /* XXX ? */
+ vm_object_lock(shadow_object);
+ } else {
+ shadow_object = upl_object;
+ shadow_offset = 0;
+ }
+
+ paging_offset = shadow_object->paging_offset;
+ vm_object_paging_begin(shadow_object);
+
+ if (shadow_object != upl_object)
+ vm_object_unlock(upl_object);
+
+
+ base_offset = shadow_offset;
+ base_offset += upl_offset;
+ base_offset += crypt_offset;
+ base_offset -= paging_offset;
+
+ assert(crypt_offset + crypt_size <= upl_size);
+
+ for (offset_in_upl = 0;
+ offset_in_upl < crypt_size;
+ offset_in_upl += PAGE_SIZE) {
+ page = vm_page_lookup(shadow_object,
+ base_offset + offset_in_upl);
+ if (page == VM_PAGE_NULL) {
+ panic("upl_encrypt: "
+ "no page for (obj=%p,off=%lld+%d)!\n",
+ shadow_object,
+ base_offset,
+ offset_in_upl);
+ }
+ /*
+ * Disconnect the page from all pmaps, so that nobody can
+ * access it while it's encrypted. After that point, all
+ * accesses to this page will cause a page fault and block
+ * while the page is busy being encrypted. After the
+ * encryption completes, any access will cause a
+ * page fault and the page gets decrypted at that time.
+ */
+ pmap_disconnect(page->phys_page);
+ vm_page_encrypt(page, 0);
+
+ if (vm_object_lock_avoid(shadow_object)) {
+ /*
+ * Give vm_pageout_scan() a chance to convert more
+ * pages from "clean-in-place" to "clean-and-free",
+ * if it's interested in the same pages we selected
+ * in this cluster.
+ */
+ vm_object_unlock(shadow_object);
+ mutex_pause(2);
+ vm_object_lock(shadow_object);
+ }
+ }
+
+ vm_object_paging_end(shadow_object);
+ vm_object_unlock(shadow_object);
+
+ if(isVectorUPL && subupl_size)
+ goto process_upl_to_encrypt;
+}
+
+#else /* CRYPTO */
+void
+upl_encrypt(
+ __unused upl_t upl,
+ __unused upl_offset_t crypt_offset,
+ __unused upl_size_t crypt_size)
+{
+}
+
+void
+vm_page_encrypt(
+ __unused vm_page_t page,
+ __unused vm_map_offset_t kernel_mapping_offset)
+{
+}
+
+void
+vm_page_decrypt(
+ __unused vm_page_t page,
+ __unused vm_map_offset_t kernel_mapping_offset)
+{
+}
+
+#endif /* CRYPTO */
+
+void
+vm_pageout_queue_steal(vm_page_t page, boolean_t queues_locked)
+{
+ boolean_t pageout;
+
+ pageout = page->pageout;