+ vm_map_lock(dst_map);
+
+ /* LP64todo - remove this check when vm_map_commpage64()
+ * no longer has to stuff in a map_entry for the commpage
+ * above the map's max_offset.
+ */
+ if (dst_addr >= dst_map->max_offset) {
+ vm_map_unlock(dst_map);
+ return KERN_INVALID_ADDRESS;
+ }
+
+start_pass_1:
+ if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
+ vm_map_unlock(dst_map);
+ return KERN_INVALID_ADDRESS;
+ }
+ vm_map_clip_start(dst_map,
+ tmp_entry,
+ vm_map_trunc_page(dst_addr,
+ VM_MAP_PAGE_MASK(dst_map)));
+ for (entry = tmp_entry;;) {
+ vm_map_entry_t next = entry->vme_next;
+
+ while (entry->is_sub_map) {
+ vm_map_offset_t sub_start;
+ vm_map_offset_t sub_end;
+ vm_map_offset_t local_end;
+
+ if (entry->in_transition) {
+ /*
+ * Say that we are waiting, and wait for entry.
+ */
+ entry->needs_wakeup = TRUE;
+ vm_map_entry_wait(dst_map, THREAD_UNINT);
+
+ goto start_pass_1;
+ }
+
+ local_end = entry->vme_end;
+ if (!(entry->needs_copy)) {
+ /* if needs_copy we are a COW submap */
+ /* in such a case we just replace so */
+ /* there is no need for the follow- */
+ /* ing check. */
+ encountered_sub_map = TRUE;
+ sub_start = VME_OFFSET(entry);
+
+ if (entry->vme_end < dst_end) {
+ sub_end = entry->vme_end;
+ } else {
+ sub_end = dst_end;
+ }
+ sub_end -= entry->vme_start;
+ sub_end += VME_OFFSET(entry);
+ vm_map_unlock(dst_map);
+
+ kr = vm_map_overwrite_submap_recurse(
+ VME_SUBMAP(entry),
+ sub_start,
+ sub_end - sub_start);
+ if (kr != KERN_SUCCESS) {
+ return kr;
+ }
+ vm_map_lock(dst_map);
+ }
+
+ if (dst_end <= entry->vme_end) {
+ goto start_overwrite;
+ }
+ if (!vm_map_lookup_entry(dst_map, local_end,
+ &entry)) {
+ vm_map_unlock(dst_map);
+ return KERN_INVALID_ADDRESS;
+ }
+ next = entry->vme_next;
+ }
+
+ if (!(entry->protection & VM_PROT_WRITE)) {
+ vm_map_unlock(dst_map);
+ return KERN_PROTECTION_FAILURE;
+ }
+
+ /*
+ * If the entry is in transition, we must wait
+ * for it to exit that state. Anything could happen
+ * when we unlock the map, so start over.
+ */
+ if (entry->in_transition) {
+ /*
+ * Say that we are waiting, and wait for entry.
+ */
+ entry->needs_wakeup = TRUE;
+ vm_map_entry_wait(dst_map, THREAD_UNINT);
+
+ goto start_pass_1;
+ }
+
+/*
+ * our range is contained completely within this map entry
+ */
+ if (dst_end <= entry->vme_end) {
+ break;
+ }
+/*
+ * check that range specified is contiguous region
+ */
+ if ((next == vm_map_to_entry(dst_map)) ||
+ (next->vme_start != entry->vme_end)) {
+ vm_map_unlock(dst_map);
+ return KERN_INVALID_ADDRESS;
+ }
+
+
+ /*
+ * Check for permanent objects in the destination.
+ */
+ if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
+ ((!VME_OBJECT(entry)->internal) ||
+ (VME_OBJECT(entry)->true_share))) {
+ contains_permanent_objects = TRUE;
+ }
+
+ entry = next;
+ }/* for */
+
+start_overwrite:
+ /*
+ * If there are permanent objects in the destination, then
+ * the copy cannot be interrupted.
+ */
+
+ if (interruptible && contains_permanent_objects) {
+ vm_map_unlock(dst_map);
+ return KERN_FAILURE; /* XXX */
+ }
+
+ /*
+ *
+ * Make a second pass, overwriting the data
+ * At the beginning of each loop iteration,
+ * the next entry to be overwritten is "tmp_entry"
+ * (initially, the value returned from the lookup above),
+ * and the starting address expected in that entry
+ * is "start".
+ */
+
+ total_size = copy->size;
+ if (encountered_sub_map) {
+ copy_size = 0;
+ /* re-calculate tmp_entry since we've had the map */
+ /* unlocked */
+ if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
+ vm_map_unlock(dst_map);
+ return KERN_INVALID_ADDRESS;
+ }
+ } else {
+ copy_size = copy->size;
+ }
+
+ base_addr = dst_addr;
+ while (TRUE) {
+ /* deconstruct the copy object and do in parts */
+ /* only in sub_map, interruptable case */
+ vm_map_entry_t copy_entry;
+ vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
+ vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
+ int nentries;
+ int remaining_entries = 0;
+ vm_map_offset_t new_offset = 0;
+
+ for (entry = tmp_entry; copy_size == 0;) {
+ vm_map_entry_t next;
+
+ next = entry->vme_next;
+
+ /* tmp_entry and base address are moved along */
+ /* each time we encounter a sub-map. Otherwise */
+ /* entry can outpase tmp_entry, and the copy_size */
+ /* may reflect the distance between them */
+ /* if the current entry is found to be in transition */
+ /* we will start over at the beginning or the last */
+ /* encounter of a submap as dictated by base_addr */
+ /* we will zero copy_size accordingly. */
+ if (entry->in_transition) {
+ /*
+ * Say that we are waiting, and wait for entry.
+ */
+ entry->needs_wakeup = TRUE;
+ vm_map_entry_wait(dst_map, THREAD_UNINT);
+
+ if (!vm_map_lookup_entry(dst_map, base_addr,
+ &tmp_entry)) {
+ vm_map_unlock(dst_map);
+ return KERN_INVALID_ADDRESS;
+ }
+ copy_size = 0;
+ entry = tmp_entry;
+ continue;
+ }
+ if (entry->is_sub_map) {
+ vm_map_offset_t sub_start;
+ vm_map_offset_t sub_end;
+ vm_map_offset_t local_end;
+
+ if (entry->needs_copy) {
+ /* if this is a COW submap */
+ /* just back the range with a */
+ /* anonymous entry */
+ if (entry->vme_end < dst_end) {
+ sub_end = entry->vme_end;
+ } else {
+ sub_end = dst_end;
+ }
+ if (entry->vme_start < base_addr) {
+ sub_start = base_addr;
+ } else {
+ sub_start = entry->vme_start;
+ }
+ vm_map_clip_end(
+ dst_map, entry, sub_end);
+ vm_map_clip_start(
+ dst_map, entry, sub_start);
+ assert(!entry->use_pmap);
+ assert(!entry->iokit_acct);
+ entry->use_pmap = TRUE;
+ entry->is_sub_map = FALSE;
+ vm_map_deallocate(
+ VME_SUBMAP(entry));
+ VME_OBJECT_SET(entry, VM_OBJECT_NULL);
+ VME_OFFSET_SET(entry, 0);
+ entry->is_shared = FALSE;
+ entry->needs_copy = FALSE;
+ entry->protection = VM_PROT_DEFAULT;
+ entry->max_protection = VM_PROT_ALL;
+ entry->wired_count = 0;
+ entry->user_wired_count = 0;
+ if (entry->inheritance
+ == VM_INHERIT_SHARE) {
+ entry->inheritance = VM_INHERIT_COPY;
+ }
+ continue;
+ }
+ /* first take care of any non-sub_map */
+ /* entries to send */
+ if (base_addr < entry->vme_start) {
+ /* stuff to send */
+ copy_size =
+ entry->vme_start - base_addr;
+ break;
+ }
+ sub_start = VME_OFFSET(entry);
+
+ if (entry->vme_end < dst_end) {
+ sub_end = entry->vme_end;
+ } else {
+ sub_end = dst_end;
+ }
+ sub_end -= entry->vme_start;
+ sub_end += VME_OFFSET(entry);
+ local_end = entry->vme_end;
+ vm_map_unlock(dst_map);
+ copy_size = sub_end - sub_start;
+
+ /* adjust the copy object */
+ if (total_size > copy_size) {
+ vm_map_size_t local_size = 0;
+ vm_map_size_t entry_size;
+
+ nentries = 1;
+ new_offset = copy->offset;
+ copy_entry = vm_map_copy_first_entry(copy);
+ while (copy_entry !=
+ vm_map_copy_to_entry(copy)) {
+ entry_size = copy_entry->vme_end -
+ copy_entry->vme_start;
+ if ((local_size < copy_size) &&
+ ((local_size + entry_size)
+ >= copy_size)) {
+ vm_map_copy_clip_end(copy,
+ copy_entry,
+ copy_entry->vme_start +
+ (copy_size - local_size));
+ entry_size = copy_entry->vme_end -
+ copy_entry->vme_start;
+ local_size += entry_size;
+ new_offset += entry_size;
+ }
+ if (local_size >= copy_size) {
+ next_copy = copy_entry->vme_next;
+ copy_entry->vme_next =
+ vm_map_copy_to_entry(copy);
+ previous_prev =
+ copy->cpy_hdr.links.prev;
+ copy->cpy_hdr.links.prev = copy_entry;
+ copy->size = copy_size;
+ remaining_entries =
+ copy->cpy_hdr.nentries;
+ remaining_entries -= nentries;
+ copy->cpy_hdr.nentries = nentries;
+ break;
+ } else {
+ local_size += entry_size;
+ new_offset += entry_size;
+ nentries++;
+ }
+ copy_entry = copy_entry->vme_next;
+ }
+ }
+
+ if ((entry->use_pmap) && (pmap == NULL)) {
+ kr = vm_map_copy_overwrite_nested(
+ VME_SUBMAP(entry),
+ sub_start,
+ copy,
+ interruptible,
+ VME_SUBMAP(entry)->pmap,
+ TRUE);
+ } else if (pmap != NULL) {
+ kr = vm_map_copy_overwrite_nested(
+ VME_SUBMAP(entry),
+ sub_start,
+ copy,
+ interruptible, pmap,
+ TRUE);
+ } else {
+ kr = vm_map_copy_overwrite_nested(
+ VME_SUBMAP(entry),
+ sub_start,
+ copy,
+ interruptible,
+ dst_map->pmap,
+ TRUE);
+ }
+ if (kr != KERN_SUCCESS) {
+ if (next_copy != NULL) {
+ copy->cpy_hdr.nentries +=
+ remaining_entries;
+ copy->cpy_hdr.links.prev->vme_next =
+ next_copy;
+ copy->cpy_hdr.links.prev
+ = previous_prev;
+ copy->size = total_size;
+ }
+ return kr;
+ }
+ if (dst_end <= local_end) {
+ return KERN_SUCCESS;
+ }
+ /* otherwise copy no longer exists, it was */
+ /* destroyed after successful copy_overwrite */
+ copy = vm_map_copy_allocate();
+ copy->type = VM_MAP_COPY_ENTRY_LIST;
+ copy->offset = new_offset;
+ copy->cpy_hdr.page_shift = copy_page_shift;
+
+ /*
+ * XXX FBDP
+ * this does not seem to deal with
+ * the VM map store (R&B tree)
+ */
+
+ total_size -= copy_size;
+ copy_size = 0;
+ /* put back remainder of copy in container */
+ if (next_copy != NULL) {
+ copy->cpy_hdr.nentries = remaining_entries;
+ copy->cpy_hdr.links.next = next_copy;
+ copy->cpy_hdr.links.prev = previous_prev;
+ copy->size = total_size;
+ next_copy->vme_prev =
+ vm_map_copy_to_entry(copy);
+ next_copy = NULL;
+ }
+ base_addr = local_end;
+ vm_map_lock(dst_map);
+ if (!vm_map_lookup_entry(dst_map,
+ local_end, &tmp_entry)) {
+ vm_map_unlock(dst_map);
+ return KERN_INVALID_ADDRESS;
+ }
+ entry = tmp_entry;
+ continue;
+ }
+ if (dst_end <= entry->vme_end) {
+ copy_size = dst_end - base_addr;
+ break;
+ }
+
+ if ((next == vm_map_to_entry(dst_map)) ||
+ (next->vme_start != entry->vme_end)) {
+ vm_map_unlock(dst_map);
+ return KERN_INVALID_ADDRESS;
+ }
+
+ entry = next;
+ }/* for */
+
+ next_copy = NULL;
+ nentries = 1;
+
+ /* adjust the copy object */
+ if (total_size > copy_size) {
+ vm_map_size_t local_size = 0;
+ vm_map_size_t entry_size;
+
+ new_offset = copy->offset;
+ copy_entry = vm_map_copy_first_entry(copy);
+ while (copy_entry != vm_map_copy_to_entry(copy)) {
+ entry_size = copy_entry->vme_end -
+ copy_entry->vme_start;
+ if ((local_size < copy_size) &&
+ ((local_size + entry_size)
+ >= copy_size)) {
+ vm_map_copy_clip_end(copy, copy_entry,
+ copy_entry->vme_start +
+ (copy_size - local_size));
+ entry_size = copy_entry->vme_end -
+ copy_entry->vme_start;
+ local_size += entry_size;
+ new_offset += entry_size;
+ }
+ if (local_size >= copy_size) {
+ next_copy = copy_entry->vme_next;
+ copy_entry->vme_next =
+ vm_map_copy_to_entry(copy);
+ previous_prev =
+ copy->cpy_hdr.links.prev;
+ copy->cpy_hdr.links.prev = copy_entry;
+ copy->size = copy_size;
+ remaining_entries =
+ copy->cpy_hdr.nentries;
+ remaining_entries -= nentries;
+ copy->cpy_hdr.nentries = nentries;
+ break;
+ } else {
+ local_size += entry_size;
+ new_offset += entry_size;
+ nentries++;
+ }
+ copy_entry = copy_entry->vme_next;
+ }
+ }
+
+ if (aligned) {
+ pmap_t local_pmap;
+
+ if (pmap) {
+ local_pmap = pmap;
+ } else {
+ local_pmap = dst_map->pmap;
+ }
+
+ if ((kr = vm_map_copy_overwrite_aligned(
+ dst_map, tmp_entry, copy,
+ base_addr, local_pmap)) != KERN_SUCCESS) {
+ if (next_copy != NULL) {
+ copy->cpy_hdr.nentries +=
+ remaining_entries;
+ copy->cpy_hdr.links.prev->vme_next =
+ next_copy;
+ copy->cpy_hdr.links.prev =
+ previous_prev;
+ copy->size += copy_size;
+ }
+ return kr;
+ }
+ vm_map_unlock(dst_map);
+ } else {
+ /*
+ * Performance gain:
+ *
+ * if the copy and dst address are misaligned but the same
+ * offset within the page we can copy_not_aligned the
+ * misaligned parts and copy aligned the rest. If they are
+ * aligned but len is unaligned we simply need to copy
+ * the end bit unaligned. We'll need to split the misaligned
+ * bits of the region in this case !
+ */
+ /* ALWAYS UNLOCKS THE dst_map MAP */
+ kr = vm_map_copy_overwrite_unaligned(
+ dst_map,
+ tmp_entry,
+ copy,
+ base_addr,
+ discard_on_success);
+ if (kr != KERN_SUCCESS) {
+ if (next_copy != NULL) {
+ copy->cpy_hdr.nentries +=
+ remaining_entries;
+ copy->cpy_hdr.links.prev->vme_next =
+ next_copy;
+ copy->cpy_hdr.links.prev =
+ previous_prev;
+ copy->size += copy_size;
+ }
+ return kr;
+ }
+ }
+ total_size -= copy_size;
+ if (total_size == 0) {
+ break;
+ }
+ base_addr += copy_size;
+ copy_size = 0;
+ copy->offset = new_offset;
+ if (next_copy != NULL) {
+ copy->cpy_hdr.nentries = remaining_entries;
+ copy->cpy_hdr.links.next = next_copy;
+ copy->cpy_hdr.links.prev = previous_prev;
+ next_copy->vme_prev = vm_map_copy_to_entry(copy);
+ copy->size = total_size;
+ }
+ vm_map_lock(dst_map);
+ while (TRUE) {
+ if (!vm_map_lookup_entry(dst_map,
+ base_addr, &tmp_entry)) {
+ vm_map_unlock(dst_map);
+ return KERN_INVALID_ADDRESS;
+ }
+ if (tmp_entry->in_transition) {
+ entry->needs_wakeup = TRUE;
+ vm_map_entry_wait(dst_map, THREAD_UNINT);
+ } else {
+ break;
+ }
+ }
+ vm_map_clip_start(dst_map,
+ tmp_entry,
+ vm_map_trunc_page(base_addr,
+ VM_MAP_PAGE_MASK(dst_map)));
+
+ entry = tmp_entry;
+ } /* while */
+
+ /*
+ * Throw away the vm_map_copy object
+ */
+ if (discard_on_success) {
+ vm_map_copy_discard(copy);
+ }
+
+ return KERN_SUCCESS;
+}/* vm_map_copy_overwrite */
+
+/*
+ * Routine: vm_map_copy_overwrite
+ *
+ * Description:
+ *     Overwrite the "copy_size" bytes starting at "dst_addr" in
+ *     "dst_map" with the data described by "copy".
+ *
+ * Implementation:
+ *     When the request cannot be optimized (interruptible, no
+ *     entry-list copy, small page shifts, small size, incompatible
+ *     mis-alignment, nothing but unaligned bytes, or a submap in the
+ *     destination range) the whole job is handed to a single
+ *     vm_map_copy_overwrite_nested() call: the "blunt copy".
+ *     Otherwise "copy" is split into up to three pieces -- a head that
+ *     reaches the first page boundary, a middle part, and a tail
+ *     holding the trailing partial page -- and each piece is passed
+ *     to vm_map_copy_overwrite_nested() on its own.
+ *
+ * Returns:
+ *     The result of the vm_map_copy_overwrite_nested() call that
+ *     failed, or of the last call made.  On the split path, success
+ *     discards every piece here; failure links the pieces back into
+ *     "copy" and restores its offset and size before returning.
+ */
+kern_return_t
+vm_map_copy_overwrite(
+    vm_map_t        dst_map,
+    vm_map_offset_t dst_addr,
+    vm_map_copy_t   copy,
+    vm_map_size_t   copy_size,
+    boolean_t       interruptible)
+{
+    vm_map_size_t   head_size, tail_size;
+    vm_map_copy_t   head_copy, tail_copy;
+    vm_map_offset_t head_addr, tail_addr;
+    vm_map_entry_t  entry;
+    kern_return_t   kr;
+    vm_map_offset_t effective_page_mask, effective_page_size;
+    int             copy_page_shift;
+
+    head_size = 0;
+    tail_size = 0;
+    head_copy = NULL;
+    tail_copy = NULL;
+    head_addr = 0;
+    tail_addr = 0;
+
+    if (interruptible ||
+        copy == VM_MAP_COPY_NULL ||
+        copy->type != VM_MAP_COPY_ENTRY_LIST) {
+        /*
+         * We can't split the "copy" map if we're interruptible
+         * or if we don't have a "copy" map...
+         */
+blunt_copy:
+        return vm_map_copy_overwrite_nested(dst_map,
+                   dst_addr,
+                   copy,
+                   interruptible,
+                   (pmap_t) NULL,
+                   TRUE);
+    }
+
+    copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy);
+    if (copy_page_shift < PAGE_SHIFT ||
+        VM_MAP_PAGE_SHIFT(dst_map) < PAGE_SHIFT) {
+        goto blunt_copy;
+    }
+
+    /*
+     * Both page shifts are at least PAGE_SHIFT at this point (anything
+     * smaller took the blunt path just above), so the effective mask
+     * is simply the largest of the map, copy and kernel page masks.
+     */
+    effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
+    effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
+        effective_page_mask);
+    effective_page_size = effective_page_mask + 1;
+
+    if (copy_size < VM_MAP_COPY_OVERWRITE_OPTIMIZATION_THRESHOLD_PAGES * effective_page_size) {
+        /*
+         * Too small to bother with optimizing...
+         */
+        goto blunt_copy;
+    }
+
+    if ((dst_addr & effective_page_mask) !=
+        (copy->offset & effective_page_mask)) {
+        /*
+         * Incompatible mis-alignment of source and destination...
+         */
+        goto blunt_copy;
+    }
+
+    /*
+     * Proper alignment or identical mis-alignment at the beginning.
+     * Let's try and do a small unaligned copy first (if needed)
+     * and then an aligned copy for the rest.
+     */
+    if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
+        head_addr = dst_addr;
+        head_size = (effective_page_size -
+            (copy->offset & effective_page_mask));
+        head_size = MIN(head_size, copy_size);
+    }
+    if (!vm_map_page_aligned(copy->offset + copy_size,
+        effective_page_mask)) {
+        /*
+         * Mis-alignment at the end.
+         * Do an aligned copy up to the last page and
+         * then an unaligned copy for the remaining bytes.
+         */
+        tail_size = ((copy->offset + copy_size) &
+            effective_page_mask);
+        tail_size = MIN(tail_size, copy_size);
+        tail_addr = dst_addr + copy_size - tail_size;
+        assert(tail_addr >= head_addr + head_size);
+    }
+    assert(head_size + tail_size <= copy_size);
+
+    if (head_size + tail_size == copy_size) {
+        /*
+         * It's all unaligned, no optimization possible...
+         */
+        goto blunt_copy;
+    }
+
+    /*
+     * Can't optimize if there are any submaps in the
+     * destination due to the way we free the "copy" map
+     * progressively in vm_map_copy_overwrite_nested()
+     * in that case.
+     */
+    vm_map_lock_read(dst_map);
+    if (!vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
+        vm_map_unlock_read(dst_map);
+        goto blunt_copy;
+    }
+    /*
+     * "entry" walks dst_map's entry list, so the end-of-list sentinel
+     * to stop on is dst_map's own header, not the header of "copy".
+     */
+    for (;
+        (entry != vm_map_to_entry(dst_map) &&
+        entry->vme_start < dst_addr + copy_size);
+        entry = entry->vme_next) {
+        if (entry->is_sub_map) {
+            vm_map_unlock_read(dst_map);
+            goto blunt_copy;
+        }
+    }
+    vm_map_unlock_read(dst_map);
+
+    if (head_size) {
+        /*
+         * Unaligned copy of the first "head_size" bytes, to reach
+         * a page boundary.
+         */
+
+        /*
+         * Extract "head_copy" out of "copy".
+         */
+        head_copy = vm_map_copy_allocate();
+        head_copy->type = VM_MAP_COPY_ENTRY_LIST;
+        head_copy->cpy_hdr.entries_pageable =
+            copy->cpy_hdr.entries_pageable;
+        vm_map_store_init(&head_copy->cpy_hdr);
+        head_copy->cpy_hdr.page_shift = copy_page_shift;
+
+        /*
+         * The head is taken from the first copy entry only: shrink
+         * it if that entry ends before the page boundary.
+         */
+        entry = vm_map_copy_first_entry(copy);
+        if (entry->vme_end < copy->offset + head_size) {
+            head_size = entry->vme_end - copy->offset;
+        }
+
+        head_copy->offset = copy->offset;
+        head_copy->size = head_size;
+        copy->offset += head_size;
+        copy->size -= head_size;
+        copy_size -= head_size;
+        assert(copy_size > 0);
+
+        vm_map_copy_clip_end(copy, entry, copy->offset);
+        vm_map_copy_entry_unlink(copy, entry);
+        vm_map_copy_entry_link(head_copy,
+            vm_map_copy_to_entry(head_copy),
+            entry);
+
+        /*
+         * Do the unaligned copy.
+         */
+        kr = vm_map_copy_overwrite_nested(dst_map,
+            head_addr,
+            head_copy,
+            interruptible,
+            (pmap_t) NULL,
+            FALSE);
+        if (kr != KERN_SUCCESS) {
+            goto done;
+        }
+    }
+
+    if (tail_size) {
+        /*
+         * Extract "tail_copy" out of "copy".
+         */
+        tail_copy = vm_map_copy_allocate();
+        tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
+        tail_copy->cpy_hdr.entries_pageable =
+            copy->cpy_hdr.entries_pageable;
+        vm_map_store_init(&tail_copy->cpy_hdr);
+        tail_copy->cpy_hdr.page_shift = copy_page_shift;
+
+        tail_copy->offset = copy->offset + copy_size - tail_size;
+        tail_copy->size = tail_size;
+
+        copy->size -= tail_size;
+        copy_size -= tail_size;
+        assert(copy_size > 0);
+
+        /*
+         * Clipping may split the last entry, so fetch the last
+         * entry again before moving it over to "tail_copy".
+         */
+        entry = vm_map_copy_last_entry(copy);
+        vm_map_copy_clip_start(copy, entry, tail_copy->offset);
+        entry = vm_map_copy_last_entry(copy);
+        vm_map_copy_entry_unlink(copy, entry);
+        vm_map_copy_entry_link(tail_copy,
+            vm_map_copy_last_entry(tail_copy),
+            entry);
+    }
+
+    /*
+     * If we are here from ipc_kmsg_copyout_ool_descriptor(),
+     * we want to avoid TOCTOU issues w.r.t copy->size but
+     * we don't need to change vm_map_copy_overwrite_nested()
+     * and all other vm_map_copy_overwrite variants.
+     *
+     * So we assign the original copy_size that was passed into
+     * this routine back to copy.
+     *
+     * This use of local 'copy_size' passed into this routine is
+     * to try and protect against TOCTOU attacks where the kernel
+     * has been exploited. We don't expect this to be an issue
+     * during normal system operation.
+     */
+    assertf(copy->size == copy_size,
+        "Mismatch of copy sizes. Expected 0x%llx, Got 0x%llx\n", (uint64_t) copy_size, (uint64_t) copy->size);
+    copy->size = copy_size;
+
+    /*
+     * Copy most (or possibly all) of the data.
+     */
+    kr = vm_map_copy_overwrite_nested(dst_map,
+        dst_addr + head_size,
+        copy,
+        interruptible,
+        (pmap_t) NULL,
+        FALSE);
+    if (kr != KERN_SUCCESS) {
+        goto done;
+    }
+
+    if (tail_size) {
+        kr = vm_map_copy_overwrite_nested(dst_map,
+            tail_addr,
+            tail_copy,
+            interruptible,
+            (pmap_t) NULL,
+            FALSE);
+    }
+
+done:
+    assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
+    if (kr == KERN_SUCCESS) {
+        /*
+         * Discard all the copy maps.
+         */
+        if (head_copy) {
+            vm_map_copy_discard(head_copy);
+            head_copy = NULL;
+        }
+        vm_map_copy_discard(copy);
+        if (tail_copy) {
+            vm_map_copy_discard(tail_copy);
+            tail_copy = NULL;
+        }
+    } else {
+        /*
+         * Re-assemble the original copy map.
+         */
+        if (head_copy) {
+            /* put the head entry back at the front of "copy" */
+            entry = vm_map_copy_first_entry(head_copy);
+            vm_map_copy_entry_unlink(head_copy, entry);
+            vm_map_copy_entry_link(copy,
+                vm_map_copy_to_entry(copy),
+                entry);
+            copy->offset -= head_size;
+            copy->size += head_size;
+            vm_map_copy_discard(head_copy);
+            head_copy = NULL;
+        }
+        if (tail_copy) {
+            /* put the tail entry back at the end of "copy" */
+            entry = vm_map_copy_last_entry(tail_copy);
+            vm_map_copy_entry_unlink(tail_copy, entry);
+            vm_map_copy_entry_link(copy,
+                vm_map_copy_last_entry(copy),
+                entry);
+            copy->size += tail_size;
+            vm_map_copy_discard(tail_copy);
+            tail_copy = NULL;
+        }
+    }
+    return kr;
+}
+
+
+/*
+ * Routine: vm_map_copy_overwrite_unaligned [internal use only]
+ *
+ * Description:
+ *     Physically copy unaligned data
+ *
+ * Implementation:
+ *     Unaligned parts of pages have to be physically copied.  We use
+ *     a modified form of vm_fault_copy (which understands non-aligned
+ *     page offsets and sizes) to do the copy.  We attempt to copy as
+ *     much memory in one go as possible, however vm_fault_copy copies
+ *     within 1 memory object so we have to find the smallest of
+ *     "amount left", "source object data size" and "target object
+ *     data size".  With unaligned data we don't need to split regions,
+ *     therefore the source (copy) object should be one map entry, the
+ *     target range may be split over multiple map entries however.
+ *     In any event we are pessimistic about these assumptions.
+ *
+ * Assumptions:
+ *     dst_map is write-locked on entry; it is downgraded to a read
+ *     lock before the copy loop.  Every return taken from inside the
+ *     copy loop, success or error, leaves dst_map unlocked.
+ *     NOTE(review): if copy->size is 0 the loop is never entered and
+ *     the map is returned still read-locked; presumably callers never
+ *     pass an empty copy -- confirm.
+ */
+
+static kern_return_t
+vm_map_copy_overwrite_unaligned(
+    vm_map_t        dst_map,
+    vm_map_entry_t  entry,
+    vm_map_copy_t   copy,
+    vm_map_offset_t start,
+    boolean_t       discard_on_success)
+{
+    vm_map_entry_t          copy_entry;
+    vm_map_entry_t          copy_entry_next;
+    vm_map_version_t        version;
+    vm_object_t             dst_object;
+    vm_object_offset_t      dst_offset;
+    vm_object_offset_t      src_offset;
+    vm_object_offset_t      entry_offset;
+    vm_map_offset_t         entry_end;
+    vm_map_size_t           src_size,
+        dst_size,
+        copy_size,
+        amount_left;
+    kern_return_t           kr = KERN_SUCCESS;
+
+
+    copy_entry = vm_map_copy_first_entry(copy);
+
+    vm_map_lock_write_to_read(dst_map);
+
+    /*
+     * unaligned so we never clipped this entry, we need the offset into
+     * the vm_object not just the data.
+     */
+    src_offset = copy->offset - trunc_page_mask_64(copy->offset, VM_MAP_COPY_PAGE_MASK(copy));
+    amount_left = copy->size;
+
+    while (amount_left > 0) {
+        if (entry == vm_map_to_entry(dst_map)) {
+            vm_map_unlock_read(dst_map);
+            return KERN_INVALID_ADDRESS;
+        }
+
+        /* "start" must be within the current map entry */
+        assert((start >= entry->vme_start) && (start < entry->vme_end));
+
+        dst_offset = start - entry->vme_start;
+
+        dst_size = entry->vme_end - start;
+
+        src_size = copy_entry->vme_end -
+            (copy_entry->vme_start + src_offset);
+
+        if (dst_size < src_size) {
+            /*
+             * we can only copy dst_size bytes before
+             * we have to get the next destination entry
+             */
+            copy_size = dst_size;
+        } else {
+            /*
+             * we can only copy src_size bytes before
+             * we have to get the next source copy entry
+             */
+            copy_size = src_size;
+        }
+
+        if (copy_size > amount_left) {
+            copy_size = amount_left;
+        }
+        /*
+         * Entry needs copy, create a shadow object for
+         * Copy on write region.
+         */
+        if (entry->needs_copy &&
+            ((entry->protection & VM_PROT_WRITE) != 0)) {
+            if (vm_map_lock_read_to_write(dst_map)) {
+                /* upgrade failed: retake the read lock and look "start" up again */
+                vm_map_lock_read(dst_map);
+                goto RetryLookup;
+            }
+            VME_OBJECT_SHADOW(entry,
+                (vm_map_size_t)(entry->vme_end
+                - entry->vme_start));
+            entry->needs_copy = FALSE;
+            vm_map_lock_write_to_read(dst_map);
+        }
+        dst_object = VME_OBJECT(entry);
+        /*
+         * unlike with the virtual (aligned) copy we're going
+         * to fault on it therefore we need a target object.
+         */
+        if (dst_object == VM_OBJECT_NULL) {
+            if (vm_map_lock_read_to_write(dst_map)) {
+                /* upgrade failed: retake the read lock and look "start" up again */
+                vm_map_lock_read(dst_map);
+                goto RetryLookup;
+            }
+            dst_object = vm_object_allocate((vm_map_size_t)
+                entry->vme_end - entry->vme_start);
+            VME_OBJECT_SET(entry, dst_object);
+            VME_OFFSET_SET(entry, 0);
+            assert(entry->use_pmap);
+            vm_map_lock_write_to_read(dst_map);
+        }
+        /*
+         * Take an object reference and unlock map. The "entry" may
+         * disappear or change when the map is unlocked, so remember
+         * the values we still need from it.
+         */
+        vm_object_reference(dst_object);
+        version.main_timestamp = dst_map->timestamp;
+        entry_offset = VME_OFFSET(entry);
+        entry_end = entry->vme_end;
+        vm_map_unlock_read(dst_map);
+        /*
+         * Copy as much as possible in one pass.  "copy_size" is
+         * passed by address; the value read back afterwards drives
+         * the bookkeeping below (presumably the amount actually
+         * copied -- confirm against vm_fault_copy()).
+         */
+        kr = vm_fault_copy(
+            VME_OBJECT(copy_entry),
+            VME_OFFSET(copy_entry) + src_offset,
+            &copy_size,
+            dst_object,
+            entry_offset + dst_offset,
+            dst_map,
+            &version,
+            THREAD_UNINT);
+
+        start += copy_size;
+        src_offset += copy_size;
+        amount_left -= copy_size;
+        /*
+         * Release the object reference
+         */
+        vm_object_deallocate(dst_object);
+        /*
+         * If a hard error occurred, return it now
+         */
+        if (kr != KERN_SUCCESS) {
+            return kr;
+        }
+
+        if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
+            || amount_left == 0) {
+            /*
+             * all done with this copy entry, dispose.
+             */
+            copy_entry_next = copy_entry->vme_next;
+
+            if (discard_on_success) {
+                vm_map_copy_entry_unlink(copy, copy_entry);
+                assert(!copy_entry->is_sub_map);
+                vm_object_deallocate(VME_OBJECT(copy_entry));
+                vm_map_copy_entry_dispose(copy, copy_entry);
+            }
+
+            if (copy_entry_next == vm_map_copy_to_entry(copy) &&
+                amount_left) {
+                /*
+                 * not finished copying but run out of source
+                 */
+                return KERN_INVALID_ADDRESS;
+            }
+
+            copy_entry = copy_entry_next;
+
+            src_offset = 0;
+        }
+
+        if (amount_left == 0) {
+            return KERN_SUCCESS;
+        }
+
+        vm_map_lock_read(dst_map);
+        if (version.main_timestamp == dst_map->timestamp) {
+            if (start == entry_end) {
+                /*
+                 * destination region is split.  Use the version
+                 * information to avoid a lookup in the normal
+                 * case.
+                 */
+                entry = entry->vme_next;
+                /*
+                 * should be contiguous. Fail if we encounter
+                 * a hole in the destination.
+                 */
+                if (start != entry->vme_start) {
+                    vm_map_unlock_read(dst_map);
+                    return KERN_INVALID_ADDRESS;
+                }
+            }
+        } else {
+            /*
+             * Map version check failed.
+             * we must lookup the entry because somebody
+             * might have changed the map behind our backs.
+             */
+RetryLookup:
+            if (!vm_map_lookup_entry(dst_map, start, &entry)) {
+                vm_map_unlock_read(dst_map);
+                return KERN_INVALID_ADDRESS;
+            }
+        }
+    }/* while */
+
+    return KERN_SUCCESS;
+}/* vm_map_copy_overwrite_unaligned */
+
+/*
+ * Routine: vm_map_copy_overwrite_aligned [internal use only]
+ *
+ * Description:
+ * Does all the vm_trickery possible for whole pages.
+ *
+ * Implementation:
+ *
+ * If there are no permanent objects in the destination,
+ * and the source and destination map entry zones match,
+ * and the destination map entry is not shared,
+ * then the map entries can be deleted and replaced
+ * with those from the copy. The following code is the
+ * basic idea of what to do, but there are lots of annoying
+ * little details about getting protection and inheritance
+ * right. Should add protection, inheritance, and sharing checks
+ * to the above pass and make sure that no wiring is involved.
+ */
+
+int vm_map_copy_overwrite_aligned_src_not_internal = 0; /* bumped in vm_map_copy_overwrite_aligned() when a malloc-tagged destination takes slow_copy because the source's shadow-chain walk stopped on a non-internal object */
+int vm_map_copy_overwrite_aligned_src_not_symmetric = 0; /* bumped (XNU_TARGET_OS_OSX only) when that walk stopped on a true_share object or one whose copy_strategy is not MEMORY_OBJECT_COPY_SYMMETRIC */
+int vm_map_copy_overwrite_aligned_src_large = 0; /* bumped (XNU_TARGET_OS_OSX only) when slow_copy is taken because the source object is >= 64 MB and the copy is <= 128 KB */
+
+static kern_return_t
+vm_map_copy_overwrite_aligned(
+ vm_map_t dst_map,
+ vm_map_entry_t tmp_entry,
+ vm_map_copy_t copy,
+ vm_map_offset_t start,
+ __unused pmap_t pmap)
+{
+ vm_object_t object;
+ vm_map_entry_t copy_entry;
+ vm_map_size_t copy_size;
+ vm_map_size_t size;
+ vm_map_entry_t entry;
+
+ while ((copy_entry = vm_map_copy_first_entry(copy))
+ != vm_map_copy_to_entry(copy)) {
+ copy_size = (copy_entry->vme_end - copy_entry->vme_start);
+
+ entry = tmp_entry;
+ if (entry->is_sub_map) {
+ /* unnested when clipped earlier */
+ assert(!entry->use_pmap);
+ }
+ if (entry == vm_map_to_entry(dst_map)) {
+ vm_map_unlock(dst_map);
+ return KERN_INVALID_ADDRESS;
+ }
+ size = (entry->vme_end - entry->vme_start);
+ /*
+ * Make sure that no holes popped up in the
+ * address map, and that the protection is
+ * still valid, in case the map was unlocked
+ * earlier.
+ */
+
+ if ((entry->vme_start != start) || ((entry->is_sub_map)
+ && !entry->needs_copy)) {
+ vm_map_unlock(dst_map);
+ return KERN_INVALID_ADDRESS;
+ }
+ assert(entry != vm_map_to_entry(dst_map));
+
+ /*
+ * Check protection again
+ */
+
+ if (!(entry->protection & VM_PROT_WRITE)) {
+ vm_map_unlock(dst_map);
+ return KERN_PROTECTION_FAILURE;
+ }
+
+ /*
+ * Adjust to source size first
+ */
+
+ if (copy_size < size) {
+ if (entry->map_aligned &&
+ !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
+ VM_MAP_PAGE_MASK(dst_map))) {
+ /* no longer map-aligned */
+ entry->map_aligned = FALSE;
+ }
+ vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
+ size = copy_size;
+ }
+
+ /*
+ * Adjust to destination size
+ */
+
+ if (size < copy_size) {
+ vm_map_copy_clip_end(copy, copy_entry,
+ copy_entry->vme_start + size);
+ copy_size = size;
+ }
+
+ assert((entry->vme_end - entry->vme_start) == size);
+ assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
+ assert((copy_entry->vme_end - copy_entry->vme_start) == size);
+
+ /*
+ * If the destination contains temporary unshared memory,
+ * we can perform the copy by throwing it away and
+ * installing the source data.
+ */
+
+ object = VME_OBJECT(entry);
+ if ((!entry->is_shared &&
+ ((object == VM_OBJECT_NULL) ||
+ (object->internal && !object->true_share))) ||
+ entry->needs_copy) {
+ vm_object_t old_object = VME_OBJECT(entry);
+ vm_object_offset_t old_offset = VME_OFFSET(entry);
+ vm_object_offset_t offset;
+
+ /*
+ * Ensure that the source and destination aren't
+ * identical
+ */
+ if (old_object == VME_OBJECT(copy_entry) &&
+ old_offset == VME_OFFSET(copy_entry)) {
+ vm_map_copy_entry_unlink(copy, copy_entry);
+ vm_map_copy_entry_dispose(copy, copy_entry);
+
+ if (old_object != VM_OBJECT_NULL) {
+ vm_object_deallocate(old_object);
+ }
+
+ start = tmp_entry->vme_end;
+ tmp_entry = tmp_entry->vme_next;
+ continue;
+ }
+
+#if XNU_TARGET_OS_OSX
+#define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
+#define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
+ if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
+ VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
+ copy_size <= __TRADEOFF1_COPY_SIZE) {
+ /*
+ * Virtual vs. Physical copy tradeoff #1.
+ *
+ * Copying only a few pages out of a large
+ * object: do a physical copy instead of
+ * a virtual copy, to avoid possibly keeping
+ * the entire large object alive because of
+ * those few copy-on-write pages.
+ */
+ vm_map_copy_overwrite_aligned_src_large++;
+ goto slow_copy;
+ }
+#endif /* XNU_TARGET_OS_OSX */
+
+ if ((dst_map->pmap != kernel_pmap) &&
+ (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
+ (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_MEDIUM)) {
+ vm_object_t new_object, new_shadow;
+
+ /*
+ * We're about to map something over a mapping
+ * established by malloc()...
+ */
+ new_object = VME_OBJECT(copy_entry);
+ if (new_object != VM_OBJECT_NULL) {
+ vm_object_lock_shared(new_object);
+ }
+ while (new_object != VM_OBJECT_NULL &&
+#if XNU_TARGET_OS_OSX
+ !new_object->true_share &&
+ new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
+#endif /* XNU_TARGET_OS_OSX */
+ new_object->internal) {
+ new_shadow = new_object->shadow;
+ if (new_shadow == VM_OBJECT_NULL) {
+ break;
+ }
+ vm_object_lock_shared(new_shadow);
+ vm_object_unlock(new_object);
+ new_object = new_shadow;
+ }
+ if (new_object != VM_OBJECT_NULL) {
+ if (!new_object->internal) {
+ /*
+ * The new mapping is backed
+ * by an external object. We
+ * don't want malloc'ed memory
+ * to be replaced with such a
+ * non-anonymous mapping, so
+ * let's go off the optimized
+ * path...
+ */
+ vm_map_copy_overwrite_aligned_src_not_internal++;
+ vm_object_unlock(new_object);
+ goto slow_copy;
+ }
+#if XNU_TARGET_OS_OSX
+ if (new_object->true_share ||
+ new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
+ /*
+ * Same if there's a "true_share"
+ * object in the shadow chain, or
+ * an object with a non-default
+ * (SYMMETRIC) copy strategy.
+ */
+ vm_map_copy_overwrite_aligned_src_not_symmetric++;
+ vm_object_unlock(new_object);
+ goto slow_copy;
+ }
+#endif /* XNU_TARGET_OS_OSX */
+ vm_object_unlock(new_object);
+ }
+ /*
+ * The new mapping is still backed by
+ * anonymous (internal) memory, so it's
+ * OK to substitute it for the original
+ * malloc() mapping.
+ */
+ }
+
+ if (old_object != VM_OBJECT_NULL) {
+ if (entry->is_sub_map) {
+ if (entry->use_pmap) {
+#ifndef NO_NESTED_PMAP
+ pmap_unnest(dst_map->pmap,
+ (addr64_t)entry->vme_start,
+ entry->vme_end - entry->vme_start);
+#endif /* NO_NESTED_PMAP */
+ if (dst_map->mapped_in_other_pmaps) {
+ /* clean up parent */
+ /* map/maps */
+ vm_map_submap_pmap_clean(
+ dst_map, entry->vme_start,
+ entry->vme_end,
+ VME_SUBMAP(entry),
+ VME_OFFSET(entry));
+ }
+ } else {
+ vm_map_submap_pmap_clean(
+ dst_map, entry->vme_start,
+ entry->vme_end,
+ VME_SUBMAP(entry),
+ VME_OFFSET(entry));
+ }
+ vm_map_deallocate(VME_SUBMAP(entry));
+ } else {
+ if (dst_map->mapped_in_other_pmaps) {
+ vm_object_pmap_protect_options(
+ VME_OBJECT(entry),
+ VME_OFFSET(entry),
+ entry->vme_end
+ - entry->vme_start,
+ PMAP_NULL,
+ PAGE_SIZE,
+ entry->vme_start,
+ VM_PROT_NONE,
+ PMAP_OPTIONS_REMOVE);
+ } else {
+ pmap_remove_options(
+ dst_map->pmap,
+ (addr64_t)(entry->vme_start),
+ (addr64_t)(entry->vme_end),
+ PMAP_OPTIONS_REMOVE);
+ }
+ vm_object_deallocate(old_object);
+ }
+ }
+
+ if (entry->iokit_acct) {
+ /* keep using iokit accounting */
+ entry->use_pmap = FALSE;
+ } else {
+ /* use pmap accounting */
+ entry->use_pmap = TRUE;
+ }
+ entry->is_sub_map = FALSE;
+ VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
+ object = VME_OBJECT(entry);
+ entry->needs_copy = copy_entry->needs_copy;
+ entry->wired_count = 0;
+ entry->user_wired_count = 0;
+ offset = VME_OFFSET(copy_entry);
+ VME_OFFSET_SET(entry, offset);
+
+ vm_map_copy_entry_unlink(copy, copy_entry);
+ vm_map_copy_entry_dispose(copy, copy_entry);
+
+ /*
+ * we could try to push pages into the pmap at this point, BUT
+ * this optimization only saved on average 2 us per page if ALL
+ * the pages in the source were currently mapped
+ * and ALL the pages in the dest were touched, if there were fewer
+ * than 2/3 of the pages touched, this optimization actually cost more cycles
+ * it also puts a lot of pressure on the pmap layer w/r to mapping structures
+ */
+
+ /*
+ * Set up for the next iteration. The map
+ * has not been unlocked, so the next
+ * address should be at the end of this
+ * entry, and the next map entry should be
+ * the one following it.
+ */
+
+ start = tmp_entry->vme_end;
+ tmp_entry = tmp_entry->vme_next;
+ } else {
+ vm_map_version_t version;
+ vm_object_t dst_object;
+ vm_object_offset_t dst_offset;
+ kern_return_t r;
+
+slow_copy:
+ if (entry->needs_copy) {
+ VME_OBJECT_SHADOW(entry,
+ (entry->vme_end -
+ entry->vme_start));
+ entry->needs_copy = FALSE;
+ }
+
+ dst_object = VME_OBJECT(entry);
+ dst_offset = VME_OFFSET(entry);
+
+ /*
+ * Take an object reference, and record
+ * the map version information so that the
+ * map can be safely unlocked.
+ */
+
+ if (dst_object == VM_OBJECT_NULL) {
+ /*
+ * We would usually have just taken the
+ * optimized path above if the destination
+ * object has not been allocated yet. But we
+ * now disable that optimization if the copy
+ * entry's object is not backed by anonymous
+ * memory to avoid replacing malloc'ed
+ * (i.e. re-usable) anonymous memory with a
+ * not-so-anonymous mapping.
+ * So we have to handle this case here and
+ * allocate a new VM object for this map entry.
+ */
+ dst_object = vm_object_allocate(
+ entry->vme_end - entry->vme_start);
+ dst_offset = 0;
+ VME_OBJECT_SET(entry, dst_object);
+ VME_OFFSET_SET(entry, dst_offset);
+ assert(entry->use_pmap);
+ }
+
+ vm_object_reference(dst_object);
+
+ /* account for unlock bumping up timestamp */
+ version.main_timestamp = dst_map->timestamp + 1;
+
+ vm_map_unlock(dst_map);
+
+ /*
+ * Copy as much as possible in one pass
+ */
+
+ copy_size = size;
+ r = vm_fault_copy(
+ VME_OBJECT(copy_entry),
+ VME_OFFSET(copy_entry),
+ ©_size,
+ dst_object,
+ dst_offset,
+ dst_map,
+ &version,
+ THREAD_UNINT );
+
+ /*
+ * Release the object reference
+ */
+
+ vm_object_deallocate(dst_object);
+
+ /*
+ * If a hard error occurred, return it now
+ */
+
+ if (r != KERN_SUCCESS) {
+ return r;
+ }
+
+ if (copy_size != 0) {
+ /*
+ * Dispose of the copied region
+ */
+
+ vm_map_copy_clip_end(copy, copy_entry,
+ copy_entry->vme_start + copy_size);
+ vm_map_copy_entry_unlink(copy, copy_entry);
+ vm_object_deallocate(VME_OBJECT(copy_entry));
+ vm_map_copy_entry_dispose(copy, copy_entry);
+ }
+
+ /*
+ * Pick up in the destination map where we left off.
+ *
+ * Use the version information to avoid a lookup
+ * in the normal case.
+ */
+
+ start += copy_size;
+ vm_map_lock(dst_map);
+ if (version.main_timestamp == dst_map->timestamp &&
+ copy_size != 0) {
+ /* We can safely use saved tmp_entry value */
+
+ if (tmp_entry->map_aligned &&
+ !VM_MAP_PAGE_ALIGNED(
+ start,
+ VM_MAP_PAGE_MASK(dst_map))) {
+ /* no longer map-aligned */
+ tmp_entry->map_aligned = FALSE;
+ }
+ vm_map_clip_end(dst_map, tmp_entry, start);
+ tmp_entry = tmp_entry->vme_next;
+ } else {
+ /* Must do lookup of tmp_entry */
+
+ if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
+ vm_map_unlock(dst_map);
+ return KERN_INVALID_ADDRESS;
+ }
+ if (tmp_entry->map_aligned &&
+ !VM_MAP_PAGE_ALIGNED(
+ start,
+ VM_MAP_PAGE_MASK(dst_map))) {
+ /* no longer map-aligned */
+ tmp_entry->map_aligned = FALSE;
+ }
+ vm_map_clip_start(dst_map, tmp_entry, start);
+ }
+ }
+ }/* while */
+
+ return KERN_SUCCESS;
+}/* vm_map_copy_overwrite_aligned */
+
+/*
+ *	Routine:	vm_map_copyin_kernel_buffer [internal use only]
+ *
+ *	Description:
+ *		Build a VM_MAP_COPY_KERNEL_BUFFER copy object by copying
+ *		"len" bytes found at "src_addr" in "src_map" into a newly
+ *		allocated kernel data buffer.
+ *
+ *		Requests larger than msg_ool_size_small are refused with
+ *		KERN_INVALID_ARGUMENT, allocation failures are reported as
+ *		KERN_RESOURCE_SHORTAGE and a copyinmap() failure is passed
+ *		through unchanged.  Nothing stays allocated on failure.
+ *
+ *		When "src_destroy" is set, the page-rounded source range is
+ *		removed from "src_map" once the data has been copied; the
+ *		result of that removal is deliberately ignored.
+ *
+ *		On success, the new copy object is stored in *copy_result.
+ */
+static kern_return_t
+vm_map_copyin_kernel_buffer(
+	vm_map_t        src_map,
+	vm_map_offset_t src_addr,
+	vm_map_size_t   len,
+	boolean_t       src_destroy,
+	vm_map_copy_t   *copy_result)
+{
+	vm_map_copy_t new_copy;
+	kern_return_t status;
+
+	/* this flavor is only meant for small transfers */
+	if (len > msg_ool_size_small) {
+		return KERN_INVALID_ARGUMENT;
+	}
+
+	new_copy = zalloc_flags(vm_map_copy_zone, Z_WAITOK | Z_ZERO);
+	if (new_copy == VM_MAP_COPY_NULL) {
+		return KERN_RESOURCE_SHORTAGE;
+	}
+
+	new_copy->cpy_kdata = kheap_alloc(KHEAP_DATA_BUFFERS, len, Z_WAITOK);
+	if (new_copy->cpy_kdata == NULL) {
+		status = KERN_RESOURCE_SHORTAGE;
+		goto release_copy;
+	}
+
+	new_copy->type = VM_MAP_COPY_KERNEL_BUFFER;
+	new_copy->size = len;
+	new_copy->offset = 0;
+
+	status = copyinmap(src_map, src_addr, new_copy->cpy_kdata,
+	    (vm_size_t)len);
+	if (status != KERN_SUCCESS) {
+		goto release_buffer;
+	}
+
+	if (src_destroy) {
+		/* the data is safely in the kernel: drop the source pages */
+		vm_map_offset_t remove_start;
+		vm_map_offset_t remove_end;
+
+		remove_start = vm_map_trunc_page(src_addr,
+		    VM_MAP_PAGE_MASK(src_map));
+		remove_end = vm_map_round_page(src_addr + len,
+		    VM_MAP_PAGE_MASK(src_map));
+		(void) vm_map_remove(
+			src_map,
+			remove_start,
+			remove_end,
+			(VM_MAP_REMOVE_INTERRUPTIBLE |
+			VM_MAP_REMOVE_WAIT_FOR_KWIRE |
+			((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : VM_MAP_REMOVE_NO_FLAGS)));
+	}
+
+	*copy_result = new_copy;
+	return KERN_SUCCESS;
+
+release_buffer:
+	kheap_free(KHEAP_DATA_BUFFERS, new_copy->cpy_kdata, len);
+release_copy:
+	zfree(vm_map_copy_zone, new_copy);
+	return status;
+}
+
+/*
+ *	Routine:	vm_map_copyout_kernel_buffer [internal use only]
+ *
+ *	Description:
+ *		Copy the contents of a kernel-buffer copy object out to
+ *		"map".  Unless "overwrite" is set, space for the data is
+ *		first allocated anywhere in "map" and its address is
+ *		returned in *addr; with "overwrite", the data is written
+ *		at the address the caller supplied in *addr.
+ *
+ *		If the copyout fails, any space allocated here is removed
+ *		again, *addr is reset to 0 and KERN_INVALID_ADDRESS is
+ *		returned.
+ *
+ *		The copy object is consumed only on success and only if
+ *		"consume_on_success" is set; in every other case the
+ *		caller remains responsible for it.
+ */
+static int vm_map_copyout_kernel_buffer_failures = 0;
+static kern_return_t
+vm_map_copyout_kernel_buffer(
+	vm_map_t                map,
+	vm_map_address_t        *addr,  /* IN/OUT */
+	vm_map_copy_t           copy,
+	vm_map_size_t           copy_size,
+	boolean_t               overwrite,
+	boolean_t               consume_on_success)
+{
+	thread_t self = current_thread();
+	kern_return_t result = KERN_SUCCESS;
+
+	assert(copy->size == copy_size);
+
+	/*
+	 * A kernel-buffer copy is never larger than msg_ool_size_small
+	 * and always has a zero offset: anything else means the
+	 * vm_map_copy structure is corrupted.
+	 */
+	if (copy_size > msg_ool_size_small || copy->offset) {
+		panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
+		    (long long)copy->size, (long long)copy->offset);
+	}
+
+	if (!overwrite) {
+		/*
+		 * The caller did not provide a destination:
+		 * allocate one in the target map.
+		 */
+		*addr = 0;
+		result = vm_map_enter(map,
+		    addr,
+		    vm_map_round_page(copy_size,
+		    VM_MAP_PAGE_MASK(map)),
+		    (vm_map_offset_t) 0,
+		    VM_FLAGS_ANYWHERE,
+		    VM_MAP_KERNEL_FLAGS_NONE,
+		    VM_KERN_MEMORY_NONE,
+		    VM_OBJECT_NULL,
+		    (vm_object_offset_t) 0,
+		    FALSE,
+		    VM_PROT_DEFAULT,
+		    VM_PROT_ALL,
+		    VM_INHERIT_DEFAULT);
+		if (result != KERN_SUCCESS) {
+			return result;
+		}
+#if KASAN
+		if (map->pmap == kernel_pmap) {
+			kasan_notify_address(*addr, copy->size);
+		}
+#endif
+	}
+
+	/*
+	 * Move the bytes from the kernel buffer into the target map.
+	 */
+	if (self->map != map) {
+		vm_map_t saved_map;
+
+		/*
+		 * The target is not the current address space:
+		 * temporarily switch to it (holding a reference on it)
+		 * for the duration of the copyout.
+		 */
+		vm_map_reference(map);
+		saved_map = vm_map_switch(map);
+
+		assert((vm_size_t)copy_size == copy_size);
+		if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
+			vm_map_copyout_kernel_buffer_failures++;
+			result = KERN_INVALID_ADDRESS;
+		}
+
+		(void) vm_map_switch(saved_map);
+		vm_map_deallocate(map);
+	} else {
+		/*
+		 * The target is the current map: copy directly.
+		 */
+		assert((vm_size_t)copy_size == copy_size);
+		if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
+			result = KERN_INVALID_ADDRESS;
+		}
+	}
+
+	if (result == KERN_SUCCESS) {
+		/* the data made it: discard the copy structure if asked to */
+		if (consume_on_success) {
+			kheap_free(KHEAP_DATA_BUFFERS, copy->cpy_kdata, copy_size);
+			zfree(vm_map_copy_zone, copy);
+		}
+		return result;
+	}
+
+	/* the copyout failed: undo the allocation made above, if any */
+	if (!overwrite) {
+		(void) vm_map_remove(
+			map,
+			vm_map_trunc_page(*addr,
+			VM_MAP_PAGE_MASK(map)),
+			vm_map_round_page((*addr +
+			vm_map_round_page(copy_size,
+			VM_MAP_PAGE_MASK(map))),
+			VM_MAP_PAGE_MASK(map)),
+			VM_MAP_REMOVE_NO_FLAGS);
+		*addr = 0;
+	}
+
+	return result;
+}
+
+/*
+ *	Routine:	vm_map_copy_insert      [internal use only]
+ *
+ *	Description:
+ *		Move every entry of the copy chain "copy" into "map",
+ *		preserving their order, starting right after the map
+ *		entry "after_where".
+ *	Side effects:
+ *		The (now empty) copy header is freed.
+ */
+static void
+vm_map_copy_insert(
+	vm_map_t        map,
+	vm_map_entry_t  after_where,
+	vm_map_copy_t   copy)
+{
+	vm_map_entry_t head;
+
+	for (;;) {
+		head = vm_map_copy_first_entry(copy);
+		if (head == vm_map_copy_to_entry(copy)) {
+			/* back at the list sentinel: the chain is empty */
+			break;
+		}
+
+		vm_map_copy_entry_unlink(copy, head);
+		vm_map_store_entry_link(map, after_where, head,
+		    VM_MAP_KERNEL_FLAGS_NONE);
+
+		/* the next entry goes right after the one just linked */
+		after_where = head;
+	}
+
+	zfree(vm_map_copy_zone, copy);
+}
+
+/*
+ *	Routine:	vm_map_copy_remap
+ *
+ *	Description:
+ *		Duplicate each entry of "copy" into "map", linking the
+ *		duplicates after "where" in the same order.  The entries of
+ *		"copy" themselves are left untouched.
+ *
+ *		Each duplicate has its start and end shifted by "adjustment",
+ *		gets the caller's protections and inheritance, has its
+ *		behavior reset to VM_BEHAVIOR_DEFAULT, and takes its own
+ *		reference on the backing object or submap.
+ */
+void
+vm_map_copy_remap(
+	vm_map_t        map,
+	vm_map_entry_t  where,
+	vm_map_copy_t   copy,
+	vm_map_offset_t adjustment,
+	vm_prot_t       cur_prot,
+	vm_prot_t       max_prot,
+	vm_inherit_t    inheritance)
+{
+	vm_map_entry_t src_entry;
+
+	src_entry = vm_map_copy_first_entry(copy);
+	while (src_entry != vm_map_copy_to_entry(copy)) {
+		vm_map_entry_t dup_entry;
+
+		/* allocate a map entry and clone the copy entry into it */
+		dup_entry = vm_map_entry_create(map,
+		    !map->hdr.entries_pageable);
+		vm_map_entry_copy(map, dup_entry, src_entry);
+
+		/* relocate the clone to its address in "map" */
+		dup_entry->vme_start += adjustment;
+		dup_entry->vme_end += adjustment;
+
+		/* override the attributes chosen by the caller */
+		dup_entry->inheritance = inheritance;
+		dup_entry->protection = cur_prot;
+		dup_entry->max_protection = max_prot;
+		dup_entry->behavior = VM_BEHAVIOR_DEFAULT;
+
+		/* the clone needs its own reference on what it maps */
+		if (!dup_entry->is_sub_map) {
+			vm_object_reference(VME_OBJECT(dup_entry));
+		} else {
+			assert(!dup_entry->use_pmap); /* not nested */
+			vm_map_lock(VME_SUBMAP(dup_entry));
+			vm_map_reference(VME_SUBMAP(dup_entry));
+			vm_map_unlock(VME_SUBMAP(dup_entry));
+		}
+
+		/* link it in; the next clone is inserted right after it */
+		vm_map_store_entry_link(map, where, dup_entry,
+		    VM_MAP_KERNEL_FLAGS_NONE);
+		where = dup_entry;
+
+		src_entry = src_entry->vme_next;
+	}
+}
+
+
+/*
+ * Check the caller-supplied *size against the size recorded in "copy".
+ *
+ * VM_MAP_COPY_OBJECT and VM_MAP_COPY_KERNEL_BUFFER copies must match
+ * exactly.  A VM_MAP_COPY_ENTRY_LIST copy is accepted when its size lies
+ * between *size and *size rounded up to the page size of "dst_map"; in that
+ * case *size is updated with the actual size of the copy object.
+ *
+ * Returns FALSE for a null copy, an unknown copy type or a size mismatch.
+ */
+boolean_t
+vm_map_copy_validate_size(
+	vm_map_t                dst_map,
+	vm_map_copy_t           copy,
+	vm_map_size_t           *size)
+{
+	vm_map_size_t actual;
+	vm_map_size_t requested;
+
+	if (copy == VM_MAP_COPY_NULL) {
+		return FALSE;
+	}
+
+	actual = copy->size;
+	requested = *size;
+
+	if (copy->type == VM_MAP_COPY_OBJECT ||
+	    copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
+		/* these flavors record the exact size */
+		if (requested == actual) {
+			return TRUE;
+		}
+		return FALSE;
+	}
+
+	if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
+		/*
+		 * Page-size rounding may have happened, so an exact
+		 * comparison is not possible for this flavor; settle
+		 * for a range check.
+		 */
+		if (actual < requested) {
+			return FALSE;
+		}
+		if (actual > vm_map_round_page(requested,
+		    VM_MAP_PAGE_MASK(dst_map))) {
+			return FALSE;
+		}
+		*size = actual;
+		return TRUE;
+	}
+
+	/* unknown copy type */
+	return FALSE;
+}
+
+/*
+ *	Routine:	vm_map_copyout_size
+ *
+ *	Description:
+ *		Variant of vm_map_copyout() for callers that already hold
+ *		a size for the copy object (see vm_map_copy_validate_size):
+ *		"copy_size" is forwarded to vm_map_copyout_internal()
+ *		together with the default protections and inheritance.
+ *
+ *		If successful, consumes the copy object.
+ *		Otherwise, the caller is responsible for it.
+ */
+kern_return_t
+vm_map_copyout_size(
+	vm_map_t                dst_map,
+	vm_map_address_t        *dst_addr,      /* OUT */
+	vm_map_copy_t           copy,
+	vm_map_size_t           copy_size)
+{
+	kern_return_t kr;
+
+	kr = vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
+	    TRUE,                     /* consume_on_success */
+	    VM_PROT_DEFAULT,
+	    VM_PROT_ALL,
+	    VM_INHERIT_DEFAULT);
+
+	return kr;
+}
+
+/*
+ *	Routine:	vm_map_copyout
+ *
+ *	Description:
+ *		Map the copy chain "copy" into newly-allocated space in
+ *		"dst_map", using the size recorded in the copy object
+ *		itself (0 for a null copy) and the default protections
+ *		and inheritance.
+ *
+ *		If successful, consumes the copy object.
+ *		Otherwise, the caller is responsible for it.
+ */
+kern_return_t
+vm_map_copyout(
+	vm_map_t                dst_map,
+	vm_map_address_t        *dst_addr,      /* OUT */
+	vm_map_copy_t           copy)
+{
+	vm_map_size_t copy_size = 0;
+
+	/* a null copy has no size to read */
+	if (copy != VM_MAP_COPY_NULL) {
+		copy_size = copy->size;
+	}
+
+	return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
+	           TRUE,                     /* consume_on_success */
+	           VM_PROT_DEFAULT,
+	           VM_PROT_ALL,
+	           VM_INHERIT_DEFAULT);
+}
+
+kern_return_t
+vm_map_copyout_internal(
+ vm_map_t dst_map, /* map that receives the new mapping */
+ vm_map_address_t *dst_addr, /* OUT: address of the data in dst_map (0 for a null copy or size mismatch) */
+ vm_map_copy_t copy, /* copy object to map; VM_MAP_COPY_NULL is accepted */
+ vm_map_size_t copy_size, /* must equal copy->size, else KERN_FAILURE */
+ boolean_t consume_on_success, /* TRUE: move the entries and free "copy"; FALSE: duplicate them */
+ vm_prot_t cur_protection, /* only used by the vm_map_copy_remap() (!consume_on_success) path */
+ vm_prot_t max_protection, /* only used by the vm_map_copy_remap() (!consume_on_success) path */
+ vm_inherit_t inheritance) /* only used by the vm_map_copy_remap() (!consume_on_success) path */
+{
+ vm_map_size_t size; /* page-rounded size of the range to allocate */
+ vm_map_size_t adjustment; /* start - vm_copy_start: delta added to the copy entries' addresses */
+ vm_map_offset_t start; /* candidate start address in dst_map */
+ vm_object_offset_t vm_copy_start; /* copy->offset truncated to the copy's page size */
+ vm_map_entry_t last; /* entry (or hole) the new entries get linked after */
+ vm_map_entry_t entry;
+ vm_map_entry_t hole_entry; /* head of dst_map->holes_list */
+ vm_map_copy_t original_copy; /* the caller's copy, in case "copy" gets replaced by an adjusted one */
+
+ /*
+ * Overview: the VM_MAP_COPY_OBJECT and VM_MAP_COPY_KERNEL_BUFFER
+ * flavors are handled by dedicated early paths below. For an
+ * entry list, we look for a large enough free range in "dst_map"
+ * and then either move the copy's entries into the map
+ * (consume_on_success) or duplicate them with
+ * vm_map_copy_remap(), leaving "copy" to the caller.
+ *
+ * Check for null copy object.
+ */
+
+ if (copy == VM_MAP_COPY_NULL) {
+ *dst_addr = 0;
+ return KERN_SUCCESS;
+ }
+
+ /*
+ * Assert that the vm_map_copy is coming from the right
+ * zone and hasn't been forged
+ */
+ vm_map_copy_require(copy);
+
+ if (copy->size != copy_size) {
+ *dst_addr = 0;
+ return KERN_FAILURE;
+ }
+
+ /*
+ * Check for special copy object, created
+ * by vm_map_copyin_object.
+ *
+ * This path always maps the object with VM_PROT_DEFAULT,
+ * VM_PROT_ALL and VM_INHERIT_DEFAULT; the protections and
+ * inheritance passed by the caller are not used here.
+ */
+
+ if (copy->type == VM_MAP_COPY_OBJECT) {
+ vm_object_t object = copy->cpy_object;
+ kern_return_t kr;
+ vm_object_offset_t offset;
+
+ offset = vm_object_trunc_page(copy->offset);
+ size = vm_map_round_page((copy_size +
+ (vm_map_size_t)(copy->offset -
+ offset)),
+ VM_MAP_PAGE_MASK(dst_map));
+ *dst_addr = 0;
+ kr = vm_map_enter(dst_map, dst_addr, size,
+ (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
+ VM_MAP_KERNEL_FLAGS_NONE,
+ VM_KERN_MEMORY_NONE,
+ object, offset, FALSE,
+ VM_PROT_DEFAULT, VM_PROT_ALL,
+ VM_INHERIT_DEFAULT);
+ if (kr != KERN_SUCCESS) {
+ return kr;
+ }
+ /* Account for non-pagealigned copy object */
+ *dst_addr += (vm_map_offset_t)(copy->offset - offset);
+ if (consume_on_success) {
+ zfree(vm_map_copy_zone, copy);
+ }
+ return KERN_SUCCESS;
+ }
+
+ /*
+ * Check for special kernel buffer allocated
+ * by new_ipc_kmsg_copyin.
+ */
+
+ if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
+ return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
+ copy, copy_size, FALSE,
+ consume_on_success);
+ }
+
+ original_copy = copy; /* "copy" may be replaced by an adjusted copy just below */
+ if (copy->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(dst_map)) {
+ kern_return_t kr;
+ vm_map_copy_t target_copy;
+ vm_map_offset_t overmap_start, overmap_end, trimmed_start;
+
+ target_copy = VM_MAP_COPY_NULL;
+ DEBUG4K_ADJUST("adjusting...\n");
+ kr = vm_map_copy_adjust_to_target(
+ copy,
+ 0, /* offset */
+ copy->size, /* size */
+ dst_map,
+ TRUE, /* copy */
+ &target_copy,
+ &overmap_start,
+ &overmap_end,
+ &trimmed_start);
+ if (kr != KERN_SUCCESS) {
+ DEBUG4K_COPY("adjust failed 0x%x\n", kr);
+ return kr;
+ }
+ DEBUG4K_COPY("copy %p (%d 0x%llx 0x%llx) dst_map %p (%d) target_copy %p (%d 0x%llx 0x%llx) overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx\n", copy, copy->cpy_hdr.page_shift, copy->offset, (uint64_t)copy->size, dst_map, VM_MAP_PAGE_SHIFT(dst_map), target_copy, target_copy->cpy_hdr.page_shift, target_copy->offset, (uint64_t)target_copy->size, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start);
+ if (target_copy != copy) {
+ copy = target_copy;
+ }
+ copy_size = copy->size; /* from here on, use the size of the copy we will actually map */
+ }
+
+ /*
+ * Find space for the data
+ *
+ * NOTE(review): the KERN_NO_SPACE returns in the search below do
+ * not discard "copy" when it is an adjusted target_copy distinct
+ * from original_copy -- this looks like a leak of the adjusted
+ * copy; confirm against vm_map_copy_adjust_to_target() and the
+ * callers.
+ */
+
+ vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
+ VM_MAP_COPY_PAGE_MASK(copy));
+ size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
+ VM_MAP_COPY_PAGE_MASK(copy))
+ - vm_copy_start;
+
+
+StartAgain:;
+
+ vm_map_lock(dst_map);
+ if (dst_map->disable_vmentry_reuse == TRUE) {
+ VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
+ last = entry;
+ } else {
+ if (dst_map->holelistenabled) {
+ hole_entry = CAST_TO_VM_MAP_ENTRY(dst_map->holes_list);
+
+ if (hole_entry == NULL) {
+ /*
+ * No more space in the map?
+ */
+ vm_map_unlock(dst_map);
+ return KERN_NO_SPACE;
+ }
+
+ last = hole_entry; /* start the search at the first hole */
+ start = last->vme_start;
+ } else {
+ assert(first_free_is_valid(dst_map));
+ start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
+ vm_map_min(dst_map) : last->vme_end;
+ }
+ start = vm_map_round_page(start,
+ VM_MAP_PAGE_MASK(dst_map));
+ }
+
+ while (TRUE) {
+ vm_map_entry_t next = last->vme_next;
+ vm_map_offset_t end = start + size;
+
+ if ((end > dst_map->max_offset) || (end < start)) { /* past the end of the map, or address overflow */
+ if (dst_map->wait_for_space) {
+ if (size <= (dst_map->max_offset - dst_map->min_offset)) {
+ assert_wait((event_t) dst_map,
+ THREAD_INTERRUPTIBLE);
+ vm_map_unlock(dst_map);
+ thread_block(THREAD_CONTINUE_NULL);
+ goto StartAgain; /* the map was unlocked: redo the whole search */
+ }
+ }
+ vm_map_unlock(dst_map);
+ return KERN_NO_SPACE;
+ }
+
+ if (dst_map->holelistenabled) {
+ if (last->vme_end >= end) { /* this hole extends at least to "end" */
+ break;
+ }
+ } else {
+ /*
+ * If there are no more entries, we must win.
+ *
+ * OR
+ *
+ * If there is another entry, it must be
+ * after the end of the potential new region.
+ */
+
+ if (next == vm_map_to_entry(dst_map)) {
+ break;
+ }
+
+ if (next->vme_start >= end) {
+ break;
+ }
+ }
+
+ last = next;
+
+ if (dst_map->holelistenabled) {
+ if (last == CAST_TO_VM_MAP_ENTRY(dst_map->holes_list)) {
+ /*
+ * Wrapped around
+ */
+ vm_map_unlock(dst_map);
+ return KERN_NO_SPACE;
+ }
+ start = last->vme_start;
+ } else {
+ start = last->vme_end;
+ }
+ start = vm_map_round_page(start,
+ VM_MAP_PAGE_MASK(dst_map));
+ }
+
+ if (dst_map->holelistenabled) {
+ if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) { /* on a miss, "last" is updated by the lookup */
+ panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
+ }
+ }
+
+
+ adjustment = start - vm_copy_start;
+ if (!consume_on_success) {
+ /*
+ * We're not allowed to consume "copy", so we'll have to
+ * copy its map entries into the destination map below.
+ * No need to re-allocate map entries from the correct
+ * (pageable or not) zone, since we'll get new map entries
+ * during the transfer.
+ * We'll also adjust the map entries's "start" and "end"
+ * during the transfer, to keep "copy"'s entries consistent
+ * with its "offset".
+ */
+ goto after_adjustments;
+ }
+
+ /*
+ * Since we're going to just drop the map
+ * entries from the copy into the destination
+ * map, they must come from the same pool.
+ */
+
+ if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
+ /*
+ * Mismatches occur when dealing with the default
+ * pager.
+ */
+ zone_t old_zone;
+ vm_map_entry_t next, new;
+
+ /*
+ * Find the zone that the copies were allocated from
+ */
+
+ entry = vm_map_copy_first_entry(copy);
+
+ /*
+ * Reinitialize the copy so that vm_map_copy_entry_link
+ * will work.
+ */
+ vm_map_store_copy_reset(copy, entry);
+ copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
+
+ /*
+ * Copy each entry.
+ */
+ while (entry != vm_map_copy_to_entry(copy)) {
+ new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
+ vm_map_entry_copy_full(new, entry);
+ new->vme_no_copy_on_read = FALSE;
+ assert(!new->iokit_acct);
+ if (new->is_sub_map) {
+ /* clr address space specifics */
+ new->use_pmap = FALSE;
+ }
+ vm_map_copy_entry_link(copy,
+ vm_map_copy_last_entry(copy),
+ new);
+ next = entry->vme_next; /* read the link before the old entry is freed */
+ old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
+ zfree(old_zone, entry);
+ entry = next;
+ }
+ }
+
+ /*
+ * Adjust the addresses in the copy chain, and
+ * reset the region attributes.
+ *
+ * Note that this (consume_on_success) path resets protection,
+ * max_protection and inheritance to their defaults; the
+ * cur_protection, max_protection and inheritance arguments are
+ * not applied here.
+ */
+
+ for (entry = vm_map_copy_first_entry(copy);
+ entry != vm_map_copy_to_entry(copy);
+ entry = entry->vme_next) {
+ if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
+ /*
+ * We're injecting this copy entry into a map that
+ * has the standard page alignment, so clear
+ * "map_aligned" (which might have been inherited
+ * from the original map entry).
+ */
+ entry->map_aligned = FALSE;
+ }
+
+ entry->vme_start += adjustment;
+ entry->vme_end += adjustment;
+
+ if (entry->map_aligned) {
+ assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
+ VM_MAP_PAGE_MASK(dst_map)));
+ assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
+ VM_MAP_PAGE_MASK(dst_map)));
+ }
+
+ entry->inheritance = VM_INHERIT_DEFAULT;
+ entry->protection = VM_PROT_DEFAULT;
+ entry->max_protection = VM_PROT_ALL;
+ entry->behavior = VM_BEHAVIOR_DEFAULT;
+
+ /*
+ * If the entry is now wired,
+ * map the pages into the destination map.
+ */
+ if (entry->wired_count != 0) {
+ vm_map_offset_t va;
+ vm_object_offset_t offset;
+ vm_object_t object;
+ vm_prot_t prot;
+ int type_of_fault;
+
+ /* TODO4K would need to use actual page size */
+ assert(VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT);
+
+ object = VME_OBJECT(entry);
+ offset = VME_OFFSET(entry);
+ va = entry->vme_start;
+
+ pmap_pageable(dst_map->pmap,
+ entry->vme_start,
+ entry->vme_end,
+ TRUE);
+
+ while (va < entry->vme_end) {
+ vm_page_t m;
+ struct vm_object_fault_info fault_info = {};
+
+ /*
+ * Look up the page in the object.
+ * Assert that the page will be found in the
+ * top object:
+ * either
+ * the object was newly created by
+ * vm_object_copy_slowly, and has
+ * copies of all of the pages from
+ * the source object
+ * or
+ * the object was moved from the old
+ * map entry; because the old map
+ * entry was wired, all of the pages
+ * were in the top-level object.
+ * (XXX not true if we wire pages for
+ * reading)
+ */
+ vm_object_lock(object);
+
+ m = vm_page_lookup(object, offset);
+ if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
+ m->vmp_absent) {
+ panic("vm_map_copyout: wiring %p", m);
+ }
+
+ prot = entry->protection;
+
+ if (override_nx(dst_map, VME_ALIAS(entry)) &&
+ prot) {
+ prot |= VM_PROT_EXECUTE;
+ }
+
+ type_of_fault = DBG_CACHE_HIT_FAULT;
+
+ fault_info.user_tag = VME_ALIAS(entry);
+ fault_info.pmap_options = 0;
+ if (entry->iokit_acct ||
+ (!entry->is_sub_map && !entry->use_pmap)) {
+ fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
+ }
+
+ vm_fault_enter(m,
+ dst_map->pmap,
+ va,
+ PAGE_SIZE, 0,
+ prot,
+ prot,
+ VM_PAGE_WIRED(m),
+ FALSE, /* change_wiring */
+ VM_KERN_MEMORY_NONE, /* tag - not wiring */
+ &fault_info,
+ NULL, /* need_retry */
+ &type_of_fault);
+
+ vm_object_unlock(object);
+
+ offset += PAGE_SIZE_64;
+ va += PAGE_SIZE;
+ }
+ }
+ }
+
+after_adjustments:
+
+ /*
+ * Correct the page alignment for the result
+ */
+
+ *dst_addr = start + (copy->offset - vm_copy_start);
+
+#if KASAN
+ kasan_notify_address(*dst_addr, size);
+#endif
+
+ /*
+ * Update the hints and the map size
+ */
+
+ if (consume_on_success) {
+ SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
+ } else {
+ SAVE_HINT_MAP_WRITE(dst_map, last);
+ }
+
+ dst_map->size += size;
+
+ /*
+ * Link in the copy
+ *
+ * When consuming, the entries of "copy" are moved into the map
+ * and a distinct original_copy is discarded as well. Otherwise
+ * the entries are duplicated into the map, an adjusted copy (if
+ * one was made) is discarded and "copy" is pointed back at the
+ * caller's original copy.
+ */
+
+ if (consume_on_success) {
+ vm_map_copy_insert(dst_map, last, copy);
+ if (copy != original_copy) {
+ vm_map_copy_discard(original_copy);
+ original_copy = VM_MAP_COPY_NULL;
+ }
+ } else {
+ vm_map_copy_remap(dst_map, last, copy, adjustment,
+ cur_protection, max_protection,
+ inheritance);
+ if (copy != original_copy && original_copy != VM_MAP_COPY_NULL) {
+ vm_map_copy_discard(copy);
+ copy = original_copy;
+ }
+ }
+
+
+ vm_map_unlock(dst_map);
+
+ /*
+ * XXX If wiring_required, call vm_map_pageable
+ */
+
+ return KERN_SUCCESS;
+}
+
+/*
+ *	Routine:	vm_map_copyin
+ *
+ *	Description:
+ *		Function form of vm_map_copyin: forwards to
+ *		vm_map_copyin_common() with both "src_volatile" and
+ *		"use_maxprot" set to FALSE.  See vm_map_copyin_common.
+ *		Exported via Unsupported.exports.
+ *
+ */
+
+#undef vm_map_copyin
+
+kern_return_t
+vm_map_copyin(
+	vm_map_t                        src_map,
+	vm_map_address_t        src_addr,
+	vm_map_size_t           len,
+	boolean_t                       src_destroy,
+	vm_map_copy_t           *copy_result)   /* OUT */
+{
+	kern_return_t kr;
+
+	kr = vm_map_copyin_common(src_map, src_addr, len, src_destroy,
+	    FALSE,              /* src_volatile */
+	    copy_result,
+	    FALSE);             /* use_maxprot */
+
+	return kr;
+}
+
+/*
+ * Routine: vm_map_copyin_common
+ *
+ * Description:
+ * Copy the specified region (src_addr, len) from the
+ * source address space (src_map), possibly removing
+ * the region from the source address space (src_destroy).
+ *
+ * Returns:
+ * A vm_map_copy_t object (copy_result), suitable for
+ * insertion into another address space (using vm_map_copyout),
+ * copying over another address space region (using
+ * vm_map_copy_overwrite). If the copy is unused, it
+ * should be destroyed (using vm_map_copy_discard).
+ *
+ * In/out conditions:
+ * The source map should not be locked on entry.
+ */
+
+typedef struct submap_map { /* one record per submap level entered by vm_map_copyin_internal() */
+ vm_map_t parent_map; /* map we were in when descending into the submap */
+ vm_map_offset_t base_start; /* value of src_start in parent_map at that point */
+ vm_map_offset_t base_end; /* value of src_end in parent_map at that point */
+ vm_map_size_t base_len; /* length of the copy range covered by the submap entry */
+ struct submap_map *next; /* previously pushed record; the list head is the most recent */
+} submap_map_t;
+
+/*
+ * Translate the boolean options of the historical interface into
+ * VM_MAP_COPYIN_* flags and hand the request to
+ * vm_map_copyin_internal().  "src_volatile" is accepted but unused.
+ */
+kern_return_t
+vm_map_copyin_common(
+	vm_map_t        src_map,
+	vm_map_address_t src_addr,
+	vm_map_size_t   len,
+	boolean_t       src_destroy,
+	__unused boolean_t      src_volatile,
+	vm_map_copy_t   *copy_result,   /* OUT */
+	boolean_t       use_maxprot)
+{
+	int copyin_flags;
+
+	copyin_flags = (src_destroy ? VM_MAP_COPYIN_SRC_DESTROY : 0) |
+	    (use_maxprot ? VM_MAP_COPYIN_USE_MAXPROT : 0);
+
+	return vm_map_copyin_internal(src_map,
+	           src_addr,
+	           len,
+	           copyin_flags,
+	           copy_result);
+}
+kern_return_t
+vm_map_copyin_internal(
+ vm_map_t src_map,
+ vm_map_address_t src_addr,
+ vm_map_size_t len,
+ int flags,
+ vm_map_copy_t *copy_result) /* OUT */
+{
+ vm_map_entry_t tmp_entry; /* Result of last map lookup --
+ * in multi-level lookup, this
+ * entry contains the actual
+ * vm_object/offset.
+ */
+ vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
+
+ vm_map_offset_t src_start; /* Start of current entry --
+ * where copy is taking place now
+ */
+ vm_map_offset_t src_end; /* End of entire region to be
+ * copied */
+ vm_map_offset_t src_base;
+ vm_map_t base_map = src_map;
+ boolean_t map_share = FALSE;
+ submap_map_t *parent_maps = NULL;
+
+ vm_map_copy_t copy; /* Resulting copy */
+ vm_map_address_t copy_addr;
+ vm_map_size_t copy_size;
+ boolean_t src_destroy;
+ boolean_t use_maxprot;
+ boolean_t preserve_purgeable;
+ boolean_t entry_was_shared;
+ vm_map_entry_t saved_src_entry;
+
+ if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
+ use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
+ preserve_purgeable =
+ (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
+
+ /*
+ * Check for copies of zero bytes.
+ */
+
+ if (len == 0) {
+ *copy_result = VM_MAP_COPY_NULL;
+ return KERN_SUCCESS;
+ }
+
+ /*
+ * Check that the end address doesn't overflow
+ */
+ src_end = src_addr + len;
+ if (src_end < src_addr) {
+ return KERN_INVALID_ADDRESS;
+ }
+
+ /*
+ * Compute (page aligned) start and end of region
+ */
+ src_start = vm_map_trunc_page(src_addr,
+ VM_MAP_PAGE_MASK(src_map));
+ src_end = vm_map_round_page(src_end,
+ VM_MAP_PAGE_MASK(src_map));
+
+ /*
+ * If the copy is sufficiently small, use a kernel buffer instead
+ * of making a virtual copy. The theory being that the cost of
+ * setting up VM (and taking C-O-W faults) dominates the copy costs
+ * for small regions.
+ */
+ if ((len < msg_ool_size_small) &&
+ !use_maxprot &&
+ !preserve_purgeable &&
+ !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
+ /*
+ * Since the "msg_ool_size_small" threshold was increased and
+ * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
+ * address space limits, we revert to doing a virtual copy if the
+ * copied range goes beyond those limits. Otherwise, mach_vm_read()
+ * of the commpage would now fail when it used to work.
+ */
+ (src_start >= vm_map_min(src_map) &&
+ src_start < vm_map_max(src_map) &&
+ src_end >= vm_map_min(src_map) &&
+ src_end < vm_map_max(src_map))) {
+ return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
+ src_destroy, copy_result);
+ }
+
+ /*
+ * Allocate a header element for the list.
+ *
+ * Use the start and end in the header to
+ * remember the endpoints prior to rounding.
+ */
+
+ copy = vm_map_copy_allocate();
+ copy->type = VM_MAP_COPY_ENTRY_LIST;
+ copy->cpy_hdr.entries_pageable = TRUE;
+ copy->cpy_hdr.page_shift = VM_MAP_PAGE_SHIFT(src_map);
+
+ vm_map_store_init( &(copy->cpy_hdr));
+
+ copy->offset = src_addr;
+ copy->size = len;
+
+ new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
+
+#define RETURN(x) \
+ MACRO_BEGIN \
+ vm_map_unlock(src_map); \
+ if(src_map != base_map) \
+ vm_map_deallocate(src_map); \
+ if (new_entry != VM_MAP_ENTRY_NULL) \
+ vm_map_copy_entry_dispose(copy,new_entry); \
+ vm_map_copy_discard(copy); \
+ { \
+ submap_map_t *_ptr; \
+ \
+ for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
+ parent_maps=parent_maps->next; \
+ if (_ptr->parent_map != base_map) \
+ vm_map_deallocate(_ptr->parent_map); \
+ kfree(_ptr, sizeof(submap_map_t)); \
+ } \
+ } \
+ MACRO_RETURN(x); \
+ MACRO_END
+
+ /*
+ * Find the beginning of the region.
+ */
+
+ vm_map_lock(src_map);
+
+ /*
+ * Lookup the original "src_addr" rather than the truncated
+ * "src_start", in case "src_start" falls in a non-map-aligned
+ * map entry *before* the map entry that contains "src_addr"...
+ */
+ if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry)) {
+ RETURN(KERN_INVALID_ADDRESS);
+ }
+ if (!tmp_entry->is_sub_map) {
+ /*
+ * ... but clip to the map-rounded "src_start" rather than
+ * "src_addr" to preserve map-alignment. We'll adjust the
+ * first copy entry at the end, if needed.
+ */
+ vm_map_clip_start(src_map, tmp_entry, src_start);
+ }
+ if (src_start < tmp_entry->vme_start) {
+ /*
+ * Move "src_start" up to the start of the
+ * first map entry to copy.
+ */
+ src_start = tmp_entry->vme_start;
+ }
+ /* set for later submap fix-up */
+ copy_addr = src_start;
+
+ /*
+ * Go through entries until we get to the end.
+ */
+
+ while (TRUE) {
+ vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
+ vm_map_size_t src_size; /* Size of source
+ * map entry (in both
+ * maps)
+ */
+
+ vm_object_t src_object; /* Object to copy */
+ vm_object_offset_t src_offset;
+
+ boolean_t src_needs_copy; /* Should source map
+ * be made read-only
+ * for copy-on-write?
+ */
+
+ boolean_t new_entry_needs_copy; /* Will new entry be COW? */
+
+ boolean_t was_wired; /* Was source wired? */
+ vm_map_version_t version; /* Version before locks
+ * dropped to make copy
+ */
+ kern_return_t result; /* Return value from
+ * copy_strategically.
+ */
+ while (tmp_entry->is_sub_map) {
+ vm_map_size_t submap_len;
+ submap_map_t *ptr;
+
+ ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
+ ptr->next = parent_maps;
+ parent_maps = ptr;
+ ptr->parent_map = src_map;
+ ptr->base_start = src_start;
+ ptr->base_end = src_end;
+ submap_len = tmp_entry->vme_end - src_start;
+ if (submap_len > (src_end - src_start)) {
+ submap_len = src_end - src_start;
+ }
+ ptr->base_len = submap_len;
+
+ src_start -= tmp_entry->vme_start;
+ src_start += VME_OFFSET(tmp_entry);
+ src_end = src_start + submap_len;
+ src_map = VME_SUBMAP(tmp_entry);
+ vm_map_lock(src_map);
+ /* keep an outstanding reference for all maps in */
+ /* the parents tree except the base map */
+ vm_map_reference(src_map);
+ vm_map_unlock(ptr->parent_map);
+ if (!vm_map_lookup_entry(
+ src_map, src_start, &tmp_entry)) {
+ RETURN(KERN_INVALID_ADDRESS);
+ }
+ map_share = TRUE;
+ if (!tmp_entry->is_sub_map) {
+ vm_map_clip_start(src_map, tmp_entry, src_start);
+ }
+ src_entry = tmp_entry;
+ }
+ /* we are now in the lowest level submap... */
+
+ if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
+ (VME_OBJECT(tmp_entry)->phys_contiguous)) {
+ /* This is not supported for now. In future */
+ /* we will need to detect the phys_contig */
+ /* condition and then upgrade copy_slowly */
+ /* to do physical copy from the device mem */
+ /* based object. We can piggy-back off of */
+ /* the was wired boolean to set-up the */
+ /* proper handling */
+ RETURN(KERN_PROTECTION_FAILURE);
+ }
+ /*
+ * Create a new address map entry to hold the result.
+ * Fill in the fields from the appropriate source entries.
+ * We must unlock the source map to do this if we need
+ * to allocate a map entry.
+ */
+ if (new_entry == VM_MAP_ENTRY_NULL) {
+ version.main_timestamp = src_map->timestamp;
+ vm_map_unlock(src_map);
+
+ new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
+
+ vm_map_lock(src_map);
+ if ((version.main_timestamp + 1) != src_map->timestamp) {
+ if (!vm_map_lookup_entry(src_map, src_start,
+ &tmp_entry)) {
+ RETURN(KERN_INVALID_ADDRESS);
+ }
+ if (!tmp_entry->is_sub_map) {
+ vm_map_clip_start(src_map, tmp_entry, src_start);
+ }
+ continue; /* restart w/ new tmp_entry */
+ }
+ }
+
+ /*
+ * Verify that the region can be read.
+ */
+ if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
+ !use_maxprot) ||
+ (src_entry->max_protection & VM_PROT_READ) == 0) {
+ RETURN(KERN_PROTECTION_FAILURE);
+ }
+
+ /*
+ * Clip against the endpoints of the entire region.
+ */
+
+ vm_map_clip_end(src_map, src_entry, src_end);
+
+ src_size = src_entry->vme_end - src_start;
+ src_object = VME_OBJECT(src_entry);
+ src_offset = VME_OFFSET(src_entry);
+ was_wired = (src_entry->wired_count != 0);
+
+ vm_map_entry_copy(src_map, new_entry, src_entry);
+ if (new_entry->is_sub_map) {
+ /* clr address space specifics */
+ new_entry->use_pmap = FALSE;
+ } else {
+ /*
+ * We're dealing with a copy-on-write operation,
+ * so the resulting mapping should not inherit the
+ * original mapping's accounting settings.
+ * "iokit_acct" should have been cleared in
+ * vm_map_entry_copy().
+ * "use_pmap" should be reset to its default (TRUE)
+ * so that the new mapping gets accounted for in
+ * the task's memory footprint.
+ */
+ assert(!new_entry->iokit_acct);
+ new_entry->use_pmap = TRUE;
+ }
+
+ /*
+ * Attempt non-blocking copy-on-write optimizations.
+ */
+
+ /*
+ * If we are destroying the source, and the object
+ * is internal, we could move the object reference
+ * from the source to the copy. The copy is
+ * copy-on-write only if the source is.
+ * We make another reference to the object, because
+ * destroying the source entry will deallocate it.
+ *
+ * This memory transfer has to be atomic, (to prevent
+ * the VM object from being shared or copied while
+ * it's being moved here), so we could only do this
+ * if we won't have to unlock the VM map until the
+ * original mapping has been fully removed.
+ */
+
+RestartCopy:
+ if ((src_object == VM_OBJECT_NULL ||
+ (!was_wired && !map_share && !tmp_entry->is_shared
+ && !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT))) &&
+ vm_object_copy_quickly(
+ VME_OBJECT_PTR(new_entry),
+ src_offset,
+ src_size,
+ &src_needs_copy,
+ &new_entry_needs_copy)) {
+ new_entry->needs_copy = new_entry_needs_copy;
+
+ /*
+ * Handle copy-on-write obligations
+ */
+
+ if (src_needs_copy && !tmp_entry->needs_copy) {
+ vm_prot_t prot;
+
+ prot = src_entry->protection & ~VM_PROT_WRITE;
+
+ if (override_nx(src_map, VME_ALIAS(src_entry))
+ && prot) {
+ prot |= VM_PROT_EXECUTE;
+ }
+
+ vm_object_pmap_protect(
+ src_object,
+ src_offset,
+ src_size,
+ (src_entry->is_shared ?
+ PMAP_NULL
+ : src_map->pmap),
+ VM_MAP_PAGE_SIZE(src_map),
+ src_entry->vme_start,
+ prot);
+
+ assert(tmp_entry->wired_count == 0);
+ tmp_entry->needs_copy = TRUE;
+ }
+
+ /*
+ * The map has never been unlocked, so it's safe
+ * to move to the next entry rather than doing
+ * another lookup.
+ */
+
+ goto CopySuccessful;
+ }
+
+ entry_was_shared = tmp_entry->is_shared;
+
+ /*
+ * Take an object reference, so that we may
+ * release the map lock(s).
+ */
+
+ assert(src_object != VM_OBJECT_NULL);
+ vm_object_reference(src_object);
+
+ /*
+ * Record the timestamp for later verification.
+ * Unlock the map.
+ */
+
+ version.main_timestamp = src_map->timestamp;
+ vm_map_unlock(src_map); /* Increments timestamp once! */
+ saved_src_entry = src_entry;
+ tmp_entry = VM_MAP_ENTRY_NULL;
+ src_entry = VM_MAP_ENTRY_NULL;
+
+ /*
+ * Perform the copy
+ */
+
+ if (was_wired ||
+ (debug4k_no_cow_copyin &&
+ VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT)) {
+CopySlowly:
+ vm_object_lock(src_object);
+ result = vm_object_copy_slowly(
+ src_object,
+ src_offset,
+ src_size,
+ THREAD_UNINT,
+ VME_OBJECT_PTR(new_entry));
+ VME_OFFSET_SET(new_entry,
+ src_offset - vm_object_trunc_page(src_offset));
+ new_entry->needs_copy = FALSE;
+ } else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
+ (entry_was_shared || map_share)) {
+ vm_object_t new_object;
+
+ vm_object_lock_shared(src_object);
+ new_object = vm_object_copy_delayed(
+ src_object,
+ src_offset,
+ src_size,
+ TRUE);
+ if (new_object == VM_OBJECT_NULL) {
+ goto CopySlowly;
+ }
+
+ VME_OBJECT_SET(new_entry, new_object);
+ assert(new_entry->wired_count == 0);
+ new_entry->needs_copy = TRUE;
+ assert(!new_entry->iokit_acct);
+ assert(new_object->purgable == VM_PURGABLE_DENY);
+ assertf(new_entry->use_pmap, "src_map %p new_entry %p\n", src_map, new_entry);
+ result = KERN_SUCCESS;
+ } else {
+ vm_object_offset_t new_offset;
+ new_offset = VME_OFFSET(new_entry);
+ result = vm_object_copy_strategically(src_object,
+ src_offset,
+ src_size,
+ VME_OBJECT_PTR(new_entry),
+ &new_offset,
+ &new_entry_needs_copy);
+ if (new_offset != VME_OFFSET(new_entry)) {
+ VME_OFFSET_SET(new_entry, new_offset);
+ }
+
+ new_entry->needs_copy = new_entry_needs_copy;
+ }
+
+ if (result == KERN_SUCCESS &&
+ ((preserve_purgeable &&
+ src_object->purgable != VM_PURGABLE_DENY) ||
+ new_entry->used_for_jit)) {
+ /*
+ * Purgeable objects should be COPY_NONE, true share;
+ * this should be propagated to the copy.
+ *
+ * Also force mappings the pmap specially protects to
+ * be COPY_NONE; trying to COW these mappings would
+ * change the effective protections, which could have
+ * side effects if the pmap layer relies on the
+ * specified protections.
+ */
+
+ vm_object_t new_object;
+
+ new_object = VME_OBJECT(new_entry);
+ assert(new_object != src_object);
+ vm_object_lock(new_object);
+ assert(new_object->ref_count == 1);
+ assert(new_object->shadow == VM_OBJECT_NULL);
+ assert(new_object->copy == VM_OBJECT_NULL);
+ assert(new_object->vo_owner == NULL);
+
+ new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
+
+ if (preserve_purgeable &&
+ src_object->purgable != VM_PURGABLE_DENY) {
+ new_object->true_share = TRUE;
+
+ /* start as non-volatile with no owner... */
+ new_object->purgable = VM_PURGABLE_NONVOLATILE;
+ vm_purgeable_nonvolatile_enqueue(new_object, NULL);
+ /* ... and move to src_object's purgeable state */
+ if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
+ int state;
+ state = src_object->purgable;
+ vm_object_purgable_control(
+ new_object,
+ VM_PURGABLE_SET_STATE_FROM_KERNEL,
+ &state);
+ }
+ /* no pmap accounting for purgeable objects */
+ new_entry->use_pmap = FALSE;
+ }
+
+ vm_object_unlock(new_object);
+ new_object = VM_OBJECT_NULL;
+ }
+
+ if (result != KERN_SUCCESS &&
+ result != KERN_MEMORY_RESTART_COPY) {
+ vm_map_lock(src_map);
+ RETURN(result);
+ }
+
+ /*
+ * Throw away the extra reference
+ */
+
+ vm_object_deallocate(src_object);
+
+ /*
+ * Verify that the map has not substantially
+ * changed while the copy was being made.
+ */
+
+ vm_map_lock(src_map);
+
+ if ((version.main_timestamp + 1) == src_map->timestamp) {
+ /* src_map hasn't changed: src_entry is still valid */
+ src_entry = saved_src_entry;
+ goto VerificationSuccessful;
+ }
+
+ /*
+ * Simple version comparison failed.
+ *
+ * Retry the lookup and verify that the
+ * same object/offset are still present.
+ *
+ * [Note: a memory manager that colludes with
+ * the calling task can detect that we have
+ * cheated. While the map was unlocked, the
+ * mapping could have been changed and restored.]
+ */
+
+ if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
+ if (result != KERN_MEMORY_RESTART_COPY) {
+ vm_object_deallocate(VME_OBJECT(new_entry));
+ VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
+ /* reset accounting state */
+ new_entry->iokit_acct = FALSE;
+ new_entry->use_pmap = TRUE;
+ }
+ RETURN(KERN_INVALID_ADDRESS);
+ }
+
+ src_entry = tmp_entry;
+ vm_map_clip_start(src_map, src_entry, src_start);
+
+ if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
+ !use_maxprot) ||
+ ((src_entry->max_protection & VM_PROT_READ) == 0)) {
+ goto VerificationFailed;
+ }
+
+ if (src_entry->vme_end < new_entry->vme_end) {
+ /*
+ * This entry might have been shortened
+ * (vm_map_clip_end) or been replaced with
+ * an entry that ends closer to "src_start"
+ * than before.
+ * Adjust "new_entry" accordingly; copying
+ * less memory would be correct but we also
+ * redo the copy (see below) if the new entry
+ * no longer points at the same object/offset.
+ */
+ assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
+ VM_MAP_COPY_PAGE_MASK(copy)));
+ new_entry->vme_end = src_entry->vme_end;
+ src_size = new_entry->vme_end - src_start;
+ } else if (src_entry->vme_end > new_entry->vme_end) {
+ /*
+ * This entry might have been extended
+ * (vm_map_entry_simplify() or coalesce)
+ * or been replaced with an entry that ends farther
+ * from "src_start" than before.
+ *
+ * We've called vm_object_copy_*() only on
+ * the previous <start:end> range, so we can't
+ * just extend new_entry. We have to re-do
+ * the copy based on the new entry as if it was
+ * pointing at a different object/offset (see
+ * "Verification failed" below).
+ */
+ }
+
+ if ((VME_OBJECT(src_entry) != src_object) ||
+ (VME_OFFSET(src_entry) != src_offset) ||
+ (src_entry->vme_end > new_entry->vme_end)) {
+ /*
+ * Verification failed.
+ *
+ * Start over with this top-level entry.
+ */
+
+VerificationFailed: ;
+
+ vm_object_deallocate(VME_OBJECT(new_entry));
+ tmp_entry = src_entry;
+ continue;
+ }
+
+ /*
+ * Verification succeeded.
+ */
+
+VerificationSuccessful:;
+
+ if (result == KERN_MEMORY_RESTART_COPY) {
+ goto RestartCopy;
+ }
+
+ /*
+ * Copy succeeded.
+ */
+
+CopySuccessful: ;
+
+ /*
+ * Link in the new copy entry.
+ */
+
+ vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
+ new_entry);
+
+ /*
+ * Determine whether the entire region
+ * has been copied.
+ */
+ src_base = src_start;
+ src_start = new_entry->vme_end;
+ new_entry = VM_MAP_ENTRY_NULL;
+ while ((src_start >= src_end) && (src_end != 0)) {
+ submap_map_t *ptr;
+
+ if (src_map == base_map) {
+ /* back to the top */
+ break;
+ }
+
+ ptr = parent_maps;
+ assert(ptr != NULL);
+ parent_maps = parent_maps->next;
+
+ /* fix up the damage we did in that submap */
+ vm_map_simplify_range(src_map,
+ src_base,
+ src_end);
+
+ vm_map_unlock(src_map);
+ vm_map_deallocate(src_map);
+ vm_map_lock(ptr->parent_map);
+ src_map = ptr->parent_map;
+ src_base = ptr->base_start;
+ src_start = ptr->base_start + ptr->base_len;
+ src_end = ptr->base_end;
+ if (!vm_map_lookup_entry(src_map,
+ src_start,
+ &tmp_entry) &&
+ (src_end > src_start)) {
+ RETURN(KERN_INVALID_ADDRESS);
+ }
+ kfree(ptr, sizeof(submap_map_t));
+ if (parent_maps == NULL) {
+ map_share = FALSE;
+ }
+ src_entry = tmp_entry->vme_prev;
+ }
+
+ if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
+ (src_start >= src_addr + len) &&
+ (src_addr + len != 0)) {
+ /*
+ * Stop copying now, even though we haven't reached
+ * "src_end". We'll adjust the end of the last copy
+ * entry at the end, if needed.
+ *
+ * If src_map's alignment is different from the
+ * system's page-alignment, there could be
+ * extra non-map-aligned map entries between
+ * the original (non-rounded) "src_addr + len"
+ * and the rounded "src_end".
+ * We do not want to copy those map entries since
+ * they're not part of the copied range.
+ */
+ break;
+ }
+
+ if ((src_start >= src_end) && (src_end != 0)) {
+ break;
+ }
+
+ /*
+ * Verify that there are no gaps in the region
+ */
+
+ tmp_entry = src_entry->vme_next;
+ if ((tmp_entry->vme_start != src_start) ||
+ (tmp_entry == vm_map_to_entry(src_map))) {
+ RETURN(KERN_INVALID_ADDRESS);
+ }
+ }
+
+ /*
+ * If the source should be destroyed, do it now, since the
+ * copy was successful.
+ */
+ if (src_destroy) {
+ (void) vm_map_delete(
+ src_map,
+ vm_map_trunc_page(src_addr,
+ VM_MAP_PAGE_MASK(src_map)),
+ src_end,
+ ((src_map == kernel_map) ?
+ VM_MAP_REMOVE_KUNWIRE :
+ VM_MAP_REMOVE_NO_FLAGS),
+ VM_MAP_NULL);
+ } else {
+ /* fix up the damage we did in the base map */
+ vm_map_simplify_range(
+ src_map,
+ vm_map_trunc_page(src_addr,
+ VM_MAP_PAGE_MASK(src_map)),
+ vm_map_round_page(src_end,
+ VM_MAP_PAGE_MASK(src_map)));
+ }
+
+ vm_map_unlock(src_map);
+ tmp_entry = VM_MAP_ENTRY_NULL;
+
+ if (VM_MAP_PAGE_SHIFT(src_map) > PAGE_SHIFT &&
+ VM_MAP_PAGE_SHIFT(src_map) != VM_MAP_COPY_PAGE_SHIFT(copy)) {
+ vm_map_offset_t original_start, original_offset, original_end;
+
+ assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
+
+ /* adjust alignment of first copy_entry's "vme_start" */
+ tmp_entry = vm_map_copy_first_entry(copy);
+ if (tmp_entry != vm_map_copy_to_entry(copy)) {
+ vm_map_offset_t adjustment;
+
+ original_start = tmp_entry->vme_start;
+ original_offset = VME_OFFSET(tmp_entry);
+
+ /* map-align the start of the first copy entry... */
+ adjustment = (tmp_entry->vme_start -
+ vm_map_trunc_page(
+ tmp_entry->vme_start,
+ VM_MAP_PAGE_MASK(src_map)));
+ tmp_entry->vme_start -= adjustment;
+ VME_OFFSET_SET(tmp_entry,
+ VME_OFFSET(tmp_entry) - adjustment);
+ copy_addr -= adjustment;
+ assert(tmp_entry->vme_start < tmp_entry->vme_end);
+ /* ... adjust for mis-aligned start of copy range */
+ adjustment =
+ (vm_map_trunc_page(copy->offset,
+ PAGE_MASK) -
+ vm_map_trunc_page(copy->offset,
+ VM_MAP_PAGE_MASK(src_map)));
+ if (adjustment) {
+ assert(page_aligned(adjustment));
+ assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
+ tmp_entry->vme_start += adjustment;
+ VME_OFFSET_SET(tmp_entry,
+ (VME_OFFSET(tmp_entry) +
+ adjustment));
+ copy_addr += adjustment;
+ assert(tmp_entry->vme_start < tmp_entry->vme_end);
+ }
+
+ /*
+ * Assert that the adjustments haven't exposed
+ * more than was originally copied...
+ */
+ assert(tmp_entry->vme_start >= original_start);
+ assert(VME_OFFSET(tmp_entry) >= original_offset);
+ /*
+ * ... and that it did not adjust outside of
+ * a single 16K page.
+ */
+ assert(vm_map_trunc_page(tmp_entry->vme_start,
+ VM_MAP_PAGE_MASK(src_map)) ==
+ vm_map_trunc_page(original_start,
+ VM_MAP_PAGE_MASK(src_map)));
+ }
+
+ /* adjust alignment of last copy_entry's "vme_end" */
+ tmp_entry = vm_map_copy_last_entry(copy);
+ if (tmp_entry != vm_map_copy_to_entry(copy)) {
+ vm_map_offset_t adjustment;
+
+ original_end = tmp_entry->vme_end;
+
+ /* map-align the end of the last copy entry... */
+ tmp_entry->vme_end =
+ vm_map_round_page(tmp_entry->vme_end,
+ VM_MAP_PAGE_MASK(src_map));
+ /* ... adjust for mis-aligned end of copy range */
+ adjustment =
+ (vm_map_round_page((copy->offset +
+ copy->size),
+ VM_MAP_PAGE_MASK(src_map)) -
+ vm_map_round_page((copy->offset +
+ copy->size),
+ PAGE_MASK));
+ if (adjustment) {
+ assert(page_aligned(adjustment));
+ assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
+ tmp_entry->vme_end -= adjustment;
+ assert(tmp_entry->vme_start < tmp_entry->vme_end);
+ }
+
+ /*
+ * Assert that the adjustments haven't exposed
+ * more than was originally copied...
+ */
+ assert(tmp_entry->vme_end <= original_end);
+ /*
+ * ... and that it did not adjust outside of
+ * a single 16K page.
+ */
+ assert(vm_map_round_page(tmp_entry->vme_end,
+ VM_MAP_PAGE_MASK(src_map)) ==
+ vm_map_round_page(original_end,
+ VM_MAP_PAGE_MASK(src_map)));
+ }
+ }
+
+ /* Fix-up start and end points in copy. This is necessary */
+ /* when the various entries in the copy object were picked */
+ /* up from different sub-maps */
+
+ tmp_entry = vm_map_copy_first_entry(copy);
+ copy_size = 0; /* compute actual size */
+ while (tmp_entry != vm_map_copy_to_entry(copy)) {
+ assert(VM_MAP_PAGE_ALIGNED(
+ copy_addr + (tmp_entry->vme_end -
+ tmp_entry->vme_start),
+ MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
+ assert(VM_MAP_PAGE_ALIGNED(
+ copy_addr,
+ MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
+
+ /*
+ * The copy_entries will be injected directly into the
+ * destination map and might not be "map aligned" there...
+ */
+ tmp_entry->map_aligned = FALSE;
+
+ tmp_entry->vme_end = copy_addr +
+ (tmp_entry->vme_end - tmp_entry->vme_start);
+ tmp_entry->vme_start = copy_addr;
+ assert(tmp_entry->vme_start < tmp_entry->vme_end);
+ copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
+ copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
+ tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
+ }
+
+ if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
+ copy_size < copy->size) {
+ /*
+ * The actual size of the VM map copy is smaller than what
+ * was requested by the caller. This must be because some
+ * PAGE_SIZE-sized pages are missing at the end of the last
+ * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
+ * The caller might not have been aware of those missing
+ * pages and might not want to be aware of it, which is
+ * fine as long as they don't try to access (and crash on)
+ * those missing pages.
+ * Let's adjust the size of the "copy", to avoid failing
+ * in vm_map_copyout() or vm_map_copy_overwrite().
+ */
+ assert(vm_map_round_page(copy_size,
+ VM_MAP_PAGE_MASK(src_map)) ==
+ vm_map_round_page(copy->size,
+ VM_MAP_PAGE_MASK(src_map)));
+ copy->size = copy_size;
+ }
+
+ *copy_result = copy;
+ return KERN_SUCCESS;
+
+#undef RETURN
+}
+
+ /*
+  * vm_map_copy_extract:
+  *
+  * Build an entry-list vm_map_copy for the range
+  * [src_addr, src_addr + len) of "src_map" by handing the copy's
+  * header to vm_map_remap_extract().
+  *
+  * Parameters:
+  *   src_map      map to extract from
+  *   src_addr     start of the range
+  *   len          length of the range; 0 yields VM_MAP_COPY_NULL
+  *   do_copy      passed through as the "copy" argument of
+  *                vm_map_remap_extract()
+  *   copy_result  OUT: the new copy; only written when
+  *                KERN_SUCCESS is returned
+  *   cur_prot     IN/OUT: passed through to vm_map_remap_extract();
+  *                the value on entry is remembered as "required"
+  *   max_prot     IN/OUT: same treatment as cur_prot
+  *   inheritance  passed through to vm_map_remap_extract()
+  *   vmk_flags    passed through; vmkf_copy_pageable also selects
+  *                whether the copy's entries are pageable
+  *
+  * Returns:
+  *   KERN_SUCCESS          copy built (or len was 0)
+  *   KERN_INVALID_ADDRESS  src_addr + len wraps around
+  *   anything else         error from vm_map_remap_extract(); the
+  *                         partially built copy has been discarded
+  */
+ kern_return_t
+ vm_map_copy_extract(
+     vm_map_t                src_map,
+     vm_map_address_t        src_addr,
+     vm_map_size_t           len,
+     boolean_t               do_copy,
+     vm_map_copy_t           *copy_result,   /* OUT */
+     vm_prot_t               *cur_prot,      /* IN/OUT */
+     vm_prot_t               *max_prot,      /* IN/OUT */
+     vm_inherit_t            inheritance,
+     vm_map_kernel_flags_t   vmk_flags)
+ {
+     vm_map_copy_t   copy;
+     kern_return_t   kr;
+     vm_prot_t       required_cur_prot, required_max_prot;
+
+     /*
+      * Check for copies of zero bytes.
+      */
+     if (len == 0) {
+         *copy_result = VM_MAP_COPY_NULL;
+         return KERN_SUCCESS;
+     }
+
+     /*
+      * Check that the end address doesn't overflow
+      */
+     if (src_addr + len < src_addr) {
+         return KERN_INVALID_ADDRESS;
+     }
+
+     if (VM_MAP_PAGE_SIZE(src_map) < PAGE_SIZE) {
+         DEBUG4K_SHARE("src_map %p src_addr 0x%llx src_end 0x%llx\n", src_map, (uint64_t)src_addr, (uint64_t)(src_addr + len));
+     }
+
+     /*
+      * Remember what the caller asked for: vm_map_remap_extract()
+      * overwrites *cur_prot and *max_prot.
+      */
+     required_cur_prot = *cur_prot;
+     required_max_prot = *max_prot;
+
+     /*
+      * Allocate a header element for the list.
+      *
+      * The copy records offset 0 and the caller's unrounded length.
+      */
+     copy = vm_map_copy_allocate();
+     copy->type = VM_MAP_COPY_ENTRY_LIST;
+     copy->cpy_hdr.entries_pageable = vmk_flags.vmkf_copy_pageable;
+
+     vm_map_store_init(&copy->cpy_hdr);
+
+     copy->offset = 0;
+     copy->size = len;
+
+     kr = vm_map_remap_extract(src_map,
+         src_addr,
+         len,
+         do_copy,            /* copy */
+         &copy->cpy_hdr,
+         cur_prot,           /* IN/OUT */
+         max_prot,           /* IN/OUT */
+         inheritance,
+         vmk_flags);
+     if (kr != KERN_SUCCESS) {
+         vm_map_copy_discard(copy);
+         return kr;
+     }
+
+     /*
+      * When the caller required specific protections, the ones
+      * reported back must include them.
+      */
+     if (required_cur_prot != VM_PROT_NONE) {
+         assert((*cur_prot & required_cur_prot) == required_cur_prot);
+         assert((*max_prot & required_max_prot) == required_max_prot);
+     }
+
+     *copy_result = copy;
+     return KERN_SUCCESS;
+ }
+
+/*
+ * vm_map_copyin_object:
+ *
+ * Create a copy object from an object.
+ * Our caller donates an object reference.
+ */
+
+kern_return_t
+vm_map_copyin_object(
+ vm_object_t object,
+ vm_object_offset_t offset, /* offset of region in object */
+ vm_object_size_t size, /* size of region in object */
+ vm_map_copy_t *copy_result) /* OUT */
+{
+ vm_map_copy_t copy; /* Resulting copy */
+
+ /*
+ * We drop the object into a special copy object
+ * that contains the object directly.
+ */
+
+ copy = vm_map_copy_allocate();
+ copy->type = VM_MAP_COPY_OBJECT;
+ copy->cpy_object = object;
+ copy->offset = offset;
+ copy->size = size;
+
+ *copy_result = copy;
+ return KERN_SUCCESS;
+}
+
+ static void
+ vm_map_fork_share(
+ vm_map_t old_map, /* map that currently holds old_entry */
+ vm_map_entry_t old_entry, /* entry to be shared */
+ vm_map_t new_map) /* map receiving the shared clone */
+ {
+ vm_object_t object;
+ vm_map_entry_t new_entry;
+
+ /*
+ * vm_map_fork_share:
+ *
+ * Give "new_map" a shared mapping of the memory described
+ * by "old_entry" of "old_map": a clone of old_entry is linked
+ * after the last entry of new_map and both entries are marked
+ * is_shared. Nothing is returned; a failing pmap_nest()
+ * panics. vm_map_fork() calls this with both maps locked.
+ *
+ * New sharing code. New map entry
+ * references original object. Internal
+ * objects use asynchronous copy algorithm for
+ * future copies. First make sure we have
+ * the right object. If we need a shadow,
+ * or someone else already has one, then
+ * make a new shadow and share it.
+ */
+
+ object = VME_OBJECT(old_entry);
+ if (old_entry->is_sub_map) { /* submap entry: no VM object to prepare */
+ assert(old_entry->wired_count == 0);
+#ifndef NO_NESTED_PMAP
+ if (old_entry->use_pmap) {
+ kern_return_t result;
+
+ result = pmap_nest(new_map->pmap,
+ (VME_SUBMAP(old_entry))->pmap,
+ (addr64_t)old_entry->vme_start,
+ (uint64_t)(old_entry->vme_end - old_entry->vme_start));
+ if (result) {
+ panic("vm_map_fork_share: pmap_nest failed!");
+ }
+ }
+#endif /* NO_NESTED_PMAP */
+ } else if (object == VM_OBJECT_NULL) { /* no object yet: allocate one spanning the entry */
+ object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
+ old_entry->vme_start));
+ VME_OFFSET_SET(old_entry, 0);
+ VME_OBJECT_SET(old_entry, object);
+ old_entry->use_pmap = TRUE;
+// assert(!old_entry->needs_copy);
+ } else if (object->copy_strategy !=
+ MEMORY_OBJECT_COPY_SYMMETRIC) {
+ /*
+ * We are already using an asymmetric
+ * copy, and therefore we already have
+ * the right object.
+ */
+
+ assert(!old_entry->needs_copy);
+ } else if (old_entry->needs_copy || /* case 1 */
+ object->shadowed || /* case 2 */
+ (!object->true_share && /* case 3 */
+ !old_entry->is_shared &&
+ (object->vo_size >
+ (vm_map_size_t)(old_entry->vme_end -
+ old_entry->vme_start)))) {
+ /*
+ * We need to create a shadow.
+ * There are three cases here.
+ * In the first case, we need to
+ * complete a deferred symmetrical
+ * copy that we participated in.
+ * In the second and third cases,
+ * we need to create the shadow so
+ * that changes that we make to the
+ * object do not interfere with
+ * any symmetrical copies which
+ * have occurred (case 2) or which
+ * might occur (case 3).
+ *
+ * The first case is when we had
+ * deferred shadow object creation
+ * via the entry->needs_copy mechanism.
+ * This mechanism only works when
+ * only one entry points to the source
+ * object, and we are about to create
+ * a second entry pointing to the
+ * same object. The problem is that
+ * there is no way of mapping from
+ * an object to the entries pointing
+ * to it. (Deferred shadow creation
+ * works with one entry because it occurs
+ * at fault time, and we walk from the
+ * entry to the object when handling
+ * the fault.)
+ *
+ * The second case is when the object
+ * to be shared has already been copied
+ * with a symmetric copy, but we point
+ * directly to the object without
+ * needs_copy set in our entry. (This
+ * can happen because different ranges
+ * of an object can be pointed to by
+ * different entries. In particular,
+ * a single entry pointing to an object
+ * can be split by a call to vm_inherit,
+ * which, combined with task_create, can
+ * result in the different entries
+ * having different needs_copy values.)
+ * The shadowed flag in the object allows
+ * us to detect this case. The problem
+ * with this case is that if this object
+ * has or will have shadows, then we
+ * must not perform an asymmetric copy
+ * of this object, since such a copy
+ * allows the object to be changed, which
+ * will break the previous symmetrical
+ * copies (which rely upon the object
+ * not changing). In a sense, the shadowed
+ * flag says "don't change this object".
+ * We fix this by creating a shadow
+ * object for this object, and sharing
+ * that. This works because we are free
+ * to change the shadow object (and thus
+ * to use an asymmetric copy strategy);
+ * this is also semantically correct,
+ * since this object is temporary, and
+ * therefore a copy of the object is
+ * as good as the object itself. (This
+ * is not true for permanent objects,
+ * since the pager needs to see changes,
+ * which won't happen if the changes
+ * are made to a copy.)
+ *
+ * The third case is when the object
+ * to be shared has parts sticking
+ * outside of the entry we're working
+ * with, and thus may in the future
+ * be subject to a symmetrical copy.
+ * (This is a preemptive version of
+ * case 2.)
+ */
+ VME_OBJECT_SHADOW(old_entry,
+ (vm_map_size_t) (old_entry->vme_end -
+ old_entry->vme_start));
+
+ /*
+ * If we're making a shadow for other than
+ * copy on write reasons, then we have
+ * to remove write permission.
+ */
+
+ if (!old_entry->needs_copy &&
+ (old_entry->protection & VM_PROT_WRITE)) {
+ vm_prot_t prot;
+
+ assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));
+
+ prot = old_entry->protection & ~VM_PROT_WRITE;
+
+ assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));
+
+ if (override_nx(old_map, VME_ALIAS(old_entry)) && prot) {
+ prot |= VM_PROT_EXECUTE;
+ }
+
+
+ if (old_map->mapped_in_other_pmaps) {
+ vm_object_pmap_protect(
+ VME_OBJECT(old_entry),
+ VME_OFFSET(old_entry),
+ (old_entry->vme_end -
+ old_entry->vme_start),
+ PMAP_NULL,
+ PAGE_SIZE,
+ old_entry->vme_start,
+ prot);
+ } else {
+ pmap_protect(old_map->pmap,
+ old_entry->vme_start,
+ old_entry->vme_end,
+ prot);
+ }
+ }
+
+ old_entry->needs_copy = FALSE;
+ object = VME_OBJECT(old_entry); /* re-read: the entry now points at the shadow */
+ }
+
+
+ /*
+ * If object was using a symmetric copy strategy,
+ * change its copy strategy to the default
+ * asymmetric copy strategy, which is copy_delay
+ * in the non-norma case and copy_call in the
+ * norma case. Bump the reference count for the
+ * new entry.
+ *
+ * For a submap entry the reference is taken on the
+ * submap instead, while holding the submap's lock.
+ */
+
+ if (old_entry->is_sub_map) {
+ vm_map_lock(VME_SUBMAP(old_entry));
+ vm_map_reference(VME_SUBMAP(old_entry));
+ vm_map_unlock(VME_SUBMAP(old_entry));
+ } else {
+ vm_object_lock(object);
+ vm_object_reference_locked(object);
+ if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
+ object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
+ }
+ vm_object_unlock(object);
+ }
+
+ /*
+ * Clone the entry, using object ref from above.
+ * Mark both entries as shared.
+ */
+
+ new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
+ * map or descendants */
+ vm_map_entry_copy(old_map, new_entry, old_entry);
+ old_entry->is_shared = TRUE;
+ new_entry->is_shared = TRUE;
+
+ /*
+ * We're dealing with a shared mapping, so the resulting mapping
+ * should inherit some of the original mapping's accounting settings.
+ * "iokit_acct" should have been cleared in vm_map_entry_copy().
+ * "use_pmap" should stay the same as before (if it hasn't been reset
+ * to TRUE when we cleared "iokit_acct").
+ */
+ assert(!new_entry->iokit_acct);
+
+ /*
+ * If old entry's inheritance is VM_INHERIT_NONE,
+ * the new entry is for corpse fork, remove the
+ * write permission from the new entry.
+ */
+ if (old_entry->inheritance == VM_INHERIT_NONE) {
+ new_entry->protection &= ~VM_PROT_WRITE;
+ new_entry->max_protection &= ~VM_PROT_WRITE;
+ }
+
+ /*
+ * Insert the entry into the new map -- we
+ * know we're inserting at the end of the new
+ * map.
+ */
+
+ vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry,
+ VM_MAP_KERNEL_FLAGS_NONE);
+
+ /*
+ * Update the physical map
+ */
+
+ if (old_entry->is_sub_map) {
+ /* Bill Angell pmap support goes here */
+ } else {
+ pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
+ old_entry->vme_end - old_entry->vme_start,
+ old_entry->vme_start);
+ }
+ }
+
+ /*
+  * vm_map_fork_copy:
+  *
+  * Copy the range covered by "*old_entry_p" out of "old_map" with
+  * vm_map_copyin_internal() and insert the result into "new_map"
+  * after the entry that was last in new_map on entry.
+  *
+  * old_map must be locked by the caller. It is unlocked around the
+  * copyin and locked again before returning, so the entry passed in
+  * may no longer be valid afterwards; "*old_entry_p" is always
+  * updated to the entry at which the caller should resume.
+  *
+  * Parameters:
+  *   old_map              map being copied from
+  *   old_entry_p          IN: entry to copy; OUT: entry to resume at
+  *   new_map              map receiving the copy
+  *   vm_map_copyin_flags  flags for vm_map_copyin_internal();
+  *                        VM_MAP_COPYIN_USE_MAXPROT is always added
+  *
+  * Returns TRUE if the copy was inserted into new_map, FALSE if the
+  * copyin failed (nothing is inserted in that case).
+  */
+ static boolean_t
+ vm_map_fork_copy(
+     vm_map_t        old_map,
+     vm_map_entry_t  *old_entry_p,
+     vm_map_t        new_map,
+     int             vm_map_copyin_flags)
+ {
+     vm_map_entry_t  old_entry = *old_entry_p;
+     vm_map_size_t   entry_size = old_entry->vme_end - old_entry->vme_start;
+     vm_map_offset_t start = old_entry->vme_start;
+     vm_map_copy_t   copy;
+     vm_map_entry_t  last = vm_map_last_entry(new_map);
+
+     vm_map_unlock(old_map);
+     /*
+      * Use maxprot version of copyin because we
+      * care about whether this memory can ever
+      * be accessed, not just whether it's accessible
+      * right now.
+      */
+     vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
+     if (vm_map_copyin_internal(old_map, start, entry_size,
+         vm_map_copyin_flags, &copy)
+         != KERN_SUCCESS) {
+         /*
+          * The map might have changed while it
+          * was unlocked, check it again.  Skip
+          * any blank space or permanently
+          * unreadable region.
+          *
+          * NOTE(review): on a failed lookup, "last" is presumably
+          * left at the entry preceding "start" -- confirm against
+          * vm_map_lookup_entry().
+          */
+         vm_map_lock(old_map);
+         if (!vm_map_lookup_entry(old_map, start, &last) ||
+             (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
+             last = last->vme_next;
+         }
+         *old_entry_p = last;
+
+         /*
+          * XXX	For some error returns, want to
+          * XXX	skip to the next element.  Note
+          *	that INVALID_ADDRESS and
+          *	PROTECTION_FAILURE are handled above.
+          */
+
+         return FALSE;
+     }
+
+     /*
+      * Assert that the vm_map_copy is coming from the right
+      * zone and hasn't been forged
+      */
+     vm_map_copy_require(copy);
+
+     /*
+      * Insert the copy into the new map
+      */
+     vm_map_copy_insert(new_map, last, copy);
+
+     /*
+      * Pick up the traversal at the end of
+      * the copied region.
+      */
+
+     vm_map_lock(old_map);
+     start += entry_size;
+     if (!vm_map_lookup_entry(old_map, start, &last)) {
+         last = last->vme_next;
+     } else if (last->vme_start != start) {
+         /*
+          * Only clip when "start" falls inside the entry:
+          * an entry that already begins at "start" needs no
+          * clipping and we don't want to cause any
+          * unnecessary unnesting...
+          */
+         vm_map_clip_start(old_map, last, start);
+     }
+     *old_entry_p = last;
+
+     return TRUE;
+ }
+
+/*
+ * vm_map_fork:
+ *
+ * Create and return a new map based on the old
+ * map, according to the inheritance values on the
+ * regions in that map and the options.
+ *
+ * The source map must not be locked.
+ *
+ * Parameters:
+ * ledger - handed to pmap_create_options() for the new pmap.
+ * old_map - map to duplicate; it is referenced and locked here
+ * for the duration of the walk and released before returning.
+ * options - any combination of VM_MAP_FORK_SHARE_IF_INHERIT_NONE,
+ * VM_MAP_FORK_PRESERVE_PURGEABLE and
+ * VM_MAP_FORK_CORPSE_FOOTPRINT.
+ *
+ * Returns the new map (unlocked), or VM_MAP_NULL if "options"
+ * contains an unsupported bit or the new pmap could not be created.
+ */
+vm_map_t
+vm_map_fork(
+ ledger_t ledger,
+ vm_map_t old_map,
+ int options)
+{
+ pmap_t new_pmap;
+ vm_map_t new_map;
+ vm_map_entry_t old_entry;
+ vm_map_size_t new_size = 0, entry_size;
+ vm_map_entry_t new_entry;
+ boolean_t src_needs_copy;
+ boolean_t new_entry_needs_copy;
+ boolean_t pmap_is64bit;
+ int vm_map_copyin_flags;
+ vm_inherit_t old_entry_inheritance;
+ int map_create_options;
+ kern_return_t footprint_collect_kr;
+
+ if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
+ VM_MAP_FORK_PRESERVE_PURGEABLE |
+ VM_MAP_FORK_CORPSE_FOOTPRINT)) {
+ /* unsupported option */
+ return VM_MAP_NULL;
+ }
+
+ pmap_is64bit =
+#if defined(__i386__) || defined(__x86_64__)
+ old_map->pmap->pm_task_map != TASK_MAP_32BIT;
+#elif defined(__arm64__)
+ old_map->pmap->max == MACH_VM_MAX_ADDRESS;
+#elif defined(__arm__)
+ FALSE;
+#else
+#error Unknown architecture.
+#endif
+
+ unsigned int pmap_flags = 0;
+ pmap_flags |= pmap_is64bit ? PMAP_CREATE_64BIT : 0;
+#if defined(HAS_APPLE_PAC)
+ pmap_flags |= old_map->pmap->disable_jop ? PMAP_CREATE_DISABLE_JOP : 0;
+#endif
+#if PMAP_CREATE_FORCE_4K_PAGES
+ if (VM_MAP_PAGE_SIZE(old_map) == FOURK_PAGE_SIZE &&
+ PAGE_SIZE != FOURK_PAGE_SIZE) {
+ pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
+ }
+#endif /* PMAP_CREATE_FORCE_4K_PAGES */
+ new_pmap = pmap_create_options(ledger, (vm_map_size_t) 0, pmap_flags);
+ if (new_pmap == NULL) {
+ /*
+ * No pmap, no map: bail out before taking any reference
+ * or lock on old_map so there is nothing to undo.
+ */
+ return VM_MAP_NULL;
+ }
+
+ vm_map_reference(old_map);
+ vm_map_lock(old_map);
+
+ map_create_options = 0;
+ if (old_map->hdr.entries_pageable) {
+ map_create_options |= VM_MAP_CREATE_PAGEABLE;
+ }
+ if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
+ map_create_options |= VM_MAP_CREATE_CORPSE_FOOTPRINT;
+ /* only read below when this same option bit is set */
+ footprint_collect_kr = KERN_SUCCESS;
+ }
+ new_map = vm_map_create_options(new_pmap,
+ old_map->min_offset,
+ old_map->max_offset,
+ map_create_options);
+ /* inherit cs_enforcement */
+ vm_map_cs_enforcement_set(new_map, old_map->cs_enforcement);
+ vm_map_lock(new_map);
+ vm_commit_pagezero_status(new_map);
+ /* inherit the parent map's page size */
+ vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
+ /*
+ * Walk every entry of old_map. There is no increment clause:
+ * the slow-copy path lets vm_map_fork_copy() advance "old_entry"
+ * and then uses "continue"; all other paths advance at the
+ * bottom of the loop body.
+ */
+ for (
+ old_entry = vm_map_first_entry(old_map);
+ old_entry != vm_map_to_entry(old_map);
+ ) {
+ entry_size = old_entry->vme_end - old_entry->vme_start;
+
+ old_entry_inheritance = old_entry->inheritance;
+ /*
+ * If caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option
+ * share VM_INHERIT_NONE entries that are not backed by a
+ * device pager.
+ */
+ if (old_entry_inheritance == VM_INHERIT_NONE &&
+ (options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE) &&
+ (old_entry->protection & VM_PROT_READ) &&
+ !(!old_entry->is_sub_map &&
+ VME_OBJECT(old_entry) != NULL &&
+ VME_OBJECT(old_entry)->pager != NULL &&
+ is_device_pager_ops(
+ VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
+ old_entry_inheritance = VM_INHERIT_SHARE;
+ }
+
+ if (old_entry_inheritance != VM_INHERIT_NONE &&
+ (options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
+ footprint_collect_kr == KERN_SUCCESS) {
+ /*
+ * The corpse won't have old_map->pmap to query
+ * footprint information, so collect that data now
+ * and store it in new_map->vmmap_corpse_footprint
+ * for later autopsy.
+ */
+ footprint_collect_kr =
+ vm_map_corpse_footprint_collect(old_map,
+ old_entry,
+ new_map);
+ }
+
+ switch (old_entry_inheritance) {
+ case VM_INHERIT_NONE:
+ break;
+
+ case VM_INHERIT_SHARE:
+ vm_map_fork_share(old_map, old_entry, new_map);
+ new_size += entry_size;
+ break;
+
+ case VM_INHERIT_COPY:
+
+ /*
+ * Inline the copy_quickly case;
+ * upon failure, fall back on call
+ * to vm_map_fork_copy.
+ */
+
+ if (old_entry->is_sub_map) {
+ break;
+ }
+ if ((old_entry->wired_count != 0) ||
+ ((VME_OBJECT(old_entry) != NULL) &&
+ (VME_OBJECT(old_entry)->true_share))) {
+ goto slow_vm_map_fork_copy;
+ }
+
+ new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
+ vm_map_entry_copy(old_map, new_entry, old_entry);
+
+ if (new_entry->used_for_jit == TRUE && new_map->jit_entry_exists == FALSE) {
+ new_map->jit_entry_exists = TRUE;
+ }
+
+ if (new_entry->is_sub_map) {
+ /* clear address space specifics */
+ new_entry->use_pmap = FALSE;
+ } else {
+ /*
+ * We're dealing with a copy-on-write operation,
+ * so the resulting mapping should not inherit
+ * the original mapping's accounting settings.
+ * "iokit_acct" should have been cleared in
+ * vm_map_entry_copy().
+ * "use_pmap" should be reset to its default
+ * (TRUE) so that the new mapping gets
+ * accounted for in the task's memory footprint.
+ */
+ assert(!new_entry->iokit_acct);
+ new_entry->use_pmap = TRUE;
+ }
+
+ if (!vm_object_copy_quickly(
+ VME_OBJECT_PTR(new_entry),
+ VME_OFFSET(old_entry),
+ (old_entry->vme_end -
+ old_entry->vme_start),
+ &src_needs_copy,
+ &new_entry_needs_copy)) {
+ vm_map_entry_dispose(new_map, new_entry);
+ goto slow_vm_map_fork_copy;
+ }
+
+ /*
+ * Handle copy-on-write obligations
+ */
+
+ if (src_needs_copy && !old_entry->needs_copy) {
+ vm_prot_t prot;
+
+ assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));
+
+ prot = old_entry->protection & ~VM_PROT_WRITE;
+
+ if (override_nx(old_map, VME_ALIAS(old_entry))
+ && prot) {
+ prot |= VM_PROT_EXECUTE;
+ }
+
+ assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));
+
+ vm_object_pmap_protect(
+ VME_OBJECT(old_entry),
+ VME_OFFSET(old_entry),
+ (old_entry->vme_end -
+ old_entry->vme_start),
+ ((old_entry->is_shared
+ || old_map->mapped_in_other_pmaps)
+ ? PMAP_NULL :
+ old_map->pmap),
+ VM_MAP_PAGE_SIZE(old_map),
+ old_entry->vme_start,
+ prot);
+
+ assert(old_entry->wired_count == 0);
+ old_entry->needs_copy = TRUE;
+ }
+ new_entry->needs_copy = new_entry_needs_copy;
+
+ /*
+ * Insert the entry at the end
+ * of the map.
+ */
+
+ vm_map_store_entry_link(new_map,
+ vm_map_last_entry(new_map),
+ new_entry,
+ VM_MAP_KERNEL_FLAGS_NONE);
+ new_size += entry_size;
+ break;
+
+slow_vm_map_fork_copy:
+ vm_map_copyin_flags = 0;
+ if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
+ vm_map_copyin_flags |=
+ VM_MAP_COPYIN_PRESERVE_PURGEABLE;
+ }
+ if (vm_map_fork_copy(old_map,
+ &old_entry,
+ new_map,
+ vm_map_copyin_flags)) {
+ new_size += entry_size;
+ }
+ /* "old_entry" was passed by reference above: do not advance it again */
+ continue;
+ }
+ old_entry = old_entry->vme_next;
+ }
+
+#if defined(__arm64__)
+ pmap_insert_sharedpage(new_map->pmap);
+#endif /* __arm64__ */
+
+ new_map->size = new_size;
+
+ if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
+ vm_map_corpse_footprint_collect_done(new_map);
+ }
+
+ /* Propagate JIT entitlement for the pmap layer. */
+ if (pmap_get_jit_entitled(old_map->pmap)) {
+ /* Tell the pmap that it supports JIT. */
+ pmap_set_jit_entitled(new_map->pmap);
+ }
+
+ vm_map_unlock(new_map);
+ vm_map_unlock(old_map);
+ vm_map_deallocate(old_map);
+
+ return new_map;
+}
+
+/*
+ * vm_map_exec:
+ *
+ * Setup the "new_map" with the proper execution environment according
+ * to the type of executable (platform, 64bit, chroot environment).
+ * Map the comm page and shared region, etc...
+ *
+ * The results of vm_commpage_enter() and vm_shared_region_enter() are
+ * deliberately ignored. Failure to reserve one of the machine-reserved
+ * regions panics. Otherwise this always returns KERN_SUCCESS.
+ */
+kern_return_t
+vm_map_exec(
+ vm_map_t new_map,
+ task_t task,
+ boolean_t is64bit,
+ void *fsroot,
+ cpu_type_t cpu,
+ cpu_subtype_t cpu_subtype,
+ boolean_t reslide)
+{
+ SHARED_REGION_TRACE_DEBUG(
+ ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
+ (void *)VM_KERNEL_ADDRPERM(current_task()),
+ (void *)VM_KERNEL_ADDRPERM(new_map),
+ (void *)VM_KERNEL_ADDRPERM(task),
+ (void *)VM_KERNEL_ADDRPERM(fsroot),
+ cpu,
+ cpu_subtype));
+ (void) vm_commpage_enter(new_map, task, is64bit);
+
+ (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype, reslide);
+
+ SHARED_REGION_TRACE_DEBUG(
+ ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
+ (void *)VM_KERNEL_ADDRPERM(current_task()),
+ (void *)VM_KERNEL_ADDRPERM(new_map),
+ (void *)VM_KERNEL_ADDRPERM(task),
+ (void *)VM_KERNEL_ADDRPERM(fsroot),
+ cpu,
+ cpu_subtype));
+
+ /*
+ * Some devices have region(s) of memory that shouldn't get allocated by
+ * user processes. The following code creates dummy vm_map_entry_t's for each
+ * of the regions that needs to be reserved to prevent any allocations in
+ * those regions.
+ */
+ kern_return_t kr = KERN_FAILURE;
+ vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
+ vmk_flags.vmkf_permanent = TRUE;
+ vmk_flags.vmkf_beyond_max = TRUE;
+
+ /* "regions" is filled in by ml_get_vm_reserved_regions() */
+ struct vm_reserved_region *regions = NULL;
+ size_t num_regions = ml_get_vm_reserved_regions(is64bit, &regions);
+ assert((num_regions == 0) || (num_regions > 0 && regions != NULL));
+
+ for (size_t i = 0; i < num_regions; ++i) {
+ /* fixed-address, object-less, no-access, non-inherited mapping */
+ kr = vm_map_enter(
+ new_map,
+ &regions[i].vmrr_addr,
+ regions[i].vmrr_size,
+ (vm_map_offset_t)0,
+ VM_FLAGS_FIXED,
+ vmk_flags,
+ VM_KERN_MEMORY_NONE,
+ VM_OBJECT_NULL,
+ (vm_object_offset_t)0,
+ FALSE,
+ VM_PROT_NONE,
+ VM_PROT_NONE,
+ VM_INHERIT_NONE);
+
+ if (kr != KERN_SUCCESS) {
+ panic("Failed to reserve %s region in user map %p %d", regions[i].vmrr_name, new_map, kr);
+ }
+ }
+
+ new_map->reserved_regions = (num_regions ? TRUE : FALSE);
+
+ return KERN_SUCCESS;
+}
+
+/*
+ * Statistics maintained by vm_map_lookup_locked() when it resolves a
+ * write (or forced-copy) fault through a copy-on-write submap. There is
+ * one family per copy path taken for the submap entry:
+ * "slowly" - vm_object_copy_slowly() (wired submap entry)
+ * "strategically" - vm_object_copy_strategically()
+ * (copy strategy is not MEMORY_OBJECT_COPY_SYMMETRIC)
+ * "shadow" - the submap object is referenced and marked shadowed
+ * Within a family:
+ * _count - number of times the path completed
+ * _size - running sum of the submap entry sizes handled
+ * _max - largest submap entry size handled
+ * _restart - number of times the lookup went back to RetrySubMap
+ * _error - number of times the copy failed and the error was returned
+ */
+uint64_t vm_map_lookup_locked_copy_slowly_count = 0;
+uint64_t vm_map_lookup_locked_copy_slowly_size = 0;
+uint64_t vm_map_lookup_locked_copy_slowly_max = 0;
+uint64_t vm_map_lookup_locked_copy_slowly_restart = 0;
+uint64_t vm_map_lookup_locked_copy_slowly_error = 0;
+uint64_t vm_map_lookup_locked_copy_strategically_count = 0;
+uint64_t vm_map_lookup_locked_copy_strategically_size = 0;
+uint64_t vm_map_lookup_locked_copy_strategically_max = 0;
+uint64_t vm_map_lookup_locked_copy_strategically_restart = 0;
+uint64_t vm_map_lookup_locked_copy_strategically_error = 0;
+uint64_t vm_map_lookup_locked_copy_shadow_count = 0;
+uint64_t vm_map_lookup_locked_copy_shadow_size = 0;
+uint64_t vm_map_lookup_locked_copy_shadow_max = 0;
+/*
+ * vm_map_lookup_locked:
+ *
+ * Finds the VM object, offset, and
+ * protection for a given virtual address in the
+ * specified map, assuming a page fault of the
+ * type specified.
+ *
+ * Returns the (object, offset, protection) for
+ * this address, whether it is wired down, and whether
+ * this map has the only reference to the data in question.
+ * In order to later verify this lookup, a "version"
+ * is returned.
+ * If contended != NULL, *contended will be set to
+ * true iff the thread had to spin or block to acquire
+ * an exclusive lock.
+ *
+ * The map MUST be locked by the caller and WILL be
+ * locked on exit. In order to guarantee the
+ * existence of the returned object, it is returned
+ * locked.
+ *
+ * If a lookup is requested with "write protection"
+ * specified, the map may be changed to perform virtual
+ * copying operations, although the data referenced will
+ * remain the same.
+ *
+ * Parameters:
+ * var_map - IN: map to start the lookup in. OUT: updated as
+ * the lookup descends into submaps.
+ * vaddr - faulting address; truncated to the fault page mask.
+ * fault_type - requested access. VM_PROT_IS_MASK, VM_PROT_COPY and
+ * VM_PROT_COPY_FAIL_IF_EXECUTABLE are modifier bits that
+ * are extracted here before masking with VM_PROT_ALL.
+ * object_lock_type - OBJECT_LOCK_EXCLUSIVE locks the returned object
+ * exclusively, anything else locks it shared.
+ * out_version - receives the map's timestamp.
+ * object/offset/out_prot/wired - results of the lookup.
+ * fault_info - optional; filled from the entry when non-NULL.
+ * real_map - set to the starting map, then to a submap whose
+ * entry has "use_pmap" set on non-write lookups.
+ * contended - optional, see above.
+ *
+ * Returns KERN_SUCCESS, KERN_INVALID_ADDRESS when no entry covers the
+ * address, KERN_PROTECTION_FAILURE when the access is not permitted,
+ * or the error from vm_object_copy_slowly() or
+ * vm_object_copy_strategically() on the submap copy-on-write path.
+ */
+kern_return_t
+vm_map_lookup_locked(
+ vm_map_t *var_map, /* IN/OUT */
+ vm_map_offset_t vaddr,
+ vm_prot_t fault_type,
+ int object_lock_type,
+ vm_map_version_t *out_version, /* OUT */
+ vm_object_t *object, /* OUT */
+ vm_object_offset_t *offset, /* OUT */
+ vm_prot_t *out_prot, /* OUT */
+ boolean_t *wired, /* OUT */
+ vm_object_fault_info_t fault_info, /* OUT */
+ vm_map_t *real_map, /* OUT */
+ bool *contended) /* OUT */
+{
+ vm_map_entry_t entry;
+ vm_map_t map = *var_map;
+ vm_map_t old_map = *var_map;
+ vm_map_t cow_sub_map_parent = VM_MAP_NULL;
+ vm_map_offset_t cow_parent_vaddr = 0;
+ vm_map_offset_t old_start = 0;
+ vm_map_offset_t old_end = 0;
+ vm_prot_t prot;
+ boolean_t mask_protections;
+ boolean_t force_copy;
+ boolean_t no_force_copy_if_executable;
+ boolean_t submap_needed_copy;
+ vm_prot_t original_fault_type;
+ vm_map_size_t fault_page_mask;
+
+ /*
+ * VM_PROT_MASK means that the caller wants us to use "fault_type"
+ * as a mask against the mapping's actual protections, not as an
+ * absolute value.
+ */
+ mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
+ force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
+ no_force_copy_if_executable = (fault_type & VM_PROT_COPY_FAIL_IF_EXECUTABLE) ? TRUE : FALSE;
+ fault_type &= VM_PROT_ALL;
+ original_fault_type = fault_type;
+ if (contended) {
+ *contended = false;
+ }
+
+ *real_map = map;
+
+ fault_page_mask = MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK);
+ vaddr = VM_MAP_TRUNC_PAGE(vaddr, fault_page_mask);
+
+RetryLookup:
+ /* "fault_type" may have been narrowed or widened on a previous pass */
+ fault_type = original_fault_type;
+
+ /*
+ * If the map has an interesting hint, try it before calling
+ * full blown lookup routine.
+ */
+ entry = map->hint;
+
+ if ((entry == vm_map_to_entry(map)) ||
+ (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
+ vm_map_entry_t tmp_entry;
+
+ /*
+ * Entry was either not a valid hint, or the vaddr
+ * was not contained in the entry, so do a full lookup.
+ */
+ if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
+ if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
+ vm_map_unlock(cow_sub_map_parent);
+ }
+ if ((*real_map != map)
+ && (*real_map != cow_sub_map_parent)) {
+ vm_map_unlock(*real_map);
+ }
+ return KERN_INVALID_ADDRESS;
+ }
+
+ entry = tmp_entry;
+ }
+ if (map == old_map) {
+ old_start = entry->vme_start;
+ old_end = entry->vme_end;
+ }
+
+ /*
+ * Handle submaps. Drop lock on upper map, submap is
+ * returned locked.
+ */
+
+ submap_needed_copy = FALSE;
+submap_recurse:
+ if (entry->is_sub_map) {
+ vm_map_offset_t local_vaddr;
+ vm_map_offset_t end_delta;
+ vm_map_offset_t start_delta;
+ vm_map_entry_t submap_entry, saved_submap_entry;
+ vm_object_offset_t submap_entry_offset;
+ vm_object_size_t submap_entry_size;
+ vm_prot_t subentry_protection;
+ vm_prot_t subentry_max_protection;
+ boolean_t subentry_no_copy_on_read;
+ boolean_t mapped_needs_copy = FALSE;
+ vm_map_version_t version;
+
+ assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) >= VM_MAP_PAGE_SHIFT(map),
+ "map %p (%d) entry %p submap %p (%d)\n",
+ map, VM_MAP_PAGE_SHIFT(map), entry,
+ VME_SUBMAP(entry), VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
+
+ local_vaddr = vaddr;
+
+ if ((entry->use_pmap &&
+ !((fault_type & VM_PROT_WRITE) ||
+ force_copy))) {
+ /* if real_map equals map we unlock below */
+ if ((*real_map != map) &&
+ (*real_map != cow_sub_map_parent)) {
+ vm_map_unlock(*real_map);
+ }
+ *real_map = VME_SUBMAP(entry);
+ }
+
+ if (entry->needs_copy &&
+ ((fault_type & VM_PROT_WRITE) ||
+ force_copy)) {
+ if (!mapped_needs_copy) {
+ if (vm_map_lock_read_to_write(map)) {
+ vm_map_lock_read(map);
+ *real_map = map;
+ goto RetryLookup;
+ }
+ vm_map_lock_read(VME_SUBMAP(entry));
+ *var_map = VME_SUBMAP(entry);
+ cow_sub_map_parent = map;
+ /* reset base to map before cow object */
+ /* this is the map which will accept */
+ /* the new cow object */
+ old_start = entry->vme_start;
+ old_end = entry->vme_end;
+ cow_parent_vaddr = vaddr;
+ mapped_needs_copy = TRUE;
+ } else {
+ vm_map_lock_read(VME_SUBMAP(entry));
+ *var_map = VME_SUBMAP(entry);
+ if ((cow_sub_map_parent != map) &&
+ (*real_map != map)) {
+ vm_map_unlock(map);
+ }
+ }
+ } else {
+ if (entry->needs_copy) {
+ submap_needed_copy = TRUE;
+ }
+ vm_map_lock_read(VME_SUBMAP(entry));
+ *var_map = VME_SUBMAP(entry);
+ /* leave map locked if it is a target */
+ /* cow sub_map above otherwise, just */
+ /* follow the maps down to the object */
+ /* here we unlock knowing we are not */
+ /* revisiting the map. */
+ if ((*real_map != map) && (map != cow_sub_map_parent)) {
+ vm_map_unlock_read(map);
+ }
+ }
+
+ map = *var_map;
+
+ /* calculate the offset in the submap for vaddr */
+ local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
+ assertf(VM_MAP_PAGE_ALIGNED(local_vaddr, fault_page_mask),
+ "local_vaddr 0x%llx entry->vme_start 0x%llx fault_page_mask 0x%llx\n",
+ (uint64_t)local_vaddr, (uint64_t)entry->vme_start, (uint64_t)fault_page_mask);
+
+RetrySubMap:
+ if (!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
+ if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
+ vm_map_unlock(cow_sub_map_parent);
+ }
+ if ((*real_map != map)
+ && (*real_map != cow_sub_map_parent)) {
+ vm_map_unlock(*real_map);
+ }
+ *real_map = map;
+ return KERN_INVALID_ADDRESS;
+ }
+
+ /* find the attenuated shadow of the underlying object */
+ /* on our target map */
+
+ /* in english the submap object may extend beyond the */
+ /* region mapped by the entry or, may only fill a portion */
+ /* of it. For our purposes, we only care if the object */
+ /* doesn't fill. In this case the area which will */
+ /* ultimately be clipped in the top map will only need */
+ /* to be as big as the portion of the underlying entry */
+ /* which is mapped */
+ start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
+ submap_entry->vme_start - VME_OFFSET(entry) : 0;
+
+ end_delta =
+ (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
+ submap_entry->vme_end ?
+ 0 : (VME_OFFSET(entry) +
+ (old_end - old_start))
+ - submap_entry->vme_end;
+
+ /* every "goto RetrySubMap" below undoes this adjustment first */
+ old_start += start_delta;
+ old_end -= end_delta;
+
+ if (submap_entry->is_sub_map) {
+ entry = submap_entry;
+ vaddr = local_vaddr;
+ goto submap_recurse;
+ }
+
+ if (((fault_type & VM_PROT_WRITE) ||
+ force_copy)
+ && cow_sub_map_parent) {
+ vm_object_t sub_object, copy_object;
+ vm_object_offset_t copy_offset;
+ vm_map_offset_t local_start;
+ vm_map_offset_t local_end;
+ boolean_t object_copied = FALSE;
+ vm_object_offset_t object_copied_offset = 0;
+ boolean_t object_copied_needs_copy = FALSE;
+ kern_return_t kr = KERN_SUCCESS;
+
+ if (vm_map_lock_read_to_write(map)) {
+ vm_map_lock_read(map);
+ old_start -= start_delta;
+ old_end += end_delta;
+ goto RetrySubMap;
+ }
+
+
+ sub_object = VME_OBJECT(submap_entry);
+ if (sub_object == VM_OBJECT_NULL) {
+ sub_object =
+ vm_object_allocate(
+ (vm_map_size_t)
+ (submap_entry->vme_end -
+ submap_entry->vme_start));
+ VME_OBJECT_SET(submap_entry, sub_object);
+ VME_OFFSET_SET(submap_entry, 0);
+ assert(!submap_entry->is_sub_map);
+ assert(submap_entry->use_pmap);
+ }
+ local_start = local_vaddr -
+ (cow_parent_vaddr - old_start);
+ local_end = local_vaddr +
+ (old_end - cow_parent_vaddr);
+ vm_map_clip_start(map, submap_entry, local_start);
+ vm_map_clip_end(map, submap_entry, local_end);
+ if (submap_entry->is_sub_map) {
+ /* unnesting was done when clipping */
+ assert(!submap_entry->use_pmap);
+ }
+
+ /* This is the COW case, lets connect */
+ /* an entry in our space to the underlying */
+ /* object in the submap, bypassing the */
+ /* submap. */
+ submap_entry_offset = VME_OFFSET(submap_entry);
+ submap_entry_size = submap_entry->vme_end - submap_entry->vme_start;
+
+ if ((submap_entry->wired_count != 0 ||
+ sub_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) &&
+ (submap_entry->protection & VM_PROT_EXECUTE) &&
+ no_force_copy_if_executable) {
+// printf("FBDP map %p entry %p start 0x%llx end 0x%llx wired %d strat %d\n", map, submap_entry, (uint64_t)local_start, (uint64_t)local_end, submap_entry->wired_count, sub_object->copy_strategy);
+ if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
+ vm_map_unlock(cow_sub_map_parent);
+ }
+ if ((*real_map != map)
+ && (*real_map != cow_sub_map_parent)) {
+ vm_map_unlock(*real_map);
+ }
+ *real_map = map;
+ vm_map_lock_write_to_read(map);
+ kr = KERN_PROTECTION_FAILURE;
+ DTRACE_VM4(submap_no_copy_executable,
+ vm_map_t, map,
+ vm_object_offset_t, submap_entry_offset,
+ vm_object_size_t, submap_entry_size,
+ int, kr);
+ return kr;
+ }
+
+ if (submap_entry->wired_count != 0) {
+ vm_object_reference(sub_object);
+
+ assertf(VM_MAP_PAGE_ALIGNED(VME_OFFSET(submap_entry), VM_MAP_PAGE_MASK(map)),
+ "submap_entry %p offset 0x%llx\n",
+ submap_entry, VME_OFFSET(submap_entry));
+
+ DTRACE_VM6(submap_copy_slowly,
+ vm_map_t, cow_sub_map_parent,
+ vm_map_offset_t, vaddr,
+ vm_map_t, map,
+ vm_object_size_t, submap_entry_size,
+ int, submap_entry->wired_count,
+ int, sub_object->copy_strategy);
+
+ /*
+ * The map is unlocked across the slow copy. Remember the
+ * timestamp so we can tell afterwards whether the map
+ * changed and "saved_submap_entry" is still usable.
+ */
+ saved_submap_entry = submap_entry;
+ version.main_timestamp = map->timestamp;
+ vm_map_unlock(map); /* Increments timestamp by 1 */
+ submap_entry = VM_MAP_ENTRY_NULL;
+
+ vm_object_lock(sub_object);
+ kr = vm_object_copy_slowly(sub_object,
+ submap_entry_offset,
+ submap_entry_size,
+ FALSE,
+ &copy_object);
+ object_copied = TRUE;
+ object_copied_offset = 0;
+ /* 4k: account for extra offset in physical page */
+ object_copied_offset += submap_entry_offset - vm_object_trunc_page(submap_entry_offset);
+ object_copied_needs_copy = FALSE;
+ vm_object_deallocate(sub_object);
+
+ vm_map_lock(map);
+
+ if (kr != KERN_SUCCESS &&
+ kr != KERN_MEMORY_RESTART_COPY) {
+ if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
+ vm_map_unlock(cow_sub_map_parent);
+ }
+ if ((*real_map != map)
+ && (*real_map != cow_sub_map_parent)) {
+ vm_map_unlock(*real_map);
+ }
+ *real_map = map;
+ vm_object_deallocate(copy_object);
+ copy_object = VM_OBJECT_NULL;
+ vm_map_lock_write_to_read(map);
+ DTRACE_VM4(submap_copy_error_slowly,
+ vm_object_t, sub_object,
+ vm_object_offset_t, submap_entry_offset,
+ vm_object_size_t, submap_entry_size,
+ int, kr);
+ vm_map_lookup_locked_copy_slowly_error++;
+ return kr;
+ }
+
+ if ((kr == KERN_SUCCESS) &&
+ (version.main_timestamp + 1) == map->timestamp) {
+ submap_entry = saved_submap_entry;
+ } else {
+ saved_submap_entry = NULL;
+ old_start -= start_delta;
+ old_end += end_delta;
+ vm_object_deallocate(copy_object);
+ copy_object = VM_OBJECT_NULL;
+ vm_map_lock_write_to_read(map);
+ vm_map_lookup_locked_copy_slowly_restart++;
+ goto RetrySubMap;
+ }
+ vm_map_lookup_locked_copy_slowly_count++;
+ vm_map_lookup_locked_copy_slowly_size += submap_entry_size;
+ if (submap_entry_size > vm_map_lookup_locked_copy_slowly_max) {
+ vm_map_lookup_locked_copy_slowly_max = submap_entry_size;
+ }
+ } else if (sub_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
+ submap_entry_offset = VME_OFFSET(submap_entry);
+ copy_object = VM_OBJECT_NULL;
+ object_copied_offset = submap_entry_offset;
+ object_copied_needs_copy = FALSE;
+ DTRACE_VM6(submap_copy_strategically,
+ vm_map_t, cow_sub_map_parent,
+ vm_map_offset_t, vaddr,
+ vm_map_t, map,
+ vm_object_size_t, submap_entry_size,
+ int, submap_entry->wired_count,
+ int, sub_object->copy_strategy);
+ kr = vm_object_copy_strategically(
+ sub_object,
+ submap_entry_offset,
+ submap_entry->vme_end - submap_entry->vme_start,
+ &copy_object,
+ &object_copied_offset,
+ &object_copied_needs_copy);
+ if (kr == KERN_MEMORY_RESTART_COPY) {
+ old_start -= start_delta;
+ old_end += end_delta;
+ vm_object_deallocate(copy_object);
+ copy_object = VM_OBJECT_NULL;
+ vm_map_lock_write_to_read(map);
+ vm_map_lookup_locked_copy_strategically_restart++;
+ goto RetrySubMap;
+ }
+ if (kr != KERN_SUCCESS) {
+ if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
+ vm_map_unlock(cow_sub_map_parent);
+ }
+ if ((*real_map != map)
+ && (*real_map != cow_sub_map_parent)) {
+ vm_map_unlock(*real_map);
+ }
+ *real_map = map;
+ vm_object_deallocate(copy_object);
+ copy_object = VM_OBJECT_NULL;
+ vm_map_lock_write_to_read(map);
+ DTRACE_VM4(submap_copy_error_strategically,
+ vm_object_t, sub_object,
+ vm_object_offset_t, submap_entry_offset,
+ vm_object_size_t, submap_entry_size,
+ int, kr);
+ vm_map_lookup_locked_copy_strategically_error++;
+ return kr;
+ }
+ assert(copy_object != VM_OBJECT_NULL);
+ assert(copy_object != sub_object);
+ object_copied = TRUE;
+ vm_map_lookup_locked_copy_strategically_count++;
+ vm_map_lookup_locked_copy_strategically_size += submap_entry_size;
+ if (submap_entry_size > vm_map_lookup_locked_copy_strategically_max) {
+ vm_map_lookup_locked_copy_strategically_max = submap_entry_size;
+ }
+ } else {
+ /* set up shadow object */
+ object_copied = FALSE;
+ copy_object = sub_object;
+ vm_object_lock(sub_object);
+ vm_object_reference_locked(sub_object);
+ sub_object->shadowed = TRUE;
+ vm_object_unlock(sub_object);
+
+ assert(submap_entry->wired_count == 0);
+ submap_entry->needs_copy = TRUE;
+
+ prot = submap_entry->protection;
+ assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));
+ prot = prot & ~VM_PROT_WRITE;
+ assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));
+
+ if (override_nx(old_map,
+ VME_ALIAS(submap_entry))
+ && prot) {
+ prot |= VM_PROT_EXECUTE;
+ }
+
+ vm_object_pmap_protect(
+ sub_object,
+ VME_OFFSET(submap_entry),
+ submap_entry->vme_end -
+ submap_entry->vme_start,
+ (submap_entry->is_shared
+ || map->mapped_in_other_pmaps) ?
+ PMAP_NULL : map->pmap,
+ VM_MAP_PAGE_SIZE(map),
+ submap_entry->vme_start,
+ prot);
+ vm_map_lookup_locked_copy_shadow_count++;
+ vm_map_lookup_locked_copy_shadow_size += submap_entry_size;
+ if (submap_entry_size > vm_map_lookup_locked_copy_shadow_max) {
+ vm_map_lookup_locked_copy_shadow_max = submap_entry_size;
+ }
+ }
+
+ /*
+ * Adjust the fault offset to the submap entry.
+ */
+ copy_offset = (local_vaddr -
+ submap_entry->vme_start +
+ VME_OFFSET(submap_entry));
+
+ /* This works differently than the */
+ /* normal submap case. We go back */
+ /* to the parent of the cow map and*/
+ /* clip out the target portion of */
+ /* the sub_map, substituting the */
+ /* new copy object, */
+
+ subentry_protection = submap_entry->protection;
+ subentry_max_protection = submap_entry->max_protection;
+ subentry_no_copy_on_read = submap_entry->vme_no_copy_on_read;
+ vm_map_unlock(map);
+ submap_entry = NULL; /* not valid after map unlock */
+
+ local_start = old_start;
+ local_end = old_end;
+ map = cow_sub_map_parent;
+ *var_map = cow_sub_map_parent;
+ vaddr = cow_parent_vaddr;
+ cow_sub_map_parent = NULL;
+
+ if (!vm_map_lookup_entry(map,
+ vaddr, &entry)) {
+ if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
+ vm_map_unlock(cow_sub_map_parent);
+ }
+ if ((*real_map != map)
+ && (*real_map != cow_sub_map_parent)) {
+ vm_map_unlock(*real_map);
+ }
+ *real_map = map;
+ vm_object_deallocate(
+ copy_object);
+ copy_object = VM_OBJECT_NULL;
+ vm_map_lock_write_to_read(map);
+ DTRACE_VM4(submap_lookup_post_unlock,
+ uint64_t, (uint64_t)entry->vme_start,
+ uint64_t, (uint64_t)entry->vme_end,
+ vm_map_offset_t, vaddr,
+ int, object_copied);
+ return KERN_INVALID_ADDRESS;
+ }
+
+ /* clip out the portion of space */
+ /* mapped by the sub map which */
+ /* corresponds to the underlying */
+ /* object */
+
+ /*
+ * Clip (and unnest) the smallest nested chunk
+ * possible around the faulting address...
+ */
+ local_start = vaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
+ local_end = local_start + pmap_shared_region_size_min(map->pmap);
+ /*
+ * ... but don't go beyond the "old_start" to "old_end"
+ * range, to avoid spanning over another VM region
+ * with a possibly different VM object and/or offset.
+ */
+ if (local_start < old_start) {
+ local_start = old_start;
+ }
+ if (local_end > old_end) {
+ local_end = old_end;
+ }
+ /*
+ * Adjust copy_offset to the start of the range.
+ */
+ copy_offset -= (vaddr - local_start);
+
+ vm_map_clip_start(map, entry, local_start);
+ vm_map_clip_end(map, entry, local_end);
+ if (entry->is_sub_map) {
+ /* unnesting was done when clipping */
+ assert(!entry->use_pmap);
+ }
+
+ /* substitute copy object for */
+ /* shared map entry */
+ vm_map_deallocate(VME_SUBMAP(entry));
+ assert(!entry->iokit_acct);
+ entry->is_sub_map = FALSE;
+ entry->use_pmap = TRUE;
+ VME_OBJECT_SET(entry, copy_object);
+
+ /* propagate the submap entry's protections */
+ if (entry->protection != VM_PROT_READ) {
+ /*
+ * Someone has already altered the top entry's
+ * protections via vm_protect(VM_PROT_COPY).
+ * Respect these new values and ignore the
+ * submap entry's protections.
+ */
+ } else {
+ /*
+ * Regular copy-on-write: propagate the submap
+ * entry's protections to the top map entry.
+ */
+ entry->protection |= subentry_protection;
+ }
+ entry->max_protection |= subentry_max_protection;
+ /* propagate no_copy_on_read */
+ entry->vme_no_copy_on_read = subentry_no_copy_on_read;
+
+ if ((entry->protection & VM_PROT_WRITE) &&
+ (entry->protection & VM_PROT_EXECUTE) &&
+#if XNU_TARGET_OS_OSX
+ map->pmap != kernel_pmap &&
+ (vm_map_cs_enforcement(map)
+#if __arm64__
+ || !VM_MAP_IS_EXOTIC(map)
+#endif /* __arm64__ */
+ ) &&
+#endif /* XNU_TARGET_OS_OSX */
+ !(entry->used_for_jit) &&
+ VM_MAP_POLICY_WX_STRIP_X(map)) {
+ DTRACE_VM3(cs_wx,
+ uint64_t, (uint64_t)entry->vme_start,
+ uint64_t, (uint64_t)entry->vme_end,
+ vm_prot_t, entry->protection);
+ printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
+ proc_selfpid(),
+ (current_task()->bsd_info
+ ? proc_name_address(current_task()->bsd_info)
+ : "?"),
+ __FUNCTION__);
+ entry->protection &= ~VM_PROT_EXECUTE;
+ }
+
+ if (object_copied) {
+ VME_OFFSET_SET(entry, local_start - old_start + object_copied_offset);
+ entry->needs_copy = object_copied_needs_copy;
+ entry->is_shared = FALSE;
+ } else {
+ assert(VME_OBJECT(entry) != VM_OBJECT_NULL);
+ assert(VME_OBJECT(entry)->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
+ assert(entry->wired_count == 0);
+ VME_OFFSET_SET(entry, copy_offset);
+ entry->needs_copy = TRUE;
+ if (map != old_map) {
+ entry->is_shared = TRUE;
+ }
+ }
+ if (entry->inheritance == VM_INHERIT_SHARE) {
+ entry->inheritance = VM_INHERIT_COPY;
+ }
+
+ vm_map_lock_write_to_read(map);
+ } else {
+ if ((cow_sub_map_parent)
+ && (cow_sub_map_parent != *real_map)
+ && (cow_sub_map_parent != map)) {
+ vm_map_unlock(cow_sub_map_parent);
+ }
+ entry = submap_entry;
+ vaddr = local_vaddr;
+ }
+ }
+
+ /*
+ * Check whether this task is allowed to have
+ * this page.
+ */
+
+ prot = entry->protection;
+
+ if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
+ /*
+ * HACK -- if not a stack, then allow execution
+ */
+ prot |= VM_PROT_EXECUTE;
+ }
+
+ if (mask_protections) {
+ fault_type &= prot;
+ if (fault_type == VM_PROT_NONE) {
+ goto protection_failure;
+ }
+ }
+ if (((fault_type & prot) != fault_type)
+#if __arm64__
+ /* prefetch abort in execute-only page */
+ && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
+#endif
+ ) {
+protection_failure:
+ if (*real_map != map) {
+ vm_map_unlock(*real_map);
+ }
+ *real_map = map;
+
+ if ((fault_type & VM_PROT_EXECUTE) && prot) {
+ log_stack_execution_failure((addr64_t)vaddr, prot);
+ }
+
+ DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
+ return KERN_PROTECTION_FAILURE;
+ }
+
+ /*
+ * If this page is not pageable, we have to get
+ * it for all possible accesses.
+ */
+
+ *wired = (entry->wired_count != 0);
+ if (*wired) {
+ fault_type = prot;
+ }
+
+ /*
+ * If the entry was copy-on-write, we either ...
+ */
+
+ if (entry->needs_copy) {
+ /*
+ * If we want to write the page, we may as well
+ * handle that now since we've got the map locked.
+ *
+ * If we don't need to write the page, we just
+ * demote the permissions allowed.
+ */
+
+ if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
+ /*
+ * Make a new object, and place it in the
+ * object chain. Note that no new references
+ * have appeared -- one just moved from the
+ * map to the new object.
+ */
+
+ if (vm_map_lock_read_to_write(map)) {
+ vm_map_lock_read(map);
+ goto RetryLookup;
+ }
+
+ if (VME_OBJECT(entry)->shadowed == FALSE) {
+ vm_object_lock(VME_OBJECT(entry));
+ VME_OBJECT(entry)->shadowed = TRUE;
+ vm_object_unlock(VME_OBJECT(entry));
+ }
+ VME_OBJECT_SHADOW(entry,
+ (vm_map_size_t) (entry->vme_end -
+ entry->vme_start));
+ entry->needs_copy = FALSE;
+
+ vm_map_lock_write_to_read(map);
+ }
+ if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
+ /*
+ * We're attempting to read a copy-on-write
+ * page -- don't allow writes.
+ */
+
+ prot &= (~VM_PROT_WRITE);
+ }
+ }
+
+ if (submap_needed_copy && (prot & VM_PROT_WRITE)) {
+ /*
+ * We went through a "needs_copy" submap without triggering
+ * a copy, so granting write access to the page would bypass
+ * that submap's "needs_copy".
+ */
+ assert(!(fault_type & VM_PROT_WRITE));
+ assert(!*wired);
+ assert(!force_copy);
+ // printf("FBDP %d[%s] submap_needed_copy for %p 0x%llx\n", proc_selfpid(), proc_name_address(current_task()->bsd_info), map, vaddr);
+ prot &= ~VM_PROT_WRITE;
+ }
+
+ /*
+ * Create an object if necessary.
+ */
+ if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
+ if (vm_map_lock_read_to_write(map)) {
+ vm_map_lock_read(map);
+ goto RetryLookup;
+ }
+
+ VME_OBJECT_SET(entry,
+ vm_object_allocate(
+ (vm_map_size_t)(entry->vme_end -
+ entry->vme_start)));
+ VME_OFFSET_SET(entry, 0);
+ assert(entry->use_pmap);
+ vm_map_lock_write_to_read(map);
+ }
+
+ /*
+ * Return the object/offset from this entry. If the entry
+ * was copy-on-write or empty, it has been fixed up. Also
+ * return the protection.
+ */
+
+ *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
+ *object = VME_OBJECT(entry);
+ *out_prot = prot;
+ KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_MAP_LOOKUP_OBJECT), VM_KERNEL_UNSLIDE_OR_PERM(*object), (unsigned long) VME_ALIAS(entry), 0, 0);
+
+ if (fault_info) {
+ fault_info->interruptible = THREAD_UNINT; /* for now... */
+ /* ... the caller will change "interruptible" if needed */
+ fault_info->cluster_size = 0;
+ fault_info->user_tag = VME_ALIAS(entry);
+ fault_info->pmap_options = 0;
+ if (entry->iokit_acct ||
+ (!entry->is_sub_map && !entry->use_pmap)) {
+ fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
+ }
+ fault_info->behavior = entry->behavior;
+ fault_info->lo_offset = VME_OFFSET(entry);
+ fault_info->hi_offset =
+ (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
+ fault_info->no_cache = entry->no_cache;
+ fault_info->stealth = FALSE;
+ fault_info->io_sync = FALSE;
+ if (entry->used_for_jit ||
+ entry->vme_resilient_codesign) {
+ fault_info->cs_bypass = TRUE;
+ } else {
+ fault_info->cs_bypass = FALSE;
+ }
+ fault_info->pmap_cs_associated = FALSE;
+#if CONFIG_PMAP_CS
+ if (entry->pmap_cs_associated) {
+ /*
+ * The pmap layer will validate this page
+ * before allowing it to be executed from.
+ */
+ fault_info->pmap_cs_associated = TRUE;
+ }
+#endif /* CONFIG_PMAP_CS */
+ fault_info->mark_zf_absent = FALSE;
+ fault_info->batch_pmap_op = FALSE;
+ fault_info->resilient_media = entry->vme_resilient_media;
+ fault_info->no_copy_on_read = entry->vme_no_copy_on_read;
+ if (entry->translated_allow_execute) {
+ fault_info->pmap_options |= PMAP_OPTIONS_TRANSLATED_ALLOW_EXECUTE;
+ }
+ }
+
+ /*
+ * Lock the object to prevent it from disappearing
+ */
+ if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) {
+ if (contended == NULL) {
+ vm_object_lock(*object);
+ } else {
+ *contended = vm_object_lock_check_contended(*object);
+ }
+ } else {
+ vm_object_lock_shared(*object);
+ }
+
+ /*
+ * Save the version number
+ */
+
+ out_version->main_timestamp = map->timestamp;
+
+ return KERN_SUCCESS;
+}
+
+
+/*
+ * vm_map_verify:
+ *
+ * Verifies that the map in question has not changed
+ * since the given version. The map has to be locked
+ * ("shared" mode is fine) before calling this function
+ * and it will be returned locked too.
+ */
+boolean_t
+vm_map_verify(
+ vm_map_t map,
+ vm_map_version_t *version) /* REF */
+{
+ boolean_t result;
+
+ vm_map_lock_assert_held(map);
+ result = (map->timestamp == version->main_timestamp);
+
+ return result;
+}
+
+/*
+ * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
+ * Goes away after regular vm_region_recurse function migrates to
+ * 64 bits
+ * vm_region_recurse: A form of vm_region which follows the
+ * submaps in a target map
+ *
+ */
+
+kern_return_t
+vm_map_region_recurse_64(
+ vm_map_t map,
+ vm_map_offset_t *address, /* IN/OUT */
+ vm_map_size_t *size, /* OUT */
+ natural_t *nesting_depth, /* IN/OUT */
+ vm_region_submap_info_64_t submap_info, /* IN/OUT */
+ mach_msg_type_number_t *count) /* IN/OUT */
+{
+ mach_msg_type_number_t original_count;
+ vm_region_extended_info_data_t extended;
+ vm_map_entry_t tmp_entry;
+ vm_map_offset_t user_address;
+ unsigned int user_max_depth;
+
+ /*
+ * "curr_entry" is the VM map entry preceding or including the
+ * address we're looking for.
+ * "curr_map" is the map or sub-map containing "curr_entry".
+ * "curr_address" is the equivalent of the top map's "user_address"
+ * in the current map.
+ * "curr_offset" is the cumulated offset of "curr_map" in the
+ * target task's address space.
+ * "curr_depth" is the depth of "curr_map" in the chain of
+ * sub-maps.
+ *
+ * "curr_max_below" and "curr_max_above" limit the range (around
+ * "curr_address") we should take into account in the current (sub)map.
+ * They limit the range to what's visible through the map entries
+ * we've traversed from the top map to the current map.
+ *
+ */
+ vm_map_entry_t curr_entry;
+ vm_map_address_t curr_address;
+ vm_map_offset_t curr_offset;
+ vm_map_t curr_map;
+ unsigned int curr_depth;
+ vm_map_offset_t curr_max_below, curr_max_above;
+ vm_map_offset_t curr_skip;
+
+ /*
+ * "next_" is the same as "curr_" but for the VM region immediately
+ * after the address we're looking for. We need to keep track of this
+ * too because we want to return info about that region if the
+ * address we're looking for is not mapped.
+ */
+ vm_map_entry_t next_entry;
+ vm_map_offset_t next_offset;
+ vm_map_offset_t next_address;
+ vm_map_t next_map;
+ unsigned int next_depth;
+ vm_map_offset_t next_max_below, next_max_above;
+ vm_map_offset_t next_skip;
+
+ boolean_t look_for_pages;
+ vm_region_submap_short_info_64_t short_info;
+ boolean_t do_region_footprint;
+ int effective_page_size, effective_page_shift;
+ boolean_t submap_needed_copy;
+
+ if (map == VM_MAP_NULL) {
+ /* no address space to work on */
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ effective_page_shift = vm_self_region_page_shift(map);
+ effective_page_size = (1 << effective_page_shift);
+
+ if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
+ /*
+ * "info" structure is not big enough and
+ * would overflow
+ */
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ do_region_footprint = task_self_region_footprint();
+ original_count = *count;
+
+ if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
+ *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
+ look_for_pages = FALSE;
+ short_info = (vm_region_submap_short_info_64_t) submap_info;
+ submap_info = NULL;
+ } else {
+ look_for_pages = TRUE;
+ *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
+ short_info = NULL;
+
+ if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
+ *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
+ }
+ if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
+ *count = VM_REGION_SUBMAP_INFO_V2_COUNT_64;
+ }
+ }
+
+ user_address = *address;
+ user_max_depth = *nesting_depth;
+ submap_needed_copy = FALSE;
+
+ if (not_in_kdp) {
+ vm_map_lock_read(map);
+ }
+
+recurse_again:
+ curr_entry = NULL;
+ curr_map = map;
+ curr_address = user_address;
+ curr_offset = 0;
+ curr_skip = 0;
+ curr_depth = 0;
+ curr_max_above = ((vm_map_offset_t) -1) - curr_address;
+ curr_max_below = curr_address;
+
+ next_entry = NULL;
+ next_map = NULL;
+ next_address = 0;
+ next_offset = 0;
+ next_skip = 0;
+ next_depth = 0;
+ next_max_above = (vm_map_offset_t) -1;
+ next_max_below = (vm_map_offset_t) -1;
+
+ for (;;) {
+ if (vm_map_lookup_entry(curr_map,
+ curr_address,
+ &tmp_entry)) {
+ /* tmp_entry contains the address we're looking for */
+ curr_entry = tmp_entry;
+ } else {
+ vm_map_offset_t skip;
+ /*
+ * The address is not mapped. "tmp_entry" is the
+ * map entry preceding the address. We want the next
+ * one, if it exists.
+ */
+ curr_entry = tmp_entry->vme_next;
+
+ if (curr_entry == vm_map_to_entry(curr_map) ||
+ (curr_entry->vme_start >=
+ curr_address + curr_max_above)) {
+ /* no next entry at this level: stop looking */
+ if (not_in_kdp) {
+ vm_map_unlock_read(curr_map);
+ }
+ curr_entry = NULL;
+ curr_map = NULL;
+ curr_skip = 0;
+ curr_offset = 0;
+ curr_depth = 0;
+ curr_max_above = 0;
+ curr_max_below = 0;
+ break;
+ }
+
+ /* adjust current address and offset */
+ skip = curr_entry->vme_start - curr_address;
+ curr_address = curr_entry->vme_start;
+ curr_skip += skip;
+ curr_offset += skip;
+ curr_max_above -= skip;
+ curr_max_below = 0;
+ }
+
+ /*
+ * Is the next entry at this level closer to the address (or
+ * deeper in the submap chain) than the one we had
+ * so far ?
+ */
+ tmp_entry = curr_entry->vme_next;
+ if (tmp_entry == vm_map_to_entry(curr_map)) {
+ /* no next entry at this level */
+ } else if (tmp_entry->vme_start >=
+ curr_address + curr_max_above) {
+ /*
+ * tmp_entry is beyond the scope of what we mapped of
+ * this submap in the upper level: ignore it.
+ */
+ } else if ((next_entry == NULL) ||
+ (tmp_entry->vme_start + curr_offset <=
+ next_entry->vme_start + next_offset)) {
+ /*
+ * We didn't have a "next_entry" or this one is
+ * closer to the address we're looking for:
+ * use this "tmp_entry" as the new "next_entry".
+ */
+ if (next_entry != NULL) {
+ /* unlock the last "next_map" */
+ if (next_map != curr_map && not_in_kdp) {
+ vm_map_unlock_read(next_map);
+ }
+ }
+ next_entry = tmp_entry;
+ next_map = curr_map;
+ next_depth = curr_depth;
+ next_address = next_entry->vme_start;
+ next_skip = curr_skip;
+ next_skip += (next_address - curr_address);
+ next_offset = curr_offset;
+ next_offset += (next_address - curr_address);
+ next_max_above = MIN(next_max_above, curr_max_above);
+ next_max_above = MIN(next_max_above,
+ next_entry->vme_end - next_address);
+ next_max_below = MIN(next_max_below, curr_max_below);
+ next_max_below = MIN(next_max_below,
+ next_address - next_entry->vme_start);
+ }
+
+ /*
+ * "curr_max_{above,below}" allow us to keep track of the
+ * portion of the submap that is actually mapped at this level:
+ * the rest of that submap is irrelevant to us, since it's not
+ * mapped here.
+ * The relevant portion of the map starts at
+ * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
+ */
+ curr_max_above = MIN(curr_max_above,
+ curr_entry->vme_end - curr_address);
+ curr_max_below = MIN(curr_max_below,
+ curr_address - curr_entry->vme_start);
+
+ if (!curr_entry->is_sub_map ||
+ curr_depth >= user_max_depth) {
+ /*
+ * We hit a leaf map or we reached the maximum depth
+ * we could, so stop looking. Keep the current map
+ * locked.
+ */
+ break;
+ }
+
+ /*
+ * Get down to the next submap level.
+ */
+
+ if (curr_entry->needs_copy) {
+ /* everything below this is effectively copy-on-write */
+ submap_needed_copy = TRUE;
+ }
+
+ /*
+ * Lock the next level and unlock the current level,
+ * unless we need to keep it locked to access the "next_entry"
+ * later.
+ */
+ if (not_in_kdp) {
+ vm_map_lock_read(VME_SUBMAP(curr_entry));
+ }
+ if (curr_map == next_map) {
+ /* keep "next_map" locked in case we need it */
+ } else {
+ /* release this map */
+ if (not_in_kdp) {
+ vm_map_unlock_read(curr_map);
+ }
+ }
+
+ /*
+ * Adjust the offset. "curr_entry" maps the submap
+ * at relative address "curr_entry->vme_start" in the
+ * curr_map but skips the first "VME_OFFSET(curr_entry)"
+ * bytes of the submap.
+ * "curr_offset" always represents the offset of a virtual
+ * address in the curr_map relative to the absolute address
+ * space (i.e. the top-level VM map).
+ */
+ curr_offset +=
+ (VME_OFFSET(curr_entry) - curr_entry->vme_start);
+ curr_address = user_address + curr_offset;
+ /* switch to the submap */
+ curr_map = VME_SUBMAP(curr_entry);
+ curr_depth++;
+ curr_entry = NULL;
+ }
+
+// LP64todo: all the current tools are 32bit, obviously never worked for 64b
+// so probably should be a real 32b ID vs. ptr.
+// Current users just check for equality
+
+ if (curr_entry == NULL) {
+ /* no VM region contains the address... */
+
+ if (do_region_footprint && /* we want footprint numbers */
+ next_entry == NULL && /* & there are no more regions */
+ /* & we haven't already provided our fake region: */
+ user_address <= vm_map_last_entry(map)->vme_end) {
+ ledger_amount_t ledger_resident, ledger_compressed;
+
+ /*
+ * Add a fake memory region to account for
+ * purgeable and/or ledger-tagged memory that
+ * counts towards this task's memory footprint,
+ * i.e. the resident/compressed pages of non-volatile
+ * objects owned by that task.
+ */
+ task_ledgers_footprint(map->pmap->ledger,
+ &ledger_resident,
+ &ledger_compressed);
+ if (ledger_resident + ledger_compressed == 0) {
+ /* no purgeable memory usage to report */
+ return KERN_INVALID_ADDRESS;
+ }
+ /* fake region to show nonvolatile footprint */
+ if (look_for_pages) {
+ submap_info->protection = VM_PROT_DEFAULT;
+ submap_info->max_protection = VM_PROT_DEFAULT;
+ submap_info->inheritance = VM_INHERIT_DEFAULT;
+ submap_info->offset = 0;
+ submap_info->user_tag = -1;
+ submap_info->pages_resident = (unsigned int) (ledger_resident / effective_page_size);
+ submap_info->pages_shared_now_private = 0;
+ submap_info->pages_swapped_out = (unsigned int) (ledger_compressed / effective_page_size);
+ submap_info->pages_dirtied = submap_info->pages_resident;
+ submap_info->ref_count = 1;
+ submap_info->shadow_depth = 0;
+ submap_info->external_pager = 0;
+ submap_info->share_mode = SM_PRIVATE;
+ if (submap_needed_copy) {
+ submap_info->share_mode = SM_COW;
+ }
+ submap_info->is_submap = 0;
+ submap_info->behavior = VM_BEHAVIOR_DEFAULT;
+ submap_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
+ submap_info->user_wired_count = 0;
+ submap_info->pages_reusable = 0;
+ } else {
+ short_info->user_tag = -1;
+ short_info->offset = 0;
+ short_info->protection = VM_PROT_DEFAULT;
+ short_info->inheritance = VM_INHERIT_DEFAULT;
+ short_info->max_protection = VM_PROT_DEFAULT;
+ short_info->behavior = VM_BEHAVIOR_DEFAULT;
+ short_info->user_wired_count = 0;
+ short_info->is_submap = 0;
+ short_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
+ short_info->external_pager = 0;
+ short_info->shadow_depth = 0;
+ short_info->share_mode = SM_PRIVATE;
+ if (submap_needed_copy) {
+ short_info->share_mode = SM_COW;
+ }
+ short_info->ref_count = 1;
+ }
+ *nesting_depth = 0;
+ *size = (vm_map_size_t) (ledger_resident + ledger_compressed);
+// *address = user_address;
+ *address = vm_map_last_entry(map)->vme_end;
+ return KERN_SUCCESS;
+ }
+
+ if (next_entry == NULL) {
+ /* ... and no VM region follows it either */
+ return KERN_INVALID_ADDRESS;
+ }
+ /* ... gather info about the next VM region */
+ curr_entry = next_entry;
+ curr_map = next_map; /* still locked ... */
+ curr_address = next_address;
+ curr_skip = next_skip;
+ curr_offset = next_offset;
+ curr_depth = next_depth;
+ curr_max_above = next_max_above;
+ curr_max_below = next_max_below;
+ } else {
+ /* we won't need "next_entry" after all */
+ if (next_entry != NULL) {
+ /* release "next_map" */
+ if (next_map != curr_map && not_in_kdp) {
+ vm_map_unlock_read(next_map);
+ }
+ }
+ }
+ next_entry = NULL;
+ next_map = NULL;
+ next_offset = 0;
+ next_skip = 0;
+ next_depth = 0;
+ next_max_below = -1;
+ next_max_above = -1;
+
+ if (curr_entry->is_sub_map &&
+ curr_depth < user_max_depth) {
+ /*
+ * We're not as deep as we could be: we must have
+ * gone back up after not finding anything mapped
+ * below the original top-level map entry's.
+ * Let's move "curr_address" forward and recurse again.
+ */
+ user_address = curr_address;
+ goto recurse_again;
+ }
+
+ *nesting_depth = curr_depth;
+ *size = curr_max_above + curr_max_below;
+ *address = user_address + curr_skip - curr_max_below;
+
+ if (look_for_pages) {
+ submap_info->user_tag = VME_ALIAS(curr_entry);
+ submap_info->offset = VME_OFFSET(curr_entry);
+ submap_info->protection = curr_entry->protection;
+ submap_info->inheritance = curr_entry->inheritance;
+ submap_info->max_protection = curr_entry->max_protection;
+ submap_info->behavior = curr_entry->behavior;
+ submap_info->user_wired_count = curr_entry->user_wired_count;
+ submap_info->is_submap = curr_entry->is_sub_map;
+ submap_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
+ } else {
+ short_info->user_tag = VME_ALIAS(curr_entry);
+ short_info->offset = VME_OFFSET(curr_entry);
+ short_info->protection = curr_entry->protection;
+ short_info->inheritance = curr_entry->inheritance;
+ short_info->max_protection = curr_entry->max_protection;
+ short_info->behavior = curr_entry->behavior;
+ short_info->user_wired_count = curr_entry->user_wired_count;
+ short_info->is_submap = curr_entry->is_sub_map;
+ short_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
+ }
+
+ extended.pages_resident = 0;
+ extended.pages_swapped_out = 0;
+ extended.pages_shared_now_private = 0;
+ extended.pages_dirtied = 0;
+ extended.pages_reusable = 0;
+ extended.external_pager = 0;
+ extended.shadow_depth = 0;
+ extended.share_mode = SM_EMPTY;
+ extended.ref_count = 0;
+
+ if (not_in_kdp) {
+ if (!curr_entry->is_sub_map) {
+ vm_map_offset_t range_start, range_end;
+ range_start = MAX((curr_address - curr_max_below),
+ curr_entry->vme_start);
+ range_end = MIN((curr_address + curr_max_above),
+ curr_entry->vme_end);
+ vm_map_region_walk(curr_map,
+ range_start,
+ curr_entry,
+ (VME_OFFSET(curr_entry) +
+ (range_start -
+ curr_entry->vme_start)),
+ range_end - range_start,
+ &extended,
+ look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
+ if (extended.external_pager &&
+ extended.ref_count == 2 &&
+ extended.share_mode == SM_SHARED) {
+ extended.share_mode = SM_PRIVATE;
+ }
+ if (submap_needed_copy) {
+ extended.share_mode = SM_COW;
+ }
+ } else {
+ if (curr_entry->use_pmap) {
+ extended.share_mode = SM_TRUESHARED;
+ } else {
+ extended.share_mode = SM_PRIVATE;
+ }
+ extended.ref_count = os_ref_get_count(&VME_SUBMAP(curr_entry)->map_refcnt);
+ }
+ }
+
+ if (look_for_pages) {
+ submap_info->pages_resident = extended.pages_resident;
+ submap_info->pages_swapped_out = extended.pages_swapped_out;
+ submap_info->pages_shared_now_private =
+ extended.pages_shared_now_private;
+ submap_info->pages_dirtied = extended.pages_dirtied;
+ submap_info->external_pager = extended.external_pager;
+ submap_info->shadow_depth = extended.shadow_depth;
+ submap_info->share_mode = extended.share_mode;
+ submap_info->ref_count = extended.ref_count;
+
+ if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
+ submap_info->pages_reusable = extended.pages_reusable;
+ }
+ if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
+ submap_info->object_id_full = (vm_object_id_t) (VME_OBJECT(curr_entry) != NULL) ? VM_KERNEL_ADDRPERM(VME_OBJECT(curr_entry)) : 0ULL;
+ }
+ } else {
+ short_info->external_pager = extended.external_pager;
+ short_info->shadow_depth = extended.shadow_depth;
+ short_info->share_mode = extended.share_mode;
+ short_info->ref_count = extended.ref_count;
+ }
+
+ if (not_in_kdp) {
+ vm_map_unlock_read(curr_map);
+ }
+
+ return KERN_SUCCESS;
+}
+
+/*
+ * vm_region:
+ *
+ * User call to obtain information about a region in
+ * a task's address map. Currently, only one flavor is
+ * supported.
+ *
+ * XXX The reserved and behavior fields cannot be filled
+ * in until the vm merge from the IK is completed, and
+ * vm_reserve is implemented.
+ */
+
+kern_return_t
+vm_map_region(
+ vm_map_t map,
+ vm_map_offset_t *address, /* IN/OUT */
+ vm_map_size_t *size, /* OUT */
+ vm_region_flavor_t flavor, /* IN */
+ vm_region_info_t info, /* OUT */
+ mach_msg_type_number_t *count, /* IN/OUT */
+ mach_port_t *object_name) /* OUT */
+{
+ vm_map_entry_t tmp_entry;
+ vm_map_entry_t entry;
+ vm_map_offset_t start;
+
+ if (map == VM_MAP_NULL) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ switch (flavor) {
+ case VM_REGION_BASIC_INFO:
+ /* legacy for old 32-bit objects info */
+ {
+ vm_region_basic_info_t basic;
+
+ if (*count < VM_REGION_BASIC_INFO_COUNT) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ basic = (vm_region_basic_info_t) info;
+ *count = VM_REGION_BASIC_INFO_COUNT;
+
+ vm_map_lock_read(map);
+
+ start = *address;
+ if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
+ if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
+ vm_map_unlock_read(map);
+ return KERN_INVALID_ADDRESS;
+ }
+ } else {
+ entry = tmp_entry;
+ }
+
+ start = entry->vme_start;
+
+ basic->offset = (uint32_t)VME_OFFSET(entry);
+ basic->protection = entry->protection;
+ basic->inheritance = entry->inheritance;
+ basic->max_protection = entry->max_protection;
+ basic->behavior = entry->behavior;
+ basic->user_wired_count = entry->user_wired_count;
+ basic->reserved = entry->is_sub_map;
+ *address = start;
+ *size = (entry->vme_end - start);
+
+ if (object_name) {
+ *object_name = IP_NULL;
+ }
+ if (entry->is_sub_map) {
+ basic->shared = FALSE;
+ } else {
+ basic->shared = entry->is_shared;
+ }
+
+ vm_map_unlock_read(map);
+ return KERN_SUCCESS;
+ }
+
+ case VM_REGION_BASIC_INFO_64:
+ {
+ vm_region_basic_info_64_t basic;
+
+ if (*count < VM_REGION_BASIC_INFO_COUNT_64) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ basic = (vm_region_basic_info_64_t) info;
+ *count = VM_REGION_BASIC_INFO_COUNT_64;
+
+ vm_map_lock_read(map);
+
+ start = *address;
+ if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
+ if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
+ vm_map_unlock_read(map);
+ return KERN_INVALID_ADDRESS;
+ }
+ } else {
+ entry = tmp_entry;
+ }
+
+ start = entry->vme_start;
+
+ basic->offset = VME_OFFSET(entry);
+ basic->protection = entry->protection;
+ basic->inheritance = entry->inheritance;
+ basic->max_protection = entry->max_protection;
+ basic->behavior = entry->behavior;
+ basic->user_wired_count = entry->user_wired_count;
+ basic->reserved = entry->is_sub_map;
+ *address = start;
+ *size = (entry->vme_end - start);
+
+ if (object_name) {
+ *object_name = IP_NULL;
+ }
+ if (entry->is_sub_map) {
+ basic->shared = FALSE;
+ } else {
+ basic->shared = entry->is_shared;
+ }
+
+ vm_map_unlock_read(map);
+ return KERN_SUCCESS;
+ }
+ case VM_REGION_EXTENDED_INFO:
+ if (*count < VM_REGION_EXTENDED_INFO_COUNT) {
+ return KERN_INVALID_ARGUMENT;
+ }
+ OS_FALLTHROUGH;
+ case VM_REGION_EXTENDED_INFO__legacy:
+ if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ {
+ vm_region_extended_info_t extended;
+ mach_msg_type_number_t original_count;
+ int effective_page_size, effective_page_shift;
+
+ extended = (vm_region_extended_info_t) info;
+
+ effective_page_shift = vm_self_region_page_shift(map);
+ effective_page_size = (1 << effective_page_shift);
+
+ vm_map_lock_read(map);
+
+ start = *address;
+ if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
+ if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
+ vm_map_unlock_read(map);
+ return KERN_INVALID_ADDRESS;
+ }
+ } else {
+ entry = tmp_entry;
+ }
+ start = entry->vme_start;
+
+ extended->protection = entry->protection;
+ extended->user_tag = VME_ALIAS(entry);
+ extended->pages_resident = 0;
+ extended->pages_swapped_out = 0;
+ extended->pages_shared_now_private = 0;
+ extended->pages_dirtied = 0;
+ extended->external_pager = 0;
+ extended->shadow_depth = 0;
+
+ original_count = *count;
+ if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
+ *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
+ } else {
+ extended->pages_reusable = 0;
+ *count = VM_REGION_EXTENDED_INFO_COUNT;
+ }
+
+ vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
+
+ if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) {
+ extended->share_mode = SM_PRIVATE;
+ }
+
+ if (object_name) {
+ *object_name = IP_NULL;
+ }
+ *address = start;
+ *size = (entry->vme_end - start);
+
+ vm_map_unlock_read(map);
+ return KERN_SUCCESS;
+ }
+ case VM_REGION_TOP_INFO:
+ {
+ vm_region_top_info_t top;
+
+ if (*count < VM_REGION_TOP_INFO_COUNT) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ top = (vm_region_top_info_t) info;
+ *count = VM_REGION_TOP_INFO_COUNT;
+
+ vm_map_lock_read(map);
+
+ start = *address;
+ if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
+ if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
+ vm_map_unlock_read(map);
+ return KERN_INVALID_ADDRESS;
+ }
+ } else {
+ entry = tmp_entry;
+ }
+ start = entry->vme_start;
+
+ top->private_pages_resident = 0;
+ top->shared_pages_resident = 0;
+
+ vm_map_region_top_walk(entry, top);
+
+ if (object_name) {
+ *object_name = IP_NULL;
+ }
+ *address = start;
+ *size = (entry->vme_end - start);
+
+ vm_map_unlock_read(map);
+ return KERN_SUCCESS;
+ }
+ default:
+ return KERN_INVALID_ARGUMENT;
+ }
+}
+
+ /*
+ * Pages of "obj" counted as resident, capped at "entry_size" pages:
+ * the wired pages when the whole object is reusable, otherwise the
+ * resident pages minus the reusable ones.
+ */
+ #define OBJ_RESIDENT_COUNT(obj, entry_size)                             \
+ 	MIN((entry_size),                                               \
+ 	    ((obj)->all_reusable ?                                      \
+ 	    (obj)->wired_page_count :                                   \
+ 	    (obj)->resident_page_count - (obj)->reusable_page_count))
+
+ /*
+ *	vm_map_region_top_walk:
+ *
+ *	Fill in "top" (share mode, reference count, object id and the
+ *	private/shared resident page counts) for the VM object backing
+ *	"entry".  An entry with no object, or a submap entry, is reported
+ *	as SM_EMPTY with zeroed ref_count and obj_id.
+ *
+ *	The page counters in "top" are only assigned or added to on the
+ *	paths that apply, so the caller is expected to have zeroed them.
+ *
+ *	The object (and, in turn, each object of its shadow chain) is
+ *	locked while it is examined; nothing is left locked on return.
+ */
+ void
+ vm_map_region_top_walk(
+ 	vm_map_entry_t          entry,
+ 	vm_region_top_info_t    top)
+ {
+ 	struct vm_object *cur_obj, *backing;
+ 	int              refs;
+ 	uint32_t         npages;
+
+ 	if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
+ 		top->share_mode = SM_EMPTY;
+ 		top->ref_count = 0;
+ 		top->obj_id = 0;
+ 		return;
+ 	}
+
+ 	/* size of the entry, in pages */
+ 	npages = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
+
+ 	cur_obj = VME_OBJECT(entry);
+ 	vm_object_lock(cur_obj);
+
+ 	/* one reference is discounted while paging is in progress */
+ 	refs = cur_obj->ref_count;
+ 	if (refs > 1 && cur_obj->paging_in_progress) {
+ 		refs--;
+ 	}
+
+ 	assert(cur_obj->reusable_page_count <= cur_obj->resident_page_count);
+
+ 	if (!cur_obj->shadow) {
+ 		/* no shadow chain: classify from the entry and the ref count */
+ 		if (entry->superpage_size) {
+ 			top->share_mode = SM_LARGE_PAGE;
+ 			top->shared_pages_resident = 0;
+ 			top->private_pages_resident = npages;
+ 		} else if (entry->needs_copy) {
+ 			top->share_mode = SM_COW;
+ 			top->shared_pages_resident =
+ 			    OBJ_RESIDENT_COUNT(cur_obj, npages);
+ 		} else if (refs == 1 ||
+ 		    (refs == 2 && cur_obj->named)) {
+ 			top->share_mode = SM_PRIVATE;
+ 			top->private_pages_resident =
+ 			    OBJ_RESIDENT_COUNT(cur_obj, npages);
+ 		} else {
+ 			top->share_mode = SM_SHARED;
+ 			top->shared_pages_resident =
+ 			    OBJ_RESIDENT_COUNT(cur_obj, npages);
+ 		}
+ 		top->ref_count = refs;
+ 	} else {
+ 		/* the top object shadows another one: copy-on-write */
+ 		if (refs == 1) {
+ 			top->private_pages_resident =
+ 			    OBJ_RESIDENT_COUNT(cur_obj, npages);
+ 		} else {
+ 			top->shared_pages_resident =
+ 			    OBJ_RESIDENT_COUNT(cur_obj, npages);
+ 		}
+ 		top->ref_count = refs;
+ 		top->share_mode = SM_COW;
+
+ 		/* descend the chain, locking the next object before
+ 		 * letting go of the current one */
+ 		for (backing = cur_obj->shadow;
+ 		    backing;
+ 		    backing = cur_obj->shadow) {
+ 			vm_object_lock(backing);
+ 			vm_object_unlock(cur_obj);
+ 			cur_obj = backing;
+
+ 			refs = cur_obj->ref_count;
+ 			if (refs > 1 && cur_obj->paging_in_progress) {
+ 				refs--;
+ 			}
+
+ 			assert(cur_obj->reusable_page_count <= cur_obj->resident_page_count);
+ 			top->shared_pages_resident +=
+ 			    OBJ_RESIDENT_COUNT(cur_obj, npages);
+ 			top->ref_count += refs - 1;
+ 		}
+ 	}
+
+ 	/* XXX K64: obj_id will be truncated */
+ 	top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(cur_obj);
+
+ 	vm_object_unlock(cur_obj);
+ }
+ /*
+ * vm_map_region_walk:
+ *
+ * Fill in "extended" with sharing information and page statistics for
+ * the "range" bytes that start at "offset" in the VM object backing
+ * "entry"; "va" is the address in "map" that is advanced in step with
+ * "offset".
+ *
+ * - An entry with no object, a submap entry, or a physically
+ *   contiguous object that is not a superpage is reported as SM_EMPTY.
+ * - A superpage entry is reported as SM_LARGE_PAGE with every page of
+ *   "range" counted as resident.
+ * - Otherwise, when "look_for_pages" is set, each page is examined,
+ *   either through the footprint queries (if
+ *   task_self_region_footprint() says so) or through
+ *   vm_map_region_look_for_page().  When it is not set, or in footprint
+ *   mode, the shadow chain is walked to compute "shadow_depth" and
+ *   "external_pager".
+ * - "share_mode" and "ref_count" are then derived from the object and
+ *   its shadow chain; SM_SHARED is refined to one of the *_ALIASED
+ *   modes by counting the references coming from "map" itself.
+ *
+ * "count" is passed through to vm_map_region_look_for_page().
+ * The pages_* counters in "extended" are only incremented here, so the
+ * caller is expected to have initialized them.
+ */
+ void
+ vm_map_region_walk(
+ vm_map_t map,
+ vm_map_offset_t va,
+ vm_map_entry_t entry,
+ vm_object_offset_t offset,
+ vm_object_size_t range,
+ vm_region_extended_info_t extended,
+ boolean_t look_for_pages,
+ mach_msg_type_number_t count)
+ {
+ struct vm_object *obj, *tmp_obj;
+ vm_map_offset_t last_offset;
+ int i;
+ int ref_count;
+ struct vm_object *shadow_object;
+ unsigned short shadow_depth;
+ boolean_t do_region_footprint;
+ int effective_page_size, effective_page_shift;
+ vm_map_offset_t effective_page_mask;
+
+ do_region_footprint = task_self_region_footprint();
+
+ if ((VME_OBJECT(entry) == 0) ||
+ (entry->is_sub_map) ||
+ (VME_OBJECT(entry)->phys_contiguous &&
+ !entry->superpage_size)) {
+ extended->share_mode = SM_EMPTY;
+ extended->ref_count = 0;
+ return;
+ }
+
+ if (entry->superpage_size) {
+ extended->shadow_depth = 0;
+ extended->share_mode = SM_LARGE_PAGE;
+ extended->ref_count = 1;
+ extended->external_pager = 0;
+
+ /* TODO4K: Superpage in 4k mode? */
+ extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
+ extended->shadow_depth = 0;
+ return;
+ }
+
+ effective_page_shift = vm_self_region_page_shift(map);
+ effective_page_size = (1 << effective_page_shift);
+ effective_page_mask = effective_page_size - 1;
+
+ offset = vm_map_trunc_page(offset, effective_page_mask); /* start the scan on an effective-page boundary */
+
+ obj = VME_OBJECT(entry);
+
+ vm_object_lock(obj);
+
+ if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
+ ref_count--; /* discount one reference while paging is in progress */
+ }
+
+ if (look_for_pages) {
+ for (last_offset = offset + range;
+ offset < last_offset;
+ offset += effective_page_size, va += effective_page_size) {
+ if (do_region_footprint) {
+ int disp;
+
+ disp = 0;
+ if (map->has_corpse_footprint) {
+ /*
+ * Query the page info data we saved
+ * while forking the corpse.
+ */
+ vm_map_corpse_footprint_query_page_info(
+ map,
+ va,
+ &disp);
+ } else {
+ /*
+ * Query the pmap.
+ */
+ vm_map_footprint_query_page_info(
+ map,
+ entry,
+ va,
+ &disp);
+ }
+ if (disp & VM_PAGE_QUERY_PAGE_PRESENT) {
+ extended->pages_resident++;
+ }
+ if (disp & VM_PAGE_QUERY_PAGE_REUSABLE) {
+ extended->pages_reusable++;
+ }
+ if (disp & VM_PAGE_QUERY_PAGE_DIRTY) {
+ extended->pages_dirtied++;
+ }
+ if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
+ extended->pages_swapped_out++;
+ }
+ continue; /* footprint mode: skip the object-based page lookup */
+ }
+
+ vm_map_region_look_for_page(map, va, obj,
+ vm_object_trunc_page(offset), ref_count,
+ 0, extended, count);
+ }
+
+ if (do_region_footprint) {
+ goto collect_object_info; /* footprint mode still needs shadow depth and pager info */
+ }
+ } else {
+ collect_object_info:
+ shadow_object = obj->shadow;
+ shadow_depth = 0;
+
+ if (!(obj->internal)) {
+ extended->external_pager = 1;
+ }
+
+ if (shadow_object != VM_OBJECT_NULL) {
+ vm_object_lock(shadow_object);
+ for (;
+ shadow_object != VM_OBJECT_NULL;
+ shadow_depth++) {
+ vm_object_t next_shadow;
+
+ if (!(shadow_object->internal)) {
+ extended->external_pager = 1;
+ }
+
+ next_shadow = shadow_object->shadow;
+ if (next_shadow) {
+ vm_object_lock(next_shadow);
+ }
+ vm_object_unlock(shadow_object);
+ shadow_object = next_shadow;
+ }
+ }
+ extended->shadow_depth = shadow_depth;
+ }
+
+ if (extended->shadow_depth || entry->needs_copy) {
+ extended->share_mode = SM_COW;
+ } else {
+ if (ref_count == 1) {
+ extended->share_mode = SM_PRIVATE;
+ } else {
+ if (obj->true_share) {
+ extended->share_mode = SM_TRUESHARED;
+ } else {
+ extended->share_mode = SM_SHARED;
+ }
+ }
+ }
+ extended->ref_count = ref_count - extended->shadow_depth;
+
+ for (i = 0; i < extended->shadow_depth; i++) { /* add the ref counts of up to shadow_depth shadow objects */
+ if ((tmp_obj = obj->shadow) == 0) {
+ break;
+ }
+ vm_object_lock(tmp_obj);
+ vm_object_unlock(obj);
+
+ if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) {
+ ref_count--;
+ }
+
+ extended->ref_count += ref_count;
+ obj = tmp_obj;
+ }
+ vm_object_unlock(obj);
+
+ if (extended->share_mode == SM_SHARED) { /* is all of the sharing coming from this map's own entries? */
+ vm_map_entry_t cur;
+ vm_map_entry_t last;
+ int my_refs;
+
+ obj = VME_OBJECT(entry); /* NOTE(review): the object was unlocked above, so ref_count below is read without the object lock -- confirm */
+ last = vm_map_to_entry(map);
+ my_refs = 0;
+
+ if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
+ ref_count--;
+ }
+ for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) {
+ my_refs += vm_map_region_count_obj_refs(cur, obj);
+ }
+
+ if (my_refs == ref_count) {
+ extended->share_mode = SM_PRIVATE_ALIASED;
+ } else if (my_refs > 1) {
+ extended->share_mode = SM_SHARED_ALIASED;
+ }
+ }
+ }
+
+
+ /*
+ *	vm_map_region_look_for_page:
+ *
+ *	Account for the page at "offset" in "object", or in the first
+ *	object of its shadow chain that has it, in "extended":
+ *	  - a resident page bumps pages_resident and, as applicable,
+ *	    pages_shared_now_private and pages_dirtied or pages_reusable
+ *	    (the latter only when "count" is at least
+ *	    VM_REGION_EXTENDED_INFO_COUNT);
+ *	  - a page held by the compressor pager bumps pages_swapped_out.
+ *	external_pager is set when a non-internal object is met, and
+ *	shadow_depth is raised to the deepest level visited.
+ *
+ *	"max_refcnt" is the caller's view of the reference count of
+ *	"object"; it is raised to the largest (adjusted) count seen while
+ *	descending.  "depth" is the depth of "object" in the chain.
+ *	"map" and "va" are unused.
+ *
+ *	Locking: object is locked on entry and locked on return.  Shadow
+ *	objects are locked one after the other on the way down (the next
+ *	one is locked before the current one is released) and none of them
+ *	is left locked on return.
+ */
+ static void
+ vm_map_region_look_for_page(
+ 	__unused vm_map_t               map,
+ 	__unused vm_map_offset_t        va,
+ 	vm_object_t                     object,
+ 	vm_object_offset_t              offset,
+ 	int                             max_refcnt,
+ 	unsigned short                  depth,
+ 	vm_region_extended_info_t       extended,
+ 	mach_msg_type_number_t          count)
+ {
+ 	vm_page_t       p;
+ 	vm_object_t     shadow;
+ 	int             ref_count;
+ 	vm_object_t     caller_object;
+
+ 	shadow = object->shadow;
+ 	caller_object = object;
+
+ 	while (TRUE) {
+ 		if (!(object->internal)) {
+ 			extended->external_pager = 1;
+ 		}
+
+ 		if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
+ 			if (shadow && (max_refcnt == 1)) {
+ 				extended->pages_shared_now_private++;
+ 			}
+
+ 			if (!p->vmp_fictitious &&
+ 			    (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p)))) {
+ 				extended->pages_dirtied++;
+ 			} else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
+ 				if (p->vmp_reusable || object->all_reusable) {
+ 					extended->pages_reusable++;
+ 				}
+ 			}
+
+ 			extended->pages_resident++;
+
+ 			/* the caller's object stays locked; anything else is ours */
+ 			if (object != caller_object) {
+ 				vm_object_unlock(object);
+ 			}
+
+ 			return;
+ 		}
+ 		if (object->internal &&
+ 		    object->alive &&
+ 		    !object->terminating &&
+ 		    object->pager_ready) {
+ 			if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
+ 			    == VM_EXTERNAL_STATE_EXISTS) {
+ 				/* the pager has that page */
+ 				extended->pages_swapped_out++;
+ 				if (object != caller_object) {
+ 					vm_object_unlock(object);
+ 				}
+ 				return;
+ 			}
+ 		}
+
+ 		if (shadow) {
+ 			vm_object_lock(shadow);
+
+ 			/* one reference is discounted while paging is in progress */
+ 			if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) {
+ 				ref_count--;
+ 			}
+
+ 			if (++depth > extended->shadow_depth) {
+ 				extended->shadow_depth = depth;
+ 			}
+
+ 			if (ref_count > max_refcnt) {
+ 				max_refcnt = ref_count;
+ 			}
+
+ 			/*
+ 			 * Translate "offset" into the shadow while "object"
+ 			 * is still locked: once an intermediate object has
+ 			 * been unlocked, its fields must not be read anymore.
+ 			 */
+ 			offset = offset + object->vo_shadow_offset;
+
+ 			if (object != caller_object) {
+ 				vm_object_unlock(object);
+ 			}
+
+ 			object = shadow;
+ 			shadow = object->shadow;
+ 			continue;
+ 		}
+
+ 		/* end of the chain and the page was not found anywhere */
+ 		if (object != caller_object) {
+ 			vm_object_unlock(object);
+ 		}
+ 		break;
+ 	}
+ }
+
+ /*
+ *	vm_map_region_count_obj_refs:
+ *
+ *	Count how many times "object" appears in the shadow chain that
+ *	starts at the VM object backing "entry".  Entries without an
+ *	object and submap entries contribute 0.
+ *
+ *	Each object of the chain is locked while it is examined, the next
+ *	one being locked before the current one is released; nothing is
+ *	left locked on return.
+ */
+ static int
+ vm_map_region_count_obj_refs(
+ 	vm_map_entry_t  entry,
+ 	vm_object_t     object)
+ {
+ 	int             hits;
+ 	vm_object_t     cur;
+ 	vm_object_t     below;
+
+ 	if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
+ 		return 0;
+ 	}
+
+ 	hits = 0;
+
+ 	cur = VME_OBJECT(entry);
+ 	vm_object_lock(cur);
+
+ 	for (; cur; cur = below) {
+ 		if (cur == object) {
+ 			hits++;
+ 		}
+
+ 		below = cur->shadow;
+ 		if (below) {
+ 			vm_object_lock(below);
+ 		}
+ 		vm_object_unlock(cur);
+ 	}
+
+ 	return hits;
+ }
+
+
+/*
+ * Routine: vm_map_simplify_entry
+ *
+ * Description:
+ * Attempt to simplify the map representation by merging
+ * "this_entry" with the entry immediately before it.
+ * The merge only happens when neither entry is the map's
+ * list head, the two are virtually contiguous, they refer
+ * to the same object/submap at contiguous offsets, and they
+ * agree on every attribute compared in the condition below.
+ * On a merge, the previous entry is unlinked and disposed
+ * and "this_entry" is extended downward to cover its range.
+ * Note:
+ * This routine is intended primarily to keep the
+ * kernel maps more compact -- they generally don't
+ * benefit from the "expand a map entry" technology
+ * at allocation time because the adjacent entry
+ * is often wired down.
+ * No map lock is taken here; vm_map_simplify() locks the
+ * map before calling this routine.
+ */
+void
+vm_map_simplify_entry(
+ vm_map_t map, /* map that links both entries */
+ vm_map_entry_t this_entry) /* entry to merge with its predecessor */
+{
+ vm_map_entry_t prev_entry;
+
+ prev_entry = this_entry->vme_prev;
+
+ if ((this_entry != vm_map_to_entry(map)) && /* neither side may be the list head */
+ (prev_entry != vm_map_to_entry(map)) &&
+
+ (prev_entry->vme_end == this_entry->vme_start) && /* virtually contiguous */
+
+ (prev_entry->is_sub_map == this_entry->is_sub_map) &&
+ (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) && /* same backing object */
+ ((VME_OFFSET(prev_entry) + (prev_entry->vme_end - /* ...at contiguous offsets */
+ prev_entry->vme_start))
+ == VME_OFFSET(this_entry)) &&
+
+ (prev_entry->behavior == this_entry->behavior) && /* attributes must match */
+ (prev_entry->needs_copy == this_entry->needs_copy) &&
+ (prev_entry->protection == this_entry->protection) &&
+ (prev_entry->max_protection == this_entry->max_protection) &&
+ (prev_entry->inheritance == this_entry->inheritance) &&
+ (prev_entry->use_pmap == this_entry->use_pmap) &&
+ (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
+ (prev_entry->no_cache == this_entry->no_cache) &&
+ (prev_entry->permanent == this_entry->permanent) &&
+ (prev_entry->map_aligned == this_entry->map_aligned) &&
+ (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
+ (prev_entry->used_for_jit == this_entry->used_for_jit) &&
+ (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
+ /* from_reserved_zone: OK if that field doesn't match */
+ (prev_entry->iokit_acct == this_entry->iokit_acct) &&
+ (prev_entry->vme_resilient_codesign ==
+ this_entry->vme_resilient_codesign) &&
+ (prev_entry->vme_resilient_media ==
+ this_entry->vme_resilient_media) &&
+ (prev_entry->vme_no_copy_on_read == this_entry->vme_no_copy_on_read) && /* NOTE(review): translated_allow_execute, set by vm_map_entry_insert(), is not compared -- confirm intended */
+
+ (prev_entry->wired_count == this_entry->wired_count) &&
+ (prev_entry->user_wired_count == this_entry->user_wired_count) &&
+
+ ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) && /* these flags must be clear on both sides */
+ (prev_entry->in_transition == FALSE) &&
+ (this_entry->in_transition == FALSE) &&
+ (prev_entry->needs_wakeup == FALSE) &&
+ (this_entry->needs_wakeup == FALSE) &&
+ (prev_entry->is_shared == this_entry->is_shared) &&
+ (prev_entry->superpage_size == FALSE) &&
+ (this_entry->superpage_size == FALSE)
+ ) {
+ vm_map_store_entry_unlink(map, prev_entry); /* take prev_entry out of the map's entry store */
+ assert(prev_entry->vme_start < this_entry->vme_end);
+ if (prev_entry->map_aligned) {
+ assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
+ VM_MAP_PAGE_MASK(map)));
+ }
+ this_entry->vme_start = prev_entry->vme_start; /* this_entry now also covers prev_entry's range */
+ VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry)); /* ...so it must start at prev_entry's offset */
+
+ if (map->holelistenabled) {
+ vm_map_store_update_first_free(map, this_entry, TRUE);
+ }
+
+ if (prev_entry->is_sub_map) { /* release what prev_entry pointed at (same submap/object as this_entry) */
+ vm_map_deallocate(VME_SUBMAP(prev_entry));
+ } else {
+ vm_object_deallocate(VME_OBJECT(prev_entry));
+ }
+ vm_map_entry_dispose(map, prev_entry);
+ SAVE_HINT_MAP_WRITE(map, this_entry); /* point the map's hint at the surviving entry */
+ }
+}
+
+/*
+ * Routine:	vm_map_simplify
+ *
+ * Description:
+ *	Locks the map, looks up the entry containing "start" and, if
+ *	there is one, tries to coalesce it with its predecessor and
+ *	then its successor with it.  Does nothing (besides locking and
+ *	unlocking) when "start" is not mapped.
+ */
+void
+vm_map_simplify(
+	vm_map_t        map,
+	vm_map_offset_t start)
+{
+	vm_map_entry_t  found;
+
+	vm_map_lock(map);
+
+	if (!vm_map_lookup_entry(map, start, &found)) {
+		/* nothing mapped at "start": nothing to simplify */
+		vm_map_unlock(map);
+		return;
+	}
+
+	/* merge backward first, then let the next entry merge into us */
+	vm_map_simplify_entry(map, found);
+	vm_map_simplify_entry(map, found->vme_next);
+
+	vm_map_unlock(map);
+}
+
+/*
+ * Routine:	vm_map_simplify_range
+ *
+ * Description:
+ *	Try to coalesce every entry that starts within the page-rounded
+ *	range [start, end] with the entry preceding it.  An entry that
+ *	starts exactly at "end" is included, so the boundary itself can
+ *	be merged away.
+ *
+ *	The map should be locked (for "write") by the caller.
+ */
+static void
+vm_map_simplify_range(
+	vm_map_t        map,
+	vm_map_offset_t start,
+	vm_map_offset_t end)
+{
+	vm_map_entry_t  cur;
+
+	if (end <= start) {
+		/* empty or inverted range: nothing to do */
+		return;
+	}
+
+	start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map));
+	end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
+
+	if (!vm_map_lookup_entry(map, start, &cur)) {
+		/*
+		 * "start" is unmapped and "cur" ends before it: begin with
+		 * the first entry of the map if the lookup handed back the
+		 * list head, otherwise with the entry that follows "cur".
+		 */
+		cur = (cur == vm_map_to_entry(map))
+		    ? vm_map_first_entry(map)
+		    : cur->vme_next;
+	}
+
+	for (;
+	    cur != vm_map_to_entry(map) && cur->vme_start <= end;
+	    cur = cur->vme_next) {
+		/* attempt to fold "cur" together with its predecessor */
+		vm_map_simplify_entry(map, cur);
+	}
+}
+
+
+/*
+ *	Routine:	vm_map_machine_attribute
+ *	Purpose:
+ *		Provide machine-specific attributes to mappings,
+ *		such as cachability etc. for machines that provide
+ *		them.  NUMA architectures and machines with big/strange
+ *		caches will use this.
+ *	Parameters:
+ *		map		map whose range [start, end) is affected
+ *		start, end	bounds of the range; must satisfy
+ *				vm_map_min(map) <= start <= end <= vm_map_max(map)
+ *		attribute	attribute to operate on; only MATTR_CACHE
+ *				needs the per-page traversal done here
+ *		value		IN/OUT attribute value, handed to the pmap layer
+ *	Returns:
+ *		KERN_INVALID_ADDRESS if the range is inverted or lies outside
+ *		the map; KERN_FAILURE if a MATTR_CACHE walk hits an unmapped
+ *		address; otherwise whatever pmap_attribute() (non-cache case)
+ *		or the last pmap_attribute_cache_sync() call returned
+ *		(KERN_SUCCESS if no page was synced).
+ *	Note:
+ *		Responsibilities for locking and checking are handled here,
+ *		everything else in the pmap module. If any non-volatile
+ *		information must be kept, the pmap module should handle
+ *		it itself. [This assumes that attributes do not
+ *		need to be inherited, which seems ok to me]
+ *
+ *		The result of the recursive call made for submap entries is
+ *		not folded into the return value.
+ */
+kern_return_t
+vm_map_machine_attribute(
+	vm_map_t                        map,
+	vm_map_offset_t                 start,
+	vm_map_offset_t                 end,
+	vm_machine_attribute_t          attribute,
+	vm_machine_attribute_val_t      *value)         /* IN/OUT */
+{
+	kern_return_t   ret;
+	vm_map_size_t   sync_size;
+	vm_map_entry_t  entry;
+
+	/*
+	 * An inverted range must be rejected as well: "end - start" below
+	 * would otherwise wrap around (assuming the usual unsigned offset
+	 * types) and the MATTR_CACHE loop would keep walking entries far
+	 * past the intended range while holding the map lock.
+	 */
+	if (start > end ||
+	    start < vm_map_min(map) || end > vm_map_max(map)) {
+		return KERN_INVALID_ADDRESS;
+	}
+
+	/* Figure how much memory we need to flush (in page increments) */
+	sync_size = end - start;
+
+	vm_map_lock(map);
+
+	if (attribute != MATTR_CACHE) {
+		/* If we don't have to find physical addresses, we */
+		/* don't have to do an explicit traversal here.    */
+		ret = pmap_attribute(map->pmap, start, end - start,
+		    attribute, value);
+		vm_map_unlock(map);
+		return ret;
+	}
+
+	ret = KERN_SUCCESS;                                                                             /* Assume it all worked */
+
+	while (sync_size) {
+		if (vm_map_lookup_entry(map, start, &entry)) {
+			vm_map_size_t   sub_size;
+
+			/* sub_size: the part of the request covered by this entry */
+			if ((entry->vme_end - start) > sync_size) {
+				sub_size = sync_size;
+				sync_size = 0;
+			} else {
+				sub_size = entry->vme_end - start;
+				sync_size -= sub_size;
+			}
+			if (entry->is_sub_map) {
+				vm_map_offset_t sub_start;
+				vm_map_offset_t sub_end;
+
+				/* translate the range into the submap's address space */
+				sub_start = (start - entry->vme_start)
+				    + VME_OFFSET(entry);
+				sub_end = sub_start + sub_size;
+				vm_map_machine_attribute(
+					VME_SUBMAP(entry),
+					sub_start,
+					sub_end,
+					attribute, value);
+			} else {
+				if (VME_OBJECT(entry)) {
+					vm_page_t               m;
+					vm_object_t             object;
+					vm_object_t             base_object;
+					vm_object_t             last_object;
+					vm_object_offset_t      offset;
+					vm_object_offset_t      base_offset;
+					vm_map_size_t           range;
+					range = sub_size;
+					offset = (start - entry->vme_start)
+					    + VME_OFFSET(entry);
+					offset = vm_object_trunc_page(offset);
+					base_offset = offset;
+					object = VME_OBJECT(entry);
+					base_object = object;
+					last_object = NULL;
+
+					vm_object_lock(object);
+
+					while (range) {
+						m = vm_page_lookup(
+							object, offset);
+
+						if (m && !m->vmp_fictitious) {
+							ret =
+							    pmap_attribute_cache_sync(
+								VM_PAGE_GET_PHYS_PAGE(m),
+								PAGE_SIZE,
+								attribute, value);
+						} else if (object->shadow) {
+							/*
+							 * No usable page at this level:
+							 * retry the same page one level
+							 * down the shadow chain, locking
+							 * the shadow before unlocking the
+							 * current object.
+							 */
+							offset = offset + object->vo_shadow_offset;
+							last_object = object;
+							object = object->shadow;
+							vm_object_lock(last_object->shadow);
+							vm_object_unlock(last_object);
+							continue;
+						}
+						if (range < PAGE_SIZE) {
+							range = 0;
+						} else {
+							range -= PAGE_SIZE;
+						}
+
+						/* go back to the top-level object for the next page */
+						if (base_object != object) {
+							vm_object_unlock(object);
+							vm_object_lock(base_object);
+							object = base_object;
+						}
+						/* Bump to the next page */
+						base_offset += PAGE_SIZE;
+						offset = base_offset;
+					}
+					vm_object_unlock(object);
+				}
+			}
+			start += sub_size;
+		} else {
+			/* hole in the requested range */
+			vm_map_unlock(map);
+			return KERN_FAILURE;
+		}
+	}
+
+	vm_map_unlock(map);
+
+	return ret;
+}
+
+/*
+ *	vm_map_behavior_set:
+ *
+ *	Sets the paging reference behavior of the specified address
+ *	range in the target map.  Paging reference behavior affects
+ *	how pagein operations resulting from faults on the map will be
+ *	clustered.
+ *
+ *	Persistent behaviors are recorded in the map entries covering
+ *	[start, end) under the map lock; the remaining behaviors are
+ *	dispatched to the routine that carries out the immediate action.
+ *
+ *	Returns KERN_NO_SPACE for an inverted or out-of-map range,
+ *	KERN_INVALID_ADDRESS if the range is not entirely valid in the
+ *	map, KERN_INVALID_ARGUMENT for an unknown behavior, the
+ *	dispatched routine's result for immediate actions, and
+ *	KERN_SUCCESS otherwise.
+ */
+kern_return_t
+vm_map_behavior_set(
+	vm_map_t        map,
+	vm_map_offset_t start,
+	vm_map_offset_t end,
+	vm_behavior_t   new_behavior)
+{
+	vm_map_entry_t  cur;
+	vm_map_entry_t  first;
+
+	if (start > end ||
+	    start < vm_map_min(map) ||
+	    end > vm_map_max(map)) {
+		return KERN_NO_SPACE;
+	}
+
+	switch (new_behavior) {
+	/*
+	 * Persistent behaviors: the only work is to stamp the requested
+	 * behavior onto each vm_map_entry_t in the range.
+	 */
+	case VM_BEHAVIOR_DEFAULT:
+	case VM_BEHAVIOR_RANDOM:
+	case VM_BEHAVIOR_SEQUENTIAL:
+	case VM_BEHAVIOR_RSEQNTL:
+	case VM_BEHAVIOR_ZERO_WIRED_PAGES:
+		vm_map_lock(map);
+
+		/*
+		 * vm_map_range_check() validates the whole range and hands
+		 * back the entry that contains its first address.
+		 */
+		if (!vm_map_range_check(map, start, end, &first)) {
+			vm_map_unlock(map);
+			return KERN_INVALID_ADDRESS;
+		}
+		cur = first;
+		vm_map_clip_start(map, cur, start);
+
+		for (;
+		    (cur != vm_map_to_entry(map)) && (cur->vme_start < end);
+		    cur = cur->vme_next) {
+			vm_map_clip_end(map, cur, end);
+			if (cur->is_sub_map) {
+				assert(!cur->use_pmap);
+			}
+
+			if (new_behavior != VM_BEHAVIOR_ZERO_WIRED_PAGES) {
+				cur->behavior = new_behavior;
+			} else {
+				cur->zero_wired_pages = TRUE;
+			}
+		}
+
+		vm_map_unlock(map);
+		return KERN_SUCCESS;
+
+	/*
+	 * Immediate actions: unlike the cases above, these do something
+	 * right away rather than recording a setting for later faults.
+	 */
+	case VM_BEHAVIOR_WILLNEED:
+		return vm_map_willneed(map, start, end);
+
+	case VM_BEHAVIOR_DONTNEED:
+		return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
+
+	case VM_BEHAVIOR_FREE:
+		return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
+
+	case VM_BEHAVIOR_REUSABLE:
+		return vm_map_reusable_pages(map, start, end);
+
+	case VM_BEHAVIOR_REUSE:
+		return vm_map_reuse_pages(map, start, end);
+
+	case VM_BEHAVIOR_CAN_REUSE:
+		return vm_map_can_reuse(map, start, end);
+
+#if MACH_ASSERT
+	case VM_BEHAVIOR_PAGEOUT:
+		return vm_map_pageout(map, start, end);
+#endif /* MACH_ASSERT */
+
+	default:
+		return KERN_INVALID_ARGUMENT;
+	}
+}
+
+
+/*
+ * Internals for madvise(MADV_WILLNEED) system call.
+ *
+ * The implementation is to do:-
+ * a) read-ahead if the mapping corresponds to a mapped regular file
+ * b) or, fault in the pages (zero-fill, decompress etc) if it's an anonymous mapping
+ *
+ * Submap entries and entries without read permission are skipped.
+ *
+ * Locking: the map is read-locked on entry to each loop iteration and
+ * dropped before the pre-fault / data request is issued; it is then
+ * re-taken and the entry for the updated "start" is looked up again.
+ *
+ * Returns KERN_INVALID_ADDRESS if the range is not fully allocated (either
+ * up front or when re-looked-up after the lock was dropped), and
+ * KERN_SUCCESS otherwise -- including when the data request itself fails.
+ */
+
+
+static kern_return_t
+vm_map_willneed(
+ vm_map_t map, /* map the advice applies to */
+ vm_map_offset_t start, /* first address of the range */
+ vm_map_offset_t end /* end of the range; processing stops once start >= end */
+ )
+{
+ vm_map_entry_t entry;
+ vm_object_t object;
+ memory_object_t pager;
+ struct vm_object_fault_info fault_info = {};
+ kern_return_t kr;
+ vm_object_size_t len;
+ vm_object_offset_t offset;
+
+ fault_info.interruptible = THREAD_UNINT; /* ignored value */
+ fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
+ fault_info.stealth = TRUE;
+
+ /*
+ * The MADV_WILLNEED operation doesn't require any changes to the
+ * vm_map_entry_t's, so the read lock is sufficient.
+ */
+
+ vm_map_lock_read(map);
+
+ /*
+ * The madvise semantics require that the address range be fully
+ * allocated with no holes. Otherwise, we're required to return
+ * an error.
+ */
+
+ if (!vm_map_range_check(map, start, end, &entry)) {
+ vm_map_unlock_read(map);
+ return KERN_INVALID_ADDRESS;
+ }
+
+ /*
+ * Examine each vm_map_entry_t in the range.
+ */
+ for (; entry != vm_map_to_entry(map) && start < end;) {
+ /*
+ * The first time through, the start address could be anywhere
+ * within the vm_map_entry we found. So adjust the offset to
+ * correspond. After that, "start" is expected to coincide with
+ * the beginning of the current vm_map_entry, so only the entry's
+ * own object offset contributes.
+ */
+ offset = (start - entry->vme_start) + VME_OFFSET(entry);
+
+ /*
+ * Set the length so we don't go beyond the end of the
+ * map_entry or beyond the end of the range we were given.
+ * This range could span also multiple map entries all of which
+ * map different files, so make sure we only do the right amount
+ * of I/O for each object. Note that it's possible for there
+ * to be multiple map entries all referring to the same object
+ * but with different page permissions, but it's not worth
+ * trying to optimize that case.
+ */
+ len = MIN(entry->vme_end - start, end - start);
+
+ if ((vm_size_t) len != len) {
+ /* 32-bit overflow */
+ len = (vm_size_t) (0 - PAGE_SIZE);
+ }
+ fault_info.cluster_size = (vm_size_t) len;
+ fault_info.lo_offset = offset;
+ fault_info.hi_offset = offset + len;
+ fault_info.user_tag = VME_ALIAS(entry);
+ fault_info.pmap_options = 0;
+ if (entry->iokit_acct ||
+ (!entry->is_sub_map && !entry->use_pmap)) {
+ fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
+ }
+
+ /*
+ * If the entry is a submap OR there's no read permission
+ * to this mapping, then just skip it. "start" jumps to the
+ * beginning of the following entry; the map lock stays held.
+ */
+ if ((entry->is_sub_map) || (entry->protection & VM_PROT_READ) == 0) {
+ entry = entry->vme_next;
+ start = entry->vme_start;
+ continue;
+ }
+
+ object = VME_OBJECT(entry);
+
+ if (object == NULL ||
+ (object && object->internal)) {
+ /*
+ * Memory range backed by anonymous memory.
+ * Pre-fault it one page at a time, with the map unlocked.
+ */
+ vm_size_t region_size = 0, effective_page_size = 0;
+ vm_map_offset_t addr = 0, effective_page_mask = 0;
+
+ region_size = len;
+ addr = start;
+
+ effective_page_mask = MIN(vm_map_page_mask(current_map()), PAGE_MASK); /* note: uses current_map(), not "map" */
+ effective_page_size = effective_page_mask + 1;
+
+ vm_map_unlock_read(map);
+
+ while (region_size) {
+ vm_pre_fault(
+ vm_map_trunc_page(addr, effective_page_mask),
+ VM_PROT_READ | VM_PROT_WRITE);
+
+ region_size -= effective_page_size; /* NOTE(review): would wrap if region_size is not a multiple of effective_page_size -- confirm callers guarantee alignment */
+ addr += effective_page_size;
+ }
+ } else {
+ /*
+ * Find the file object backing this map entry. If there is
+ * none, then we simply ignore the "will need" advice for this
+ * entry and go on to the next one.
+ */
+ if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
+ entry = entry->vme_next;
+ start = entry->vme_start;
+ continue;
+ }
+
+ vm_object_paging_begin(object); /* paired with vm_object_paging_end() after the request */
+ pager = object->pager;
+ vm_object_unlock(object);
+
+ /*
+ * The data_request() could take a long time, so let's
+ * release the map lock to avoid blocking other threads.
+ */
+ vm_map_unlock_read(map);
+
+ /*
+ * Get the data from the object asynchronously.
+ *
+ * Note that memory_object_data_request() places limits on the
+ * amount of I/O it will do. Regardless of the len we
+ * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
+ * silently truncates the len to that size. This isn't
+ * necessarily bad since madvise shouldn't really be used to
+ * page in unlimited amounts of data. Other Unix variants
+ * limit the willneed case as well. If this turns out to be an
+ * issue for developers, then we can always adjust the policy
+ * here and still be backwards compatible since this is all
+ * just "advice".
+ */
+ kr = memory_object_data_request(
+ pager,
+ vm_object_trunc_page(offset) + object->paging_offset,
+ 0, /* ignored */
+ VM_PROT_READ,
+ (memory_object_fault_info_t)&fault_info);
+
+ vm_object_lock(object);
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+
+ /*
+ * If we couldn't do the I/O for some reason, just give up on
+ * the madvise. We still return success to the user since
+ * madvise isn't supposed to fail when the advice can't be
+ * taken.
+ */
+
+ if (kr != KERN_SUCCESS) {
+ return KERN_SUCCESS; /* map lock was already dropped above */
+ }
+ }
+
+ start += len;
+ if (start >= end) {
+ /* done */
+ return KERN_SUCCESS; /* map is unlocked on both paths that reach here */
+ }
+
+ /* look up next entry */
+ vm_map_lock_read(map);
+ if (!vm_map_lookup_entry(map, start, &entry)) {
+ /*
+ * There's a new hole in the address range.
+ */
+ vm_map_unlock_read(map);
+ return KERN_INVALID_ADDRESS;
+ }
+ }
+
+ vm_map_unlock_read(map);
+ return KERN_SUCCESS;
+}
+
+/*
+ * Decide whether "entry" may take part in the reuse/reusable madvise
+ * operations.  Only meant for user map entries.
+ *
+ * - Submap entries are never reusable.
+ * - Entries whose alias is not one of the malloc() tags are always
+ *   reported as reusable: the caller decides.
+ * - malloc() entries must still be in their original state (default
+ *   protections and inheritance, not wired, no special flags) and their
+ *   VM object, if any, must be a plain internal object.
+ */
+static boolean_t
+vm_map_entry_is_reusable(
+	vm_map_entry_t entry)
+{
+	vm_object_t     backing;
+	boolean_t       malloc_region;
+
+	if (entry->is_sub_map) {
+		return FALSE;
+	}
+
+	switch (VME_ALIAS(entry)) {
+	case VM_MEMORY_MALLOC:
+	case VM_MEMORY_MALLOC_SMALL:
+	case VM_MEMORY_MALLOC_LARGE:
+	case VM_MEMORY_REALLOC:
+	case VM_MEMORY_MALLOC_TINY:
+	case VM_MEMORY_MALLOC_LARGE_REUSABLE:
+	case VM_MEMORY_MALLOC_LARGE_REUSED:
+		malloc_region = TRUE;
+		break;
+	default:
+		malloc_region = FALSE;
+		break;
+	}
+
+	if (!malloc_region) {
+		/* not malloc() memory: leave the decision to the caller */
+		return TRUE;
+	}
+
+	/*
+	 * malloc() memory: it has to look exactly the way it was created.
+	 * ("is_shared" is deliberately not part of this test.)
+	 */
+	if (entry->is_sub_map || entry->in_transition) {
+		return FALSE;
+	}
+	if (entry->protection != VM_PROT_DEFAULT ||
+	    entry->max_protection != VM_PROT_ALL ||
+	    entry->inheritance != VM_INHERIT_DEFAULT) {
+		return FALSE;
+	}
+	if (entry->no_cache ||
+	    entry->permanent ||
+	    entry->superpage_size != FALSE ||
+	    entry->zero_wired_pages) {
+		return FALSE;
+	}
+	if (entry->wired_count != 0 || entry->user_wired_count != 0) {
+		return FALSE;
+	}
+
+	backing = VME_OBJECT(entry);
+	if (backing == VM_OBJECT_NULL) {
+		return TRUE;
+	}
+
+	/*
+	 * The object's ref_count is intentionally not examined: a
+	 * potentially shared object is still accepted here, sharing is
+	 * dealt with later when the actual VM pages are processed, and the
+	 * region can still be marked "reusable" so that its resident size
+	 * does not include the reusable range.
+	 */
+	if (backing->wired_page_count != 0 ||
+	    backing->copy != VM_OBJECT_NULL ||
+	    backing->shadow != VM_OBJECT_NULL ||
+	    !backing->internal ||
+	    backing->purgable != VM_PURGABLE_DENY ||
+	    backing->wimg_bits != VM_WIMG_USE_DEFAULT ||
+	    backing->code_signed) {
+		return FALSE;
+	}
+	return TRUE;
+}
+
+/*
+ * Back end for the VM_BEHAVIOR_REUSE case of vm_map_behavior_set().
+ *
+ * Walks every entry overlapping [start, end) under the map read lock
+ * and calls vm_object_reuse_pages() on the matching slice of each
+ * entry's VM object.  Entries tagged VM_MEMORY_MALLOC_LARGE_REUSABLE are
+ * re-tagged VM_MEMORY_MALLOC_LARGE_REUSED.
+ *
+ * Returns KERN_INVALID_ADDRESS if the range has holes or contains an
+ * entry rejected by vm_map_entry_is_reusable(); KERN_SUCCESS otherwise
+ * (and right away for maps whose page shift is below PAGE_SHIFT).
+ * The vm_page_stats_reusable counters record the outcome.
+ */
+static kern_return_t
+vm_map_reuse_pages(
+	vm_map_t        map,
+	vm_map_offset_t start,
+	vm_map_offset_t end)
+{
+	vm_map_entry_t          cur;
+	vm_object_t             obj;
+	vm_object_offset_t      first_off, last_off;
+
+	if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
+		/*
+		 * XXX TODO4K
+		 * what "reusable" means for part of a native page still
+		 * has to be worked out.
+		 */
+		return KERN_SUCCESS;
+	}
+
+	/*
+	 * No vm_map_entry_t needs to change shape for MADV_REUSE, so a
+	 * read lock on the map is enough.
+	 */
+	vm_map_lock_read(map);
+	assert(map->pmap != kernel_pmap);       /* protect alias access */
+
+	/* madvise semantics: the range must be fully allocated, no holes */
+	if (!vm_map_range_check(map, start, end, &cur)) {
+		goto bad_range;
+	}
+
+	while (cur != vm_map_to_entry(map) && cur->vme_start < end) {
+		if (!vm_map_entry_is_reusable(cur)) {
+			goto bad_range;
+		}
+
+		/*
+		 * Translate the part of [start, end) that falls inside this
+		 * entry into offsets within its object.  Only the first
+		 * entry can begin before "start".
+		 */
+		first_off = VME_OFFSET(cur);
+		if (cur->vme_start < start) {
+			first_off += start - cur->vme_start;
+		}
+		last_off = VME_OFFSET(cur) +
+		    (MIN(end, cur->vme_end) - cur->vme_start);
+
+		assert(!cur->is_sub_map);
+		obj = VME_OBJECT(cur);
+		if (obj != VM_OBJECT_NULL) {
+			vm_object_lock(obj);
+			vm_object_reuse_pages(obj, first_off, last_off,
+			    TRUE);
+			vm_object_unlock(obj);
+		}
+
+		if (VME_ALIAS(cur) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
+			/*
+			 * XXX
+			 * The map is only held shared here.  Updating the
+			 * "alias" field anyway is tolerated because it is
+			 * not critical, provided it is the only field ever
+			 * modified under the shared lock.
+			 */
+			VME_ALIAS_SET(cur, VM_MEMORY_MALLOC_LARGE_REUSED);
+		}
+
+		cur = cur->vme_next;
+	}
+
+	vm_map_unlock_read(map);
+	vm_page_stats_reusable.reuse_pages_success++;
+	return KERN_SUCCESS;
+
+bad_range:
+	vm_map_unlock_read(map);
+	vm_page_stats_reusable.reuse_pages_failure++;
+	return KERN_INVALID_ADDRESS;
+}
+
+
+/*
+ * Back end for the VM_BEHAVIOR_REUSABLE case of vm_map_behavior_set().
+ *
+ * For every entry overlapping [start, end), under the map read lock:
+ * checks that the entry is reusable and writable (or used for JIT), then
+ * either deactivates the matching pages of its object as "reusable" or,
+ * when the object does not qualify, only bumps a statistics counter.
+ * Entries tagged VM_MEMORY_MALLOC_LARGE or VM_MEMORY_MALLOC_LARGE_REUSED
+ * are re-tagged VM_MEMORY_MALLOC_LARGE_REUSABLE.
+ *
+ * Returns KERN_INVALID_ADDRESS for a range with holes or a non-reusable
+ * entry, KERN_PROTECTION_FAILURE for a non-writable, non-JIT entry, and
+ * KERN_SUCCESS otherwise (immediately so for maps whose page shift is
+ * below PAGE_SHIFT).
+ */
+static kern_return_t
+vm_map_reusable_pages(
+	vm_map_t        map,
+	vm_map_offset_t start,
+	vm_map_offset_t end)
+{
+	vm_map_entry_t          cur;
+	vm_object_t             obj;
+	vm_object_offset_t      first_off, last_off;
+	vm_map_offset_t         pmap_addr;
+
+	if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
+		/*
+		 * XXX TODO4K
+		 * what "reusable" means for part of a native page still
+		 * has to be worked out.
+		 */
+		return KERN_SUCCESS;
+	}
+
+	/*
+	 * No vm_map_entry_t needs to change shape for MADV_REUSABLE, so a
+	 * read lock on the map is enough.
+	 */
+	vm_map_lock_read(map);
+	assert(map->pmap != kernel_pmap);       /* protect alias access */
+
+	/* madvise semantics: the range must be fully allocated, no holes */
+	if (!vm_map_range_check(map, start, end, &cur)) {
+		vm_map_unlock_read(map);
+		vm_page_stats_reusable.reusable_pages_failure++;
+		return KERN_INVALID_ADDRESS;
+	}
+
+	for (;
+	    cur != vm_map_to_entry(map) && cur->vme_start < end;
+	    cur = cur->vme_next) {
+		if (!vm_map_entry_is_reusable(cur)) {
+			vm_map_unlock_read(map);
+			vm_page_stats_reusable.reusable_pages_failure++;
+			return KERN_INVALID_ADDRESS;
+		}
+
+		if ((cur->protection & VM_PROT_WRITE) == 0 &&
+		    !cur->used_for_jit) {
+			/* contents of a read-only mapping can't be discarded */
+			vm_map_unlock_read(map);
+			vm_page_stats_reusable.reusable_nonwritable++;
+			vm_page_stats_reusable.reusable_pages_failure++;
+			return KERN_PROTECTION_FAILURE;
+		}
+
+		/*
+		 * Only the first entry can begin before "start"; the pmap
+		 * address and the object offsets are derived from whichever
+		 * of the two comes later.
+		 */
+		pmap_addr = (cur->vme_start < start) ? start : cur->vme_start;
+		first_off = VME_OFFSET(cur) + (pmap_addr - cur->vme_start);
+		last_off = VME_OFFSET(cur) +
+		    (MIN(end, cur->vme_end) - cur->vme_start);
+
+		assert(!cur->is_sub_map);
+		obj = VME_OBJECT(cur);
+		if (obj == VM_OBJECT_NULL) {
+			/* nothing resident to give back; alias is left alone */
+			continue;
+		}
+
+		vm_object_lock(obj);
+		if (((obj->ref_count == 1) ||
+		    (obj->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
+		    obj->copy == VM_OBJECT_NULL)) &&
+		    obj->shadow == VM_OBJECT_NULL &&
+		    /*
+		     * "iokit_acct" entries are charged for their virtual
+		     * size rather than their resident pages, so making
+		     * their pages reusable would gain nothing and would
+		     * complicate the pmap stats and ledgers.
+		     */
+		    !(cur->iokit_acct ||
+		    (!cur->is_sub_map && !cur->use_pmap))) {
+			if (obj->ref_count != 1) {
+				vm_page_stats_reusable.reusable_shared++;
+			}
+			vm_object_deactivate_pages(obj,
+			    first_off,
+			    last_off - first_off,
+			    1 /*kill_pages*/,
+			    TRUE /*reusable_pages*/,
+			    map->pmap,
+			    pmap_addr);
+		} else {
+			/* object doesn't qualify: just account for it */
+			vm_page_stats_reusable.reusable_pages_shared++;
+		}
+		vm_object_unlock(obj);
+
+		if (VME_ALIAS(cur) == VM_MEMORY_MALLOC_LARGE ||
+		    VME_ALIAS(cur) == VM_MEMORY_MALLOC_LARGE_REUSED) {
+			/*
+			 * XXX
+			 * The map is only held shared here.  Updating the
+			 * "alias" field anyway is tolerated because it is
+			 * not critical, provided it is the only field ever
+			 * modified under the shared lock.
+			 */
+			VME_ALIAS_SET(cur, VM_MEMORY_MALLOC_LARGE_REUSABLE);
+		}
+	}
+
+	vm_map_unlock_read(map);
+	vm_page_stats_reusable.reusable_pages_success++;
+	return KERN_SUCCESS;
+}
+
+
+/*
+ * Back end for the VM_BEHAVIOR_CAN_REUSE case of vm_map_behavior_set().
+ *
+ * Purely a query: reports whether [start, end) is fully allocated and
+ * made only of entries accepted by vm_map_entry_is_reusable().  Nothing
+ * in the map is modified, so the read lock is sufficient.
+ *
+ * Returns KERN_SUCCESS if so, KERN_INVALID_ADDRESS otherwise; the
+ * vm_page_stats_reusable can_reuse counters record the outcome.
+ */
+static kern_return_t
+vm_map_can_reuse(
+	vm_map_t        map,
+	vm_map_offset_t start,
+	vm_map_offset_t end)
+{
+	vm_map_entry_t  cur;
+	boolean_t       acceptable;
+
+	vm_map_lock_read(map);
+	assert(map->pmap != kernel_pmap);       /* protect alias access */
+
+	/* madvise semantics: the range must be fully allocated, no holes */
+	acceptable = vm_map_range_check(map, start, end, &cur);
+
+	if (acceptable) {
+		for (;
+		    cur != vm_map_to_entry(map) && cur->vme_start < end;
+		    cur = cur->vme_next) {
+			if (!vm_map_entry_is_reusable(cur)) {
+				acceptable = FALSE;
+				break;
+			}
+		}
+	}
+
+	vm_map_unlock_read(map);
+
+	if (!acceptable) {
+		vm_page_stats_reusable.can_reuse_failure++;
+		return KERN_INVALID_ADDRESS;
+	}
+	vm_page_stats_reusable.can_reuse_success++;
+	return KERN_SUCCESS;
+}
+
+
+#if MACH_ASSERT
+/*
+ * Back end for the VM_BEHAVIOR_PAGEOUT case of vm_map_behavior_set()
+ * (only built with MACH_ASSERT).
+ *
+ * For each entry overlapping [start, end), under the map read lock,
+ * calls vm_object_pageout() on the entry's object when that object
+ * exists and is internal.  For a submap entry, the submap is
+ * read-locked, the range it maps is checked, and only the object of
+ * the first submap entry returned by that check is considered.
+ *
+ * Returns KERN_INVALID_ADDRESS if the range (or a submap's range) is
+ * not fully allocated, KERN_SUCCESS otherwise.
+ */
+static kern_return_t
+vm_map_pageout(
+	vm_map_t        map,
+	vm_map_offset_t start,
+	vm_map_offset_t end)
+{
+	vm_map_entry_t  cur;
+
+	/*
+	 * No vm_map_entry_t needs to change for MADV_PAGEOUT, so a read
+	 * lock on the map is enough.
+	 */
+	vm_map_lock_read(map);
+
+	/* madvise semantics: the range must be fully allocated, no holes */
+	if (!vm_map_range_check(map, start, end, &cur)) {
+		vm_map_unlock_read(map);
+		return KERN_INVALID_ADDRESS;
+	}
+
+	for (;
+	    cur != vm_map_to_entry(map) && cur->vme_start < end;
+	    cur = cur->vme_next) {
+		vm_object_t     obj;
+		vm_map_t        sub;
+		vm_map_offset_t sub_start;
+		vm_map_offset_t sub_end;
+		vm_map_entry_t  sub_entry;
+
+		if (!cur->is_sub_map) {
+			/* ordinary entry: page out its internal object, if any */
+			obj = VME_OBJECT(cur);
+			if (obj != VM_OBJECT_NULL && obj->internal) {
+				vm_object_pageout(obj);
+			}
+			continue;
+		}
+
+		/* submap entry: look one level down */
+		sub = VME_SUBMAP(cur);
+		sub_start = VME_OFFSET(cur);
+		sub_end = sub_start + (cur->vme_end - cur->vme_start);
+
+		vm_map_lock_read(sub);
+
+		if (!vm_map_range_check(sub, sub_start, sub_end,
+		    &sub_entry)) {
+			vm_map_unlock_read(sub);
+			vm_map_unlock_read(map);
+			return KERN_INVALID_ADDRESS;
+		}
+
+		obj = VME_OBJECT(sub_entry);
+		if (!sub_entry->is_sub_map &&
+		    obj != VM_OBJECT_NULL &&
+		    obj->internal) {
+			vm_object_pageout(obj);
+		}
+
+		vm_map_unlock_read(sub);
+	}
+
+	vm_map_unlock_read(map);
+	return KERN_SUCCESS;
+}
+#endif /* MACH_ASSERT */
+
+
+/*
+ *	Routine:	vm_map_entry_insert
+ *
+ *	Description:	This routine inserts a new vm_entry in a locked map.
+ *
+ *	A fresh entry covering [start, end) is created, every attribute
+ *	is filled in from the arguments (or given a fixed default, see
+ *	below), the entry is linked after "insp_entry", map->size is
+ *	increased by the entry's size and the map's hint is pointed at
+ *	the new entry.
+ *
+ *	The map must be locked exclusively (asserted) and "insp_entry"
+ *	must not be null (asserted).  "start" and "end" must be aligned
+ *	(to the map's page size when the entry ends up "map_aligned",
+ *	to the kernel page size otherwise) and start < end (asserted).
+ *
+ *	Fixed defaults: user_wired_count 0; needs_wakeup,
+ *	zero_wired_pages, pmap_cs_associated, iokit_acct,
+ *	vme_resilient_codesign, vme_resilient_media and vme_atomic FALSE;
+ *	use_pmap FALSE for submaps and TRUE for objects.
+ *
+ *	"used_for_jit" is only honoured if the map has no JIT entry yet
+ *	or its policy allows several; otherwise the entry is explicitly
+ *	marked as not used for JIT.
+ *
+ *	Returns the new entry.
+ */
+vm_map_entry_t
+vm_map_entry_insert(
+	vm_map_t                map,
+	vm_map_entry_t          insp_entry,
+	vm_map_offset_t         start,
+	vm_map_offset_t         end,
+	vm_object_t             object,
+	vm_object_offset_t      offset,
+	vm_map_kernel_flags_t   vmk_flags,
+	boolean_t               needs_copy,
+	boolean_t               is_shared,
+	boolean_t               in_transition,
+	vm_prot_t               cur_protection,
+	vm_prot_t               max_protection,
+	vm_behavior_t           behavior,
+	vm_inherit_t            inheritance,
+	unsigned short          wired_count,
+	boolean_t               no_cache,
+	boolean_t               permanent,
+	boolean_t               no_copy_on_read,
+	unsigned int            superpage_size,
+	boolean_t               clear_map_aligned,
+	boolean_t               is_submap,
+	boolean_t               used_for_jit,
+	int                     alias,
+	boolean_t               translated_allow_execute)
+{
+	vm_map_entry_t  new_entry;
+
+	assert(insp_entry != (vm_map_entry_t)0);
+	vm_map_lock_assert_exclusive(map);
+
+#if DEVELOPMENT || DEBUG
+	vm_object_offset_t      end_offset = 0;
+	assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
+#endif /* DEVELOPMENT || DEBUG */
+
+	new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
+
+	/*
+	 * Entries are "map_aligned" only in maps whose page size differs
+	 * from the kernel's, and even then not if the caller allows the
+	 * flag to be cleared for a range that isn't aligned to the map's
+	 * page size.
+	 */
+	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
+		new_entry->map_aligned = TRUE;
+	} else {
+		new_entry->map_aligned = FALSE;
+	}
+	if (clear_map_aligned &&
+	    (!VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
+	    !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
+		new_entry->map_aligned = FALSE;
+	}
+
+	new_entry->vme_start = start;
+	new_entry->vme_end = end;
+	if (new_entry->map_aligned) {
+		assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
+		    VM_MAP_PAGE_MASK(map)));
+		assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
+		    VM_MAP_PAGE_MASK(map)));
+	} else {
+		assert(page_aligned(new_entry->vme_start));
+		assert(page_aligned(new_entry->vme_end));
+	}
+	assert(new_entry->vme_start < new_entry->vme_end);
+
+	VME_OBJECT_SET(new_entry, object);
+	VME_OFFSET_SET(new_entry, offset);
+	new_entry->is_shared = is_shared;
+	new_entry->is_sub_map = is_submap;
+	new_entry->needs_copy = needs_copy;
+	new_entry->in_transition = in_transition;
+	new_entry->needs_wakeup = FALSE;
+	new_entry->inheritance = inheritance;
+	new_entry->protection = cur_protection;
+	new_entry->max_protection = max_protection;
+	new_entry->behavior = behavior;
+	new_entry->wired_count = wired_count;
+	new_entry->user_wired_count = 0;
+	if (is_submap) {
+		/*
+		 * submap: "use_pmap" means "nested".
+		 * default: false.
+		 */
+		new_entry->use_pmap = FALSE;
+	} else {
+		/*
+		 * object: "use_pmap" means "use pmap accounting" for footprint.
+		 * default: true.
+		 */
+		new_entry->use_pmap = TRUE;
+	}
+	VME_ALIAS_SET(new_entry, alias);
+	new_entry->zero_wired_pages = FALSE;
+	new_entry->no_cache = no_cache;
+	new_entry->permanent = permanent;
+	if (superpage_size) {
+		new_entry->superpage_size = TRUE;
+	} else {
+		new_entry->superpage_size = FALSE;
+	}
+	if (used_for_jit &&
+	    (!(map->jit_entry_exists) ||
+	    VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map))) {
+		new_entry->used_for_jit = TRUE;
+		map->jit_entry_exists = TRUE;
+	} else {
+		/*
+		 * Either JIT wasn't requested, or the map already has a
+		 * JIT entry and its policy doesn't allow another one.
+		 * Set the flag explicitly in both cases so that it never
+		 * keeps whatever value the freshly created entry had.
+		 */
+		new_entry->used_for_jit = FALSE;
+	}
+	if (translated_allow_execute) {
+		new_entry->translated_allow_execute = TRUE;
+	} else {
+		new_entry->translated_allow_execute = FALSE;
+	}
+	new_entry->pmap_cs_associated = FALSE;
+	new_entry->iokit_acct = FALSE;
+	new_entry->vme_resilient_codesign = FALSE;
+	new_entry->vme_resilient_media = FALSE;
+	new_entry->vme_atomic = FALSE;
+	new_entry->vme_no_copy_on_read = no_copy_on_read;
+
+	/*
+	 *	Insert the new entry into the list.
+	 */
+
+	vm_map_store_entry_link(map, insp_entry, new_entry, vmk_flags);
+	map->size += end - start;
+
+	/*
+	 *	Update the free space hint and the lookup hint.
+	 */
+
+	SAVE_HINT_MAP_WRITE(map, new_entry);
+	return new_entry;
+}
+
+/*
+ * Routine: vm_map_remap_extract
+ *
+ * Description: This routine returns a vm_entry list from a map.
+ */
+static kern_return_t
+vm_map_remap_extract(
+ vm_map_t map,
+ vm_map_offset_t addr,
+ vm_map_size_t size,
+ boolean_t copy,
+ struct vm_map_header *map_header,
+ vm_prot_t *cur_protection, /* IN/OUT */
+ vm_prot_t *max_protection, /* IN/OUT */
+ /* What, no behavior? */
+ vm_inherit_t inheritance,
+ vm_map_kernel_flags_t vmk_flags)
+{
+ kern_return_t result;
+ vm_map_size_t mapped_size;
+ vm_map_size_t tmp_size;
+ vm_map_entry_t src_entry; /* result of last map lookup */
+ vm_map_entry_t new_entry;
+ vm_object_offset_t offset;
+ vm_map_offset_t map_address;
+ vm_map_offset_t src_start; /* start of entry to map */
+ vm_map_offset_t src_end; /* end of region to be mapped */
+ vm_object_t object;
+ vm_map_version_t version;
+ boolean_t src_needs_copy;
+ boolean_t new_entry_needs_copy;
+ vm_map_entry_t saved_src_entry;
+ boolean_t src_entry_was_wired;
+ vm_prot_t max_prot_for_prot_copy;
+ vm_map_offset_t effective_page_mask;
+ boolean_t pageable, same_map;
+ boolean_t vm_remap_legacy;
+ vm_prot_t required_cur_prot, required_max_prot;
+
+ pageable = vmk_flags.vmkf_copy_pageable;
+ same_map = vmk_flags.vmkf_copy_same_map;
+
+ effective_page_mask = MIN(PAGE_MASK, VM_MAP_PAGE_MASK(map));
+
+ assert(map != VM_MAP_NULL);
+ assert(size != 0);
+ assert(size == vm_map_round_page(size, effective_page_mask));
+ assert(inheritance == VM_INHERIT_NONE ||
+ inheritance == VM_INHERIT_COPY ||
+ inheritance == VM_INHERIT_SHARE);
+ assert(!(*cur_protection & ~VM_PROT_ALL));
+ assert(!(*max_protection & ~VM_PROT_ALL));
+ assert((*cur_protection & *max_protection) == *cur_protection);
+
+ /*
+ * Compute start and end of region.
+ */
+ src_start = vm_map_trunc_page(addr, effective_page_mask);
+ src_end = vm_map_round_page(src_start + size, effective_page_mask);
+
+ /*
+ * Initialize map_header.
+ */
+ map_header->links.next = CAST_TO_VM_MAP_ENTRY(&map_header->links);
+ map_header->links.prev = CAST_TO_VM_MAP_ENTRY(&map_header->links);
+ map_header->nentries = 0;
+ map_header->entries_pageable = pageable;
+// map_header->page_shift = MIN(VM_MAP_PAGE_SHIFT(map), PAGE_SHIFT);
+ map_header->page_shift = VM_MAP_PAGE_SHIFT(map);
+ map_header->rb_head_store.rbh_root = (void *)(int)SKIP_RB_TREE;
+
+ vm_map_store_init( map_header );
+
+ if (copy && vmk_flags.vmkf_remap_prot_copy) {
+ /*
+ * Special case for vm_map_protect(VM_PROT_COPY):
+ * we want to set the new mappings' max protection to the
+ * specified *max_protection...
+ */
+ max_prot_for_prot_copy = *max_protection & VM_PROT_ALL;
+ /* ... but we want to use the vm_remap() legacy mode */
+ *max_protection = VM_PROT_NONE;
+ *cur_protection = VM_PROT_NONE;
+ } else {
+ max_prot_for_prot_copy = VM_PROT_NONE;
+ }
+
+ if (*cur_protection == VM_PROT_NONE &&
+ *max_protection == VM_PROT_NONE) {
+ /*
+ * vm_remap() legacy mode:
+ * Extract all memory regions in the specified range and
+ * collect the strictest set of protections allowed on the
+ * entire range, so the caller knows what they can do with
+ * the remapped range.
+ * We start with VM_PROT_ALL and we'll remove the protections
+ * missing from each memory region.
+ */
+ vm_remap_legacy = TRUE;
+ *cur_protection = VM_PROT_ALL;
+ *max_protection = VM_PROT_ALL;
+ required_cur_prot = VM_PROT_NONE;
+ required_max_prot = VM_PROT_NONE;
+ } else {
+ /*
+ * vm_remap_new() mode:
+ * Extract all memory regions in the specified range and
+ * ensure that they have at least the protections specified
+ * by the caller via *cur_protection and *max_protection.
+ * The resulting mapping should have these protections.
+ */
+ vm_remap_legacy = FALSE;
+ if (copy) {
+ required_cur_prot = VM_PROT_NONE;
+ required_max_prot = VM_PROT_READ;
+ } else {
+ required_cur_prot = *cur_protection;
+ required_max_prot = *max_protection;
+ }
+ }
+
+ map_address = 0;
+ mapped_size = 0;
+ result = KERN_SUCCESS;
+
+ /*
+ * The specified source virtual space might correspond to
+ * multiple map entries, need to loop on them.
+ */
+ vm_map_lock(map);
+ if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
+ /*
+ * This address space uses sub-pages so the range might
+ * not be re-mappable in an address space with larger
+ * pages. Re-assemble any broken-up VM map entries to
+ * improve our chances of making it work.
+ */
+ vm_map_simplify_range(map, src_start, src_end);
+ }
+ while (mapped_size != size) {
+ vm_map_size_t entry_size;
+
+ /*
+ * Find the beginning of the region.
+ */
+ if (!vm_map_lookup_entry(map, src_start, &src_entry)) {
+ result = KERN_INVALID_ADDRESS;
+ break;
+ }
+
+ if (src_start < src_entry->vme_start ||
+ (mapped_size && src_start != src_entry->vme_start)) {
+ result = KERN_INVALID_ADDRESS;
+ break;
+ }
+
+ tmp_size = size - mapped_size;
+ if (src_end > src_entry->vme_end) {
+ tmp_size -= (src_end - src_entry->vme_end);
+ }
+
+ entry_size = (vm_map_size_t)(src_entry->vme_end -
+ src_entry->vme_start);
+
+ if (src_entry->is_sub_map &&
+ vmk_flags.vmkf_copy_single_object) {
+ vm_map_t submap;
+ vm_map_offset_t submap_start;
+ vm_map_size_t submap_size;
+ boolean_t submap_needs_copy;
+
+ /*
+ * No check for "required protection" on "src_entry"
+ * because the protections that matter are the ones
+ * on the submap's VM map entry, which will be checked
+ * during the call to vm_map_remap_extract() below.
+ */
+ submap_size = src_entry->vme_end - src_start;
+ if (submap_size > size) {
+ submap_size = size;
+ }
+ submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
+ submap = VME_SUBMAP(src_entry);
+ if (copy) {
+ /*
+ * The caller wants a copy-on-write re-mapping,
+ * so let's extract from the submap accordingly.
+ */
+ submap_needs_copy = TRUE;
+ } else if (src_entry->needs_copy) {
+ /*
+ * The caller wants a shared re-mapping but the
+ * submap is mapped with "needs_copy", so its
+ * contents can't be shared as is. Extract the
+ * contents of the submap as "copy-on-write".
+ * The re-mapping won't be shared with the
+ * original mapping but this is equivalent to
+ * what happened with the original "remap from
+ * submap" code.
+ * The shared region is mapped "needs_copy", for
+ * example.
+ */
+ submap_needs_copy = TRUE;
+ } else {
+ /*
+ * The caller wants a shared re-mapping and
+ * this mapping can be shared (no "needs_copy"),
+ * so let's extract from the submap accordingly.
+ * Kernel submaps are mapped without
+ * "needs_copy", for example.
+ */
+ submap_needs_copy = FALSE;
+ }
+ vm_map_reference(submap);
+ vm_map_unlock(map);
+ src_entry = NULL;
+ if (vm_remap_legacy) {
+ *cur_protection = VM_PROT_NONE;
+ *max_protection = VM_PROT_NONE;
+ }
+
+ DTRACE_VM7(remap_submap_recurse,
+ vm_map_t, map,
+ vm_map_offset_t, addr,
+ vm_map_size_t, size,
+ boolean_t, copy,
+ vm_map_offset_t, submap_start,
+ vm_map_size_t, submap_size,
+ boolean_t, submap_needs_copy);
+
+ result = vm_map_remap_extract(submap,
+ submap_start,
+ submap_size,
+ submap_needs_copy,
+ map_header,
+ cur_protection,
+ max_protection,
+ inheritance,
+ vmk_flags);
+ vm_map_deallocate(submap);
+ return result;
+ }
+
+ if (src_entry->is_sub_map) {
+ /* protections for submap mapping are irrelevant here */
+ } else if (((src_entry->protection & required_cur_prot) !=
+ required_cur_prot) ||
+ ((src_entry->max_protection & required_max_prot) !=
+ required_max_prot)) {
+ if (vmk_flags.vmkf_copy_single_object &&
+ mapped_size != 0) {
+ /*
+ * Single object extraction.
+ * We can't extract more with the required
+ * protection but we've extracted some, so
+ * stop there and declare success.
+ * The caller should check the size of
+ * the copy entry we've extracted.
+ */
+ result = KERN_SUCCESS;
+ } else {
+ /*
+ * VM range extraction.
+				 * Required protection is not available
+ * for this part of the range: fail.
+ */
+ result = KERN_PROTECTION_FAILURE;
+ }
+ break;
+ }
+
+ if (src_entry->is_sub_map) {
+ vm_map_t submap;
+ vm_map_offset_t submap_start;
+ vm_map_size_t submap_size;
+ vm_map_copy_t submap_copy;
+ vm_prot_t submap_curprot, submap_maxprot;
+ boolean_t submap_needs_copy;
+
+ /*
+ * No check for "required protection" on "src_entry"
+ * because the protections that matter are the ones
+ * on the submap's VM map entry, which will be checked
+ * during the call to vm_map_copy_extract() below.
+ */
+ object = VM_OBJECT_NULL;
+ submap_copy = VM_MAP_COPY_NULL;
+
+ /* find equivalent range in the submap */
+ submap = VME_SUBMAP(src_entry);
+ submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
+ submap_size = tmp_size;
+ if (copy) {
+ /*
+ * The caller wants a copy-on-write re-mapping,
+ * so let's extract from the submap accordingly.
+ */
+ submap_needs_copy = TRUE;
+ } else if (src_entry->needs_copy) {
+ /*
+ * The caller wants a shared re-mapping but the
+ * submap is mapped with "needs_copy", so its
+ * contents can't be shared as is. Extract the
+ * contents of the submap as "copy-on-write".
+ * The re-mapping won't be shared with the
+ * original mapping but this is equivalent to
+ * what happened with the original "remap from
+ * submap" code.
+ * The shared region is mapped "needs_copy", for
+ * example.
+ */
+ submap_needs_copy = TRUE;
+ } else {
+ /*
+ * The caller wants a shared re-mapping and
+ * this mapping can be shared (no "needs_copy"),
+ * so let's extract from the submap accordingly.
+ * Kernel submaps are mapped without
+ * "needs_copy", for example.
+ */
+ submap_needs_copy = FALSE;
+ }
+ /* extra ref to keep submap alive */
+ vm_map_reference(submap);
+
+ DTRACE_VM7(remap_submap_recurse,
+ vm_map_t, map,
+ vm_map_offset_t, addr,
+ vm_map_size_t, size,
+ boolean_t, copy,
+ vm_map_offset_t, submap_start,
+ vm_map_size_t, submap_size,
+ boolean_t, submap_needs_copy);
+
+ /*
+ * The map can be safely unlocked since we
+ * already hold a reference on the submap.
+ *
+ * No timestamp since we don't care if the map
+ * gets modified while we're down in the submap.
+ * We'll resume the extraction at src_start + tmp_size
+ * anyway.
+ */
+ vm_map_unlock(map);
+ src_entry = NULL; /* not valid once map is unlocked */
+
+ if (vm_remap_legacy) {
+ submap_curprot = VM_PROT_NONE;
+ submap_maxprot = VM_PROT_NONE;
+ if (max_prot_for_prot_copy) {
+ submap_maxprot = max_prot_for_prot_copy;
+ }
+ } else {
+ assert(!max_prot_for_prot_copy);
+ submap_curprot = *cur_protection;
+ submap_maxprot = *max_protection;
+ }
+ result = vm_map_copy_extract(submap,
+ submap_start,
+ submap_size,
+ submap_needs_copy,
+ &submap_copy,
+ &submap_curprot,
+ &submap_maxprot,
+ inheritance,
+ vmk_flags);
+
+ /* release extra ref on submap */
+ vm_map_deallocate(submap);
+ submap = VM_MAP_NULL;
+
+ if (result != KERN_SUCCESS) {
+ vm_map_lock(map);
+ break;
+ }
+
+ /* transfer submap_copy entries to map_header */
+ while (vm_map_copy_first_entry(submap_copy) !=
+ vm_map_copy_to_entry(submap_copy)) {
+ vm_map_entry_t copy_entry;
+ vm_map_size_t copy_entry_size;
+
+ copy_entry = vm_map_copy_first_entry(submap_copy);
+ assert(!copy_entry->is_sub_map);
+ object = VME_OBJECT(copy_entry);
+
+ /*
+ * Prevent kernel_object from being exposed to
+ * user space.
+ */
+ if (__improbable(object == kernel_object)) {
+ printf("%d[%s]: rejecting attempt to extract from kernel_object\n",
+ proc_selfpid(),
+ (current_task()->bsd_info
+ ? proc_name_address(current_task()->bsd_info)
+ : "?"));
+ DTRACE_VM(extract_kernel_only);
+ result = KERN_INVALID_RIGHT;
+ vm_map_copy_discard(submap_copy);
+ submap_copy = VM_MAP_COPY_NULL;
+ vm_map_lock(map);
+ break;
+ }
+
+ vm_map_copy_entry_unlink(submap_copy, copy_entry);
+ copy_entry_size = copy_entry->vme_end - copy_entry->vme_start;
+ copy_entry->vme_start = map_address;
+ copy_entry->vme_end = map_address + copy_entry_size;
+ map_address += copy_entry_size;
+ mapped_size += copy_entry_size;
+ src_start += copy_entry_size;
+ assert(src_start <= src_end);
+ _vm_map_store_entry_link(map_header,
+ map_header->links.prev,
+ copy_entry);
+ }
+ /* done with submap_copy */
+ vm_map_copy_discard(submap_copy);
+
+ if (vm_remap_legacy) {
+ *cur_protection &= submap_curprot;
+ *max_protection &= submap_maxprot;
+ }
+
+ /* re-acquire the map lock and continue to next entry */
+ vm_map_lock(map);
+ continue;
+ } else {
+ object = VME_OBJECT(src_entry);
+
+ /*
+ * Prevent kernel_object from being exposed to
+ * user space.
+ */
+ if (__improbable(object == kernel_object)) {
+ printf("%d[%s]: rejecting attempt to extract from kernel_object\n",
+ proc_selfpid(),
+ (current_task()->bsd_info
+ ? proc_name_address(current_task()->bsd_info)
+ : "?"));
+ DTRACE_VM(extract_kernel_only);
+ result = KERN_INVALID_RIGHT;
+ break;
+ }
+
+ if (src_entry->iokit_acct) {
+ /*
+ * This entry uses "IOKit accounting".
+ */
+ } else if (object != VM_OBJECT_NULL &&
+ (object->purgable != VM_PURGABLE_DENY ||
+ object->vo_ledger_tag != VM_LEDGER_TAG_NONE)) {
+ /*
+ * Purgeable objects have their own accounting:
+ * no pmap accounting for them.
+ */
+ assertf(!src_entry->use_pmap,
+ "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
+ map,
+ src_entry,
+ (uint64_t)src_entry->vme_start,
+ (uint64_t)src_entry->vme_end,
+ src_entry->protection,
+ src_entry->max_protection,
+ VME_ALIAS(src_entry));
+ } else {
+ /*
+ * Not IOKit or purgeable:
+ * must be accounted by pmap stats.
+ */
+ assertf(src_entry->use_pmap,
+ "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
+ map,
+ src_entry,
+ (uint64_t)src_entry->vme_start,
+ (uint64_t)src_entry->vme_end,
+ src_entry->protection,
+ src_entry->max_protection,
+ VME_ALIAS(src_entry));
+ }
+
+ if (object == VM_OBJECT_NULL) {
+ assert(!src_entry->needs_copy);
+ object = vm_object_allocate(entry_size);
+ VME_OFFSET_SET(src_entry, 0);
+ VME_OBJECT_SET(src_entry, object);
+ assert(src_entry->use_pmap);
+ assert(!map->mapped_in_other_pmaps);
+ } else if (src_entry->wired_count ||
+ object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
+ /*
+ * A wired memory region should not have
+ * any pending copy-on-write and needs to
+ * keep pointing at the VM object that
+ * contains the wired pages.
+ * If we're sharing this memory (copy=false),
+ * we'll share this VM object.
+ * If we're copying this memory (copy=true),
+ * we'll call vm_object_copy_slowly() below
+ * and use the new VM object for the remapping.
+ *
+ * Or, we are already using an asymmetric
+ * copy, and therefore we already have
+ * the right object.
+ */
+ assert(!src_entry->needs_copy);
+ } else if (src_entry->needs_copy || object->shadowed ||
+ (object->internal && !object->true_share &&
+ !src_entry->is_shared &&
+ object->vo_size > entry_size)) {
+ VME_OBJECT_SHADOW(src_entry, entry_size);
+ assert(src_entry->use_pmap);
+
+ if (!src_entry->needs_copy &&
+ (src_entry->protection & VM_PROT_WRITE)) {
+ vm_prot_t prot;
+
+ assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
+
+ prot = src_entry->protection & ~VM_PROT_WRITE;
+
+ if (override_nx(map,
+ VME_ALIAS(src_entry))
+ && prot) {
+ prot |= VM_PROT_EXECUTE;
+ }
+
+ assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
+
+ if (map->mapped_in_other_pmaps) {
+ vm_object_pmap_protect(
+ VME_OBJECT(src_entry),
+ VME_OFFSET(src_entry),
+ entry_size,
+ PMAP_NULL,
+ PAGE_SIZE,
+ src_entry->vme_start,
+ prot);
+#if MACH_ASSERT
+ } else if (__improbable(map->pmap == PMAP_NULL)) {
+ extern boolean_t vm_tests_in_progress;
+ assert(vm_tests_in_progress);
+ /*
+ * Some VM tests (in vm_tests.c)
+ * sometimes want to use a VM
+ * map without a pmap.
+ * Otherwise, this should never
+ * happen.
+ */
+#endif /* MACH_ASSERT */
+ } else {
+ pmap_protect(vm_map_pmap(map),
+ src_entry->vme_start,
+ src_entry->vme_end,
+ prot);
+ }
+ }
+
+ object = VME_OBJECT(src_entry);
+ src_entry->needs_copy = FALSE;
+ }
+
+
+ vm_object_lock(object);
+ vm_object_reference_locked(object); /* object ref. for new entry */
+ assert(!src_entry->needs_copy);
+ if (object->copy_strategy ==
+ MEMORY_OBJECT_COPY_SYMMETRIC) {
+ /*
+ * If we want to share this object (copy==0),
+ * it needs to be COPY_DELAY.
+ * If we want to copy this object (copy==1),
+ * we can't just set "needs_copy" on our side
+ * and expect the other side to do the same
+ * (symmetrically), so we can't let the object
+ * stay COPY_SYMMETRIC.
+ * So we always switch from COPY_SYMMETRIC to
+ * COPY_DELAY.
+ */
+ object->copy_strategy =
+ MEMORY_OBJECT_COPY_DELAY;
+ object->true_share = TRUE;
+ }
+ vm_object_unlock(object);
+ }
+
+ offset = (VME_OFFSET(src_entry) +
+ (src_start - src_entry->vme_start));
+
+ new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
+ vm_map_entry_copy(map, new_entry, src_entry);
+ if (new_entry->is_sub_map) {
+ /* clr address space specifics */
+ new_entry->use_pmap = FALSE;
+ } else if (copy) {
+ /*
+ * We're dealing with a copy-on-write operation,
+ * so the resulting mapping should not inherit the
+ * original mapping's accounting settings.
+ * "use_pmap" should be reset to its default (TRUE)
+ * so that the new mapping gets accounted for in
+ * the task's memory footprint.
+ */
+ new_entry->use_pmap = TRUE;
+ }
+ /* "iokit_acct" was cleared in vm_map_entry_copy() */
+ assert(!new_entry->iokit_acct);
+
+ new_entry->map_aligned = FALSE;
+
+ new_entry->vme_start = map_address;
+ new_entry->vme_end = map_address + tmp_size;
+ assert(new_entry->vme_start < new_entry->vme_end);
+ if (copy && vmk_flags.vmkf_remap_prot_copy) {
+ /*
+ * Remapping for vm_map_protect(VM_PROT_COPY)
+ * to convert a read-only mapping into a
+ * copy-on-write version of itself but
+ * with write access:
+ * keep the original inheritance and add
+ * VM_PROT_WRITE to the max protection.
+ */
+ new_entry->inheritance = src_entry->inheritance;
+ new_entry->protection &= max_prot_for_prot_copy;
+ new_entry->max_protection |= VM_PROT_WRITE;
+ } else {
+ new_entry->inheritance = inheritance;
+ if (!vm_remap_legacy) {
+ new_entry->protection = *cur_protection;
+ new_entry->max_protection = *max_protection;
+ }
+ }
+ VME_OFFSET_SET(new_entry, offset);
+
+ /*
+ * The new region has to be copied now if required.
+ */
+RestartCopy:
+ if (!copy) {
+ if (src_entry->used_for_jit == TRUE) {
+ if (same_map) {
+ } else if (!VM_MAP_POLICY_ALLOW_JIT_SHARING(map)) {
+ /*
+ * Cannot allow an entry describing a JIT
+ * region to be shared across address spaces.
+ */
+ result = KERN_INVALID_ARGUMENT;
+ break;
+ }
+ }
+
+ src_entry->is_shared = TRUE;
+ new_entry->is_shared = TRUE;
+ if (!(new_entry->is_sub_map)) {
+ new_entry->needs_copy = FALSE;
+ }
+ } else if (src_entry->is_sub_map) {
+ /* make this a COW sub_map if not already */
+ assert(new_entry->wired_count == 0);
+ new_entry->needs_copy = TRUE;
+ object = VM_OBJECT_NULL;
+ } else if (src_entry->wired_count == 0 &&
+ !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) &&
+ vm_object_copy_quickly(VME_OBJECT_PTR(new_entry),
+ VME_OFFSET(new_entry),
+ (new_entry->vme_end -
+ new_entry->vme_start),
+ &src_needs_copy,
+ &new_entry_needs_copy)) {
+ new_entry->needs_copy = new_entry_needs_copy;
+ new_entry->is_shared = FALSE;
+ assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
+
+ /*
+ * Handle copy_on_write semantics.
+ */
+ if (src_needs_copy && !src_entry->needs_copy) {
+ vm_prot_t prot;
+
+ assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
+
+ prot = src_entry->protection & ~VM_PROT_WRITE;
+
+ if (override_nx(map,
+ VME_ALIAS(src_entry))
+ && prot) {
+ prot |= VM_PROT_EXECUTE;
+ }
+
+ assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
+
+ vm_object_pmap_protect(object,
+ offset,
+ entry_size,
+ ((src_entry->is_shared
+ || map->mapped_in_other_pmaps) ?
+ PMAP_NULL : map->pmap),
+ VM_MAP_PAGE_SIZE(map),
+ src_entry->vme_start,
+ prot);
+
+ assert(src_entry->wired_count == 0);
+ src_entry->needs_copy = TRUE;
+ }
+ /*
+ * Throw away the old object reference of the new entry.
+ */
+ vm_object_deallocate(object);
+ } else {
+ new_entry->is_shared = FALSE;
+ assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
+
+ src_entry_was_wired = (src_entry->wired_count > 0);
+ saved_src_entry = src_entry;
+ src_entry = VM_MAP_ENTRY_NULL;
+
+ /*
+ * The map can be safely unlocked since we
+ * already hold a reference on the object.
+ *
+ * Record the timestamp of the map for later
+ * verification, and unlock the map.
+ */
+ version.main_timestamp = map->timestamp;
+ vm_map_unlock(map); /* Increments timestamp once! */
+
+ /*
+ * Perform the copy.
+ */
+ if (src_entry_was_wired > 0 ||
+ (debug4k_no_cow_copyin &&
+ VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT)) {
+ vm_object_lock(object);
+ result = vm_object_copy_slowly(
+ object,
+ offset,
+ (new_entry->vme_end -
+ new_entry->vme_start),
+ THREAD_UNINT,
+ VME_OBJECT_PTR(new_entry));
+
+ VME_OFFSET_SET(new_entry, offset - vm_object_trunc_page(offset));
+ new_entry->needs_copy = FALSE;
+ } else {
+ vm_object_offset_t new_offset;
+
+ new_offset = VME_OFFSET(new_entry);
+ result = vm_object_copy_strategically(
+ object,
+ offset,
+ (new_entry->vme_end -
+ new_entry->vme_start),
+ VME_OBJECT_PTR(new_entry),
+ &new_offset,
+ &new_entry_needs_copy);
+ if (new_offset != VME_OFFSET(new_entry)) {
+ VME_OFFSET_SET(new_entry, new_offset);
+ }
+
+ new_entry->needs_copy = new_entry_needs_copy;
+ }
+
+ /*
+ * Throw away the old object reference of the new entry.
+ */
+ vm_object_deallocate(object);
+
+ if (result != KERN_SUCCESS &&
+ result != KERN_MEMORY_RESTART_COPY) {
+ _vm_map_entry_dispose(map_header, new_entry);
+ vm_map_lock(map);
+ break;
+ }
+
+ /*
+ * Verify that the map has not substantially
+ * changed while the copy was being made.
+ */
+
+ vm_map_lock(map);
+ if (version.main_timestamp + 1 != map->timestamp) {
+ /*
+ * Simple version comparison failed.
+ *
+ * Retry the lookup and verify that the
+ * same object/offset are still present.
+ */
+ saved_src_entry = VM_MAP_ENTRY_NULL;
+ vm_object_deallocate(VME_OBJECT(new_entry));
+ _vm_map_entry_dispose(map_header, new_entry);
+ if (result == KERN_MEMORY_RESTART_COPY) {
+ result = KERN_SUCCESS;
+ }
+ continue;
+ }
+ /* map hasn't changed: src_entry is still valid */
+ src_entry = saved_src_entry;
+ saved_src_entry = VM_MAP_ENTRY_NULL;
+
+ if (result == KERN_MEMORY_RESTART_COPY) {
+ vm_object_reference(object);
+ goto RestartCopy;
+ }
+ }
+
+ _vm_map_store_entry_link(map_header,
+ map_header->links.prev, new_entry);
+
+ /* protections for submap mapping are irrelevant here */
+ if (vm_remap_legacy && !src_entry->is_sub_map) {
+ *cur_protection &= src_entry->protection;
+ *max_protection &= src_entry->max_protection;
+ }
+
+ map_address += tmp_size;
+ mapped_size += tmp_size;
+ src_start += tmp_size;
+
+ if (vmk_flags.vmkf_copy_single_object) {
+ if (mapped_size != size) {
+ DEBUG4K_SHARE("map %p addr 0x%llx size 0x%llx clipped copy at mapped_size 0x%llx\n", map, (uint64_t)addr, (uint64_t)size, (uint64_t)mapped_size);
+ if (src_entry->vme_next != vm_map_to_entry(map) &&
+ VME_OBJECT(src_entry->vme_next) == VME_OBJECT(src_entry)) {
+ /* XXX TODO4K */
+ DEBUG4K_ERROR("could have extended copy to next entry...\n");
+ }
+ }
+ break;
+ }
+ } /* end while */
+
+ vm_map_unlock(map);
+ if (result != KERN_SUCCESS) {
+ /*
+ * Free all allocated elements.
+ */
+ for (src_entry = map_header->links.next;
+ src_entry != CAST_TO_VM_MAP_ENTRY(&map_header->links);
+ src_entry = new_entry) {
+ new_entry = src_entry->vme_next;
+ _vm_map_store_entry_unlink(map_header, src_entry);
+ if (src_entry->is_sub_map) {
+ vm_map_deallocate(VME_SUBMAP(src_entry));
+ } else {
+ vm_object_deallocate(VME_OBJECT(src_entry));
+ }
+ _vm_map_entry_dispose(map_header, src_entry);
+ }
+ }
+ return result;
+}
+
+/*
+ * vm_map_is_exotic:
+ *
+ * Function-call wrapper around the VM_MAP_IS_EXOTIC() macro:
+ * evaluates the macro on "map" and hands back the result as a bool.
+ */
+bool
+vm_map_is_exotic(vm_map_t map)
+{
+	bool exotic;
+
+	exotic = VM_MAP_IS_EXOTIC(map);
+	return exotic;
+}
+
+/*
+ * vm_map_is_alien:
+ *
+ * Function-call wrapper around the VM_MAP_IS_ALIEN() macro:
+ * evaluates the macro on "map" and hands back the result as a bool.
+ */
+bool
+vm_map_is_alien(vm_map_t map)
+{
+	bool alien;
+
+	alien = VM_MAP_IS_ALIEN(map);
+	return alien;
+}
+
+#if XNU_TARGET_OS_OSX
+/*
+ * vm_map_mark_alien:
+ *
+ * Set the "is_alien" flag of "map".
+ */
+void
+vm_map_mark_alien(vm_map_t map)
+{
+	/* take the map lock around the flag update */
+	vm_map_lock(map);
+	map->is_alien = true;
+	vm_map_unlock(map);
+}
+
+/*
+ * vm_map_single_jit:
+ *
+ * Set the "single_jit" flag of "map".
+ */
+void
+vm_map_single_jit(vm_map_t map)
+{
+	/* take the map lock around the flag update */
+	vm_map_lock(map);
+	map->single_jit = true;
+	vm_map_unlock(map);
+}
+#endif /* XNU_TARGET_OS_OSX */
+
+/*
+ * vm_map_copy_to_physcopy:
+ *
+ * Replace the mappings described by "copy_map" with a single mapping
+ * of a newly allocated VM object that holds a physical copy of their
+ * contents (the equivalent of vm_allocate() + memcpy()), and switch
+ * "copy_map" to the page shift of "target_map".
+ *
+ * The copy goes through a temporary VM map (with its own pmap) in
+ * which both the original "copy_map" mappings and the new VM object
+ * get mapped.  Data is moved one PAGE_SIZE chunk at a time through a
+ * kernel buffer, using copyinmap() and copyoutmap().
+ *
+ * On return, "copy_map" has:
+ *	- a single entry covering [0, size) backed by the new VM object,
+ *	  where "size" is the original copy_map->size rounded up to
+ *	  PAGE_SIZE,
+ *	- an offset of 0 and a size of "size",
+ *	- the page shift of "target_map".
+ * The original entries have been unlinked, their object/submap
+ * references released, and the entries disposed of.
+ *
+ * There is no return value: copyinmap()/copyoutmap() failures are only
+ * reported via DEBUG4K_COPY() and all other failures are only caught
+ * by assertions.
+ */
+void vm_map_copy_to_physcopy(vm_map_copy_t copy_map, vm_map_t target_map);
+void
+vm_map_copy_to_physcopy(
+	vm_map_copy_t   copy_map,
+	vm_map_t        target_map)
+{
+	vm_map_size_t           size;
+	vm_map_entry_t          entry;
+	vm_map_entry_t          new_entry;
+	vm_object_t             new_object;
+	unsigned int            pmap_flags;
+	pmap_t                  new_pmap;
+	vm_map_t                new_map;
+	vm_map_address_t        src_start, src_end, src_cur;
+	vm_map_address_t        dst_start, dst_end, dst_cur;
+	kern_return_t           kr;
+	void                    *kbuf;
+
+	/*
+	 * Perform the equivalent of vm_allocate() and memcpy().
+	 * Replace the mappings in "copy_map" with the newly allocated mapping.
+	 */
+	DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) BEFORE\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);
+
+	/*
+	 * A physical copy is only expected when the page sizes differ.
+	 * Compare page shift against page shift: comparing it against the
+	 * target's page *mask* could never fail and checked nothing.
+	 */
+	assert(copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(target_map));
+
+	/* allocate new VM object */
+	size = VM_MAP_ROUND_PAGE(copy_map->size, PAGE_MASK);
+	new_object = vm_object_allocate(size);
+	assert(new_object);
+
+	/* allocate new VM map entry */
+	new_entry = vm_map_copy_entry_create(copy_map, FALSE);
+	assert(new_entry);
+
+	/* finish initializing new VM map entry */
+	new_entry->protection = VM_PROT_DEFAULT;
+	new_entry->max_protection = VM_PROT_DEFAULT;
+	new_entry->use_pmap = TRUE;
+
+	/* make new VM map entry point to new VM object */
+	new_entry->vme_start = 0;
+	new_entry->vme_end = size;
+	VME_OBJECT_SET(new_entry, new_object);
+	VME_OFFSET_SET(new_entry, 0);
+
+	/* create a new pmap to map "copy_map" */
+	pmap_flags = 0;
+	assert(copy_map->cpy_hdr.page_shift == FOURK_PAGE_SHIFT);
+#if PMAP_CREATE_FORCE_4K_PAGES
+	pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
+#endif /* PMAP_CREATE_FORCE_4K_PAGES */
+	pmap_flags |= PMAP_CREATE_64BIT;
+	new_pmap = pmap_create_options(NULL, (vm_map_size_t)0, pmap_flags);
+	assert(new_pmap);
+
+	/* create a new pageable VM map to map "copy_map" */
+	new_map = vm_map_create(new_pmap, 0, MACH_VM_MAX_ADDRESS, TRUE);
+	assert(new_map);
+	vm_map_set_page_shift(new_map, copy_map->cpy_hdr.page_shift);
+
+	/* map "copy_map" in the new VM map */
+	src_start = 0;
+	kr = vm_map_copyout_internal(
+		new_map,
+		&src_start,
+		copy_map,
+		copy_map->size,
+		FALSE, /* consume_on_success */
+		VM_PROT_DEFAULT,
+		VM_PROT_DEFAULT,
+		VM_INHERIT_DEFAULT);
+	assert(kr == KERN_SUCCESS);
+	src_end = src_start + copy_map->size;
+
+	/* map "new_object" in the new VM map */
+	vm_object_reference(new_object); /* extra ref. for the temporary mapping */
+	dst_start = 0;
+	kr = vm_map_enter(new_map,
+	    &dst_start,
+	    size,
+	    0,               /* mask */
+	    VM_FLAGS_ANYWHERE,
+	    VM_MAP_KERNEL_FLAGS_NONE,
+	    VM_KERN_MEMORY_OSFMK,
+	    new_object,
+	    0,               /* offset */
+	    FALSE,               /* needs copy */
+	    VM_PROT_DEFAULT,
+	    VM_PROT_DEFAULT,
+	    VM_INHERIT_DEFAULT);
+	assert(kr == KERN_SUCCESS);
+	dst_end = dst_start + size;
+
+	/* get a kernel buffer */
+	kbuf = kheap_alloc(KHEAP_TEMP, PAGE_SIZE, Z_WAITOK);
+	assert(kbuf);
+
+	/* physically copy "copy_map" mappings to new VM object */
+	for (src_cur = src_start, dst_cur = dst_start;
+	    src_cur < src_end;
+	    src_cur += PAGE_SIZE, dst_cur += PAGE_SIZE) {
+		vm_size_t bytes;
+
+		bytes = PAGE_SIZE;
+		if (src_cur + PAGE_SIZE > src_end) {
+			/* partial copy for last page */
+			bytes = src_end - src_cur;
+			assert(bytes > 0 && bytes < PAGE_SIZE);
+			/* rest of dst page should be zero-filled */
+		}
+		/* get bytes from src mapping */
+		kr = copyinmap(new_map, src_cur, kbuf, bytes);
+		if (kr != KERN_SUCCESS) {
+			/* failure is only logged: the copy carries on */
+			DEBUG4K_COPY("copyinmap(%p, 0x%llx, %p, 0x%llx) kr 0x%x\n", new_map, (uint64_t)src_cur, kbuf, (uint64_t)bytes, kr);
+		}
+		/* put bytes in dst mapping */
+		assert(dst_cur < dst_end);
+		assert(dst_cur + bytes <= dst_end);
+		kr = copyoutmap(new_map, kbuf, dst_cur, bytes);
+		if (kr != KERN_SUCCESS) {
+			/* failure is only logged: the copy carries on */
+			DEBUG4K_COPY("copyoutmap(%p, %p, 0x%llx, 0x%llx) kr 0x%x\n", new_map, kbuf, (uint64_t)dst_cur, (uint64_t)bytes, kr);
+		}
+	}
+
+	/* free kernel buffer */
+	kheap_free(KHEAP_TEMP, kbuf, PAGE_SIZE);
+	kbuf = NULL;
+
+	/* destroy new map */
+	vm_map_destroy(new_map, VM_MAP_REMOVE_NO_FLAGS);
+	new_map = VM_MAP_NULL;
+
+	/* dispose of the old map entries in "copy_map" */
+	while (vm_map_copy_first_entry(copy_map) !=
+	    vm_map_copy_to_entry(copy_map)) {
+		entry = vm_map_copy_first_entry(copy_map);
+		vm_map_copy_entry_unlink(copy_map, entry);
+		if (entry->is_sub_map) {
+			vm_map_deallocate(VME_SUBMAP(entry));
+		} else {
+			vm_object_deallocate(VME_OBJECT(entry));
+		}
+		vm_map_copy_entry_dispose(copy_map, entry);
+	}
+
+	/* change "copy_map"'s page_size to match "target_map" */
+	copy_map->cpy_hdr.page_shift = VM_MAP_PAGE_SHIFT(target_map);
+	copy_map->offset = 0;
+	copy_map->size = size;
+
+	/* insert new map entry in "copy_map" */
+	assert(vm_map_copy_last_entry(copy_map) == vm_map_copy_to_entry(copy_map));
+	vm_map_copy_entry_link(copy_map, vm_map_copy_last_entry(copy_map), new_entry);
+
+	DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) AFTER\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);
+}
+
+void
+vm_map_copy_adjust_get_target_copy_map(
+	vm_map_copy_t   copy_map,
+	vm_map_copy_t   *target_copy_map_p);
+
+/*
+ * vm_map_copy_adjust_get_target_copy_map:
+ *
+ * Make sure "*target_copy_map_p" designates a copy map that can be
+ * modified.
+ *
+ * If the caller already supplied one (non-NULL), it is left untouched.
+ * Otherwise a new entry-list copy map is allocated, given the same
+ * type, offset, size and page shift as "copy_map", populated with a
+ * duplicate of each of its entries (taking a reference on the submap
+ * or VM object each duplicate points to), and stored in
+ * "*target_copy_map_p".
+ */
+void
+vm_map_copy_adjust_get_target_copy_map(
+	vm_map_copy_t   copy_map,
+	vm_map_copy_t   *target_copy_map_p)
+{
+	vm_map_copy_t   dup_map;
+	vm_map_entry_t  src_entry;
+	vm_map_entry_t  dup_entry;
+
+	/* the caller already has a "target_copy_map": use it */
+	if (*target_copy_map_p != VM_MAP_COPY_NULL) {
+		return;
+	}
+
+	/* no "target_copy_map" yet: build a duplicate of "copy_map" */
+	dup_map = vm_map_copy_allocate();
+	dup_map->type = copy_map->type;
+	assert(dup_map->type == VM_MAP_COPY_ENTRY_LIST);
+	dup_map->offset = copy_map->offset;
+	dup_map->size = copy_map->size;
+	dup_map->cpy_hdr.page_shift = copy_map->cpy_hdr.page_shift;
+	vm_map_store_init(&dup_map->cpy_hdr);
+
+	/* duplicate the entries, in order, at the tail of the new list */
+	src_entry = vm_map_copy_first_entry(copy_map);
+	while (src_entry != vm_map_copy_to_entry(copy_map)) {
+		dup_entry = vm_map_copy_entry_create(dup_map, FALSE);
+		vm_map_entry_copy_full(dup_entry, src_entry);
+
+		/* the duplicate needs its own ref. on what it points to */
+		if (dup_entry->is_sub_map) {
+			vm_map_reference(VME_SUBMAP(dup_entry));
+		} else {
+			vm_object_reference(VME_OBJECT(dup_entry));
+		}
+
+		vm_map_copy_entry_link(
+			dup_map,
+			vm_map_copy_last_entry(dup_map),
+			dup_entry);
+
+		src_entry = src_entry->vme_next;
+	}
+	src_entry = VM_MAP_ENTRY_NULL;
+
+	/* hand the new copy map back to the caller */
+	*target_copy_map_p = dup_map;
+}
+
+void
+vm_map_copy_trim(
+	vm_map_copy_t   copy_map,
+	int             new_page_shift,
+	vm_map_offset_t trim_start,
+	vm_map_offset_t trim_end);
+
+/*
+ * vm_map_copy_trim:
+ *
+ * Remove the range ["trim_start", "trim_end") from the entry-list
+ * copy map "copy_map".  Both offsets are relative to the start of the
+ * copy map's first entry.
+ *
+ * Entries straddling a boundary of the range are clipped, using
+ * "new_page_shift" as the copy map's page shift while clipping.
+ * Each entry (or piece of entry) inside the range is unlinked, its
+ * submap or VM object reference is released, the entry is disposed of
+ * and its length is subtracted from copy_map->size.
+ *
+ * The copy map's original page shift is restored before returning.
+ */
+void
+vm_map_copy_trim(
+	vm_map_copy_t   copy_map,
+	int             new_page_shift,
+	vm_map_offset_t trim_start,
+	vm_map_offset_t trim_end)
+{
+	vm_map_entry_t  cur, following;
+	vm_map_offset_t base;
+	int             saved_page_shift;
+
+	assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
+	assert(copy_map->cpy_hdr.nentries > 0);
+
+	/* turn the relative offsets into addresses within the copy map */
+	base = vm_map_copy_first_entry(copy_map)->vme_start;
+	trim_start += base;
+	trim_end += base;
+
+	/* clip with the new page_shift; the original is put back below */
+	saved_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
+	copy_map->cpy_hdr.page_shift = new_page_shift;
+
+	cur = vm_map_copy_first_entry(copy_map);
+	while (cur != vm_map_copy_to_entry(copy_map)) {
+		/* grab the successor now: "cur" might get disposed of */
+		following = cur->vme_next;
+
+		/* entries ending at or before "trim_start" are left alone */
+		if (cur->vme_end > trim_start) {
+			if (cur->vme_start >= trim_end) {
+				/* beyond the trim range: nothing left to do */
+				break;
+			}
+
+			/* isolate the part of "cur" inside the trim range */
+			vm_map_copy_clip_start(copy_map, cur, trim_start);
+			vm_map_copy_clip_end(copy_map, cur, trim_end);
+
+			/* ... and get rid of it */
+			copy_map->size -= cur->vme_end - cur->vme_start;
+			vm_map_copy_entry_unlink(copy_map, cur);
+			if (cur->is_sub_map) {
+				vm_map_deallocate(VME_SUBMAP(cur));
+			} else {
+				vm_object_deallocate(VME_OBJECT(cur));
+			}
+			vm_map_copy_entry_dispose(copy_map, cur);
+		}
+
+		cur = following;
+	}
+
+	/* restore copy_map's original page_shift */
+	copy_map->cpy_hdr.page_shift = saved_page_shift;
+}
+
+/*
+ * Make any necessary adjustments to "copy_map" to allow it to be
+ * mapped into "target_map".
+ * If no changes were necessary, "target_copy_map" points to the
+ * untouched "copy_map".
+ * If changes are necessary, changes will be made to "target_copy_map".
+ * If "target_copy_map" was NULL, we create a new "vm_map_copy_t" and
+ * copy the original "copy_map" to it before applying the changes.
+ * The caller should discard "target_copy_map" if it's not the same as
+ * the original "copy_map".
+ */
+/* TODO4K: also adjust to sub-range in the copy_map -> add start&end? */
+/*
+ * Parameters:
+ *	src_copy_map		entry-list copy map to adjust (its type is
+ *				asserted to be VM_MAP_COPY_ENTRY_LIST)
+ *	offset, size		sub-range of "src_copy_map" to keep; a range
+ *				that wraps around or extends past the end of
+ *				the copy map is rejected
+ *	target_map		map whose page shift/mask the result must match
+ *	copy			TRUE: mis-alignments inside the range are
+ *				resolved by calling vm_map_copy_to_physcopy();
+ *				FALSE: such mis-alignments fail the call
+ *	target_copy_map_p	IN: copy map to modify, or VM_MAP_COPY_NULL
+ *				OUT (on success): the adjusted copy map
+ *	overmap_start_p		OUT (on success): bytes over-mapped at the start
+ *	overmap_end_p		OUT (on success): bytes over-mapped at the end
+ *	trimmed_start_p		OUT: bytes trimmed from the start; written once
+ *				the range has been validated
+ *
+ * Returns:
+ *	KERN_SUCCESS		the adjusted copy map is in *target_copy_map_p
+ *	KERN_INVALID_ARGUMENT	[offset, offset + size) is not within the copy
+ *	KERN_NOT_SUPPORTED	mis-aligned within the range and "copy" is FALSE
+ */
+kern_return_t
+vm_map_copy_adjust_to_target(
+	vm_map_copy_t src_copy_map,
+	vm_map_offset_t offset,
+	vm_map_size_t size,
+	vm_map_t target_map,
+	boolean_t copy,
+	vm_map_copy_t *target_copy_map_p,
+	vm_map_offset_t *overmap_start_p,
+	vm_map_offset_t *overmap_end_p,
+	vm_map_offset_t *trimmed_start_p)
+{
+	vm_map_copy_t copy_map, target_copy_map;
+	vm_map_size_t target_size;
+	vm_map_size_t src_copy_map_size;
+	vm_map_size_t overmap_start, overmap_end;
+	int misalignments;
+	vm_map_entry_t entry, target_entry;
+	vm_map_offset_t addr_adjustment;
+	vm_map_offset_t new_start, new_end;
+	int copy_page_mask, target_page_mask;
+	int copy_page_shift, target_page_shift;
+	vm_map_offset_t trimmed_end;
+
+	/*
+	 * Assert that the vm_map_copy is coming from the right
+	 * zone and hasn't been forged
+	 */
+	vm_map_copy_require(src_copy_map);
+	assert(src_copy_map->type == VM_MAP_COPY_ENTRY_LIST);
+
+	/*
+	 * Start working with "src_copy_map" but we'll switch
+	 * to "target_copy_map" as soon as we start making adjustments.
+	 */
+	copy_map = src_copy_map;
+	src_copy_map_size = src_copy_map->size;
+
+	copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
+	copy_page_mask = VM_MAP_COPY_PAGE_MASK(copy_map);
+	target_page_shift = VM_MAP_PAGE_SHIFT(target_map);
+	target_page_mask = VM_MAP_PAGE_MASK(target_map);
+
+	DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p...\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, *target_copy_map_p);
+
+	target_copy_map = *target_copy_map_p;
+	if (target_copy_map != VM_MAP_COPY_NULL) {
+		vm_map_copy_require(target_copy_map);
+	}
+
+	/*
+	 * The requested range must lie within the copy map.
+	 * "offset + size" is unsigned arithmetic: check for wrap-around
+	 * explicitly, otherwise a huge offset/size pair could wrap to a small
+	 * value and slip past the upper-bound comparison.
+	 */
+	if (offset + size < offset ||
+	    offset + size > copy_map->size) {
+		DEBUG4K_ERROR("copy_map %p (%d->%d) copy_map->size 0x%llx offset 0x%llx size 0x%llx KERN_INVALID_ARGUMENT\n", copy_map, copy_page_shift, target_page_shift, (uint64_t)copy_map->size, (uint64_t)offset, (uint64_t)size);
+		return KERN_INVALID_ARGUMENT;
+	}
+
+	/* trim the end */
+	trimmed_end = 0;
+	new_end = VM_MAP_ROUND_PAGE(offset + size, target_page_mask);
+	if (new_end < copy_map->size) {
+		trimmed_end = src_copy_map_size - new_end;
+		DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim end from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)new_end, (uint64_t)copy_map->size);
+		/* get "target_copy_map" if needed and adjust it */
+		vm_map_copy_adjust_get_target_copy_map(copy_map,
+		    &target_copy_map);
+		copy_map = target_copy_map;
+		vm_map_copy_trim(target_copy_map, target_page_shift,
+		    new_end, copy_map->size);
+	}
+
+	/* trim the start */
+	new_start = VM_MAP_TRUNC_PAGE(offset, target_page_mask);
+	if (new_start != 0) {
+		DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim start from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)0, (uint64_t)new_start);
+		/* get "target_copy_map" if needed and adjust it */
+		vm_map_copy_adjust_get_target_copy_map(copy_map,
+		    &target_copy_map);
+		copy_map = target_copy_map;
+		vm_map_copy_trim(target_copy_map, target_page_shift,
+		    0, new_start);
+	}
+	*trimmed_start_p = new_start;
+
+	/* target_size starts with what's left after trimming */
+	target_size = copy_map->size;
+	assertf(target_size == src_copy_map_size - *trimmed_start_p - trimmed_end,
+	    "target_size 0x%llx src_copy_map_size 0x%llx trimmed_start 0x%llx trimmed_end 0x%llx\n",
+	    (uint64_t)target_size, (uint64_t)src_copy_map_size,
+	    (uint64_t)*trimmed_start_p, (uint64_t)trimmed_end);
+
+	/*
+	 * check for misalignments but don't adjust yet
+	 * (in this pass "overmap_start" and "overmap_end" are only used as
+	 * counters; the actual byte amounts are computed in the second pass)
+	 */
+	misalignments = 0;
+	overmap_start = 0;
+	overmap_end = 0;
+	if (copy_page_shift < target_page_shift) {
+		/*
+		 * Remapping from 4K to 16K: check the VM object alignments
+		 * throughout the range.
+		 * If the start and end of the range are mis-aligned, we can
+		 * over-map to re-align, and adjust the "overmap" start/end
+		 * and "target_size" of the range accordingly.
+		 * If there is any mis-alignment within the range:
+		 *     if "copy":
+		 *         we can do immediate-copy instead of copy-on-write,
+		 *     else:
+		 *         no way to remap and share; fail.
+		 */
+		for (entry = vm_map_copy_first_entry(copy_map);
+		    entry != vm_map_copy_to_entry(copy_map);
+		    entry = entry->vme_next) {
+			vm_object_offset_t object_offset_start, object_offset_end;
+
+			object_offset_start = VME_OFFSET(entry);
+			object_offset_end = object_offset_start;
+			object_offset_end += entry->vme_end - entry->vme_start;
+			if (object_offset_start & target_page_mask) {
+				if (entry == vm_map_copy_first_entry(copy_map) && !copy) {
+					overmap_start++;
+				} else {
+					misalignments++;
+				}
+			}
+			if (object_offset_end & target_page_mask) {
+				if (entry->vme_next == vm_map_copy_to_entry(copy_map) && !copy) {
+					overmap_end++;
+				} else {
+					misalignments++;
+				}
+			}
+		}
+	}
+	entry = VM_MAP_ENTRY_NULL;
+
+	/* decide how to deal with misalignments */
+	assert(overmap_start <= 1);
+	assert(overmap_end <= 1);
+	if (!overmap_start && !overmap_end && !misalignments) {
+		/* copy_map is properly aligned for target_map ... */
+		if (*trimmed_start_p) {
+			/* ... but we trimmed it, so still need to adjust */
+		} else {
+			/* ... and we didn't trim anything: we're done */
+			if (target_copy_map == VM_MAP_COPY_NULL) {
+				target_copy_map = copy_map;
+			}
+			*target_copy_map_p = target_copy_map;
+			*overmap_start_p = 0;
+			*overmap_end_p = 0;
+			DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
+			return KERN_SUCCESS;
+		}
+	} else if (misalignments && !copy) {
+		/* can't "share" if misaligned */
+		DEBUG4K_ADJUST("unsupported sharing\n");
+#if MACH_ASSERT
+		if (debug4k_panic_on_misaligned_sharing) {
+			panic("DEBUG4k %s:%d unsupported sharing\n", __FUNCTION__, __LINE__);
+		}
+#endif /* MACH_ASSERT */
+		DEBUG4K_ADJUST("copy_map %p (%d) target_map %p (%d) copy %d target_copy_map %p -> KERN_NOT_SUPPORTED\n", copy_map, copy_page_shift, target_map, target_page_shift, copy, *target_copy_map_p);
+		/*
+		 * NOTE(review): *target_copy_map_p is not updated on this
+		 * path.  If the trimming above obtained a new
+		 * "target_copy_map", confirm who is responsible for
+		 * discarding it.
+		 */
+		return KERN_NOT_SUPPORTED;
+	} else {
+		/* can't virtual-copy if misaligned (but can physical-copy) */
+		DEBUG4K_ADJUST("mis-aligned copying\n");
+	}
+
+	/* get a "target_copy_map" if needed and switch to it */
+	vm_map_copy_adjust_get_target_copy_map(copy_map, &target_copy_map);
+	copy_map = target_copy_map;
+
+	if (misalignments && copy) {
+		vm_map_size_t target_copy_map_size;
+
+		/*
+		 * Can't do copy-on-write with misaligned mappings.
+		 * Replace the mappings with a physical copy of the original
+		 * mappings' contents.
+		 */
+		target_copy_map_size = target_copy_map->size;
+		vm_map_copy_to_physcopy(target_copy_map, target_map);
+		*target_copy_map_p = target_copy_map;
+		*overmap_start_p = 0;
+		/* report any size change made by the physical copy as end over-map */
+		*overmap_end_p = target_copy_map->size - target_copy_map_size;
+		DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx)-> trimmed 0x%llx overmap start 0x%llx end 0x%llx PHYSCOPY\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
+		return KERN_SUCCESS;
+	}
+
+	/*
+	 * apply the adjustments
+	 * (from here on "overmap_start" and "overmap_end" hold byte amounts)
+	 */
+	misalignments = 0;
+	overmap_start = 0;
+	overmap_end = 0;
+	/* remove copy_map->offset, so that everything starts at offset 0 */
+	addr_adjustment = copy_map->offset;
+	/* also remove whatever we trimmed from the start */
+	addr_adjustment += *trimmed_start_p;
+	for (target_entry = vm_map_copy_first_entry(target_copy_map);
+	    target_entry != vm_map_copy_to_entry(target_copy_map);
+	    target_entry = target_entry->vme_next) {
+		vm_object_offset_t object_offset_start, object_offset_end;
+
+		DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx BEFORE\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
+		object_offset_start = VME_OFFSET(target_entry);
+		if (object_offset_start & target_page_mask) {
+			DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at start\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
+			if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
+				/*
+				 * start of 1st entry is mis-aligned:
+				 * re-adjust by over-mapping.
+				 */
+				overmap_start = object_offset_start - trunc_page_mask_64(object_offset_start, target_page_mask);
+				DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_start 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_start);
+				VME_OFFSET_SET(target_entry, VME_OFFSET(target_entry) - overmap_start);
+			} else {
+				misalignments++;
+				DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
+				assert(copy);
+			}
+		}
+
+		/*
+		 * The first entry keeps its start and grows by "overmap_start";
+		 * every later entry is shifted up by that same amount.
+		 */
+		if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
+			target_size += overmap_start;
+		} else {
+			target_entry->vme_start += overmap_start;
+		}
+		target_entry->vme_end += overmap_start;
+
+		object_offset_end = VME_OFFSET(target_entry) + target_entry->vme_end - target_entry->vme_start;
+		if (object_offset_end & target_page_mask) {
+			DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at end\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
+			if (target_entry->vme_next == vm_map_copy_to_entry(target_copy_map)) {
+				/*
+				 * end of last entry is mis-aligned: re-adjust by over-mapping.
+				 */
+				overmap_end = round_page_mask_64(object_offset_end, target_page_mask) - object_offset_end;
+				DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_end 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_end);
+				target_entry->vme_end += overmap_end;
+				target_size += overmap_end;
+			} else {
+				misalignments++;
+				DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
+				assert(copy);
+			}
+		}
+		target_entry->vme_start -= addr_adjustment;
+		target_entry->vme_end -= addr_adjustment;
+		DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx AFTER\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
+	}
+
+	/* publish the new geometry in the copy map's header */
+	target_copy_map->size = target_size;
+	target_copy_map->offset += overmap_start;
+	target_copy_map->offset -= addr_adjustment;
+	target_copy_map->cpy_hdr.page_shift = target_page_shift;
+
+//	assert(VM_MAP_PAGE_ALIGNED(target_copy_map->size, target_page_mask));
+//	assert(VM_MAP_PAGE_ALIGNED(target_copy_map->offset, FOURK_PAGE_MASK));
+	assert(overmap_start < VM_MAP_PAGE_SIZE(target_map));
+	assert(overmap_end < VM_MAP_PAGE_SIZE(target_map));
+
+	*target_copy_map_p = target_copy_map;
+	*overmap_start_p = overmap_start;
+	*overmap_end_p = overmap_end;
+
+	DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
+	return KERN_SUCCESS;
+}
+
+/*
+ *	Routine:	vm_map_range_physical_size
+ *
+ *	Report in "*phys_size" the size covered by the range
+ *	[start, start + size) of "map".
+ *
+ *	The size is first computed by rounding the range to the map's own
+ *	page size.  When the map's page size equals PAGE_SIZE, or when
+ *	"start" is 0 (the range is then rounded with PAGE_MASK), that
+ *	rounded size is the answer.  Otherwise the range is extracted into
+ *	a temporary copy map, adjusted to "kernel_map" with
+ *	vm_map_copy_adjust_to_target(), and the adjusted copy map's size is
+ *	reported.  The temporary copy map is discarded before returning.
+ *
+ *	Returns KERN_INVALID_ARGUMENT (with "*phys_size" set to 0) if the
+ *	range wraps around the address space.  Any failure from
+ *	vm_map_copy_extract() or vm_map_copy_adjust_to_target() is
+ *	returned as is, also with "*phys_size" set to 0.
+ */
+kern_return_t
+vm_map_range_physical_size(
+	vm_map_t map,
+	vm_map_address_t start,
+	mach_vm_size_t size,
+	mach_vm_size_t * phys_size)
+{
+	kern_return_t kr;
+	vm_map_copy_t copy_map, target_copy_map;
+	vm_map_offset_t end;
+	vm_map_offset_t adjusted_start, adjusted_end;
+	vm_map_size_t adjusted_size;
+	vm_prot_t cur_prot, max_prot;
+	vm_map_offset_t overmap_start, overmap_end, trimmed_start;
+	vm_map_kernel_flags_t vmk_flags;
+
+	/* "start + size" is unsigned: reject ranges that wrap around */
+	end = start + size;
+	if (end < start) {
+		*phys_size = 0;
+		return KERN_INVALID_ARGUMENT;
+	}
+
+	adjusted_start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map));
+	adjusted_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
+	if (adjusted_end < end) {
+		/* rounding up must never go down: it wrapped around */
+		*phys_size = 0;
+		return KERN_INVALID_ARGUMENT;
+	}
+	adjusted_size = adjusted_end - adjusted_start;
+	*phys_size = adjusted_size;
+	if (VM_MAP_PAGE_SIZE(map) == PAGE_SIZE) {
+		return KERN_SUCCESS;
+	}
+	if (start == 0) {
+		adjusted_start = vm_map_trunc_page(start, PAGE_MASK);
+		adjusted_end = vm_map_round_page(end, PAGE_MASK);
+		if (adjusted_end < end) {
+			/* rounding to PAGE_MASK wrapped around */
+			*phys_size = 0;
+			return KERN_INVALID_ARGUMENT;
+		}
+		adjusted_size = adjusted_end - adjusted_start;
+		*phys_size = adjusted_size;
+		return KERN_SUCCESS;
+	}
+	if (adjusted_size == 0) {
+		DEBUG4K_SHARE("map %p start 0x%llx size 0x%llx adjusted 0x%llx -> phys_size 0!\n", map, (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_size);
+		*phys_size = 0;
+		return KERN_SUCCESS;
+	}
+
+	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
+	vmk_flags.vmkf_copy_pageable = TRUE;
+	vmk_flags.vmkf_copy_same_map = TRUE;
+	assert(adjusted_size != 0);
+	cur_prot = VM_PROT_NONE; /* legacy mode */
+	max_prot = VM_PROT_NONE; /* legacy mode */
+	kr = vm_map_copy_extract(map, adjusted_start, adjusted_size,
+	    FALSE /* copy */,
+	    &copy_map,
+	    &cur_prot, &max_prot, VM_INHERIT_DEFAULT,
+	    vmk_flags);
+	if (kr != KERN_SUCCESS) {
+		DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, (uint64_t)size, (uint64_t)adjusted_size, kr);
+		*phys_size = 0;
+		return kr;
+	}
+	assert(copy_map != VM_MAP_COPY_NULL);
+	/* pass "copy_map" itself as the target copy map for the adjustment */
+	target_copy_map = copy_map;
+	DEBUG4K_ADJUST("adjusting...\n");
+	kr = vm_map_copy_adjust_to_target(
+		copy_map,
+		start - adjusted_start, /* offset */
+		size, /* size */
+		kernel_map,
+		FALSE, /* copy */
+		&target_copy_map,
+		&overmap_start,
+		&overmap_end,
+		&trimmed_start);
+	if (kr == KERN_SUCCESS) {
+		if (target_copy_map->size != *phys_size) {
+			DEBUG4K_ADJUST("map %p (%d) start 0x%llx size 0x%llx adjusted_start 0x%llx adjusted_end 0x%llx overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx phys_size 0x%llx -> 0x%llx\n", map, VM_MAP_PAGE_SHIFT(map), (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_start, (uint64_t)adjusted_end, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start, (uint64_t)*phys_size, (uint64_t)target_copy_map->size);
+		}
+		*phys_size = target_copy_map->size;
+	} else {
+		DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, (uint64_t)size, (uint64_t)adjusted_size, kr);
+		*phys_size = 0;
+	}
+	/* the copy map was only needed to compute the size */
+	vm_map_copy_discard(copy_map);
+	copy_map = VM_MAP_COPY_NULL;
+
+	return kr;
+}
+
+
+/*
+ *	Routine:	memory_entry_check_for_adjustment
+ *
+ *	Look up the named entry behind "port" and, while holding its lock,
+ *	run vm_map_copy_adjust_to_target() over the whole of its backing
+ *	copy map against "src_map".  This is only done when "src_map" is
+ *	not NULL and its page shift is smaller than PAGE_SHIFT; otherwise
+ *	KERN_SUCCESS is returned and "*overmap_start" / "*overmap_end" are
+ *	left untouched.
+ *
+ *	Returns the result of the adjustment, or KERN_SUCCESS when no
+ *	adjustment was attempted.
+ */
+kern_return_t
+memory_entry_check_for_adjustment(
+	vm_map_t src_map,
+	ipc_port_t port,
+	vm_map_offset_t *overmap_start,
+	vm_map_offset_t *overmap_end)
+{
+	vm_named_entry_t named_entry;
+	vm_map_copy_t backing_copy;
+	vm_map_copy_t adjusted_copy;
+	vm_map_offset_t trimmed_start;
+	boolean_t needs_adjustment;
+	kern_return_t kr;
+
+	assert(port);
+	assertf(ip_kotype(port) == IKOT_NAMED_ENTRY, "Port Type expected: %d...received:%d\n", IKOT_NAMED_ENTRY, ip_kotype(port));
+
+	kr = KERN_SUCCESS;
+	named_entry = (vm_named_entry_t) ipc_kobject_get(port);
+
+	named_entry_lock(named_entry);
+
+	/* the backing copy map doubles as the target of the adjustment */
+	backing_copy = named_entry->backing.copy;
+	adjusted_copy = backing_copy;
+
+	needs_adjustment = (src_map != VM_MAP_NULL &&
+	    VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT);
+	if (needs_adjustment) {
+		trimmed_start = 0;
+		DEBUG4K_ADJUST("adjusting...\n");
+		kr = vm_map_copy_adjust_to_target(
+			backing_copy,
+			0, /* offset */
+			backing_copy->size, /* size */
+			src_map,
+			FALSE, /* copy */
+			&adjusted_copy,
+			overmap_start,
+			overmap_end,
+			&trimmed_start);
+		/* the whole copy map was requested: nothing may be trimmed */
+		assert(trimmed_start == 0);
+	}
+
+	named_entry_unlock(named_entry);
+
+	return kr;
+}
+
+
+/*
+ * Routine: vm_remap
+ *
+ * Map portion of a task's address space.
+ * Mapped region must not overlap more than
+ * one vm memory object. Protections and
+ * inheritance attributes remain the same
+ * as in the original task and are out parameters.
+ * Source and Target task can be identical
+ * Other attributes are identical as for vm_map()
+ */
+kern_return_t
+vm_map_remap(
+ vm_map_t target_map,
+ vm_map_address_t *address,
+ vm_map_size_t size,
+ vm_map_offset_t mask,
+ int flags,
+ vm_map_kernel_flags_t vmk_flags,
+ vm_tag_t tag,
+ vm_map_t src_map,
+ vm_map_offset_t memory_address,
+ boolean_t copy,
+ vm_prot_t *cur_protection, /* IN/OUT */
+ vm_prot_t *max_protection, /* IN/OUT */
+ vm_inherit_t inheritance)
+{
+ kern_return_t result;
+ vm_map_entry_t entry;
+ vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
+ vm_map_entry_t new_entry;
+ vm_map_copy_t copy_map;
+ vm_map_offset_t offset_in_mapping;
+ vm_map_size_t target_size = 0;
+ vm_map_size_t src_page_mask, target_page_mask;
+ vm_map_offset_t overmap_start, overmap_end, trimmed_start;
+ vm_map_offset_t initial_memory_address;
+ vm_map_size_t initial_size;
+
+ if (target_map == VM_MAP_NULL) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ initial_memory_address = memory_address;
+ initial_size = size;
+ src_page_mask = VM_MAP_PAGE_MASK(src_map);
+ target_page_mask = VM_MAP_PAGE_MASK(target_map);
+
+ switch (inheritance) {
+ case VM_INHERIT_NONE:
+ case VM_INHERIT_COPY:
+ case VM_INHERIT_SHARE:
+ if (size != 0 && src_map != VM_MAP_NULL) {
+ break;
+ }
+ OS_FALLTHROUGH;
+ default:
+ return KERN_INVALID_ARGUMENT;
+ }