+ * For further convenience, we also use negative logic for the page state in the bit map.
+ * The bit is set to 1 to indicate it has not yet been seen, and to 0 to indicate it has
+ * been processed. This way we can simply test the 64-bit long word to see if it's zero
+ * to easily tell if the whole range has been processed. Therefore, the bit map starts
+ * out with all the bits set. The macros below hide all these details from the caller.
+ */
+
+#define PAGES_IN_A_CHUNK 64 /* The number of pages in the chunk must */
+ /* be the same as the number of bits in */
+ /* the chunk_state_t type. We use 64 */
+ /* just for convenience. */
+
+#define CHUNK_SIZE (PAGES_IN_A_CHUNK * PAGE_SIZE_64) /* Size of a chunk in bytes */
+
+typedef uint64_t chunk_state_t;
+
+/*
+ * The bit map uses negative logic, so we start out with all 64 bits set to indicate
+ * that no pages have been processed yet. Also, if len is less than the full CHUNK_SIZE,
+ * then we mark pages beyond the len as having been "processed" so that we don't waste time
+ * looking at pages in that range. This can save us from unnecessarily chasing down the
+ * shadow chain.
+ */
+
+#define CHUNK_INIT(c, len) \
+ MACRO_BEGIN \
+ uint64_t p; \
+ \
+ (c) = 0xffffffffffffffffLL; \
+ \
+ for (p = (len) / PAGE_SIZE_64; p < PAGES_IN_A_CHUNK; p++) \
+ MARK_PAGE_HANDLED(c, p); \
+ MACRO_END
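+
+/*
+ * For illustration: if 'len' covers only 3 pages, CHUNK_INIT leaves bits 0-2
+ * set (not yet seen) and clears bits 3-63 (treated as already handled). Once
+ * those 3 pages are processed, CHUNK_NOT_COMPLETE() returns false and we can
+ * stop walking the shadow chain early.
+ */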
+
+/*
+ * Return true if there are still pages in the chunk which have not yet been processed.
+ */
+
+#define CHUNK_NOT_COMPLETE(c) ((c) != 0)
+
+/*
+ * Return true if the page at offset 'p' in the bit map has already been handled
+ * while processing a higher level object in the shadow chain.
+ */
+
+#define PAGE_ALREADY_HANDLED(c, p) (((c) & (1LL << (p))) == 0)
+
+/*
+ * Mark the page at offset 'p' in the bit map as having been processed.
+ */
+
+#define MARK_PAGE_HANDLED(c, p) \
+MACRO_BEGIN \
+ (c) = (c) & ~(1LL << (p)); \
+MACRO_END
+
+
+/*
+ * Return true if the page at the given offset has been paged out. Object is
+ * locked upon entry and returned locked.
+ */
+
+static boolean_t
+page_is_paged_out(
+ vm_object_t object,
+ vm_object_offset_t offset)
+{
+ kern_return_t kr;
+ memory_object_t pager;
+
+ /*
+ * Check the existence map for the page if we have one, otherwise
+ * ask the pager about this page.
+ */
+
+#if MACH_PAGEMAP
+ if (object->existence_map) {
+ if (vm_external_state_get(object->existence_map, offset)
+ == VM_EXTERNAL_STATE_EXISTS) {
+ /*
+ * We found the page
+ */
+
+ return TRUE;
+ }
+ } else
+#endif
+ if (object->internal &&
+ object->alive &&
+ !object->terminating &&
+ object->pager_ready) {
+
+ /*
+ * We're already holding a "paging in progress" reference
+ * so the object can't disappear when we release the lock.
+ */
+
+ assert(object->paging_in_progress);
+ pager = object->pager;
+ vm_object_unlock(object);
+
+ kr = memory_object_data_request(
+ pager,
+ offset + object->paging_offset,
+ 0, /* just poke the pager */
+ VM_PROT_READ,
+ NULL);
+
+ vm_object_lock(object);
+
+ if (kr == KERN_SUCCESS) {
+
+ /*
+ * We found the page
+ */
+
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+
+/*
+ * Deactivate the pages in the specified object and range. If kill_page is set, also discard any
+ * page modified state from the pmap. Update the chunk_state as we go along. The caller must specify
+ * a size that is less than or equal to CHUNK_SIZE.
+ */
+
+static void
+deactivate_pages_in_object(
+ vm_object_t object,
+ vm_object_offset_t offset,
+ vm_object_size_t size,
+ boolean_t kill_page,
+ boolean_t reusable_page,
+#if !MACH_ASSERT
+ __unused
+#endif
+ boolean_t all_reusable,
+ chunk_state_t *chunk_state)
+{
+ vm_page_t m;
+ int p;
+ struct dw dw_array[DELAYED_WORK_LIMIT];
+ struct dw *dwp;
+ int dw_count;
+ unsigned int reusable = 0;
+
+
+ /*
+ * Examine each page in the chunk. The variable 'p' is the page number relative to the start of the
+ * chunk. Since this routine is called once for each level in the shadow chain, the chunk_state may
+ * have pages marked as having been processed already. We stop the loop early if we find we've handled
+ * all the pages in the chunk.
+ */
+
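+ /*
+ * Queue manipulations for the pages we touch are batched into the delayed
+ * work array and applied by dw_do_work() in groups of up to
+ * DELAYED_WORK_LIMIT entries, rather than one page at a time.
+ */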
+ dwp = &dw_array[0];
+ dw_count = 0;
+
+ for (p = 0; size && CHUNK_NOT_COMPLETE(*chunk_state); p++, size -= PAGE_SIZE_64, offset += PAGE_SIZE_64) {
+
+ /*
+ * If this offset has already been found and handled in a higher level object, then don't
+ * do anything with it in the current shadow object.
+ */
+
+ if (PAGE_ALREADY_HANDLED(*chunk_state, p))
+ continue;
+
+ /*
+ * See if the page at this offset is around. First check to see if the page is resident,
+ * then if not, check the existence map or with the pager.
+ */
+
+ if ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
+
+ /*
+ * We found a page we were looking for. Mark it as "handled" now in the chunk_state
+ * so that we won't bother looking for a page at this offset again if there are more
+ * shadow objects. Then deactivate the page.
+ */
+
+ MARK_PAGE_HANDLED(*chunk_state, p);
+
+ if ((!VM_PAGE_WIRED(m)) && (!m->private) && (!m->gobbled) && (!m->busy)) {
+ int clear_refmod;
+
+ assert(!m->laundry);
+
+ clear_refmod = VM_MEM_REFERENCED;
+ dwp->dw_mask = DW_clear_reference;
+
+ if ((kill_page) && (object->internal)) {
+ m->precious = FALSE;
+ m->dirty = FALSE;
+
+ clear_refmod |= VM_MEM_MODIFIED;
+ if (m->throttled) {
+ /*
+ * This page is now clean and
+ * reclaimable. Move it out
+ * of the throttled queue, so
+ * that vm_pageout_scan() can
+ * find it.
+ */
+ dwp->dw_mask |= DW_move_page;
+ }
+#if MACH_PAGEMAP
+ vm_external_state_clr(object->existence_map, offset);
+#endif /* MACH_PAGEMAP */
+
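+ /*
+ * Track how many pages we newly mark reusable here; the
+ * global reusable statistics are updated in bulk below.
+ */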
+ if (reusable_page && !m->reusable) {
+ assert(!all_reusable);
+ assert(!object->all_reusable);
+ m->reusable = TRUE;
+ object->reusable_page_count++;
+ assert(object->resident_page_count >= object->reusable_page_count);
+ reusable++;
+#if CONFIG_EMBEDDED
+ } else {
+ if (m->reusable) {
+ m->reusable = FALSE;
+ object->reusable_page_count--;
+ }
+#endif
+ }
+ }
+ pmap_clear_refmod(m->phys_page, clear_refmod);
+
+ if (!m->throttled && !(reusable_page || all_reusable))
+ dwp->dw_mask |= DW_move_page;
+ /*
+ * dw_do_work() may need to drop the object lock;
+ * if it does, we need the pages it's looking at to
+ * be held stable via the busy bit.
+ */
+ m->busy = TRUE;
+ dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
+
+ dwp->dw_m = m;
+ dwp++;
+ dw_count++;
+
+ if (dw_count >= DELAYED_WORK_LIMIT) {
+ if (reusable) {
+ OSAddAtomic(reusable,
+ &vm_page_stats_reusable.reusable_count);
+ vm_page_stats_reusable.reusable += reusable;
+ reusable = 0;
+ }
+ dw_do_work(object, &dw_array[0], dw_count);
+
+ dwp = &dw_array[0];
+ dw_count = 0;
+ }
+ }
+
+ } else {
+
+ /*
+ * The page at this offset isn't memory resident, check to see if it's
+ * been paged out. If so, mark it as handled so we don't bother looking
+ * for it in the shadow chain.
+ */
+
+ if (page_is_paged_out(object, offset)) {
+ MARK_PAGE_HANDLED(*chunk_state, p);
+
+ /*
+ * If we're killing a non-resident page, then clear the page in the existence
+ * map so we don't bother paging it back in if it's touched again in the future.
+ */
+
+ if ((kill_page) && (object->internal)) {
+#if MACH_PAGEMAP
+ vm_external_state_clr(object->existence_map, offset);
+#endif /* MACH_PAGEMAP */
+ }
+ }
+ }
+ }
+
+ if (reusable) {
+ OSAddAtomic(reusable, &vm_page_stats_reusable.reusable_count);
+ vm_page_stats_reusable.reusable += reusable;
+ reusable = 0;
+ }
+
+ if (dw_count)
+ dw_do_work(object, &dw_array[0], dw_count);
+}
+
+
+/*
+ * Deactive a "chunk" of the given range of the object starting at offset. A "chunk"
+ * will always be less than or equal to the given size. The total range is divided up
+ * into chunks for efficiency and performance related to the locks and handling the shadow
+ * chain. This routine returns how much of the given "size" it actually processed. It's
+ * up to the caller to loop and keep calling this routine until the entire range they want
+ * to process has been done.
+ */
+
+static vm_object_size_t
+deactivate_a_chunk(
+ vm_object_t orig_object,
+ vm_object_offset_t offset,
+ vm_object_size_t size,
+ boolean_t kill_page,
+ boolean_t reusable_page,
+ boolean_t all_reusable)
+{
+ vm_object_t object;
+ vm_object_t tmp_object;
+ vm_object_size_t length;
+ chunk_state_t chunk_state;
+
+
+ /*
+ * Get set to do a chunk. We'll do up to CHUNK_SIZE, but no more than the
+ * remaining size the caller asked for.
+ */
+
+ length = MIN(size, CHUNK_SIZE);
+
+ /*
+ * The chunk_state keeps track of which pages we've already processed if there's
+ * a shadow chain on this object. At this point, we haven't done anything with this
+ * range of pages yet, so initialize the state to indicate no pages processed yet.
+ */
+
+ CHUNK_INIT(chunk_state, length);
+ object = orig_object;
+
+ /*
+ * Start at the top level object and iterate around the loop once for each object
+ * in the shadow chain. We stop processing early if we've already found all the pages
+ * in the range. Otherwise we stop when we run out of shadow objects.
+ */
+
+ while (object && CHUNK_NOT_COMPLETE(chunk_state)) {
+ vm_object_paging_begin(object);
+
+ deactivate_pages_in_object(object, offset, length, kill_page, reusable_page, all_reusable, &chunk_state);
+
+ vm_object_paging_end(object);
+
+ /*
+ * We've finished with this object, see if there's a shadow object. If
+ * there is, update the offset and lock the new object. We also turn off
+ * kill_page, reusable_page and all_reusable at this point, since those
+ * only apply to the top-most object.
+ */
+
+ tmp_object = object->shadow;
+
+ if (tmp_object) {
+ kill_page = FALSE;
+ reusable_page = FALSE;
+ all_reusable = FALSE;
+ offset += object->shadow_offset;
+ vm_object_lock(tmp_object);
+ }
+
+ if (object != orig_object)
+ vm_object_unlock(object);
+
+ object = tmp_object;
+ }
+
+ if (object && object != orig_object)
+ vm_object_unlock(object);
+
+ return length;
+}
+
+
+
+/*
+ * Move any resident pages in the specified range to the inactive queue. If kill_page is set,
+ * we also clear the modified status of the page and "forget" any changes that have been made
+ * to the page.
+ */
+
+__private_extern__ void
+vm_object_deactivate_pages(
+ vm_object_t object,
+ vm_object_offset_t offset,
+ vm_object_size_t size,
+ boolean_t kill_page,
+ boolean_t reusable_page)
+{
+ vm_object_size_t length;
+ boolean_t all_reusable;
+
+ /*
+ * We break the range up into chunks and do one chunk at a time. This is for
+ * efficiency and performance while handling the shadow chains and the locks.
+ * The deactivate_a_chunk() function returns how much of the range it processed.
+ * We keep calling this routine until the given size is exhausted.
+ */
+
+
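+ /*
+ * If the caller is making the entire object reusable and no pages are
+ * individually marked reusable yet, flag the whole object instead of
+ * tagging each resident page individually.
+ */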
+ all_reusable = FALSE;
+ if (reusable_page &&
+ object->size != 0 &&
+ object->size == size &&
+ object->reusable_page_count == 0) {
+ all_reusable = TRUE;
+ reusable_page = FALSE;
+ }
+
+#if CONFIG_EMBEDDED
+ if ((reusable_page || all_reusable) && object->all_reusable) {
+ /* This means MADV_FREE_REUSABLE has been called twice, which
+ * is probably illegal. */
+ return;
+ }
+#endif
+
+ while (size) {
+ length = deactivate_a_chunk(object, offset, size, kill_page, reusable_page, all_reusable);
+
+ size -= length;
+ offset += length;
+ }
+
+ if (all_reusable) {
+ if (!object->all_reusable) {
+ unsigned int reusable;
+
+ object->all_reusable = TRUE;
+ assert(object->reusable_page_count == 0);
+ /* update global stats */
+ reusable = object->resident_page_count;
+ OSAddAtomic(reusable,
+ &vm_page_stats_reusable.reusable_count);
+ vm_page_stats_reusable.reusable += reusable;
+ vm_page_stats_reusable.all_reusable_calls++;
+ }
+ } else if (reusable_page) {
+ vm_page_stats_reusable.partial_reusable_calls++;
+ }
+}
+
+void
+vm_object_reuse_pages(
+ vm_object_t object,
+ vm_object_offset_t start_offset,
+ vm_object_offset_t end_offset,
+ boolean_t allow_partial_reuse)
+{
+ vm_object_offset_t cur_offset;
+ vm_page_t m;
+ unsigned int reused, reusable;
+
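+/*
+ * Clear the "reusable" state on a single page, keeping the object's
+ * reusable_page_count and the caller's 'reused' tally in sync.
+ */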
+#define VM_OBJECT_REUSE_PAGE(object, m, reused) \
+ MACRO_BEGIN \
+ if ((m) != VM_PAGE_NULL && \
+ (m)->reusable) { \
+ assert((object)->reusable_page_count <= \
+ (object)->resident_page_count); \
+ assert((object)->reusable_page_count > 0); \
+ (object)->reusable_page_count--; \
+ (m)->reusable = FALSE; \
+ (reused)++; \
+ } \
+ MACRO_END
+
+ reused = 0;
+ reusable = 0;
+
+ vm_object_lock_assert_exclusive(object);
+
+ if (object->all_reusable) {
+ assert(object->reusable_page_count == 0);
+ object->all_reusable = FALSE;
+ if (end_offset - start_offset == object->size ||
+ !allow_partial_reuse) {
+ vm_page_stats_reusable.all_reuse_calls++;
+ reused = object->resident_page_count;
+ } else {
+ vm_page_stats_reusable.partial_reuse_calls++;
+ queue_iterate(&object->memq, m, vm_page_t, listq) {
+ if (m->offset < start_offset ||
+ m->offset >= end_offset) {
+ m->reusable = TRUE;
+ object->reusable_page_count++;
+ assert(object->resident_page_count >= object->reusable_page_count);
+ continue;
+ } else {
+ assert(!m->reusable);
+ reused++;
+ }
+ }
+ }
+ } else if (object->resident_page_count >
+ ((end_offset - start_offset) >> PAGE_SHIFT)) {
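+ /*
+ * More pages are resident than there are pages in the given range,
+ * so it's cheaper to look up each offset in the range than to walk
+ * the object's entire resident page list.
+ */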
+ vm_page_stats_reusable.partial_reuse_calls++;
+ for (cur_offset = start_offset;
+ cur_offset < end_offset;
+ cur_offset += PAGE_SIZE_64) {
+ if (object->reusable_page_count == 0) {
+ break;
+ }
+ m = vm_page_lookup(object, cur_offset);
+ VM_OBJECT_REUSE_PAGE(object, m, reused);
+ }
+ } else {
+ vm_page_stats_reusable.partial_reuse_calls++;
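+ /*
+ * Fewer resident pages than pages in the range: walking the
+ * resident page list is cheaper than probing every offset.
+ */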
+ queue_iterate(&object->memq, m, vm_page_t, listq) {
+ if (object->reusable_page_count == 0) {
+ break;
+ }
+ if (m->offset < start_offset ||
+ m->offset >= end_offset) {
+ continue;
+ }
+ VM_OBJECT_REUSE_PAGE(object, m, reused);
+ }
+ }
+
+ /* update global stats */
+ OSAddAtomic(reusable - reused, &vm_page_stats_reusable.reusable_count);
+ vm_page_stats_reusable.reused += reused;
+ vm_page_stats_reusable.reusable += reusable;
+}
+
+/*
+ * Routine: vm_object_pmap_protect
+ *
+ * Purpose:
+ * Reduces the permission for all physical
+ * pages in the specified object range.
+ *
+ * If removing write permission only, it is
+ * sufficient to protect only the pages in
+ * the top-level object; only those pages may
+ * have write permission.
+ *
+ * If removing all access, we must follow the
+ * shadow chain from the top-level object to