+ printf("vm_page %p: \n", p);
+ printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
+ printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
+ printf(" next=%p\n", p->next);
+ printf(" object=%p offset=0x%llx\n", p->object, p->offset);
+ printf(" wire_count=%u\n", p->wire_count);
+
+ printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
+ (p->local ? "" : "!"),
+ (p->inactive ? "" : "!"),
+ (p->active ? "" : "!"),
+ (p->pageout_queue ? "" : "!"),
+ (p->speculative ? "" : "!"),
+ (p->laundry ? "" : "!"));
+ printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
+ (p->free ? "" : "!"),
+ (p->reference ? "" : "!"),
+ (p->gobbled ? "" : "!"),
+ (p->private ? "" : "!"),
+ (p->throttled ? "" : "!"));
+ printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
+ (p->busy ? "" : "!"),
+ (p->wanted ? "" : "!"),
+ (p->tabled ? "" : "!"),
+ (p->fictitious ? "" : "!"),
+ (p->pmapped ? "" : "!"),
+ (p->wpmapped ? "" : "!"));
+ printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
+ (p->pageout ? "" : "!"),
+ (p->absent ? "" : "!"),
+ (p->error ? "" : "!"),
+ (p->dirty ? "" : "!"),
+ (p->cleaning ? "" : "!"),
+ (p->precious ? "" : "!"),
+ (p->clustered ? "" : "!"));
+ printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
+ (p->overwriting ? "" : "!"),
+ (p->restart ? "" : "!"),
+ (p->unusual ? "" : "!"),
+ (p->encrypted ? "" : "!"),
+ (p->encrypted_cleaning ? "" : "!"));
+ printf(" %scs_validated, %scs_tainted, %sno_cache\n",
+ (p->cs_validated ? "" : "!"),
+ (p->cs_tainted ? "" : "!"),
+ (p->no_cache ? "" : "!"));
+
+ printf("phys_page=0x%x\n", p->phys_page);
+}
+
+/*
+ * Check that the list of pages is ordered by
+ * ascending physical address and has no holes.
+ *
+ * 'pages' is a NEXT_PAGE-linked list; 'npages' is the number of
+ * pages the caller expects on it.  Panics on any discontinuity or
+ * count mismatch; returns 1 so it can be wrapped in assert().
+ */
+static int
+vm_page_verify_contiguous(
+ vm_page_t pages,
+ unsigned int npages)
+{
+ vm_page_t m;
+ unsigned int page_count;
+ vm_offset_t prev_addr;
+
+ prev_addr = pages->phys_page;
+ page_count = 1;
+ for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
+ if (m->phys_page != prev_addr + 1) {
+ printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
+ m, (long)prev_addr, m->phys_page);
+ /* %u: page_count and npages are unsigned int */
+ printf("pages %p page_count %u npages %u\n", pages, page_count, npages);
+ panic("vm_page_verify_contiguous: not contiguous!");
+ }
+ prev_addr = m->phys_page;
+ ++page_count;
+ }
+ if (page_count != npages) {
+ printf("pages %p actual count 0x%x but requested 0x%x\n",
+ pages, page_count, npages);
+ panic("vm_page_verify_contiguous: count error");
+ }
+ return 1;
+}
+
+
+/*
+ * Check the free lists for proper length etc.
+ *
+ * Walk one free-page queue, verifying each page's back link and busy
+ * bit, and — when 'color' is not (unsigned int) -1 — its color and
+ * free bit ((unsigned int) -1 identifies the colorless lopage queue,
+ * for which those two checks are skipped).
+ *
+ * If 'look_for_page' is non-NULL, also verify that the page's
+ * presence on this queue matches 'expect_page'; when an expected
+ * page is missing, dump it, search the other color queues (and the
+ * lopage queue) for it, and panic.
+ *
+ * Returns the number of pages found on the queue.
+ */
+static unsigned int
+vm_page_verify_free_list(
+ queue_head_t *vm_page_queue,
+ unsigned int color,
+ vm_page_t look_for_page,
+ boolean_t expect_page)
+{
+ unsigned int npages;
+ vm_page_t m;
+ vm_page_t prev_m;
+ boolean_t found_page;
+
+ found_page = FALSE;
+ npages = 0;
+ /* seed with the queue head so the first element's prev link is checked too */
+ prev_m = (vm_page_t) vm_page_queue;
+ queue_iterate(vm_page_queue,
+ m,
+ vm_page_t,
+ pageq) {
+
+ if (m == look_for_page) {
+ found_page = TRUE;
+ }
+ if ((vm_page_t) m->pageq.prev != prev_m)
+ panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
+ color, npages, m, m->pageq.prev, prev_m);
+ /* pages on the free list are always marked busy */
+ if ( ! m->busy )
+ panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
+ color, npages, m);
+ if (color != (unsigned int) -1) {
+ if ((m->phys_page & vm_color_mask) != color)
+ panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
+ color, npages, m, m->phys_page & vm_color_mask, color);
+ if ( ! m->free )
+ panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
+ color, npages, m);
+ }
+ ++npages;
+ prev_m = m;
+ }
+ if (look_for_page != VM_PAGE_NULL) {
+ unsigned int other_color;
+
+ if (expect_page && !found_page) {
+ /* expected page is missing: dump diagnostics, then look
+ * for it on every other queue before panicking */
+ printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
+ color, npages, look_for_page, look_for_page->phys_page);
+ _vm_page_print(look_for_page);
+ for (other_color = 0;
+ other_color < vm_colors;
+ other_color++) {
+ if (other_color == color)
+ continue;
+ vm_page_verify_free_list(&vm_page_queue_free[other_color],
+ other_color, look_for_page, FALSE);
+ }
+ if (color == (unsigned int) -1) {
+ vm_page_verify_free_list(&vm_lopage_queue_free,
+ (unsigned int) -1, look_for_page, FALSE);
+ }
+ panic("vm_page_verify_free_list(color=%u)\n", color);
+ }
+ if (!expect_page && found_page) {
+ printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
+ color, npages, look_for_page, look_for_page->phys_page);
+ }
+ }
+ return npages;
+}
+
+/* runtime switch: free-list verification is off unless explicitly enabled */
+static boolean_t vm_page_verify_free_lists_enabled = FALSE;
+
+/*
+ * Walk every color queue plus the lopage queue under the free-list
+ * lock and panic if the combined page counts disagree with the
+ * global free-page counters.
+ */
+static void
+vm_page_verify_free_lists( void )
+{
+ unsigned int i;
+ unsigned int total_pages;
+ unsigned int lo_pages;
+
+ if (! vm_page_verify_free_lists_enabled)
+ return;
+
+ total_pages = 0;
+
+ lck_mtx_lock(&vm_page_queue_free_lock);
+
+ for (i = 0; i < vm_colors; i++)
+ total_pages += vm_page_verify_free_list(&vm_page_queue_free[i],
+ i, VM_PAGE_NULL, FALSE);
+
+ lo_pages = vm_page_verify_free_list(&vm_lopage_queue_free,
+ (unsigned int) -1,
+ VM_PAGE_NULL, FALSE);
+
+ if (total_pages != vm_page_free_count || lo_pages != vm_lopage_free_count)
+ panic("vm_page_verify_free_lists: "
+ "npages %u free_count %d nlopages %u lo_free_count %u",
+ total_pages, vm_page_free_count, lo_pages, vm_lopage_free_count);
+
+ lck_mtx_unlock(&vm_page_queue_free_lock);
+}
+
+/*
+ * vm_page_queues_assert:
+ *
+ * Sanity-check a page's queue state: the page may be on at most
+ * 'val' of the free/active/inactive/speculative/throttled/pageout
+ * queues, and a wired page must be on none of them.  Dumps the
+ * page and panics on violation.  Page queues lock must be held.
+ */
+void
+vm_page_queues_assert(
+ vm_page_t mem,
+ int val)
+{
+#if DEBUG
+ lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
+#endif
+ /* each term is a 1-bit flag, so the sum counts queue memberships */
+ if (mem->free + mem->active + mem->inactive + mem->speculative +
+ mem->throttled + mem->pageout_queue > (val)) {
+ _vm_page_print(mem);
+ panic("vm_page_queues_assert(%p, %d)\n", mem, val);
+ }
+ if (VM_PAGE_WIRED(mem)) {
+ assert(!mem->active);
+ assert(!mem->inactive);
+ assert(!mem->speculative);
+ assert(!mem->throttled);
+ assert(!mem->pageout_queue);
+ }
+}
+#endif /* MACH_ASSERT */
+
+
+/*
+ * CONTIGUOUS PAGE ALLOCATION
+ *
+ * Find a region large enough to contain at least n pages
+ * of contiguous physical memory.
+ *
+ * This is done by traversing the vm_page_t array in a linear fashion
+ * we assume that the vm_page_t array has the available physical pages in an
+ * ordered, ascending list... this is currently true of all our implementations
+ * and must remain so... there can be 'holes' in the array... we also can
+ * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
+ * which used to happen via 'vm_page_convert'... that function was no longer
+ * being called and was removed...
+ *
+ * The basic flow consists of stabilizing some of the interesting state of
+ * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
+ * sweep at the beginning of the array looking for pages that meet our criteria
+ * for a 'stealable' page... currently we are pretty conservative... if the page
+ * meets this criteria and is physically contiguous to the previous page in the 'run'
+ * we keep developing it. If we hit a page that doesn't fit, we reset our state
+ * and start to develop a new run... if at this point we've already considered
+ * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
+ * and mutex_pause (which will yield the processor), to keep the latency low w/r
+ * to other threads trying to acquire free pages (or move pages from q to q),
+ * and then continue from the spot we left off... we only make 1 pass through the
+ * array. Once we have a 'run' that is long enough, we'll go into the loop
+ * which steals the pages from the queues they're currently on... pages on the free
+ * queue can be stolen directly... pages that are on any of the other queues
+ * must be removed from the object they are tabled on... this requires taking the
+ * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
+ * or if the state of the page behind the vm_object lock is no longer viable, we'll
+ * dump the pages we've currently stolen back to the free list, and pick up our
+ * scan from the point where we aborted the 'current' run.
+ *
+ *
+ * Requirements:
+ * - neither vm_page_queue nor vm_free_list lock can be held on entry
+ *
+ * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
+ *
+ * Algorithm:
+ */
+
+/* pages examined before yielding the locks, to bound lock-hold latency */
+#define MAX_CONSIDERED_BEFORE_YIELD 1000
+
+
+/* Forget the current candidate run and start developing a new one.
+ * prevcontaddr = -2 guarantees the next page fails the contiguity
+ * test; start_pnum = -1 marks "no run started". */
+#define RESET_STATE_OF_RUN() \
+ MACRO_BEGIN \
+ prevcontaddr = -2; \
+ start_pnum = -1; \
+ free_considered = 0; \
+ substitute_needed = 0; \
+ npages = 0; \
+ MACRO_END
+
+/*
+ * Can we steal in-use (i.e. not free) pages when searching for
+ * physically-contiguous pages ?
+ */
+#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
+
+/* indices where the last scan left off, so successive searches resume
+ * instead of rescanning the front of the array every time */
+static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
+#if DEBUG
+int vm_page_find_contig_debug = 0;
+#endif
+
+/*
+ * vm_page_find_contiguous:
+ *
+ * Scan the vm_pages array for a run of 'contig_pages' physically
+ * contiguous pages whose start satisfies 'pnum_mask' alignment and
+ * which (when max_pnum != 0) lie wholly below 'max_pnum'.  See the
+ * block comment above for the overall algorithm.
+ *
+ * Returns a NEXT_PAGE-linked list of gobbled (or wired, if 'wire')
+ * pages in ascending physical order, or VM_PAGE_NULL on failure.
+ * Neither the vm_page_queue lock nor the free-list lock may be held
+ * on entry.
+ */
+static vm_page_t
+vm_page_find_contiguous(
+ unsigned int contig_pages,
+ ppnum_t max_pnum,
+ ppnum_t pnum_mask,
+ boolean_t wire,
+ int flags)
+{
+ vm_page_t m = NULL;
+ ppnum_t prevcontaddr;
+ ppnum_t start_pnum;
+ unsigned int npages, considered, scanned;
+ unsigned int page_idx, start_idx, last_idx, orig_last_idx;
+ unsigned int idx_last_contig_page_found = 0;
+ int free_considered, free_available;
+ int substitute_needed;
+ boolean_t wrapped;
+#if DEBUG
+ clock_sec_t tv_start_sec, tv_end_sec;
+ clock_usec_t tv_start_usec, tv_end_usec;
+#endif
+#if MACH_ASSERT
+ int yielded = 0;
+ int dumped_run = 0;
+ int stolen_pages = 0;
+#endif
+
+ if (contig_pages == 0)
+ return VM_PAGE_NULL;
+
+#if MACH_ASSERT
+ vm_page_verify_free_lists();
+#endif
+#if DEBUG
+ clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
+#endif
+ vm_page_lock_queues();
+ lck_mtx_lock(&vm_page_queue_free_lock);
+
+ RESET_STATE_OF_RUN();
+
+ scanned = 0;
+ considered = 0;
+ free_available = vm_page_free_count - vm_page_free_reserved;
+
+ wrapped = FALSE;
+
+ if(flags & KMA_LOMEM)
+ idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
+ else
+ idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
+
+ orig_last_idx = idx_last_contig_page_found;
+ last_idx = orig_last_idx;
+
+ for (page_idx = last_idx, start_idx = last_idx;
+ npages < contig_pages && page_idx < vm_pages_count;
+ page_idx++) {
+retry:
+ if (wrapped &&
+ npages == 0 &&
+ page_idx >= orig_last_idx) {
+ /*
+ * We're back where we started and we haven't
+ * found any suitable contiguous range. Let's
+ * give up.
+ */
+ break;
+ }
+ scanned++;
+ m = &vm_pages[page_idx];
+
+ assert(!m->fictitious);
+ assert(!m->private);
+
+ if (max_pnum && m->phys_page > max_pnum) {
+ /* no more low pages... */
+ break;
+ }
+ /*
+ * no run in progress yet and this page isn't aligned
+ * to the requested mask... (was "!npages & (...)" which
+ * happened to work only because both operands are 0/1)
+ */
+ if (npages == 0 && ((m->phys_page & pnum_mask) != 0)) {
+ /*
+ * not aligned
+ */
+ RESET_STATE_OF_RUN();
+
+ } else if (VM_PAGE_WIRED(m) || m->gobbled ||
+ m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
+ m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
+ m->cleaning || m->overwriting || m->restart || m->unusual || m->pageout) {
+ /*
+ * page is in a transient state
+ * or a state we don't want to deal
+ * with, so don't consider it which
+ * means starting a new run
+ */
+ RESET_STATE_OF_RUN();
+
+ } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
+ /*
+ * page needs to be on one of our queues
+ * in order for it to be stable behind the
+ * locks we hold at this point...
+ * if not, don't consider it which
+ * means starting a new run
+ */
+ RESET_STATE_OF_RUN();
+
+ } else if (!m->free && (!m->tabled || m->busy)) {
+ /*
+ * pages on the free list are always 'busy'
+ * so we couldn't test for 'busy' in the check
+ * for the transient states... pages that are
+ * 'free' are never 'tabled', so we also couldn't
+ * test for 'tabled'. So we check here to make
+ * sure that a non-free page is not busy and is
+ * tabled on an object...
+ * if not, don't consider it which
+ * means starting a new run
+ */
+ RESET_STATE_OF_RUN();
+
+ } else {
+ if (m->phys_page != prevcontaddr + 1) {
+ if ((m->phys_page & pnum_mask) != 0) {
+ RESET_STATE_OF_RUN();
+ goto did_consider;
+ } else {
+ npages = 1;
+ start_idx = page_idx;
+ start_pnum = m->phys_page;
+ }
+ } else {
+ npages++;
+ }
+ prevcontaddr = m->phys_page;
+
+ VM_PAGE_CHECK(m);
+ if (m->free) {
+ free_considered++;
+ } else {
+ /*
+ * This page is not free.
+ * If we can't steal used pages,
+ * we have to give up this run
+ * and keep looking.
+ * Otherwise, we might need to
+ * move the contents of this page
+ * into a substitute page.
+ */
+#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
+ if (m->pmapped || m->dirty) {
+ substitute_needed++;
+ }
+#else
+ RESET_STATE_OF_RUN();
+#endif
+ }
+
+ if ((free_considered + substitute_needed) > free_available) {
+ /*
+ * if we let this run continue
+ * we will end up dropping the vm_page_free_count
+ * below the reserve limit... we need to abort
+ * this run, but we can at least re-consider this
+ * page... thus the jump back to 'retry'
+ */
+ RESET_STATE_OF_RUN();
+
+ if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
+ considered++;
+ goto retry;
+ }
+ /*
+ * free_available == 0
+ * so can't consider any free pages... if
+ * we went to retry in this case, we'd
+ * get stuck looking at the same page
+ * w/o making any forward progress
+ * we also want to take this path if we've already
+ * reached our limit that controls the lock latency
+ */
+ }
+ }
+did_consider:
+ if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
+
+ lck_mtx_unlock(&vm_page_queue_free_lock);
+ vm_page_unlock_queues();
+
+ mutex_pause(0);
+
+ vm_page_lock_queues();
+ lck_mtx_lock(&vm_page_queue_free_lock);
+
+ RESET_STATE_OF_RUN();
+ /*
+ * reset our free page limit since we
+ * dropped the lock protecting the vm_page_free_queue
+ */
+ free_available = vm_page_free_count - vm_page_free_reserved;
+ considered = 0;
+#if MACH_ASSERT
+ yielded++;
+#endif
+ goto retry;
+ }
+ considered++;
+ }
+ m = VM_PAGE_NULL;
+
+ if (npages != contig_pages) {
+ if (!wrapped) {
+ /*
+ * We didn't find a contiguous range but we didn't
+ * start from the very first page.
+ * Start again from the very first page.
+ */
+ RESET_STATE_OF_RUN();
+ if( flags & KMA_LOMEM)
+ idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
+ else
+ idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
+ last_idx = 0;
+ page_idx = last_idx;
+ wrapped = TRUE;
+ goto retry;
+ }
+ lck_mtx_unlock(&vm_page_queue_free_lock);
+ } else {
+ vm_page_t m1;
+ vm_page_t m2;
+ unsigned int cur_idx;
+ unsigned int tmp_start_idx;
+ vm_object_t locked_object = VM_OBJECT_NULL;
+ boolean_t abort_run = FALSE;
+
+ assert(page_idx - start_idx == contig_pages);
+
+ tmp_start_idx = start_idx;
+
+ /*
+ * first pass through to pull the free pages
+ * off of the free queue so that in case we
+ * need substitute pages, we won't grab any
+ * of the free pages in the run... we'll clear
+ * the 'free' bit in the 2nd pass, and even in
+ * an abort_run case, we'll collect all of the
+ * free pages in this run and return them to the free list
+ */
+ while (start_idx < page_idx) {
+
+ m1 = &vm_pages[start_idx++];
+
+#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
+ assert(m1->free);
+#endif
+
+ if (m1->free) {
+ unsigned int color;
+
+ color = m1->phys_page & vm_color_mask;
+#if MACH_ASSERT
+ vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
+#endif
+ queue_remove(&vm_page_queue_free[color],
+ m1,
+ vm_page_t,
+ pageq);
+ m1->pageq.next = NULL;
+ m1->pageq.prev = NULL;
+#if MACH_ASSERT
+ vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
+#endif
+ /*
+ * Clear the "free" bit so that this page
+ * does not get considered for another
+ * concurrent physically-contiguous allocation.
+ */
+ m1->free = FALSE;
+ assert(m1->busy);
+
+ vm_page_free_count--;
+ }
+ }
+ /*
+ * adjust global freelist counts
+ */
+ if (vm_page_free_count < vm_page_free_count_minimum)
+ vm_page_free_count_minimum = vm_page_free_count;
+
+ if( flags & KMA_LOMEM)
+ vm_page_lomem_find_contiguous_last_idx = page_idx;
+ else
+ vm_page_find_contiguous_last_idx = page_idx;
+
+ /*
+ * we can drop the free queue lock at this point since
+ * we've pulled any 'free' candidates off of the list
+ * we need it dropped so that we can do a vm_page_grab
+ * when substituing for pmapped/dirty pages
+ */
+ lck_mtx_unlock(&vm_page_queue_free_lock);
+
+ start_idx = tmp_start_idx;
+ cur_idx = page_idx - 1;
+
+ while (start_idx++ < page_idx) {
+ /*
+ * must go through the list from back to front
+ * so that the page list is created in the
+ * correct order - low -> high phys addresses
+ */
+ m1 = &vm_pages[cur_idx--];
+
+ assert(!m1->free);
+ if (m1->object == VM_OBJECT_NULL) {
+ /*
+ * page has already been removed from
+ * the free list in the 1st pass
+ */
+ assert(m1->offset == (vm_object_offset_t) -1);
+ assert(m1->busy);
+ assert(!m1->wanted);
+ assert(!m1->laundry);
+ } else {
+ vm_object_t object;
+
+ if (abort_run == TRUE)
+ continue;
+
+ object = m1->object;
+
+ if (object != locked_object) {
+ if (locked_object) {
+ vm_object_unlock(locked_object);
+ locked_object = VM_OBJECT_NULL;
+ }
+ if (vm_object_lock_try(object))
+ locked_object = object;
+ }
+ if (locked_object == VM_OBJECT_NULL ||
+ (VM_PAGE_WIRED(m1) || m1->gobbled ||
+ m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
+ m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
+ m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->busy)) {
+
+ if (locked_object) {
+ vm_object_unlock(locked_object);
+ locked_object = VM_OBJECT_NULL;
+ }
+ tmp_start_idx = cur_idx;
+ abort_run = TRUE;
+ continue;
+ }
+ if (m1->pmapped || m1->dirty) {
+ int refmod;
+ vm_object_offset_t offset;
+
+ m2 = vm_page_grab();
+
+ if (m2 == VM_PAGE_NULL) {
+ if (locked_object) {
+ vm_object_unlock(locked_object);
+ locked_object = VM_OBJECT_NULL;
+ }
+ tmp_start_idx = cur_idx;
+ abort_run = TRUE;
+ continue;
+ }
+ if (m1->pmapped)
+ refmod = pmap_disconnect(m1->phys_page);
+ else
+ refmod = 0;
+ vm_page_copy(m1, m2);
+
+ m2->reference = m1->reference;
+ m2->dirty = m1->dirty;
+
+ if (refmod & VM_MEM_REFERENCED)
+ m2->reference = TRUE;
+ if (refmod & VM_MEM_MODIFIED) {
+ SET_PAGE_DIRTY(m2, TRUE);
+ }
+ offset = m1->offset;
+
+ /*
+ * completely cleans up the state
+ * of the page so that it is ready
+ * to be put onto the free list, or
+ * for this purpose it looks like it
+ * just came off of the free list
+ */
+ vm_page_free_prepare(m1);
+
+ /*
+ * make sure we clear the ref/mod state
+ * from the pmap layer... else we risk
+ * inheriting state from the last time
+ * this page was used...
+ */
+ pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
+ /*
+ * now put the substitute page on the object
+ */
+ vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE);
+
+ if (m2->reference)
+ vm_page_activate(m2);
+ else
+ vm_page_deactivate(m2);
+
+ PAGE_WAKEUP_DONE(m2);
+
+ } else {
+ /*
+ * completely cleans up the state
+ * of the page so that it is ready
+ * to be put onto the free list, or
+ * for this purpose it looks like it
+ * just came off of the free list
+ */
+ vm_page_free_prepare(m1);
+ }
+#if MACH_ASSERT
+ stolen_pages++;
+#endif
+ }
+ m1->pageq.next = (queue_entry_t) m;
+ m1->pageq.prev = NULL;
+ m = m1;
+ }
+ if (locked_object) {
+ vm_object_unlock(locked_object);
+ locked_object = VM_OBJECT_NULL;
+ }
+
+ if (abort_run == TRUE) {
+ if (m != VM_PAGE_NULL) {
+ vm_page_free_list(m, FALSE);
+ }
+#if MACH_ASSERT
+ dumped_run++;
+#endif
+ /*
+ * want the index of the last
+ * page in this run that was
+ * successfully 'stolen', so back
+ * it up 1 for the auto-decrement on use
+ * and 1 more to bump back over this page
+ */
+ page_idx = tmp_start_idx + 2;
+ if (page_idx >= vm_pages_count) {
+ if (wrapped)
+ goto done_scanning;
+ page_idx = last_idx = 0;
+ wrapped = TRUE;
+ }
+ abort_run = FALSE;
+
+ /*
+ * We didn't find a contiguous range but we didn't
+ * start from the very first page.
+ * Start again from the very first page.
+ */
+ RESET_STATE_OF_RUN();
+
+ if( flags & KMA_LOMEM)
+ idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
+ else
+ idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
+
+ last_idx = page_idx;
+
+ lck_mtx_lock(&vm_page_queue_free_lock);
+ /*
+ * reset our free page limit since we
+ * dropped the lock protecting the vm_page_free_queue
+ */
+ free_available = vm_page_free_count - vm_page_free_reserved;
+ goto retry;
+ }
+
+ for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
+
+ if (wire == TRUE)
+ m1->wire_count++;
+ else
+ m1->gobbled = TRUE;
+ }
+ if (wire == FALSE)
+ vm_page_gobble_count += npages;
+
+ /*
+ * gobbled pages are also counted as wired pages
+ */
+ vm_page_wire_count += npages;
+
+ assert(vm_page_verify_contiguous(m, npages));
+ }
+done_scanning:
+ vm_page_unlock_queues();
+
+#if DEBUG
+ clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
+
+ tv_end_sec -= tv_start_sec;
+ if (tv_end_usec < tv_start_usec) {
+ tv_end_sec--;
+ tv_end_usec += 1000000;
+ }
+ tv_end_usec -= tv_start_usec;
+ if (tv_end_usec >= 1000000) {
+ tv_end_sec++;
+ /* carry the overflow out of the microseconds field
+ * (was erroneously subtracting from tv_end_sec) */
+ tv_end_usec -= 1000000;
+ }
+ if (vm_page_find_contig_debug) {
+ printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n",
+ __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
+ (long)tv_end_sec, tv_end_usec, orig_last_idx,
+ scanned, yielded, dumped_run, stolen_pages);
+ }
+
+#endif
+#if MACH_ASSERT
+ vm_page_verify_free_lists();
+#endif
+ return m;
+}
+
+/*
+ * Allocate a list of contiguous, wired pages.
+ *
+ * 'size' must be a whole number of pages.  On success the page list
+ * (ordered by ascending physical address) is returned through 'list'.
+ * Returns KERN_INVALID_ARGUMENT for a bad size, KERN_NO_SPACE when
+ * no suitable contiguous region can be found.
+ */
+kern_return_t
+cpm_allocate(
+ vm_size_t size,
+ vm_page_t *list,
+ ppnum_t max_pnum,
+ ppnum_t pnum_mask,
+ boolean_t wire,
+ int flags)
+{
+ vm_page_t page_list;
+ unsigned int page_count;
+
+ /* reject requests that aren't a whole number of pages */
+ if (size % PAGE_SIZE != 0)
+ return KERN_INVALID_ARGUMENT;
+
+ page_count = (unsigned int) (size / PAGE_SIZE);
+ if (page_count != size / PAGE_SIZE) {
+ /* 32-bit overflow */
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ /*
+ * Grab a physically contiguous subset of the free
+ * list large enough to satisfy the request.
+ */
+ page_list = vm_page_find_contiguous(page_count, max_pnum, pnum_mask, wire, flags);
+
+ if (page_list == VM_PAGE_NULL)
+ return KERN_NO_SPACE;
+
+ /*
+ * see if the pageout daemon needs to be awakened
+ * to replenish what we just consumed
+ */
+ if ((vm_page_free_count < vm_page_free_min) ||
+ ((vm_page_free_count < vm_page_free_target) &&
+ ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
+ thread_wakeup((event_t) &vm_page_free_wanted);
+
+ VM_CHECK_MEMORYSTATUS;
+
+ /*
+ * The CPM pages should now be available and
+ * ordered by ascending physical address.
+ */
+ assert(vm_page_verify_contiguous(page_list, page_count));
+
+ *list = page_list;
+ return KERN_SUCCESS;
+}
+
+
+/* upper bound on how many pages a caller may batch into one delayed-work run */
+unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
+
+/*
+ * when working on a 'run' of pages, it is necessary to hold
+ * the vm_page_queue_lock (a hot global lock) for certain operations
+ * on the page... however, the majority of the work can be done
+ * while merely holding the object lock... in fact there are certain
+ * collections of pages that don't require any work brokered by the
+ * vm_page_queue_lock... to mitigate the time spent behind the global
+ * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
+ * while doing all of the work that doesn't require the vm_page_queue_lock...
+ * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
+ * necessary work for each page... we will grab the busy bit on the page
+ * if it's not already held so that vm_page_do_delayed_work can drop the object lock
+ * if it can't immediately take the vm_page_queue_lock in order to compete
+ * for the locks in the same order that vm_pageout_scan takes them.
+ * the operation names are modeled after the names of the routines that
+ * need to be called in order to make the changes very obvious in the
+ * original loop
+ */
+
+/*
+ * vm_page_do_delayed_work:
+ *
+ * Apply the deferred page operations recorded in 'dwp[0..dw_count-1]'
+ * (DW_* bits in dw_mask select the operations) to their pages under
+ * the page queues lock.  'object' must be locked on entry and is
+ * locked again on return; it may be temporarily dropped to acquire
+ * the locks in pageout_scan order (see comment below).  Pages freed
+ * by DW_vm_page_free are batched and released at the end.
+ */
+void
+vm_page_do_delayed_work(
+ vm_object_t object,
+ struct vm_page_delayed_work *dwp,
+ int dw_count)
+{
+ int j;
+ vm_page_t m;
+ vm_page_t local_free_q = VM_PAGE_NULL;
+
+ /*
+ * pageout_scan takes the vm_page_lock_queues first
+ * then tries for the object lock... to avoid what
+ * is effectively a lock inversion, we'll go to the
+ * trouble of taking them in that same order... otherwise
+ * if this object contains the majority of the pages resident
+ * in the UBC (or a small set of large objects actively being
+ * worked on contain the majority of the pages), we could
+ * cause the pageout_scan thread to 'starve' in its attempt
+ * to find pages to move to the free queue, since it has to
+ * successfully acquire the object lock of any candidate page
+ * before it can steal/clean it.
+ */
+ if (!vm_page_trylockspin_queues()) {
+ vm_object_unlock(object);
+
+ vm_page_lockspin_queues();
+
+ /* re-take the object lock in pageout_scan order, backing off
+ * the queues lock between attempts */
+ for (j = 0; ; j++) {
+ if (!vm_object_lock_avoid(object) &&
+ _vm_object_lock_try(object))
+ break;
+ vm_page_unlock_queues();
+ mutex_pause(j);
+ vm_page_lockspin_queues();
+ }
+ }
+ /* apply the recorded operations to each page in the batch */
+ for (j = 0; j < dw_count; j++, dwp++) {
+
+ m = dwp->dw_m;
+
+ if (dwp->dw_mask & DW_vm_pageout_throttle_up)
+ vm_pageout_throttle_up(m);
+
+ if (dwp->dw_mask & DW_vm_page_wire)
+ vm_page_wire(m);
+ else if (dwp->dw_mask & DW_vm_page_unwire) {
+ boolean_t queueit;
+
+ /* don't re-queue a page that is about to be freed */
+ queueit = (dwp->dw_mask & DW_vm_page_free) ? FALSE : TRUE;
+
+ vm_page_unwire(m, queueit);
+ }
+ if (dwp->dw_mask & DW_vm_page_free) {
+ vm_page_free_prepare_queues(m);
+
+ assert(m->pageq.next == NULL && m->pageq.prev == NULL);
+ /*
+ * Add this page to our list of reclaimed pages,
+ * to be freed later.
+ */
+ m->pageq.next = (queue_entry_t) local_free_q;
+ local_free_q = m;
+ } else {
+ if (dwp->dw_mask & DW_vm_page_deactivate_internal)
+ vm_page_deactivate_internal(m, FALSE);
+ else if (dwp->dw_mask & DW_vm_page_activate) {
+ if (m->active == FALSE) {
+ vm_page_activate(m);
+ }
+ }
+ else if (dwp->dw_mask & DW_vm_page_speculate)
+ vm_page_speculate(m, TRUE);
+ else if (dwp->dw_mask & DW_enqueue_cleaned) {
+ /*
+ * if we didn't hold the object lock and did this,
+ * we might disconnect the page, then someone might
+ * soft fault it back in, then we would put it on the
+ * cleaned queue, and so we would have a referenced (maybe even dirty)
+ * page on that queue, which we don't want
+ */
+ int refmod_state = pmap_disconnect(m->phys_page);
+
+ if ((refmod_state & VM_MEM_REFERENCED)) {
+ /*
+ * this page has been touched since it got cleaned; let's activate it
+ * if it hasn't already been
+ */
+ vm_pageout_enqueued_cleaned++;
+ vm_pageout_cleaned_reactivated++;
+ vm_pageout_cleaned_commit_reactivated++;
+
+ if (m->active == FALSE)
+ vm_page_activate(m);
+ } else {
+ m->reference = FALSE;
+ vm_page_enqueue_cleaned(m);
+ }
+ }
+ else if (dwp->dw_mask & DW_vm_page_lru)
+ vm_page_lru(m);
+ else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
+ if ( !m->pageout_queue)
+ VM_PAGE_QUEUES_REMOVE(m);
+ }
+ if (dwp->dw_mask & DW_set_reference)
+ m->reference = TRUE;
+ else if (dwp->dw_mask & DW_clear_reference)
+ m->reference = FALSE;
+
+ if (dwp->dw_mask & DW_move_page) {
+ if ( !m->pageout_queue) {
+ VM_PAGE_QUEUES_REMOVE(m);
+
+ assert(m->object != kernel_object);
+
+ VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
+ }
+ }
+ if (dwp->dw_mask & DW_clear_busy)
+ m->busy = FALSE;
+
+ if (dwp->dw_mask & DW_PAGE_WAKEUP)
+ PAGE_WAKEUP(m);
+ }
+ }
+ vm_page_unlock_queues();
+
+ /* release any pages reclaimed above, now that no locks are held */
+ if (local_free_q)
+ vm_page_free_list(local_free_q, TRUE);
+
+ VM_CHECK_MEMORYSTATUS;
+
+}
+
+/*
+ * Allocate 'page_count' low-memory pages and return them through
+ * 'list' as a singly-linked list (pageq.next).  KMA_LOMEM must be
+ * set in 'flags'.  On shortage, every page grabbed so far is freed
+ * and KERN_RESOURCE_SHORTAGE is returned.
+ */
+kern_return_t
+vm_page_alloc_list(
+ int page_count,
+ int flags,
+ vm_page_t *list)
+{
+ vm_page_t head = VM_PAGE_NULL;
+ vm_page_t page;
+ int n;
+
+ /* this entry point only hands out low-memory pages */
+ if ( !(flags & KMA_LOMEM))
+ panic("vm_page_alloc_list: called w/o KMA_LOMEM");
+
+ for (n = 0; n < page_count; n++) {
+ page = vm_page_grablo();
+
+ if (page == VM_PAGE_NULL) {
+ /* ran dry: give back whatever we collected so far */
+ if (head)
+ vm_page_free_list(head, FALSE);
+
+ *list = VM_PAGE_NULL;
+
+ return (KERN_RESOURCE_SHORTAGE);
+ }
+ /* push onto the front of the result list */
+ page->pageq.next = (queue_entry_t) head;
+ head = page;
+ }
+ *list = head;
+
+ return (KERN_SUCCESS);
+}
+
+/* Set the object offset recorded in 'page'. */
+void
+vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
+{
+ page->offset = offset;
+}
+
+/* Return the page linked after 'page' through pageq.next. */
+vm_page_t
+vm_page_get_next(vm_page_t page)
+{
+ return ((vm_page_t) page->pageq.next);
+}
+
+/* Return the page's offset within its object. */
+vm_object_offset_t
+vm_page_get_offset(vm_page_t page)
+{
+ return (page->offset);
+}
+
+/* Return the page's physical page number. */
+ppnum_t
+vm_page_get_phys_page(vm_page_t page)
+{
+ return (page->phys_page);
+}
+
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+#if HIBERNATION
+
+/* pages gobbled during hibernation, linked through pageq.next */
+static vm_page_t hibernate_gobble_queue;
+
+/* optional hook asking the buffer cache to shrink; set elsewhere */
+extern boolean_t (* volatile consider_buffer_cache_collect)(int);
+
+static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
+static int hibernate_flush_dirty_pages(void);
+static int hibernate_flush_queue(queue_head_t *, int);
+
+void hibernate_flush_wait(void);
+void hibernate_mark_in_progress(void);
+void hibernate_clear_in_progress(void);
+
+
+/*
+ * Diagnostic counters gathered while preparing for hibernation.
+ * The hibernate_* fields are bumped by the dirty-page flush/drain
+ * paths below; the cd_* fields presumably belong to the page
+ * discard/count phase -- TODO confirm against the code that
+ * updates them (outside this section).
+ */
+struct hibernate_statistics {
+ int hibernate_considered;
+ int hibernate_reentered_on_q;
+ int hibernate_found_dirty;
+ int hibernate_skipped_cleaning;
+ int hibernate_skipped_transient;
+ int hibernate_skipped_precious;
+ int hibernate_queue_nolock;
+ int hibernate_queue_paused;
+ int hibernate_throttled;
+ int hibernate_throttle_timeout;
+ int hibernate_drained;
+ int hibernate_drain_timeout;
+ int cd_lock_failed;
+ int cd_found_precious;
+ int cd_found_wired;
+ int cd_found_busy;
+ int cd_found_unusual;
+ int cd_found_cleaning;
+ int cd_found_laundry;
+ int cd_found_dirty;
+ int cd_local_free;
+ int cd_total_free;
+ int cd_vm_page_wire_count;
+ int cd_pages;
+ int cd_discarded;
+ int cd_count_wire;
+} hibernate_stats;
+
+
+
+/*
+ * Wait for a pageout queue's in-flight laundry to drain.
+ *
+ * Marks the queue as draining and blocks, with a timeout, until
+ * q->pgo_laundry reaches zero.  Returns 0 once drained, 1 if a
+ * wait timed out first.  Enters and leaves with the page queues
+ * lock unheld (it is taken and dropped internally).
+ */
+static int
+hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
+{
+ wait_result_t wait_result;
+
+ vm_page_lock_queues();
+
+ while (q->pgo_laundry) {
+
+ q->pgo_draining = TRUE;
+
+ /*
+ * NOTE(review): the wait event is &q->pgo_laundry + 1 --
+ * presumably matching the wakeup posted when draining
+ * completes; confirm against the wakeup site.  Timeout
+ * is 5000 intervals scaled by 1ms (i.e. 5 seconds).
+ */
+ assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
+
+ vm_page_unlock_queues();
+
+ wait_result = thread_block(THREAD_CONTINUE_NULL);
+
+ if (wait_result == THREAD_TIMED_OUT) {
+ hibernate_stats.hibernate_drain_timeout++;
+ return (1);
+ }
+ vm_page_lock_queues();
+
+ hibernate_stats.hibernate_drained++;
+ }
+ vm_page_unlock_queues();
+
+ return (0);
+}
+
+
+static int
+hibernate_flush_queue(queue_head_t *q, int qcount)
+{
+ vm_page_t m;
+ vm_object_t l_object = NULL;
+ vm_object_t m_object = NULL;
+ int refmod_state = 0;
+ int try_failed_count = 0;
+ int retval = 0;
+ int current_run = 0;
+ struct vm_pageout_queue *iq;
+ struct vm_pageout_queue *eq;
+ struct vm_pageout_queue *tq;
+
+ hibernate_cleaning_in_progress = TRUE;
+
+ KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
+
+ iq = &vm_pageout_queue_internal;
+ eq = &vm_pageout_queue_external;
+
+ vm_page_lock_queues();
+
+ while (qcount && !queue_empty(q)) {
+
+ if (current_run++ == 1000) {
+ if (hibernate_should_abort()) {
+ retval = 1;
+ break;
+ }
+ current_run = 0;
+ }
+
+ m = (vm_page_t) queue_first(q);
+ m_object = m->object;
+
+ /*
+ * check to see if we currently are working
+ * with the same object... if so, we've
+ * already got the lock
+ */
+ if (m_object != l_object) {
+ /*
+ * the object associated with candidate page is
+ * different from the one we were just working
+ * with... dump the lock if we still own it
+ */
+ if (l_object != NULL) {
+ vm_object_unlock(l_object);
+ l_object = NULL;
+ }
+ /*
+ * Try to lock object; since we've already got the
+ * page queues lock, we can only 'try' for this one.
+ * if the 'try' fails, we need to do a mutex_pause
+ * to allow the owner of the object lock a chance to
+ * run...
+ */
+ if ( !vm_object_lock_try_scan(m_object)) {
+
+ if (try_failed_count > 20) {
+ hibernate_stats.hibernate_queue_nolock++;
+
+ goto reenter_pg_on_q;
+ }
+ vm_pageout_scan_wants_object = m_object;