1/*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65#include <debug.h>
66#include <libkern/OSAtomic.h>
67
68#include <mach/clock_types.h>
69#include <mach/vm_prot.h>
70#include <mach/vm_statistics.h>
71#include <mach/sdt.h>
72#include <kern/counters.h>
73#include <kern/sched_prim.h>
74#include <kern/task.h>
75#include <kern/thread.h>
76#include <kern/kalloc.h>
77#include <kern/zalloc.h>
78#include <kern/xpr.h>
79#include <vm/pmap.h>
80#include <vm/vm_init.h>
81#include <vm/vm_map.h>
82#include <vm/vm_page.h>
83#include <vm/vm_pageout.h>
84#include <vm/vm_kern.h> /* kernel_memory_allocate() */
85#include <kern/misc_protos.h>
86#include <zone_debug.h>
87#include <vm/cpm.h>
88#include <pexpert/pexpert.h>
89
90#include <vm/vm_protos.h>
91#include <vm/memory_object.h>
92#include <vm/vm_purgeable_internal.h>
93
94#include <IOKit/IOHibernatePrivate.h>
95
96#include <sys/kdebug.h>
97
98boolean_t hibernate_cleaning_in_progress = FALSE;
99boolean_t vm_page_free_verify = TRUE;
100
101uint32_t vm_lopage_free_count = 0;
102uint32_t vm_lopage_free_limit = 0;
103uint32_t vm_lopage_lowater = 0;
104boolean_t vm_lopage_refill = FALSE;
105boolean_t vm_lopage_needed = FALSE;
106
107lck_mtx_ext_t vm_page_queue_lock_ext;
108lck_mtx_ext_t vm_page_queue_free_lock_ext;
109lck_mtx_ext_t vm_purgeable_queue_lock_ext;
110
111int speculative_age_index = 0;
112int speculative_steal_index = 0;
113struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
114
115
116__private_extern__ void vm_page_init_lck_grp(void);
117
118static void vm_page_free_prepare(vm_page_t page);
119static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
120
121
122
123
124/*
125 * Associated with each page of user-allocatable memory is a
126 * page structure.
127 */
128
129/*
130 * These variables record the values returned by vm_page_bootstrap,
131 * for debugging purposes. The implementation of pmap_steal_memory
132 * and pmap_startup here also uses them internally.
133 */
134
135vm_offset_t virtual_space_start;
136vm_offset_t virtual_space_end;
137uint32_t vm_page_pages;
138
139/*
140 * The vm_page_lookup() routine, which provides for fast
141 * (virtual memory object, offset) to page lookup, employs
142 * the following hash table. The vm_page_{insert,remove}
143 * routines install and remove associations in the table.
144 * [This table is often called the virtual-to-physical,
145 * or VP, table.]
146 */
147typedef struct {
148 vm_page_t pages;
149#if MACH_PAGE_HASH_STATS
150 int cur_count; /* current count */
151 int hi_count; /* high water mark */
152#endif /* MACH_PAGE_HASH_STATS */
153} vm_page_bucket_t;
154
155
156#define BUCKETS_PER_LOCK 16
157
158vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
159unsigned int vm_page_bucket_count = 0; /* How big is array? */
160unsigned int vm_page_hash_mask; /* Mask for hash function */
161unsigned int vm_page_hash_shift; /* Shift for hash function */
162uint32_t vm_page_bucket_hash; /* Basic bucket hash */
163unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
164
165lck_spin_t *vm_page_bucket_locks;
166
167
168#if MACH_PAGE_HASH_STATS
169/* This routine is only for debug. It is intended to be called by
170 * hand by a developer using a kernel debugger. This routine prints
171 * out vm_page_hash table statistics to the kernel debug console.
172 */
173void
174hash_debug(void)
175{
176 int i;
177 int numbuckets = 0;
178 int highsum = 0;
179 int maxdepth = 0;
180
181 for (i = 0; i < vm_page_bucket_count; i++) {
182 if (vm_page_buckets[i].hi_count) {
183 numbuckets++;
184 highsum += vm_page_buckets[i].hi_count;
185 if (vm_page_buckets[i].hi_count > maxdepth)
186 maxdepth = vm_page_buckets[i].hi_count;
187 }
188 }
189 printf("Total number of buckets: %d\n", vm_page_bucket_count);
190 printf("Number used buckets: %d = %d%%\n",
191 numbuckets, 100*numbuckets/vm_page_bucket_count);
192 printf("Number unused buckets: %d = %d%%\n",
193 vm_page_bucket_count - numbuckets,
194 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
195 printf("Sum of bucket max depth: %d\n", highsum);
196 printf("Average bucket depth: %d.%2d\n",
197 highsum/vm_page_bucket_count,
198 highsum%vm_page_bucket_count);
199 printf("Maximum bucket depth: %d\n", maxdepth);
200}
201#endif /* MACH_PAGE_HASH_STATS */
202
203/*
204 * The virtual page size is currently implemented as a runtime
205 * variable, but is constant once initialized using vm_set_page_size.
206 * This initialization must be done in the machine-dependent
207 * bootstrap sequence, before calling other machine-independent
208 * initializations.
209 *
210 * All references to the virtual page size outside this
211 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
212 * constants.
213 */
214vm_size_t page_size = PAGE_SIZE;
215vm_size_t page_mask = PAGE_MASK;
216int page_shift = PAGE_SHIFT;
217
218/*
219 * Resident page structures are initialized from
220 * a template (see vm_page_alloc).
221 *
222 * When adding a new field to the virtual memory
223 * object structure, be sure to add initialization
224 * (see vm_page_bootstrap).
225 */
226struct vm_page vm_page_template;
227
228vm_page_t vm_pages = VM_PAGE_NULL;
229unsigned int vm_pages_count = 0;
230ppnum_t vm_page_lowest = 0;
231
232/*
233 * Resident pages that represent real memory
234 * are allocated from a set of free lists,
235 * one per color.
236 */
237unsigned int vm_colors;
238unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
239unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
240queue_head_t vm_page_queue_free[MAX_COLORS];
241unsigned int vm_page_free_wanted;
242unsigned int vm_page_free_wanted_privileged;
243unsigned int vm_page_free_count;
244unsigned int vm_page_fictitious_count;
245
246unsigned int vm_page_free_count_minimum; /* debugging */
247
248/*
249 * Occasionally, the virtual memory system uses
250 * resident page structures that do not refer to
251 * real pages, for example to leave a page with
252 * important state information in the VP table.
253 *
254 * These page structures are allocated the way
255 * most other kernel structures are.
256 */
257zone_t vm_page_zone;
258vm_locks_array_t vm_page_locks;
259decl_lck_mtx_data(,vm_page_alloc_lock)
260lck_mtx_ext_t vm_page_alloc_lock_ext;
261
262unsigned int io_throttle_zero_fill;
263
264unsigned int vm_page_local_q_count = 0;
265unsigned int vm_page_local_q_soft_limit = 250;
266unsigned int vm_page_local_q_hard_limit = 500;
267struct vplq *vm_page_local_q = NULL;
268
269/* N.B. Guard and fictitious pages must not
270 * be assigned a zero phys_page value.
271 */
272/*
273 * Fictitious pages don't have a physical address,
274 * but we must initialize phys_page to something.
275 * For debugging, this should be a strange value
276 * that the pmap module can recognize in assertions.
277 */
278ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
279
280/*
281 * Guard pages are not accessible so they don't
282 * need a physical address, but we need to enter
283 * one in the pmap.
284 * Let's make it recognizable and make sure that
285 * we don't use a real physical page with that
286 * physical address.
287 */
288ppnum_t vm_page_guard_addr = (ppnum_t) -2;
289
290/*
291 * Resident page structures are also chained on
292 * queues that are used by the page replacement
293 * system (pageout daemon). These queues are
294 * defined here, but are shared by the pageout
295 * module. The inactive queue is broken into
296 * inactive and zf for convenience as the
297 * pageout daemon often assigns a higher
298 * affinity to zf pages.
299 */
300queue_head_t vm_page_queue_active;
301queue_head_t vm_page_queue_inactive;
302queue_head_t vm_page_queue_anonymous; /* inactive memory queue for anonymous pages */
303queue_head_t vm_page_queue_throttled;
304
305unsigned int vm_page_active_count;
306unsigned int vm_page_inactive_count;
307unsigned int vm_page_anonymous_count;
308unsigned int vm_page_throttled_count;
309unsigned int vm_page_speculative_count;
310unsigned int vm_page_wire_count;
311unsigned int vm_page_wire_count_initial;
312unsigned int vm_page_gobble_count = 0;
313unsigned int vm_page_wire_count_warning = 0;
314unsigned int vm_page_gobble_count_warning = 0;
315
316unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
317unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
318uint64_t vm_page_purged_count = 0; /* total count of purged pages */
319
320#if DEVELOPMENT || DEBUG
321unsigned int vm_page_speculative_recreated = 0;
322unsigned int vm_page_speculative_created = 0;
323unsigned int vm_page_speculative_used = 0;
324#endif
325
326queue_head_t vm_page_queue_cleaned;
327
328unsigned int vm_page_cleaned_count = 0;
329unsigned int vm_pageout_enqueued_cleaned = 0;
330
331uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
332ppnum_t max_valid_low_ppnum = 0xffffffff;
333
334
335/*
336 * Several page replacement parameters are also
337 * shared with this module, so that page allocation
338 * (done here in vm_page_alloc) can trigger the
339 * pageout daemon.
340 */
341unsigned int vm_page_free_target = 0;
342unsigned int vm_page_free_min = 0;
343unsigned int vm_page_throttle_limit = 0;
344uint32_t vm_page_creation_throttle = 0;
345unsigned int vm_page_inactive_target = 0;
346unsigned int vm_page_anonymous_min = 0;
347unsigned int vm_page_inactive_min = 0;
348unsigned int vm_page_free_reserved = 0;
349unsigned int vm_page_throttle_count = 0;
350
351
352/*
353 * The VM system has a couple of heuristics for deciding
354 * that pages are "uninteresting" and should be placed
355 * on the inactive queue as likely candidates for replacement.
356 * These variables let the heuristics be controlled at run-time
357 * to make experimentation easier.
358 */
359
360boolean_t vm_page_deactivate_hint = TRUE;
361
362struct vm_page_stats_reusable vm_page_stats_reusable;
363
364/*
365 * vm_set_page_size:
366 *
367 * Sets the page size, perhaps based upon the memory
368 * size. Must be called before any use of page-size
369 * dependent functions.
370 *
371 * Sets page_shift and page_mask from page_size.
372 */
373void
374vm_set_page_size(void)
375{
376 page_mask = page_size - 1;
377
378 if ((page_mask & page_size) != 0)
379 panic("vm_set_page_size: page size not a power of two");
380
381 for (page_shift = 0; ; page_shift++)
382 if ((1U << page_shift) == page_size)
383 break;
384}
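/*
 * Editorial note (not in the original source): for the common page_size of
 * 4096 bytes, the loop above yields page_mask = 0xFFF and page_shift = 12,
 * so (addr & page_mask) is the byte offset within a page and
 * (addr >> page_shift) is the page number.
 */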
385
386
387/* Called once during startup, once the cache geometry is known.
388 */
389static void
390vm_page_set_colors( void )
391{
392 unsigned int n, override;
393
394 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
395 n = override;
396 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
397 n = vm_cache_geometry_colors;
398 else n = DEFAULT_COLORS; /* use default if all else fails */
399
400 if ( n == 0 )
401 n = 1;
402 if ( n > MAX_COLORS )
403 n = MAX_COLORS;
404
405 /* the count must be a power of 2 */
406 if ( ( n & (n - 1)) != 0 )
407 panic("vm_page_set_colors");
408
409 vm_colors = n;
410 vm_color_mask = n - 1;
411}
412
413
414lck_grp_t vm_page_lck_grp_free;
415lck_grp_t vm_page_lck_grp_queue;
416lck_grp_t vm_page_lck_grp_local;
417lck_grp_t vm_page_lck_grp_purge;
418lck_grp_t vm_page_lck_grp_alloc;
419lck_grp_t vm_page_lck_grp_bucket;
420lck_grp_attr_t vm_page_lck_grp_attr;
421lck_attr_t vm_page_lck_attr;
422
423
424__private_extern__ void
425vm_page_init_lck_grp(void)
426{
427 /*
428	 * initialize the vm_page lock world
429 */
430 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
431 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
432 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
433 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
434 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
435 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
436 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
437 lck_attr_setdefault(&vm_page_lck_attr);
438 lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
439}
440
441void
442vm_page_init_local_q()
443{
444 unsigned int num_cpus;
445 unsigned int i;
446 struct vplq *t_local_q;
447
448 num_cpus = ml_get_max_cpus();
449
450 /*
451 * no point in this for a uni-processor system
452 */
453 if (num_cpus >= 2) {
454 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
455
456 for (i = 0; i < num_cpus; i++) {
457 struct vpl *lq;
458
459 lq = &t_local_q[i].vpl_un.vpl;
460 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
461 queue_init(&lq->vpl_queue);
462 lq->vpl_count = 0;
463 }
464 vm_page_local_q_count = num_cpus;
465
466 vm_page_local_q = (struct vplq *)t_local_q;
467 }
468}
469
470
471/*
472 * vm_page_bootstrap:
473 *
474 * Initializes the resident memory module.
475 *
476 * Allocates memory for the page cells, and
477 * for the object/offset-to-page hash table headers.
478 * Each page cell is initialized and placed on the free list.
479 * Returns the range of available kernel virtual memory.
480 */
481
482void
483vm_page_bootstrap(
484 vm_offset_t *startp,
485 vm_offset_t *endp)
486{
487 register vm_page_t m;
488 unsigned int i;
489 unsigned int log1;
490 unsigned int log2;
491 unsigned int size;
492
493 /*
494 * Initialize the vm_page template.
495 */
496
497 m = &vm_page_template;
498 bzero(m, sizeof (*m));
499
500 m->pageq.next = NULL;
501 m->pageq.prev = NULL;
502 m->listq.next = NULL;
503 m->listq.prev = NULL;
504 m->next = VM_PAGE_NULL;
505
506 m->object = VM_OBJECT_NULL; /* reset later */
507 m->offset = (vm_object_offset_t) -1; /* reset later */
508
509 m->wire_count = 0;
510 m->local = FALSE;
511 m->inactive = FALSE;
512 m->active = FALSE;
513 m->pageout_queue = FALSE;
514 m->speculative = FALSE;
515 m->laundry = FALSE;
516 m->free = FALSE;
517 m->reference = FALSE;
518 m->gobbled = FALSE;
519 m->private = FALSE;
520 m->throttled = FALSE;
521 m->__unused_pageq_bits = 0;
522
523 m->phys_page = 0; /* reset later */
524
525 m->busy = TRUE;
526 m->wanted = FALSE;
527 m->tabled = FALSE;
528 m->fictitious = FALSE;
529 m->pmapped = FALSE;
530 m->wpmapped = FALSE;
531 m->pageout = FALSE;
532 m->absent = FALSE;
533 m->error = FALSE;
534 m->dirty = FALSE;
535 m->cleaning = FALSE;
536 m->precious = FALSE;
537 m->clustered = FALSE;
538 m->overwriting = FALSE;
539 m->restart = FALSE;
540 m->unusual = FALSE;
541 m->encrypted = FALSE;
542 m->encrypted_cleaning = FALSE;
543 m->cs_validated = FALSE;
544 m->cs_tainted = FALSE;
545 m->no_cache = FALSE;
546 m->reusable = FALSE;
547 m->slid = FALSE;
548 m->was_dirty = FALSE;
549 m->__unused_object_bits = 0;
550
551
552 /*
553 * Initialize the page queues.
554 */
555 vm_page_init_lck_grp();
556
557 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
558 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
559 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
560
561 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
562 int group;
563
564 purgeable_queues[i].token_q_head = 0;
565 purgeable_queues[i].token_q_tail = 0;
566 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
567 queue_init(&purgeable_queues[i].objq[group]);
568
569 purgeable_queues[i].type = i;
570 purgeable_queues[i].new_pages = 0;
571#if MACH_ASSERT
572 purgeable_queues[i].debug_count_tokens = 0;
573 purgeable_queues[i].debug_count_objects = 0;
574#endif
575 };
576
577 for (i = 0; i < MAX_COLORS; i++ )
578 queue_init(&vm_page_queue_free[i]);
579
580 queue_init(&vm_lopage_queue_free);
581 queue_init(&vm_page_queue_active);
582 queue_init(&vm_page_queue_inactive);
583 queue_init(&vm_page_queue_cleaned);
584 queue_init(&vm_page_queue_throttled);
585 queue_init(&vm_page_queue_anonymous);
586
587 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
588 queue_init(&vm_page_queue_speculative[i].age_q);
589
590 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
591 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
592 }
593 vm_page_free_wanted = 0;
594 vm_page_free_wanted_privileged = 0;
595
596 vm_page_set_colors();
597
598
599 /*
600 * Steal memory for the map and zone subsystems.
601 */
602 zone_steal_memory();
603 vm_map_steal_memory();
604
605 /*
606 * Allocate (and initialize) the virtual-to-physical
607 * table hash buckets.
608 *
609 * The number of buckets should be a power of two to
610 * get a good hash function. The following computation
611 * chooses the first power of two that is greater
612 * than the number of physical pages in the system.
613 */
614
615 if (vm_page_bucket_count == 0) {
616 unsigned int npages = pmap_free_pages();
617
618 vm_page_bucket_count = 1;
619 while (vm_page_bucket_count < npages)
620 vm_page_bucket_count <<= 1;
621 }
622 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
623
624 vm_page_hash_mask = vm_page_bucket_count - 1;
625
626 /*
627 * Calculate object shift value for hashing algorithm:
628 * O = log2(sizeof(struct vm_object))
629 * B = log2(vm_page_bucket_count)
630 * hash shifts the object left by
631 * B/2 - O
632 */
633 size = vm_page_bucket_count;
634 for (log1 = 0; size > 1; log1++)
635 size /= 2;
636 size = sizeof(struct vm_object);
637 for (log2 = 0; size > 1; log2++)
638 size /= 2;
639 vm_page_hash_shift = log1/2 - log2 + 1;
640
641 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
642 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
643 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to ensure unique series */
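	/*
	 * Worked example (editorial note, not in the original source; the
	 * object size is an assumption): with vm_page_bucket_count = 2^20
	 * (log1 = 20) and a struct vm_object whose size reduces to log2 = 8,
	 * the computation above gives
	 *	vm_page_hash_shift  = 20/2 - 8 + 1 = 3
	 *	vm_page_bucket_hash = (1 << 10) | (1 << 5) | 1 = 0x421
	 */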
644
645 if (vm_page_hash_mask & vm_page_bucket_count)
646 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
647
648 vm_page_buckets = (vm_page_bucket_t *)
649 pmap_steal_memory(vm_page_bucket_count *
650 sizeof(vm_page_bucket_t));
651
652 vm_page_bucket_locks = (lck_spin_t *)
653 pmap_steal_memory(vm_page_bucket_lock_count *
654 sizeof(lck_spin_t));
655
656 for (i = 0; i < vm_page_bucket_count; i++) {
657 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
658
659 bucket->pages = VM_PAGE_NULL;
660#if MACH_PAGE_HASH_STATS
661 bucket->cur_count = 0;
662 bucket->hi_count = 0;
663#endif /* MACH_PAGE_HASH_STATS */
664 }
665
666 for (i = 0; i < vm_page_bucket_lock_count; i++)
667 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
668
669 /*
670 * Machine-dependent code allocates the resident page table.
671 * It uses vm_page_init to initialize the page frames.
672 * The code also returns to us the virtual space available
673 * to the kernel. We don't trust the pmap module
674 * to get the alignment right.
675 */
676
677 pmap_startup(&virtual_space_start, &virtual_space_end);
678 virtual_space_start = round_page(virtual_space_start);
679 virtual_space_end = trunc_page(virtual_space_end);
680
681 *startp = virtual_space_start;
682 *endp = virtual_space_end;
683
684 /*
685 * Compute the initial "wire" count.
686 * Up until now, the pages which have been set aside are not under
687 * the VM system's control, so although they aren't explicitly
688 * wired, they nonetheless can't be moved. At this moment,
689 * all VM managed pages are "free", courtesy of pmap_startup.
690 */
691 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
692 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
693 vm_page_wire_count_initial = vm_page_wire_count;
694 vm_page_free_count_minimum = vm_page_free_count;
695
696 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
697 vm_page_free_count, vm_page_wire_count);
698
699 simple_lock_init(&vm_paging_lock, 0);
700}
701
702#ifndef MACHINE_PAGES
703/*
704 * We implement pmap_steal_memory and pmap_startup with the help
705 * of two simpler functions, pmap_virtual_space and pmap_next_page.
706 */
707
708void *
709pmap_steal_memory(
710 vm_size_t size)
711{
712 vm_offset_t addr, vaddr;
713 ppnum_t phys_page;
714
715 /*
716 * We round the size up to a multiple of the pointer size.
717 */
718
719 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
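	/*
	 * Editorial example: with an 8-byte pointer size, a request of 13
	 * bytes becomes (13 + 7) & ~7 = 16, so every address handed out
	 * below stays pointer-aligned.
	 */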
720
721 /*
722 * If this is the first call to pmap_steal_memory,
723 * we have to initialize ourself.
724 */
725
726 if (virtual_space_start == virtual_space_end) {
727 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
728
729 /*
730 * The initial values must be aligned properly, and
731 * we don't trust the pmap module to do it right.
732 */
733
734 virtual_space_start = round_page(virtual_space_start);
735 virtual_space_end = trunc_page(virtual_space_end);
736 }
737
738 /*
739 * Allocate virtual memory for this request.
740 */
741
742 addr = virtual_space_start;
743 virtual_space_start += size;
744
745 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
746
747 /*
748 * Allocate and map physical pages to back new virtual pages.
749 */
750
751 for (vaddr = round_page(addr);
752 vaddr < addr + size;
753 vaddr += PAGE_SIZE) {
754
755 if (!pmap_next_page_hi(&phys_page))
756 panic("pmap_steal_memory");
757
758 /*
759 * XXX Logically, these mappings should be wired,
760 * but some pmap modules barf if they are.
761 */
762#if defined(__LP64__)
763 pmap_pre_expand(kernel_pmap, vaddr);
764#endif
765
766 pmap_enter(kernel_pmap, vaddr, phys_page,
767 VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
768 VM_WIMG_USE_DEFAULT, FALSE);
769 /*
770 * Account for newly stolen memory
771 */
772 vm_page_wire_count++;
773
774 }
775
776 return (void *) addr;
777}
778
779void
780pmap_startup(
781 vm_offset_t *startp,
782 vm_offset_t *endp)
783{
784 unsigned int i, npages, pages_initialized, fill, fillval;
785 ppnum_t phys_page;
786 addr64_t tmpaddr;
787
788 /*
789 * We calculate how many page frames we will have
790 * and then allocate the page structures in one chunk.
791 */
792
793 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
794 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
795 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */
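	/*
	 * Editorial note: each usable page costs PAGE_SIZE bytes of frame
	 * memory plus sizeof(struct vm_page) of bookkeeping, so dividing the
	 * remaining memory by (PAGE_SIZE + sizeof(*vm_pages)) leaves room for
	 * the vm_page array stolen just below.
	 */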
796
797 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
798
799 /*
800 * Initialize the page frames.
801 */
802 for (i = 0, pages_initialized = 0; i < npages; i++) {
803 if (!pmap_next_page(&phys_page))
804 break;
805 if (pages_initialized == 0 || phys_page < vm_page_lowest)
806 vm_page_lowest = phys_page;
807
808 vm_page_init(&vm_pages[i], phys_page, FALSE);
809 vm_page_pages++;
810 pages_initialized++;
811 }
812 vm_pages_count = pages_initialized;
813
814 /*
815 * Check if we want to initialize pages to a known value
816 */
817 fill = 0; /* Assume no fill */
818 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
819#if DEBUG
820 /* This slows down booting the DEBUG kernel, particularly on
821 * large memory systems, but is worthwhile in deterministically
822 * trapping uninitialized memory usage.
823 */
824 if (fill == 0) {
825 fill = 1;
826 fillval = 0xDEB8F177;
827 }
828#endif
829 if (fill)
830 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
831 // -debug code remove
832 if (2 == vm_himemory_mode) {
833 // free low -> high so high is preferred
834 for (i = 1; i <= pages_initialized; i++) {
835 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
836 vm_page_release(&vm_pages[i - 1]);
837 }
838 }
839 else
840 // debug code remove-
841
842 /*
843 * Release pages in reverse order so that physical pages
844 * initially get allocated in ascending addresses. This keeps
845 * the devices (which must address physical memory) happy if
846 * they require several consecutive pages.
847 */
848 for (i = pages_initialized; i > 0; i--) {
849 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
850 vm_page_release(&vm_pages[i - 1]);
851 }
852
853#if 0
854 {
855 vm_page_t xx, xxo, xxl;
856 int i, j, k, l;
857
858 j = 0; /* (BRINGUP) */
859 xxl = 0;
860
861 for( i = 0; i < vm_colors; i++ ) {
862 queue_iterate(&vm_page_queue_free[i],
863 xx,
864 vm_page_t,
865 pageq) { /* BRINGUP */
866 j++; /* (BRINGUP) */
867 if(j > vm_page_free_count) { /* (BRINGUP) */
868 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
869 }
870
871 l = vm_page_free_count - j; /* (BRINGUP) */
872 k = 0; /* (BRINGUP) */
873
874 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
875
876 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
877 k++;
878 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
879 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
880 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
881 }
882 }
883
884 xxl = xx;
885 }
886 }
887
888 if(j != vm_page_free_count) { /* (BRINGUP) */
889 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
890 }
891 }
892#endif
893
894
895 /*
896 * We have to re-align virtual_space_start,
897 * because pmap_steal_memory has been using it.
898 */
899
900 virtual_space_start = round_page(virtual_space_start);
901
902 *startp = virtual_space_start;
903 *endp = virtual_space_end;
904}
905#endif /* MACHINE_PAGES */
906
907/*
908 * Routine: vm_page_module_init
909 * Purpose:
910 * Second initialization pass, to be done after
911 * the basic VM system is ready.
912 */
913void
914vm_page_module_init(void)
915{
916 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
917 0, PAGE_SIZE, "vm pages");
918
919#if ZONE_DEBUG
920 zone_debug_disable(vm_page_zone);
921#endif /* ZONE_DEBUG */
922
923 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
924 zone_change(vm_page_zone, Z_EXPAND, FALSE);
925 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
926 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
927 zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
928 /*
929 * Adjust zone statistics to account for the real pages allocated
930 * in vm_page_create(). [Q: is this really what we want?]
931 */
932 vm_page_zone->count += vm_page_pages;
933 vm_page_zone->sum_count += vm_page_pages;
934 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
935}
936
937/*
938 * Routine: vm_page_create
939 * Purpose:
940 * After the VM system is up, machine-dependent code
941 * may stumble across more physical memory. For example,
942 * memory that it was reserving for a frame buffer.
943 * vm_page_create turns this memory into available pages.
944 */
945
946void
947vm_page_create(
948 ppnum_t start,
949 ppnum_t end)
950{
951 ppnum_t phys_page;
952 vm_page_t m;
953
954 for (phys_page = start;
955 phys_page < end;
956 phys_page++) {
957 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
958 == VM_PAGE_NULL)
959 vm_page_more_fictitious();
960
961 m->fictitious = FALSE;
962 pmap_clear_noencrypt(phys_page);
963
964 vm_page_pages++;
965 vm_page_release(m);
966 }
967}
968
969/*
970 * vm_page_hash:
971 *
972 * Distributes the object/offset key pair among hash buckets.
973 *
974 * NOTE: The bucket count must be a power of 2
975 */
976#define vm_page_hash(object, offset) (\
977 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
978 & vm_page_hash_mask)
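
/*
 * Illustrative sketch (editorial addition, compiled out): how the routines
 * below turn a hash value into a bucket and the spin lock covering it.
 * vm_page_hash_example() is a hypothetical helper, not part of this file.
 */
#if 0
static vm_page_t
vm_page_hash_example(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	int			hash_id;
	vm_page_bucket_t	*bucket;
	lck_spin_t		*bucket_lock;
	vm_page_t		mem;

	hash_id = vm_page_hash(object, offset);
	bucket = &vm_page_buckets[hash_id];
	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

	lck_spin_lock(bucket_lock);
	for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
		if (mem->object == object && mem->offset == offset)
			break;
	}
	lck_spin_unlock(bucket_lock);

	return mem;
}
#endif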
979
980
981/*
982 * vm_page_insert: [ internal use only ]
983 *
984 * Inserts the given mem entry into the object/object-page
985 * table and object list.
986 *
987 * The object must be locked.
988 */
989void
990vm_page_insert(
991 vm_page_t mem,
992 vm_object_t object,
993 vm_object_offset_t offset)
994{
995 vm_page_insert_internal(mem, object, offset, FALSE, TRUE, FALSE);
996}
997
998void
999vm_page_insert_internal(
1000 vm_page_t mem,
1001 vm_object_t object,
1002 vm_object_offset_t offset,
1003 boolean_t queues_lock_held,
1004 boolean_t insert_in_hash,
1005 boolean_t batch_pmap_op)
1006{
1007 vm_page_bucket_t *bucket;
1008 lck_spin_t *bucket_lock;
1009 int hash_id;
1010
1011 XPR(XPR_VM_PAGE,
1012 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1013 object, offset, mem, 0,0);
1014#if 0
1015 /*
1016 * we may not hold the page queue lock
1017 * so this check isn't safe to make
1018 */
1019 VM_PAGE_CHECK(mem);
1020#endif
1021
1022 if (object == vm_submap_object) {
1023 /* the vm_submap_object is only a placeholder for submaps */
1024 panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
1025 }
1026
1027 vm_object_lock_assert_exclusive(object);
1028#if DEBUG
1029 lck_mtx_assert(&vm_page_queue_lock,
1030 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1031 : LCK_MTX_ASSERT_NOTOWNED);
1032#endif /* DEBUG */
1033
1034 if (insert_in_hash == TRUE) {
1035#if DEBUG
1036 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1037 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1038 "already in (obj=%p,off=0x%llx)",
1039 mem, object, offset, mem->object, mem->offset);
1040#endif
1041 assert(!object->internal || offset < object->vo_size);
1042
1043 /* only insert "pageout" pages into "pageout" objects,
1044 * and normal pages into normal objects */
1045 assert(object->pageout == mem->pageout);
1046
1047 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1048
1049 /*
1050 * Record the object/offset pair in this page
1051 */
1052
1053 mem->object = object;
1054 mem->offset = offset;
1055
1056 /*
1057 * Insert it into the object_object/offset hash table
1058 */
1059 hash_id = vm_page_hash(object, offset);
1060 bucket = &vm_page_buckets[hash_id];
1061 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1062
1063 lck_spin_lock(bucket_lock);
1064
1065 mem->next = bucket->pages;
1066 bucket->pages = mem;
1067#if MACH_PAGE_HASH_STATS
1068 if (++bucket->cur_count > bucket->hi_count)
1069 bucket->hi_count = bucket->cur_count;
1070#endif /* MACH_PAGE_HASH_STATS */
1071
1072 lck_spin_unlock(bucket_lock);
1073 }
1074
1075 {
1076 unsigned int cache_attr;
1077
1078 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1079
1080 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1081 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1082 }
1083 }
1084 /*
1085 * Now link into the object's list of backed pages.
1086 */
1087
1088 VM_PAGE_INSERT(mem, object);
1089 mem->tabled = TRUE;
1090
1091 /*
1092 * Show that the object has one more resident page.
1093 */
1094
1095 object->resident_page_count++;
1096 if (VM_PAGE_WIRED(mem)) {
1097 object->wired_page_count++;
1098 }
1099 assert(object->resident_page_count >= object->wired_page_count);
1100
1101 assert(!mem->reusable);
1102
1103 if (object->purgable == VM_PURGABLE_VOLATILE) {
1104 if (VM_PAGE_WIRED(mem)) {
1105 OSAddAtomic(1, &vm_page_purgeable_wired_count);
1106 } else {
1107 OSAddAtomic(1, &vm_page_purgeable_count);
1108 }
1109 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1110 mem->throttled) {
1111 /*
1112 * This page belongs to a purged VM object but hasn't
1113 * been purged (because it was "busy").
1114 * It's in the "throttled" queue and hence not
1115 * visible to vm_pageout_scan(). Move it to a pageable
1116 * queue, so that it can eventually be reclaimed, instead
1117 * of lingering in the "empty" object.
1118 */
1119 if (queues_lock_held == FALSE)
1120 vm_page_lockspin_queues();
1121 vm_page_deactivate(mem);
1122 if (queues_lock_held == FALSE)
1123 vm_page_unlock_queues();
1124 }
1125}
1126
1127/*
1128 * vm_page_replace:
1129 *
1130 * Exactly like vm_page_insert, except that we first
1131 * remove any existing page at the given offset in object.
1132 *
1133 * The object must be locked.
1134 */
1135void
1136vm_page_replace(
1137 register vm_page_t mem,
1138 register vm_object_t object,
1139 register vm_object_offset_t offset)
1140{
1141 vm_page_bucket_t *bucket;
1142 vm_page_t found_m = VM_PAGE_NULL;
1143 lck_spin_t *bucket_lock;
1144 int hash_id;
1145
1146#if 0
1147 /*
1148 * we don't hold the page queue lock
1149 * so this check isn't safe to make
1150 */
1151 VM_PAGE_CHECK(mem);
1152#endif
1153 vm_object_lock_assert_exclusive(object);
1154#if DEBUG
1155 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1156 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1157 "already in (obj=%p,off=0x%llx)",
1158 mem, object, offset, mem->object, mem->offset);
1159 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1160#endif
1161 /*
1162 * Record the object/offset pair in this page
1163 */
1164
1165 mem->object = object;
1166 mem->offset = offset;
1167
1168 /*
1169 * Insert it into the object_object/offset hash table,
1170 * replacing any page that might have been there.
1171 */
1172
1173 hash_id = vm_page_hash(object, offset);
1174 bucket = &vm_page_buckets[hash_id];
1175 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1176
1177 lck_spin_lock(bucket_lock);
1178
1179 if (bucket->pages) {
1180 vm_page_t *mp = &bucket->pages;
1181 vm_page_t m = *mp;
1182
1183 do {
1184 if (m->object == object && m->offset == offset) {
1185 /*
1186 * Remove old page from hash list
1187 */
1188 *mp = m->next;
1189
1190 found_m = m;
1191 break;
1192 }
1193 mp = &m->next;
1194 } while ((m = *mp));
1195
1196 mem->next = bucket->pages;
1197 } else {
1198 mem->next = VM_PAGE_NULL;
1199 }
1200 /*
1201 * insert new page at head of hash list
1202 */
1203 bucket->pages = mem;
1204
1205 lck_spin_unlock(bucket_lock);
1206
1207 if (found_m) {
1208 /*
1209 * there was already a page at the specified
1210 * offset for this object... remove it from
1211 * the object and free it back to the free list
1212 */
1213 vm_page_free_unlocked(found_m, FALSE);
1214 }
1215 vm_page_insert_internal(mem, object, offset, FALSE, FALSE, FALSE);
1216}
1217
1218/*
1219 * vm_page_remove: [ internal use only ]
1220 *
1221 * Removes the given mem entry from the object/offset-page
1222 * table and the object page list.
1223 *
1224 * The object must be locked.
1225 */
1226
1227void
1228vm_page_remove(
1229 vm_page_t mem,
1230 boolean_t remove_from_hash)
1231{
1232 vm_page_bucket_t *bucket;
1233 vm_page_t this;
1234 lck_spin_t *bucket_lock;
1235 int hash_id;
1236
1237 XPR(XPR_VM_PAGE,
1238 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1239 mem->object, mem->offset,
1240 mem, 0,0);
1241
1242 vm_object_lock_assert_exclusive(mem->object);
1243 assert(mem->tabled);
1244 assert(!mem->cleaning);
1245 assert(!mem->laundry);
1246#if 0
1247 /*
1248 * we don't hold the page queue lock
1249 * so this check isn't safe to make
1250 */
1251 VM_PAGE_CHECK(mem);
1252#endif
1253 if (remove_from_hash == TRUE) {
1254 /*
1255 * Remove from the object_object/offset hash table
1256 */
1257 hash_id = vm_page_hash(mem->object, mem->offset);
1258 bucket = &vm_page_buckets[hash_id];
1259 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1260
1261 lck_spin_lock(bucket_lock);
1262
1263 if ((this = bucket->pages) == mem) {
1264 /* optimize for common case */
1265
1266 bucket->pages = mem->next;
1267 } else {
1268 vm_page_t *prev;
1269
1270 for (prev = &this->next;
1271 (this = *prev) != mem;
1272 prev = &this->next)
1273 continue;
1274 *prev = this->next;
1275 }
1276#if MACH_PAGE_HASH_STATS
1277 bucket->cur_count--;
1278#endif /* MACH_PAGE_HASH_STATS */
1279
1280 lck_spin_unlock(bucket_lock);
1281 }
1282 /*
1283 * Now remove from the object's list of backed pages.
1284 */
1285
1286 VM_PAGE_REMOVE(mem);
1287
1288 /*
1289 * And show that the object has one fewer resident
1290 * page.
1291 */
1292
1293 assert(mem->object->resident_page_count > 0);
1294 mem->object->resident_page_count--;
1295
1296 if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
1297 if (mem->object->resident_page_count == 0)
1298 vm_object_cache_remove(mem->object);
1299 }
1300
1301 if (VM_PAGE_WIRED(mem)) {
1302 assert(mem->object->wired_page_count > 0);
1303 mem->object->wired_page_count--;
1304 }
1305 assert(mem->object->resident_page_count >=
1306 mem->object->wired_page_count);
1307 if (mem->reusable) {
1308 assert(mem->object->reusable_page_count > 0);
1309 mem->object->reusable_page_count--;
1310 assert(mem->object->reusable_page_count <=
1311 mem->object->resident_page_count);
1312 mem->reusable = FALSE;
1313 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1314 vm_page_stats_reusable.reused_remove++;
1315 } else if (mem->object->all_reusable) {
1316 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1317 vm_page_stats_reusable.reused_remove++;
1318 }
1319
1320 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1321 if (VM_PAGE_WIRED(mem)) {
1322 assert(vm_page_purgeable_wired_count > 0);
1323 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1324 } else {
1325 assert(vm_page_purgeable_count > 0);
1326 OSAddAtomic(-1, &vm_page_purgeable_count);
1327 }
1328 }
1329 if (mem->object->set_cache_attr == TRUE)
1330 pmap_set_cache_attributes(mem->phys_page, 0);
1331
1332 mem->tabled = FALSE;
1333 mem->object = VM_OBJECT_NULL;
1334 mem->offset = (vm_object_offset_t) -1;
1335}
1336
1337
1338/*
1339 * vm_page_lookup:
1340 *
1341 * Returns the page associated with the object/offset
1342 * pair specified; if none is found, VM_PAGE_NULL is returned.
1343 *
1344 * The object must be locked. No side effects.
1345 */
1346
1347unsigned long vm_page_lookup_hint = 0;
1348unsigned long vm_page_lookup_hint_next = 0;
1349unsigned long vm_page_lookup_hint_prev = 0;
1350unsigned long vm_page_lookup_hint_miss = 0;
1351unsigned long vm_page_lookup_bucket_NULL = 0;
1352unsigned long vm_page_lookup_miss = 0;
1353
1354
1355vm_page_t
1356vm_page_lookup(
1357 vm_object_t object,
1358 vm_object_offset_t offset)
1359{
1360 vm_page_t mem;
1361 vm_page_bucket_t *bucket;
1362 queue_entry_t qe;
1363 lck_spin_t *bucket_lock;
1364 int hash_id;
1365
1366 vm_object_lock_assert_held(object);
1367 mem = object->memq_hint;
1368
1369 if (mem != VM_PAGE_NULL) {
1370 assert(mem->object == object);
1371
1372 if (mem->offset == offset) {
1373 vm_page_lookup_hint++;
1374 return mem;
1375 }
1376 qe = queue_next(&mem->listq);
1377
1378 if (! queue_end(&object->memq, qe)) {
1379 vm_page_t next_page;
1380
1381 next_page = (vm_page_t) qe;
1382 assert(next_page->object == object);
1383
1384 if (next_page->offset == offset) {
1385 vm_page_lookup_hint_next++;
1386 object->memq_hint = next_page; /* new hint */
1387 return next_page;
1388 }
1389 }
1390 qe = queue_prev(&mem->listq);
1391
1392 if (! queue_end(&object->memq, qe)) {
1393 vm_page_t prev_page;
1394
1395 prev_page = (vm_page_t) qe;
1396 assert(prev_page->object == object);
1397
1398 if (prev_page->offset == offset) {
1399 vm_page_lookup_hint_prev++;
1400 object->memq_hint = prev_page; /* new hint */
1401 return prev_page;
1402 }
1403 }
1404 }
1405 /*
1406 * Search the hash table for this object/offset pair
1407 */
1408 hash_id = vm_page_hash(object, offset);
1409 bucket = &vm_page_buckets[hash_id];
1410
1411 /*
1412 * since we hold the object lock, we are guaranteed that no
1413 * new pages can be inserted into this object... this in turn
1414 * guarantees that the page we're looking for can't exist
1415 * if the bucket it hashes to is currently NULL even when looked
1416 * at outside the scope of the hash bucket lock... this is a
1417 * really cheap optimization to avoid taking the lock
1418 */
1419 if (bucket->pages == VM_PAGE_NULL) {
1420 vm_page_lookup_bucket_NULL++;
1421
1422 return (VM_PAGE_NULL);
1423 }
1424 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1425
1426 lck_spin_lock(bucket_lock);
1427
1428 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1429#if 0
1430 /*
1431 * we don't hold the page queue lock
1432 * so this check isn't safe to make
1433 */
1434 VM_PAGE_CHECK(mem);
1435#endif
1436 if ((mem->object == object) && (mem->offset == offset))
1437 break;
1438 }
1439 lck_spin_unlock(bucket_lock);
1440
1441 if (mem != VM_PAGE_NULL) {
1442 if (object->memq_hint != VM_PAGE_NULL) {
1443 vm_page_lookup_hint_miss++;
1444 }
1445 assert(mem->object == object);
1446 object->memq_hint = mem;
1447 } else
1448 vm_page_lookup_miss++;
1449
1450 return(mem);
1451}
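
/*
 * Editorial usage sketch (compiled out): callers hold the object lock
 * across the lookup and then deal with a possibly busy page themselves.
 * vm_page_lookup_usage_example() is hypothetical and not part of this file.
 */
#if 0
static void
vm_page_lookup_usage_example(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t	m;

	vm_object_lock(object);
	m = vm_page_lookup(object, offset);
	if (m != VM_PAGE_NULL && !m->busy) {
		/* safe to examine or manipulate the page here */
	}
	vm_object_unlock(object);
}
#endif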
1452
1453
1454/*
1455 * vm_page_rename:
1456 *
1457 * Move the given memory entry from its
1458 * current object to the specified target object/offset.
1459 *
1460 * The object must be locked.
1461 */
1462void
1463vm_page_rename(
1464 register vm_page_t mem,
1465 register vm_object_t new_object,
1466 vm_object_offset_t new_offset,
1467 boolean_t encrypted_ok)
1468{
1469 assert(mem->object != new_object);
1470
1471 /*
1472 * ENCRYPTED SWAP:
1473 * The encryption key is based on the page's memory object
1474 * (aka "pager") and paging offset. Moving the page to
1475 * another VM object changes its "pager" and "paging_offset"
1476 * so it has to be decrypted first, or we would lose the key.
1477 *
1478 * One exception is VM object collapsing, where we transfer pages
1479 * from one backing object to its parent object. This operation also
1480 * transfers the paging information, so the <pager,paging_offset> info
1481 * should remain consistent. The caller (vm_object_do_collapse())
1482 * sets "encrypted_ok" in this case.
1483 */
1484 if (!encrypted_ok && mem->encrypted) {
1485 panic("vm_page_rename: page %p is encrypted\n", mem);
1486 }
1487
1488 XPR(XPR_VM_PAGE,
1489 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1490 new_object, new_offset,
1491 mem, 0,0);
1492
1493 /*
1494 * Changes to mem->object require the page lock because
1495 * the pageout daemon uses that lock to get the object.
1496 */
1497 vm_page_lockspin_queues();
1498
1499 vm_page_remove(mem, TRUE);
1500 vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE, FALSE);
1501
1502 vm_page_unlock_queues();
1503}
1504
1505/*
1506 * vm_page_init:
1507 *
1508 * Initialize the fields in a new page.
1509 * This takes a structure with random values and initializes it
1510 * so that it can be given to vm_page_release or vm_page_insert.
1511 */
1512void
1513vm_page_init(
1514 vm_page_t mem,
1515 ppnum_t phys_page,
1516 boolean_t lopage)
1517{
1518 assert(phys_page);
1519
1520#if DEBUG
1521 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
1522 if (!(pmap_valid_page(phys_page))) {
1523 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
1524 }
1525 }
1526#endif
1527 *mem = vm_page_template;
1528 mem->phys_page = phys_page;
1529#if 0
1530 /*
1531 * we're leaving this turned off for now... currently pages
1532 * come off the free list and are either immediately dirtied/referenced
1533 * due to zero-fill or COW faults, or are used to read or write files...
1534 * in the file I/O case, the UPL mechanism takes care of clearing
1535 * the state of the HW ref/mod bits in a somewhat fragile way.
1536 * Since we may change the way this works in the future (to toughen it up),
1537 * I'm leaving this as a reminder of where these bits could get cleared
1538 */
1539
1540 /*
1541 * make sure both the h/w referenced and modified bits are
1542 * clear at this point... we are especially dependent on
1543 * not finding a 'stale' h/w modified bit in a number of spots
1544 * once this page goes back into use
1545 */
1546 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
1547#endif
1548 mem->lopage = lopage;
1549}
1550
1551/*
1552 * vm_page_grab_fictitious:
1553 *
1554 * Remove a fictitious page from the free list.
1555 * Returns VM_PAGE_NULL if there are no free pages.
1556 */
1557int c_vm_page_grab_fictitious = 0;
1558int c_vm_page_grab_fictitious_failed = 0;
1559int c_vm_page_release_fictitious = 0;
1560int c_vm_page_more_fictitious = 0;
1561
1562vm_page_t
1563vm_page_grab_fictitious_common(
1564 ppnum_t phys_addr)
1565{
1566 vm_page_t m;
1567
1568 if ((m = (vm_page_t)zget(vm_page_zone))) {
1569
1570 vm_page_init(m, phys_addr, FALSE);
1571 m->fictitious = TRUE;
1572
1573 c_vm_page_grab_fictitious++;
1574 } else
1575 c_vm_page_grab_fictitious_failed++;
1576
1577 return m;
1578}
1579
1580vm_page_t
1581vm_page_grab_fictitious(void)
1582{
1583 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1584}
1585
1586vm_page_t
1587vm_page_grab_guard(void)
1588{
1589 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1590}
1591
1592
1593/*
1594 * vm_page_release_fictitious:
1595 *
1596 * Release a fictitious page to the zone pool
1597 */
1598void
1599vm_page_release_fictitious(
1600 vm_page_t m)
1601{
1602 assert(!m->free);
1603 assert(m->fictitious);
1604 assert(m->phys_page == vm_page_fictitious_addr ||
1605 m->phys_page == vm_page_guard_addr);
1606
1607 c_vm_page_release_fictitious++;
1608
1609 zfree(vm_page_zone, m);
1610}
1611
1612/*
1613 * vm_page_more_fictitious:
1614 *
1615 * Add more fictitious pages to the zone.
1616 * Allowed to block. This routine is way intimate
1617 * with the zones code, for several reasons:
1618 * 1. we need to carve some page structures out of physical
1619 * memory before zones work, so they _cannot_ come from
1620 * the zone_map.
1621 * 2. the zone needs to be collectable in order to prevent
1622 * growth without bound. These structures are used by
1623 * the device pager (by the hundreds and thousands), as
1624 * private pages for pageout, and as blocking pages for
1625 * pagein. Temporary bursts in demand should not result in
1626 * permanent allocation of a resource.
1627 * 3. To smooth allocation humps, we allocate single pages
1628 * with kernel_memory_allocate(), and cram them into the
1629 * zone.
1630 */
1631
1632void vm_page_more_fictitious(void)
1633{
1634 vm_offset_t addr;
1635 kern_return_t retval;
1636
1637 c_vm_page_more_fictitious++;
1638
1639 /*
1640 * Allocate a single page from the zone_map. Do not wait if no physical
1641 * pages are immediately available, and do not zero the space. We need
1642 * our own blocking lock here to prevent having multiple,
1643 * simultaneous requests from piling up on the zone_map lock. Exactly
1644 * one (of our) threads should be potentially waiting on the map lock.
1645 * If winner is not vm-privileged, then the page allocation will fail,
1646 * and it will temporarily block here in the vm_page_wait().
1647 */
1648 lck_mtx_lock(&vm_page_alloc_lock);
1649 /*
1650 * If another thread allocated space, just bail out now.
1651 */
1652 if (zone_free_count(vm_page_zone) > 5) {
1653 /*
1654 * The number "5" is a small number that is larger than the
1655 * number of fictitious pages that any single caller will
1656 * attempt to allocate. Otherwise, a thread will attempt to
1657 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1658 * release all of the resources and locks already acquired,
1659 * and then call this routine. This routine finds the pages
1660 * that the caller released, so fails to allocate new space.
1661 * The process repeats infinitely. The largest known number
1662 * of fictitious pages required in this manner is 2. 5 is
1663 * simply a somewhat larger number.
1664 */
1665 lck_mtx_unlock(&vm_page_alloc_lock);
1666 return;
1667 }
1668
1669 retval = kernel_memory_allocate(zone_map,
1670 &addr, PAGE_SIZE, VM_PROT_ALL,
1671 KMA_KOBJECT|KMA_NOPAGEWAIT);
1672 if (retval != KERN_SUCCESS) {
1673 /*
1674 * No page was available. Drop the
1675 * lock to give another thread a chance at it, and
1676 * wait for the pageout daemon to make progress.
1677 */
1678 lck_mtx_unlock(&vm_page_alloc_lock);
1679 vm_page_wait(THREAD_UNINT);
1680 return;
1681 }
1682 zcram(vm_page_zone, addr, PAGE_SIZE);
1683
1684 lck_mtx_unlock(&vm_page_alloc_lock);
1685}
1686
1687
1688/*
1689 * vm_pool_low():
1690 *
1691 * Return true if it is not likely that a non-vm_privileged thread
1692 * can get memory without blocking. Advisory only, since the
1693 * situation may change under us.
1694 */
1695int
1696vm_pool_low(void)
1697{
1698 /* No locking, at worst we will fib. */
1699 return( vm_page_free_count <= vm_page_free_reserved );
1700}
1701
1702
1703
1704/*
1705 * this is an interface to support bring-up of drivers
1706 * on platforms with physical memory > 4G...
1707 */
1708int vm_himemory_mode = 0;
1709
1710
1711/*
1712 * this interface exists to support hardware controllers
1713 * incapable of generating DMAs with more than 32 bits
1714 * of address on platforms with physical memory > 4G...
1715 */
1716unsigned int vm_lopages_allocated_q = 0;
1717unsigned int vm_lopages_allocated_cpm_success = 0;
1718unsigned int vm_lopages_allocated_cpm_failed = 0;
1719queue_head_t vm_lopage_queue_free;
1720
1721vm_page_t
1722vm_page_grablo(void)
1723{
1724 vm_page_t mem;
1725
1726 if (vm_lopage_needed == FALSE)
1727 return (vm_page_grab());
1728
1729 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1730
1731 if ( !queue_empty(&vm_lopage_queue_free)) {
1732 queue_remove_first(&vm_lopage_queue_free,
1733 mem,
1734 vm_page_t,
1735 pageq);
1736 assert(vm_lopage_free_count);
1737
1738 vm_lopage_free_count--;
1739 vm_lopages_allocated_q++;
1740
1741 if (vm_lopage_free_count < vm_lopage_lowater)
1742 vm_lopage_refill = TRUE;
1743
1744 lck_mtx_unlock(&vm_page_queue_free_lock);
1745 } else {
1746 lck_mtx_unlock(&vm_page_queue_free_lock);
1747
1748 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
1749
1750 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1751 vm_lopages_allocated_cpm_failed++;
1752 lck_mtx_unlock(&vm_page_queue_free_lock);
1753
1754 return (VM_PAGE_NULL);
1755 }
1756 mem->busy = TRUE;
1757
1758 vm_page_lockspin_queues();
1759
1760 mem->gobbled = FALSE;
1761 vm_page_gobble_count--;
1762 vm_page_wire_count--;
1763
1764 vm_lopages_allocated_cpm_success++;
1765 vm_page_unlock_queues();
1766 }
1767 assert(mem->busy);
1768 assert(!mem->free);
1769 assert(!mem->pmapped);
1770 assert(!mem->wpmapped);
1771 assert(!pmap_is_noencrypt(mem->phys_page));
1772
1773 mem->pageq.next = NULL;
1774 mem->pageq.prev = NULL;
1775
1776 return (mem);
1777}
1778
1779
1780/*
1781 * vm_page_grab:
1782 *
1783 * first try to grab a page from the per-cpu free list...
1784 * this must be done while pre-emption is disabled... if
1785 * a page is available, we're done...
1786 * if no page is available, grab the vm_page_queue_free_lock
1787 * and see if current number of free pages would allow us
1788 * to grab at least 1... if not, return VM_PAGE_NULL as before...
1789 * if there are pages available, disable preemption and
1790 * recheck the state of the per-cpu free list... we could
1791 * have been preempted and moved to a different cpu, or
1792 * some other thread could have re-filled it... if still
1793 * empty, figure out how many pages we can steal from the
1794 * global free queue and move to the per-cpu queue...
1795 * return 1 of these pages when done... only wakeup the
1796 * pageout_scan thread if we moved pages from the global
1797 * list... no need for the wakeup if we've satisfied the
1798 * request from the per-cpu queue.
1799 */
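
/*
 * Editorial usage sketch (compiled out): a typical non-privileged caller
 * loops until a page is available, blocking in VM_PAGE_WAIT() whenever
 * vm_page_grab() returns VM_PAGE_NULL. The helper name is hypothetical.
 */
#if 0
static vm_page_t
vm_page_grab_blocking_example(void)
{
	vm_page_t	mem;

	for (;;) {
		mem = vm_page_grab();
		if (mem != VM_PAGE_NULL)
			break;
		VM_PAGE_WAIT();
	}
	return mem;
}
#endif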
1800
1801#define COLOR_GROUPS_TO_STEAL 4
1802
1803
1804vm_page_t
1805vm_page_grab( void )
1806{
1807 vm_page_t mem;
1808
1809
1810 disable_preemption();
1811
1812 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1813return_page_from_cpu_list:
1814 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1815 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1816 mem->pageq.next = NULL;
1817
1818 enable_preemption();
1819
1820 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1821 assert(mem->tabled == FALSE);
1822 assert(mem->object == VM_OBJECT_NULL);
1823 assert(!mem->laundry);
1824 assert(!mem->free);
1825 assert(pmap_verify_free(mem->phys_page));
1826 assert(mem->busy);
1827 assert(!mem->encrypted);
1828 assert(!mem->pmapped);
1829 assert(!mem->wpmapped);
1830 assert(!mem->active);
1831 assert(!mem->inactive);
1832 assert(!mem->throttled);
1833 assert(!mem->speculative);
1834 assert(!pmap_is_noencrypt(mem->phys_page));
1835
1836 return mem;
1837 }
1838 enable_preemption();
1839
1840
1841 /*
1842 * Optionally produce warnings if the wire or gobble
1843 * counts exceed some threshold.
1844 */
1845 if (vm_page_wire_count_warning > 0
1846 && vm_page_wire_count >= vm_page_wire_count_warning) {
1847 printf("mk: vm_page_grab(): high wired page count of %d\n",
1848 vm_page_wire_count);
1849 assert(vm_page_wire_count < vm_page_wire_count_warning);
1850 }
1851 if (vm_page_gobble_count_warning > 0
1852 && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1853 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1854 vm_page_gobble_count);
1855 assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1856 }
1857
1858 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1859
1860 /*
1861 * Only let privileged threads (involved in pageout)
1862 * dip into the reserved pool.
1863 */
1864 if ((vm_page_free_count < vm_page_free_reserved) &&
1865 !(current_thread()->options & TH_OPT_VMPRIV)) {
1866 lck_mtx_unlock(&vm_page_queue_free_lock);
1867 mem = VM_PAGE_NULL;
1868 }
1869 else {
1870 vm_page_t head;
1871 vm_page_t tail;
1872 unsigned int pages_to_steal;
1873 unsigned int color;
1874
1875 while ( vm_page_free_count == 0 ) {
1876
1877 lck_mtx_unlock(&vm_page_queue_free_lock);
1878 /*
1879 * must be a privileged thread to be
1880 * in this state since a non-privileged
1881 * thread would have bailed if we were
1882 * under the vm_page_free_reserved mark
1883 */
1884 VM_PAGE_WAIT();
1885 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1886 }
1887
1888 disable_preemption();
1889
1890 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1891 lck_mtx_unlock(&vm_page_queue_free_lock);
1892
1893 /*
1894 * we got preempted and moved to another processor
1895 * or we got preempted and someone else ran and filled the cache
1896 */
1897 goto return_page_from_cpu_list;
1898 }
1899 if (vm_page_free_count <= vm_page_free_reserved)
1900 pages_to_steal = 1;
1901 else {
1902 pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
1903
1904 if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
1905 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
1906 }
1907 color = PROCESSOR_DATA(current_processor(), start_color);
1908 head = tail = NULL;
1909
1910 while (pages_to_steal--) {
1911 if (--vm_page_free_count < vm_page_free_count_minimum)
1912 vm_page_free_count_minimum = vm_page_free_count;
1913
1914 while (queue_empty(&vm_page_queue_free[color]))
1915 color = (color + 1) & vm_color_mask;
1916
1917 queue_remove_first(&vm_page_queue_free[color],
1918 mem,
1919 vm_page_t,
1920 pageq);
1921 mem->pageq.next = NULL;
1922 mem->pageq.prev = NULL;
1923
1924 assert(!mem->active);
1925 assert(!mem->inactive);
1926 assert(!mem->throttled);
1927 assert(!mem->speculative);
1928
1929 color = (color + 1) & vm_color_mask;
1930
1931 if (head == NULL)
1932 head = mem;
1933 else
1934 tail->pageq.next = (queue_t)mem;
1935 tail = mem;
1936
1937 mem->pageq.prev = NULL;
1938 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1939 assert(mem->tabled == FALSE);
1940 assert(mem->object == VM_OBJECT_NULL);
1941 assert(!mem->laundry);
1942 assert(mem->free);
1943 mem->free = FALSE;
1944
1945 assert(pmap_verify_free(mem->phys_page));
1946 assert(mem->busy);
1947 assert(!mem->free);
1948 assert(!mem->encrypted);
1949 assert(!mem->pmapped);
1950 assert(!mem->wpmapped);
1951 assert(!pmap_is_noencrypt(mem->phys_page));
1952 }
1953 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
1954 PROCESSOR_DATA(current_processor(), start_color) = color;
1955
1956 /*
1957 * satisfy this request
1958 */
1959 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1960 mem = head;
1961 mem->pageq.next = NULL;
1962
1963 lck_mtx_unlock(&vm_page_queue_free_lock);
1964
1965 enable_preemption();
1966 }
1967 /*
1968 * Decide if we should poke the pageout daemon.
1969 * We do this if the free count is less than the low
1970 * water mark, or if the free count is less than the high
1971 * water mark (but above the low water mark) and the inactive
1972 * count is less than its target.
1973 *
1974 * We don't have the counts locked ... if they change a little,
1975 * it doesn't really matter.
1976 */
1977 if ((vm_page_free_count < vm_page_free_min) ||
1978 ((vm_page_free_count < vm_page_free_target) &&
1979 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
1980 thread_wakeup((event_t) &vm_page_free_wanted);
1981
1982 VM_CHECK_MEMORYSTATUS;
1983
1984// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
1985
1986 return mem;
1987}
1988
1989/*
1990 * vm_page_release:
1991 *
1992 * Return a page to the free list.
1993 */
1994
1995void
1996vm_page_release(
1997 register vm_page_t mem)
1998{
1999 unsigned int color;
2000 int need_wakeup = 0;
2001 int need_priv_wakeup = 0;
2002
2003
2004 assert(!mem->private && !mem->fictitious);
2005 if (vm_page_free_verify) {
2006 assert(pmap_verify_free(mem->phys_page));
2007 }
2008// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
2009
2010 pmap_clear_noencrypt(mem->phys_page);
2011
2012 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2013#if DEBUG
2014 if (mem->free)
2015 panic("vm_page_release");
2016#endif
2017
2018 assert(mem->busy);
2019 assert(!mem->laundry);
2020 assert(mem->object == VM_OBJECT_NULL);
2021 assert(mem->pageq.next == NULL &&
2022 mem->pageq.prev == NULL);
2023 assert(mem->listq.next == NULL &&
2024 mem->listq.prev == NULL);
2025
2026 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2027 vm_lopage_free_count < vm_lopage_free_limit &&
2028 mem->phys_page < max_valid_low_ppnum) {
2029 /*
2030 * this exists to support hardware controllers
2031 * incapable of generating DMAs with more than 32 bits
2032 * of address on platforms with physical memory > 4G...
2033 */
2034 queue_enter_first(&vm_lopage_queue_free,
2035 mem,
2036 vm_page_t,
2037 pageq);
2038 vm_lopage_free_count++;
2039
2040 if (vm_lopage_free_count >= vm_lopage_free_limit)
2041 vm_lopage_refill = FALSE;
2042
2043 mem->lopage = TRUE;
2044 } else {
2045 mem->lopage = FALSE;
2046 mem->free = TRUE;
2047
2048 color = mem->phys_page & vm_color_mask;
2049 queue_enter_first(&vm_page_queue_free[color],
2050 mem,
2051 vm_page_t,
2052 pageq);
2053 vm_page_free_count++;
2054 /*
2055 * Check if we should wake up someone waiting for a page.
2056 * But don't bother waking them unless they can allocate.
2057 *
2058 * We wake up only one thread, to prevent starvation.
2059 * Because the scheduling system handles wait queues FIFO,
2060 * if we wake up all waiting threads, one greedy thread
2061 * can starve multiple niceguy threads. When the threads
2062 * all wake up, the greedy thread runs first, grabs the page,
2063 * and waits for another page. It will be the first to run
2064 * when the next page is freed.
2065 *
2066 * However, there is a slight danger here.
2067 * The thread we wake might not use the free page.
2068 * Then the other threads could wait indefinitely
2069 * while the page goes unused. To forestall this,
2070 * the pageout daemon will keep making free pages
2071 * as long as vm_page_free_wanted is non-zero.
2072 */
2073
2074 assert(vm_page_free_count > 0);
2075 if (vm_page_free_wanted_privileged > 0) {
2076 vm_page_free_wanted_privileged--;
2077 need_priv_wakeup = 1;
2078 } else if (vm_page_free_wanted > 0 &&
2079 vm_page_free_count > vm_page_free_reserved) {
2080 vm_page_free_wanted--;
2081 need_wakeup = 1;
2082 }
2083 }
2084 lck_mtx_unlock(&vm_page_queue_free_lock);
2085
2086 if (need_priv_wakeup)
2087 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2088 else if (need_wakeup)
2089 thread_wakeup_one((event_t) &vm_page_free_count);
2090
2091 VM_CHECK_MEMORYSTATUS;
2092}
2093
2094/*
2095 * vm_page_wait:
2096 *
2097 * Wait for a page to become available.
2098 * If there are plenty of free pages, then we don't sleep.
2099 *
2100 * Returns:
2101 * TRUE: There may be another page, try again
2102 * FALSE: We were interrupted out of our wait, don't try again
2103 */
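
/*
 * A minimal sketch of the typical caller pattern, pairing vm_page_grab()
 * with vm_page_wait(); this is the same pattern used by
 * vm_page_part_zero_fill() later in this file ('m' is hypothetical):
 *
 *	vm_page_t m;
 *
 *	for (;;) {
 *		m = vm_page_grab();
 *		if (m != VM_PAGE_NULL)
 *			break;
 *		vm_page_wait(THREAD_UNINT);
 *	}
 */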
2104
2105boolean_t
2106vm_page_wait(
2107 int interruptible )
2108{
2109 /*
2110 * We can't use vm_page_free_reserved to make this
2111 * determination. Consider: some thread might
2112 * need to allocate two pages. The first allocation
2113 * succeeds, the second fails. After the first page is freed,
2114 * a call to vm_page_wait must really block.
2115 */
2116 kern_return_t wait_result;
2117 int need_wakeup = 0;
2118 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
2119
2120 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2121
2122 if (is_privileged && vm_page_free_count) {
2123 lck_mtx_unlock(&vm_page_queue_free_lock);
2124 return TRUE;
2125 }
2126 if (vm_page_free_count < vm_page_free_target) {
2127
2128 if (is_privileged) {
2129 if (vm_page_free_wanted_privileged++ == 0)
2130 need_wakeup = 1;
2131 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2132 } else {
2133 if (vm_page_free_wanted++ == 0)
2134 need_wakeup = 1;
2135 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2136 }
2137 lck_mtx_unlock(&vm_page_queue_free_lock);
2138 counter(c_vm_page_wait_block++);
2139
2140 if (need_wakeup)
2141 thread_wakeup((event_t)&vm_page_free_wanted);
2142
2143 if (wait_result == THREAD_WAITING)
2144 wait_result = thread_block(THREAD_CONTINUE_NULL);
2145
2146 return(wait_result == THREAD_AWAKENED);
2147 } else {
2148 lck_mtx_unlock(&vm_page_queue_free_lock);
2149 return TRUE;
2150 }
2151}
2152
2153/*
2154 * vm_page_alloc:
2155 *
2156 * Allocate and return a memory cell associated
2157 * with this VM object/offset pair.
2158 *
2159 * Object must be locked.
2160 */
2161
2162vm_page_t
2163vm_page_alloc(
2164 vm_object_t object,
2165 vm_object_offset_t offset)
2166{
2167 register vm_page_t mem;
2168
2169 vm_object_lock_assert_exclusive(object);
2170 mem = vm_page_grab();
2171 if (mem == VM_PAGE_NULL)
2172 return VM_PAGE_NULL;
2173
2174 vm_page_insert(mem, object, offset);
2175
2176 return(mem);
2177}
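
/*
 * A hypothetical calling sequence for vm_page_alloc() (illustrative only):
 * the object must be locked exclusively across the call, and a VM_PAGE_NULL
 * return is normally handled by dropping the object lock, waiting for
 * memory and retrying:
 *
 *	vm_object_lock(object);
 *	while ((m = vm_page_alloc(object, offset)) == VM_PAGE_NULL) {
 *		vm_object_unlock(object);
 *		VM_PAGE_WAIT();
 *		vm_object_lock(object);
 *	}
 *	...initialize and map the page...
 *	vm_object_unlock(object);
 */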
2178
2179vm_page_t
2180vm_page_alloclo(
2181 vm_object_t object,
2182 vm_object_offset_t offset)
2183{
2184 register vm_page_t mem;
2185
2186 vm_object_lock_assert_exclusive(object);
2187 mem = vm_page_grablo();
2188 if (mem == VM_PAGE_NULL)
2189 return VM_PAGE_NULL;
2190
2191 vm_page_insert(mem, object, offset);
2192
2193 return(mem);
2194}
2195
2196
2197/*
2198 * vm_page_alloc_guard:
2199 *
2200 * Allocate a fictitious page which will be used
2201 * as a guard page. The page will be inserted into
2202 * the object and returned to the caller.
2203 */
2204
2205vm_page_t
2206vm_page_alloc_guard(
2207 vm_object_t object,
2208 vm_object_offset_t offset)
2209{
2210 register vm_page_t mem;
2211
2212 vm_object_lock_assert_exclusive(object);
2213 mem = vm_page_grab_guard();
2214 if (mem == VM_PAGE_NULL)
2215 return VM_PAGE_NULL;
2216
2217 vm_page_insert(mem, object, offset);
2218
2219 return(mem);
2220}
2221
2222
2223counter(unsigned int c_laundry_pages_freed = 0;)
2224
2225/*
2226 * vm_page_free_prepare:
2227 *
2228 * Removes page from any queue it may be on
2229 * and disassociates it from its VM object.
2230 *
2231 * Object and page queues must be locked prior to entry.
2232 */
2233static void
2234vm_page_free_prepare(
2235 vm_page_t mem)
2236{
2237 vm_page_free_prepare_queues(mem);
2238 vm_page_free_prepare_object(mem, TRUE);
2239}
2240
2241
2242void
2243vm_page_free_prepare_queues(
2244 vm_page_t mem)
2245{
2246 VM_PAGE_CHECK(mem);
2247 assert(!mem->free);
2248 assert(!mem->cleaning);
2249#if DEBUG
2250 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2251 if (mem->free)
2252 panic("vm_page_free: freeing page on free list\n");
2253#endif
2254 if (mem->object) {
2255 vm_object_lock_assert_exclusive(mem->object);
2256 }
2257 if (mem->laundry) {
2258 /*
2259 * We may have to free a page while it's being laundered
2260 * if we lost its pager (due to a forced unmount, for example).
2261 * We need to call vm_pageout_steal_laundry() before removing
2262 * the page from its VM object, so that we can remove it
2263 * from its pageout queue and adjust the laundry accounting
2264 */
2265 vm_pageout_steal_laundry(mem, TRUE);
2266 counter(++c_laundry_pages_freed);
2267 }
2268
2269 VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */
2270
2271 if (VM_PAGE_WIRED(mem)) {
2272 if (mem->object) {
2273 assert(mem->object->wired_page_count > 0);
2274 mem->object->wired_page_count--;
2275 assert(mem->object->resident_page_count >=
2276 mem->object->wired_page_count);
2277
2278 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2279 OSAddAtomic(+1, &vm_page_purgeable_count);
2280 assert(vm_page_purgeable_wired_count > 0);
2281 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2282 }
2283 }
2284 if (!mem->private && !mem->fictitious)
2285 vm_page_wire_count--;
2286 mem->wire_count = 0;
2287 assert(!mem->gobbled);
2288 } else if (mem->gobbled) {
2289 if (!mem->private && !mem->fictitious)
2290 vm_page_wire_count--;
2291 vm_page_gobble_count--;
2292 }
2293}
2294
2295
2296void
2297vm_page_free_prepare_object(
2298 vm_page_t mem,
2299 boolean_t remove_from_hash)
2300{
2301 if (mem->tabled)
2302 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
2303
2304 PAGE_WAKEUP(mem); /* clears wanted */
2305
2306 if (mem->private) {
2307 mem->private = FALSE;
2308 mem->fictitious = TRUE;
2309 mem->phys_page = vm_page_fictitious_addr;
2310 }
2311 if ( !mem->fictitious) {
2312 vm_page_init(mem, mem->phys_page, mem->lopage);
2313 }
2314}
2315
2316
2317/*
2318 * vm_page_free:
2319 *
2320 * Returns the given page to the free list,
2321 * disassociating it with any VM object.
2322 *
2323 * Object and page queues must be locked prior to entry.
2324 */
2325void
2326vm_page_free(
2327 vm_page_t mem)
2328{
2329 vm_page_free_prepare(mem);
2330
2331 if (mem->fictitious) {
2332 vm_page_release_fictitious(mem);
2333 } else {
2334 vm_page_release(mem);
2335 }
2336}
2337
2338
2339void
2340vm_page_free_unlocked(
2341 vm_page_t mem,
2342 boolean_t remove_from_hash)
2343{
2344 vm_page_lockspin_queues();
2345 vm_page_free_prepare_queues(mem);
2346 vm_page_unlock_queues();
2347
2348 vm_page_free_prepare_object(mem, remove_from_hash);
2349
2350 if (mem->fictitious) {
2351 vm_page_release_fictitious(mem);
2352 } else {
2353 vm_page_release(mem);
2354 }
2355}
2356
2357
2358/*
2359 * Free a list of pages. The list can be up to several hundred pages,
2360 * as blocked up by vm_pageout_scan().
2361 * The big win is not having to take the free list lock once
2362 * per page.
2363 */
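
/*
 * A minimal sketch of how a caller might batch pages for this routine
 * (hypothetical; the usual producer is vm_pageout_scan()). Pages are
 * chained through pageq.next with pageq.prev left NULL, just like the
 * local_freeq chains built further down in this function:
 *
 *	vm_page_t freeq = VM_PAGE_NULL;
 *
 *	...for each page 'm' to be freed...
 *		m->pageq.next = (queue_entry_t) freeq;
 *		freeq = m;
 *
 *	vm_page_free_list(freeq, TRUE);
 */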
2364void
2365vm_page_free_list(
2366 vm_page_t freeq,
2367 boolean_t prepare_object)
2368{
2369 vm_page_t mem;
2370 vm_page_t nxt;
2371 vm_page_t local_freeq;
2372 int pg_count;
2373
2374 while (freeq) {
2375
2376 pg_count = 0;
2377 local_freeq = VM_PAGE_NULL;
2378 mem = freeq;
2379
2380 /*
2381 * break up the processing into smaller chunks so
2382 * that we can 'pipeline' the pages onto the
2383 * free list w/o introducing too much
2384 * contention on the global free queue lock
2385 */
2386 while (mem && pg_count < 64) {
2387
2388 assert(!mem->inactive);
2389 assert(!mem->active);
2390 assert(!mem->throttled);
2391 assert(!mem->free);
2392 assert(!mem->speculative);
2393 assert(!VM_PAGE_WIRED(mem));
2394 assert(mem->pageq.prev == NULL);
2395
2396 nxt = (vm_page_t)(mem->pageq.next);
2397
2398 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2399 assert(pmap_verify_free(mem->phys_page));
2400 }
2401 if (prepare_object == TRUE)
2402 vm_page_free_prepare_object(mem, TRUE);
2403
2404 if (!mem->fictitious) {
2405 assert(mem->busy);
2406
2407 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2408 vm_lopage_free_count < vm_lopage_free_limit &&
2409 mem->phys_page < max_valid_low_ppnum) {
2410 mem->pageq.next = NULL;
2411 vm_page_release(mem);
2412 } else {
2413 /*
2414 * IMPORTANT: we can't set the page "free" here
2415 * because that would make the page eligible for
2416 * a physically-contiguous allocation (see
2417 * vm_page_find_contiguous()) right away (we don't
2418 * hold the vm_page_queue_free lock). That would
2419 * cause trouble because the page is not actually
2420 * in the free queue yet...
2421 */
2422 mem->pageq.next = (queue_entry_t)local_freeq;
2423 local_freeq = mem;
2424 pg_count++;
2425
2426 pmap_clear_noencrypt(mem->phys_page);
2427 }
2428 } else {
2429 assert(mem->phys_page == vm_page_fictitious_addr ||
2430 mem->phys_page == vm_page_guard_addr);
2431 vm_page_release_fictitious(mem);
2432 }
2433 mem = nxt;
2434 }
2435 freeq = mem;
2436
2437 if ( (mem = local_freeq) ) {
2438 unsigned int avail_free_count;
2439 unsigned int need_wakeup = 0;
2440 unsigned int need_priv_wakeup = 0;
2441
2442 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2443
2444 while (mem) {
2445 int color;
2446
2447 nxt = (vm_page_t)(mem->pageq.next);
2448
2449 assert(!mem->free);
2450 assert(mem->busy);
2451 mem->free = TRUE;
2452
2453 color = mem->phys_page & vm_color_mask;
2454 queue_enter_first(&vm_page_queue_free[color],
2455 mem,
2456 vm_page_t,
2457 pageq);
2458 mem = nxt;
2459 }
2460 vm_page_free_count += pg_count;
2461 avail_free_count = vm_page_free_count;
2462
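	/*
	 * Wakeup accounting, worked through with hypothetical numbers: if the
	 * batch just added leaves avail_free_count == 20 with 3 privileged
	 * waiters, 10 regular waiters and vm_page_free_reserved == 4, the
	 * first block below wakes the 3 privileged waiters and charges those
	 * pages against avail_free_count; the second block then compares what
	 * is left against the reserve (20 - 3 - 4 = 13 >= 10) and wakes all
	 * 10 regular waiters.
	 */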
2463 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
2464
2465 if (avail_free_count < vm_page_free_wanted_privileged) {
2466 need_priv_wakeup = avail_free_count;
2467 vm_page_free_wanted_privileged -= avail_free_count;
2468 avail_free_count = 0;
2469 } else {
2470 need_priv_wakeup = vm_page_free_wanted_privileged;
2471 vm_page_free_wanted_privileged = 0;
2472 avail_free_count -= need_priv_wakeup;
2473 }
2474 }
2475 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
2476 unsigned int available_pages;
2477
2478 available_pages = avail_free_count - vm_page_free_reserved;
2479
2480 if (available_pages >= vm_page_free_wanted) {
2481 need_wakeup = vm_page_free_wanted;
2482 vm_page_free_wanted = 0;
2483 } else {
2484 need_wakeup = available_pages;
2485 vm_page_free_wanted -= available_pages;
2486 }
2487 }
2488 lck_mtx_unlock(&vm_page_queue_free_lock);
2489
2490 if (need_priv_wakeup != 0) {
2491 /*
2492 * There shouldn't be that many VM-privileged threads,
2493 * so let's wake them all up, even if we don't quite
2494 * have enough pages to satisfy them all.
2495 */
2496 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2497 }
2498 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2499 /*
2500 * We don't expect to have any more waiters
2501 * after this, so let's wake them all up at
2502 * once.
2503 */
2504 thread_wakeup((event_t) &vm_page_free_count);
2505 } else for (; need_wakeup != 0; need_wakeup--) {
2506 /*
2507 * Wake up one waiter per page we just released.
2508 */
2509 thread_wakeup_one((event_t) &vm_page_free_count);
2510 }
2511
2512 VM_CHECK_MEMORYSTATUS;
2513 }
2514 }
2515}
2516
2517
2518/*
2519 * vm_page_wire:
2520 *
2521 * Mark this page as wired down by yet
2522 * another map, removing it from paging queues
2523 * as necessary.
2524 *
2525 * The page's object and the page queues must be locked.
2526 */
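
/*
 * Hypothetical calling sequence (illustrative only) showing the locking
 * that the comment above requires:
 *
 *	vm_object_lock(object);		exclusive object lock
 *	vm_page_lockspin_queues();
 *	vm_page_wire(m);
 *	vm_page_unlock_queues();
 *	vm_object_unlock(object);
 */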
2527void
2528vm_page_wire(
2529 register vm_page_t mem)
2530{
2531
2532// dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2533
2534 VM_PAGE_CHECK(mem);
2535 if (mem->object) {
2536 vm_object_lock_assert_exclusive(mem->object);
2537 } else {
2538 /*
2539 * In theory, the page should be in an object before it
2540 * gets wired, since we need to hold the object lock
2541 * to update some fields in the page structure.
2542 * However, some code (i386 pmap, for example) might want
2543 * to wire a page before it gets inserted into an object.
2544 * That's somewhat OK, as long as nobody else can get to
2545 * that page and update it at the same time.
2546 */
2547 }
2548#if DEBUG
2549 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2550#endif
2551 if ( !VM_PAGE_WIRED(mem)) {
2552
2553 if (mem->pageout_queue) {
2554 mem->pageout = FALSE;
2555 vm_pageout_throttle_up(mem);
2556 }
2557 VM_PAGE_QUEUES_REMOVE(mem);
2558
2559 if (mem->object) {
2560 mem->object->wired_page_count++;
2561 assert(mem->object->resident_page_count >=
2562 mem->object->wired_page_count);
2563 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2564 assert(vm_page_purgeable_count > 0);
2565 OSAddAtomic(-1, &vm_page_purgeable_count);
2566 OSAddAtomic(1, &vm_page_purgeable_wired_count);
2567 }
2568 if (mem->object->all_reusable) {
2569 /*
2570 * Wired pages are not counted as "re-usable"
2571 * in "all_reusable" VM objects, so nothing
2572 * to do here.
2573 */
2574 } else if (mem->reusable) {
2575 /*
2576 * This page is not "re-usable" when it's
2577 * wired, so adjust its state and the
2578 * accounting.
2579 */
2580 vm_object_reuse_pages(mem->object,
2581 mem->offset,
2582 mem->offset+PAGE_SIZE_64,
2583 FALSE);
2584 }
2585 }
2586 assert(!mem->reusable);
2587
2588 if (!mem->private && !mem->fictitious && !mem->gobbled)
2589 vm_page_wire_count++;
2590 if (mem->gobbled)
2591 vm_page_gobble_count--;
2592 mem->gobbled = FALSE;
2593
2594 VM_CHECK_MEMORYSTATUS;
2595
2596 /*
2597 * ENCRYPTED SWAP:
2598 * The page could be encrypted, but
2599 * we don't have to decrypt it here
2600 * because we don't guarantee that the
2601 * data is actually valid at this point.
2602 * The page will get decrypted in
2603 * vm_fault_wire() if needed.
2604 */
2605 }
2606 assert(!mem->gobbled);
2607 mem->wire_count++;
2608 VM_PAGE_CHECK(mem);
2609}
2610
2611/*
2612 * vm_page_gobble:
2613 *
2614 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2615 *
2616 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2617 */
2618void
2619vm_page_gobble(
2620 register vm_page_t mem)
2621{
2622 vm_page_lockspin_queues();
2623 VM_PAGE_CHECK(mem);
2624
2625 assert(!mem->gobbled);
2626 assert( !VM_PAGE_WIRED(mem));
2627
2628 if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
2629 if (!mem->private && !mem->fictitious)
2630 vm_page_wire_count++;
2631 }
2632 vm_page_gobble_count++;
2633 mem->gobbled = TRUE;
2634 vm_page_unlock_queues();
2635}
2636
2637/*
2638 * vm_page_unwire:
2639 *
2640 * Release one wiring of this page, potentially
2641 * enabling it to be paged again.
2642 *
2643 * The page's object and the page queues must be locked.
2644 */
2645void
2646vm_page_unwire(
2647 vm_page_t mem,
2648 boolean_t queueit)
2649{
2650
2651// dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
2652
2653 VM_PAGE_CHECK(mem);
2654 assert(VM_PAGE_WIRED(mem));
2655 assert(mem->object != VM_OBJECT_NULL);
2656#if DEBUG
2657 vm_object_lock_assert_exclusive(mem->object);
2658 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2659#endif
2660 if (--mem->wire_count == 0) {
2661 assert(!mem->private && !mem->fictitious);
2662 vm_page_wire_count--;
2663 assert(mem->object->wired_page_count > 0);
2664 mem->object->wired_page_count--;
2665 assert(mem->object->resident_page_count >=
2666 mem->object->wired_page_count);
2667 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2668 OSAddAtomic(+1, &vm_page_purgeable_count);
2669 assert(vm_page_purgeable_wired_count > 0);
2670 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2671 }
2672 assert(!mem->laundry);
2673 assert(mem->object != kernel_object);
2674 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
2675
2676 if (queueit == TRUE) {
2677 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
2678 vm_page_deactivate(mem);
2679 } else {
2680 vm_page_activate(mem);
2681 }
2682 }
2683
2684 VM_CHECK_MEMORYSTATUS;
2685
2686 }
2687 VM_PAGE_CHECK(mem);
2688}
2689
2690/*
2691 * vm_page_deactivate:
2692 *
2693 * Returns the given page to the inactive list,
2694 * indicating that no physical maps have access
2695 * to this page. [Used by the physical mapping system.]
2696 *
2697 * The page queues must be locked.
2698 */
2699void
2700vm_page_deactivate(
2701 vm_page_t m)
2702{
2703 vm_page_deactivate_internal(m, TRUE);
2704}
2705
2706
2707void
2708vm_page_deactivate_internal(
2709 vm_page_t m,
2710 boolean_t clear_hw_reference)
2711{
2712
2713 VM_PAGE_CHECK(m);
2714 assert(m->object != kernel_object);
2715 assert(m->phys_page != vm_page_guard_addr);
2716
2717// dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
2718#if DEBUG
2719 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2720#endif
2721 /*
2722 * This page is no longer very interesting. If it was
2723 * interesting (active or inactive/referenced), then we
2724 * clear the reference bit and (re)enter it in the
2725 * inactive queue. Note wired pages should not have
2726 * their reference bit cleared.
2727 */
2728 assert ( !(m->absent && !m->unusual));
2729
2730 if (m->gobbled) { /* can this happen? */
2731 assert( !VM_PAGE_WIRED(m));
2732
2733 if (!m->private && !m->fictitious)
2734 vm_page_wire_count--;
2735 vm_page_gobble_count--;
2736 m->gobbled = FALSE;
2737 }
2738 /*
2739 * if this page is currently on the pageout queue, we can't do the
2740 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2741 * and we can't remove it manually since we would need the object lock
2742 * (which is not required here) to decrement the activity_in_progress
2743 * reference which is held on the object while the page is in the pageout queue...
2744 * just let the normal laundry processing proceed
2745 */
2746 if (m->pageout_queue || m->private || m->fictitious || (VM_PAGE_WIRED(m)))
2747 return;
2748
2749 if (!m->absent && clear_hw_reference == TRUE)
2750 pmap_clear_reference(m->phys_page);
2751
2752 m->reference = FALSE;
2753 m->no_cache = FALSE;
2754
2755 if (!m->inactive) {
2756 VM_PAGE_QUEUES_REMOVE(m);
2757
2758 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2759 m->dirty && m->object->internal &&
2760 (m->object->purgable == VM_PURGABLE_DENY ||
2761 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2762 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2763 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2764 m->throttled = TRUE;
2765 vm_page_throttled_count++;
2766 } else {
2767 if (m->object->named && m->object->ref_count == 1) {
2768 vm_page_speculate(m, FALSE);
2769#if DEVELOPMENT || DEBUG
2770 vm_page_speculative_recreated++;
2771#endif
2772 } else {
2773 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
2774 }
2775 }
2776 }
2777}
2778
2779/*
2780 * vm_page_enqueue_cleaned
2781 *
2782 * Put the page on the cleaned queue, mark it cleaned, etc.
2783 * Being on the cleaned queue (and having m->clean_queue set)
2784 * does ** NOT ** guarantee that the page is clean!
2785 *
2786 * Call with the queues lock held.
2787 */
2788
2789void vm_page_enqueue_cleaned(vm_page_t m)
2790{
2791 assert(m->phys_page != vm_page_guard_addr);
2792#if DEBUG
2793 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2794#endif
2795 assert( !(m->absent && !m->unusual));
2796
2797 if (m->gobbled) {
2798 assert( !VM_PAGE_WIRED(m));
2799 if (!m->private && !m->fictitious)
2800 vm_page_wire_count--;
2801 vm_page_gobble_count--;
2802 m->gobbled = FALSE;
2803 }
2804 /*
2805 * if this page is currently on the pageout queue, we can't do the
2806 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2807 * and we can't remove it manually since we would need the object lock
2808 * (which is not required here) to decrement the activity_in_progress
2809 * reference which is held on the object while the page is in the pageout queue...
2810 * just let the normal laundry processing proceed
2811 */
2812 if (m->clean_queue || m->pageout_queue || m->private || m->fictitious)
2813 return;
2814
2815 VM_PAGE_QUEUES_REMOVE(m);
2816
2817 queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
2818 m->clean_queue = TRUE;
2819 vm_page_cleaned_count++;
2820
2821 m->inactive = TRUE;
2822 vm_page_inactive_count++;
2823
2824 vm_pageout_enqueued_cleaned++;
2825}
2826
2827/*
2828 * vm_page_activate:
2829 *
2830 * Put the specified page on the active list (if appropriate).
2831 *
2832 * The page queues must be locked.
2833 */
2834
2835void
2836vm_page_activate(
2837 register vm_page_t m)
2838{
2839 VM_PAGE_CHECK(m);
2840#ifdef FIXME_4778297
2841 assert(m->object != kernel_object);
2842#endif
2843 assert(m->phys_page != vm_page_guard_addr);
2844#if DEBUG
2845 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2846#endif
2847 assert( !(m->absent && !m->unusual));
2848
2849 if (m->gobbled) {
2850 assert( !VM_PAGE_WIRED(m));
2851 if (!m->private && !m->fictitious)
2852 vm_page_wire_count--;
2853 vm_page_gobble_count--;
2854 m->gobbled = FALSE;
2855 }
2856 /*
2857 * if this page is currently on the pageout queue, we can't do the
2858 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2859 * and we can't remove it manually since we would need the object lock
2860 * (which is not required here) to decrement the activity_in_progress
2861 * reference which is held on the object while the page is in the pageout queue...
2862 * just let the normal laundry processing proceed
2863 */
2864 if (m->pageout_queue || m->private || m->fictitious)
2865 return;
2866
2867#if DEBUG
2868 if (m->active)
2869 panic("vm_page_activate: already active");
2870#endif
2871
2872 if (m->speculative) {
2873 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
2874 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
2875 }
2876
2877 VM_PAGE_QUEUES_REMOVE(m);
2878
2879 if ( !VM_PAGE_WIRED(m)) {
2880
2881 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2882 m->dirty && m->object->internal &&
2883 (m->object->purgable == VM_PURGABLE_DENY ||
2884 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2885 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2886 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2887 m->throttled = TRUE;
2888 vm_page_throttled_count++;
2889 } else {
2890 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2891 m->active = TRUE;
2892 vm_page_active_count++;
2893 }
2894 m->reference = TRUE;
2895 m->no_cache = FALSE;
2896 }
2897 VM_PAGE_CHECK(m);
2898}
2899
2900
2901/*
2902 * vm_page_speculate:
2903 *
2904 * Put the specified page on the speculative list (if appropriate).
2905 *
2906 * The page queues must be locked.
2907 */
2908void
2909vm_page_speculate(
2910 vm_page_t m,
2911 boolean_t new)
2912{
2913 struct vm_speculative_age_q *aq;
2914
2915 VM_PAGE_CHECK(m);
2916 assert(m->object != kernel_object);
2917 assert(m->phys_page != vm_page_guard_addr);
2918#if DEBUG
2919 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2920#endif
2921 assert( !(m->absent && !m->unusual));
2922
2923 /*
2924 * if this page is currently on the pageout queue, we can't do the
2925 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2926 * and we can't remove it manually since we would need the object lock
2927 * (which is not required here) to decrement the activity_in_progress
2928 * reference which is held on the object while the page is in the pageout queue...
2929 * just let the normal laundry processing proceed
2930 */
2931 if (m->pageout_queue || m->private || m->fictitious)
2932 return;
2933
2934 VM_PAGE_QUEUES_REMOVE(m);
2935
2936 if ( !VM_PAGE_WIRED(m)) {
2937 mach_timespec_t ts;
2938 clock_sec_t sec;
2939 clock_nsec_t nsec;
2940
2941 clock_get_system_nanotime(&sec, &nsec);
2942 ts.tv_sec = (unsigned int) sec;
2943 ts.tv_nsec = nsec;
2944
2945 if (vm_page_speculative_count == 0) {
2946
2947 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2948 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2949
2950 aq = &vm_page_queue_speculative[speculative_age_index];
2951
2952 /*
2953 * set the timer to begin a new group
2954 */
2955 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
2956 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
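			/*
			 * e.g. if vm_page_speculative_q_age_ms were 500, this yields
			 * age_ts = { tv_sec = 0, tv_nsec = 500 * 1000 * 1000 }, i.e. 500ms,
			 * which ADD_MACH_TIMESPEC() below turns into an absolute expiry
			 * time relative to the current time in 'ts'.
			 */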
2957
2958 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2959 } else {
2960 aq = &vm_page_queue_speculative[speculative_age_index];
2961
2962 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
2963
2964 speculative_age_index++;
2965
2966 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2967 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2968 if (speculative_age_index == speculative_steal_index) {
2969 speculative_steal_index = speculative_age_index + 1;
2970
2971 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2972 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2973 }
2974 aq = &vm_page_queue_speculative[speculative_age_index];
2975
2976 if (!queue_empty(&aq->age_q))
2977 vm_page_speculate_ageit(aq);
2978
2979 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
2980 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
2981
2982 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2983 }
2984 }
2985 enqueue_tail(&aq->age_q, &m->pageq);
2986 m->speculative = TRUE;
2987 vm_page_speculative_count++;
2988
2989 if (new == TRUE) {
2990 vm_object_lock_assert_exclusive(m->object);
2991
2992 m->object->pages_created++;
2993#if DEVELOPMENT || DEBUG
2994 vm_page_speculative_created++;
2995#endif
2996 }
2997 }
2998 VM_PAGE_CHECK(m);
2999}
3000
3001
3002/*
3003 * move pages from the specified aging bin to
3004 * the speculative bin that pageout_scan claims from
3005 *
3006 * The page queues must be locked.
3007 */
3008void
3009vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
3010{
3011 struct vm_speculative_age_q *sq;
3012 vm_page_t t;
3013
3014 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
3015
3016 if (queue_empty(&sq->age_q)) {
3017 sq->age_q.next = aq->age_q.next;
3018 sq->age_q.prev = aq->age_q.prev;
3019
3020 t = (vm_page_t)sq->age_q.next;
3021 t->pageq.prev = &sq->age_q;
3022
3023 t = (vm_page_t)sq->age_q.prev;
3024 t->pageq.next = &sq->age_q;
3025 } else {
3026 t = (vm_page_t)sq->age_q.prev;
3027 t->pageq.next = aq->age_q.next;
3028
3029 t = (vm_page_t)aq->age_q.next;
3030 t->pageq.prev = sq->age_q.prev;
3031
3032 t = (vm_page_t)aq->age_q.prev;
3033 t->pageq.next = &sq->age_q;
3034
3035 sq->age_q.prev = aq->age_q.prev;
3036 }
3037 queue_init(&aq->age_q);
3038}
3039
3040
3041void
3042vm_page_lru(
3043 vm_page_t m)
3044{
3045 VM_PAGE_CHECK(m);
3046 assert(m->object != kernel_object);
3047 assert(m->phys_page != vm_page_guard_addr);
3048
3049#if DEBUG
3050 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3051#endif
3052 /*
3053 * if this page is currently on the pageout queue, we can't do the
3054 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3055 * and we can't remove it manually since we would need the object lock
3056 * (which is not required here) to decrement the activity_in_progress
3057 * reference which is held on the object while the page is in the pageout queue...
3058 * just let the normal laundry processing proceed
3059 */
3060 if (m->pageout_queue || m->private || (VM_PAGE_WIRED(m)))
3061 return;
3062
3063 m->no_cache = FALSE;
3064
3065 VM_PAGE_QUEUES_REMOVE(m);
3066
3067 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
3068}
3069
3070
3071void
3072vm_page_reactivate_all_throttled(void)
3073{
3074 vm_page_t first_throttled, last_throttled;
3075 vm_page_t first_active;
3076 vm_page_t m;
3077 int extra_active_count;
3078
3079 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
3080 return;
3081
3082 extra_active_count = 0;
3083 vm_page_lock_queues();
3084 if (! queue_empty(&vm_page_queue_throttled)) {
3085 /*
3086 * Switch "throttled" pages to "active".
3087 */
3088 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3089 VM_PAGE_CHECK(m);
3090 assert(m->throttled);
3091 assert(!m->active);
3092 assert(!m->inactive);
3093 assert(!m->speculative);
3094 assert(!VM_PAGE_WIRED(m));
3095
3096 extra_active_count++;
3097
3098 m->throttled = FALSE;
3099 m->active = TRUE;
3100 VM_PAGE_CHECK(m);
3101 }
3102
3103 /*
3104 * Transfer the entire throttled queue to the regular LRU page queues.
3105 * We insert it at the head of the active queue, so that these pages
3106 * get re-evaluated by the LRU algorithm first, since they've been
3107 * completely out of it until now.
3108 */
3109 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3110 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3111 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3112 if (queue_empty(&vm_page_queue_active)) {
3113 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3114 } else {
3115 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3116 }
3117 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3118 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3119 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3120
3121#if DEBUG
3122 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3123#endif
3124 queue_init(&vm_page_queue_throttled);
3125 /*
3126 * Adjust the global page counts.
3127 */
3128 vm_page_active_count += extra_active_count;
3129 vm_page_throttled_count = 0;
3130 }
3131 assert(vm_page_throttled_count == 0);
3132 assert(queue_empty(&vm_page_queue_throttled));
3133 vm_page_unlock_queues();
3134}
3135
3136
3137/*
3138 * move pages from the indicated local queue to the global active queue
3139 * it's OK to fail if we're below the hard limit and force == FALSE;
3140 * the nolocks == TRUE case is there to allow this function to be run on
3141 * the hibernate path
3142 */
3143
3144void
3145vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3146{
3147 struct vpl *lq;
3148 vm_page_t first_local, last_local;
3149 vm_page_t first_active;
3150 vm_page_t m;
3151 uint32_t count = 0;
3152
3153 if (vm_page_local_q == NULL)
3154 return;
3155
3156 lq = &vm_page_local_q[lid].vpl_un.vpl;
3157
3158 if (nolocks == FALSE) {
3159 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3160 if ( !vm_page_trylockspin_queues())
3161 return;
3162 } else
3163 vm_page_lockspin_queues();
3164
3165 VPL_LOCK(&lq->vpl_lock);
3166 }
3167 if (lq->vpl_count) {
3168 /*
3169 * Switch "local" pages to "active".
3170 */
3171 assert(!queue_empty(&lq->vpl_queue));
3172
3173 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3174 VM_PAGE_CHECK(m);
3175 assert(m->local);
3176 assert(!m->active);
3177 assert(!m->inactive);
3178 assert(!m->speculative);
3179 assert(!VM_PAGE_WIRED(m));
3180 assert(!m->throttled);
3181 assert(!m->fictitious);
3182
3183 if (m->local_id != lid)
3184 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3185
3186 m->local_id = 0;
3187 m->local = FALSE;
3188 m->active = TRUE;
3189 VM_PAGE_CHECK(m);
3190
3191 count++;
3192 }
3193 if (count != lq->vpl_count)
3194 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3195
3196 /*
3197 * Transfer the entire local queue to the regular LRU page queues.
3198 */
3199 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3200 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3201 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3202
3203 if (queue_empty(&vm_page_queue_active)) {
3204 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3205 } else {
3206 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3207 }
3208 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3209 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3210 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3211
3212 queue_init(&lq->vpl_queue);
3213 /*
3214 * Adjust the global page counts.
3215 */
3216 vm_page_active_count += lq->vpl_count;
3217 lq->vpl_count = 0;
3218 }
3219 assert(queue_empty(&lq->vpl_queue));
3220
3221 if (nolocks == FALSE) {
3222 VPL_UNLOCK(&lq->vpl_lock);
3223 vm_page_unlock_queues();
3224 }
3225}
3226
3227/*
3228 * vm_page_part_zero_fill:
3229 *
3230 * Zero-fill a part of the page.
3231 */
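
/*
 * For example (hypothetical offsets), vm_page_part_zero_fill(m, 0x200, 0x100)
 * zeroes bytes 0x200 through 0x2ff of the page backing 'm' and leaves the
 * rest of the page's contents untouched.
 */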
3232void
3233vm_page_part_zero_fill(
3234 vm_page_t m,
3235 vm_offset_t m_pa,
3236 vm_size_t len)
3237{
3238 vm_page_t tmp;
3239
3240#if 0
3241 /*
3242 * we don't hold the page queue lock
3243 * so this check isn't safe to make
3244 */
3245 VM_PAGE_CHECK(m);
3246#endif
3247
3248#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3249 pmap_zero_part_page(m->phys_page, m_pa, len);
3250#else
3251 while (1) {
3252 tmp = vm_page_grab();
3253 if (tmp == VM_PAGE_NULL) {
3254 vm_page_wait(THREAD_UNINT);
3255 continue;
3256 }
3257 break;
3258 }
3259 vm_page_zero_fill(tmp);
3260 if(m_pa != 0) {
3261 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3262 }
3263 if((m_pa + len) < PAGE_SIZE) {
3264 vm_page_part_copy(m, m_pa + len, tmp,
3265 m_pa + len, PAGE_SIZE - (m_pa + len));
3266 }
3267 vm_page_copy(tmp,m);
3268 VM_PAGE_FREE(tmp);
3269#endif
3270
3271}
3272
3273/*
3274 * vm_page_zero_fill:
3275 *
3276 * Zero-fill the specified page.
3277 */
3278void
3279vm_page_zero_fill(
3280 vm_page_t m)
3281{
3282 XPR(XPR_VM_PAGE,
3283 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3284 m->object, m->offset, m, 0,0);
3285#if 0
3286 /*
3287 * we don't hold the page queue lock
3288 * so this check isn't safe to make
3289 */
3290 VM_PAGE_CHECK(m);
3291#endif
3292
3293// dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3294 pmap_zero_page(m->phys_page);
3295}
3296
3297/*
3298 * vm_page_part_copy:
3299 *
3300 * copy part of one page to another
3301 */
3302
3303void
3304vm_page_part_copy(
3305 vm_page_t src_m,
3306 vm_offset_t src_pa,
3307 vm_page_t dst_m,
3308 vm_offset_t dst_pa,
3309 vm_size_t len)
3310{
3311#if 0
3312 /*
3313 * we don't hold the page queue lock
3314 * so this check isn't safe to make
3315 */
3316 VM_PAGE_CHECK(src_m);
3317 VM_PAGE_CHECK(dst_m);
3318#endif
3319 pmap_copy_part_page(src_m->phys_page, src_pa,
3320 dst_m->phys_page, dst_pa, len);
3321}
3322
3323/*
3324 * vm_page_copy:
3325 *
3326 * Copy one page to another
3327 *
3328 * ENCRYPTED SWAP:
3329 * The source page should not be encrypted. The caller should
3330 * make sure the page is decrypted first, if necessary.
3331 */
3332
3333int vm_page_copy_cs_validations = 0;
3334int vm_page_copy_cs_tainted = 0;
3335
3336void
3337vm_page_copy(
3338 vm_page_t src_m,
3339 vm_page_t dest_m)
3340{
3341 XPR(XPR_VM_PAGE,
3342 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3343 src_m->object, src_m->offset,
3344 dest_m->object, dest_m->offset,
3345 0);
3346#if 0
3347 /*
3348 * we don't hold the page queue lock
3349 * so this check isn't safe to make
3350 */
3351 VM_PAGE_CHECK(src_m);
3352 VM_PAGE_CHECK(dest_m);
3353#endif
3354 vm_object_lock_assert_held(src_m->object);
3355
3356 /*
3357 * ENCRYPTED SWAP:
3358 * The source page should not be encrypted at this point.
3359 * The destination page will therefore not contain encrypted
3360 * data after the copy.
3361 */
3362 if (src_m->encrypted) {
3363 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3364 }
3365 dest_m->encrypted = FALSE;
3366
3367 if (src_m->object != VM_OBJECT_NULL &&
3368 src_m->object->code_signed) {
3369 /*
3370 * We're copying a page from a code-signed object.
3371 * Whoever ends up mapping the copy page might care about
3372 * the original page's integrity, so let's validate the
3373 * source page now.
3374 */
3375 vm_page_copy_cs_validations++;
3376 vm_page_validate_cs(src_m);
3377 }
3378
3379 if (vm_page_is_slideable(src_m)) {
3380 boolean_t was_busy = src_m->busy;
3381 src_m->busy = TRUE;
3382 (void) vm_page_slide(src_m, 0);
3383 assert(src_m->busy);
3384 if (!was_busy) {
3385 PAGE_WAKEUP_DONE(src_m);
3386 }
3387 }
3388
3389 /*
3390 * Propagate the cs_tainted bit to the copy page. Do not propagate
3391 * the cs_validated bit.
3392 */
3393 dest_m->cs_tainted = src_m->cs_tainted;
3394 if (dest_m->cs_tainted) {
3395 vm_page_copy_cs_tainted++;
3396 }
3397 dest_m->slid = src_m->slid;
3398 dest_m->error = src_m->error; /* sliding src_m might have failed... */
3399 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
3400}
3401
3402#if MACH_ASSERT
3403static void
3404_vm_page_print(
3405 vm_page_t p)
3406{
3407 printf("vm_page %p: \n", p);
3408 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3409 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3410 printf(" next=%p\n", p->next);
3411 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3412 printf(" wire_count=%u\n", p->wire_count);
3413
3414 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3415 (p->local ? "" : "!"),
3416 (p->inactive ? "" : "!"),
3417 (p->active ? "" : "!"),
3418 (p->pageout_queue ? "" : "!"),
3419 (p->speculative ? "" : "!"),
3420 (p->laundry ? "" : "!"));
3421 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3422 (p->free ? "" : "!"),
3423 (p->reference ? "" : "!"),
3424 (p->gobbled ? "" : "!"),
3425 (p->private ? "" : "!"),
3426 (p->throttled ? "" : "!"));
3427 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3428 (p->busy ? "" : "!"),
3429 (p->wanted ? "" : "!"),
3430 (p->tabled ? "" : "!"),
3431 (p->fictitious ? "" : "!"),
3432 (p->pmapped ? "" : "!"),
3433 (p->wpmapped ? "" : "!"));
3434 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3435 (p->pageout ? "" : "!"),
3436 (p->absent ? "" : "!"),
3437 (p->error ? "" : "!"),
3438 (p->dirty ? "" : "!"),
3439 (p->cleaning ? "" : "!"),
3440 (p->precious ? "" : "!"),
3441 (p->clustered ? "" : "!"));
3442 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3443 (p->overwriting ? "" : "!"),
3444 (p->restart ? "" : "!"),
3445 (p->unusual ? "" : "!"),
3446 (p->encrypted ? "" : "!"),
3447 (p->encrypted_cleaning ? "" : "!"));
3448 printf(" %scs_validated, %scs_tainted, %sno_cache\n",
3449 (p->cs_validated ? "" : "!"),
3450 (p->cs_tainted ? "" : "!"),
3451 (p->no_cache ? "" : "!"));
3452
3453 printf("phys_page=0x%x\n", p->phys_page);
3454}
3455
3456/*
3457 * Check that the list of pages is ordered by
3458 * ascending physical address and has no holes.
3459 */
3460static int
3461vm_page_verify_contiguous(
3462 vm_page_t pages,
3463 unsigned int npages)
3464{
3465 register vm_page_t m;
3466 unsigned int page_count;
3467 vm_offset_t prev_addr;
3468
3469 prev_addr = pages->phys_page;
3470 page_count = 1;
3471 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
3472 if (m->phys_page != prev_addr + 1) {
3473 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3474 m, (long)prev_addr, m->phys_page);
3475 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
3476 panic("vm_page_verify_contiguous: not contiguous!");
3477 }
3478 prev_addr = m->phys_page;
3479 ++page_count;
3480 }
3481 if (page_count != npages) {
3482 printf("pages %p actual count 0x%x but requested 0x%x\n",
3483 pages, page_count, npages);
3484 panic("vm_page_verify_contiguous: count error");
3485 }
3486 return 1;
3487}
3488
3489
3490/*
3491 * Check the free lists for proper length etc.
3492 */
3493static unsigned int
3494vm_page_verify_free_list(
3495 queue_head_t *vm_page_queue,
3496 unsigned int color,
3497 vm_page_t look_for_page,
3498 boolean_t expect_page)
3499{
3500 unsigned int npages;
3501 vm_page_t m;
3502 vm_page_t prev_m;
3503 boolean_t found_page;
3504
3505 found_page = FALSE;
3506 npages = 0;
3507 prev_m = (vm_page_t) vm_page_queue;
3508 queue_iterate(vm_page_queue,
3509 m,
3510 vm_page_t,
3511 pageq) {
3512
3513 if (m == look_for_page) {
3514 found_page = TRUE;
3515 }
3516 if ((vm_page_t) m->pageq.prev != prev_m)
3517 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3518 color, npages, m, m->pageq.prev, prev_m);
3519 if ( ! m->busy )
3520 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3521 color, npages, m);
3522 if (color != (unsigned int) -1) {
3523 if ((m->phys_page & vm_color_mask) != color)
3524 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3525 color, npages, m, m->phys_page & vm_color_mask, color);
3526 if ( ! m->free )
3527 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3528 color, npages, m);
3529 }
3530 ++npages;
3531 prev_m = m;
3532 }
3533 if (look_for_page != VM_PAGE_NULL) {
3534 unsigned int other_color;
3535
3536 if (expect_page && !found_page) {
3537 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3538 color, npages, look_for_page, look_for_page->phys_page);
3539 _vm_page_print(look_for_page);
3540 for (other_color = 0;
3541 other_color < vm_colors;
3542 other_color++) {
3543 if (other_color == color)
3544 continue;
3545 vm_page_verify_free_list(&vm_page_queue_free[other_color],
3546 other_color, look_for_page, FALSE);
3547 }
3548 if (color == (unsigned int) -1) {
3549 vm_page_verify_free_list(&vm_lopage_queue_free,
3550 (unsigned int) -1, look_for_page, FALSE);
3551 }
3552 panic("vm_page_verify_free_list(color=%u)\n", color);
3553 }
3554 if (!expect_page && found_page) {
3555 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3556 color, npages, look_for_page, look_for_page->phys_page);
3557 }
3558 }
3559 return npages;
3560}
3561
3562static boolean_t vm_page_verify_free_lists_enabled = FALSE;
3563static void
3564vm_page_verify_free_lists( void )
3565{
3566 unsigned int color, npages, nlopages;
3567
3568 if (! vm_page_verify_free_lists_enabled)
3569 return;
3570
3571 npages = 0;
3572
3573 lck_mtx_lock(&vm_page_queue_free_lock);
3574
3575 for( color = 0; color < vm_colors; color++ ) {
3576 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
3577 color, VM_PAGE_NULL, FALSE);
3578 }
3579 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
3580 (unsigned int) -1,
3581 VM_PAGE_NULL, FALSE);
3582 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
3583 panic("vm_page_verify_free_lists: "
3584 "npages %u free_count %d nlopages %u lo_free_count %u",
3585 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
3586
3587 lck_mtx_unlock(&vm_page_queue_free_lock);
3588}
3589
3590void
3591vm_page_queues_assert(
3592 vm_page_t mem,
3593 int val)
3594{
3595#if DEBUG
3596 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3597#endif
3598 if (mem->free + mem->active + mem->inactive + mem->speculative +
3599 mem->throttled + mem->pageout_queue > (val)) {
3600 _vm_page_print(mem);
3601 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3602 }
3603 if (VM_PAGE_WIRED(mem)) {
3604 assert(!mem->active);
3605 assert(!mem->inactive);
3606 assert(!mem->speculative);
3607 assert(!mem->throttled);
3608 assert(!mem->pageout_queue);
3609 }
3610}
3611#endif /* MACH_ASSERT */
3612
3613
3614/*
3615 * CONTIGUOUS PAGE ALLOCATION
3616 *
3617 * Find a region large enough to contain at least n pages
3618 * of contiguous physical memory.
3619 *
3620 * This is done by traversing the vm_page_t array in a linear fashion
3621 * we assume that the vm_page_t array has the available physical pages in an
3622 * ordered, ascending list... this is currently true of all our implementations
3623 * and must remain so... there can be 'holes' in the array... we also can
3624 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
3625 * which used to happen via 'vm_page_convert'... that function was no longer
3626 * being called and was removed...
3627 *
3628 * The basic flow consists of stabilizing some of the interesting state of
3629 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
3630 * sweep at the beginning of the array looking for pages that meet our criteria
3631 * for a 'stealable' page... currently we are pretty conservative... if the page
3632 * meets this criteria and is physically contiguous to the previous page in the 'run'
3633 * we keep developing it. If we hit a page that doesn't fit, we reset our state
3634 * and start to develop a new run... if at this point we've already considered
3635 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
3636 * and mutex_pause (which will yield the processor), to keep the latency low w/r
3637 * to other threads trying to acquire free pages (or move pages from q to q),
3638 * and then continue from the spot we left off... we only make 1 pass through the
3639 * array. Once we have a 'run' that is long enough, we'll go into the loop
3640 * which steals the pages from the queues they're currently on... pages on the free
3641 * queue can be stolen directly... pages that are on any of the other queues
3642 * must be removed from the object they are tabled on... this requires taking the
3643 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
3644 * or if the state of the page behind the vm_object lock is no longer viable, we'll
3645 * dump the pages we've currently stolen back to the free list, and pick up our
3646 * scan from the point where we aborted the 'current' run.
3647 *
3648 *
3649 * Requirements:
3650 * - neither vm_page_queue nor vm_free_list lock can be held on entry
3651 *
3652 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
3653 *
3654 * Algorithm:
3655 */
3656
3657#define MAX_CONSIDERED_BEFORE_YIELD 1000
3658
3659
3660#define RESET_STATE_OF_RUN() \
3661 MACRO_BEGIN \
3662 prevcontaddr = -2; \
3663 start_pnum = -1; \
3664 free_considered = 0; \
3665 substitute_needed = 0; \
3666 npages = 0; \
3667 MACRO_END
3668
3669/*
3670 * Can we steal in-use (i.e. not free) pages when searching for
3671 * physically-contiguous pages ?
3672 */
3673#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
3674
3675static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
3676#if DEBUG
3677int vm_page_find_contig_debug = 0;
3678#endif
3679
3680static vm_page_t
3681vm_page_find_contiguous(
3682 unsigned int contig_pages,
3683 ppnum_t max_pnum,
3684 ppnum_t pnum_mask,
3685 boolean_t wire,
3686 int flags)
3687{
3688 vm_page_t m = NULL;
3689 ppnum_t prevcontaddr;
3690 ppnum_t start_pnum;
3691 unsigned int npages, considered, scanned;
3692 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
3693 unsigned int idx_last_contig_page_found = 0;
3694 int free_considered, free_available;
3695 int substitute_needed;
3696 boolean_t wrapped;
3697#if DEBUG
3698 clock_sec_t tv_start_sec, tv_end_sec;
3699 clock_usec_t tv_start_usec, tv_end_usec;
3700#endif
3701#if MACH_ASSERT
3702 int yielded = 0;
3703 int dumped_run = 0;
3704 int stolen_pages = 0;
3705#endif
3706
3707 if (contig_pages == 0)
3708 return VM_PAGE_NULL;
3709
3710#if MACH_ASSERT
3711 vm_page_verify_free_lists();
3712#endif
3713#if DEBUG
3714 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3715#endif
3716 vm_page_lock_queues();
3717 lck_mtx_lock(&vm_page_queue_free_lock);
3718
3719 RESET_STATE_OF_RUN();
3720
3721 scanned = 0;
3722 considered = 0;
3723 free_available = vm_page_free_count - vm_page_free_reserved;
3724
3725 wrapped = FALSE;
3726
3727 if(flags & KMA_LOMEM)
3728 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
3729 else
3730 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
3731
3732 orig_last_idx = idx_last_contig_page_found;
3733 last_idx = orig_last_idx;
3734
3735 for (page_idx = last_idx, start_idx = last_idx;
3736 npages < contig_pages && page_idx < vm_pages_count;
3737 page_idx++) {
3738retry:
3739 if (wrapped &&
3740 npages == 0 &&
3741 page_idx >= orig_last_idx) {
3742 /*
3743 * We're back where we started and we haven't
3744 * found any suitable contiguous range. Let's
3745 * give up.
3746 */
3747 break;
3748 }
3749 scanned++;
3750 m = &vm_pages[page_idx];
3751
3752 assert(!m->fictitious);
3753 assert(!m->private);
3754
3755 if (max_pnum && m->phys_page > max_pnum) {
3756 /* no more low pages... */
3757 break;
3758 }
3759 if (!npages && ((m->phys_page & pnum_mask) != 0)) {
3760 /*
3761 * not aligned
3762 */
3763 RESET_STATE_OF_RUN();
3764
3765 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
3766 m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
3767 m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
3768 m->cleaning || m->overwriting || m->restart || m->unusual || m->pageout) {
3769 /*
3770 * page is in a transient state
3771 * or a state we don't want to deal
3772 * with, so don't consider it which
3773 * means starting a new run
3774 */
3775 RESET_STATE_OF_RUN();
3776
3777 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
3778 /*
3779 * page needs to be on one of our queues
3780 * in order for it to be stable behind the
3781 * locks we hold at this point...
3782 * if not, don't consider it which
3783 * means starting a new run
3784 */
3785 RESET_STATE_OF_RUN();
3786
3787 } else if (!m->free && (!m->tabled || m->busy)) {
3788 /*
3789 * pages on the free list are always 'busy'
3790 * so we couldn't test for 'busy' in the check
3791 * for the transient states... pages that are
3792 * 'free' are never 'tabled', so we also couldn't
3793 * test for 'tabled'. So we check here to make
3794 * sure that a non-free page is not busy and is
3795 * tabled on an object...
3796 * if not, don't consider it which
3797 * means starting a new run
3798 */
3799 RESET_STATE_OF_RUN();
3800
3801 } else {
3802 if (m->phys_page != prevcontaddr + 1) {
3803 if ((m->phys_page & pnum_mask) != 0) {
3804 RESET_STATE_OF_RUN();
3805 goto did_consider;
3806 } else {
3807 npages = 1;
3808 start_idx = page_idx;
3809 start_pnum = m->phys_page;
3810 }
3811 } else {
3812 npages++;
3813 }
3814 prevcontaddr = m->phys_page;
3815
3816 VM_PAGE_CHECK(m);
3817 if (m->free) {
3818 free_considered++;
3819 } else {
3820 /*
3821 * This page is not free.
3822 * If we can't steal used pages,
3823 * we have to give up this run
3824 * and keep looking.
3825 * Otherwise, we might need to
3826 * move the contents of this page
3827 * into a substitute page.
3828 */
3829#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3830 if (m->pmapped || m->dirty) {
3831 substitute_needed++;
3832 }
3833#else
3834 RESET_STATE_OF_RUN();
3835#endif
3836 }
3837
3838 if ((free_considered + substitute_needed) > free_available) {
3839 /*
3840 * if we let this run continue
3841 * we will end up dropping the vm_page_free_count
3842 * below the reserve limit... we need to abort
3843 * this run, but we can at least re-consider this
3844 * page... thus the jump back to 'retry'
3845 */
3846 RESET_STATE_OF_RUN();
3847
3848 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
3849 considered++;
3850 goto retry;
3851 }
3852 /*
3853 * free_available == 0
3854 * so can't consider any free pages... if
3855 * we went to retry in this case, we'd
3856 * get stuck looking at the same page
3857				 * w/o making any forward progress...
3858				 * we also want to take this path if we've already
3859 * reached our limit that controls the lock latency
3860 */
3861 }
3862 }
3863did_consider:
3864 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
3865
3866 lck_mtx_unlock(&vm_page_queue_free_lock);
3867 vm_page_unlock_queues();
3868
3869 mutex_pause(0);
3870
3871 vm_page_lock_queues();
3872 lck_mtx_lock(&vm_page_queue_free_lock);
3873
3874 RESET_STATE_OF_RUN();
3875 /*
3876 * reset our free page limit since we
3877 * dropped the lock protecting the vm_page_free_queue
3878 */
3879 free_available = vm_page_free_count - vm_page_free_reserved;
3880 considered = 0;
3881#if MACH_ASSERT
3882 yielded++;
3883#endif
3884 goto retry;
3885 }
3886 considered++;
3887 }
3888 m = VM_PAGE_NULL;
3889
3890 if (npages != contig_pages) {
3891 if (!wrapped) {
3892 /*
3893 * We didn't find a contiguous range but we didn't
3894 * start from the very first page.
3895 * Start again from the very first page.
3896 */
3897 RESET_STATE_OF_RUN();
3898 if( flags & KMA_LOMEM)
3899 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
3900 else
3901 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
3902 last_idx = 0;
3903 page_idx = last_idx;
3904 wrapped = TRUE;
3905 goto retry;
3906 }
3907 lck_mtx_unlock(&vm_page_queue_free_lock);
3908 } else {
3909 vm_page_t m1;
3910 vm_page_t m2;
3911 unsigned int cur_idx;
3912 unsigned int tmp_start_idx;
3913 vm_object_t locked_object = VM_OBJECT_NULL;
3914 boolean_t abort_run = FALSE;
3915
3916 assert(page_idx - start_idx == contig_pages);
3917
3918 tmp_start_idx = start_idx;
3919
3920		/*
3921		 * first pass through to pull the free pages
3922		 * off of the free queue so that in case we
3923		 * need substitute pages, we won't grab any
3924		 * of the free pages in the run... we clear the
3925		 * 'free' bit right here (not in the 2nd pass), and
3926		 * even in an abort_run case, we'll collect all of the
3927		 * free pages in this run and return them to the free list
3928		 */
3929 while (start_idx < page_idx) {
3930
3931 m1 = &vm_pages[start_idx++];
3932
3933#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3934 assert(m1->free);
3935#endif
3936
3937 if (m1->free) {
3938 unsigned int color;
3939
3940 color = m1->phys_page & vm_color_mask;
3941#if MACH_ASSERT
3942 vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
3943#endif
3944 queue_remove(&vm_page_queue_free[color],
3945 m1,
3946 vm_page_t,
3947 pageq);
3948 m1->pageq.next = NULL;
3949 m1->pageq.prev = NULL;
3950#if MACH_ASSERT
3951 vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
3952#endif
3953 /*
3954 * Clear the "free" bit so that this page
3955 * does not get considered for another
3956 * concurrent physically-contiguous allocation.
3957 */
3958 m1->free = FALSE;
3959 assert(m1->busy);
3960
3961 vm_page_free_count--;
3962 }
3963 }
3964 /*
3965 * adjust global freelist counts
3966 */
3967 if (vm_page_free_count < vm_page_free_count_minimum)
3968 vm_page_free_count_minimum = vm_page_free_count;
3969
3970 if( flags & KMA_LOMEM)
3971 vm_page_lomem_find_contiguous_last_idx = page_idx;
3972 else
3973 vm_page_find_contiguous_last_idx = page_idx;
3974
3975 /*
3976 * we can drop the free queue lock at this point since
3977		 * we've pulled any 'free' candidates off of the list...
3978		 * we need it dropped so that we can do a vm_page_grab
3979		 * when substituting for pmapped/dirty pages
3980 */
3981 lck_mtx_unlock(&vm_page_queue_free_lock);
3982
3983 start_idx = tmp_start_idx;
3984 cur_idx = page_idx - 1;
3985
3986 while (start_idx++ < page_idx) {
3987 /*
3988 * must go through the list from back to front
3989 * so that the page list is created in the
3990 * correct order - low -> high phys addresses
3991 */
3992 m1 = &vm_pages[cur_idx--];
3993
3994 assert(!m1->free);
3995 if (m1->object == VM_OBJECT_NULL) {
3996 /*
3997 * page has already been removed from
3998 * the free list in the 1st pass
3999 */
4000 assert(m1->offset == (vm_object_offset_t) -1);
4001 assert(m1->busy);
4002 assert(!m1->wanted);
4003 assert(!m1->laundry);
4004 } else {
4005 vm_object_t object;
4006
4007 if (abort_run == TRUE)
4008 continue;
4009
4010 object = m1->object;
4011
4012 if (object != locked_object) {
4013 if (locked_object) {
4014 vm_object_unlock(locked_object);
4015 locked_object = VM_OBJECT_NULL;
4016 }
4017 if (vm_object_lock_try(object))
4018 locked_object = object;
4019 }
4020 if (locked_object == VM_OBJECT_NULL ||
4021 (VM_PAGE_WIRED(m1) || m1->gobbled ||
4022 m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
4023 m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
4024 m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->busy)) {
4025
4026 if (locked_object) {
4027 vm_object_unlock(locked_object);
4028 locked_object = VM_OBJECT_NULL;
4029 }
4030 tmp_start_idx = cur_idx;
4031 abort_run = TRUE;
4032 continue;
4033 }
4034 if (m1->pmapped || m1->dirty) {
4035 int refmod;
4036 vm_object_offset_t offset;
4037
4038 m2 = vm_page_grab();
4039
4040 if (m2 == VM_PAGE_NULL) {
4041 if (locked_object) {
4042 vm_object_unlock(locked_object);
4043 locked_object = VM_OBJECT_NULL;
4044 }
4045 tmp_start_idx = cur_idx;
4046 abort_run = TRUE;
4047 continue;
4048 }
4049 if (m1->pmapped)
4050 refmod = pmap_disconnect(m1->phys_page);
4051 else
4052 refmod = 0;
4053 vm_page_copy(m1, m2);
4054
4055 m2->reference = m1->reference;
4056 m2->dirty = m1->dirty;
4057
4058 if (refmod & VM_MEM_REFERENCED)
4059 m2->reference = TRUE;
4060 if (refmod & VM_MEM_MODIFIED) {
4061 SET_PAGE_DIRTY(m2, TRUE);
4062 }
4063 offset = m1->offset;
4064
4065 /*
4066 * completely cleans up the state
4067 * of the page so that it is ready
4068 * to be put onto the free list, or
4069 * for this purpose it looks like it
4070 * just came off of the free list
4071 */
4072 vm_page_free_prepare(m1);
4073
4074 /*
4075 * make sure we clear the ref/mod state
4076 * from the pmap layer... else we risk
4077 * inheriting state from the last time
4078 * this page was used...
4079 */
4080 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
4081 /*
4082 * now put the substitute page on the object
4083 */
4084 vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE);
4085
4086 if (m2->reference)
4087 vm_page_activate(m2);
4088 else
4089 vm_page_deactivate(m2);
4090
4091 PAGE_WAKEUP_DONE(m2);
4092
4093 } else {
4094 /*
4095 * completely cleans up the state
4096 * of the page so that it is ready
4097 * to be put onto the free list, or
4098 * for this purpose it looks like it
4099 * just came off of the free list
4100 */
4101 vm_page_free_prepare(m1);
4102 }
4103#if MACH_ASSERT
4104 stolen_pages++;
4105#endif
4106 }
4107 m1->pageq.next = (queue_entry_t) m;
4108 m1->pageq.prev = NULL;
4109 m = m1;
4110 }
4111 if (locked_object) {
4112 vm_object_unlock(locked_object);
4113 locked_object = VM_OBJECT_NULL;
4114 }
4115
4116 if (abort_run == TRUE) {
4117 if (m != VM_PAGE_NULL) {
4118 vm_page_free_list(m, FALSE);
4119 }
4120#if MACH_ASSERT
4121 dumped_run++;
4122#endif
4123			/*
4124			 * tmp_start_idx was captured from cur_idx after
4125			 * its auto-decrement, so it sits 1 below the
4126			 * page that forced the abort... add 2 so the
4127			 * scan resumes on the page just past the one
4128			 * that caused the trouble
4129			 */
4130 page_idx = tmp_start_idx + 2;
4131 if (page_idx >= vm_pages_count) {
4132 if (wrapped)
4133 goto done_scanning;
4134 page_idx = last_idx = 0;
4135 wrapped = TRUE;
4136 }
4137 abort_run = FALSE;
4138
4139			/*
4140			 * We had to abort this run... resume the scan
4141			 * from the index computed above (just past the
4142			 * aborted run, or page 0 if we wrapped around).
4143			 */
4144 RESET_STATE_OF_RUN();
4145
4146 if( flags & KMA_LOMEM)
4147 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4148 else
4149 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4150
4151 last_idx = page_idx;
4152
4153 lck_mtx_lock(&vm_page_queue_free_lock);
4154 /*
4155 * reset our free page limit since we
4156 * dropped the lock protecting the vm_page_free_queue
4157 */
4158 free_available = vm_page_free_count - vm_page_free_reserved;
4159 goto retry;
4160 }
4161
4162 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
4163
4164 if (wire == TRUE)
4165 m1->wire_count++;
4166 else
4167 m1->gobbled = TRUE;
4168 }
4169 if (wire == FALSE)
4170 vm_page_gobble_count += npages;
4171
4172 /*
4173 * gobbled pages are also counted as wired pages
4174 */
4175 vm_page_wire_count += npages;
4176
4177 assert(vm_page_verify_contiguous(m, npages));
4178 }
4179done_scanning:
4180 vm_page_unlock_queues();
4181
4182#if DEBUG
4183 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4184
4185 tv_end_sec -= tv_start_sec;
4186 if (tv_end_usec < tv_start_usec) {
4187 tv_end_sec--;
4188 tv_end_usec += 1000000;
4189 }
4190 tv_end_usec -= tv_start_usec;
4191 if (tv_end_usec >= 1000000) {
4192 tv_end_sec++;
4193		tv_end_usec -= 1000000;
4194 }
4195 if (vm_page_find_contig_debug) {
4196 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n",
4197 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4198 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4199 scanned, yielded, dumped_run, stolen_pages);
4200 }
4201
4202#endif
4203#if MACH_ASSERT
4204 vm_page_verify_free_lists();
4205#endif
4206 return m;
4207}
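
/*
 * A minimal sketch of the run-detection logic in the scan above: walk
 * an array of physical page frame numbers and report where a run of
 * 'contig_pages' consecutive frames starts, with the first frame
 * aligned per 'pnum_mask'.  It deliberately ignores the page-state
 * filtering, locking and yielding done by the real routine; the name
 * sketch_find_run and the flat 'frames' array are hypothetical.
 */
#if 0	/* illustrative sketch -- not part of the original source */
static int
sketch_find_run(ppnum_t *frames, unsigned int nframes,
		unsigned int contig_pages, ppnum_t pnum_mask)
{
	unsigned int	idx;
	unsigned int	start_idx = 0;
	unsigned int	npages = 0;
	ppnum_t		prevcontaddr = 0;

	for (idx = 0; idx < nframes && npages < contig_pages; idx++) {
		if (npages != 0 && frames[idx] == prevcontaddr + 1) {
			npages++;		/* frame extends the current run */
		} else if ((frames[idx] & pnum_mask) == 0) {
			npages = 1;		/* aligned frame: start a new run */
			start_idx = idx;
		} else {
			npages = 0;		/* unaligned, non-contiguous: reset */
		}
		prevcontaddr = frames[idx];
	}
	return (npages == contig_pages) ? (int)start_idx : -1;
}
#endif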
4208
4209/*
4210 * Allocate a list of contiguous, wired pages.
4211 */
4212kern_return_t
4213cpm_allocate(
4214 vm_size_t size,
4215 vm_page_t *list,
4216 ppnum_t max_pnum,
4217 ppnum_t pnum_mask,
4218 boolean_t wire,
4219 int flags)
4220{
4221 vm_page_t pages;
4222 unsigned int npages;
4223
4224 if (size % PAGE_SIZE != 0)
4225 return KERN_INVALID_ARGUMENT;
4226
4227 npages = (unsigned int) (size / PAGE_SIZE);
4228 if (npages != size / PAGE_SIZE) {
4229 /* 32-bit overflow */
4230 return KERN_INVALID_ARGUMENT;
4231 }
4232
4233 /*
4234 * Obtain a pointer to a subset of the free
4235 * list large enough to satisfy the request;
4236 * the region will be physically contiguous.
4237 */
4238 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
4239
4240 if (pages == VM_PAGE_NULL)
4241 return KERN_NO_SPACE;
4242 /*
4243 * determine need for wakeups
4244 */
4245 if ((vm_page_free_count < vm_page_free_min) ||
4246 ((vm_page_free_count < vm_page_free_target) &&
4247 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4248 thread_wakeup((event_t) &vm_page_free_wanted);
4249
4250 VM_CHECK_MEMORYSTATUS;
4251
4252 /*
4253 * The CPM pages should now be available and
4254 * ordered by ascending physical address.
4255 */
4256 assert(vm_page_verify_contiguous(pages, npages));
4257
4258 *list = pages;
4259 return KERN_SUCCESS;
4260}
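
/*
 * A hedged usage sketch for cpm_allocate(): request 16 physically
 * contiguous, wired pages whose first frame is 64KB-aligned and lies
 * below 4GB.  The helper name sketch_grab_contiguous_run and the
 * particular limits are assumptions for illustration, not values
 * taken from any real caller.
 */
#if 0	/* illustrative sketch -- not part of the original source */
static kern_return_t
sketch_grab_contiguous_run(vm_page_t *run_out)
{
	vm_size_t	size = 16 * PAGE_SIZE;
	/* highest acceptable physical page number: last frame below 4GB */
	ppnum_t		max_pnum = (ppnum_t)((0x100000000ULL >> PAGE_SHIFT) - 1);
	/* alignment mask on the starting frame: 64KB worth of frames - 1 */
	ppnum_t		pnum_mask = (ppnum_t)((0x10000 >> PAGE_SHIFT) - 1);

	return cpm_allocate(size, run_out, max_pnum, pnum_mask, TRUE, 0);
}
#endif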
4261
4262
4263unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
4264
4265/*
4266 * When working on a 'run' of pages, it is necessary to hold
4267 * the vm_page_queue_lock (a hot global lock) for certain operations
4268 * on each page; however, the majority of the work can be done
4269 * while merely holding the object lock, and some collections of
4270 * pages don't require any work brokered by the vm_page_queue_lock
4271 * at all.  To mitigate the time spent behind the global lock, use a
4272 * 2 pass algorithm: collect pages up to DELAYED_WORK_LIMIT while
4273 * doing all of the work that doesn't require the vm_page_queue_lock,
4274 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock
4275 * and do the remaining work for each page.  We grab the busy bit on
4276 * the page if it's not already held so that vm_page_do_delayed_work
4277 * can drop the object lock when it can't immediately take the
4278 * vm_page_queue_lock; this lets it compete for the locks in the same
4279 * order that vm_pageout_scan takes them.
4280 * The operation names (DW_*) are modeled after the routines that
4281 * need to be called, to keep the changes obvious in the original loop.
4282 */
4283
4284void
4285vm_page_do_delayed_work(
4286 vm_object_t object,
4287 struct vm_page_delayed_work *dwp,
4288 int dw_count)
4289{
4290 int j;
4291 vm_page_t m;
4292 vm_page_t local_free_q = VM_PAGE_NULL;
4293
4294 /*
4295 * pageout_scan takes the vm_page_lock_queues first
4296 * then tries for the object lock... to avoid what
4297 * is effectively a lock inversion, we'll go to the
4298 * trouble of taking them in that same order... otherwise
4299 * if this object contains the majority of the pages resident
4300 * in the UBC (or a small set of large objects actively being
4301 * worked on contain the majority of the pages), we could
4302 * cause the pageout_scan thread to 'starve' in its attempt
4303 * to find pages to move to the free queue, since it has to
4304 * successfully acquire the object lock of any candidate page
4305 * before it can steal/clean it.
4306 */
4307 if (!vm_page_trylockspin_queues()) {
4308 vm_object_unlock(object);
4309
4310 vm_page_lockspin_queues();
4311
4312 for (j = 0; ; j++) {
4313 if (!vm_object_lock_avoid(object) &&
4314 _vm_object_lock_try(object))
4315 break;
4316 vm_page_unlock_queues();
4317 mutex_pause(j);
4318 vm_page_lockspin_queues();
4319 }
4320 }
4321 for (j = 0; j < dw_count; j++, dwp++) {
4322
4323 m = dwp->dw_m;
4324
4325 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4326 vm_pageout_throttle_up(m);
4327
4328 if (dwp->dw_mask & DW_vm_page_wire)
4329 vm_page_wire(m);
4330 else if (dwp->dw_mask & DW_vm_page_unwire) {
4331 boolean_t queueit;
4332
4333 queueit = (dwp->dw_mask & DW_vm_page_free) ? FALSE : TRUE;
4334
4335 vm_page_unwire(m, queueit);
4336 }
4337 if (dwp->dw_mask & DW_vm_page_free) {
4338 vm_page_free_prepare_queues(m);
4339
4340 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
4341 /*
4342 * Add this page to our list of reclaimed pages,
4343 * to be freed later.
4344 */
4345 m->pageq.next = (queue_entry_t) local_free_q;
4346 local_free_q = m;
4347 } else {
4348 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
4349 vm_page_deactivate_internal(m, FALSE);
4350 else if (dwp->dw_mask & DW_vm_page_activate) {
4351 if (m->active == FALSE) {
4352 vm_page_activate(m);
4353 }
4354 }
4355 else if (dwp->dw_mask & DW_vm_page_speculate)
4356 vm_page_speculate(m, TRUE);
4357 else if (dwp->dw_mask & DW_enqueue_cleaned) {
4358 /*
4359 * if we didn't hold the object lock and did this,
4360 * we might disconnect the page, then someone might
4361 * soft fault it back in, then we would put it on the
4362 * cleaned queue, and so we would have a referenced (maybe even dirty)
4363 * page on that queue, which we don't want
4364 */
4365 int refmod_state = pmap_disconnect(m->phys_page);
4366
4367 if ((refmod_state & VM_MEM_REFERENCED)) {
4368 /*
4369 * this page has been touched since it got cleaned; let's activate it
4370 * if it hasn't already been
4371 */
4372 vm_pageout_enqueued_cleaned++;
4373 vm_pageout_cleaned_reactivated++;
4374 vm_pageout_cleaned_commit_reactivated++;
4375
4376 if (m->active == FALSE)
4377 vm_page_activate(m);
4378 } else {
4379 m->reference = FALSE;
4380 vm_page_enqueue_cleaned(m);
4381 }
4382 }
4383 else if (dwp->dw_mask & DW_vm_page_lru)
4384 vm_page_lru(m);
4385 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
4386 if ( !m->pageout_queue)
4387 VM_PAGE_QUEUES_REMOVE(m);
4388 }
4389 if (dwp->dw_mask & DW_set_reference)
4390 m->reference = TRUE;
4391 else if (dwp->dw_mask & DW_clear_reference)
4392 m->reference = FALSE;
4393
4394 if (dwp->dw_mask & DW_move_page) {
4395 if ( !m->pageout_queue) {
4396 VM_PAGE_QUEUES_REMOVE(m);
4397
4398 assert(m->object != kernel_object);
4399
4400 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
4401 }
4402 }
4403 if (dwp->dw_mask & DW_clear_busy)
4404 m->busy = FALSE;
4405
4406 if (dwp->dw_mask & DW_PAGE_WAKEUP)
4407 PAGE_WAKEUP(m);
4408 }
4409 }
4410 vm_page_unlock_queues();
4411
4412 if (local_free_q)
4413 vm_page_free_list(local_free_q, TRUE);
4414
4415 VM_CHECK_MEMORYSTATUS;
4416
4417}
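
/*
 * A minimal caller-side sketch of the 2 pass pattern described in the
 * block comment above vm_page_do_delayed_work().  It assumes the caller
 * holds the lock of 'object', that every page in 'page_run' belongs to
 * that object, and that none of the pages are busy or wired; the helper
 * name sketch_deactivate_run and the array-of-pages input are
 * hypothetical, not code taken from a real caller.
 */
#if 0	/* illustrative sketch -- not part of the original source */
static void
sketch_deactivate_run(vm_object_t object, vm_page_t *page_run, int run_len)
{
	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
	struct vm_page_delayed_work	*dwp = &dw_array[0];
	int				dw_count = 0;
	int				i;

	for (i = 0; i < run_len; i++) {
		vm_page_t	m = page_run[i];

		/*
		 * pass 1: work that only needs the object lock...
		 * grab the busy bit so the page stays stable if
		 * vm_page_do_delayed_work has to drop the object lock
		 */
		m->busy = TRUE;

		/* record the queue-lock work for pass 2 */
		dwp->dw_m = m;
		dwp->dw_mask = DW_vm_page_deactivate_internal | DW_clear_busy | DW_PAGE_WAKEUP;
		dwp++;
		dw_count++;

		if (dw_count >= DEFAULT_DELAYED_WORK_LIMIT) {
			/* pass 2: one trip behind the vm_page_queue_lock */
			vm_page_do_delayed_work(object, &dw_array[0], dw_count);
			dwp = &dw_array[0];
			dw_count = 0;
		}
	}
	if (dw_count)
		vm_page_do_delayed_work(object, &dw_array[0], dw_count);
}
#endif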
4418
4419kern_return_t
4420vm_page_alloc_list(
4421 int page_count,
4422 int flags,
4423 vm_page_t *list)
4424{
4425 vm_page_t lo_page_list = VM_PAGE_NULL;
4426 vm_page_t mem;
4427 int i;
4428
4429 if ( !(flags & KMA_LOMEM))
4430 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
4431
4432 for (i = 0; i < page_count; i++) {
4433
4434 mem = vm_page_grablo();
4435
4436 if (mem == VM_PAGE_NULL) {
4437 if (lo_page_list)
4438 vm_page_free_list(lo_page_list, FALSE);
4439
4440 *list = VM_PAGE_NULL;
4441
4442 return (KERN_RESOURCE_SHORTAGE);
4443 }
4444 mem->pageq.next = (queue_entry_t) lo_page_list;
4445 lo_page_list = mem;
4446 }
4447 *list = lo_page_list;
4448
4449 return (KERN_SUCCESS);
4450}
4451
4452void
4453vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
4454{
4455 page->offset = offset;
4456}
4457
4458vm_page_t
4459vm_page_get_next(vm_page_t page)
4460{
4461 return ((vm_page_t) page->pageq.next);
4462}
4463
4464vm_object_offset_t
4465vm_page_get_offset(vm_page_t page)
4466{
4467 return (page->offset);
4468}
4469
4470ppnum_t
4471vm_page_get_phys_page(vm_page_t page)
4472{
4473 return (page->phys_page);
4474}
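
/*
 * A hedged usage sketch for vm_page_alloc_list() and the accessors
 * above: allocate a list of low-memory pages, walk it, then release it.
 * The helper name sketch_dump_lopage_list and the decision to free the
 * pages immediately are assumptions for illustration only.
 */
#if 0	/* illustrative sketch -- not part of the original source */
static void
sketch_dump_lopage_list(int page_count)
{
	vm_page_t	list;
	vm_page_t	m;

	if (vm_page_alloc_list(page_count, KMA_LOMEM, &list) != KERN_SUCCESS)
		return;

	/* the list is chained through pageq.next; walk it with the accessor */
	for (m = list; m != VM_PAGE_NULL; m = vm_page_get_next(m))
		printf("page %p -> physical page 0x%x\n", m, vm_page_get_phys_page(m));

	vm_page_free_list(list, FALSE);
}
#endif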
4475
4476
4477/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4478
4479#if HIBERNATION
4480
4481static vm_page_t hibernate_gobble_queue;
4482
4483extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4484
4485static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
4486static int hibernate_flush_dirty_pages(void);
4487static int hibernate_flush_queue(queue_head_t *, int);
4488
4489void hibernate_flush_wait(void);
4490void hibernate_mark_in_progress(void);
4491void hibernate_clear_in_progress(void);
4492
4493
4494struct hibernate_statistics {
4495 int hibernate_considered;
4496 int hibernate_reentered_on_q;
4497 int hibernate_found_dirty;
4498 int hibernate_skipped_cleaning;
4499 int hibernate_skipped_transient;
4500 int hibernate_skipped_precious;
4501 int hibernate_queue_nolock;
4502 int hibernate_queue_paused;
4503 int hibernate_throttled;
4504 int hibernate_throttle_timeout;
4505 int hibernate_drained;
4506 int hibernate_drain_timeout;
4507 int cd_lock_failed;
4508 int cd_found_precious;
4509 int cd_found_wired;
4510 int cd_found_busy;
4511 int cd_found_unusual;
4512 int cd_found_cleaning;
4513 int cd_found_laundry;
4514 int cd_found_dirty;
4515 int cd_local_free;
4516 int cd_total_free;
4517 int cd_vm_page_wire_count;
4518 int cd_pages;
4519 int cd_discarded;
4520 int cd_count_wire;
4521} hibernate_stats;
4522
4523
4524
4525static int
4526hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
4527{
4528 wait_result_t wait_result;
4529
4530 vm_page_lock_queues();
4531
4532 while (q->pgo_laundry) {
4533
4534 q->pgo_draining = TRUE;
4535
4536 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
4537
4538 vm_page_unlock_queues();
4539
4540 wait_result = thread_block(THREAD_CONTINUE_NULL);
4541
4542 if (wait_result == THREAD_TIMED_OUT) {
4543 hibernate_stats.hibernate_drain_timeout++;
4544 return (1);
4545 }
4546 vm_page_lock_queues();
4547
4548 hibernate_stats.hibernate_drained++;
4549 }
4550 vm_page_unlock_queues();
4551
4552 return (0);
4553}
4554
4555
4556static int
4557hibernate_flush_queue(queue_head_t *q, int qcount)
4558{
4559 vm_page_t m;
4560 vm_object_t l_object = NULL;
4561 vm_object_t m_object = NULL;
4562 int refmod_state = 0;
4563 int try_failed_count = 0;
4564 int retval = 0;
4565 int current_run = 0;
4566 struct vm_pageout_queue *iq;
4567 struct vm_pageout_queue *eq;
4568 struct vm_pageout_queue *tq;
4569
4570 hibernate_cleaning_in_progress = TRUE;
4571
4572 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
4573
4574 iq = &vm_pageout_queue_internal;
4575 eq = &vm_pageout_queue_external;
4576
4577 vm_page_lock_queues();
4578
4579 while (qcount && !queue_empty(q)) {
4580
4581 if (current_run++ == 1000) {
4582 if (hibernate_should_abort()) {
4583 retval = 1;
4584 break;
4585 }
4586 current_run = 0;
4587 }
4588
4589 m = (vm_page_t) queue_first(q);
4590 m_object = m->object;
4591
4592 /*
4593 * check to see if we currently are working
4594 * with the same object... if so, we've
4595 * already got the lock
4596 */
4597 if (m_object != l_object) {
4598 /*
4599 * the object associated with candidate page is
4600 * different from the one we were just working
4601 * with... dump the lock if we still own it
4602 */
4603 if (l_object != NULL) {
4604 vm_object_unlock(l_object);
4605 l_object = NULL;
4606 }
4607 /*
4608			 * Try to lock the object; since we've already got the
4609 * page queues lock, we can only 'try' for this one.
4610 * if the 'try' fails, we need to do a mutex_pause
4611 * to allow the owner of the object lock a chance to
4612 * run...
4613 */
4614 if ( !vm_object_lock_try_scan(m_object)) {
4615
4616 if (try_failed_count > 20) {
4617 hibernate_stats.hibernate_queue_nolock++;
4618
4619 goto reenter_pg_on_q;
4620 }
4621 vm_pageout_scan_wants_object = m_object;
4622
4623 vm_page_unlock_queues();
4624 mutex_pause(try_failed_count++);
4625 vm_page_lock_queues();
4626
4627 hibernate_stats.hibernate_queue_paused++;
4628 continue;
4629 } else {
4630 l_object = m_object;
4631 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4632 }
4633 }
4634 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
4635 /*
4636 * page is not to be cleaned
4637 * put it back on the head of its queue
4638 */
4639 if (m->cleaning)
4640 hibernate_stats.hibernate_skipped_cleaning++;
4641 else
4642 hibernate_stats.hibernate_skipped_transient++;
4643
4644 goto reenter_pg_on_q;
4645 }
4646 if ( !m_object->pager_initialized && m_object->pager_created)
4647 goto reenter_pg_on_q;
4648
4649 if (m_object->copy == VM_OBJECT_NULL) {
4650 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
4651 /*
4652 * let the normal hibernate image path
4653 * deal with these
4654 */
4655 goto reenter_pg_on_q;
4656 }
4657 }
4658 if ( !m->dirty && m->pmapped) {
4659 refmod_state = pmap_get_refmod(m->phys_page);
4660
4661 if ((refmod_state & VM_MEM_MODIFIED)) {
4662 SET_PAGE_DIRTY(m, FALSE);
4663 }
4664 } else
4665 refmod_state = 0;
4666
4667 if ( !m->dirty) {
4668 /*
4669 * page is not to be cleaned
4670 * put it back on the head of its queue
4671 */
4672 if (m->precious)
4673 hibernate_stats.hibernate_skipped_precious++;
4674
4675 goto reenter_pg_on_q;
4676 }
4677 tq = NULL;
4678
4679 if (m_object->internal) {
4680 if (VM_PAGE_Q_THROTTLED(iq))
4681 tq = iq;
4682 } else if (VM_PAGE_Q_THROTTLED(eq))
4683 tq = eq;
4684
4685 if (tq != NULL) {
4686 wait_result_t wait_result;
4687 int wait_count = 5;
4688
4689 if (l_object != NULL) {
4690 vm_object_unlock(l_object);
4691 l_object = NULL;
4692 }
4693 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4694
4695 tq->pgo_throttled = TRUE;
4696
4697 while (retval == 0) {
4698
4699 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
4700
4701 vm_page_unlock_queues();
4702
4703 wait_result = thread_block(THREAD_CONTINUE_NULL);
4704
4705 vm_page_lock_queues();
4706
4707 if (hibernate_should_abort())
4708 retval = 1;
4709
4710 if (wait_result != THREAD_TIMED_OUT)
4711 break;
4712
4713 if (--wait_count == 0) {
4714 hibernate_stats.hibernate_throttle_timeout++;
4715 retval = 1;
4716 }
4717 }
4718 if (retval)
4719 break;
4720
4721 hibernate_stats.hibernate_throttled++;
4722
4723 continue;
4724 }
4725 /*
4726 * we've already factored out pages in the laundry which
4727 * means this page can't be on the pageout queue so it's
4728 * safe to do the VM_PAGE_QUEUES_REMOVE
4729 */
4730 assert(!m->pageout_queue);
4731
4732 VM_PAGE_QUEUES_REMOVE(m);
4733
4734 vm_pageout_cluster(m, FALSE);
4735
4736 hibernate_stats.hibernate_found_dirty++;
4737
4738 goto next_pg;
4739
4740reenter_pg_on_q:
4741 queue_remove(q, m, vm_page_t, pageq);
4742 queue_enter(q, m, vm_page_t, pageq);
4743
4744 hibernate_stats.hibernate_reentered_on_q++;
4745next_pg:
4746 hibernate_stats.hibernate_considered++;
4747
4748 qcount--;
4749 try_failed_count = 0;
4750 }
4751 if (l_object != NULL) {
4752 vm_object_unlock(l_object);
4753 l_object = NULL;
4754 }
4755 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4756
4757 vm_page_unlock_queues();
4758
4759 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
4760
4761 hibernate_cleaning_in_progress = FALSE;
4762
4763 return (retval);
4764}
4765
4766
4767static int
4768hibernate_flush_dirty_pages()
4769{
4770 struct vm_speculative_age_q *aq;
4771 uint32_t i;
4772
4773 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
4774
4775 if (vm_page_local_q) {
4776 for (i = 0; i < vm_page_local_q_count; i++)
4777 vm_page_reactivate_local(i, TRUE, FALSE);
4778 }
4779
4780 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
4781 int qcount;
4782 vm_page_t m;
4783
4784 aq = &vm_page_queue_speculative[i];
4785
4786 if (queue_empty(&aq->age_q))
4787 continue;
4788 qcount = 0;
4789
4790 vm_page_lockspin_queues();
4791
4792 queue_iterate(&aq->age_q,
4793 m,
4794 vm_page_t,
4795 pageq)
4796 {
4797 qcount++;
4798 }
4799 vm_page_unlock_queues();
4800
4801 if (qcount) {
4802 if (hibernate_flush_queue(&aq->age_q, qcount))
4803 return (1);
4804 }
4805 }
4806 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count))
4807 return (1);
4808 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
4809 return (1);
4810 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
4811 return (1);
4812 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
4813 return (1);
4814
4815 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
4816 return (1);
4817 return (hibernate_drain_pageout_queue(&vm_pageout_queue_external));
4818}
4819
4820
4821extern void IOSleep(unsigned int);
4822extern int sync_internal(void);
4823
4824int
4825hibernate_flush_memory()
4826{
4827 int retval;
4828
4829 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
4830
4831 IOSleep(2 * 1000);
4832
4833 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_NONE, vm_page_free_count, 0, 0, 0, 0);
4834
4835 if ((retval = hibernate_flush_dirty_pages()) == 0) {
4836 if (consider_buffer_cache_collect != NULL) {
4837
4838 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, vm_page_wire_count, 0, 0, 0, 0);
4839
4840 sync_internal();
4841 (void)(*consider_buffer_cache_collect)(1);
4842 consider_zone_gc(TRUE);
4843
4844 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, vm_page_wire_count, 0, 0, 0, 0);
4845 }
4846 }
4847 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
4848
4849 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
4850 hibernate_stats.hibernate_considered,
4851 hibernate_stats.hibernate_reentered_on_q,
4852 hibernate_stats.hibernate_found_dirty);
4853 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) queue_nolock(%d)\n",
4854 hibernate_stats.hibernate_skipped_cleaning,
4855 hibernate_stats.hibernate_skipped_transient,
4856 hibernate_stats.hibernate_skipped_precious,
4857 hibernate_stats.hibernate_queue_nolock);
4858 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
4859 hibernate_stats.hibernate_queue_paused,
4860 hibernate_stats.hibernate_throttled,
4861 hibernate_stats.hibernate_throttle_timeout,
4862 hibernate_stats.hibernate_drained,
4863 hibernate_stats.hibernate_drain_timeout);
4864
4865 return (retval);
4866}
4867
4868
4869static void
4870hibernate_page_list_zero(hibernate_page_list_t *list)
4871{
4872 uint32_t bank;
4873 hibernate_bitmap_t * bitmap;
4874
4875 bitmap = &list->bank_bitmap[0];
4876 for (bank = 0; bank < list->bank_count; bank++)
4877 {
4878 uint32_t last_bit;
4879
4880 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
4881 // set out-of-bound bits at end of bitmap.
4882 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
4883 if (last_bit)
4884 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
4885
4886 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
4887 }
4888}
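
/*
 * A sketch of how a single physical page's bit would be located in the
 * banked bitmap layout initialized above: find the bank whose
 * [first_page, last_page] range covers the frame, then set its bit
 * (a set bit marks a page that does not need to be saved, per the
 * comment above hibernate_page_list_setall below).  The helper name
 * sketch_mark_page_not_needed is hypothetical; the real work is done by
 * hibernate_page_bitset(), which is implemented elsewhere.
 */
#if 0	/* illustrative sketch -- not part of the original source */
static void
sketch_mark_page_not_needed(hibernate_page_list_t *list, ppnum_t pn)
{
    uint32_t		bank;
    hibernate_bitmap_t	*bitmap = &list->bank_bitmap[0];

    for (bank = 0; bank < list->bank_count; bank++)
    {
	if (pn >= bitmap->first_page && pn <= bitmap->last_page)
	{
	    uint32_t bit = pn - bitmap->first_page;

	    /* bit 31 of each word covers the lowest-numbered page in that
	       word, matching the out-of-bound fill in hibernate_page_list_zero() */
	    bitmap->bitmap[bit >> 5] |= (0x80000000 >> (bit & 31));
	    return;
	}
	bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
    }
}
#endif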
4889
4890void
4891hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
4892{
4893 uint32_t i;
4894 vm_page_t m;
4895 uint64_t start, end, timeout, nsec;
4896 clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
4897 clock_get_uptime(&start);
4898
4899 for (i = 0; i < gobble_count; i++)
4900 {
4901 while (VM_PAGE_NULL == (m = vm_page_grab()))
4902 {
4903 clock_get_uptime(&end);
4904 if (end >= timeout)
4905 break;
4906 VM_PAGE_WAIT();
4907 }
4908 if (!m)
4909 break;
4910 m->busy = FALSE;
4911 vm_page_gobble(m);
4912
4913 m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
4914 hibernate_gobble_queue = m;
4915 }
4916
4917 clock_get_uptime(&end);
4918 absolutetime_to_nanoseconds(end - start, &nsec);
4919 HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
4920}
4921
4922void
4923hibernate_free_gobble_pages(void)
4924{
4925 vm_page_t m, next;
4926 uint32_t count = 0;
4927
4928 m = (vm_page_t) hibernate_gobble_queue;
4929 while(m)
4930 {
4931 next = (vm_page_t) m->pageq.next;
4932 vm_page_free(m);
4933 count++;
4934 m = next;
4935 }
4936 hibernate_gobble_queue = VM_PAGE_NULL;
4937
4938 if (count)
4939 HIBLOG("Freed %d pages\n", count);
4940}
4941
4942static boolean_t
4943hibernate_consider_discard(vm_page_t m)
4944{
4945 vm_object_t object = NULL;
4946 int refmod_state;
4947 boolean_t discard = FALSE;
4948
4949 do
4950 {
4951 if (m->private)
4952 panic("hibernate_consider_discard: private");
4953
4954 if (!vm_object_lock_try(m->object)) {
4955 hibernate_stats.cd_lock_failed++;
4956 break;
4957 }
4958 object = m->object;
4959
4960 if (VM_PAGE_WIRED(m)) {
4961 hibernate_stats.cd_found_wired++;
4962 break;
4963 }
4964 if (m->precious) {
4965 hibernate_stats.cd_found_precious++;
4966 break;
4967 }
4968 if (m->busy || !object->alive) {
4969 /*
4970 * Somebody is playing with this page.
4971 */
4972 hibernate_stats.cd_found_busy++;
4973 break;
4974 }
4975 if (m->absent || m->unusual || m->error) {
4976 /*
4977			 * If it's unusual in any way, ignore it
4978 */
4979 hibernate_stats.cd_found_unusual++;
4980 break;
4981 }
4982 if (m->cleaning) {
4983 hibernate_stats.cd_found_cleaning++;
4984 break;
4985 }
4986 if (m->laundry) {
4987 hibernate_stats.cd_found_laundry++;
4988 break;
4989 }
4990 if (!m->dirty)
4991 {
4992 refmod_state = pmap_get_refmod(m->phys_page);
4993
4994 if (refmod_state & VM_MEM_REFERENCED)
4995 m->reference = TRUE;
4996 if (refmod_state & VM_MEM_MODIFIED) {
4997 SET_PAGE_DIRTY(m, FALSE);
4998 }
4999 }
5000
5001 /*
5002 * If it's clean or purgeable we can discard the page on wakeup.
5003 */
5004 discard = (!m->dirty)
5005 || (VM_PURGABLE_VOLATILE == object->purgable)
5006 || (VM_PURGABLE_EMPTY == object->purgable);
5007
5008 if (discard == FALSE)
5009 hibernate_stats.cd_found_dirty++;
5010 }
5011 while (FALSE);
5012
5013 if (object)
5014 vm_object_unlock(object);
5015
5016 return (discard);
5017}
5018
5019
5020static void
5021hibernate_discard_page(vm_page_t m)
5022{
5023 if (m->absent || m->unusual || m->error)
5024 /*
5025         * If it's unusual in any way, ignore it
5026 */
5027 return;
5028
5029#if DEBUG
5030 vm_object_t object = m->object;
5031 if (!vm_object_lock_try(m->object))
5032 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
5033#else
5034 /* No need to lock page queue for token delete, hibernate_vm_unlock()
5035 makes sure these locks are uncontended before sleep */
5036#endif /* !DEBUG */
5037
5038 if (m->pmapped == TRUE)
5039 {
5040 __unused int refmod_state = pmap_disconnect(m->phys_page);
5041 }
5042
5043 if (m->laundry)
5044 panic("hibernate_discard_page(%p) laundry", m);
5045 if (m->private)
5046 panic("hibernate_discard_page(%p) private", m);
5047 if (m->fictitious)
5048 panic("hibernate_discard_page(%p) fictitious", m);
5049
5050 if (VM_PURGABLE_VOLATILE == m->object->purgable)
5051 {
5052 /* object should be on a queue */
5053 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
5054 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
5055 assert(old_queue);
5056 vm_purgeable_token_delete_first(old_queue);
5057 m->object->purgable = VM_PURGABLE_EMPTY;
5058 }
5059
5060 vm_page_free(m);
5061
5062#if DEBUG
5063 vm_object_unlock(object);
5064#endif /* DEBUG */
5065}
5066
5067/*
5068 Bits zero in the bitmaps => page needs to be saved. All pages default to being saved;
5069 pages known to the VM to not need saving are then subtracted.
5070 Wired pages to be saved are present in page_list_wired, pageable ones in page_list.
5071*/
5072
5073void
5074hibernate_page_list_setall(hibernate_page_list_t * page_list,
5075 hibernate_page_list_t * page_list_wired,
5076 hibernate_page_list_t * page_list_pal,
5077 uint32_t * pagesOut)
5078{
5079 uint64_t start, end, nsec;
5080 vm_page_t m;
5081 uint32_t pages = page_list->page_count;
5082 uint32_t count_zf = 0, count_throttled = 0;
5083 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
5084 uint32_t count_wire = pages;
5085 uint32_t count_discard_active = 0;
5086 uint32_t count_discard_inactive = 0;
5087 uint32_t count_discard_cleaned = 0;
5088 uint32_t count_discard_purgeable = 0;
5089 uint32_t count_discard_speculative = 0;
5090 uint32_t i;
5091 uint32_t bank;
5092 hibernate_bitmap_t * bitmap;
5093 hibernate_bitmap_t * bitmap_wired;
5094
5095
5096 HIBLOG("hibernate_page_list_setall start %p, %p\n", page_list, page_list_wired);
5097
5098#if DEBUG
5099 vm_page_lock_queues();
5100 if (vm_page_local_q) {
5101 for (i = 0; i < vm_page_local_q_count; i++) {
5102 struct vpl *lq;
5103 lq = &vm_page_local_q[i].vpl_un.vpl;
5104 VPL_LOCK(&lq->vpl_lock);
5105 }
5106 }
5107#endif /* DEBUG */
5108
5109
5110 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
5111
5112 clock_get_uptime(&start);
5113
5114 hibernate_page_list_zero(page_list);
5115 hibernate_page_list_zero(page_list_wired);
5116 hibernate_page_list_zero(page_list_pal);
5117
5118 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
5119 hibernate_stats.cd_pages = pages;
5120
5121 if (vm_page_local_q) {
5122 for (i = 0; i < vm_page_local_q_count; i++)
5123 vm_page_reactivate_local(i, TRUE, TRUE);
5124 }
5125
5126 m = (vm_page_t) hibernate_gobble_queue;
5127 while(m)
5128 {
5129 pages--;
5130 count_wire--;
5131 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5132 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5133 m = (vm_page_t) m->pageq.next;
5134 }
5135
5136 for( i = 0; i < real_ncpus; i++ )
5137 {
5138 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
5139 {
5140 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
5141 {
5142 pages--;
5143 count_wire--;
5144 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5145 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5146
5147 hibernate_stats.cd_local_free++;
5148 hibernate_stats.cd_total_free++;
5149 }
5150 }
5151 }
5152
5153 for( i = 0; i < vm_colors; i++ )
5154 {
5155 queue_iterate(&vm_page_queue_free[i],
5156 m,
5157 vm_page_t,
5158 pageq)
5159 {
5160 pages--;
5161 count_wire--;
5162 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5163 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5164
5165 hibernate_stats.cd_total_free++;
5166 }
5167 }
5168
5169 queue_iterate(&vm_lopage_queue_free,
5170 m,
5171 vm_page_t,
5172 pageq)
5173 {
5174 pages--;
5175 count_wire--;
5176 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5177 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5178
5179 hibernate_stats.cd_total_free++;
5180 }
5181
5182 queue_iterate( &vm_page_queue_throttled,
5183 m,
5184 vm_page_t,
5185 pageq )
5186 {
5187 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5188 && hibernate_consider_discard(m))
5189 {
5190 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5191 count_discard_inactive++;
5192 }
5193 else
5194 count_throttled++;
5195 count_wire--;
5196 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5197 }
5198
5199 queue_iterate( &vm_page_queue_anonymous,
5200 m,
5201 vm_page_t,
5202 pageq )
5203 {
5204 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5205 && hibernate_consider_discard(m))
5206 {
5207 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5208 if (m->dirty)
5209 count_discard_purgeable++;
5210 else
5211 count_discard_inactive++;
5212 }
5213 else
5214 count_zf++;
5215 count_wire--;
5216 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5217 }
5218
5219 queue_iterate( &vm_page_queue_inactive,
5220 m,
5221 vm_page_t,
5222 pageq )
5223 {
5224 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5225 && hibernate_consider_discard(m))
5226 {
5227 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5228 if (m->dirty)
5229 count_discard_purgeable++;
5230 else
5231 count_discard_inactive++;
5232 }
5233 else
5234 count_inactive++;
5235 count_wire--;
5236 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5237 }
5238
5239 queue_iterate( &vm_page_queue_cleaned,
5240 m,
5241 vm_page_t,
5242 pageq )
5243 {
5244 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5245 && hibernate_consider_discard(m))
5246 {
5247 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5248 if (m->dirty)
5249 count_discard_purgeable++;
5250 else
5251 count_discard_cleaned++;
5252 }
5253 else
5254 count_cleaned++;
5255 count_wire--;
5256 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5257 }
5258
5259 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5260 {
5261 queue_iterate(&vm_page_queue_speculative[i].age_q,
5262 m,
5263 vm_page_t,
5264 pageq)
5265 {
5266 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5267 && hibernate_consider_discard(m))
5268 {
5269 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5270 count_discard_speculative++;
5271 }
5272 else
5273 count_speculative++;
5274 count_wire--;
5275 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5276 }
5277 }
5278
5279 queue_iterate( &vm_page_queue_active,
5280 m,
5281 vm_page_t,
5282 pageq )
5283 {
5284 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
5285 && hibernate_consider_discard(m))
5286 {
5287 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5288 if (m->dirty)
5289 count_discard_purgeable++;
5290 else
5291 count_discard_active++;
5292 }
5293 else
5294 count_active++;
5295 count_wire--;
5296 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5297 }
5298
5299    // Remove wired pages from the pageable bitmap: a page to be saved as wired
5299    // (bit clear in page_list_wired) gets its bit set in page_list so it isn't also saved as pageable.
5300
5301 bitmap = &page_list->bank_bitmap[0];
5302 bitmap_wired = &page_list_wired->bank_bitmap[0];
5303 for (bank = 0; bank < page_list->bank_count; bank++)
5304 {
5305 for (i = 0; i < bitmap->bitmapwords; i++)
5306 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
5307 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
5308 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
5309 }
5310
5311 // machine dependent adjustments
5312 hibernate_page_list_setall_machine(page_list, page_list_wired, &pages);
5313
5314 hibernate_stats.cd_count_wire = count_wire;
5315 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable + count_discard_speculative + count_discard_cleaned;
5316
5317 clock_get_uptime(&end);
5318 absolutetime_to_nanoseconds(end - start, &nsec);
5319 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
5320
5321 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d cleaned %d\n",
5322 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_zf, count_throttled,
5323 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
5324
5325 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
5326
5327#if DEBUG
5328 if (vm_page_local_q) {
5329 for (i = 0; i < vm_page_local_q_count; i++) {
5330 struct vpl *lq;
5331 lq = &vm_page_local_q[i].vpl_un.vpl;
5332 VPL_UNLOCK(&lq->vpl_lock);
5333 }
5334 }
5335 vm_page_unlock_queues();
5336#endif /* DEBUG */
5337
5338 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
5339}
5340
5341void
5342hibernate_page_list_discard(hibernate_page_list_t * page_list)
5343{
5344 uint64_t start, end, nsec;
5345 vm_page_t m;
5346 vm_page_t next;
5347 uint32_t i;
5348 uint32_t count_discard_active = 0;
5349 uint32_t count_discard_inactive = 0;
5350 uint32_t count_discard_purgeable = 0;
5351 uint32_t count_discard_cleaned = 0;
5352 uint32_t count_discard_speculative = 0;
5353
5354#if DEBUG
5355 vm_page_lock_queues();
5356 if (vm_page_local_q) {
5357 for (i = 0; i < vm_page_local_q_count; i++) {
5358 struct vpl *lq;
5359 lq = &vm_page_local_q[i].vpl_un.vpl;
5360 VPL_LOCK(&lq->vpl_lock);
5361 }
5362 }
5363#endif /* DEBUG */
5364
5365 clock_get_uptime(&start);
5366
5367 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
5368 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
5369 {
5370 next = (vm_page_t) m->pageq.next;
5371 if (hibernate_page_bittst(page_list, m->phys_page))
5372 {
5373 if (m->dirty)
5374 count_discard_purgeable++;
5375 else
5376 count_discard_inactive++;
5377 hibernate_discard_page(m);
5378 }
5379 m = next;
5380 }
5381
5382 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5383 {
5384 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5385 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5386 {
5387 next = (vm_page_t) m->pageq.next;
5388 if (hibernate_page_bittst(page_list, m->phys_page))
5389 {
5390 count_discard_speculative++;
5391 hibernate_discard_page(m);
5392 }
5393 m = next;
5394 }
5395 }
5396
5397 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5398 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5399 {
5400 next = (vm_page_t) m->pageq.next;
5401 if (hibernate_page_bittst(page_list, m->phys_page))
5402 {
5403 if (m->dirty)
5404 count_discard_purgeable++;
5405 else
5406 count_discard_inactive++;
5407 hibernate_discard_page(m);
5408 }
5409 m = next;
5410 }
5411
5412 m = (vm_page_t) queue_first(&vm_page_queue_active);
5413 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5414 {
5415 next = (vm_page_t) m->pageq.next;
5416 if (hibernate_page_bittst(page_list, m->phys_page))
5417 {
5418 if (m->dirty)
5419 count_discard_purgeable++;
5420 else
5421 count_discard_active++;
5422 hibernate_discard_page(m);
5423 }
5424 m = next;
5425 }
5426
5427 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
5428 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
5429 {
5430 next = (vm_page_t) m->pageq.next;
5431 if (hibernate_page_bittst(page_list, m->phys_page))
5432 {
5433 if (m->dirty)
5434 count_discard_purgeable++;
5435 else
5436 count_discard_cleaned++;
5437 hibernate_discard_page(m);
5438 }
5439 m = next;
5440 }
5441
5442#if DEBUG
5443 if (vm_page_local_q) {
5444 for (i = 0; i < vm_page_local_q_count; i++) {
5445 struct vpl *lq;
5446 lq = &vm_page_local_q[i].vpl_un.vpl;
5447 VPL_UNLOCK(&lq->vpl_lock);
5448 }
5449 }
5450 vm_page_unlock_queues();
5451#endif /* DEBUG */
5452
5453 clock_get_uptime(&end);
5454 absolutetime_to_nanoseconds(end - start, &nsec);
5455 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
5456 nsec / 1000000ULL,
5457 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
5458}
5459
5460#endif /* HIBERNATION */
5461
5462/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
5463
5464#include <mach_vm_debug.h>
5465#if MACH_VM_DEBUG
5466
5467#include <mach_debug/hash_info.h>
5468#include <vm/vm_debug.h>
5469
5470/*
5471 * Routine: vm_page_info
5472 * Purpose:
5473 * Return information about the global VP table.
5474 * Fills the buffer with as much information as possible
5475 * and returns the desired size of the buffer.
5476 * Conditions:
5477 * Nothing locked. The caller should provide
5478 * possibly-pageable memory.
5479 */
5480
5481unsigned int
5482vm_page_info(
5483 hash_info_bucket_t *info,
5484 unsigned int count)
5485{
5486 unsigned int i;
5487 lck_spin_t *bucket_lock;
5488
5489 if (vm_page_bucket_count < count)
5490 count = vm_page_bucket_count;
5491
5492 for (i = 0; i < count; i++) {
5493 vm_page_bucket_t *bucket = &vm_page_buckets[i];
5494 unsigned int bucket_count = 0;
5495 vm_page_t m;
5496
5497 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
5498 lck_spin_lock(bucket_lock);
5499
5500 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
5501 bucket_count++;
5502
5503 lck_spin_unlock(bucket_lock);
5504
5505 /* don't touch pageable memory while holding locks */
5506 info[i].hib_count = bucket_count;
5507 }
5508
5509 return vm_page_bucket_count;
5510}
5511#endif /* MACH_VM_DEBUG */