apple/xnu (xnu-2782.30.5): osfmk/vm/vm_resident.c
1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67
68 #include <mach/clock_types.h>
69 #include <mach/vm_prot.h>
70 #include <mach/vm_statistics.h>
71 #include <mach/sdt.h>
72 #include <kern/counters.h>
73 #include <kern/sched_prim.h>
74 #include <kern/task.h>
75 #include <kern/thread.h>
76 #include <kern/kalloc.h>
77 #include <kern/zalloc.h>
78 #include <kern/xpr.h>
79 #include <kern/ledger.h>
80 #include <vm/pmap.h>
81 #include <vm/vm_init.h>
82 #include <vm/vm_map.h>
83 #include <vm/vm_page.h>
84 #include <vm/vm_pageout.h>
85 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
86 #include <kern/misc_protos.h>
87 #include <zone_debug.h>
88 #include <vm/cpm.h>
89 #include <pexpert/pexpert.h>
90
91 #include <vm/vm_protos.h>
92 #include <vm/memory_object.h>
93 #include <vm/vm_purgeable_internal.h>
94 #include <vm/vm_compressor.h>
95
96 #if CONFIG_PHANTOM_CACHE
97 #include <vm/vm_phantom_cache.h>
98 #endif
99
100 #include <IOKit/IOHibernatePrivate.h>
101
102 #include <sys/kdebug.h>
103
104 boolean_t hibernate_cleaning_in_progress = FALSE;
105 boolean_t vm_page_free_verify = TRUE;
106
107 uint32_t vm_lopage_free_count = 0;
108 uint32_t vm_lopage_free_limit = 0;
109 uint32_t vm_lopage_lowater = 0;
110 boolean_t vm_lopage_refill = FALSE;
111 boolean_t vm_lopage_needed = FALSE;
112
113 lck_mtx_ext_t vm_page_queue_lock_ext;
114 lck_mtx_ext_t vm_page_queue_free_lock_ext;
115 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
116
117 int speculative_age_index = 0;
118 int speculative_steal_index = 0;
119 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
120
121
122 __private_extern__ void vm_page_init_lck_grp(void);
123
124 static void vm_page_free_prepare(vm_page_t page);
125 static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
126
127
128
129
130 /*
131 * Associated with each page of user-allocatable memory is a
132 * page structure.
133 */
134
135 /*
136 * These variables record the values returned by vm_page_bootstrap,
137 * for debugging purposes. The implementation of pmap_steal_memory
138 * and pmap_startup here also uses them internally.
139 */
140
141 vm_offset_t virtual_space_start;
142 vm_offset_t virtual_space_end;
143 uint32_t vm_page_pages;
144
145 /*
146 * The vm_page_lookup() routine, which provides for fast
147 * (virtual memory object, offset) to page lookup, employs
148 * the following hash table. The vm_page_{insert,remove}
149 * routines install and remove associations in the table.
150 * [This table is often called the virtual-to-physical,
151 * or VP, table.]
152 */
153 typedef struct {
154 vm_page_packed_t page_list;
155 #if MACH_PAGE_HASH_STATS
156 int cur_count; /* current count */
157 int hi_count; /* high water mark */
158 #endif /* MACH_PAGE_HASH_STATS */
159 } vm_page_bucket_t;
160
161
162 #define BUCKETS_PER_LOCK 16
163
164 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
165 unsigned int vm_page_bucket_count = 0; /* How big is array? */
166 unsigned int vm_page_hash_mask; /* Mask for hash function */
167 unsigned int vm_page_hash_shift; /* Shift for hash function */
168 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
169 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
170
171 lck_spin_t *vm_page_bucket_locks;
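/*
 * Each lck_spin_t above guards BUCKETS_PER_LOCK consecutive hash buckets
 * rather than a single bucket, which keeps the lock array small.  As an
 * illustration of the pattern used throughout this file, the lock for a
 * given bucket is located as:
 *
 *     bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
 */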
172
173 #if VM_PAGE_BUCKETS_CHECK
174 boolean_t vm_page_buckets_check_ready = FALSE;
175 #if VM_PAGE_FAKE_BUCKETS
176 vm_page_bucket_t *vm_page_fake_buckets; /* decoy buckets */
177 vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
178 #endif /* VM_PAGE_FAKE_BUCKETS */
179 #endif /* VM_PAGE_BUCKETS_CHECK */
180
181 #if MACH_PAGE_HASH_STATS
182 /* This routine is only for debug. It is intended to be called by
183 * hand by a developer using a kernel debugger. This routine prints
184 * out vm_page_hash table statistics to the kernel debug console.
185 */
186 void
187 hash_debug(void)
188 {
189 int i;
190 int numbuckets = 0;
191 int highsum = 0;
192 int maxdepth = 0;
193
194 for (i = 0; i < vm_page_bucket_count; i++) {
195 if (vm_page_buckets[i].hi_count) {
196 numbuckets++;
197 highsum += vm_page_buckets[i].hi_count;
198 if (vm_page_buckets[i].hi_count > maxdepth)
199 maxdepth = vm_page_buckets[i].hi_count;
200 }
201 }
202 printf("Total number of buckets: %d\n", vm_page_bucket_count);
203 printf("Number used buckets: %d = %d%%\n",
204 numbuckets, 100*numbuckets/vm_page_bucket_count);
205 printf("Number unused buckets: %d = %d%%\n",
206 vm_page_bucket_count - numbuckets,
207 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
208 printf("Sum of bucket max depth: %d\n", highsum);
209 printf("Average bucket depth: %d.%2d\n",
210 highsum/vm_page_bucket_count,
211 highsum%vm_page_bucket_count);
212 printf("Maximum bucket depth: %d\n", maxdepth);
213 }
214 #endif /* MACH_PAGE_HASH_STATS */
215
216 /*
217 * The virtual page size is currently implemented as a runtime
218 * variable, but is constant once initialized using vm_set_page_size.
219 * This initialization must be done in the machine-dependent
220 * bootstrap sequence, before calling other machine-independent
221 * initializations.
222 *
223 * All references to the virtual page size outside this
224 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
225 * constants.
226 */
227 vm_size_t page_size = PAGE_SIZE;
228 vm_size_t page_mask = PAGE_MASK;
229 int page_shift = PAGE_SHIFT;
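/*
 * For example, with 4 KB pages these evaluate to:
 *
 *     page_size  = 4096
 *     page_mask  = 0xFFF    (page_size - 1)
 *     page_shift = 12       (log2 of page_size)
 *
 * vm_set_page_size() below re-derives page_shift and sanity-checks that
 * page_size is a power of two.
 */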
230
231 /*
232 * Resident page structures are initialized from
233 * a template (see vm_page_alloc).
234 *
235 * When adding a new field to the virtual memory
236 * object structure, be sure to add initialization
237 * (see vm_page_bootstrap).
238 */
239 struct vm_page vm_page_template;
240
241 vm_page_t vm_pages = VM_PAGE_NULL;
242 unsigned int vm_pages_count = 0;
243 ppnum_t vm_page_lowest = 0;
244
245 /*
246 * Resident pages that represent real memory
247 * are allocated from a set of free lists,
248 * one per color.
249 */
250 unsigned int vm_colors;
251 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
252 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
253 unsigned int vm_free_magazine_refill_limit = 0;
254 queue_head_t vm_page_queue_free[MAX_COLORS];
255 unsigned int vm_page_free_wanted;
256 unsigned int vm_page_free_wanted_privileged;
257 unsigned int vm_page_free_count;
258 unsigned int vm_page_fictitious_count;
259
260 /*
261 * Occasionally, the virtual memory system uses
262 * resident page structures that do not refer to
263 * real pages, for example to leave a page with
264 * important state information in the VP table.
265 *
266 * These page structures are allocated the way
267 * most other kernel structures are.
268 */
269 zone_t vm_page_zone;
270 vm_locks_array_t vm_page_locks;
271 decl_lck_mtx_data(,vm_page_alloc_lock)
272 lck_mtx_ext_t vm_page_alloc_lock_ext;
273
274 unsigned int io_throttle_zero_fill;
275
276 unsigned int vm_page_local_q_count = 0;
277 unsigned int vm_page_local_q_soft_limit = 250;
278 unsigned int vm_page_local_q_hard_limit = 500;
279 struct vplq *vm_page_local_q = NULL;
280
281 /* N.B. Guard and fictitious pages must not
282 * be assigned a zero phys_page value.
283 */
284 /*
285 * Fictitious pages don't have a physical address,
286 * but we must initialize phys_page to something.
287 * For debugging, this should be a strange value
288 * that the pmap module can recognize in assertions.
289 */
290 ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
291
292 /*
293 * Guard pages are not accessible so they don't
294 * need a physical address, but we need to enter
295 * one in the pmap.
296 * Let's make it recognizable and make sure that
297 * we don't use a real physical page with that
298 * physical address.
299 */
300 ppnum_t vm_page_guard_addr = (ppnum_t) -2;
301
302 /*
303 * Resident page structures are also chained on
304 * queues that are used by the page replacement
305 * system (pageout daemon). These queues are
306 * defined here, but are shared by the pageout
307 * module. The inactive queue is broken into
308 * file-backed and anonymous queues for convenience, as the
309 * pageout daemon often assigns a higher
310 * importance to anonymous pages (it is less likely to pick them).
311 */
312 queue_head_t vm_page_queue_active;
313 queue_head_t vm_page_queue_inactive;
314 queue_head_t vm_page_queue_anonymous; /* inactive memory queue for anonymous pages */
315 queue_head_t vm_page_queue_throttled;
316
317 unsigned int vm_page_active_count;
318 unsigned int vm_page_inactive_count;
319 unsigned int vm_page_anonymous_count;
320 unsigned int vm_page_throttled_count;
321 unsigned int vm_page_speculative_count;
322 unsigned int vm_page_wire_count;
323 unsigned int vm_page_wire_count_initial;
324 unsigned int vm_page_gobble_count = 0;
325
326 #define VM_PAGE_WIRE_COUNT_WARNING 0
327 #define VM_PAGE_GOBBLE_COUNT_WARNING 0
328
329 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
330 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
331 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
332
333 unsigned int vm_page_xpmapped_external_count = 0;
334 unsigned int vm_page_external_count = 0;
335 unsigned int vm_page_internal_count = 0;
336 unsigned int vm_page_pageable_external_count = 0;
337 unsigned int vm_page_pageable_internal_count = 0;
338
339 #if DEVELOPMENT || DEBUG
340 unsigned int vm_page_speculative_recreated = 0;
341 unsigned int vm_page_speculative_created = 0;
342 unsigned int vm_page_speculative_used = 0;
343 #endif
344
345 queue_head_t vm_page_queue_cleaned;
346
347 unsigned int vm_page_cleaned_count = 0;
348 unsigned int vm_pageout_enqueued_cleaned = 0;
349
350 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
351 ppnum_t max_valid_low_ppnum = 0xffffffff;
352
353
354 /*
355 * Several page replacement parameters are also
356 * shared with this module, so that page allocation
357 * (done here in vm_page_alloc) can trigger the
358 * pageout daemon.
359 */
360 unsigned int vm_page_free_target = 0;
361 unsigned int vm_page_free_min = 0;
362 unsigned int vm_page_throttle_limit = 0;
363 unsigned int vm_page_inactive_target = 0;
364 unsigned int vm_page_anonymous_min = 0;
365 unsigned int vm_page_inactive_min = 0;
366 unsigned int vm_page_free_reserved = 0;
367 unsigned int vm_page_throttle_count = 0;
368
369
370 /*
371 * The VM system has a couple of heuristics for deciding
372 * that pages are "uninteresting" and should be placed
373 * on the inactive queue as likely candidates for replacement.
374 * These variables let the heuristics be controlled at run-time
375 * to make experimentation easier.
376 */
377
378 boolean_t vm_page_deactivate_hint = TRUE;
379
380 struct vm_page_stats_reusable vm_page_stats_reusable;
381
382 /*
383 * vm_set_page_size:
384 *
385 * Sets the page size, perhaps based upon the memory
386 * size. Must be called before any use of page-size
387 * dependent functions.
388 *
389 * Sets page_shift and page_mask from page_size.
390 */
391 void
392 vm_set_page_size(void)
393 {
394 page_size = PAGE_SIZE;
395 page_mask = PAGE_MASK;
396 page_shift = PAGE_SHIFT;
397
398 if ((page_mask & page_size) != 0)
399 panic("vm_set_page_size: page size not a power of two");
400
401 for (page_shift = 0; ; page_shift++)
402 if ((1U << page_shift) == page_size)
403 break;
404 }
405
406 #define COLOR_GROUPS_TO_STEAL 4
407
408
409 /* Called once during startup, once the cache geometry is known.
410 */
411 static void
412 vm_page_set_colors( void )
413 {
414 unsigned int n, override;
415
416 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
417 n = override;
418 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
419 n = vm_cache_geometry_colors;
420 else n = DEFAULT_COLORS; /* use default if all else fails */
421
422 if ( n == 0 )
423 n = 1;
424 if ( n > MAX_COLORS )
425 n = MAX_COLORS;
426
427 /* the count must be a power of 2 */
428 if ( ( n & (n - 1)) != 0 )
429 panic("vm_page_set_colors");
430
431 vm_colors = n;
432 vm_color_mask = n - 1;
433
434 vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
435 }
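/*
 * Illustrative example: on hardware that reports an 8-color cache
 * geometry, vm_colors = 8, vm_color_mask = 7 and, with
 * COLOR_GROUPS_TO_STEAL = 4, vm_free_magazine_refill_limit = 32.
 * Free pages are kept on vm_page_queue_free[color], where the color is
 * conventionally taken from the low bits of the physical page number
 * (phys_page & vm_color_mask), so that physically consecutive pages
 * spread across the per-color free queues.
 */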
436
437
438 lck_grp_t vm_page_lck_grp_free;
439 lck_grp_t vm_page_lck_grp_queue;
440 lck_grp_t vm_page_lck_grp_local;
441 lck_grp_t vm_page_lck_grp_purge;
442 lck_grp_t vm_page_lck_grp_alloc;
443 lck_grp_t vm_page_lck_grp_bucket;
444 lck_grp_attr_t vm_page_lck_grp_attr;
445 lck_attr_t vm_page_lck_attr;
446
447
448 __private_extern__ void
449 vm_page_init_lck_grp(void)
450 {
451 /*
452 * initialize the vm_page lock world
453 */
454 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
455 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
456 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
457 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
458 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
459 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
460 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
461 lck_attr_setdefault(&vm_page_lck_attr);
462 lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
463
464 vm_compressor_init_locks();
465 }
466
467 void
468 vm_page_init_local_q()
469 {
470 unsigned int num_cpus;
471 unsigned int i;
472 struct vplq *t_local_q;
473
474 num_cpus = ml_get_max_cpus();
475
476 /*
477 * no point in this for a uni-processor system
478 */
479 if (num_cpus >= 2) {
480 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
481
482 for (i = 0; i < num_cpus; i++) {
483 struct vpl *lq;
484
485 lq = &t_local_q[i].vpl_un.vpl;
486 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
487 queue_init(&lq->vpl_queue);
488 lq->vpl_count = 0;
489 lq->vpl_internal_count = 0;
490 lq->vpl_external_count = 0;
491 }
492 vm_page_local_q_count = num_cpus;
493
494 vm_page_local_q = (struct vplq *)t_local_q;
495 }
496 }
497
498
499 /*
500 * vm_page_bootstrap:
501 *
502 * Initializes the resident memory module.
503 *
504 * Allocates memory for the page cells, and
505 * for the object/offset-to-page hash table headers.
506 * Each page cell is initialized and placed on the free list.
507 * Returns the range of available kernel virtual memory.
508 */
509
510 void
511 vm_page_bootstrap(
512 vm_offset_t *startp,
513 vm_offset_t *endp)
514 {
515 register vm_page_t m;
516 unsigned int i;
517 unsigned int log1;
518 unsigned int log2;
519 unsigned int size;
520
521 /*
522 * Initialize the vm_page template.
523 */
524
525 m = &vm_page_template;
526 bzero(m, sizeof (*m));
527
528 m->pageq.next = NULL;
529 m->pageq.prev = NULL;
530 m->listq.next = NULL;
531 m->listq.prev = NULL;
532 m->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
533
534 m->object = VM_OBJECT_NULL; /* reset later */
535 m->offset = (vm_object_offset_t) -1; /* reset later */
536
537 m->wire_count = 0;
538 m->local = FALSE;
539 m->inactive = FALSE;
540 m->active = FALSE;
541 m->pageout_queue = FALSE;
542 m->speculative = FALSE;
543 m->laundry = FALSE;
544 m->free = FALSE;
545 m->reference = FALSE;
546 m->gobbled = FALSE;
547 m->private = FALSE;
548 m->throttled = FALSE;
549 m->__unused_pageq_bits = 0;
550
551 m->phys_page = 0; /* reset later */
552
553 m->busy = TRUE;
554 m->wanted = FALSE;
555 m->tabled = FALSE;
556 m->hashed = FALSE;
557 m->fictitious = FALSE;
558 m->pmapped = FALSE;
559 m->wpmapped = FALSE;
560 m->pageout = FALSE;
561 m->absent = FALSE;
562 m->error = FALSE;
563 m->dirty = FALSE;
564 m->cleaning = FALSE;
565 m->precious = FALSE;
566 m->clustered = FALSE;
567 m->overwriting = FALSE;
568 m->restart = FALSE;
569 m->unusual = FALSE;
570 m->encrypted = FALSE;
571 m->encrypted_cleaning = FALSE;
572 m->cs_validated = FALSE;
573 m->cs_tainted = FALSE;
574 m->no_cache = FALSE;
575 m->reusable = FALSE;
576 m->slid = FALSE;
577 m->xpmapped = FALSE;
578 m->compressor = FALSE;
579 m->written_by_kernel = FALSE;
580 m->__unused_object_bits = 0;
581
582 /*
583 * Initialize the page queues.
584 */
585 vm_page_init_lck_grp();
586
587 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
588 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
589 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
590
591 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
592 int group;
593
594 purgeable_queues[i].token_q_head = 0;
595 purgeable_queues[i].token_q_tail = 0;
596 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
597 queue_init(&purgeable_queues[i].objq[group]);
598
599 purgeable_queues[i].type = i;
600 purgeable_queues[i].new_pages = 0;
601 #if MACH_ASSERT
602 purgeable_queues[i].debug_count_tokens = 0;
603 purgeable_queues[i].debug_count_objects = 0;
604 #endif
605 };
606 purgeable_nonvolatile_count = 0;
607 queue_init(&purgeable_nonvolatile_queue);
608
609 for (i = 0; i < MAX_COLORS; i++ )
610 queue_init(&vm_page_queue_free[i]);
611
612 queue_init(&vm_lopage_queue_free);
613 queue_init(&vm_page_queue_active);
614 queue_init(&vm_page_queue_inactive);
615 queue_init(&vm_page_queue_cleaned);
616 queue_init(&vm_page_queue_throttled);
617 queue_init(&vm_page_queue_anonymous);
618
619 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
620 queue_init(&vm_page_queue_speculative[i].age_q);
621
622 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
623 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
624 }
625 vm_page_free_wanted = 0;
626 vm_page_free_wanted_privileged = 0;
627
628 vm_page_set_colors();
629
630
631 /*
632 * Steal memory for the map and zone subsystems.
633 */
634 kernel_debug_string("zone_steal_memory");
635 zone_steal_memory();
636 kernel_debug_string("vm_map_steal_memory");
637 vm_map_steal_memory();
638
639 /*
640 * Allocate (and initialize) the virtual-to-physical
641 * table hash buckets.
642 *
643 * The number of buckets should be a power of two to
644 * get a good hash function. The following computation
645 * chooses the first power of two that is greater
646 * than or equal to the number of physical pages in the system.
647 */
648
649 if (vm_page_bucket_count == 0) {
650 unsigned int npages = pmap_free_pages();
651
652 vm_page_bucket_count = 1;
653 while (vm_page_bucket_count < npages)
654 vm_page_bucket_count <<= 1;
655 }
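/*
 * For example, a machine with 4 GB of 4 KB pages has roughly 2^20
 * (1,048,576) physical pages, so the loop above settles on
 * vm_page_bucket_count = 2^20, the first power of two that is not
 * smaller than pmap_free_pages().
 */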
656 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
657
658 vm_page_hash_mask = vm_page_bucket_count - 1;
659
660 /*
661 * Calculate object shift value for hashing algorithm:
662 * O = log2(sizeof(struct vm_object))
663 * B = log2(vm_page_bucket_count)
664 * hash shifts the object left by
665 * B/2 - O
666 */
667 size = vm_page_bucket_count;
668 for (log1 = 0; size > 1; log1++)
669 size /= 2;
670 size = sizeof(struct vm_object);
671 for (log2 = 0; size > 1; log2++)
672 size /= 2;
673 vm_page_hash_shift = log1/2 - log2 + 1;
674
675 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
676 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
677 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to ensure unique series */
678
679 if (vm_page_hash_mask & vm_page_bucket_count)
680 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
681
682 #if VM_PAGE_BUCKETS_CHECK
683 #if VM_PAGE_FAKE_BUCKETS
684 /*
685 * Allocate a decoy set of page buckets, to detect
686 * any stomping there.
687 */
688 vm_page_fake_buckets = (vm_page_bucket_t *)
689 pmap_steal_memory(vm_page_bucket_count *
690 sizeof(vm_page_bucket_t));
691 vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
692 vm_page_fake_buckets_end =
693 vm_map_round_page((vm_page_fake_buckets_start +
694 (vm_page_bucket_count *
695 sizeof (vm_page_bucket_t))),
696 PAGE_MASK);
697 char *cp;
698 for (cp = (char *)vm_page_fake_buckets_start;
699 cp < (char *)vm_page_fake_buckets_end;
700 cp++) {
701 *cp = 0x5a;
702 }
703 #endif /* VM_PAGE_FAKE_BUCKETS */
704 #endif /* VM_PAGE_BUCKETS_CHECK */
705
706 kernel_debug_string("vm_page_buckets");
707 vm_page_buckets = (vm_page_bucket_t *)
708 pmap_steal_memory(vm_page_bucket_count *
709 sizeof(vm_page_bucket_t));
710
711 kernel_debug_string("vm_page_bucket_locks");
712 vm_page_bucket_locks = (lck_spin_t *)
713 pmap_steal_memory(vm_page_bucket_lock_count *
714 sizeof(lck_spin_t));
715
716 for (i = 0; i < vm_page_bucket_count; i++) {
717 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
718
719 bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
720 #if MACH_PAGE_HASH_STATS
721 bucket->cur_count = 0;
722 bucket->hi_count = 0;
723 #endif /* MACH_PAGE_HASH_STATS */
724 }
725
726 for (i = 0; i < vm_page_bucket_lock_count; i++)
727 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
728
729 #if VM_PAGE_BUCKETS_CHECK
730 vm_page_buckets_check_ready = TRUE;
731 #endif /* VM_PAGE_BUCKETS_CHECK */
732
733 /*
734 * Machine-dependent code allocates the resident page table.
735 * It uses vm_page_init to initialize the page frames.
736 * The code also returns to us the virtual space available
737 * to the kernel. We don't trust the pmap module
738 * to get the alignment right.
739 */
740
741 kernel_debug_string("pmap_startup");
742 pmap_startup(&virtual_space_start, &virtual_space_end);
743 virtual_space_start = round_page(virtual_space_start);
744 virtual_space_end = trunc_page(virtual_space_end);
745
746 *startp = virtual_space_start;
747 *endp = virtual_space_end;
748
749 /*
750 * Compute the initial "wire" count.
751 * Up until now, the pages which have been set aside are not under
752 * the VM system's control, so although they aren't explicitly
753 * wired, they nonetheless can't be moved. At this moment,
754 * all VM managed pages are "free", courtesy of pmap_startup.
755 */
756 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
757 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
758 vm_page_wire_count_initial = vm_page_wire_count;
759
760 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
761 vm_page_free_count, vm_page_wire_count);
762
763 kernel_debug_string("vm_page_bootstrap complete");
764 simple_lock_init(&vm_paging_lock, 0);
765 }
766
767 #ifndef MACHINE_PAGES
768 /*
769 * We implement pmap_steal_memory and pmap_startup with the help
770 * of two simpler functions, pmap_virtual_space and pmap_next_page.
771 */
772
773 void *
774 pmap_steal_memory(
775 vm_size_t size)
776 {
777 vm_offset_t addr, vaddr;
778 ppnum_t phys_page;
779
780 /*
781 * Round the requested size up to a multiple of the pointer size.
782 */
783
784 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
785
786 /*
787 * If this is the first call to pmap_steal_memory,
788 * we have to initialize ourself.
789 */
790
791 if (virtual_space_start == virtual_space_end) {
792 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
793
794 /*
795 * The initial values must be aligned properly, and
796 * we don't trust the pmap module to do it right.
797 */
798
799 virtual_space_start = round_page(virtual_space_start);
800 virtual_space_end = trunc_page(virtual_space_end);
801 }
802
803 /*
804 * Allocate virtual memory for this request.
805 */
806
807 addr = virtual_space_start;
808 virtual_space_start += size;
809
810 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
811
812 /*
813 * Allocate and map physical pages to back new virtual pages.
814 */
815
816 for (vaddr = round_page(addr);
817 vaddr < addr + size;
818 vaddr += PAGE_SIZE) {
819
820 if (!pmap_next_page_hi(&phys_page))
821 panic("pmap_steal_memory");
822
823 /*
824 * XXX Logically, these mappings should be wired,
825 * but some pmap modules barf if they are.
826 */
827 #if defined(__LP64__)
828 pmap_pre_expand(kernel_pmap, vaddr);
829 #endif
830
831 pmap_enter(kernel_pmap, vaddr, phys_page,
832 VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
833 VM_WIMG_USE_DEFAULT, FALSE);
834 /*
835 * Account for newly stolen memory
836 */
837 vm_page_wire_count++;
838
839 }
840
841 return (void *) addr;
842 }
843
844 void vm_page_release_startup(vm_page_t mem);
845 void
846 pmap_startup(
847 vm_offset_t *startp,
848 vm_offset_t *endp)
849 {
850 unsigned int i, npages, pages_initialized, fill, fillval;
851 ppnum_t phys_page;
852 addr64_t tmpaddr;
853
854
855 #if defined(__LP64__)
856 /*
857 * struct vm_page must be of size 64 due to VM_PAGE_PACK_PTR use
858 */
859 assert(sizeof(struct vm_page) == 64);
860
861 /*
862 * make sure we are aligned on a 64 byte boundary
863 * for VM_PAGE_PACK_PTR (it clips off the low-order
864 * 6 bits of the pointer)
865 */
866 if (virtual_space_start != virtual_space_end)
867 virtual_space_start = round_page(virtual_space_start);
868 #endif
869
870 /*
871 * We calculate how many page frames we will have
872 * and then allocate the page structures in one chunk.
873 */
874
875 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
876 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
877 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */
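/*
 * Dividing by (PAGE_SIZE + sizeof(*vm_pages)) sizes the array so that
 * the vm_page structures stolen below and the pages they will describe
 * both fit in the memory that remains.  With 64-byte structures and
 * 4 KB pages, the array consumes about 1.5% of the remaining memory.
 */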
878
879 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
880
881 /*
882 * Initialize the page frames.
883 */
884 kernel_debug_string("Initialize the page frames");
885 for (i = 0, pages_initialized = 0; i < npages; i++) {
886 if (!pmap_next_page(&phys_page))
887 break;
888 if (pages_initialized == 0 || phys_page < vm_page_lowest)
889 vm_page_lowest = phys_page;
890
891 vm_page_init(&vm_pages[i], phys_page, FALSE);
892 vm_page_pages++;
893 pages_initialized++;
894 }
895 vm_pages_count = pages_initialized;
896
897 #if defined(__LP64__)
898
899 if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0])) != &vm_pages[0])
900 panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);
901
902 if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1])) != &vm_pages[vm_pages_count-1])
903 panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
904 #endif
905 kernel_debug_string("page fill/release");
906 /*
907 * Check if we want to initialize pages to a known value
908 */
909 fill = 0; /* Assume no fill */
910 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
911 #if DEBUG
912 /* This slows down booting the DEBUG kernel, particularly on
913 * large memory systems, but is worthwhile in deterministically
914 * trapping uninitialized memory usage.
915 */
916 if (fill == 0) {
917 fill = 1;
918 fillval = 0xDEB8F177;
919 }
920 #endif
921 if (fill)
922 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
923 // -debug code remove
924 if (2 == vm_himemory_mode) {
925 // free low -> high so high is preferred
926 for (i = 1; i <= pages_initialized; i++) {
927 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
928 vm_page_release_startup(&vm_pages[i - 1]);
929 }
930 }
931 else
932 // debug code remove-
933
934 /*
935 * Release pages in reverse order so that physical pages
936 * initially get allocated in ascending address order. This keeps
937 * the devices (which must address physical memory) happy if
938 * they require several consecutive pages.
939 */
940 for (i = pages_initialized; i > 0; i--) {
941 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
942 vm_page_release_startup(&vm_pages[i - 1]);
943 }
944
945 VM_CHECK_MEMORYSTATUS;
946
947 #if 0
948 {
949 vm_page_t xx, xxo, xxl;
950 int i, j, k, l;
951
952 j = 0; /* (BRINGUP) */
953 xxl = 0;
954
955 for( i = 0; i < vm_colors; i++ ) {
956 queue_iterate(&vm_page_queue_free[i],
957 xx,
958 vm_page_t,
959 pageq) { /* BRINGUP */
960 j++; /* (BRINGUP) */
961 if(j > vm_page_free_count) { /* (BRINGUP) */
962 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
963 }
964
965 l = vm_page_free_count - j; /* (BRINGUP) */
966 k = 0; /* (BRINGUP) */
967
968 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
969
970 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
971 k++;
972 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
973 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
974 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
975 }
976 }
977
978 xxl = xx;
979 }
980 }
981
982 if(j != vm_page_free_count) { /* (BRINGUP) */
983 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
984 }
985 }
986 #endif
987
988
989 /*
990 * We have to re-align virtual_space_start,
991 * because pmap_steal_memory has been using it.
992 */
993
994 virtual_space_start = round_page(virtual_space_start);
995
996 *startp = virtual_space_start;
997 *endp = virtual_space_end;
998 }
999 #endif /* MACHINE_PAGES */
1000
1001 /*
1002 * Routine: vm_page_module_init
1003 * Purpose:
1004 * Second initialization pass, to be done after
1005 * the basic VM system is ready.
1006 */
1007 void
1008 vm_page_module_init(void)
1009 {
1010 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
1011 0, PAGE_SIZE, "vm pages");
1012
1013 #if ZONE_DEBUG
1014 zone_debug_disable(vm_page_zone);
1015 #endif /* ZONE_DEBUG */
1016
1017 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
1018 zone_change(vm_page_zone, Z_EXPAND, FALSE);
1019 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
1020 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
1021 zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
1022 /*
1023 * Adjust zone statistics to account for the real pages allocated
1024 * in vm_page_create(). [Q: is this really what we want?]
1025 */
1026 vm_page_zone->count += vm_page_pages;
1027 vm_page_zone->sum_count += vm_page_pages;
1028 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
1029 }
1030
1031 /*
1032 * Routine: vm_page_create
1033 * Purpose:
1034 * After the VM system is up, machine-dependent code
1035 * may stumble across more physical memory. For example,
1036 * memory that it was reserving for a frame buffer.
1037 * vm_page_create turns this memory into available pages.
1038 */
1039
1040 void
1041 vm_page_create(
1042 ppnum_t start,
1043 ppnum_t end)
1044 {
1045 ppnum_t phys_page;
1046 vm_page_t m;
1047
1048 for (phys_page = start;
1049 phys_page < end;
1050 phys_page++) {
1051 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
1052 == VM_PAGE_NULL)
1053 vm_page_more_fictitious();
1054
1055 m->fictitious = FALSE;
1056 pmap_clear_noencrypt(phys_page);
1057
1058 vm_page_pages++;
1059 vm_page_release(m);
1060 }
1061 }
1062
1063 /*
1064 * vm_page_hash:
1065 *
1066 * Distributes the object/offset key pair among hash buckets.
1067 *
1068 * NOTE: The bucket count must be a power of 2
1069 */
1070 #define vm_page_hash(object, offset) (\
1071 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
1072 & vm_page_hash_mask)
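/*
 * A sketch of how the hash is consumed elsewhere in this file (see
 * vm_page_insert_internal(), vm_page_remove() and vm_page_lookup()):
 *
 *     hash_id     = vm_page_hash(object, offset);
 *     bucket      = &vm_page_buckets[hash_id];
 *     bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
 *
 * Because vm_page_bucket_count is a power of 2, masking with
 * vm_page_hash_mask is equivalent to reducing the mixed value modulo
 * the bucket count.
 */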
1073
1074
1075 /*
1076 * vm_page_insert: [ internal use only ]
1077 *
1078 * Inserts the given mem entry into the object/object-page
1079 * table and object list.
1080 *
1081 * The object must be locked.
1082 */
1083 void
1084 vm_page_insert(
1085 vm_page_t mem,
1086 vm_object_t object,
1087 vm_object_offset_t offset)
1088 {
1089 vm_page_insert_internal(mem, object, offset, FALSE, TRUE, FALSE);
1090 }
1091
1092 void
1093 vm_page_insert_internal(
1094 vm_page_t mem,
1095 vm_object_t object,
1096 vm_object_offset_t offset,
1097 boolean_t queues_lock_held,
1098 boolean_t insert_in_hash,
1099 boolean_t batch_pmap_op)
1100 {
1101 vm_page_bucket_t *bucket;
1102 lck_spin_t *bucket_lock;
1103 int hash_id;
1104 task_t owner;
1105
1106 XPR(XPR_VM_PAGE,
1107 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1108 object, offset, mem, 0,0);
1109 #if 0
1110 /*
1111 * we may not hold the page queue lock
1112 * so this check isn't safe to make
1113 */
1114 VM_PAGE_CHECK(mem);
1115 #endif
1116
1117 assert(page_aligned(offset));
1118
1119 /* the vm_submap_object is only a placeholder for submaps */
1120 assert(object != vm_submap_object);
1121
1122 vm_object_lock_assert_exclusive(object);
1123 #if DEBUG
1124 lck_mtx_assert(&vm_page_queue_lock,
1125 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1126 : LCK_MTX_ASSERT_NOTOWNED);
1127 #endif /* DEBUG */
1128
1129 if (insert_in_hash == TRUE) {
1130 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1131 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1132 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1133 "already in (obj=%p,off=0x%llx)",
1134 mem, object, offset, mem->object, mem->offset);
1135 #endif
1136 assert(!object->internal || offset < object->vo_size);
1137
1138 /* only insert "pageout" pages into "pageout" objects,
1139 * and normal pages into normal objects */
1140 assert(object->pageout == mem->pageout);
1141
1142 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1143
1144 /*
1145 * Record the object/offset pair in this page
1146 */
1147
1148 mem->object = object;
1149 mem->offset = offset;
1150
1151 /*
1152 * Insert it into the object_object/offset hash table
1153 */
1154 hash_id = vm_page_hash(object, offset);
1155 bucket = &vm_page_buckets[hash_id];
1156 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1157
1158 lck_spin_lock(bucket_lock);
1159
1160 mem->next_m = bucket->page_list;
1161 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1162 assert(mem == VM_PAGE_UNPACK_PTR(bucket->page_list));
1163
1164 #if MACH_PAGE_HASH_STATS
1165 if (++bucket->cur_count > bucket->hi_count)
1166 bucket->hi_count = bucket->cur_count;
1167 #endif /* MACH_PAGE_HASH_STATS */
1168 mem->hashed = TRUE;
1169 lck_spin_unlock(bucket_lock);
1170 }
1171
1172 {
1173 unsigned int cache_attr;
1174
1175 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1176
1177 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1178 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1179 }
1180 }
1181 /*
1182 * Now link into the object's list of backed pages.
1183 */
1184 VM_PAGE_INSERT(mem, object);
1185 mem->tabled = TRUE;
1186
1187 /*
1188 * Show that the object has one more resident page.
1189 */
1190
1191 object->resident_page_count++;
1192 if (VM_PAGE_WIRED(mem)) {
1193 object->wired_page_count++;
1194 }
1195 assert(object->resident_page_count >= object->wired_page_count);
1196
1197 if (object->internal) {
1198 OSAddAtomic(1, &vm_page_internal_count);
1199 } else {
1200 OSAddAtomic(1, &vm_page_external_count);
1201 }
1202
1203 /*
1204 * It wouldn't make sense to insert a "reusable" page in
1205 * an object (the page would have been marked "reusable" only
1206 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1207 * in the object at that time).
1208 * But a page could be inserted in an "all_reusable" object, if
1209 * something faults it in (a vm_read() from another task or a
1210 * "use-after-free" issue in user space, for example). It can
1211 * also happen if we're relocating a page from that object to
1212 * a different physical page during a physically-contiguous
1213 * allocation.
1214 */
1215 assert(!mem->reusable);
1216 if (mem->object->all_reusable) {
1217 OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1218 }
1219
1220 if (object->purgable == VM_PURGABLE_DENY) {
1221 owner = TASK_NULL;
1222 } else {
1223 owner = object->vo_purgeable_owner;
1224 }
1225 if (owner &&
1226 (object->purgable == VM_PURGABLE_NONVOLATILE ||
1227 VM_PAGE_WIRED(mem))) {
1228 /* more non-volatile bytes */
1229 ledger_credit(owner->ledger,
1230 task_ledgers.purgeable_nonvolatile,
1231 PAGE_SIZE);
1232 /* more footprint */
1233 ledger_credit(owner->ledger,
1234 task_ledgers.phys_footprint,
1235 PAGE_SIZE);
1236
1237 } else if (owner &&
1238 (object->purgable == VM_PURGABLE_VOLATILE ||
1239 object->purgable == VM_PURGABLE_EMPTY)) {
1240 assert(! VM_PAGE_WIRED(mem));
1241 /* more volatile bytes */
1242 ledger_credit(owner->ledger,
1243 task_ledgers.purgeable_volatile,
1244 PAGE_SIZE);
1245 }
1246
1247 if (object->purgable == VM_PURGABLE_VOLATILE) {
1248 if (VM_PAGE_WIRED(mem)) {
1249 OSAddAtomic(+1, &vm_page_purgeable_wired_count);
1250 } else {
1251 OSAddAtomic(+1, &vm_page_purgeable_count);
1252 }
1253 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1254 mem->throttled) {
1255 /*
1256 * This page belongs to a purged VM object but hasn't
1257 * been purged (because it was "busy").
1258 * It's in the "throttled" queue and hence not
1259 * visible to vm_pageout_scan(). Move it to a pageable
1260 * queue, so that it can eventually be reclaimed, instead
1261 * of lingering in the "empty" object.
1262 */
1263 if (queues_lock_held == FALSE)
1264 vm_page_lockspin_queues();
1265 vm_page_deactivate(mem);
1266 if (queues_lock_held == FALSE)
1267 vm_page_unlock_queues();
1268 }
1269
1270 #if VM_OBJECT_TRACKING_OP_MODIFIED
1271 if (vm_object_tracking_inited &&
1272 object->internal &&
1273 object->resident_page_count == 0 &&
1274 object->pager == NULL &&
1275 object->shadow != NULL &&
1276 object->shadow->copy == object) {
1277 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
1278 int numsaved = 0;
1279
1280 numsaved =OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
1281 btlog_add_entry(vm_object_tracking_btlog,
1282 object,
1283 VM_OBJECT_TRACKING_OP_MODIFIED,
1284 bt,
1285 numsaved);
1286 }
1287 #endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
1288 }
1289
1290 /*
1291 * vm_page_replace:
1292 *
1293 * Exactly like vm_page_insert, except that we first
1294 * remove any existing page at the given offset in object.
1295 *
1296 * The object must be locked.
1297 */
1298 void
1299 vm_page_replace(
1300 register vm_page_t mem,
1301 register vm_object_t object,
1302 register vm_object_offset_t offset)
1303 {
1304 vm_page_bucket_t *bucket;
1305 vm_page_t found_m = VM_PAGE_NULL;
1306 lck_spin_t *bucket_lock;
1307 int hash_id;
1308
1309 #if 0
1310 /*
1311 * we don't hold the page queue lock
1312 * so this check isn't safe to make
1313 */
1314 VM_PAGE_CHECK(mem);
1315 #endif
1316 vm_object_lock_assert_exclusive(object);
1317 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1318 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1319 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1320 "already in (obj=%p,off=0x%llx)",
1321 mem, object, offset, mem->object, mem->offset);
1322 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1323 #endif
1324 /*
1325 * Record the object/offset pair in this page
1326 */
1327
1328 mem->object = object;
1329 mem->offset = offset;
1330
1331 /*
1332 * Insert it into the object_object/offset hash table,
1333 * replacing any page that might have been there.
1334 */
1335
1336 hash_id = vm_page_hash(object, offset);
1337 bucket = &vm_page_buckets[hash_id];
1338 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1339
1340 lck_spin_lock(bucket_lock);
1341
1342 if (bucket->page_list) {
1343 vm_page_packed_t *mp = &bucket->page_list;
1344 vm_page_t m = VM_PAGE_UNPACK_PTR(*mp);
1345
1346 do {
1347 if (m->object == object && m->offset == offset) {
1348 /*
1349 * Remove old page from hash list
1350 */
1351 *mp = m->next_m;
1352 m->hashed = FALSE;
1353
1354 found_m = m;
1355 break;
1356 }
1357 mp = &m->next_m;
1358 } while ((m = VM_PAGE_UNPACK_PTR(*mp)));
1359
1360 mem->next_m = bucket->page_list;
1361 } else {
1362 mem->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
1363 }
1364 /*
1365 * insert new page at head of hash list
1366 */
1367 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1368 mem->hashed = TRUE;
1369
1370 lck_spin_unlock(bucket_lock);
1371
1372 if (found_m) {
1373 /*
1374 * there was already a page at the specified
1375 * offset for this object... remove it from
1376 * the object and free it back to the free list
1377 */
1378 vm_page_free_unlocked(found_m, FALSE);
1379 }
1380 vm_page_insert_internal(mem, object, offset, FALSE, FALSE, FALSE);
1381 }
1382
1383 /*
1384 * vm_page_remove: [ internal use only ]
1385 *
1386 * Removes the given mem entry from the object/offset-page
1387 * table and the object page list.
1388 *
1389 * The object must be locked.
1390 */
1391
1392 void
1393 vm_page_remove(
1394 vm_page_t mem,
1395 boolean_t remove_from_hash)
1396 {
1397 vm_page_bucket_t *bucket;
1398 vm_page_t this;
1399 lck_spin_t *bucket_lock;
1400 int hash_id;
1401 task_t owner;
1402
1403 XPR(XPR_VM_PAGE,
1404 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1405 mem->object, mem->offset,
1406 mem, 0,0);
1407
1408 vm_object_lock_assert_exclusive(mem->object);
1409 assert(mem->tabled);
1410 assert(!mem->cleaning);
1411 assert(!mem->laundry);
1412 #if 0
1413 /*
1414 * we don't hold the page queue lock
1415 * so this check isn't safe to make
1416 */
1417 VM_PAGE_CHECK(mem);
1418 #endif
1419 if (remove_from_hash == TRUE) {
1420 /*
1421 * Remove from the object_object/offset hash table
1422 */
1423 hash_id = vm_page_hash(mem->object, mem->offset);
1424 bucket = &vm_page_buckets[hash_id];
1425 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1426
1427 lck_spin_lock(bucket_lock);
1428
1429 if ((this = VM_PAGE_UNPACK_PTR(bucket->page_list)) == mem) {
1430 /* optimize for common case */
1431
1432 bucket->page_list = mem->next_m;
1433 } else {
1434 vm_page_packed_t *prev;
1435
1436 for (prev = &this->next_m;
1437 (this = VM_PAGE_UNPACK_PTR(*prev)) != mem;
1438 prev = &this->next_m)
1439 continue;
1440 *prev = this->next_m;
1441 }
1442 #if MACH_PAGE_HASH_STATS
1443 bucket->cur_count--;
1444 #endif /* MACH_PAGE_HASH_STATS */
1445 mem->hashed = FALSE;
1446 lck_spin_unlock(bucket_lock);
1447 }
1448 /*
1449 * Now remove from the object's list of backed pages.
1450 */
1451
1452 VM_PAGE_REMOVE(mem);
1453
1454 /*
1455 * And show that the object has one fewer resident
1456 * page.
1457 */
1458
1459 assert(mem->object->resident_page_count > 0);
1460 mem->object->resident_page_count--;
1461
1462 if (mem->object->internal) {
1463 #if DEBUG
1464 assert(vm_page_internal_count);
1465 #endif /* DEBUG */
1466
1467 OSAddAtomic(-1, &vm_page_internal_count);
1468 } else {
1469 assert(vm_page_external_count);
1470 OSAddAtomic(-1, &vm_page_external_count);
1471
1472 if (mem->xpmapped) {
1473 assert(vm_page_xpmapped_external_count);
1474 OSAddAtomic(-1, &vm_page_xpmapped_external_count);
1475 }
1476 }
1477 if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
1478 if (mem->object->resident_page_count == 0)
1479 vm_object_cache_remove(mem->object);
1480 }
1481
1482 if (VM_PAGE_WIRED(mem)) {
1483 assert(mem->object->wired_page_count > 0);
1484 mem->object->wired_page_count--;
1485 }
1486 assert(mem->object->resident_page_count >=
1487 mem->object->wired_page_count);
1488 if (mem->reusable) {
1489 assert(mem->object->reusable_page_count > 0);
1490 mem->object->reusable_page_count--;
1491 assert(mem->object->reusable_page_count <=
1492 mem->object->resident_page_count);
1493 mem->reusable = FALSE;
1494 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1495 vm_page_stats_reusable.reused_remove++;
1496 } else if (mem->object->all_reusable) {
1497 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1498 vm_page_stats_reusable.reused_remove++;
1499 }
1500
1501 if (mem->object->purgable == VM_PURGABLE_DENY) {
1502 owner = TASK_NULL;
1503 } else {
1504 owner = mem->object->vo_purgeable_owner;
1505 }
1506 if (owner &&
1507 (mem->object->purgable == VM_PURGABLE_NONVOLATILE ||
1508 VM_PAGE_WIRED(mem))) {
1509 /* less non-volatile bytes */
1510 ledger_debit(owner->ledger,
1511 task_ledgers.purgeable_nonvolatile,
1512 PAGE_SIZE);
1513 /* less footprint */
1514 ledger_debit(owner->ledger,
1515 task_ledgers.phys_footprint,
1516 PAGE_SIZE);
1517 } else if (owner &&
1518 (mem->object->purgable == VM_PURGABLE_VOLATILE ||
1519 mem->object->purgable == VM_PURGABLE_EMPTY)) {
1520 assert(! VM_PAGE_WIRED(mem));
1521 /* less volatile bytes */
1522 ledger_debit(owner->ledger,
1523 task_ledgers.purgeable_volatile,
1524 PAGE_SIZE);
1525 }
1526 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1527 if (VM_PAGE_WIRED(mem)) {
1528 assert(vm_page_purgeable_wired_count > 0);
1529 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1530 } else {
1531 assert(vm_page_purgeable_count > 0);
1532 OSAddAtomic(-1, &vm_page_purgeable_count);
1533 }
1534 }
1535 if (mem->object->set_cache_attr == TRUE)
1536 pmap_set_cache_attributes(mem->phys_page, 0);
1537
1538 mem->tabled = FALSE;
1539 mem->object = VM_OBJECT_NULL;
1540 mem->offset = (vm_object_offset_t) -1;
1541 }
1542
1543
1544 /*
1545 * vm_page_lookup:
1546 *
1547 * Returns the page associated with the object/offset
1548 * pair specified; if none is found, VM_PAGE_NULL is returned.
1549 *
1550 * The object must be locked. No side effects.
1551 */
1552
1553 unsigned long vm_page_lookup_hint = 0;
1554 unsigned long vm_page_lookup_hint_next = 0;
1555 unsigned long vm_page_lookup_hint_prev = 0;
1556 unsigned long vm_page_lookup_hint_miss = 0;
1557 unsigned long vm_page_lookup_bucket_NULL = 0;
1558 unsigned long vm_page_lookup_miss = 0;
1559
1560
1561 vm_page_t
1562 vm_page_lookup(
1563 vm_object_t object,
1564 vm_object_offset_t offset)
1565 {
1566 vm_page_t mem;
1567 vm_page_bucket_t *bucket;
1568 queue_entry_t qe;
1569 lck_spin_t *bucket_lock;
1570 int hash_id;
1571
1572 vm_object_lock_assert_held(object);
1573 mem = object->memq_hint;
1574
1575 if (mem != VM_PAGE_NULL) {
1576 assert(mem->object == object);
1577
1578 if (mem->offset == offset) {
1579 vm_page_lookup_hint++;
1580 return mem;
1581 }
1582 qe = queue_next(&mem->listq);
1583
1584 if (! queue_end(&object->memq, qe)) {
1585 vm_page_t next_page;
1586
1587 next_page = (vm_page_t) qe;
1588 assert(next_page->object == object);
1589
1590 if (next_page->offset == offset) {
1591 vm_page_lookup_hint_next++;
1592 object->memq_hint = next_page; /* new hint */
1593 return next_page;
1594 }
1595 }
1596 qe = queue_prev(&mem->listq);
1597
1598 if (! queue_end(&object->memq, qe)) {
1599 vm_page_t prev_page;
1600
1601 prev_page = (vm_page_t) qe;
1602 assert(prev_page->object == object);
1603
1604 if (prev_page->offset == offset) {
1605 vm_page_lookup_hint_prev++;
1606 object->memq_hint = prev_page; /* new hint */
1607 return prev_page;
1608 }
1609 }
1610 }
1611 /*
1612 * Search the hash table for this object/offset pair
1613 */
1614 hash_id = vm_page_hash(object, offset);
1615 bucket = &vm_page_buckets[hash_id];
1616
1617 /*
1618 * since we hold the object lock, we are guaranteed that no
1619 * new pages can be inserted into this object... this in turn
1620 * guarantees that the page we're looking for can't exist
1621 * if the bucket it hashes to is currently NULL even when looked
1622 * at outside the scope of the hash bucket lock... this is a
1623 * really cheap optimization to avoid taking the lock
1624 */
1625 if (!bucket->page_list) {
1626 vm_page_lookup_bucket_NULL++;
1627
1628 return (VM_PAGE_NULL);
1629 }
1630 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1631
1632 lck_spin_lock(bucket_lock);
1633
1634 for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = VM_PAGE_UNPACK_PTR(mem->next_m)) {
1635 #if 0
1636 /*
1637 * we don't hold the page queue lock
1638 * so this check isn't safe to make
1639 */
1640 VM_PAGE_CHECK(mem);
1641 #endif
1642 if ((mem->object == object) && (mem->offset == offset))
1643 break;
1644 }
1645 lck_spin_unlock(bucket_lock);
1646
1647 if (mem != VM_PAGE_NULL) {
1648 if (object->memq_hint != VM_PAGE_NULL) {
1649 vm_page_lookup_hint_miss++;
1650 }
1651 assert(mem->object == object);
1652 object->memq_hint = mem;
1653 } else
1654 vm_page_lookup_miss++;
1655
1656 return(mem);
1657 }
1658
1659
1660 /*
1661 * vm_page_rename:
1662 *
1663 * Move the given memory entry from its
1664 * current object to the specified target object/offset.
1665 *
1666 * The object must be locked.
1667 */
1668 void
1669 vm_page_rename(
1670 register vm_page_t mem,
1671 register vm_object_t new_object,
1672 vm_object_offset_t new_offset,
1673 boolean_t encrypted_ok)
1674 {
1675 boolean_t internal_to_external, external_to_internal;
1676
1677 assert(mem->object != new_object);
1678
1679 /*
1680 * ENCRYPTED SWAP:
1681 * The encryption key is based on the page's memory object
1682 * (aka "pager") and paging offset. Moving the page to
1683 * another VM object changes its "pager" and "paging_offset"
1684 * so it has to be decrypted first, or we would lose the key.
1685 *
1686 * One exception is VM object collapsing, where we transfer pages
1687 * from one backing object to its parent object. This operation also
1688 * transfers the paging information, so the <pager,paging_offset> info
1689 * should remain consistent. The caller (vm_object_do_collapse())
1690 * sets "encrypted_ok" in this case.
1691 */
1692 if (!encrypted_ok && mem->encrypted) {
1693 panic("vm_page_rename: page %p is encrypted\n", mem);
1694 }
1695
1696 XPR(XPR_VM_PAGE,
1697 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1698 new_object, new_offset,
1699 mem, 0,0);
1700
1701 /*
1702 * Changes to mem->object require the page lock because
1703 * the pageout daemon uses that lock to get the object.
1704 */
1705 vm_page_lockspin_queues();
1706
1707 internal_to_external = FALSE;
1708 external_to_internal = FALSE;
1709
1710 if (mem->local) {
1711 /*
1712 * it's much easier to get the vm_page_pageable_xxx accounting correct
1713 * if we first move the page to the active queue... it's going to end
1714 * up there anyway, and we don't call vm_page_rename frequently enough
1715 * for this to matter.
1716 */
1717 VM_PAGE_QUEUES_REMOVE(mem);
1718 vm_page_activate(mem);
1719 }
1720 if (mem->active || mem->inactive || mem->speculative) {
1721 if (mem->object->internal && !new_object->internal) {
1722 internal_to_external = TRUE;
1723 }
1724 if (!mem->object->internal && new_object->internal) {
1725 external_to_internal = TRUE;
1726 }
1727 }
1728
1729 vm_page_remove(mem, TRUE);
1730 vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE, FALSE);
1731
1732 if (internal_to_external) {
1733 vm_page_pageable_internal_count--;
1734 vm_page_pageable_external_count++;
1735 } else if (external_to_internal) {
1736 vm_page_pageable_external_count--;
1737 vm_page_pageable_internal_count++;
1738 }
1739
1740 vm_page_unlock_queues();
1741 }
1742
1743 /*
1744 * vm_page_init:
1745 *
1746 * Initialize the fields in a new page.
1747 * This takes a structure with random values and initializes it
1748 * so that it can be given to vm_page_release or vm_page_insert.
1749 */
1750 void
1751 vm_page_init(
1752 vm_page_t mem,
1753 ppnum_t phys_page,
1754 boolean_t lopage)
1755 {
1756 assert(phys_page);
1757
1758 #if DEBUG
1759 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
1760 if (!(pmap_valid_page(phys_page))) {
1761 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
1762 }
1763 }
1764 #endif
1765 *mem = vm_page_template;
1766 mem->phys_page = phys_page;
1767 #if 0
1768 /*
1769 * we're leaving this turned off for now... currently pages
1770 * come off the free list and are either immediately dirtied/referenced
1771 * due to zero-fill or COW faults, or are used to read or write files...
1772 * in the file I/O case, the UPL mechanism takes care of clearing
1773 * the state of the HW ref/mod bits in a somewhat fragile way.
1774 * Since we may change the way this works in the future (to toughen it up),
1775 * I'm leaving this as a reminder of where these bits could get cleared
1776 */
1777
1778 /*
1779 * make sure both the h/w referenced and modified bits are
1780 * clear at this point... we are especially dependent on
1781 * not finding a 'stale' h/w modified in a number of spots
1782 * once this page goes back into use
1783 */
1784 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
1785 #endif
1786 mem->lopage = lopage;
1787 }
1788
1789 /*
1790 * vm_page_grab_fictitious:
1791 *
1792 * Remove a fictitious page from the free list.
1793 * Returns VM_PAGE_NULL if there are no free pages.
1794 */
1795 int c_vm_page_grab_fictitious = 0;
1796 int c_vm_page_grab_fictitious_failed = 0;
1797 int c_vm_page_release_fictitious = 0;
1798 int c_vm_page_more_fictitious = 0;
1799
1800 vm_page_t
1801 vm_page_grab_fictitious_common(
1802 ppnum_t phys_addr)
1803 {
1804 vm_page_t m;
1805
1806 if ((m = (vm_page_t)zget(vm_page_zone))) {
1807
1808 vm_page_init(m, phys_addr, FALSE);
1809 m->fictitious = TRUE;
1810
1811 c_vm_page_grab_fictitious++;
1812 } else
1813 c_vm_page_grab_fictitious_failed++;
1814
1815 return m;
1816 }
1817
1818 vm_page_t
1819 vm_page_grab_fictitious(void)
1820 {
1821 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1822 }
1823
1824 vm_page_t
1825 vm_page_grab_guard(void)
1826 {
1827 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1828 }
1829
1830
1831 /*
1832 * vm_page_release_fictitious:
1833 *
1834 * Release a fictitious page to the zone pool
1835 */
1836 void
1837 vm_page_release_fictitious(
1838 vm_page_t m)
1839 {
1840 assert(!m->free);
1841 assert(m->fictitious);
1842 assert(m->phys_page == vm_page_fictitious_addr ||
1843 m->phys_page == vm_page_guard_addr);
1844
1845 c_vm_page_release_fictitious++;
1846
1847 zfree(vm_page_zone, m);
1848 }
1849
1850 /*
1851 * vm_page_more_fictitious:
1852 *
1853 * Add more fictitious pages to the zone.
1854 * Allowed to block. This routine is closely tied to
1855 * the zones code, for several reasons:
1856 * 1. we need to carve some page structures out of physical
1857 * memory before zones work, so they _cannot_ come from
1858 * the zone_map.
1859 * 2. the zone needs to be collectable in order to prevent
1860 * growth without bound. These structures are used by
1861 * the device pager (by the hundreds and thousands), as
1862 * private pages for pageout, and as blocking pages for
1863 * pagein. Temporary bursts in demand should not result in
1864 * permanent allocation of a resource.
1865 * 3. To smooth allocation humps, we allocate single pages
1866 * with kernel_memory_allocate(), and cram them into the
1867 * zone.
1868 */
1869
1870 void vm_page_more_fictitious(void)
1871 {
1872 vm_offset_t addr;
1873 kern_return_t retval;
1874
1875 c_vm_page_more_fictitious++;
1876
1877 /*
1878 * Allocate a single page from the zone_map. Do not wait if no physical
1879 * pages are immediately available, and do not zero the space. We need
1880 * our own blocking lock here to prevent having multiple,
1881 * simultaneous requests from piling up on the zone_map lock. Exactly
1882 * one (of our) threads should be potentially waiting on the map lock.
1883 * If the winner is not vm-privileged, then the page allocation will fail,
1884 * and it will temporarily block here in vm_page_wait().
1885 */
1886 lck_mtx_lock(&vm_page_alloc_lock);
1887 /*
1888 * If another thread allocated space, just bail out now.
1889 */
1890 if (zone_free_count(vm_page_zone) > 5) {
1891 /*
1892 * The number "5" is a small number that is larger than the
1893 * number of fictitious pages that any single caller will
1894 * attempt to allocate. Otherwise, a thread will attempt to
1895 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1896 * release all of the resources and locks already acquired,
1897 * and then call this routine. This routine finds the pages
1898 * that the caller released, and so fails to allocate new space.
1899 * The process would repeat indefinitely. The largest known number
1900 * of fictitious pages required in this manner is 2. 5 is
1901 * simply a somewhat larger number.
1902 */
1903 lck_mtx_unlock(&vm_page_alloc_lock);
1904 return;
1905 }
1906
1907 retval = kernel_memory_allocate(zone_map,
1908 &addr, PAGE_SIZE, VM_PROT_ALL,
1909 KMA_KOBJECT|KMA_NOPAGEWAIT);
1910 if (retval != KERN_SUCCESS) {
1911 /*
1912 * No page was available. Drop the
1913 * lock to give another thread a chance at it, and
1914 * wait for the pageout daemon to make progress.
1915 */
1916 lck_mtx_unlock(&vm_page_alloc_lock);
1917 vm_page_wait(THREAD_UNINT);
1918 return;
1919 }
1920
1921 /* Increment zone page count. We account for all memory managed by the zone in z->page_count */
1922 OSAddAtomic64(1, &(vm_page_zone->page_count));
1923
1924 zcram(vm_page_zone, addr, PAGE_SIZE);
1925
1926 lck_mtx_unlock(&vm_page_alloc_lock);
1927 }
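
/*
 * Illustrative only: a minimal sketch (not compiled in) of the caller
 * retry pattern that the comment in vm_page_more_fictitious() assumes.
 * A real caller must drop its own locks before calling a routine that
 * may block.  "my_grab_fictitious_retry" is a hypothetical helper, not
 * part of this file.
 */
#if 0
static vm_page_t
my_grab_fictitious_retry(void)
{
	vm_page_t	m;

	for (;;) {
		m = vm_page_grab_fictitious();
		if (m != VM_PAGE_NULL)
			return m;
		/*
		 * zone was empty: replenish it (may block) and retry
		 */
		vm_page_more_fictitious();
	}
}
#endif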
1928
1929
1930 /*
1931 * vm_pool_low():
1932 *
1933 * Return true if it is not likely that a non-vm_privileged thread
1934 * can get memory without blocking. Advisory only, since the
1935 * situation may change under us.
1936 */
1937 int
1938 vm_pool_low(void)
1939 {
1940 /* No locking, at worst we will fib. */
1941 return( vm_page_free_count <= vm_page_free_reserved );
1942 }
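
/*
 * Illustrative only: a minimal sketch (not compiled in) of how an
 * allocation path that must not sleep might consult vm_pool_low()
 * before committing to work it cannot finish without blocking.  The
 * check is purely advisory, as noted above.  "my_try_nonblocking_grab"
 * is a hypothetical helper, not part of this file.
 */
#if 0
static kern_return_t
my_try_nonblocking_grab(vm_page_t *mp)
{
	if (vm_pool_low())
		return KERN_RESOURCE_SHORTAGE;	/* likely to block; bail early */

	*mp = vm_page_grab();
	if (*mp == VM_PAGE_NULL)
		return KERN_RESOURCE_SHORTAGE;	/* advisory check can still be wrong */

	return KERN_SUCCESS;
}
#endif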
1943
1944
1945
1946 /*
1947 * this is an interface to support bring-up of drivers
1948 * on platforms with physical memory > 4G...
1949 */
1950 int vm_himemory_mode = 2;
1951
1952
1953 /*
1954 * this interface exists to support hardware controllers
1955 * incapable of generating DMAs with more than 32 bits
1956 * of address on platforms with physical memory > 4G...
1957 */
1958 unsigned int vm_lopages_allocated_q = 0;
1959 unsigned int vm_lopages_allocated_cpm_success = 0;
1960 unsigned int vm_lopages_allocated_cpm_failed = 0;
1961 queue_head_t vm_lopage_queue_free;
1962
1963 vm_page_t
1964 vm_page_grablo(void)
1965 {
1966 vm_page_t mem;
1967
1968 if (vm_lopage_needed == FALSE)
1969 return (vm_page_grab());
1970
1971 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1972
1973 if ( !queue_empty(&vm_lopage_queue_free)) {
1974 queue_remove_first(&vm_lopage_queue_free,
1975 mem,
1976 vm_page_t,
1977 pageq);
1978 assert(vm_lopage_free_count);
1979
1980 vm_lopage_free_count--;
1981 vm_lopages_allocated_q++;
1982
1983 if (vm_lopage_free_count < vm_lopage_lowater)
1984 vm_lopage_refill = TRUE;
1985
1986 lck_mtx_unlock(&vm_page_queue_free_lock);
1987 } else {
1988 lck_mtx_unlock(&vm_page_queue_free_lock);
1989
1990 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
1991
1992 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1993 vm_lopages_allocated_cpm_failed++;
1994 lck_mtx_unlock(&vm_page_queue_free_lock);
1995
1996 return (VM_PAGE_NULL);
1997 }
1998 mem->busy = TRUE;
1999
2000 vm_page_lockspin_queues();
2001
2002 mem->gobbled = FALSE;
2003 vm_page_gobble_count--;
2004 vm_page_wire_count--;
2005
2006 vm_lopages_allocated_cpm_success++;
2007 vm_page_unlock_queues();
2008 }
2009 assert(mem->busy);
2010 assert(!mem->free);
2011 assert(!mem->pmapped);
2012 assert(!mem->wpmapped);
2013 assert(!pmap_is_noencrypt(mem->phys_page));
2014
2015 mem->pageq.next = NULL;
2016 mem->pageq.prev = NULL;
2017
2018 return (mem);
2019 }
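
/*
 * Illustrative only: a minimal sketch (not compiled in) of a caller
 * that needs a page reachable by a 32-bit DMA engine.  When
 * vm_lopage_needed is set, vm_page_grablo() returns a page whose
 * physical address lies below 4G; otherwise it degenerates to an
 * ordinary vm_page_grab().  "my_grab_dma32_page" is a hypothetical
 * helper, not part of this file.
 */
#if 0
static vm_page_t
my_grab_dma32_page(void)
{
	vm_page_t	m;

	m = vm_page_grablo();
	if (m == VM_PAGE_NULL)
		return VM_PAGE_NULL;		/* no low page available */

	if (vm_lopage_needed)
		assert(m->phys_page <= atop(0xffffffff));

	return m;
}
#endif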
2020
2021
2022 /*
2023 * vm_page_grab:
2024 *
2025 * first try to grab a page from the per-cpu free list...
2026 * this must be done while pre-emption is disabled... if
2027 * a page is available, we're done...
2028 * if no page is available, grab the vm_page_queue_free_lock
2029 * and see if current number of free pages would allow us
2030 * to grab at least 1... if not, return VM_PAGE_NULL as before...
2031 * if there are pages available, disable preemption and
2032 * recheck the state of the per-cpu free list... we could
2033 * have been preempted and moved to a different cpu, or
2034 * some other thread could have re-filled it... if still
2035 * empty, figure out how many pages we can steal from the
2036 * global free queue and move to the per-cpu queue...
2037 * return one of these pages when done... only wake up the
2038 * pageout_scan thread if we moved pages from the global
2039 * list... no need for the wakeup if we've satisfied the
2040 * request from the per-cpu queue.
2041 */
2042
2043
2044 vm_page_t
2045 vm_page_grab( void )
2046 {
2047 vm_page_t mem;
2048
2049
2050 disable_preemption();
2051
2052 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2053 return_page_from_cpu_list:
2054 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2055 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
2056
2057 enable_preemption();
2058 mem->pageq.next = NULL;
2059
2060 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
2061 assert(mem->tabled == FALSE);
2062 assert(mem->object == VM_OBJECT_NULL);
2063 assert(!mem->laundry);
2064 assert(!mem->free);
2065 assert(pmap_verify_free(mem->phys_page));
2066 assert(mem->busy);
2067 assert(!mem->encrypted);
2068 assert(!mem->pmapped);
2069 assert(!mem->wpmapped);
2070 assert(!mem->active);
2071 assert(!mem->inactive);
2072 assert(!mem->throttled);
2073 assert(!mem->speculative);
2074 assert(!pmap_is_noencrypt(mem->phys_page));
2075
2076 return mem;
2077 }
2078 enable_preemption();
2079
2080
2081 /*
2082 * Optionally produce warnings if the wire or gobble
2083 * counts exceed some threshold.
2084 */
2085 #if VM_PAGE_WIRE_COUNT_WARNING
2086 if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) {
2087 printf("mk: vm_page_grab(): high wired page count of %d\n",
2088 vm_page_wire_count);
2089 }
2090 #endif
2091 #if VM_PAGE_GOBBLE_COUNT_WARNING
2092 if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) {
2093 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
2094 vm_page_gobble_count);
2095 }
2096 #endif
2097 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2098
2099 /*
2100 * Only let privileged threads (involved in pageout)
2101 * dip into the reserved pool.
2102 */
2103 if ((vm_page_free_count < vm_page_free_reserved) &&
2104 !(current_thread()->options & TH_OPT_VMPRIV)) {
2105 lck_mtx_unlock(&vm_page_queue_free_lock);
2106 mem = VM_PAGE_NULL;
2107 }
2108 else {
2109 vm_page_t head;
2110 vm_page_t tail;
2111 unsigned int pages_to_steal;
2112 unsigned int color;
2113
2114 while ( vm_page_free_count == 0 ) {
2115
2116 lck_mtx_unlock(&vm_page_queue_free_lock);
2117 /*
2118 * must be a privileged thread to be
2119 * in this state since a non-privileged
2120 * thread would have bailed if we were
2121 * under the vm_page_free_reserved mark
2122 */
2123 VM_PAGE_WAIT();
2124 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2125 }
2126
2127 disable_preemption();
2128
2129 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2130 lck_mtx_unlock(&vm_page_queue_free_lock);
2131
2132 /*
2133 * we got preempted and moved to another processor
2134 * or we got preempted and someone else ran and filled the cache
2135 */
2136 goto return_page_from_cpu_list;
2137 }
2138 if (vm_page_free_count <= vm_page_free_reserved)
2139 pages_to_steal = 1;
2140 else {
2141 if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved))
2142 pages_to_steal = vm_free_magazine_refill_limit;
2143 else
2144 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
2145 }
2146 color = PROCESSOR_DATA(current_processor(), start_color);
2147 head = tail = NULL;
2148
2149 vm_page_free_count -= pages_to_steal;
2150
2151 while (pages_to_steal--) {
2152
2153 while (queue_empty(&vm_page_queue_free[color]))
2154 color = (color + 1) & vm_color_mask;
2155
2156 queue_remove_first(&vm_page_queue_free[color],
2157 mem,
2158 vm_page_t,
2159 pageq);
2160 mem->pageq.next = NULL;
2161 mem->pageq.prev = NULL;
2162
2163 assert(!mem->active);
2164 assert(!mem->inactive);
2165 assert(!mem->throttled);
2166 assert(!mem->speculative);
2167
2168 color = (color + 1) & vm_color_mask;
2169
2170 if (head == NULL)
2171 head = mem;
2172 else
2173 tail->pageq.next = (queue_t)mem;
2174 tail = mem;
2175
2176 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
2177 assert(mem->tabled == FALSE);
2178 assert(mem->object == VM_OBJECT_NULL);
2179 assert(!mem->laundry);
2180 assert(mem->free);
2181 mem->free = FALSE;
2182
2183 assert(pmap_verify_free(mem->phys_page));
2184 assert(mem->busy);
2185 assert(!mem->free);
2186 assert(!mem->encrypted);
2187 assert(!mem->pmapped);
2188 assert(!mem->wpmapped);
2189 assert(!pmap_is_noencrypt(mem->phys_page));
2190 }
2191 lck_mtx_unlock(&vm_page_queue_free_lock);
2192
2193 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
2194 PROCESSOR_DATA(current_processor(), start_color) = color;
2195
2196 /*
2197 * satisfy this request
2198 */
2199 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2200 mem = head;
2201 mem->pageq.next = NULL;
2202
2203 enable_preemption();
2204 }
2205 /*
2206 * Decide if we should poke the pageout daemon.
2207 * We do this if the free count is less than the low
2208 * water mark, or if the free count is less than the high
2209 * water mark (but above the low water mark) and the inactive
2210 * count is less than its target.
2211 *
2212 * We don't have the counts locked ... if they change a little,
2213 * it doesn't really matter.
2214 */
2215 if ((vm_page_free_count < vm_page_free_min) ||
2216 ((vm_page_free_count < vm_page_free_target) &&
2217 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
2218 thread_wakeup((event_t) &vm_page_free_wanted);
2219
2220 VM_CHECK_MEMORYSTATUS;
2221
2222 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
2223
2224 return mem;
2225 }
2226
2227 /*
2228 * vm_page_release:
2229 *
2230 * Return a page to the free list.
2231 */
2232
2233 void
2234 vm_page_release(
2235 register vm_page_t mem)
2236 {
2237 unsigned int color;
2238 int need_wakeup = 0;
2239 int need_priv_wakeup = 0;
2240
2241
2242 assert(!mem->private && !mem->fictitious);
2243 if (vm_page_free_verify) {
2244 assert(pmap_verify_free(mem->phys_page));
2245 }
2246 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
2247
2248 pmap_clear_noencrypt(mem->phys_page);
2249
2250 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2251 #if DEBUG
2252 if (mem->free)
2253 panic("vm_page_release");
2254 #endif
2255
2256 assert(mem->busy);
2257 assert(!mem->laundry);
2258 assert(mem->object == VM_OBJECT_NULL);
2259 assert(mem->pageq.next == NULL &&
2260 mem->pageq.prev == NULL);
2261 assert(mem->listq.next == NULL &&
2262 mem->listq.prev == NULL);
2263
2264 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2265 vm_lopage_free_count < vm_lopage_free_limit &&
2266 mem->phys_page < max_valid_low_ppnum) {
2267 /*
2268 * this exists to support hardware controllers
2269 * incapable of generating DMAs with more than 32 bits
2270 * of address on platforms with physical memory > 4G...
2271 */
2272 queue_enter_first(&vm_lopage_queue_free,
2273 mem,
2274 vm_page_t,
2275 pageq);
2276 vm_lopage_free_count++;
2277
2278 if (vm_lopage_free_count >= vm_lopage_free_limit)
2279 vm_lopage_refill = FALSE;
2280
2281 mem->lopage = TRUE;
2282 } else {
2283 mem->lopage = FALSE;
2284 mem->free = TRUE;
2285
2286 color = mem->phys_page & vm_color_mask;
2287 queue_enter_first(&vm_page_queue_free[color],
2288 mem,
2289 vm_page_t,
2290 pageq);
2291 vm_page_free_count++;
2292 /*
2293 * Check if we should wake up someone waiting for a page.
2294 * But don't bother waking them unless they can allocate.
2295 *
2296 * We wake up only one thread, to prevent starvation.
2297 * Because the scheduling system handles wait queues FIFO,
2298 * if we wake up all waiting threads, one greedy thread
2299 * can starve multiple well-behaved threads. When the threads
2300 * all wake up, the greedy thread runs first, grabs the page,
2301 * and waits for another page. It will be the first to run
2302 * when the next page is freed.
2303 *
2304 * However, there is a slight danger here.
2305 * The thread we wake might not use the free page.
2306 * Then the other threads could wait indefinitely
2307 * while the page goes unused. To forestall this,
2308 * the pageout daemon will keep making free pages
2309 * as long as vm_page_free_wanted is non-zero.
2310 */
2311
2312 assert(vm_page_free_count > 0);
2313 if (vm_page_free_wanted_privileged > 0) {
2314 vm_page_free_wanted_privileged--;
2315 need_priv_wakeup = 1;
2316 } else if (vm_page_free_wanted > 0 &&
2317 vm_page_free_count > vm_page_free_reserved) {
2318 vm_page_free_wanted--;
2319 need_wakeup = 1;
2320 }
2321 }
2322 lck_mtx_unlock(&vm_page_queue_free_lock);
2323
2324 if (need_priv_wakeup)
2325 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2326 else if (need_wakeup)
2327 thread_wakeup_one((event_t) &vm_page_free_count);
2328
2329 VM_CHECK_MEMORYSTATUS;
2330 }
2331
2332 /*
2333 * This version of vm_page_release() is used only at startup
2334 * when we are single-threaded and pages are being released
2335 * for the first time. Hence, no locking is needed and redundant checks are skipped.
2336 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
2337 */
2338 void
2339 vm_page_release_startup(
2340 register vm_page_t mem)
2341 {
2342 queue_t queue_free;
2343
2344 if (vm_lopage_free_count < vm_lopage_free_limit &&
2345 mem->phys_page < max_valid_low_ppnum) {
2346 mem->lopage = TRUE;
2347 vm_lopage_free_count++;
2348 queue_free = &vm_lopage_queue_free;
2349 } else {
2350 mem->lopage = FALSE;
2351 mem->free = TRUE;
2352 vm_page_free_count++;
2353 queue_free = &vm_page_queue_free[mem->phys_page & vm_color_mask];
2354 }
2355 queue_enter_first(queue_free, mem, vm_page_t, pageq);
2356 }
2357
2358 /*
2359 * vm_page_wait:
2360 *
2361 * Wait for a page to become available.
2362 * If there are plenty of free pages, then we don't sleep.
2363 *
2364 * Returns:
2365 * TRUE: There may be another page, try again
2366 * FALSE: We were interrupted out of our wait, don't try again
2367 */
2368
2369 boolean_t
2370 vm_page_wait(
2371 int interruptible )
2372 {
2373 /*
2374 * We can't use vm_page_free_reserved to make this
2375 * determination. Consider: some thread might
2376 * need to allocate two pages. The first allocation
2377 * succeeds, the second fails. After the first page is freed,
2378 * a call to vm_page_wait must really block.
2379 */
2380 kern_return_t wait_result;
2381 int need_wakeup = 0;
2382 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
2383
2384 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2385
2386 if (is_privileged && vm_page_free_count) {
2387 lck_mtx_unlock(&vm_page_queue_free_lock);
2388 return TRUE;
2389 }
2390 if (vm_page_free_count < vm_page_free_target) {
2391
2392 if (is_privileged) {
2393 if (vm_page_free_wanted_privileged++ == 0)
2394 need_wakeup = 1;
2395 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2396 } else {
2397 if (vm_page_free_wanted++ == 0)
2398 need_wakeup = 1;
2399 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2400 }
2401 lck_mtx_unlock(&vm_page_queue_free_lock);
2402 counter(c_vm_page_wait_block++);
2403
2404 if (need_wakeup)
2405 thread_wakeup((event_t)&vm_page_free_wanted);
2406
2407 if (wait_result == THREAD_WAITING) {
2408 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
2409 vm_page_free_wanted_privileged, vm_page_free_wanted, 0, 0);
2410 wait_result = thread_block(THREAD_CONTINUE_NULL);
2411 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
2412 }
2413
2414 return(wait_result == THREAD_AWAKENED);
2415 } else {
2416 lck_mtx_unlock(&vm_page_queue_free_lock);
2417 return TRUE;
2418 }
2419 }
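
/*
 * Illustrative only: a minimal sketch (not compiled in) of the
 * grab-then-wait retry loop that callers of vm_page_grab() and
 * vm_page_wait() typically use (compare the fallback path in
 * vm_page_part_zero_fill() below).  TRUE from vm_page_wait() means
 * "a page may now be available, try again"; FALSE means the wait was
 * interrupted.  "my_grab_one_page" is a hypothetical helper, not part
 * of this file.
 */
#if 0
static kern_return_t
my_grab_one_page(vm_page_t *mp)
{
	vm_page_t	m;

	for (;;) {
		m = vm_page_grab();
		if (m != VM_PAGE_NULL)
			break;				/* got a page */
		if (!vm_page_wait(THREAD_INTERRUPTIBLE))
			return KERN_ABORTED;		/* interrupted out of the wait */
	}
	*mp = m;
	return KERN_SUCCESS;
}
#endif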
2420
2421 /*
2422 * vm_page_alloc:
2423 *
2424 * Allocate and return a memory cell associated
2425 * with this VM object/offset pair.
2426 *
2427 * Object must be locked.
2428 */
2429
2430 vm_page_t
2431 vm_page_alloc(
2432 vm_object_t object,
2433 vm_object_offset_t offset)
2434 {
2435 register vm_page_t mem;
2436
2437 vm_object_lock_assert_exclusive(object);
2438 mem = vm_page_grab();
2439 if (mem == VM_PAGE_NULL)
2440 return VM_PAGE_NULL;
2441
2442 vm_page_insert(mem, object, offset);
2443
2444 return(mem);
2445 }
2446
2447 vm_page_t
2448 vm_page_alloclo(
2449 vm_object_t object,
2450 vm_object_offset_t offset)
2451 {
2452 register vm_page_t mem;
2453
2454 vm_object_lock_assert_exclusive(object);
2455 mem = vm_page_grablo();
2456 if (mem == VM_PAGE_NULL)
2457 return VM_PAGE_NULL;
2458
2459 vm_page_insert(mem, object, offset);
2460
2461 return(mem);
2462 }
2463
2464
2465 /*
2466 * vm_page_alloc_guard:
2467 *
2468 * Allocate a fictitious page which will be used
2469 * as a guard page. The page will be inserted into
2470 * the object and returned to the caller.
2471 */
2472
2473 vm_page_t
2474 vm_page_alloc_guard(
2475 vm_object_t object,
2476 vm_object_offset_t offset)
2477 {
2478 register vm_page_t mem;
2479
2480 vm_object_lock_assert_exclusive(object);
2481 mem = vm_page_grab_guard();
2482 if (mem == VM_PAGE_NULL)
2483 return VM_PAGE_NULL;
2484
2485 vm_page_insert(mem, object, offset);
2486
2487 return(mem);
2488 }
2489
2490
2491 counter(unsigned int c_laundry_pages_freed = 0;)
2492
2493 /*
2494 * vm_page_free_prepare:
2495 *
2496 * Removes page from any queue it may be on
2497 * and disassociates it from its VM object.
2498 *
2499 * Object and page queues must be locked prior to entry.
2500 */
2501 static void
2502 vm_page_free_prepare(
2503 vm_page_t mem)
2504 {
2505 vm_page_free_prepare_queues(mem);
2506 vm_page_free_prepare_object(mem, TRUE);
2507 }
2508
2509
2510 void
2511 vm_page_free_prepare_queues(
2512 vm_page_t mem)
2513 {
2514 VM_PAGE_CHECK(mem);
2515 assert(!mem->free);
2516 assert(!mem->cleaning);
2517
2518 #if MACH_ASSERT || DEBUG
2519 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2520 if (mem->free)
2521 panic("vm_page_free: freeing page on free list\n");
2522 #endif /* MACH_ASSERT || DEBUG */
2523 if (mem->object) {
2524 vm_object_lock_assert_exclusive(mem->object);
2525 }
2526 if (mem->laundry) {
2527 /*
2528 * We may have to free a page while it's being laundered
2529 * if we lost its pager (due to a forced unmount, for example).
2530 * We need to call vm_pageout_steal_laundry() before removing
2531 * the page from its VM object, so that we can remove it
2532 * from its pageout queue and adjust the laundry accounting
2533 */
2534 vm_pageout_steal_laundry(mem, TRUE);
2535 counter(++c_laundry_pages_freed);
2536 }
2537
2538 VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */
2539
2540 if (VM_PAGE_WIRED(mem)) {
2541 if (mem->object) {
2542 assert(mem->object->wired_page_count > 0);
2543 mem->object->wired_page_count--;
2544 assert(mem->object->resident_page_count >=
2545 mem->object->wired_page_count);
2546
2547 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2548 OSAddAtomic(+1, &vm_page_purgeable_count);
2549 assert(vm_page_purgeable_wired_count > 0);
2550 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2551 }
2552 if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
2553 mem->object->purgable == VM_PURGABLE_EMPTY) &&
2554 mem->object->vo_purgeable_owner != TASK_NULL) {
2555 task_t owner;
2556
2557 owner = mem->object->vo_purgeable_owner;
2558 /*
2559 * While wired, this page was accounted
2560 * as "non-volatile" but it should now
2561 * be accounted as "volatile".
2562 */
2563 /* one less "non-volatile"... */
2564 ledger_debit(owner->ledger,
2565 task_ledgers.purgeable_nonvolatile,
2566 PAGE_SIZE);
2567 /* ... and "phys_footprint" */
2568 ledger_debit(owner->ledger,
2569 task_ledgers.phys_footprint,
2570 PAGE_SIZE);
2571 /* one more "volatile" */
2572 ledger_credit(owner->ledger,
2573 task_ledgers.purgeable_volatile,
2574 PAGE_SIZE);
2575 }
2576 }
2577 if (!mem->private && !mem->fictitious)
2578 vm_page_wire_count--;
2579 mem->wire_count = 0;
2580 assert(!mem->gobbled);
2581 } else if (mem->gobbled) {
2582 if (!mem->private && !mem->fictitious)
2583 vm_page_wire_count--;
2584 vm_page_gobble_count--;
2585 }
2586 }
2587
2588
2589 void
2590 vm_page_free_prepare_object(
2591 vm_page_t mem,
2592 boolean_t remove_from_hash)
2593 {
2594 if (mem->tabled)
2595 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
2596
2597 PAGE_WAKEUP(mem); /* clears wanted */
2598
2599 if (mem->private) {
2600 mem->private = FALSE;
2601 mem->fictitious = TRUE;
2602 mem->phys_page = vm_page_fictitious_addr;
2603 }
2604 if ( !mem->fictitious) {
2605 vm_page_init(mem, mem->phys_page, mem->lopage);
2606 }
2607 }
2608
2609
2610 /*
2611 * vm_page_free:
2612 *
2613 * Returns the given page to the free list,
2614 * disassociating it from its VM object.
2615 *
2616 * Object and page queues must be locked prior to entry.
2617 */
2618 void
2619 vm_page_free(
2620 vm_page_t mem)
2621 {
2622 vm_page_free_prepare(mem);
2623
2624 if (mem->fictitious) {
2625 vm_page_release_fictitious(mem);
2626 } else {
2627 vm_page_release(mem);
2628 }
2629 }
2630
2631
2632 void
2633 vm_page_free_unlocked(
2634 vm_page_t mem,
2635 boolean_t remove_from_hash)
2636 {
2637 vm_page_lockspin_queues();
2638 vm_page_free_prepare_queues(mem);
2639 vm_page_unlock_queues();
2640
2641 vm_page_free_prepare_object(mem, remove_from_hash);
2642
2643 if (mem->fictitious) {
2644 vm_page_release_fictitious(mem);
2645 } else {
2646 vm_page_release(mem);
2647 }
2648 }
2649
2650
2651 /*
2652 * Free a list of pages. The list can be up to several hundred pages,
2653 * as batched up by vm_pageout_scan().
2654 * The big win is not having to take the free list lock once
2655 * per page.
2656 */
2657 void
2658 vm_page_free_list(
2659 vm_page_t freeq,
2660 boolean_t prepare_object)
2661 {
2662 vm_page_t mem;
2663 vm_page_t nxt;
2664 vm_page_t local_freeq;
2665 int pg_count;
2666
2667 while (freeq) {
2668
2669 pg_count = 0;
2670 local_freeq = VM_PAGE_NULL;
2671 mem = freeq;
2672
2673 /*
2674 * break up the processing into smaller chunks so
2675 * that we can 'pipeline' the pages onto the
2676 * free list w/o introducing too much
2677 * contention on the global free queue lock
2678 */
2679 while (mem && pg_count < 64) {
2680
2681 assert(!mem->inactive);
2682 assert(!mem->active);
2683 assert(!mem->throttled);
2684 assert(!mem->free);
2685 assert(!mem->speculative);
2686 assert(!VM_PAGE_WIRED(mem));
2687 assert(mem->pageq.prev == NULL);
2688
2689 nxt = (vm_page_t)(mem->pageq.next);
2690
2691 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2692 assert(pmap_verify_free(mem->phys_page));
2693 }
2694 if (prepare_object == TRUE)
2695 vm_page_free_prepare_object(mem, TRUE);
2696
2697 if (!mem->fictitious) {
2698 assert(mem->busy);
2699
2700 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2701 vm_lopage_free_count < vm_lopage_free_limit &&
2702 mem->phys_page < max_valid_low_ppnum) {
2703 mem->pageq.next = NULL;
2704 vm_page_release(mem);
2705 } else {
2706 /*
2707 * IMPORTANT: we can't set the page "free" here
2708 * because that would make the page eligible for
2709 * a physically-contiguous allocation (see
2710 * vm_page_find_contiguous()) right away (we don't
2711 * hold the vm_page_queue_free lock). That would
2712 * cause trouble because the page is not actually
2713 * in the free queue yet...
2714 */
2715 mem->pageq.next = (queue_entry_t)local_freeq;
2716 local_freeq = mem;
2717 pg_count++;
2718
2719 pmap_clear_noencrypt(mem->phys_page);
2720 }
2721 } else {
2722 assert(mem->phys_page == vm_page_fictitious_addr ||
2723 mem->phys_page == vm_page_guard_addr);
2724 vm_page_release_fictitious(mem);
2725 }
2726 mem = nxt;
2727 }
2728 freeq = mem;
2729
2730 if ( (mem = local_freeq) ) {
2731 unsigned int avail_free_count;
2732 unsigned int need_wakeup = 0;
2733 unsigned int need_priv_wakeup = 0;
2734
2735 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2736
2737 while (mem) {
2738 int color;
2739
2740 nxt = (vm_page_t)(mem->pageq.next);
2741
2742 assert(!mem->free);
2743 assert(mem->busy);
2744 mem->free = TRUE;
2745
2746 color = mem->phys_page & vm_color_mask;
2747 queue_enter_first(&vm_page_queue_free[color],
2748 mem,
2749 vm_page_t,
2750 pageq);
2751 mem = nxt;
2752 }
2753 vm_page_free_count += pg_count;
2754 avail_free_count = vm_page_free_count;
2755
2756 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
2757
2758 if (avail_free_count < vm_page_free_wanted_privileged) {
2759 need_priv_wakeup = avail_free_count;
2760 vm_page_free_wanted_privileged -= avail_free_count;
2761 avail_free_count = 0;
2762 } else {
2763 need_priv_wakeup = vm_page_free_wanted_privileged;
2764 avail_free_count -= vm_page_free_wanted_privileged;
2765 vm_page_free_wanted_privileged = 0;
2766 }
2767 }
2768 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
2769 unsigned int available_pages;
2770
2771 available_pages = avail_free_count - vm_page_free_reserved;
2772
2773 if (available_pages >= vm_page_free_wanted) {
2774 need_wakeup = vm_page_free_wanted;
2775 vm_page_free_wanted = 0;
2776 } else {
2777 need_wakeup = available_pages;
2778 vm_page_free_wanted -= available_pages;
2779 }
2780 }
2781 lck_mtx_unlock(&vm_page_queue_free_lock);
2782
2783 if (need_priv_wakeup != 0) {
2784 /*
2785 * There shouldn't be that many VM-privileged threads,
2786 * so let's wake them all up, even if we don't quite
2787 * have enough pages to satisfy them all.
2788 */
2789 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2790 }
2791 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2792 /*
2793 * We don't expect to have any more waiters
2794 * after this, so let's wake them all up at
2795 * once.
2796 */
2797 thread_wakeup((event_t) &vm_page_free_count);
2798 } else for (; need_wakeup != 0; need_wakeup--) {
2799 /*
2800 * Wake up one waiter per page we just released.
2801 */
2802 thread_wakeup_one((event_t) &vm_page_free_count);
2803 }
2804
2805 VM_CHECK_MEMORYSTATUS;
2806 }
2807 }
2808 }
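
/*
 * Illustrative only: a minimal sketch (not compiled in) of how a caller
 * such as vm_pageout_scan() batches pages for vm_page_free_list().
 * Pages are chained through pageq.next (pageq.prev left NULL) and the
 * whole chain is freed with one call, so the free-list lock is taken
 * once per batch rather than once per page.  Each page is assumed to
 * have already been taken off all paging queues (e.g. via
 * vm_page_free_prepare_queues()) and to be unwired.
 * "my_free_page_batch" is a hypothetical helper, not part of this file.
 */
#if 0
static void
my_free_page_batch(vm_page_t *pages, int npages)
{
	vm_page_t	local_freeq = VM_PAGE_NULL;
	int		i;

	for (i = 0; i < npages; i++) {
		vm_page_t m = pages[i];

		m->pageq.next = (queue_entry_t) local_freeq;
		m->pageq.prev = NULL;
		local_freeq = m;
	}
	if (local_freeq != VM_PAGE_NULL)
		vm_page_free_list(local_freeq, TRUE);
}
#endif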
2809
2810
2811 /*
2812 * vm_page_wire:
2813 *
2814 * Mark this page as wired down by yet
2815 * another map, removing it from paging queues
2816 * as necessary.
2817 *
2818 * The page's object and the page queues must be locked.
2819 */
2820 void
2821 vm_page_wire(
2822 register vm_page_t mem)
2823 {
2824
2825 // dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2826
2827 VM_PAGE_CHECK(mem);
2828 if (mem->object) {
2829 vm_object_lock_assert_exclusive(mem->object);
2830 } else {
2831 /*
2832 * In theory, the page should be in an object before it
2833 * gets wired, since we need to hold the object lock
2834 * to update some fields in the page structure.
2835 * However, some code (i386 pmap, for example) might want
2836 * to wire a page before it gets inserted into an object.
2837 * That's somewhat OK, as long as nobody else can get to
2838 * that page and update it at the same time.
2839 */
2840 }
2841 #if DEBUG
2842 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2843 #endif
2844 if ( !VM_PAGE_WIRED(mem)) {
2845
2846 if (mem->pageout_queue) {
2847 mem->pageout = FALSE;
2848 vm_pageout_throttle_up(mem);
2849 }
2850 VM_PAGE_QUEUES_REMOVE(mem);
2851
2852 if (mem->object) {
2853 mem->object->wired_page_count++;
2854 assert(mem->object->resident_page_count >=
2855 mem->object->wired_page_count);
2856 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2857 assert(vm_page_purgeable_count > 0);
2858 OSAddAtomic(-1, &vm_page_purgeable_count);
2859 OSAddAtomic(1, &vm_page_purgeable_wired_count);
2860 }
2861 if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
2862 mem->object->purgable == VM_PURGABLE_EMPTY) &&
2863 mem->object->vo_purgeable_owner != TASK_NULL) {
2864 task_t owner;
2865
2866 owner = mem->object->vo_purgeable_owner;
2867 /* less volatile bytes */
2868 ledger_debit(owner->ledger,
2869 task_ledgers.purgeable_volatile,
2870 PAGE_SIZE);
2871 /* more not-quite-volatile bytes */
2872 ledger_credit(owner->ledger,
2873 task_ledgers.purgeable_nonvolatile,
2874 PAGE_SIZE);
2875 /* more footprint */
2876 ledger_credit(owner->ledger,
2877 task_ledgers.phys_footprint,
2878 PAGE_SIZE);
2879 }
2880 if (mem->object->all_reusable) {
2881 /*
2882 * Wired pages are not counted as "re-usable"
2883 * in "all_reusable" VM objects, so nothing
2884 * to do here.
2885 */
2886 } else if (mem->reusable) {
2887 /*
2888 * This page is not "re-usable" when it's
2889 * wired, so adjust its state and the
2890 * accounting.
2891 */
2892 vm_object_reuse_pages(mem->object,
2893 mem->offset,
2894 mem->offset+PAGE_SIZE_64,
2895 FALSE);
2896 }
2897 }
2898 assert(!mem->reusable);
2899
2900 if (!mem->private && !mem->fictitious && !mem->gobbled)
2901 vm_page_wire_count++;
2902 if (mem->gobbled)
2903 vm_page_gobble_count--;
2904 mem->gobbled = FALSE;
2905
2906 VM_CHECK_MEMORYSTATUS;
2907
2908 /*
2909 * ENCRYPTED SWAP:
2910 * The page could be encrypted, but
2911 * we don't have to decrypt it here
2912 * because we don't guarantee that the
2913 * data is actually valid at this point.
2914 * The page will get decrypted in
2915 * vm_fault_wire() if needed.
2916 */
2917 }
2918 assert(!mem->gobbled);
2919 mem->wire_count++;
2920 VM_PAGE_CHECK(mem);
2921 }
2922
2923 /*
2924 * vm_page_gobble:
2925 *
2926 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2927 *
2928 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2929 */
2930 void
2931 vm_page_gobble(
2932 register vm_page_t mem)
2933 {
2934 vm_page_lockspin_queues();
2935 VM_PAGE_CHECK(mem);
2936
2937 assert(!mem->gobbled);
2938 assert( !VM_PAGE_WIRED(mem));
2939
2940 if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
2941 if (!mem->private && !mem->fictitious)
2942 vm_page_wire_count++;
2943 }
2944 vm_page_gobble_count++;
2945 mem->gobbled = TRUE;
2946 vm_page_unlock_queues();
2947 }
2948
2949 /*
2950 * vm_page_unwire:
2951 *
2952 * Release one wiring of this page, potentially
2953 * enabling it to be paged again.
2954 *
2955 * The page's object and the page queues must be locked.
2956 */
2957 void
2958 vm_page_unwire(
2959 vm_page_t mem,
2960 boolean_t queueit)
2961 {
2962
2963 // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
2964
2965 VM_PAGE_CHECK(mem);
2966 assert(VM_PAGE_WIRED(mem));
2967 assert(mem->object != VM_OBJECT_NULL);
2968 #if DEBUG
2969 vm_object_lock_assert_exclusive(mem->object);
2970 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2971 #endif
2972 if (--mem->wire_count == 0) {
2973 assert(!mem->private && !mem->fictitious);
2974 vm_page_wire_count--;
2975 assert(mem->object->wired_page_count > 0);
2976 mem->object->wired_page_count--;
2977 assert(mem->object->resident_page_count >=
2978 mem->object->wired_page_count);
2979 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2980 OSAddAtomic(+1, &vm_page_purgeable_count);
2981 assert(vm_page_purgeable_wired_count > 0);
2982 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2983 }
2984 if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
2985 mem->object->purgable == VM_PURGABLE_EMPTY) &&
2986 mem->object->vo_purgeable_owner != TASK_NULL) {
2987 task_t owner;
2988
2989 owner = mem->object->vo_purgeable_owner;
2990 /* more volatile bytes */
2991 ledger_credit(owner->ledger,
2992 task_ledgers.purgeable_volatile,
2993 PAGE_SIZE);
2994 /* less not-quite-volatile bytes */
2995 ledger_debit(owner->ledger,
2996 task_ledgers.purgeable_nonvolatile,
2997 PAGE_SIZE);
2998 /* less footprint */
2999 ledger_debit(owner->ledger,
3000 task_ledgers.phys_footprint,
3001 PAGE_SIZE);
3002 }
3003 assert(mem->object != kernel_object);
3004 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
3005
3006 if (queueit == TRUE) {
3007 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
3008 vm_page_deactivate(mem);
3009 } else {
3010 vm_page_activate(mem);
3011 }
3012 }
3013
3014 VM_CHECK_MEMORYSTATUS;
3015
3016 }
3017 VM_PAGE_CHECK(mem);
3018 }
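
/*
 * Illustrative only: a minimal sketch (not compiled in) of the locking
 * protocol a caller of vm_page_wire()/vm_page_unwire() is expected to
 * follow, per the comments above: the page's object is locked
 * exclusively and the page queues lock is held across each call.
 * "my_wire_then_unwire" is a hypothetical helper, not part of this file.
 */
#if 0
static void
my_wire_then_unwire(vm_object_t object, vm_page_t m)
{
	vm_object_lock(object);			/* exclusive object lock */
	vm_page_lockspin_queues();
	vm_page_wire(m);			/* first wiring pulls m off the paging queues */
	vm_page_unlock_queues();

	/* ... the page cannot be paged out while it remains wired ... */

	vm_page_lockspin_queues();
	vm_page_unwire(m, TRUE);		/* last unwiring re-queues m */
	vm_page_unlock_queues();
	vm_object_unlock(object);
}
#endif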
3019
3020 /*
3021 * vm_page_deactivate:
3022 *
3023 * Returns the given page to the inactive list,
3024 * indicating that no physical maps have access
3025 * to this page. [Used by the physical mapping system.]
3026 *
3027 * The page queues must be locked.
3028 */
3029 void
3030 vm_page_deactivate(
3031 vm_page_t m)
3032 {
3033 vm_page_deactivate_internal(m, TRUE);
3034 }
3035
3036
3037 void
3038 vm_page_deactivate_internal(
3039 vm_page_t m,
3040 boolean_t clear_hw_reference)
3041 {
3042
3043 VM_PAGE_CHECK(m);
3044 assert(m->object != kernel_object);
3045 assert(m->phys_page != vm_page_guard_addr);
3046
3047 // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
3048 #if DEBUG
3049 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3050 #endif
3051 /*
3052 * This page is no longer very interesting. If it was
3053 * interesting (active or inactive/referenced), then we
3054 * clear the reference bit and (re)enter it in the
3055 * inactive queue. Note wired pages should not have
3056 * their reference bit cleared.
3057 */
3058 assert ( !(m->absent && !m->unusual));
3059
3060 if (m->gobbled) { /* can this happen? */
3061 assert( !VM_PAGE_WIRED(m));
3062
3063 if (!m->private && !m->fictitious)
3064 vm_page_wire_count--;
3065 vm_page_gobble_count--;
3066 m->gobbled = FALSE;
3067 }
3068 /*
3069 * if this page is currently on the pageout queue, we can't do the
3070 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3071 * and we can't remove it manually since we would need the object lock
3072 * (which is not required here) to decrement the activity_in_progress
3073 * reference which is held on the object while the page is in the pageout queue...
3074 * just let the normal laundry processing proceed
3075 */
3076 if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor || (VM_PAGE_WIRED(m)))
3077 return;
3078
3079 if (!m->absent && clear_hw_reference == TRUE)
3080 pmap_clear_reference(m->phys_page);
3081
3082 m->reference = FALSE;
3083 m->no_cache = FALSE;
3084
3085 if (!m->inactive) {
3086 VM_PAGE_QUEUES_REMOVE(m);
3087
3088 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
3089 m->dirty && m->object->internal &&
3090 (m->object->purgable == VM_PURGABLE_DENY ||
3091 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
3092 m->object->purgable == VM_PURGABLE_VOLATILE)) {
3093 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
3094 m->throttled = TRUE;
3095 vm_page_throttled_count++;
3096 } else {
3097 if (m->object->named && m->object->ref_count == 1) {
3098 vm_page_speculate(m, FALSE);
3099 #if DEVELOPMENT || DEBUG
3100 vm_page_speculative_recreated++;
3101 #endif
3102 } else {
3103 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
3104 }
3105 }
3106 }
3107 }
3108
3109 /*
3110 * vm_page_enqueue_cleaned
3111 *
3112 * Put the page on the cleaned queue, mark it cleaned, etc.
3113 * Being on the cleaned queue (and having m->clean_queue set)
3114 * does ** NOT ** guarantee that the page is clean!
3115 *
3116 * Call with the queues lock held.
3117 */
3118
3119 void vm_page_enqueue_cleaned(vm_page_t m)
3120 {
3121 assert(m->phys_page != vm_page_guard_addr);
3122 #if DEBUG
3123 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3124 #endif
3125 assert( !(m->absent && !m->unusual));
3126
3127 if (m->gobbled) {
3128 assert( !VM_PAGE_WIRED(m));
3129 if (!m->private && !m->fictitious)
3130 vm_page_wire_count--;
3131 vm_page_gobble_count--;
3132 m->gobbled = FALSE;
3133 }
3134 /*
3135 * if this page is currently on the pageout queue, we can't do the
3136 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3137 * and we can't remove it manually since we would need the object lock
3138 * (which is not required here) to decrement the activity_in_progress
3139 * reference which is held on the object while the page is in the pageout queue...
3140 * just let the normal laundry processing proceed
3141 */
3142 if (m->laundry || m->clean_queue || m->pageout_queue || m->private || m->fictitious)
3143 return;
3144
3145 VM_PAGE_QUEUES_REMOVE(m);
3146
3147 queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
3148 m->clean_queue = TRUE;
3149 vm_page_cleaned_count++;
3150
3151 m->inactive = TRUE;
3152 vm_page_inactive_count++;
3153 if (m->object->internal) {
3154 vm_page_pageable_internal_count++;
3155 } else {
3156 vm_page_pageable_external_count++;
3157 }
3158
3159 vm_pageout_enqueued_cleaned++;
3160 }
3161
3162 /*
3163 * vm_page_activate:
3164 *
3165 * Put the specified page on the active list (if appropriate).
3166 *
3167 * The page queues must be locked.
3168 */
3169
3170 void
3171 vm_page_activate(
3172 register vm_page_t m)
3173 {
3174 VM_PAGE_CHECK(m);
3175 #ifdef FIXME_4778297
3176 assert(m->object != kernel_object);
3177 #endif
3178 assert(m->phys_page != vm_page_guard_addr);
3179 #if DEBUG
3180 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3181 #endif
3182 assert( !(m->absent && !m->unusual));
3183
3184 if (m->gobbled) {
3185 assert( !VM_PAGE_WIRED(m));
3186 if (!m->private && !m->fictitious)
3187 vm_page_wire_count--;
3188 vm_page_gobble_count--;
3189 m->gobbled = FALSE;
3190 }
3191 /*
3192 * if this page is currently on the pageout queue, we can't do the
3193 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3194 * and we can't remove it manually since we would need the object lock
3195 * (which is not required here) to decrement the activity_in_progress
3196 * reference which is held on the object while the page is in the pageout queue...
3197 * just let the normal laundry processing proceed
3198 */
3199 if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
3200 return;
3201
3202 #if DEBUG
3203 if (m->active)
3204 panic("vm_page_activate: already active");
3205 #endif
3206
3207 if (m->speculative) {
3208 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
3209 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
3210 }
3211
3212 VM_PAGE_QUEUES_REMOVE(m);
3213
3214 if ( !VM_PAGE_WIRED(m)) {
3215
3216 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
3217 m->dirty && m->object->internal &&
3218 (m->object->purgable == VM_PURGABLE_DENY ||
3219 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
3220 m->object->purgable == VM_PURGABLE_VOLATILE)) {
3221 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
3222 m->throttled = TRUE;
3223 vm_page_throttled_count++;
3224 } else {
3225 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
3226 m->active = TRUE;
3227 vm_page_active_count++;
3228 if (m->object->internal) {
3229 vm_page_pageable_internal_count++;
3230 } else {
3231 vm_page_pageable_external_count++;
3232 }
3233 }
3234 m->reference = TRUE;
3235 m->no_cache = FALSE;
3236 }
3237 VM_PAGE_CHECK(m);
3238 }
3239
3240
3241 /*
3242 * vm_page_speculate:
3243 *
3244 * Put the specified page on the speculative list (if appropriate).
3245 *
3246 * The page queues must be locked.
3247 */
3248 void
3249 vm_page_speculate(
3250 vm_page_t m,
3251 boolean_t new)
3252 {
3253 struct vm_speculative_age_q *aq;
3254
3255 VM_PAGE_CHECK(m);
3256 assert(m->object != kernel_object);
3257 assert(m->phys_page != vm_page_guard_addr);
3258 #if DEBUG
3259 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3260 #endif
3261 assert( !(m->absent && !m->unusual));
3262
3263 /*
3264 * if this page is currently on the pageout queue, we can't do the
3265 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3266 * and we can't remove it manually since we would need the object lock
3267 * (which is not required here) to decrement the activity_in_progress
3268 * reference which is held on the object while the page is in the pageout queue...
3269 * just let the normal laundry processing proceed
3270 */
3271 if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
3272 return;
3273
3274 VM_PAGE_QUEUES_REMOVE(m);
3275
3276 if ( !VM_PAGE_WIRED(m)) {
3277 mach_timespec_t ts;
3278 clock_sec_t sec;
3279 clock_nsec_t nsec;
3280
3281 clock_get_system_nanotime(&sec, &nsec);
3282 ts.tv_sec = (unsigned int) sec;
3283 ts.tv_nsec = nsec;
3284
3285 if (vm_page_speculative_count == 0) {
3286
3287 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3288 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3289
3290 aq = &vm_page_queue_speculative[speculative_age_index];
3291
3292 /*
3293 * set the timer to begin a new group
3294 */
3295 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3296 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
3297
3298 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3299 } else {
3300 aq = &vm_page_queue_speculative[speculative_age_index];
3301
3302 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
3303
3304 speculative_age_index++;
3305
3306 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3307 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3308 if (speculative_age_index == speculative_steal_index) {
3309 speculative_steal_index = speculative_age_index + 1;
3310
3311 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3312 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3313 }
3314 aq = &vm_page_queue_speculative[speculative_age_index];
3315
3316 if (!queue_empty(&aq->age_q))
3317 vm_page_speculate_ageit(aq);
3318
3319 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3320 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
3321
3322 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3323 }
3324 }
3325 enqueue_tail(&aq->age_q, &m->pageq);
3326 m->speculative = TRUE;
3327 vm_page_speculative_count++;
3328 if (m->object->internal) {
3329 vm_page_pageable_internal_count++;
3330 } else {
3331 vm_page_pageable_external_count++;
3332 }
3333
3334 if (new == TRUE) {
3335 vm_object_lock_assert_exclusive(m->object);
3336
3337 m->object->pages_created++;
3338 #if DEVELOPMENT || DEBUG
3339 vm_page_speculative_created++;
3340 #endif
3341 }
3342 }
3343 VM_PAGE_CHECK(m);
3344 }
3345
3346
3347 /*
3348 * move pages from the specified aging bin to
3349 * the speculative bin that pageout_scan claims from
3350 *
3351 * The page queues must be locked.
3352 */
3353 void
3354 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
3355 {
3356 struct vm_speculative_age_q *sq;
3357 vm_page_t t;
3358
3359 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
3360
3361 if (queue_empty(&sq->age_q)) {
3362 sq->age_q.next = aq->age_q.next;
3363 sq->age_q.prev = aq->age_q.prev;
3364
3365 t = (vm_page_t)sq->age_q.next;
3366 t->pageq.prev = &sq->age_q;
3367
3368 t = (vm_page_t)sq->age_q.prev;
3369 t->pageq.next = &sq->age_q;
3370 } else {
3371 t = (vm_page_t)sq->age_q.prev;
3372 t->pageq.next = aq->age_q.next;
3373
3374 t = (vm_page_t)aq->age_q.next;
3375 t->pageq.prev = sq->age_q.prev;
3376
3377 t = (vm_page_t)aq->age_q.prev;
3378 t->pageq.next = &sq->age_q;
3379
3380 sq->age_q.prev = aq->age_q.prev;
3381 }
3382 queue_init(&aq->age_q);
3383 }
3384
3385
3386 void
3387 vm_page_lru(
3388 vm_page_t m)
3389 {
3390 VM_PAGE_CHECK(m);
3391 assert(m->object != kernel_object);
3392 assert(m->phys_page != vm_page_guard_addr);
3393
3394 #if DEBUG
3395 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3396 #endif
3397 /*
3398 * if this page is currently on the pageout queue, we can't do the
3399 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3400 * and we can't remove it manually since we would need the object lock
3401 * (which is not required here) to decrement the activity_in_progress
3402 * reference which is held on the object while the page is in the pageout queue...
3403 * just let the normal laundry processing proceed
3404 */
3405 if (m->laundry || m->pageout_queue || m->private || m->compressor || (VM_PAGE_WIRED(m)))
3406 return;
3407
3408 m->no_cache = FALSE;
3409
3410 VM_PAGE_QUEUES_REMOVE(m);
3411
3412 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
3413 }
3414
3415
3416 void
3417 vm_page_reactivate_all_throttled(void)
3418 {
3419 vm_page_t first_throttled, last_throttled;
3420 vm_page_t first_active;
3421 vm_page_t m;
3422 int extra_active_count;
3423 int extra_internal_count, extra_external_count;
3424
3425 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
3426 return;
3427
3428 extra_active_count = 0;
3429 extra_internal_count = 0;
3430 extra_external_count = 0;
3431 vm_page_lock_queues();
3432 if (! queue_empty(&vm_page_queue_throttled)) {
3433 /*
3434 * Switch "throttled" pages to "active".
3435 */
3436 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3437 VM_PAGE_CHECK(m);
3438 assert(m->throttled);
3439 assert(!m->active);
3440 assert(!m->inactive);
3441 assert(!m->speculative);
3442 assert(!VM_PAGE_WIRED(m));
3443
3444 extra_active_count++;
3445 if (m->object->internal) {
3446 extra_internal_count++;
3447 } else {
3448 extra_external_count++;
3449 }
3450
3451 m->throttled = FALSE;
3452 m->active = TRUE;
3453 VM_PAGE_CHECK(m);
3454 }
3455
3456 /*
3457 * Transfer the entire throttled queue to the regular LRU page queues.
3458 * We insert it at the head of the active queue, so that these pages
3459 * get re-evaluated by the LRU algorithm first, since they've been
3460 * completely out of it until now.
3461 */
3462 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3463 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3464 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3465 if (queue_empty(&vm_page_queue_active)) {
3466 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3467 } else {
3468 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3469 }
3470 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3471 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3472 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3473
3474 #if DEBUG
3475 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3476 #endif
3477 queue_init(&vm_page_queue_throttled);
3478 /*
3479 * Adjust the global page counts.
3480 */
3481 vm_page_active_count += extra_active_count;
3482 vm_page_pageable_internal_count += extra_internal_count;
3483 vm_page_pageable_external_count += extra_external_count;
3484 vm_page_throttled_count = 0;
3485 }
3486 assert(vm_page_throttled_count == 0);
3487 assert(queue_empty(&vm_page_queue_throttled));
3488 vm_page_unlock_queues();
3489 }
3490
3491
3492 /*
3493 * move pages from the indicated local queue to the global active queue
3494 * it's OK to fail if we're below the hard limit and force == FALSE
3495 * the nolocks == TRUE case is to allow this function to be run on
3496 * the hibernate path
3497 */
3498
3499 void
3500 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3501 {
3502 struct vpl *lq;
3503 vm_page_t first_local, last_local;
3504 vm_page_t first_active;
3505 vm_page_t m;
3506 uint32_t count = 0;
3507
3508 if (vm_page_local_q == NULL)
3509 return;
3510
3511 lq = &vm_page_local_q[lid].vpl_un.vpl;
3512
3513 if (nolocks == FALSE) {
3514 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3515 if ( !vm_page_trylockspin_queues())
3516 return;
3517 } else
3518 vm_page_lockspin_queues();
3519
3520 VPL_LOCK(&lq->vpl_lock);
3521 }
3522 if (lq->vpl_count) {
3523 /*
3524 * Switch "local" pages to "active".
3525 */
3526 assert(!queue_empty(&lq->vpl_queue));
3527
3528 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3529 VM_PAGE_CHECK(m);
3530 assert(m->local);
3531 assert(!m->active);
3532 assert(!m->inactive);
3533 assert(!m->speculative);
3534 assert(!VM_PAGE_WIRED(m));
3535 assert(!m->throttled);
3536 assert(!m->fictitious);
3537
3538 if (m->local_id != lid)
3539 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3540
3541 m->local_id = 0;
3542 m->local = FALSE;
3543 m->active = TRUE;
3544 VM_PAGE_CHECK(m);
3545
3546 count++;
3547 }
3548 if (count != lq->vpl_count)
3549 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3550
3551 /*
3552 * Transfer the entire local queue to the regular LRU page queues.
3553 */
3554 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3555 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3556 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3557
3558 if (queue_empty(&vm_page_queue_active)) {
3559 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3560 } else {
3561 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3562 }
3563 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3564 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3565 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3566
3567 queue_init(&lq->vpl_queue);
3568 /*
3569 * Adjust the global page counts.
3570 */
3571 vm_page_active_count += lq->vpl_count;
3572 vm_page_pageable_internal_count += lq->vpl_internal_count;
3573 vm_page_pageable_external_count += lq->vpl_external_count;
3574 lq->vpl_count = 0;
3575 lq->vpl_internal_count = 0;
3576 lq->vpl_external_count = 0;
3577 }
3578 assert(queue_empty(&lq->vpl_queue));
3579
3580 if (nolocks == FALSE) {
3581 VPL_UNLOCK(&lq->vpl_lock);
3582 vm_page_unlock_queues();
3583 }
3584 }
3585
3586 /*
3587 * vm_page_part_zero_fill:
3588 *
3589 * Zero-fill a part of the page.
3590 */
3591 #define PMAP_ZERO_PART_PAGE_IMPLEMENTED
3592 void
3593 vm_page_part_zero_fill(
3594 vm_page_t m,
3595 vm_offset_t m_pa,
3596 vm_size_t len)
3597 {
3598
3599 #if 0
3600 /*
3601 * we don't hold the page queue lock
3602 * so this check isn't safe to make
3603 */
3604 VM_PAGE_CHECK(m);
3605 #endif
3606
3607 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3608 pmap_zero_part_page(m->phys_page, m_pa, len);
3609 #else
3610 vm_page_t tmp;
3611 while (1) {
3612 tmp = vm_page_grab();
3613 if (tmp == VM_PAGE_NULL) {
3614 vm_page_wait(THREAD_UNINT);
3615 continue;
3616 }
3617 break;
3618 }
3619 vm_page_zero_fill(tmp);
3620 if(m_pa != 0) {
3621 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3622 }
3623 if((m_pa + len) < PAGE_SIZE) {
3624 vm_page_part_copy(m, m_pa + len, tmp,
3625 m_pa + len, PAGE_SIZE - (m_pa + len));
3626 }
3627 vm_page_copy(tmp,m);
3628 VM_PAGE_FREE(tmp);
3629 #endif
3630
3631 }
3632
3633 /*
3634 * vm_page_zero_fill:
3635 *
3636 * Zero-fill the specified page.
3637 */
3638 void
3639 vm_page_zero_fill(
3640 vm_page_t m)
3641 {
3642 XPR(XPR_VM_PAGE,
3643 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3644 m->object, m->offset, m, 0,0);
3645 #if 0
3646 /*
3647 * we don't hold the page queue lock
3648 * so this check isn't safe to make
3649 */
3650 VM_PAGE_CHECK(m);
3651 #endif
3652
3653 // dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3654 pmap_zero_page(m->phys_page);
3655 }
3656
3657 /*
3658 * vm_page_part_copy:
3659 *
3660 * copy part of one page to another
3661 */
3662
3663 void
3664 vm_page_part_copy(
3665 vm_page_t src_m,
3666 vm_offset_t src_pa,
3667 vm_page_t dst_m,
3668 vm_offset_t dst_pa,
3669 vm_size_t len)
3670 {
3671 #if 0
3672 /*
3673 * we don't hold the page queue lock
3674 * so this check isn't safe to make
3675 */
3676 VM_PAGE_CHECK(src_m);
3677 VM_PAGE_CHECK(dst_m);
3678 #endif
3679 pmap_copy_part_page(src_m->phys_page, src_pa,
3680 dst_m->phys_page, dst_pa, len);
3681 }
3682
3683 /*
3684 * vm_page_copy:
3685 *
3686 * Copy one page to another
3687 *
3688 * ENCRYPTED SWAP:
3689 * The source page should not be encrypted. The caller should
3690 * make sure the page is decrypted first, if necessary.
3691 */
3692
3693 int vm_page_copy_cs_validations = 0;
3694 int vm_page_copy_cs_tainted = 0;
3695
3696 void
3697 vm_page_copy(
3698 vm_page_t src_m,
3699 vm_page_t dest_m)
3700 {
3701 XPR(XPR_VM_PAGE,
3702 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3703 src_m->object, src_m->offset,
3704 dest_m->object, dest_m->offset,
3705 0);
3706 #if 0
3707 /*
3708 * we don't hold the page queue lock
3709 * so this check isn't safe to make
3710 */
3711 VM_PAGE_CHECK(src_m);
3712 VM_PAGE_CHECK(dest_m);
3713 #endif
3714 vm_object_lock_assert_held(src_m->object);
3715
3716 /*
3717 * ENCRYPTED SWAP:
3718 * The source page should not be encrypted at this point.
3719 * The destination page will therefore not contain encrypted
3720 * data after the copy.
3721 */
3722 if (src_m->encrypted) {
3723 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3724 }
3725 dest_m->encrypted = FALSE;
3726
3727 if (src_m->object != VM_OBJECT_NULL &&
3728 src_m->object->code_signed) {
3729 /*
3730 * We're copying a page from a code-signed object.
3731 * Whoever ends up mapping the copy page might care about
3732 * the original page's integrity, so let's validate the
3733 * source page now.
3734 */
3735 vm_page_copy_cs_validations++;
3736 vm_page_validate_cs(src_m);
3737 }
3738
3739 if (vm_page_is_slideable(src_m)) {
3740 boolean_t was_busy = src_m->busy;
3741 src_m->busy = TRUE;
3742 (void) vm_page_slide(src_m, 0);
3743 assert(src_m->busy);
3744 if (!was_busy) {
3745 PAGE_WAKEUP_DONE(src_m);
3746 }
3747 }
3748
3749 /*
3750 * Propagate the cs_tainted bit to the copy page. Do not propagate
3751 * the cs_validated bit.
3752 */
3753 dest_m->cs_tainted = src_m->cs_tainted;
3754 if (dest_m->cs_tainted) {
3755 vm_page_copy_cs_tainted++;
3756 }
3757 dest_m->slid = src_m->slid;
3758 dest_m->error = src_m->error; /* sliding src_m might have failed... */
3759 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
3760 }
3761
3762 #if MACH_ASSERT
3763 static void
3764 _vm_page_print(
3765 vm_page_t p)
3766 {
3767 printf("vm_page %p: \n", p);
3768 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3769 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3770 printf(" next=%p\n", VM_PAGE_UNPACK_PTR(p->next_m));
3771 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3772 printf(" wire_count=%u\n", p->wire_count);
3773
3774 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3775 (p->local ? "" : "!"),
3776 (p->inactive ? "" : "!"),
3777 (p->active ? "" : "!"),
3778 (p->pageout_queue ? "" : "!"),
3779 (p->speculative ? "" : "!"),
3780 (p->laundry ? "" : "!"));
3781 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3782 (p->free ? "" : "!"),
3783 (p->reference ? "" : "!"),
3784 (p->gobbled ? "" : "!"),
3785 (p->private ? "" : "!"),
3786 (p->throttled ? "" : "!"));
3787 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3788 (p->busy ? "" : "!"),
3789 (p->wanted ? "" : "!"),
3790 (p->tabled ? "" : "!"),
3791 (p->fictitious ? "" : "!"),
3792 (p->pmapped ? "" : "!"),
3793 (p->wpmapped ? "" : "!"));
3794 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3795 (p->pageout ? "" : "!"),
3796 (p->absent ? "" : "!"),
3797 (p->error ? "" : "!"),
3798 (p->dirty ? "" : "!"),
3799 (p->cleaning ? "" : "!"),
3800 (p->precious ? "" : "!"),
3801 (p->clustered ? "" : "!"));
3802 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3803 (p->overwriting ? "" : "!"),
3804 (p->restart ? "" : "!"),
3805 (p->unusual ? "" : "!"),
3806 (p->encrypted ? "" : "!"),
3807 (p->encrypted_cleaning ? "" : "!"));
3808 printf(" %scs_validated, %scs_tainted, %sno_cache\n",
3809 (p->cs_validated ? "" : "!"),
3810 (p->cs_tainted ? "" : "!"),
3811 (p->no_cache ? "" : "!"));
3812
3813 printf("phys_page=0x%x\n", p->phys_page);
3814 }
3815
3816 /*
3817 * Check that the list of pages is ordered by
3818 * ascending physical address and has no holes.
3819 */
3820 static int
3821 vm_page_verify_contiguous(
3822 vm_page_t pages,
3823 unsigned int npages)
3824 {
3825 register vm_page_t m;
3826 unsigned int page_count;
3827 vm_offset_t prev_addr;
3828
3829 prev_addr = pages->phys_page;
3830 page_count = 1;
3831 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
3832 if (m->phys_page != prev_addr + 1) {
3833 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3834 m, (long)prev_addr, m->phys_page);
3835 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
3836 panic("vm_page_verify_contiguous: not contiguous!");
3837 }
3838 prev_addr = m->phys_page;
3839 ++page_count;
3840 }
3841 if (page_count != npages) {
3842 printf("pages %p actual count 0x%x but requested 0x%x\n",
3843 pages, page_count, npages);
3844 panic("vm_page_verify_contiguous: count error");
3845 }
3846 return 1;
3847 }
3848
3849
3850 /*
3851 * Check the free lists for proper length etc.
3852 */
3853 static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
3854 static unsigned int
3855 vm_page_verify_free_list(
3856 queue_head_t *vm_page_queue,
3857 unsigned int color,
3858 vm_page_t look_for_page,
3859 boolean_t expect_page)
3860 {
3861 unsigned int npages;
3862 vm_page_t m;
3863 vm_page_t prev_m;
3864 boolean_t found_page;
3865
3866 if (! vm_page_verify_this_free_list_enabled)
3867 return 0;
3868
3869 found_page = FALSE;
3870 npages = 0;
3871 prev_m = (vm_page_t) vm_page_queue;
3872 queue_iterate(vm_page_queue,
3873 m,
3874 vm_page_t,
3875 pageq) {
3876
3877 if (m == look_for_page) {
3878 found_page = TRUE;
3879 }
3880 if ((vm_page_t) m->pageq.prev != prev_m)
3881 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3882 color, npages, m, m->pageq.prev, prev_m);
3883 if ( ! m->busy )
3884 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3885 color, npages, m);
3886 if (color != (unsigned int) -1) {
3887 if ((m->phys_page & vm_color_mask) != color)
3888 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3889 color, npages, m, m->phys_page & vm_color_mask, color);
3890 if ( ! m->free )
3891 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3892 color, npages, m);
3893 }
3894 ++npages;
3895 prev_m = m;
3896 }
3897 if (look_for_page != VM_PAGE_NULL) {
3898 unsigned int other_color;
3899
3900 if (expect_page && !found_page) {
3901 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3902 color, npages, look_for_page, look_for_page->phys_page);
3903 _vm_page_print(look_for_page);
3904 for (other_color = 0;
3905 other_color < vm_colors;
3906 other_color++) {
3907 if (other_color == color)
3908 continue;
3909 vm_page_verify_free_list(&vm_page_queue_free[other_color],
3910 other_color, look_for_page, FALSE);
3911 }
3912 if (color == (unsigned int) -1) {
3913 vm_page_verify_free_list(&vm_lopage_queue_free,
3914 (unsigned int) -1, look_for_page, FALSE);
3915 }
3916 panic("vm_page_verify_free_list(color=%u)\n", color);
3917 }
3918 if (!expect_page && found_page) {
3919 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3920 color, npages, look_for_page, look_for_page->phys_page);
3921 }
3922 }
3923 return npages;
3924 }
3925
3926 static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
3927 static void
3928 vm_page_verify_free_lists( void )
3929 {
3930 unsigned int color, npages, nlopages;
3931 boolean_t toggle = TRUE;
3932
3933 if (! vm_page_verify_all_free_lists_enabled)
3934 return;
3935
3936 npages = 0;
3937
3938 lck_mtx_lock(&vm_page_queue_free_lock);
3939
3940 if (vm_page_verify_this_free_list_enabled == TRUE) {
3941 /*
3942 * This variable has been set globally for extra checking of
3943 * each free list Q. Since we didn't set it, we don't own it
3944 * and we shouldn't toggle it.
3945 */
3946 toggle = FALSE;
3947 }
3948
3949 if (toggle == TRUE) {
3950 vm_page_verify_this_free_list_enabled = TRUE;
3951 }
3952
3953 for( color = 0; color < vm_colors; color++ ) {
3954 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
3955 color, VM_PAGE_NULL, FALSE);
3956 }
3957 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
3958 (unsigned int) -1,
3959 VM_PAGE_NULL, FALSE);
3960 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
3961 panic("vm_page_verify_free_lists: "
3962 "npages %u free_count %d nlopages %u lo_free_count %u",
3963 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
3964
3965 if (toggle == TRUE) {
3966 vm_page_verify_this_free_list_enabled = FALSE;
3967 }
3968
3969 lck_mtx_unlock(&vm_page_queue_free_lock);
3970 }
3971
3972 void
3973 vm_page_queues_assert(
3974 vm_page_t mem,
3975 int val)
3976 {
3977 #if DEBUG
3978 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3979 #endif
3980 if (mem->free + mem->active + mem->inactive + mem->speculative +
3981 mem->throttled + mem->pageout_queue > (val)) {
3982 _vm_page_print(mem);
3983 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3984 }
3985 if (VM_PAGE_WIRED(mem)) {
3986 assert(!mem->active);
3987 assert(!mem->inactive);
3988 assert(!mem->speculative);
3989 assert(!mem->throttled);
3990 assert(!mem->pageout_queue);
3991 }
3992 }
3993 #endif /* MACH_ASSERT */
3994
3995
3996 /*
3997 * CONTIGUOUS PAGE ALLOCATION
3998 *
3999 * Find a region large enough to contain at least n pages
4000 * of contiguous physical memory.
4001 *
4002 * This is done by traversing the vm_page_t array in a linear fashion...
4003 * we assume that the vm_page_t array has the available physical pages in an
4004 * ordered, ascending list... this is currently true of all our implementations
4005 * and must remain so... there can be 'holes' in the array... we also can
4006 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
4007 * which used to happen via 'vm_page_convert'... that function was no longer
4008 * being called and was removed...
4009 *
4010 * The basic flow consists of stabilizing some of the interesting state of
4011 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
4012 * sweep at the beginning of the array looking for pages that meet our criteria
4013 * for a 'stealable' page... currently we are pretty conservative... if the page
4014 * meets these criteria and is physically contiguous to the previous page in the 'run'
4015 * we keep developing it. If we hit a page that doesn't fit, we reset our state
4016 * and start to develop a new run... if at this point we've already considered
4017 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
4018 * and mutex_pause (which will yield the processor), to keep the latency low w/r
4019 * to other threads trying to acquire free pages (or move pages from q to q),
4020 * and then continue from the spot we left off... we only make 1 pass through the
4021 * array. Once we have a 'run' that is long enough, we'll go into the loop
4022 * which steals the pages from the queues they're currently on... pages on the free
4023 * queue can be stolen directly... pages that are on any of the other queues
4024 * must be removed from the object they are tabled on... this requires taking the
4025 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
4026 * or if the state of the page behind the vm_object lock is no longer viable, we'll
4027 * dump the pages we've currently stolen back to the free list, and pick up our
4028 * scan from the point where we aborted the 'current' run.
4029 *
4030 *
4031 * Requirements:
4032 * - neither vm_page_queue nor vm_free_list lock can be held on entry
4033 *
4034 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
4035 *
4036 * Algorithm:
4037 */
4038
4039 #define MAX_CONSIDERED_BEFORE_YIELD 1000
4040
4041
4042 #define RESET_STATE_OF_RUN() \
4043 MACRO_BEGIN \
4044 prevcontaddr = -2; \
4045 start_pnum = -1; \
4046 free_considered = 0; \
4047 substitute_needed = 0; \
4048 npages = 0; \
4049 MACRO_END
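
/*
 * Illustrative outline (not part of the original file): a condensed sketch
 * of the sweep described above, compressing the main loop of
 * vm_page_find_contiguous() below into its essential steps.  Variable and
 * macro names are the ones used by that function.
 */
#if 0
	for (page_idx = last_idx; npages < contig_pages && page_idx < vm_pages_count; page_idx++) {
		m = &vm_pages[page_idx];

		if (0 /* page is wired, busy, or otherwise not 'stealable' */) {
			RESET_STATE_OF_RUN();		/* abandon the current run */
		} else if (m->phys_page == prevcontaddr + 1) {
			npages++;			/* physically adjacent... extend the run */
			prevcontaddr = m->phys_page;
		} else {
			npages = 1;			/* this page starts a new run */
			start_idx = page_idx;
			start_pnum = prevcontaddr = m->phys_page;
		}
		if (considered++ > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
			/* drop both locks, mutex_pause(), re-take the locks, reset the run */
		}
	}
#endif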
4050
4051 /*
4052 * Can we steal in-use (i.e. not free) pages when searching for
4053 * physically-contiguous pages ?
4054 */
4055 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
4056
4057 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
4058 #if DEBUG
4059 int vm_page_find_contig_debug = 0;
4060 #endif
4061
4062 static vm_page_t
4063 vm_page_find_contiguous(
4064 unsigned int contig_pages,
4065 ppnum_t max_pnum,
4066 ppnum_t pnum_mask,
4067 boolean_t wire,
4068 int flags)
4069 {
4070 vm_page_t m = NULL;
4071 ppnum_t prevcontaddr;
4072 ppnum_t start_pnum;
4073 unsigned int npages, considered, scanned;
4074 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
4075 unsigned int idx_last_contig_page_found = 0;
4076 int free_considered, free_available;
4077 int substitute_needed;
4078 boolean_t wrapped;
4079 #if DEBUG
4080 clock_sec_t tv_start_sec, tv_end_sec;
4081 clock_usec_t tv_start_usec, tv_end_usec;
4082 #endif
4083 #if MACH_ASSERT
4084 int yielded = 0;
4085 int dumped_run = 0;
4086 int stolen_pages = 0;
4087 int compressed_pages = 0;
4088 #endif
4089
4090 if (contig_pages == 0)
4091 return VM_PAGE_NULL;
4092
4093 #if MACH_ASSERT
4094 vm_page_verify_free_lists();
4095 #endif
4096 #if DEBUG
4097 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
4098 #endif
4099 PAGE_REPLACEMENT_ALLOWED(TRUE);
4100
4101 vm_page_lock_queues();
4102 lck_mtx_lock(&vm_page_queue_free_lock);
4103
4104 RESET_STATE_OF_RUN();
4105
4106 scanned = 0;
4107 considered = 0;
4108 free_available = vm_page_free_count - vm_page_free_reserved;
4109
4110 wrapped = FALSE;
4111
4112 if(flags & KMA_LOMEM)
4113 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
4114 else
4115 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
4116
4117 orig_last_idx = idx_last_contig_page_found;
4118 last_idx = orig_last_idx;
4119
4120 for (page_idx = last_idx, start_idx = last_idx;
4121 npages < contig_pages && page_idx < vm_pages_count;
4122 page_idx++) {
4123 retry:
4124 if (wrapped &&
4125 npages == 0 &&
4126 page_idx >= orig_last_idx) {
4127 /*
4128 * We're back where we started and we haven't
4129 * found any suitable contiguous range. Let's
4130 * give up.
4131 */
4132 break;
4133 }
4134 scanned++;
4135 m = &vm_pages[page_idx];
4136
4137 assert(!m->fictitious);
4138 assert(!m->private);
4139
4140 if (max_pnum && m->phys_page > max_pnum) {
4141 /* no more low pages... */
4142 break;
4143 }
4144 if (!npages && ((m->phys_page & pnum_mask) != 0)) {
4145 /*
4146 * not aligned
4147 */
4148 RESET_STATE_OF_RUN();
4149
4150 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
4151 m->encrypted_cleaning ||
4152 m->pageout_queue || m->laundry || m->wanted ||
4153 m->cleaning || m->overwriting || m->pageout) {
4154 /*
4155 * page is in a transient state
4156 * or a state we don't want to deal
4157 * with, so don't consider it which
4158 * means starting a new run
4159 */
4160 RESET_STATE_OF_RUN();
4161
4162 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled && !m->compressor) {
4163 /*
4164 * page needs to be on one of our queues
4165 * or it needs to belong to the compressor pool
4166 * in order for it to be stable behind the
4167 * locks we hold at this point...
4168 * if not, don't consider it which
4169 * means starting a new run
4170 */
4171 RESET_STATE_OF_RUN();
4172
4173 } else if (!m->free && (!m->tabled || m->busy)) {
4174 /*
4175 * pages on the free list are always 'busy'
4176 * so we couldn't test for 'busy' in the check
4177 * for the transient states... pages that are
4178 * 'free' are never 'tabled', so we also couldn't
4179 * test for 'tabled'. So we check here to make
4180 * sure that a non-free page is not busy and is
4181 * tabled on an object...
4182 * if not, don't consider it which
4183 * means starting a new run
4184 */
4185 RESET_STATE_OF_RUN();
4186
4187 } else {
4188 if (m->phys_page != prevcontaddr + 1) {
4189 if ((m->phys_page & pnum_mask) != 0) {
4190 RESET_STATE_OF_RUN();
4191 goto did_consider;
4192 } else {
4193 npages = 1;
4194 start_idx = page_idx;
4195 start_pnum = m->phys_page;
4196 }
4197 } else {
4198 npages++;
4199 }
4200 prevcontaddr = m->phys_page;
4201
4202 VM_PAGE_CHECK(m);
4203 if (m->free) {
4204 free_considered++;
4205 } else {
4206 /*
4207 * This page is not free.
4208 * If we can't steal used pages,
4209 * we have to give up this run
4210 * and keep looking.
4211 * Otherwise, we might need to
4212 * move the contents of this page
4213 * into a substitute page.
4214 */
4215 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
4216 if (m->pmapped || m->dirty || m->precious) {
4217 substitute_needed++;
4218 }
4219 #else
4220 RESET_STATE_OF_RUN();
4221 #endif
4222 }
4223
4224 if ((free_considered + substitute_needed) > free_available) {
4225 /*
4226 * if we let this run continue
4227 * we will end up dropping the vm_page_free_count
4228 * below the reserve limit... we need to abort
4229 * this run, but we can at least re-consider this
4230 * page... thus the jump back to 'retry'
4231 */
4232 RESET_STATE_OF_RUN();
4233
4234 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
4235 considered++;
4236 goto retry;
4237 }
4238 /*
4239 * free_available == 0
4240 * so can't consider any free pages... if
4241 * we went to retry in this case, we'd
4242 * get stuck looking at the same page
4243 * w/o making any forward progress
4244 * we also want to take this path if we've already
4245 * reached our limit that controls the lock latency
4246 */
4247 }
4248 }
4249 did_consider:
4250 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
4251
4252 PAGE_REPLACEMENT_ALLOWED(FALSE);
4253
4254 lck_mtx_unlock(&vm_page_queue_free_lock);
4255 vm_page_unlock_queues();
4256
4257 mutex_pause(0);
4258
4259 PAGE_REPLACEMENT_ALLOWED(TRUE);
4260
4261 vm_page_lock_queues();
4262 lck_mtx_lock(&vm_page_queue_free_lock);
4263
4264 RESET_STATE_OF_RUN();
4265 /*
4266 * reset our free page limit since we
4267 * dropped the lock protecting the vm_page_free_queue
4268 */
4269 free_available = vm_page_free_count - vm_page_free_reserved;
4270 considered = 0;
4271 #if MACH_ASSERT
4272 yielded++;
4273 #endif
4274 goto retry;
4275 }
4276 considered++;
4277 }
4278 m = VM_PAGE_NULL;
4279
4280 if (npages != contig_pages) {
4281 if (!wrapped) {
4282 /*
4283 * We didn't find a contiguous range but we didn't
4284 * start from the very first page.
4285 * Start again from the very first page.
4286 */
4287 RESET_STATE_OF_RUN();
4288 if( flags & KMA_LOMEM)
4289 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
4290 else
4291 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
4292 last_idx = 0;
4293 page_idx = last_idx;
4294 wrapped = TRUE;
4295 goto retry;
4296 }
4297 lck_mtx_unlock(&vm_page_queue_free_lock);
4298 } else {
4299 vm_page_t m1;
4300 vm_page_t m2;
4301 unsigned int cur_idx;
4302 unsigned int tmp_start_idx;
4303 vm_object_t locked_object = VM_OBJECT_NULL;
4304 boolean_t abort_run = FALSE;
4305
4306 assert(page_idx - start_idx == contig_pages);
4307
4308 tmp_start_idx = start_idx;
4309
4310 /*
4311 * first pass through to pull the free pages
4312 * off of the free queue so that in case we
4313 * need substitute pages, we won't grab any
4314 * of the free pages in the run... we'll clear
4315 * the 'free' bit in the 2nd pass, and even in
4316 * an abort_run case, we'll collect all of the
4317 * free pages in this run and return them to the free list
4318 */
4319 while (start_idx < page_idx) {
4320
4321 m1 = &vm_pages[start_idx++];
4322
4323 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
4324 assert(m1->free);
4325 #endif
4326
4327 if (m1->free) {
4328 unsigned int color;
4329
4330 color = m1->phys_page & vm_color_mask;
4331 #if MACH_ASSERT
4332 vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
4333 #endif
4334 queue_remove(&vm_page_queue_free[color],
4335 m1,
4336 vm_page_t,
4337 pageq);
4338 m1->pageq.next = NULL;
4339 m1->pageq.prev = NULL;
4340 #if MACH_ASSERT
4341 vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
4342 #endif
4343 /*
4344 * Clear the "free" bit so that this page
4345 * does not get considered for another
4346 * concurrent physically-contiguous allocation.
4347 */
4348 m1->free = FALSE;
4349 assert(m1->busy);
4350
4351 vm_page_free_count--;
4352 }
4353 }
4354 if( flags & KMA_LOMEM)
4355 vm_page_lomem_find_contiguous_last_idx = page_idx;
4356 else
4357 vm_page_find_contiguous_last_idx = page_idx;
4358
4359 /*
4360 * we can drop the free queue lock at this point since
4361 * we've pulled any 'free' candidates off of the list
4362 * we need it dropped so that we can do a vm_page_grab
4363 * when substituting for pmapped/dirty pages
4364 */
4365 lck_mtx_unlock(&vm_page_queue_free_lock);
4366
4367 start_idx = tmp_start_idx;
4368 cur_idx = page_idx - 1;
4369
4370 while (start_idx++ < page_idx) {
4371 /*
4372 * must go through the list from back to front
4373 * so that the page list is created in the
4374 * correct order - low -> high phys addresses
4375 */
4376 m1 = &vm_pages[cur_idx--];
4377
4378 assert(!m1->free);
4379
4380 if (m1->object == VM_OBJECT_NULL) {
4381 /*
4382 * page has already been removed from
4383 * the free list in the 1st pass
4384 */
4385 assert(m1->offset == (vm_object_offset_t) -1);
4386 assert(m1->busy);
4387 assert(!m1->wanted);
4388 assert(!m1->laundry);
4389 } else {
4390 vm_object_t object;
4391 int refmod;
4392 boolean_t disconnected, reusable;
4393
4394 if (abort_run == TRUE)
4395 continue;
4396
4397 object = m1->object;
4398
4399 if (object != locked_object) {
4400 if (locked_object) {
4401 vm_object_unlock(locked_object);
4402 locked_object = VM_OBJECT_NULL;
4403 }
4404 if (vm_object_lock_try(object))
4405 locked_object = object;
4406 }
4407 if (locked_object == VM_OBJECT_NULL ||
4408 (VM_PAGE_WIRED(m1) || m1->gobbled ||
4409 m1->encrypted_cleaning ||
4410 m1->pageout_queue || m1->laundry || m1->wanted ||
4411 m1->cleaning || m1->overwriting || m1->pageout || m1->busy)) {
4412
4413 if (locked_object) {
4414 vm_object_unlock(locked_object);
4415 locked_object = VM_OBJECT_NULL;
4416 }
4417 tmp_start_idx = cur_idx;
4418 abort_run = TRUE;
4419 continue;
4420 }
4421
4422 disconnected = FALSE;
4423 reusable = FALSE;
4424
4425 if ((m1->reusable ||
4426 m1->object->all_reusable) &&
4427 m1->inactive &&
4428 !m1->dirty &&
4429 !m1->reference) {
4430 /* reusable page... */
4431 refmod = pmap_disconnect(m1->phys_page);
4432 disconnected = TRUE;
4433 if (refmod == 0) {
4434 /*
4435 * ... not reused: can steal
4436 * without relocating contents.
4437 */
4438 reusable = TRUE;
4439 }
4440 }
4441
4442 if ((m1->pmapped &&
4443 ! reusable) ||
4444 m1->dirty ||
4445 m1->precious) {
4446 vm_object_offset_t offset;
4447
4448 m2 = vm_page_grab();
4449
4450 if (m2 == VM_PAGE_NULL) {
4451 if (locked_object) {
4452 vm_object_unlock(locked_object);
4453 locked_object = VM_OBJECT_NULL;
4454 }
4455 tmp_start_idx = cur_idx;
4456 abort_run = TRUE;
4457 continue;
4458 }
4459 if (! disconnected) {
4460 if (m1->pmapped)
4461 refmod = pmap_disconnect(m1->phys_page);
4462 else
4463 refmod = 0;
4464 }
4465
4466 /* copy the page's contents */
4467 pmap_copy_page(m1->phys_page, m2->phys_page);
4468 /* copy the page's state */
4469 assert(!VM_PAGE_WIRED(m1));
4470 assert(!m1->free);
4471 assert(!m1->pageout_queue);
4472 assert(!m1->laundry);
4473 m2->reference = m1->reference;
4474 assert(!m1->gobbled);
4475 assert(!m1->private);
4476 m2->no_cache = m1->no_cache;
4477 m2->xpmapped = 0;
4478 assert(!m1->busy);
4479 assert(!m1->wanted);
4480 assert(!m1->fictitious);
4481 m2->pmapped = m1->pmapped; /* should flush cache ? */
4482 m2->wpmapped = m1->wpmapped;
4483 assert(!m1->pageout);
4484 m2->absent = m1->absent;
4485 m2->error = m1->error;
4486 m2->dirty = m1->dirty;
4487 assert(!m1->cleaning);
4488 m2->precious = m1->precious;
4489 m2->clustered = m1->clustered;
4490 assert(!m1->overwriting);
4491 m2->restart = m1->restart;
4492 m2->unusual = m1->unusual;
4493 m2->encrypted = m1->encrypted;
4494 assert(!m1->encrypted_cleaning);
4495 m2->cs_validated = m1->cs_validated;
4496 m2->cs_tainted = m1->cs_tainted;
4497
4498 /*
4499 * If m1 had really been reusable,
4500 * we would have just stolen it, so
4501 * let's not propagate its "reusable"
4502 * bit and assert that m2 is not
4503 * marked as "reusable".
4504 */
4505 // m2->reusable = m1->reusable;
4506 assert(!m2->reusable);
4507
4508 assert(!m1->lopage);
4509 m2->slid = m1->slid;
4510 m2->compressor = m1->compressor;
4511
4512 /*
4513 * page may need to be flushed if
4514 * it is marshalled into a UPL
4515 * that is going to be used by a device
4516 * that doesn't support coherency
4517 */
4518 m2->written_by_kernel = TRUE;
4519
4520 /*
4521 * make sure we clear the ref/mod state
4522 * from the pmap layer... else we risk
4523 * inheriting state from the last time
4524 * this page was used...
4525 */
4526 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
4527
4528 if (refmod & VM_MEM_REFERENCED)
4529 m2->reference = TRUE;
4530 if (refmod & VM_MEM_MODIFIED) {
4531 SET_PAGE_DIRTY(m2, TRUE);
4532 }
4533 offset = m1->offset;
4534
4535 /*
4536 * completely cleans up the state
4537 * of the page so that it is ready
4538 * to be put onto the free list, or
4539 * for this purpose it looks like it
4540 * just came off of the free list
4541 */
4542 vm_page_free_prepare(m1);
4543
4544 /*
4545 * now put the substitute page
4546 * on the object
4547 */
4548 vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE);
4549
4550 if (m2->compressor) {
4551 m2->pmapped = TRUE;
4552 m2->wpmapped = TRUE;
4553
4554 PMAP_ENTER(kernel_pmap, m2->offset, m2,
4555 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
4556 #if MACH_ASSERT
4557 compressed_pages++;
4558 #endif
4559 } else {
4560 if (m2->reference)
4561 vm_page_activate(m2);
4562 else
4563 vm_page_deactivate(m2);
4564 }
4565 PAGE_WAKEUP_DONE(m2);
4566
4567 } else {
4568 assert(!m1->compressor);
4569
4570 /*
4571 * completely cleans up the state
4572 * of the page so that it is ready
4573 * to be put onto the free list, or
4574 * for this purpose it looks like it
4575 * just came off of the free list
4576 */
4577 vm_page_free_prepare(m1);
4578 }
4579 #if MACH_ASSERT
4580 stolen_pages++;
4581 #endif
4582 }
4583 m1->pageq.next = (queue_entry_t) m;
4584 m1->pageq.prev = NULL;
4585 m = m1;
4586 }
4587 if (locked_object) {
4588 vm_object_unlock(locked_object);
4589 locked_object = VM_OBJECT_NULL;
4590 }
4591
4592 if (abort_run == TRUE) {
4593 if (m != VM_PAGE_NULL) {
4594 vm_page_free_list(m, FALSE);
4595 }
4596 #if MACH_ASSERT
4597 dumped_run++;
4598 #endif
4599 /*
4600 * want the index of the last
4601 * page in this run that was
4602 * successfully 'stolen', so back
4603 * it up 1 for the auto-decrement on use
4604 * and 1 more to bump back over this page
4605 */
4606 page_idx = tmp_start_idx + 2;
4607 if (page_idx >= vm_pages_count) {
4608 if (wrapped)
4609 goto done_scanning;
4610 page_idx = last_idx = 0;
4611 wrapped = TRUE;
4612 }
4613 abort_run = FALSE;
4614
4615 /*
4616 * We didn't find a contiguous range but we didn't
4617 * start from the very first page.
4618 * Start again from the very first page.
4619 */
4620 RESET_STATE_OF_RUN();
4621
4622 if( flags & KMA_LOMEM)
4623 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4624 else
4625 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4626
4627 last_idx = page_idx;
4628
4629 lck_mtx_lock(&vm_page_queue_free_lock);
4630 /*
4631 * reset our free page limit since we
4632 * dropped the lock protecting the vm_page_free_queue
4633 */
4634 free_available = vm_page_free_count - vm_page_free_reserved;
4635 goto retry;
4636 }
4637
4638 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
4639
4640 if (wire == TRUE)
4641 m1->wire_count++;
4642 else
4643 m1->gobbled = TRUE;
4644 }
4645 if (wire == FALSE)
4646 vm_page_gobble_count += npages;
4647
4648 /*
4649 * gobbled pages are also counted as wired pages
4650 */
4651 vm_page_wire_count += npages;
4652
4653 assert(vm_page_verify_contiguous(m, npages));
4654 }
4655 done_scanning:
4656 PAGE_REPLACEMENT_ALLOWED(FALSE);
4657
4658 vm_page_unlock_queues();
4659
4660 #if DEBUG
4661 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4662
4663 tv_end_sec -= tv_start_sec;
4664 if (tv_end_usec < tv_start_usec) {
4665 tv_end_sec--;
4666 tv_end_usec += 1000000;
4667 }
4668 tv_end_usec -= tv_start_usec;
4669 if (tv_end_usec >= 1000000) {
4670 tv_end_sec++;
4671 tv_end_usec -= 1000000;
4672 }
4673 if (vm_page_find_contig_debug) {
4674 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
4675 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4676 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4677 scanned, yielded, dumped_run, stolen_pages, compressed_pages);
4678 }
4679
4680 #endif
4681 #if MACH_ASSERT
4682 vm_page_verify_free_lists();
4683 #endif
4684 return m;
4685 }
4686
4687 /*
4688 * Allocate a list of contiguous, wired pages.
4689 */
4690 kern_return_t
4691 cpm_allocate(
4692 vm_size_t size,
4693 vm_page_t *list,
4694 ppnum_t max_pnum,
4695 ppnum_t pnum_mask,
4696 boolean_t wire,
4697 int flags)
4698 {
4699 vm_page_t pages;
4700 unsigned int npages;
4701
4702 if (size % PAGE_SIZE != 0)
4703 return KERN_INVALID_ARGUMENT;
4704
4705 npages = (unsigned int) (size / PAGE_SIZE);
4706 if (npages != size / PAGE_SIZE) {
4707 /* 32-bit overflow */
4708 return KERN_INVALID_ARGUMENT;
4709 }
4710
4711 /*
4712 * Obtain a pointer to a subset of the free
4713 * list large enough to satisfy the request;
4714 * the region will be physically contiguous.
4715 */
4716 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
4717
4718 if (pages == VM_PAGE_NULL)
4719 return KERN_NO_SPACE;
4720 /*
4721 * determine need for wakeups
4722 */
4723 if ((vm_page_free_count < vm_page_free_min) ||
4724 ((vm_page_free_count < vm_page_free_target) &&
4725 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4726 thread_wakeup((event_t) &vm_page_free_wanted);
4727
4728 VM_CHECK_MEMORYSTATUS;
4729
4730 /*
4731 * The CPM pages should now be available and
4732 * ordered by ascending physical address.
4733 */
4734 assert(vm_page_verify_contiguous(pages, npages));
4735
4736 *list = pages;
4737 return KERN_SUCCESS;
4738 }
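
/*
 * Illustrative only (not part of the original file): a hypothetical caller
 * of cpm_allocate() asking for 16 wired, physically contiguous pages below
 * a chosen maximum physical page number.  The argument values are made up;
 * the signature is the one defined above.
 */
#if 0
	vm_page_t	page_list;
	kern_return_t	kr;

	kr = cpm_allocate(16 * PAGE_SIZE,	/* size: must be a multiple of PAGE_SIZE */
			  &page_list,		/* out: pages ordered by ascending phys addr */
			  (ppnum_t) 0xFFFFF,	/* max_pnum: highest acceptable page number */
			  0,			/* pnum_mask: no alignment constraint */
			  TRUE,			/* wire the pages rather than gobble them */
			  0);			/* flags: KMA_LOMEM not requested */
	if (kr != KERN_SUCCESS) {
		/* no suitable contiguous run could be found or stolen */
	}
#endif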
4739
4740
4741 unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
4742
4743 /*
4744 * when working on a 'run' of pages, it is necessary to hold
4745 * the vm_page_queue_lock (a hot global lock) for certain operations
4746 * on the page... however, the majority of the work can be done
4747 * while merely holding the object lock... in fact there are certain
4748 * collections of pages that don't require any work brokered by the
4749 * vm_page_queue_lock... to mitigate the time spent behind the global
4750 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
4751 * while doing all of the work that doesn't require the vm_page_queue_lock...
4752 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
4753 * necessary work for each page... we will grab the busy bit on the page
4754 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
4755 * if it can't immediately take the vm_page_queue_lock in order to compete
4756 * for the locks in the same order that vm_pageout_scan takes them.
4757 * the operation names are modeled after the names of the routines that
4758 * need to be called in order to make the changes very obvious in the
4759 * original loop
4760 */
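
/*
 * Illustrative sketch (not part of the original file) of the 2 pass pattern
 * described above.  The dw_mask bits and struct fields are the ones consumed
 * by vm_page_do_delayed_work() below; the real callers live in vm_pageout.c
 * and use batching macros that are not shown here.
 */
#if 0
	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
	struct vm_page_delayed_work	*dwp = &dw_array[0];
	int				dw_count = 0;

	/* pass 1: holding only the object lock... record the queue work for this page */
	dwp->dw_m = m;
	dwp->dw_mask = DW_clear_busy | DW_PAGE_WAKEUP;
	dwp++;
	dw_count++;

	/* pass 2: take the vm_page_queue_lock once for the whole batch */
	if (dw_count)
		vm_page_do_delayed_work(object, &dw_array[0], dw_count);
#endif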
4761
4762 void
4763 vm_page_do_delayed_work(
4764 vm_object_t object,
4765 struct vm_page_delayed_work *dwp,
4766 int dw_count)
4767 {
4768 int j;
4769 vm_page_t m;
4770 vm_page_t local_free_q = VM_PAGE_NULL;
4771
4772 /*
4773 * pageout_scan takes the vm_page_lock_queues first
4774 * then tries for the object lock... to avoid what
4775 * is effectively a lock inversion, we'll go to the
4776 * trouble of taking them in that same order... otherwise
4777 * if this object contains the majority of the pages resident
4778 * in the UBC (or a small set of large objects actively being
4779 * worked on contain the majority of the pages), we could
4780 * cause the pageout_scan thread to 'starve' in its attempt
4781 * to find pages to move to the free queue, since it has to
4782 * successfully acquire the object lock of any candidate page
4783 * before it can steal/clean it.
4784 */
4785 if (!vm_page_trylockspin_queues()) {
4786 vm_object_unlock(object);
4787
4788 vm_page_lockspin_queues();
4789
4790 for (j = 0; ; j++) {
4791 if (!vm_object_lock_avoid(object) &&
4792 _vm_object_lock_try(object))
4793 break;
4794 vm_page_unlock_queues();
4795 mutex_pause(j);
4796 vm_page_lockspin_queues();
4797 }
4798 }
4799 for (j = 0; j < dw_count; j++, dwp++) {
4800
4801 m = dwp->dw_m;
4802
4803 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4804 vm_pageout_throttle_up(m);
4805 #if CONFIG_PHANTOM_CACHE
4806 if (dwp->dw_mask & DW_vm_phantom_cache_update)
4807 vm_phantom_cache_update(m);
4808 #endif
4809 if (dwp->dw_mask & DW_vm_page_wire)
4810 vm_page_wire(m);
4811 else if (dwp->dw_mask & DW_vm_page_unwire) {
4812 boolean_t queueit;
4813
4814 queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;
4815
4816 vm_page_unwire(m, queueit);
4817 }
4818 if (dwp->dw_mask & DW_vm_page_free) {
4819 vm_page_free_prepare_queues(m);
4820
4821 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
4822 /*
4823 * Add this page to our list of reclaimed pages,
4824 * to be freed later.
4825 */
4826 m->pageq.next = (queue_entry_t) local_free_q;
4827 local_free_q = m;
4828 } else {
4829 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
4830 vm_page_deactivate_internal(m, FALSE);
4831 else if (dwp->dw_mask & DW_vm_page_activate) {
4832 if (m->active == FALSE) {
4833 vm_page_activate(m);
4834 }
4835 }
4836 else if (dwp->dw_mask & DW_vm_page_speculate)
4837 vm_page_speculate(m, TRUE);
4838 else if (dwp->dw_mask & DW_enqueue_cleaned) {
4839 /*
4840 * if we didn't hold the object lock and did this,
4841 * we might disconnect the page, then someone might
4842 * soft fault it back in, then we would put it on the
4843 * cleaned queue, and so we would have a referenced (maybe even dirty)
4844 * page on that queue, which we don't want
4845 */
4846 int refmod_state = pmap_disconnect(m->phys_page);
4847
4848 if ((refmod_state & VM_MEM_REFERENCED)) {
4849 /*
4850 * this page has been touched since it got cleaned; let's activate it
4851 * if it hasn't already been
4852 */
4853 vm_pageout_enqueued_cleaned++;
4854 vm_pageout_cleaned_reactivated++;
4855 vm_pageout_cleaned_commit_reactivated++;
4856
4857 if (m->active == FALSE)
4858 vm_page_activate(m);
4859 } else {
4860 m->reference = FALSE;
4861 vm_page_enqueue_cleaned(m);
4862 }
4863 }
4864 else if (dwp->dw_mask & DW_vm_page_lru)
4865 vm_page_lru(m);
4866 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
4867 if ( !m->pageout_queue)
4868 VM_PAGE_QUEUES_REMOVE(m);
4869 }
4870 if (dwp->dw_mask & DW_set_reference)
4871 m->reference = TRUE;
4872 else if (dwp->dw_mask & DW_clear_reference)
4873 m->reference = FALSE;
4874
4875 if (dwp->dw_mask & DW_move_page) {
4876 if ( !m->pageout_queue) {
4877 VM_PAGE_QUEUES_REMOVE(m);
4878
4879 assert(m->object != kernel_object);
4880
4881 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
4882 }
4883 }
4884 if (dwp->dw_mask & DW_clear_busy)
4885 m->busy = FALSE;
4886
4887 if (dwp->dw_mask & DW_PAGE_WAKEUP)
4888 PAGE_WAKEUP(m);
4889 }
4890 }
4891 vm_page_unlock_queues();
4892
4893 if (local_free_q)
4894 vm_page_free_list(local_free_q, TRUE);
4895
4896 VM_CHECK_MEMORYSTATUS;
4897
4898 }
4899
4900 kern_return_t
4901 vm_page_alloc_list(
4902 int page_count,
4903 int flags,
4904 vm_page_t *list)
4905 {
4906 vm_page_t lo_page_list = VM_PAGE_NULL;
4907 vm_page_t mem;
4908 int i;
4909
4910 if ( !(flags & KMA_LOMEM))
4911 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
4912
4913 for (i = 0; i < page_count; i++) {
4914
4915 mem = vm_page_grablo();
4916
4917 if (mem == VM_PAGE_NULL) {
4918 if (lo_page_list)
4919 vm_page_free_list(lo_page_list, FALSE);
4920
4921 *list = VM_PAGE_NULL;
4922
4923 return (KERN_RESOURCE_SHORTAGE);
4924 }
4925 mem->pageq.next = (queue_entry_t) lo_page_list;
4926 lo_page_list = mem;
4927 }
4928 *list = lo_page_list;
4929
4930 return (KERN_SUCCESS);
4931 }
4932
4933 void
4934 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
4935 {
4936 page->offset = offset;
4937 }
4938
4939 vm_page_t
4940 vm_page_get_next(vm_page_t page)
4941 {
4942 return ((vm_page_t) page->pageq.next);
4943 }
4944
4945 vm_object_offset_t
4946 vm_page_get_offset(vm_page_t page)
4947 {
4948 return (page->offset);
4949 }
4950
4951 ppnum_t
4952 vm_page_get_phys_page(vm_page_t page)
4953 {
4954 return (page->phys_page);
4955 }
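
/*
 * Illustrative only (not part of the original file): walking a list
 * returned by vm_page_alloc_list() with the accessors defined above.
 */
#if 0
	vm_page_t	p;
	ppnum_t		pnum;

	for (p = list; p != VM_PAGE_NULL; p = vm_page_get_next(p)) {
		pnum = vm_page_get_phys_page(p);
		/* ... hand pnum to whatever device code needed the pages ... */
	}
#endif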
4956
4957
4958 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4959
4960 #if HIBERNATION
4961
4962 static vm_page_t hibernate_gobble_queue;
4963
4964 extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4965
4966 static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
4967 static int hibernate_flush_dirty_pages(int);
4968 static int hibernate_flush_queue(queue_head_t *, int);
4969
4970 void hibernate_flush_wait(void);
4971 void hibernate_mark_in_progress(void);
4972 void hibernate_clear_in_progress(void);
4973
4974 void hibernate_free_range(int, int);
4975 void hibernate_hash_insert_page(vm_page_t);
4976 uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
4977 void hibernate_rebuild_vm_structs(void);
4978 uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
4979 ppnum_t hibernate_lookup_paddr(unsigned int);
4980
4981 struct hibernate_statistics {
4982 int hibernate_considered;
4983 int hibernate_reentered_on_q;
4984 int hibernate_found_dirty;
4985 int hibernate_skipped_cleaning;
4986 int hibernate_skipped_transient;
4987 int hibernate_skipped_precious;
4988 int hibernate_skipped_external;
4989 int hibernate_queue_nolock;
4990 int hibernate_queue_paused;
4991 int hibernate_throttled;
4992 int hibernate_throttle_timeout;
4993 int hibernate_drained;
4994 int hibernate_drain_timeout;
4995 int cd_lock_failed;
4996 int cd_found_precious;
4997 int cd_found_wired;
4998 int cd_found_busy;
4999 int cd_found_unusual;
5000 int cd_found_cleaning;
5001 int cd_found_laundry;
5002 int cd_found_dirty;
5003 int cd_found_xpmapped;
5004 int cd_skipped_xpmapped;
5005 int cd_local_free;
5006 int cd_total_free;
5007 int cd_vm_page_wire_count;
5008 int cd_vm_struct_pages_unneeded;
5009 int cd_pages;
5010 int cd_discarded;
5011 int cd_count_wire;
5012 } hibernate_stats;
5013
5014
5015 /*
5016 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
5017 * so that we don't overrun the estimated image size, which would
5018 * result in a hibernation failure.
5019 */
5020 #define HIBERNATE_XPMAPPED_LIMIT 40000
5021
5022
5023 static int
5024 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
5025 {
5026 wait_result_t wait_result;
5027
5028 vm_page_lock_queues();
5029
5030 while ( !queue_empty(&q->pgo_pending) ) {
5031
5032 q->pgo_draining = TRUE;
5033
5034 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
5035
5036 vm_page_unlock_queues();
5037
5038 wait_result = thread_block(THREAD_CONTINUE_NULL);
5039
5040 if (wait_result == THREAD_TIMED_OUT && !queue_empty(&q->pgo_pending)) {
5041 hibernate_stats.hibernate_drain_timeout++;
5042
5043 if (q == &vm_pageout_queue_external)
5044 return (0);
5045
5046 return (1);
5047 }
5048 vm_page_lock_queues();
5049
5050 hibernate_stats.hibernate_drained++;
5051 }
5052 vm_page_unlock_queues();
5053
5054 return (0);
5055 }
5056
5057
5058 boolean_t hibernate_skip_external = FALSE;
5059
5060 static int
5061 hibernate_flush_queue(queue_head_t *q, int qcount)
5062 {
5063 vm_page_t m;
5064 vm_object_t l_object = NULL;
5065 vm_object_t m_object = NULL;
5066 int refmod_state = 0;
5067 int try_failed_count = 0;
5068 int retval = 0;
5069 int current_run = 0;
5070 struct vm_pageout_queue *iq;
5071 struct vm_pageout_queue *eq;
5072 struct vm_pageout_queue *tq;
5073
5074
5075 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
5076
5077 iq = &vm_pageout_queue_internal;
5078 eq = &vm_pageout_queue_external;
5079
5080 vm_page_lock_queues();
5081
5082 while (qcount && !queue_empty(q)) {
5083
5084 if (current_run++ == 1000) {
5085 if (hibernate_should_abort()) {
5086 retval = 1;
5087 break;
5088 }
5089 current_run = 0;
5090 }
5091
5092 m = (vm_page_t) queue_first(q);
5093 m_object = m->object;
5094
5095 /*
5096 * check to see if we currently are working
5097 * with the same object... if so, we've
5098 * already got the lock
5099 */
5100 if (m_object != l_object) {
5101 /*
5102 * the object associated with candidate page is
5103 * different from the one we were just working
5104 * with... dump the lock if we still own it
5105 */
5106 if (l_object != NULL) {
5107 vm_object_unlock(l_object);
5108 l_object = NULL;
5109 }
5110 /*
5111 * Try to lock object; since we've already got the
5112 * page queues lock, we can only 'try' for this one.
5113 * if the 'try' fails, we need to do a mutex_pause
5114 * to allow the owner of the object lock a chance to
5115 * run...
5116 */
5117 if ( !vm_object_lock_try_scan(m_object)) {
5118
5119 if (try_failed_count > 20) {
5120 hibernate_stats.hibernate_queue_nolock++;
5121
5122 goto reenter_pg_on_q;
5123 }
5124
5125 vm_page_unlock_queues();
5126 mutex_pause(try_failed_count++);
5127 vm_page_lock_queues();
5128
5129 hibernate_stats.hibernate_queue_paused++;
5130 continue;
5131 } else {
5132 l_object = m_object;
5133 }
5134 }
5135 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
5136 /*
5137 * page is not to be cleaned
5138 * put it back on the head of its queue
5139 */
5140 if (m->cleaning)
5141 hibernate_stats.hibernate_skipped_cleaning++;
5142 else
5143 hibernate_stats.hibernate_skipped_transient++;
5144
5145 goto reenter_pg_on_q;
5146 }
5147 if (m_object->copy == VM_OBJECT_NULL) {
5148 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
5149 /*
5150 * let the normal hibernate image path
5151 * deal with these
5152 */
5153 goto reenter_pg_on_q;
5154 }
5155 }
5156 if ( !m->dirty && m->pmapped) {
5157 refmod_state = pmap_get_refmod(m->phys_page);
5158
5159 if ((refmod_state & VM_MEM_MODIFIED)) {
5160 SET_PAGE_DIRTY(m, FALSE);
5161 }
5162 } else
5163 refmod_state = 0;
5164
5165 if ( !m->dirty) {
5166 /*
5167 * page is not to be cleaned
5168 * put it back on the head of its queue
5169 */
5170 if (m->precious)
5171 hibernate_stats.hibernate_skipped_precious++;
5172
5173 goto reenter_pg_on_q;
5174 }
5175
5176 if (hibernate_skip_external == TRUE && !m_object->internal) {
5177
5178 hibernate_stats.hibernate_skipped_external++;
5179
5180 goto reenter_pg_on_q;
5181 }
5182 tq = NULL;
5183
5184 if (m_object->internal) {
5185 if (VM_PAGE_Q_THROTTLED(iq))
5186 tq = iq;
5187 } else if (VM_PAGE_Q_THROTTLED(eq))
5188 tq = eq;
5189
5190 if (tq != NULL) {
5191 wait_result_t wait_result;
5192 int wait_count = 5;
5193
5194 if (l_object != NULL) {
5195 vm_object_unlock(l_object);
5196 l_object = NULL;
5197 }
5198
5199 while (retval == 0) {
5200
5201 tq->pgo_throttled = TRUE;
5202
5203 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
5204
5205 vm_page_unlock_queues();
5206
5207 wait_result = thread_block(THREAD_CONTINUE_NULL);
5208
5209 vm_page_lock_queues();
5210
5211 if (wait_result != THREAD_TIMED_OUT)
5212 break;
5213 if (!VM_PAGE_Q_THROTTLED(tq))
5214 break;
5215
5216 if (hibernate_should_abort())
5217 retval = 1;
5218
5219 if (--wait_count == 0) {
5220
5221 hibernate_stats.hibernate_throttle_timeout++;
5222
5223 if (tq == eq) {
5224 hibernate_skip_external = TRUE;
5225 break;
5226 }
5227 retval = 1;
5228 }
5229 }
5230 if (retval)
5231 break;
5232
5233 hibernate_stats.hibernate_throttled++;
5234
5235 continue;
5236 }
5237 /*
5238 * we've already factored out pages in the laundry which
5239 * means this page can't be on the pageout queue so it's
5240 * safe to do the VM_PAGE_QUEUES_REMOVE
5241 */
5242 assert(!m->pageout_queue);
5243
5244 VM_PAGE_QUEUES_REMOVE(m);
5245
5246 if (COMPRESSED_PAGER_IS_ACTIVE && m_object->internal == TRUE)
5247 pmap_disconnect_options(m->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL);
5248
5249 vm_pageout_cluster(m, FALSE);
5250
5251 hibernate_stats.hibernate_found_dirty++;
5252
5253 goto next_pg;
5254
5255 reenter_pg_on_q:
5256 queue_remove(q, m, vm_page_t, pageq);
5257 queue_enter(q, m, vm_page_t, pageq);
5258
5259 hibernate_stats.hibernate_reentered_on_q++;
5260 next_pg:
5261 hibernate_stats.hibernate_considered++;
5262
5263 qcount--;
5264 try_failed_count = 0;
5265 }
5266 if (l_object != NULL) {
5267 vm_object_unlock(l_object);
5268 l_object = NULL;
5269 }
5270
5271 vm_page_unlock_queues();
5272
5273 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
5274
5275 return (retval);
5276 }
5277
5278
5279 static int
5280 hibernate_flush_dirty_pages(int pass)
5281 {
5282 struct vm_speculative_age_q *aq;
5283 uint32_t i;
5284
5285 if (vm_page_local_q) {
5286 for (i = 0; i < vm_page_local_q_count; i++)
5287 vm_page_reactivate_local(i, TRUE, FALSE);
5288 }
5289
5290 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
5291 int qcount;
5292 vm_page_t m;
5293
5294 aq = &vm_page_queue_speculative[i];
5295
5296 if (queue_empty(&aq->age_q))
5297 continue;
5298 qcount = 0;
5299
5300 vm_page_lockspin_queues();
5301
5302 queue_iterate(&aq->age_q,
5303 m,
5304 vm_page_t,
5305 pageq)
5306 {
5307 qcount++;
5308 }
5309 vm_page_unlock_queues();
5310
5311 if (qcount) {
5312 if (hibernate_flush_queue(&aq->age_q, qcount))
5313 return (1);
5314 }
5315 }
5316 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
5317 return (1);
5318 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
5319 return (1);
5320 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
5321 return (1);
5322 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
5323 return (1);
5324
5325 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5326 vm_compressor_record_warmup_start();
5327
5328 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
5329 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5330 vm_compressor_record_warmup_end();
5331 return (1);
5332 }
5333 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
5334 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5335 vm_compressor_record_warmup_end();
5336 return (1);
5337 }
5338 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5339 vm_compressor_record_warmup_end();
5340
5341 if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
5342 return (1);
5343
5344 return (0);
5345 }
5346
5347
5348 void
5349 hibernate_reset_stats()
5350 {
5351 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
5352 }
5353
5354
5355 int
5356 hibernate_flush_memory()
5357 {
5358 int retval;
5359
5360 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
5361
5362 hibernate_cleaning_in_progress = TRUE;
5363 hibernate_skip_external = FALSE;
5364
5365 if ((retval = hibernate_flush_dirty_pages(1)) == 0) {
5366
5367 if (COMPRESSED_PAGER_IS_ACTIVE) {
5368
5369 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
5370
5371 vm_compressor_flush();
5372
5373 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
5374 }
5375 if (consider_buffer_cache_collect != NULL) {
5376 unsigned int orig_wire_count;
5377
5378 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
5379 orig_wire_count = vm_page_wire_count;
5380
5381 (void)(*consider_buffer_cache_collect)(1);
5382 consider_zone_gc(TRUE);
5383
5384 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
5385
5386 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
5387 }
5388 }
5389 hibernate_cleaning_in_progress = FALSE;
5390
5391 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
5392
5393 if (retval && COMPRESSED_PAGER_IS_ACTIVE)
5394 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);
5395
5396
5397 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
5398 hibernate_stats.hibernate_considered,
5399 hibernate_stats.hibernate_reentered_on_q,
5400 hibernate_stats.hibernate_found_dirty);
5401 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
5402 hibernate_stats.hibernate_skipped_cleaning,
5403 hibernate_stats.hibernate_skipped_transient,
5404 hibernate_stats.hibernate_skipped_precious,
5405 hibernate_stats.hibernate_skipped_external,
5406 hibernate_stats.hibernate_queue_nolock);
5407 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
5408 hibernate_stats.hibernate_queue_paused,
5409 hibernate_stats.hibernate_throttled,
5410 hibernate_stats.hibernate_throttle_timeout,
5411 hibernate_stats.hibernate_drained,
5412 hibernate_stats.hibernate_drain_timeout);
5413
5414 return (retval);
5415 }
5416
5417
5418 static void
5419 hibernate_page_list_zero(hibernate_page_list_t *list)
5420 {
5421 uint32_t bank;
5422 hibernate_bitmap_t * bitmap;
5423
5424 bitmap = &list->bank_bitmap[0];
5425 for (bank = 0; bank < list->bank_count; bank++)
5426 {
5427 uint32_t last_bit;
5428
5429 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
5430 // set out-of-bound bits at end of bitmap.
5431 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
5432 if (last_bit)
5433 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
5434
5435 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
5436 }
5437 }
5438
5439 void
5440 hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
5441 {
5442 uint32_t i;
5443 vm_page_t m;
5444 uint64_t start, end, timeout, nsec;
5445 clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
5446 clock_get_uptime(&start);
5447
5448 for (i = 0; i < gobble_count; i++)
5449 {
5450 while (VM_PAGE_NULL == (m = vm_page_grab()))
5451 {
5452 clock_get_uptime(&end);
5453 if (end >= timeout)
5454 break;
5455 VM_PAGE_WAIT();
5456 }
5457 if (!m)
5458 break;
5459 m->busy = FALSE;
5460 vm_page_gobble(m);
5461
5462 m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
5463 hibernate_gobble_queue = m;
5464 }
5465
5466 clock_get_uptime(&end);
5467 absolutetime_to_nanoseconds(end - start, &nsec);
5468 HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
5469 }
5470
5471 void
5472 hibernate_free_gobble_pages(void)
5473 {
5474 vm_page_t m, next;
5475 uint32_t count = 0;
5476
5477 m = (vm_page_t) hibernate_gobble_queue;
5478 while(m)
5479 {
5480 next = (vm_page_t) m->pageq.next;
5481 vm_page_free(m);
5482 count++;
5483 m = next;
5484 }
5485 hibernate_gobble_queue = VM_PAGE_NULL;
5486
5487 if (count)
5488 HIBLOG("Freed %d pages\n", count);
5489 }
5490
5491 static boolean_t
5492 hibernate_consider_discard(vm_page_t m, boolean_t preflight)
5493 {
5494 vm_object_t object = NULL;
5495 int refmod_state;
5496 boolean_t discard = FALSE;
5497
5498 do
5499 {
5500 if (m->private)
5501 panic("hibernate_consider_discard: private");
5502
5503 if (!vm_object_lock_try(m->object)) {
5504 if (!preflight) hibernate_stats.cd_lock_failed++;
5505 break;
5506 }
5507 object = m->object;
5508
5509 if (VM_PAGE_WIRED(m)) {
5510 if (!preflight) hibernate_stats.cd_found_wired++;
5511 break;
5512 }
5513 if (m->precious) {
5514 if (!preflight) hibernate_stats.cd_found_precious++;
5515 break;
5516 }
5517 if (m->busy || !object->alive) {
5518 /*
5519 * Somebody is playing with this page.
5520 */
5521 if (!preflight) hibernate_stats.cd_found_busy++;
5522 break;
5523 }
5524 if (m->absent || m->unusual || m->error) {
5525 /*
5526 * If it's unusual in any way, ignore it
5527 */
5528 if (!preflight) hibernate_stats.cd_found_unusual++;
5529 break;
5530 }
5531 if (m->cleaning) {
5532 if (!preflight) hibernate_stats.cd_found_cleaning++;
5533 break;
5534 }
5535 if (m->laundry) {
5536 if (!preflight) hibernate_stats.cd_found_laundry++;
5537 break;
5538 }
5539 if (!m->dirty)
5540 {
5541 refmod_state = pmap_get_refmod(m->phys_page);
5542
5543 if (refmod_state & VM_MEM_REFERENCED)
5544 m->reference = TRUE;
5545 if (refmod_state & VM_MEM_MODIFIED) {
5546 SET_PAGE_DIRTY(m, FALSE);
5547 }
5548 }
5549
5550 /*
5551 * If it's clean or purgeable we can discard the page on wakeup.
5552 */
5553 discard = (!m->dirty)
5554 || (VM_PURGABLE_VOLATILE == object->purgable)
5555 || (VM_PURGABLE_EMPTY == object->purgable);
5556
5557
5558 if (discard == FALSE) {
5559 if (!preflight)
5560 hibernate_stats.cd_found_dirty++;
5561 } else if (m->xpmapped && m->reference && !object->internal) {
5562 if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
5563 if (!preflight)
5564 hibernate_stats.cd_found_xpmapped++;
5565 discard = FALSE;
5566 } else {
5567 if (!preflight)
5568 hibernate_stats.cd_skipped_xpmapped++;
5569 }
5570 }
5571 }
5572 while (FALSE);
5573
5574 if (object)
5575 vm_object_unlock(object);
5576
5577 return (discard);
5578 }
5579
5580
5581 static void
5582 hibernate_discard_page(vm_page_t m)
5583 {
5584 if (m->absent || m->unusual || m->error)
5585 /*
5586 * If it's unusual in anyway, ignore
5587 */
5588 return;
5589
5590 #if MACH_ASSERT || DEBUG
5591 vm_object_t object = m->object;
5592 if (!vm_object_lock_try(m->object))
5593 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
5594 #else
5595 /* No need to lock page queue for token delete, hibernate_vm_unlock()
5596 makes sure these locks are uncontended before sleep */
5597 #endif /* MACH_ASSERT || DEBUG */
5598
5599 if (m->pmapped == TRUE)
5600 {
5601 __unused int refmod_state = pmap_disconnect(m->phys_page);
5602 }
5603
5604 if (m->laundry)
5605 panic("hibernate_discard_page(%p) laundry", m);
5606 if (m->private)
5607 panic("hibernate_discard_page(%p) private", m);
5608 if (m->fictitious)
5609 panic("hibernate_discard_page(%p) fictitious", m);
5610
5611 if (VM_PURGABLE_VOLATILE == m->object->purgable)
5612 {
5613 /* object should be on a queue */
5614 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
5615 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
5616 assert(old_queue);
5617 if (m->object->purgeable_when_ripe) {
5618 vm_purgeable_token_delete_first(old_queue);
5619 }
5620 m->object->purgable = VM_PURGABLE_EMPTY;
5621
5622 /*
5623 * Purgeable ledgers: pages of VOLATILE and EMPTY objects are
5624 * accounted in the "volatile" ledger, so no change here.
5625 * We have to update vm_page_purgeable_count, though, since we're
5626 * effectively purging this object.
5627 */
5628 unsigned int delta;
5629 assert(m->object->resident_page_count >= m->object->wired_page_count);
5630 delta = (m->object->resident_page_count - m->object->wired_page_count);
5631 assert(vm_page_purgeable_count >= delta);
5632 assert(delta > 0);
5633 OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
5634 }
5635
5636 vm_page_free(m);
5637
5638 #if MACH_ASSERT || DEBUG
5639 vm_object_unlock(object);
5640 #endif /* MACH_ASSERT || DEBUG */
5641 }
5642
5643 /*
5644 Grab locks for hibernate_page_list_setall()
5645 */
5646 void
5647 hibernate_vm_lock_queues(void)
5648 {
5649 vm_object_lock(compressor_object);
5650 vm_page_lock_queues();
5651 lck_mtx_lock(&vm_page_queue_free_lock);
5652
5653 if (vm_page_local_q) {
5654 uint32_t i;
5655 for (i = 0; i < vm_page_local_q_count; i++) {
5656 struct vpl *lq;
5657 lq = &vm_page_local_q[i].vpl_un.vpl;
5658 VPL_LOCK(&lq->vpl_lock);
5659 }
5660 }
5661 }
5662
5663 void
5664 hibernate_vm_unlock_queues(void)
5665 {
5666 if (vm_page_local_q) {
5667 uint32_t i;
5668 for (i = 0; i < vm_page_local_q_count; i++) {
5669 struct vpl *lq;
5670 lq = &vm_page_local_q[i].vpl_un.vpl;
5671 VPL_UNLOCK(&lq->vpl_lock);
5672 }
5673 }
5674 lck_mtx_unlock(&vm_page_queue_free_lock);
5675 vm_page_unlock_queues();
5676 vm_object_unlock(compressor_object);
5677 }
5678
5679 /*
5680 Bits zero in the bitmaps => page needs to be saved. All pages default to being saved;
5681 pages known to the VM not to need saving are subtracted.
5682 Wired pages to be saved are present in page_list_wired, pageable in page_list.
5683 */
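
/*
 * Illustrative only (not part of the original file): the sense of the
 * bitmaps, as used by the loops in hibernate_page_list_setall() below.
 * Setting a bit marks the page as NOT needing to be saved in the image.
 */
#if 0
	/* free page: exclude it from both the pageable and the wired bitmaps */
	hibernate_page_bitset(page_list, TRUE, m->phys_page);
	hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
#endif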
5684
5685 void
5686 hibernate_page_list_setall(hibernate_page_list_t * page_list,
5687 hibernate_page_list_t * page_list_wired,
5688 hibernate_page_list_t * page_list_pal,
5689 boolean_t preflight,
5690 boolean_t will_discard,
5691 uint32_t * pagesOut)
5692 {
5693 uint64_t start, end, nsec;
5694 vm_page_t m;
5695 vm_page_t next;
5696 uint32_t pages = page_list->page_count;
5697 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
5698 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
5699 uint32_t count_wire = pages;
5700 uint32_t count_discard_active = 0;
5701 uint32_t count_discard_inactive = 0;
5702 uint32_t count_discard_cleaned = 0;
5703 uint32_t count_discard_purgeable = 0;
5704 uint32_t count_discard_speculative = 0;
5705 uint32_t count_discard_vm_struct_pages = 0;
5706 uint32_t i;
5707 uint32_t bank;
5708 hibernate_bitmap_t * bitmap;
5709 hibernate_bitmap_t * bitmap_wired;
5710 boolean_t discard_all;
5711 boolean_t discard;
5712
5713 HIBLOG("hibernate_page_list_setall(preflight %d) start %p, %p\n", preflight, page_list, page_list_wired);
5714
5715 if (preflight) {
5716 page_list = NULL;
5717 page_list_wired = NULL;
5718 page_list_pal = NULL;
5719 discard_all = FALSE;
5720 } else {
5721 discard_all = will_discard;
5722 }
5723
5724 #if MACH_ASSERT || DEBUG
5725 if (!preflight)
5726 {
5727 vm_page_lock_queues();
5728 if (vm_page_local_q) {
5729 for (i = 0; i < vm_page_local_q_count; i++) {
5730 struct vpl *lq;
5731 lq = &vm_page_local_q[i].vpl_un.vpl;
5732 VPL_LOCK(&lq->vpl_lock);
5733 }
5734 }
5735 }
5736 #endif /* MACH_ASSERT || DEBUG */
5737
5738
5739 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
5740
5741 clock_get_uptime(&start);
5742
5743 if (!preflight) {
5744 hibernate_page_list_zero(page_list);
5745 hibernate_page_list_zero(page_list_wired);
5746 hibernate_page_list_zero(page_list_pal);
5747
5748 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
5749 hibernate_stats.cd_pages = pages;
5750 }
5751
5752 if (vm_page_local_q) {
5753 for (i = 0; i < vm_page_local_q_count; i++)
5754 vm_page_reactivate_local(i, TRUE, !preflight);
5755 }
5756
5757 if (preflight) {
5758 vm_object_lock(compressor_object);
5759 vm_page_lock_queues();
5760 lck_mtx_lock(&vm_page_queue_free_lock);
5761 }
5762
5763 m = (vm_page_t) hibernate_gobble_queue;
5764 while (m)
5765 {
5766 pages--;
5767 count_wire--;
5768 if (!preflight) {
5769 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5770 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5771 }
5772 m = (vm_page_t) m->pageq.next;
5773 }
5774
5775 if (!preflight) for( i = 0; i < real_ncpus; i++ )
5776 {
5777 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
5778 {
5779 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
5780 {
5781 pages--;
5782 count_wire--;
5783 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5784 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5785
5786 hibernate_stats.cd_local_free++;
5787 hibernate_stats.cd_total_free++;
5788 }
5789 }
5790 }
5791
5792 for( i = 0; i < vm_colors; i++ )
5793 {
5794 queue_iterate(&vm_page_queue_free[i],
5795 m,
5796 vm_page_t,
5797 pageq)
5798 {
5799 pages--;
5800 count_wire--;
5801 if (!preflight) {
5802 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5803 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5804
5805 hibernate_stats.cd_total_free++;
5806 }
5807 }
5808 }
5809
5810 queue_iterate(&vm_lopage_queue_free,
5811 m,
5812 vm_page_t,
5813 pageq)
5814 {
5815 pages--;
5816 count_wire--;
5817 if (!preflight) {
5818 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5819 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5820
5821 hibernate_stats.cd_total_free++;
5822 }
5823 }
5824
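/*
 * Each pageable-queue walk below follows the same pattern: if the hibernate
 * mode allows it and hibernate_consider_discard() agrees, the page is marked
 * "no save" in page_list and counted as discardable; otherwise it is counted
 * as a page to preserve.  Either way it is removed from the wired count and
 * flagged in page_list_wired (it will not be saved as a wired page), and
 * when discard_all is set the discard happens immediately.
 */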
5825 m = (vm_page_t) queue_first(&vm_page_queue_throttled);
5826 while (m && !queue_end(&vm_page_queue_throttled, (queue_entry_t)m))
5827 {
5828 next = (vm_page_t) m->pageq.next;
5829 discard = FALSE;
5830 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5831 && hibernate_consider_discard(m, preflight))
5832 {
5833 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5834 count_discard_inactive++;
5835 discard = discard_all;
5836 }
5837 else
5838 count_throttled++;
5839 count_wire--;
5840 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5841
5842 if (discard) hibernate_discard_page(m);
5843 m = next;
5844 }
5845
5846 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
5847 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
5848 {
5849 next = (vm_page_t) m->pageq.next;
5850 discard = FALSE;
5851 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5852 && hibernate_consider_discard(m, preflight))
5853 {
5854 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5855 if (m->dirty)
5856 count_discard_purgeable++;
5857 else
5858 count_discard_inactive++;
5859 discard = discard_all;
5860 }
5861 else
5862 count_anonymous++;
5863 count_wire--;
5864 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5865 if (discard) hibernate_discard_page(m);
5866 m = next;
5867 }
5868
5869 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
5870 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
5871 {
5872 next = (vm_page_t) m->pageq.next;
5873 discard = FALSE;
5874 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5875 && hibernate_consider_discard(m, preflight))
5876 {
5877 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5878 if (m->dirty)
5879 count_discard_purgeable++;
5880 else
5881 count_discard_cleaned++;
5882 discard = discard_all;
5883 }
5884 else
5885 count_cleaned++;
5886 count_wire--;
5887 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5888 if (discard) hibernate_discard_page(m);
5889 m = next;
5890 }
5891
5892 m = (vm_page_t) queue_first(&vm_page_queue_active);
5893 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5894 {
5895 next = (vm_page_t) m->pageq.next;
5896 discard = FALSE;
5897 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
5898 && hibernate_consider_discard(m, preflight))
5899 {
5900 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5901 if (m->dirty)
5902 count_discard_purgeable++;
5903 else
5904 count_discard_active++;
5905 discard = discard_all;
5906 }
5907 else
5908 count_active++;
5909 count_wire--;
5910 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5911 if (discard) hibernate_discard_page(m);
5912 m = next;
5913 }
5914
5915 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5916 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5917 {
5918 next = (vm_page_t) m->pageq.next;
5919 discard = FALSE;
5920 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5921 && hibernate_consider_discard(m, preflight))
5922 {
5923 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5924 if (m->dirty)
5925 count_discard_purgeable++;
5926 else
5927 count_discard_inactive++;
5928 discard = discard_all;
5929 }
5930 else
5931 count_inactive++;
5932 count_wire--;
5933 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5934 if (discard) hibernate_discard_page(m);
5935 m = next;
5936 }
5937
5938 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5939 {
5940 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5941 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5942 {
5943 next = (vm_page_t) m->pageq.next;
5944 discard = FALSE;
5945 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5946 && hibernate_consider_discard(m, preflight))
5947 {
5948 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5949 count_discard_speculative++;
5950 discard = discard_all;
5951 }
5952 else
5953 count_speculative++;
5954 count_wire--;
5955 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5956 if (discard) hibernate_discard_page(m);
5957 m = next;
5958 }
5959 }
5960
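/*
 * Compressor pages (the compressed backing for swapped-out pages) are never
 * discarded here; they are counted separately, removed from the wired count,
 * and flagged in page_list_wired so they end up saved with the pageable set.
 */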
5961 queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
5962 {
5963 count_compressor++;
5964 count_wire--;
5965 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5966 }
5967
5968 if (preflight == FALSE && discard_all == TRUE) {
5969 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START, 0, 0, 0, 0, 0);
5970
5971 HIBLOG("hibernate_teardown started\n");
5972 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
5973 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
5974
5975 pages -= count_discard_vm_struct_pages;
5976 count_wire -= count_discard_vm_struct_pages;
5977
5978 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
5979
5980 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
5981 }
5982
5983 if (!preflight) {
5984 // pull wired from hibernate_bitmap
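// For each bank: any page whose bit is still clear in the wired bitmap
// (i.e. it must be saved as wired) gets its bit set in the pageable bitmap,
// so it is not saved a second time in the pageable pass.  In effect this
// computes page_list |= ~page_list_wired, word by word.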
5985 bitmap = &page_list->bank_bitmap[0];
5986 bitmap_wired = &page_list_wired->bank_bitmap[0];
5987 for (bank = 0; bank < page_list->bank_count; bank++)
5988 {
5989 for (i = 0; i < bitmap->bitmapwords; i++)
5990 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
5991 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
5992 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
5993 }
5994 }
5995
5996 // machine dependent adjustments
5997 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
5998
5999 if (!preflight) {
6000 hibernate_stats.cd_count_wire = count_wire;
6001 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
6002 count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
6003 }
6004
6005 clock_get_uptime(&end);
6006 absolutetime_to_nanoseconds(end - start, &nsec);
6007 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
6008
6009 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d, spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
6010 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
6011 discard_all ? "did" : "could",
6012 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
6013
6014 if (hibernate_stats.cd_skipped_xpmapped)
6015 HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);
6016
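/*
 * pagesOut is the expected image page count: everything still accounted for
 * minus the pages identified above as discardable.  For a preflight that
 * will discard, the pages currently sitting on the pageable queues and in
 * the compressor are subtracted as well, presumably because they will be
 * gone by the time the real (discarding) pass runs.
 */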
6017 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
6018
6019 if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
6020
6021 #if MACH_ASSERT || DEBUG
6022 if (!preflight)
6023 {
6024 if (vm_page_local_q) {
6025 for (i = 0; i < vm_page_local_q_count; i++) {
6026 struct vpl *lq;
6027 lq = &vm_page_local_q[i].vpl_un.vpl;
6028 VPL_UNLOCK(&lq->vpl_lock);
6029 }
6030 }
6031 vm_page_unlock_queues();
6032 }
6033 #endif /* MACH_ASSERT || DEBUG */
6034
6035 if (preflight) {
6036 lck_mtx_unlock(&vm_page_queue_free_lock);
6037 vm_page_unlock_queues();
6038 vm_object_unlock(compressor_object);
6039 }
6040
6041 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
6042 }
6043
6044 void
6045 hibernate_page_list_discard(hibernate_page_list_t * page_list)
6046 {
6047 uint64_t start, end, nsec;
6048 vm_page_t m;
6049 vm_page_t next;
6050 uint32_t i;
6051 uint32_t count_discard_active = 0;
6052 uint32_t count_discard_inactive = 0;
6053 uint32_t count_discard_purgeable = 0;
6054 uint32_t count_discard_cleaned = 0;
6055 uint32_t count_discard_speculative = 0;
6056
6057
6058 #if MACH_ASSERT || DEBUG
6059 vm_page_lock_queues();
6060 if (vm_page_local_q) {
6061 for (i = 0; i < vm_page_local_q_count; i++) {
6062 struct vpl *lq;
6063 lq = &vm_page_local_q[i].vpl_un.vpl;
6064 VPL_LOCK(&lq->vpl_lock);
6065 }
6066 }
6067 #endif /* MACH_ASSERT || DEBUG */
6068
6069 clock_get_uptime(&start);
6070
6071 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
6072 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
6073 {
6074 next = (vm_page_t) m->pageq.next;
6075 if (hibernate_page_bittst(page_list, m->phys_page))
6076 {
6077 if (m->dirty)
6078 count_discard_purgeable++;
6079 else
6080 count_discard_inactive++;
6081 hibernate_discard_page(m);
6082 }
6083 m = next;
6084 }
6085
6086 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
6087 {
6088 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
6089 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
6090 {
6091 next = (vm_page_t) m->pageq.next;
6092 if (hibernate_page_bittst(page_list, m->phys_page))
6093 {
6094 count_discard_speculative++;
6095 hibernate_discard_page(m);
6096 }
6097 m = next;
6098 }
6099 }
6100
6101 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
6102 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
6103 {
6104 next = (vm_page_t) m->pageq.next;
6105 if (hibernate_page_bittst(page_list, m->phys_page))
6106 {
6107 if (m->dirty)
6108 count_discard_purgeable++;
6109 else
6110 count_discard_inactive++;
6111 hibernate_discard_page(m);
6112 }
6113 m = next;
6114 }
6115
6116 m = (vm_page_t) queue_first(&vm_page_queue_active);
6117 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
6118 {
6119 next = (vm_page_t) m->pageq.next;
6120 if (hibernate_page_bittst(page_list, m->phys_page))
6121 {
6122 if (m->dirty)
6123 count_discard_purgeable++;
6124 else
6125 count_discard_active++;
6126 hibernate_discard_page(m);
6127 }
6128 m = next;
6129 }
6130
6131 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
6132 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
6133 {
6134 next = (vm_page_t) m->pageq.next;
6135 if (hibernate_page_bittst(page_list, m->phys_page))
6136 {
6137 if (m->dirty)
6138 count_discard_purgeable++;
6139 else
6140 count_discard_cleaned++;
6141 hibernate_discard_page(m);
6142 }
6143 m = next;
6144 }
6145
6146 #if MACH_ASSERT || DEBUG
6147 if (vm_page_local_q) {
6148 for (i = 0; i < vm_page_local_q_count; i++) {
6149 struct vpl *lq;
6150 lq = &vm_page_local_q[i].vpl_un.vpl;
6151 VPL_UNLOCK(&lq->vpl_lock);
6152 }
6153 }
6154 vm_page_unlock_queues();
6155 #endif /* MACH_ASSERT || DEBUG */
6156
6157 clock_get_uptime(&end);
6158 absolutetime_to_nanoseconds(end - start, &nsec);
6159 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
6160 nsec / 1000000ULL,
6161 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
6162 }
6163
6164 boolean_t hibernate_paddr_map_inited = FALSE;
6165 boolean_t hibernate_rebuild_needed = FALSE;
6166 unsigned int hibernate_teardown_last_valid_compact_indx = -1;
6167 vm_page_t hibernate_rebuild_hash_list = NULL;
6168
6169 unsigned int hibernate_teardown_found_tabled_pages = 0;
6170 unsigned int hibernate_teardown_found_created_pages = 0;
6171 unsigned int hibernate_teardown_found_free_pages = 0;
6172 unsigned int hibernate_teardown_vm_page_free_count;
6173
6174
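/*
 * Each ppnum_mapping describes one physically contiguous run of vm_page_t
 * entries: vm_pages[ppnm_sindx .. ppnm_eindx) starts at physical page
 * ppnm_base_paddr.  hibernate_create_paddr_map() builds the list once, so
 * that hibernate_lookup_paddr() can recover a page's physical page number
 * from its original vm_pages[] index after the array has been compacted.
 */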
6175 struct ppnum_mapping {
6176 struct ppnum_mapping *ppnm_next;
6177 ppnum_t ppnm_base_paddr;
6178 unsigned int ppnm_sindx;
6179 unsigned int ppnm_eindx;
6180 };
6181
6182 struct ppnum_mapping *ppnm_head;
6183 struct ppnum_mapping *ppnm_last_found = NULL;
6184
6185
6186 void
6187 hibernate_create_paddr_map()
6188 {
6189 unsigned int i;
6190 ppnum_t next_ppnum_in_run = 0;
6191 struct ppnum_mapping *ppnm = NULL;
6192
6193 if (hibernate_paddr_map_inited == FALSE) {
6194
6195 for (i = 0; i < vm_pages_count; i++) {
6196
6197 if (ppnm)
6198 ppnm->ppnm_eindx = i;
6199
6200 if (ppnm == NULL || vm_pages[i].phys_page != next_ppnum_in_run) {
6201
6202 ppnm = kalloc(sizeof(struct ppnum_mapping));
6203
6204 ppnm->ppnm_next = ppnm_head;
6205 ppnm_head = ppnm;
6206
6207 ppnm->ppnm_sindx = i;
6208 ppnm->ppnm_base_paddr = vm_pages[i].phys_page;
6209 }
6210 next_ppnum_in_run = vm_pages[i].phys_page + 1;
6211 }
6212 ppnm->ppnm_eindx++;
6213
6214 hibernate_paddr_map_inited = TRUE;
6215 }
6216 }
6217
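/*
 * Translate an index into the original vm_pages[] array to its physical
 * page number using the run list above; the last run hit is cached in
 * ppnm_last_found since callers tend to probe consecutive indices.
 * Illustrative use, mirroring hibernate_free_range() below (a sketch, not
 * additional kernel code):
 *
 *	ppnum_t ppn = hibernate_lookup_paddr(sindx);
 *	vm_page_init(&vm_pages[sindx], ppn, FALSE);
 */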
6218 ppnum_t
6219 hibernate_lookup_paddr(unsigned int indx)
6220 {
6221 struct ppnum_mapping *ppnm = NULL;
6222
6223 ppnm = ppnm_last_found;
6224
6225 if (ppnm) {
6226 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
6227 goto done;
6228 }
6229 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
6230
6231 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
6232 ppnm_last_found = ppnm;
6233 break;
6234 }
6235 }
6236 if (ppnm == NULL)
6237 panic("hibernate_lookup_paddr of %d failed\n", indx);
6238 done:
6239 return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
6240 }
6241
6242
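/*
 * Mark every page lying entirely within [saddr, eaddr) as not needing to be
 * saved, in both bitmaps.  saddr is rounded up and eaddr rounded down to
 * page boundaries, so partially covered pages at either end are still saved.
 * Returns the number of pages marked.
 */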
6243 uint32_t
6244 hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6245 {
6246 addr64_t saddr_aligned;
6247 addr64_t eaddr_aligned;
6248 addr64_t addr;
6249 ppnum_t paddr;
6250 unsigned int mark_as_unneeded_pages = 0;
6251
6252 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
6253 eaddr_aligned = eaddr & ~PAGE_MASK_64;
6254
6255 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
6256
6257 paddr = pmap_find_phys(kernel_pmap, addr);
6258
6259 assert(paddr);
6260
6261 hibernate_page_bitset(page_list, TRUE, paddr);
6262 hibernate_page_bitset(page_list_wired, TRUE, paddr);
6263
6264 mark_as_unneeded_pages++;
6265 }
6266 return (mark_as_unneeded_pages);
6267 }
6268
6269
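/*
 * Re-enter a page into the object/offset hash during the post-hibernation
 * rebuild.  No bucket lock is taken here; this appears to rely on the
 * rebuild running single-threaded, before normal VM activity resumes.
 */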
6270 void
6271 hibernate_hash_insert_page(vm_page_t mem)
6272 {
6273 vm_page_bucket_t *bucket;
6274 int hash_id;
6275
6276 assert(mem->hashed);
6277 assert(mem->object);
6278 assert(mem->offset != (vm_object_offset_t) -1);
6279
6280 /*
6281 * Insert it into the object/offset hash table
6282 */
6283 hash_id = vm_page_hash(mem->object, mem->offset);
6284 bucket = &vm_page_buckets[hash_id];
6285
6286 mem->next_m = bucket->page_list;
6287 bucket->page_list = VM_PAGE_PACK_PTR(mem);
6288 }
6289
6290
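/*
 * Reinitialize vm_pages[sindx .. eindx) as free pages and put them back on
 * the free queues, recovering each entry's physical page number via
 * hibernate_lookup_paddr() because the entries themselves may have been
 * overwritten during compaction.
 */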
6291 void
6292 hibernate_free_range(int sindx, int eindx)
6293 {
6294 vm_page_t mem;
6295 unsigned int color;
6296
6297 while (sindx < eindx) {
6298 mem = &vm_pages[sindx];
6299
6300 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
6301
6302 mem->lopage = FALSE;
6303 mem->free = TRUE;
6304
6305 color = mem->phys_page & vm_color_mask;
6306 queue_enter_first(&vm_page_queue_free[color],
6307 mem,
6308 vm_page_t,
6309 pageq);
6310 vm_page_free_count++;
6311
6312 sindx++;
6313 }
6314 }
6315
6316
6317 extern void hibernate_rebuild_pmap_structs(void);
6318
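/*
 * Undo hibernate_teardown_vm_structs() on wake: walk the compacted entries
 * from the highest valid index back down, move each vm_page_t back to the
 * slot recorded in its next_m field, rehash the ones that were hashed, and
 * turn the gaps between them back into runs of free pages.
 */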
6319 void
6320 hibernate_rebuild_vm_structs(void)
6321 {
6322 int cindx, sindx, eindx;
6323 vm_page_t mem, tmem, mem_next;
6324 AbsoluteTime startTime, endTime;
6325 uint64_t nsec;
6326
6327 if (hibernate_rebuild_needed == FALSE)
6328 return;
6329
6330 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6331 HIBLOG("hibernate_rebuild started\n");
6332
6333 clock_get_uptime(&startTime);
6334
6335 hibernate_rebuild_pmap_structs();
6336
6337 bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
6338 eindx = vm_pages_count;
6339
6340 for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
6341
6342 mem = &vm_pages[cindx];
6343 /*
6344 * hibernate_teardown_vm_structs leaves the location where
6345 * this vm_page_t must be located in "next_m".
6346 */
6347 tmem = VM_PAGE_UNPACK_PTR(mem->next_m);
6348 mem->next_m = VM_PAGE_PACK_PTR(NULL);
6349
6350 sindx = (int)(tmem - &vm_pages[0]);
6351
6352 if (mem != tmem) {
6353 /*
6354 * this vm_page_t was moved by hibernate_teardown_vm_structs,
6355 * so move it back to its real location
6356 */
6357 *tmem = *mem;
6358 mem = tmem;
6359 }
6360 if (mem->hashed)
6361 hibernate_hash_insert_page(mem);
6362 /*
6363 * the 'hole' between this vm_page_t and the previous
6364 * vm_page_t we moved needs to be initialized as
6365 * a range of free vm_page_t's
6366 */
6367 hibernate_free_range(sindx + 1, eindx);
6368
6369 eindx = sindx;
6370 }
6371 if (sindx)
6372 hibernate_free_range(0, sindx);
6373
6374 assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);
6375
6376 /*
6377 * process the list of vm_page_t's that were entered in the hash,
6378 * but were not located in the vm_pages array... these are
6379 * vm_page_t's that were created on the fly (i.e. fictitious)
6380 */
6381 for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
6382 mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);
6383
6384 mem->next_m = VM_PAGE_PACK_PTR(NULL);
6385 hibernate_hash_insert_page(mem);
6386 }
6387 hibernate_rebuild_hash_list = NULL;
6388
6389 clock_get_uptime(&endTime);
6390 SUB_ABSOLUTETIME(&endTime, &startTime);
6391 absolutetime_to_nanoseconds(endTime, &nsec);
6392
6393 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);
6394
6395 hibernate_rebuild_needed = FALSE;
6396
6397 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
6398 }
6399
6400
6401 extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);
6402
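/*
 * Shrink the VM's own footprint in the hibernation image: move pages that
 * live outside the vm_pages[] array (fictitious pages) onto a side list for
 * later rehashing, compact the in-use vm_page_t entries toward the front of
 * vm_pages[], and mark the hash buckets, the unused tail of vm_pages[], and
 * any pmap structures reported by hibernate_teardown_pmap_structs() as not
 * needing to be saved.  Returns the number of pages marked unneeded.
 */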
6403 uint32_t
6404 hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6405 {
6406 unsigned int i;
6407 unsigned int compact_target_indx;
6408 vm_page_t mem, mem_next;
6409 vm_page_bucket_t *bucket;
6410 unsigned int mark_as_unneeded_pages = 0;
6411 unsigned int unneeded_vm_page_bucket_pages = 0;
6412 unsigned int unneeded_vm_pages_pages = 0;
6413 unsigned int unneeded_pmap_pages = 0;
6414 addr64_t start_of_unneeded = 0;
6415 addr64_t end_of_unneeded = 0;
6416
6417
6418 if (hibernate_should_abort())
6419 return (0);
6420
6421 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
6422 vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
6423 vm_page_cleaned_count, compressor_object->resident_page_count);
6424
6425 for (i = 0; i < vm_page_bucket_count; i++) {
6426
6427 bucket = &vm_page_buckets[i];
6428
6429 for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = mem_next) {
6430 assert(mem->hashed);
6431
6432 mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);
6433
6434 if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
6435 mem->next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
6436 hibernate_rebuild_hash_list = mem;
6437 }
6438 }
6439 }
6440 unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
6441 mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;
6442
6443 hibernate_teardown_vm_page_free_count = vm_page_free_count;
6444
6445 compact_target_indx = 0;
6446
6447 for (i = 0; i < vm_pages_count; i++) {
6448
6449 mem = &vm_pages[i];
6450
6451 if (mem->free) {
6452 unsigned int color;
6453
6454 assert(mem->busy);
6455 assert(!mem->lopage);
6456
6457 color = mem->phys_page & vm_color_mask;
6458
6459 queue_remove(&vm_page_queue_free[color],
6460 mem,
6461 vm_page_t,
6462 pageq);
6463 mem->pageq.next = NULL;
6464 mem->pageq.prev = NULL;
6465
6466 vm_page_free_count--;
6467
6468 hibernate_teardown_found_free_pages++;
6469
6470 if ( !vm_pages[compact_target_indx].free)
6471 compact_target_indx = i;
6472 } else {
6473 /*
6474 * record this vm_page_t's original location;
6475 * we need this even if it doesn't get moved,
6476 * as an indicator to the rebuild function that
6477 * it doesn't have to be moved back
6478 */
6479 mem->next_m = VM_PAGE_PACK_PTR(mem);
6480
6481 if (vm_pages[compact_target_indx].free) {
6482 /*
6483 * we've got a hole to fill, so
6484 * move this vm_page_t to its new home
6485 */
6486 vm_pages[compact_target_indx] = *mem;
6487 mem->free = TRUE;
6488
6489 hibernate_teardown_last_valid_compact_indx = compact_target_indx;
6490 compact_target_indx++;
6491 } else
6492 hibernate_teardown_last_valid_compact_indx = i;
6493 }
6494 }
6495 unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
6496 (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
6497 mark_as_unneeded_pages += unneeded_vm_pages_pages;
6498
6499 hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);
6500
6501 if (start_of_unneeded) {
6502 unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
6503 mark_as_unneeded_pages += unneeded_pmap_pages;
6504 }
6505 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);
6506
6507 hibernate_rebuild_needed = TRUE;
6508
6509 return (mark_as_unneeded_pages);
6510 }
6511
6512
6513 #endif /* HIBERNATION */
6514
6515 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
6516
6517 #include <mach_vm_debug.h>
6518 #if MACH_VM_DEBUG
6519
6520 #include <mach_debug/hash_info.h>
6521 #include <vm/vm_debug.h>
6522
6523 /*
6524 * Routine: vm_page_info
6525 * Purpose:
6526 * Return information about the global VP table.
6527 * Fills the buffer with as much information as possible
6528 * and returns the desired size of the buffer.
6529 * Conditions:
6530 * Nothing locked. The caller should provide
6531 * possibly-pageable memory.
6532 */
6533
6534 unsigned int
6535 vm_page_info(
6536 hash_info_bucket_t *info,
6537 unsigned int count)
6538 {
6539 unsigned int i;
6540 lck_spin_t *bucket_lock;
6541
6542 if (vm_page_bucket_count < count)
6543 count = vm_page_bucket_count;
6544
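/*
 * Each lock in vm_page_bucket_locks covers BUCKETS_PER_LOCK consecutive
 * buckets; the chain is counted under the spin lock and the result is
 * copied into the (possibly pageable) buffer only after the lock has been
 * dropped.
 */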
6545 for (i = 0; i < count; i++) {
6546 vm_page_bucket_t *bucket = &vm_page_buckets[i];
6547 unsigned int bucket_count = 0;
6548 vm_page_t m;
6549
6550 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6551 lck_spin_lock(bucket_lock);
6552
6553 for (m = VM_PAGE_UNPACK_PTR(bucket->page_list); m != VM_PAGE_NULL; m = VM_PAGE_UNPACK_PTR(m->next_m))
6554 bucket_count++;
6555
6556 lck_spin_unlock(bucket_lock);
6557
6558 /* don't touch pageable memory while holding locks */
6559 info[i].hib_count = bucket_count;
6560 }
6561
6562 return vm_page_bucket_count;
6563 }
6564 #endif /* MACH_VM_DEBUG */
6565
6566 #if VM_PAGE_BUCKETS_CHECK
6567 void
6568 vm_page_buckets_check(void)
6569 {
6570 unsigned int i;
6571 vm_page_t p;
6572 unsigned int p_hash;
6573 vm_page_bucket_t *bucket;
6574 lck_spin_t *bucket_lock;
6575
6576 if (!vm_page_buckets_check_ready) {
6577 return;
6578 }
6579
6580 #if HIBERNATION
6581 if (hibernate_rebuild_needed ||
6582 hibernate_rebuild_hash_list) {
6583 panic("BUCKET_CHECK: hibernation in progress: "
6584 "rebuild_needed=%d rebuild_hash_list=%p\n",
6585 hibernate_rebuild_needed,
6586 hibernate_rebuild_hash_list);
6587 }
6588 #endif /* HIBERNATION */
6589
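/*
 * The fake-bucket guard region is expected to still hold its 0x5a fill
 * pattern; any other byte means something wrote past the real bucket array.
 */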
6590 #if VM_PAGE_FAKE_BUCKETS
6591 char *cp;
6592 for (cp = (char *) vm_page_fake_buckets_start;
6593 cp < (char *) vm_page_fake_buckets_end;
6594 cp++) {
6595 if (*cp != 0x5a) {
6596 panic("BUCKET_CHECK: corruption at %p in fake buckets "
6597 "[0x%llx:0x%llx]\n",
6598 cp,
6599 (uint64_t) vm_page_fake_buckets_start,
6600 (uint64_t) vm_page_fake_buckets_end);
6601 }
6602 }
6603 #endif /* VM_PAGE_FAKE_BUCKETS */
6604
6605 for (i = 0; i < vm_page_bucket_count; i++) {
6606 bucket = &vm_page_buckets[i];
6607 if (!bucket->page_list) {
6608 continue;
6609 }
6610
6611 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6612 lck_spin_lock(bucket_lock);
6613 p = VM_PAGE_UNPACK_PTR(bucket->page_list);
6614 while (p != VM_PAGE_NULL) {
6615 if (!p->hashed) {
6616 panic("BUCKET_CHECK: page %p (%p,0x%llx) "
6617 "hash %d in bucket %d at %p "
6618 "is not hashed\n",
6619 p, p->object, p->offset,
6620 p_hash, i, bucket);
6621 }
6622 p_hash = vm_page_hash(p->object, p->offset);
6623 if (p_hash != i) {
6624 panic("BUCKET_CHECK: corruption in bucket %d "
6625 "at %p: page %p object %p offset 0x%llx "
6626 "hash %d\n",
6627 i, bucket, p, p->object, p->offset,
6628 p_hash);
6629 }
6630 p = VM_PAGE_UNPACK_PTR(p->next_m);
6631 }
6632 lck_spin_unlock(bucket_lock);
6633 }
6634
6635 // printf("BUCKET_CHECK: checked buckets\n");
6636 }
6637 #endif /* VM_PAGE_BUCKETS_CHECK */