1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67 #include <libkern/OSDebug.h>
68
69 #include <mach/clock_types.h>
70 #include <mach/vm_prot.h>
71 #include <mach/vm_statistics.h>
72 #include <mach/sdt.h>
73 #include <kern/counters.h>
74 #include <kern/sched_prim.h>
75 #include <kern/policy_internal.h>
76 #include <kern/task.h>
77 #include <kern/thread.h>
78 #include <kern/kalloc.h>
79 #include <kern/zalloc.h>
80 #include <kern/xpr.h>
81 #include <kern/ledger.h>
82 #include <vm/pmap.h>
83 #include <vm/vm_init.h>
84 #include <vm/vm_map.h>
85 #include <vm/vm_page.h>
86 #include <vm/vm_pageout.h>
87 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
88 #include <kern/misc_protos.h>
89 #include <zone_debug.h>
90 #include <mach_debug/zone_info.h>
91 #include <vm/cpm.h>
92 #include <pexpert/pexpert.h>
93
94 #include <vm/vm_protos.h>
95 #include <vm/memory_object.h>
96 #include <vm/vm_purgeable_internal.h>
97 #include <vm/vm_compressor.h>
98
99 #if CONFIG_PHANTOM_CACHE
100 #include <vm/vm_phantom_cache.h>
101 #endif
102
103 #include <IOKit/IOHibernatePrivate.h>
104
105 #include <sys/kdebug.h>
106
107
108 char vm_page_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
109 char vm_page_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
110 char vm_page_non_speculative_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
111 char vm_page_active_or_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
112
113 #if CONFIG_SECLUDED_MEMORY
114 struct vm_page_secluded_data vm_page_secluded;
115 #endif /* CONFIG_SECLUDED_MEMORY */
116
117 boolean_t hibernate_cleaning_in_progress = FALSE;
118 boolean_t vm_page_free_verify = TRUE;
119
120 uint32_t vm_lopage_free_count = 0;
121 uint32_t vm_lopage_free_limit = 0;
122 uint32_t vm_lopage_lowater = 0;
123 boolean_t vm_lopage_refill = FALSE;
124 boolean_t vm_lopage_needed = FALSE;
125
126 lck_mtx_ext_t vm_page_queue_lock_ext;
127 lck_mtx_ext_t vm_page_queue_free_lock_ext;
128 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
129
130 int speculative_age_index = 0;
131 int speculative_steal_index = 0;
132 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
133
134
135 __private_extern__ void vm_page_init_lck_grp(void);
136
137 static void vm_page_free_prepare(vm_page_t page);
138 static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
139
140 static void vm_tag_init(void);
141
142 uint64_t vm_min_kernel_and_kext_address = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
143 uint32_t vm_packed_from_vm_pages_array_mask = VM_PACKED_FROM_VM_PAGES_ARRAY;
144 uint32_t vm_packed_pointer_shift = VM_PACKED_POINTER_SHIFT;
145
146 /*
147 * Associated with each page of user-allocatable memory is a
148 * page structure.
149 */
150
151 /*
152 * These variables record the values returned by vm_page_bootstrap,
153 * for debugging purposes. The implementation of pmap_steal_memory
154 * and pmap_startup here also uses them internally.
155 */
156
157 vm_offset_t virtual_space_start;
158 vm_offset_t virtual_space_end;
159 uint32_t vm_page_pages;
160
161 /*
162 * The vm_page_lookup() routine, which provides for fast
163 * (virtual memory object, offset) to page lookup, employs
164 * the following hash table. The vm_page_{insert,remove}
165 * routines install and remove associations in the table.
166 * [This table is often called the virtual-to-physical,
167 * or VP, table.]
168 */
169 typedef struct {
170 vm_page_packed_t page_list;
171 #if MACH_PAGE_HASH_STATS
172 int cur_count; /* current count */
173 int hi_count; /* high water mark */
174 #endif /* MACH_PAGE_HASH_STATS */
175 } vm_page_bucket_t;
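/*
 * Editorial note (illustrative): page_list holds a vm_page_packed_t, a
 * compact packed encoding of a vm_page_t pointer; VM_PAGE_PACK_PTR() and
 * VM_PAGE_UNPACK_PTR() convert between the packed and full-pointer forms
 * (see the round-trip sanity checks in pmap_startup() below).
 */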
176
177
178 #define BUCKETS_PER_LOCK 16
179
180 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
181 unsigned int vm_page_bucket_count = 0; /* How big is array? */
182 unsigned int vm_page_hash_mask; /* Mask for hash function */
183 unsigned int vm_page_hash_shift; /* Shift for hash function */
184 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
185 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
186
187 lck_spin_t *vm_page_bucket_locks;
188 lck_spin_t vm_objects_wired_lock;
189 lck_spin_t vm_allocation_sites_lock;
190
191 #if VM_PAGE_BUCKETS_CHECK
192 boolean_t vm_page_buckets_check_ready = FALSE;
193 #if VM_PAGE_FAKE_BUCKETS
194 vm_page_bucket_t *vm_page_fake_buckets; /* decoy buckets */
195 vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
196 #endif /* VM_PAGE_FAKE_BUCKETS */
197 #endif /* VM_PAGE_BUCKETS_CHECK */
198
199
200
201 #if MACH_PAGE_HASH_STATS
202 /* This routine is only for debug. It is intended to be called by
203 * hand by a developer using a kernel debugger. This routine prints
204 * out vm_page_hash table statistics to the kernel debug console.
205 */
206 void
207 hash_debug(void)
208 {
209 int i;
210 int numbuckets = 0;
211 int highsum = 0;
212 int maxdepth = 0;
213
214 for (i = 0; i < vm_page_bucket_count; i++) {
215 if (vm_page_buckets[i].hi_count) {
216 numbuckets++;
217 highsum += vm_page_buckets[i].hi_count;
218 if (vm_page_buckets[i].hi_count > maxdepth)
219 maxdepth = vm_page_buckets[i].hi_count;
220 }
221 }
222 printf("Total number of buckets: %d\n", vm_page_bucket_count);
223 printf("Number used buckets: %d = %d%%\n",
224 numbuckets, 100*numbuckets/vm_page_bucket_count);
225 printf("Number unused buckets: %d = %d%%\n",
226 vm_page_bucket_count - numbuckets,
227 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
228 printf("Sum of bucket max depth: %d\n", highsum);
229 printf("Average bucket depth: %d.%2d\n",
230 highsum/vm_page_bucket_count,
231 highsum%vm_page_bucket_count);
232 printf("Maximum bucket depth: %d\n", maxdepth);
233 }
234 #endif /* MACH_PAGE_HASH_STATS */
235
236 /*
237 * The virtual page size is currently implemented as a runtime
238 * variable, but is constant once initialized using vm_set_page_size.
239 * This initialization must be done in the machine-dependent
240 * bootstrap sequence, before calling other machine-independent
241 * initializations.
242 *
243 * All references to the virtual page size outside this
244 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
245 * constants.
246 */
247 vm_size_t page_size = PAGE_SIZE;
248 vm_size_t page_mask = PAGE_MASK;
249 int page_shift = PAGE_SHIFT;
250
251 /*
252 * Resident page structures are initialized from
253 * a template (see vm_page_alloc).
254 *
255 * When adding a new field to the virtual memory
256 * object structure, be sure to add initialization
257 * (see vm_page_bootstrap).
258 */
259 struct vm_page vm_page_template;
260
261 vm_page_t vm_pages = VM_PAGE_NULL;
262 vm_page_t vm_page_array_beginning_addr;
263 vm_page_t vm_page_array_ending_addr;
264
265 unsigned int vm_pages_count = 0;
266 ppnum_t vm_page_lowest = 0;
267
268 /*
269 * Resident pages that represent real memory
270 * are allocated from a set of free lists,
271 * one per color.
272 */
273 unsigned int vm_colors;
274 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
275 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
276 unsigned int vm_free_magazine_refill_limit = 0;
277
278
279 struct vm_page_queue_free_head {
280 vm_page_queue_head_t qhead;
281 } __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
282
283 struct vm_page_queue_free_head vm_page_queue_free[MAX_COLORS];
284
285
286 unsigned int vm_page_free_wanted;
287 unsigned int vm_page_free_wanted_privileged;
288 #if CONFIG_SECLUDED_MEMORY
289 unsigned int vm_page_free_wanted_secluded;
290 #endif /* CONFIG_SECLUDED_MEMORY */
291 unsigned int vm_page_free_count;
292
293 /*
294 * Occasionally, the virtual memory system uses
295 * resident page structures that do not refer to
296 * real pages, for example to leave a page with
297 * important state information in the VP table.
298 *
299 * These page structures are allocated the way
300 * most other kernel structures are.
301 */
302 zone_t vm_page_array_zone;
303 zone_t vm_page_zone;
304 vm_locks_array_t vm_page_locks;
305 decl_lck_mtx_data(,vm_page_alloc_lock)
306 lck_mtx_ext_t vm_page_alloc_lock_ext;
307
308 unsigned int io_throttle_zero_fill;
309
310 unsigned int vm_page_local_q_count = 0;
311 unsigned int vm_page_local_q_soft_limit = 250;
312 unsigned int vm_page_local_q_hard_limit = 500;
313 struct vplq *vm_page_local_q = NULL;
314
315 /* N.B. Guard and fictitious pages must not
316 * be assigned a zero phys_page value.
317 */
318 /*
319 * Fictitious pages don't have a physical address,
320 * but we must initialize phys_page to something.
321 * For debugging, this should be a strange value
322 * that the pmap module can recognize in assertions.
323 */
324 ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
325
326 /*
327 * Guard pages are not accessible so they don't
328 * need a physical address, but we need to enter
329 * one in the pmap.
330 * Let's make it recognizable and make sure that
331 * we don't use a real physical page with that
332 * physical address.
333 */
334 ppnum_t vm_page_guard_addr = (ppnum_t) -2;
335
336 /*
337 * Resident page structures are also chained on
338 * queues that are used by the page replacement
339 * system (pageout daemon). These queues are
340 * defined here, but are shared by the pageout
341 * module. The inactive queue is broken into
342 * file-backed and anonymous for convenience, as the
343 * pageout daemon often assigns a higher
344 * importance to anonymous pages (they are less likely to be picked).
345 */
346 vm_page_queue_head_t vm_page_queue_active __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
347 vm_page_queue_head_t vm_page_queue_inactive __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
348 #if CONFIG_SECLUDED_MEMORY
349 vm_page_queue_head_t vm_page_queue_secluded __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
350 #endif /* CONFIG_SECLUDED_MEMORY */
351 vm_page_queue_head_t vm_page_queue_anonymous __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT))); /* inactive memory queue for anonymous pages */
352 vm_page_queue_head_t vm_page_queue_throttled __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
353
354 queue_head_t vm_objects_wired;
355
356 #if CONFIG_BACKGROUND_QUEUE
357 vm_page_queue_head_t vm_page_queue_background __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
358 uint32_t vm_page_background_limit;
359 uint32_t vm_page_background_target;
360 uint32_t vm_page_background_count;
361 uint64_t vm_page_background_promoted_count;
362
363 uint32_t vm_page_background_internal_count;
364 uint32_t vm_page_background_external_count;
365
366 uint32_t vm_page_background_mode;
367 uint32_t vm_page_background_exclude_external;
368 #endif
369
370 unsigned int vm_page_active_count;
371 unsigned int vm_page_inactive_count;
372 #if CONFIG_SECLUDED_MEMORY
373 unsigned int vm_page_secluded_count;
374 unsigned int vm_page_secluded_count_free;
375 unsigned int vm_page_secluded_count_inuse;
376 #endif /* CONFIG_SECLUDED_MEMORY */
377 unsigned int vm_page_anonymous_count;
378 unsigned int vm_page_throttled_count;
379 unsigned int vm_page_speculative_count;
380
381 unsigned int vm_page_wire_count;
382 unsigned int vm_page_stolen_count;
383 unsigned int vm_page_wire_count_initial;
384 unsigned int vm_page_pages_initial;
385 unsigned int vm_page_gobble_count = 0;
386
387 #define VM_PAGE_WIRE_COUNT_WARNING 0
388 #define VM_PAGE_GOBBLE_COUNT_WARNING 0
389
390 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
391 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
392 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
393
394 unsigned int vm_page_xpmapped_external_count = 0;
395 unsigned int vm_page_external_count = 0;
396 unsigned int vm_page_internal_count = 0;
397 unsigned int vm_page_pageable_external_count = 0;
398 unsigned int vm_page_pageable_internal_count = 0;
399
400 #if DEVELOPMENT || DEBUG
401 unsigned int vm_page_speculative_recreated = 0;
402 unsigned int vm_page_speculative_created = 0;
403 unsigned int vm_page_speculative_used = 0;
404 #endif
405
406 vm_page_queue_head_t vm_page_queue_cleaned __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
407
408 unsigned int vm_page_cleaned_count = 0;
409 unsigned int vm_pageout_enqueued_cleaned = 0;
410
411 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
412 ppnum_t max_valid_low_ppnum = 0xffffffff;
413
414
415 /*
416 * Several page replacement parameters are also
417 * shared with this module, so that page allocation
418 * (done here in vm_page_alloc) can trigger the
419 * pageout daemon.
420 */
421 unsigned int vm_page_free_target = 0;
422 unsigned int vm_page_free_min = 0;
423 unsigned int vm_page_throttle_limit = 0;
424 unsigned int vm_page_inactive_target = 0;
425 #if CONFIG_SECLUDED_MEMORY
426 unsigned int vm_page_secluded_target = 0;
427 #endif /* CONFIG_SECLUDED_MEMORY */
428 unsigned int vm_page_anonymous_min = 0;
429 unsigned int vm_page_inactive_min = 0;
430 unsigned int vm_page_free_reserved = 0;
431 unsigned int vm_page_throttle_count = 0;
432
433
434 /*
435 * The VM system has a couple of heuristics for deciding
436 * that pages are "uninteresting" and should be placed
437 * on the inactive queue as likely candidates for replacement.
438 * These variables let the heuristics be controlled at run-time
439 * to make experimentation easier.
440 */
441
442 boolean_t vm_page_deactivate_hint = TRUE;
443
444 struct vm_page_stats_reusable vm_page_stats_reusable;
445
446 /*
447 * vm_set_page_size:
448 *
449 * Sets the page size, perhaps based upon the memory
450 * size. Must be called before any use of page-size
451 * dependent functions.
452 *
453 * Sets page_shift and page_mask from page_size.
454 */
455 void
456 vm_set_page_size(void)
457 {
458 page_size = PAGE_SIZE;
459 page_mask = PAGE_MASK;
460 page_shift = PAGE_SHIFT;
461
462 if ((page_mask & page_size) != 0)
463 panic("vm_set_page_size: page size not a power of two");
464
465 for (page_shift = 0; ; page_shift++)
466 if ((1U << page_shift) == page_size)
467 break;
468 }
469
470 #define COLOR_GROUPS_TO_STEAL 4
471
472
473 /* Called once during startup, once the cache geometry is known.
474 */
475 static void
476 vm_page_set_colors( void )
477 {
478 unsigned int n, override;
479
480 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
481 n = override;
482 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
483 n = vm_cache_geometry_colors;
484 else n = DEFAULT_COLORS; /* use default if all else fails */
485
486 if ( n == 0 )
487 n = 1;
488 if ( n > MAX_COLORS )
489 n = MAX_COLORS;
490
491 /* the count must be a power of 2 */
492 if ( ( n & (n - 1)) != 0 )
493 panic("vm_page_set_colors");
494
495 vm_colors = n;
496 vm_color_mask = n - 1;
497
498 vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
499 }
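/*
 * Editorial sketch (illustrative, not compiled): because the color count is
 * forced to a power of two above, a page's free-list color can be derived by
 * masking its physical page number with vm_color_mask. A minimal model of
 * that idea, using a hypothetical helper name:
 */
#if 0
static unsigned int
example_page_color(ppnum_t phys_page)
{
	return phys_page & vm_color_mask;	/* low bits select one of the vm_colors free queues */
}
#endif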
500
501
502 lck_grp_t vm_page_lck_grp_free;
503 lck_grp_t vm_page_lck_grp_queue;
504 lck_grp_t vm_page_lck_grp_local;
505 lck_grp_t vm_page_lck_grp_purge;
506 lck_grp_t vm_page_lck_grp_alloc;
507 lck_grp_t vm_page_lck_grp_bucket;
508 lck_grp_attr_t vm_page_lck_grp_attr;
509 lck_attr_t vm_page_lck_attr;
510
511
512 __private_extern__ void
513 vm_page_init_lck_grp(void)
514 {
515 /*
516 * initialize the vm_page lock world
517 */
518 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
519 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
520 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
521 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
522 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
523 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
524 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
525 lck_attr_setdefault(&vm_page_lck_attr);
526 lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
527
528 vm_compressor_init_locks();
529 }
530
531 void
532 vm_page_init_local_q()
533 {
534 unsigned int num_cpus;
535 unsigned int i;
536 struct vplq *t_local_q;
537
538 num_cpus = ml_get_max_cpus();
539
540 /*
541 * no point in this for a uni-processor system
542 */
543 if (num_cpus >= 2) {
544 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
545
546 for (i = 0; i < num_cpus; i++) {
547 struct vpl *lq;
548
549 lq = &t_local_q[i].vpl_un.vpl;
550 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
551 vm_page_queue_init(&lq->vpl_queue);
552 lq->vpl_count = 0;
553 lq->vpl_internal_count = 0;
554 lq->vpl_external_count = 0;
555 }
556 vm_page_local_q_count = num_cpus;
557
558 vm_page_local_q = (struct vplq *)t_local_q;
559 }
560 }
561
562
563 /*
564 * vm_page_bootstrap:
565 *
566 * Initializes the resident memory module.
567 *
568 * Allocates memory for the page cells, and
569 * for the object/offset-to-page hash table headers.
570 * Each page cell is initialized and placed on the free list.
571 * Returns the range of available kernel virtual memory.
572 */
573
574 void
575 vm_page_bootstrap(
576 vm_offset_t *startp,
577 vm_offset_t *endp)
578 {
579 vm_page_t m;
580 unsigned int i;
581 unsigned int log1;
582 unsigned int log2;
583 unsigned int size;
584
585 /*
586 * Initialize the vm_page template.
587 */
588
589 m = &vm_page_template;
590 bzero(m, sizeof (*m));
591
592 #if CONFIG_BACKGROUND_QUEUE
593 m->vm_page_backgroundq.next = 0;
594 m->vm_page_backgroundq.prev = 0;
595 m->vm_page_in_background = FALSE;
596 m->vm_page_on_backgroundq = FALSE;
597 #endif
598
599 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
600 m->listq.next = 0;
601 m->listq.prev = 0;
602 m->next_m = 0;
603
604 m->vm_page_object = 0; /* reset later */
605 m->offset = (vm_object_offset_t) -1; /* reset later */
606
607 m->wire_count = 0;
608 m->vm_page_q_state = VM_PAGE_NOT_ON_Q;
609 m->laundry = FALSE;
610 m->reference = FALSE;
611 m->gobbled = FALSE;
612 m->private = FALSE;
613 m->__unused_pageq_bits = 0;
614
615 VM_PAGE_SET_PHYS_PAGE(m, 0); /* reset later */
616 m->busy = TRUE;
617 m->wanted = FALSE;
618 m->tabled = FALSE;
619 m->hashed = FALSE;
620 m->fictitious = FALSE;
621 m->pmapped = FALSE;
622 m->wpmapped = FALSE;
623 m->free_when_done = FALSE;
624 m->absent = FALSE;
625 m->error = FALSE;
626 m->dirty = FALSE;
627 m->cleaning = FALSE;
628 m->precious = FALSE;
629 m->clustered = FALSE;
630 m->overwriting = FALSE;
631 m->restart = FALSE;
632 m->unusual = FALSE;
633 m->encrypted = FALSE;
634 m->encrypted_cleaning = FALSE;
635 m->cs_validated = FALSE;
636 m->cs_tainted = FALSE;
637 m->cs_nx = FALSE;
638 m->no_cache = FALSE;
639 m->reusable = FALSE;
640 m->slid = FALSE;
641 m->xpmapped = FALSE;
642 m->written_by_kernel = FALSE;
643 m->__unused_object_bits = 0;
644
645 /*
646 * Initialize the page queues.
647 */
648 vm_page_init_lck_grp();
649
650 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
651 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
652 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
653
654 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
655 int group;
656
657 purgeable_queues[i].token_q_head = 0;
658 purgeable_queues[i].token_q_tail = 0;
659 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
660 queue_init(&purgeable_queues[i].objq[group]);
661
662 purgeable_queues[i].type = i;
663 purgeable_queues[i].new_pages = 0;
664 #if MACH_ASSERT
665 purgeable_queues[i].debug_count_tokens = 0;
666 purgeable_queues[i].debug_count_objects = 0;
667 #endif
668 };
669 purgeable_nonvolatile_count = 0;
670 queue_init(&purgeable_nonvolatile_queue);
671
672 for (i = 0; i < MAX_COLORS; i++ )
673 vm_page_queue_init(&vm_page_queue_free[i].qhead);
674
675 vm_page_queue_init(&vm_lopage_queue_free);
676 vm_page_queue_init(&vm_page_queue_active);
677 vm_page_queue_init(&vm_page_queue_inactive);
678 #if CONFIG_SECLUDED_MEMORY
679 vm_page_queue_init(&vm_page_queue_secluded);
680 #endif /* CONFIG_SECLUDED_MEMORY */
681 vm_page_queue_init(&vm_page_queue_cleaned);
682 vm_page_queue_init(&vm_page_queue_throttled);
683 vm_page_queue_init(&vm_page_queue_anonymous);
684 queue_init(&vm_objects_wired);
685
686 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
687 vm_page_queue_init(&vm_page_queue_speculative[i].age_q);
688
689 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
690 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
691 }
692 #if CONFIG_BACKGROUND_QUEUE
693 vm_page_queue_init(&vm_page_queue_background);
694
695 vm_page_background_count = 0;
696 vm_page_background_internal_count = 0;
697 vm_page_background_external_count = 0;
698 vm_page_background_promoted_count = 0;
699
700 vm_page_background_target = (unsigned int)(atop_64(max_mem) / 25);
701
702 if (vm_page_background_target > VM_PAGE_BACKGROUND_TARGET_MAX)
703 vm_page_background_target = VM_PAGE_BACKGROUND_TARGET_MAX;
704 vm_page_background_limit = vm_page_background_target + 256;
705
706 vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
707 vm_page_background_exclude_external = 0;
708
709 PE_parse_boot_argn("vm_page_bg_mode", &vm_page_background_mode, sizeof(vm_page_background_mode));
710 PE_parse_boot_argn("vm_page_bg_exclude_external", &vm_page_background_exclude_external, sizeof(vm_page_background_exclude_external));
711 PE_parse_boot_argn("vm_page_bg_target", &vm_page_background_target, sizeof(vm_page_background_target));
712 PE_parse_boot_argn("vm_page_bg_limit", &vm_page_background_limit, sizeof(vm_page_background_limit));
713
714 if (vm_page_background_mode > VM_PAGE_BG_LEVEL_3)
715 vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
716
717 if (vm_page_background_limit <= vm_page_background_target)
718 vm_page_background_limit = vm_page_background_target + 256;
719 #endif
720 vm_page_free_wanted = 0;
721 vm_page_free_wanted_privileged = 0;
722 #if CONFIG_SECLUDED_MEMORY
723 vm_page_free_wanted_secluded = 0;
724 #endif /* CONFIG_SECLUDED_MEMORY */
725
726 vm_page_set_colors();
727
728 bzero(vm_page_inactive_states, sizeof(vm_page_inactive_states));
729 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
730 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
731 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
732
733 bzero(vm_page_pageable_states, sizeof(vm_page_pageable_states));
734 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
735 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
736 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
737 vm_page_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
738 vm_page_pageable_states[VM_PAGE_ON_SPECULATIVE_Q] = 1;
739 vm_page_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
740 #if CONFIG_SECLUDED_MEMORY
741 vm_page_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
742 #endif /* CONFIG_SECLUDED_MEMORY */
743
744 bzero(vm_page_non_speculative_pageable_states, sizeof(vm_page_non_speculative_pageable_states));
745 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
746 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
747 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
748 vm_page_non_speculative_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
749 vm_page_non_speculative_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
750 #if CONFIG_SECLUDED_MEMORY
751 vm_page_non_speculative_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
752 #endif /* CONFIG_SECLUDED_MEMORY */
753
754 bzero(vm_page_active_or_inactive_states, sizeof(vm_page_active_or_inactive_states));
755 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
756 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
757 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
758 vm_page_active_or_inactive_states[VM_PAGE_ON_ACTIVE_Q] = 1;
759 #if CONFIG_SECLUDED_MEMORY
760 vm_page_active_or_inactive_states[VM_PAGE_ON_SECLUDED_Q] = 1;
761 #endif /* CONFIG_SECLUDED_MEMORY */
762
763
764 /*
765 * Steal memory for the map and zone subsystems.
766 */
767 #if CONFIG_GZALLOC
768 gzalloc_configure();
769 #endif
770 kernel_debug_string_early("vm_map_steal_memory");
771 vm_map_steal_memory();
772
773 /*
774 * Allocate (and initialize) the virtual-to-physical
775 * table hash buckets.
776 *
777 * The number of buckets should be a power of two to
778 * get a good hash function. The following computation
779 * chooses the first power of two that is greater
780 * than the number of physical pages in the system.
781 */
782
783 if (vm_page_bucket_count == 0) {
784 unsigned int npages = pmap_free_pages();
785
786 vm_page_bucket_count = 1;
787 while (vm_page_bucket_count < npages)
788 vm_page_bucket_count <<= 1;
789 }
790 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
791
792 vm_page_hash_mask = vm_page_bucket_count - 1;
793
794 /*
795 * Calculate object shift value for hashing algorithm:
796 * O = log2(sizeof(struct vm_object))
797 * B = log2(vm_page_bucket_count)
798 * hash shifts the object left by
799 * B/2 - O
800 */
801 size = vm_page_bucket_count;
802 for (log1 = 0; size > 1; log1++)
803 size /= 2;
804 size = sizeof(struct vm_object);
805 for (log2 = 0; size > 1; log2++)
806 size /= 2;
807 vm_page_hash_shift = log1/2 - log2 + 1;
808
809 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
810 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of fourth root of table size) */
811 vm_page_bucket_hash |= 1; /* Set the low bit - it must always be 1 to ensure a unique series */
812
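/*
 * Worked example (editorial, with assumed sizes): if vm_page_bucket_count were
 * 2^20 (log1 == 20) and sizeof(struct vm_object) were 256 bytes (log2 == 8),
 * the loops above would give vm_page_hash_shift = 20/2 - 8 + 1 = 3 and
 * vm_page_bucket_hash = (1 << 10) | (1 << 5) | 1, i.e. roughly the square
 * root and fourth root of the table size, with the low bit forced on.
 */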
813 if (vm_page_hash_mask & vm_page_bucket_count)
814 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
815
816 #if VM_PAGE_BUCKETS_CHECK
817 #if VM_PAGE_FAKE_BUCKETS
818 /*
819 * Allocate a decoy set of page buckets, to detect
820 * any stomping there.
821 */
822 vm_page_fake_buckets = (vm_page_bucket_t *)
823 pmap_steal_memory(vm_page_bucket_count *
824 sizeof(vm_page_bucket_t));
825 vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
826 vm_page_fake_buckets_end =
827 vm_map_round_page((vm_page_fake_buckets_start +
828 (vm_page_bucket_count *
829 sizeof (vm_page_bucket_t))),
830 PAGE_MASK);
831 char *cp;
832 for (cp = (char *)vm_page_fake_buckets_start;
833 cp < (char *)vm_page_fake_buckets_end;
834 cp++) {
835 *cp = 0x5a;
836 }
837 #endif /* VM_PAGE_FAKE_BUCKETS */
838 #endif /* VM_PAGE_BUCKETS_CHECK */
839
840 kernel_debug_string_early("vm_page_buckets");
841 vm_page_buckets = (vm_page_bucket_t *)
842 pmap_steal_memory(vm_page_bucket_count *
843 sizeof(vm_page_bucket_t));
844
845 kernel_debug_string_early("vm_page_bucket_locks");
846 vm_page_bucket_locks = (lck_spin_t *)
847 pmap_steal_memory(vm_page_bucket_lock_count *
848 sizeof(lck_spin_t));
849
850 for (i = 0; i < vm_page_bucket_count; i++) {
851 vm_page_bucket_t *bucket = &vm_page_buckets[i];
852
853 bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
854 #if MACH_PAGE_HASH_STATS
855 bucket->cur_count = 0;
856 bucket->hi_count = 0;
857 #endif /* MACH_PAGE_HASH_STATS */
858 }
859
860 for (i = 0; i < vm_page_bucket_lock_count; i++)
861 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
862
863 lck_spin_init(&vm_objects_wired_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
864 lck_spin_init(&vm_allocation_sites_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
865 vm_tag_init();
866
867 #if VM_PAGE_BUCKETS_CHECK
868 vm_page_buckets_check_ready = TRUE;
869 #endif /* VM_PAGE_BUCKETS_CHECK */
870
871 /*
872 * Machine-dependent code allocates the resident page table.
873 * It uses vm_page_init to initialize the page frames.
874 * The code also returns to us the virtual space available
875 * to the kernel. We don't trust the pmap module
876 * to get the alignment right.
877 */
878
879 kernel_debug_string_early("pmap_startup");
880 pmap_startup(&virtual_space_start, &virtual_space_end);
881 virtual_space_start = round_page(virtual_space_start);
882 virtual_space_end = trunc_page(virtual_space_end);
883
884 *startp = virtual_space_start;
885 *endp = virtual_space_end;
886
887 /*
888 * Compute the initial "wire" count.
889 * Up until now, the pages which have been set aside are not under
890 * the VM system's control, so although they aren't explicitly
891 * wired, they nonetheless can't be moved. At this moment,
892 * all VM managed pages are "free", courtesy of pmap_startup.
893 */
894 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
895 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
896 #if CONFIG_SECLUDED_MEMORY
897 vm_page_wire_count -= vm_page_secluded_count;
898 #endif
899 vm_page_wire_count_initial = vm_page_wire_count;
900 vm_page_pages_initial = vm_page_pages;
901
902 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
903 vm_page_free_count, vm_page_wire_count);
904
905 kernel_debug_string_early("vm_page_bootstrap complete");
906 simple_lock_init(&vm_paging_lock, 0);
907 }
908
909 #ifndef MACHINE_PAGES
910 /*
911 * We implement pmap_steal_memory and pmap_startup with the help
912 * of two simpler functions, pmap_virtual_space and pmap_next_page.
913 */
914
915 void *
916 pmap_steal_memory(
917 vm_size_t size)
918 {
919 vm_offset_t addr, vaddr;
920 ppnum_t phys_page;
921
922 /*
923 * We round the size up to a multiple of the pointer size.
924 */
925
926 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
927
928 /*
929 * If this is the first call to pmap_steal_memory,
930 * we have to initialize ourselves.
931 */
932
933 if (virtual_space_start == virtual_space_end) {
934 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
935
936 /*
937 * The initial values must be aligned properly, and
938 * we don't trust the pmap module to do it right.
939 */
940
941 virtual_space_start = round_page(virtual_space_start);
942 virtual_space_end = trunc_page(virtual_space_end);
943 }
944
945 /*
946 * Allocate virtual memory for this request.
947 */
948
949 addr = virtual_space_start;
950 virtual_space_start += size;
951
952 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
953
954 /*
955 * Allocate and map physical pages to back new virtual pages.
956 */
957
958 for (vaddr = round_page(addr);
959 vaddr < addr + size;
960 vaddr += PAGE_SIZE) {
961
962 if (!pmap_next_page_hi(&phys_page))
963 panic("pmap_steal_memory() size: 0x%llx\n", (uint64_t)size);
964
965 /*
966 * XXX Logically, these mappings should be wired,
967 * but some pmap modules barf if they are.
968 */
969 #if defined(__LP64__)
970 pmap_pre_expand(kernel_pmap, vaddr);
971 #endif
972
973 pmap_enter(kernel_pmap, vaddr, phys_page,
974 VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
975 VM_WIMG_USE_DEFAULT, FALSE);
976 /*
977 * Account for newly stolen memory
978 */
979 vm_page_wire_count++;
980 vm_page_stolen_count++;
981 }
982
983 return (void *) addr;
984 }
985
986 #if CONFIG_SECLUDED_MEMORY
987 /* boot-args to control secluded memory */
988 unsigned int secluded_mem_mb = 0; /* # of MBs of RAM to seclude */
989 int secluded_for_iokit = 1; /* IOKit can use secluded memory */
990 int secluded_for_apps = 1; /* apps can use secluded memory */
991 int secluded_for_filecache = 2; /* filecache can use secluded memory */
992 #if 11
993 int secluded_for_fbdp = 0;
994 #endif
995 int secluded_aging_policy = SECLUDED_AGING_BEFORE_ACTIVE;
996 #endif /* CONFIG_SECLUDED_MEMORY */
997
998
999
1000
1001 void vm_page_release_startup(vm_page_t mem);
1002 void
1003 pmap_startup(
1004 vm_offset_t *startp,
1005 vm_offset_t *endp)
1006 {
1007 unsigned int i, npages, pages_initialized, fill, fillval;
1008 ppnum_t phys_page;
1009 addr64_t tmpaddr;
1010
1011 #if defined(__LP64__)
1012 /*
1013 * make sure we are aligned on a 64 byte boundary
1014 * for VM_PAGE_PACK_PTR (it clips off the low-order
1015 * 6 bits of the pointer)
1016 */
1017 if (virtual_space_start != virtual_space_end)
1018 virtual_space_start = round_page(virtual_space_start);
1019 #endif
1020
1021 /*
1022 * We calculate how many page frames we will have
1023 * and then allocate the page structures in one chunk.
1024 */
1025
1026 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
1027 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
1028 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Each page frame costs PAGE_SIZE of backing memory plus one struct vm_page to describe it, so this is how many frames we can both back and describe */
1029
1030 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
1031
1032 /*
1033 * Initialize the page frames.
1034 */
1035 kernel_debug_string_early("Initialize the page frames");
1036
1037 vm_page_array_beginning_addr = &vm_pages[0];
1038 vm_page_array_ending_addr = &vm_pages[npages];
1039
1040
1041 for (i = 0, pages_initialized = 0; i < npages; i++) {
1042 if (!pmap_next_page(&phys_page))
1043 break;
1044 if (pages_initialized == 0 || phys_page < vm_page_lowest)
1045 vm_page_lowest = phys_page;
1046
1047 vm_page_init(&vm_pages[i], phys_page, FALSE);
1048 vm_page_pages++;
1049 pages_initialized++;
1050 }
1051 vm_pages_count = pages_initialized;
1052
1053 #if defined(__LP64__)
1054
1055 if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0]))) != &vm_pages[0])
1056 panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);
1057
1058 if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1]))) != &vm_pages[vm_pages_count-1])
1059 panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
1060 #endif
1061 kernel_debug_string_early("page fill/release");
1062 /*
1063 * Check if we want to initialize pages to a known value
1064 */
1065 fill = 0; /* Assume no fill */
1066 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
1067 #if DEBUG
1068 /* This slows down booting the DEBUG kernel, particularly on
1069 * large memory systems, but is worthwhile in deterministically
1070 * trapping uninitialized memory usage.
1071 */
1072 if (fill == 0) {
1073 fill = 1;
1074 fillval = 0xDEB8F177;
1075 }
1076 #endif
1077 if (fill)
1078 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
1079
1080 #if CONFIG_SECLUDED_MEMORY
1081 /* default: no secluded mem */
1082 secluded_mem_mb = 0;
1083 if (max_mem > 1*1024*1024*1024) {
1084 /* default to 90MB for devices with > 1GB of RAM */
1085 secluded_mem_mb = 90;
1086 }
1087 /* override with value from device tree, if provided */
1088 PE_get_default("kern.secluded_mem_mb",
1089 &secluded_mem_mb, sizeof(secluded_mem_mb));
1090 /* override with value from boot-args, if provided */
1091 PE_parse_boot_argn("secluded_mem_mb",
1092 &secluded_mem_mb,
1093 sizeof (secluded_mem_mb));
1094
1095 vm_page_secluded_target = (unsigned int)
1096 ((secluded_mem_mb * 1024ULL * 1024ULL) / PAGE_SIZE);
1097 PE_parse_boot_argn("secluded_for_iokit",
1098 &secluded_for_iokit,
1099 sizeof (secluded_for_iokit));
1100 PE_parse_boot_argn("secluded_for_apps",
1101 &secluded_for_apps,
1102 sizeof (secluded_for_apps));
1103 PE_parse_boot_argn("secluded_for_filecache",
1104 &secluded_for_filecache,
1105 sizeof (secluded_for_filecache));
1106 #if 11
1107 PE_parse_boot_argn("secluded_for_fbdp",
1108 &secluded_for_fbdp,
1109 sizeof (secluded_for_fbdp));
1110 #endif
1111 PE_parse_boot_argn("secluded_aging_policy",
1112 &secluded_aging_policy,
1113 sizeof (secluded_aging_policy));
1114 #endif /* CONFIG_SECLUDED_MEMORY */
1115
1116 // -debug code remove
1117 if (2 == vm_himemory_mode) {
1118 // free low -> high so high is preferred
1119 for (i = 1; i <= pages_initialized; i++) {
1120 if(fill) fillPage(VM_PAGE_GET_PHYS_PAGE(&vm_pages[i - 1]), fillval); /* Fill the page with a known value if requested at boot */
1121 vm_page_release_startup(&vm_pages[i - 1]);
1122 }
1123 }
1124 else
1125 // debug code remove-
1126
1127 /*
1128 * Release pages in reverse order so that physical pages
1129 * initially get allocated in ascending addresses. This keeps
1130 * the devices (which must address physical memory) happy if
1131 * they require several consecutive pages.
1132 */
1133 for (i = pages_initialized; i > 0; i--) {
1134 if(fill) fillPage(VM_PAGE_GET_PHYS_PAGE(&vm_pages[i - 1]), fillval); /* Fill the page with a known value if requested at boot */
1135 vm_page_release_startup(&vm_pages[i - 1]);
1136 }
1137
1138 VM_CHECK_MEMORYSTATUS;
1139
1140 #if 0
1141 {
1142 vm_page_t xx, xxo, xxl;
1143 int i, j, k, l;
1144
1145 j = 0; /* (BRINGUP) */
1146 xxl = 0;
1147
1148 for( i = 0; i < vm_colors; i++ ) {
1149 queue_iterate(&vm_page_queue_free[i].qhead,
1150 xx,
1151 vm_page_t,
1152 pageq) { /* BRINGUP */
1153 j++; /* (BRINGUP) */
1154 if(j > vm_page_free_count) { /* (BRINGUP) */
1155 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
1156 }
1157
1158 l = vm_page_free_count - j; /* (BRINGUP) */
1159 k = 0; /* (BRINGUP) */
1160
1161 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
1162
1163 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i].qhead; xxo = xxo->pageq.next) { /* (BRINGUP) */
1164 k++;
1165 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
1166 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
1167 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
1168 }
1169 }
1170
1171 xxl = xx;
1172 }
1173 }
1174
1175 if(j != vm_page_free_count) { /* (BRINGUP) */
1176 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
1177 }
1178 }
1179 #endif
1180
1181
1182 /*
1183 * We have to re-align virtual_space_start,
1184 * because pmap_steal_memory has been using it.
1185 */
1186
1187 virtual_space_start = round_page(virtual_space_start);
1188
1189 *startp = virtual_space_start;
1190 *endp = virtual_space_end;
1191 }
1192 #endif /* MACHINE_PAGES */
1193
1194 /*
1195 * Routine: vm_page_module_init
1196 * Purpose:
1197 * Second initialization pass, to be done after
1198 * the basic VM system is ready.
1199 */
1200 void
1201 vm_page_module_init(void)
1202 {
1203 uint64_t vm_page_zone_pages, vm_page_array_zone_data_size;
1204 vm_size_t vm_page_with_ppnum_size;
1205
1206 vm_page_array_zone = zinit((vm_size_t) sizeof(struct vm_page),
1207 0, PAGE_SIZE, "vm pages array");
1208
1209 zone_change(vm_page_array_zone, Z_CALLERACCT, FALSE);
1210 zone_change(vm_page_array_zone, Z_EXPAND, FALSE);
1211 zone_change(vm_page_array_zone, Z_EXHAUST, TRUE);
1212 zone_change(vm_page_array_zone, Z_FOREIGN, TRUE);
1213 zone_change(vm_page_array_zone, Z_GZALLOC_EXEMPT, TRUE);
1214 /*
1215 * Adjust zone statistics to account for the real pages allocated
1216 * in vm_page_create(). [Q: is this really what we want?]
1217 */
1218 vm_page_array_zone->count += vm_page_pages;
1219 vm_page_array_zone->sum_count += vm_page_pages;
1220 vm_page_array_zone_data_size = vm_page_pages * vm_page_array_zone->elem_size;
1221 vm_page_array_zone->cur_size += vm_page_array_zone_data_size;
1222 vm_page_zone_pages = ((round_page(vm_page_array_zone_data_size)) / PAGE_SIZE);
1223 OSAddAtomic64(vm_page_zone_pages, &(vm_page_array_zone->page_count));
1224 /* since zone accounts for these, take them out of stolen */
1225 VM_PAGE_MOVE_STOLEN(vm_page_zone_pages);
1226
1227 vm_page_with_ppnum_size = (sizeof(struct vm_page_with_ppnum) + (VM_PACKED_POINTER_ALIGNMENT-1)) & ~(VM_PACKED_POINTER_ALIGNMENT - 1);
1228
1229 vm_page_zone = zinit(vm_page_with_ppnum_size,
1230 0, PAGE_SIZE, "vm pages");
1231
1232 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
1233 zone_change(vm_page_zone, Z_EXPAND, FALSE);
1234 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
1235 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
1236 zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
1237 }
1238
1239 /*
1240 * Routine: vm_page_create
1241 * Purpose:
1242 * After the VM system is up, machine-dependent code
1243 * may stumble across more physical memory. For example,
1244 * memory that it was reserving for a frame buffer.
1245 * vm_page_create turns this memory into available pages.
1246 */
1247
1248 void
1249 vm_page_create(
1250 ppnum_t start,
1251 ppnum_t end)
1252 {
1253 ppnum_t phys_page;
1254 vm_page_t m;
1255
1256 for (phys_page = start;
1257 phys_page < end;
1258 phys_page++) {
1259 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
1260 == VM_PAGE_NULL)
1261 vm_page_more_fictitious();
1262
1263 m->fictitious = FALSE;
1264 pmap_clear_noencrypt(phys_page);
1265
1266 vm_page_pages++;
1267 vm_page_release(m, FALSE);
1268 }
1269 }
1270
1271 /*
1272 * vm_page_hash:
1273 *
1274 * Distributes the object/offset key pair among hash buckets.
1275 *
1276 * NOTE: The bucket count must be a power of 2
1277 */
1278 #define vm_page_hash(object, offset) (\
1279 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
1280 & vm_page_hash_mask)
1281
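/*
 * Editorial sketch (illustrative, not compiled): the trailing
 * "& vm_page_hash_mask" yields a valid bucket index only because
 * vm_page_bucket_count is a power of two, so masking is equivalent to a
 * modulo. A hypothetical function form of the same computation:
 */
#if 0
static unsigned int
example_vm_page_hash(vm_object_t object, vm_object_offset_t offset)
{
	unsigned int h;

	h = (natural_t)((uintptr_t)object * vm_page_bucket_hash) +
	    ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash);
	return h % vm_page_bucket_count;	/* same result as h & vm_page_hash_mask */
}
#endif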
1282
1283 /*
1284 * vm_page_insert: [ internal use only ]
1285 *
1286 * Inserts the given mem entry into the object/object-page
1287 * table and object list.
1288 *
1289 * The object must be locked.
1290 */
1291 void
1292 vm_page_insert(
1293 vm_page_t mem,
1294 vm_object_t object,
1295 vm_object_offset_t offset)
1296 {
1297 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, TRUE, FALSE, FALSE, NULL);
1298 }
1299
1300 void
1301 vm_page_insert_wired(
1302 vm_page_t mem,
1303 vm_object_t object,
1304 vm_object_offset_t offset,
1305 vm_tag_t tag)
1306 {
1307 vm_page_insert_internal(mem, object, offset, tag, FALSE, TRUE, FALSE, FALSE, NULL);
1308 }
1309
1310 void
1311 vm_page_insert_internal(
1312 vm_page_t mem,
1313 vm_object_t object,
1314 vm_object_offset_t offset,
1315 vm_tag_t tag,
1316 boolean_t queues_lock_held,
1317 boolean_t insert_in_hash,
1318 boolean_t batch_pmap_op,
1319 boolean_t batch_accounting,
1320 uint64_t *delayed_ledger_update)
1321 {
1322 vm_page_bucket_t *bucket;
1323 lck_spin_t *bucket_lock;
1324 int hash_id;
1325 task_t owner;
1326
1327 XPR(XPR_VM_PAGE,
1328 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1329 object, offset, mem, 0,0);
1330 #if 0
1331 /*
1332 * we may not hold the page queue lock
1333 * so this check isn't safe to make
1334 */
1335 VM_PAGE_CHECK(mem);
1336 #endif
1337
1338 assert(page_aligned(offset));
1339
1340 assert(!VM_PAGE_WIRED(mem) || mem->private || mem->fictitious || (tag != VM_KERN_MEMORY_NONE));
1341
1342 /* the vm_submap_object is only a placeholder for submaps */
1343 assert(object != vm_submap_object);
1344
1345 vm_object_lock_assert_exclusive(object);
1346 LCK_MTX_ASSERT(&vm_page_queue_lock,
1347 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1348 : LCK_MTX_ASSERT_NOTOWNED);
1349 if (queues_lock_held == FALSE)
1350 assert(!VM_PAGE_PAGEABLE(mem));
1351
1352 if (insert_in_hash == TRUE) {
1353 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1354 if (mem->tabled || mem->vm_page_object)
1355 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1356 "already in (obj=%p,off=0x%llx)",
1357 mem, object, offset, VM_PAGE_OBJECT(mem), mem->offset);
1358 #endif
1359 assert(!object->internal || offset < object->vo_size);
1360 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1361
1362 /*
1363 * Record the object/offset pair in this page
1364 */
1365
1366 mem->vm_page_object = VM_PAGE_PACK_OBJECT(object);
1367 mem->offset = offset;
1368
1369 #if CONFIG_SECLUDED_MEMORY
1370 if (object->eligible_for_secluded) {
1371 vm_page_secluded.eligible_for_secluded++;
1372 }
1373 #endif /* CONFIG_SECLUDED_MEMORY */
1374
1375 /*
1376 * Insert it into the object_object/offset hash table
1377 */
1378 hash_id = vm_page_hash(object, offset);
1379 bucket = &vm_page_buckets[hash_id];
1380 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1381
1382 lck_spin_lock(bucket_lock);
1383
1384 mem->next_m = bucket->page_list;
1385 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1386 assert(mem == (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list)));
1387
1388 #if MACH_PAGE_HASH_STATS
1389 if (++bucket->cur_count > bucket->hi_count)
1390 bucket->hi_count = bucket->cur_count;
1391 #endif /* MACH_PAGE_HASH_STATS */
1392 mem->hashed = TRUE;
1393 lck_spin_unlock(bucket_lock);
1394 }
1395
1396 {
1397 unsigned int cache_attr;
1398
1399 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1400
1401 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1402 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1403 }
1404 }
1405 /*
1406 * Now link into the object's list of backed pages.
1407 */
1408 vm_page_queue_enter(&object->memq, mem, vm_page_t, listq);
1409 object->memq_hint = mem;
1410 mem->tabled = TRUE;
1411
1412 /*
1413 * Show that the object has one more resident page.
1414 */
1415
1416 object->resident_page_count++;
1417 if (VM_PAGE_WIRED(mem)) {
1418 assert(mem->wire_count > 0);
1419
1420 if (!mem->private && !mem->fictitious)
1421 {
1422 if (!object->wired_page_count)
1423 {
1424 assert(VM_KERN_MEMORY_NONE != tag);
1425 object->wire_tag = tag;
1426 VM_OBJECT_WIRED(object);
1427 }
1428 }
1429 object->wired_page_count++;
1430 }
1431 assert(object->resident_page_count >= object->wired_page_count);
1432
1433 if (batch_accounting == FALSE) {
1434 if (object->internal) {
1435 OSAddAtomic(1, &vm_page_internal_count);
1436 } else {
1437 OSAddAtomic(1, &vm_page_external_count);
1438 }
1439 }
1440
1441 /*
1442 * It wouldn't make sense to insert a "reusable" page in
1443 * an object (the page would have been marked "reusable" only
1444 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1445 * in the object at that time).
1446 * But a page could be inserted in an "all_reusable" object, if
1447 * something faults it in (a vm_read() from another task or a
1448 * "use-after-free" issue in user space, for example). It can
1449 * also happen if we're relocating a page from that object to
1450 * a different physical page during a physically-contiguous
1451 * allocation.
1452 */
1453 assert(!mem->reusable);
1454 if (object->all_reusable) {
1455 OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1456 }
1457
1458 if (object->purgable == VM_PURGABLE_DENY) {
1459 owner = TASK_NULL;
1460 } else {
1461 owner = object->vo_purgeable_owner;
1462 }
1463 if (owner &&
1464 (object->purgable == VM_PURGABLE_NONVOLATILE ||
1465 VM_PAGE_WIRED(mem))) {
1466
1467 if (delayed_ledger_update)
1468 *delayed_ledger_update += PAGE_SIZE;
1469 else {
1470 /* more non-volatile bytes */
1471 ledger_credit(owner->ledger,
1472 task_ledgers.purgeable_nonvolatile,
1473 PAGE_SIZE);
1474 /* more footprint */
1475 ledger_credit(owner->ledger,
1476 task_ledgers.phys_footprint,
1477 PAGE_SIZE);
1478 }
1479
1480 } else if (owner &&
1481 (object->purgable == VM_PURGABLE_VOLATILE ||
1482 object->purgable == VM_PURGABLE_EMPTY)) {
1483 assert(! VM_PAGE_WIRED(mem));
1484 /* more volatile bytes */
1485 ledger_credit(owner->ledger,
1486 task_ledgers.purgeable_volatile,
1487 PAGE_SIZE);
1488 }
1489
1490 if (object->purgable == VM_PURGABLE_VOLATILE) {
1491 if (VM_PAGE_WIRED(mem)) {
1492 OSAddAtomic(+1, &vm_page_purgeable_wired_count);
1493 } else {
1494 OSAddAtomic(+1, &vm_page_purgeable_count);
1495 }
1496 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1497 mem->vm_page_q_state == VM_PAGE_ON_THROTTLED_Q) {
1498 /*
1499 * This page belongs to a purged VM object but hasn't
1500 * been purged (because it was "busy").
1501 * It's in the "throttled" queue and hence not
1502 * visible to vm_pageout_scan(). Move it to a pageable
1503 * queue, so that it can eventually be reclaimed, instead
1504 * of lingering in the "empty" object.
1505 */
1506 if (queues_lock_held == FALSE)
1507 vm_page_lockspin_queues();
1508 vm_page_deactivate(mem);
1509 if (queues_lock_held == FALSE)
1510 vm_page_unlock_queues();
1511 }
1512
1513 #if VM_OBJECT_TRACKING_OP_MODIFIED
1514 if (vm_object_tracking_inited &&
1515 object->internal &&
1516 object->resident_page_count == 0 &&
1517 object->pager == NULL &&
1518 object->shadow != NULL &&
1519 object->shadow->copy == object) {
1520 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
1521 int numsaved = 0;
1522
1523 numsaved = OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
1524 btlog_add_entry(vm_object_tracking_btlog,
1525 object,
1526 VM_OBJECT_TRACKING_OP_MODIFIED,
1527 bt,
1528 numsaved);
1529 }
1530 #endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
1531 }
1532
1533 /*
1534 * vm_page_replace:
1535 *
1536 * Exactly like vm_page_insert, except that we first
1537 * remove any existing page at the given offset in object.
1538 *
1539 * The object must be locked.
1540 */
1541 void
1542 vm_page_replace(
1543 vm_page_t mem,
1544 vm_object_t object,
1545 vm_object_offset_t offset)
1546 {
1547 vm_page_bucket_t *bucket;
1548 vm_page_t found_m = VM_PAGE_NULL;
1549 lck_spin_t *bucket_lock;
1550 int hash_id;
1551
1552 #if 0
1553 /*
1554 * we don't hold the page queue lock
1555 * so this check isn't safe to make
1556 */
1557 VM_PAGE_CHECK(mem);
1558 #endif
1559 vm_object_lock_assert_exclusive(object);
1560 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1561 if (mem->tabled || mem->vm_page_object)
1562 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1563 "already in (obj=%p,off=0x%llx)",
1564 mem, object, offset, VM_PAGE_OBJECT(mem), mem->offset);
1565 #endif
1566 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1567
1568 assert(!VM_PAGE_PAGEABLE(mem));
1569
1570 /*
1571 * Record the object/offset pair in this page
1572 */
1573 mem->vm_page_object = VM_PAGE_PACK_OBJECT(object);
1574 mem->offset = offset;
1575
1576 /*
1577 * Insert it into the object/offset hash table,
1578 * replacing any page that might have been there.
1579 */
1580
1581 hash_id = vm_page_hash(object, offset);
1582 bucket = &vm_page_buckets[hash_id];
1583 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1584
1585 lck_spin_lock(bucket_lock);
1586
1587 if (bucket->page_list) {
1588 vm_page_packed_t *mp = &bucket->page_list;
1589 vm_page_t m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp));
1590
1591 do {
1592 /*
1593 * compare packed object pointers
1594 */
1595 if (m->vm_page_object == mem->vm_page_object && m->offset == offset) {
1596 /*
1597 * Remove old page from hash list
1598 */
1599 *mp = m->next_m;
1600 m->hashed = FALSE;
1601
1602 found_m = m;
1603 break;
1604 }
1605 mp = &m->next_m;
1606 } while ((m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp))));
1607
1608 mem->next_m = bucket->page_list;
1609 } else {
1610 mem->next_m = VM_PAGE_PACK_PTR(NULL);
1611 }
1612 /*
1613 * insert new page at head of hash list
1614 */
1615 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1616 mem->hashed = TRUE;
1617
1618 lck_spin_unlock(bucket_lock);
1619
1620 if (found_m) {
1621 /*
1622 * there was already a page at the specified
1623 * offset for this object... remove it from
1624 * the object and free it back to the free list
1625 */
1626 vm_page_free_unlocked(found_m, FALSE);
1627 }
1628 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, FALSE, FALSE, FALSE, NULL);
1629 }
1630
1631 /*
1632 * vm_page_remove: [ internal use only ]
1633 *
1634 * Removes the given mem entry from the object/offset-page
1635 * table and the object page list.
1636 *
1637 * The object must be locked.
1638 */
1639
1640 void
1641 vm_page_remove(
1642 vm_page_t mem,
1643 boolean_t remove_from_hash)
1644 {
1645 vm_page_bucket_t *bucket;
1646 vm_page_t this;
1647 lck_spin_t *bucket_lock;
1648 int hash_id;
1649 task_t owner;
1650 vm_object_t m_object;
1651
1652 m_object = VM_PAGE_OBJECT(mem);
1653
1654 XPR(XPR_VM_PAGE,
1655 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1656 m_object, mem->offset,
1657 mem, 0,0);
1658
1659 vm_object_lock_assert_exclusive(m_object);
1660 assert(mem->tabled);
1661 assert(!mem->cleaning);
1662 assert(!mem->laundry);
1663
1664 if (VM_PAGE_PAGEABLE(mem)) {
1665 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1666 }
1667 #if 0
1668 /*
1669 * we don't hold the page queue lock
1670 * so this check isn't safe to make
1671 */
1672 VM_PAGE_CHECK(mem);
1673 #endif
1674 if (remove_from_hash == TRUE) {
1675 /*
1676 * Remove from the object/offset hash table
1677 */
1678 hash_id = vm_page_hash(m_object, mem->offset);
1679 bucket = &vm_page_buckets[hash_id];
1680 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1681
1682 lck_spin_lock(bucket_lock);
1683
1684 if ((this = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list))) == mem) {
1685 /* optimize for common case */
1686
1687 bucket->page_list = mem->next_m;
1688 } else {
1689 vm_page_packed_t *prev;
1690
1691 for (prev = &this->next_m;
1692 (this = (vm_page_t)(VM_PAGE_UNPACK_PTR(*prev))) != mem;
1693 prev = &this->next_m)
1694 continue;
1695 *prev = this->next_m;
1696 }
1697 #if MACH_PAGE_HASH_STATS
1698 bucket->cur_count--;
1699 #endif /* MACH_PAGE_HASH_STATS */
1700 mem->hashed = FALSE;
1701 lck_spin_unlock(bucket_lock);
1702 }
1703 /*
1704 * Now remove from the object's list of backed pages.
1705 */
1706
1707 vm_page_remove_internal(mem);
1708
1709 /*
1710 * And show that the object has one fewer resident
1711 * page.
1712 */
1713
1714 assert(m_object->resident_page_count > 0);
1715 m_object->resident_page_count--;
1716
1717 if (m_object->internal) {
1718 #if DEBUG
1719 assert(vm_page_internal_count);
1720 #endif /* DEBUG */
1721
1722 OSAddAtomic(-1, &vm_page_internal_count);
1723 } else {
1724 assert(vm_page_external_count);
1725 OSAddAtomic(-1, &vm_page_external_count);
1726
1727 if (mem->xpmapped) {
1728 assert(vm_page_xpmapped_external_count);
1729 OSAddAtomic(-1, &vm_page_xpmapped_external_count);
1730 }
1731 }
1732 if (!m_object->internal && (m_object->objq.next || m_object->objq.prev)) {
1733 if (m_object->resident_page_count == 0)
1734 vm_object_cache_remove(m_object);
1735 }
1736
1737 if (VM_PAGE_WIRED(mem)) {
1738 assert(mem->wire_count > 0);
1739 assert(m_object->wired_page_count > 0);
1740 m_object->wired_page_count--;
1741 if (!m_object->wired_page_count) {
1742 VM_OBJECT_UNWIRED(m_object);
1743 }
1744 }
1745 assert(m_object->resident_page_count >=
1746 m_object->wired_page_count);
1747 if (mem->reusable) {
1748 assert(m_object->reusable_page_count > 0);
1749 m_object->reusable_page_count--;
1750 assert(m_object->reusable_page_count <=
1751 m_object->resident_page_count);
1752 mem->reusable = FALSE;
1753 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1754 vm_page_stats_reusable.reused_remove++;
1755 } else if (m_object->all_reusable) {
1756 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1757 vm_page_stats_reusable.reused_remove++;
1758 }
1759
1760 if (m_object->purgable == VM_PURGABLE_DENY) {
1761 owner = TASK_NULL;
1762 } else {
1763 owner = m_object->vo_purgeable_owner;
1764 }
1765 if (owner &&
1766 (m_object->purgable == VM_PURGABLE_NONVOLATILE ||
1767 VM_PAGE_WIRED(mem))) {
1768 /* less non-volatile bytes */
1769 ledger_debit(owner->ledger,
1770 task_ledgers.purgeable_nonvolatile,
1771 PAGE_SIZE);
1772 /* less footprint */
1773 ledger_debit(owner->ledger,
1774 task_ledgers.phys_footprint,
1775 PAGE_SIZE);
1776 } else if (owner &&
1777 (m_object->purgable == VM_PURGABLE_VOLATILE ||
1778 m_object->purgable == VM_PURGABLE_EMPTY)) {
1779 assert(! VM_PAGE_WIRED(mem));
1780 /* less volatile bytes */
1781 ledger_debit(owner->ledger,
1782 task_ledgers.purgeable_volatile,
1783 PAGE_SIZE);
1784 }
1785 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
1786 if (VM_PAGE_WIRED(mem)) {
1787 assert(vm_page_purgeable_wired_count > 0);
1788 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1789 } else {
1790 assert(vm_page_purgeable_count > 0);
1791 OSAddAtomic(-1, &vm_page_purgeable_count);
1792 }
1793 }
1794 if (m_object->set_cache_attr == TRUE)
1795 pmap_set_cache_attributes(VM_PAGE_GET_PHYS_PAGE(mem), 0);
1796
1797 mem->tabled = FALSE;
1798 mem->vm_page_object = 0;
1799 mem->offset = (vm_object_offset_t) -1;
1800 }
1801
1802
1803 /*
1804 * vm_page_lookup:
1805 *
1806 * Returns the page associated with the object/offset
1807 * pair specified; if none is found, VM_PAGE_NULL is returned.
1808 *
1809 * The object must be locked. No side effects.
1810 */
1811
1812 #define VM_PAGE_HASH_LOOKUP_THRESHOLD 10
1813
1814 #if DEBUG_VM_PAGE_LOOKUP
1815
1816 struct {
1817 uint64_t vpl_total;
1818 uint64_t vpl_empty_obj;
1819 uint64_t vpl_bucket_NULL;
1820 uint64_t vpl_hit_hint;
1821 uint64_t vpl_hit_hint_next;
1822 uint64_t vpl_hit_hint_prev;
1823 uint64_t vpl_fast;
1824 uint64_t vpl_slow;
1825 uint64_t vpl_hit;
1826 uint64_t vpl_miss;
1827
1828 uint64_t vpl_fast_elapsed;
1829 uint64_t vpl_slow_elapsed;
1830 } vm_page_lookup_stats __attribute__((aligned(8)));
1831
1832 #endif
1833
1834 #define KDP_VM_PAGE_WALK_MAX 1000
1835
1836 vm_page_t
1837 kdp_vm_page_lookup(
1838 vm_object_t object,
1839 vm_object_offset_t offset)
1840 {
1841 vm_page_t cur_page;
1842 int num_traversed = 0;
1843
1844 if (not_in_kdp) {
1845 panic("panic: kdp_vm_page_lookup done outside of kernel debugger");
1846 }
1847
1848 vm_page_queue_iterate(&object->memq, cur_page, vm_page_t, listq) {
1849 if (cur_page->offset == offset) {
1850 return cur_page;
1851 }
1852 num_traversed++;
1853
1854 if (num_traversed >= KDP_VM_PAGE_WALK_MAX) {
1855 return VM_PAGE_NULL;
1856 }
1857 }
1858
1859 return VM_PAGE_NULL;
1860 }
1861
1862 vm_page_t
1863 vm_page_lookup(
1864 vm_object_t object,
1865 vm_object_offset_t offset)
1866 {
1867 vm_page_t mem;
1868 vm_page_bucket_t *bucket;
1869 vm_page_queue_entry_t qe;
1870 lck_spin_t *bucket_lock = NULL;
1871 int hash_id;
1872 #if DEBUG_VM_PAGE_LOOKUP
1873 uint64_t start, elapsed;
1874
1875 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_total);
1876 #endif
1877 vm_object_lock_assert_held(object);
1878
1879 if (object->resident_page_count == 0) {
1880 #if DEBUG_VM_PAGE_LOOKUP
1881 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_empty_obj);
1882 #endif
1883 return (VM_PAGE_NULL);
1884 }
1885
1886 mem = object->memq_hint;
1887
1888 if (mem != VM_PAGE_NULL) {
1889 assert(VM_PAGE_OBJECT(mem) == object);
1890
1891 if (mem->offset == offset) {
1892 #if DEBUG_VM_PAGE_LOOKUP
1893 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint);
1894 #endif
1895 return (mem);
1896 }
1897 qe = (vm_page_queue_entry_t)vm_page_queue_next(&mem->listq);
1898
1899 if (! vm_page_queue_end(&object->memq, qe)) {
1900 vm_page_t next_page;
1901
1902 next_page = (vm_page_t)((uintptr_t)qe);
1903 assert(VM_PAGE_OBJECT(next_page) == object);
1904
1905 if (next_page->offset == offset) {
1906 object->memq_hint = next_page; /* new hint */
1907 #if DEBUG_VM_PAGE_LOOKUP
1908 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_next);
1909 #endif
1910 return (next_page);
1911 }
1912 }
1913 qe = (vm_page_queue_entry_t)vm_page_queue_prev(&mem->listq);
1914
1915 if (! vm_page_queue_end(&object->memq, qe)) {
1916 vm_page_t prev_page;
1917
1918 prev_page = (vm_page_t)((uintptr_t)qe);
1919 assert(VM_PAGE_OBJECT(prev_page) == object);
1920
1921 if (prev_page->offset == offset) {
1922 object->memq_hint = prev_page; /* new hint */
1923 #if DEBUG_VM_PAGE_LOOKUP
1924 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_prev);
1925 #endif
1926 return (prev_page);
1927 }
1928 }
1929 }
1930 /*
1931 * Search the hash table for this object/offset pair
1932 */
1933 hash_id = vm_page_hash(object, offset);
1934 bucket = &vm_page_buckets[hash_id];
1935
1936 /*
1937 * since we hold the object lock, we are guaranteed that no
1938 * new pages can be inserted into this object... this in turn
1939 * guarantees that the page we're looking for can't exist
1940 * if the bucket it hashes to is currently NULL even when looked
1941 * at outside the scope of the hash bucket lock... this is a
1942 * really cheap optimization to avoid taking the lock
1943 */
1944 if (!bucket->page_list) {
1945 #if DEBUG_VM_PAGE_LOOKUP
1946 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_bucket_NULL);
1947 #endif
1948 return (VM_PAGE_NULL);
1949 }
1950
1951 #if DEBUG_VM_PAGE_LOOKUP
1952 start = mach_absolute_time();
1953 #endif
1954 if (object->resident_page_count <= VM_PAGE_HASH_LOOKUP_THRESHOLD) {
1955 /*
1956 * on average, it's roughly 3 times faster to walk a short memq list
1957 * than to take the spin lock and go through the hash list
1958 */
1959 mem = (vm_page_t)vm_page_queue_first(&object->memq);
1960
1961 while (!vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem)) {
1962
1963 if (mem->offset == offset)
1964 break;
1965
1966 mem = (vm_page_t)vm_page_queue_next(&mem->listq);
1967 }
1968 if (vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem))
1969 mem = NULL;
1970 } else {
1971 vm_page_object_t packed_object;
1972
1973 packed_object = VM_PAGE_PACK_OBJECT(object);
1974
1975 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1976
1977 lck_spin_lock(bucket_lock);
1978
1979 for (mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
1980 mem != VM_PAGE_NULL;
1981 mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->next_m))) {
1982 #if 0
1983 /*
1984 * we don't hold the page queue lock
1985 * so this check isn't safe to make
1986 */
1987 VM_PAGE_CHECK(mem);
1988 #endif
1989 if ((mem->vm_page_object == packed_object) && (mem->offset == offset))
1990 break;
1991 }
1992 lck_spin_unlock(bucket_lock);
1993 }
1994
1995 #if DEBUG_VM_PAGE_LOOKUP
1996 elapsed = mach_absolute_time() - start;
1997
1998 if (bucket_lock) {
1999 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_slow);
2000 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_slow_elapsed);
2001 } else {
2002 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_fast);
2003 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_fast_elapsed);
2004 }
2005 if (mem != VM_PAGE_NULL)
2006 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit);
2007 else
2008 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_miss);
2009 #endif
2010 if (mem != VM_PAGE_NULL) {
2011 assert(VM_PAGE_OBJECT(mem) == object);
2012
2013 object->memq_hint = mem;
2014 }
2015 return (mem);
2016 }
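
/*
 * Illustrative sketch, kept under #if 0 so it is never compiled: the
 * usual calling pattern for vm_page_lookup() as described in its header
 * comment above -- take the object lock, look the page up, and only use
 * the result while that lock is held.  The helper name below is
 * hypothetical and not part of this file.
 */
#if 0
static boolean_t
example_offset_is_resident(
    vm_object_t        object,
    vm_object_offset_t offset)
{
    vm_page_t  m;
    boolean_t  resident;

    vm_object_lock(object);

    m = vm_page_lookup(object, offset);
    resident = (m != VM_PAGE_NULL);

    vm_object_unlock(object);

    return resident;
}
#endif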
2017
2018
2019 /*
2020 * vm_page_rename:
2021 *
2022 * Move the given memory entry from its
2023 * current object to the specified target object/offset.
2024 *
2025 * The object must be locked.
2026 */
2027 void
2028 vm_page_rename(
2029 vm_page_t mem,
2030 vm_object_t new_object,
2031 vm_object_offset_t new_offset,
2032 boolean_t encrypted_ok)
2033 {
2034 boolean_t internal_to_external, external_to_internal;
2035 vm_tag_t tag;
2036 vm_object_t m_object;
2037
2038 m_object = VM_PAGE_OBJECT(mem);
2039
2040 assert(m_object != new_object);
2041 assert(m_object);
2042
2043 /*
2044 * ENCRYPTED SWAP:
2045 * The encryption key is based on the page's memory object
2046 * (aka "pager") and paging offset. Moving the page to
2047 * another VM object changes its "pager" and "paging_offset"
2048 * so it has to be decrypted first, or we would lose the key.
2049 *
2050 * One exception is VM object collapsing, where we transfer pages
2051 * from one backing object to its parent object. This operation also
2052 * transfers the paging information, so the <pager,paging_offset> info
2053 * should remain consistent. The caller (vm_object_do_collapse())
2054 * sets "encrypted_ok" in this case.
2055 */
2056 if (!encrypted_ok && mem->encrypted) {
2057 panic("vm_page_rename: page %p is encrypted\n", mem);
2058 }
2059
2060 XPR(XPR_VM_PAGE,
2061 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
2062 new_object, new_offset,
2063 mem, 0,0);
2064
2065 /*
2066 * Changes to a page's object linkage require the page queues lock
2067 * because the pageout daemon uses that lock to get to the object.
2068 */
2069 vm_page_lockspin_queues();
2070
2071 internal_to_external = FALSE;
2072 external_to_internal = FALSE;
2073
2074 if (mem->vm_page_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q) {
2075 /*
2076 * it's much easier to get the vm_page_pageable_xxx accounting correct
2077 * if we first move the page to the active queue... it's going to end
2078 * up there anyway, and we don't call vm_page_rename() frequently enough
2079 * for this to matter.
2080 */
2081 vm_page_queues_remove(mem, FALSE);
2082 vm_page_activate(mem);
2083 }
2084 if (VM_PAGE_PAGEABLE(mem)) {
2085 if (m_object->internal && !new_object->internal) {
2086 internal_to_external = TRUE;
2087 }
2088 if (!m_object->internal && new_object->internal) {
2089 external_to_internal = TRUE;
2090 }
2091 }
2092
2093 tag = m_object->wire_tag;
2094 vm_page_remove(mem, TRUE);
2095 vm_page_insert_internal(mem, new_object, new_offset, tag, TRUE, TRUE, FALSE, FALSE, NULL);
2096
2097 if (internal_to_external) {
2098 vm_page_pageable_internal_count--;
2099 vm_page_pageable_external_count++;
2100 } else if (external_to_internal) {
2101 vm_page_pageable_external_count--;
2102 vm_page_pageable_internal_count++;
2103 }
2104
2105 vm_page_unlock_queues();
2106 }
2107
2108 /*
2109 * vm_page_init:
2110 *
2111 * Initialize the fields in a new page.
2112 * This takes a structure with arbitrary (uninitialized) values and initializes it
2113 * so that it can be given to vm_page_release or vm_page_insert.
2114 */
2115 void
2116 vm_page_init(
2117 vm_page_t mem,
2118 ppnum_t phys_page,
2119 boolean_t lopage)
2120 {
2121 assert(phys_page);
2122
2123 #if DEBUG
2124 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
2125 if (!(pmap_valid_page(phys_page))) {
2126 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
2127 }
2128 }
2129 #endif
2130 *mem = vm_page_template;
2131
2132 VM_PAGE_SET_PHYS_PAGE(mem, phys_page);
2133 #if 0
2134 /*
2135 * we're leaving this turned off for now... currently pages
2136 * come off the free list and are either immediately dirtied/referenced
2137 * due to zero-fill or COW faults, or are used to read or write files...
2138 * in the file I/O case, the UPL mechanism takes care of clearing
2139 * the state of the HW ref/mod bits in a somewhat fragile way.
2140 * Since we may change the way this works in the future (to toughen it up),
2141 * I'm leaving this as a reminder of where these bits could get cleared
2142 */
2143
2144 /*
2145 * make sure both the h/w referenced and modified bits are
2146 * clear at this point... we are especially dependent on
2147 * not finding a 'stale' h/w modified in a number of spots
2148 * once this page goes back into use
2149 */
2150 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
2151 #endif
2152 mem->lopage = lopage;
2153 }
2154
2155 /*
2156 * vm_page_grab_fictitious:
2157 *
2158 * Remove a fictitious page from the zone pool.
2159 * Returns VM_PAGE_NULL if the zone has no pages available.
2160 */
2161 int c_vm_page_grab_fictitious = 0;
2162 int c_vm_page_grab_fictitious_failed = 0;
2163 int c_vm_page_release_fictitious = 0;
2164 int c_vm_page_more_fictitious = 0;
2165
2166 vm_page_t
2167 vm_page_grab_fictitious_common(
2168 ppnum_t phys_addr)
2169 {
2170 vm_page_t m;
2171
2172 if ((m = (vm_page_t)zget(vm_page_zone))) {
2173
2174 vm_page_init(m, phys_addr, FALSE);
2175 m->fictitious = TRUE;
2176
2177 c_vm_page_grab_fictitious++;
2178 } else
2179 c_vm_page_grab_fictitious_failed++;
2180
2181 return m;
2182 }
2183
2184 vm_page_t
2185 vm_page_grab_fictitious(void)
2186 {
2187 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
2188 }
2189
2190 vm_page_t
2191 vm_page_grab_guard(void)
2192 {
2193 return vm_page_grab_fictitious_common(vm_page_guard_addr);
2194 }
2195
2196
2197 /*
2198 * vm_page_release_fictitious:
2199 *
2200 * Release a fictitious page to the zone pool
2201 */
2202 void
2203 vm_page_release_fictitious(
2204 vm_page_t m)
2205 {
2206 assert((m->vm_page_q_state == VM_PAGE_NOT_ON_Q) || (m->vm_page_q_state == VM_PAGE_IS_WIRED));
2207 assert(m->fictitious);
2208 assert(VM_PAGE_GET_PHYS_PAGE(m) == vm_page_fictitious_addr ||
2209 VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr);
2210
2211 c_vm_page_release_fictitious++;
2212
2213 zfree(vm_page_zone, m);
2214 }
2215
2216 /*
2217 * vm_page_more_fictitious:
2218 *
2219 * Add more fictitious pages to the zone.
2220 * Allowed to block. This routine is intimately tied
2221 * to the zones code, for several reasons:
2222 * 1. we need to carve some page structures out of physical
2223 * memory before zones work, so they _cannot_ come from
2224 * the zone_map.
2225 * 2. the zone needs to be collectable in order to prevent
2226 * growth without bound. These structures are used by
2227 * the device pager (by the hundreds and thousands), as
2228 * private pages for pageout, and as blocking pages for
2229 * pagein. Temporary bursts in demand should not result in
2230 * permanent allocation of a resource.
2231 * 3. To smooth allocation humps, we allocate single pages
2232 * with kernel_memory_allocate(), and cram them into the
2233 * zone.
2234 */
2235
2236 void vm_page_more_fictitious(void)
2237 {
2238 vm_offset_t addr;
2239 kern_return_t retval;
2240
2241 c_vm_page_more_fictitious++;
2242
2243 /*
2244 * Allocate a single page from the zone_map. Do not wait if no physical
2245 * pages are immediately available, and do not zero the space. We need
2246 * our own blocking lock here to prevent having multiple,
2247 * simultaneous requests from piling up on the zone_map lock. Exactly
2248 * one (of our) threads should be potentially waiting on the map lock.
2249 * If the winner is not vm-privileged, then the page allocation will fail,
2250 * and it will temporarily block here in the vm_page_wait().
2251 */
2252 lck_mtx_lock(&vm_page_alloc_lock);
2253 /*
2254 * If another thread allocated space, just bail out now.
2255 */
2256 if (zone_free_count(vm_page_zone) > 5) {
2257 /*
2258 * The number "5" is a small number that is larger than the
2259 * number of fictitious pages that any single caller will
2260 * attempt to allocate. Otherwise, a thread will attempt to
2261 * acquire a fictitious page (vm_page_grab_fictitious), fail,
2262 * release all of the resources and locks already acquired,
2263 * and then call this routine. This routine finds the pages
2264 * that the caller released, and so does not allocate new space.
2265 * The process would repeat indefinitely. The largest known number
2266 * of fictitious pages required in this manner is 2. 5 is
2267 * simply a somewhat larger number.
2268 */
2269 lck_mtx_unlock(&vm_page_alloc_lock);
2270 return;
2271 }
2272
2273 retval = kernel_memory_allocate(zone_map,
2274 &addr, PAGE_SIZE, VM_PROT_ALL,
2275 KMA_KOBJECT|KMA_NOPAGEWAIT, VM_KERN_MEMORY_ZONE);
2276 if (retval != KERN_SUCCESS) {
2277 /*
2278 * No page was available. Drop the
2279 * lock to give another thread a chance at it, and
2280 * wait for the pageout daemon to make progress.
2281 */
2282 lck_mtx_unlock(&vm_page_alloc_lock);
2283 vm_page_wait(THREAD_UNINT);
2284 return;
2285 }
2286
2287 zcram(vm_page_zone, addr, PAGE_SIZE);
2288
2289 lck_mtx_unlock(&vm_page_alloc_lock);
2290 }
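
/*
 * Illustrative sketch, kept under #if 0 so it is never compiled: the
 * grab/replenish/retry pattern that reason 2 in the comment above
 * alludes to.  A caller grabs a fictitious page and, if the zone is
 * empty, asks for more (which may block) and tries again.  The helper
 * name is hypothetical.
 */
#if 0
static vm_page_t
example_grab_fictitious_retry(void)
{
    vm_page_t  m;

    while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL) {
        /* zone is empty... replenish it and retry */
        vm_page_more_fictitious();
    }
    return m;
}
#endif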
2291
2292
2293 /*
2294 * vm_pool_low():
2295 *
2296 * Return true if it is not likely that a non-vm_privileged thread
2297 * can get memory without blocking. Advisory only, since the
2298 * situation may change under us.
2299 */
2300 int
2301 vm_pool_low(void)
2302 {
2303 /* No locking, at worst we will fib. */
2304 return( vm_page_free_count <= vm_page_free_reserved );
2305 }
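
/*
 * Illustrative sketch, kept under #if 0 so it is never compiled: how a
 * caller typically treats vm_pool_low() -- purely as a hint to skip
 * optional work, never as a guarantee that an allocation will or won't
 * succeed.  The helper name and the "optional" allocation it gates are
 * hypothetical.
 */
#if 0
static vm_page_t
example_optional_page_grab(void)
{
    if (vm_pool_low()) {
        /* memory looks tight... skip the optional allocation */
        return VM_PAGE_NULL;
    }
    /* advisory only: this grab may still come up empty */
    return vm_page_grab();
}
#endif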
2306
2307
2308 #if CONFIG_BACKGROUND_QUEUE
2309
2310 void
2311 vm_page_update_background_state(vm_page_t mem)
2312 {
2313 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2314 return;
2315
2316 if (mem->vm_page_in_background == FALSE)
2317 return;
2318
2319 #if BACKGROUNDQ_BASED_ON_QOS
2320 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS) <= THREAD_QOS_LEGACY)
2321 return;
2322 #else
2323 task_t my_task;
2324
2325 my_task = current_task();
2326
2327 if (my_task) {
2328 if (proc_get_effective_task_policy(my_task, TASK_POLICY_DARWIN_BG))
2329 return;
2330 }
2331 #endif
2332 vm_page_lockspin_queues();
2333
2334 mem->vm_page_in_background = FALSE;
2335 vm_page_background_promoted_count++;
2336
2337 vm_page_remove_from_backgroundq(mem);
2338
2339 vm_page_unlock_queues();
2340 }
2341
2342
2343 void
2344 vm_page_assign_background_state(vm_page_t mem)
2345 {
2346 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2347 return;
2348
2349 #if BACKGROUNDQ_BASED_ON_QOS
2350 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS) <= THREAD_QOS_LEGACY)
2351 mem->vm_page_in_background = TRUE;
2352 else
2353 mem->vm_page_in_background = FALSE;
2354 #else
2355 task_t my_task;
2356
2357 my_task = current_task();
2358
2359 if (my_task)
2360 mem->vm_page_in_background = proc_get_effective_task_policy(my_task, TASK_POLICY_DARWIN_BG);
2361 #endif
2362 }
2363
2364
2365 void
2366 vm_page_remove_from_backgroundq(
2367 vm_page_t mem)
2368 {
2369 vm_object_t m_object;
2370
2371 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2372
2373 if (mem->vm_page_on_backgroundq) {
2374 vm_page_queue_remove(&vm_page_queue_background, mem, vm_page_t, vm_page_backgroundq);
2375
2376 mem->vm_page_backgroundq.next = 0;
2377 mem->vm_page_backgroundq.prev = 0;
2378 mem->vm_page_on_backgroundq = FALSE;
2379
2380 vm_page_background_count--;
2381
2382 m_object = VM_PAGE_OBJECT(mem);
2383
2384 if (m_object->internal)
2385 vm_page_background_internal_count--;
2386 else
2387 vm_page_background_external_count--;
2388 } else {
2389 assert(VM_PAGE_UNPACK_PTR(mem->vm_page_backgroundq.next) == (uintptr_t)NULL &&
2390 VM_PAGE_UNPACK_PTR(mem->vm_page_backgroundq.prev) == (uintptr_t)NULL);
2391 }
2392 }
2393
2394
2395 void
2396 vm_page_add_to_backgroundq(
2397 vm_page_t mem,
2398 boolean_t first)
2399 {
2400 vm_object_t m_object;
2401
2402 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2403
2404 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2405 return;
2406
2407 if (mem->vm_page_on_backgroundq == FALSE) {
2408
2409 m_object = VM_PAGE_OBJECT(mem);
2410
2411 if (vm_page_background_exclude_external && !m_object->internal)
2412 return;
2413
2414 if (first == TRUE)
2415 vm_page_queue_enter_first(&vm_page_queue_background, mem, vm_page_t, vm_page_backgroundq);
2416 else
2417 vm_page_queue_enter(&vm_page_queue_background, mem, vm_page_t, vm_page_backgroundq);
2418 mem->vm_page_on_backgroundq = TRUE;
2419
2420 vm_page_background_count++;
2421
2422 if (m_object->internal)
2423 vm_page_background_internal_count++;
2424 else
2425 vm_page_background_external_count++;
2426 }
2427 }
2428
2429 #endif
2430
2431 /*
2432 * this is an interface to support bring-up of drivers
2433 * on platforms with physical memory > 4G...
2434 */
2435 int vm_himemory_mode = 2;
2436
2437
2438 /*
2439 * this interface exists to support hardware controllers
2440 * incapable of generating DMAs with more than 32 bits
2441 * of address on platforms with physical memory > 4G...
2442 */
2443 unsigned int vm_lopages_allocated_q = 0;
2444 unsigned int vm_lopages_allocated_cpm_success = 0;
2445 unsigned int vm_lopages_allocated_cpm_failed = 0;
2446 vm_page_queue_head_t vm_lopage_queue_free __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
2447
2448 vm_page_t
2449 vm_page_grablo(void)
2450 {
2451 vm_page_t mem;
2452
2453 if (vm_lopage_needed == FALSE)
2454 return (vm_page_grab());
2455
2456 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2457
2458 if ( !vm_page_queue_empty(&vm_lopage_queue_free)) {
2459 vm_page_queue_remove_first(&vm_lopage_queue_free,
2460 mem,
2461 vm_page_t,
2462 pageq);
2463 assert(vm_lopage_free_count);
2464 assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
2465 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
2466
2467 vm_lopage_free_count--;
2468 vm_lopages_allocated_q++;
2469
2470 if (vm_lopage_free_count < vm_lopage_lowater)
2471 vm_lopage_refill = TRUE;
2472
2473 lck_mtx_unlock(&vm_page_queue_free_lock);
2474
2475 #if CONFIG_BACKGROUND_QUEUE
2476 vm_page_assign_background_state(mem);
2477 #endif
2478 } else {
2479 lck_mtx_unlock(&vm_page_queue_free_lock);
2480
2481 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
2482
2483 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2484 vm_lopages_allocated_cpm_failed++;
2485 lck_mtx_unlock(&vm_page_queue_free_lock);
2486
2487 return (VM_PAGE_NULL);
2488 }
2489 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
2490
2491 mem->busy = TRUE;
2492
2493 vm_page_lockspin_queues();
2494
2495 mem->gobbled = FALSE;
2496 vm_page_gobble_count--;
2497 vm_page_wire_count--;
2498
2499 vm_lopages_allocated_cpm_success++;
2500 vm_page_unlock_queues();
2501 }
2502 assert(mem->busy);
2503 assert(!mem->pmapped);
2504 assert(!mem->wpmapped);
2505 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2506
2507 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2508
2509 return (mem);
2510 }
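
/*
 * Illustrative sketch, kept under #if 0 so it is never compiled: a
 * driver-style caller that must hand a page to a 32-bit DMA engine
 * uses vm_page_grablo() and has to be prepared for it to fail.  The
 * helper name is hypothetical.
 */
#if 0
static vm_page_t
example_grab_dma32_page(void)
{
    vm_page_t  m;

    m = vm_page_grablo();
    if (m == VM_PAGE_NULL) {
        /* no low page available right now... back off or retry later */
        return VM_PAGE_NULL;
    }
    /* page comes back busy and not yet inserted in any object */
    return m;
}
#endif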
2511
2512
2513 /*
2514 * vm_page_grab:
2515 *
2516 * first try to grab a page from the per-cpu free list...
2517 * this must be done while pre-emption is disabled... if
2518 * a page is available, we're done...
2519 * if no page is available, grab the vm_page_queue_free_lock
2520 * and see if current number of free pages would allow us
2521 * to grab at least 1... if not, return VM_PAGE_NULL as before...
2522 * if there are pages available, disable preemption and
2523 * recheck the state of the per-cpu free list... we could
2524 * have been preempted and moved to a different cpu, or
2525 * some other thread could have re-filled it... if still
2526 * empty, figure out how many pages we can steal from the
2527 * global free queue and move to the per-cpu queue...
2528 * return 1 of these pages when done... only wakeup the
2529 * pageout_scan thread if we moved pages from the global
2530 * list... no need for the wakeup if we've satisfied the
2531 * request from the per-cpu queue.
2532 */
2533
2534 #if CONFIG_SECLUDED_MEMORY
2535 vm_page_t vm_page_grab_secluded(void);
2536 #endif /* CONFIG_SECLUDED_MEMORY */
2537
2538 vm_page_t
2539 vm_page_grab(void)
2540 {
2541 return vm_page_grab_options(0);
2542 }
2543
2544 vm_page_t
2545 vm_page_grab_options(
2546 int grab_options)
2547 {
2548 vm_page_t mem;
2549
2550 disable_preemption();
2551
2552 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2553 return_page_from_cpu_list:
2554 assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
2555
2556 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2557 PROCESSOR_DATA(current_processor(), free_pages) = mem->snext;
2558
2559 enable_preemption();
2560 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2561 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
2562
2563 assert(mem->listq.next == 0 && mem->listq.prev == 0);
2564 assert(mem->tabled == FALSE);
2565 assert(mem->vm_page_object == 0);
2566 assert(!mem->laundry);
2567 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
2568 assert(mem->busy);
2569 assert(!mem->encrypted);
2570 assert(!mem->pmapped);
2571 assert(!mem->wpmapped);
2572 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2573
2574 #if CONFIG_BACKGROUND_QUEUE
2575 vm_page_assign_background_state(mem);
2576 #endif
2577 return mem;
2578 }
2579 enable_preemption();
2580
2581
2582 /*
2583 * Optionally produce warnings if the wire or gobble
2584 * counts exceed some threshold.
2585 */
2586 #if VM_PAGE_WIRE_COUNT_WARNING
2587 if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) {
2588 printf("mk: vm_page_grab(): high wired page count of %d\n",
2589 vm_page_wire_count);
2590 }
2591 #endif
2592 #if VM_PAGE_GOBBLE_COUNT_WARNING
2593 if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) {
2594 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
2595 vm_page_gobble_count);
2596 }
2597 #endif
2598
2599 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2600
2601 /*
2602 * Only let privileged threads (involved in pageout)
2603 * dip into the reserved pool.
2604 */
2605 if ((vm_page_free_count < vm_page_free_reserved) &&
2606 !(current_thread()->options & TH_OPT_VMPRIV)) {
2607 /* no page for us in the free queue... */
2608 lck_mtx_unlock(&vm_page_queue_free_lock);
2609 mem = VM_PAGE_NULL;
2610
2611 #if CONFIG_SECLUDED_MEMORY
2612 /* ... but can we try and grab from the secluded queue? */
2613 if (vm_page_secluded_count > 0 &&
2614 ((grab_options & VM_PAGE_GRAB_SECLUDED) ||
2615 task_can_use_secluded_mem(current_task()))) {
2616 mem = vm_page_grab_secluded();
2617 if (grab_options & VM_PAGE_GRAB_SECLUDED) {
2618 vm_page_secluded.grab_for_iokit++;
2619 if (mem) {
2620 vm_page_secluded.grab_for_iokit_success++;
2621 }
2622 }
2623 if (mem) {
2624 VM_CHECK_MEMORYSTATUS;
2625 return mem;
2626 }
2627 }
2628 #else /* CONFIG_SECLUDED_MEMORY */
2629 (void) grab_options;
2630 #endif /* CONFIG_SECLUDED_MEMORY */
2631 }
2632 else {
2633 vm_page_t head;
2634 vm_page_t tail;
2635 unsigned int pages_to_steal;
2636 unsigned int color;
2637
2638 while ( vm_page_free_count == 0 ) {
2639
2640 lck_mtx_unlock(&vm_page_queue_free_lock);
2641 /*
2642 * must be a privileged thread to be
2643 * in this state since a non-privileged
2644 * thread would have bailed if we were
2645 * under the vm_page_free_reserved mark
2646 */
2647 VM_PAGE_WAIT();
2648 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2649 }
2650
2651 disable_preemption();
2652
2653 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2654 lck_mtx_unlock(&vm_page_queue_free_lock);
2655
2656 /*
2657 * we got preempted and moved to another processor
2658 * or we got preempted and someone else ran and filled the cache
2659 */
2660 goto return_page_from_cpu_list;
2661 }
2662 if (vm_page_free_count <= vm_page_free_reserved)
2663 pages_to_steal = 1;
2664 else {
2665 if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved))
2666 pages_to_steal = vm_free_magazine_refill_limit;
2667 else
2668 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
2669 }
2670 color = PROCESSOR_DATA(current_processor(), start_color);
2671 head = tail = NULL;
2672
2673 vm_page_free_count -= pages_to_steal;
2674
2675 while (pages_to_steal--) {
2676
2677 while (vm_page_queue_empty(&vm_page_queue_free[color].qhead))
2678 color = (color + 1) & vm_color_mask;
2679
2680 vm_page_queue_remove_first(&vm_page_queue_free[color].qhead,
2681 mem,
2682 vm_page_t,
2683 pageq);
2684 assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_Q);
2685
2686 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2687
2688 color = (color + 1) & vm_color_mask;
2689
2690 if (head == NULL)
2691 head = mem;
2692 else
2693 tail->snext = mem;
2694 tail = mem;
2695
2696 assert(mem->listq.next == 0 && mem->listq.prev == 0);
2697 assert(mem->tabled == FALSE);
2698 assert(mem->vm_page_object == 0);
2699 assert(!mem->laundry);
2700
2701 mem->vm_page_q_state = VM_PAGE_ON_FREE_LOCAL_Q;
2702
2703 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
2704 assert(mem->busy);
2705 assert(!mem->encrypted);
2706 assert(!mem->pmapped);
2707 assert(!mem->wpmapped);
2708 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2709 }
2710 lck_mtx_unlock(&vm_page_queue_free_lock);
2711
2712 PROCESSOR_DATA(current_processor(), free_pages) = head->snext;
2713 PROCESSOR_DATA(current_processor(), start_color) = color;
2714
2715 /*
2716 * satisfy this request
2717 */
2718 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2719 mem = head;
2720 assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
2721
2722 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2723 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
2724
2725 enable_preemption();
2726 }
2727 /*
2728 * Decide if we should poke the pageout daemon.
2729 * We do this if the free count is less than the low
2730 * water mark, or if the free count is less than the high
2731 * water mark (but above the low water mark) and the inactive
2732 * count is less than its target.
2733 *
2734 * We don't have the counts locked ... if they change a little,
2735 * it doesn't really matter.
2736 */
2737 if ((vm_page_free_count < vm_page_free_min) ||
2738 ((vm_page_free_count < vm_page_free_target) &&
2739 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
2740 thread_wakeup((event_t) &vm_page_free_wanted);
2741 #if CONFIG_BACKGROUND_QUEUE
2742 if (vm_page_background_mode == VM_PAGE_BG_LEVEL_3 && (vm_page_background_count > vm_page_background_limit))
2743 thread_wakeup((event_t) &vm_page_free_wanted);
2744 #endif
2745
2746 VM_CHECK_MEMORYSTATUS;
2747
2748 if (mem) {
2749 // dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
2750
2751 #if CONFIG_BACKGROUND_QUEUE
2752 vm_page_assign_background_state(mem);
2753 #endif
2754 }
2755 return mem;
2756 }
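
/*
 * Illustrative sketch, kept under #if 0 so it is never compiled: the
 * grab/wait loop implied by the big comment above vm_page_grab().  When
 * the free queues are empty the caller sleeps in VM_PAGE_WAIT() until
 * the pageout daemon frees pages, then tries again.  The helper name is
 * hypothetical.
 */
#if 0
static vm_page_t
example_grab_page_can_block(void)
{
    vm_page_t  m;

    while ((m = vm_page_grab()) == VM_PAGE_NULL) {
        /* uninterruptible wait: always worth retrying */
        VM_PAGE_WAIT();
    }
    return m;
}
#endif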
2757
2758 #if CONFIG_SECLUDED_MEMORY
2759 vm_page_t
2760 vm_page_grab_secluded(void)
2761 {
2762 vm_page_t mem;
2763 vm_object_t object;
2764 int refmod_state;
2765
2766 if (vm_page_secluded_count == 0) {
2767 /* no secluded pages to grab... */
2768 return VM_PAGE_NULL;
2769 }
2770
2771 /* secluded queue is protected by the VM page queue lock */
2772 vm_page_lock_queues();
2773
2774 if (vm_page_secluded_count == 0) {
2775 /* no secluded pages to grab... */
2776 vm_page_unlock_queues();
2777 return VM_PAGE_NULL;
2778 }
2779
2780 #if 00
2781 /* can we grab from the secluded queue? */
2782 if (vm_page_secluded_count > vm_page_secluded_target ||
2783 (vm_page_secluded_count > 0 &&
2784 task_can_use_secluded_mem(current_task()))) {
2785 /* OK */
2786 } else {
2787 /* can't grab from secluded queue... */
2788 vm_page_unlock_queues();
2789 return VM_PAGE_NULL;
2790 }
2791 #endif
2792
2793 /* we can grab a page from secluded queue! */
2794 assert((vm_page_secluded_count_free +
2795 vm_page_secluded_count_inuse) ==
2796 vm_page_secluded_count);
2797 if (current_task()->task_can_use_secluded_mem) {
2798 assert(num_tasks_can_use_secluded_mem > 0);
2799 }
2800 assert(!vm_page_queue_empty(&vm_page_queue_secluded));
2801 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2802 vm_page_queue_remove_first(&vm_page_queue_secluded,
2803 mem,
2804 vm_page_t,
2805 pageq);
2806 assert(mem->vm_page_q_state == VM_PAGE_ON_SECLUDED_Q);
2807
2808 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2809 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
2810 vm_page_secluded_count--;
2811
2812 object = VM_PAGE_OBJECT(mem);
2813
2814 assert(!mem->fictitious);
2815 assert(!VM_PAGE_WIRED(mem));
2816 if (object == VM_OBJECT_NULL) {
2817 /* free for grab! */
2818 assert(mem->busy);
2819 vm_page_secluded_count_free--;
2820 vm_page_unlock_queues();
2821 vm_page_secluded.grab_success_free++;
2822 return mem;
2823 }
2824
2825 vm_page_secluded_count_inuse--;
2826 assert(!object->internal);
2827 // vm_page_pageable_external_count--;
2828
2829 if (!vm_object_lock_try(object)) {
2830 // printf("SECLUDED: page %p: object %p locked\n", mem, object);
2831 vm_page_secluded.grab_failure_locked++;
2832 reactivate_secluded_page:
2833 vm_page_activate(mem);
2834 vm_page_unlock_queues();
2835 return VM_PAGE_NULL;
2836 }
2837 if (mem->busy ||
2838 mem->cleaning ||
2839 mem->laundry) {
2840 /* can't steal page in this state... */
2841 vm_object_unlock(object);
2842 vm_page_secluded.grab_failure_state++;
2843 goto reactivate_secluded_page;
2844 }
2845
2846 mem->busy = TRUE;
2847 refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
2848 if (refmod_state & VM_MEM_REFERENCED) {
2849 mem->reference = TRUE;
2850 }
2851 if (refmod_state & VM_MEM_MODIFIED) {
2852 SET_PAGE_DIRTY(mem, FALSE);
2853 }
2854 if (mem->dirty || mem->precious) {
2855 /* can't grab a dirty page; re-activate */
2856 // printf("SECLUDED: dirty page %p\n", mem);
2857 vm_page_secluded.grab_failure_dirty++;
2858 vm_object_unlock(object);
2859 goto reactivate_secluded_page;
2860 }
2861 if (mem->reference) {
2862 /* it's been used but we do need to grab a page... */
2863 }
2864 vm_page_unlock_queues();
2865
2866 /* finish what vm_page_free() would have done... */
2867 vm_page_free_prepare_object(mem, TRUE);
2868 vm_object_unlock(object);
2869 object = VM_OBJECT_NULL;
2870 if (vm_page_free_verify) {
2871 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
2872 }
2873 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
2874 assert(mem->busy);
2875 vm_page_secluded.grab_success_other++;
2876
2877 return mem;
2878 }
2879 #endif /* CONFIG_SECLUDED_MEMORY */
2880
2881 /*
2882 * vm_page_release:
2883 *
2884 * Return a page to the free list.
2885 */
2886
2887 void
2888 vm_page_release(
2889 vm_page_t mem,
2890 boolean_t page_queues_locked)
2891 {
2892 unsigned int color;
2893 int need_wakeup = 0;
2894 int need_priv_wakeup = 0;
2895 #if CONFIG_SECLUDED_MEMORY
2896 int need_secluded_wakeup = 0;
2897 #endif /* CONFIG_SECLUDED_MEMORY */
2898
2899 if (page_queues_locked) {
2900 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2901 } else {
2902 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
2903 }
2904
2905 assert(!mem->private && !mem->fictitious);
2906 if (vm_page_free_verify) {
2907 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
2908 }
2909 // dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
2910
2911 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
2912
2913 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2914
2915 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
2916 assert(mem->busy);
2917 assert(!mem->laundry);
2918 assert(mem->vm_page_object == 0);
2919 assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
2920 assert(mem->listq.next == 0 && mem->listq.prev == 0);
2921 #if CONFIG_BACKGROUND_QUEUE
2922 assert(mem->vm_page_backgroundq.next == 0 &&
2923 mem->vm_page_backgroundq.prev == 0 &&
2924 mem->vm_page_on_backgroundq == FALSE);
2925 #endif
2926 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2927 vm_lopage_free_count < vm_lopage_free_limit &&
2928 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
2929 /*
2930 * this exists to support hardware controllers
2931 * incapable of generating DMAs with more than 32 bits
2932 * of address on platforms with physical memory > 4G...
2933 */
2934 vm_page_queue_enter_first(&vm_lopage_queue_free,
2935 mem,
2936 vm_page_t,
2937 pageq);
2938 vm_lopage_free_count++;
2939
2940 if (vm_lopage_free_count >= vm_lopage_free_limit)
2941 vm_lopage_refill = FALSE;
2942
2943 mem->vm_page_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
2944 mem->lopage = TRUE;
2945 #if CONFIG_SECLUDED_MEMORY
2946 } else if (vm_page_free_count > vm_page_free_reserved &&
2947 vm_page_secluded_count < vm_page_secluded_target &&
2948 num_tasks_can_use_secluded_mem == 0) {
2949 /*
2950 * XXX FBDP TODO: also avoid refilling secluded queue
2951 * when some IOKit objects are already grabbing from it...
2952 */
2953 if (!page_queues_locked) {
2954 if (!vm_page_trylock_queues()) {
2955 /* take locks in right order */
2956 lck_mtx_unlock(&vm_page_queue_free_lock);
2957 vm_page_lock_queues();
2958 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2959 }
2960 }
2961 mem->lopage = FALSE;
2962 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2963 vm_page_queue_enter_first(&vm_page_queue_secluded,
2964 mem,
2965 vm_page_t,
2966 pageq);
2967 mem->vm_page_q_state = VM_PAGE_ON_SECLUDED_Q;
2968 vm_page_secluded_count++;
2969 vm_page_secluded_count_free++;
2970 if (!page_queues_locked) {
2971 vm_page_unlock_queues();
2972 }
2973 LCK_MTX_ASSERT(&vm_page_queue_free_lock, LCK_MTX_ASSERT_OWNED);
2974 if (vm_page_free_wanted_secluded > 0) {
2975 vm_page_free_wanted_secluded--;
2976 need_secluded_wakeup = 1;
2977 }
2978 #endif /* CONFIG_SECLUDED_MEMORY */
2979 } else {
2980 mem->lopage = FALSE;
2981 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
2982
2983 color = VM_PAGE_GET_PHYS_PAGE(mem) & vm_color_mask;
2984 vm_page_queue_enter_first(&vm_page_queue_free[color].qhead,
2985 mem,
2986 vm_page_t,
2987 pageq);
2988 vm_page_free_count++;
2989 /*
2990 * Check if we should wake up someone waiting for page.
2991 * But don't bother waking them unless they can allocate.
2992 *
2993 * We wakeup only one thread, to prevent starvation.
2994 * Because the scheduling system handles wait queues FIFO,
2995 * if we wakeup all waiting threads, one greedy thread
2996 * can starve multiple niceguy threads. When the threads
2997 * all wakeup, the greedy thread runs first, grabs the page,
2998 * and waits for another page. It will be the first to run
2999 * when the next page is freed.
3000 *
3001 * However, there is a slight danger here.
3002 * The thread we wake might not use the free page.
3003 * Then the other threads could wait indefinitely
3004 * while the page goes unused. To forestall this,
3005 * the pageout daemon will keep making free pages
3006 * as long as vm_page_free_wanted is non-zero.
3007 */
3008
3009 assert(vm_page_free_count > 0);
3010 if (vm_page_free_wanted_privileged > 0) {
3011 vm_page_free_wanted_privileged--;
3012 need_priv_wakeup = 1;
3013 #if CONFIG_SECLUDED_MEMORY
3014 } else if (vm_page_free_wanted_secluded > 0 &&
3015 vm_page_free_count > vm_page_free_reserved) {
3016 vm_page_free_wanted_secluded--;
3017 need_secluded_wakeup = 1;
3018 #endif /* CONFIG_SECLUDED_MEMORY */
3019 } else if (vm_page_free_wanted > 0 &&
3020 vm_page_free_count > vm_page_free_reserved) {
3021 vm_page_free_wanted--;
3022 need_wakeup = 1;
3023 }
3024 }
3025 lck_mtx_unlock(&vm_page_queue_free_lock);
3026
3027 if (need_priv_wakeup)
3028 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
3029 #if CONFIG_SECLUDED_MEMORY
3030 else if (need_secluded_wakeup)
3031 thread_wakeup_one((event_t) &vm_page_free_wanted_secluded);
3032 #endif /* CONFIG_SECLUDED_MEMORY */
3033 else if (need_wakeup)
3034 thread_wakeup_one((event_t) &vm_page_free_count);
3035
3036 VM_CHECK_MEMORYSTATUS;
3037 }
3038
3039 /*
3040 * This version of vm_page_release() is used only at startup
3041 * when we are single-threaded and pages are being released
3042 * for the first time. Hence, no locks are taken and unnecessary checks are skipped.
3043 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
3044 */
3045 void
3046 vm_page_release_startup(
3047 vm_page_t mem)
3048 {
3049 vm_page_queue_t queue_free;
3050
3051 if (vm_lopage_free_count < vm_lopage_free_limit &&
3052 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3053 mem->lopage = TRUE;
3054 mem->vm_page_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
3055 vm_lopage_free_count++;
3056 queue_free = &vm_lopage_queue_free;
3057 #if CONFIG_SECLUDED_MEMORY
3058 } else if (vm_page_secluded_count < vm_page_secluded_target) {
3059 mem->lopage = FALSE;
3060 mem->vm_page_q_state = VM_PAGE_ON_SECLUDED_Q;
3061 vm_page_secluded_count++;
3062 vm_page_secluded_count_free++;
3063 queue_free = &vm_page_queue_secluded;
3064 #endif /* CONFIG_SECLUDED_MEMORY */
3065 } else {
3066 mem->lopage = FALSE;
3067 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
3068 vm_page_free_count++;
3069 queue_free = &vm_page_queue_free[VM_PAGE_GET_PHYS_PAGE(mem) & vm_color_mask].qhead;
3070 }
3071 vm_page_queue_enter_first(queue_free, mem, vm_page_t, pageq);
3072 }
3073
3074 /*
3075 * vm_page_wait:
3076 *
3077 * Wait for a page to become available.
3078 * If there are plenty of free pages, then we don't sleep.
3079 *
3080 * Returns:
3081 * TRUE: There may be another page, try again
3082 * FALSE: We were interrupted out of our wait, don't try again
3083 */
3084
3085 boolean_t
3086 vm_page_wait(
3087 int interruptible )
3088 {
3089 /*
3090 * We can't use vm_page_free_reserved to make this
3091 * determination. Consider: some thread might
3092 * need to allocate two pages. The first allocation
3093 * succeeds, the second fails. After the first page is freed,
3094 * a call to vm_page_wait must really block.
3095 */
3096 kern_return_t wait_result;
3097 int need_wakeup = 0;
3098 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
3099
3100 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3101
3102 if (is_privileged && vm_page_free_count) {
3103 lck_mtx_unlock(&vm_page_queue_free_lock);
3104 return TRUE;
3105 }
3106
3107 if (vm_page_free_count >= vm_page_free_target) {
3108 lck_mtx_unlock(&vm_page_queue_free_lock);
3109 return TRUE;
3110 }
3111
3112 if (is_privileged) {
3113 if (vm_page_free_wanted_privileged++ == 0)
3114 need_wakeup = 1;
3115 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
3116 #if CONFIG_SECLUDED_MEMORY
3117 } else if (secluded_for_apps &&
3118 task_can_use_secluded_mem(current_task())) {
3119 #if 00
3120 /* XXX FBDP: need pageq lock for this... */
3121 /* XXX FBDP: might wait even if pages available, */
3122 /* XXX FBDP: hopefully not for too long... */
3123 if (vm_page_secluded_count > 0) {
3124 lck_mtx_unlock(&vm_page_queue_free_lock);
3125 return TRUE;
3126 }
3127 #endif
3128 if (vm_page_free_wanted_secluded++ == 0) {
3129 need_wakeup = 1;
3130 }
3131 wait_result = assert_wait(
3132 (event_t)&vm_page_free_wanted_secluded,
3133 interruptible);
3134 #endif /* CONFIG_SECLUDED_MEMORY */
3135 } else {
3136 if (vm_page_free_wanted++ == 0)
3137 need_wakeup = 1;
3138 wait_result = assert_wait((event_t)&vm_page_free_count,
3139 interruptible);
3140 }
3141 lck_mtx_unlock(&vm_page_queue_free_lock);
3142 counter(c_vm_page_wait_block++);
3143
3144 if (need_wakeup)
3145 thread_wakeup((event_t)&vm_page_free_wanted);
3146
3147 if (wait_result == THREAD_WAITING) {
3148 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
3149 vm_page_free_wanted_privileged,
3150 vm_page_free_wanted,
3151 #if CONFIG_SECLUDED_MEMORY
3152 vm_page_free_wanted_secluded,
3153 #else /* CONFIG_SECLUDED_MEMORY */
3154 0,
3155 #endif /* CONFIG_SECLUDED_MEMORY */
3156 0);
3157 wait_result = thread_block(THREAD_CONTINUE_NULL);
3158 VM_DEBUG_EVENT(vm_page_wait_block,
3159 VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
3160 }
3161
3162 return (wait_result == THREAD_AWAKENED);
3163 }
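
/*
 * Illustrative sketch, kept under #if 0 so it is never compiled: how the
 * TRUE/FALSE contract documented above is consumed when the wait is
 * interruptible -- a FALSE return means the thread was interrupted out
 * of its wait and the caller should give up rather than retry.  The
 * helper name and its kern_return_t mapping are assumptions.
 */
#if 0
static kern_return_t
example_grab_page_interruptible(vm_page_t *mp)
{
    vm_page_t  m;

    while ((m = vm_page_grab()) == VM_PAGE_NULL) {
        if (!vm_page_wait(THREAD_INTERRUPTIBLE)) {
            /* interrupted out of the wait... don't try again */
            return KERN_ABORTED;
        }
    }
    *mp = m;
    return KERN_SUCCESS;
}
#endif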
3164
3165 /*
3166 * vm_page_alloc:
3167 *
3168 * Allocate and return a memory cell associated
3169 * with this VM object/offset pair.
3170 *
3171 * Object must be locked.
3172 */
3173
3174 vm_page_t
3175 vm_page_alloc(
3176 vm_object_t object,
3177 vm_object_offset_t offset)
3178 {
3179 vm_page_t mem;
3180 int grab_options;
3181
3182 vm_object_lock_assert_exclusive(object);
3183 grab_options = 0;
3184 #if CONFIG_SECLUDED_MEMORY
3185 if (object->can_grab_secluded) {
3186 grab_options |= VM_PAGE_GRAB_SECLUDED;
3187 }
3188 #endif /* CONFIG_SECLUDED_MEMORY */
3189 mem = vm_page_grab_options(grab_options);
3190 if (mem == VM_PAGE_NULL)
3191 return VM_PAGE_NULL;
3192
3193 vm_page_insert(mem, object, offset);
3194
3195 return(mem);
3196 }
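
/*
 * Illustrative sketch, kept under #if 0 so it is never compiled: a
 * typical vm_page_alloc() caller takes the object lock exclusively,
 * allocates a page at the desired offset, and deals with the page still
 * being marked "busy" on return.  The helper name is hypothetical.
 */
#if 0
static vm_page_t
example_alloc_page_in_object(
    vm_object_t        object,
    vm_object_offset_t offset)
{
    vm_page_t  m;

    vm_object_lock(object);
    m = vm_page_alloc(object, offset);
    vm_object_unlock(object);

    /*
     * m is VM_PAGE_NULL if no free page was available; otherwise the
     * page is now tabled at (object, offset) and returned "busy", so
     * the caller must eventually clear busy and wake any waiters.
     */
    return m;
}
#endif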
3197
3198 /*
3199 * vm_page_alloc_guard:
3200 *
3201 * Allocate a fictitious page which will be used
3202 * as a guard page. The page will be inserted into
3203 * the object and returned to the caller.
3204 */
3205
3206 vm_page_t
3207 vm_page_alloc_guard(
3208 vm_object_t object,
3209 vm_object_offset_t offset)
3210 {
3211 vm_page_t mem;
3212
3213 vm_object_lock_assert_exclusive(object);
3214 mem = vm_page_grab_guard();
3215 if (mem == VM_PAGE_NULL)
3216 return VM_PAGE_NULL;
3217
3218 vm_page_insert(mem, object, offset);
3219
3220 return(mem);
3221 }
3222
3223
3224 counter(unsigned int c_laundry_pages_freed = 0;)
3225
3226 /*
3227 * vm_page_free_prepare:
3228 *
3229 * Removes page from any queue it may be on
3230 * and disassociates it from its VM object.
3231 *
3232 * Object and page queues must be locked prior to entry.
3233 */
3234 static void
3235 vm_page_free_prepare(
3236 vm_page_t mem)
3237 {
3238 vm_page_free_prepare_queues(mem);
3239 vm_page_free_prepare_object(mem, TRUE);
3240 }
3241
3242
3243 void
3244 vm_page_free_prepare_queues(
3245 vm_page_t mem)
3246 {
3247 vm_object_t m_object;
3248
3249 VM_PAGE_CHECK(mem);
3250
3251 assert(mem->vm_page_q_state != VM_PAGE_ON_FREE_Q);
3252 assert(!mem->cleaning);
3253 m_object = VM_PAGE_OBJECT(mem);
3254
3255 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3256 if (m_object) {
3257 vm_object_lock_assert_exclusive(m_object);
3258 }
3259 if (mem->laundry) {
3260 /*
3261 * We may have to free a page while it's being laundered
3262 * if we lost its pager (due to a forced unmount, for example).
3263 * We need to call vm_pageout_steal_laundry() before removing
3264 * the page from its VM object, so that we can remove it
3265 * from its pageout queue and adjust the laundry accounting
3266 */
3267 vm_pageout_steal_laundry(mem, TRUE);
3268 counter(++c_laundry_pages_freed);
3269 }
3270
3271 vm_page_queues_remove(mem, TRUE);
3272
3273 if (VM_PAGE_WIRED(mem)) {
3274 assert(mem->wire_count > 0);
3275
3276 if (m_object) {
3277 assert(m_object->wired_page_count > 0);
3278 m_object->wired_page_count--;
3279 if (!m_object->wired_page_count) {
3280 VM_OBJECT_UNWIRED(m_object);
3281 }
3282
3283 assert(m_object->resident_page_count >=
3284 m_object->wired_page_count);
3285
3286 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
3287 OSAddAtomic(+1, &vm_page_purgeable_count);
3288 assert(vm_page_purgeable_wired_count > 0);
3289 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
3290 }
3291 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
3292 m_object->purgable == VM_PURGABLE_EMPTY) &&
3293 m_object->vo_purgeable_owner != TASK_NULL) {
3294 task_t owner;
3295
3296 owner = m_object->vo_purgeable_owner;
3297 /*
3298 * While wired, this page was accounted
3299 * as "non-volatile" but it should now
3300 * be accounted as "volatile".
3301 */
3302 /* one less "non-volatile"... */
3303 ledger_debit(owner->ledger,
3304 task_ledgers.purgeable_nonvolatile,
3305 PAGE_SIZE);
3306 /* ... and "phys_footprint" */
3307 ledger_debit(owner->ledger,
3308 task_ledgers.phys_footprint,
3309 PAGE_SIZE);
3310 /* one more "volatile" */
3311 ledger_credit(owner->ledger,
3312 task_ledgers.purgeable_volatile,
3313 PAGE_SIZE);
3314 }
3315 }
3316 if (!mem->private && !mem->fictitious)
3317 vm_page_wire_count--;
3318
3319 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
3320 mem->wire_count = 0;
3321 assert(!mem->gobbled);
3322 } else if (mem->gobbled) {
3323 if (!mem->private && !mem->fictitious)
3324 vm_page_wire_count--;
3325 vm_page_gobble_count--;
3326 }
3327 }
3328
3329
3330 void
3331 vm_page_free_prepare_object(
3332 vm_page_t mem,
3333 boolean_t remove_from_hash)
3334 {
3335 if (mem->tabled)
3336 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
3337
3338 PAGE_WAKEUP(mem); /* clears wanted */
3339
3340 if (mem->private) {
3341 mem->private = FALSE;
3342 mem->fictitious = TRUE;
3343 VM_PAGE_SET_PHYS_PAGE(mem, vm_page_fictitious_addr);
3344 }
3345 if ( !mem->fictitious) {
3346 vm_page_init(mem, VM_PAGE_GET_PHYS_PAGE(mem), mem->lopage);
3347 }
3348 }
3349
3350
3351 /*
3352 * vm_page_free:
3353 *
3354 * Returns the given page to the free list,
3355 * disassociating it from any VM object.
3356 *
3357 * Object and page queues must be locked prior to entry.
3358 */
3359 void
3360 vm_page_free(
3361 vm_page_t mem)
3362 {
3363 vm_page_free_prepare(mem);
3364
3365 if (mem->fictitious) {
3366 vm_page_release_fictitious(mem);
3367 } else {
3368 vm_page_release(mem,
3369 TRUE); /* page queues are locked */
3370 }
3371 }
3372
3373
3374 void
3375 vm_page_free_unlocked(
3376 vm_page_t mem,
3377 boolean_t remove_from_hash)
3378 {
3379 vm_page_lockspin_queues();
3380 vm_page_free_prepare_queues(mem);
3381 vm_page_unlock_queues();
3382
3383 vm_page_free_prepare_object(mem, remove_from_hash);
3384
3385 if (mem->fictitious) {
3386 vm_page_release_fictitious(mem);
3387 } else {
3388 vm_page_release(mem, FALSE); /* page queues are not locked */
3389 }
3390 }
3391
3392
3393 /*
3394 * Free a list of pages. The list can be up to several hundred pages,
3395 * as blocked up by vm_pageout_scan().
3396 * The big win is not having to take the free list lock once
3397 * per page.
3398 */
3399 void
3400 vm_page_free_list(
3401 vm_page_t freeq,
3402 boolean_t prepare_object)
3403 {
3404 vm_page_t mem;
3405 vm_page_t nxt;
3406 vm_page_t local_freeq;
3407 int pg_count;
3408
3409 while (freeq) {
3410
3411 pg_count = 0;
3412 local_freeq = VM_PAGE_NULL;
3413 mem = freeq;
3414
3415 /*
3416 * break up the processing into smaller chunks so
3417 * that we can 'pipeline' the pages onto the
3418 * free list w/o introducing too much
3419 * contention on the global free queue lock
3420 */
3421 while (mem && pg_count < 64) {
3422
3423 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
3424 #if CONFIG_BACKGROUND_QUEUE
3425 assert(mem->vm_page_backgroundq.next == 0 &&
3426 mem->vm_page_backgroundq.prev == 0 &&
3427 mem->vm_page_on_backgroundq == FALSE);
3428 #endif
3429 nxt = mem->snext;
3430 mem->snext = NULL;
3431 assert(mem->pageq.prev == 0);
3432
3433 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
3434 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
3435 }
3436 if (prepare_object == TRUE)
3437 vm_page_free_prepare_object(mem, TRUE);
3438
3439 if (!mem->fictitious) {
3440 assert(mem->busy);
3441
3442 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
3443 vm_lopage_free_count < vm_lopage_free_limit &&
3444 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3445 vm_page_release(mem, FALSE); /* page queues are not locked */
3446 #if CONFIG_SECLUDED_MEMORY
3447 } else if (vm_page_secluded_count < vm_page_secluded_target &&
3448 num_tasks_can_use_secluded_mem == 0) {
3449 vm_page_release(mem,
3450 FALSE); /* page queues are not locked */
3451 #endif /* CONFIG_SECLUDED_MEMORY */
3452 } else {
3453 /*
3454 * IMPORTANT: we can't set the page "free" here
3455 * because that would make the page eligible for
3456 * a physically-contiguous allocation (see
3457 * vm_page_find_contiguous()) right away (we don't
3458 * hold the vm_page_queue_free lock). That would
3459 * cause trouble because the page is not actually
3460 * in the free queue yet...
3461 */
3462 mem->snext = local_freeq;
3463 local_freeq = mem;
3464 pg_count++;
3465
3466 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
3467 }
3468 } else {
3469 assert(VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_fictitious_addr ||
3470 VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_guard_addr);
3471 vm_page_release_fictitious(mem);
3472 }
3473 mem = nxt;
3474 }
3475 freeq = mem;
3476
3477 if ( (mem = local_freeq) ) {
3478 unsigned int avail_free_count;
3479 unsigned int need_wakeup = 0;
3480 unsigned int need_priv_wakeup = 0;
3481 #if CONFIG_SECLUDED_MEMORY
3482 unsigned int need_wakeup_secluded = 0;
3483 #endif /* CONFIG_SECLUDED_MEMORY */
3484
3485 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3486
3487 while (mem) {
3488 int color;
3489
3490 nxt = mem->snext;
3491
3492 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
3493 assert(mem->busy);
3494 mem->lopage = FALSE;
3495 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
3496
3497 color = VM_PAGE_GET_PHYS_PAGE(mem) & vm_color_mask;
3498 vm_page_queue_enter_first(&vm_page_queue_free[color].qhead,
3499 mem,
3500 vm_page_t,
3501 pageq);
3502 mem = nxt;
3503 }
3504 vm_page_free_count += pg_count;
3505 avail_free_count = vm_page_free_count;
3506
3507 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
3508
3509 if (avail_free_count < vm_page_free_wanted_privileged) {
3510 need_priv_wakeup = avail_free_count;
3511 vm_page_free_wanted_privileged -= avail_free_count;
3512 avail_free_count = 0;
3513 } else {
3514 need_priv_wakeup = vm_page_free_wanted_privileged;
3515 avail_free_count -= vm_page_free_wanted_privileged;
3516 vm_page_free_wanted_privileged = 0;
3517 }
3518 }
3519 #if CONFIG_SECLUDED_MEMORY
3520 if (vm_page_free_wanted_secluded > 0 &&
3521 avail_free_count > vm_page_free_reserved) {
3522 unsigned int available_pages;
3523 available_pages = (avail_free_count -
3524 vm_page_free_reserved);
3525 if (available_pages <
3526 vm_page_free_wanted_secluded) {
3527 need_wakeup_secluded = available_pages;
3528 vm_page_free_wanted_secluded -=
3529 available_pages;
3530 avail_free_count -= available_pages;
3531 } else {
3532 need_wakeup_secluded =
3533 vm_page_free_wanted_secluded;
3534 avail_free_count -=
3535 vm_page_free_wanted_secluded;
3536 vm_page_free_wanted_secluded = 0;
3537 }
3538 }
3539 #endif /* CONFIG_SECLUDED_MEMORY */
3540 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
3541 unsigned int available_pages;
3542
3543 available_pages = avail_free_count - vm_page_free_reserved;
3544
3545 if (available_pages >= vm_page_free_wanted) {
3546 need_wakeup = vm_page_free_wanted;
3547 vm_page_free_wanted = 0;
3548 } else {
3549 need_wakeup = available_pages;
3550 vm_page_free_wanted -= available_pages;
3551 }
3552 }
3553 lck_mtx_unlock(&vm_page_queue_free_lock);
3554
3555 if (need_priv_wakeup != 0) {
3556 /*
3557 * There shouldn't be that many VM-privileged threads,
3558 * so let's wake them all up, even if we don't quite
3559 * have enough pages to satisfy them all.
3560 */
3561 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
3562 }
3563 #if CONFIG_SECLUDED_MEMORY
3564 if (need_wakeup_secluded != 0 &&
3565 vm_page_free_wanted_secluded == 0) {
3566 thread_wakeup((event_t)
3567 &vm_page_free_wanted_secluded);
3568 } else {
3569 for (;
3570 need_wakeup_secluded != 0;
3571 need_wakeup_secluded--) {
3572 thread_wakeup_one(
3573 (event_t)
3574 &vm_page_free_wanted_secluded);
3575 }
3576 }
3577 #endif /* CONFIG_SECLUDED_MEMORY */
3578 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
3579 /*
3580 * We don't expect to have any more waiters
3581 * after this, so let's wake them all up at
3582 * once.
3583 */
3584 thread_wakeup((event_t) &vm_page_free_count);
3585 } else for (; need_wakeup != 0; need_wakeup--) {
3586 /*
3587 * Wake up one waiter per page we just released.
3588 */
3589 thread_wakeup_one((event_t) &vm_page_free_count);
3590 }
3591
3592 VM_CHECK_MEMORYSTATUS;
3593 }
3594 }
3595 }
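
/*
 * Illustrative sketch, kept under #if 0 so it is never compiled: how a
 * caller batches pages for vm_page_free_list() -- chain the pages
 * through their "snext" field and free them with one call, so the free
 * list lock is taken once per batch rather than once per page.  The
 * pages are assumed to have already been taken off the paging queues
 * (e.g. via vm_page_free_prepare_queues()); the helper name and the
 * caller-supplied array are hypothetical.
 */
#if 0
static void
example_free_page_batch(vm_page_t *pages, int count)
{
    vm_page_t  freeq = VM_PAGE_NULL;
    int        i;

    for (i = 0; i < count; i++) {
        pages[i]->snext = freeq;
        freeq = pages[i];
    }
    /* TRUE: let vm_page_free_list() do the object/hash cleanup */
    vm_page_free_list(freeq, TRUE);
}
#endif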
3596
3597
3598 /*
3599 * vm_page_wire:
3600 *
3601 * Mark this page as wired down by yet
3602 * another map, removing it from paging queues
3603 * as necessary.
3604 *
3605 * The page's object and the page queues must be locked.
3606 */
3607
3608
3609 void
3610 vm_page_wire(
3611 vm_page_t mem,
3612 vm_tag_t tag,
3613 boolean_t check_memorystatus)
3614 {
3615 vm_object_t m_object;
3616
3617 m_object = VM_PAGE_OBJECT(mem);
3618
3619 // dbgLog(current_thread(), mem->offset, m_object, 1); /* (TEST/DEBUG) */
3620
3621 VM_PAGE_CHECK(mem);
3622 if (m_object) {
3623 vm_object_lock_assert_exclusive(m_object);
3624 } else {
3625 /*
3626 * In theory, the page should be in an object before it
3627 * gets wired, since we need to hold the object lock
3628 * to update some fields in the page structure.
3629 * However, some code (i386 pmap, for example) might want
3630 * to wire a page before it gets inserted into an object.
3631 * That's somewhat OK, as long as nobody else can get to
3632 * that page and update it at the same time.
3633 */
3634 }
3635 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3636 if ( !VM_PAGE_WIRED(mem)) {
3637
3638 if (mem->laundry)
3639 vm_pageout_steal_laundry(mem, TRUE);
3640
3641 vm_page_queues_remove(mem, TRUE);
3642
3643 assert(mem->wire_count == 0);
3644 mem->vm_page_q_state = VM_PAGE_IS_WIRED;
3645
3646 if (m_object) {
3647
3648 if (!mem->private && !mem->fictitious)
3649 {
3650 if (!m_object->wired_page_count)
3651 {
3652 assert(VM_KERN_MEMORY_NONE != tag);
3653 m_object->wire_tag = tag;
3654 VM_OBJECT_WIRED(m_object);
3655 }
3656 }
3657 m_object->wired_page_count++;
3658
3659 assert(m_object->resident_page_count >=
3660 m_object->wired_page_count);
3661 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
3662 assert(vm_page_purgeable_count > 0);
3663 OSAddAtomic(-1, &vm_page_purgeable_count);
3664 OSAddAtomic(1, &vm_page_purgeable_wired_count);
3665 }
3666 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
3667 m_object->purgable == VM_PURGABLE_EMPTY) &&
3668 m_object->vo_purgeable_owner != TASK_NULL) {
3669 task_t owner;
3670
3671 owner = m_object->vo_purgeable_owner;
3672 /* less volatile bytes */
3673 ledger_debit(owner->ledger,
3674 task_ledgers.purgeable_volatile,
3675 PAGE_SIZE);
3676 /* more not-quite-volatile bytes */
3677 ledger_credit(owner->ledger,
3678 task_ledgers.purgeable_nonvolatile,
3679 PAGE_SIZE);
3680 /* more footprint */
3681 ledger_credit(owner->ledger,
3682 task_ledgers.phys_footprint,
3683 PAGE_SIZE);
3684 }
3685 if (m_object->all_reusable) {
3686 /*
3687 * Wired pages are not counted as "re-usable"
3688 * in "all_reusable" VM objects, so nothing
3689 * to do here.
3690 */
3691 } else if (mem->reusable) {
3692 /*
3693 * This page is not "re-usable" when it's
3694 * wired, so adjust its state and the
3695 * accounting.
3696 */
3697 vm_object_reuse_pages(m_object,
3698 mem->offset,
3699 mem->offset+PAGE_SIZE_64,
3700 FALSE);
3701 }
3702 }
3703 assert(!mem->reusable);
3704
3705 if (!mem->private && !mem->fictitious && !mem->gobbled)
3706 vm_page_wire_count++;
3707 if (mem->gobbled)
3708 vm_page_gobble_count--;
3709 mem->gobbled = FALSE;
3710
3711 if (check_memorystatus == TRUE) {
3712 VM_CHECK_MEMORYSTATUS;
3713 }
3714 /*
3715 * ENCRYPTED SWAP:
3716 * The page could be encrypted, but
3717 * we don't have to decrypt it here
3718 * because we don't guarantee that the
3719 * data is actually valid at this point.
3720 * The page will get decrypted in
3721 * vm_fault_wire() if needed.
3722 */
3723 }
3724 assert(!mem->gobbled);
3725 assert(mem->vm_page_q_state == VM_PAGE_IS_WIRED);
3726 mem->wire_count++;
3727 if (__improbable(mem->wire_count == 0)) {
3728 panic("vm_page_wire(%p): wire_count overflow", mem);
3729 }
3730 VM_PAGE_CHECK(mem);
3731 }
3732
3733 /*
3734 * vm_page_unwire:
3735 *
3736 * Release one wiring of this page, potentially
3737 * enabling it to be paged again.
3738 *
3739 * The page's object and the page queues must be locked.
3740 */
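/*
 * Illustrative sketch (not compiled): the matching unwire path for a
 * hypothetical caller.  Passing queueit == TRUE asks vm_page_unwire() to
 * put the page back on an appropriate paging queue once its last wiring
 * is released.
 */
#if 0
static void
example_unwire_page(vm_object_t object, vm_page_t m)
{
	vm_object_lock(object);
	vm_page_lockspin_queues();

	vm_page_unwire(m, TRUE);	/* TRUE: requeue if wire_count drops to 0 */

	vm_page_unlock_queues();
	vm_object_unlock(object);
}
#endif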
3741 void
3742 vm_page_unwire(
3743 vm_page_t mem,
3744 boolean_t queueit)
3745 {
3746 vm_object_t m_object;
3747
3748 m_object = VM_PAGE_OBJECT(mem);
3749
3750 // dbgLog(current_thread(), mem->offset, m_object, 0); /* (TEST/DEBUG) */
3751
3752 VM_PAGE_CHECK(mem);
3753 assert(VM_PAGE_WIRED(mem));
3754 assert(mem->wire_count > 0);
3755 assert(!mem->gobbled);
3756 assert(m_object != VM_OBJECT_NULL);
3757 vm_object_lock_assert_exclusive(m_object);
3758 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3759 if (--mem->wire_count == 0) {
3760 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
3761
3762 if (!mem->private && !mem->fictitious) {
3763 vm_page_wire_count--;
3764 }
3765 assert(m_object->wired_page_count > 0);
3766 m_object->wired_page_count--;
3767 if (!m_object->wired_page_count) {
3768 VM_OBJECT_UNWIRED(m_object);
3769 }
3770 assert(m_object->resident_page_count >=
3771 m_object->wired_page_count);
3772 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
3773 OSAddAtomic(+1, &vm_page_purgeable_count);
3774 assert(vm_page_purgeable_wired_count > 0);
3775 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
3776 }
3777 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
3778 m_object->purgable == VM_PURGABLE_EMPTY) &&
3779 m_object->vo_purgeable_owner != TASK_NULL) {
3780 task_t owner;
3781
3782 owner = m_object->vo_purgeable_owner;
3783 /* more volatile bytes */
3784 ledger_credit(owner->ledger,
3785 task_ledgers.purgeable_volatile,
3786 PAGE_SIZE);
3787 /* less not-quite-volatile bytes */
3788 ledger_debit(owner->ledger,
3789 task_ledgers.purgeable_nonvolatile,
3790 PAGE_SIZE);
3791 /* less footprint */
3792 ledger_debit(owner->ledger,
3793 task_ledgers.phys_footprint,
3794 PAGE_SIZE);
3795 }
3796 assert(m_object != kernel_object);
3797 assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
3798
3799 if (queueit == TRUE) {
3800 if (m_object->purgable == VM_PURGABLE_EMPTY) {
3801 vm_page_deactivate(mem);
3802 } else {
3803 vm_page_activate(mem);
3804 }
3805 }
3806
3807 VM_CHECK_MEMORYSTATUS;
3808
3809 }
3810 VM_PAGE_CHECK(mem);
3811 }
3812
3813 /*
3814 * vm_page_deactivate:
3815 *
3816 * Returns the given page to the inactive list,
3817 * indicating that no physical maps have access
3818 * to this page. [Used by the physical mapping system.]
3819 *
3820 * The page queues must be locked.
3821 */
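/*
 * Illustrative sketch (not compiled): a hypothetical caller moving a page
 * onto the inactive list.  Per the comment above, only the page queues
 * lock is required here.
 */
#if 0
static void
example_deactivate_page(vm_page_t m)
{
	vm_page_lock_queues();
	vm_page_deactivate(m);		/* also clears the hardware reference bit */
	vm_page_unlock_queues();
}
#endif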
3822 void
3823 vm_page_deactivate(
3824 vm_page_t m)
3825 {
3826 vm_page_deactivate_internal(m, TRUE);
3827 }
3828
3829
3830 void
3831 vm_page_deactivate_internal(
3832 vm_page_t m,
3833 boolean_t clear_hw_reference)
3834 {
3835 vm_object_t m_object;
3836
3837 m_object = VM_PAGE_OBJECT(m);
3838
3839 VM_PAGE_CHECK(m);
3840 assert(m_object != kernel_object);
3841 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
3842
3843 // dbgLog(VM_PAGE_GET_PHYS_PAGE(m), vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
3844 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3845 /*
3846 * This page is no longer very interesting. If it was
3847 * interesting (active or inactive/referenced), then we
3848 * clear the reference bit and (re)enter it in the
3849 * inactive queue. Note wired pages should not have
3850 * their reference bit cleared.
3851 */
3852 assert ( !(m->absent && !m->unusual));
3853
3854 if (m->gobbled) { /* can this happen? */
3855 assert( !VM_PAGE_WIRED(m));
3856
3857 if (!m->private && !m->fictitious)
3858 vm_page_wire_count--;
3859 vm_page_gobble_count--;
3860 m->gobbled = FALSE;
3861 }
3862 /*
3863 * if this page is currently on the pageout queue, we can't do the
3864 * vm_page_queues_remove (which doesn't handle the pageout queue case)
3865 * and we can't remove it manually since we would need the object lock
3866 * (which is not required here) to decrement the activity_in_progress
3867 * reference which is held on the object while the page is in the pageout queue...
3868 * just let the normal laundry processing proceed
3869 */
3870 if (m->laundry || m->private || m->fictitious ||
3871 (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
3872 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
3873 VM_PAGE_WIRED(m)) {
3874 return;
3875 }
3876 if (!m->absent && clear_hw_reference == TRUE)
3877 pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));
3878
3879 m->reference = FALSE;
3880 m->no_cache = FALSE;
3881
3882 if ( !VM_PAGE_INACTIVE(m)) {
3883 vm_page_queues_remove(m, FALSE);
3884
3885 if (!VM_DYNAMIC_PAGING_ENABLED() &&
3886 m->dirty && m_object->internal &&
3887 (m_object->purgable == VM_PURGABLE_DENY ||
3888 m_object->purgable == VM_PURGABLE_NONVOLATILE ||
3889 m_object->purgable == VM_PURGABLE_VOLATILE)) {
3890 vm_page_check_pageable_safe(m);
3891 vm_page_queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
3892 m->vm_page_q_state = VM_PAGE_ON_THROTTLED_Q;
3893 vm_page_throttled_count++;
3894 } else {
3895 if (m_object->named && m_object->ref_count == 1) {
3896 vm_page_speculate(m, FALSE);
3897 #if DEVELOPMENT || DEBUG
3898 vm_page_speculative_recreated++;
3899 #endif
3900 } else {
3901 vm_page_enqueue_inactive(m, FALSE);
3902 }
3903 }
3904 }
3905 }
3906
3907 /*
3908 * vm_page_enqueue_cleaned
3909 *
3910 * Put the page on the cleaned queue, mark it cleaned, etc.
3911 * Being on the cleaned queue (and having m->clean_queue set)
3912 * does ** NOT ** guarantee that the page is clean!
3913 *
3914 * Call with the queues lock held.
3915 */
3916
3917 void vm_page_enqueue_cleaned(vm_page_t m)
3918 {
3919 vm_object_t m_object;
3920
3921 m_object = VM_PAGE_OBJECT(m);
3922
3923 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
3924 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3925 assert( !(m->absent && !m->unusual));
3926 assert( !VM_PAGE_WIRED(m));
3927
3928 if (m->gobbled) {
3929 if (!m->private && !m->fictitious)
3930 vm_page_wire_count--;
3931 vm_page_gobble_count--;
3932 m->gobbled = FALSE;
3933 }
3934 /*
3935 * if this page is currently on the pageout queue, we can't do the
3936 * vm_page_queues_remove (which doesn't handle the pageout queue case)
3937 * and we can't remove it manually since we would need the object lock
3938 * (which is not required here) to decrement the activity_in_progress
3939 * reference which is held on the object while the page is in the pageout queue...
3940 * just let the normal laundry processing proceed
3941 */
3942 if (m->laundry || m->private || m->fictitious ||
3943 (m->vm_page_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) ||
3944 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
3945 return;
3946 }
3947 vm_page_queues_remove(m, FALSE);
3948
3949 vm_page_check_pageable_safe(m);
3950 vm_page_queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
3951 m->vm_page_q_state = VM_PAGE_ON_INACTIVE_CLEANED_Q;
3952 vm_page_cleaned_count++;
3953
3954 vm_page_inactive_count++;
3955 if (m_object->internal) {
3956 vm_page_pageable_internal_count++;
3957 } else {
3958 vm_page_pageable_external_count++;
3959 }
3960 #if CONFIG_BACKGROUND_QUEUE
3961 if (m->vm_page_in_background)
3962 vm_page_add_to_backgroundq(m, TRUE);
3963 #endif
3964 vm_pageout_enqueued_cleaned++;
3965 }
3966
3967 /*
3968 * vm_page_activate:
3969 *
3970 * Put the specified page on the active list (if appropriate).
3971 *
3972 * The page queues must be locked.
3973 */
3974
3975 void
3976 vm_page_activate(
3977 vm_page_t m)
3978 {
3979 vm_object_t m_object;
3980
3981 m_object = VM_PAGE_OBJECT(m);
3982
3983 VM_PAGE_CHECK(m);
3984 #ifdef FIXME_4778297
3985 assert(m_object != kernel_object);
3986 #endif
3987 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
3988 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3989 assert( !(m->absent && !m->unusual));
3990
3991 if (m->gobbled) {
3992 assert( !VM_PAGE_WIRED(m));
3993 if (!m->private && !m->fictitious)
3994 vm_page_wire_count--;
3995 vm_page_gobble_count--;
3996 m->gobbled = FALSE;
3997 }
3998 /*
3999 * if this page is currently on the pageout queue, we can't do the
4000 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4001 * and we can't remove it manually since we would need the object lock
4002 * (which is not required here) to decrement the activity_in_progress
4003 * reference which is held on the object while the page is in the pageout queue...
4004 * just let the normal laundry processing proceed
4005 */
4006 if (m->laundry || m->private || m->fictitious ||
4007 (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4008 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q))
4009 return;
4010
4011 #if DEBUG
4012 if (m->vm_page_q_state == VM_PAGE_ON_ACTIVE_Q)
4013 panic("vm_page_activate: already active");
4014 #endif
4015
4016 if (m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
4017 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
4018 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
4019 }
4020
4021 vm_page_queues_remove(m, FALSE);
4022
4023 if ( !VM_PAGE_WIRED(m)) {
4024 vm_page_check_pageable_safe(m);
4025 if (!VM_DYNAMIC_PAGING_ENABLED() &&
4026 m->dirty && m_object->internal &&
4027 (m_object->purgable == VM_PURGABLE_DENY ||
4028 m_object->purgable == VM_PURGABLE_NONVOLATILE ||
4029 m_object->purgable == VM_PURGABLE_VOLATILE)) {
4030 vm_page_queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
4031 m->vm_page_q_state = VM_PAGE_ON_THROTTLED_Q;
4032 vm_page_throttled_count++;
4033 } else {
4034 #if CONFIG_SECLUDED_MEMORY
4035 if (secluded_for_filecache &&
4036 vm_page_secluded_target != 0 &&
4037 num_tasks_can_use_secluded_mem == 0 &&
4038 m_object->eligible_for_secluded &&
4039 ((secluded_aging_policy == SECLUDED_AGING_FIFO) ||
4040 (secluded_aging_policy ==
4041 SECLUDED_AGING_ALONG_ACTIVE) ||
4042 (secluded_aging_policy ==
4043 SECLUDED_AGING_BEFORE_ACTIVE))) {
4044 vm_page_queue_enter(&vm_page_queue_secluded, m,
4045 vm_page_t, pageq);
4046 m->vm_page_q_state = VM_PAGE_ON_SECLUDED_Q;
4047 vm_page_secluded_count++;
4048 vm_page_secluded_count_inuse++;
4049 assert(!m_object->internal);
4050 // vm_page_pageable_external_count++;
4051 } else
4052 #endif /* CONFIG_SECLUDED_MEMORY */
4053 vm_page_enqueue_active(m, FALSE);
4054 }
4055 m->reference = TRUE;
4056 m->no_cache = FALSE;
4057 }
4058 VM_PAGE_CHECK(m);
4059 }
4060
4061
4062 /*
4063 * vm_page_speculate:
4064 *
4065 * Put the specified page on the speculative list (if appropriate).
4066 *
4067 * The page queues must be locked.
4068 */
4069 void
4070 vm_page_speculate(
4071 vm_page_t m,
4072 boolean_t new)
4073 {
4074 struct vm_speculative_age_q *aq;
4075 vm_object_t m_object;
4076
4077 m_object = VM_PAGE_OBJECT(m);
4078
4079 VM_PAGE_CHECK(m);
4080 vm_page_check_pageable_safe(m);
4081
4082 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4083 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4084 assert( !(m->absent && !m->unusual));
4085 assert(m_object->internal == FALSE);
4086
4087 /*
4088 * if this page is currently on the pageout queue, we can't do the
4089 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4090 * and we can't remove it manually since we would need the object lock
4091 * (which is not required here) to decrement the activity_in_progress
4092 * reference which is held on the object while the page is in the pageout queue...
4093 * just let the normal laundry processing proceed
4094 */
4095 if (m->laundry || m->private || m->fictitious ||
4096 (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4097 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q))
4098 return;
4099
4100 vm_page_queues_remove(m, FALSE);
4101
4102 if ( !VM_PAGE_WIRED(m)) {
4103 mach_timespec_t ts;
4104 clock_sec_t sec;
4105 clock_nsec_t nsec;
4106
4107 clock_get_system_nanotime(&sec, &nsec);
4108 ts.tv_sec = (unsigned int) sec;
4109 ts.tv_nsec = nsec;
4110
4111 if (vm_page_speculative_count == 0) {
4112
4113 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4114 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4115
4116 aq = &vm_page_queue_speculative[speculative_age_index];
4117
4118 /*
4119 * set the timer to begin a new group
4120 */
4121 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
4122 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
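/*
 * For example (illustrative numbers only): with
 * vm_page_speculative_q_age_ms set to 1500, tv_sec becomes 1 and
 * tv_nsec becomes (1500 % 1000) * 1000 * NSEC_PER_USEC == 500,000,000 ns,
 * so after the ADD_MACH_TIMESPEC() below this age group expires
 * 1.5 seconds from 'ts'.
 */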
4123
4124 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
4125 } else {
4126 aq = &vm_page_queue_speculative[speculative_age_index];
4127
4128 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
4129
4130 speculative_age_index++;
4131
4132 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
4133 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4134 if (speculative_age_index == speculative_steal_index) {
4135 speculative_steal_index = speculative_age_index + 1;
4136
4137 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
4138 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4139 }
4140 aq = &vm_page_queue_speculative[speculative_age_index];
4141
4142 if (!vm_page_queue_empty(&aq->age_q))
4143 vm_page_speculate_ageit(aq);
4144
4145 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
4146 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
4147
4148 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
4149 }
4150 }
4151 vm_page_enqueue_tail(&aq->age_q, &m->pageq);
4152 m->vm_page_q_state = VM_PAGE_ON_SPECULATIVE_Q;
4153 vm_page_speculative_count++;
4154 vm_page_pageable_external_count++;
4155
4156 if (new == TRUE) {
4157 vm_object_lock_assert_exclusive(m_object);
4158
4159 m_object->pages_created++;
4160 #if DEVELOPMENT || DEBUG
4161 vm_page_speculative_created++;
4162 #endif
4163 }
4164 }
4165 VM_PAGE_CHECK(m);
4166 }
4167
4168
4169 /*
4170 * move pages from the specified aging bin to
4171 * the speculative bin that pageout_scan claims from
4172 *
4173 * The page queues must be locked.
4174 */
4175 void
4176 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
4177 {
4178 struct vm_speculative_age_q *sq;
4179 vm_page_t t;
4180
4181 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
4182
4183 if (vm_page_queue_empty(&sq->age_q)) {
4184 sq->age_q.next = aq->age_q.next;
4185 sq->age_q.prev = aq->age_q.prev;
4186
4187 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.next);
4188 t->pageq.prev = VM_PAGE_PACK_PTR(&sq->age_q);
4189
4190 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
4191 t->pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);
4192 } else {
4193 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
4194 t->pageq.next = aq->age_q.next;
4195
4196 t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.next);
4197 t->pageq.prev = sq->age_q.prev;
4198
4199 t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.prev);
4200 t->pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);
4201
4202 sq->age_q.prev = aq->age_q.prev;
4203 }
4204 vm_page_queue_init(&aq->age_q);
4205 }
4206
4207
4208 void
4209 vm_page_lru(
4210 vm_page_t m)
4211 {
4212 VM_PAGE_CHECK(m);
4213 assert(VM_PAGE_OBJECT(m) != kernel_object);
4214 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4215
4216 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4217 /*
4218 * if this page is currently on the pageout queue, we can't do the
4219 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4220 * and we can't remove it manually since we would need the object lock
4221 * (which is not required here) to decrement the activity_in_progress
4222 * reference which is held on the object while the page is in the pageout queue...
4223 * just let the normal laundry processing proceed
4224 */
4225 if (m->laundry || m->private ||
4226 (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4227 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
4228 VM_PAGE_WIRED(m))
4229 return;
4230
4231 m->no_cache = FALSE;
4232
4233 vm_page_queues_remove(m, FALSE);
4234
4235 vm_page_enqueue_inactive(m, FALSE);
4236 }
4237
4238
4239 void
4240 vm_page_reactivate_all_throttled(void)
4241 {
4242 vm_page_t first_throttled, last_throttled;
4243 vm_page_t first_active;
4244 vm_page_t m;
4245 int extra_active_count;
4246 int extra_internal_count, extra_external_count;
4247 vm_object_t m_object;
4248
4249 if (!VM_DYNAMIC_PAGING_ENABLED())
4250 return;
4251
4252 extra_active_count = 0;
4253 extra_internal_count = 0;
4254 extra_external_count = 0;
4255 vm_page_lock_queues();
4256 if (! vm_page_queue_empty(&vm_page_queue_throttled)) {
4257 /*
4258 * Switch "throttled" pages to "active".
4259 */
4260 vm_page_queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
4261 VM_PAGE_CHECK(m);
4262 assert(m->vm_page_q_state == VM_PAGE_ON_THROTTLED_Q);
4263
4264 m_object = VM_PAGE_OBJECT(m);
4265
4266 extra_active_count++;
4267 if (m_object->internal) {
4268 extra_internal_count++;
4269 } else {
4270 extra_external_count++;
4271 }
4272
4273 m->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
4274 VM_PAGE_CHECK(m);
4275 #if CONFIG_BACKGROUND_QUEUE
4276 if (m->vm_page_in_background)
4277 vm_page_add_to_backgroundq(m, FALSE);
4278 #endif
4279 }
4280
4281 /*
4282 * Transfer the entire throttled queue to the regular LRU page queues.
4283 * We insert it at the head of the active queue, so that these pages
4284 * get re-evaluated by the LRU algorithm first, since they've been
4285 * completely out of it until now.
4286 */
4287 first_throttled = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
4288 last_throttled = (vm_page_t) vm_page_queue_last(&vm_page_queue_throttled);
4289 first_active = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
4290 if (vm_page_queue_empty(&vm_page_queue_active)) {
4291 vm_page_queue_active.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
4292 } else {
4293 first_active->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
4294 }
4295 vm_page_queue_active.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_throttled);
4296 first_throttled->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active);
4297 last_throttled->pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active);
4298
4299 #if DEBUG
4300 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
4301 #endif
4302 vm_page_queue_init(&vm_page_queue_throttled);
4303 /*
4304 * Adjust the global page counts.
4305 */
4306 vm_page_active_count += extra_active_count;
4307 vm_page_pageable_internal_count += extra_internal_count;
4308 vm_page_pageable_external_count += extra_external_count;
4309 vm_page_throttled_count = 0;
4310 }
4311 assert(vm_page_throttled_count == 0);
4312 assert(vm_page_queue_empty(&vm_page_queue_throttled));
4313 vm_page_unlock_queues();
4314 }
4315
4316
4317 /*
4318 * move pages from the indicated local queue to the global active queue
4319 * it's OK to fail if we're below the hard limit and force == FALSE
4320 * the nolocks == TRUE case is to allow this function to be run on
4321 * the hibernate path
4322 */
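/*
 * Illustrative sketch (not compiled): a hypothetical periodic caller
 * draining one CPU's local queue.  With force == FALSE the call may return
 * without doing anything if the queue is still below the hard limit and
 * the page queues lock is contended; nolocks == TRUE is reserved for the
 * hibernate path.
 */
#if 0
static void
example_drain_local_queue(uint32_t lid)
{
	vm_page_reactivate_local(lid, FALSE, FALSE);
}
#endif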
4323
4324 void
4325 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
4326 {
4327 struct vpl *lq;
4328 vm_page_t first_local, last_local;
4329 vm_page_t first_active;
4330 vm_page_t m;
4331 uint32_t count = 0;
4332
4333 if (vm_page_local_q == NULL)
4334 return;
4335
4336 lq = &vm_page_local_q[lid].vpl_un.vpl;
4337
4338 if (nolocks == FALSE) {
4339 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
4340 if ( !vm_page_trylockspin_queues())
4341 return;
4342 } else
4343 vm_page_lockspin_queues();
4344
4345 VPL_LOCK(&lq->vpl_lock);
4346 }
4347 if (lq->vpl_count) {
4348 /*
4349 * Switch "local" pages to "active".
4350 */
4351 assert(!vm_page_queue_empty(&lq->vpl_queue));
4352
4353 vm_page_queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
4354 VM_PAGE_CHECK(m);
4355 vm_page_check_pageable_safe(m);
4356 assert(m->vm_page_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q);
4357 assert(!m->fictitious);
4358
4359 if (m->local_id != lid)
4360 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
4361
4362 m->local_id = 0;
4363 m->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
4364 VM_PAGE_CHECK(m);
4365 #if CONFIG_BACKGROUND_QUEUE
4366 if (m->vm_page_in_background)
4367 vm_page_add_to_backgroundq(m, FALSE);
4368 #endif
4369 count++;
4370 }
4371 if (count != lq->vpl_count)
4372 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
4373
4374 /*
4375 * Transfer the entire local queue to the regular LRU page queues.
4376 */
4377 first_local = (vm_page_t) vm_page_queue_first(&lq->vpl_queue);
4378 last_local = (vm_page_t) vm_page_queue_last(&lq->vpl_queue);
4379 first_active = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
4380
4381 if (vm_page_queue_empty(&vm_page_queue_active)) {
4382 vm_page_queue_active.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
4383 } else {
4384 first_active->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
4385 }
4386 vm_page_queue_active.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
4387 first_local->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active);
4388 last_local->pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active);
4389
4390 vm_page_queue_init(&lq->vpl_queue);
4391 /*
4392 * Adjust the global page counts.
4393 */
4394 vm_page_active_count += lq->vpl_count;
4395 vm_page_pageable_internal_count += lq->vpl_internal_count;
4396 vm_page_pageable_external_count += lq->vpl_external_count;
4397 lq->vpl_count = 0;
4398 lq->vpl_internal_count = 0;
4399 lq->vpl_external_count = 0;
4400 }
4401 assert(vm_page_queue_empty(&lq->vpl_queue));
4402
4403 if (nolocks == FALSE) {
4404 VPL_UNLOCK(&lq->vpl_lock);
4405 vm_page_unlock_queues();
4406 }
4407 }
4408
4409 /*
4410 * vm_page_part_zero_fill:
4411 *
4412 * Zero-fill a part of the page.
4413 */
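/*
 * Illustrative sketch (not compiled): zeroing only the tail of a page,
 * e.g. past the last valid byte of a short read.  'valid_len' is a
 * hypothetical name for the number of bytes to preserve.
 */
#if 0
static void
example_zero_page_tail(vm_page_t m, vm_size_t valid_len)
{
	assert(valid_len <= PAGE_SIZE);
	vm_page_part_zero_fill(m, valid_len, PAGE_SIZE - valid_len);
}
#endif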
4414 #define PMAP_ZERO_PART_PAGE_IMPLEMENTED
4415 void
4416 vm_page_part_zero_fill(
4417 vm_page_t m,
4418 vm_offset_t m_pa,
4419 vm_size_t len)
4420 {
4421
4422 #if 0
4423 /*
4424 * we don't hold the page queue lock
4425 * so this check isn't safe to make
4426 */
4427 VM_PAGE_CHECK(m);
4428 #endif
4429
4430 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
4431 pmap_zero_part_page(VM_PAGE_GET_PHYS_PAGE(m), m_pa, len);
4432 #else
4433 vm_page_t tmp;
4434 while (1) {
4435 tmp = vm_page_grab();
4436 if (tmp == VM_PAGE_NULL) {
4437 vm_page_wait(THREAD_UNINT);
4438 continue;
4439 }
4440 break;
4441 }
4442 vm_page_zero_fill(tmp);
4443 if(m_pa != 0) {
4444 vm_page_part_copy(m, 0, tmp, 0, m_pa);
4445 }
4446 if((m_pa + len) < PAGE_SIZE) {
4447 vm_page_part_copy(m, m_pa + len, tmp,
4448 m_pa + len, PAGE_SIZE - (m_pa + len));
4449 }
4450 vm_page_copy(tmp,m);
4451 VM_PAGE_FREE(tmp);
4452 #endif
4453
4454 }
4455
4456 /*
4457 * vm_page_zero_fill:
4458 *
4459 * Zero-fill the specified page.
4460 */
4461 void
4462 vm_page_zero_fill(
4463 vm_page_t m)
4464 {
4465 XPR(XPR_VM_PAGE,
4466 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
4467 VM_PAGE_OBJECT(m), m->offset, m, 0,0);
4468 #if 0
4469 /*
4470 * we don't hold the page queue lock
4471 * so this check isn't safe to make
4472 */
4473 VM_PAGE_CHECK(m);
4474 #endif
4475
4476 // dbgTrace(0xAEAEAEAE, VM_PAGE_GET_PHYS_PAGE(m), 0); /* (BRINGUP) */
4477 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
4478 }
4479
4480 /*
4481 * vm_page_part_copy:
4482 *
4483 * copy part of one page to another
4484 */
4485
4486 void
4487 vm_page_part_copy(
4488 vm_page_t src_m,
4489 vm_offset_t src_pa,
4490 vm_page_t dst_m,
4491 vm_offset_t dst_pa,
4492 vm_size_t len)
4493 {
4494 #if 0
4495 /*
4496 * we don't hold the page queue lock
4497 * so this check isn't safe to make
4498 */
4499 VM_PAGE_CHECK(src_m);
4500 VM_PAGE_CHECK(dst_m);
4501 #endif
4502 pmap_copy_part_page(VM_PAGE_GET_PHYS_PAGE(src_m), src_pa,
4503 VM_PAGE_GET_PHYS_PAGE(dst_m), dst_pa, len);
4504 }
4505
4506 /*
4507 * vm_page_copy:
4508 *
4509 * Copy one page to another
4510 *
4511 * ENCRYPTED SWAP:
4512 * The source page should not be encrypted. The caller should
4513 * make sure the page is decrypted first, if necessary.
4514 */
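/*
 * Illustrative sketch (not compiled): a hypothetical copy of a resident
 * page into a destination page the caller already grabbed.  The source
 * object's lock must be held across the copy (see the assert below), and
 * the source page must already be decrypted.
 */
#if 0
static void
example_copy_page(vm_object_t src_object, vm_page_t src_m, vm_page_t dest_m)
{
	vm_object_lock(src_object);
	assert(!src_m->encrypted);
	vm_page_copy(src_m, dest_m);
	vm_object_unlock(src_object);
}
#endif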
4515
4516 int vm_page_copy_cs_validations = 0;
4517 int vm_page_copy_cs_tainted = 0;
4518
4519 void
4520 vm_page_copy(
4521 vm_page_t src_m,
4522 vm_page_t dest_m)
4523 {
4524 vm_object_t src_m_object;
4525
4526 src_m_object = VM_PAGE_OBJECT(src_m);
4527
4528 XPR(XPR_VM_PAGE,
4529 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
4530 src_m_object, src_m->offset,
4531 VM_PAGE_OBJECT(dest_m), dest_m->offset,
4532 0);
4533 #if 0
4534 /*
4535 * we don't hold the page queue lock
4536 * so this check isn't safe to make
4537 */
4538 VM_PAGE_CHECK(src_m);
4539 VM_PAGE_CHECK(dest_m);
4540 #endif
4541 vm_object_lock_assert_held(src_m_object);
4542
4543 /*
4544 * ENCRYPTED SWAP:
4545 * The source page should not be encrypted at this point.
4546 * The destination page will therefore not contain encrypted
4547 * data after the copy.
4548 */
4549 if (src_m->encrypted) {
4550 panic("vm_page_copy: source page %p is encrypted\n", src_m);
4551 }
4552 dest_m->encrypted = FALSE;
4553
4554 if (src_m_object != VM_OBJECT_NULL &&
4555 src_m_object->code_signed) {
4556 /*
4557 * We're copying a page from a code-signed object.
4558 * Whoever ends up mapping the copy page might care about
4559 * the original page's integrity, so let's validate the
4560 * source page now.
4561 */
4562 vm_page_copy_cs_validations++;
4563 vm_page_validate_cs(src_m);
4564 #if DEVELOPMENT || DEBUG
4565 DTRACE_VM4(codesigned_copy,
4566 vm_object_t, src_m_object,
4567 vm_object_offset_t, src_m->offset,
4568 int, src_m->cs_validated,
4569 int, src_m->cs_tainted);
4570 #endif /* DEVELOPMENT || DEBUG */
4571
4572 }
4573
4574 if (vm_page_is_slideable(src_m)) {
4575 boolean_t was_busy = src_m->busy;
4576 src_m->busy = TRUE;
4577 (void) vm_page_slide(src_m, 0);
4578 assert(src_m->busy);
4579 if (!was_busy) {
4580 PAGE_WAKEUP_DONE(src_m);
4581 }
4582 }
4583
4584 /*
4585 * Propagate the cs_tainted bit to the copy page. Do not propagate
4586 * the cs_validated bit.
4587 */
4588 dest_m->cs_tainted = src_m->cs_tainted;
4589 if (dest_m->cs_tainted) {
4590 vm_page_copy_cs_tainted++;
4591 }
4592 dest_m->slid = src_m->slid;
4593 dest_m->error = src_m->error; /* sliding src_m might have failed... */
4594 pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(src_m), VM_PAGE_GET_PHYS_PAGE(dest_m));
4595 }
4596
4597 #if MACH_ASSERT
4598 static void
4599 _vm_page_print(
4600 vm_page_t p)
4601 {
4602 printf("vm_page %p: \n", p);
4603 printf(" pageq: next=%p prev=%p\n",
4604 (vm_page_t)VM_PAGE_UNPACK_PTR(p->pageq.next),
4605 (vm_page_t)VM_PAGE_UNPACK_PTR(p->pageq.prev));
4606 printf(" listq: next=%p prev=%p\n",
4607 (vm_page_t)(VM_PAGE_UNPACK_PTR(p->listq.next)),
4608 (vm_page_t)(VM_PAGE_UNPACK_PTR(p->listq.prev)));
4609 printf(" next=%p\n", (vm_page_t)(VM_PAGE_UNPACK_PTR(p->next_m)));
4610 printf(" object=%p offset=0x%llx\n",VM_PAGE_OBJECT(p), p->offset);
4611 printf(" wire_count=%u\n", p->wire_count);
4612 printf(" q_state=%u\n", p->vm_page_q_state);
4613
4614 printf(" %slaundry, %sref, %sgobbled, %sprivate\n",
4615 (p->laundry ? "" : "!"),
4616 (p->reference ? "" : "!"),
4617 (p->gobbled ? "" : "!"),
4618 (p->private ? "" : "!"));
4619 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
4620 (p->busy ? "" : "!"),
4621 (p->wanted ? "" : "!"),
4622 (p->tabled ? "" : "!"),
4623 (p->fictitious ? "" : "!"),
4624 (p->pmapped ? "" : "!"),
4625 (p->wpmapped ? "" : "!"));
4626 printf(" %sfree_when_done, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
4627 (p->free_when_done ? "" : "!"),
4628 (p->absent ? "" : "!"),
4629 (p->error ? "" : "!"),
4630 (p->dirty ? "" : "!"),
4631 (p->cleaning ? "" : "!"),
4632 (p->precious ? "" : "!"),
4633 (p->clustered ? "" : "!"));
4634 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
4635 (p->overwriting ? "" : "!"),
4636 (p->restart ? "" : "!"),
4637 (p->unusual ? "" : "!"),
4638 (p->encrypted ? "" : "!"),
4639 (p->encrypted_cleaning ? "" : "!"));
4640 printf(" %scs_validated, %scs_tainted, %scs_nx, %sno_cache\n",
4641 (p->cs_validated ? "" : "!"),
4642 (p->cs_tainted ? "" : "!"),
4643 (p->cs_nx ? "" : "!"),
4644 (p->no_cache ? "" : "!"));
4645
4646 printf("phys_page=0x%x\n", VM_PAGE_GET_PHYS_PAGE(p));
4647 }
4648
4649 /*
4650 * Check that the list of pages is ordered by
4651 * ascending physical address and has no holes.
4652 */
4653 static int
4654 vm_page_verify_contiguous(
4655 vm_page_t pages,
4656 unsigned int npages)
4657 {
4658 vm_page_t m;
4659 unsigned int page_count;
4660 vm_offset_t prev_addr;
4661
4662 prev_addr = VM_PAGE_GET_PHYS_PAGE(pages);
4663 page_count = 1;
4664 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
4665 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
4666 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
4667 m, (long)prev_addr, VM_PAGE_GET_PHYS_PAGE(m));
4668 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
4669 panic("vm_page_verify_contiguous: not contiguous!");
4670 }
4671 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
4672 ++page_count;
4673 }
4674 if (page_count != npages) {
4675 printf("pages %p actual count 0x%x but requested 0x%x\n",
4676 pages, page_count, npages);
4677 panic("vm_page_verify_contiguous: count error");
4678 }
4679 return 1;
4680 }
4681
4682
4683 /*
4684 * Check the free lists for proper length etc.
4685 */
4686 static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
4687 static unsigned int
4688 vm_page_verify_free_list(
4689 vm_page_queue_head_t *vm_page_queue,
4690 unsigned int color,
4691 vm_page_t look_for_page,
4692 boolean_t expect_page)
4693 {
4694 unsigned int npages;
4695 vm_page_t m;
4696 vm_page_t prev_m;
4697 boolean_t found_page;
4698
4699 if (! vm_page_verify_this_free_list_enabled)
4700 return 0;
4701
4702 found_page = FALSE;
4703 npages = 0;
4704 prev_m = (vm_page_t)((uintptr_t)vm_page_queue);
4705
4706 vm_page_queue_iterate(vm_page_queue,
4707 m,
4708 vm_page_t,
4709 pageq) {
4710
4711 if (m == look_for_page) {
4712 found_page = TRUE;
4713 }
4714 if ((vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.prev) != prev_m)
4715 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
4716 color, npages, m, (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.prev), prev_m);
4717 if ( ! m->busy )
4718 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
4719 color, npages, m);
4720 if (color != (unsigned int) -1) {
4721 if ((VM_PAGE_GET_PHYS_PAGE(m) & vm_color_mask) != color)
4722 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
4723 color, npages, m, VM_PAGE_GET_PHYS_PAGE(m) & vm_color_mask, color);
4724 if (m->vm_page_q_state != VM_PAGE_ON_FREE_Q)
4725 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p - expecting q_state == VM_PAGE_ON_FREE_Q, found %d\n",
4726 color, npages, m, m->vm_page_q_state);
4727 } else {
4728 if (m->vm_page_q_state != VM_PAGE_ON_FREE_LOCAL_Q)
4729 panic("vm_page_verify_free_list(npages=%u): local page %p - expecting q_state == VM_PAGE_ON_FREE_LOCAL_Q, found %d\n",
4730 npages, m, m->vm_page_q_state);
4731 }
4732 ++npages;
4733 prev_m = m;
4734 }
4735 if (look_for_page != VM_PAGE_NULL) {
4736 unsigned int other_color;
4737
4738 if (expect_page && !found_page) {
4739 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
4740 color, npages, look_for_page, VM_PAGE_GET_PHYS_PAGE(look_for_page));
4741 _vm_page_print(look_for_page);
4742 for (other_color = 0;
4743 other_color < vm_colors;
4744 other_color++) {
4745 if (other_color == color)
4746 continue;
4747 vm_page_verify_free_list(&vm_page_queue_free[other_color].qhead,
4748 other_color, look_for_page, FALSE);
4749 }
4750 if (color == (unsigned int) -1) {
4751 vm_page_verify_free_list(&vm_lopage_queue_free,
4752 (unsigned int) -1, look_for_page, FALSE);
4753 }
4754 panic("vm_page_verify_free_list(color=%u)\n", color);
4755 }
4756 if (!expect_page && found_page) {
4757 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
4758 color, npages, look_for_page, VM_PAGE_GET_PHYS_PAGE(look_for_page));
4759 }
4760 }
4761 return npages;
4762 }
4763
4764 static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
4765 static void
4766 vm_page_verify_free_lists( void )
4767 {
4768 unsigned int color, npages, nlopages;
4769 boolean_t toggle = TRUE;
4770
4771 if (! vm_page_verify_all_free_lists_enabled)
4772 return;
4773
4774 npages = 0;
4775
4776 lck_mtx_lock(&vm_page_queue_free_lock);
4777
4778 if (vm_page_verify_this_free_list_enabled == TRUE) {
4779 /*
4780 * This variable has been set globally for extra checking of
4781 * each free list Q. Since we didn't set it, we don't own it
4782 * and we shouldn't toggle it.
4783 */
4784 toggle = FALSE;
4785 }
4786
4787 if (toggle == TRUE) {
4788 vm_page_verify_this_free_list_enabled = TRUE;
4789 }
4790
4791 for( color = 0; color < vm_colors; color++ ) {
4792 npages += vm_page_verify_free_list(&vm_page_queue_free[color].qhead,
4793 color, VM_PAGE_NULL, FALSE);
4794 }
4795 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
4796 (unsigned int) -1,
4797 VM_PAGE_NULL, FALSE);
4798 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
4799 panic("vm_page_verify_free_lists: "
4800 "npages %u free_count %d nlopages %u lo_free_count %u",
4801 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
4802
4803 if (toggle == TRUE) {
4804 vm_page_verify_this_free_list_enabled = FALSE;
4805 }
4806
4807 lck_mtx_unlock(&vm_page_queue_free_lock);
4808 }
4809
4810 #endif /* MACH_ASSERT */
4811
4812
4813
4814
4815
4816 extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4817
4818 /*
4819 * CONTIGUOUS PAGE ALLOCATION
4820 *
4821 * Find a region large enough to contain at least n pages
4822 * of contiguous physical memory.
4823 *
4824 * This is done by traversing the vm_page_t array in a linear fashion
4825 * we assume that the vm_page_t array has the available physical pages in an
4826 * ordered, ascending list... this is currently true of all our implementations
4827 * and must remain so... there can be 'holes' in the array... we also can
4828 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
4829 * which used to happen via 'vm_page_convert'... that function was no longer
4830 * being called and was removed...
4831 *
4832 * The basic flow consists of stabilizing some of the interesting state of
4833 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
4834 * sweep at the beginning of the array looking for pages that meet our criteria
4835 * for a 'stealable' page... currently we are pretty conservative... if the page
4836 * meets these criteria and is physically contiguous to the previous page in the 'run'
4837 * we keep developing it. If we hit a page that doesn't fit, we reset our state
4838 * and start to develop a new run... if at this point we've already considered
4839 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
4840 * and mutex_pause (which will yield the processor), to keep the latency low w/r
4841 * to other threads trying to acquire free pages (or move pages from q to q),
4842 * and then continue from the spot we left off... we only make 1 pass through the
4843 * array. Once we have a 'run' that is long enough, we'll go into the loop
4844 * which steals the pages from the queues they're currently on... pages on the free
4845 * queue can be stolen directly... pages that are on any of the other queues
4846 * must be removed from the object they are tabled on... this requires taking the
4847 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
4848 * or if the state of the page behind the vm_object lock is no longer viable, we'll
4849 * dump the pages we've currently stolen back to the free list, and pick up our
4850 * scan from the point where we aborted the 'current' run.
4851 *
4852 *
4853 * Requirements:
4854 * - neither vm_page_queue nor vm_free_list lock can be held on entry
4855 *
4856 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
4857 *
4858 * Algorithm:
4859 */
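/*
 * Illustrative sketch (not compiled): how a caller typically reaches this
 * scan -- indirectly, through cpm_allocate() further down in this file.
 * The numbers are only an example: ask for 8 contiguous, wired pages
 * anywhere in physical memory, with no alignment constraint
 * (pnum_mask == 0) and no upper physical bound (max_pnum == 0).
 */
#if 0
static kern_return_t
example_grab_contiguous_run(vm_page_t *page_list)
{
	return cpm_allocate((vm_size_t)(8 * PAGE_SIZE),	/* size, page aligned */
			    page_list,			/* out: list ordered by phys addr */
			    0,				/* max_pnum: no limit */
			    0,				/* pnum_mask: no alignment */
			    TRUE,			/* wire the pages */
			    0);				/* flags: e.g. KMA_LOMEM for low memory */
}
#endif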
4860
4861 #define MAX_CONSIDERED_BEFORE_YIELD 1000
4862
4863
4864 #define RESET_STATE_OF_RUN() \
4865 MACRO_BEGIN \
4866 prevcontaddr = -2; \
4867 start_pnum = -1; \
4868 free_considered = 0; \
4869 substitute_needed = 0; \
4870 npages = 0; \
4871 MACRO_END
4872
4873 /*
4874 * Can we steal in-use (i.e. not free) pages when searching for
4875 * physically-contiguous pages ?
4876 */
4877 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
4878
4879 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
4880 #if DEBUG
4881 int vm_page_find_contig_debug = 0;
4882 #endif
4883
4884 static vm_page_t
4885 vm_page_find_contiguous(
4886 unsigned int contig_pages,
4887 ppnum_t max_pnum,
4888 ppnum_t pnum_mask,
4889 boolean_t wire,
4890 int flags)
4891 {
4892 vm_page_t m = NULL;
4893 ppnum_t prevcontaddr;
4894 ppnum_t start_pnum;
4895 unsigned int npages, considered, scanned;
4896 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
4897 unsigned int idx_last_contig_page_found = 0;
4898 int free_considered, free_available;
4899 int substitute_needed;
4900 boolean_t wrapped, zone_gc_called = FALSE;
4901 #if DEBUG
4902 clock_sec_t tv_start_sec, tv_end_sec;
4903 clock_usec_t tv_start_usec, tv_end_usec;
4904 #endif
4905
4906 int yielded = 0;
4907 int dumped_run = 0;
4908 int stolen_pages = 0;
4909 int compressed_pages = 0;
4910
4911
4912 if (contig_pages == 0)
4913 return VM_PAGE_NULL;
4914
4915 full_scan_again:
4916
4917 #if MACH_ASSERT
4918 vm_page_verify_free_lists();
4919 #endif
4920 #if DEBUG
4921 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
4922 #endif
4923 PAGE_REPLACEMENT_ALLOWED(TRUE);
4924
4925 vm_page_lock_queues();
4926
4927
4928 lck_mtx_lock(&vm_page_queue_free_lock);
4929
4930 RESET_STATE_OF_RUN();
4931
4932 scanned = 0;
4933 considered = 0;
4934 free_available = vm_page_free_count - vm_page_free_reserved;
4935
4936 wrapped = FALSE;
4937
4938 if(flags & KMA_LOMEM)
4939 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
4940 else
4941 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
4942
4943 orig_last_idx = idx_last_contig_page_found;
4944 last_idx = orig_last_idx;
4945
4946 for (page_idx = last_idx, start_idx = last_idx;
4947 npages < contig_pages && page_idx < vm_pages_count;
4948 page_idx++) {
4949 retry:
4950 if (wrapped &&
4951 npages == 0 &&
4952 page_idx >= orig_last_idx) {
4953 /*
4954 * We're back where we started and we haven't
4955 * found any suitable contiguous range. Let's
4956 * give up.
4957 */
4958 break;
4959 }
4960 scanned++;
4961 m = &vm_pages[page_idx];
4962
4963 assert(!m->fictitious);
4964 assert(!m->private);
4965
4966 if (max_pnum && VM_PAGE_GET_PHYS_PAGE(m) > max_pnum) {
4967 /* no more low pages... */
4968 break;
4969 }
4970 if (!npages && ((VM_PAGE_GET_PHYS_PAGE(m) & pnum_mask) != 0)) {
4971 /*
4972 * not aligned
4973 */
4974 RESET_STATE_OF_RUN();
4975
4976 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
4977 m->encrypted_cleaning || m->laundry || m->wanted ||
4978 m->cleaning || m->overwriting || m->free_when_done) {
4979 /*
4980 * page is in a transient state
4981 * or a state we don't want to deal
4982 * with, so don't consider it which
4983 * means starting a new run
4984 */
4985 RESET_STATE_OF_RUN();
4986
4987 } else if ((m->vm_page_q_state == VM_PAGE_NOT_ON_Q) ||
4988 (m->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q) ||
4989 (m->vm_page_q_state == VM_PAGE_ON_FREE_LOPAGE_Q) ||
4990 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
4991 /*
4992 * page needs to be on one of our queues (other than the pageout or special free queues)
4993 * or it needs to belong to the compressor pool (which is now indicated
4994 * by vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR and falls out
4995 * from the check for VM_PAGE_NOT_ON_Q)
4996 * in order for it to be stable behind the
4997 * locks we hold at this point...
4998 * if not, don't consider it which
4999 * means starting a new run
5000 */
5001 RESET_STATE_OF_RUN();
5002
5003 } else if ((m->vm_page_q_state != VM_PAGE_ON_FREE_Q) && (!m->tabled || m->busy)) {
5004 /*
5005 * pages on the free list are always 'busy'
5006 * so we couldn't test for 'busy' in the check
5007 * for the transient states... pages that are
5008 * 'free' are never 'tabled', so we also couldn't
5009 * test for 'tabled'. So we check here to make
5010 * sure that a non-free page is not busy and is
5011 * tabled on an object...
5012 * if not, don't consider it which
5013 * means starting a new run
5014 */
5015 RESET_STATE_OF_RUN();
5016
5017 } else {
5018 if (VM_PAGE_GET_PHYS_PAGE(m) != prevcontaddr + 1) {
5019 if ((VM_PAGE_GET_PHYS_PAGE(m) & pnum_mask) != 0) {
5020 RESET_STATE_OF_RUN();
5021 goto did_consider;
5022 } else {
5023 npages = 1;
5024 start_idx = page_idx;
5025 start_pnum = VM_PAGE_GET_PHYS_PAGE(m);
5026 }
5027 } else {
5028 npages++;
5029 }
5030 prevcontaddr = VM_PAGE_GET_PHYS_PAGE(m);
5031
5032 VM_PAGE_CHECK(m);
5033 if (m->vm_page_q_state == VM_PAGE_ON_FREE_Q) {
5034 free_considered++;
5035 } else {
5036 /*
5037 * This page is not free.
5038 * If we can't steal used pages,
5039 * we have to give up this run
5040 * and keep looking.
5041 * Otherwise, we might need to
5042 * move the contents of this page
5043 * into a substitute page.
5044 */
5045 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
5046 if (m->pmapped || m->dirty || m->precious) {
5047 substitute_needed++;
5048 }
5049 #else
5050 RESET_STATE_OF_RUN();
5051 #endif
5052 }
5053
5054 if ((free_considered + substitute_needed) > free_available) {
5055 /*
5056 * if we let this run continue
5057 * we will end up dropping the vm_page_free_count
5058 * below the reserve limit... we need to abort
5059 * this run, but we can at least re-consider this
5060 * page... thus the jump back to 'retry'
5061 */
5062 RESET_STATE_OF_RUN();
5063
5064 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
5065 considered++;
5066 goto retry;
5067 }
5068 /*
5069 * free_available == 0
5070 * so can't consider any free pages... if
5071 * we went to retry in this case, we'd
5072 * get stuck looking at the same page
5073 * w/o making any forward progress
5074 * we also want to take this path if we've already
5075 * reached our limit that controls the lock latency
5076 */
5077 }
5078 }
5079 did_consider:
5080 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
5081
5082 PAGE_REPLACEMENT_ALLOWED(FALSE);
5083
5084 lck_mtx_unlock(&vm_page_queue_free_lock);
5085 vm_page_unlock_queues();
5086
5087 mutex_pause(0);
5088
5089 PAGE_REPLACEMENT_ALLOWED(TRUE);
5090
5091 vm_page_lock_queues();
5092 lck_mtx_lock(&vm_page_queue_free_lock);
5093
5094 RESET_STATE_OF_RUN();
5095 /*
5096 * reset our free page limit since we
5097 * dropped the lock protecting the vm_page_free_queue
5098 */
5099 free_available = vm_page_free_count - vm_page_free_reserved;
5100 considered = 0;
5101
5102 yielded++;
5103
5104 goto retry;
5105 }
5106 considered++;
5107 }
5108 m = VM_PAGE_NULL;
5109
5110 if (npages != contig_pages) {
5111 if (!wrapped) {
5112 /*
5113 * We didn't find a contiguous range but we didn't
5114 * start from the very first page.
5115 * Start again from the very first page.
5116 */
5117 RESET_STATE_OF_RUN();
5118 if( flags & KMA_LOMEM)
5119 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
5120 else
5121 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
5122 last_idx = 0;
5123 page_idx = last_idx;
5124 wrapped = TRUE;
5125 goto retry;
5126 }
5127 lck_mtx_unlock(&vm_page_queue_free_lock);
5128 } else {
5129 vm_page_t m1;
5130 vm_page_t m2;
5131 unsigned int cur_idx;
5132 unsigned int tmp_start_idx;
5133 vm_object_t locked_object = VM_OBJECT_NULL;
5134 boolean_t abort_run = FALSE;
5135
5136 assert(page_idx - start_idx == contig_pages);
5137
5138 tmp_start_idx = start_idx;
5139
5140 /*
5141 * first pass through to pull the free pages
5142 * off of the free queue so that in case we
5143 * need substitute pages, we won't grab any
5144 * of the free pages in the run... we'll clear
5145 * the 'free' bit in the 2nd pass, and even in
5146 * an abort_run case, we'll collect all of the
5147 * free pages in this run and return them to the free list
5148 */
5149 while (start_idx < page_idx) {
5150
5151 m1 = &vm_pages[start_idx++];
5152
5153 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
5154 assert(m1->vm_page_q_state == VM_PAGE_ON_FREE_Q);
5155 #endif
5156
5157 if (m1->vm_page_q_state == VM_PAGE_ON_FREE_Q) {
5158 unsigned int color;
5159
5160 color = VM_PAGE_GET_PHYS_PAGE(m1) & vm_color_mask;
5161 #if MACH_ASSERT
5162 vm_page_verify_free_list(&vm_page_queue_free[color].qhead, color, m1, TRUE);
5163 #endif
5164 vm_page_queue_remove(&vm_page_queue_free[color].qhead,
5165 m1,
5166 vm_page_t,
5167 pageq);
5168
5169 VM_PAGE_ZERO_PAGEQ_ENTRY(m1);
5170 #if MACH_ASSERT
5171 vm_page_verify_free_list(&vm_page_queue_free[color].qhead, color, VM_PAGE_NULL, FALSE);
5172 #endif
5173 /*
5174 * Clear the "free" bit so that this page
5175 * does not get considered for another
5176 * concurrent physically-contiguous allocation.
5177 */
5178 m1->vm_page_q_state = VM_PAGE_NOT_ON_Q;
5179 assert(m1->busy);
5180
5181 vm_page_free_count--;
5182 }
5183 }
5184 if( flags & KMA_LOMEM)
5185 vm_page_lomem_find_contiguous_last_idx = page_idx;
5186 else
5187 vm_page_find_contiguous_last_idx = page_idx;
5188
5189 /*
5190 * we can drop the free queue lock at this point since
5191 * we've pulled any 'free' candidates off of the list
5192 * we need it dropped so that we can do a vm_page_grab
5193 * when substituting for pmapped/dirty pages
5194 */
5195 lck_mtx_unlock(&vm_page_queue_free_lock);
5196
5197 start_idx = tmp_start_idx;
5198 cur_idx = page_idx - 1;
5199
5200 while (start_idx++ < page_idx) {
5201 /*
5202 * must go through the list from back to front
5203 * so that the page list is created in the
5204 * correct order - low -> high phys addresses
5205 */
5206 m1 = &vm_pages[cur_idx--];
5207
5208 if (m1->vm_page_object == 0) {
5209 /*
5210 * page has already been removed from
5211 * the free list in the 1st pass
5212 */
5213 assert(m1->vm_page_q_state == VM_PAGE_NOT_ON_Q);
5214 assert(m1->offset == (vm_object_offset_t) -1);
5215 assert(m1->busy);
5216 assert(!m1->wanted);
5217 assert(!m1->laundry);
5218 } else {
5219 vm_object_t object;
5220 int refmod;
5221 boolean_t disconnected, reusable;
5222
5223 if (abort_run == TRUE)
5224 continue;
5225
5226 assert(m1->vm_page_q_state != VM_PAGE_NOT_ON_Q);
5227
5228 object = VM_PAGE_OBJECT(m1);
5229
5230 if (object != locked_object) {
5231 if (locked_object) {
5232 vm_object_unlock(locked_object);
5233 locked_object = VM_OBJECT_NULL;
5234 }
5235 if (vm_object_lock_try(object))
5236 locked_object = object;
5237 }
5238 if (locked_object == VM_OBJECT_NULL ||
5239 (VM_PAGE_WIRED(m1) || m1->gobbled ||
5240 m1->encrypted_cleaning || m1->laundry || m1->wanted ||
5241 m1->cleaning || m1->overwriting || m1->free_when_done || m1->busy) ||
5242 (m1->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
5243
5244 if (locked_object) {
5245 vm_object_unlock(locked_object);
5246 locked_object = VM_OBJECT_NULL;
5247 }
5248 tmp_start_idx = cur_idx;
5249 abort_run = TRUE;
5250 continue;
5251 }
5252
5253 disconnected = FALSE;
5254 reusable = FALSE;
5255
5256 if ((m1->reusable ||
5257 object->all_reusable) &&
5258 (m1->vm_page_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q) &&
5259 !m1->dirty &&
5260 !m1->reference) {
5261 /* reusable page... */
5262 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1));
5263 disconnected = TRUE;
5264 if (refmod == 0) {
5265 /*
5266 * ... not reused: can steal
5267 * without relocating contents.
5268 */
5269 reusable = TRUE;
5270 }
5271 }
5272
5273 if ((m1->pmapped &&
5274 ! reusable) ||
5275 m1->dirty ||
5276 m1->precious) {
5277 vm_object_offset_t offset;
5278
5279 m2 = vm_page_grab();
5280
5281 if (m2 == VM_PAGE_NULL) {
5282 if (locked_object) {
5283 vm_object_unlock(locked_object);
5284 locked_object = VM_OBJECT_NULL;
5285 }
5286 tmp_start_idx = cur_idx;
5287 abort_run = TRUE;
5288 continue;
5289 }
5290 if (! disconnected) {
5291 if (m1->pmapped)
5292 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1));
5293 else
5294 refmod = 0;
5295 }
5296
5297 /* copy the page's contents */
5298 pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(m1), VM_PAGE_GET_PHYS_PAGE(m2));
5299 /* copy the page's state */
5300 assert(!VM_PAGE_WIRED(m1));
5301 assert(m1->vm_page_q_state != VM_PAGE_ON_FREE_Q);
5302 assert(m1->vm_page_q_state != VM_PAGE_ON_PAGEOUT_Q);
5303 assert(!m1->laundry);
5304 m2->reference = m1->reference;
5305 assert(!m1->gobbled);
5306 assert(!m1->private);
5307 m2->no_cache = m1->no_cache;
5308 m2->xpmapped = 0;
5309 assert(!m1->busy);
5310 assert(!m1->wanted);
5311 assert(!m1->fictitious);
5312 m2->pmapped = m1->pmapped; /* should flush cache ? */
5313 m2->wpmapped = m1->wpmapped;
5314 assert(!m1->free_when_done);
5315 m2->absent = m1->absent;
5316 m2->error = m1->error;
5317 m2->dirty = m1->dirty;
5318 assert(!m1->cleaning);
5319 m2->precious = m1->precious;
5320 m2->clustered = m1->clustered;
5321 assert(!m1->overwriting);
5322 m2->restart = m1->restart;
5323 m2->unusual = m1->unusual;
5324 m2->encrypted = m1->encrypted;
5325 assert(!m1->encrypted_cleaning);
5326 m2->cs_validated = m1->cs_validated;
5327 m2->cs_tainted = m1->cs_tainted;
5328 m2->cs_nx = m1->cs_nx;
5329
5330 /*
5331 * If m1 had really been reusable,
5332 * we would have just stolen it, so
5333 * let's not propagate its "reusable"
5334 * bit and assert that m2 is not
5335 * marked as "reusable".
5336 */
5337 // m2->reusable = m1->reusable;
5338 assert(!m2->reusable);
5339
5340 // assert(!m1->lopage);
5341 m2->slid = m1->slid;
5342
5343 if (m1->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR)
5344 m2->vm_page_q_state = VM_PAGE_USED_BY_COMPRESSOR;
5345
5346 /*
5347 * page may need to be flushed if
5348 * it is marshalled into a UPL
5349 * that is going to be used by a device
5350 * that doesn't support coherency
5351 */
5352 m2->written_by_kernel = TRUE;
5353
5354 /*
5355 * make sure we clear the ref/mod state
5356 * from the pmap layer... else we risk
5357 * inheriting state from the last time
5358 * this page was used...
5359 */
5360 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(m2), VM_MEM_MODIFIED | VM_MEM_REFERENCED);
5361
5362 if (refmod & VM_MEM_REFERENCED)
5363 m2->reference = TRUE;
5364 if (refmod & VM_MEM_MODIFIED) {
5365 SET_PAGE_DIRTY(m2, TRUE);
5366 }
5367 offset = m1->offset;
5368
5369 /*
5370 * completely cleans up the state
5371 * of the page so that it is ready
5372 * to be put onto the free list, or
5373 * for this purpose it looks like it
5374 * just came off of the free list
5375 */
5376 vm_page_free_prepare(m1);
5377
5378 /*
5379 * now put the substitute page
5380 * on the object
5381 */
5382 vm_page_insert_internal(m2, locked_object, offset, VM_KERN_MEMORY_NONE, TRUE, TRUE, FALSE, FALSE, NULL);
5383
5384 if (m2->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
5385 m2->pmapped = TRUE;
5386 m2->wpmapped = TRUE;
5387
5388 PMAP_ENTER(kernel_pmap, m2->offset, m2,
5389 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
5390
5391 compressed_pages++;
5392
5393 } else {
5394 if (m2->reference)
5395 vm_page_activate(m2);
5396 else
5397 vm_page_deactivate(m2);
5398 }
5399 PAGE_WAKEUP_DONE(m2);
5400
5401 } else {
5402 assert(m1->vm_page_q_state != VM_PAGE_USED_BY_COMPRESSOR);
5403
5404 /*
5405 * completely cleans up the state
5406 * of the page so that it is ready
5407 * to be put onto the free list, or
5408 * for this purpose it looks like it
5409 * just came off of the free list
5410 */
5411 vm_page_free_prepare(m1);
5412 }
5413
5414 stolen_pages++;
5415
5416 }
5417 #if CONFIG_BACKGROUND_QUEUE
5418 vm_page_assign_background_state(m1);
5419 #endif
5420 VM_PAGE_ZERO_PAGEQ_ENTRY(m1);
5421 m1->snext = m;
5422 m = m1;
5423 }
5424 if (locked_object) {
5425 vm_object_unlock(locked_object);
5426 locked_object = VM_OBJECT_NULL;
5427 }
5428
5429 if (abort_run == TRUE) {
5430 if (m != VM_PAGE_NULL) {
5431 vm_page_free_list(m, FALSE);
5432 }
5433
5434 dumped_run++;
5435
5436 /*
5437 * want the index of the last
5438 * page in this run that was
5439 * successfully 'stolen', so back
5440 * it up 1 for the auto-decrement on use
5441 * and 1 more to bump back over this page
5442 */
5443 page_idx = tmp_start_idx + 2;
5444 if (page_idx >= vm_pages_count) {
5445 if (wrapped)
5446 goto done_scanning;
5447 page_idx = last_idx = 0;
5448 wrapped = TRUE;
5449 }
5450 abort_run = FALSE;
5451
5452 /*
5453 * We aborted this run; reset our state and resume the
5454 * scan just past the page that ended the run (or from
5455 * the very first page if we wrapped around).
5456 */
5457 RESET_STATE_OF_RUN();
5458
5459 if( flags & KMA_LOMEM)
5460 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
5461 else
5462 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
5463
5464 last_idx = page_idx;
5465
5466 lck_mtx_lock(&vm_page_queue_free_lock);
5467 /*
5468 * reset our free page limit since we
5469 * dropped the lock protecting the vm_page_free_queue
5470 */
5471 free_available = vm_page_free_count - vm_page_free_reserved;
5472 goto retry;
5473 }
5474
5475 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
5476
5477 assert(m1->vm_page_q_state == VM_PAGE_NOT_ON_Q);
5478 assert(m1->wire_count == 0);
5479
5480 if (wire == TRUE) {
5481 m1->wire_count++;
5482 m1->vm_page_q_state = VM_PAGE_IS_WIRED;
5483 } else
5484 m1->gobbled = TRUE;
5485 }
5486 if (wire == FALSE)
5487 vm_page_gobble_count += npages;
5488
5489 /*
5490 * gobbled pages are also counted as wired pages
5491 */
5492 vm_page_wire_count += npages;
5493
5494 assert(vm_page_verify_contiguous(m, npages));
5495 }
5496 done_scanning:
5497 PAGE_REPLACEMENT_ALLOWED(FALSE);
5498
5499 vm_page_unlock_queues();
5500
5501 #if DEBUG
5502 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
5503
5504 tv_end_sec -= tv_start_sec;
5505 if (tv_end_usec < tv_start_usec) {
5506 tv_end_sec--;
5507 tv_end_usec += 1000000;
5508 }
5509 tv_end_usec -= tv_start_usec;
5510 if (tv_end_usec >= 1000000) {
5511 tv_end_sec++;
5512 tv_end_sec -= 1000000;
5513 }
5514 if (vm_page_find_contig_debug) {
5515 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
5516 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
5517 (long)tv_end_sec, tv_end_usec, orig_last_idx,
5518 scanned, yielded, dumped_run, stolen_pages, compressed_pages);
5519 }
5520
5521 #endif
5522 #if MACH_ASSERT
5523 vm_page_verify_free_lists();
5524 #endif
5525 if (m == NULL && zone_gc_called == FALSE) {
5526 printf("%s(num=%d,low=%d): found %d pages at 0x%llx...scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages... wired count is %d\n",
5527 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
5528 scanned, yielded, dumped_run, stolen_pages, compressed_pages, vm_page_wire_count);
5529
5530 if (consider_buffer_cache_collect != NULL) {
5531 (void)(*consider_buffer_cache_collect)(1);
5532 }
5533
5534 consider_zone_gc();
5535
5536 zone_gc_called = TRUE;
5537
5538 printf("vm_page_find_contiguous: zone_gc called... wired count is %d\n", vm_page_wire_count);
5539 goto full_scan_again;
5540 }
5541
5542 return m;
5543 }
5544
5545 /*
5546 * Allocate a list of contiguous, wired pages.
5547 */
5548 kern_return_t
5549 cpm_allocate(
5550 vm_size_t size,
5551 vm_page_t *list,
5552 ppnum_t max_pnum,
5553 ppnum_t pnum_mask,
5554 boolean_t wire,
5555 int flags)
5556 {
5557 vm_page_t pages;
5558 unsigned int npages;
5559
5560 if (size % PAGE_SIZE != 0)
5561 return KERN_INVALID_ARGUMENT;
5562
5563 npages = (unsigned int) (size / PAGE_SIZE);
5564 if (npages != size / PAGE_SIZE) {
5565 /* 32-bit overflow */
5566 return KERN_INVALID_ARGUMENT;
5567 }
5568
5569 /*
5570 * Obtain a pointer to a subset of the free
5571 * list large enough to satisfy the request;
5572 * the region will be physically contiguous.
5573 */
5574 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
5575
5576 if (pages == VM_PAGE_NULL)
5577 return KERN_NO_SPACE;
5578 /*
5579 * determine need for wakeups
5580 */
5581 if ((vm_page_free_count < vm_page_free_min) ||
5582 ((vm_page_free_count < vm_page_free_target) &&
5583 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
5584 thread_wakeup((event_t) &vm_page_free_wanted);
5585
5586 VM_CHECK_MEMORYSTATUS;
5587
5588 /*
5589 * The CPM pages should now be available and
5590 * ordered by ascending physical address.
5591 */
5592 assert(vm_page_verify_contiguous(pages, npages));
5593
5594 *list = pages;
5595 return KERN_SUCCESS;
5596 }
5597
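
/*
 * Illustrative sketch only -- a hypothetical caller, not existing kernel code:
 * asking cpm_allocate() above for 16 physically contiguous, wired pages.
 * Passing 0 for max_pnum and pnum_mask is assumed here to mean "no physical
 * ceiling and no alignment constraint"; treat that, and the routine itself,
 * as assumptions rather than documented API.
 */
static __unused kern_return_t
example_cpm_grab_16_pages(vm_page_t *out_pages)
{
	kern_return_t	kr;

	/* size must be a whole number of pages or KERN_INVALID_ARGUMENT comes back */
	kr = cpm_allocate((vm_size_t)16 * PAGE_SIZE, out_pages, 0, 0, TRUE, 0);

	if (kr != KERN_SUCCESS)
		return (kr);	/* typically KERN_NO_SPACE: no contiguous run was found */

	/* on success the pages are wired and ordered by ascending physical address */
	return (KERN_SUCCESS);
}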
5598
5599 unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
5600
5601 /*
5602 * when working on a 'run' of pages, it is necessary to hold
5603 * the vm_page_queue_lock (a hot global lock) for certain operations
5604 * on the page... however, the majority of the work can be done
5605 * while merely holding the object lock... in fact there are certain
5606 * collections of pages that don't require any work brokered by the
5607 * vm_page_queue_lock... to mitigate the time spent behind the global
5608 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
5609 * while doing all of the work that doesn't require the vm_page_queue_lock...
5610 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
5611 * necessary work for each page... we will grab the busy bit on the page
5612 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
5613 * if it can't immediately take the vm_page_queue_lock in order to compete
5614 * for the locks in the same order that vm_pageout_scan takes them.
5615 * the operation names are modeled after the names of the routines that
5616 * need to be called in order to make the changes very obvious in the
5617 * original loop
5618 */
5619
5620 void
5621 vm_page_do_delayed_work(
5622 vm_object_t object,
5623 vm_tag_t tag,
5624 struct vm_page_delayed_work *dwp,
5625 int dw_count)
5626 {
5627 int j;
5628 vm_page_t m;
5629 vm_page_t local_free_q = VM_PAGE_NULL;
5630
5631 /*
5632 * pageout_scan takes the vm_page_lock_queues first
5633 * then tries for the object lock... to avoid what
5634 * is effectively a lock inversion, we'll go to the
5635 * trouble of taking them in that same order... otherwise
5636 * if this object contains the majority of the pages resident
5637 * in the UBC (or a small set of large objects actively being
5638 * worked on contain the majority of the pages), we could
5639 * cause the pageout_scan thread to 'starve' in its attempt
5640 * to find pages to move to the free queue, since it has to
5641 * successfully acquire the object lock of any candidate page
5642 * before it can steal/clean it.
5643 */
5644 if (!vm_page_trylockspin_queues()) {
5645 vm_object_unlock(object);
5646
5647 vm_page_lockspin_queues();
5648
5649 for (j = 0; ; j++) {
5650 if (!vm_object_lock_avoid(object) &&
5651 _vm_object_lock_try(object))
5652 break;
5653 vm_page_unlock_queues();
5654 mutex_pause(j);
5655 vm_page_lockspin_queues();
5656 }
5657 }
5658 for (j = 0; j < dw_count; j++, dwp++) {
5659
5660 m = dwp->dw_m;
5661
5662 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
5663 vm_pageout_throttle_up(m);
5664 #if CONFIG_PHANTOM_CACHE
5665 if (dwp->dw_mask & DW_vm_phantom_cache_update)
5666 vm_phantom_cache_update(m);
5667 #endif
5668 if (dwp->dw_mask & DW_vm_page_wire)
5669 vm_page_wire(m, tag, FALSE);
5670 else if (dwp->dw_mask & DW_vm_page_unwire) {
5671 boolean_t queueit;
5672
5673 queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;
5674
5675 vm_page_unwire(m, queueit);
5676 }
5677 if (dwp->dw_mask & DW_vm_page_free) {
5678 vm_page_free_prepare_queues(m);
5679
5680 assert(m->pageq.next == 0 && m->pageq.prev == 0);
5681 /*
5682 * Add this page to our list of reclaimed pages,
5683 * to be freed later.
5684 */
5685 m->snext = local_free_q;
5686 local_free_q = m;
5687 } else {
5688 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
5689 vm_page_deactivate_internal(m, FALSE);
5690 else if (dwp->dw_mask & DW_vm_page_activate) {
5691 if (m->vm_page_q_state != VM_PAGE_ON_ACTIVE_Q) {
5692 vm_page_activate(m);
5693 }
5694 }
5695 else if (dwp->dw_mask & DW_vm_page_speculate)
5696 vm_page_speculate(m, TRUE);
5697 else if (dwp->dw_mask & DW_enqueue_cleaned) {
5698 /*
5699 * if we didn't hold the object lock and did this,
5700 * we might disconnect the page, then someone might
5701 * soft fault it back in, then we would put it on the
5702 * cleaned queue, and so we would have a referenced (maybe even dirty)
5703 * page on that queue, which we don't want
5704 */
5705 int refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
5706
5707 if ((refmod_state & VM_MEM_REFERENCED)) {
5708 /*
5709 * this page has been touched since it got cleaned; let's activate it
5710 * if it hasn't already been
5711 */
5712 vm_pageout_enqueued_cleaned++;
5713 vm_pageout_cleaned_reactivated++;
5714 vm_pageout_cleaned_commit_reactivated++;
5715
5716 if (m->vm_page_q_state != VM_PAGE_ON_ACTIVE_Q)
5717 vm_page_activate(m);
5718 } else {
5719 m->reference = FALSE;
5720 vm_page_enqueue_cleaned(m);
5721 }
5722 }
5723 else if (dwp->dw_mask & DW_vm_page_lru)
5724 vm_page_lru(m);
5725 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
5726 if (m->vm_page_q_state != VM_PAGE_ON_PAGEOUT_Q)
5727 vm_page_queues_remove(m, TRUE);
5728 }
5729 if (dwp->dw_mask & DW_set_reference)
5730 m->reference = TRUE;
5731 else if (dwp->dw_mask & DW_clear_reference)
5732 m->reference = FALSE;
5733
5734 if (dwp->dw_mask & DW_move_page) {
5735 if (m->vm_page_q_state != VM_PAGE_ON_PAGEOUT_Q) {
5736 vm_page_queues_remove(m, FALSE);
5737
5738 assert(VM_PAGE_OBJECT(m) != kernel_object);
5739
5740 vm_page_enqueue_inactive(m, FALSE);
5741 }
5742 }
5743 if (dwp->dw_mask & DW_clear_busy)
5744 m->busy = FALSE;
5745
5746 if (dwp->dw_mask & DW_PAGE_WAKEUP)
5747 PAGE_WAKEUP(m);
5748 }
5749 }
5750 vm_page_unlock_queues();
5751
5752 if (local_free_q)
5753 vm_page_free_list(local_free_q, TRUE);
5754
5755 VM_CHECK_MEMORYSTATUS;
5756
5757 }
5758
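/*
 * Illustrative sketch only -- a hypothetical caller, not existing kernel code:
 * the caller-side shape of the two-pass scheme described in the comment above
 * vm_page_do_delayed_work.  It assumes a list of pages belonging to 'object'
 * chained through their snext field (as produced by routines like
 * vm_page_alloc_list), and uses DEFAULT_DELAYED_WORK_LIMIT as the batch size
 * and VM_KERN_MEMORY_NONE as the tag purely as example choices.
 */
static __unused void
example_batch_delayed_work(vm_object_t object, vm_page_t page_list)
{
	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
	struct vm_page_delayed_work	*dwp = &dw_array[0];
	int				dw_count = 0;
	vm_page_t			m;

	vm_object_lock(object);

	for (m = page_list; m != VM_PAGE_NULL; m = m->snext) {
		/*
		 * per-page work that needs only the object lock would be done
		 * here; the queue-related work is merely recorded for later
		 */
		dwp->dw_m = m;
		dwp->dw_mask = DW_clear_busy | DW_PAGE_WAKEUP;
		dwp++;
		dw_count++;

		if (dw_count >= DEFAULT_DELAYED_WORK_LIMIT) {
			/* one pass behind the vm_page_queue_lock covers the whole batch */
			vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
			dwp = &dw_array[0];
			dw_count = 0;
		}
	}
	if (dw_count)
		vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);

	vm_object_unlock(object);
}
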
5759 kern_return_t
5760 vm_page_alloc_list(
5761 int page_count,
5762 int flags,
5763 vm_page_t *list)
5764 {
5765 vm_page_t lo_page_list = VM_PAGE_NULL;
5766 vm_page_t mem;
5767 int i;
5768
5769 if ( !(flags & KMA_LOMEM))
5770 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
5771
5772 for (i = 0; i < page_count; i++) {
5773
5774 mem = vm_page_grablo();
5775
5776 if (mem == VM_PAGE_NULL) {
5777 if (lo_page_list)
5778 vm_page_free_list(lo_page_list, FALSE);
5779
5780 *list = VM_PAGE_NULL;
5781
5782 return (KERN_RESOURCE_SHORTAGE);
5783 }
5784 mem->snext = lo_page_list;
5785 lo_page_list = mem;
5786 }
5787 *list = lo_page_list;
5788
5789 return (KERN_SUCCESS);
5790 }
5791
5792 void
5793 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
5794 {
5795 page->offset = offset;
5796 }
5797
5798 vm_page_t
5799 vm_page_get_next(vm_page_t page)
5800 {
5801 return (page->snext);
5802 }
5803
5804 vm_object_offset_t
5805 vm_page_get_offset(vm_page_t page)
5806 {
5807 return (page->offset);
5808 }
5809
5810 ppnum_t
5811 vm_page_get_phys_page(vm_page_t page)
5812 {
5813 return (VM_PAGE_GET_PHYS_PAGE(page));
5814 }
5815
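/*
 * Illustrative sketch only -- a hypothetical routine, not existing kernel
 * code: grabbing a short list of low-memory pages with vm_page_alloc_list()
 * and walking it with the accessor routines above.  The page count of 8 is
 * arbitrary.
 */
static __unused void
example_alloc_and_walk_lo_pages(void)
{
	vm_page_t	page_list;
	vm_page_t	m;

	if (vm_page_alloc_list(8, KMA_LOMEM, &page_list) != KERN_SUCCESS)
		return;		/* KERN_RESOURCE_SHORTAGE: not enough low pages */

	for (m = page_list; m != VM_PAGE_NULL; m = vm_page_get_next(m)) {
		/* each entry's physical page number lies in the low-memory range */
		(void) vm_page_get_phys_page(m);
	}
	/* release the list the same way vm_page_alloc_list does on failure */
	vm_page_free_list(page_list, FALSE);
}
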
5816
5817 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
5818
5819 #if HIBERNATION
5820
5821 static vm_page_t hibernate_gobble_queue;
5822
5823 static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
5824 static int hibernate_flush_dirty_pages(int);
5825 static int hibernate_flush_queue(vm_page_queue_head_t *, int);
5826
5827 void hibernate_flush_wait(void);
5828 void hibernate_mark_in_progress(void);
5829 void hibernate_clear_in_progress(void);
5830
5831 void hibernate_free_range(int, int);
5832 void hibernate_hash_insert_page(vm_page_t);
5833 uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
5834 void hibernate_rebuild_vm_structs(void);
5835 uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
5836 ppnum_t hibernate_lookup_paddr(unsigned int);
5837
5838 struct hibernate_statistics {
5839 int hibernate_considered;
5840 int hibernate_reentered_on_q;
5841 int hibernate_found_dirty;
5842 int hibernate_skipped_cleaning;
5843 int hibernate_skipped_transient;
5844 int hibernate_skipped_precious;
5845 int hibernate_skipped_external;
5846 int hibernate_queue_nolock;
5847 int hibernate_queue_paused;
5848 int hibernate_throttled;
5849 int hibernate_throttle_timeout;
5850 int hibernate_drained;
5851 int hibernate_drain_timeout;
5852 int cd_lock_failed;
5853 int cd_found_precious;
5854 int cd_found_wired;
5855 int cd_found_busy;
5856 int cd_found_unusual;
5857 int cd_found_cleaning;
5858 int cd_found_laundry;
5859 int cd_found_dirty;
5860 int cd_found_xpmapped;
5861 int cd_skipped_xpmapped;
5862 int cd_local_free;
5863 int cd_total_free;
5864 int cd_vm_page_wire_count;
5865 int cd_vm_struct_pages_unneeded;
5866 int cd_pages;
5867 int cd_discarded;
5868 int cd_count_wire;
5869 } hibernate_stats;
5870
5871
5872 /*
5873 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
5874 * so that we don't overrun the estimated image size, which would
5875 * result in a hibernation failure.
5876 */
5877 #define HIBERNATE_XPMAPPED_LIMIT 40000
5878
5879
5880 static int
5881 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
5882 {
5883 wait_result_t wait_result;
5884
5885 vm_page_lock_queues();
5886
5887 while ( !vm_page_queue_empty(&q->pgo_pending) ) {
5888
5889 q->pgo_draining = TRUE;
5890
5891 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
5892
5893 vm_page_unlock_queues();
5894
5895 wait_result = thread_block(THREAD_CONTINUE_NULL);
5896
5897 if (wait_result == THREAD_TIMED_OUT && !vm_page_queue_empty(&q->pgo_pending)) {
5898 hibernate_stats.hibernate_drain_timeout++;
5899
5900 if (q == &vm_pageout_queue_external)
5901 return (0);
5902
5903 return (1);
5904 }
5905 vm_page_lock_queues();
5906
5907 hibernate_stats.hibernate_drained++;
5908 }
5909 vm_page_unlock_queues();
5910
5911 return (0);
5912 }
5913
5914
5915 boolean_t hibernate_skip_external = FALSE;
5916
5917 static int
5918 hibernate_flush_queue(vm_page_queue_head_t *q, int qcount)
5919 {
5920 vm_page_t m;
5921 vm_object_t l_object = NULL;
5922 vm_object_t m_object = NULL;
5923 int refmod_state = 0;
5924 int try_failed_count = 0;
5925 int retval = 0;
5926 int current_run = 0;
5927 struct vm_pageout_queue *iq;
5928 struct vm_pageout_queue *eq;
5929 struct vm_pageout_queue *tq;
5930
5931
5932 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
5933
5934 iq = &vm_pageout_queue_internal;
5935 eq = &vm_pageout_queue_external;
5936
5937 vm_page_lock_queues();
5938
5939 while (qcount && !vm_page_queue_empty(q)) {
5940
5941 if (current_run++ == 1000) {
5942 if (hibernate_should_abort()) {
5943 retval = 1;
5944 break;
5945 }
5946 current_run = 0;
5947 }
5948
5949 m = (vm_page_t) vm_page_queue_first(q);
5950 m_object = VM_PAGE_OBJECT(m);
5951
5952 /*
5953 * check to see if we currently are working
5954 * with the same object... if so, we've
5955 * already got the lock
5956 */
5957 if (m_object != l_object) {
5958 /*
5959 * the object associated with candidate page is
5960 * different from the one we were just working
5961 * with... dump the lock if we still own it
5962 */
5963 if (l_object != NULL) {
5964 vm_object_unlock(l_object);
5965 l_object = NULL;
5966 }
5967 /*
5968 * Try to lock object; since we've already got the
5969 * page queues lock, we can only 'try' for this one.
5970 * if the 'try' fails, we need to do a mutex_pause
5971 * to allow the owner of the object lock a chance to
5972 * run...
5973 */
5974 if ( !vm_object_lock_try_scan(m_object)) {
5975
5976 if (try_failed_count > 20) {
5977 hibernate_stats.hibernate_queue_nolock++;
5978
5979 goto reenter_pg_on_q;
5980 }
5981
5982 vm_page_unlock_queues();
5983 mutex_pause(try_failed_count++);
5984 vm_page_lock_queues();
5985
5986 hibernate_stats.hibernate_queue_paused++;
5987 continue;
5988 } else {
5989 l_object = m_object;
5990 }
5991 }
5992 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
5993 /*
5994 * page is not to be cleaned
5995 * put it back on the head of its queue
5996 */
5997 if (m->cleaning)
5998 hibernate_stats.hibernate_skipped_cleaning++;
5999 else
6000 hibernate_stats.hibernate_skipped_transient++;
6001
6002 goto reenter_pg_on_q;
6003 }
6004 if (m_object->copy == VM_OBJECT_NULL) {
6005 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
6006 /*
6007 * let the normal hibernate image path
6008 * deal with these
6009 */
6010 goto reenter_pg_on_q;
6011 }
6012 }
6013 if ( !m->dirty && m->pmapped) {
6014 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
6015
6016 if ((refmod_state & VM_MEM_MODIFIED)) {
6017 SET_PAGE_DIRTY(m, FALSE);
6018 }
6019 } else
6020 refmod_state = 0;
6021
6022 if ( !m->dirty) {
6023 /*
6024 * page is not to be cleaned
6025 * put it back on the head of its queue
6026 */
6027 if (m->precious)
6028 hibernate_stats.hibernate_skipped_precious++;
6029
6030 goto reenter_pg_on_q;
6031 }
6032
6033 if (hibernate_skip_external == TRUE && !m_object->internal) {
6034
6035 hibernate_stats.hibernate_skipped_external++;
6036
6037 goto reenter_pg_on_q;
6038 }
6039 tq = NULL;
6040
6041 if (m_object->internal) {
6042 if (VM_PAGE_Q_THROTTLED(iq))
6043 tq = iq;
6044 } else if (VM_PAGE_Q_THROTTLED(eq))
6045 tq = eq;
6046
6047 if (tq != NULL) {
6048 wait_result_t wait_result;
6049 int wait_count = 5;
6050
6051 if (l_object != NULL) {
6052 vm_object_unlock(l_object);
6053 l_object = NULL;
6054 }
6055
6056 while (retval == 0) {
6057
6058 tq->pgo_throttled = TRUE;
6059
6060 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
6061
6062 vm_page_unlock_queues();
6063
6064 wait_result = thread_block(THREAD_CONTINUE_NULL);
6065
6066 vm_page_lock_queues();
6067
6068 if (wait_result != THREAD_TIMED_OUT)
6069 break;
6070 if (!VM_PAGE_Q_THROTTLED(tq))
6071 break;
6072
6073 if (hibernate_should_abort())
6074 retval = 1;
6075
6076 if (--wait_count == 0) {
6077
6078 hibernate_stats.hibernate_throttle_timeout++;
6079
6080 if (tq == eq) {
6081 hibernate_skip_external = TRUE;
6082 break;
6083 }
6084 retval = 1;
6085 }
6086 }
6087 if (retval)
6088 break;
6089
6090 hibernate_stats.hibernate_throttled++;
6091
6092 continue;
6093 }
6094 /*
6095 * we've already factored out pages in the laundry which
6096 * means this page can't be on the pageout queue so it's
6097 * safe to do the vm_page_queues_remove
6098 */
6099 vm_page_queues_remove(m, TRUE);
6100
6101 if (m_object->internal == TRUE)
6102 pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m), PMAP_OPTIONS_COMPRESSOR, NULL);
6103
6104 (void)vm_pageout_cluster(m, FALSE, FALSE);
6105
6106 hibernate_stats.hibernate_found_dirty++;
6107
6108 goto next_pg;
6109
6110 reenter_pg_on_q:
6111 vm_page_queue_remove(q, m, vm_page_t, pageq);
6112 vm_page_queue_enter(q, m, vm_page_t, pageq);
6113
6114 hibernate_stats.hibernate_reentered_on_q++;
6115 next_pg:
6116 hibernate_stats.hibernate_considered++;
6117
6118 qcount--;
6119 try_failed_count = 0;
6120 }
6121 if (l_object != NULL) {
6122 vm_object_unlock(l_object);
6123 l_object = NULL;
6124 }
6125
6126 vm_page_unlock_queues();
6127
6128 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
6129
6130 return (retval);
6131 }
6132
6133
6134 static int
6135 hibernate_flush_dirty_pages(int pass)
6136 {
6137 struct vm_speculative_age_q *aq;
6138 uint32_t i;
6139
6140 if (vm_page_local_q) {
6141 for (i = 0; i < vm_page_local_q_count; i++)
6142 vm_page_reactivate_local(i, TRUE, FALSE);
6143 }
6144
6145 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
6146 int qcount;
6147 vm_page_t m;
6148
6149 aq = &vm_page_queue_speculative[i];
6150
6151 if (vm_page_queue_empty(&aq->age_q))
6152 continue;
6153 qcount = 0;
6154
6155 vm_page_lockspin_queues();
6156
6157 vm_page_queue_iterate(&aq->age_q,
6158 m,
6159 vm_page_t,
6160 pageq)
6161 {
6162 qcount++;
6163 }
6164 vm_page_unlock_queues();
6165
6166 if (qcount) {
6167 if (hibernate_flush_queue(&aq->age_q, qcount))
6168 return (1);
6169 }
6170 }
6171 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
6172 return (1);
6173 /* XXX FBDP TODO: flush secluded queue */
6174 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
6175 return (1);
6176 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
6177 return (1);
6178 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
6179 return (1);
6180
6181 if (pass == 1)
6182 vm_compressor_record_warmup_start();
6183
6184 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
6185 if (pass == 1)
6186 vm_compressor_record_warmup_end();
6187 return (1);
6188 }
6189 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
6190 if (pass == 1)
6191 vm_compressor_record_warmup_end();
6192 return (1);
6193 }
6194 if (pass == 1)
6195 vm_compressor_record_warmup_end();
6196
6197 if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
6198 return (1);
6199
6200 return (0);
6201 }
6202
6203
6204 void
6205 hibernate_reset_stats()
6206 {
6207 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
6208 }
6209
6210
6211 int
6212 hibernate_flush_memory()
6213 {
6214 int retval;
6215
6216 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
6217
6218 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
6219
6220 hibernate_cleaning_in_progress = TRUE;
6221 hibernate_skip_external = FALSE;
6222
6223 if ((retval = hibernate_flush_dirty_pages(1)) == 0) {
6224
6225 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
6226
6227 vm_compressor_flush();
6228
6229 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
6230
6231 if (consider_buffer_cache_collect != NULL) {
6232 unsigned int orig_wire_count;
6233
6234 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6235 orig_wire_count = vm_page_wire_count;
6236
6237 (void)(*consider_buffer_cache_collect)(1);
6238 consider_zone_gc();
6239
6240 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
6241
6242 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
6243 }
6244 }
6245 hibernate_cleaning_in_progress = FALSE;
6246
6247 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
6248
6249 if (retval)
6250 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);
6251
6252
6253 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
6254 hibernate_stats.hibernate_considered,
6255 hibernate_stats.hibernate_reentered_on_q,
6256 hibernate_stats.hibernate_found_dirty);
6257 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
6258 hibernate_stats.hibernate_skipped_cleaning,
6259 hibernate_stats.hibernate_skipped_transient,
6260 hibernate_stats.hibernate_skipped_precious,
6261 hibernate_stats.hibernate_skipped_external,
6262 hibernate_stats.hibernate_queue_nolock);
6263 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
6264 hibernate_stats.hibernate_queue_paused,
6265 hibernate_stats.hibernate_throttled,
6266 hibernate_stats.hibernate_throttle_timeout,
6267 hibernate_stats.hibernate_drained,
6268 hibernate_stats.hibernate_drain_timeout);
6269
6270 return (retval);
6271 }
6272
6273
6274 static void
6275 hibernate_page_list_zero(hibernate_page_list_t *list)
6276 {
6277 uint32_t bank;
6278 hibernate_bitmap_t * bitmap;
6279
6280 bitmap = &list->bank_bitmap[0];
6281 for (bank = 0; bank < list->bank_count; bank++)
6282 {
6283 uint32_t last_bit;
6284
6285 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
6286 // set out-of-bound bits at end of bitmap.
6287 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
6288 if (last_bit)
6289 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
6290
6291 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
6292 }
6293 }
6294
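/*
 * Worked example (numbers invented for illustration): for a bank spanning
 * 70 pages (say first_page 100, last_page 169) the bitmap needs 3 words and
 * last_bit = 70 & 31 = 6.  The final word is then pre-set to
 * 0xFFFFFFFF >> 6 == 0x03FFFFFF, marking the 26 trailing bit positions that
 * don't correspond to real pages so they are never treated as "needs
 * saving".  This reading assumes the first page of a bank maps to the most
 * significant bit of word 0, which is what the shift direction above
 * suggests.  The helper below just restates that arithmetic.
 */
static __unused uint32_t
example_trailing_bitmap_mask(uint32_t page_count)
{
	uint32_t last_bit = page_count & 31;

	/* a fully populated final word (last_bit == 0) needs no pre-marked bits */
	return (last_bit ? (0xFFFFFFFF >> last_bit) : 0);
}
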
6295 void
6296 hibernate_free_gobble_pages(void)
6297 {
6298 vm_page_t m, next;
6299 uint32_t count = 0;
6300
6301 m = (vm_page_t) hibernate_gobble_queue;
6302 while(m)
6303 {
6304 next = m->snext;
6305 vm_page_free(m);
6306 count++;
6307 m = next;
6308 }
6309 hibernate_gobble_queue = VM_PAGE_NULL;
6310
6311 if (count)
6312 HIBLOG("Freed %d pages\n", count);
6313 }
6314
6315 static boolean_t
6316 hibernate_consider_discard(vm_page_t m, boolean_t preflight)
6317 {
6318 vm_object_t object = NULL;
6319 int refmod_state;
6320 boolean_t discard = FALSE;
6321
6322 do
6323 {
6324 if (m->private)
6325 panic("hibernate_consider_discard: private");
6326
6327 object = VM_PAGE_OBJECT(m);
6328
6329 if (!vm_object_lock_try(object)) {
6330 object = NULL;
6331 if (!preflight) hibernate_stats.cd_lock_failed++;
6332 break;
6333 }
6334 if (VM_PAGE_WIRED(m)) {
6335 if (!preflight) hibernate_stats.cd_found_wired++;
6336 break;
6337 }
6338 if (m->precious) {
6339 if (!preflight) hibernate_stats.cd_found_precious++;
6340 break;
6341 }
6342 if (m->busy || !object->alive) {
6343 /*
6344 * Somebody is playing with this page.
6345 */
6346 if (!preflight) hibernate_stats.cd_found_busy++;
6347 break;
6348 }
6349 if (m->absent || m->unusual || m->error) {
6350 /*
6351 * If it's unusual in any way, ignore it
6352 */
6353 if (!preflight) hibernate_stats.cd_found_unusual++;
6354 break;
6355 }
6356 if (m->cleaning) {
6357 if (!preflight) hibernate_stats.cd_found_cleaning++;
6358 break;
6359 }
6360 if (m->laundry) {
6361 if (!preflight) hibernate_stats.cd_found_laundry++;
6362 break;
6363 }
6364 if (!m->dirty)
6365 {
6366 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
6367
6368 if (refmod_state & VM_MEM_REFERENCED)
6369 m->reference = TRUE;
6370 if (refmod_state & VM_MEM_MODIFIED) {
6371 SET_PAGE_DIRTY(m, FALSE);
6372 }
6373 }
6374
6375 /*
6376 * If it's clean or purgeable we can discard the page on wakeup.
6377 */
6378 discard = (!m->dirty)
6379 || (VM_PURGABLE_VOLATILE == object->purgable)
6380 || (VM_PURGABLE_EMPTY == object->purgable);
6381
6382
6383 if (discard == FALSE) {
6384 if (!preflight)
6385 hibernate_stats.cd_found_dirty++;
6386 } else if (m->xpmapped && m->reference && !object->internal) {
6387 if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
6388 if (!preflight)
6389 hibernate_stats.cd_found_xpmapped++;
6390 discard = FALSE;
6391 } else {
6392 if (!preflight)
6393 hibernate_stats.cd_skipped_xpmapped++;
6394 }
6395 }
6396 }
6397 while (FALSE);
6398
6399 if (object)
6400 vm_object_unlock(object);
6401
6402 return (discard);
6403 }
6404
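/*
 * Worked example of the decision above (cases paraphrased, not exhaustive):
 * a clean, unwired, unreferenced page of an ordinary object is discardable,
 * as is any page of a VOLATILE or EMPTY purgeable object; a dirty page of an
 * ordinary object is kept.  A clean but xpmapped, recently referenced page
 * of an external object is also kept (up to HIBERNATE_XPMAPPED_LIMIT of
 * them), presumably because re-faulting and re-mapping it after wake would
 * cost more than carrying it in the image.
 */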
6405
6406 static void
6407 hibernate_discard_page(vm_page_t m)
6408 {
6409 vm_object_t m_object;
6410
6411 if (m->absent || m->unusual || m->error)
6412 /*
6413 * If it's unusual in any way, ignore
6414 */
6415 return;
6416
6417 m_object = VM_PAGE_OBJECT(m);
6418
6419 #if MACH_ASSERT || DEBUG
6420 if (!vm_object_lock_try(m_object))
6421 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
6422 #else
6423 /* No need to lock page queue for token delete, hibernate_vm_unlock()
6424 makes sure these locks are uncontended before sleep */
6425 #endif /* MACH_ASSERT || DEBUG */
6426
6427 if (m->pmapped == TRUE)
6428 {
6429 __unused int refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
6430 }
6431
6432 if (m->laundry)
6433 panic("hibernate_discard_page(%p) laundry", m);
6434 if (m->private)
6435 panic("hibernate_discard_page(%p) private", m);
6436 if (m->fictitious)
6437 panic("hibernate_discard_page(%p) fictitious", m);
6438
6439 if (VM_PURGABLE_VOLATILE == m_object->purgable)
6440 {
6441 /* object should be on a queue */
6442 assert((m_object->objq.next != NULL) && (m_object->objq.prev != NULL));
6443 purgeable_q_t old_queue = vm_purgeable_object_remove(m_object);
6444 assert(old_queue);
6445 if (m_object->purgeable_when_ripe) {
6446 vm_purgeable_token_delete_first(old_queue);
6447 }
6448 vm_object_lock_assert_exclusive(m_object);
6449 m_object->purgable = VM_PURGABLE_EMPTY;
6450
6451 /*
6452 * Purgeable ledgers: pages of VOLATILE and EMPTY objects are
6453 * accounted in the "volatile" ledger, so no change here.
6454 * We have to update vm_page_purgeable_count, though, since we're
6455 * effectively purging this object.
6456 */
6457 unsigned int delta;
6458 assert(m_object->resident_page_count >= m_object->wired_page_count);
6459 delta = (m_object->resident_page_count - m_object->wired_page_count);
6460 assert(vm_page_purgeable_count >= delta);
6461 assert(delta > 0);
6462 OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
6463 }
6464
6465 vm_page_free(m);
6466
6467 #if MACH_ASSERT || DEBUG
6468 vm_object_unlock(m_object);
6469 #endif /* MACH_ASSERT || DEBUG */
6470 }
6471
6472 /*
6473 Grab locks for hibernate_page_list_setall()
6474 */
6475 void
6476 hibernate_vm_lock_queues(void)
6477 {
6478 vm_object_lock(compressor_object);
6479 vm_page_lock_queues();
6480 lck_mtx_lock(&vm_page_queue_free_lock);
6481
6482 if (vm_page_local_q) {
6483 uint32_t i;
6484 for (i = 0; i < vm_page_local_q_count; i++) {
6485 struct vpl *lq;
6486 lq = &vm_page_local_q[i].vpl_un.vpl;
6487 VPL_LOCK(&lq->vpl_lock);
6488 }
6489 }
6490 }
6491
6492 void
6493 hibernate_vm_unlock_queues(void)
6494 {
6495 if (vm_page_local_q) {
6496 uint32_t i;
6497 for (i = 0; i < vm_page_local_q_count; i++) {
6498 struct vpl *lq;
6499 lq = &vm_page_local_q[i].vpl_un.vpl;
6500 VPL_UNLOCK(&lq->vpl_lock);
6501 }
6502 }
6503 lck_mtx_unlock(&vm_page_queue_free_lock);
6504 vm_page_unlock_queues();
6505 vm_object_unlock(compressor_object);
6506 }
6507
6508 /*
6509 Bits zero in the bitmaps => page needs to be saved. All pages default to being saved;
6510 pages known to the VM not to need saving are subtracted.
6511 Wired pages to be saved are present in page_list_wired, pageable in page_list.
6512 */
6513
6514 void
6515 hibernate_page_list_setall(hibernate_page_list_t * page_list,
6516 hibernate_page_list_t * page_list_wired,
6517 hibernate_page_list_t * page_list_pal,
6518 boolean_t preflight,
6519 boolean_t will_discard,
6520 uint32_t * pagesOut)
6521 {
6522 uint64_t start, end, nsec;
6523 vm_page_t m;
6524 vm_page_t next;
6525 uint32_t pages = page_list->page_count;
6526 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
6527 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
6528 uint32_t count_wire = pages;
6529 uint32_t count_discard_active = 0;
6530 uint32_t count_discard_inactive = 0;
6531 uint32_t count_discard_cleaned = 0;
6532 uint32_t count_discard_purgeable = 0;
6533 uint32_t count_discard_speculative = 0;
6534 uint32_t count_discard_vm_struct_pages = 0;
6535 uint32_t i;
6536 uint32_t bank;
6537 hibernate_bitmap_t * bitmap;
6538 hibernate_bitmap_t * bitmap_wired;
6539 boolean_t discard_all;
6540 boolean_t discard;
6541
6542 HIBLOG("hibernate_page_list_setall(preflight %d) start\n", preflight);
6543
6544 if (preflight) {
6545 page_list = NULL;
6546 page_list_wired = NULL;
6547 page_list_pal = NULL;
6548 discard_all = FALSE;
6549 } else {
6550 discard_all = will_discard;
6551 }
6552
6553 #if MACH_ASSERT || DEBUG
6554 if (!preflight)
6555 {
6556 vm_page_lock_queues();
6557 if (vm_page_local_q) {
6558 for (i = 0; i < vm_page_local_q_count; i++) {
6559 struct vpl *lq;
6560 lq = &vm_page_local_q[i].vpl_un.vpl;
6561 VPL_LOCK(&lq->vpl_lock);
6562 }
6563 }
6564 }
6565 #endif /* MACH_ASSERT || DEBUG */
6566
6567
6568 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
6569
6570 clock_get_uptime(&start);
6571
6572 if (!preflight) {
6573 hibernate_page_list_zero(page_list);
6574 hibernate_page_list_zero(page_list_wired);
6575 hibernate_page_list_zero(page_list_pal);
6576
6577 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
6578 hibernate_stats.cd_pages = pages;
6579 }
6580
6581 if (vm_page_local_q) {
6582 for (i = 0; i < vm_page_local_q_count; i++)
6583 vm_page_reactivate_local(i, TRUE, !preflight);
6584 }
6585
6586 if (preflight) {
6587 vm_object_lock(compressor_object);
6588 vm_page_lock_queues();
6589 lck_mtx_lock(&vm_page_queue_free_lock);
6590 }
6591
6592 m = (vm_page_t) hibernate_gobble_queue;
6593 while (m)
6594 {
6595 pages--;
6596 count_wire--;
6597 if (!preflight) {
6598 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6599 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6600 }
6601 m = m->snext;
6602 }
6603
6604 if (!preflight) for( i = 0; i < real_ncpus; i++ )
6605 {
6606 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
6607 {
6608 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = m->snext)
6609 {
6610 assert(m->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
6611
6612 pages--;
6613 count_wire--;
6614 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6615 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6616
6617 hibernate_stats.cd_local_free++;
6618 hibernate_stats.cd_total_free++;
6619 }
6620 }
6621 }
6622
6623 for( i = 0; i < vm_colors; i++ )
6624 {
6625 vm_page_queue_iterate(&vm_page_queue_free[i].qhead,
6626 m,
6627 vm_page_t,
6628 pageq)
6629 {
6630 assert(m->vm_page_q_state == VM_PAGE_ON_FREE_Q);
6631
6632 pages--;
6633 count_wire--;
6634 if (!preflight) {
6635 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6636 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6637
6638 hibernate_stats.cd_total_free++;
6639 }
6640 }
6641 }
6642
6643 vm_page_queue_iterate(&vm_lopage_queue_free,
6644 m,
6645 vm_page_t,
6646 pageq)
6647 {
6648 assert(m->vm_page_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
6649
6650 pages--;
6651 count_wire--;
6652 if (!preflight) {
6653 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6654 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6655
6656 hibernate_stats.cd_total_free++;
6657 }
6658 }
6659
6660 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
6661 while (m && !vm_page_queue_end(&vm_page_queue_throttled, (vm_page_queue_entry_t)m))
6662 {
6663 assert(m->vm_page_q_state == VM_PAGE_ON_THROTTLED_Q);
6664
6665 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
6666 discard = FALSE;
6667 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
6668 && hibernate_consider_discard(m, preflight))
6669 {
6670 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6671 count_discard_inactive++;
6672 discard = discard_all;
6673 }
6674 else
6675 count_throttled++;
6676 count_wire--;
6677 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6678
6679 if (discard) hibernate_discard_page(m);
6680 m = next;
6681 }
6682
6683 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
6684 while (m && !vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t)m))
6685 {
6686 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
6687
6688 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
6689 discard = FALSE;
6690 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
6691 && hibernate_consider_discard(m, preflight))
6692 {
6693 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6694 if (m->dirty)
6695 count_discard_purgeable++;
6696 else
6697 count_discard_inactive++;
6698 discard = discard_all;
6699 }
6700 else
6701 count_anonymous++;
6702 count_wire--;
6703 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6704 if (discard) hibernate_discard_page(m);
6705 m = next;
6706 }
6707
6708 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
6709 while (m && !vm_page_queue_end(&vm_page_queue_cleaned, (vm_page_queue_entry_t)m))
6710 {
6711 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
6712
6713 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
6714 discard = FALSE;
6715 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
6716 && hibernate_consider_discard(m, preflight))
6717 {
6718 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6719 if (m->dirty)
6720 count_discard_purgeable++;
6721 else
6722 count_discard_cleaned++;
6723 discard = discard_all;
6724 }
6725 else
6726 count_cleaned++;
6727 count_wire--;
6728 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6729 if (discard) hibernate_discard_page(m);
6730 m = next;
6731 }
6732
6733 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
6734 while (m && !vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t)m))
6735 {
6736 assert(m->vm_page_q_state == VM_PAGE_ON_ACTIVE_Q);
6737
6738 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
6739 discard = FALSE;
6740 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
6741 && hibernate_consider_discard(m, preflight))
6742 {
6743 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6744 if (m->dirty)
6745 count_discard_purgeable++;
6746 else
6747 count_discard_active++;
6748 discard = discard_all;
6749 }
6750 else
6751 count_active++;
6752 count_wire--;
6753 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6754 if (discard) hibernate_discard_page(m);
6755 m = next;
6756 }
6757
6758 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
6759 while (m && !vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t)m))
6760 {
6761 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
6762
6763 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
6764 discard = FALSE;
6765 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
6766 && hibernate_consider_discard(m, preflight))
6767 {
6768 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6769 if (m->dirty)
6770 count_discard_purgeable++;
6771 else
6772 count_discard_inactive++;
6773 discard = discard_all;
6774 }
6775 else
6776 count_inactive++;
6777 count_wire--;
6778 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6779 if (discard) hibernate_discard_page(m);
6780 m = next;
6781 }
6782 /* XXX FBDP TODO: secluded queue */
6783
6784 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
6785 {
6786 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_speculative[i].age_q);
6787 while (m && !vm_page_queue_end(&vm_page_queue_speculative[i].age_q, (vm_page_queue_entry_t)m))
6788 {
6789 assert(m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q);
6790
6791 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
6792 discard = FALSE;
6793 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
6794 && hibernate_consider_discard(m, preflight))
6795 {
6796 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6797 count_discard_speculative++;
6798 discard = discard_all;
6799 }
6800 else
6801 count_speculative++;
6802 count_wire--;
6803 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6804 if (discard) hibernate_discard_page(m);
6805 m = next;
6806 }
6807 }
6808
6809 vm_page_queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
6810 {
6811 assert(m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR);
6812
6813 count_compressor++;
6814 count_wire--;
6815 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6816 }
6817
6818 if (preflight == FALSE && discard_all == TRUE) {
6819 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6820
6821 HIBLOG("hibernate_teardown started\n");
6822 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
6823 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
6824
6825 pages -= count_discard_vm_struct_pages;
6826 count_wire -= count_discard_vm_struct_pages;
6827
6828 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
6829
6830 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
6831 }
6832
6833 if (!preflight) {
6834 // pull wired from hibernate_bitmap
6835 bitmap = &page_list->bank_bitmap[0];
6836 bitmap_wired = &page_list_wired->bank_bitmap[0];
6837 for (bank = 0; bank < page_list->bank_count; bank++)
6838 {
6839 for (i = 0; i < bitmap->bitmapwords; i++)
6840 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
6841 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
6842 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
6843 }
6844 }
6845
6846 // machine dependent adjustments
6847 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
6848
6849 if (!preflight) {
6850 hibernate_stats.cd_count_wire = count_wire;
6851 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
6852 count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
6853 }
6854
6855 clock_get_uptime(&end);
6856 absolutetime_to_nanoseconds(end - start, &nsec);
6857 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
6858
6859 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
6860 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
6861 discard_all ? "did" : "could",
6862 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
6863
6864 if (hibernate_stats.cd_skipped_xpmapped)
6865 HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);
6866
6867 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
6868
6869 if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
6870
6871 #if MACH_ASSERT || DEBUG
6872 if (!preflight)
6873 {
6874 if (vm_page_local_q) {
6875 for (i = 0; i < vm_page_local_q_count; i++) {
6876 struct vpl *lq;
6877 lq = &vm_page_local_q[i].vpl_un.vpl;
6878 VPL_UNLOCK(&lq->vpl_lock);
6879 }
6880 }
6881 vm_page_unlock_queues();
6882 }
6883 #endif /* MACH_ASSERT || DEBUG */
6884
6885 if (preflight) {
6886 lck_mtx_unlock(&vm_page_queue_free_lock);
6887 vm_page_unlock_queues();
6888 vm_object_unlock(compressor_object);
6889 }
6890
6891 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
6892 }
6893
6894 void
6895 hibernate_page_list_discard(hibernate_page_list_t * page_list)
6896 {
6897 uint64_t start, end, nsec;
6898 vm_page_t m;
6899 vm_page_t next;
6900 uint32_t i;
6901 uint32_t count_discard_active = 0;
6902 uint32_t count_discard_inactive = 0;
6903 uint32_t count_discard_purgeable = 0;
6904 uint32_t count_discard_cleaned = 0;
6905 uint32_t count_discard_speculative = 0;
6906
6907
6908 #if MACH_ASSERT || DEBUG
6909 vm_page_lock_queues();
6910 if (vm_page_local_q) {
6911 for (i = 0; i < vm_page_local_q_count; i++) {
6912 struct vpl *lq;
6913 lq = &vm_page_local_q[i].vpl_un.vpl;
6914 VPL_LOCK(&lq->vpl_lock);
6915 }
6916 }
6917 #endif /* MACH_ASSERT || DEBUG */
6918
6919 clock_get_uptime(&start);
6920
6921 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
6922 while (m && !vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t)m))
6923 {
6924 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
6925
6926 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
6927 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
6928 {
6929 if (m->dirty)
6930 count_discard_purgeable++;
6931 else
6932 count_discard_inactive++;
6933 hibernate_discard_page(m);
6934 }
6935 m = next;
6936 }
6937
6938 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
6939 {
6940 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_speculative[i].age_q);
6941 while (m && !vm_page_queue_end(&vm_page_queue_speculative[i].age_q, (vm_page_queue_entry_t)m))
6942 {
6943 assert(m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q);
6944
6945 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
6946 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
6947 {
6948 count_discard_speculative++;
6949 hibernate_discard_page(m);
6950 }
6951 m = next;
6952 }
6953 }
6954
6955 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
6956 while (m && !vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t)m))
6957 {
6958 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
6959
6960 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
6961 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
6962 {
6963 if (m->dirty)
6964 count_discard_purgeable++;
6965 else
6966 count_discard_inactive++;
6967 hibernate_discard_page(m);
6968 }
6969 m = next;
6970 }
6971 /* XXX FBDP TODO: secluded queue */
6972
6973 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
6974 while (m && !vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t)m))
6975 {
6976 assert(m->vm_page_q_state == VM_PAGE_ON_ACTIVE_Q);
6977
6978 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
6979 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
6980 {
6981 if (m->dirty)
6982 count_discard_purgeable++;
6983 else
6984 count_discard_active++;
6985 hibernate_discard_page(m);
6986 }
6987 m = next;
6988 }
6989
6990 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
6991 while (m && !vm_page_queue_end(&vm_page_queue_cleaned, (vm_page_queue_entry_t)m))
6992 {
6993 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
6994
6995 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
6996 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
6997 {
6998 if (m->dirty)
6999 count_discard_purgeable++;
7000 else
7001 count_discard_cleaned++;
7002 hibernate_discard_page(m);
7003 }
7004 m = next;
7005 }
7006
7007 #if MACH_ASSERT || DEBUG
7008 if (vm_page_local_q) {
7009 for (i = 0; i < vm_page_local_q_count; i++) {
7010 struct vpl *lq;
7011 lq = &vm_page_local_q[i].vpl_un.vpl;
7012 VPL_UNLOCK(&lq->vpl_lock);
7013 }
7014 }
7015 vm_page_unlock_queues();
7016 #endif /* MACH_ASSERT || DEBUG */
7017
7018 clock_get_uptime(&end);
7019 absolutetime_to_nanoseconds(end - start, &nsec);
7020 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
7021 nsec / 1000000ULL,
7022 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
7023 }
7024
7025 boolean_t hibernate_paddr_map_inited = FALSE;
7026 boolean_t hibernate_rebuild_needed = FALSE;
7027 unsigned int hibernate_teardown_last_valid_compact_indx = -1;
7028 vm_page_t hibernate_rebuild_hash_list = NULL;
7029
7030 unsigned int hibernate_teardown_found_tabled_pages = 0;
7031 unsigned int hibernate_teardown_found_created_pages = 0;
7032 unsigned int hibernate_teardown_found_free_pages = 0;
7033 unsigned int hibernate_teardown_vm_page_free_count;
7034
7035
7036 struct ppnum_mapping {
7037 struct ppnum_mapping *ppnm_next;
7038 ppnum_t ppnm_base_paddr;
7039 unsigned int ppnm_sindx;
7040 unsigned int ppnm_eindx;
7041 };
7042
7043 struct ppnum_mapping *ppnm_head;
7044 struct ppnum_mapping *ppnm_last_found = NULL;
7045
7046
7047 void
7048 hibernate_create_paddr_map()
7049 {
7050 unsigned int i;
7051 ppnum_t next_ppnum_in_run = 0;
7052 struct ppnum_mapping *ppnm = NULL;
7053
7054 if (hibernate_paddr_map_inited == FALSE) {
7055
7056 for (i = 0; i < vm_pages_count; i++) {
7057
7058 if (ppnm)
7059 ppnm->ppnm_eindx = i;
7060
7061 if (ppnm == NULL || VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) != next_ppnum_in_run) {
7062
7063 ppnm = kalloc(sizeof(struct ppnum_mapping));
7064
7065 ppnm->ppnm_next = ppnm_head;
7066 ppnm_head = ppnm;
7067
7068 ppnm->ppnm_sindx = i;
7069 ppnm->ppnm_base_paddr = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]);
7070 }
7071 next_ppnum_in_run = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) + 1;
7072 }
7073 ppnm->ppnm_eindx++;
7074
7075 hibernate_paddr_map_inited = TRUE;
7076 }
7077 }
7078
7079 ppnum_t
7080 hibernate_lookup_paddr(unsigned int indx)
7081 {
7082 struct ppnum_mapping *ppnm = NULL;
7083
7084 ppnm = ppnm_last_found;
7085
7086 if (ppnm) {
7087 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
7088 goto done;
7089 }
7090 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
7091
7092 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
7093 ppnm_last_found = ppnm;
7094 break;
7095 }
7096 }
7097 if (ppnm == NULL)
7098 panic("hibernate_lookup_paddr of %d failed\n", indx);
7099 done:
7100 return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
7101 }
7102
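/*
 * Worked example (run boundaries and base page numbers invented for
 * illustration): suppose hibernate_create_paddr_map recorded two runs --
 * vm_pages[0..999] describing physical pages starting at 0x1000, and
 * vm_pages[1000..1499] describing physical pages starting at 0x8000.  Then
 * hibernate_lookup_paddr(1200) matches the second run and returns
 * 0x8000 + (1200 - 1000) == 0x80c8.
 */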
7103
7104 uint32_t
7105 hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
7106 {
7107 addr64_t saddr_aligned;
7108 addr64_t eaddr_aligned;
7109 addr64_t addr;
7110 ppnum_t paddr;
7111 unsigned int mark_as_unneeded_pages = 0;
7112
7113 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
7114 eaddr_aligned = eaddr & ~PAGE_MASK_64;
7115
7116 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
7117
7118 paddr = pmap_find_phys(kernel_pmap, addr);
7119
7120 assert(paddr);
7121
7122 hibernate_page_bitset(page_list, TRUE, paddr);
7123 hibernate_page_bitset(page_list_wired, TRUE, paddr);
7124
7125 mark_as_unneeded_pages++;
7126 }
7127 return (mark_as_unneeded_pages);
7128 }
7129
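/*
 * Worked example (addresses invented for illustration), assuming 4 KB pages
 * (PAGE_MASK_64 == 0xFFF): for saddr == 0x1234 and eaddr == 0x5678,
 * saddr_aligned rounds up to 0x2000 and eaddr_aligned rounds down to 0x5000,
 * so only the fully covered pages at 0x2000, 0x3000 and 0x4000 are marked
 * unneeded -- 3 pages.  Partial pages at either end are left alone, since
 * their contents may still matter.
 */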
7130
7131 void
7132 hibernate_hash_insert_page(vm_page_t mem)
7133 {
7134 vm_page_bucket_t *bucket;
7135 int hash_id;
7136 vm_object_t m_object;
7137
7138 m_object = VM_PAGE_OBJECT(mem);
7139
7140 assert(mem->hashed);
7141 assert(m_object);
7142 assert(mem->offset != (vm_object_offset_t) -1);
7143
7144 /*
7145 * Insert it into the object/offset hash table
7146 */
7147 hash_id = vm_page_hash(m_object, mem->offset);
7148 bucket = &vm_page_buckets[hash_id];
7149
7150 mem->next_m = bucket->page_list;
7151 bucket->page_list = VM_PAGE_PACK_PTR(mem);
7152 }
7153
7154
7155 void
7156 hibernate_free_range(int sindx, int eindx)
7157 {
7158 vm_page_t mem;
7159 unsigned int color;
7160
7161 while (sindx < eindx) {
7162 mem = &vm_pages[sindx];
7163
7164 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
7165
7166 mem->lopage = FALSE;
7167 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
7168
7169 color = VM_PAGE_GET_PHYS_PAGE(mem) & vm_color_mask;
7170 vm_page_queue_enter_first(&vm_page_queue_free[color].qhead,
7171 mem,
7172 vm_page_t,
7173 pageq);
7174 vm_page_free_count++;
7175
7176 sindx++;
7177 }
7178 }
7179
7180
7181 extern void hibernate_rebuild_pmap_structs(void);
7182
7183 void
7184 hibernate_rebuild_vm_structs(void)
7185 {
7186 int cindx, sindx, eindx;
7187 vm_page_t mem, tmem, mem_next;
7188 AbsoluteTime startTime, endTime;
7189 uint64_t nsec;
7190
7191 if (hibernate_rebuild_needed == FALSE)
7192 return;
7193
7194 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START, 0, 0, 0, 0, 0);
7195 HIBLOG("hibernate_rebuild started\n");
7196
7197 clock_get_uptime(&startTime);
7198
7199 hibernate_rebuild_pmap_structs();
7200
7201 bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
7202 eindx = vm_pages_count;
7203
7204 for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
7205
7206 mem = &vm_pages[cindx];
7207 /*
7208 * hibernate_teardown_vm_structs leaves the location where
7209 * this vm_page_t must be located in "next_m".
7210 */
7211 tmem = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->next_m));
7212 mem->next_m = VM_PAGE_PACK_PTR(NULL);
7213
7214 sindx = (int)(tmem - &vm_pages[0]);
7215
7216 if (mem != tmem) {
7217 /*
7218 * this vm_page_t was moved by hibernate_teardown_vm_structs,
7219 * so move it back to its real location
7220 */
7221 *tmem = *mem;
7222 mem = tmem;
7223 }
7224 if (mem->hashed)
7225 hibernate_hash_insert_page(mem);
7226 /*
7227 * the 'hole' between this vm_page_t and the previous
7228 * vm_page_t we moved needs to be initialized as
7229 * a range of free vm_page_t's
7230 */
7231 hibernate_free_range(sindx + 1, eindx);
7232
7233 eindx = sindx;
7234 }
7235 if (sindx)
7236 hibernate_free_range(0, sindx);
7237
7238 assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);
7239
7240 /*
7241 * process the list of vm_page_t's that were entered in the hash,
7242 * but were not located in the vm_pages array... these are
7243 * vm_page_t's that were created on the fly (i.e. fictitious)
7244 */
7245 for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
7246 mem_next = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->next_m));
7247
7248 mem->next_m = 0;
7249 hibernate_hash_insert_page(mem);
7250 }
7251 hibernate_rebuild_hash_list = NULL;
7252
7253 clock_get_uptime(&endTime);
7254 SUB_ABSOLUTETIME(&endTime, &startTime);
7255 absolutetime_to_nanoseconds(endTime, &nsec);
7256
7257 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);
7258
7259 hibernate_rebuild_needed = FALSE;
7260
7261 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
7262 }
7263
7264
7265 extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);
7266
7267 uint32_t
7268 hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
7269 {
7270 unsigned int i;
7271 unsigned int compact_target_indx;
7272 vm_page_t mem, mem_next;
7273 vm_page_bucket_t *bucket;
7274 unsigned int mark_as_unneeded_pages = 0;
7275 unsigned int unneeded_vm_page_bucket_pages = 0;
7276 unsigned int unneeded_vm_pages_pages = 0;
7277 unsigned int unneeded_pmap_pages = 0;
7278 addr64_t start_of_unneeded = 0;
7279 addr64_t end_of_unneeded = 0;
7280
7281
7282 if (hibernate_should_abort())
7283 return (0);
7284
7285 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
7286 vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
7287 vm_page_cleaned_count, compressor_object->resident_page_count);
7288
7289 for (i = 0; i < vm_page_bucket_count; i++) {
7290
7291 bucket = &vm_page_buckets[i];
7292
7293 for (mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list)); mem != VM_PAGE_NULL; mem = mem_next) {
7294 assert(mem->hashed);
7295
7296 mem_next = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->next_m));
7297
7298 if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
7299 mem->next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
7300 hibernate_rebuild_hash_list = mem;
7301 }
7302 }
7303 }
7304 unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
7305 mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;
7306
7307 hibernate_teardown_vm_page_free_count = vm_page_free_count;
7308
7309 compact_target_indx = 0;
7310
7311 for (i = 0; i < vm_pages_count; i++) {
7312
7313 mem = &vm_pages[i];
7314
7315 if (mem->vm_page_q_state == VM_PAGE_ON_FREE_Q) {
7316 unsigned int color;
7317
7318 assert(mem->busy);
7319 assert(!mem->lopage);
7320
7321 color = VM_PAGE_GET_PHYS_PAGE(mem) & vm_color_mask;
7322
7323 vm_page_queue_remove(&vm_page_queue_free[color].qhead,
7324 mem,
7325 vm_page_t,
7326 pageq);
7327
7328 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
7329
7330 vm_page_free_count--;
7331
7332 hibernate_teardown_found_free_pages++;
7333
7334 if (vm_pages[compact_target_indx].vm_page_q_state != VM_PAGE_ON_FREE_Q)
7335 compact_target_indx = i;
7336 } else {
7337 /*
7338 * record this vm_page_t's original location
7339 * we need this even if it doesn't get moved
7340 * as an indicator to the rebuild function that
7341 * we don't have to move it
7342 */
7343 mem->next_m = VM_PAGE_PACK_PTR(mem);
7344
7345 if (vm_pages[compact_target_indx].vm_page_q_state == VM_PAGE_ON_FREE_Q) {
7346 /*
7347 * we've got a hole to fill, so
7348 * move this vm_page_t to its new home
7349 */
7350 vm_pages[compact_target_indx] = *mem;
7351 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
7352
7353 hibernate_teardown_last_valid_compact_indx = compact_target_indx;
7354 compact_target_indx++;
7355 } else
7356 hibernate_teardown_last_valid_compact_indx = i;
7357 }
7358 }
7359 unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
7360 (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
7361 mark_as_unneeded_pages += unneeded_vm_pages_pages;
7362
7363 hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);
7364
7365 if (start_of_unneeded) {
7366 unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
7367 mark_as_unneeded_pages += unneeded_pmap_pages;
7368 }
7369 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);
7370
7371 hibernate_rebuild_needed = TRUE;
7372
7373 return (mark_as_unneeded_pages);
7374 }
7375
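/*
 * Worked example (states invented for illustration): imagine six vm_pages
 * entries whose states are U F F U U F, where U is an in-use page and F is
 * one sitting on a free queue.  The compaction loop above pulls the F
 * entries off their queues and copies the later U entries down into those
 * holes, so the three live entries end up at indices 0, 1 and 2,
 * hibernate_teardown_last_valid_compact_indx becomes 2, and the tail of the
 * array can be marked as unneeded for the image.  Each moved entry's next_m
 * still packs a pointer to its original slot, which is exactly what
 * hibernate_rebuild_vm_structs uses to put everything back after wake.
 */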
7376
7377 #endif /* HIBERNATION */
7378
7379 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
7380
7381 #include <mach_vm_debug.h>
7382 #if MACH_VM_DEBUG
7383
7384 #include <mach_debug/hash_info.h>
7385 #include <vm/vm_debug.h>
7386
7387 /*
7388 * Routine: vm_page_info
7389 * Purpose:
7390 * Return information about the global VP table.
7391 * Fills the buffer with as much information as possible
7392 * and returns the desired size of the buffer.
7393 * Conditions:
7394 * Nothing locked. The caller should provide
7395 * possibly-pageable memory.
7396 */
7397
7398 unsigned int
7399 vm_page_info(
7400 hash_info_bucket_t *info,
7401 unsigned int count)
7402 {
7403 unsigned int i;
7404 lck_spin_t *bucket_lock;
7405
7406 if (vm_page_bucket_count < count)
7407 count = vm_page_bucket_count;
7408
7409 for (i = 0; i < count; i++) {
7410 vm_page_bucket_t *bucket = &vm_page_buckets[i];
7411 unsigned int bucket_count = 0;
7412 vm_page_t m;
7413
7414 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
7415 lck_spin_lock(bucket_lock);
7416
7417 for (m = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
7418 m != VM_PAGE_NULL;
7419 m = (vm_page_t)(VM_PAGE_UNPACK_PTR(m->next_m)))
7420 bucket_count++;
7421
7422 lck_spin_unlock(bucket_lock);
7423
7424 /* don't touch pageable memory while holding locks */
7425 info[i].hib_count = bucket_count;
7426 }
7427
7428 return vm_page_bucket_count;
7429 }
7430 #endif /* MACH_VM_DEBUG */
7431
7432 #if VM_PAGE_BUCKETS_CHECK
7433 void
7434 vm_page_buckets_check(void)
7435 {
7436 unsigned int i;
7437 vm_page_t p;
7438 unsigned int p_hash;
7439 vm_page_bucket_t *bucket;
7440 lck_spin_t *bucket_lock;
7441
7442 if (!vm_page_buckets_check_ready) {
7443 return;
7444 }
7445
7446 #if HIBERNATION
7447 if (hibernate_rebuild_needed ||
7448 hibernate_rebuild_hash_list) {
7449 panic("BUCKET_CHECK: hibernation in progress: "
7450 "rebuild_needed=%d rebuild_hash_list=%p\n",
7451 hibernate_rebuild_needed,
7452 hibernate_rebuild_hash_list);
7453 }
7454 #endif /* HIBERNATION */
7455
7456 #if VM_PAGE_FAKE_BUCKETS
7457 char *cp;
7458 for (cp = (char *) vm_page_fake_buckets_start;
7459 cp < (char *) vm_page_fake_buckets_end;
7460 cp++) {
7461 if (*cp != 0x5a) {
7462 panic("BUCKET_CHECK: corruption at %p in fake buckets "
7463 "[0x%llx:0x%llx]\n",
7464 cp,
7465 (uint64_t) vm_page_fake_buckets_start,
7466 (uint64_t) vm_page_fake_buckets_end);
7467 }
7468 }
7469 #endif /* VM_PAGE_FAKE_BUCKETS */
7470
7471 for (i = 0; i < vm_page_bucket_count; i++) {
7472 vm_object_t p_object;
7473
7474 bucket = &vm_page_buckets[i];
7475 if (!bucket->page_list) {
7476 continue;
7477 }
7478
7479 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
7480 lck_spin_lock(bucket_lock);
7481 p = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
7482
7483 while (p != VM_PAGE_NULL) {
7484 p_object = VM_PAGE_OBJECT(p);
7485
7486 p_hash = vm_page_hash(p_object, p->offset);
7487 if (!p->hashed) {
7488 panic("BUCKET_CHECK: page %p (%p,0x%llx) "
7489 "hash %d in bucket %d at %p "
7490 "is not hashed\n",
7491 p, p_object, p->offset,
7492 p_hash, i, bucket);
7493 }
7494 if (p_hash != i) {
7495 panic("BUCKET_CHECK: corruption in bucket %d "
7496 "at %p: page %p object %p offset 0x%llx "
7497 "hash %d\n",
7498 i, bucket, p, p_object, p->offset,
7499 p_hash);
7500 }
7501 p = (vm_page_t)(VM_PAGE_UNPACK_PTR(p->next_m));
7502 }
7503 lck_spin_unlock(bucket_lock);
7504 }
7505
7506 // printf("BUCKET_CHECK: checked buckets\n");
7507 }
7508 #endif /* VM_PAGE_BUCKETS_CHECK */
7509
7510 /*
7511 * 'vm_fault_enter' will place newly created pages (zero-fill and COW) onto the
7512 * local queues if they exist... it's the only spot in the system where we add pages
7513 * to those queues... once on those queues, those pages can only move to one of the
7514 * global page queues or the free queues... they NEVER move from local q to local q.
7515 * the 'local' state is stable when vm_page_queues_remove is called since we're behind
7516 * the global vm_page_queue_lock at this point... we still need to take the local lock
7517 * in case this operation is being run on a different CPU than the one that owns the
7518 * local queue, but we don't have to worry about the page moving to a global queue or
7519 * becoming wired while we're grabbing the local lock, since those operations would
7520 * require the global vm_page_queue_lock to be held, and we already own it.
7521 *
7522 * this is why it's safe to utilize the wire_count field in the vm_page_t as the local_id...
7523 * 'wired' and 'local' are ALWAYS mutually exclusive conditions.
7524 */
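/* (vm_page_queues_remove() below is the concrete case: it takes the per-CPU
 * VPL_LOCK for the page's local_id while already holding the global
 * vm_page_queue_lock.) */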
7525
7526 #if CONFIG_BACKGROUND_QUEUE
7527 void
7528 vm_page_queues_remove(vm_page_t mem, boolean_t remove_from_backgroundq)
7529 #else
7530 void
7531 vm_page_queues_remove(vm_page_t mem, boolean_t __unused remove_from_backgroundq)
7532 #endif
7533 {
7534 boolean_t was_pageable = TRUE;
7535 vm_object_t m_object;
7536
7537 m_object = VM_PAGE_OBJECT(mem);
7538
7539 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
7540
7541 if (mem->vm_page_q_state == VM_PAGE_NOT_ON_Q)
7542 {
7543 assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
7544 #if CONFIG_BACKGROUND_QUEUE
7545 if (mem->vm_page_on_backgroundq == FALSE) {
7546 assert(mem->vm_page_backgroundq.next == 0 &&
7547 mem->vm_page_backgroundq.prev == 0 &&
7548 mem->vm_page_on_backgroundq == FALSE);
7549 }
7550 #endif
7551 return;
7552 }
7553 if (mem->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR)
7554 {
7555 assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
7556 #if CONFIG_BACKGROUND_QUEUE
7557 assert(mem->vm_page_backgroundq.next == 0 &&
7558 mem->vm_page_backgroundq.prev == 0 &&
7559 mem->vm_page_on_backgroundq == FALSE);
7560 #endif
7561 return;
7562 }
7563 if (mem->vm_page_q_state == VM_PAGE_IS_WIRED) {
7564 /*
7565 * might put these guys on a list for debugging purposes
7566 * if we do, we'll need to remove this assert
7567 */
7568 assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
7569 #if CONFIG_BACKGROUND_QUEUE
7570 assert(mem->vm_page_backgroundq.next == 0 &&
7571 mem->vm_page_backgroundq.prev == 0 &&
7572 mem->vm_page_on_backgroundq == FALSE);
7573 #endif
7574 return;
7575 }
7576
7577 assert(m_object != compressor_object);
7578 assert(m_object != kernel_object);
7579 assert(m_object != vm_submap_object);
7580 assert(!mem->fictitious);
7581
7582 switch(mem->vm_page_q_state) {
7583
7584 case VM_PAGE_ON_ACTIVE_LOCAL_Q:
7585 {
7586 struct vpl *lq;
7587
7588 lq = &vm_page_local_q[mem->local_id].vpl_un.vpl;
7589 VPL_LOCK(&lq->vpl_lock);
7590 vm_page_queue_remove(&lq->vpl_queue,
7591 mem, vm_page_t, pageq);
7592 mem->local_id = 0;
7593 lq->vpl_count--;
7594 if (m_object->internal) {
7595 lq->vpl_internal_count--;
7596 } else {
7597 lq->vpl_external_count--;
7598 }
7599 VPL_UNLOCK(&lq->vpl_lock);
7600 was_pageable = FALSE;
7601 break;
7602 }
7603 case VM_PAGE_ON_ACTIVE_Q:
7604 {
7605 vm_page_queue_remove(&vm_page_queue_active,
7606 mem, vm_page_t, pageq);
7607 vm_page_active_count--;
7608 break;
7609 }
7610
7611 case VM_PAGE_ON_INACTIVE_INTERNAL_Q:
7612 {
7613 assert(m_object->internal == TRUE);
7614
7615 vm_page_inactive_count--;
7616 vm_page_queue_remove(&vm_page_queue_anonymous,
7617 mem, vm_page_t, pageq);
7618 vm_page_anonymous_count--;
7619 vm_purgeable_q_advance_all();
7620 break;
7621 }
7622
7623 case VM_PAGE_ON_INACTIVE_EXTERNAL_Q:
7624 {
7625 assert(m_object->internal == FALSE);
7626
7627 vm_page_inactive_count--;
7628 vm_page_queue_remove(&vm_page_queue_inactive,
7629 mem, vm_page_t, pageq);
7630 vm_purgeable_q_advance_all();
7631 break;
7632 }
7633
7634 case VM_PAGE_ON_INACTIVE_CLEANED_Q:
7635 {
7636 assert(m_object->internal == FALSE);
7637
7638 vm_page_inactive_count--;
7639 vm_page_queue_remove(&vm_page_queue_cleaned,
7640 mem, vm_page_t, pageq);
7641 vm_page_cleaned_count--;
7642 break;
7643 }
7644
7645 case VM_PAGE_ON_THROTTLED_Q:
7646 {
7647 assert(m_object->internal == TRUE);
7648
7649 vm_page_queue_remove(&vm_page_queue_throttled,
7650 mem, vm_page_t, pageq);
7651 vm_page_throttled_count--;
7652 was_pageable = FALSE;
7653 break;
7654 }
7655
7656 case VM_PAGE_ON_SPECULATIVE_Q:
7657 {
7658 assert(m_object->internal == FALSE);
7659
7660 vm_page_remque(&mem->pageq);
7661 vm_page_speculative_count--;
7662 break;
7663 }
7664
7665 #if CONFIG_SECLUDED_MEMORY
7666 case VM_PAGE_ON_SECLUDED_Q:
7667 {
7668 vm_page_queue_remove(&vm_page_queue_secluded,
7669 mem, vm_page_t, pageq);
7670 vm_page_secluded_count--;
7671 if (m_object == VM_OBJECT_NULL) {
7672 vm_page_secluded_count_free--;
7673 was_pageable = FALSE;
7674 } else {
7675 assert(!m_object->internal);
7676 vm_page_secluded_count_inuse--;
7677 was_pageable = FALSE;
7678 // was_pageable = TRUE;
7679 }
7680 break;
7681 }
7682 #endif /* CONFIG_SECLUDED_MEMORY */
7683
7684 default:
7685 {
7686 /*
7687 * if (mem->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)
7688 * NOTE: vm_page_queues_remove does not deal with removing pages from the pageout queue...
7689 * the caller is responsible for determining if the page is on that queue, and if so, must
7690 * either first remove it (it needs both the page queues lock and the object lock to do
7691 * this via vm_pageout_steal_laundry), or avoid the call to vm_page_queues_remove
7692 *
7693 * we also don't expect to encounter VM_PAGE_ON_FREE_Q, VM_PAGE_ON_FREE_LOCAL_Q, VM_PAGE_ON_FREE_LOPAGE_Q
7694 * or any of the undefined states
7695 */
7696 panic("vm_page_queues_remove - bad page q_state (%p, %d)\n", mem, mem->vm_page_q_state);
7697 break;
7698 }
7699
7700 }
7701 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
7702 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
7703
7704 #if CONFIG_BACKGROUND_QUEUE
7705 if (remove_from_backgroundq == TRUE)
7706 vm_page_remove_from_backgroundq(mem);
7707 #endif
7708 if (was_pageable) {
7709 if (m_object->internal) {
7710 vm_page_pageable_internal_count--;
7711 } else {
7712 vm_page_pageable_external_count--;
7713 }
7714 }
7715 }
7716
7717 void
7718 vm_page_remove_internal(vm_page_t page)
7719 {
7720 vm_object_t __object = VM_PAGE_OBJECT(page);
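/*
 * if this page is the object's lookup hint, advance the hint to a
 * neighbouring page (or clear it) before the page is unlinked
 */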
7721 if (page == __object->memq_hint) {
7722 vm_page_t __new_hint;
7723 vm_page_queue_entry_t __qe;
7724 __qe = (vm_page_queue_entry_t)vm_page_queue_next(&page->listq);
7725 if (vm_page_queue_end(&__object->memq, __qe)) {
7726 __qe = (vm_page_queue_entry_t)vm_page_queue_prev(&page->listq);
7727 if (vm_page_queue_end(&__object->memq, __qe)) {
7728 __qe = NULL;
7729 }
7730 }
7731 __new_hint = (vm_page_t)((uintptr_t) __qe);
7732 __object->memq_hint = __new_hint;
7733 }
7734 vm_page_queue_remove(&__object->memq, page, vm_page_t, listq);
7735 #if CONFIG_SECLUDED_MEMORY
7736 if (__object->eligible_for_secluded) {
7737 vm_page_secluded.eligible_for_secluded--;
7738 }
7739 #endif /* CONFIG_SECLUDED_MEMORY */
7740 }
7741
7742 void
7743 vm_page_enqueue_inactive(vm_page_t mem, boolean_t first)
7744 {
7745 vm_object_t m_object;
7746
7747 m_object = VM_PAGE_OBJECT(mem);
7748
7749 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
7750 assert(!mem->fictitious);
7751 assert(!mem->laundry);
7752 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
7753 vm_page_check_pageable_safe(mem);
7754
7755 #if CONFIG_SECLUDED_MEMORY
7756 if (secluded_for_filecache &&
7757 vm_page_secluded_target != 0 &&
7758 num_tasks_can_use_secluded_mem == 0 &&
7759 m_object->eligible_for_secluded &&
7760 secluded_aging_policy == SECLUDED_AGING_FIFO) {
7761 mem->vm_page_q_state = VM_PAGE_ON_SECLUDED_Q;
7762 vm_page_queue_enter(&vm_page_queue_secluded, mem,
7763 vm_page_t, pageq);
7764 vm_page_secluded_count++;
7765 vm_page_secluded_count_inuse++;
7766 assert(!m_object->internal);
7767 // vm_page_pageable_external_count++;
7768 return;
7769 }
7770 #endif /* CONFIG_SECLUDED_MEMORY */
7771
7772 if (m_object->internal) {
7773 mem->vm_page_q_state = VM_PAGE_ON_INACTIVE_INTERNAL_Q;
7774
7775 if (first == TRUE)
7776 vm_page_queue_enter_first(&vm_page_queue_anonymous, mem, vm_page_t, pageq);
7777 else
7778 vm_page_queue_enter(&vm_page_queue_anonymous, mem, vm_page_t, pageq);
7779
7780 vm_page_anonymous_count++;
7781 vm_page_pageable_internal_count++;
7782 } else {
7783 mem->vm_page_q_state = VM_PAGE_ON_INACTIVE_EXTERNAL_Q;
7784
7785 if (first == TRUE)
7786 vm_page_queue_enter_first(&vm_page_queue_inactive, mem, vm_page_t, pageq);
7787 else
7788 vm_page_queue_enter(&vm_page_queue_inactive, mem, vm_page_t, pageq);
7789
7790 vm_page_pageable_external_count++;
7791 }
7792 vm_page_inactive_count++;
7793 token_new_pagecount++;
7794
7795 #if CONFIG_BACKGROUND_QUEUE
7796 if (mem->vm_page_in_background)
7797 vm_page_add_to_backgroundq(mem, FALSE);
7798 #endif
7799 }
7800
7801 void
7802 vm_page_enqueue_active(vm_page_t mem, boolean_t first)
7803 {
7804 vm_object_t m_object;
7805
7806 m_object = VM_PAGE_OBJECT(mem);
7807
7808 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
7809 assert(!mem->fictitious);
7810 assert(!mem->laundry);
7811 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
7812 vm_page_check_pageable_safe(mem);
7813
7814 mem->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
7815 if (first == TRUE)
7816 vm_page_queue_enter_first(&vm_page_queue_active, mem, vm_page_t, pageq);
7817 else
7818 vm_page_queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq);
7819 vm_page_active_count++;
7820
7821 if (m_object->internal) {
7822 vm_page_pageable_internal_count++;
7823 } else {
7824 vm_page_pageable_external_count++;
7825 }
7826
7827 #if CONFIG_BACKGROUND_QUEUE
7828 if (mem->vm_page_in_background)
7829 vm_page_add_to_backgroundq(mem, FALSE);
7830 #endif
7831 }
7832
7833 /*
7834 * Pages from special kernel objects shouldn't
7835 * be placed on pageable queues.
7836 */
7837 void
7838 vm_page_check_pageable_safe(vm_page_t page)
7839 {
7840 vm_object_t page_object;
7841
7842 page_object = VM_PAGE_OBJECT(page);
7843
7844 if (page_object == kernel_object) {
7845 panic("vm_page_check_pageable_safe: trying to add page " \
7846 "from kernel object (%p) to pageable queue", kernel_object);
7847 }
7848
7849 if (page_object == compressor_object) {
7850 panic("vm_page_check_pageable_safe: trying to add page " \
7851 "from compressor object (%p) to pageable queue", compressor_object);
7852 }
7853
7854 if (page_object == vm_submap_object) {
7855 panic("vm_page_check_pageable_safe: trying to add page " \
7856 "from submap object (%p) to pageable queue", vm_submap_object);
7857 }
7858 }
7859
7860 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
7861 * wired page diagnose
7862 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
7863
7864 #include <libkern/OSKextLibPrivate.h>
7865
7866 vm_allocation_site_t *
7867 vm_allocation_sites[VM_KERN_MEMORY_COUNT];
7868
7869 vm_tag_t
7870 vm_tag_bt(void)
7871 {
7872 uintptr_t* frameptr;
7873 uintptr_t* frameptr_next;
7874 uintptr_t retaddr;
7875 uintptr_t kstackb, kstackt;
7876 const vm_allocation_site_t * site;
7877 thread_t cthread;
7878
7879 cthread = current_thread();
7880 if (__improbable(cthread == NULL)) return VM_KERN_MEMORY_OSFMK;
7881
7882 kstackb = cthread->kernel_stack;
7883 kstackt = kstackb + kernel_stack_size;
7884
7885 /* Load stack frame pointer (EBP on x86) into frameptr */
7886 frameptr = __builtin_frame_address(0);
7887 site = NULL;
7888 while (frameptr != NULL)
7889 {
7890 /* Verify thread stack bounds */
7891 if (((uintptr_t)(frameptr + 2) > kstackt) || ((uintptr_t)frameptr < kstackb)) break;
7892
7893 /* Next frame pointer is pointed to by the previous one */
7894 frameptr_next = (uintptr_t*) *frameptr;
7895
7896 /* Pull return address from one spot above the frame pointer */
7897 retaddr = *(frameptr + 1);
7898
7899 if ((retaddr < vm_kernel_stext) || (retaddr > vm_kernel_top))
7900 {
7901 site = OSKextGetAllocationSiteForCaller(retaddr);
7902 break;
7903 }
7904
7905 frameptr = frameptr_next;
7906 }
7907 return (site ? site->tag : VM_KERN_MEMORY_NONE);
7908 }
7909
7910 static uint64_t free_tag_bits[256/64];
7911
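/*
 * free_tag_bits is a bitmap of the dynamic tag space: a set bit means the tag
 * is free.  Bits are stored most-significant-first, so __builtin_clzll() on a
 * word yields the lowest free tag within that block of 64 tags.
 */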
7912 void
7913 vm_tag_alloc_locked(vm_allocation_site_t * site)
7914 {
7915 vm_tag_t tag;
7916 uint64_t avail;
7917 uint64_t idx;
7918
7919 if (site->tag) return;
7920
7921 idx = 0;
7922 while (TRUE)
7923 {
7924 avail = free_tag_bits[idx];
7925 if (avail)
7926 {
7927 tag = __builtin_clzll(avail);
7928 avail &= ~(1ULL << (63 - tag));
7929 free_tag_bits[idx] = avail;
7930 tag += (idx << 6);
7931 break;
7932 }
7933 idx++;
7934 if (idx >= (sizeof(free_tag_bits) / sizeof(free_tag_bits[0])))
7935 {
7936 tag = VM_KERN_MEMORY_ANY;
7937 break;
7938 }
7939 }
7940 site->tag = tag;
7941 if (VM_KERN_MEMORY_ANY != tag)
7942 {
7943 assert(!vm_allocation_sites[tag]);
7944 vm_allocation_sites[tag] = site;
7945 }
7946 }
7947
7948 static void
7949 vm_tag_free_locked(vm_tag_t tag)
7950 {
7951 uint64_t avail;
7952 uint32_t idx;
7953 uint64_t bit;
7954
7955 if (VM_KERN_MEMORY_ANY == tag) return;
7956
7957 idx = (tag >> 6);
7958 avail = free_tag_bits[idx];
7959 tag &= 63;
7960 bit = (1ULL << (63 - tag));
7961 assert(!(avail & bit));
7962 free_tag_bits[idx] = (avail | bit);
7963 }
7964
7965 static void
7966 vm_tag_init(void)
7967 {
7968 vm_tag_t tag;
7969 for (tag = VM_KERN_MEMORY_FIRST_DYNAMIC; tag < VM_KERN_MEMORY_ANY; tag++)
7970 {
7971 vm_tag_free_locked(tag);
7972 }
7973 }
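
/*
 * Illustrative sketch (not part of this file): a minimal userspace model of the
 * free_tag_bits allocator above, assuming the same 256-tag space and MSB-first
 * encoding.  The model_* names are hypothetical and the code is kept out of the
 * build with #if 0; it only demonstrates the clz-based lowest-free-tag search.
 */
#if 0
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define MODEL_TAG_MAX	256
#define MODEL_TAG_NONE	((unsigned)-1)

static uint64_t model_free_bits[MODEL_TAG_MAX / 64];

/* mark every tag in [first, MODEL_TAG_MAX) as available */
static void
model_tag_init(unsigned first)
{
	for (unsigned t = first; t < MODEL_TAG_MAX; t++)
		model_free_bits[t >> 6] |= (1ULL << (63 - (t & 63)));
}

/* hand out the lowest free tag: scan the 64-bit words and use clz to find the
 * most-significant set bit, which encodes the smallest free tag in that word */
static unsigned
model_tag_alloc(void)
{
	for (unsigned idx = 0; idx < MODEL_TAG_MAX / 64; idx++) {
		uint64_t avail = model_free_bits[idx];
		if (avail) {
			unsigned tag = (unsigned)__builtin_clzll(avail);
			model_free_bits[idx] = avail & ~(1ULL << (63 - tag));
			return (idx << 6) + tag;
		}
	}
	return MODEL_TAG_NONE;
}

/* return a tag to the free pool */
static void
model_tag_free(unsigned tag)
{
	model_free_bits[tag >> 6] |= (1ULL << (63 - (tag & 63)));
}

int
main(void)
{
	model_tag_init(8);			/* pretend tags 0..7 are reserved */
	unsigned a = model_tag_alloc();		/* 8 */
	unsigned b = model_tag_alloc();		/* 9 */
	model_tag_free(a);
	assert(model_tag_alloc() == a);		/* the lowest free tag is reused */
	printf("a=%u b=%u\n", a, b);
	return 0;
}
#endif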
7974
7975 vm_tag_t
7976 vm_tag_alloc(vm_allocation_site_t * site)
7977 {
7978 vm_tag_t tag;
7979
7980 if (VM_TAG_BT & site->flags)
7981 {
7982 tag = vm_tag_bt();
7983 if (VM_KERN_MEMORY_NONE != tag) return (tag);
7984 }
7985
7986 if (!site->tag)
7987 {
7988 lck_spin_lock(&vm_allocation_sites_lock);
7989 vm_tag_alloc_locked(site);
7990 lck_spin_unlock(&vm_allocation_sites_lock);
7991 }
7992
7993 return (site->tag);
7994 }
7995
7996 static void
7997 vm_page_count_object(mach_memory_info_t * sites, unsigned int __unused num_sites, vm_object_t object)
7998 {
7999 if (!object->wired_page_count) return;
8000 if (object != kernel_object)
8001 {
8002 assert(object->wire_tag < num_sites);
8003 sites[object->wire_tag].size += ptoa_64(object->wired_page_count);
8004 }
8005 }
8006
8007 typedef void (*vm_page_iterate_proc)(mach_memory_info_t * sites,
8008 unsigned int num_sites, vm_object_t object);
8009
8010 static void
8011 vm_page_iterate_purgeable_objects(mach_memory_info_t * sites, unsigned int num_sites,
8012 vm_page_iterate_proc proc, purgeable_q_t queue,
8013 int group)
8014 {
8015 vm_object_t object;
8016
8017 for (object = (vm_object_t) queue_first(&queue->objq[group]);
8018 !queue_end(&queue->objq[group], (queue_entry_t) object);
8019 object = (vm_object_t) queue_next(&object->objq))
8020 {
8021 proc(sites, num_sites, object);
8022 }
8023 }
8024
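/*
 * apply 'proc' to every object that can account wired or purgeable memory:
 * the wired-object list, the nonvolatile purgeable queue, and each group of
 * the obsolete, FIFO and LIFO volatile purgeable queues.
 */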
8025 static void
8026 vm_page_iterate_objects(mach_memory_info_t * sites, unsigned int num_sites,
8027 vm_page_iterate_proc proc)
8028 {
8029 purgeable_q_t volatile_q;
8030 queue_head_t * nonvolatile_q;
8031 vm_object_t object;
8032 int group;
8033
8034 lck_spin_lock(&vm_objects_wired_lock);
8035 queue_iterate(&vm_objects_wired,
8036 object,
8037 vm_object_t,
8038 objq)
8039 {
8040 proc(sites, num_sites, object);
8041 }
8042 lck_spin_unlock(&vm_objects_wired_lock);
8043
8044 lck_mtx_lock(&vm_purgeable_queue_lock);
8045 nonvolatile_q = &purgeable_nonvolatile_queue;
8046 for (object = (vm_object_t) queue_first(nonvolatile_q);
8047 !queue_end(nonvolatile_q, (queue_entry_t) object);
8048 object = (vm_object_t) queue_next(&object->objq))
8049 {
8050 proc(sites, num_sites, object);
8051 }
8052
8053 volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE];
8054 vm_page_iterate_purgeable_objects(sites, num_sites, proc, volatile_q, 0);
8055
8056 volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
8057 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
8058 {
8059 vm_page_iterate_purgeable_objects(sites, num_sites, proc, volatile_q, group);
8060 }
8061
8062 volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
8063 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
8064 {
8065 vm_page_iterate_purgeable_objects(sites, num_sites, proc, volatile_q, group);
8066 }
8067 lck_mtx_unlock(&vm_purgeable_queue_lock);
8068 }
8069
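/*
 * fold the per-tag totals gathered above into the caller-visible records:
 * the fixed, well-known tags get their canonical flags, while dynamically
 * allocated tags are resolved to either a kmod id or an unslid kernel address.
 */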
8070 static uint64_t
8071 process_account(mach_memory_info_t * sites, unsigned int __unused num_sites, uint64_t zones_collectable_bytes)
8072 {
8073 uint64_t found;
8074 unsigned int idx;
8075 vm_allocation_site_t * site;
8076
8077 assert(num_sites >= VM_KERN_MEMORY_COUNT);
8078 found = 0;
8079 for (idx = 0; idx < VM_KERN_MEMORY_COUNT; idx++)
8080 {
8081 found += sites[idx].size;
8082 if (idx < VM_KERN_MEMORY_FIRST_DYNAMIC)
8083 {
8084 sites[idx].site = idx;
8085 sites[idx].flags |= VM_KERN_SITE_TAG;
8086 if (VM_KERN_MEMORY_ZONE == idx)
8087 {
8088 sites[idx].flags |= VM_KERN_SITE_HIDE;
8089 sites[idx].collectable_bytes = zones_collectable_bytes;
8090 } else sites[idx].flags |= VM_KERN_SITE_WIRED;
8091 continue;
8092 }
8093 lck_spin_lock(&vm_allocation_sites_lock);
8094 if ((site = vm_allocation_sites[idx]))
8095 {
8096 if (sites[idx].size)
8097 {
8098 sites[idx].flags |= VM_KERN_SITE_WIRED;
8099 if (VM_TAG_KMOD == (VM_KERN_SITE_TYPE & site->flags))
8100 {
8101 sites[idx].site = OSKextGetKmodIDForSite(site, NULL, 0);
8102 sites[idx].flags |= VM_KERN_SITE_KMOD;
8103 }
8104 else
8105 {
8106 sites[idx].site = VM_KERNEL_UNSLIDE(site);
8107 sites[idx].flags |= VM_KERN_SITE_KERNEL;
8108 }
8109 site = NULL;
8110 }
8111 else
8112 {
8113 #if 1
8114 site = NULL;
8115 #else
8116 /* this code would free a site with no allocations but can race a new
8117 * allocation being made */
8118 vm_tag_free_locked(site->tag);
8119 site->tag = VM_KERN_MEMORY_NONE;
8120 vm_allocation_sites[idx] = NULL;
8121 if (!(VM_TAG_UNLOAD & site->flags)) site = NULL;
8122 #endif
8123 }
8124 }
8125 lck_spin_unlock(&vm_allocation_sites_lock);
8126 if (site) OSKextFreeSite(site);
8127 }
8128
8129 return (found);
8130 }
8131
8132 kern_return_t
8133 vm_page_diagnose(mach_memory_info_t * sites, unsigned int num_sites, uint64_t zones_collectable_bytes)
8134 {
8135 enum { kMaxKernelDepth = 1 };
8136 vm_map_t maps [kMaxKernelDepth];
8137 vm_map_entry_t entries[kMaxKernelDepth];
8138 vm_map_t map;
8139 vm_map_entry_t entry;
8140 vm_object_offset_t offset;
8141 vm_page_t page;
8142 int stackIdx, count;
8143 uint64_t wired_size;
8144 uint64_t wired_managed_size;
8145 uint64_t wired_reserved_size;
8146 mach_memory_info_t * counts;
8147
8148 bzero(sites, num_sites * sizeof(mach_memory_info_t));
8149
8150 if (!vm_page_wire_count_initial) return (KERN_ABORTED);
8151
8152 vm_page_iterate_objects(sites, num_sites, &vm_page_count_object);
8153
8154 wired_size = ptoa_64(vm_page_wire_count + vm_lopage_free_count + vm_page_throttled_count);
8155 wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count + vm_page_throttled_count);
8156 wired_managed_size = ptoa_64(vm_page_wire_count - vm_page_wire_count_initial);
8157
8158 assert(num_sites >= (VM_KERN_MEMORY_COUNT + VM_KERN_COUNTER_COUNT));
8159 counts = &sites[VM_KERN_MEMORY_COUNT];
8160
8161 #define SET_COUNT(xcount, xsize, xflags) \
8162 counts[xcount].site = (xcount); \
8163 counts[xcount].size = (xsize); \
8164 counts[xcount].flags = VM_KERN_SITE_COUNTER | xflags;
8165
8166 SET_COUNT(VM_KERN_COUNT_MANAGED, ptoa_64(vm_page_pages), 0);
8167 SET_COUNT(VM_KERN_COUNT_WIRED, wired_size, 0);
8168 SET_COUNT(VM_KERN_COUNT_WIRED_MANAGED, wired_managed_size, 0);
8169 SET_COUNT(VM_KERN_COUNT_RESERVED, wired_reserved_size, VM_KERN_SITE_WIRED);
8170 SET_COUNT(VM_KERN_COUNT_STOLEN, ptoa_64(vm_page_stolen_count), VM_KERN_SITE_WIRED);
8171 SET_COUNT(VM_KERN_COUNT_LOPAGE, ptoa_64(vm_lopage_free_count), VM_KERN_SITE_WIRED);
8172
8173 #define SET_MAP(xcount, xsize, xfree, xlargest) \
8174 counts[xcount].site = (xcount); \
8175 counts[xcount].size = (xsize); \
8176 counts[xcount].free = (xfree); \
8177 counts[xcount].largest = (xlargest); \
8178 counts[xcount].flags = VM_KERN_SITE_COUNTER;
8179
8180 vm_map_size_t map_size, map_free, map_largest;
8181
8182 vm_map_sizes(kernel_map, &map_size, &map_free, &map_largest);
8183 SET_MAP(VM_KERN_COUNT_MAP_KERNEL, map_size, map_free, map_largest);
8184
8185 vm_map_sizes(zone_map, &map_size, &map_free, &map_largest);
8186 SET_MAP(VM_KERN_COUNT_MAP_ZONE, map_size, map_free, map_largest);
8187
8188 vm_map_sizes(kalloc_map, &map_size, &map_free, &map_largest);
8189 SET_MAP(VM_KERN_COUNT_MAP_KALLOC, map_size, map_free, map_largest);
8190
8191 map = kernel_map;
8192 stackIdx = 0;
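/*
 * walk kernel_map and any submaps iteratively, using maps[]/entries[]
 * as an explicit recursion stack bounded by kMaxKernelDepth
 */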
8193 while (map)
8194 {
8195 vm_map_lock(map);
8196 for (entry = map->hdr.links.next; map; entry = entry->links.next)
8197 {
8198 if (entry->is_sub_map)
8199 {
8200 assert(stackIdx < kMaxKernelDepth);
8201 maps[stackIdx] = map;
8202 entries[stackIdx] = entry;
8203 stackIdx++;
8204 map = VME_SUBMAP(entry);
8205 entry = NULL;
8206 break;
8207 }
8208 if (VME_OBJECT(entry) == kernel_object)
8209 {
8210 count = 0;
8211 vm_object_lock(VME_OBJECT(entry));
8212 for (offset = entry->links.start; offset < entry->links.end; offset += page_size)
8213 {
8214 page = vm_page_lookup(VME_OBJECT(entry), offset);
8215 if (page && VM_PAGE_WIRED(page)) count++;
8216 }
8217 vm_object_unlock(VME_OBJECT(entry));
8218
8219 if (count)
8220 {
8221 assert(VME_ALIAS(entry) < num_sites);
8222 sites[VME_ALIAS(entry)].size += ptoa_64(count);
8223 }
8224 }
8225 while (map && (entry == vm_map_last_entry(map)))
8226 {
8227 vm_map_unlock(map);
8228 if (!stackIdx) map = NULL;
8229 else
8230 {
8231 --stackIdx;
8232 map = maps[stackIdx];
8233 entry = entries[stackIdx];
8234 }
8235 }
8236 }
8237 }
8238
8239 process_account(sites, num_sites, zones_collectable_bytes);
8240
8241 return (KERN_SUCCESS);
8242 }
8243
8244 uint32_t
8245 vm_tag_get_kext(vm_tag_t tag, char * name, vm_size_t namelen)
8246 {
8247 vm_allocation_site_t * site;
8248 uint32_t kmodId;
8249
8250 kmodId = 0;
8251 lck_spin_lock(&vm_allocation_sites_lock);
8252 if ((site = vm_allocation_sites[tag]))
8253 {
8254 if (VM_TAG_KMOD == (VM_KERN_SITE_TYPE & site->flags))
8255 {
8256 kmodId = OSKextGetKmodIDForSite(site, name, namelen);
8257 }
8258 }
8259 lck_spin_unlock(&vm_allocation_sites_lock);
8260
8261 return (kmodId);
8262 }
8263
8264 #if DEBUG || DEVELOPMENT
8265
8266 #define vm_tag_set_lock(set) lck_spin_lock(&set->lock)
8267 #define vm_tag_set_unlock(set) lck_spin_unlock(&set->lock)
8268
8269 void
8270 vm_tag_set_init(vm_tag_set_t set, uint32_t count)
8271 {
8272 lck_spin_init(&set->lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
8273 bzero(&set->entries, count * sizeof(struct vm_tag_set_entry));
8274 }
8275
8276 kern_return_t
8277 vm_tag_set_enter(vm_tag_set_t set, uint32_t count, vm_tag_t tag)
8278 {
8279 kern_return_t kr;
8280 uint32_t idx, free;
8281
8282 vm_tag_set_lock(set);
8283
8284 assert(tag != VM_KERN_MEMORY_NONE);
8285
8286 kr = KERN_NO_SPACE;
8287 free = -1U;
8288 for (idx = 0; idx < count; idx++)
8289 {
8290 if (tag == set->entries[idx].tag)
8291 {
8292 set->entries[idx].count++;
8293 kr = KERN_SUCCESS;
8294 break;
8295 }
8296 if ((free == -1U) && !set->entries[idx].count) free = idx;
8297 }
8298
8299 if ((KERN_SUCCESS != kr) && (free != -1U))
8300 {
8301 set->entries[free].tag = tag;
8302 set->entries[free].count = 1;
8303 kr = KERN_SUCCESS;
8304 }
8305
8306 vm_tag_set_unlock(set);
8307
8308 return (kr);
8309 }
8310
8311 kern_return_t
8312 vm_tag_set_remove(vm_tag_set_t set, uint32_t count, vm_tag_t tag, vm_tag_t * new_tagp)
8313 {
8314 kern_return_t kr;
8315 uint32_t idx;
8316 vm_tag_t new_tag;
8317
8318 assert(tag != VM_KERN_MEMORY_NONE);
8319 new_tag = VM_KERN_MEMORY_NONE;
8320 vm_tag_set_lock(set);
8321
8322 kr = KERN_NOT_IN_SET;
8323 for (idx = 0; idx < count; idx++)
8324 {
8325 if ((tag != VM_KERN_MEMORY_NONE)
8326 && (tag == set->entries[idx].tag)
8327 && set->entries[idx].count)
8328 {
8329 set->entries[idx].count--;
8330 kr = KERN_SUCCESS;
8331 if (set->entries[idx].count)
8332 {
8333 new_tag = tag;
8334 break;
8335 }
8336 if (!new_tagp) break;
8337 tag = VM_KERN_MEMORY_NONE;
8338 }
8339
8340 if (set->entries[idx].count && (VM_KERN_MEMORY_NONE == new_tag))
8341 {
8342 new_tag = set->entries[idx].tag;
8343 if (VM_KERN_MEMORY_NONE == tag) break;
8344 }
8345 }
8346
8347 vm_tag_set_unlock(set);
8348 if (new_tagp) *new_tagp = new_tag;
8349
8350 return (kr);
8351 }
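
/*
 * Illustrative sketch (not part of this file): a minimal userspace model of the
 * vm_tag_set_enter/_remove bookkeeping above -- a small, fixed-size multiset of
 * (tag, refcount) slots.  The model_* names are hypothetical, locking is
 * omitted, and the code is kept out of the build with #if 0.
 */
#if 0
#include <assert.h>
#include <stdint.h>

#define MODEL_SET_SLOTS	4
#define MODEL_TAG_NONE	0u

struct model_slot { uint32_t tag; uint32_t count; };
static struct model_slot model_set[MODEL_SET_SLOTS];

/* add one reference to 'tag': reuse a matching slot or claim an empty one */
static int
model_set_enter(uint32_t tag)
{
	int free_idx = -1;
	for (int i = 0; i < MODEL_SET_SLOTS; i++) {
		if (model_set[i].count && model_set[i].tag == tag) {
			model_set[i].count++;
			return 0;
		}
		if (free_idx < 0 && model_set[i].count == 0)
			free_idx = i;
	}
	if (free_idx < 0)
		return -1;				/* no space left, like KERN_NO_SPACE */
	model_set[free_idx].tag = tag;
	model_set[free_idx].count = 1;
	return 0;
}

/* drop one reference to 'tag'; report some tag that is still present, if any */
static int
model_set_remove(uint32_t tag, uint32_t *surviving)
{
	int found = 0;
	*surviving = MODEL_TAG_NONE;
	for (int i = 0; i < MODEL_SET_SLOTS; i++) {
		if (model_set[i].count == 0)
			continue;
		if (!found && model_set[i].tag == tag) {
			found = 1;
			if (--model_set[i].count) {	/* still referenced */
				*surviving = tag;
				return 0;
			}
			continue;
		}
		if (*surviving == MODEL_TAG_NONE)
			*surviving = model_set[i].tag;	/* another tag remains in the set */
	}
	return found ? 0 : -2;				/* -2 ~ KERN_NOT_IN_SET */
}

int
main(void)
{
	uint32_t left;
	assert(model_set_enter(7) == 0);
	assert(model_set_enter(7) == 0);			/* refcount for tag 7 becomes 2 */
	assert(model_set_enter(9) == 0);
	assert(model_set_remove(7, &left) == 0 && left == 7);	/* 2 -> 1, tag 7 survives */
	assert(model_set_remove(7, &left) == 0 && left == 9);	/* slot emptied, tag 9 remains */
	assert(model_set_remove(7, &left) == -2);		/* tag 7 no longer in the set */
	return 0;
}
#endif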
8352
8353 #endif /* DEBUG || DEVELOPMENT */