1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67 #include <libkern/OSDebug.h>
68
69 #include <mach/clock_types.h>
70 #include <mach/vm_prot.h>
71 #include <mach/vm_statistics.h>
72 #include <mach/sdt.h>
73 #include <kern/counters.h>
74 #include <kern/sched_prim.h>
75 #include <kern/policy_internal.h>
76 #include <kern/task.h>
77 #include <kern/thread.h>
78 #include <kern/kalloc.h>
79 #include <kern/zalloc.h>
80 #include <kern/xpr.h>
81 #include <kern/ledger.h>
82 #include <vm/pmap.h>
83 #include <vm/vm_init.h>
84 #include <vm/vm_map.h>
85 #include <vm/vm_page.h>
86 #include <vm/vm_pageout.h>
87 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
88 #include <kern/misc_protos.h>
89 #include <zone_debug.h>
90 #include <mach_debug/zone_info.h>
91 #include <vm/cpm.h>
92 #include <pexpert/pexpert.h>
93 #include <san/kasan.h>
94
95 #include <vm/vm_protos.h>
96 #include <vm/memory_object.h>
97 #include <vm/vm_purgeable_internal.h>
98 #include <vm/vm_compressor.h>
99
100 #if CONFIG_PHANTOM_CACHE
101 #include <vm/vm_phantom_cache.h>
102 #endif
103
104 #include <IOKit/IOHibernatePrivate.h>
105
106 #include <sys/kdebug.h>
107
108
109 char vm_page_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
110 char vm_page_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
111 char vm_page_non_speculative_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
112 char vm_page_active_or_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
113
114 #if CONFIG_SECLUDED_MEMORY
115 struct vm_page_secluded_data vm_page_secluded;
116 #endif /* CONFIG_SECLUDED_MEMORY */
117
118 boolean_t hibernate_cleaning_in_progress = FALSE;
119 boolean_t vm_page_free_verify = TRUE;
120
121 uint32_t vm_lopage_free_count = 0;
122 uint32_t vm_lopage_free_limit = 0;
123 uint32_t vm_lopage_lowater = 0;
124 boolean_t vm_lopage_refill = FALSE;
125 boolean_t vm_lopage_needed = FALSE;
126
127 lck_mtx_ext_t vm_page_queue_lock_ext;
128 lck_mtx_ext_t vm_page_queue_free_lock_ext;
129 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
130
131 int speculative_age_index = 0;
132 int speculative_steal_index = 0;
133 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
134
135
136 __private_extern__ void vm_page_init_lck_grp(void);
137
138 static void vm_page_free_prepare(vm_page_t page);
139 static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
140
141 static void vm_tag_init(void);
142
143 uint64_t vm_min_kernel_and_kext_address = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
144 uint32_t vm_packed_from_vm_pages_array_mask = VM_PACKED_FROM_VM_PAGES_ARRAY;
145 uint32_t vm_packed_pointer_shift = VM_PACKED_POINTER_SHIFT;
146
147 /*
148 * Associated with each page of user-allocatable memory is a
149 * page structure.
150 */
151
152 /*
153 * These variables record the values returned by vm_page_bootstrap,
154 * for debugging purposes. The implementation of pmap_steal_memory
155 * and pmap_startup here also uses them internally.
156 */
157
158 vm_offset_t virtual_space_start;
159 vm_offset_t virtual_space_end;
160 uint32_t vm_page_pages;
161
162 /*
163 * The vm_page_lookup() routine, which provides for fast
164 * (virtual memory object, offset) to page lookup, employs
165 * the following hash table. The vm_page_{insert,remove}
166 * routines install and remove associations in the table.
167 * [This table is often called the virtual-to-physical,
168 * or VP, table.]
169 */
170 typedef struct {
171 vm_page_packed_t page_list;
172 #if MACH_PAGE_HASH_STATS
173 int cur_count; /* current count */
174 int hi_count; /* high water mark */
175 #endif /* MACH_PAGE_HASH_STATS */
176 } vm_page_bucket_t;
177
178
179 #define BUCKETS_PER_LOCK 16
180
181 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
182 unsigned int vm_page_bucket_count = 0; /* How big is array? */
183 unsigned int vm_page_hash_mask; /* Mask for hash function */
184 unsigned int vm_page_hash_shift; /* Shift for hash function */
185 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
186 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
187
188 #ifndef VM_TAG_ACTIVE_UPDATE
189 #error VM_TAG_ACTIVE_UPDATE
190 #endif
191 #ifndef VM_MAX_TAG_ZONES
192 #error VM_MAX_TAG_ZONES
193 #endif
194
195 boolean_t vm_tag_active_update = VM_TAG_ACTIVE_UPDATE;
196 lck_spin_t *vm_page_bucket_locks;
197 lck_spin_t vm_objects_wired_lock;
198 lck_spin_t vm_allocation_sites_lock;
199
200 vm_allocation_site_t vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC + 1];
201 vm_allocation_site_t * vm_allocation_sites[VM_MAX_TAG_VALUE];
202 #if VM_MAX_TAG_ZONES
203 vm_allocation_zone_total_t ** vm_allocation_zone_totals;
204 #endif /* VM_MAX_TAG_ZONES */
205
206 vm_tag_t vm_allocation_tag_highest;
207
208 #if VM_PAGE_BUCKETS_CHECK
209 boolean_t vm_page_buckets_check_ready = FALSE;
210 #if VM_PAGE_FAKE_BUCKETS
211 vm_page_bucket_t *vm_page_fake_buckets; /* decoy buckets */
212 vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
213 #endif /* VM_PAGE_FAKE_BUCKETS */
214 #endif /* VM_PAGE_BUCKETS_CHECK */
215
216
217
218 #if MACH_PAGE_HASH_STATS
219 /* This routine is only for debug. It is intended to be called by
220 * hand by a developer using a kernel debugger. This routine prints
221 * out vm_page_hash table statistics to the kernel debug console.
222 */
223 void
224 hash_debug(void)
225 {
226 int i;
227 int numbuckets = 0;
228 int highsum = 0;
229 int maxdepth = 0;
230
231 for (i = 0; i < vm_page_bucket_count; i++) {
232 if (vm_page_buckets[i].hi_count) {
233 numbuckets++;
234 highsum += vm_page_buckets[i].hi_count;
235 if (vm_page_buckets[i].hi_count > maxdepth)
236 maxdepth = vm_page_buckets[i].hi_count;
237 }
238 }
239 printf("Total number of buckets: %d\n", vm_page_bucket_count);
240 printf("Number used buckets: %d = %d%%\n",
241 numbuckets, 100*numbuckets/vm_page_bucket_count);
242 printf("Number unused buckets: %d = %d%%\n",
243 vm_page_bucket_count - numbuckets,
244 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
245 printf("Sum of bucket max depth: %d\n", highsum);
246 printf("Average bucket depth: %d.%2d\n",
247 highsum/vm_page_bucket_count,
248 highsum%vm_page_bucket_count);
249 printf("Maximum bucket depth: %d\n", maxdepth);
250 }
251 #endif /* MACH_PAGE_HASH_STATS */
252
253 /*
254 * The virtual page size is currently implemented as a runtime
255 * variable, but is constant once initialized using vm_set_page_size.
256 * This initialization must be done in the machine-dependent
257 * bootstrap sequence, before calling other machine-independent
258 * initializations.
259 *
260 * All references to the virtual page size outside this
261 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
262 * constants.
263 */
264 #if defined(__arm__) || defined(__arm64__)
265 vm_size_t page_size;
266 vm_size_t page_mask;
267 int page_shift;
268 #else
269 vm_size_t page_size = PAGE_SIZE;
270 vm_size_t page_mask = PAGE_MASK;
271 int page_shift = PAGE_SHIFT;
272 #endif
273
274 /*
275 * Resident page structures are initialized from
276 * a template (see vm_page_alloc).
277 *
278 * When adding a new field to the virtual memory
279 * page structure (struct vm_page), be sure to add initialization
280 * (see vm_page_bootstrap).
281 */
282 struct vm_page vm_page_template;
283
284 vm_page_t vm_pages = VM_PAGE_NULL;
285 vm_page_t vm_page_array_beginning_addr;
286 vm_page_t vm_page_array_ending_addr;
287 vm_page_t vm_page_array_boundary;
288
289 unsigned int vm_pages_count = 0;
290 ppnum_t vm_page_lowest = 0;
291
292 /*
293 * Resident pages that represent real memory
294 * are allocated from a set of free lists,
295 * one per color.
296 */
297 unsigned int vm_colors;
298 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
299 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
300 unsigned int vm_free_magazine_refill_limit = 0;
301
302
303 struct vm_page_queue_free_head {
304 vm_page_queue_head_t qhead;
305 } __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
306
307 struct vm_page_queue_free_head vm_page_queue_free[MAX_COLORS];
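/*
 * Illustrative note (not part of the original source): a page's color
 * is derived from its physical page number, so selecting the free list
 * for a page conceptually looks like the hypothetical fragment below.
 * The real free and grab paths add further handling (the low-memory
 * pool, clumping, magazine refill limits) on top of this.
 *
 *	color = VM_PAGE_GET_PHYS_PAGE(mem) & vm_color_mask;
 *	vm_page_queue_enter(&vm_page_queue_free[color].qhead,
 *			    mem, vm_page_t, pageq);
 */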
308
309
310 unsigned int vm_page_free_wanted;
311 unsigned int vm_page_free_wanted_privileged;
312 #if CONFIG_SECLUDED_MEMORY
313 unsigned int vm_page_free_wanted_secluded;
314 #endif /* CONFIG_SECLUDED_MEMORY */
315 unsigned int vm_page_free_count;
316
317 /*
318 * Occasionally, the virtual memory system uses
319 * resident page structures that do not refer to
320 * real pages, for example to leave a page with
321 * important state information in the VP table.
322 *
323 * These page structures are allocated the way
324 * most other kernel structures are.
325 */
326 zone_t vm_page_array_zone;
327 zone_t vm_page_zone;
328 vm_locks_array_t vm_page_locks;
329 decl_lck_mtx_data(,vm_page_alloc_lock)
330 lck_mtx_ext_t vm_page_alloc_lock_ext;
331
332 unsigned int io_throttle_zero_fill;
333
334 unsigned int vm_page_local_q_count = 0;
335 unsigned int vm_page_local_q_soft_limit = 250;
336 unsigned int vm_page_local_q_hard_limit = 500;
337 struct vplq *vm_page_local_q = NULL;
338
339 /* N.B. Guard and fictitious pages must not
340 * be assigned a zero phys_page value.
341 */
342 /*
343 * Fictitious pages don't have a physical address,
344 * but we must initialize phys_page to something.
345 * For debugging, this should be a strange value
346 * that the pmap module can recognize in assertions.
347 */
348 const ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
349
350 /*
351 * Guard pages are not accessible so they don't
352 * need a physical address, but we need to enter
353 * one in the pmap.
354 * Let's make it recognizable and make sure that
355 * we don't use a real physical page with that
356 * physical address.
357 */
358 const ppnum_t vm_page_guard_addr = (ppnum_t) -2;
359
360 /*
361 * Resident page structures are also chained on
362 * queues that are used by the page replacement
363 * system (pageout daemon). These queues are
364 * defined here, but are shared by the pageout
365 * module. The inactive queue is broken into
366 * file-backed and anonymous queues for convenience, as the
367 * pageout daemon often assigns a higher
368 * importance to anonymous pages (it is less likely to pick them).
369 */
370 vm_page_queue_head_t vm_page_queue_active __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
371 vm_page_queue_head_t vm_page_queue_inactive __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
372 #if CONFIG_SECLUDED_MEMORY
373 vm_page_queue_head_t vm_page_queue_secluded __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
374 #endif /* CONFIG_SECLUDED_MEMORY */
375 vm_page_queue_head_t vm_page_queue_anonymous __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT))); /* inactive memory queue for anonymous pages */
376 vm_page_queue_head_t vm_page_queue_throttled __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
377
378 queue_head_t vm_objects_wired;
379
380 #if CONFIG_BACKGROUND_QUEUE
381 vm_page_queue_head_t vm_page_queue_background __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
382 uint32_t vm_page_background_target;
383 uint32_t vm_page_background_count;
384 uint64_t vm_page_background_promoted_count;
385
386 uint32_t vm_page_background_internal_count;
387 uint32_t vm_page_background_external_count;
388
389 uint32_t vm_page_background_mode;
390 uint32_t vm_page_background_exclude_external;
391 #endif
392
393 unsigned int vm_page_active_count;
394 unsigned int vm_page_inactive_count;
395 #if CONFIG_SECLUDED_MEMORY
396 unsigned int vm_page_secluded_count;
397 unsigned int vm_page_secluded_count_free;
398 unsigned int vm_page_secluded_count_inuse;
399 #endif /* CONFIG_SECLUDED_MEMORY */
400 unsigned int vm_page_anonymous_count;
401 unsigned int vm_page_throttled_count;
402 unsigned int vm_page_speculative_count;
403
404 unsigned int vm_page_wire_count;
405 unsigned int vm_page_wire_count_on_boot = 0;
406 unsigned int vm_page_stolen_count;
407 unsigned int vm_page_wire_count_initial;
408 unsigned int vm_page_pages_initial;
409 unsigned int vm_page_gobble_count = 0;
410
411 #define VM_PAGE_WIRE_COUNT_WARNING 0
412 #define VM_PAGE_GOBBLE_COUNT_WARNING 0
413
414 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
415 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
416 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
417
418 unsigned int vm_page_xpmapped_external_count = 0;
419 unsigned int vm_page_external_count = 0;
420 unsigned int vm_page_internal_count = 0;
421 unsigned int vm_page_pageable_external_count = 0;
422 unsigned int vm_page_pageable_internal_count = 0;
423
424 #if DEVELOPMENT || DEBUG
425 unsigned int vm_page_speculative_recreated = 0;
426 unsigned int vm_page_speculative_created = 0;
427 unsigned int vm_page_speculative_used = 0;
428 #endif
429
430 vm_page_queue_head_t vm_page_queue_cleaned __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
431
432 unsigned int vm_page_cleaned_count = 0;
433 unsigned int vm_pageout_enqueued_cleaned = 0;
434
435 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
436 ppnum_t max_valid_low_ppnum = 0xffffffff;
437
438
439 /*
440 * Several page replacement parameters are also
441 * shared with this module, so that page allocation
442 * (done here in vm_page_alloc) can trigger the
443 * pageout daemon.
444 */
445 unsigned int vm_page_free_target = 0;
446 unsigned int vm_page_free_min = 0;
447 unsigned int vm_page_throttle_limit = 0;
448 unsigned int vm_page_inactive_target = 0;
449 #if CONFIG_SECLUDED_MEMORY
450 unsigned int vm_page_secluded_target = 0;
451 #endif /* CONFIG_SECLUDED_MEMORY */
452 unsigned int vm_page_anonymous_min = 0;
453 unsigned int vm_page_inactive_min = 0;
454 unsigned int vm_page_free_reserved = 0;
455 unsigned int vm_page_throttle_count = 0;
456
457
458 /*
459 * The VM system has a couple of heuristics for deciding
460 * that pages are "uninteresting" and should be placed
461 * on the inactive queue as likely candidates for replacement.
462 * These variables let the heuristics be controlled at run-time
463 * to make experimentation easier.
464 */
465
466 boolean_t vm_page_deactivate_hint = TRUE;
467
468 struct vm_page_stats_reusable vm_page_stats_reusable;
469
470 /*
471 * vm_set_page_size:
472 *
473 * Sets the page size, perhaps based upon the memory
474 * size. Must be called before any use of page-size
475 * dependent functions.
476 *
477 * Sets page_shift and page_mask from page_size.
478 */
479 void
480 vm_set_page_size(void)
481 {
482 page_size = PAGE_SIZE;
483 page_mask = PAGE_MASK;
484 page_shift = PAGE_SHIFT;
485
486 if ((page_mask & page_size) != 0)
487 panic("vm_set_page_size: page size not a power of two");
488
489 for (page_shift = 0; ; page_shift++)
490 if ((1U << page_shift) == page_size)
491 break;
492 }
493
494 #if defined (__x86_64__)
495
496 #define MAX_CLUMP_SIZE 16
497 #define DEFAULT_CLUMP_SIZE 4
498
499 unsigned int vm_clump_size, vm_clump_mask, vm_clump_shift, vm_clump_promote_threshold;
500
501 #if DEVELOPMENT || DEBUG
502 unsigned long vm_clump_stats[MAX_CLUMP_SIZE+1];
503 unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
504
505 static inline void vm_clump_update_stats(unsigned int c) {
506 assert(c<=vm_clump_size);
507 if(c>0 && c<=vm_clump_size) vm_clump_stats[c]+=c;
508 vm_clump_allocs+=c;
509 }
510 #endif /* if DEVELOPMENT || DEBUG */
511
512 /* Called once to setup the VM clump knobs */
513 static void
514 vm_page_setup_clump( void )
515 {
516 unsigned int override, n;
517
518 vm_clump_size = DEFAULT_CLUMP_SIZE;
519 if ( PE_parse_boot_argn("clump_size", &override, sizeof (override)) ) vm_clump_size = override;
520
521 if(vm_clump_size > MAX_CLUMP_SIZE) panic("vm_page_setup_clump:: clump_size is too large!");
522 if(vm_clump_size < 1) panic("vm_page_setup_clump:: clump_size must be >= 1");
523 if((vm_clump_size & (vm_clump_size-1)) != 0) panic("vm_page_setup_clump:: clump_size must be a power of 2");
524
525 vm_clump_promote_threshold = vm_clump_size;
526 vm_clump_mask = vm_clump_size - 1;
527 for(vm_clump_shift=0, n=vm_clump_size; n>1; n>>=1, vm_clump_shift++);
528
529 #if DEVELOPMENT || DEBUG
530 bzero(vm_clump_stats, sizeof(vm_clump_stats));
531 vm_clump_allocs = vm_clump_inserts = vm_clump_inrange = vm_clump_promotes = 0;
532 #endif /* if DEVELOPMENT || DEBUG */
533 }
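/*
 * Worked example (not part of the original source): with the default
 * clump_size of 4 and no "clump_size" boot-arg override, the setup
 * above yields vm_clump_mask = 3, vm_clump_shift = 2 and
 * vm_clump_promote_threshold = 4.
 */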
534
535 #endif /* #if defined (__x86_64__) */
536
537 #define COLOR_GROUPS_TO_STEAL 4
538
539 /* Called once during startup, once the cache geometry is known.
540 */
541 static void
542 vm_page_set_colors( void )
543 {
544 unsigned int n, override;
545
546 #if defined (__x86_64__)
547 /* adjust #colors because we need to color outside the clump boundary */
548 vm_cache_geometry_colors >>= vm_clump_shift;
549 #endif
550 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
551 n = override;
552 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
553 n = vm_cache_geometry_colors;
554 else n = DEFAULT_COLORS; /* use default if all else fails */
555
556 if ( n == 0 )
557 n = 1;
558 if ( n > MAX_COLORS )
559 n = MAX_COLORS;
560
561 /* the count must be a power of 2 */
562 if ( ( n & (n - 1)) != 0 )
563 n = DEFAULT_COLORS; /* use default if all else fails */
564
565 vm_colors = n;
566 vm_color_mask = n - 1;
567
568 vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
569
570 #if defined (__x86_64__)
571 /* adjust for reduction in colors due to clumping and multiple cores */
572 if (real_ncpus)
573 vm_free_magazine_refill_limit *= (vm_clump_size * real_ncpus);
574 #endif
575 }
576
577
578 lck_grp_t vm_page_lck_grp_free;
579 lck_grp_t vm_page_lck_grp_queue;
580 lck_grp_t vm_page_lck_grp_local;
581 lck_grp_t vm_page_lck_grp_purge;
582 lck_grp_t vm_page_lck_grp_alloc;
583 lck_grp_t vm_page_lck_grp_bucket;
584 lck_grp_attr_t vm_page_lck_grp_attr;
585 lck_attr_t vm_page_lck_attr;
586
587
588 __private_extern__ void
589 vm_page_init_lck_grp(void)
590 {
591 /*
592 * initialize the vm_page lock world
593 */
594 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
595 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
596 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
597 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
598 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
599 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
600 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
601 lck_attr_setdefault(&vm_page_lck_attr);
602 lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
603
604 vm_compressor_init_locks();
605 }
606
607 void
608 vm_page_init_local_q()
609 {
610 unsigned int num_cpus;
611 unsigned int i;
612 struct vplq *t_local_q;
613
614 num_cpus = ml_get_max_cpus();
615
616 /*
617 * no point in this for a uni-processor system
618 */
619 if (num_cpus >= 2) {
620 #if KASAN
621 /* KASAN breaks the expectation of a size-aligned object by adding a
622 * redzone, so explicitly align. */
623 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq) + VM_PACKED_POINTER_ALIGNMENT);
624 t_local_q = (void *)(((uintptr_t)t_local_q + (VM_PACKED_POINTER_ALIGNMENT-1)) & ~(VM_PACKED_POINTER_ALIGNMENT-1));
625 #else
626 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
627 #endif
628
629 for (i = 0; i < num_cpus; i++) {
630 struct vpl *lq;
631
632 lq = &t_local_q[i].vpl_un.vpl;
633 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
634 vm_page_queue_init(&lq->vpl_queue);
635 lq->vpl_count = 0;
636 lq->vpl_internal_count = 0;
637 lq->vpl_external_count = 0;
638 }
639 vm_page_local_q_count = num_cpus;
640
641 vm_page_local_q = (struct vplq *)t_local_q;
642 }
643 }
644
645 /*
646 * vm_init_before_launchd
647 *
648 * This should be called right before launchd is loaded.
649 */
650 void
651 vm_init_before_launchd()
652 {
653 vm_page_wire_count_on_boot = vm_page_wire_count;
654 }
655
656
657 /*
658 * vm_page_bootstrap:
659 *
660 * Initializes the resident memory module.
661 *
662 * Allocates memory for the page cells, and
663 * for the object/offset-to-page hash table headers.
664 * Each page cell is initialized and placed on the free list.
665 * Returns the range of available kernel virtual memory.
666 */
667
668 void
669 vm_page_bootstrap(
670 vm_offset_t *startp,
671 vm_offset_t *endp)
672 {
673 vm_page_t m;
674 unsigned int i;
675 unsigned int log1;
676 unsigned int log2;
677 unsigned int size;
678
679 /*
680 * Initialize the vm_page template.
681 */
682
683 m = &vm_page_template;
684 bzero(m, sizeof (*m));
685
686 #if CONFIG_BACKGROUND_QUEUE
687 m->vm_page_backgroundq.next = 0;
688 m->vm_page_backgroundq.prev = 0;
689 m->vm_page_in_background = FALSE;
690 m->vm_page_on_backgroundq = FALSE;
691 #endif
692
693 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
694 m->listq.next = 0;
695 m->listq.prev = 0;
696 m->next_m = 0;
697
698 m->vm_page_object = 0; /* reset later */
699 m->offset = (vm_object_offset_t) -1; /* reset later */
700
701 m->wire_count = 0;
702 m->vm_page_q_state = VM_PAGE_NOT_ON_Q;
703 m->laundry = FALSE;
704 m->reference = FALSE;
705 m->gobbled = FALSE;
706 m->private = FALSE;
707 m->__unused_pageq_bits = 0;
708
709 #if !defined(__arm__) && !defined(__arm64__)
710 VM_PAGE_SET_PHYS_PAGE(m, 0); /* reset later */
711 #endif
712 m->busy = TRUE;
713 m->wanted = FALSE;
714 m->tabled = FALSE;
715 m->hashed = FALSE;
716 m->fictitious = FALSE;
717 m->pmapped = FALSE;
718 m->wpmapped = FALSE;
719 m->free_when_done = FALSE;
720 m->absent = FALSE;
721 m->error = FALSE;
722 m->dirty = FALSE;
723 m->cleaning = FALSE;
724 m->precious = FALSE;
725 m->clustered = FALSE;
726 m->overwriting = FALSE;
727 m->restart = FALSE;
728 m->unusual = FALSE;
729 m->cs_validated = FALSE;
730 m->cs_tainted = FALSE;
731 m->cs_nx = FALSE;
732 m->no_cache = FALSE;
733 m->reusable = FALSE;
734 m->slid = FALSE;
735 m->xpmapped = FALSE;
736 m->written_by_kernel = FALSE;
737 m->__unused_object_bits = 0;
738
739 /*
740 * Initialize the page queues.
741 */
742 vm_page_init_lck_grp();
743
744 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
745 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
746 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
747
748 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
749 int group;
750
751 purgeable_queues[i].token_q_head = 0;
752 purgeable_queues[i].token_q_tail = 0;
753 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
754 queue_init(&purgeable_queues[i].objq[group]);
755
756 purgeable_queues[i].type = i;
757 purgeable_queues[i].new_pages = 0;
758 #if MACH_ASSERT
759 purgeable_queues[i].debug_count_tokens = 0;
760 purgeable_queues[i].debug_count_objects = 0;
761 #endif
762 };
763 purgeable_nonvolatile_count = 0;
764 queue_init(&purgeable_nonvolatile_queue);
765
766 for (i = 0; i < MAX_COLORS; i++ )
767 vm_page_queue_init(&vm_page_queue_free[i].qhead);
768
769 vm_page_queue_init(&vm_lopage_queue_free);
770 vm_page_queue_init(&vm_page_queue_active);
771 vm_page_queue_init(&vm_page_queue_inactive);
772 #if CONFIG_SECLUDED_MEMORY
773 vm_page_queue_init(&vm_page_queue_secluded);
774 #endif /* CONFIG_SECLUDED_MEMORY */
775 vm_page_queue_init(&vm_page_queue_cleaned);
776 vm_page_queue_init(&vm_page_queue_throttled);
777 vm_page_queue_init(&vm_page_queue_anonymous);
778 queue_init(&vm_objects_wired);
779
780 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
781 vm_page_queue_init(&vm_page_queue_speculative[i].age_q);
782
783 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
784 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
785 }
786 #if CONFIG_BACKGROUND_QUEUE
787 vm_page_queue_init(&vm_page_queue_background);
788
789 vm_page_background_count = 0;
790 vm_page_background_internal_count = 0;
791 vm_page_background_external_count = 0;
792 vm_page_background_promoted_count = 0;
793
794 vm_page_background_target = (unsigned int)(atop_64(max_mem) / 25);
795
796 if (vm_page_background_target > VM_PAGE_BACKGROUND_TARGET_MAX)
797 vm_page_background_target = VM_PAGE_BACKGROUND_TARGET_MAX;
798
799 vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
800 vm_page_background_exclude_external = 0;
801
802 PE_parse_boot_argn("vm_page_bg_mode", &vm_page_background_mode, sizeof(vm_page_background_mode));
803 PE_parse_boot_argn("vm_page_bg_exclude_external", &vm_page_background_exclude_external, sizeof(vm_page_background_exclude_external));
804 PE_parse_boot_argn("vm_page_bg_target", &vm_page_background_target, sizeof(vm_page_background_target));
805
806 if (vm_page_background_mode > VM_PAGE_BG_LEVEL_1)
807 vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
808 #endif
809 vm_page_free_wanted = 0;
810 vm_page_free_wanted_privileged = 0;
811 #if CONFIG_SECLUDED_MEMORY
812 vm_page_free_wanted_secluded = 0;
813 #endif /* CONFIG_SECLUDED_MEMORY */
814
815 #if defined (__x86_64__)
816 /* this must be called before vm_page_set_colors() */
817 vm_page_setup_clump();
818 #endif
819
820 vm_page_set_colors();
821
822 bzero(vm_page_inactive_states, sizeof(vm_page_inactive_states));
823 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
824 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
825 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
826
827 bzero(vm_page_pageable_states, sizeof(vm_page_pageable_states));
828 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
829 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
830 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
831 vm_page_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
832 vm_page_pageable_states[VM_PAGE_ON_SPECULATIVE_Q] = 1;
833 vm_page_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
834 #if CONFIG_SECLUDED_MEMORY
835 vm_page_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
836 #endif /* CONFIG_SECLUDED_MEMORY */
837
838 bzero(vm_page_non_speculative_pageable_states, sizeof(vm_page_non_speculative_pageable_states));
839 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
840 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
841 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
842 vm_page_non_speculative_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
843 vm_page_non_speculative_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
844 #if CONFIG_SECLUDED_MEMORY
845 vm_page_non_speculative_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
846 #endif /* CONFIG_SECLUDED_MEMORY */
847
848 bzero(vm_page_active_or_inactive_states, sizeof(vm_page_active_or_inactive_states));
849 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
850 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
851 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
852 vm_page_active_or_inactive_states[VM_PAGE_ON_ACTIVE_Q] = 1;
853 #if CONFIG_SECLUDED_MEMORY
854 vm_page_active_or_inactive_states[VM_PAGE_ON_SECLUDED_Q] = 1;
855 #endif /* CONFIG_SECLUDED_MEMORY */
856
857 for (i = 0; i < VM_KERN_MEMORY_FIRST_DYNAMIC; i++)
858 {
859 vm_allocation_sites_static[i].refcount = 2;
860 vm_allocation_sites_static[i].tag = i;
861 vm_allocation_sites[i] = &vm_allocation_sites_static[i];
862 }
863 vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC].refcount = 2;
864 vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC].tag = VM_KERN_MEMORY_ANY;
865 vm_allocation_sites[VM_KERN_MEMORY_ANY] = &vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC];
866
867 /*
868 * Steal memory for the map and zone subsystems.
869 */
870 #if CONFIG_GZALLOC
871 gzalloc_configure();
872 #endif
873 kernel_debug_string_early("vm_map_steal_memory");
874 vm_map_steal_memory();
875
876 /*
877 * Allocate (and initialize) the virtual-to-physical
878 * table hash buckets.
879 *
880 * The number of buckets should be a power of two to
881 * get a good hash function. The following computation
882 * chooses the first power of two that is at least as
883 * large as the number of physical pages in the system.
884 */
885
886 if (vm_page_bucket_count == 0) {
887 unsigned int npages = pmap_free_pages();
888
889 vm_page_bucket_count = 1;
890 while (vm_page_bucket_count < npages)
891 vm_page_bucket_count <<= 1;
892 }
893 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
894
895 vm_page_hash_mask = vm_page_bucket_count - 1;
896
897 /*
898 * Calculate object shift value for hashing algorithm:
899 * O = log2(sizeof(struct vm_object))
900 * B = log2(vm_page_bucket_count)
901 * hash shifts the object left by
902 * B/2 - O + 1
903 */
904 size = vm_page_bucket_count;
905 for (log1 = 0; size > 1; log1++)
906 size /= 2;
907 size = sizeof(struct vm_object);
908 for (log2 = 0; size > 1; log2++)
909 size /= 2;
910 vm_page_hash_shift = log1/2 - log2 + 1;
911
912 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
913 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
914 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to ensure unique series */
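/*
 * Worked example (not part of the original source, using hypothetical
 * sizes): if vm_page_bucket_count were 2^20 and sizeof(struct vm_object)
 * were 256 bytes (2^8), then log1 = 20 and log2 = 8, so
 * vm_page_hash_shift = 20/2 - 8 + 1 = 3 and
 * vm_page_bucket_hash = (1 << 10) | (1 << 5) | 1 = 0x421.
 */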
915
916 if (vm_page_hash_mask & vm_page_bucket_count)
917 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
918
919 #if VM_PAGE_BUCKETS_CHECK
920 #if VM_PAGE_FAKE_BUCKETS
921 /*
922 * Allocate a decoy set of page buckets, to detect
923 * any stomping there.
924 */
925 vm_page_fake_buckets = (vm_page_bucket_t *)
926 pmap_steal_memory(vm_page_bucket_count *
927 sizeof(vm_page_bucket_t));
928 vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
929 vm_page_fake_buckets_end =
930 vm_map_round_page((vm_page_fake_buckets_start +
931 (vm_page_bucket_count *
932 sizeof (vm_page_bucket_t))),
933 PAGE_MASK);
934 char *cp;
935 for (cp = (char *)vm_page_fake_buckets_start;
936 cp < (char *)vm_page_fake_buckets_end;
937 cp++) {
938 *cp = 0x5a;
939 }
940 #endif /* VM_PAGE_FAKE_BUCKETS */
941 #endif /* VM_PAGE_BUCKETS_CHECK */
942
943 kernel_debug_string_early("vm_page_buckets");
944 vm_page_buckets = (vm_page_bucket_t *)
945 pmap_steal_memory(vm_page_bucket_count *
946 sizeof(vm_page_bucket_t));
947
948 kernel_debug_string_early("vm_page_bucket_locks");
949 vm_page_bucket_locks = (lck_spin_t *)
950 pmap_steal_memory(vm_page_bucket_lock_count *
951 sizeof(lck_spin_t));
952
953 for (i = 0; i < vm_page_bucket_count; i++) {
954 vm_page_bucket_t *bucket = &vm_page_buckets[i];
955
956 bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
957 #if MACH_PAGE_HASH_STATS
958 bucket->cur_count = 0;
959 bucket->hi_count = 0;
960 #endif /* MACH_PAGE_HASH_STATS */
961 }
962
963 for (i = 0; i < vm_page_bucket_lock_count; i++)
964 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
965
966 lck_spin_init(&vm_objects_wired_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
967 lck_spin_init(&vm_allocation_sites_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
968 vm_tag_init();
969
970 #if VM_PAGE_BUCKETS_CHECK
971 vm_page_buckets_check_ready = TRUE;
972 #endif /* VM_PAGE_BUCKETS_CHECK */
973
974 /*
975 * Machine-dependent code allocates the resident page table.
976 * It uses vm_page_init to initialize the page frames.
977 * The code also returns to us the virtual space available
978 * to the kernel. We don't trust the pmap module
979 * to get the alignment right.
980 */
981
982 kernel_debug_string_early("pmap_startup");
983 pmap_startup(&virtual_space_start, &virtual_space_end);
984 virtual_space_start = round_page(virtual_space_start);
985 virtual_space_end = trunc_page(virtual_space_end);
986
987 *startp = virtual_space_start;
988 *endp = virtual_space_end;
989
990 /*
991 * Compute the initial "wire" count.
992 * Up until now, the pages which have been set aside are not under
993 * the VM system's control, so although they aren't explicitly
994 * wired, they nonetheless can't be moved. At this moment,
995 * all VM managed pages are "free", courtesy of pmap_startup.
996 */
997 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
998 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
999 #if CONFIG_SECLUDED_MEMORY
1000 vm_page_wire_count -= vm_page_secluded_count;
1001 #endif
1002 vm_page_wire_count_initial = vm_page_wire_count;
1003 vm_page_pages_initial = vm_page_pages;
1004
1005 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
1006 vm_page_free_count, vm_page_wire_count);
1007
1008 kernel_debug_string_early("vm_page_bootstrap complete");
1009 simple_lock_init(&vm_paging_lock, 0);
1010 }
1011
1012 #ifndef MACHINE_PAGES
1013 /*
1014 * We implement pmap_steal_memory and pmap_startup with the help
1015 * of two simpler functions, pmap_virtual_space and pmap_next_page.
1016 */
1017
1018 void *
1019 pmap_steal_memory(
1020 vm_size_t size)
1021 {
1022 kern_return_t kr;
1023 vm_offset_t addr, vaddr;
1024 ppnum_t phys_page;
1025
1026 /*
1027 * We round the size up to a multiple of the pointer size.
1028 */
1029
1030 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
1031
1032 /*
1033 * If this is the first call to pmap_steal_memory,
1034 * we have to initialize ourself.
1035 */
1036
1037 if (virtual_space_start == virtual_space_end) {
1038 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
1039
1040 /*
1041 * The initial values must be aligned properly, and
1042 * we don't trust the pmap module to do it right.
1043 */
1044
1045 virtual_space_start = round_page(virtual_space_start);
1046 virtual_space_end = trunc_page(virtual_space_end);
1047 }
1048
1049 /*
1050 * Allocate virtual memory for this request.
1051 */
1052
1053 addr = virtual_space_start;
1054 virtual_space_start += size;
1055
1056 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
1057
1058 /*
1059 * Allocate and map physical pages to back new virtual pages.
1060 */
1061
1062 for (vaddr = round_page(addr);
1063 vaddr < addr + size;
1064 vaddr += PAGE_SIZE) {
1065
1066 if (!pmap_next_page_hi(&phys_page))
1067 panic("pmap_steal_memory() size: 0x%llx\n", (uint64_t)size);
1068
1069 /*
1070 * XXX Logically, these mappings should be wired,
1071 * but some pmap modules barf if they are.
1072 */
1073 #if defined(__LP64__)
1074 #ifdef __arm64__
1075 /* ARM64_TODO: verify that we really don't need this */
1076 #else
1077 pmap_pre_expand(kernel_pmap, vaddr);
1078 #endif
1079 #endif
1080
1081 kr = pmap_enter(kernel_pmap, vaddr, phys_page,
1082 VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
1083 VM_WIMG_USE_DEFAULT, FALSE);
1084
1085 if (kr != KERN_SUCCESS) {
1086 panic("pmap_steal_memory() pmap_enter failed, vaddr=%#lx, phys_page=%u",
1087 (unsigned long)vaddr, phys_page);
1088 }
1089
1090 /*
1091 * Account for newly stolen memory
1092 */
1093 vm_page_wire_count++;
1094 vm_page_stolen_count++;
1095 }
1096
1097 #if KASAN
1098 kasan_notify_address(round_page(addr), size);
1099 #endif
1100 return (void *) addr;
1101 }
1102
1103 #if CONFIG_SECLUDED_MEMORY
1104 /* boot-args to control secluded memory */
1105 unsigned int secluded_mem_mb = 0; /* # of MBs of RAM to seclude */
1106 int secluded_for_iokit = 1; /* IOKit can use secluded memory */
1107 int secluded_for_apps = 1; /* apps can use secluded memory */
1108 int secluded_for_filecache = 2; /* filecache can use secluded memory */
1109 #if 11
1110 int secluded_for_fbdp = 0;
1111 #endif
1112 #endif /* CONFIG_SECLUDED_MEMORY */
1113
1114
1115 #if defined(__arm__) || defined(__arm64__)
1116 extern void patch_low_glo_vm_page_info(void *, void *, uint32_t);
1117 unsigned int vm_first_phys_ppnum = 0;
1118 #endif
1119
1120
1121 void vm_page_release_startup(vm_page_t mem);
1122 void
1123 pmap_startup(
1124 vm_offset_t *startp,
1125 vm_offset_t *endp)
1126 {
1127 unsigned int i, npages, pages_initialized, fill, fillval;
1128 ppnum_t phys_page;
1129 addr64_t tmpaddr;
1130
1131 #if defined(__LP64__)
1132 /*
1133 * make sure we are aligned on a 64 byte boundary
1134 * for VM_PAGE_PACK_PTR (it clips off the low-order
1135 * 6 bits of the pointer)
1136 */
1137 if (virtual_space_start != virtual_space_end)
1138 virtual_space_start = round_page(virtual_space_start);
1139 #endif
1140
1141 /*
1142 * We calculate how many page frames we will have
1143 * and then allocate the page structures in one chunk.
1144 */
1145
1146 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
1147 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
1148 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* How many page frames fit, counting a vm_page_t for each */
1149
1150 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
1151
1152 /*
1153 * Initialize the page frames.
1154 */
1155 kernel_debug_string_early("Initialize the page frames");
1156
1157 vm_page_array_beginning_addr = &vm_pages[0];
1158 vm_page_array_ending_addr = &vm_pages[npages];
1159
1160 for (i = 0, pages_initialized = 0; i < npages; i++) {
1161 if (!pmap_next_page(&phys_page))
1162 break;
1163 #if defined(__arm__) || defined(__arm64__)
1164 if (pages_initialized == 0) {
1165 vm_first_phys_ppnum = phys_page;
1166 patch_low_glo_vm_page_info((void *)vm_page_array_beginning_addr, (void *)vm_page_array_ending_addr, vm_first_phys_ppnum);
1167 }
1168 assert((i + vm_first_phys_ppnum) == phys_page);
1169 #endif
1170 if (pages_initialized == 0 || phys_page < vm_page_lowest)
1171 vm_page_lowest = phys_page;
1172
1173 vm_page_init(&vm_pages[i], phys_page, FALSE);
1174 vm_page_pages++;
1175 pages_initialized++;
1176 }
1177 vm_pages_count = pages_initialized;
1178 vm_page_array_boundary = &vm_pages[pages_initialized];
1179
1180 #if defined(__LP64__)
1181
1182 if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0]))) != &vm_pages[0])
1183 panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);
1184
1185 if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1]))) != &vm_pages[vm_pages_count-1])
1186 panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
1187 #endif
1188 kernel_debug_string_early("page fill/release");
1189 /*
1190 * Check if we want to initialize pages to a known value
1191 */
1192 fill = 0; /* Assume no fill */
1193 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
1194 #if DEBUG
1195 /* This slows down booting the DEBUG kernel, particularly on
1196 * large memory systems, but is worthwhile in deterministically
1197 * trapping uninitialized memory usage.
1198 */
1199 if (fill == 0) {
1200 fill = 1;
1201 fillval = 0xDEB8F177;
1202 }
1203 #endif
1204 if (fill)
1205 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
1206
1207 #if CONFIG_SECLUDED_MEMORY
1208 /* default: no secluded mem */
1209 secluded_mem_mb = 0;
1210 if (max_mem > 1*1024*1024*1024) {
1211 /* default to 90MB for devices with > 1GB of RAM */
1212 secluded_mem_mb = 90;
1213 }
1214 /* override with value from device tree, if provided */
1215 PE_get_default("kern.secluded_mem_mb",
1216 &secluded_mem_mb, sizeof(secluded_mem_mb));
1217 /* override with value from boot-args, if provided */
1218 PE_parse_boot_argn("secluded_mem_mb",
1219 &secluded_mem_mb,
1220 sizeof (secluded_mem_mb));
1221
1222 vm_page_secluded_target = (unsigned int)
1223 ((secluded_mem_mb * 1024ULL * 1024ULL) / PAGE_SIZE);
1224 PE_parse_boot_argn("secluded_for_iokit",
1225 &secluded_for_iokit,
1226 sizeof (secluded_for_iokit));
1227 PE_parse_boot_argn("secluded_for_apps",
1228 &secluded_for_apps,
1229 sizeof (secluded_for_apps));
1230 PE_parse_boot_argn("secluded_for_filecache",
1231 &secluded_for_filecache,
1232 sizeof (secluded_for_filecache));
1233 #if 11
1234 PE_parse_boot_argn("secluded_for_fbdp",
1235 &secluded_for_fbdp,
1236 sizeof (secluded_for_fbdp));
1237 #endif
1238 #endif /* CONFIG_SECLUDED_MEMORY */
1239
1240 // -debug code remove
1241 if (2 == vm_himemory_mode) {
1242 // free low -> high so high is preferred
1243 for (i = 1; i <= pages_initialized; i++) {
1244 if(fill) fillPage(VM_PAGE_GET_PHYS_PAGE(&vm_pages[i - 1]), fillval); /* Fill the page with a known value if requested at boot */
1245 vm_page_release_startup(&vm_pages[i - 1]);
1246 }
1247 }
1248 else
1249 // debug code remove-
1250
1251 /*
1252 * Release pages in reverse order so that physical pages
1253 * initially get allocated in ascending addresses. This keeps
1254 * the devices (which must address physical memory) happy if
1255 * they require several consecutive pages.
1256 */
1257 for (i = pages_initialized; i > 0; i--) {
1258 if(fill) fillPage(VM_PAGE_GET_PHYS_PAGE(&vm_pages[i - 1]), fillval); /* Fill the page with a known value if requested at boot */
1259 vm_page_release_startup(&vm_pages[i - 1]);
1260 }
1261
1262 VM_CHECK_MEMORYSTATUS;
1263
1264 #if 0
1265 {
1266 vm_page_t xx, xxo, xxl;
1267 int i, j, k, l;
1268
1269 j = 0; /* (BRINGUP) */
1270 xxl = 0;
1271
1272 for( i = 0; i < vm_colors; i++ ) {
1273 queue_iterate(&vm_page_queue_free[i].qhead,
1274 xx,
1275 vm_page_t,
1276 pageq) { /* BRINGUP */
1277 j++; /* (BRINGUP) */
1278 if(j > vm_page_free_count) { /* (BRINGUP) */
1279 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
1280 }
1281
1282 l = vm_page_free_count - j; /* (BRINGUP) */
1283 k = 0; /* (BRINGUP) */
1284
1285 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
1286
1287 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i].qhead; xxo = xxo->pageq.next) { /* (BRINGUP) */
1288 k++;
1289 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
1290 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
1291 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
1292 }
1293 }
1294
1295 xxl = xx;
1296 }
1297 }
1298
1299 if(j != vm_page_free_count) { /* (BRINGUP) */
1300 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
1301 }
1302 }
1303 #endif
1304
1305
1306 /*
1307 * We have to re-align virtual_space_start,
1308 * because pmap_steal_memory has been using it.
1309 */
1310
1311 virtual_space_start = round_page(virtual_space_start);
1312
1313 *startp = virtual_space_start;
1314 *endp = virtual_space_end;
1315 }
1316 #endif /* MACHINE_PAGES */
1317
1318 /*
1319 * Routine: vm_page_module_init
1320 * Purpose:
1321 * Second initialization pass, to be done after
1322 * the basic VM system is ready.
1323 */
1324 void
1325 vm_page_module_init(void)
1326 {
1327 uint64_t vm_page_zone_pages, vm_page_array_zone_data_size;
1328 vm_size_t vm_page_with_ppnum_size;
1329
1330 vm_page_array_zone = zinit((vm_size_t) sizeof(struct vm_page),
1331 0, PAGE_SIZE, "vm pages array");
1332
1333 zone_change(vm_page_array_zone, Z_CALLERACCT, FALSE);
1334 zone_change(vm_page_array_zone, Z_EXPAND, FALSE);
1335 zone_change(vm_page_array_zone, Z_EXHAUST, TRUE);
1336 zone_change(vm_page_array_zone, Z_FOREIGN, TRUE);
1337 zone_change(vm_page_array_zone, Z_GZALLOC_EXEMPT, TRUE);
1338 /*
1339 * Adjust zone statistics to account for the real pages allocated
1340 * in vm_page_create(). [Q: is this really what we want?]
1341 */
1342 vm_page_array_zone->count += vm_page_pages;
1343 vm_page_array_zone->sum_count += vm_page_pages;
1344 vm_page_array_zone_data_size = vm_page_pages * vm_page_array_zone->elem_size;
1345 vm_page_array_zone->cur_size += vm_page_array_zone_data_size;
1346 vm_page_zone_pages = ((round_page(vm_page_array_zone_data_size)) / PAGE_SIZE);
1347 OSAddAtomic64(vm_page_zone_pages, &(vm_page_array_zone->page_count));
1348 /* since zone accounts for these, take them out of stolen */
1349 VM_PAGE_MOVE_STOLEN(vm_page_zone_pages);
1350
1351 vm_page_with_ppnum_size = (sizeof(struct vm_page_with_ppnum) + (VM_PACKED_POINTER_ALIGNMENT-1)) & ~(VM_PACKED_POINTER_ALIGNMENT - 1);
1352
1353 vm_page_zone = zinit(vm_page_with_ppnum_size,
1354 0, PAGE_SIZE, "vm pages");
1355
1356 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
1357 zone_change(vm_page_zone, Z_EXPAND, FALSE);
1358 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
1359 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
1360 zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
1361 zone_change(vm_page_zone, Z_ALIGNMENT_REQUIRED, TRUE);
1362 }
1363
1364 /*
1365 * Routine: vm_page_create
1366 * Purpose:
1367 * After the VM system is up, machine-dependent code
1368 * may stumble across more physical memory. For example,
1369 * memory that it was reserving for a frame buffer.
1370 * vm_page_create turns this memory into available pages.
1371 */
1372
1373 void
1374 vm_page_create(
1375 ppnum_t start,
1376 ppnum_t end)
1377 {
1378 ppnum_t phys_page;
1379 vm_page_t m;
1380
1381 for (phys_page = start;
1382 phys_page < end;
1383 phys_page++) {
1384 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
1385 == VM_PAGE_NULL)
1386 vm_page_more_fictitious();
1387
1388 m->fictitious = FALSE;
1389 pmap_clear_noencrypt(phys_page);
1390
1391 vm_page_pages++;
1392 vm_page_release(m, FALSE);
1393 }
1394 }
1395
1396 /*
1397 * vm_page_hash:
1398 *
1399 * Distributes the object/offset key pair among hash buckets.
1400 *
1401 * NOTE: The bucket count must be a power of 2
1402 */
1403 #define vm_page_hash(object, offset) (\
1404 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
1405 & vm_page_hash_mask)
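/*
 * Illustrative sketch (not part of the original source): the hypothetical
 * helper below shows how the hash above pairs with vm_page_buckets[] and
 * vm_page_bucket_locks[] to find a page.  The real vm_page_lookup(),
 * later in this file, adds a "memq_hint" fast path on the object.
 */
#if 0 /* sketch only */
static vm_page_t
vm_page_bucket_lookup_sketch(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	int			hash_id = vm_page_hash(object, offset);
	vm_page_bucket_t	*bucket = &vm_page_buckets[hash_id];
	lck_spin_t		*bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
	vm_page_t		m;

	lck_spin_lock(bucket_lock);
	for (m = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
	     m != VM_PAGE_NULL;
	     m = (vm_page_t)(VM_PAGE_UNPACK_PTR(m->next_m))) {
		/* compare the packed object pointer and the offset */
		if (m->vm_page_object == VM_PAGE_PACK_OBJECT(object) &&
		    m->offset == offset)
			break;
	}
	lck_spin_unlock(bucket_lock);
	return m;
}
#endif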
1406
1407
1408 /*
1409 * vm_page_insert: [ internal use only ]
1410 *
1411 * Inserts the given mem entry into the object/object-page
1412 * table and object list.
1413 *
1414 * The object must be locked.
1415 */
1416 void
1417 vm_page_insert(
1418 vm_page_t mem,
1419 vm_object_t object,
1420 vm_object_offset_t offset)
1421 {
1422 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, TRUE, FALSE, FALSE, NULL);
1423 }
1424
1425 void
1426 vm_page_insert_wired(
1427 vm_page_t mem,
1428 vm_object_t object,
1429 vm_object_offset_t offset,
1430 vm_tag_t tag)
1431 {
1432 vm_page_insert_internal(mem, object, offset, tag, FALSE, TRUE, FALSE, FALSE, NULL);
1433 }
1434
1435 void
1436 vm_page_insert_internal(
1437 vm_page_t mem,
1438 vm_object_t object,
1439 vm_object_offset_t offset,
1440 vm_tag_t tag,
1441 boolean_t queues_lock_held,
1442 boolean_t insert_in_hash,
1443 boolean_t batch_pmap_op,
1444 boolean_t batch_accounting,
1445 uint64_t *delayed_ledger_update)
1446 {
1447 vm_page_bucket_t *bucket;
1448 lck_spin_t *bucket_lock;
1449 int hash_id;
1450 task_t owner;
1451
1452 XPR(XPR_VM_PAGE,
1453 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1454 object, offset, mem, 0,0);
1455 #if 0
1456 /*
1457 * we may not hold the page queue lock
1458 * so this check isn't safe to make
1459 */
1460 VM_PAGE_CHECK(mem);
1461 #endif
1462
1463 assert(page_aligned(offset));
1464
1465 assert(!VM_PAGE_WIRED(mem) || mem->private || mem->fictitious || (tag != VM_KERN_MEMORY_NONE));
1466
1467 /* the vm_submap_object is only a placeholder for submaps */
1468 assert(object != vm_submap_object);
1469
1470 vm_object_lock_assert_exclusive(object);
1471 LCK_MTX_ASSERT(&vm_page_queue_lock,
1472 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1473 : LCK_MTX_ASSERT_NOTOWNED);
1474
1475 if (queues_lock_held == FALSE)
1476 assert(!VM_PAGE_PAGEABLE(mem));
1477
1478 if (insert_in_hash == TRUE) {
1479 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1480 if (mem->tabled || mem->vm_page_object)
1481 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1482 "already in (obj=%p,off=0x%llx)",
1483 mem, object, offset, VM_PAGE_OBJECT(mem), mem->offset);
1484 #endif
1485 if (object->internal && (offset >= object->vo_size)) {
1486 panic("vm_page_insert_internal: (page=%p,obj=%p,off=0x%llx,size=0x%llx) inserted at offset past object bounds",
1487 mem, object, offset, object->vo_size);
1488 }
1489
1490 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1491
1492 /*
1493 * Record the object/offset pair in this page
1494 */
1495
1496 mem->vm_page_object = VM_PAGE_PACK_OBJECT(object);
1497 mem->offset = offset;
1498
1499 #if CONFIG_SECLUDED_MEMORY
1500 if (object->eligible_for_secluded) {
1501 vm_page_secluded.eligible_for_secluded++;
1502 }
1503 #endif /* CONFIG_SECLUDED_MEMORY */
1504
1505 /*
1506 * Insert it into the object/offset hash table
1507 */
1508 hash_id = vm_page_hash(object, offset);
1509 bucket = &vm_page_buckets[hash_id];
1510 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1511
1512 lck_spin_lock(bucket_lock);
1513
1514 mem->next_m = bucket->page_list;
1515 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1516 assert(mem == (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list)));
1517
1518 #if MACH_PAGE_HASH_STATS
1519 if (++bucket->cur_count > bucket->hi_count)
1520 bucket->hi_count = bucket->cur_count;
1521 #endif /* MACH_PAGE_HASH_STATS */
1522 mem->hashed = TRUE;
1523 lck_spin_unlock(bucket_lock);
1524 }
1525
1526 {
1527 unsigned int cache_attr;
1528
1529 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1530
1531 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1532 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1533 }
1534 }
1535 /*
1536 * Now link into the object's list of backed pages.
1537 */
1538 vm_page_queue_enter(&object->memq, mem, vm_page_t, listq);
1539 object->memq_hint = mem;
1540 mem->tabled = TRUE;
1541
1542 /*
1543 * Show that the object has one more resident page.
1544 */
1545
1546 object->resident_page_count++;
1547 if (VM_PAGE_WIRED(mem)) {
1548 assert(mem->wire_count > 0);
1549 VM_OBJECT_WIRED_PAGE_UPDATE_START(object);
1550 VM_OBJECT_WIRED_PAGE_ADD(object, mem);
1551 VM_OBJECT_WIRED_PAGE_UPDATE_END(object, tag);
1552 }
1553 assert(object->resident_page_count >= object->wired_page_count);
1554
1555 if (batch_accounting == FALSE) {
1556 if (object->internal) {
1557 OSAddAtomic(1, &vm_page_internal_count);
1558 } else {
1559 OSAddAtomic(1, &vm_page_external_count);
1560 }
1561 }
1562
1563 /*
1564 * It wouldn't make sense to insert a "reusable" page in
1565 * an object (the page would have been marked "reusable" only
1566 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1567 * in the object at that time).
1568 * But a page could be inserted in an "all_reusable" object, if
1569 * something faults it in (a vm_read() from another task or a
1570 * "use-after-free" issue in user space, for example). It can
1571 * also happen if we're relocating a page from that object to
1572 * a different physical page during a physically-contiguous
1573 * allocation.
1574 */
1575 assert(!mem->reusable);
1576 if (object->all_reusable) {
1577 OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1578 }
1579
1580 if (object->purgable == VM_PURGABLE_DENY) {
1581 owner = TASK_NULL;
1582 } else {
1583 owner = object->vo_purgeable_owner;
1584 }
1585 if (owner &&
1586 (object->purgable == VM_PURGABLE_NONVOLATILE ||
1587 VM_PAGE_WIRED(mem))) {
1588
1589 if (delayed_ledger_update)
1590 *delayed_ledger_update += PAGE_SIZE;
1591 else {
1592 /* more non-volatile bytes */
1593 ledger_credit(owner->ledger,
1594 task_ledgers.purgeable_nonvolatile,
1595 PAGE_SIZE);
1596 /* more footprint */
1597 ledger_credit(owner->ledger,
1598 task_ledgers.phys_footprint,
1599 PAGE_SIZE);
1600 }
1601
1602 } else if (owner &&
1603 (object->purgable == VM_PURGABLE_VOLATILE ||
1604 object->purgable == VM_PURGABLE_EMPTY)) {
1605 assert(! VM_PAGE_WIRED(mem));
1606 /* more volatile bytes */
1607 ledger_credit(owner->ledger,
1608 task_ledgers.purgeable_volatile,
1609 PAGE_SIZE);
1610 }
1611
1612 if (object->purgable == VM_PURGABLE_VOLATILE) {
1613 if (VM_PAGE_WIRED(mem)) {
1614 OSAddAtomic(+1, &vm_page_purgeable_wired_count);
1615 } else {
1616 OSAddAtomic(+1, &vm_page_purgeable_count);
1617 }
1618 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1619 mem->vm_page_q_state == VM_PAGE_ON_THROTTLED_Q) {
1620 /*
1621 * This page belongs to a purged VM object but hasn't
1622 * been purged (because it was "busy").
1623 * It's in the "throttled" queue and hence not
1624 * visible to vm_pageout_scan(). Move it to a pageable
1625 * queue, so that it can eventually be reclaimed, instead
1626 * of lingering in the "empty" object.
1627 */
1628 if (queues_lock_held == FALSE)
1629 vm_page_lockspin_queues();
1630 vm_page_deactivate(mem);
1631 if (queues_lock_held == FALSE)
1632 vm_page_unlock_queues();
1633 }
1634
1635 #if VM_OBJECT_TRACKING_OP_MODIFIED
1636 if (vm_object_tracking_inited &&
1637 object->internal &&
1638 object->resident_page_count == 0 &&
1639 object->pager == NULL &&
1640 object->shadow != NULL &&
1641 object->shadow->copy == object) {
1642 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
1643 int numsaved = 0;
1644
1645 numsaved = OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
1646 btlog_add_entry(vm_object_tracking_btlog,
1647 object,
1648 VM_OBJECT_TRACKING_OP_MODIFIED,
1649 bt,
1650 numsaved);
1651 }
1652 #endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
1653 }
1654
1655 /*
1656 * vm_page_replace:
1657 *
1658 * Exactly like vm_page_insert, except that we first
1659 * remove any existing page at the given offset in object.
1660 *
1661 * The object must be locked.
1662 */
1663 void
1664 vm_page_replace(
1665 vm_page_t mem,
1666 vm_object_t object,
1667 vm_object_offset_t offset)
1668 {
1669 vm_page_bucket_t *bucket;
1670 vm_page_t found_m = VM_PAGE_NULL;
1671 lck_spin_t *bucket_lock;
1672 int hash_id;
1673
1674 #if 0
1675 /*
1676 * we don't hold the page queue lock
1677 * so this check isn't safe to make
1678 */
1679 VM_PAGE_CHECK(mem);
1680 #endif
1681 vm_object_lock_assert_exclusive(object);
1682 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1683 if (mem->tabled || mem->vm_page_object)
1684 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1685 "already in (obj=%p,off=0x%llx)",
1686 mem, object, offset, VM_PAGE_OBJECT(mem), mem->offset);
1687 #endif
1688 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1689
1690 assert(!VM_PAGE_PAGEABLE(mem));
1691
1692 /*
1693 * Record the object/offset pair in this page
1694 */
1695 mem->vm_page_object = VM_PAGE_PACK_OBJECT(object);
1696 mem->offset = offset;
1697
1698 /*
1699 * Insert it into the object/offset hash table,
1700 * replacing any page that might have been there.
1701 */
1702
1703 hash_id = vm_page_hash(object, offset);
1704 bucket = &vm_page_buckets[hash_id];
1705 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1706
1707 lck_spin_lock(bucket_lock);
1708
1709 if (bucket->page_list) {
1710 vm_page_packed_t *mp = &bucket->page_list;
1711 vm_page_t m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp));
1712
1713 do {
1714 /*
1715 * compare packed object pointers
1716 */
1717 if (m->vm_page_object == mem->vm_page_object && m->offset == offset) {
1718 /*
1719 * Remove old page from hash list
1720 */
1721 *mp = m->next_m;
1722 m->hashed = FALSE;
1723 m->next_m = VM_PAGE_PACK_PTR(NULL);
1724
1725 found_m = m;
1726 break;
1727 }
1728 mp = &m->next_m;
1729 } while ((m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp))));
1730
1731 mem->next_m = bucket->page_list;
1732 } else {
1733 mem->next_m = VM_PAGE_PACK_PTR(NULL);
1734 }
1735 /*
1736 * insert new page at head of hash list
1737 */
1738 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1739 mem->hashed = TRUE;
1740
1741 lck_spin_unlock(bucket_lock);
1742
1743 if (found_m) {
1744 /*
1745 * there was already a page at the specified
1746 * offset for this object... remove it from
1747 * the object and free it back to the free list
1748 */
1749 vm_page_free_unlocked(found_m, FALSE);
1750 }
1751 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, FALSE, FALSE, FALSE, NULL);
1752 }
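
/*
 * Illustrative sketch (editorial addition, not compiled): how a caller
 * might install a freshly grabbed page at a given offset and let
 * vm_page_replace() dispose of any page already resident there.  The
 * helper name is an assumption made for this example; the requirements
 * (exclusive object lock held, page queues lock not held, new page not
 * yet pageable) come from vm_page_replace() above.
 */
#if 0 /* example only */
static vm_page_t
example_install_fresh_page(
	vm_object_t		object,		/* locked exclusive by caller */
	vm_object_offset_t	offset)
{
	vm_page_t	mem;

	mem = vm_page_grab();
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	/* removes and frees any page previously resident at this offset */
	vm_page_replace(mem, object, offset);

	return mem;
}
#endif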
1753
1754 /*
1755 * vm_page_remove: [ internal use only ]
1756 *
1757 * Removes the given mem entry from the object/offset-page
1758 * table and the object page list.
1759 *
1760 * The object must be locked.
1761 */
1762
1763 void
1764 vm_page_remove(
1765 vm_page_t mem,
1766 boolean_t remove_from_hash)
1767 {
1768 vm_page_bucket_t *bucket;
1769 vm_page_t this;
1770 lck_spin_t *bucket_lock;
1771 int hash_id;
1772 task_t owner;
1773 vm_object_t m_object;
1774
1775 m_object = VM_PAGE_OBJECT(mem);
1776
1777 XPR(XPR_VM_PAGE,
1778 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1779 m_object, mem->offset,
1780 mem, 0,0);
1781
1782 vm_object_lock_assert_exclusive(m_object);
1783 assert(mem->tabled);
1784 assert(!mem->cleaning);
1785 assert(!mem->laundry);
1786
1787 if (VM_PAGE_PAGEABLE(mem)) {
1788 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1789 }
1790 #if 0
1791 /*
1792 * we don't hold the page queue lock
1793 * so this check isn't safe to make
1794 */
1795 VM_PAGE_CHECK(mem);
1796 #endif
1797 if (remove_from_hash == TRUE) {
1798 /*
1799 * Remove from the object_object/offset hash table
1800 */
1801 hash_id = vm_page_hash(m_object, mem->offset);
1802 bucket = &vm_page_buckets[hash_id];
1803 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1804
1805 lck_spin_lock(bucket_lock);
1806
1807 if ((this = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list))) == mem) {
1808 /* optimize for common case */
1809
1810 bucket->page_list = mem->next_m;
1811 } else {
1812 vm_page_packed_t *prev;
1813
1814 for (prev = &this->next_m;
1815 (this = (vm_page_t)(VM_PAGE_UNPACK_PTR(*prev))) != mem;
1816 prev = &this->next_m)
1817 continue;
1818 *prev = this->next_m;
1819 }
1820 #if MACH_PAGE_HASH_STATS
1821 bucket->cur_count--;
1822 #endif /* MACH_PAGE_HASH_STATS */
1823 mem->hashed = FALSE;
1824 this->next_m = VM_PAGE_PACK_PTR(NULL);
1825 lck_spin_unlock(bucket_lock);
1826 }
1827 /*
1828 * Now remove from the object's list of backed pages.
1829 */
1830
1831 vm_page_remove_internal(mem);
1832
1833 /*
1834 * And show that the object has one fewer resident
1835 * page.
1836 */
1837
1838 assert(m_object->resident_page_count > 0);
1839 m_object->resident_page_count--;
1840
1841 if (m_object->internal) {
1842 #if DEBUG
1843 assert(vm_page_internal_count);
1844 #endif /* DEBUG */
1845
1846 OSAddAtomic(-1, &vm_page_internal_count);
1847 } else {
1848 assert(vm_page_external_count);
1849 OSAddAtomic(-1, &vm_page_external_count);
1850
1851 if (mem->xpmapped) {
1852 assert(vm_page_xpmapped_external_count);
1853 OSAddAtomic(-1, &vm_page_xpmapped_external_count);
1854 }
1855 }
1856 if (!m_object->internal && (m_object->objq.next || m_object->objq.prev)) {
1857 if (m_object->resident_page_count == 0)
1858 vm_object_cache_remove(m_object);
1859 }
1860
1861 if (VM_PAGE_WIRED(mem)) {
1862 assert(mem->wire_count > 0);
1863 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
1864 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
1865 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
1866 }
1867 assert(m_object->resident_page_count >=
1868 m_object->wired_page_count);
1869 if (mem->reusable) {
1870 assert(m_object->reusable_page_count > 0);
1871 m_object->reusable_page_count--;
1872 assert(m_object->reusable_page_count <=
1873 m_object->resident_page_count);
1874 mem->reusable = FALSE;
1875 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1876 vm_page_stats_reusable.reused_remove++;
1877 } else if (m_object->all_reusable) {
1878 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1879 vm_page_stats_reusable.reused_remove++;
1880 }
1881
1882 if (m_object->purgable == VM_PURGABLE_DENY) {
1883 owner = TASK_NULL;
1884 } else {
1885 owner = m_object->vo_purgeable_owner;
1886 }
1887 if (owner &&
1888 (m_object->purgable == VM_PURGABLE_NONVOLATILE ||
1889 VM_PAGE_WIRED(mem))) {
1890 /* less non-volatile bytes */
1891 ledger_debit(owner->ledger,
1892 task_ledgers.purgeable_nonvolatile,
1893 PAGE_SIZE);
1894 /* less footprint */
1895 ledger_debit(owner->ledger,
1896 task_ledgers.phys_footprint,
1897 PAGE_SIZE);
1898 } else if (owner &&
1899 (m_object->purgable == VM_PURGABLE_VOLATILE ||
1900 m_object->purgable == VM_PURGABLE_EMPTY)) {
1901 assert(! VM_PAGE_WIRED(mem));
1902 /* less volatile bytes */
1903 ledger_debit(owner->ledger,
1904 task_ledgers.purgeable_volatile,
1905 PAGE_SIZE);
1906 }
1907 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
1908 if (VM_PAGE_WIRED(mem)) {
1909 assert(vm_page_purgeable_wired_count > 0);
1910 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1911 } else {
1912 assert(vm_page_purgeable_count > 0);
1913 OSAddAtomic(-1, &vm_page_purgeable_count);
1914 }
1915 }
1916
1917 if (m_object->set_cache_attr == TRUE)
1918 pmap_set_cache_attributes(VM_PAGE_GET_PHYS_PAGE(mem), 0);
1919
1920 mem->tabled = FALSE;
1921 mem->vm_page_object = 0;
1922 mem->offset = (vm_object_offset_t) -1;
1923 }
1924
1925
1926 /*
1927 * vm_page_lookup:
1928 *
1929 * Returns the page associated with the object/offset
1930 * pair specified; if none is found, VM_PAGE_NULL is returned.
1931 *
1932 * The object must be locked. No side effects.
1933 */
1934
1935 #define VM_PAGE_HASH_LOOKUP_THRESHOLD 10
1936
1937 #if DEBUG_VM_PAGE_LOOKUP
1938
1939 struct {
1940 uint64_t vpl_total;
1941 uint64_t vpl_empty_obj;
1942 uint64_t vpl_bucket_NULL;
1943 uint64_t vpl_hit_hint;
1944 uint64_t vpl_hit_hint_next;
1945 uint64_t vpl_hit_hint_prev;
1946 uint64_t vpl_fast;
1947 uint64_t vpl_slow;
1948 uint64_t vpl_hit;
1949 uint64_t vpl_miss;
1950
1951 uint64_t vpl_fast_elapsed;
1952 uint64_t vpl_slow_elapsed;
1953 } vm_page_lookup_stats __attribute__((aligned(8)));
1954
1955 #endif
1956
1957 #define KDP_VM_PAGE_WALK_MAX 1000
1958
1959 vm_page_t
1960 kdp_vm_page_lookup(
1961 vm_object_t object,
1962 vm_object_offset_t offset)
1963 {
1964 vm_page_t cur_page;
1965 int num_traversed = 0;
1966
1967 if (not_in_kdp) {
1968 panic("kdp_vm_page_lookup done outside of kernel debugger");
1969 }
1970
1971 vm_page_queue_iterate(&object->memq, cur_page, vm_page_t, listq) {
1972 if (cur_page->offset == offset) {
1973 return cur_page;
1974 }
1975 num_traversed++;
1976
1977 if (num_traversed >= KDP_VM_PAGE_WALK_MAX) {
1978 return VM_PAGE_NULL;
1979 }
1980 }
1981
1982 return VM_PAGE_NULL;
1983 }
1984
1985 vm_page_t
1986 vm_page_lookup(
1987 vm_object_t object,
1988 vm_object_offset_t offset)
1989 {
1990 vm_page_t mem;
1991 vm_page_bucket_t *bucket;
1992 vm_page_queue_entry_t qe;
1993 lck_spin_t *bucket_lock = NULL;
1994 int hash_id;
1995 #if DEBUG_VM_PAGE_LOOKUP
1996 uint64_t start, elapsed;
1997
1998 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_total);
1999 #endif
2000 vm_object_lock_assert_held(object);
2001
2002 if (object->resident_page_count == 0) {
2003 #if DEBUG_VM_PAGE_LOOKUP
2004 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_empty_obj);
2005 #endif
2006 return (VM_PAGE_NULL);
2007 }
2008
2009 mem = object->memq_hint;
2010
2011 if (mem != VM_PAGE_NULL) {
2012 assert(VM_PAGE_OBJECT(mem) == object);
2013
2014 if (mem->offset == offset) {
2015 #if DEBUG_VM_PAGE_LOOKUP
2016 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint);
2017 #endif
2018 return (mem);
2019 }
2020 qe = (vm_page_queue_entry_t)vm_page_queue_next(&mem->listq);
2021
2022 if (! vm_page_queue_end(&object->memq, qe)) {
2023 vm_page_t next_page;
2024
2025 next_page = (vm_page_t)((uintptr_t)qe);
2026 assert(VM_PAGE_OBJECT(next_page) == object);
2027
2028 if (next_page->offset == offset) {
2029 object->memq_hint = next_page; /* new hint */
2030 #if DEBUG_VM_PAGE_LOOKUP
2031 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_next);
2032 #endif
2033 return (next_page);
2034 }
2035 }
2036 qe = (vm_page_queue_entry_t)vm_page_queue_prev(&mem->listq);
2037
2038 if (! vm_page_queue_end(&object->memq, qe)) {
2039 vm_page_t prev_page;
2040
2041 prev_page = (vm_page_t)((uintptr_t)qe);
2042 assert(VM_PAGE_OBJECT(prev_page) == object);
2043
2044 if (prev_page->offset == offset) {
2045 object->memq_hint = prev_page; /* new hint */
2046 #if DEBUG_VM_PAGE_LOOKUP
2047 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_prev);
2048 #endif
2049 return (prev_page);
2050 }
2051 }
2052 }
2053 /*
2054 * Search the hash table for this object/offset pair
2055 */
2056 hash_id = vm_page_hash(object, offset);
2057 bucket = &vm_page_buckets[hash_id];
2058
2059 /*
2060 * since we hold the object lock, we are guaranteed that no
2061 * new pages can be inserted into this object... this in turn
2062 * guarantees that the page we're looking for can't exist
2063 * if the bucket it hashes to is currently NULL even when looked
2064 * at outside the scope of the hash bucket lock... this is a
2065 * really cheap optimization to avoid taking the lock
2066 */
2067 if (!bucket->page_list) {
2068 #if DEBUG_VM_PAGE_LOOKUP
2069 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_bucket_NULL);
2070 #endif
2071 return (VM_PAGE_NULL);
2072 }
2073
2074 #if DEBUG_VM_PAGE_LOOKUP
2075 start = mach_absolute_time();
2076 #endif
2077 if (object->resident_page_count <= VM_PAGE_HASH_LOOKUP_THRESHOLD) {
2078 /*
2079 * on average, it's roughly 3 times faster to walk a short memq list
2080 * than to take the spin lock and go through the hash list
2081 */
2082 mem = (vm_page_t)vm_page_queue_first(&object->memq);
2083
2084 while (!vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem)) {
2085
2086 if (mem->offset == offset)
2087 break;
2088
2089 mem = (vm_page_t)vm_page_queue_next(&mem->listq);
2090 }
2091 if (vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem))
2092 mem = NULL;
2093 } else {
2094 vm_page_object_t packed_object;
2095
2096 packed_object = VM_PAGE_PACK_OBJECT(object);
2097
2098 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
2099
2100 lck_spin_lock(bucket_lock);
2101
2102 for (mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
2103 mem != VM_PAGE_NULL;
2104 mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->next_m))) {
2105 #if 0
2106 /*
2107 * we don't hold the page queue lock
2108 * so this check isn't safe to make
2109 */
2110 VM_PAGE_CHECK(mem);
2111 #endif
2112 if ((mem->vm_page_object == packed_object) && (mem->offset == offset))
2113 break;
2114 }
2115 lck_spin_unlock(bucket_lock);
2116 }
2117
2118 #if DEBUG_VM_PAGE_LOOKUP
2119 elapsed = mach_absolute_time() - start;
2120
2121 if (bucket_lock) {
2122 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_slow);
2123 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_slow_elapsed);
2124 } else {
2125 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_fast);
2126 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_fast_elapsed);
2127 }
2128 if (mem != VM_PAGE_NULL)
2129 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit);
2130 else
2131 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_miss);
2132 #endif
2133 if (mem != VM_PAGE_NULL) {
2134 assert(VM_PAGE_OBJECT(mem) == object);
2135
2136 object->memq_hint = mem;
2137 }
2138 return (mem);
2139 }
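
/*
 * Illustrative sketch (editorial addition, not compiled): the expected
 * calling pattern for vm_page_lookup().  The caller must already hold
 * the object lock; a VM_PAGE_NULL return simply means no page is
 * resident at that offset.  The helper name is an assumption made for
 * this example.
 */
#if 0 /* example only */
static boolean_t
example_offset_is_resident(
	vm_object_t		object,		/* locked by caller */
	vm_object_offset_t	offset)
{
	vm_page_t	m;

	m = vm_page_lookup(object, offset);

	return (m != VM_PAGE_NULL);
}
#endif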
2140
2141
2142 /*
2143 * vm_page_rename:
2144 *
2145 * Move the given memory entry from its
2146 * current object to the specified target object/offset.
2147 *
2148 * The object must be locked.
2149 */
2150 void
2151 vm_page_rename(
2152 vm_page_t mem,
2153 vm_object_t new_object,
2154 vm_object_offset_t new_offset)
2155 {
2156 boolean_t internal_to_external, external_to_internal;
2157 vm_tag_t tag;
2158 vm_object_t m_object;
2159
2160 m_object = VM_PAGE_OBJECT(mem);
2161
2162 assert(m_object != new_object);
2163 assert(m_object);
2164
2165 XPR(XPR_VM_PAGE,
2166 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
2167 new_object, new_offset,
2168 mem, 0,0);
2169
2170 /*
2171 * Changes to mem->object require the page queues lock because
2172 * the pageout daemon uses that lock to get the object.
2173 */
2174 vm_page_lockspin_queues();
2175
2176 internal_to_external = FALSE;
2177 external_to_internal = FALSE;
2178
2179 if (mem->vm_page_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q) {
2180 /*
2181 * it's much easier to get the vm_page_pageable_xxx accounting correct
2182 * if we first move the page to the active queue... it's going to end
2183 * up there anyway, and we don't call vm_page_rename() frequently enough
2184 * for this to matter.
2185 */
2186 vm_page_queues_remove(mem, FALSE);
2187 vm_page_activate(mem);
2188 }
2189 if (VM_PAGE_PAGEABLE(mem)) {
2190 if (m_object->internal && !new_object->internal) {
2191 internal_to_external = TRUE;
2192 }
2193 if (!m_object->internal && new_object->internal) {
2194 external_to_internal = TRUE;
2195 }
2196 }
2197
2198 tag = m_object->wire_tag;
2199 vm_page_remove(mem, TRUE);
2200 vm_page_insert_internal(mem, new_object, new_offset, tag, TRUE, TRUE, FALSE, FALSE, NULL);
2201
2202 if (internal_to_external) {
2203 vm_page_pageable_internal_count--;
2204 vm_page_pageable_external_count++;
2205 } else if (external_to_internal) {
2206 vm_page_pageable_external_count--;
2207 vm_page_pageable_internal_count++;
2208 }
2209
2210 vm_page_unlock_queues();
2211 }
2212
2213 /*
2214 * vm_page_init:
2215 *
2216 * Initialize the fields in a new page.
2217 * This takes a structure with arbitrary (uninitialized) contents and initializes it
2218 * so that it can be given to vm_page_release or vm_page_insert.
2219 */
2220 void
2221 vm_page_init(
2222 vm_page_t mem,
2223 ppnum_t phys_page,
2224 boolean_t lopage)
2225 {
2226 assert(phys_page);
2227
2228 #if DEBUG
2229 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
2230 if (!(pmap_valid_page(phys_page))) {
2231 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
2232 }
2233 }
2234 #endif
2235 *mem = vm_page_template;
2236
2237 VM_PAGE_SET_PHYS_PAGE(mem, phys_page);
2238 #if 0
2239 /*
2240 * we're leaving this turned off for now... currently pages
2241 * come off the free list and are either immediately dirtied/referenced
2242 * due to zero-fill or COW faults, or are used to read or write files...
2243 * in the file I/O case, the UPL mechanism takes care of clearing
2244 * the state of the HW ref/mod bits in a somewhat fragile way.
2245 * Since we may change the way this works in the future (to toughen it up),
2246 * I'm leaving this as a reminder of where these bits could get cleared
2247 */
2248
2249 /*
2250 * make sure both the h/w referenced and modified bits are
2251 * clear at this point... we are especially dependent on
2252 * not finding a 'stale' h/w modified in a number of spots
2253 * once this page goes back into use
2254 */
2255 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
2256 #endif
2257 mem->lopage = lopage;
2258 }
2259
2260 /*
2261 * vm_page_grab_fictitious:
2262 *
2263 * Remove a fictitious page from the free list.
2264 * Returns VM_PAGE_NULL if there are no free pages.
2265 */
2266 int c_vm_page_grab_fictitious = 0;
2267 int c_vm_page_grab_fictitious_failed = 0;
2268 int c_vm_page_release_fictitious = 0;
2269 int c_vm_page_more_fictitious = 0;
2270
2271 vm_page_t
2272 vm_page_grab_fictitious_common(
2273 ppnum_t phys_addr)
2274 {
2275 vm_page_t m;
2276
2277 if ((m = (vm_page_t)zget(vm_page_zone))) {
2278
2279 vm_page_init(m, phys_addr, FALSE);
2280 m->fictitious = TRUE;
2281
2282 c_vm_page_grab_fictitious++;
2283 } else
2284 c_vm_page_grab_fictitious_failed++;
2285
2286 return m;
2287 }
2288
2289 vm_page_t
2290 vm_page_grab_fictitious(void)
2291 {
2292 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
2293 }
2294
2295 int vm_guard_count;
2296
2297
2298 vm_page_t
2299 vm_page_grab_guard(void)
2300 {
2301 vm_page_t page;
2302 page = vm_page_grab_fictitious_common(vm_page_guard_addr);
2303 if (page) OSAddAtomic(1, &vm_guard_count);
2304 return page;
2305 }
2306
2307
2308 /*
2309 * vm_page_release_fictitious:
2310 *
2311 * Release a fictitious page to the zone pool
2312 */
2313 void
2314 vm_page_release_fictitious(
2315 vm_page_t m)
2316 {
2317 assert((m->vm_page_q_state == VM_PAGE_NOT_ON_Q) || (m->vm_page_q_state == VM_PAGE_IS_WIRED));
2318 assert(m->fictitious);
2319 assert(VM_PAGE_GET_PHYS_PAGE(m) == vm_page_fictitious_addr ||
2320 VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr);
2321
2322
2323 if (VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr) OSAddAtomic(-1, &vm_guard_count);
2324
2325 c_vm_page_release_fictitious++;
2326
2327 zfree(vm_page_zone, m);
2328 }
2329
2330 /*
2331 * vm_page_more_fictitious:
2332 *
2333 * Add more fictitious pages to the zone.
2334 * Allowed to block. This routine is tightly coupled
2335 * with the zones code, for several reasons:
2336 * 1. we need to carve some page structures out of physical
2337 * memory before zones work, so they _cannot_ come from
2338 * the zone_map.
2339 * 2. the zone needs to be collectable in order to prevent
2340 * growth without bound. These structures are used by
2341 * the device pager (by the hundreds and thousands), as
2342 * private pages for pageout, and as blocking pages for
2343 * pagein. Temporary bursts in demand should not result in
2344 * permanent allocation of a resource.
2345 * 3. To smooth allocation humps, we allocate single pages
2346 * with kernel_memory_allocate(), and cram them into the
2347 * zone.
2348 */
2349
2350 void vm_page_more_fictitious(void)
2351 {
2352 vm_offset_t addr;
2353 kern_return_t retval;
2354
2355 c_vm_page_more_fictitious++;
2356
2357 /*
2358 * Allocate a single page from the zone_map. Do not wait if no physical
2359 * pages are immediately available, and do not zero the space. We need
2360 * our own blocking lock here to prevent multiple simultaneous
2361 * requests from piling up on the zone_map lock, so that at most
2362 * one of our threads is ever waiting on the map lock.
2363 * If the winner is not vm-privileged, then the page allocation will fail,
2364 * and it will temporarily block here in the vm_page_wait().
2365 */
2366 lck_mtx_lock(&vm_page_alloc_lock);
2367 /*
2368 * If another thread allocated space, just bail out now.
2369 */
2370 if (zone_free_count(vm_page_zone) > 5) {
2371 /*
2372 * The number "5" is a small number that is larger than the
2373 * number of fictitious pages that any single caller will
2374 * attempt to allocate. Otherwise, a thread will attempt to
2375 * acquire a fictitious page (vm_page_grab_fictitious), fail,
2376 * release all of the resources and locks already acquired,
2377 * and then call this routine. This routine finds the pages
2378 * that the caller released, so fails to allocate new space.
2379 * The process repeats infinitely. The largest known number
2380 * of fictitious pages required in this manner is 2. 5 is
2381 * simply a somewhat larger number.
2382 */
2383 lck_mtx_unlock(&vm_page_alloc_lock);
2384 return;
2385 }
2386
2387 retval = kernel_memory_allocate(zone_map,
2388 &addr, PAGE_SIZE, 0,
2389 KMA_KOBJECT|KMA_NOPAGEWAIT, VM_KERN_MEMORY_ZONE);
2390 if (retval != KERN_SUCCESS) {
2391 /*
2392 * No page was available. Drop the
2393 * lock to give another thread a chance at it, and
2394 * wait for the pageout daemon to make progress.
2395 */
2396 lck_mtx_unlock(&vm_page_alloc_lock);
2397 vm_page_wait(THREAD_UNINT);
2398 return;
2399 }
2400
2401 zcram(vm_page_zone, addr, PAGE_SIZE);
2402
2403 lck_mtx_unlock(&vm_page_alloc_lock);
2404 }
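
/*
 * Illustrative sketch (editorial addition, not compiled): the caller-side
 * retry loop described in the comments above.  A caller that fails to get
 * a fictitious page is expected to drop its own resources, replenish the
 * zone with vm_page_more_fictitious() (which may block), and try again.
 * The helper name is an assumption made for this example.
 */
#if 0 /* example only */
static vm_page_t
example_grab_fictitious_retry(void)
{
	vm_page_t	m;

	while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL) {
		/* zone is empty; refill it (may block) and retry */
		vm_page_more_fictitious();
	}
	return m;
}
#endif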
2405
2406
2407 /*
2408 * vm_pool_low():
2409 *
2410 * Return true if it is not likely that a non-vm_privileged thread
2411 * can get memory without blocking. Advisory only, since the
2412 * situation may change under us.
2413 */
2414 int
2415 vm_pool_low(void)
2416 {
2417 /* No locking, at worst we will fib. */
2418 return( vm_page_free_count <= vm_page_free_reserved );
2419 }
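
/*
 * Illustrative sketch (editorial addition, not compiled): vm_pool_low()
 * is advisory only, so it is typically used to choose a non-blocking
 * fallback rather than as a guarantee that an allocation will succeed.
 * The helper name is an assumption made for this example.
 */
#if 0 /* example only */
static vm_page_t
example_grab_unless_low(void)
{
	if (vm_pool_low()) {
		/* likely to block; let the caller fall back or defer */
		return VM_PAGE_NULL;
	}
	return vm_page_grab();
}
#endif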
2420
2421
2422 #if CONFIG_BACKGROUND_QUEUE
2423
2424 void
2425 vm_page_update_background_state(vm_page_t mem)
2426 {
2427 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2428 return;
2429
2430 if (mem->vm_page_in_background == FALSE)
2431 return;
2432
2433 #if BACKGROUNDQ_BASED_ON_QOS
2434 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS) <= THREAD_QOS_LEGACY)
2435 return;
2436 #else
2437 task_t my_task;
2438
2439 my_task = current_task();
2440
2441 if (my_task) {
2442 if (proc_get_effective_task_policy(my_task, TASK_POLICY_DARWIN_BG))
2443 return;
2444 }
2445 #endif
2446 vm_page_lockspin_queues();
2447
2448 mem->vm_page_in_background = FALSE;
2449 vm_page_background_promoted_count++;
2450
2451 vm_page_remove_from_backgroundq(mem);
2452
2453 vm_page_unlock_queues();
2454 }
2455
2456
2457 void
2458 vm_page_assign_background_state(vm_page_t mem)
2459 {
2460 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2461 return;
2462
2463 #if BACKGROUNDQ_BASED_ON_QOS
2464 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS) <= THREAD_QOS_LEGACY)
2465 mem->vm_page_in_background = TRUE;
2466 else
2467 mem->vm_page_in_background = FALSE;
2468 #else
2469 task_t my_task;
2470
2471 my_task = current_task();
2472
2473 if (my_task)
2474 mem->vm_page_in_background = proc_get_effective_task_policy(my_task, TASK_POLICY_DARWIN_BG);
2475 #endif
2476 }
2477
2478
2479 void
2480 vm_page_remove_from_backgroundq(
2481 vm_page_t mem)
2482 {
2483 vm_object_t m_object;
2484
2485 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2486
2487 if (mem->vm_page_on_backgroundq) {
2488 vm_page_queue_remove(&vm_page_queue_background, mem, vm_page_t, vm_page_backgroundq);
2489
2490 mem->vm_page_backgroundq.next = 0;
2491 mem->vm_page_backgroundq.prev = 0;
2492 mem->vm_page_on_backgroundq = FALSE;
2493
2494 vm_page_background_count--;
2495
2496 m_object = VM_PAGE_OBJECT(mem);
2497
2498 if (m_object->internal)
2499 vm_page_background_internal_count--;
2500 else
2501 vm_page_background_external_count--;
2502 } else {
2503 assert(VM_PAGE_UNPACK_PTR(mem->vm_page_backgroundq.next) == (uintptr_t)NULL &&
2504 VM_PAGE_UNPACK_PTR(mem->vm_page_backgroundq.prev) == (uintptr_t)NULL);
2505 }
2506 }
2507
2508
2509 void
2510 vm_page_add_to_backgroundq(
2511 vm_page_t mem,
2512 boolean_t first)
2513 {
2514 vm_object_t m_object;
2515
2516 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2517
2518 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2519 return;
2520
2521 if (mem->vm_page_on_backgroundq == FALSE) {
2522
2523 m_object = VM_PAGE_OBJECT(mem);
2524
2525 if (vm_page_background_exclude_external && !m_object->internal)
2526 return;
2527
2528 if (first == TRUE)
2529 vm_page_queue_enter_first(&vm_page_queue_background, mem, vm_page_t, vm_page_backgroundq);
2530 else
2531 vm_page_queue_enter(&vm_page_queue_background, mem, vm_page_t, vm_page_backgroundq);
2532 mem->vm_page_on_backgroundq = TRUE;
2533
2534 vm_page_background_count++;
2535
2536 if (m_object->internal)
2537 vm_page_background_internal_count++;
2538 else
2539 vm_page_background_external_count++;
2540 }
2541 }
2542
2543 #endif
2544
2545 /*
2546 * this is an interface to support bring-up of drivers
2547 * on platforms with physical memory > 4G...
2548 */
2549 int vm_himemory_mode = 2;
2550
2551
2552 /*
2553 * this interface exists to support hardware controllers
2554 * incapable of generating DMAs with more than 32 bits
2555 * of address on platforms with physical memory > 4G...
2556 */
2557 unsigned int vm_lopages_allocated_q = 0;
2558 unsigned int vm_lopages_allocated_cpm_success = 0;
2559 unsigned int vm_lopages_allocated_cpm_failed = 0;
2560 vm_page_queue_head_t vm_lopage_queue_free __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
2561
2562 vm_page_t
2563 vm_page_grablo(void)
2564 {
2565 vm_page_t mem;
2566
2567 if (vm_lopage_needed == FALSE)
2568 return (vm_page_grab());
2569
2570 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2571
2572 if ( !vm_page_queue_empty(&vm_lopage_queue_free)) {
2573 vm_page_queue_remove_first(&vm_lopage_queue_free,
2574 mem,
2575 vm_page_t,
2576 pageq);
2577 assert(vm_lopage_free_count);
2578 assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
2579 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
2580
2581 vm_lopage_free_count--;
2582 vm_lopages_allocated_q++;
2583
2584 if (vm_lopage_free_count < vm_lopage_lowater)
2585 vm_lopage_refill = TRUE;
2586
2587 lck_mtx_unlock(&vm_page_queue_free_lock);
2588
2589 #if CONFIG_BACKGROUND_QUEUE
2590 vm_page_assign_background_state(mem);
2591 #endif
2592 } else {
2593 lck_mtx_unlock(&vm_page_queue_free_lock);
2594
2595 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
2596
2597 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2598 vm_lopages_allocated_cpm_failed++;
2599 lck_mtx_unlock(&vm_page_queue_free_lock);
2600
2601 return (VM_PAGE_NULL);
2602 }
2603 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
2604
2605 mem->busy = TRUE;
2606
2607 vm_page_lockspin_queues();
2608
2609 mem->gobbled = FALSE;
2610 vm_page_gobble_count--;
2611 vm_page_wire_count--;
2612
2613 vm_lopages_allocated_cpm_success++;
2614 vm_page_unlock_queues();
2615 }
2616 assert(mem->busy);
2617 assert(!mem->pmapped);
2618 assert(!mem->wpmapped);
2619 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2620
2621 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2622
2623 return (mem);
2624 }
2625
2626
2627 /*
2628 * vm_page_grab:
2629 *
2630 * first try to grab a page from the per-cpu free list...
2631 * this must be done while pre-emption is disabled... if
2632 * a page is available, we're done...
2633 * if no page is available, grab the vm_page_queue_free_lock
2634 * and see if the current number of free pages would allow us
2635 * to grab at least one... if not, return VM_PAGE_NULL as before...
2636 * if there are pages available, disable preemption and
2637 * recheck the state of the per-cpu free list... we could
2638 * have been preempted and moved to a different cpu, or
2639 * some other thread could have re-filled it... if still
2640 * empty, figure out how many pages we can steal from the
2641 * global free queue and move to the per-cpu queue...
2642 * return one of these pages when done... only wake up the
2643 * pageout_scan thread if we moved pages from the global
2644 * list... no need for the wakeup if we've satisfied the
2645 * request from the per-cpu queue.
2646 */
2647
2648 #if CONFIG_SECLUDED_MEMORY
2649 vm_page_t vm_page_grab_secluded(void);
2650 #endif /* CONFIG_SECLUDED_MEMORY */
2651
2652 vm_page_t
2653 vm_page_grab(void)
2654 {
2655 return vm_page_grab_options(0);
2656 }
2657
2658 #if HIBERNATION
2659 boolean_t hibernate_rebuild_needed = FALSE;
2660 #endif /* HIBERNATION */
2661
2662 vm_page_t
2663 vm_page_grab_options(
2664 int grab_options)
2665 {
2666 vm_page_t mem;
2667
2668 disable_preemption();
2669
2670 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2671 return_page_from_cpu_list:
2672 assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
2673
2674 #if HIBERNATION
2675 if (hibernate_rebuild_needed) {
2676 panic("%s:%d should not modify cpu->free_pages while hibernating", __FUNCTION__, __LINE__);
2677 }
2678 #endif /* HIBERNATION */
2679 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2680 PROCESSOR_DATA(current_processor(), free_pages) = mem->snext;
2681
2682 enable_preemption();
2683 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2684 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
2685
2686 assert(mem->listq.next == 0 && mem->listq.prev == 0);
2687 assert(mem->tabled == FALSE);
2688 assert(mem->vm_page_object == 0);
2689 assert(!mem->laundry);
2690 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
2691 assert(mem->busy);
2692 assert(!mem->pmapped);
2693 assert(!mem->wpmapped);
2694 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2695
2696 #if CONFIG_BACKGROUND_QUEUE
2697 vm_page_assign_background_state(mem);
2698 #endif
2699 return mem;
2700 }
2701 enable_preemption();
2702
2703
2704 /*
2705 * Optionally produce warnings if the wire or gobble
2706 * counts exceed some threshold.
2707 */
2708 #if VM_PAGE_WIRE_COUNT_WARNING
2709 if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) {
2710 printf("mk: vm_page_grab(): high wired page count of %d\n",
2711 vm_page_wire_count);
2712 }
2713 #endif
2714 #if VM_PAGE_GOBBLE_COUNT_WARNING
2715 if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) {
2716 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
2717 vm_page_gobble_count);
2718 }
2719 #endif
2720
2721 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2722
2723 /*
2724 * Only let privileged threads (involved in pageout)
2725 * dip into the reserved pool.
2726 */
2727 if ((vm_page_free_count < vm_page_free_reserved) &&
2728 !(current_thread()->options & TH_OPT_VMPRIV)) {
2729 /* no page for us in the free queue... */
2730 lck_mtx_unlock(&vm_page_queue_free_lock);
2731 mem = VM_PAGE_NULL;
2732
2733 #if CONFIG_SECLUDED_MEMORY
2734 /* ... but can we try and grab from the secluded queue? */
2735 if (vm_page_secluded_count > 0 &&
2736 ((grab_options & VM_PAGE_GRAB_SECLUDED) ||
2737 task_can_use_secluded_mem(current_task()))) {
2738 mem = vm_page_grab_secluded();
2739 if (grab_options & VM_PAGE_GRAB_SECLUDED) {
2740 vm_page_secluded.grab_for_iokit++;
2741 if (mem) {
2742 vm_page_secluded.grab_for_iokit_success++;
2743 }
2744 }
2745 if (mem) {
2746 VM_CHECK_MEMORYSTATUS;
2747 return mem;
2748 }
2749 }
2750 #else /* CONFIG_SECLUDED_MEMORY */
2751 (void) grab_options;
2752 #endif /* CONFIG_SECLUDED_MEMORY */
2753 }
2754 else {
2755 vm_page_t head;
2756 vm_page_t tail;
2757 unsigned int pages_to_steal;
2758 unsigned int color;
2759 unsigned int clump_end, sub_count;
2760
2761 while ( vm_page_free_count == 0 ) {
2762
2763 lck_mtx_unlock(&vm_page_queue_free_lock);
2764 /*
2765 * must be a privileged thread to be
2766 * in this state since a non-privileged
2767 * thread would have bailed if we were
2768 * under the vm_page_free_reserved mark
2769 */
2770 VM_PAGE_WAIT();
2771 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2772 }
2773
2774 disable_preemption();
2775
2776 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2777 lck_mtx_unlock(&vm_page_queue_free_lock);
2778
2779 /*
2780 * we got preempted and moved to another processor
2781 * or we got preempted and someone else ran and filled the cache
2782 */
2783 goto return_page_from_cpu_list;
2784 }
2785 if (vm_page_free_count <= vm_page_free_reserved)
2786 pages_to_steal = 1;
2787 else {
2788 if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved))
2789 pages_to_steal = vm_free_magazine_refill_limit;
2790 else
2791 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
2792 }
2793 color = PROCESSOR_DATA(current_processor(), start_color);
2794 head = tail = NULL;
2795
2796 vm_page_free_count -= pages_to_steal;
2797 clump_end = sub_count = 0;
2798
2799 while (pages_to_steal--) {
2800
2801 while (vm_page_queue_empty(&vm_page_queue_free[color].qhead))
2802 color = (color + 1) & vm_color_mask;
2803 #if defined(__x86_64__)
2804 vm_page_queue_remove_first_with_clump(&vm_page_queue_free[color].qhead,
2805 mem,
2806 vm_page_t,
2807 pageq,
2808 clump_end);
2809 #else
2810 vm_page_queue_remove_first(&vm_page_queue_free[color].qhead,
2811 mem,
2812 vm_page_t,
2813 pageq);
2814 #endif
2815
2816 assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_Q);
2817
2818 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2819
2820 #if defined(__arm__) || defined(__arm64__)
2821 color = (color + 1) & vm_color_mask;
2822 #else
2823
2824 #if DEVELOPMENT || DEBUG
2825
2826 sub_count++;
2827 if (clump_end) {
2828 vm_clump_update_stats(sub_count);
2829 sub_count = 0;
2830 color = (color + 1) & vm_color_mask;
2831 }
2832 #else
2833 if (clump_end) color = (color + 1) & vm_color_mask;
2834
2835 #endif /* if DEVELOPMENT || DEBUG */
2836
2837 #endif /* if defined(__arm__) || defined(__arm64__) */
2838
2839 if (head == NULL)
2840 head = mem;
2841 else
2842 tail->snext = mem;
2843 tail = mem;
2844
2845 assert(mem->listq.next == 0 && mem->listq.prev == 0);
2846 assert(mem->tabled == FALSE);
2847 assert(mem->vm_page_object == 0);
2848 assert(!mem->laundry);
2849
2850 mem->vm_page_q_state = VM_PAGE_ON_FREE_LOCAL_Q;
2851
2852 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
2853 assert(mem->busy);
2854 assert(!mem->pmapped);
2855 assert(!mem->wpmapped);
2856 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2857 }
2858 #if defined (__x86_64__) && (DEVELOPMENT || DEBUG)
2859 vm_clump_update_stats(sub_count);
2860 #endif
2861 lck_mtx_unlock(&vm_page_queue_free_lock);
2862
2863 #if HIBERNATION
2864 if (hibernate_rebuild_needed) {
2865 panic("%s:%d should not modify cpu->free_pages while hibernating", __FUNCTION__, __LINE__);
2866 }
2867 #endif /* HIBERNATION */
2868 PROCESSOR_DATA(current_processor(), free_pages) = head->snext;
2869 PROCESSOR_DATA(current_processor(), start_color) = color;
2870
2871 /*
2872 * satisfy this request
2873 */
2874 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2875 mem = head;
2876 assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
2877
2878 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2879 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
2880
2881 enable_preemption();
2882 }
2883 /*
2884 * Decide if we should poke the pageout daemon.
2885 * We do this if the free count is less than the low
2886 * water mark, or if the free count is less than the high
2887 * water mark (but above the low water mark) and the inactive
2888 * count is less than its target.
2889 *
2890 * We don't have the counts locked ... if they change a little,
2891 * it doesn't really matter.
2892 */
2893 if ((vm_page_free_count < vm_page_free_min) ||
2894 ((vm_page_free_count < vm_page_free_target) &&
2895 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
2896 thread_wakeup((event_t) &vm_page_free_wanted);
2897
2898 VM_CHECK_MEMORYSTATUS;
2899
2900 if (mem) {
2901 // dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
2902
2903 #if CONFIG_BACKGROUND_QUEUE
2904 vm_page_assign_background_state(mem);
2905 #endif
2906 }
2907 return mem;
2908 }
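
/*
 * Illustrative sketch (editorial addition, not compiled): the canonical
 * grab-with-wait loop.  A non-privileged thread can see VM_PAGE_NULL when
 * the free pool is at or below the reserve; vm_page_wait() blocks until
 * pages may be available again, returning FALSE only if the wait was
 * interrupted.  The helper name and the choice of THREAD_UNINT are
 * assumptions made for this example.
 */
#if 0 /* example only */
static vm_page_t
example_grab_page_can_block(void)
{
	vm_page_t	m;

	while ((m = vm_page_grab()) == VM_PAGE_NULL) {
		if (!vm_page_wait(THREAD_UNINT)) {
			/* defensive: with THREAD_UNINT this should not happen */
			return VM_PAGE_NULL;
		}
	}
	return m;
}
#endif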
2909
2910 #if CONFIG_SECLUDED_MEMORY
2911 vm_page_t
2912 vm_page_grab_secluded(void)
2913 {
2914 vm_page_t mem;
2915 vm_object_t object;
2916 int refmod_state;
2917
2918 if (vm_page_secluded_count == 0) {
2919 /* no secluded pages to grab... */
2920 return VM_PAGE_NULL;
2921 }
2922
2923 /* secluded queue is protected by the VM page queue lock */
2924 vm_page_lock_queues();
2925
2926 if (vm_page_secluded_count == 0) {
2927 /* no secluded pages to grab... */
2928 vm_page_unlock_queues();
2929 return VM_PAGE_NULL;
2930 }
2931
2932 #if 00
2933 /* can we grab from the secluded queue? */
2934 if (vm_page_secluded_count > vm_page_secluded_target ||
2935 (vm_page_secluded_count > 0 &&
2936 task_can_use_secluded_mem(current_task()))) {
2937 /* OK */
2938 } else {
2939 /* can't grab from secluded queue... */
2940 vm_page_unlock_queues();
2941 return VM_PAGE_NULL;
2942 }
2943 #endif
2944
2945 /* we can grab a page from secluded queue! */
2946 assert((vm_page_secluded_count_free +
2947 vm_page_secluded_count_inuse) ==
2948 vm_page_secluded_count);
2949 if (current_task()->task_can_use_secluded_mem) {
2950 assert(num_tasks_can_use_secluded_mem > 0);
2951 }
2952 assert(!vm_page_queue_empty(&vm_page_queue_secluded));
2953 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2954 mem = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
2955 assert(mem->vm_page_q_state == VM_PAGE_ON_SECLUDED_Q);
2956 vm_page_queues_remove(mem, TRUE);
2957
2958 object = VM_PAGE_OBJECT(mem);
2959
2960 assert(!mem->fictitious);
2961 assert(!VM_PAGE_WIRED(mem));
2962 if (object == VM_OBJECT_NULL) {
2963 /* free for grab! */
2964 vm_page_unlock_queues();
2965 vm_page_secluded.grab_success_free++;
2966
2967 assert(mem->busy);
2968 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
2969 assert(VM_PAGE_OBJECT(mem) == VM_OBJECT_NULL);
2970 assert(mem->pageq.next == 0);
2971 assert(mem->pageq.prev == 0);
2972 assert(mem->listq.next == 0);
2973 assert(mem->listq.prev == 0);
2974 #if CONFIG_BACKGROUND_QUEUE
2975 assert(mem->vm_page_on_backgroundq == 0);
2976 assert(mem->vm_page_backgroundq.next == 0);
2977 assert(mem->vm_page_backgroundq.prev == 0);
2978 #endif /* CONFIG_BACKGROUND_QUEUE */
2979 return mem;
2980 }
2981
2982 assert(!object->internal);
2983 // vm_page_pageable_external_count--;
2984
2985 if (!vm_object_lock_try(object)) {
2986 // printf("SECLUDED: page %p: object %p locked\n", mem, object);
2987 vm_page_secluded.grab_failure_locked++;
2988 reactivate_secluded_page:
2989 vm_page_activate(mem);
2990 vm_page_unlock_queues();
2991 return VM_PAGE_NULL;
2992 }
2993 if (mem->busy ||
2994 mem->cleaning ||
2995 mem->laundry) {
2996 /* can't steal page in this state... */
2997 vm_object_unlock(object);
2998 vm_page_secluded.grab_failure_state++;
2999 goto reactivate_secluded_page;
3000 }
3001
3002 mem->busy = TRUE;
3003 refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
3004 if (refmod_state & VM_MEM_REFERENCED) {
3005 mem->reference = TRUE;
3006 }
3007 if (refmod_state & VM_MEM_MODIFIED) {
3008 SET_PAGE_DIRTY(mem, FALSE);
3009 }
3010 if (mem->dirty || mem->precious) {
3011 /* can't grab a dirty page; re-activate */
3012 // printf("SECLUDED: dirty page %p\n", mem);
3013 PAGE_WAKEUP_DONE(mem);
3014 vm_page_secluded.grab_failure_dirty++;
3015 vm_object_unlock(object);
3016 goto reactivate_secluded_page;
3017 }
3018 if (mem->reference) {
3019 /* it's been used but we do need to grab a page... */
3020 }
3021
3022 vm_page_unlock_queues();
3023
3024 /* finish what vm_page_free() would have done... */
3025 vm_page_free_prepare_object(mem, TRUE);
3026 vm_object_unlock(object);
3027 object = VM_OBJECT_NULL;
3028 if (vm_page_free_verify) {
3029 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
3030 }
3031 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
3032 vm_page_secluded.grab_success_other++;
3033
3034 assert(mem->busy);
3035 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
3036 assert(VM_PAGE_OBJECT(mem) == VM_OBJECT_NULL);
3037 assert(mem->pageq.next == 0);
3038 assert(mem->pageq.prev == 0);
3039 assert(mem->listq.next == 0);
3040 assert(mem->listq.prev == 0);
3041 #if CONFIG_BACKGROUND_QUEUE
3042 assert(mem->vm_page_on_backgroundq == 0);
3043 assert(mem->vm_page_backgroundq.next == 0);
3044 assert(mem->vm_page_backgroundq.prev == 0);
3045 #endif /* CONFIG_BACKGROUND_QUEUE */
3046
3047 return mem;
3048 }
3049 #endif /* CONFIG_SECLUDED_MEMORY */
3050
3051 /*
3052 * vm_page_release:
3053 *
3054 * Return a page to the free list.
3055 */
3056
3057 void
3058 vm_page_release(
3059 vm_page_t mem,
3060 boolean_t page_queues_locked)
3061 {
3062 unsigned int color;
3063 int need_wakeup = 0;
3064 int need_priv_wakeup = 0;
3065 #if CONFIG_SECLUDED_MEMORY
3066 int need_secluded_wakeup = 0;
3067 #endif /* CONFIG_SECLUDED_MEMORY */
3068
3069 if (page_queues_locked) {
3070 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3071 } else {
3072 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
3073 }
3074
3075 assert(!mem->private && !mem->fictitious);
3076 if (vm_page_free_verify) {
3077 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
3078 }
3079 // dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
3080
3081 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
3082
3083 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3084
3085 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
3086 assert(mem->busy);
3087 assert(!mem->laundry);
3088 assert(mem->vm_page_object == 0);
3089 assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
3090 assert(mem->listq.next == 0 && mem->listq.prev == 0);
3091 #if CONFIG_BACKGROUND_QUEUE
3092 assert(mem->vm_page_backgroundq.next == 0 &&
3093 mem->vm_page_backgroundq.prev == 0 &&
3094 mem->vm_page_on_backgroundq == FALSE);
3095 #endif
3096 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
3097 vm_lopage_free_count < vm_lopage_free_limit &&
3098 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3099 /*
3100 * this exists to support hardware controllers
3101 * incapable of generating DMAs with more than 32 bits
3102 * of address on platforms with physical memory > 4G...
3103 */
3104 vm_page_queue_enter_first(&vm_lopage_queue_free,
3105 mem,
3106 vm_page_t,
3107 pageq);
3108 vm_lopage_free_count++;
3109
3110 if (vm_lopage_free_count >= vm_lopage_free_limit)
3111 vm_lopage_refill = FALSE;
3112
3113 mem->vm_page_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
3114 mem->lopage = TRUE;
3115 #if CONFIG_SECLUDED_MEMORY
3116 } else if (vm_page_free_count > vm_page_free_reserved &&
3117 vm_page_secluded_count < vm_page_secluded_target &&
3118 num_tasks_can_use_secluded_mem == 0) {
3119 /*
3120 * XXX FBDP TODO: also avoid refilling secluded queue
3121 * when some IOKit objects are already grabbing from it...
3122 */
3123 if (!page_queues_locked) {
3124 if (!vm_page_trylock_queues()) {
3125 /* take locks in right order */
3126 lck_mtx_unlock(&vm_page_queue_free_lock);
3127 vm_page_lock_queues();
3128 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3129 }
3130 }
3131 mem->lopage = FALSE;
3132 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3133 vm_page_queue_enter_first(&vm_page_queue_secluded,
3134 mem,
3135 vm_page_t,
3136 pageq);
3137 mem->vm_page_q_state = VM_PAGE_ON_SECLUDED_Q;
3138 vm_page_secluded_count++;
3139 vm_page_secluded_count_free++;
3140 if (!page_queues_locked) {
3141 vm_page_unlock_queues();
3142 }
3143 LCK_MTX_ASSERT(&vm_page_queue_free_lock, LCK_MTX_ASSERT_OWNED);
3144 if (vm_page_free_wanted_secluded > 0) {
3145 vm_page_free_wanted_secluded--;
3146 need_secluded_wakeup = 1;
3147 }
3148 #endif /* CONFIG_SECLUDED_MEMORY */
3149 } else {
3150 mem->lopage = FALSE;
3151 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
3152
3153 color = VM_PAGE_GET_COLOR(mem);
3154 #if defined(__x86_64__)
3155 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead,
3156 mem,
3157 vm_page_t,
3158 pageq);
3159 #else
3160 vm_page_queue_enter(&vm_page_queue_free[color].qhead,
3161 mem,
3162 vm_page_t,
3163 pageq);
3164 #endif
3165 vm_page_free_count++;
3166 /*
3167 * Check if we should wake up someone waiting for a page.
3168 * But don't bother waking them unless they can allocate.
3169 *
3170 * We wakeup only one thread, to prevent starvation.
3171 * Because the scheduling system handles wait queues FIFO,
3172 * if we wake up all waiting threads, one greedy thread
3173 * can starve multiple well-behaved threads. When the threads
3174 * all wake up, the greedy thread runs first, grabs the page,
3175 * and waits for another page. It will be the first to run
3176 * when the next page is freed.
3177 *
3178 * However, there is a slight danger here.
3179 * The thread we wake might not use the free page.
3180 * Then the other threads could wait indefinitely
3181 * while the page goes unused. To forestall this,
3182 * the pageout daemon will keep making free pages
3183 * as long as vm_page_free_wanted is non-zero.
3184 */
3185
3186 assert(vm_page_free_count > 0);
3187 if (vm_page_free_wanted_privileged > 0) {
3188 vm_page_free_wanted_privileged--;
3189 need_priv_wakeup = 1;
3190 #if CONFIG_SECLUDED_MEMORY
3191 } else if (vm_page_free_wanted_secluded > 0 &&
3192 vm_page_free_count > vm_page_free_reserved) {
3193 vm_page_free_wanted_secluded--;
3194 need_secluded_wakeup = 1;
3195 #endif /* CONFIG_SECLUDED_MEMORY */
3196 } else if (vm_page_free_wanted > 0 &&
3197 vm_page_free_count > vm_page_free_reserved) {
3198 vm_page_free_wanted--;
3199 need_wakeup = 1;
3200 }
3201 }
3202 lck_mtx_unlock(&vm_page_queue_free_lock);
3203
3204 if (need_priv_wakeup)
3205 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
3206 #if CONFIG_SECLUDED_MEMORY
3207 else if (need_secluded_wakeup)
3208 thread_wakeup_one((event_t) &vm_page_free_wanted_secluded);
3209 #endif /* CONFIG_SECLUDED_MEMORY */
3210 else if (need_wakeup)
3211 thread_wakeup_one((event_t) &vm_page_free_count);
3212
3213 VM_CHECK_MEMORYSTATUS;
3214 }
3215
3216 /*
3217 * This version of vm_page_release() is used only at startup
3218 * when we are single-threaded and pages are being released
3219 * for the first time. Hence, no locking is needed and unnecessary checks are skipped.
3220 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
3221 */
3222 void
3223 vm_page_release_startup(
3224 vm_page_t mem)
3225 {
3226 vm_page_queue_t queue_free;
3227
3228 if (vm_lopage_free_count < vm_lopage_free_limit &&
3229 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3230 mem->lopage = TRUE;
3231 mem->vm_page_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
3232 vm_lopage_free_count++;
3233 queue_free = &vm_lopage_queue_free;
3234 #if CONFIG_SECLUDED_MEMORY
3235 } else if (vm_page_secluded_count < vm_page_secluded_target) {
3236 mem->lopage = FALSE;
3237 mem->vm_page_q_state = VM_PAGE_ON_SECLUDED_Q;
3238 vm_page_secluded_count++;
3239 vm_page_secluded_count_free++;
3240 queue_free = &vm_page_queue_secluded;
3241 #endif /* CONFIG_SECLUDED_MEMORY */
3242 } else {
3243 mem->lopage = FALSE;
3244 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
3245 vm_page_free_count++;
3246 queue_free = &vm_page_queue_free[VM_PAGE_GET_COLOR(mem)].qhead;
3247 }
3248 if (mem->vm_page_q_state == VM_PAGE_ON_FREE_Q) {
3249 #if defined(__x86_64__)
3250 vm_page_queue_enter_clump(queue_free, mem, vm_page_t, pageq);
3251 #else
3252 vm_page_queue_enter(queue_free, mem, vm_page_t, pageq);
3253 #endif
3254 } else
3255 vm_page_queue_enter_first(queue_free, mem, vm_page_t, pageq);
3256 }
3257
3258 /*
3259 * vm_page_wait:
3260 *
3261 * Wait for a page to become available.
3262 * If there are plenty of free pages, then we don't sleep.
3263 *
3264 * Returns:
3265 * TRUE: There may be another page, try again
3266 * FALSE: We were interrupted out of our wait, don't try again
3267 */
3268
3269 boolean_t
3270 vm_page_wait(
3271 int interruptible )
3272 {
3273 /*
3274 * We can't use vm_page_free_reserved to make this
3275 * determination. Consider: some thread might
3276 * need to allocate two pages. The first allocation
3277 * succeeds, the second fails. After the first page is freed,
3278 * a call to vm_page_wait must really block.
3279 */
3280 kern_return_t wait_result;
3281 int need_wakeup = 0;
3282 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
3283
3284 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3285
3286 if (is_privileged && vm_page_free_count) {
3287 lck_mtx_unlock(&vm_page_queue_free_lock);
3288 return TRUE;
3289 }
3290
3291 if (vm_page_free_count >= vm_page_free_target) {
3292 lck_mtx_unlock(&vm_page_queue_free_lock);
3293 return TRUE;
3294 }
3295
3296 if (is_privileged) {
3297 if (vm_page_free_wanted_privileged++ == 0)
3298 need_wakeup = 1;
3299 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
3300 #if CONFIG_SECLUDED_MEMORY
3301 } else if (secluded_for_apps &&
3302 task_can_use_secluded_mem(current_task())) {
3303 #if 00
3304 /* XXX FBDP: need pageq lock for this... */
3305 /* XXX FBDP: might wait even if pages available, */
3306 /* XXX FBDP: hopefully not for too long... */
3307 if (vm_page_secluded_count > 0) {
3308 lck_mtx_unlock(&vm_page_queue_free_lock);
3309 return TRUE;
3310 }
3311 #endif
3312 if (vm_page_free_wanted_secluded++ == 0) {
3313 need_wakeup = 1;
3314 }
3315 wait_result = assert_wait(
3316 (event_t)&vm_page_free_wanted_secluded,
3317 interruptible);
3318 #endif /* CONFIG_SECLUDED_MEMORY */
3319 } else {
3320 if (vm_page_free_wanted++ == 0)
3321 need_wakeup = 1;
3322 wait_result = assert_wait((event_t)&vm_page_free_count,
3323 interruptible);
3324 }
3325 lck_mtx_unlock(&vm_page_queue_free_lock);
3326 counter(c_vm_page_wait_block++);
3327
3328 if (need_wakeup)
3329 thread_wakeup((event_t)&vm_page_free_wanted);
3330
3331 if (wait_result == THREAD_WAITING) {
3332 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
3333 vm_page_free_wanted_privileged,
3334 vm_page_free_wanted,
3335 #if CONFIG_SECLUDED_MEMORY
3336 vm_page_free_wanted_secluded,
3337 #else /* CONFIG_SECLUDED_MEMORY */
3338 0,
3339 #endif /* CONFIG_SECLUDED_MEMORY */
3340 0);
3341 wait_result = thread_block(THREAD_CONTINUE_NULL);
3342 VM_DEBUG_EVENT(vm_page_wait_block,
3343 VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
3344 }
3345
3346 return (wait_result == THREAD_AWAKENED);
3347 }
3348
3349 /*
3350 * vm_page_alloc:
3351 *
3352 * Allocate and return a memory cell associated
3353 * with this VM object/offset pair.
3354 *
3355 * Object must be locked.
3356 */
3357
3358 vm_page_t
3359 vm_page_alloc(
3360 vm_object_t object,
3361 vm_object_offset_t offset)
3362 {
3363 vm_page_t mem;
3364 int grab_options;
3365
3366 vm_object_lock_assert_exclusive(object);
3367 grab_options = 0;
3368 #if CONFIG_SECLUDED_MEMORY
3369 if (object->can_grab_secluded) {
3370 grab_options |= VM_PAGE_GRAB_SECLUDED;
3371 }
3372 #endif /* CONFIG_SECLUDED_MEMORY */
3373 mem = vm_page_grab_options(grab_options);
3374 if (mem == VM_PAGE_NULL)
3375 return VM_PAGE_NULL;
3376
3377 vm_page_insert(mem, object, offset);
3378
3379 return(mem);
3380 }
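
/*
 * Illustrative sketch (editorial addition, not compiled): allocating a
 * page for a locked object, dropping the object lock across
 * vm_page_wait() and re-checking residency afterwards, since another
 * thread may have filled the offset while the lock was released.  The
 * helper name is an assumption made for this example, and
 * vm_object_lock()/vm_object_unlock() are assumed to be the usual object
 * lock entry points; the locking requirements themselves come from
 * vm_page_alloc() and vm_page_lookup() above.
 */
#if 0 /* example only */
static vm_page_t
example_alloc_resident_page(
	vm_object_t		object,		/* locked exclusive by caller */
	vm_object_offset_t	offset)
{
	vm_page_t	m;

	for (;;) {
		m = vm_page_lookup(object, offset);
		if (m != VM_PAGE_NULL)
			return m;		/* already resident */

		m = vm_page_alloc(object, offset);
		if (m != VM_PAGE_NULL)
			return m;		/* grabbed and inserted */

		/* no free pages: drop the lock, wait, and retry */
		vm_object_unlock(object);
		if (!vm_page_wait(THREAD_UNINT)) {
			vm_object_lock(object);
			return VM_PAGE_NULL;
		}
		vm_object_lock(object);
	}
}
#endif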
3381
3382 /*
3383 * vm_page_alloc_guard:
3384 *
3385 * Allocate a fictitious page which will be used
3386 * as a guard page. The page will be inserted into
3387 * the object and returned to the caller.
3388 */
3389
3390 vm_page_t
3391 vm_page_alloc_guard(
3392 vm_object_t object,
3393 vm_object_offset_t offset)
3394 {
3395 vm_page_t mem;
3396
3397 vm_object_lock_assert_exclusive(object);
3398 mem = vm_page_grab_guard();
3399 if (mem == VM_PAGE_NULL)
3400 return VM_PAGE_NULL;
3401
3402 vm_page_insert(mem, object, offset);
3403
3404 return(mem);
3405 }
3406
3407
3408 counter(unsigned int c_laundry_pages_freed = 0;)
3409
3410 /*
3411 * vm_page_free_prepare:
3412 *
3413 * Removes page from any queue it may be on
3414 * and disassociates it from its VM object.
3415 *
3416 * Object and page queues must be locked prior to entry.
3417 */
3418 static void
3419 vm_page_free_prepare(
3420 vm_page_t mem)
3421 {
3422 vm_page_free_prepare_queues(mem);
3423 vm_page_free_prepare_object(mem, TRUE);
3424 }
3425
3426
3427 void
3428 vm_page_free_prepare_queues(
3429 vm_page_t mem)
3430 {
3431 vm_object_t m_object;
3432
3433 VM_PAGE_CHECK(mem);
3434
3435 assert(mem->vm_page_q_state != VM_PAGE_ON_FREE_Q);
3436 assert(!mem->cleaning);
3437 m_object = VM_PAGE_OBJECT(mem);
3438
3439 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3440 if (m_object) {
3441 vm_object_lock_assert_exclusive(m_object);
3442 }
3443 if (mem->laundry) {
3444 /*
3445 * We may have to free a page while it's being laundered
3446 * if we lost its pager (due to a forced unmount, for example).
3447 * We need to call vm_pageout_steal_laundry() before removing
3448 * the page from its VM object, so that we can remove it
3449 * from its pageout queue and adjust the laundry accounting
3450 */
3451 vm_pageout_steal_laundry(mem, TRUE);
3452 counter(++c_laundry_pages_freed);
3453 }
3454
3455 vm_page_queues_remove(mem, TRUE);
3456
3457 if (VM_PAGE_WIRED(mem)) {
3458 assert(mem->wire_count > 0);
3459
3460 if (m_object) {
3461
3462 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
3463 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
3464 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
3465
3466 assert(m_object->resident_page_count >=
3467 m_object->wired_page_count);
3468
3469 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
3470 OSAddAtomic(+1, &vm_page_purgeable_count);
3471 assert(vm_page_purgeable_wired_count > 0);
3472 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
3473 }
3474 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
3475 m_object->purgable == VM_PURGABLE_EMPTY) &&
3476 m_object->vo_purgeable_owner != TASK_NULL) {
3477 task_t owner;
3478
3479 owner = m_object->vo_purgeable_owner;
3480 /*
3481 * While wired, this page was accounted
3482 * as "non-volatile" but it should now
3483 * be accounted as "volatile".
3484 */
3485 /* one less "non-volatile"... */
3486 ledger_debit(owner->ledger,
3487 task_ledgers.purgeable_nonvolatile,
3488 PAGE_SIZE);
3489 /* ... and "phys_footprint" */
3490 ledger_debit(owner->ledger,
3491 task_ledgers.phys_footprint,
3492 PAGE_SIZE);
3493 /* one more "volatile" */
3494 ledger_credit(owner->ledger,
3495 task_ledgers.purgeable_volatile,
3496 PAGE_SIZE);
3497 }
3498 }
3499 if (!mem->private && !mem->fictitious)
3500 vm_page_wire_count--;
3501
3502 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
3503 mem->wire_count = 0;
3504 assert(!mem->gobbled);
3505 } else if (mem->gobbled) {
3506 if (!mem->private && !mem->fictitious)
3507 vm_page_wire_count--;
3508 vm_page_gobble_count--;
3509 }
3510 }
3511
3512
3513 void
3514 vm_page_free_prepare_object(
3515 vm_page_t mem,
3516 boolean_t remove_from_hash)
3517 {
3518 if (mem->tabled)
3519 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
3520
3521 PAGE_WAKEUP(mem); /* clears wanted */
3522
3523 if (mem->private) {
3524 mem->private = FALSE;
3525 mem->fictitious = TRUE;
3526 VM_PAGE_SET_PHYS_PAGE(mem, vm_page_fictitious_addr);
3527 }
3528 if ( !mem->fictitious) {
3529 assert(mem->pageq.next == 0);
3530 assert(mem->pageq.prev == 0);
3531 assert(mem->listq.next == 0);
3532 assert(mem->listq.prev == 0);
3533 #if CONFIG_BACKGROUND_QUEUE
3534 assert(mem->vm_page_backgroundq.next == 0);
3535 assert(mem->vm_page_backgroundq.prev == 0);
3536 #endif /* CONFIG_BACKGROUND_QUEUE */
3537 assert(mem->next_m == 0);
3538 vm_page_init(mem, VM_PAGE_GET_PHYS_PAGE(mem), mem->lopage);
3539 }
3540 }
3541
3542
3543 /*
3544 * vm_page_free:
3545 *
3546 * Returns the given page to the free list,
3547 * disassociating it from any VM object.
3548 *
3549 * Object and page queues must be locked prior to entry.
3550 */
3551 void
3552 vm_page_free(
3553 vm_page_t mem)
3554 {
3555 vm_page_free_prepare(mem);
3556
3557 if (mem->fictitious) {
3558 vm_page_release_fictitious(mem);
3559 } else {
3560 vm_page_release(mem,
3561 TRUE); /* page queues are locked */
3562 }
3563 }
3564
3565
3566 void
3567 vm_page_free_unlocked(
3568 vm_page_t mem,
3569 boolean_t remove_from_hash)
3570 {
3571 vm_page_lockspin_queues();
3572 vm_page_free_prepare_queues(mem);
3573 vm_page_unlock_queues();
3574
3575 vm_page_free_prepare_object(mem, remove_from_hash);
3576
3577 if (mem->fictitious) {
3578 vm_page_release_fictitious(mem);
3579 } else {
3580 vm_page_release(mem, FALSE); /* page queues are not locked */
3581 }
3582 }
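
/*
 * Illustrative sketch (editorial addition, not compiled): batching page
 * frees to amortize the free-list lock, as described in the block comment
 * for vm_page_free_list() just below.  Pages are chained through their
 * snext field and handed over in a single call.  The helper name is an
 * assumption made for this example, and the caller is assumed to have
 * already taken each page off its paging queues (see
 * vm_page_free_prepare_queues()).
 */
#if 0 /* example only */
static void
example_free_page_chain(
	vm_page_t	*page_array,
	unsigned int	page_count)
{
	vm_page_t	local_freeq = VM_PAGE_NULL;
	unsigned int	i;

	for (i = 0; i < page_count; i++) {
		page_array[i]->snext = local_freeq;
		local_freeq = page_array[i];
	}
	if (local_freeq != VM_PAGE_NULL) {
		/*
		 * TRUE: let vm_page_free_list() disassociate each page
		 * from its object before putting it on the free list
		 */
		vm_page_free_list(local_freeq, TRUE);
	}
}
#endif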
3583
3584
3585 /*
3586 * Free a list of pages. The list can be up to several hundred pages,
3587 * as batched up by vm_pageout_scan().
3588 * The big win is not having to take the free list lock once
3589 * per page.
3590 *
3591 * The VM page queues lock (vm_page_queue_lock) should NOT be held.
3592 * The VM page free queues lock (vm_page_queue_free_lock) should NOT be held.
3593 */
3594 void
3595 vm_page_free_list(
3596 vm_page_t freeq,
3597 boolean_t prepare_object)
3598 {
3599 vm_page_t mem;
3600 vm_page_t nxt;
3601 vm_page_t local_freeq;
3602 int pg_count;
3603
3604 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
3605 LCK_MTX_ASSERT(&vm_page_queue_free_lock, LCK_MTX_ASSERT_NOTOWNED);
3606
3607 while (freeq) {
3608
3609 pg_count = 0;
3610 local_freeq = VM_PAGE_NULL;
3611 mem = freeq;
3612
3613 /*
3614 * break up the processing into smaller chunks so
3615 * that we can 'pipeline' the pages onto the
3616 * free list without introducing too much
3617 * contention on the global free queue lock
3618 */
3619 while (mem && pg_count < 64) {
3620
3621 assert((mem->vm_page_q_state == VM_PAGE_NOT_ON_Q) ||
3622 (mem->vm_page_q_state == VM_PAGE_IS_WIRED));
3623 #if CONFIG_BACKGROUND_QUEUE
3624 assert(mem->vm_page_backgroundq.next == 0 &&
3625 mem->vm_page_backgroundq.prev == 0 &&
3626 mem->vm_page_on_backgroundq == FALSE);
3627 #endif
3628 nxt = mem->snext;
3629 mem->snext = NULL;
3630 assert(mem->pageq.prev == 0);
3631
3632 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
3633 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
3634 }
3635 if (prepare_object == TRUE)
3636 vm_page_free_prepare_object(mem, TRUE);
3637
3638 if (!mem->fictitious) {
3639 assert(mem->busy);
3640
3641 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
3642 vm_lopage_free_count < vm_lopage_free_limit &&
3643 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3644 vm_page_release(mem, FALSE); /* page queues are not locked */
3645 #if CONFIG_SECLUDED_MEMORY
3646 } else if (vm_page_secluded_count < vm_page_secluded_target &&
3647 num_tasks_can_use_secluded_mem == 0) {
3648 vm_page_release(mem,
3649 FALSE); /* page queues are not locked */
3650 #endif /* CONFIG_SECLUDED_MEMORY */
3651 } else {
3652 /*
3653 * IMPORTANT: we can't set the page "free" here
3654 * because that would make the page eligible for
3655 * a physically-contiguous allocation (see
3656 * vm_page_find_contiguous()) right away (we don't
3657 * hold the vm_page_queue_free lock). That would
3658 * cause trouble because the page is not actually
3659 * in the free queue yet...
3660 */
3661 mem->snext = local_freeq;
3662 local_freeq = mem;
3663 pg_count++;
3664
3665 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
3666 }
3667 } else {
3668 assert(VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_fictitious_addr ||
3669 VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_guard_addr);
3670 vm_page_release_fictitious(mem);
3671 }
3672 mem = nxt;
3673 }
3674 freeq = mem;
3675
3676 if ( (mem = local_freeq) ) {
3677 unsigned int avail_free_count;
3678 unsigned int need_wakeup = 0;
3679 unsigned int need_priv_wakeup = 0;
3680 #if CONFIG_SECLUDED_MEMORY
3681 unsigned int need_wakeup_secluded = 0;
3682 #endif /* CONFIG_SECLUDED_MEMORY */
3683
3684 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3685
3686 while (mem) {
3687 int color;
3688
3689 nxt = mem->snext;
3690
3691 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
3692 assert(mem->busy);
3693 mem->lopage = FALSE;
3694 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
3695
3696 color = VM_PAGE_GET_COLOR(mem);
3697 #if defined(__x86_64__)
3698 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead,
3699 mem,
3700 vm_page_t,
3701 pageq);
3702 #else
3703 vm_page_queue_enter(&vm_page_queue_free[color].qhead,
3704 mem,
3705 vm_page_t,
3706 pageq);
3707 #endif
3708 mem = nxt;
3709 }
3710 vm_page_free_count += pg_count;
3711 avail_free_count = vm_page_free_count;
3712
3713 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
3714
3715 if (avail_free_count < vm_page_free_wanted_privileged) {
3716 need_priv_wakeup = avail_free_count;
3717 vm_page_free_wanted_privileged -= avail_free_count;
3718 avail_free_count = 0;
3719 } else {
3720 need_priv_wakeup = vm_page_free_wanted_privileged;
3721 avail_free_count -= vm_page_free_wanted_privileged;
3722 vm_page_free_wanted_privileged = 0;
3723 }
3724 }
3725 #if CONFIG_SECLUDED_MEMORY
3726 if (vm_page_free_wanted_secluded > 0 &&
3727 avail_free_count > vm_page_free_reserved) {
3728 unsigned int available_pages;
3729 available_pages = (avail_free_count -
3730 vm_page_free_reserved);
3731 if (available_pages <
3732 vm_page_free_wanted_secluded) {
3733 need_wakeup_secluded = available_pages;
3734 vm_page_free_wanted_secluded -=
3735 available_pages;
3736 avail_free_count -= available_pages;
3737 } else {
3738 need_wakeup_secluded =
3739 vm_page_free_wanted_secluded;
3740 avail_free_count -=
3741 vm_page_free_wanted_secluded;
3742 vm_page_free_wanted_secluded = 0;
3743 }
3744 }
3745 #endif /* CONFIG_SECLUDED_MEMORY */
3746 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
3747 unsigned int available_pages;
3748
3749 available_pages = avail_free_count - vm_page_free_reserved;
3750
3751 if (available_pages >= vm_page_free_wanted) {
3752 need_wakeup = vm_page_free_wanted;
3753 vm_page_free_wanted = 0;
3754 } else {
3755 need_wakeup = available_pages;
3756 vm_page_free_wanted -= available_pages;
3757 }
3758 }
3759 lck_mtx_unlock(&vm_page_queue_free_lock);
3760
3761 if (need_priv_wakeup != 0) {
3762 /*
3763 * There shouldn't be that many VM-privileged threads,
3764 * so let's wake them all up, even if we don't quite
3765 * have enough pages to satisfy them all.
3766 */
3767 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
3768 }
3769 #if CONFIG_SECLUDED_MEMORY
3770 if (need_wakeup_secluded != 0 &&
3771 vm_page_free_wanted_secluded == 0) {
3772 thread_wakeup((event_t)
3773 &vm_page_free_wanted_secluded);
3774 } else {
3775 for (;
3776 need_wakeup_secluded != 0;
3777 need_wakeup_secluded--) {
3778 thread_wakeup_one(
3779 (event_t)
3780 &vm_page_free_wanted_secluded);
3781 }
3782 }
3783 #endif /* CONFIG_SECLUDED_MEMORY */
3784 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
3785 /*
3786 * We don't expect to have any more waiters
3787 * after this, so let's wake them all up at
3788 * once.
3789 */
3790 thread_wakeup((event_t) &vm_page_free_count);
3791 } else for (; need_wakeup != 0; need_wakeup--) {
3792 /*
3793 * Wake up one waiter per page we just released.
3794 */
3795 thread_wakeup_one((event_t) &vm_page_free_count);
3796 }
3797
3798 VM_CHECK_MEMORYSTATUS;
3799 }
3800 }
3801 }
3802
3803
3804 /*
3805 * vm_page_wire:
3806 *
3807 * Mark this page as wired down by yet
3808 * another map, removing it from paging queues
3809 * as necessary.
3810 *
3811 * The page's object and the page queues must be locked.
3812 */
3813
3814
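/*
 *	Informal caller sketch (assumed, not taken from a real call site):
 *
 *		vm_object_lock(object);
 *		vm_page_lockspin_queues();
 *		vm_page_wire(m, VM_KERN_MEMORY_NONE, TRUE);
 *		vm_page_unlock_queues();
 *		vm_object_unlock(object);
 *
 *	The tag is charged to the object's wired-page accounting via
 *	VM_OBJECT_WIRED_PAGE_UPDATE_END() below.
 */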
3815 void
3816 vm_page_wire(
3817 vm_page_t mem,
3818 vm_tag_t tag,
3819 boolean_t check_memorystatus)
3820 {
3821 vm_object_t m_object;
3822
3823 m_object = VM_PAGE_OBJECT(mem);
3824
3825 // dbgLog(current_thread(), mem->offset, m_object, 1); /* (TEST/DEBUG) */
3826
3827 VM_PAGE_CHECK(mem);
3828 if (m_object) {
3829 vm_object_lock_assert_exclusive(m_object);
3830 } else {
3831 /*
3832 * In theory, the page should be in an object before it
3833 * gets wired, since we need to hold the object lock
3834 * to update some fields in the page structure.
3835 * However, some code (i386 pmap, for example) might want
3836 * to wire a page before it gets inserted into an object.
3837 * That's somewhat OK, as long as nobody else can get to
3838 * that page and update it at the same time.
3839 */
3840 }
3841 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3842 if ( !VM_PAGE_WIRED(mem)) {
3843
3844 if (mem->laundry)
3845 vm_pageout_steal_laundry(mem, TRUE);
3846
3847 vm_page_queues_remove(mem, TRUE);
3848
3849 assert(mem->wire_count == 0);
3850 mem->vm_page_q_state = VM_PAGE_IS_WIRED;
3851
3852 if (m_object) {
3853
3854 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
3855 VM_OBJECT_WIRED_PAGE_ADD(m_object, mem);
3856 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, tag);
3857
3858 assert(m_object->resident_page_count >=
3859 m_object->wired_page_count);
3860 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
3861 assert(vm_page_purgeable_count > 0);
3862 OSAddAtomic(-1, &vm_page_purgeable_count);
3863 OSAddAtomic(1, &vm_page_purgeable_wired_count);
3864 }
3865 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
3866 m_object->purgable == VM_PURGABLE_EMPTY) &&
3867 m_object->vo_purgeable_owner != TASK_NULL) {
3868 task_t owner;
3869
3870 owner = m_object->vo_purgeable_owner;
3871 /* less volatile bytes */
3872 ledger_debit(owner->ledger,
3873 task_ledgers.purgeable_volatile,
3874 PAGE_SIZE);
3875 /* more not-quite-volatile bytes */
3876 ledger_credit(owner->ledger,
3877 task_ledgers.purgeable_nonvolatile,
3878 PAGE_SIZE);
3879 /* more footprint */
3880 ledger_credit(owner->ledger,
3881 task_ledgers.phys_footprint,
3882 PAGE_SIZE);
3883 }
3884 if (m_object->all_reusable) {
3885 /*
3886 * Wired pages are not counted as "re-usable"
3887 * in "all_reusable" VM objects, so nothing
3888 * to do here.
3889 */
3890 } else if (mem->reusable) {
3891 /*
3892 * This page is not "re-usable" when it's
3893 * wired, so adjust its state and the
3894 * accounting.
3895 */
3896 vm_object_reuse_pages(m_object,
3897 mem->offset,
3898 mem->offset+PAGE_SIZE_64,
3899 FALSE);
3900 }
3901 }
3902 assert(!mem->reusable);
3903
3904 if (!mem->private && !mem->fictitious && !mem->gobbled)
3905 vm_page_wire_count++;
3906 if (mem->gobbled)
3907 vm_page_gobble_count--;
3908 mem->gobbled = FALSE;
3909
3910 if (check_memorystatus == TRUE) {
3911 VM_CHECK_MEMORYSTATUS;
3912 }
3913 }
3914 assert(!mem->gobbled);
3915 assert(mem->vm_page_q_state == VM_PAGE_IS_WIRED);
3916 mem->wire_count++;
3917 if (__improbable(mem->wire_count == 0)) {
3918 panic("vm_page_wire(%p): wire_count overflow", mem);
3919 }
3920 VM_PAGE_CHECK(mem);
3921 }
3922
3923 /*
3924 * vm_page_unwire:
3925 *
3926 * Release one wiring of this page, potentially
3927 * enabling it to be paged again.
3928 *
3929 * The page's object and the page queues must be locked.
3930 */
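/*
 *	Note on "queueit": when the last wiring is dropped, the page is put
 *	back on a pageable queue only if queueit is TRUE -- deactivated if
 *	its object is VM_PURGABLE_EMPTY, activated otherwise (see below).
 */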
3931 void
3932 vm_page_unwire(
3933 vm_page_t mem,
3934 boolean_t queueit)
3935 {
3936 vm_object_t m_object;
3937
3938 m_object = VM_PAGE_OBJECT(mem);
3939
3940 // dbgLog(current_thread(), mem->offset, m_object, 0); /* (TEST/DEBUG) */
3941
3942 VM_PAGE_CHECK(mem);
3943 assert(VM_PAGE_WIRED(mem));
3944 assert(mem->wire_count > 0);
3945 assert(!mem->gobbled);
3946 assert(m_object != VM_OBJECT_NULL);
3947 vm_object_lock_assert_exclusive(m_object);
3948 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3949 if (--mem->wire_count == 0) {
3950
3951 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
3952
3953 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
3954 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
3955 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
3956 if (!mem->private && !mem->fictitious) {
3957 vm_page_wire_count--;
3958 }
3959
3960 assert(m_object->resident_page_count >=
3961 m_object->wired_page_count);
3962 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
3963 OSAddAtomic(+1, &vm_page_purgeable_count);
3964 assert(vm_page_purgeable_wired_count > 0);
3965 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
3966 }
3967 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
3968 m_object->purgable == VM_PURGABLE_EMPTY) &&
3969 m_object->vo_purgeable_owner != TASK_NULL) {
3970 task_t owner;
3971
3972 owner = m_object->vo_purgeable_owner;
3973 /* more volatile bytes */
3974 ledger_credit(owner->ledger,
3975 task_ledgers.purgeable_volatile,
3976 PAGE_SIZE);
3977 /* less not-quite-volatile bytes */
3978 ledger_debit(owner->ledger,
3979 task_ledgers.purgeable_nonvolatile,
3980 PAGE_SIZE);
3981 /* less footprint */
3982 ledger_debit(owner->ledger,
3983 task_ledgers.phys_footprint,
3984 PAGE_SIZE);
3985 }
3986 assert(m_object != kernel_object);
3987 assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
3988
3989 if (queueit == TRUE) {
3990 if (m_object->purgable == VM_PURGABLE_EMPTY) {
3991 vm_page_deactivate(mem);
3992 } else {
3993 vm_page_activate(mem);
3994 }
3995 }
3996
3997 VM_CHECK_MEMORYSTATUS;
3998
3999 }
4000 VM_PAGE_CHECK(mem);
4001 }
4002
4003 /*
4004 * vm_page_deactivate:
4005 *
4006 * Returns the given page to the inactive list,
4007 * indicating that no physical maps have access
4008 * to this page. [Used by the physical mapping system.]
4009 *
4010 * The page queues must be locked.
4011 */
4012 void
4013 vm_page_deactivate(
4014 vm_page_t m)
4015 {
4016 vm_page_deactivate_internal(m, TRUE);
4017 }
4018
4019
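/*
 *	vm_page_deactivate_internal:
 *
 *	Worker for vm_page_deactivate().  When clear_hw_reference is TRUE,
 *	the pmap-level reference bit of a resident (non-absent) page is
 *	cleared before the page is re-queued.
 */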
4020 void
4021 vm_page_deactivate_internal(
4022 vm_page_t m,
4023 boolean_t clear_hw_reference)
4024 {
4025 vm_object_t m_object;
4026
4027 m_object = VM_PAGE_OBJECT(m);
4028
4029 VM_PAGE_CHECK(m);
4030 assert(m_object != kernel_object);
4031 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4032
4033 // dbgLog(VM_PAGE_GET_PHYS_PAGE(m), vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
4034 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4035 /*
4036 * This page is no longer very interesting. If it was
4037 * interesting (active or inactive/referenced), then we
4038 * clear the reference bit and (re)enter it in the
4039 * inactive queue. Note wired pages should not have
4040 * their reference bit cleared.
4041 */
4042 assert ( !(m->absent && !m->unusual));
4043
4044 if (m->gobbled) { /* can this happen? */
4045 assert( !VM_PAGE_WIRED(m));
4046
4047 if (!m->private && !m->fictitious)
4048 vm_page_wire_count--;
4049 vm_page_gobble_count--;
4050 m->gobbled = FALSE;
4051 }
4052 /*
4053 * if this page is currently on the pageout queue, we can't do the
4054 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4055 * and we can't remove it manually since we would need the object lock
4056 * (which is not required here) to decrement the activity_in_progress
4057 * reference which is held on the object while the page is in the pageout queue...
4058 * just let the normal laundry processing proceed
4059 */
4060 if (m->laundry || m->private || m->fictitious ||
4061 (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4062 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
4063 VM_PAGE_WIRED(m)) {
4064 return;
4065 }
4066 if (!m->absent && clear_hw_reference == TRUE)
4067 pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));
4068
4069 m->reference = FALSE;
4070 m->no_cache = FALSE;
4071
4072 if ( !VM_PAGE_INACTIVE(m)) {
4073 vm_page_queues_remove(m, FALSE);
4074
4075 if (!VM_DYNAMIC_PAGING_ENABLED() &&
4076 m->dirty && m_object->internal &&
4077 (m_object->purgable == VM_PURGABLE_DENY ||
4078 m_object->purgable == VM_PURGABLE_NONVOLATILE ||
4079 m_object->purgable == VM_PURGABLE_VOLATILE)) {
4080 vm_page_check_pageable_safe(m);
4081 vm_page_queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
4082 m->vm_page_q_state = VM_PAGE_ON_THROTTLED_Q;
4083 vm_page_throttled_count++;
4084 } else {
4085 if (m_object->named && m_object->ref_count == 1) {
4086 vm_page_speculate(m, FALSE);
4087 #if DEVELOPMENT || DEBUG
4088 vm_page_speculative_recreated++;
4089 #endif
4090 } else {
4091 vm_page_enqueue_inactive(m, FALSE);
4092 }
4093 }
4094 }
4095 }
4096
4097 /*
4098 * vm_page_enqueue_cleaned
4099 *
4100 * Put the page on the cleaned queue, mark it cleaned, etc.
4101 * Being on the cleaned queue (and having m->clean_queue set)
4102 * does ** NOT ** guarantee that the page is clean!
4103 *
4104 * Call with the queues lock held.
4105 */
4106
4107 void vm_page_enqueue_cleaned(vm_page_t m)
4108 {
4109 vm_object_t m_object;
4110
4111 m_object = VM_PAGE_OBJECT(m);
4112
4113 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4114 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4115 assert( !(m->absent && !m->unusual));
4116
4117 if (VM_PAGE_WIRED(m)) {
4118 return;
4119 }
4120
4121 if (m->gobbled) {
4122 if (!m->private && !m->fictitious)
4123 vm_page_wire_count--;
4124 vm_page_gobble_count--;
4125 m->gobbled = FALSE;
4126 }
4127 /*
4128 * if this page is currently on the pageout queue, we can't do the
4129 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4130 * and we can't remove it manually since we would need the object lock
4131 * (which is not required here) to decrement the activity_in_progress
4132 * reference which is held on the object while the page is in the pageout queue...
4133 * just let the normal laundry processing proceed
4134 */
4135 if (m->laundry || m->private || m->fictitious ||
4136 (m->vm_page_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) ||
4137 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
4138 return;
4139 }
4140 vm_page_queues_remove(m, FALSE);
4141
4142 vm_page_check_pageable_safe(m);
4143 vm_page_queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
4144 m->vm_page_q_state = VM_PAGE_ON_INACTIVE_CLEANED_Q;
4145 vm_page_cleaned_count++;
4146
4147 vm_page_inactive_count++;
4148 if (m_object->internal) {
4149 vm_page_pageable_internal_count++;
4150 } else {
4151 vm_page_pageable_external_count++;
4152 }
4153 #if CONFIG_BACKGROUND_QUEUE
4154 if (m->vm_page_in_background)
4155 vm_page_add_to_backgroundq(m, TRUE);
4156 #endif
4157 vm_pageout_enqueued_cleaned++;
4158 }
4159
4160 /*
4161 * vm_page_activate:
4162 *
4163 * Put the specified page on the active list (if appropriate).
4164 *
4165 * The page queues must be locked.
4166 */
4167
4168 void
4169 vm_page_activate(
4170 vm_page_t m)
4171 {
4172 vm_object_t m_object;
4173
4174 m_object = VM_PAGE_OBJECT(m);
4175
4176 VM_PAGE_CHECK(m);
4177 #ifdef FIXME_4778297
4178 assert(m_object != kernel_object);
4179 #endif
4180 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4181 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4182 assert( !(m->absent && !m->unusual));
4183
4184 if (m->gobbled) {
4185 assert( !VM_PAGE_WIRED(m));
4186 if (!m->private && !m->fictitious)
4187 vm_page_wire_count--;
4188 vm_page_gobble_count--;
4189 m->gobbled = FALSE;
4190 }
4191 /*
4192 * if this page is currently on the pageout queue, we can't do the
4193 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4194 * and we can't remove it manually since we would need the object lock
4195 * (which is not required here) to decrement the activity_in_progress
4196 * reference which is held on the object while the page is in the pageout queue...
4197 * just let the normal laundry processing proceed
4198 */
4199 if (m->laundry || m->private || m->fictitious ||
4200 (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4201 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q))
4202 return;
4203
4204 #if DEBUG
4205 if (m->vm_page_q_state == VM_PAGE_ON_ACTIVE_Q)
4206 panic("vm_page_activate: already active");
4207 #endif
4208
4209 if (m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
4210 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
4211 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
4212 }
4213
4214 vm_page_queues_remove(m, FALSE);
4215
4216 if ( !VM_PAGE_WIRED(m)) {
4217 vm_page_check_pageable_safe(m);
4218 if (!VM_DYNAMIC_PAGING_ENABLED() &&
4219 m->dirty && m_object->internal &&
4220 (m_object->purgable == VM_PURGABLE_DENY ||
4221 m_object->purgable == VM_PURGABLE_NONVOLATILE ||
4222 m_object->purgable == VM_PURGABLE_VOLATILE)) {
4223 vm_page_queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
4224 m->vm_page_q_state = VM_PAGE_ON_THROTTLED_Q;
4225 vm_page_throttled_count++;
4226 } else {
4227 #if CONFIG_SECLUDED_MEMORY
4228 if (secluded_for_filecache &&
4229 vm_page_secluded_target != 0 &&
4230 num_tasks_can_use_secluded_mem == 0 &&
4231 m_object->eligible_for_secluded) {
4232 vm_page_queue_enter(&vm_page_queue_secluded, m,
4233 vm_page_t, pageq);
4234 m->vm_page_q_state = VM_PAGE_ON_SECLUDED_Q;
4235 vm_page_secluded_count++;
4236 vm_page_secluded_count_inuse++;
4237 assert(!m_object->internal);
4238 // vm_page_pageable_external_count++;
4239 } else
4240 #endif /* CONFIG_SECLUDED_MEMORY */
4241 vm_page_enqueue_active(m, FALSE);
4242 }
4243 m->reference = TRUE;
4244 m->no_cache = FALSE;
4245 }
4246 VM_PAGE_CHECK(m);
4247 }
4248
4249
4250 /*
4251 * vm_page_speculate:
4252 *
4253 * Put the specified page on the speculative list (if appropriate).
4254 *
4255 * The page queues must be locked.
4256 */
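/*
 *	Speculative pages are grouped into aging bins: pages speculated
 *	within the same vm_page_speculative_q_age_ms window share a bin.
 *	When the window expires the bin index advances (wrapping between
 *	VM_PAGE_MIN_SPECULATIVE_AGE_Q and VM_PAGE_MAX_SPECULATIVE_AGE_Q),
 *	and a bin about to be reused is drained into the "aged" bin via
 *	vm_page_speculate_ageit().  For illustration only: if the window
 *	were 500 ms, pages speculated at t=0 and t=400 ms would age out
 *	together, while one speculated at t=600 ms would land in the next bin.
 */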
4257 void
4258 vm_page_speculate(
4259 vm_page_t m,
4260 boolean_t new)
4261 {
4262 struct vm_speculative_age_q *aq;
4263 vm_object_t m_object;
4264
4265 m_object = VM_PAGE_OBJECT(m);
4266
4267 VM_PAGE_CHECK(m);
4268 vm_page_check_pageable_safe(m);
4269
4270 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4271 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4272 assert( !(m->absent && !m->unusual));
4273 assert(m_object->internal == FALSE);
4274
4275 /*
4276 * if this page is currently on the pageout queue, we can't do the
4277 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4278 * and we can't remove it manually since we would need the object lock
4279 * (which is not required here) to decrement the activity_in_progress
4280 * reference which is held on the object while the page is in the pageout queue...
4281 * just let the normal laundry processing proceed
4282 */
4283 if (m->laundry || m->private || m->fictitious ||
4284 (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4285 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q))
4286 return;
4287
4288 vm_page_queues_remove(m, FALSE);
4289
4290 if ( !VM_PAGE_WIRED(m)) {
4291 mach_timespec_t ts;
4292 clock_sec_t sec;
4293 clock_nsec_t nsec;
4294
4295 clock_get_system_nanotime(&sec, &nsec);
4296 ts.tv_sec = (unsigned int) sec;
4297 ts.tv_nsec = nsec;
4298
4299 if (vm_page_speculative_count == 0) {
4300
4301 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4302 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4303
4304 aq = &vm_page_queue_speculative[speculative_age_index];
4305
4306 /*
4307 * set the timer to begin a new group
4308 */
4309 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
4310 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
4311
4312 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
4313 } else {
4314 aq = &vm_page_queue_speculative[speculative_age_index];
4315
4316 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
4317
4318 speculative_age_index++;
4319
4320 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
4321 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4322 if (speculative_age_index == speculative_steal_index) {
4323 speculative_steal_index = speculative_age_index + 1;
4324
4325 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
4326 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4327 }
4328 aq = &vm_page_queue_speculative[speculative_age_index];
4329
4330 if (!vm_page_queue_empty(&aq->age_q))
4331 vm_page_speculate_ageit(aq);
4332
4333 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
4334 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
4335
4336 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
4337 }
4338 }
4339 vm_page_enqueue_tail(&aq->age_q, &m->pageq);
4340 m->vm_page_q_state = VM_PAGE_ON_SPECULATIVE_Q;
4341 vm_page_speculative_count++;
4342 vm_page_pageable_external_count++;
4343
4344 if (new == TRUE) {
4345 vm_object_lock_assert_exclusive(m_object);
4346
4347 m_object->pages_created++;
4348 #if DEVELOPMENT || DEBUG
4349 vm_page_speculative_created++;
4350 #endif
4351 }
4352 }
4353 VM_PAGE_CHECK(m);
4354 }
4355
4356
4357 /*
4358 * move pages from the specified aging bin to
4359 * the speculative bin that pageout_scan claims from
4360 *
4361 * The page queues must be locked.
4362 */
4363 void
4364 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
4365 {
4366 struct vm_speculative_age_q *sq;
4367 vm_page_t t;
4368
4369 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
4370
4371 if (vm_page_queue_empty(&sq->age_q)) {
4372 sq->age_q.next = aq->age_q.next;
4373 sq->age_q.prev = aq->age_q.prev;
4374
4375 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.next);
4376 t->pageq.prev = VM_PAGE_PACK_PTR(&sq->age_q);
4377
4378 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
4379 t->pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);
4380 } else {
4381 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
4382 t->pageq.next = aq->age_q.next;
4383
4384 t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.next);
4385 t->pageq.prev = sq->age_q.prev;
4386
4387 t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.prev);
4388 t->pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);
4389
4390 sq->age_q.prev = aq->age_q.prev;
4391 }
4392 vm_page_queue_init(&aq->age_q);
4393 }
4394
4395
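/*
 *	vm_page_lru:
 *
 *	Re-enter the page on the inactive queue, refreshing its LRU
 *	position and clearing no_cache.  Wired, laundry, private,
 *	compressor and pageout-queue pages are left untouched.
 *
 *	The page queues must be locked.
 */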
4396 void
4397 vm_page_lru(
4398 vm_page_t m)
4399 {
4400 VM_PAGE_CHECK(m);
4401 assert(VM_PAGE_OBJECT(m) != kernel_object);
4402 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4403
4404 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4405 /*
4406 * if this page is currently on the pageout queue, we can't do the
4407 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4408 * and we can't remove it manually since we would need the object lock
4409 * (which is not required here) to decrement the activity_in_progress
4410 * reference which is held on the object while the page is in the pageout queue...
4411 * just let the normal laundry processing proceed
4412 */
4413 if (m->laundry || m->private ||
4414 (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4415 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
4416 VM_PAGE_WIRED(m))
4417 return;
4418
4419 m->no_cache = FALSE;
4420
4421 vm_page_queues_remove(m, FALSE);
4422
4423 vm_page_enqueue_inactive(m, FALSE);
4424 }
4425
4426
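/*
 *	vm_page_reactivate_all_throttled:
 *
 *	Splice the entire throttled queue onto the head of the active
 *	queue and fix up the global page counts.  Does nothing when
 *	dynamic paging is disabled.
 */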
4427 void
4428 vm_page_reactivate_all_throttled(void)
4429 {
4430 vm_page_t first_throttled, last_throttled;
4431 vm_page_t first_active;
4432 vm_page_t m;
4433 int extra_active_count;
4434 int extra_internal_count, extra_external_count;
4435 vm_object_t m_object;
4436
4437 if (!VM_DYNAMIC_PAGING_ENABLED())
4438 return;
4439
4440 extra_active_count = 0;
4441 extra_internal_count = 0;
4442 extra_external_count = 0;
4443 vm_page_lock_queues();
4444 if (! vm_page_queue_empty(&vm_page_queue_throttled)) {
4445 /*
4446 * Switch "throttled" pages to "active".
4447 */
4448 vm_page_queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
4449 VM_PAGE_CHECK(m);
4450 assert(m->vm_page_q_state == VM_PAGE_ON_THROTTLED_Q);
4451
4452 m_object = VM_PAGE_OBJECT(m);
4453
4454 extra_active_count++;
4455 if (m_object->internal) {
4456 extra_internal_count++;
4457 } else {
4458 extra_external_count++;
4459 }
4460
4461 m->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
4462 VM_PAGE_CHECK(m);
4463 #if CONFIG_BACKGROUND_QUEUE
4464 if (m->vm_page_in_background)
4465 vm_page_add_to_backgroundq(m, FALSE);
4466 #endif
4467 }
4468
4469 /*
4470 * Transfer the entire throttled queue to the regular LRU page queues.
4471 * We insert it at the head of the active queue, so that these pages
4472 * get re-evaluated by the LRU algorithm first, since they've been
4473 * completely out of it until now.
4474 */
4475 first_throttled = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
4476 last_throttled = (vm_page_t) vm_page_queue_last(&vm_page_queue_throttled);
4477 first_active = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
4478 if (vm_page_queue_empty(&vm_page_queue_active)) {
4479 vm_page_queue_active.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
4480 } else {
4481 first_active->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
4482 }
4483 vm_page_queue_active.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_throttled);
4484 first_throttled->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active);
4485 last_throttled->pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active);
4486
4487 #if DEBUG
4488 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
4489 #endif
4490 vm_page_queue_init(&vm_page_queue_throttled);
4491 /*
4492 * Adjust the global page counts.
4493 */
4494 vm_page_active_count += extra_active_count;
4495 vm_page_pageable_internal_count += extra_internal_count;
4496 vm_page_pageable_external_count += extra_external_count;
4497 vm_page_throttled_count = 0;
4498 }
4499 assert(vm_page_throttled_count == 0);
4500 assert(vm_page_queue_empty(&vm_page_queue_throttled));
4501 vm_page_unlock_queues();
4502 }
4503
4504
4505 /*
4506 * move pages from the indicated local queue to the global active queue
4507 * it's OK to fail if we're below the hard limit and force == FALSE
4508 * the nolocks == TRUE case is to allow this function to be run on
4509 * the hibernate path
4510 */
4511
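/*
 *	Informal note: "lid" indexes the per-CPU local queues in
 *	vm_page_local_q[]; the check below panics if a page's local_id
 *	does not match it.  The nolocks == TRUE case (hibernate path) is
 *	presumed to run when taking the queue locks is unnecessary.
 */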
4512 void
4513 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
4514 {
4515 struct vpl *lq;
4516 vm_page_t first_local, last_local;
4517 vm_page_t first_active;
4518 vm_page_t m;
4519 uint32_t count = 0;
4520
4521 if (vm_page_local_q == NULL)
4522 return;
4523
4524 lq = &vm_page_local_q[lid].vpl_un.vpl;
4525
4526 if (nolocks == FALSE) {
4527 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
4528 if ( !vm_page_trylockspin_queues())
4529 return;
4530 } else
4531 vm_page_lockspin_queues();
4532
4533 VPL_LOCK(&lq->vpl_lock);
4534 }
4535 if (lq->vpl_count) {
4536 /*
4537 * Switch "local" pages to "active".
4538 */
4539 assert(!vm_page_queue_empty(&lq->vpl_queue));
4540
4541 vm_page_queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
4542 VM_PAGE_CHECK(m);
4543 vm_page_check_pageable_safe(m);
4544 assert(m->vm_page_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q);
4545 assert(!m->fictitious);
4546
4547 if (m->local_id != lid)
4548 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
4549
4550 m->local_id = 0;
4551 m->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
4552 VM_PAGE_CHECK(m);
4553 #if CONFIG_BACKGROUND_QUEUE
4554 if (m->vm_page_in_background)
4555 vm_page_add_to_backgroundq(m, FALSE);
4556 #endif
4557 count++;
4558 }
4559 if (count != lq->vpl_count)
4560 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
4561
4562 /*
4563 * Transfer the entire local queue to the regular LRU page queues.
4564 */
4565 first_local = (vm_page_t) vm_page_queue_first(&lq->vpl_queue);
4566 last_local = (vm_page_t) vm_page_queue_last(&lq->vpl_queue);
4567 first_active = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
4568
4569 if (vm_page_queue_empty(&vm_page_queue_active)) {
4570 vm_page_queue_active.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
4571 } else {
4572 first_active->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
4573 }
4574 vm_page_queue_active.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
4575 first_local->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active);
4576 last_local->pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active);
4577
4578 vm_page_queue_init(&lq->vpl_queue);
4579 /*
4580 * Adjust the global page counts.
4581 */
4582 vm_page_active_count += lq->vpl_count;
4583 vm_page_pageable_internal_count += lq->vpl_internal_count;
4584 vm_page_pageable_external_count += lq->vpl_external_count;
4585 lq->vpl_count = 0;
4586 lq->vpl_internal_count = 0;
4587 lq->vpl_external_count = 0;
4588 }
4589 assert(vm_page_queue_empty(&lq->vpl_queue));
4590
4591 if (nolocks == FALSE) {
4592 VPL_UNLOCK(&lq->vpl_lock);
4593 vm_page_unlock_queues();
4594 }
4595 }
4596
4597 /*
4598 * vm_page_part_zero_fill:
4599 *
4600 * Zero-fill a part of the page.
4601 */
4602 #define PMAP_ZERO_PART_PAGE_IMPLEMENTED
4603 void
4604 vm_page_part_zero_fill(
4605 vm_page_t m,
4606 vm_offset_t m_pa,
4607 vm_size_t len)
4608 {
4609
4610 #if 0
4611 /*
4612 * we don't hold the page queue lock
4613 * so this check isn't safe to make
4614 */
4615 VM_PAGE_CHECK(m);
4616 #endif
4617
4618 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
4619 pmap_zero_part_page(VM_PAGE_GET_PHYS_PAGE(m), m_pa, len);
4620 #else
4621 vm_page_t tmp;
4622 while (1) {
4623 tmp = vm_page_grab();
4624 if (tmp == VM_PAGE_NULL) {
4625 vm_page_wait(THREAD_UNINT);
4626 continue;
4627 }
4628 break;
4629 }
4630 vm_page_zero_fill(tmp);
4631 if(m_pa != 0) {
4632 vm_page_part_copy(m, 0, tmp, 0, m_pa);
4633 }
4634 if((m_pa + len) < PAGE_SIZE) {
4635 vm_page_part_copy(m, m_pa + len, tmp,
4636 m_pa + len, PAGE_SIZE - (m_pa + len));
4637 }
4638 vm_page_copy(tmp,m);
4639 VM_PAGE_FREE(tmp);
4640 #endif
4641
4642 }
4643
4644 /*
4645 * vm_page_zero_fill:
4646 *
4647 * Zero-fill the specified page.
4648 */
4649 void
4650 vm_page_zero_fill(
4651 vm_page_t m)
4652 {
4653 XPR(XPR_VM_PAGE,
4654 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
4655 VM_PAGE_OBJECT(m), m->offset, m, 0,0);
4656 #if 0
4657 /*
4658 * we don't hold the page queue lock
4659 * so this check isn't safe to make
4660 */
4661 VM_PAGE_CHECK(m);
4662 #endif
4663
4664 // dbgTrace(0xAEAEAEAE, VM_PAGE_GET_PHYS_PAGE(m), 0); /* (BRINGUP) */
4665 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
4666 }
4667
4668 /*
4669 * vm_page_part_copy:
4670 *
4671 * copy part of one page to another
4672 */
4673
4674 void
4675 vm_page_part_copy(
4676 vm_page_t src_m,
4677 vm_offset_t src_pa,
4678 vm_page_t dst_m,
4679 vm_offset_t dst_pa,
4680 vm_size_t len)
4681 {
4682 #if 0
4683 /*
4684 * we don't hold the page queue lock
4685 * so this check isn't safe to make
4686 */
4687 VM_PAGE_CHECK(src_m);
4688 VM_PAGE_CHECK(dst_m);
4689 #endif
4690 pmap_copy_part_page(VM_PAGE_GET_PHYS_PAGE(src_m), src_pa,
4691 VM_PAGE_GET_PHYS_PAGE(dst_m), dst_pa, len);
4692 }
4693
4694 /*
4695 * vm_page_copy:
4696 *
4697 * Copy one page to another
4698 */
4699
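/*
 * vm_page_copy_cs_validations counts copies from code-signed objects that
 * forced a vm_page_validate_cs() of the source page; vm_page_copy_cs_tainted
 * counts copies whose source page was marked cs_tainted.
 */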
4700 int vm_page_copy_cs_validations = 0;
4701 int vm_page_copy_cs_tainted = 0;
4702
4703 void
4704 vm_page_copy(
4705 vm_page_t src_m,
4706 vm_page_t dest_m)
4707 {
4708 vm_object_t src_m_object;
4709
4710 src_m_object = VM_PAGE_OBJECT(src_m);
4711
4712 XPR(XPR_VM_PAGE,
4713 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
4714 src_m_object, src_m->offset,
4715 VM_PAGE_OBJECT(dest_m), dest_m->offset,
4716 0);
4717 #if 0
4718 /*
4719 * we don't hold the page queue lock
4720 * so this check isn't safe to make
4721 */
4722 VM_PAGE_CHECK(src_m);
4723 VM_PAGE_CHECK(dest_m);
4724 #endif
4725 vm_object_lock_assert_held(src_m_object);
4726
4727 if (src_m_object != VM_OBJECT_NULL &&
4728 src_m_object->code_signed) {
4729 /*
4730 * We're copying a page from a code-signed object.
4731 * Whoever ends up mapping the copy page might care about
4732 * the original page's integrity, so let's validate the
4733 * source page now.
4734 */
4735 vm_page_copy_cs_validations++;
4736 vm_page_validate_cs(src_m);
4737 #if DEVELOPMENT || DEBUG
4738 DTRACE_VM4(codesigned_copy,
4739 vm_object_t, src_m_object,
4740 vm_object_offset_t, src_m->offset,
4741 int, src_m->cs_validated,
4742 int, src_m->cs_tainted);
4743 #endif /* DEVELOPMENT || DEBUG */
4744
4745 }
4746
4747 if (vm_page_is_slideable(src_m)) {
4748 boolean_t was_busy = src_m->busy;
4749 src_m->busy = TRUE;
4750 (void) vm_page_slide(src_m, 0);
4751 assert(src_m->busy);
4752 if (!was_busy) {
4753 PAGE_WAKEUP_DONE(src_m);
4754 }
4755 }
4756
4757 /*
4758 * Propagate the cs_tainted bit to the copy page. Do not propagate
4759 * the cs_validated bit.
4760 */
4761 dest_m->cs_tainted = src_m->cs_tainted;
4762 if (dest_m->cs_tainted) {
4763 vm_page_copy_cs_tainted++;
4764 }
4765 dest_m->slid = src_m->slid;
4766 dest_m->error = src_m->error; /* sliding src_m might have failed... */
4767 pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(src_m), VM_PAGE_GET_PHYS_PAGE(dest_m));
4768 }
4769
4770 #if MACH_ASSERT
4771 static void
4772 _vm_page_print(
4773 vm_page_t p)
4774 {
4775 printf("vm_page %p: \n", p);
4776 printf(" pageq: next=%p prev=%p\n",
4777 (vm_page_t)VM_PAGE_UNPACK_PTR(p->pageq.next),
4778 (vm_page_t)VM_PAGE_UNPACK_PTR(p->pageq.prev));
4779 printf(" listq: next=%p prev=%p\n",
4780 (vm_page_t)(VM_PAGE_UNPACK_PTR(p->listq.next)),
4781 (vm_page_t)(VM_PAGE_UNPACK_PTR(p->listq.prev)));
4782 printf(" next=%p\n", (vm_page_t)(VM_PAGE_UNPACK_PTR(p->next_m)));
4783 printf(" object=%p offset=0x%llx\n",VM_PAGE_OBJECT(p), p->offset);
4784 printf(" wire_count=%u\n", p->wire_count);
4785 printf(" q_state=%u\n", p->vm_page_q_state);
4786
4787 printf(" %slaundry, %sref, %sgobbled, %sprivate\n",
4788 (p->laundry ? "" : "!"),
4789 (p->reference ? "" : "!"),
4790 (p->gobbled ? "" : "!"),
4791 (p->private ? "" : "!"));
4792 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
4793 (p->busy ? "" : "!"),
4794 (p->wanted ? "" : "!"),
4795 (p->tabled ? "" : "!"),
4796 (p->fictitious ? "" : "!"),
4797 (p->pmapped ? "" : "!"),
4798 (p->wpmapped ? "" : "!"));
4799 printf(" %sfree_when_done, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
4800 (p->free_when_done ? "" : "!"),
4801 (p->absent ? "" : "!"),
4802 (p->error ? "" : "!"),
4803 (p->dirty ? "" : "!"),
4804 (p->cleaning ? "" : "!"),
4805 (p->precious ? "" : "!"),
4806 (p->clustered ? "" : "!"));
4807 printf(" %soverwriting, %srestart, %sunusual\n",
4808 (p->overwriting ? "" : "!"),
4809 (p->restart ? "" : "!"),
4810 (p->unusual ? "" : "!"));
4811 printf(" %scs_validated, %scs_tainted, %scs_nx, %sno_cache\n",
4812 (p->cs_validated ? "" : "!"),
4813 (p->cs_tainted ? "" : "!"),
4814 (p->cs_nx ? "" : "!"),
4815 (p->no_cache ? "" : "!"));
4816
4817 printf("phys_page=0x%x\n", VM_PAGE_GET_PHYS_PAGE(p));
4818 }
4819
4820 /*
4821 * Check that the list of pages is ordered by
4822 * ascending physical address and has no holes.
4823 */
4824 static int
4825 vm_page_verify_contiguous(
4826 vm_page_t pages,
4827 unsigned int npages)
4828 {
4829 vm_page_t m;
4830 unsigned int page_count;
4831 vm_offset_t prev_addr;
4832
4833 prev_addr = VM_PAGE_GET_PHYS_PAGE(pages);
4834 page_count = 1;
4835 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
4836 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
4837 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
4838 m, (long)prev_addr, VM_PAGE_GET_PHYS_PAGE(m));
4839 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
4840 panic("vm_page_verify_contiguous: not contiguous!");
4841 }
4842 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
4843 ++page_count;
4844 }
4845 if (page_count != npages) {
4846 printf("pages %p actual count 0x%x but requested 0x%x\n",
4847 pages, page_count, npages);
4848 panic("vm_page_verify_contiguous: count error");
4849 }
4850 return 1;
4851 }
4852
4853
4854 /*
4855 * Check the free lists for proper length etc.
4856 */
4857 static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
4858 static unsigned int
4859 vm_page_verify_free_list(
4860 vm_page_queue_head_t *vm_page_queue,
4861 unsigned int color,
4862 vm_page_t look_for_page,
4863 boolean_t expect_page)
4864 {
4865 unsigned int npages;
4866 vm_page_t m;
4867 vm_page_t prev_m;
4868 boolean_t found_page;
4869
4870 if (! vm_page_verify_this_free_list_enabled)
4871 return 0;
4872
4873 found_page = FALSE;
4874 npages = 0;
4875 prev_m = (vm_page_t)((uintptr_t)vm_page_queue);
4876
4877 vm_page_queue_iterate(vm_page_queue,
4878 m,
4879 vm_page_t,
4880 pageq) {
4881
4882 if (m == look_for_page) {
4883 found_page = TRUE;
4884 }
4885 if ((vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.prev) != prev_m)
4886 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
4887 color, npages, m, (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.prev), prev_m);
4888 if ( ! m->busy )
4889 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
4890 color, npages, m);
4891 if (color != (unsigned int) -1) {
4892 if (VM_PAGE_GET_COLOR(m) != color)
4893 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
4894 color, npages, m, VM_PAGE_GET_COLOR(m), color);
4895 if (m->vm_page_q_state != VM_PAGE_ON_FREE_Q)
4896 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p - expecting q_state == VM_PAGE_ON_FREE_Q, found %d\n",
4897 color, npages, m, m->vm_page_q_state);
4898 } else {
4899 if (m->vm_page_q_state != VM_PAGE_ON_FREE_LOCAL_Q)
4900 panic("vm_page_verify_free_list(npages=%u): local page %p - expecting q_state == VM_PAGE_ON_FREE_LOCAL_Q, found %d\n",
4901 npages, m, m->vm_page_q_state);
4902 }
4903 ++npages;
4904 prev_m = m;
4905 }
4906 if (look_for_page != VM_PAGE_NULL) {
4907 unsigned int other_color;
4908
4909 if (expect_page && !found_page) {
4910 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
4911 color, npages, look_for_page, VM_PAGE_GET_PHYS_PAGE(look_for_page));
4912 _vm_page_print(look_for_page);
4913 for (other_color = 0;
4914 other_color < vm_colors;
4915 other_color++) {
4916 if (other_color == color)
4917 continue;
4918 vm_page_verify_free_list(&vm_page_queue_free[other_color].qhead,
4919 other_color, look_for_page, FALSE);
4920 }
4921 if (color == (unsigned int) -1) {
4922 vm_page_verify_free_list(&vm_lopage_queue_free,
4923 (unsigned int) -1, look_for_page, FALSE);
4924 }
4925 panic("vm_page_verify_free_list(color=%u)\n", color);
4926 }
4927 if (!expect_page && found_page) {
4928 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
4929 color, npages, look_for_page, VM_PAGE_GET_PHYS_PAGE(look_for_page));
4930 }
4931 }
4932 return npages;
4933 }
4934
4935 static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
4936 static void
4937 vm_page_verify_free_lists( void )
4938 {
4939 unsigned int color, npages, nlopages;
4940 boolean_t toggle = TRUE;
4941
4942 if (! vm_page_verify_all_free_lists_enabled)
4943 return;
4944
4945 npages = 0;
4946
4947 lck_mtx_lock(&vm_page_queue_free_lock);
4948
4949 if (vm_page_verify_this_free_list_enabled == TRUE) {
4950 /*
4951 * This variable has been set globally for extra checking of
4952 * each free list Q. Since we didn't set it, we don't own it
4953 * and we shouldn't toggle it.
4954 */
4955 toggle = FALSE;
4956 }
4957
4958 if (toggle == TRUE) {
4959 vm_page_verify_this_free_list_enabled = TRUE;
4960 }
4961
4962 for( color = 0; color < vm_colors; color++ ) {
4963 npages += vm_page_verify_free_list(&vm_page_queue_free[color].qhead,
4964 color, VM_PAGE_NULL, FALSE);
4965 }
4966 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
4967 (unsigned int) -1,
4968 VM_PAGE_NULL, FALSE);
4969 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
4970 panic("vm_page_verify_free_lists: "
4971 "npages %u free_count %d nlopages %u lo_free_count %u",
4972 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
4973
4974 if (toggle == TRUE) {
4975 vm_page_verify_this_free_list_enabled = FALSE;
4976 }
4977
4978 lck_mtx_unlock(&vm_page_queue_free_lock);
4979 }
4980
4981 #endif /* MACH_ASSERT */
4982
4983
4984
4985 #if __arm64__
4986 /*
4987 * 1 or more clients (currently only SEP) ask for a large contiguous chunk of memory
4988 * after the system has 'aged'. To ensure that other allocation requests don't mess
4989 * with the chances of that request being satisfied, we pre-allocate a single contiguous
4990 * 10MB buffer and hand it out to the first request of >= 4MB.
4991 */
4992
4993 kern_return_t cpm_preallocate_early(void);
4994
4995 vm_page_t cpm_preallocated_pages_list = NULL;
4996 boolean_t preallocated_buffer_available = FALSE;
4997
4998 #define PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT ((10 * 1024 * 1024) / PAGE_SIZE_64) /* 10 MB */
4999 #define MIN_CONTIG_PAGES_REQUEST_FOR_PREALLOCATED_BUFFER ((4 * 1024 *1024) / PAGE_SIZE_64) /* 4 MB */
5000
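/*
 *	For scale (the page size is an assumption here, not taken from this
 *	file): with 16 KB pages the 10 MB buffer is 640 pages and the 4 MB
 *	threshold is 256 pages; with 4 KB pages, 2560 and 1024 pages
 *	respectively.
 */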
5001 kern_return_t
5002 cpm_preallocate_early(void)
5003 {
5004
5005 kern_return_t kr = KERN_SUCCESS;
5006 vm_map_size_t prealloc_size = (PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT * PAGE_SIZE_64);
5007
5008 printf("cpm_preallocate_early called to preallocate contiguous buffer of %llu pages\n", PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT);
5009
5010 kr = cpm_allocate(CAST_DOWN(vm_size_t, prealloc_size), &cpm_preallocated_pages_list, 0, 0, TRUE, 0);
5011
5012 if (kr != KERN_SUCCESS) {
5013 printf("cpm_allocate for preallocated contig buffer failed with %d.\n", kr);
5014 } else {
5015 preallocated_buffer_available = TRUE;
5016 }
5017
5018 return kr;
5019 }
5020 #endif /* __arm64__ */
5021
5022
5023 extern boolean_t (* volatile consider_buffer_cache_collect)(int);
5024
5025 /*
5026 * CONTIGUOUS PAGE ALLOCATION
5027 *
5028 * Find a region large enough to contain at least n pages
5029 * of contiguous physical memory.
5030 *
5031 * This is done by traversing the vm_page_t array in a linear fashion
5032 * we assume that the vm_page_t array has the available physical pages in an
5033 * ordered, ascending list... this is currently true of all our implementations
5034 * and must remain so... there can be 'holes' in the array... we also can
5035 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
5036 * which used to happen via 'vm_page_convert'... that function was no longer
5037 * being called and was removed...
5038 *
5039 * The basic flow consists of stabilizing some of the interesting state of
5040 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
5041 * sweep at the beginning of the array looking for pages that meet our criteria
5042 * for a 'stealable' page... currently we are pretty conservative... if the page
5043 * meets these criteria and is physically contiguous to the previous page in the 'run'
5044 * we keep developing it. If we hit a page that doesn't fit, we reset our state
5045 * and start to develop a new run... if at this point we've already considered
5046 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
5047 * and mutex_pause (which will yield the processor), to keep the latency low w/r
5048 * to other threads trying to acquire free pages (or move pages from q to q),
5049 * and then continue from the spot we left off... we only make 1 pass through the
5050 * array. Once we have a 'run' that is long enough, we'll go into the loop
5051 * which steals the pages from the queues they're currently on... pages on the free
5052 * queue can be stolen directly... pages that are on any of the other queues
5053 * must be removed from the object they are tabled on... this requires taking the
5054 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
5055 * or if the state of the page behind the vm_object lock is no longer viable, we'll
5056 * dump the pages we've currently stolen back to the free list, and pick up our
5057 * scan from the point where we aborted the 'current' run.
5058 *
5059 *
5060 * Requirements:
5061 * - neither vm_page_queue nor vm_free_list lock can be held on entry
5062 *
5063 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
5064 *
5065 * Algorithm:
5066 */
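/*
 *	Simplified outline of the flow described above (informal; the real
 *	code below also handles yielding, wrapping and abort/retry):
 *
 *		lock page queues + free list;
 *		for each vm_pages[] entry:
 *			if the page extends the current physically-contiguous,
 *			    stealable run, grow the run;
 *			else reset the run;
 *			if run length == contig_pages, stop scanning;
 *		pass 1: pull the run's free pages off the free queues;
 *		pass 2 (back to front): steal the remaining pages from their
 *		    objects, substituting fresh pages for pmapped/dirty ones;
 *		wire or gobble the resulting page list and return it.
 */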
5067
5068 #define MAX_CONSIDERED_BEFORE_YIELD 1000
5069
5070
5071 #define RESET_STATE_OF_RUN() \
5072 MACRO_BEGIN \
5073 prevcontaddr = -2; \
5074 start_pnum = -1; \
5075 free_considered = 0; \
5076 substitute_needed = 0; \
5077 npages = 0; \
5078 MACRO_END
5079
5080 /*
5081 * Can we steal in-use (i.e. not free) pages when searching for
5082 * physically-contiguous pages ?
5083 */
5084 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
5085
5086 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
5087 #if DEBUG
5088 int vm_page_find_contig_debug = 0;
5089 #endif
5090
5091 static vm_page_t
5092 vm_page_find_contiguous(
5093 unsigned int contig_pages,
5094 ppnum_t max_pnum,
5095 ppnum_t pnum_mask,
5096 boolean_t wire,
5097 int flags)
5098 {
5099 vm_page_t m = NULL;
5100 ppnum_t prevcontaddr = 0;
5101 ppnum_t start_pnum = 0;
5102 unsigned int npages = 0, considered = 0, scanned = 0;
5103 unsigned int page_idx = 0, start_idx = 0, last_idx = 0, orig_last_idx = 0;
5104 unsigned int idx_last_contig_page_found = 0;
5105 int free_considered = 0, free_available = 0;
5106 int substitute_needed = 0;
5107 boolean_t wrapped, zone_gc_called = FALSE;
5108 kern_return_t kr;
5109 #if DEBUG
5110 clock_sec_t tv_start_sec = 0, tv_end_sec = 0;
5111 clock_usec_t tv_start_usec = 0, tv_end_usec = 0;
5112 #endif
5113
5114 int yielded = 0;
5115 int dumped_run = 0;
5116 int stolen_pages = 0;
5117 int compressed_pages = 0;
5118
5119
5120 if (contig_pages == 0)
5121 return VM_PAGE_NULL;
5122
5123 full_scan_again:
5124
5125 #if MACH_ASSERT
5126 vm_page_verify_free_lists();
5127 #endif
5128 #if DEBUG
5129 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
5130 #endif
5131 PAGE_REPLACEMENT_ALLOWED(TRUE);
5132
5133 vm_page_lock_queues();
5134
5135 #if __arm64__
5136 if (preallocated_buffer_available) {
5137
5138 if ((contig_pages >= MIN_CONTIG_PAGES_REQUEST_FOR_PREALLOCATED_BUFFER) && (contig_pages <= PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT)) {
5139
5140 m = cpm_preallocated_pages_list;
5141
5142 start_idx = (unsigned int) (m - &vm_pages[0]);
5143
5144 if (wire == FALSE) {
5145
5146 last_idx = start_idx;
5147
5148 for(npages = 0; npages < contig_pages; npages++, last_idx++) {
5149
5150 assert(vm_pages[last_idx].gobbled == FALSE);
5151
5152 vm_pages[last_idx].gobbled = TRUE;
5153 vm_page_gobble_count++;
5154
5155 assert(1 == vm_pages[last_idx].wire_count);
5156 /*
5157 * Gobbled pages are counted as wired pages. So no need to drop
5158 * the global wired page count. Just the page's wire count is fine.
5159 */
5160 vm_pages[last_idx].wire_count--;
5161 vm_pages[last_idx].vm_page_q_state = VM_PAGE_NOT_ON_Q;
5162 }
5163
5164 }
5165
5166 last_idx = start_idx + contig_pages - 1;
5167
5168 vm_pages[last_idx].snext = NULL;
5169
5170 printf("Using preallocated buffer: Requested size (pages):%d... index range: %d-%d...freeing %llu pages\n", contig_pages, start_idx, last_idx, PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT - contig_pages);
5171
5172 last_idx += 1;
5173 for(npages = contig_pages; npages < PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT; npages++, last_idx++) {
5174
5175 VM_PAGE_ZERO_PAGEQ_ENTRY(&vm_pages[last_idx]);
5176 vm_page_free(&vm_pages[last_idx]);
5177 }
5178
5179 cpm_preallocated_pages_list = NULL;
5180 preallocated_buffer_available = FALSE;
5181
5182 goto done_scanning;
5183 }
5184 }
5185 #endif /* __arm64__ */
5186
5187 lck_mtx_lock(&vm_page_queue_free_lock);
5188
5189 RESET_STATE_OF_RUN();
5190
5191 scanned = 0;
5192 considered = 0;
5193 free_available = vm_page_free_count - vm_page_free_reserved;
5194
5195 wrapped = FALSE;
5196
5197 if(flags & KMA_LOMEM)
5198 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
5199 else
5200 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
5201
5202 orig_last_idx = idx_last_contig_page_found;
5203 last_idx = orig_last_idx;
5204
5205 for (page_idx = last_idx, start_idx = last_idx;
5206 npages < contig_pages && page_idx < vm_pages_count;
5207 page_idx++) {
5208 retry:
5209 if (wrapped &&
5210 npages == 0 &&
5211 page_idx >= orig_last_idx) {
5212 /*
5213 * We're back where we started and we haven't
5214 * found any suitable contiguous range. Let's
5215 * give up.
5216 */
5217 break;
5218 }
5219 scanned++;
5220 m = &vm_pages[page_idx];
5221
5222 assert(!m->fictitious);
5223 assert(!m->private);
5224
5225 if (max_pnum && VM_PAGE_GET_PHYS_PAGE(m) > max_pnum) {
5226 /* no more low pages... */
5227 break;
5228 }
5229 if (!npages && ((VM_PAGE_GET_PHYS_PAGE(m) & pnum_mask) != 0)) {
5230 /*
5231 * not aligned
5232 */
5233 RESET_STATE_OF_RUN();
5234
5235 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
5236 m->laundry || m->wanted ||
5237 m->cleaning || m->overwriting || m->free_when_done) {
5238 /*
5239 * page is in a transient state
5240 * or a state we don't want to deal
5241 * with, so don't consider it which
5242 * means starting a new run
5243 */
5244 RESET_STATE_OF_RUN();
5245
5246 } else if ((m->vm_page_q_state == VM_PAGE_NOT_ON_Q) ||
5247 (m->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q) ||
5248 (m->vm_page_q_state == VM_PAGE_ON_FREE_LOPAGE_Q) ||
5249 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
5250 /*
5251 * page needs to be on one of our queues (other than the pageout or special free queues)
5252 * or it needs to belong to the compressor pool (which is now indicated
5253 * by vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR and falls out
5254 * from the check for VM_PAGE_NOT_ON_Q)
5255 * in order for it to be stable behind the
5256 * locks we hold at this point...
5257 * if not, don't consider it which
5258 * means starting a new run
5259 */
5260 RESET_STATE_OF_RUN();
5261
5262 } else if ((m->vm_page_q_state != VM_PAGE_ON_FREE_Q) && (!m->tabled || m->busy)) {
5263 /*
5264 * pages on the free list are always 'busy'
5265 * so we couldn't test for 'busy' in the check
5266 * for the transient states... pages that are
5267 * 'free' are never 'tabled', so we also couldn't
5268 * test for 'tabled'. So we check here to make
5269 * sure that a non-free page is not busy and is
5270 * tabled on an object...
5271 * if not, don't consider it which
5272 * means starting a new run
5273 */
5274 RESET_STATE_OF_RUN();
5275
5276 } else {
5277 if (VM_PAGE_GET_PHYS_PAGE(m) != prevcontaddr + 1) {
5278 if ((VM_PAGE_GET_PHYS_PAGE(m) & pnum_mask) != 0) {
5279 RESET_STATE_OF_RUN();
5280 goto did_consider;
5281 } else {
5282 npages = 1;
5283 start_idx = page_idx;
5284 start_pnum = VM_PAGE_GET_PHYS_PAGE(m);
5285 }
5286 } else {
5287 npages++;
5288 }
5289 prevcontaddr = VM_PAGE_GET_PHYS_PAGE(m);
5290
5291 VM_PAGE_CHECK(m);
5292 if (m->vm_page_q_state == VM_PAGE_ON_FREE_Q) {
5293 free_considered++;
5294 } else {
5295 /*
5296 * This page is not free.
5297 * If we can't steal used pages,
5298 * we have to give up this run
5299 * and keep looking.
5300 * Otherwise, we might need to
5301 * move the contents of this page
5302 * into a substitute page.
5303 */
5304 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
5305 if (m->pmapped || m->dirty || m->precious) {
5306 substitute_needed++;
5307 }
5308 #else
5309 RESET_STATE_OF_RUN();
5310 #endif
5311 }
5312
5313 if ((free_considered + substitute_needed) > free_available) {
5314 /*
5315 * if we let this run continue
5316 * we will end up dropping the vm_page_free_count
5317 * below the reserve limit... we need to abort
5318 * this run, but we can at least re-consider this
5319 * page... thus the jump back to 'retry'
5320 */
5321 RESET_STATE_OF_RUN();
5322
5323 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
5324 considered++;
5325 goto retry;
5326 }
5327 /*
5328 * free_available == 0
5329 * so can't consider any free pages... if
5330 * we went to retry in this case, we'd
5331 * get stuck looking at the same page
5332 * w/o making any forward progress...
5333 * we also want to take this path if we've already
5334 * reached our limit that controls the lock latency
5335 */
5336 }
5337 }
5338 did_consider:
5339 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
5340
5341 PAGE_REPLACEMENT_ALLOWED(FALSE);
5342
5343 lck_mtx_unlock(&vm_page_queue_free_lock);
5344 vm_page_unlock_queues();
5345
5346 mutex_pause(0);
5347
5348 PAGE_REPLACEMENT_ALLOWED(TRUE);
5349
5350 vm_page_lock_queues();
5351 lck_mtx_lock(&vm_page_queue_free_lock);
5352
5353 RESET_STATE_OF_RUN();
5354 /*
5355 * reset our free page limit since we
5356 * dropped the lock protecting the vm_page_free_queue
5357 */
5358 free_available = vm_page_free_count - vm_page_free_reserved;
5359 considered = 0;
5360
5361 yielded++;
5362
5363 goto retry;
5364 }
5365 considered++;
5366 }
5367 m = VM_PAGE_NULL;
5368
5369 if (npages != contig_pages) {
5370 if (!wrapped) {
5371 /*
5372 * We didn't find a contiguous range but we didn't
5373 * start from the very first page.
5374 * Start again from the very first page.
5375 */
5376 RESET_STATE_OF_RUN();
5377 if( flags & KMA_LOMEM)
5378 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
5379 else
5380 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
5381 last_idx = 0;
5382 page_idx = last_idx;
5383 wrapped = TRUE;
5384 goto retry;
5385 }
5386 lck_mtx_unlock(&vm_page_queue_free_lock);
5387 } else {
5388 vm_page_t m1;
5389 vm_page_t m2;
5390 unsigned int cur_idx;
5391 unsigned int tmp_start_idx;
5392 vm_object_t locked_object = VM_OBJECT_NULL;
5393 boolean_t abort_run = FALSE;
5394
5395 assert(page_idx - start_idx == contig_pages);
5396
5397 tmp_start_idx = start_idx;
5398
5399 /*
5400 * first pass through to pull the free pages
5401 * off of the free queue so that in case we
5402 * need substitute pages, we won't grab any
5403 * of the free pages in the run... we'll clear
5404 * the 'free' bit in the 2nd pass, and even in
5405 * an abort_run case, we'll collect all of the
5406 * free pages in this run and return them to the free list
5407 */
5408 while (start_idx < page_idx) {
5409
5410 m1 = &vm_pages[start_idx++];
5411
5412 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
5413 assert(m1->vm_page_q_state == VM_PAGE_ON_FREE_Q);
5414 #endif
5415
5416 if (m1->vm_page_q_state == VM_PAGE_ON_FREE_Q) {
5417 unsigned int color;
5418
5419 color = VM_PAGE_GET_COLOR(m1);
5420 #if MACH_ASSERT
5421 vm_page_verify_free_list(&vm_page_queue_free[color].qhead, color, m1, TRUE);
5422 #endif
5423 vm_page_queue_remove(&vm_page_queue_free[color].qhead,
5424 m1,
5425 vm_page_t,
5426 pageq);
5427
5428 VM_PAGE_ZERO_PAGEQ_ENTRY(m1);
5429 #if MACH_ASSERT
5430 vm_page_verify_free_list(&vm_page_queue_free[color].qhead, color, VM_PAGE_NULL, FALSE);
5431 #endif
5432 /*
5433 * Clear the "free" bit so that this page
5434 * does not get considered for another
5435 * concurrent physically-contiguous allocation.
5436 */
5437 m1->vm_page_q_state = VM_PAGE_NOT_ON_Q;
5438 assert(m1->busy);
5439
5440 vm_page_free_count--;
5441 }
5442 }
5443 if( flags & KMA_LOMEM)
5444 vm_page_lomem_find_contiguous_last_idx = page_idx;
5445 else
5446 vm_page_find_contiguous_last_idx = page_idx;
5447
5448 /*
5449 * we can drop the free queue lock at this point since
5450 * we've pulled any 'free' candidates off of the list...
5451 * we need it dropped so that we can do a vm_page_grab
5452 * when substituting for pmapped/dirty pages
5453 */
5454 lck_mtx_unlock(&vm_page_queue_free_lock);
5455
5456 start_idx = tmp_start_idx;
5457 cur_idx = page_idx - 1;
5458
5459 while (start_idx++ < page_idx) {
5460 /*
5461 * must go through the list from back to front
5462 * so that the page list is created in the
5463 * correct order - low -> high phys addresses
5464 */
5465 m1 = &vm_pages[cur_idx--];
5466
5467 if (m1->vm_page_object == 0) {
5468 /*
5469 * page has already been removed from
5470 * the free list in the 1st pass
5471 */
5472 assert(m1->vm_page_q_state == VM_PAGE_NOT_ON_Q);
5473 assert(m1->offset == (vm_object_offset_t) -1);
5474 assert(m1->busy);
5475 assert(!m1->wanted);
5476 assert(!m1->laundry);
5477 } else {
5478 vm_object_t object;
5479 int refmod;
5480 boolean_t disconnected, reusable;
5481
5482 if (abort_run == TRUE)
5483 continue;
5484
5485 assert(m1->vm_page_q_state != VM_PAGE_NOT_ON_Q);
5486
5487 object = VM_PAGE_OBJECT(m1);
5488
5489 if (object != locked_object) {
5490 if (locked_object) {
5491 vm_object_unlock(locked_object);
5492 locked_object = VM_OBJECT_NULL;
5493 }
5494 if (vm_object_lock_try(object))
5495 locked_object = object;
5496 }
5497 if (locked_object == VM_OBJECT_NULL ||
5498 (VM_PAGE_WIRED(m1) || m1->gobbled ||
5499 m1->laundry || m1->wanted ||
5500 m1->cleaning || m1->overwriting || m1->free_when_done || m1->busy) ||
5501 (m1->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
5502
5503 if (locked_object) {
5504 vm_object_unlock(locked_object);
5505 locked_object = VM_OBJECT_NULL;
5506 }
5507 tmp_start_idx = cur_idx;
5508 abort_run = TRUE;
5509 continue;
5510 }
5511
5512 disconnected = FALSE;
5513 reusable = FALSE;
5514
5515 if ((m1->reusable ||
5516 object->all_reusable) &&
5517 (m1->vm_page_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q) &&
5518 !m1->dirty &&
5519 !m1->reference) {
5520 /* reusable page... */
5521 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1));
5522 disconnected = TRUE;
5523 if (refmod == 0) {
5524 /*
5525 * ... not reused: can steal
5526 * without relocating contents.
5527 */
5528 reusable = TRUE;
5529 }
5530 }
5531
5532 if ((m1->pmapped &&
5533 ! reusable) ||
5534 m1->dirty ||
5535 m1->precious) {
5536 vm_object_offset_t offset;
5537
5538 m2 = vm_page_grab();
5539
5540 if (m2 == VM_PAGE_NULL) {
5541 if (locked_object) {
5542 vm_object_unlock(locked_object);
5543 locked_object = VM_OBJECT_NULL;
5544 }
5545 tmp_start_idx = cur_idx;
5546 abort_run = TRUE;
5547 continue;
5548 }
5549 if (! disconnected) {
5550 if (m1->pmapped)
5551 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1));
5552 else
5553 refmod = 0;
5554 }
5555
5556 /* copy the page's contents */
5557 pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(m1), VM_PAGE_GET_PHYS_PAGE(m2));
5558 /* copy the page's state */
5559 assert(!VM_PAGE_WIRED(m1));
5560 assert(m1->vm_page_q_state != VM_PAGE_ON_FREE_Q);
5561 assert(m1->vm_page_q_state != VM_PAGE_ON_PAGEOUT_Q);
5562 assert(!m1->laundry);
5563 m2->reference = m1->reference;
5564 assert(!m1->gobbled);
5565 assert(!m1->private);
5566 m2->no_cache = m1->no_cache;
5567 m2->xpmapped = 0;
5568 assert(!m1->busy);
5569 assert(!m1->wanted);
5570 assert(!m1->fictitious);
5571 m2->pmapped = m1->pmapped; /* should flush cache ? */
5572 m2->wpmapped = m1->wpmapped;
5573 assert(!m1->free_when_done);
5574 m2->absent = m1->absent;
5575 m2->error = m1->error;
5576 m2->dirty = m1->dirty;
5577 assert(!m1->cleaning);
5578 m2->precious = m1->precious;
5579 m2->clustered = m1->clustered;
5580 assert(!m1->overwriting);
5581 m2->restart = m1->restart;
5582 m2->unusual = m1->unusual;
5583 m2->cs_validated = m1->cs_validated;
5584 m2->cs_tainted = m1->cs_tainted;
5585 m2->cs_nx = m1->cs_nx;
5586
5587 /*
5588 * If m1 had really been reusable,
5589 * we would have just stolen it, so
5590 * let's not propagate its "reusable"
5591 * bit and assert that m2 is not
5592 * marked as "reusable".
5593 */
5594 // m2->reusable = m1->reusable;
5595 assert(!m2->reusable);
5596
5597 // assert(!m1->lopage);
5598 m2->slid = m1->slid;
5599
5600 if (m1->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR)
5601 m2->vm_page_q_state = VM_PAGE_USED_BY_COMPRESSOR;
5602
5603 /*
5604 * page may need to be flushed if
5605 * it is marshalled into a UPL
5606 * that is going to be used by a device
5607 * that doesn't support coherency
5608 */
5609 m2->written_by_kernel = TRUE;
5610
5611 /*
5612 * make sure we clear the ref/mod state
5613 * from the pmap layer... else we risk
5614 * inheriting state from the last time
5615 * this page was used...
5616 */
5617 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(m2), VM_MEM_MODIFIED | VM_MEM_REFERENCED);
5618
5619 if (refmod & VM_MEM_REFERENCED)
5620 m2->reference = TRUE;
5621 if (refmod & VM_MEM_MODIFIED) {
5622 SET_PAGE_DIRTY(m2, TRUE);
5623 }
5624 offset = m1->offset;
5625
5626 /*
5627 * completely cleans up the state
5628 * of the page so that it is ready
5629 * to be put onto the free list; for
5630 * our purposes it looks like it
5631 * just came off of the free list
5632 */
5633 vm_page_free_prepare(m1);
5634
5635 /*
5636 * now put the substitute page
5637 * on the object
5638 */
5639 vm_page_insert_internal(m2, locked_object, offset, VM_KERN_MEMORY_NONE, TRUE, TRUE, FALSE, FALSE, NULL);
5640
5641 if (m2->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
5642 m2->pmapped = TRUE;
5643 m2->wpmapped = TRUE;
5644
5645 PMAP_ENTER(kernel_pmap, m2->offset, m2,
5646 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE, kr);
5647
5648 assert(kr == KERN_SUCCESS);
5649
5650 compressed_pages++;
5651
5652 } else {
5653 if (m2->reference)
5654 vm_page_activate(m2);
5655 else
5656 vm_page_deactivate(m2);
5657 }
5658 PAGE_WAKEUP_DONE(m2);
5659
5660 } else {
5661 assert(m1->vm_page_q_state != VM_PAGE_USED_BY_COMPRESSOR);
5662
5663 /*
5664 * completely cleans up the state
5665 * of the page so that it is ready
5666 * to be put onto the free list; for
5667 * our purposes it looks like it
5668 * just came off of the free list
5669 */
5670 vm_page_free_prepare(m1);
5671 }
5672
5673 stolen_pages++;
5674
5675 }
5676 #if CONFIG_BACKGROUND_QUEUE
5677 vm_page_assign_background_state(m1);
5678 #endif
5679 VM_PAGE_ZERO_PAGEQ_ENTRY(m1);
5680 m1->snext = m;
5681 m = m1;
5682 }
5683 if (locked_object) {
5684 vm_object_unlock(locked_object);
5685 locked_object = VM_OBJECT_NULL;
5686 }
5687
5688 if (abort_run == TRUE) {
5689 /*
5690 * want the index of the last
5691 * page in this run that was
5692 * successfully 'stolen', so back
5693 * it up 1 for the auto-decrement on use
5694 * and 1 more to bump back over this page
5695 */
5696 page_idx = tmp_start_idx + 2;
5697 if (page_idx >= vm_pages_count) {
5698 if (wrapped) {
5699 if (m != VM_PAGE_NULL) {
5700 vm_page_unlock_queues();
5701 vm_page_free_list(m, FALSE);
5702 vm_page_lock_queues();
5703 m = VM_PAGE_NULL;
5704 }
5705 dumped_run++;
5706 goto done_scanning;
5707 }
5708 page_idx = last_idx = 0;
5709 wrapped = TRUE;
5710 }
5711 abort_run = FALSE;
5712
5713 /*
5714 * We didn't find a contiguous range but we didn't
5715 * start from the very first page.
5716 * Start again from the very first page.
5717 */
5718 RESET_STATE_OF_RUN();
5719
5720 if( flags & KMA_LOMEM)
5721 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
5722 else
5723 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
5724
5725 last_idx = page_idx;
5726
5727 if (m != VM_PAGE_NULL) {
5728 vm_page_unlock_queues();
5729 vm_page_free_list(m, FALSE);
5730 vm_page_lock_queues();
5731 m = VM_PAGE_NULL;
5732 }
5733 dumped_run++;
5734
5735 lck_mtx_lock(&vm_page_queue_free_lock);
5736 /*
5737 * reset our free page limit since we
5738 * dropped the lock protecting the vm_page_free_queue
5739 */
5740 free_available = vm_page_free_count - vm_page_free_reserved;
5741 goto retry;
5742 }
5743
5744 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
5745
5746 assert(m1->vm_page_q_state == VM_PAGE_NOT_ON_Q);
5747 assert(m1->wire_count == 0);
5748
5749 if (wire == TRUE) {
5750 m1->wire_count++;
5751 m1->vm_page_q_state = VM_PAGE_IS_WIRED;
5752 } else
5753 m1->gobbled = TRUE;
5754 }
5755 if (wire == FALSE)
5756 vm_page_gobble_count += npages;
5757
5758 /*
5759 * gobbled pages are also counted as wired pages
5760 */
5761 vm_page_wire_count += npages;
5762
5763 assert(vm_page_verify_contiguous(m, npages));
5764 }
5765 done_scanning:
5766 PAGE_REPLACEMENT_ALLOWED(FALSE);
5767
5768 vm_page_unlock_queues();
5769
5770 #if DEBUG
5771 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
5772
5773 tv_end_sec -= tv_start_sec;
5774 if (tv_end_usec < tv_start_usec) {
5775 tv_end_sec--;
5776 tv_end_usec += 1000000;
5777 }
5778 tv_end_usec -= tv_start_usec;
5779 if (tv_end_usec >= 1000000) {
5780 tv_end_sec++;
5781 tv_end_usec -= 1000000;
5782 }
5783 if (vm_page_find_contig_debug) {
5784 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
5785 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
5786 (long)tv_end_sec, tv_end_usec, orig_last_idx,
5787 scanned, yielded, dumped_run, stolen_pages, compressed_pages);
5788 }
5789
5790 #endif
5791 #if MACH_ASSERT
5792 vm_page_verify_free_lists();
5793 #endif
5794 if (m == NULL && zone_gc_called == FALSE) {
5795 printf("%s(num=%d,low=%d): found %d pages at 0x%llx...scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages... wired count is %d\n",
5796 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
5797 scanned, yielded, dumped_run, stolen_pages, compressed_pages, vm_page_wire_count);
5798
5799 if (consider_buffer_cache_collect != NULL) {
5800 (void)(*consider_buffer_cache_collect)(1);
5801 }
5802
5803 consider_zone_gc(FALSE);
5804
5805 zone_gc_called = TRUE;
5806
5807 printf("vm_page_find_contiguous: zone_gc called... wired count is %d\n", vm_page_wire_count);
5808 goto full_scan_again;
5809 }
5810
5811 return m;
5812 }
5813
5814 /*
5815 * Allocate a list of contiguous, wired pages.
5816 */
5817 kern_return_t
5818 cpm_allocate(
5819 vm_size_t size,
5820 vm_page_t *list,
5821 ppnum_t max_pnum,
5822 ppnum_t pnum_mask,
5823 boolean_t wire,
5824 int flags)
5825 {
5826 vm_page_t pages;
5827 unsigned int npages;
5828
5829 if (size % PAGE_SIZE != 0)
5830 return KERN_INVALID_ARGUMENT;
5831
5832 npages = (unsigned int) (size / PAGE_SIZE);
5833 if (npages != size / PAGE_SIZE) {
5834 /* 32-bit overflow */
5835 return KERN_INVALID_ARGUMENT;
5836 }
5837
5838 /*
5839 * Obtain a pointer to a subset of the free
5840 * list large enough to satisfy the request;
5841 * the region will be physically contiguous.
5842 */
5843 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
5844
5845 if (pages == VM_PAGE_NULL)
5846 return KERN_NO_SPACE;
5847 /*
5848 * determine need for wakeups
5849 */
5850 if ((vm_page_free_count < vm_page_free_min) ||
5851 ((vm_page_free_count < vm_page_free_target) &&
5852 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
5853 thread_wakeup((event_t) &vm_page_free_wanted);
5854
5855 VM_CHECK_MEMORYSTATUS;
5856
5857 /*
5858 * The CPM pages should now be available and
5859 * ordered by ascending physical address.
5860 */
5861 assert(vm_page_verify_contiguous(pages, npages));
5862
5863 *list = pages;
5864 return KERN_SUCCESS;
5865 }
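
/*
 * Illustrative sketch of a cpm_allocate() caller (hypothetical function,
 * not part of this file): request a physically contiguous, wired run of
 * pages below max_pnum and walk the returned list.  The pnum_mask and
 * flags values are placeholders chosen for the example.
 */
#if 0
static kern_return_t
cpm_allocate_example(vm_size_t size, ppnum_t max_pnum)
{
	vm_page_t	pages;
	vm_page_t	m;
	kern_return_t	kr;

	kr = cpm_allocate(size, &pages, max_pnum, 0 /* pnum_mask */,
			  TRUE /* wire */, 0 /* flags */);
	if (kr != KERN_SUCCESS)
		return (kr);

	/* the list is ordered by ascending physical address */
	for (m = pages; m != VM_PAGE_NULL; m = NEXT_PAGE(m))
		(void) VM_PAGE_GET_PHYS_PAGE(m);

	return (KERN_SUCCESS);
}
#endif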
5866
5867
5868 unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
5869
5870 /*
5871 * when working on a 'run' of pages, it is necessary to hold
5872 * the vm_page_queue_lock (a hot global lock) for certain operations
5873 * on the page... however, the majority of the work can be done
5874 * while merely holding the object lock... in fact there are certain
5875 * collections of pages that don't require any work brokered by the
5876 * vm_page_queue_lock... to mitigate the time spent behind the global
5877 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
5878 * while doing all of the work that doesn't require the vm_page_queue_lock...
5879 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
5880 * necessary work for each page... we will grab the busy bit on the page
5881 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
5882 * if it can't immediately take the vm_page_queue_lock in order to compete
5883 * for the locks in the same order that vm_pageout_scan takes them.
5884 * the operation names are modeled after the names of the routines that
5885 * need to be called in order to make the changes very obvious in the
5886 * original loop
5887 */
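
/*
 * A minimal sketch of the 2 pass pattern described above (hypothetical
 * helper, not part of this file): the caller, holding only the object
 * lock, records the queue-lock work for each page it touches, then makes
 * a single vm_page_do_delayed_work call for the whole batch.  A real
 * caller accumulates many pages and flushes whenever the batch limit is
 * reached; the mask shown is just one possible combination.
 */
#if 0
static void
delayed_work_example(vm_object_t object, vm_page_t m)
{
	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
	struct vm_page_delayed_work	*dwp = &dw_array[0];
	int				dw_count = 0;

	/* pass 1: object lock only... note what this page needs */
	dwp->dw_m = m;
	dwp->dw_mask = DW_vm_page_activate | DW_PAGE_WAKEUP;
	dwp++;
	dw_count++;

	/* pass 2: one trip behind the vm_page_queue_lock for the batch */
	vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
}
#endif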
5888
5889 void
5890 vm_page_do_delayed_work(
5891 vm_object_t object,
5892 vm_tag_t tag,
5893 struct vm_page_delayed_work *dwp,
5894 int dw_count)
5895 {
5896 int j;
5897 vm_page_t m;
5898 vm_page_t local_free_q = VM_PAGE_NULL;
5899
5900 /*
5901 * pageout_scan takes the vm_page_lock_queues first
5902 * then tries for the object lock... to avoid what
5903 * is effectively a lock inversion, we'll go to the
5904 * trouble of taking them in that same order... otherwise
5905 * if this object contains the majority of the pages resident
5906 * in the UBC (or a small set of large objects actively being
5907 * worked on contain the majority of the pages), we could
5908 * cause the pageout_scan thread to 'starve' in its attempt
5909 * to find pages to move to the free queue, since it has to
5910 * successfully acquire the object lock of any candidate page
5911 * before it can steal/clean it.
5912 */
5913 if (!vm_page_trylockspin_queues()) {
5914 vm_object_unlock(object);
5915
5916 vm_page_lockspin_queues();
5917
5918 for (j = 0; ; j++) {
5919 if (!vm_object_lock_avoid(object) &&
5920 _vm_object_lock_try(object))
5921 break;
5922 vm_page_unlock_queues();
5923 mutex_pause(j);
5924 vm_page_lockspin_queues();
5925 }
5926 }
5927 for (j = 0; j < dw_count; j++, dwp++) {
5928
5929 m = dwp->dw_m;
5930
5931 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
5932 vm_pageout_throttle_up(m);
5933 #if CONFIG_PHANTOM_CACHE
5934 if (dwp->dw_mask & DW_vm_phantom_cache_update)
5935 vm_phantom_cache_update(m);
5936 #endif
5937 if (dwp->dw_mask & DW_vm_page_wire)
5938 vm_page_wire(m, tag, FALSE);
5939 else if (dwp->dw_mask & DW_vm_page_unwire) {
5940 boolean_t queueit;
5941
5942 queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;
5943
5944 vm_page_unwire(m, queueit);
5945 }
5946 if (dwp->dw_mask & DW_vm_page_free) {
5947 vm_page_free_prepare_queues(m);
5948
5949 assert(m->pageq.next == 0 && m->pageq.prev == 0);
5950 /*
5951 * Add this page to our list of reclaimed pages,
5952 * to be freed later.
5953 */
5954 m->snext = local_free_q;
5955 local_free_q = m;
5956 } else {
5957 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
5958 vm_page_deactivate_internal(m, FALSE);
5959 else if (dwp->dw_mask & DW_vm_page_activate) {
5960 if (m->vm_page_q_state != VM_PAGE_ON_ACTIVE_Q) {
5961 vm_page_activate(m);
5962 }
5963 }
5964 else if (dwp->dw_mask & DW_vm_page_speculate)
5965 vm_page_speculate(m, TRUE);
5966 else if (dwp->dw_mask & DW_enqueue_cleaned) {
5967 /*
5968 * if we didn't hold the object lock and did this,
5969 * we might disconnect the page, then someone might
5970 * soft fault it back in, then we would put it on the
5971 * cleaned queue, and so we would have a referenced (maybe even dirty)
5972 * page on that queue, which we don't want
5973 */
5974 int refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
5975
5976 if ((refmod_state & VM_MEM_REFERENCED)) {
5977 /*
5978 * this page has been touched since it got cleaned; let's activate it
5979 * if it hasn't already been
5980 */
5981 vm_pageout_enqueued_cleaned++;
5982 vm_pageout_cleaned_reactivated++;
5983 vm_pageout_cleaned_commit_reactivated++;
5984
5985 if (m->vm_page_q_state != VM_PAGE_ON_ACTIVE_Q)
5986 vm_page_activate(m);
5987 } else {
5988 m->reference = FALSE;
5989 vm_page_enqueue_cleaned(m);
5990 }
5991 }
5992 else if (dwp->dw_mask & DW_vm_page_lru)
5993 vm_page_lru(m);
5994 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
5995 if (m->vm_page_q_state != VM_PAGE_ON_PAGEOUT_Q)
5996 vm_page_queues_remove(m, TRUE);
5997 }
5998 if (dwp->dw_mask & DW_set_reference)
5999 m->reference = TRUE;
6000 else if (dwp->dw_mask & DW_clear_reference)
6001 m->reference = FALSE;
6002
6003 if (dwp->dw_mask & DW_move_page) {
6004 if (m->vm_page_q_state != VM_PAGE_ON_PAGEOUT_Q) {
6005 vm_page_queues_remove(m, FALSE);
6006
6007 assert(VM_PAGE_OBJECT(m) != kernel_object);
6008
6009 vm_page_enqueue_inactive(m, FALSE);
6010 }
6011 }
6012 if (dwp->dw_mask & DW_clear_busy)
6013 m->busy = FALSE;
6014
6015 if (dwp->dw_mask & DW_PAGE_WAKEUP)
6016 PAGE_WAKEUP(m);
6017 }
6018 }
6019 vm_page_unlock_queues();
6020
6021 if (local_free_q)
6022 vm_page_free_list(local_free_q, TRUE);
6023
6024 VM_CHECK_MEMORYSTATUS;
6025
6026 }
6027
6028 kern_return_t
6029 vm_page_alloc_list(
6030 int page_count,
6031 int flags,
6032 vm_page_t *list)
6033 {
6034 vm_page_t lo_page_list = VM_PAGE_NULL;
6035 vm_page_t mem;
6036 int i;
6037
6038 if ( !(flags & KMA_LOMEM))
6039 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
6040
6041 for (i = 0; i < page_count; i++) {
6042
6043 mem = vm_page_grablo();
6044
6045 if (mem == VM_PAGE_NULL) {
6046 if (lo_page_list)
6047 vm_page_free_list(lo_page_list, FALSE);
6048
6049 *list = VM_PAGE_NULL;
6050
6051 return (KERN_RESOURCE_SHORTAGE);
6052 }
6053 mem->snext = lo_page_list;
6054 lo_page_list = mem;
6055 }
6056 *list = lo_page_list;
6057
6058 return (KERN_SUCCESS);
6059 }
6060
6061 void
6062 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
6063 {
6064 page->offset = offset;
6065 }
6066
6067 vm_page_t
6068 vm_page_get_next(vm_page_t page)
6069 {
6070 return (page->snext);
6071 }
6072
6073 vm_object_offset_t
6074 vm_page_get_offset(vm_page_t page)
6075 {
6076 return (page->offset);
6077 }
6078
6079 ppnum_t
6080 vm_page_get_phys_page(vm_page_t page)
6081 {
6082 return (VM_PAGE_GET_PHYS_PAGE(page));
6083 }
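
/*
 * Illustrative sketch (hypothetical function, not part of this file):
 * allocate a list of low-memory pages with vm_page_alloc_list and walk
 * it using the accessors above.
 */
#if 0
static kern_return_t
lopage_list_example(int page_count)
{
	vm_page_t	list;
	vm_page_t	m;
	kern_return_t	kr;

	kr = vm_page_alloc_list(page_count, KMA_LOMEM, &list);
	if (kr != KERN_SUCCESS)
		return (kr);

	for (m = list; m != VM_PAGE_NULL; m = vm_page_get_next(m))
		(void) vm_page_get_phys_page(m);

	/* return the pages to the free list when done with them */
	vm_page_free_list(list, FALSE);

	return (KERN_SUCCESS);
}
#endif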
6084
6085
6086 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
6087
6088 #if HIBERNATION
6089
6090 static vm_page_t hibernate_gobble_queue;
6091
6092 static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
6093 static int hibernate_flush_dirty_pages(int);
6094 static int hibernate_flush_queue(vm_page_queue_head_t *, int);
6095
6096 void hibernate_flush_wait(void);
6097 void hibernate_mark_in_progress(void);
6098 void hibernate_clear_in_progress(void);
6099
6100 void hibernate_free_range(int, int);
6101 void hibernate_hash_insert_page(vm_page_t);
6102 uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
6103 void hibernate_rebuild_vm_structs(void);
6104 uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
6105 ppnum_t hibernate_lookup_paddr(unsigned int);
6106
6107 struct hibernate_statistics {
6108 int hibernate_considered;
6109 int hibernate_reentered_on_q;
6110 int hibernate_found_dirty;
6111 int hibernate_skipped_cleaning;
6112 int hibernate_skipped_transient;
6113 int hibernate_skipped_precious;
6114 int hibernate_skipped_external;
6115 int hibernate_queue_nolock;
6116 int hibernate_queue_paused;
6117 int hibernate_throttled;
6118 int hibernate_throttle_timeout;
6119 int hibernate_drained;
6120 int hibernate_drain_timeout;
6121 int cd_lock_failed;
6122 int cd_found_precious;
6123 int cd_found_wired;
6124 int cd_found_busy;
6125 int cd_found_unusual;
6126 int cd_found_cleaning;
6127 int cd_found_laundry;
6128 int cd_found_dirty;
6129 int cd_found_xpmapped;
6130 int cd_skipped_xpmapped;
6131 int cd_local_free;
6132 int cd_total_free;
6133 int cd_vm_page_wire_count;
6134 int cd_vm_struct_pages_unneeded;
6135 int cd_pages;
6136 int cd_discarded;
6137 int cd_count_wire;
6138 } hibernate_stats;
6139
6140
6141 /*
6142 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
6143 * so that we don't overrun the estimated image size, which would
6144 * result in a hibernation failure.
6145 */
6146 #define HIBERNATE_XPMAPPED_LIMIT 40000
6147
6148
6149 static int
6150 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
6151 {
6152 wait_result_t wait_result;
6153
6154 vm_page_lock_queues();
6155
6156 while ( !vm_page_queue_empty(&q->pgo_pending) ) {
6157
6158 q->pgo_draining = TRUE;
6159
6160 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
6161
6162 vm_page_unlock_queues();
6163
6164 wait_result = thread_block(THREAD_CONTINUE_NULL);
6165
6166 if (wait_result == THREAD_TIMED_OUT && !vm_page_queue_empty(&q->pgo_pending)) {
6167 hibernate_stats.hibernate_drain_timeout++;
6168
6169 if (q == &vm_pageout_queue_external)
6170 return (0);
6171
6172 return (1);
6173 }
6174 vm_page_lock_queues();
6175
6176 hibernate_stats.hibernate_drained++;
6177 }
6178 vm_page_unlock_queues();
6179
6180 return (0);
6181 }
6182
6183
6184 boolean_t hibernate_skip_external = FALSE;
6185
6186 static int
6187 hibernate_flush_queue(vm_page_queue_head_t *q, int qcount)
6188 {
6189 vm_page_t m;
6190 vm_object_t l_object = NULL;
6191 vm_object_t m_object = NULL;
6192 int refmod_state = 0;
6193 int try_failed_count = 0;
6194 int retval = 0;
6195 int current_run = 0;
6196 struct vm_pageout_queue *iq;
6197 struct vm_pageout_queue *eq;
6198 struct vm_pageout_queue *tq;
6199
6200 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START,
6201 VM_KERNEL_UNSLIDE_OR_PERM(q), qcount);
6202
6203 iq = &vm_pageout_queue_internal;
6204 eq = &vm_pageout_queue_external;
6205
6206 vm_page_lock_queues();
6207
6208 while (qcount && !vm_page_queue_empty(q)) {
6209
6210 if (current_run++ == 1000) {
6211 if (hibernate_should_abort()) {
6212 retval = 1;
6213 break;
6214 }
6215 current_run = 0;
6216 }
6217
6218 m = (vm_page_t) vm_page_queue_first(q);
6219 m_object = VM_PAGE_OBJECT(m);
6220
6221 /*
6222 * check to see if we currently are working
6223 * with the same object... if so, we've
6224 * already got the lock
6225 */
6226 if (m_object != l_object) {
6227 /*
6228 * the object associated with candidate page is
6229 * different from the one we were just working
6230 * with... dump the lock if we still own it
6231 */
6232 if (l_object != NULL) {
6233 vm_object_unlock(l_object);
6234 l_object = NULL;
6235 }
6236 /*
6237 * Try to lock object; since we've already got the
6238 * page queues lock, we can only 'try' for this one.
6239 * if the 'try' fails, we need to do a mutex_pause
6240 * to allow the owner of the object lock a chance to
6241 * run...
6242 */
6243 if ( !vm_object_lock_try_scan(m_object)) {
6244
6245 if (try_failed_count > 20) {
6246 hibernate_stats.hibernate_queue_nolock++;
6247
6248 goto reenter_pg_on_q;
6249 }
6250
6251 vm_page_unlock_queues();
6252 mutex_pause(try_failed_count++);
6253 vm_page_lock_queues();
6254
6255 hibernate_stats.hibernate_queue_paused++;
6256 continue;
6257 } else {
6258 l_object = m_object;
6259 }
6260 }
6261 if ( !m_object->alive || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
6262 /*
6263 * page is not to be cleaned
6264 * put it back on the head of its queue
6265 */
6266 if (m->cleaning)
6267 hibernate_stats.hibernate_skipped_cleaning++;
6268 else
6269 hibernate_stats.hibernate_skipped_transient++;
6270
6271 goto reenter_pg_on_q;
6272 }
6273 if (m_object->copy == VM_OBJECT_NULL) {
6274 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
6275 /*
6276 * let the normal hibernate image path
6277 * deal with these
6278 */
6279 goto reenter_pg_on_q;
6280 }
6281 }
6282 if ( !m->dirty && m->pmapped) {
6283 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
6284
6285 if ((refmod_state & VM_MEM_MODIFIED)) {
6286 SET_PAGE_DIRTY(m, FALSE);
6287 }
6288 } else
6289 refmod_state = 0;
6290
6291 if ( !m->dirty) {
6292 /*
6293 * page is not to be cleaned
6294 * put it back on the head of its queue
6295 */
6296 if (m->precious)
6297 hibernate_stats.hibernate_skipped_precious++;
6298
6299 goto reenter_pg_on_q;
6300 }
6301
6302 if (hibernate_skip_external == TRUE && !m_object->internal) {
6303
6304 hibernate_stats.hibernate_skipped_external++;
6305
6306 goto reenter_pg_on_q;
6307 }
6308 tq = NULL;
6309
6310 if (m_object->internal) {
6311 if (VM_PAGE_Q_THROTTLED(iq))
6312 tq = iq;
6313 } else if (VM_PAGE_Q_THROTTLED(eq))
6314 tq = eq;
6315
6316 if (tq != NULL) {
6317 wait_result_t wait_result;
6318 int wait_count = 5;
6319
6320 if (l_object != NULL) {
6321 vm_object_unlock(l_object);
6322 l_object = NULL;
6323 }
6324
6325 while (retval == 0) {
6326
6327 tq->pgo_throttled = TRUE;
6328
6329 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
6330
6331 vm_page_unlock_queues();
6332
6333 wait_result = thread_block(THREAD_CONTINUE_NULL);
6334
6335 vm_page_lock_queues();
6336
6337 if (wait_result != THREAD_TIMED_OUT)
6338 break;
6339 if (!VM_PAGE_Q_THROTTLED(tq))
6340 break;
6341
6342 if (hibernate_should_abort())
6343 retval = 1;
6344
6345 if (--wait_count == 0) {
6346
6347 hibernate_stats.hibernate_throttle_timeout++;
6348
6349 if (tq == eq) {
6350 hibernate_skip_external = TRUE;
6351 break;
6352 }
6353 retval = 1;
6354 }
6355 }
6356 if (retval)
6357 break;
6358
6359 hibernate_stats.hibernate_throttled++;
6360
6361 continue;
6362 }
6363 /*
6364 * we've already factored out pages in the laundry which
6365 * means this page can't be on the pageout queue so it's
6366 * safe to do the vm_page_queues_remove
6367 */
6368 vm_page_queues_remove(m, TRUE);
6369
6370 if (m_object->internal == TRUE)
6371 pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m), PMAP_OPTIONS_COMPRESSOR, NULL);
6372
6373 vm_pageout_cluster(m);
6374
6375 hibernate_stats.hibernate_found_dirty++;
6376
6377 goto next_pg;
6378
6379 reenter_pg_on_q:
6380 vm_page_queue_remove(q, m, vm_page_t, pageq);
6381 vm_page_queue_enter(q, m, vm_page_t, pageq);
6382
6383 hibernate_stats.hibernate_reentered_on_q++;
6384 next_pg:
6385 hibernate_stats.hibernate_considered++;
6386
6387 qcount--;
6388 try_failed_count = 0;
6389 }
6390 if (l_object != NULL) {
6391 vm_object_unlock(l_object);
6392 l_object = NULL;
6393 }
6394
6395 vm_page_unlock_queues();
6396
6397 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
6398
6399 return (retval);
6400 }
6401
6402
6403 static int
6404 hibernate_flush_dirty_pages(int pass)
6405 {
6406 struct vm_speculative_age_q *aq;
6407 uint32_t i;
6408
6409 if (vm_page_local_q) {
6410 for (i = 0; i < vm_page_local_q_count; i++)
6411 vm_page_reactivate_local(i, TRUE, FALSE);
6412 }
6413
6414 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
6415 int qcount;
6416 vm_page_t m;
6417
6418 aq = &vm_page_queue_speculative[i];
6419
6420 if (vm_page_queue_empty(&aq->age_q))
6421 continue;
6422 qcount = 0;
6423
6424 vm_page_lockspin_queues();
6425
6426 vm_page_queue_iterate(&aq->age_q,
6427 m,
6428 vm_page_t,
6429 pageq)
6430 {
6431 qcount++;
6432 }
6433 vm_page_unlock_queues();
6434
6435 if (qcount) {
6436 if (hibernate_flush_queue(&aq->age_q, qcount))
6437 return (1);
6438 }
6439 }
6440 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
6441 return (1);
6442 /* XXX FBDP TODO: flush secluded queue */
6443 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
6444 return (1);
6445 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
6446 return (1);
6447 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
6448 return (1);
6449
6450 if (pass == 1)
6451 vm_compressor_record_warmup_start();
6452
6453 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
6454 if (pass == 1)
6455 vm_compressor_record_warmup_end();
6456 return (1);
6457 }
6458 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
6459 if (pass == 1)
6460 vm_compressor_record_warmup_end();
6461 return (1);
6462 }
6463 if (pass == 1)
6464 vm_compressor_record_warmup_end();
6465
6466 if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
6467 return (1);
6468
6469 return (0);
6470 }
6471
6472
6473 void
6474 hibernate_reset_stats()
6475 {
6476 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
6477 }
6478
6479
6480 int
6481 hibernate_flush_memory()
6482 {
6483 int retval;
6484
6485 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
6486
6487 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
6488
6489 hibernate_cleaning_in_progress = TRUE;
6490 hibernate_skip_external = FALSE;
6491
6492 if ((retval = hibernate_flush_dirty_pages(1)) == 0) {
6493
6494 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
6495
6496 vm_compressor_flush();
6497
6498 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
6499
6500 if (consider_buffer_cache_collect != NULL) {
6501 unsigned int orig_wire_count;
6502
6503 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6504 orig_wire_count = vm_page_wire_count;
6505
6506 (void)(*consider_buffer_cache_collect)(1);
6507 consider_zone_gc(FALSE);
6508
6509 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
6510
6511 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
6512 }
6513 }
6514 hibernate_cleaning_in_progress = FALSE;
6515
6516 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
6517
6518 if (retval)
6519 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);
6520
6521
6522 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
6523 hibernate_stats.hibernate_considered,
6524 hibernate_stats.hibernate_reentered_on_q,
6525 hibernate_stats.hibernate_found_dirty);
6526 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
6527 hibernate_stats.hibernate_skipped_cleaning,
6528 hibernate_stats.hibernate_skipped_transient,
6529 hibernate_stats.hibernate_skipped_precious,
6530 hibernate_stats.hibernate_skipped_external,
6531 hibernate_stats.hibernate_queue_nolock);
6532 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
6533 hibernate_stats.hibernate_queue_paused,
6534 hibernate_stats.hibernate_throttled,
6535 hibernate_stats.hibernate_throttle_timeout,
6536 hibernate_stats.hibernate_drained,
6537 hibernate_stats.hibernate_drain_timeout);
6538
6539 return (retval);
6540 }
6541
6542
6543 static void
6544 hibernate_page_list_zero(hibernate_page_list_t *list)
6545 {
6546 uint32_t bank;
6547 hibernate_bitmap_t * bitmap;
6548
6549 bitmap = &list->bank_bitmap[0];
6550 for (bank = 0; bank < list->bank_count; bank++)
6551 {
6552 uint32_t last_bit;
6553
6554 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
6555 // set out-of-bound bits at end of bitmap.
6556 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
6557 if (last_bit)
6558 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
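// e.g. a bank spanning 70 pages: last_bit = 70 & 31 = 6, so
// 0xFFFFFFFF >> 6 pre-sets the 26 low-order bits of the final word,
// i.e. (given hibernate_page_bitset's MSB-first layout) the positions
// past the bank's last page, so they are never treated as needing save.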
6559
6560 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
6561 }
6562 }
6563
6564 void
6565 hibernate_free_gobble_pages(void)
6566 {
6567 vm_page_t m, next;
6568 uint32_t count = 0;
6569
6570 m = (vm_page_t) hibernate_gobble_queue;
6571 while(m)
6572 {
6573 next = m->snext;
6574 vm_page_free(m);
6575 count++;
6576 m = next;
6577 }
6578 hibernate_gobble_queue = VM_PAGE_NULL;
6579
6580 if (count)
6581 HIBLOG("Freed %d pages\n", count);
6582 }
6583
6584 static boolean_t
6585 hibernate_consider_discard(vm_page_t m, boolean_t preflight)
6586 {
6587 vm_object_t object = NULL;
6588 int refmod_state;
6589 boolean_t discard = FALSE;
6590
6591 do
6592 {
6593 if (m->private)
6594 panic("hibernate_consider_discard: private");
6595
6596 object = VM_PAGE_OBJECT(m);
6597
6598 if (!vm_object_lock_try(object)) {
6599 object = NULL;
6600 if (!preflight) hibernate_stats.cd_lock_failed++;
6601 break;
6602 }
6603 if (VM_PAGE_WIRED(m)) {
6604 if (!preflight) hibernate_stats.cd_found_wired++;
6605 break;
6606 }
6607 if (m->precious) {
6608 if (!preflight) hibernate_stats.cd_found_precious++;
6609 break;
6610 }
6611 if (m->busy || !object->alive) {
6612 /*
6613 * Somebody is playing with this page.
6614 */
6615 if (!preflight) hibernate_stats.cd_found_busy++;
6616 break;
6617 }
6618 if (m->absent || m->unusual || m->error) {
6619 /*
6620 * If it's unusual in any way, ignore it
6621 */
6622 if (!preflight) hibernate_stats.cd_found_unusual++;
6623 break;
6624 }
6625 if (m->cleaning) {
6626 if (!preflight) hibernate_stats.cd_found_cleaning++;
6627 break;
6628 }
6629 if (m->laundry) {
6630 if (!preflight) hibernate_stats.cd_found_laundry++;
6631 break;
6632 }
6633 if (!m->dirty)
6634 {
6635 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
6636
6637 if (refmod_state & VM_MEM_REFERENCED)
6638 m->reference = TRUE;
6639 if (refmod_state & VM_MEM_MODIFIED) {
6640 SET_PAGE_DIRTY(m, FALSE);
6641 }
6642 }
6643
6644 /*
6645 * If it's clean or purgeable we can discard the page on wakeup.
6646 */
6647 discard = (!m->dirty)
6648 || (VM_PURGABLE_VOLATILE == object->purgable)
6649 || (VM_PURGABLE_EMPTY == object->purgable);
6650
6651
6652 if (discard == FALSE) {
6653 if (!preflight)
6654 hibernate_stats.cd_found_dirty++;
6655 } else if (m->xpmapped && m->reference && !object->internal) {
6656 if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
6657 if (!preflight)
6658 hibernate_stats.cd_found_xpmapped++;
6659 discard = FALSE;
6660 } else {
6661 if (!preflight)
6662 hibernate_stats.cd_skipped_xpmapped++;
6663 }
6664 }
6665 }
6666 while (FALSE);
6667
6668 if (object)
6669 vm_object_unlock(object);
6670
6671 return (discard);
6672 }
6673
6674
6675 static void
6676 hibernate_discard_page(vm_page_t m)
6677 {
6678 vm_object_t m_object;
6679
6680 if (m->absent || m->unusual || m->error)
6681 /*
6682 * If it's unusual in any way, ignore it
6683 */
6684 return;
6685
6686 m_object = VM_PAGE_OBJECT(m);
6687
6688 #if MACH_ASSERT || DEBUG
6689 if (!vm_object_lock_try(m_object))
6690 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
6691 #else
6692 /* No need to lock page queue for token delete, hibernate_vm_unlock()
6693 makes sure these locks are uncontended before sleep */
6694 #endif /* MACH_ASSERT || DEBUG */
6695
6696 if (m->pmapped == TRUE)
6697 {
6698 __unused int refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
6699 }
6700
6701 if (m->laundry)
6702 panic("hibernate_discard_page(%p) laundry", m);
6703 if (m->private)
6704 panic("hibernate_discard_page(%p) private", m);
6705 if (m->fictitious)
6706 panic("hibernate_discard_page(%p) fictitious", m);
6707
6708 if (VM_PURGABLE_VOLATILE == m_object->purgable)
6709 {
6710 /* object should be on a queue */
6711 assert((m_object->objq.next != NULL) && (m_object->objq.prev != NULL));
6712 purgeable_q_t old_queue = vm_purgeable_object_remove(m_object);
6713 assert(old_queue);
6714 if (m_object->purgeable_when_ripe) {
6715 vm_purgeable_token_delete_first(old_queue);
6716 }
6717 vm_object_lock_assert_exclusive(m_object);
6718 m_object->purgable = VM_PURGABLE_EMPTY;
6719
6720 /*
6721 * Purgeable ledgers: pages of VOLATILE and EMPTY objects are
6722 * accounted in the "volatile" ledger, so no change here.
6723 * We have to update vm_page_purgeable_count, though, since we're
6724 * effectively purging this object.
6725 */
6726 unsigned int delta;
6727 assert(m_object->resident_page_count >= m_object->wired_page_count);
6728 delta = (m_object->resident_page_count - m_object->wired_page_count);
6729 assert(vm_page_purgeable_count >= delta);
6730 assert(delta > 0);
6731 OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
6732 }
6733
6734 vm_page_free(m);
6735
6736 #if MACH_ASSERT || DEBUG
6737 vm_object_unlock(m_object);
6738 #endif /* MACH_ASSERT || DEBUG */
6739 }
6740
6741 /*
6742 Grab locks for hibernate_page_list_setall()
6743 */
6744 void
6745 hibernate_vm_lock_queues(void)
6746 {
6747 vm_object_lock(compressor_object);
6748 vm_page_lock_queues();
6749 lck_mtx_lock(&vm_page_queue_free_lock);
6750 lck_mtx_lock(&vm_purgeable_queue_lock);
6751
6752 if (vm_page_local_q) {
6753 uint32_t i;
6754 for (i = 0; i < vm_page_local_q_count; i++) {
6755 struct vpl *lq;
6756 lq = &vm_page_local_q[i].vpl_un.vpl;
6757 VPL_LOCK(&lq->vpl_lock);
6758 }
6759 }
6760 }
6761
6762 void
6763 hibernate_vm_unlock_queues(void)
6764 {
6765 if (vm_page_local_q) {
6766 uint32_t i;
6767 for (i = 0; i < vm_page_local_q_count; i++) {
6768 struct vpl *lq;
6769 lq = &vm_page_local_q[i].vpl_un.vpl;
6770 VPL_UNLOCK(&lq->vpl_lock);
6771 }
6772 }
6773 lck_mtx_unlock(&vm_purgeable_queue_lock);
6774 lck_mtx_unlock(&vm_page_queue_free_lock);
6775 vm_page_unlock_queues();
6776 vm_object_unlock(compressor_object);
6777 }
6778
6779 /*
6780 Bits zero in the bitmaps => page needs to be saved. All pages default to being saved;
6781 pages known to the VM to not need saving are subtracted.
6782 Wired pages to be saved are present in page_list_wired, pageable in page_list.
6783 */
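
/*
 * Illustrative fragment of the bitmap convention (assumes 'pn' and the
 * lists are in scope): setting a page's bit marks it as NOT needing to
 * be saved in the image.
 */
#if 0
	/* page 'pn' is known to be reconstructible... don't save it */
	hibernate_page_bitset(page_list, TRUE, pn);
	hibernate_page_bitset(page_list_wired, TRUE, pn);

	/* later: a set bit means the page is skipped when writing the image */
	if (hibernate_page_bittst(page_list, pn)) {
		/* skip this page */
	}
#endif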
6784
6785 void
6786 hibernate_page_list_setall(hibernate_page_list_t * page_list,
6787 hibernate_page_list_t * page_list_wired,
6788 hibernate_page_list_t * page_list_pal,
6789 boolean_t preflight,
6790 boolean_t will_discard,
6791 uint32_t * pagesOut)
6792 {
6793 uint64_t start, end, nsec;
6794 vm_page_t m;
6795 vm_page_t next;
6796 uint32_t pages = page_list->page_count;
6797 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
6798 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
6799 uint32_t count_wire = pages;
6800 uint32_t count_discard_active = 0;
6801 uint32_t count_discard_inactive = 0;
6802 uint32_t count_discard_cleaned = 0;
6803 uint32_t count_discard_purgeable = 0;
6804 uint32_t count_discard_speculative = 0;
6805 uint32_t count_discard_vm_struct_pages = 0;
6806 uint32_t i;
6807 uint32_t bank;
6808 hibernate_bitmap_t * bitmap;
6809 hibernate_bitmap_t * bitmap_wired;
6810 boolean_t discard_all;
6811 boolean_t discard;
6812
6813 HIBLOG("hibernate_page_list_setall(preflight %d) start\n", preflight);
6814
6815 if (preflight) {
6816 page_list = NULL;
6817 page_list_wired = NULL;
6818 page_list_pal = NULL;
6819 discard_all = FALSE;
6820 } else {
6821 discard_all = will_discard;
6822 }
6823
6824 #if MACH_ASSERT || DEBUG
6825 if (!preflight)
6826 {
6827 assert(hibernate_vm_locks_are_safe());
6828 vm_page_lock_queues();
6829 if (vm_page_local_q) {
6830 for (i = 0; i < vm_page_local_q_count; i++) {
6831 struct vpl *lq;
6832 lq = &vm_page_local_q[i].vpl_un.vpl;
6833 VPL_LOCK(&lq->vpl_lock);
6834 }
6835 }
6836 }
6837 #endif /* MACH_ASSERT || DEBUG */
6838
6839
6840 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
6841
6842 clock_get_uptime(&start);
6843
6844 if (!preflight) {
6845 hibernate_page_list_zero(page_list);
6846 hibernate_page_list_zero(page_list_wired);
6847 hibernate_page_list_zero(page_list_pal);
6848
6849 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
6850 hibernate_stats.cd_pages = pages;
6851 }
6852
6853 if (vm_page_local_q) {
6854 for (i = 0; i < vm_page_local_q_count; i++)
6855 vm_page_reactivate_local(i, TRUE, !preflight);
6856 }
6857
6858 if (preflight) {
6859 vm_object_lock(compressor_object);
6860 vm_page_lock_queues();
6861 lck_mtx_lock(&vm_page_queue_free_lock);
6862 }
6863
6864 m = (vm_page_t) hibernate_gobble_queue;
6865 while (m)
6866 {
6867 pages--;
6868 count_wire--;
6869 if (!preflight) {
6870 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6871 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6872 }
6873 m = m->snext;
6874 }
6875
6876 if (!preflight) for( i = 0; i < real_ncpus; i++ )
6877 {
6878 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
6879 {
6880 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = m->snext)
6881 {
6882 assert(m->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
6883
6884 pages--;
6885 count_wire--;
6886 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6887 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6888
6889 hibernate_stats.cd_local_free++;
6890 hibernate_stats.cd_total_free++;
6891 }
6892 }
6893 }
6894
6895 for( i = 0; i < vm_colors; i++ )
6896 {
6897 vm_page_queue_iterate(&vm_page_queue_free[i].qhead,
6898 m,
6899 vm_page_t,
6900 pageq)
6901 {
6902 assert(m->vm_page_q_state == VM_PAGE_ON_FREE_Q);
6903
6904 pages--;
6905 count_wire--;
6906 if (!preflight) {
6907 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6908 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6909
6910 hibernate_stats.cd_total_free++;
6911 }
6912 }
6913 }
6914
6915 vm_page_queue_iterate(&vm_lopage_queue_free,
6916 m,
6917 vm_page_t,
6918 pageq)
6919 {
6920 assert(m->vm_page_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
6921
6922 pages--;
6923 count_wire--;
6924 if (!preflight) {
6925 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6926 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6927
6928 hibernate_stats.cd_total_free++;
6929 }
6930 }
6931
6932 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
6933 while (m && !vm_page_queue_end(&vm_page_queue_throttled, (vm_page_queue_entry_t)m))
6934 {
6935 assert(m->vm_page_q_state == VM_PAGE_ON_THROTTLED_Q);
6936
6937 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
6938 discard = FALSE;
6939 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
6940 && hibernate_consider_discard(m, preflight))
6941 {
6942 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6943 count_discard_inactive++;
6944 discard = discard_all;
6945 }
6946 else
6947 count_throttled++;
6948 count_wire--;
6949 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6950
6951 if (discard) hibernate_discard_page(m);
6952 m = next;
6953 }
6954
6955 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
6956 while (m && !vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t)m))
6957 {
6958 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
6959
6960 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
6961 discard = FALSE;
6962 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
6963 && hibernate_consider_discard(m, preflight))
6964 {
6965 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6966 if (m->dirty)
6967 count_discard_purgeable++;
6968 else
6969 count_discard_inactive++;
6970 discard = discard_all;
6971 }
6972 else
6973 count_anonymous++;
6974 count_wire--;
6975 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6976 if (discard) hibernate_discard_page(m);
6977 m = next;
6978 }
6979
6980 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
6981 while (m && !vm_page_queue_end(&vm_page_queue_cleaned, (vm_page_queue_entry_t)m))
6982 {
6983 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
6984
6985 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
6986 discard = FALSE;
6987 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
6988 && hibernate_consider_discard(m, preflight))
6989 {
6990 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6991 if (m->dirty)
6992 count_discard_purgeable++;
6993 else
6994 count_discard_cleaned++;
6995 discard = discard_all;
6996 }
6997 else
6998 count_cleaned++;
6999 count_wire--;
7000 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7001 if (discard) hibernate_discard_page(m);
7002 m = next;
7003 }
7004
7005 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
7006 while (m && !vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t)m))
7007 {
7008 assert(m->vm_page_q_state == VM_PAGE_ON_ACTIVE_Q);
7009
7010 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
7011 discard = FALSE;
7012 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
7013 && hibernate_consider_discard(m, preflight))
7014 {
7015 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7016 if (m->dirty)
7017 count_discard_purgeable++;
7018 else
7019 count_discard_active++;
7020 discard = discard_all;
7021 }
7022 else
7023 count_active++;
7024 count_wire--;
7025 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7026 if (discard) hibernate_discard_page(m);
7027 m = next;
7028 }
7029
7030 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
7031 while (m && !vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t)m))
7032 {
7033 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
7034
7035 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
7036 discard = FALSE;
7037 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
7038 && hibernate_consider_discard(m, preflight))
7039 {
7040 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7041 if (m->dirty)
7042 count_discard_purgeable++;
7043 else
7044 count_discard_inactive++;
7045 discard = discard_all;
7046 }
7047 else
7048 count_inactive++;
7049 count_wire--;
7050 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7051 if (discard) hibernate_discard_page(m);
7052 m = next;
7053 }
7054 /* XXX FBDP TODO: secluded queue */
7055
7056 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
7057 {
7058 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_speculative[i].age_q);
7059 while (m && !vm_page_queue_end(&vm_page_queue_speculative[i].age_q, (vm_page_queue_entry_t)m))
7060 {
7061 assert(m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q);
7062
7063 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
7064 discard = FALSE;
7065 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
7066 && hibernate_consider_discard(m, preflight))
7067 {
7068 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7069 count_discard_speculative++;
7070 discard = discard_all;
7071 }
7072 else
7073 count_speculative++;
7074 count_wire--;
7075 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7076 if (discard) hibernate_discard_page(m);
7077 m = next;
7078 }
7079 }
7080
7081 vm_page_queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
7082 {
7083 assert(m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR);
7084
7085 count_compressor++;
7086 count_wire--;
7087 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7088 }
7089
7090 if (preflight == FALSE && discard_all == TRUE) {
7091 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START);
7092
7093 HIBLOG("hibernate_teardown started\n");
7094 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
7095 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
7096
7097 pages -= count_discard_vm_struct_pages;
7098 count_wire -= count_discard_vm_struct_pages;
7099
7100 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
7101
7102 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_END);
7103 }
7104
7105 if (!preflight) {
7106 // pull wired from hibernate_bitmap
7107 bitmap = &page_list->bank_bitmap[0];
7108 bitmap_wired = &page_list_wired->bank_bitmap[0];
7109 for (bank = 0; bank < page_list->bank_count; bank++)
7110 {
7111 for (i = 0; i < bitmap->bitmapwords; i++)
7112 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
7113 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
7114 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
7115 }
7116 }
7117
7118 // machine dependent adjustments
7119 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
7120
7121 if (!preflight) {
7122 hibernate_stats.cd_count_wire = count_wire;
7123 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
7124 count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
7125 }
7126
7127 clock_get_uptime(&end);
7128 absolutetime_to_nanoseconds(end - start, &nsec);
7129 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
7130
7131 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
7132 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
7133 discard_all ? "did" : "could",
7134 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
7135
7136 if (hibernate_stats.cd_skipped_xpmapped)
7137 HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);
7138
7139 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
7140
7141 if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
7142
7143 #if MACH_ASSERT || DEBUG
7144 if (!preflight)
7145 {
7146 if (vm_page_local_q) {
7147 for (i = 0; i < vm_page_local_q_count; i++) {
7148 struct vpl *lq;
7149 lq = &vm_page_local_q[i].vpl_un.vpl;
7150 VPL_UNLOCK(&lq->vpl_lock);
7151 }
7152 }
7153 vm_page_unlock_queues();
7154 }
7155 #endif /* MACH_ASSERT || DEBUG */
7156
7157 if (preflight) {
7158 lck_mtx_unlock(&vm_page_queue_free_lock);
7159 vm_page_unlock_queues();
7160 vm_object_unlock(compressor_object);
7161 }
7162
7163 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
7164 }
7165
7166 void
7167 hibernate_page_list_discard(hibernate_page_list_t * page_list)
7168 {
7169 uint64_t start, end, nsec;
7170 vm_page_t m;
7171 vm_page_t next;
7172 uint32_t i;
7173 uint32_t count_discard_active = 0;
7174 uint32_t count_discard_inactive = 0;
7175 uint32_t count_discard_purgeable = 0;
7176 uint32_t count_discard_cleaned = 0;
7177 uint32_t count_discard_speculative = 0;
7178
7179
7180 #if MACH_ASSERT || DEBUG
7181 vm_page_lock_queues();
7182 if (vm_page_local_q) {
7183 for (i = 0; i < vm_page_local_q_count; i++) {
7184 struct vpl *lq;
7185 lq = &vm_page_local_q[i].vpl_un.vpl;
7186 VPL_LOCK(&lq->vpl_lock);
7187 }
7188 }
7189 #endif /* MACH_ASSERT || DEBUG */
7190
7191 clock_get_uptime(&start);
7192
7193 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
7194 while (m && !vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t)m))
7195 {
7196 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
7197
7198 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
7199 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7200 {
7201 if (m->dirty)
7202 count_discard_purgeable++;
7203 else
7204 count_discard_inactive++;
7205 hibernate_discard_page(m);
7206 }
7207 m = next;
7208 }
7209
7210 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
7211 {
7212 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_speculative[i].age_q);
7213 while (m && !vm_page_queue_end(&vm_page_queue_speculative[i].age_q, (vm_page_queue_entry_t)m))
7214 {
7215 assert(m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q);
7216
7217 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
7218 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7219 {
7220 count_discard_speculative++;
7221 hibernate_discard_page(m);
7222 }
7223 m = next;
7224 }
7225 }
7226
7227 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
7228 while (m && !vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t)m))
7229 {
7230 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
7231
7232 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
7233 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7234 {
7235 if (m->dirty)
7236 count_discard_purgeable++;
7237 else
7238 count_discard_inactive++;
7239 hibernate_discard_page(m);
7240 }
7241 m = next;
7242 }
7243 /* XXX FBDP TODO: secluded queue */
7244
7245 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
7246 while (m && !vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t)m))
7247 {
7248 assert(m->vm_page_q_state == VM_PAGE_ON_ACTIVE_Q);
7249
7250 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
7251 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7252 {
7253 if (m->dirty)
7254 count_discard_purgeable++;
7255 else
7256 count_discard_active++;
7257 hibernate_discard_page(m);
7258 }
7259 m = next;
7260 }
7261
7262 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
7263 while (m && !vm_page_queue_end(&vm_page_queue_cleaned, (vm_page_queue_entry_t)m))
7264 {
7265 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
7266
7267 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
7268 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7269 {
7270 if (m->dirty)
7271 count_discard_purgeable++;
7272 else
7273 count_discard_cleaned++;
7274 hibernate_discard_page(m);
7275 }
7276 m = next;
7277 }
7278
7279 #if MACH_ASSERT || DEBUG
7280 if (vm_page_local_q) {
7281 for (i = 0; i < vm_page_local_q_count; i++) {
7282 struct vpl *lq;
7283 lq = &vm_page_local_q[i].vpl_un.vpl;
7284 VPL_UNLOCK(&lq->vpl_lock);
7285 }
7286 }
7287 vm_page_unlock_queues();
7288 #endif /* MACH_ASSERT || DEBUG */
7289
7290 clock_get_uptime(&end);
7291 absolutetime_to_nanoseconds(end - start, &nsec);
7292 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
7293 nsec / 1000000ULL,
7294 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
7295 }
7296
7297 boolean_t hibernate_paddr_map_inited = FALSE;
7298 unsigned int hibernate_teardown_last_valid_compact_indx = -1;
7299 vm_page_t hibernate_rebuild_hash_list = NULL;
7300
7301 unsigned int hibernate_teardown_found_tabled_pages = 0;
7302 unsigned int hibernate_teardown_found_created_pages = 0;
7303 unsigned int hibernate_teardown_found_free_pages = 0;
7304 unsigned int hibernate_teardown_vm_page_free_count;
7305
7306
7307 struct ppnum_mapping {
7308 struct ppnum_mapping *ppnm_next;
7309 ppnum_t ppnm_base_paddr;
7310 unsigned int ppnm_sindx;
7311 unsigned int ppnm_eindx;
7312 };
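
/*
 * Each ppnum_mapping describes one physically contiguous run of vm_pages[]
 * entries: indices [ppnm_sindx, ppnm_eindx) map to physical pages
 * ppnm_base_paddr + (indx - ppnm_sindx).  For example (illustrative values
 * only), a run with ppnm_sindx = 100, ppnm_eindx = 200 and
 * ppnm_base_paddr = 0x8000 translates index 150 to ppnum 0x8032.
 */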
7313
7314 struct ppnum_mapping *ppnm_head;
7315 struct ppnum_mapping *ppnm_last_found = NULL;
7316
7317
7318 void
7319 hibernate_create_paddr_map()
7320 {
7321 unsigned int i;
7322 ppnum_t next_ppnum_in_run = 0;
7323 struct ppnum_mapping *ppnm = NULL;
7324
7325 if (hibernate_paddr_map_inited == FALSE) {
7326
7327 for (i = 0; i < vm_pages_count; i++) {
7328
7329 if (ppnm)
7330 ppnm->ppnm_eindx = i;
7331
7332 if (ppnm == NULL || VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) != next_ppnum_in_run) {
7333
7334 ppnm = kalloc(sizeof(struct ppnum_mapping));
7335
7336 ppnm->ppnm_next = ppnm_head;
7337 ppnm_head = ppnm;
7338
7339 ppnm->ppnm_sindx = i;
7340 ppnm->ppnm_base_paddr = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]);
7341 }
7342 next_ppnum_in_run = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) + 1;
7343 }
7344 ppnm->ppnm_eindx++;
7345
7346 hibernate_paddr_map_inited = TRUE;
7347 }
7348 }
7349
7350 ppnum_t
7351 hibernate_lookup_paddr(unsigned int indx)
7352 {
7353 struct ppnum_mapping *ppnm = NULL;
7354
7355 ppnm = ppnm_last_found;
7356
7357 if (ppnm) {
7358 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
7359 goto done;
7360 }
7361 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
7362
7363 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
7364 ppnm_last_found = ppnm;
7365 break;
7366 }
7367 }
7368 if (ppnm == NULL)
7369 panic("hibernate_lookup_paddr of %d failed\n", indx);
7370 done:
7371 return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
7372 }
7373
7374
7375 uint32_t
7376 hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
7377 {
7378 addr64_t saddr_aligned;
7379 addr64_t eaddr_aligned;
7380 addr64_t addr;
7381 ppnum_t paddr;
7382 unsigned int mark_as_unneeded_pages = 0;
7383
7384 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
7385 eaddr_aligned = eaddr & ~PAGE_MASK_64;
7386
7387 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
7388
7389 paddr = pmap_find_phys(kernel_pmap, addr);
7390
7391 assert(paddr);
7392
7393 hibernate_page_bitset(page_list, TRUE, paddr);
7394 hibernate_page_bitset(page_list_wired, TRUE, paddr);
7395
7396 mark_as_unneeded_pages++;
7397 }
7398 return (mark_as_unneeded_pages);
7399 }
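
/*
 * Illustrative note (hypothetical addresses, assuming 4KB pages): only pages
 * that lie entirely inside [saddr, eaddr) are marked unneeded, because saddr
 * is rounded up and eaddr is rounded down to a page boundary:
 *
 *	saddr = 0x1800 -> saddr_aligned = 0x2000
 *	eaddr = 0x5400 -> eaddr_aligned = 0x5000
 *	pages marked unneeded: 0x2000, 0x3000, 0x4000   (returns 3)
 */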
7400
7401
7402 void
7403 hibernate_hash_insert_page(vm_page_t mem)
7404 {
7405 vm_page_bucket_t *bucket;
7406 int hash_id;
7407 vm_object_t m_object;
7408
7409 m_object = VM_PAGE_OBJECT(mem);
7410
7411 assert(mem->hashed);
7412 assert(m_object);
7413 assert(mem->offset != (vm_object_offset_t) -1);
7414
7415 /*
7416 * Insert it into the object/offset hash table
7417 */
7418 hash_id = vm_page_hash(m_object, mem->offset);
7419 bucket = &vm_page_buckets[hash_id];
7420
7421 mem->next_m = bucket->page_list;
7422 bucket->page_list = VM_PAGE_PACK_PTR(mem);
7423 }
7424
7425
7426 void
7427 hibernate_free_range(int sindx, int eindx)
7428 {
7429 vm_page_t mem;
7430 unsigned int color;
7431
7432 while (sindx < eindx) {
7433 mem = &vm_pages[sindx];
7434
7435 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
7436
7437 mem->lopage = FALSE;
7438 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
7439
7440 color = VM_PAGE_GET_COLOR(mem);
7441 #if defined(__x86_64__)
7442 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead,
7443 mem,
7444 vm_page_t,
7445 pageq);
7446 #else
7447 vm_page_queue_enter(&vm_page_queue_free[color].qhead,
7448 mem,
7449 vm_page_t,
7450 pageq);
7451 #endif
7452 vm_page_free_count++;
7453
7454 sindx++;
7455 }
7456 }
7457
7458
7459 extern void hibernate_rebuild_pmap_structs(void);
7460
7461 void
7462 hibernate_rebuild_vm_structs(void)
7463 {
7464 int i, cindx, sindx, eindx;
7465 vm_page_t mem, tmem, mem_next;
7466 AbsoluteTime startTime, endTime;
7467 uint64_t nsec;
7468
7469 if (hibernate_rebuild_needed == FALSE)
7470 return;
7471
7472 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START);
7473 HIBLOG("hibernate_rebuild started\n");
7474
7475 clock_get_uptime(&startTime);
7476
7477 hibernate_rebuild_pmap_structs();
7478
7479 bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
7480 eindx = vm_pages_count;
7481
7482 /*
7483 * Mark all the vm_pages[] that have not been initialized yet as being
7484 * transient. This is needed to ensure that the buddy page search is correct.
7485 * Without this, random data in these vm_pages[] entries can trip up the buddy search.
7486 */
7487 for (i = hibernate_teardown_last_valid_compact_indx+1; i < eindx; ++i)
7488 vm_pages[i].vm_page_q_state = VM_PAGE_NOT_ON_Q;
7489
7490 for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
7491
7492 mem = &vm_pages[cindx];
7493 assert(mem->vm_page_q_state != VM_PAGE_ON_FREE_Q);
7494 /*
7495 * hibernate_teardown_vm_structs leaves the location where
7496 * this vm_page_t must be located in "next".
7497 */
7498 tmem = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->next_m));
7499 mem->next_m = VM_PAGE_PACK_PTR(NULL);
7500
7501 sindx = (int)(tmem - &vm_pages[0]);
7502
7503 if (mem != tmem) {
7504 /*
7505 * this vm_page_t was moved by hibernate_teardown_vm_structs,
7506 * so move it back to its real location
7507 */
7508 *tmem = *mem;
7509 mem = tmem;
7510 }
7511 if (mem->hashed)
7512 hibernate_hash_insert_page(mem);
7513 /*
7514 * the 'hole' between this vm_page_t and the previous
7515 * vm_page_t we moved needs to be initialized as
7516 * a range of free vm_page_t's
7517 */
7518 hibernate_free_range(sindx + 1, eindx);
7519
7520 eindx = sindx;
7521 }
7522 if (sindx)
7523 hibernate_free_range(0, sindx);
7524
7525 assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);
7526
7527 /*
7528 * process the list of vm_page_t's that were entered in the hash,
7529 * but were not located in the vm_pages array... these are
7530 * vm_page_t's that were created on the fly (i.e. fictitious)
7531 */
7532 for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
7533 mem_next = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->next_m));
7534
7535 mem->next_m = 0;
7536 hibernate_hash_insert_page(mem);
7537 }
7538 hibernate_rebuild_hash_list = NULL;
7539
7540 clock_get_uptime(&endTime);
7541 SUB_ABSOLUTETIME(&endTime, &startTime);
7542 absolutetime_to_nanoseconds(endTime, &nsec);
7543
7544 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);
7545
7546 hibernate_rebuild_needed = FALSE;
7547
7548 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END);
7549 }
7550
7551
7552 extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);
7553
7554 uint32_t
7555 hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
7556 {
7557 unsigned int i;
7558 unsigned int compact_target_indx;
7559 vm_page_t mem, mem_next;
7560 vm_page_bucket_t *bucket;
7561 unsigned int mark_as_unneeded_pages = 0;
7562 unsigned int unneeded_vm_page_bucket_pages = 0;
7563 unsigned int unneeded_vm_pages_pages = 0;
7564 unsigned int unneeded_pmap_pages = 0;
7565 addr64_t start_of_unneeded = 0;
7566 addr64_t end_of_unneeded = 0;
7567
7568
7569 if (hibernate_should_abort())
7570 return (0);
7571
7572 hibernate_rebuild_needed = TRUE;
7573
7574 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
7575 vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
7576 vm_page_cleaned_count, compressor_object->resident_page_count);
7577
7578 for (i = 0; i < vm_page_bucket_count; i++) {
7579
7580 bucket = &vm_page_buckets[i];
7581
7582 for (mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list)); mem != VM_PAGE_NULL; mem = mem_next) {
7583 assert(mem->hashed);
7584
7585 mem_next = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->next_m));
7586
7587 if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
7588 mem->next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
7589 hibernate_rebuild_hash_list = mem;
7590 }
7591 }
7592 }
7593 unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
7594 mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;
7595
7596 hibernate_teardown_vm_page_free_count = vm_page_free_count;
7597
7598 compact_target_indx = 0;
7599
7600 for (i = 0; i < vm_pages_count; i++) {
7601
7602 mem = &vm_pages[i];
7603
7604 if (mem->vm_page_q_state == VM_PAGE_ON_FREE_Q) {
7605 unsigned int color;
7606
7607 assert(mem->busy);
7608 assert(!mem->lopage);
7609
7610 color = VM_PAGE_GET_COLOR(mem);
7611
7612 vm_page_queue_remove(&vm_page_queue_free[color].qhead,
7613 mem,
7614 vm_page_t,
7615 pageq);
7616
7617 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
7618
7619 vm_page_free_count--;
7620
7621 hibernate_teardown_found_free_pages++;
7622
7623 if (vm_pages[compact_target_indx].vm_page_q_state != VM_PAGE_ON_FREE_Q)
7624 compact_target_indx = i;
7625 } else {
7626 /*
7627 * record this vm_page_t's original location
7628 * we need this even if it doesn't get moved
7629 * as an indicator to the rebuild function that
7630 * we don't have to move it
7631 */
7632 mem->next_m = VM_PAGE_PACK_PTR(mem);
7633
7634 if (vm_pages[compact_target_indx].vm_page_q_state == VM_PAGE_ON_FREE_Q) {
7635 /*
7636 * we've got a hole to fill, so
7637 * move this vm_page_t to its new home
7638 */
7639 vm_pages[compact_target_indx] = *mem;
7640 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
7641
7642 hibernate_teardown_last_valid_compact_indx = compact_target_indx;
7643 compact_target_indx++;
7644 } else
7645 hibernate_teardown_last_valid_compact_indx = i;
7646 }
7647 }
7648 unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
7649 (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
7650 mark_as_unneeded_pages += unneeded_vm_pages_pages;
7651
7652 hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);
7653
7654 if (start_of_unneeded) {
7655 unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
7656 mark_as_unneeded_pages += unneeded_pmap_pages;
7657 }
7658 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);
7659
7660 return (mark_as_unneeded_pages);
7661 }
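
/*
 * Illustrative sketch of the compaction above (hypothetical 6-entry array,
 * F = page on the free queue, Un = page in use at index n):
 *
 *	before:  vm_pages[] = U0  F  U2  F  F  U5
 *	after:   vm_pages[] = U0 U2  U5  ...         last_valid_compact_indx = 2
 *
 * Every in-use page records its original slot in next_m (packed pointer to
 * itself) before it is moved, so hibernate_rebuild_vm_structs() can copy it
 * back and re-create the free ranges in between; everything past the last
 * valid compact index is then marked unneeded for the hibernation image.
 */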
7662
7663
7664 #endif /* HIBERNATION */
7665
7666 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
7667
7668 #include <mach_vm_debug.h>
7669 #if MACH_VM_DEBUG
7670
7671 #include <mach_debug/hash_info.h>
7672 #include <vm/vm_debug.h>
7673
7674 /*
7675 * Routine: vm_page_info
7676 * Purpose:
7677 * Return information about the global VP table.
7678 * Fills the buffer with as much information as possible
7679 * and returns the desired size of the buffer.
7680 * Conditions:
7681 * Nothing locked. The caller should provide
7682 * possibly-pageable memory.
7683 */
7684
7685 unsigned int
7686 vm_page_info(
7687 hash_info_bucket_t *info,
7688 unsigned int count)
7689 {
7690 unsigned int i;
7691 lck_spin_t *bucket_lock;
7692
7693 if (vm_page_bucket_count < count)
7694 count = vm_page_bucket_count;
7695
7696 for (i = 0; i < count; i++) {
7697 vm_page_bucket_t *bucket = &vm_page_buckets[i];
7698 unsigned int bucket_count = 0;
7699 vm_page_t m;
7700
7701 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
7702 lck_spin_lock(bucket_lock);
7703
7704 for (m = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
7705 m != VM_PAGE_NULL;
7706 m = (vm_page_t)(VM_PAGE_UNPACK_PTR(m->next_m)))
7707 bucket_count++;
7708
7709 lck_spin_unlock(bucket_lock);
7710
7711 /* don't touch pageable memory while holding locks */
7712 info[i].hib_count = bucket_count;
7713 }
7714
7715 return vm_page_bucket_count;
7716 }
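
/*
 * Hypothetical caller of vm_page_info() (illustrative sketch only, not
 * compiled): size the buffer with an empty first call, then fill it.  The
 * kalloc()-based buffer management here is just for illustration.
 */
#if 0
static void
vm_page_info_usage_sketch(void)
{
	unsigned int desired, filled;
	hash_info_bucket_t *info;

	desired = vm_page_info(NULL, 0);	/* copies nothing, returns bucket count */
	info = kalloc(desired * sizeof(*info));
	if (info == NULL)
		return;
	filled = vm_page_info(info, desired);	/* fills min(desired, bucket count) entries */
	/* ... consume the first min(desired, filled) hib_count entries ... */
	kfree(info, desired * sizeof(*info));
	(void) filled;
}
#endif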
7717 #endif /* MACH_VM_DEBUG */
7718
7719 #if VM_PAGE_BUCKETS_CHECK
7720 void
7721 vm_page_buckets_check(void)
7722 {
7723 unsigned int i;
7724 vm_page_t p;
7725 unsigned int p_hash;
7726 vm_page_bucket_t *bucket;
7727 lck_spin_t *bucket_lock;
7728
7729 if (!vm_page_buckets_check_ready) {
7730 return;
7731 }
7732
7733 #if HIBERNATION
7734 if (hibernate_rebuild_needed ||
7735 hibernate_rebuild_hash_list) {
7736 panic("BUCKET_CHECK: hibernation in progress: "
7737 "rebuild_needed=%d rebuild_hash_list=%p\n",
7738 hibernate_rebuild_needed,
7739 hibernate_rebuild_hash_list);
7740 }
7741 #endif /* HIBERNATION */
7742
7743 #if VM_PAGE_FAKE_BUCKETS
7744 char *cp;
7745 for (cp = (char *) vm_page_fake_buckets_start;
7746 cp < (char *) vm_page_fake_buckets_end;
7747 cp++) {
7748 if (*cp != 0x5a) {
7749 panic("BUCKET_CHECK: corruption at %p in fake buckets "
7750 "[0x%llx:0x%llx]\n",
7751 cp,
7752 (uint64_t) vm_page_fake_buckets_start,
7753 (uint64_t) vm_page_fake_buckets_end);
7754 }
7755 }
7756 #endif /* VM_PAGE_FAKE_BUCKETS */
7757
7758 for (i = 0; i < vm_page_bucket_count; i++) {
7759 vm_object_t p_object;
7760
7761 bucket = &vm_page_buckets[i];
7762 if (!bucket->page_list) {
7763 continue;
7764 }
7765
7766 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
7767 lck_spin_lock(bucket_lock);
7768 p = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
7769
7770 while (p != VM_PAGE_NULL) {
7771 p_object = VM_PAGE_OBJECT(p);
7772
7773 p_hash = vm_page_hash(p_object, p->offset);
7774 if (!p->hashed) {
7775 panic("BUCKET_CHECK: page %p (%p,0x%llx) "
7776 "hash %d in bucket %d at %p "
7777 "is not hashed\n",
7778 p, p_object, p->offset,
7779 p_hash, i, bucket);
7780 }
7781 if (p_hash != i) {
7782 panic("BUCKET_CHECK: corruption in bucket %d "
7783 "at %p: page %p object %p offset 0x%llx "
7784 "hash %d\n",
7785 i, bucket, p, p_object, p->offset,
7786 p_hash);
7787 }
7788 p = (vm_page_t)(VM_PAGE_UNPACK_PTR(p->next_m));
7789 }
7790 lck_spin_unlock(bucket_lock);
7791 }
7792
7793 // printf("BUCKET_CHECK: checked buckets\n");
7794 }
7795 #endif /* VM_PAGE_BUCKETS_CHECK */
7796
7797 /*
7798 * 'vm_fault_enter' will place newly created pages (zero-fill and COW) onto the
7799 * local queues if they exist... it's the only spot in the system where we add pages
7800 * to those queues... once on those queues, those pages can only move to one of the
7801 * global page queues or the free queues... they NEVER move from local q to local q.
7802 * the 'local' state is stable when vm_page_queues_remove is called since we're behind
7803 * the global vm_page_queue_lock at this point... we still need to take the local lock
7804 * in case this operation is being run on a different CPU than the one the local queue belongs to,
7805 * but we don't have to worry about the page moving to a global queue or becoming wired
7806 * while we're grabbing the local lock since those operations would require the global
7807 * vm_page_queue_lock to be held, and we already own it.
7808 *
7809 * this is why it's safe to utilize the wire_count field in the vm_page_t as the local_id...
7810 * 'wired' and local are ALWAYS mutually exclusive conditions.
7811 */
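
/*
 * A minimal sketch (not compiled) of the lock ordering described above for
 * pages on a local queue: the global page-queue lock is always taken before
 * the per-CPU VPL lock, never the other way around.  The helper name is
 * hypothetical.
 */
#if 0
static void
vm_page_local_q_lock_order_sketch(vm_page_t mem)
{
	struct vpl *lq;

	vm_page_lock_queues();			/* global vm_page_queue_lock first */
	lq = &vm_page_local_q[mem->local_id].vpl_un.vpl;
	VPL_LOCK(&lq->vpl_lock);		/* then the local queue's lock */
	/* ... safe to pull "mem" off lq->vpl_queue here ... */
	VPL_UNLOCK(&lq->vpl_lock);
	vm_page_unlock_queues();
}
#endif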
7812
7813 #if CONFIG_BACKGROUND_QUEUE
7814 void
7815 vm_page_queues_remove(vm_page_t mem, boolean_t remove_from_backgroundq)
7816 #else
7817 void
7818 vm_page_queues_remove(vm_page_t mem, boolean_t __unused remove_from_backgroundq)
7819 #endif
7820 {
7821 boolean_t was_pageable = TRUE;
7822 vm_object_t m_object;
7823
7824 m_object = VM_PAGE_OBJECT(mem);
7825
7826 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
7827
7828 if (mem->vm_page_q_state == VM_PAGE_NOT_ON_Q)
7829 {
7830 assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
7831 #if CONFIG_BACKGROUND_QUEUE
7832 if (remove_from_backgroundq == TRUE) {
7833 vm_page_remove_from_backgroundq(mem);
7834 }
7835 if (mem->vm_page_on_backgroundq) {
7836 assert(mem->vm_page_backgroundq.next != 0);
7837 assert(mem->vm_page_backgroundq.prev != 0);
7838 } else {
7839 assert(mem->vm_page_backgroundq.next == 0);
7840 assert(mem->vm_page_backgroundq.prev == 0);
7841 }
7842 #endif /* CONFIG_BACKGROUND_QUEUE */
7843 return;
7844 }
7845
7846 if (mem->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR)
7847 {
7848 assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
7849 #if CONFIG_BACKGROUND_QUEUE
7850 assert(mem->vm_page_backgroundq.next == 0 &&
7851 mem->vm_page_backgroundq.prev == 0 &&
7852 mem->vm_page_on_backgroundq == FALSE);
7853 #endif
7854 return;
7855 }
7856 if (mem->vm_page_q_state == VM_PAGE_IS_WIRED) {
7857 /*
7858 * might put these guys on a list for debugging purposes
7859 * if we do, we'll need to remove this assert
7860 */
7861 assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
7862 #if CONFIG_BACKGROUND_QUEUE
7863 assert(mem->vm_page_backgroundq.next == 0 &&
7864 mem->vm_page_backgroundq.prev == 0 &&
7865 mem->vm_page_on_backgroundq == FALSE);
7866 #endif
7867 return;
7868 }
7869
7870 assert(m_object != compressor_object);
7871 assert(m_object != kernel_object);
7872 assert(m_object != vm_submap_object);
7873 assert(!mem->fictitious);
7874
7875 switch(mem->vm_page_q_state) {
7876
7877 case VM_PAGE_ON_ACTIVE_LOCAL_Q:
7878 {
7879 struct vpl *lq;
7880
7881 lq = &vm_page_local_q[mem->local_id].vpl_un.vpl;
7882 VPL_LOCK(&lq->vpl_lock);
7883 vm_page_queue_remove(&lq->vpl_queue,
7884 mem, vm_page_t, pageq);
7885 mem->local_id = 0;
7886 lq->vpl_count--;
7887 if (m_object->internal) {
7888 lq->vpl_internal_count--;
7889 } else {
7890 lq->vpl_external_count--;
7891 }
7892 VPL_UNLOCK(&lq->vpl_lock);
7893 was_pageable = FALSE;
7894 break;
7895 }
7896 case VM_PAGE_ON_ACTIVE_Q:
7897 {
7898 vm_page_queue_remove(&vm_page_queue_active,
7899 mem, vm_page_t, pageq);
7900 vm_page_active_count--;
7901 break;
7902 }
7903
7904 case VM_PAGE_ON_INACTIVE_INTERNAL_Q:
7905 {
7906 assert(m_object->internal == TRUE);
7907
7908 vm_page_inactive_count--;
7909 vm_page_queue_remove(&vm_page_queue_anonymous,
7910 mem, vm_page_t, pageq);
7911 vm_page_anonymous_count--;
7912 vm_purgeable_q_advance_all();
7913 break;
7914 }
7915
7916 case VM_PAGE_ON_INACTIVE_EXTERNAL_Q:
7917 {
7918 assert(m_object->internal == FALSE);
7919
7920 vm_page_inactive_count--;
7921 vm_page_queue_remove(&vm_page_queue_inactive,
7922 mem, vm_page_t, pageq);
7923 vm_purgeable_q_advance_all();
7924 break;
7925 }
7926
7927 case VM_PAGE_ON_INACTIVE_CLEANED_Q:
7928 {
7929 assert(m_object->internal == FALSE);
7930
7931 vm_page_inactive_count--;
7932 vm_page_queue_remove(&vm_page_queue_cleaned,
7933 mem, vm_page_t, pageq);
7934 vm_page_cleaned_count--;
7935 break;
7936 }
7937
7938 case VM_PAGE_ON_THROTTLED_Q:
7939 {
7940 assert(m_object->internal == TRUE);
7941
7942 vm_page_queue_remove(&vm_page_queue_throttled,
7943 mem, vm_page_t, pageq);
7944 vm_page_throttled_count--;
7945 was_pageable = FALSE;
7946 break;
7947 }
7948
7949 case VM_PAGE_ON_SPECULATIVE_Q:
7950 {
7951 assert(m_object->internal == FALSE);
7952
7953 vm_page_remque(&mem->pageq);
7954 vm_page_speculative_count--;
7955 break;
7956 }
7957
7958 #if CONFIG_SECLUDED_MEMORY
7959 case VM_PAGE_ON_SECLUDED_Q:
7960 {
7961 vm_page_queue_remove(&vm_page_queue_secluded,
7962 mem, vm_page_t, pageq);
7963 vm_page_secluded_count--;
7964 if (m_object == VM_OBJECT_NULL) {
7965 vm_page_secluded_count_free--;
7966 was_pageable = FALSE;
7967 } else {
7968 assert(!m_object->internal);
7969 vm_page_secluded_count_inuse--;
7970 was_pageable = FALSE;
7971 // was_pageable = TRUE;
7972 }
7973 break;
7974 }
7975 #endif /* CONFIG_SECLUDED_MEMORY */
7976
7977 default:
7978 {
7979 /*
7980 * if (mem->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)
7981 * NOTE: vm_page_queues_remove does not deal with removing pages from the pageout queue...
7982 * the caller is responsible for determining if the page is on that queue, and if so, must
7983 * either first remove it (it needs both the page queues lock and the object lock to do
7984 * this via vm_pageout_steal_laundry), or avoid the call to vm_page_queues_remove
7985 *
7986 * we also don't expect to encounter VM_PAGE_ON_FREE_Q, VM_PAGE_ON_FREE_LOCAL_Q, VM_PAGE_ON_FREE_LOPAGE_Q
7987 * or any of the undefined states
7988 */
7989 panic("vm_page_queues_remove - bad page q_state (%p, %d)\n", mem, mem->vm_page_q_state);
7990 break;
7991 }
7992
7993 }
7994 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
7995 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
7996
7997 #if CONFIG_BACKGROUND_QUEUE
7998 if (remove_from_backgroundq == TRUE)
7999 vm_page_remove_from_backgroundq(mem);
8000 #endif
8001 if (was_pageable) {
8002 if (m_object->internal) {
8003 vm_page_pageable_internal_count--;
8004 } else {
8005 vm_page_pageable_external_count--;
8006 }
8007 }
8008 }
8009
8010 void
8011 vm_page_remove_internal(vm_page_t page)
8012 {
8013 vm_object_t __object = VM_PAGE_OBJECT(page);
8014 if (page == __object->memq_hint) {
8015 vm_page_t __new_hint;
8016 vm_page_queue_entry_t __qe;
8017 __qe = (vm_page_queue_entry_t)vm_page_queue_next(&page->listq);
8018 if (vm_page_queue_end(&__object->memq, __qe)) {
8019 __qe = (vm_page_queue_entry_t)vm_page_queue_prev(&page->listq);
8020 if (vm_page_queue_end(&__object->memq, __qe)) {
8021 __qe = NULL;
8022 }
8023 }
8024 __new_hint = (vm_page_t)((uintptr_t) __qe);
8025 __object->memq_hint = __new_hint;
8026 }
8027 vm_page_queue_remove(&__object->memq, page, vm_page_t, listq);
8028 #if CONFIG_SECLUDED_MEMORY
8029 if (__object->eligible_for_secluded) {
8030 vm_page_secluded.eligible_for_secluded--;
8031 }
8032 #endif /* CONFIG_SECLUDED_MEMORY */
8033 }
8034
8035 void
8036 vm_page_enqueue_inactive(vm_page_t mem, boolean_t first)
8037 {
8038 vm_object_t m_object;
8039
8040 m_object = VM_PAGE_OBJECT(mem);
8041
8042 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
8043 assert(!mem->fictitious);
8044 assert(!mem->laundry);
8045 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
8046 vm_page_check_pageable_safe(mem);
8047
8048 if (m_object->internal) {
8049 mem->vm_page_q_state = VM_PAGE_ON_INACTIVE_INTERNAL_Q;
8050
8051 if (first == TRUE)
8052 vm_page_queue_enter_first(&vm_page_queue_anonymous, mem, vm_page_t, pageq);
8053 else
8054 vm_page_queue_enter(&vm_page_queue_anonymous, mem, vm_page_t, pageq);
8055
8056 vm_page_anonymous_count++;
8057 vm_page_pageable_internal_count++;
8058 } else {
8059 mem->vm_page_q_state = VM_PAGE_ON_INACTIVE_EXTERNAL_Q;
8060
8061 if (first == TRUE)
8062 vm_page_queue_enter_first(&vm_page_queue_inactive, mem, vm_page_t, pageq);
8063 else
8064 vm_page_queue_enter(&vm_page_queue_inactive, mem, vm_page_t, pageq);
8065
8066 vm_page_pageable_external_count++;
8067 }
8068 vm_page_inactive_count++;
8069 token_new_pagecount++;
8070
8071 #if CONFIG_BACKGROUND_QUEUE
8072 if (mem->vm_page_in_background)
8073 vm_page_add_to_backgroundq(mem, FALSE);
8074 #endif
8075 }
8076
8077 void
8078 vm_page_enqueue_active(vm_page_t mem, boolean_t first)
8079 {
8080 vm_object_t m_object;
8081
8082 m_object = VM_PAGE_OBJECT(mem);
8083
8084 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
8085 assert(!mem->fictitious);
8086 assert(!mem->laundry);
8087 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
8088 vm_page_check_pageable_safe(mem);
8089
8090 mem->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
8091 if (first == TRUE)
8092 vm_page_queue_enter_first(&vm_page_queue_active, mem, vm_page_t, pageq);
8093 else
8094 vm_page_queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq);
8095 vm_page_active_count++;
8096
8097 if (m_object->internal) {
8098 vm_page_pageable_internal_count++;
8099 } else {
8100 vm_page_pageable_external_count++;
8101 }
8102
8103 #if CONFIG_BACKGROUND_QUEUE
8104 if (mem->vm_page_in_background)
8105 vm_page_add_to_backgroundq(mem, FALSE);
8106 #endif
8107 }
8108
8109 /*
8110 * Pages from special kernel objects shouldn't
8111 * be placed on pageable queues.
8112 */
8113 void
8114 vm_page_check_pageable_safe(vm_page_t page)
8115 {
8116 vm_object_t page_object;
8117
8118 page_object = VM_PAGE_OBJECT(page);
8119
8120 if (page_object == kernel_object) {
8121 panic("vm_page_check_pageable_safe: trying to add page " \
8122 "from kernel object (%p) to pageable queue", kernel_object);
8123 }
8124
8125 if (page_object == compressor_object) {
8126 panic("vm_page_check_pageable_safe: trying to add page " \
8127 "from compressor object (%p) to pageable queue", compressor_object);
8128 }
8129
8130 if (page_object == vm_submap_object) {
8131 panic("vm_page_check_pageable_safe: trying to add page " \
8132 "from submap object (%p) to pageable queue", vm_submap_object);
8133 }
8134 }
8135
8136 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
8137 * wired page diagnose
8138 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
8139
8140 #include <libkern/OSKextLibPrivate.h>
8141
8142 #define KA_SIZE(namelen, subtotalscount) \
8143 (sizeof(struct vm_allocation_site) + (namelen) + 1 + ((subtotalscount) * sizeof(struct vm_allocation_total)))
8144
8145 #define KA_NAME(alloc) \
8146 ((char *)(&(alloc)->subtotals[(alloc->subtotalscount)]))
8147
8148 #define KA_NAME_LEN(alloc) \
8149 (VM_TAG_NAME_LEN_MAX & (alloc->flags >> VM_TAG_NAME_LEN_SHIFT))
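
/*
 * Illustrative layout (an assumption drawn from KA_SIZE()/KA_NAME() above;
 * shown only schematically): a site allocated with subtotalscount = 2 and
 * the hypothetical name "mytag" is one contiguous kalloc block,
 *
 *	[ vm_allocation_site header | subtotals[0] | subtotals[1] | "mytag\0" ]
 *
 * KA_NAME() points just past the subtotals array and the name length is
 * stashed in the flags field via VM_TAG_NAME_LEN_SHIFT.
 */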
8150
8151 vm_tag_t
8152 vm_tag_bt(void)
8153 {
8154 uintptr_t* frameptr;
8155 uintptr_t* frameptr_next;
8156 uintptr_t retaddr;
8157 uintptr_t kstackb, kstackt;
8158 const vm_allocation_site_t * site;
8159 thread_t cthread;
8160 kern_allocation_name_t name;
8161
8162 cthread = current_thread();
8163 if (__improbable(cthread == NULL)) return VM_KERN_MEMORY_OSFMK;
8164
8165 if ((name = thread_get_kernel_state(cthread)->allocation_name))
8166 {
8167 if (!name->tag) vm_tag_alloc(name);
8168 return name->tag;
8169 }
8170
8171 kstackb = cthread->kernel_stack;
8172 kstackt = kstackb + kernel_stack_size;
8173
8174 /* Load stack frame pointer (EBP on x86) into frameptr */
8175 frameptr = __builtin_frame_address(0);
8176 site = NULL;
8177 while (frameptr != NULL)
8178 {
8179 /* Verify thread stack bounds */
8180 if (((uintptr_t)(frameptr + 2) > kstackt) || ((uintptr_t)frameptr < kstackb)) break;
8181
8182 /* Next frame pointer is pointed to by the previous one */
8183 frameptr_next = (uintptr_t*) *frameptr;
8184
8185 /* Pull return address from one spot above the frame pointer */
8186 retaddr = *(frameptr + 1);
8187
8188
8189 if ((retaddr < vm_kernel_stext) || (retaddr > vm_kernel_top))
8190 {
8191 site = OSKextGetAllocationSiteForCaller(retaddr);
8192 break;
8193 }
8194 frameptr = frameptr_next;
8195 }
8196
8197 return (site ? site->tag : VM_KERN_MEMORY_NONE);
8198 }
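
/*
 * Note on the walk above: with frame pointers enabled each frame is laid out
 * as { saved frame pointer, return address, ... }, so *frameptr is the
 * caller's frame and *(frameptr + 1) its return address.  The loop stops at
 * the first return address outside [vm_kernel_stext, vm_kernel_top] -- i.e.
 * presumably in a kext -- and asks OSKext for the allocation site registered
 * for that caller.
 */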
8199
8200 static uint64_t free_tag_bits[VM_MAX_TAG_VALUE/64];
8201
8202 void
8203 vm_tag_alloc_locked(vm_allocation_site_t * site, vm_allocation_site_t ** releasesiteP)
8204 {
8205 vm_tag_t tag;
8206 uint64_t avail;
8207 uint32_t idx;
8208 vm_allocation_site_t * prev;
8209
8210 if (site->tag) return;
8211
8212 idx = 0;
8213 while (TRUE)
8214 {
8215 avail = free_tag_bits[idx];
8216 if (avail)
8217 {
8218 tag = __builtin_clzll(avail);
8219 avail &= ~(1ULL << (63 - tag));
8220 free_tag_bits[idx] = avail;
8221 tag += (idx << 6);
8222 break;
8223 }
8224 idx++;
8225 if (idx >= ARRAY_COUNT(free_tag_bits))
8226 {
8227 for (idx = 0; idx < ARRAY_COUNT(vm_allocation_sites); idx++)
8228 {
8229 prev = vm_allocation_sites[idx];
8230 if (!prev) continue;
8231 if (!KA_NAME_LEN(prev)) continue;
8232 if (!prev->tag) continue;
8233 if (prev->total) continue;
8234 if (1 != prev->refcount) continue;
8235
8236 assert(idx == prev->tag);
8237 tag = idx;
8238 prev->tag = VM_KERN_MEMORY_NONE;
8239 *releasesiteP = prev;
8240 break;
8241 }
8242 if (idx >= ARRAY_COUNT(vm_allocation_sites))
8243 {
8244 tag = VM_KERN_MEMORY_ANY;
8245 }
8246 break;
8247 }
8248 }
8249 site->tag = tag;
8250
8251 OSAddAtomic16(1, &site->refcount);
8252
8253 if (VM_KERN_MEMORY_ANY != tag) vm_allocation_sites[tag] = site;
8254
8255 if (tag > vm_allocation_tag_highest) vm_allocation_tag_highest = tag;
8256 }
8257
8258 static void
8259 vm_tag_free_locked(vm_tag_t tag)
8260 {
8261 uint64_t avail;
8262 uint32_t idx;
8263 uint64_t bit;
8264
8265 if (VM_KERN_MEMORY_ANY == tag) return;
8266
8267 idx = (tag >> 6);
8268 avail = free_tag_bits[idx];
8269 tag &= 63;
8270 bit = (1ULL << (63 - tag));
8271 assert(!(avail & bit));
8272 free_tag_bits[idx] = (avail | bit);
8273 }
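
/*
 * Worked example (illustrative) of the free-tag bitmap encoding used by
 * vm_tag_alloc_locked()/vm_tag_free_locked(): a free tag t is bit
 * (63 - (t & 63)) of free_tag_bits[t >> 6], so lower-numbered tags occupy
 * higher-order bits and are handed out first.  If word 0 has only tags 5
 * and 9 free:
 *
 *	avail                     = (1ULL << 58) | (1ULL << 54)
 *	__builtin_clzll(avail)    = 5            -> tag 5 is allocated
 *	avail after clearing bit  = (1ULL << 54)  -> only tag 9 remains free
 */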
8274
8275 static void
8276 vm_tag_init(void)
8277 {
8278 vm_tag_t tag;
8279 for (tag = VM_KERN_MEMORY_FIRST_DYNAMIC; tag < VM_KERN_MEMORY_ANY; tag++)
8280 {
8281 vm_tag_free_locked(tag);
8282 }
8283
8284 for (tag = VM_KERN_MEMORY_ANY + 1; tag < VM_MAX_TAG_VALUE; tag++)
8285 {
8286 vm_tag_free_locked(tag);
8287 }
8288 }
8289
8290 vm_tag_t
8291 vm_tag_alloc(vm_allocation_site_t * site)
8292 {
8293 vm_tag_t tag;
8294 vm_allocation_site_t * releasesite;
8295
8296 if (VM_TAG_BT & site->flags)
8297 {
8298 tag = vm_tag_bt();
8299 if (VM_KERN_MEMORY_NONE != tag) return (tag);
8300 }
8301
8302 if (!site->tag)
8303 {
8304 releasesite = NULL;
8305 lck_spin_lock(&vm_allocation_sites_lock);
8306 vm_tag_alloc_locked(site, &releasesite);
8307 lck_spin_unlock(&vm_allocation_sites_lock);
8308 if (releasesite) kern_allocation_name_release(releasesite);
8309 }
8310
8311 return (site->tag);
8312 }
8313
8314 void
8315 vm_tag_update_size(vm_tag_t tag, int64_t delta)
8316 {
8317 vm_allocation_site_t * allocation;
8318 uint64_t prior;
8319
8320 assert(VM_KERN_MEMORY_NONE != tag);
8321 assert(tag < VM_MAX_TAG_VALUE);
8322
8323 allocation = vm_allocation_sites[tag];
8324 assert(allocation);
8325
8326 if (delta < 0) {
8327 assertf(allocation->total >= ((uint64_t)-delta), "tag %d, site %p", tag, allocation);
8328 }
8329 prior = OSAddAtomic64(delta, &allocation->total);
8330
8331 #if DEBUG || DEVELOPMENT
8332
8333 uint64_t new, peak;
8334 new = prior + delta;
8335 do
8336 {
8337 peak = allocation->peak;
8338 if (new <= peak) break;
8339 }
8340 while (!OSCompareAndSwap64(peak, new, &allocation->peak));
8341
8342 #endif /* DEBUG || DEVELOPMENT */
8343
8344 if (tag < VM_KERN_MEMORY_FIRST_DYNAMIC) return;
8345
8346 if (!prior && !allocation->tag) vm_tag_alloc(allocation);
8347 }
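
/*
 * Note on the DEBUG/DEVELOPMENT peak tracking above (the same pattern is used
 * in kern_allocation_update_size() below): the compare-and-swap loop re-reads
 * "peak" and retries until either the new total no longer exceeds it or the
 * swap succeeds, so the recorded peak never moves backwards even with
 * concurrent updaters.
 */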
8348
8349 void
8350 kern_allocation_update_size(kern_allocation_name_t allocation, int64_t delta)
8351 {
8352 uint64_t prior;
8353
8354 if (delta < 0) {
8355 assertf(allocation->total >= ((uint64_t)-delta), "name %p", allocation);
8356 }
8357 prior = OSAddAtomic64(delta, &allocation->total);
8358
8359 #if DEBUG || DEVELOPMENT
8360
8361 uint64_t new, peak;
8362 new = prior + delta;
8363 do
8364 {
8365 peak = allocation->peak;
8366 if (new <= peak) break;
8367 }
8368 while (!OSCompareAndSwap64(peak, new, &allocation->peak));
8369
8370 #endif /* DEBUG || DEVELOPMENT */
8371
8372 if (!prior && !allocation->tag) vm_tag_alloc(allocation);
8373 }
8374
8375 #if VM_MAX_TAG_ZONES
8376
8377 void
8378 vm_allocation_zones_init(void)
8379 {
8380 kern_return_t ret;
8381 vm_offset_t addr;
8382 vm_size_t size;
8383
8384 size = VM_MAX_TAG_VALUE * sizeof(vm_allocation_zone_total_t **)
8385 + 2 * VM_MAX_TAG_ZONES * sizeof(vm_allocation_zone_total_t);
8386
8387 ret = kernel_memory_allocate(kernel_map,
8388 &addr, round_page(size), 0,
8389 KMA_ZERO, VM_KERN_MEMORY_DIAG);
8390 assert(KERN_SUCCESS == ret);
8391
8392 vm_allocation_zone_totals = (vm_allocation_zone_total_t **) addr;
8393 addr += VM_MAX_TAG_VALUE * sizeof(vm_allocation_zone_total_t **);
8394
8395 // prepopulate VM_KERN_MEMORY_DIAG & VM_KERN_MEMORY_KALLOC so allocations
8396 // in vm_tag_update_zone_size() won't recurse
8397 vm_allocation_zone_totals[VM_KERN_MEMORY_DIAG] = (vm_allocation_zone_total_t *) addr;
8398 addr += VM_MAX_TAG_ZONES * sizeof(vm_allocation_zone_total_t);
8399 vm_allocation_zone_totals[VM_KERN_MEMORY_KALLOC] = (vm_allocation_zone_total_t *) addr;
8400 }
8401
8402 void
8403 vm_tag_will_update_zone(vm_tag_t tag, uint32_t zidx)
8404 {
8405 vm_allocation_zone_total_t * zone;
8406
8407 assert(VM_KERN_MEMORY_NONE != tag);
8408 assert(tag < VM_MAX_TAG_VALUE);
8409
8410 if (zidx >= VM_MAX_TAG_ZONES) return;
8411
8412 zone = vm_allocation_zone_totals[tag];
8413 if (!zone)
8414 {
8415 zone = kalloc_tag(VM_MAX_TAG_ZONES * sizeof(*zone), VM_KERN_MEMORY_DIAG);
8416 if (!zone) return;
8417 bzero(zone, VM_MAX_TAG_ZONES * sizeof(*zone));
8418 if (!OSCompareAndSwapPtr(NULL, zone, &vm_allocation_zone_totals[tag]))
8419 {
8420 kfree(zone, VM_MAX_TAG_ZONES * sizeof(*zone));
8421 }
8422 }
8423 }
8424
8425 void
8426 vm_tag_update_zone_size(vm_tag_t tag, uint32_t zidx, int64_t delta, int64_t dwaste)
8427 {
8428 vm_allocation_zone_total_t * zone;
8429 uint32_t new;
8430
8431 assert(VM_KERN_MEMORY_NONE != tag);
8432 assert(tag < VM_MAX_TAG_VALUE);
8433
8434 if (zidx >= VM_MAX_TAG_ZONES) return;
8435
8436 zone = vm_allocation_zone_totals[tag];
8437 assert(zone);
8438 zone += zidx;
8439
8440 /* the zone is locked */
8441 if (delta < 0)
8442 {
8443 assertf(zone->total >= ((uint64_t)-delta), "zidx %d, tag %d, %p", zidx, tag, zone);
8444 zone->total += delta;
8445 }
8446 else
8447 {
8448 zone->total += delta;
8449 if (zone->total > zone->peak) zone->peak = zone->total;
8450 if (dwaste)
8451 {
8452 new = zone->waste;
8453 if (zone->wastediv < 65536) zone->wastediv++;
8454 else new -= (new >> 16);
8455 __assert_only bool ov = os_add_overflow(new, dwaste, &new);
8456 assert(!ov);
8457 zone->waste = new;
8458 }
8459 }
8460 }
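
/*
 * Note on the waste accounting above (hypothetical numbers): "waste" is a
 * running sum of per-element slack and "wastediv" counts the updates that
 * fed it, saturating at 65536.  Once saturated, each update first decays the
 * sum by 1/65536 (new -= new >> 16) before adding dwaste, so waste/wastediv
 * approximates the recent average waste per element -- e.g. a saturated
 * waste of 65536 * 24 suggests roughly 24 wasted bytes per element.
 * process_account() later scales that average by the element count
 * (total / elem_size) to report collectable_bytes.
 */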
8461
8462 #endif /* VM_MAX_TAG_ZONES */
8463
8464 void
8465 kern_allocation_update_subtotal(kern_allocation_name_t allocation, uint32_t subtag, int64_t delta)
8466 {
8467 kern_allocation_name_t other;
8468 struct vm_allocation_total * total;
8469 uint32_t subidx;
8470
8471 subidx = 0;
8472 assert(VM_KERN_MEMORY_NONE != subtag);
8473 for (; subidx < allocation->subtotalscount; subidx++)
8474 {
8475 if (VM_KERN_MEMORY_NONE == allocation->subtotals[subidx].tag)
8476 {
8477 allocation->subtotals[subidx].tag = subtag;
8478 break;
8479 }
8480 if (subtag == allocation->subtotals[subidx].tag) break;
8481 }
8482 assert(subidx < allocation->subtotalscount);
8483 if (subidx >= allocation->subtotalscount) return;
8484
8485 total = &allocation->subtotals[subidx];
8486 other = vm_allocation_sites[subtag];
8487 assert(other);
8488
8489 if (delta < 0)
8490 {
8491 assertf(total->total >= ((uint64_t)-delta), "name %p", allocation);
8492 OSAddAtomic64(delta, &total->total);
8493 assertf(other->mapped >= ((uint64_t)-delta), "other %p", other);
8494 OSAddAtomic64(delta, &other->mapped);
8495 }
8496 else
8497 {
8498 OSAddAtomic64(delta, &other->mapped);
8499 OSAddAtomic64(delta, &total->total);
8500 }
8501 }
8502
8503 const char *
8504 kern_allocation_get_name(kern_allocation_name_t allocation)
8505 {
8506 return (KA_NAME(allocation));
8507 }
8508
8509 kern_allocation_name_t
8510 kern_allocation_name_allocate(const char * name, uint32_t subtotalscount)
8511 {
8512 uint32_t namelen;
8513
8514 namelen = (uint32_t) strnlen(name, MACH_MEMORY_INFO_NAME_MAX_LEN - 1);
8515
8516 kern_allocation_name_t allocation;
8517 allocation = kalloc(KA_SIZE(namelen, subtotalscount));
8518 bzero(allocation, KA_SIZE(namelen, subtotalscount));
8519
8520 allocation->refcount = 1;
8521 allocation->subtotalscount = subtotalscount;
8522 allocation->flags = (namelen << VM_TAG_NAME_LEN_SHIFT);
8523 strlcpy(KA_NAME(allocation), name, namelen + 1);
8524
8525 return (allocation);
8526 }
8527
8528 void
8529 kern_allocation_name_release(kern_allocation_name_t allocation)
8530 {
8531 assert(allocation->refcount > 0);
8532 if (1 == OSAddAtomic16(-1, &allocation->refcount))
8533 {
8534 kfree(allocation, KA_SIZE(KA_NAME_LEN(allocation), allocation->subtotalscount));
8535 }
8536 }
8537
8538 vm_tag_t
8539 kern_allocation_name_get_vm_tag(kern_allocation_name_t allocation)
8540 {
8541 return (vm_tag_alloc(allocation));
8542 }
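
/*
 * Hypothetical lifecycle of a named allocation counter using the routines
 * above (illustrative sketch only, not compiled; the name and sizes are made
 * up):
 */
#if 0
static void
kern_allocation_name_usage_sketch(void)
{
	kern_allocation_name_t name;

	name = kern_allocation_name_allocate("com.example.cache", 0);
	kern_allocation_update_size(name, 4096);	/* account one 4K page   */
	kern_allocation_update_size(name, -4096);	/* ... and its release   */
	kern_allocation_name_release(name);		/* drops the last ref    */
}
#endif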
8543
8544 static void
8545 vm_page_count_object(mach_memory_info_t * info, unsigned int __unused num_info, vm_object_t object)
8546 {
8547 if (!object->wired_page_count) return;
8548 if (object != kernel_object)
8549 {
8550 assert(object->wire_tag < num_info);
8551 info[object->wire_tag].size += ptoa_64(object->wired_page_count);
8552 }
8553 }
8554
8555 typedef void (*vm_page_iterate_proc)(mach_memory_info_t * info,
8556 unsigned int num_info, vm_object_t object);
8557
8558 static void
8559 vm_page_iterate_purgeable_objects(mach_memory_info_t * info, unsigned int num_info,
8560 vm_page_iterate_proc proc, purgeable_q_t queue,
8561 int group)
8562 {
8563 vm_object_t object;
8564
8565 for (object = (vm_object_t) queue_first(&queue->objq[group]);
8566 !queue_end(&queue->objq[group], (queue_entry_t) object);
8567 object = (vm_object_t) queue_next(&object->objq))
8568 {
8569 proc(info, num_info, object);
8570 }
8571 }
8572
8573 static void
8574 vm_page_iterate_objects(mach_memory_info_t * info, unsigned int num_info,
8575 vm_page_iterate_proc proc)
8576 {
8577 purgeable_q_t volatile_q;
8578 queue_head_t * nonvolatile_q;
8579 vm_object_t object;
8580 int group;
8581
8582 lck_spin_lock(&vm_objects_wired_lock);
8583 queue_iterate(&vm_objects_wired,
8584 object,
8585 vm_object_t,
8586 objq)
8587 {
8588 proc(info, num_info, object);
8589 }
8590 lck_spin_unlock(&vm_objects_wired_lock);
8591
8592 lck_mtx_lock(&vm_purgeable_queue_lock);
8593 nonvolatile_q = &purgeable_nonvolatile_queue;
8594 for (object = (vm_object_t) queue_first(nonvolatile_q);
8595 !queue_end(nonvolatile_q, (queue_entry_t) object);
8596 object = (vm_object_t) queue_next(&object->objq))
8597 {
8598 proc(info, num_info, object);
8599 }
8600
8601 volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE];
8602 vm_page_iterate_purgeable_objects(info, num_info, proc, volatile_q, 0);
8603
8604 volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
8605 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
8606 {
8607 vm_page_iterate_purgeable_objects(info, num_info, proc, volatile_q, group);
8608 }
8609
8610 volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
8611 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
8612 {
8613 vm_page_iterate_purgeable_objects(info, num_info, proc, volatile_q, group);
8614 }
8615 lck_mtx_unlock(&vm_purgeable_queue_lock);
8616 }
8617
8618 static uint64_t
8619 process_account(mach_memory_info_t * info, unsigned int num_info, uint64_t zones_collectable_bytes, boolean_t iterated)
8620 {
8621 size_t namelen;
8622 unsigned int idx, count, nextinfo;
8623 vm_allocation_site_t * site;
8624 lck_spin_lock(&vm_allocation_sites_lock);
8625
8626 for (idx = 0; idx <= vm_allocation_tag_highest; idx++)
8627 {
8628 site = vm_allocation_sites[idx];
8629 if (!site) continue;
8630 info[idx].mapped = site->mapped;
8631 info[idx].tag = site->tag;
8632 if (!iterated)
8633 {
8634 info[idx].size = site->total;
8635 #if DEBUG || DEVELOPMENT
8636 info[idx].peak = site->peak;
8637 #endif /* DEBUG || DEVELOPMENT */
8638 }
8639 else
8640 {
8641 if (!site->subtotalscount && (site->total != info[idx].size))
8642 {
8643 printf("tag mismatch[%d] 0x%qx, iter 0x%qx\n", idx, site->total, info[idx].size);
8644 info[idx].size = site->total;
8645 }
8646 }
8647 }
8648
8649 nextinfo = (vm_allocation_tag_highest + 1);
8650 count = nextinfo;
8651 if (count >= num_info) count = num_info;
8652
8653 for (idx = 0; idx < count; idx++)
8654 {
8655 site = vm_allocation_sites[idx];
8656 if (!site) continue;
8657 info[idx].flags |= VM_KERN_SITE_WIRED;
8658 if (idx < VM_KERN_MEMORY_FIRST_DYNAMIC)
8659 {
8660 info[idx].site = idx;
8661 info[idx].flags |= VM_KERN_SITE_TAG;
8662 if (VM_KERN_MEMORY_ZONE == idx)
8663 {
8664 info[idx].flags |= VM_KERN_SITE_HIDE;
8665 info[idx].flags &= ~VM_KERN_SITE_WIRED;
8666 info[idx].collectable_bytes = zones_collectable_bytes;
8667 }
8668 }
8669 else if ((namelen = (VM_TAG_NAME_LEN_MAX & (site->flags >> VM_TAG_NAME_LEN_SHIFT))))
8670 {
8671 info[idx].site = 0;
8672 info[idx].flags |= VM_KERN_SITE_NAMED;
8673 if (namelen > sizeof(info[idx].name)) namelen = sizeof(info[idx].name);
8674 strncpy(&info[idx].name[0], KA_NAME(site), namelen);
8675 }
8676 else if (VM_TAG_KMOD & site->flags)
8677 {
8678 info[idx].site = OSKextGetKmodIDForSite(site, NULL, 0);
8679 info[idx].flags |= VM_KERN_SITE_KMOD;
8680 }
8681 else
8682 {
8683 info[idx].site = VM_KERNEL_UNSLIDE(site);
8684 info[idx].flags |= VM_KERN_SITE_KERNEL;
8685 }
8686 #if VM_MAX_TAG_ZONES
8687 vm_allocation_zone_total_t * zone;
8688 unsigned int zidx;
8689 vm_size_t elem_size;
8690
8691 if (vm_allocation_zone_totals
8692 && (zone = vm_allocation_zone_totals[idx])
8693 && (nextinfo < num_info))
8694 {
8695 for (zidx = 0; zidx < VM_MAX_TAG_ZONES; zidx++)
8696 {
8697 if (!zone[zidx].peak) continue;
8698 info[nextinfo] = info[idx];
8699 info[nextinfo].zone = zone_index_from_tag_index(zidx, &elem_size);
8700 info[nextinfo].flags &= ~VM_KERN_SITE_WIRED;
8701 info[nextinfo].flags |= VM_KERN_SITE_ZONE;
8702 info[nextinfo].size = zone[zidx].total;
8703 info[nextinfo].peak = zone[zidx].peak;
8704 info[nextinfo].mapped = 0;
8705 if (zone[zidx].wastediv)
8706 {
8707 info[nextinfo].collectable_bytes = ((zone[zidx].waste * zone[zidx].total / elem_size) / zone[zidx].wastediv);
8708 }
8709 nextinfo++;
8710 }
8711 }
8712 #endif /* VM_MAX_TAG_ZONES */
8713 if (site->subtotalscount)
8714 {
8715 uint64_t mapped, mapcost, take;
8716 uint32_t sub;
8717 vm_tag_t alloctag;
8718
8719 info[idx].size = site->total;
8720 mapped = info[idx].size;
8721 info[idx].mapped = mapped;
8722 mapcost = 0;
8723 for (sub = 0; sub < site->subtotalscount; sub++)
8724 {
8725 alloctag = site->subtotals[sub].tag;
8726 assert(alloctag < num_info);
8727 if (info[alloctag].name[0]) continue;
8728 take = info[alloctag].mapped;
8729 if (take > info[alloctag].size) take = info[alloctag].size;
8730 if (take > mapped) take = mapped;
8731 info[alloctag].mapped -= take;
8732 info[alloctag].size -= take;
8733 mapped -= take;
8734 mapcost += take;
8735 }
8736 info[idx].size = mapcost;
8737 }
8738 }
8739 lck_spin_unlock(&vm_allocation_sites_lock);
8740
8741 return (0);
8742 }
8743
8744 uint32_t
8745 vm_page_diagnose_estimate(void)
8746 {
8747 vm_allocation_site_t * site;
8748 uint32_t count;
8749 uint32_t idx;
8750
8751 lck_spin_lock(&vm_allocation_sites_lock);
8752 for (count = idx = 0; idx < VM_MAX_TAG_VALUE; idx++)
8753 {
8754 site = vm_allocation_sites[idx];
8755 if (!site) continue;
8756 count++;
8757 #if VM_MAX_TAG_ZONES
8758 if (vm_allocation_zone_totals)
8759 {
8760 vm_allocation_zone_total_t * zone;
8761 zone = vm_allocation_zone_totals[idx];
8762 if (!zone) continue;
8763 for (uint32_t zidx = 0; zidx < VM_MAX_TAG_ZONES; zidx++) if (zone[zidx].peak) count++;
8764 }
8765 #endif
8766 }
8767 lck_spin_unlock(&vm_allocation_sites_lock);
8768
8769 /* some slop for new tags created */
8770 count += 8;
8771 count += VM_KERN_COUNTER_COUNT;
8772
8773 return (count);
8774 }
8775
8776
8777 kern_return_t
8778 vm_page_diagnose(mach_memory_info_t * info, unsigned int num_info, uint64_t zones_collectable_bytes)
8779 {
8780 uint64_t wired_size;
8781 uint64_t wired_managed_size;
8782 uint64_t wired_reserved_size;
8783 uint64_t booter_size;
8784 boolean_t iterate;
8785 mach_memory_info_t * counts;
8786
8787 bzero(info, num_info * sizeof(mach_memory_info_t));
8788
8789 if (!vm_page_wire_count_initial) return (KERN_ABORTED);
8790
8791 #if CONFIG_EMBEDDED
8792 wired_size = ptoa_64(vm_page_wire_count);
8793 wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count);
8794 #else
8795 wired_size = ptoa_64(vm_page_wire_count + vm_lopage_free_count + vm_page_throttled_count);
8796 wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count + vm_page_throttled_count);
8797 #endif
8798 wired_managed_size = ptoa_64(vm_page_wire_count - vm_page_wire_count_initial);
8799
8800 booter_size = ml_get_booter_memory_size();
8801 wired_size += booter_size;
8802
8803 assert(num_info >= VM_KERN_COUNTER_COUNT);
8804 num_info -= VM_KERN_COUNTER_COUNT;
8805 counts = &info[num_info];
8806
8807 #define SET_COUNT(xcount, xsize, xflags) \
8808 counts[xcount].tag = VM_MAX_TAG_VALUE + xcount; \
8809 counts[xcount].site = (xcount); \
8810 counts[xcount].size = (xsize); \
8811 counts[xcount].mapped = (xsize); \
8812 counts[xcount].flags = VM_KERN_SITE_COUNTER | xflags;
8813
8814 SET_COUNT(VM_KERN_COUNT_MANAGED, ptoa_64(vm_page_pages), 0);
8815 SET_COUNT(VM_KERN_COUNT_WIRED, wired_size, 0);
8816 SET_COUNT(VM_KERN_COUNT_WIRED_MANAGED, wired_managed_size, 0);
8817 SET_COUNT(VM_KERN_COUNT_RESERVED, wired_reserved_size, VM_KERN_SITE_WIRED);
8818 SET_COUNT(VM_KERN_COUNT_STOLEN, ptoa_64(vm_page_stolen_count), VM_KERN_SITE_WIRED);
8819 SET_COUNT(VM_KERN_COUNT_LOPAGE, ptoa_64(vm_lopage_free_count), VM_KERN_SITE_WIRED);
8820 SET_COUNT(VM_KERN_COUNT_WIRED_BOOT, ptoa_64(vm_page_wire_count_on_boot), 0);
8821 SET_COUNT(VM_KERN_COUNT_BOOT_STOLEN, booter_size, VM_KERN_SITE_WIRED);
8822
8823 #define SET_MAP(xcount, xsize, xfree, xlargest) \
8824 counts[xcount].site = (xcount); \
8825 counts[xcount].size = (xsize); \
8826 counts[xcount].mapped = (xsize); \
8827 counts[xcount].free = (xfree); \
8828 counts[xcount].largest = (xlargest); \
8829 counts[xcount].flags = VM_KERN_SITE_COUNTER;
8830
8831 vm_map_size_t map_size, map_free, map_largest;
8832
8833 vm_map_sizes(kernel_map, &map_size, &map_free, &map_largest);
8834 SET_MAP(VM_KERN_COUNT_MAP_KERNEL, map_size, map_free, map_largest);
8835
8836 vm_map_sizes(zone_map, &map_size, &map_free, &map_largest);
8837 SET_MAP(VM_KERN_COUNT_MAP_ZONE, map_size, map_free, map_largest);
8838
8839 vm_map_sizes(kalloc_map, &map_size, &map_free, &map_largest);
8840 SET_MAP(VM_KERN_COUNT_MAP_KALLOC, map_size, map_free, map_largest);
8841
8842 iterate = !VM_TAG_ACTIVE_UPDATE;
8843 if (iterate)
8844 {
8845 enum { kMaxKernelDepth = 1 };
8846 vm_map_t maps [kMaxKernelDepth];
8847 vm_map_entry_t entries[kMaxKernelDepth];
8848 vm_map_t map;
8849 vm_map_entry_t entry;
8850 vm_object_offset_t offset;
8851 vm_page_t page;
8852 int stackIdx, count;
8853
8854 vm_page_iterate_objects(info, num_info, &vm_page_count_object);
8855
8856 map = kernel_map;
8857 stackIdx = 0;
8858 while (map)
8859 {
8860 vm_map_lock(map);
8861 for (entry = map->hdr.links.next; map; entry = entry->links.next)
8862 {
8863 if (entry->is_sub_map)
8864 {
8865 assert(stackIdx < kMaxKernelDepth);
8866 maps[stackIdx] = map;
8867 entries[stackIdx] = entry;
8868 stackIdx++;
8869 map = VME_SUBMAP(entry);
8870 entry = NULL;
8871 break;
8872 }
8873 if (VME_OBJECT(entry) == kernel_object)
8874 {
8875 count = 0;
8876 vm_object_lock(VME_OBJECT(entry));
8877 for (offset = entry->links.start; offset < entry->links.end; offset += page_size)
8878 {
8879 page = vm_page_lookup(VME_OBJECT(entry), offset);
8880 if (page && VM_PAGE_WIRED(page)) count++;
8881 }
8882 vm_object_unlock(VME_OBJECT(entry));
8883
8884 if (count)
8885 {
8886 assert(VME_ALIAS(entry) != VM_KERN_MEMORY_NONE);
8887 assert(VME_ALIAS(entry) < num_info);
8888 info[VME_ALIAS(entry)].size += ptoa_64(count);
8889 }
8890 }
8891 while (map && (entry == vm_map_last_entry(map)))
8892 {
8893 vm_map_unlock(map);
8894 if (!stackIdx) map = NULL;
8895 else
8896 {
8897 --stackIdx;
8898 map = maps[stackIdx];
8899 entry = entries[stackIdx];
8900 }
8901 }
8902 }
8903 }
8904 }
8905
8906 process_account(info, num_info, zones_collectable_bytes, iterate);
8907
8908 return (KERN_SUCCESS);
8909 }
8910
8911 #if DEBUG || DEVELOPMENT
8912
8913 kern_return_t
8914 vm_kern_allocation_info(uintptr_t addr, vm_size_t * size, vm_tag_t * tag, vm_size_t * zone_size)
8915 {
8916 kern_return_t ret;
8917 vm_size_t zsize;
8918 vm_map_t map;
8919 vm_map_entry_t entry;
8920
8921 zsize = zone_element_info((void *) addr, tag);
8922 if (zsize)
8923 {
8924 *zone_size = *size = zsize;
8925 return (KERN_SUCCESS);
8926 }
8927
8928 *zone_size = 0;
8929 ret = KERN_INVALID_ADDRESS;
8930 for (map = kernel_map; map; )
8931 {
8932 vm_map_lock(map);
8933 if (!vm_map_lookup_entry(map, addr, &entry)) break;
8934 if (entry->is_sub_map)
8935 {
8936 if (map != kernel_map) break;
8937 map = VME_SUBMAP(entry);
8938 continue;
8939 }
8940 if (entry->vme_start != addr) break;
8941 *tag = VME_ALIAS(entry);
8942 *size = (entry->vme_end - addr);
8943 ret = KERN_SUCCESS;
8944 break;
8945 }
8946 if (map != kernel_map) vm_map_unlock(map);
8947 vm_map_unlock(kernel_map);
8948
8949 return (ret);
8950 }
8951
8952 #endif /* DEBUG || DEVELOPMENT */
8953
8954 uint32_t
8955 vm_tag_get_kext(vm_tag_t tag, char * name, vm_size_t namelen)
8956 {
8957 vm_allocation_site_t * site;
8958 uint32_t kmodId;
8959
8960 kmodId = 0;
8961 lck_spin_lock(&vm_allocation_sites_lock);
8962 if ((site = vm_allocation_sites[tag]))
8963 {
8964 if (VM_TAG_KMOD & site->flags)
8965 {
8966 kmodId = OSKextGetKmodIDForSite(site, name, namelen);
8967 }
8968 }
8969 lck_spin_unlock(&vm_allocation_sites_lock);
8970
8971 return (kmodId);
8972 }