1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67 #include <libkern/OSDebug.h>
68
69 #include <mach/clock_types.h>
70 #include <mach/vm_prot.h>
71 #include <mach/vm_statistics.h>
72 #include <mach/sdt.h>
73 #include <kern/counters.h>
74 #include <kern/sched_prim.h>
75 #include <kern/policy_internal.h>
76 #include <kern/task.h>
77 #include <kern/thread.h>
78 #include <kern/kalloc.h>
79 #include <kern/zalloc.h>
80 #include <kern/xpr.h>
81 #include <kern/ledger.h>
82 #include <vm/pmap.h>
83 #include <vm/vm_init.h>
84 #include <vm/vm_map.h>
85 #include <vm/vm_page.h>
86 #include <vm/vm_pageout.h>
87 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
88 #include <kern/misc_protos.h>
89 #include <zone_debug.h>
90 #include <mach_debug/zone_info.h>
91 #include <vm/cpm.h>
92 #include <pexpert/pexpert.h>
93 #include <san/kasan.h>
94
95 #include <vm/vm_protos.h>
96 #include <vm/memory_object.h>
97 #include <vm/vm_purgeable_internal.h>
98 #include <vm/vm_compressor.h>
99
100 #if CONFIG_PHANTOM_CACHE
101 #include <vm/vm_phantom_cache.h>
102 #endif
103
104 #include <IOKit/IOHibernatePrivate.h>
105
106 #include <sys/kdebug.h>
107
108
109 char vm_page_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
110 char vm_page_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
111 char vm_page_non_speculative_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
112 char vm_page_active_or_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
113
114 #if CONFIG_SECLUDED_MEMORY
115 struct vm_page_secluded_data vm_page_secluded;
116 #endif /* CONFIG_SECLUDED_MEMORY */
117
118 boolean_t hibernate_cleaning_in_progress = FALSE;
119 boolean_t vm_page_free_verify = TRUE;
120
121 uint32_t vm_lopage_free_count = 0;
122 uint32_t vm_lopage_free_limit = 0;
123 uint32_t vm_lopage_lowater = 0;
124 boolean_t vm_lopage_refill = FALSE;
125 boolean_t vm_lopage_needed = FALSE;
126
127 lck_mtx_ext_t vm_page_queue_lock_ext;
128 lck_mtx_ext_t vm_page_queue_free_lock_ext;
129 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
130
131 int speculative_age_index = 0;
132 int speculative_steal_index = 0;
133 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
134
135
136 __private_extern__ void vm_page_init_lck_grp(void);
137
138 static void vm_page_free_prepare(vm_page_t page);
139 static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
140
141 static void vm_tag_init(void);
142
143 uint64_t vm_min_kernel_and_kext_address = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
144 uint32_t vm_packed_from_vm_pages_array_mask = VM_PACKED_FROM_VM_PAGES_ARRAY;
145 uint32_t vm_packed_pointer_shift = VM_PACKED_POINTER_SHIFT;
146
147 /*
148 * Associated with each page of user-allocatable memory is a
149 * page structure.
150 */
151
152 /*
153 * These variables record the values returned by vm_page_bootstrap,
154 * for debugging purposes. The implementation of pmap_steal_memory
155 * and pmap_startup here also uses them internally.
156 */
157
158 vm_offset_t virtual_space_start;
159 vm_offset_t virtual_space_end;
160 uint32_t vm_page_pages;
161
162 /*
163 * The vm_page_lookup() routine, which provides for fast
164 * (virtual memory object, offset) to page lookup, employs
165 * the following hash table. The vm_page_{insert,remove}
166 * routines install and remove associations in the table.
167 * [This table is often called the virtual-to-physical,
168 * or VP, table.]
169 */
170 typedef struct {
171 vm_page_packed_t page_list;
172 #if MACH_PAGE_HASH_STATS
173 int cur_count; /* current count */
174 int hi_count; /* high water mark */
175 #endif /* MACH_PAGE_HASH_STATS */
176 } vm_page_bucket_t;
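/*
 * Each bucket heads a singly-linked list of pages chained through
 * their packed next_m fields.  A lookup in this table is, in rough
 * sketch (see vm_page_remove() and vm_page_replace() below for the
 * real traversals, done with the bucket's spin lock held), a walk of
 * that chain comparing the packed object pointer and the offset:
 *
 *	m = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
 *	while (m && (m->vm_page_object != VM_PAGE_PACK_OBJECT(object) ||
 *	             m->offset != offset))
 *		m = (vm_page_t)(VM_PAGE_UNPACK_PTR(m->next_m));
 */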
177
178
179 #define BUCKETS_PER_LOCK 16
180
181 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
182 unsigned int vm_page_bucket_count = 0; /* How big is array? */
183 unsigned int vm_page_hash_mask; /* Mask for hash function */
184 unsigned int vm_page_hash_shift; /* Shift for hash function */
185 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
186 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
187
188 #ifndef VM_TAG_ACTIVE_UPDATE
189 #error VM_TAG_ACTIVE_UPDATE
190 #endif
191 #ifndef VM_MAX_TAG_ZONES
192 #error VM_MAX_TAG_ZONES
193 #endif
194
195 boolean_t vm_tag_active_update = VM_TAG_ACTIVE_UPDATE;
196 lck_spin_t *vm_page_bucket_locks;
197 lck_spin_t vm_objects_wired_lock;
198 lck_spin_t vm_allocation_sites_lock;
199
200 vm_allocation_site_t vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC + 1];
201 vm_allocation_site_t * vm_allocation_sites[VM_MAX_TAG_VALUE];
202 #if VM_MAX_TAG_ZONES
203 vm_allocation_zone_total_t ** vm_allocation_zone_totals;
204 #endif /* VM_MAX_TAG_ZONES */
205
206 vm_tag_t vm_allocation_tag_highest;
207
208 #if VM_PAGE_BUCKETS_CHECK
209 boolean_t vm_page_buckets_check_ready = FALSE;
210 #if VM_PAGE_FAKE_BUCKETS
211 vm_page_bucket_t *vm_page_fake_buckets; /* decoy buckets */
212 vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
213 #endif /* VM_PAGE_FAKE_BUCKETS */
214 #endif /* VM_PAGE_BUCKETS_CHECK */
215
216
217
218 #if MACH_PAGE_HASH_STATS
219 /* This routine is for debugging only. It is intended to be called by
220 * hand by a developer using a kernel debugger. This routine prints
221 * out vm_page_hash table statistics to the kernel debug console.
222 */
223 void
224 hash_debug(void)
225 {
226 int i;
227 int numbuckets = 0;
228 int highsum = 0;
229 int maxdepth = 0;
230
231 for (i = 0; i < vm_page_bucket_count; i++) {
232 if (vm_page_buckets[i].hi_count) {
233 numbuckets++;
234 highsum += vm_page_buckets[i].hi_count;
235 if (vm_page_buckets[i].hi_count > maxdepth)
236 maxdepth = vm_page_buckets[i].hi_count;
237 }
238 }
239 printf("Total number of buckets: %d\n", vm_page_bucket_count);
240 printf("Number used buckets: %d = %d%%\n",
241 numbuckets, 100*numbuckets/vm_page_bucket_count);
242 printf("Number unused buckets: %d = %d%%\n",
243 vm_page_bucket_count - numbuckets,
244 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
245 printf("Sum of bucket max depth: %d\n", highsum);
246 printf("Average bucket depth: %d.%2d\n",
247 highsum/vm_page_bucket_count,
248 highsum%vm_page_bucket_count);
249 printf("Maximum bucket depth: %d\n", maxdepth);
250 }
251 #endif /* MACH_PAGE_HASH_STATS */
252
253 /*
254 * The virtual page size is currently implemented as a runtime
255 * variable, but is constant once initialized using vm_set_page_size.
256 * This initialization must be done in the machine-dependent
257 * bootstrap sequence, before calling other machine-independent
258 * initializations.
259 *
260 * All references to the virtual page size outside this
261 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
262 * constants.
263 */
264 #if defined(__arm__) || defined(__arm64__)
265 vm_size_t page_size;
266 vm_size_t page_mask;
267 int page_shift;
268 #else
269 vm_size_t page_size = PAGE_SIZE;
270 vm_size_t page_mask = PAGE_MASK;
271 int page_shift = PAGE_SHIFT;
272 #endif
273
274 /*
275 * Resident page structures are initialized from
276 * a template (see vm_page_alloc).
277 *
278 * When adding a new field to the vm_page
279 * structure, be sure to add initialization
280 * (see vm_page_bootstrap).
281 */
282 struct vm_page vm_page_template;
283
284 vm_page_t vm_pages = VM_PAGE_NULL;
285 vm_page_t vm_page_array_beginning_addr;
286 vm_page_t vm_page_array_ending_addr;
287 vm_page_t vm_page_array_boundary;
288
289 unsigned int vm_pages_count = 0;
290 ppnum_t vm_page_lowest = 0;
291
292 /*
293 * Resident pages that represent real memory
294 * are allocated from a set of free lists,
295 * one per color.
296 */
297 unsigned int vm_colors;
298 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
299 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
300 unsigned int vm_free_magazine_refill_limit = 0;
301
302
303 struct vm_page_queue_free_head {
304 vm_page_queue_head_t qhead;
305 } __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
306
307 struct vm_page_queue_free_head vm_page_queue_free[MAX_COLORS];
308
309
310 unsigned int vm_page_free_wanted;
311 unsigned int vm_page_free_wanted_privileged;
312 #if CONFIG_SECLUDED_MEMORY
313 unsigned int vm_page_free_wanted_secluded;
314 #endif /* CONFIG_SECLUDED_MEMORY */
315 unsigned int vm_page_free_count;
316
317 /*
318 * Occasionally, the virtual memory system uses
319 * resident page structures that do not refer to
320 * real pages, for example to leave a page with
321 * important state information in the VP table.
322 *
323 * These page structures are allocated the way
324 * most other kernel structures are.
325 */
326 zone_t vm_page_array_zone;
327 zone_t vm_page_zone;
328 vm_locks_array_t vm_page_locks;
329 decl_lck_mtx_data(,vm_page_alloc_lock)
330 lck_mtx_ext_t vm_page_alloc_lock_ext;
331
332 unsigned int io_throttle_zero_fill;
333
334 unsigned int vm_page_local_q_count = 0;
335 unsigned int vm_page_local_q_soft_limit = 250;
336 unsigned int vm_page_local_q_hard_limit = 500;
337 struct vplq *vm_page_local_q = NULL;
338
339 /* N.B. Guard and fictitious pages must not
340 * be assigned a zero phys_page value.
341 */
342 /*
343 * Fictitious pages don't have a physical address,
344 * but we must initialize phys_page to something.
345 * For debugging, this should be a strange value
346 * that the pmap module can recognize in assertions.
347 */
348 const ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
349
350 /*
351 * Guard pages are not accessible so they don't
352 * need a physical address, but we need to enter
353 * one in the pmap.
354 * Let's make it recognizable and make sure that
355 * we don't use a real physical page with that
356 * physical address.
357 */
358 const ppnum_t vm_page_guard_addr = (ppnum_t) -2;
359
360 /*
361 * Resident page structures are also chained on
362 * queues that are used by the page replacement
363 * system (pageout daemon). These queues are
364 * defined here, but are shared by the pageout
365 * module. The inactive queue is broken into
366 * file-backed and anonymous queues for convenience, as the
367 * pageout daemon often assigns a higher
368 * importance to anonymous pages (they are less likely to be picked).
369 */
370 vm_page_queue_head_t vm_page_queue_active __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
371 vm_page_queue_head_t vm_page_queue_inactive __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
372 #if CONFIG_SECLUDED_MEMORY
373 vm_page_queue_head_t vm_page_queue_secluded __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
374 #endif /* CONFIG_SECLUDED_MEMORY */
375 vm_page_queue_head_t vm_page_queue_anonymous __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT))); /* inactive memory queue for anonymous pages */
376 vm_page_queue_head_t vm_page_queue_throttled __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
377
378 queue_head_t vm_objects_wired;
379
380 #if CONFIG_BACKGROUND_QUEUE
381 vm_page_queue_head_t vm_page_queue_background __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
382 uint32_t vm_page_background_target;
383 uint32_t vm_page_background_count;
384 uint64_t vm_page_background_promoted_count;
385
386 uint32_t vm_page_background_internal_count;
387 uint32_t vm_page_background_external_count;
388
389 uint32_t vm_page_background_mode;
390 uint32_t vm_page_background_exclude_external;
391 #endif
392
393 unsigned int vm_page_active_count;
394 unsigned int vm_page_inactive_count;
395 #if CONFIG_SECLUDED_MEMORY
396 unsigned int vm_page_secluded_count;
397 unsigned int vm_page_secluded_count_free;
398 unsigned int vm_page_secluded_count_inuse;
399 #endif /* CONFIG_SECLUDED_MEMORY */
400 unsigned int vm_page_anonymous_count;
401 unsigned int vm_page_throttled_count;
402 unsigned int vm_page_speculative_count;
403
404 unsigned int vm_page_wire_count;
405 unsigned int vm_page_wire_count_on_boot = 0;
406 unsigned int vm_page_stolen_count;
407 unsigned int vm_page_wire_count_initial;
408 unsigned int vm_page_pages_initial;
409 unsigned int vm_page_gobble_count = 0;
410
411 #define VM_PAGE_WIRE_COUNT_WARNING 0
412 #define VM_PAGE_GOBBLE_COUNT_WARNING 0
413
414 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
415 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
416 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
417
418 unsigned int vm_page_xpmapped_external_count = 0;
419 unsigned int vm_page_external_count = 0;
420 unsigned int vm_page_internal_count = 0;
421 unsigned int vm_page_pageable_external_count = 0;
422 unsigned int vm_page_pageable_internal_count = 0;
423
424 #if DEVELOPMENT || DEBUG
425 unsigned int vm_page_speculative_recreated = 0;
426 unsigned int vm_page_speculative_created = 0;
427 unsigned int vm_page_speculative_used = 0;
428 #endif
429
430 vm_page_queue_head_t vm_page_queue_cleaned __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
431
432 unsigned int vm_page_cleaned_count = 0;
433 unsigned int vm_pageout_enqueued_cleaned = 0;
434
435 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
436 ppnum_t max_valid_low_ppnum = 0xffffffff;
437
438
439 /*
440 * Several page replacement parameters are also
441 * shared with this module, so that page allocation
442 * (done here in vm_page_alloc) can trigger the
443 * pageout daemon.
444 */
445 unsigned int vm_page_free_target = 0;
446 unsigned int vm_page_free_min = 0;
447 unsigned int vm_page_throttle_limit = 0;
448 unsigned int vm_page_inactive_target = 0;
449 #if CONFIG_SECLUDED_MEMORY
450 unsigned int vm_page_secluded_target = 0;
451 #endif /* CONFIG_SECLUDED_MEMORY */
452 unsigned int vm_page_anonymous_min = 0;
453 unsigned int vm_page_inactive_min = 0;
454 unsigned int vm_page_free_reserved = 0;
455 unsigned int vm_page_throttle_count = 0;
456
457
458 /*
459 * The VM system has a couple of heuristics for deciding
460 * that pages are "uninteresting" and should be placed
461 * on the inactive queue as likely candidates for replacement.
462 * These variables let the heuristics be controlled at run-time
463 * to make experimentation easier.
464 */
465
466 boolean_t vm_page_deactivate_hint = TRUE;
467
468 struct vm_page_stats_reusable vm_page_stats_reusable;
469
470 /*
471 * vm_set_page_size:
472 *
473 * Sets the page size, perhaps based upon the memory
474 * size. Must be called before any use of page-size
475 * dependent functions.
476 *
477 * Sets page_shift and page_mask from page_size.
478 */
479 void
480 vm_set_page_size(void)
481 {
482 page_size = PAGE_SIZE;
483 page_mask = PAGE_MASK;
484 page_shift = PAGE_SHIFT;
485
486 if ((page_mask & page_size) != 0)
487 panic("vm_set_page_size: page size not a power of two");
488
489 for (page_shift = 0; ; page_shift++)
490 if ((1U << page_shift) == page_size)
491 break;
492 }
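/*
 * For example, with a 4 KB page size (PAGE_SIZE == 4096,
 * PAGE_MASK == 0xfff), the loop above terminates with
 * page_shift == 12, since 1 << 12 == 4096.  (Illustrative values
 * only; configurations with 16 KB pages end up with page_shift == 14.)
 */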
493
494 #if defined (__x86_64__)
495
496 #define MAX_CLUMP_SIZE 16
497 #define DEFAULT_CLUMP_SIZE 4
498
499 unsigned int vm_clump_size, vm_clump_mask, vm_clump_shift, vm_clump_promote_threshold;
500
501 #if DEVELOPMENT || DEBUG
502 unsigned long vm_clump_stats[MAX_CLUMP_SIZE+1];
503 unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
504
505 static inline void vm_clump_update_stats(unsigned int c) {
506 assert(c<=vm_clump_size);
507 if(c>0 && c<=vm_clump_size) vm_clump_stats[c]+=c;
508 vm_clump_allocs+=c;
509 }
510 #endif /* if DEVELOPMENT || DEBUG */
511
512 /* Called once to set up the VM clump knobs */
513 static void
514 vm_page_setup_clump( void )
515 {
516 unsigned int override, n;
517
518 vm_clump_size = DEFAULT_CLUMP_SIZE;
519 if ( PE_parse_boot_argn("clump_size", &override, sizeof (override)) ) vm_clump_size = override;
520
521 if(vm_clump_size > MAX_CLUMP_SIZE) panic("vm_page_setup_clump:: clump_size is too large!");
522 if(vm_clump_size < 1) panic("vm_page_setup_clump:: clump_size must be >= 1");
523 if((vm_clump_size & (vm_clump_size-1)) != 0) panic("vm_page_setup_clump:: clump_size must be a power of 2");
524
525 vm_clump_promote_threshold = vm_clump_size;
526 vm_clump_mask = vm_clump_size - 1;
527 for(vm_clump_shift=0, n=vm_clump_size; n>1; n>>=1, vm_clump_shift++);
528
529 #if DEVELOPMENT || DEBUG
530 bzero(vm_clump_stats, sizeof(vm_clump_stats));
531 vm_clump_allocs = vm_clump_inserts = vm_clump_inrange = vm_clump_promotes = 0;
532 #endif /* if DEVELOPMENT || DEBUG */
533 }
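/*
 * Worked example, assuming no "clump_size" boot-arg so that
 * DEFAULT_CLUMP_SIZE (4) is used: vm_clump_size == 4,
 * vm_clump_mask == 3, vm_clump_shift == 2 (since 4 == 1 << 2), and
 * vm_clump_promote_threshold == 4.
 */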
534
535 #endif /* #if defined (__x86_64__) */
536
537 #define COLOR_GROUPS_TO_STEAL 4
538
539 /* Called once during startup, once the cache geometry is known.
540 */
541 static void
542 vm_page_set_colors( void )
543 {
544 unsigned int n, override;
545
546 #if defined (__x86_64__)
547 /* adjust #colors because we need to color outside the clump boundary */
548 vm_cache_geometry_colors >>= vm_clump_shift;
549 #endif
550 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
551 n = override;
552 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
553 n = vm_cache_geometry_colors;
554 else n = DEFAULT_COLORS; /* use default if all else fails */
555
556 if ( n == 0 )
557 n = 1;
558 if ( n > MAX_COLORS )
559 n = MAX_COLORS;
560
561 /* the count must be a power of 2 */
562 if ( ( n & (n - 1)) != 0 )
563 n = DEFAULT_COLORS; /* use default if all else fails */
564
565 vm_colors = n;
566 vm_color_mask = n - 1;
567
568 vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
569
570 #if defined (__x86_64__)
571 /* adjust for reduction in colors due to clumping and multiple cores */
572 if (real_ncpus)
573 vm_free_magazine_refill_limit *= (vm_clump_size * real_ncpus);
574 #endif
575 }
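/*
 * Worked example with assumed (illustrative) numbers on x86_64: if the
 * hardware reports vm_cache_geometry_colors == 32 and vm_clump_size == 4
 * (vm_clump_shift == 2), the clump adjustment leaves n == 8, so
 * vm_colors == 8 and vm_color_mask == 7.  vm_free_magazine_refill_limit
 * starts at 8 * COLOR_GROUPS_TO_STEAL == 32 and, with real_ncpus == 8,
 * is scaled to 32 * (4 * 8) == 1024 pages.
 */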
576
577
578 lck_grp_t vm_page_lck_grp_free;
579 lck_grp_t vm_page_lck_grp_queue;
580 lck_grp_t vm_page_lck_grp_local;
581 lck_grp_t vm_page_lck_grp_purge;
582 lck_grp_t vm_page_lck_grp_alloc;
583 lck_grp_t vm_page_lck_grp_bucket;
584 lck_grp_attr_t vm_page_lck_grp_attr;
585 lck_attr_t vm_page_lck_attr;
586
587
588 __private_extern__ void
589 vm_page_init_lck_grp(void)
590 {
591 /*
592 * initialize the vm_page lock world
593 */
594 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
595 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
596 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
597 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
598 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
599 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
600 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
601 lck_attr_setdefault(&vm_page_lck_attr);
602 lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
603
604 vm_compressor_init_locks();
605 }
606
607 #define ROUNDUP_NEXTP2(X) (1U << (32 - __builtin_clz((X) - 1)))
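/*
 * ROUNDUP_NEXTP2(X) yields the smallest power of two that is >= X
 * (for X >= 2): __builtin_clz(X - 1) counts the leading zero bits of
 * X - 1.  For example, ROUNDUP_NEXTP2(48) == 64 and
 * ROUNDUP_NEXTP2(64) == 64.
 */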
608
609 void
610 vm_page_init_local_q()
611 {
612 unsigned int num_cpus;
613 unsigned int i;
614 struct vplq *t_local_q;
615
616 num_cpus = ml_get_max_cpus();
617
618 /*
619 * no point in this for a uni-processor system
620 */
621 if (num_cpus >= 2) {
622 #if KASAN
623 /* KASAN breaks the expectation of a size-aligned object by adding a
624 * redzone, so explicitly align. */
625 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq) + VM_PACKED_POINTER_ALIGNMENT);
626 t_local_q = (void *)(((uintptr_t)t_local_q + (VM_PACKED_POINTER_ALIGNMENT-1)) & ~(VM_PACKED_POINTER_ALIGNMENT-1));
627 #else
628 /* round the size up to the nearest power of two */
629 t_local_q = (struct vplq *)kalloc(ROUNDUP_NEXTP2(num_cpus * sizeof(struct vplq)));
630 #endif
631
632 for (i = 0; i < num_cpus; i++) {
633 struct vpl *lq;
634
635 lq = &t_local_q[i].vpl_un.vpl;
636 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
637 vm_page_queue_init(&lq->vpl_queue);
638 lq->vpl_count = 0;
639 lq->vpl_internal_count = 0;
640 lq->vpl_external_count = 0;
641 }
642 vm_page_local_q_count = num_cpus;
643
644 vm_page_local_q = (struct vplq *)t_local_q;
645 }
646 }
647
648 /*
649 * vm_init_before_launchd
650 *
651 * This should be called right before launchd is loaded.
652 */
653 void
654 vm_init_before_launchd()
655 {
656 vm_page_wire_count_on_boot = vm_page_wire_count;
657 }
658
659
660 /*
661 * vm_page_bootstrap:
662 *
663 * Initializes the resident memory module.
664 *
665 * Allocates memory for the page cells, and
666 * for the object/offset-to-page hash table headers.
667 * Each page cell is initialized and placed on the free list.
668 * Returns the range of available kernel virtual memory.
669 */
670
671 void
672 vm_page_bootstrap(
673 vm_offset_t *startp,
674 vm_offset_t *endp)
675 {
676 vm_page_t m;
677 unsigned int i;
678 unsigned int log1;
679 unsigned int log2;
680 unsigned int size;
681
682 /*
683 * Initialize the vm_page template.
684 */
685
686 m = &vm_page_template;
687 bzero(m, sizeof (*m));
688
689 #if CONFIG_BACKGROUND_QUEUE
690 m->vm_page_backgroundq.next = 0;
691 m->vm_page_backgroundq.prev = 0;
692 m->vm_page_in_background = FALSE;
693 m->vm_page_on_backgroundq = FALSE;
694 #endif
695
696 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
697 m->listq.next = 0;
698 m->listq.prev = 0;
699 m->next_m = 0;
700
701 m->vm_page_object = 0; /* reset later */
702 m->offset = (vm_object_offset_t) -1; /* reset later */
703
704 m->wire_count = 0;
705 m->vm_page_q_state = VM_PAGE_NOT_ON_Q;
706 m->laundry = FALSE;
707 m->reference = FALSE;
708 m->gobbled = FALSE;
709 m->private = FALSE;
710 m->__unused_pageq_bits = 0;
711
712 #if !defined(__arm__) && !defined(__arm64__)
713 VM_PAGE_SET_PHYS_PAGE(m, 0); /* reset later */
714 #endif
715 m->busy = TRUE;
716 m->wanted = FALSE;
717 m->tabled = FALSE;
718 m->hashed = FALSE;
719 m->fictitious = FALSE;
720 m->pmapped = FALSE;
721 m->wpmapped = FALSE;
722 m->free_when_done = FALSE;
723 m->absent = FALSE;
724 m->error = FALSE;
725 m->dirty = FALSE;
726 m->cleaning = FALSE;
727 m->precious = FALSE;
728 m->clustered = FALSE;
729 m->overwriting = FALSE;
730 m->restart = FALSE;
731 m->unusual = FALSE;
732 m->cs_validated = FALSE;
733 m->cs_tainted = FALSE;
734 m->cs_nx = FALSE;
735 m->no_cache = FALSE;
736 m->reusable = FALSE;
737 m->slid = FALSE;
738 m->xpmapped = FALSE;
739 m->written_by_kernel = FALSE;
740 m->__unused_object_bits = 0;
741
742 /*
743 * Initialize the page queues.
744 */
745 vm_page_init_lck_grp();
746
747 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
748 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
749 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
750
751 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
752 int group;
753
754 purgeable_queues[i].token_q_head = 0;
755 purgeable_queues[i].token_q_tail = 0;
756 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
757 queue_init(&purgeable_queues[i].objq[group]);
758
759 purgeable_queues[i].type = i;
760 purgeable_queues[i].new_pages = 0;
761 #if MACH_ASSERT
762 purgeable_queues[i].debug_count_tokens = 0;
763 purgeable_queues[i].debug_count_objects = 0;
764 #endif
765 };
766 purgeable_nonvolatile_count = 0;
767 queue_init(&purgeable_nonvolatile_queue);
768
769 for (i = 0; i < MAX_COLORS; i++ )
770 vm_page_queue_init(&vm_page_queue_free[i].qhead);
771
772 vm_page_queue_init(&vm_lopage_queue_free);
773 vm_page_queue_init(&vm_page_queue_active);
774 vm_page_queue_init(&vm_page_queue_inactive);
775 #if CONFIG_SECLUDED_MEMORY
776 vm_page_queue_init(&vm_page_queue_secluded);
777 #endif /* CONFIG_SECLUDED_MEMORY */
778 vm_page_queue_init(&vm_page_queue_cleaned);
779 vm_page_queue_init(&vm_page_queue_throttled);
780 vm_page_queue_init(&vm_page_queue_anonymous);
781 queue_init(&vm_objects_wired);
782
783 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
784 vm_page_queue_init(&vm_page_queue_speculative[i].age_q);
785
786 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
787 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
788 }
789 #if CONFIG_BACKGROUND_QUEUE
790 vm_page_queue_init(&vm_page_queue_background);
791
792 vm_page_background_count = 0;
793 vm_page_background_internal_count = 0;
794 vm_page_background_external_count = 0;
795 vm_page_background_promoted_count = 0;
796
797 vm_page_background_target = (unsigned int)(atop_64(max_mem) / 25);
798
799 if (vm_page_background_target > VM_PAGE_BACKGROUND_TARGET_MAX)
800 vm_page_background_target = VM_PAGE_BACKGROUND_TARGET_MAX;
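/*
 * i.e. the default background target is roughly 4% of physical pages,
 * subject to the VM_PAGE_BACKGROUND_TARGET_MAX cap and the boot-args
 * below.  Example with assumed numbers: 8 GB of RAM and 4 KB pages give
 * atop_64(max_mem) == 2097152, so the pre-cap target is
 * 2097152 / 25 == 83886 pages.
 */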
801
802 vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
803 vm_page_background_exclude_external = 0;
804
805 PE_parse_boot_argn("vm_page_bg_mode", &vm_page_background_mode, sizeof(vm_page_background_mode));
806 PE_parse_boot_argn("vm_page_bg_exclude_external", &vm_page_background_exclude_external, sizeof(vm_page_background_exclude_external));
807 PE_parse_boot_argn("vm_page_bg_target", &vm_page_background_target, sizeof(vm_page_background_target));
808
809 if (vm_page_background_mode > VM_PAGE_BG_LEVEL_1)
810 vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
811 #endif
812 vm_page_free_wanted = 0;
813 vm_page_free_wanted_privileged = 0;
814 #if CONFIG_SECLUDED_MEMORY
815 vm_page_free_wanted_secluded = 0;
816 #endif /* CONFIG_SECLUDED_MEMORY */
817
818 #if defined (__x86_64__)
819 /* this must be called before vm_page_set_colors() */
820 vm_page_setup_clump();
821 #endif
822
823 vm_page_set_colors();
824
825 bzero(vm_page_inactive_states, sizeof(vm_page_inactive_states));
826 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
827 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
828 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
829
830 bzero(vm_page_pageable_states, sizeof(vm_page_pageable_states));
831 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
832 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
833 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
834 vm_page_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
835 vm_page_pageable_states[VM_PAGE_ON_SPECULATIVE_Q] = 1;
836 vm_page_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
837 #if CONFIG_SECLUDED_MEMORY
838 vm_page_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
839 #endif /* CONFIG_SECLUDED_MEMORY */
840
841 bzero(vm_page_non_speculative_pageable_states, sizeof(vm_page_non_speculative_pageable_states));
842 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
843 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
844 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
845 vm_page_non_speculative_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
846 vm_page_non_speculative_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
847 #if CONFIG_SECLUDED_MEMORY
848 vm_page_non_speculative_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
849 #endif /* CONFIG_SECLUDED_MEMORY */
850
851 bzero(vm_page_active_or_inactive_states, sizeof(vm_page_active_or_inactive_states));
852 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
853 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
854 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
855 vm_page_active_or_inactive_states[VM_PAGE_ON_ACTIVE_Q] = 1;
856 #if CONFIG_SECLUDED_MEMORY
857 vm_page_active_or_inactive_states[VM_PAGE_ON_SECLUDED_Q] = 1;
858 #endif /* CONFIG_SECLUDED_MEMORY */
859
860 for (i = 0; i < VM_KERN_MEMORY_FIRST_DYNAMIC; i++)
861 {
862 vm_allocation_sites_static[i].refcount = 2;
863 vm_allocation_sites_static[i].tag = i;
864 vm_allocation_sites[i] = &vm_allocation_sites_static[i];
865 }
866 vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC].refcount = 2;
867 vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC].tag = VM_KERN_MEMORY_ANY;
868 vm_allocation_sites[VM_KERN_MEMORY_ANY] = &vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC];
869
870 /*
871 * Steal memory for the map and zone subsystems.
872 */
873 #if CONFIG_GZALLOC
874 gzalloc_configure();
875 #endif
876 kernel_debug_string_early("vm_map_steal_memory");
877 vm_map_steal_memory();
878
879 /*
880 * Allocate (and initialize) the virtual-to-physical
881 * table hash buckets.
882 *
883 * The number of buckets should be a power of two to
884 * get a good hash function. The following computation
885 * chooses the first power of two that is at least
886 * as large as the number of physical pages in the system.
887 */
888
889 if (vm_page_bucket_count == 0) {
890 unsigned int npages = pmap_free_pages();
891
892 vm_page_bucket_count = 1;
893 while (vm_page_bucket_count < npages)
894 vm_page_bucket_count <<= 1;
895 }
896 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
897
898 vm_page_hash_mask = vm_page_bucket_count - 1;
899
900 /*
901 * Calculate object shift value for hashing algorithm:
902 * O = log2(sizeof(struct vm_object))
903 * B = log2(vm_page_bucket_count)
904 * hash shifts the object left by
905 * B/2 - O + 1
906 */
907 size = vm_page_bucket_count;
908 for (log1 = 0; size > 1; log1++)
909 size /= 2;
910 size = sizeof(struct vm_object);
911 for (log2 = 0; size > 1; log2++)
912 size /= 2;
913 vm_page_hash_shift = log1/2 - log2 + 1;
914
915 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
916 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of fourth root of table size) */
917 vm_page_bucket_hash |= 1; /* Set the low bit - it must always be 1 to ensure a unique series */
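/*
 * Worked example with assumed sizes: for a bucket count of 2^20
 * (log1 == 20) and a struct vm_object of 256..511 bytes (log2 == 8),
 * vm_page_hash_shift == 20/2 - 8 + 1 == 3 and
 * vm_page_bucket_hash == (1 << 10) | (1 << 5) | 1 == 0x421.
 */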
918
919 if (vm_page_hash_mask & vm_page_bucket_count)
920 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
921
922 #if VM_PAGE_BUCKETS_CHECK
923 #if VM_PAGE_FAKE_BUCKETS
924 /*
925 * Allocate a decoy set of page buckets, to detect
926 * any stomping there.
927 */
928 vm_page_fake_buckets = (vm_page_bucket_t *)
929 pmap_steal_memory(vm_page_bucket_count *
930 sizeof(vm_page_bucket_t));
931 vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
932 vm_page_fake_buckets_end =
933 vm_map_round_page((vm_page_fake_buckets_start +
934 (vm_page_bucket_count *
935 sizeof (vm_page_bucket_t))),
936 PAGE_MASK);
937 char *cp;
938 for (cp = (char *)vm_page_fake_buckets_start;
939 cp < (char *)vm_page_fake_buckets_end;
940 cp++) {
941 *cp = 0x5a;
942 }
943 #endif /* VM_PAGE_FAKE_BUCKETS */
944 #endif /* VM_PAGE_BUCKETS_CHECK */
945
946 kernel_debug_string_early("vm_page_buckets");
947 vm_page_buckets = (vm_page_bucket_t *)
948 pmap_steal_memory(vm_page_bucket_count *
949 sizeof(vm_page_bucket_t));
950
951 kernel_debug_string_early("vm_page_bucket_locks");
952 vm_page_bucket_locks = (lck_spin_t *)
953 pmap_steal_memory(vm_page_bucket_lock_count *
954 sizeof(lck_spin_t));
955
956 for (i = 0; i < vm_page_bucket_count; i++) {
957 vm_page_bucket_t *bucket = &vm_page_buckets[i];
958
959 bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
960 #if MACH_PAGE_HASH_STATS
961 bucket->cur_count = 0;
962 bucket->hi_count = 0;
963 #endif /* MACH_PAGE_HASH_STATS */
964 }
965
966 for (i = 0; i < vm_page_bucket_lock_count; i++)
967 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
968
969 lck_spin_init(&vm_objects_wired_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
970 lck_spin_init(&vm_allocation_sites_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
971 vm_tag_init();
972
973 #if VM_PAGE_BUCKETS_CHECK
974 vm_page_buckets_check_ready = TRUE;
975 #endif /* VM_PAGE_BUCKETS_CHECK */
976
977 /*
978 * Machine-dependent code allocates the resident page table.
979 * It uses vm_page_init to initialize the page frames.
980 * The code also returns to us the virtual space available
981 * to the kernel. We don't trust the pmap module
982 * to get the alignment right.
983 */
984
985 kernel_debug_string_early("pmap_startup");
986 pmap_startup(&virtual_space_start, &virtual_space_end);
987 virtual_space_start = round_page(virtual_space_start);
988 virtual_space_end = trunc_page(virtual_space_end);
989
990 *startp = virtual_space_start;
991 *endp = virtual_space_end;
992
993 /*
994 * Compute the initial "wire" count.
995 * Up until now, the pages which have been set aside are not under
996 * the VM system's control, so although they aren't explicitly
997 * wired, they nonetheless can't be moved. At this moment,
998 * all VM managed pages are "free", courtesy of pmap_startup.
999 */
1000 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
1001 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
1002 #if CONFIG_SECLUDED_MEMORY
1003 vm_page_wire_count -= vm_page_secluded_count;
1004 #endif
1005 vm_page_wire_count_initial = vm_page_wire_count;
1006 vm_page_pages_initial = vm_page_pages;
1007
1008 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
1009 vm_page_free_count, vm_page_wire_count);
1010
1011 kernel_debug_string_early("vm_page_bootstrap complete");
1012 simple_lock_init(&vm_paging_lock, 0);
1013 }
1014
1015 #ifndef MACHINE_PAGES
1016 /*
1017 * We implement pmap_steal_memory and pmap_startup with the help
1018 * of two simpler functions, pmap_virtual_space and pmap_next_page.
1019 */
1020
1021 void *
1022 pmap_steal_memory(
1023 vm_size_t size)
1024 {
1025 kern_return_t kr;
1026 vm_offset_t addr, vaddr;
1027 ppnum_t phys_page;
1028
1029 /*
1030 * We round the size up to a multiple of the pointer size.
1031 */
1032
1033 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
1034
1035 /*
1036 * If this is the first call to pmap_steal_memory,
1037 * we have to initialize ourself.
1038 */
1039
1040 if (virtual_space_start == virtual_space_end) {
1041 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
1042
1043 /*
1044 * The initial values must be aligned properly, and
1045 * we don't trust the pmap module to do it right.
1046 */
1047
1048 virtual_space_start = round_page(virtual_space_start);
1049 virtual_space_end = trunc_page(virtual_space_end);
1050 }
1051
1052 /*
1053 * Allocate virtual memory for this request.
1054 */
1055
1056 addr = virtual_space_start;
1057 virtual_space_start += size;
1058
1059 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
1060
1061 /*
1062 * Allocate and map physical pages to back new virtual pages.
1063 */
1064
1065 for (vaddr = round_page(addr);
1066 vaddr < addr + size;
1067 vaddr += PAGE_SIZE) {
1068
1069 if (!pmap_next_page_hi(&phys_page))
1070 panic("pmap_steal_memory() size: 0x%llx\n", (uint64_t)size);
1071
1072 /*
1073 * XXX Logically, these mappings should be wired,
1074 * but some pmap modules barf if they are.
1075 */
1076 #if defined(__LP64__)
1077 #ifdef __arm64__
1078 /* ARM64_TODO: verify that we really don't need this */
1079 #else
1080 pmap_pre_expand(kernel_pmap, vaddr);
1081 #endif
1082 #endif
1083
1084 kr = pmap_enter(kernel_pmap, vaddr, phys_page,
1085 VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
1086 VM_WIMG_USE_DEFAULT, FALSE);
1087
1088 if (kr != KERN_SUCCESS) {
1089 panic("pmap_steal_memory() pmap_enter failed, vaddr=%#lx, phys_page=%u",
1090 (unsigned long)vaddr, phys_page);
1091 }
1092
1093 /*
1094 * Account for newly stolen memory
1095 */
1096 vm_page_wire_count++;
1097 vm_page_stolen_count++;
1098 }
1099
1100 #if KASAN
1101 kasan_notify_address(round_page(addr), size);
1102 #endif
1103 return (void *) addr;
1104 }
1105
1106 #if CONFIG_SECLUDED_MEMORY
1107 /* boot-args to control secluded memory */
1108 unsigned int secluded_mem_mb = 0; /* # of MBs of RAM to seclude */
1109 int secluded_for_iokit = 1; /* IOKit can use secluded memory */
1110 int secluded_for_apps = 1; /* apps can use secluded memory */
1111 int secluded_for_filecache = 2; /* filecache can use secluded memory */
1112 #if 11
1113 int secluded_for_fbdp = 0;
1114 #endif
1115 #endif /* CONFIG_SECLUDED_MEMORY */
1116
1117
1118 #if defined(__arm__) || defined(__arm64__)
1119 extern void patch_low_glo_vm_page_info(void *, void *, uint32_t);
1120 unsigned int vm_first_phys_ppnum = 0;
1121 #endif
1122
1123
1124 void vm_page_release_startup(vm_page_t mem);
1125 void
1126 pmap_startup(
1127 vm_offset_t *startp,
1128 vm_offset_t *endp)
1129 {
1130 unsigned int i, npages, pages_initialized, fill, fillval;
1131 ppnum_t phys_page;
1132 addr64_t tmpaddr;
1133
1134 #if defined(__LP64__)
1135 /*
1136 * make sure we are aligned on a 64 byte boundary
1137 * for VM_PAGE_PACK_PTR (it clips off the low-order
1138 * 6 bits of the pointer)
1139 */
1140 if (virtual_space_start != virtual_space_end)
1141 virtual_space_start = round_page(virtual_space_start);
1142 #endif
1143
1144 /*
1145 * We calculate how many page frames we will have
1146 * and then allocate the page structures in one chunk.
1147 */
1148
1149 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
1150 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
1151 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure how many pages fit, leaving room for a vm_page_t to describe each one */
1152
1153 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
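/*
 * Sizing example with assumed numbers: for 4 KB pages and, say, an
 * 80-byte struct vm_page, 1 GB of remaining memory gives
 * npages ~= 2^30 / (4096 + 80) ~= 257000, i.e. enough memory is
 * reserved that both the vm_page structures and the pages they
 * describe fit in what is left.
 */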
1154
1155 /*
1156 * Initialize the page frames.
1157 */
1158 kernel_debug_string_early("Initialize the page frames");
1159
1160 vm_page_array_beginning_addr = &vm_pages[0];
1161 vm_page_array_ending_addr = &vm_pages[npages];
1162
1163 for (i = 0, pages_initialized = 0; i < npages; i++) {
1164 if (!pmap_next_page(&phys_page))
1165 break;
1166 #if defined(__arm__) || defined(__arm64__)
1167 if (pages_initialized == 0) {
1168 vm_first_phys_ppnum = phys_page;
1169 patch_low_glo_vm_page_info((void *)vm_page_array_beginning_addr, (void *)vm_page_array_ending_addr, vm_first_phys_ppnum);
1170 }
1171 assert((i + vm_first_phys_ppnum) == phys_page);
1172 #endif
1173 if (pages_initialized == 0 || phys_page < vm_page_lowest)
1174 vm_page_lowest = phys_page;
1175
1176 vm_page_init(&vm_pages[i], phys_page, FALSE);
1177 vm_page_pages++;
1178 pages_initialized++;
1179 }
1180 vm_pages_count = pages_initialized;
1181 vm_page_array_boundary = &vm_pages[pages_initialized];
1182
1183 #if defined(__LP64__)
1184
1185 if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0]))) != &vm_pages[0])
1186 panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);
1187
1188 if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1]))) != &vm_pages[vm_pages_count-1])
1189 panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
1190 #endif
1191 kernel_debug_string_early("page fill/release");
1192 /*
1193 * Check if we want to initialize pages to a known value
1194 */
1195 fill = 0; /* Assume no fill */
1196 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
1197 #if DEBUG
1198 /* This slows down booting the DEBUG kernel, particularly on
1199 * large memory systems, but is worthwhile in deterministically
1200 * trapping uninitialized memory usage.
1201 */
1202 if (fill == 0) {
1203 fill = 1;
1204 fillval = 0xDEB8F177;
1205 }
1206 #endif
1207 if (fill)
1208 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
1209
1210 #if CONFIG_SECLUDED_MEMORY
1211 /* default: no secluded mem */
1212 secluded_mem_mb = 0;
1213 if (max_mem > 1*1024*1024*1024) {
1214 /* default to 90MB for devices with > 1GB of RAM */
1215 secluded_mem_mb = 90;
1216 }
1217 /* override with value from device tree, if provided */
1218 PE_get_default("kern.secluded_mem_mb",
1219 &secluded_mem_mb, sizeof(secluded_mem_mb));
1220 /* override with value from boot-args, if provided */
1221 PE_parse_boot_argn("secluded_mem_mb",
1222 &secluded_mem_mb,
1223 sizeof (secluded_mem_mb));
1224
1225 vm_page_secluded_target = (unsigned int)
1226 ((secluded_mem_mb * 1024ULL * 1024ULL) / PAGE_SIZE);
1227 PE_parse_boot_argn("secluded_for_iokit",
1228 &secluded_for_iokit,
1229 sizeof (secluded_for_iokit));
1230 PE_parse_boot_argn("secluded_for_apps",
1231 &secluded_for_apps,
1232 sizeof (secluded_for_apps));
1233 PE_parse_boot_argn("secluded_for_filecache",
1234 &secluded_for_filecache,
1235 sizeof (secluded_for_filecache));
1236 #if 11
1237 PE_parse_boot_argn("secluded_for_fbdp",
1238 &secluded_for_fbdp,
1239 sizeof (secluded_for_fbdp));
1240 #endif
1241 #endif /* CONFIG_SECLUDED_MEMORY */
1242
1243 // -debug code remove
1244 if (2 == vm_himemory_mode) {
1245 // free low -> high so high is preferred
1246 for (i = 1; i <= pages_initialized; i++) {
1247 if(fill) fillPage(VM_PAGE_GET_PHYS_PAGE(&vm_pages[i - 1]), fillval); /* Fill the page with a known value if requested at boot */
1248 vm_page_release_startup(&vm_pages[i - 1]);
1249 }
1250 }
1251 else
1252 // debug code remove-
1253
1254 /*
1255 * Release pages in reverse order so that physical pages
1256 * initially get allocated in ascending addresses. This keeps
1257 * the devices (which must address physical memory) happy if
1258 * they require several consecutive pages.
1259 */
1260 for (i = pages_initialized; i > 0; i--) {
1261 if(fill) fillPage(VM_PAGE_GET_PHYS_PAGE(&vm_pages[i - 1]), fillval); /* Fill the page with a known value if requested at boot */
1262 vm_page_release_startup(&vm_pages[i - 1]);
1263 }
1264
1265 VM_CHECK_MEMORYSTATUS;
1266
1267 #if 0
1268 {
1269 vm_page_t xx, xxo, xxl;
1270 int i, j, k, l;
1271
1272 j = 0; /* (BRINGUP) */
1273 xxl = 0;
1274
1275 for( i = 0; i < vm_colors; i++ ) {
1276 queue_iterate(&vm_page_queue_free[i].qhead,
1277 xx,
1278 vm_page_t,
1279 pageq) { /* BRINGUP */
1280 j++; /* (BRINGUP) */
1281 if(j > vm_page_free_count) { /* (BRINGUP) */
1282 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
1283 }
1284
1285 l = vm_page_free_count - j; /* (BRINGUP) */
1286 k = 0; /* (BRINGUP) */
1287
1288 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
1289
1290 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i].qhead; xxo = xxo->pageq.next) { /* (BRINGUP) */
1291 k++;
1292 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
1293 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
1294 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
1295 }
1296 }
1297
1298 xxl = xx;
1299 }
1300 }
1301
1302 if(j != vm_page_free_count) { /* (BRINGUP) */
1303 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
1304 }
1305 }
1306 #endif
1307
1308
1309 /*
1310 * We have to re-align virtual_space_start,
1311 * because pmap_steal_memory has been using it.
1312 */
1313
1314 virtual_space_start = round_page(virtual_space_start);
1315
1316 *startp = virtual_space_start;
1317 *endp = virtual_space_end;
1318 }
1319 #endif /* MACHINE_PAGES */
1320
1321 /*
1322 * Routine: vm_page_module_init
1323 * Purpose:
1324 * Second initialization pass, to be done after
1325 * the basic VM system is ready.
1326 */
1327 void
1328 vm_page_module_init(void)
1329 {
1330 uint64_t vm_page_zone_pages, vm_page_array_zone_data_size;
1331 vm_size_t vm_page_with_ppnum_size;
1332
1333 vm_page_array_zone = zinit((vm_size_t) sizeof(struct vm_page),
1334 0, PAGE_SIZE, "vm pages array");
1335
1336 zone_change(vm_page_array_zone, Z_CALLERACCT, FALSE);
1337 zone_change(vm_page_array_zone, Z_EXPAND, FALSE);
1338 zone_change(vm_page_array_zone, Z_EXHAUST, TRUE);
1339 zone_change(vm_page_array_zone, Z_FOREIGN, TRUE);
1340 zone_change(vm_page_array_zone, Z_GZALLOC_EXEMPT, TRUE);
1341 /*
1342 * Adjust zone statistics to account for the real pages allocated
1343 * in vm_page_create(). [Q: is this really what we want?]
1344 */
1345 vm_page_array_zone->count += vm_page_pages;
1346 vm_page_array_zone->sum_count += vm_page_pages;
1347 vm_page_array_zone_data_size = vm_page_pages * vm_page_array_zone->elem_size;
1348 vm_page_array_zone->cur_size += vm_page_array_zone_data_size;
1349 vm_page_zone_pages = ((round_page(vm_page_array_zone_data_size)) / PAGE_SIZE);
1350 OSAddAtomic64(vm_page_zone_pages, &(vm_page_array_zone->page_count));
1351 /* since zone accounts for these, take them out of stolen */
1352 VM_PAGE_MOVE_STOLEN(vm_page_zone_pages);
1353
1354 vm_page_with_ppnum_size = (sizeof(struct vm_page_with_ppnum) + (VM_PACKED_POINTER_ALIGNMENT-1)) & ~(VM_PACKED_POINTER_ALIGNMENT - 1);
1355
1356 vm_page_zone = zinit(vm_page_with_ppnum_size,
1357 0, PAGE_SIZE, "vm pages");
1358
1359 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
1360 zone_change(vm_page_zone, Z_EXPAND, FALSE);
1361 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
1362 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
1363 zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
1364 zone_change(vm_page_zone, Z_ALIGNMENT_REQUIRED, TRUE);
1365 }
1366
1367 /*
1368 * Routine: vm_page_create
1369 * Purpose:
1370 * After the VM system is up, machine-dependent code
1371 * may stumble across more physical memory, such as
1372 * memory that it was reserving for a frame buffer.
1373 * vm_page_create turns this memory into available pages.
1374 */
1375
1376 void
1377 vm_page_create(
1378 ppnum_t start,
1379 ppnum_t end)
1380 {
1381 ppnum_t phys_page;
1382 vm_page_t m;
1383
1384 for (phys_page = start;
1385 phys_page < end;
1386 phys_page++) {
1387 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
1388 == VM_PAGE_NULL)
1389 vm_page_more_fictitious();
1390
1391 m->fictitious = FALSE;
1392 pmap_clear_noencrypt(phys_page);
1393
1394 vm_page_pages++;
1395 vm_page_release(m, FALSE);
1396 }
1397 }
1398
1399 /*
1400 * vm_page_hash:
1401 *
1402 * Distributes the object/offset key pair among hash buckets.
1403 *
1404 * NOTE: The bucket count must be a power of 2
1405 */
1406 #define vm_page_hash(object, offset) (\
1407 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
1408 & vm_page_hash_mask)
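/*
 * Typical use, as in vm_page_insert_internal() and vm_page_remove()
 * below: hash the pair, then pick the bucket and the spin lock that
 * covers it before touching the bucket's packed page_list.
 *
 *	hash_id     = vm_page_hash(object, offset);
 *	bucket      = &vm_page_buckets[hash_id];
 *	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
 */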
1409
1410
1411 /*
1412 * vm_page_insert: [ internal use only ]
1413 *
1414 * Inserts the given mem entry into the object/object-page
1415 * table and object list.
1416 *
1417 * The object must be locked.
1418 */
1419 void
1420 vm_page_insert(
1421 vm_page_t mem,
1422 vm_object_t object,
1423 vm_object_offset_t offset)
1424 {
1425 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, TRUE, FALSE, FALSE, NULL);
1426 }
1427
1428 void
1429 vm_page_insert_wired(
1430 vm_page_t mem,
1431 vm_object_t object,
1432 vm_object_offset_t offset,
1433 vm_tag_t tag)
1434 {
1435 vm_page_insert_internal(mem, object, offset, tag, FALSE, TRUE, FALSE, FALSE, NULL);
1436 }
1437
1438 void
1439 vm_page_insert_internal(
1440 vm_page_t mem,
1441 vm_object_t object,
1442 vm_object_offset_t offset,
1443 vm_tag_t tag,
1444 boolean_t queues_lock_held,
1445 boolean_t insert_in_hash,
1446 boolean_t batch_pmap_op,
1447 boolean_t batch_accounting,
1448 uint64_t *delayed_ledger_update)
1449 {
1450 vm_page_bucket_t *bucket;
1451 lck_spin_t *bucket_lock;
1452 int hash_id;
1453 task_t owner;
1454
1455 XPR(XPR_VM_PAGE,
1456 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1457 object, offset, mem, 0,0);
1458 #if 0
1459 /*
1460 * we may not hold the page queue lock
1461 * so this check isn't safe to make
1462 */
1463 VM_PAGE_CHECK(mem);
1464 #endif
1465
1466 assert(page_aligned(offset));
1467
1468 assert(!VM_PAGE_WIRED(mem) || mem->private || mem->fictitious || (tag != VM_KERN_MEMORY_NONE));
1469
1470 /* the vm_submap_object is only a placeholder for submaps */
1471 assert(object != vm_submap_object);
1472
1473 vm_object_lock_assert_exclusive(object);
1474 LCK_MTX_ASSERT(&vm_page_queue_lock,
1475 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1476 : LCK_MTX_ASSERT_NOTOWNED);
1477
1478 if (queues_lock_held == FALSE)
1479 assert(!VM_PAGE_PAGEABLE(mem));
1480
1481 if (insert_in_hash == TRUE) {
1482 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1483 if (mem->tabled || mem->vm_page_object)
1484 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1485 "already in (obj=%p,off=0x%llx)",
1486 mem, object, offset, VM_PAGE_OBJECT(mem), mem->offset);
1487 #endif
1488 if (object->internal && (offset >= object->vo_size)) {
1489 panic("vm_page_insert_internal: (page=%p,obj=%p,off=0x%llx,size=0x%llx) inserted at offset past object bounds",
1490 mem, object, offset, object->vo_size);
1491 }
1492
1493 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1494
1495 /*
1496 * Record the object/offset pair in this page
1497 */
1498
1499 mem->vm_page_object = VM_PAGE_PACK_OBJECT(object);
1500 mem->offset = offset;
1501
1502 #if CONFIG_SECLUDED_MEMORY
1503 if (object->eligible_for_secluded) {
1504 vm_page_secluded.eligible_for_secluded++;
1505 }
1506 #endif /* CONFIG_SECLUDED_MEMORY */
1507
1508 /*
1509 * Insert it into the object/offset hash table
1510 */
1511 hash_id = vm_page_hash(object, offset);
1512 bucket = &vm_page_buckets[hash_id];
1513 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1514
1515 lck_spin_lock(bucket_lock);
1516
1517 mem->next_m = bucket->page_list;
1518 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1519 assert(mem == (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list)));
1520
1521 #if MACH_PAGE_HASH_STATS
1522 if (++bucket->cur_count > bucket->hi_count)
1523 bucket->hi_count = bucket->cur_count;
1524 #endif /* MACH_PAGE_HASH_STATS */
1525 mem->hashed = TRUE;
1526 lck_spin_unlock(bucket_lock);
1527 }
1528
1529 {
1530 unsigned int cache_attr;
1531
1532 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1533
1534 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1535 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1536 }
1537 }
1538 /*
1539 * Now link into the object's list of backed pages.
1540 */
1541 vm_page_queue_enter(&object->memq, mem, vm_page_t, listq);
1542 object->memq_hint = mem;
1543 mem->tabled = TRUE;
1544
1545 /*
1546 * Show that the object has one more resident page.
1547 */
1548
1549 object->resident_page_count++;
1550 if (VM_PAGE_WIRED(mem)) {
1551 assert(mem->wire_count > 0);
1552 VM_OBJECT_WIRED_PAGE_UPDATE_START(object);
1553 VM_OBJECT_WIRED_PAGE_ADD(object, mem);
1554 VM_OBJECT_WIRED_PAGE_UPDATE_END(object, tag);
1555 }
1556 assert(object->resident_page_count >= object->wired_page_count);
1557
1558 if (batch_accounting == FALSE) {
1559 if (object->internal) {
1560 OSAddAtomic(1, &vm_page_internal_count);
1561 } else {
1562 OSAddAtomic(1, &vm_page_external_count);
1563 }
1564 }
1565
1566 /*
1567 * It wouldn't make sense to insert a "reusable" page in
1568 * an object (the page would have been marked "reusable" only
1569 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1570 * in the object at that time).
1571 * But a page could be inserted in an "all_reusable" object, if
1572 * something faults it in (a vm_read() from another task or a
1573 * "use-after-free" issue in user space, for example). It can
1574 * also happen if we're relocating a page from that object to
1575 * a different physical page during a physically-contiguous
1576 * allocation.
1577 */
1578 assert(!mem->reusable);
1579 if (object->all_reusable) {
1580 OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1581 }
1582
1583 if (object->purgable == VM_PURGABLE_DENY) {
1584 owner = TASK_NULL;
1585 } else {
1586 owner = object->vo_purgeable_owner;
1587 }
1588 if (owner &&
1589 (object->purgable == VM_PURGABLE_NONVOLATILE ||
1590 VM_PAGE_WIRED(mem))) {
1591
1592 if (delayed_ledger_update)
1593 *delayed_ledger_update += PAGE_SIZE;
1594 else {
1595 /* more non-volatile bytes */
1596 ledger_credit(owner->ledger,
1597 task_ledgers.purgeable_nonvolatile,
1598 PAGE_SIZE);
1599 /* more footprint */
1600 ledger_credit(owner->ledger,
1601 task_ledgers.phys_footprint,
1602 PAGE_SIZE);
1603 }
1604
1605 } else if (owner &&
1606 (object->purgable == VM_PURGABLE_VOLATILE ||
1607 object->purgable == VM_PURGABLE_EMPTY)) {
1608 assert(! VM_PAGE_WIRED(mem));
1609 /* more volatile bytes */
1610 ledger_credit(owner->ledger,
1611 task_ledgers.purgeable_volatile,
1612 PAGE_SIZE);
1613 }
1614
1615 if (object->purgable == VM_PURGABLE_VOLATILE) {
1616 if (VM_PAGE_WIRED(mem)) {
1617 OSAddAtomic(+1, &vm_page_purgeable_wired_count);
1618 } else {
1619 OSAddAtomic(+1, &vm_page_purgeable_count);
1620 }
1621 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1622 mem->vm_page_q_state == VM_PAGE_ON_THROTTLED_Q) {
1623 /*
1624 * This page belongs to a purged VM object but hasn't
1625 * been purged (because it was "busy").
1626 * It's in the "throttled" queue and hence not
1627 * visible to vm_pageout_scan(). Move it to a pageable
1628 * queue, so that it can eventually be reclaimed, instead
1629 * of lingering in the "empty" object.
1630 */
1631 if (queues_lock_held == FALSE)
1632 vm_page_lockspin_queues();
1633 vm_page_deactivate(mem);
1634 if (queues_lock_held == FALSE)
1635 vm_page_unlock_queues();
1636 }
1637
1638 #if VM_OBJECT_TRACKING_OP_MODIFIED
1639 if (vm_object_tracking_inited &&
1640 object->internal &&
1641 object->resident_page_count == 0 &&
1642 object->pager == NULL &&
1643 object->shadow != NULL &&
1644 object->shadow->copy == object) {
1645 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
1646 int numsaved = 0;
1647
1648 numsaved = OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
1649 btlog_add_entry(vm_object_tracking_btlog,
1650 object,
1651 VM_OBJECT_TRACKING_OP_MODIFIED,
1652 bt,
1653 numsaved);
1654 }
1655 #endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
1656 }
1657
1658 /*
1659 * vm_page_replace:
1660 *
1661 * Exactly like vm_page_insert, except that we first
1662 * remove any existing page at the given offset in object.
1663 *
1664 * The object must be locked.
1665 */
1666 void
1667 vm_page_replace(
1668 vm_page_t mem,
1669 vm_object_t object,
1670 vm_object_offset_t offset)
1671 {
1672 vm_page_bucket_t *bucket;
1673 vm_page_t found_m = VM_PAGE_NULL;
1674 lck_spin_t *bucket_lock;
1675 int hash_id;
1676
1677 #if 0
1678 /*
1679 * we don't hold the page queue lock
1680 * so this check isn't safe to make
1681 */
1682 VM_PAGE_CHECK(mem);
1683 #endif
1684 vm_object_lock_assert_exclusive(object);
1685 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1686 if (mem->tabled || mem->vm_page_object)
1687 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1688 "already in (obj=%p,off=0x%llx)",
1689 mem, object, offset, VM_PAGE_OBJECT(mem), mem->offset);
1690 #endif
1691 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1692
1693 assert(!VM_PAGE_PAGEABLE(mem));
1694
1695 /*
1696 * Record the object/offset pair in this page
1697 */
1698 mem->vm_page_object = VM_PAGE_PACK_OBJECT(object);
1699 mem->offset = offset;
1700
1701 /*
1702 * Insert it into the object/offset hash table,
1703 * replacing any page that might have been there.
1704 */
1705
1706 hash_id = vm_page_hash(object, offset);
1707 bucket = &vm_page_buckets[hash_id];
1708 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1709
1710 lck_spin_lock(bucket_lock);
1711
1712 if (bucket->page_list) {
1713 vm_page_packed_t *mp = &bucket->page_list;
1714 vm_page_t m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp));
1715
1716 do {
1717 /*
1718 * compare packed object pointers
1719 */
1720 if (m->vm_page_object == mem->vm_page_object && m->offset == offset) {
1721 /*
1722 * Remove old page from hash list
1723 */
1724 *mp = m->next_m;
1725 m->hashed = FALSE;
1726 m->next_m = VM_PAGE_PACK_PTR(NULL);
1727
1728 found_m = m;
1729 break;
1730 }
1731 mp = &m->next_m;
1732 } while ((m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp))));
1733
1734 mem->next_m = bucket->page_list;
1735 } else {
1736 mem->next_m = VM_PAGE_PACK_PTR(NULL);
1737 }
1738 /*
1739 * insert new page at head of hash list
1740 */
1741 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1742 mem->hashed = TRUE;
1743
1744 lck_spin_unlock(bucket_lock);
1745
1746 if (found_m) {
1747 /*
1748 * there was already a page at the specified
1749 * offset for this object... remove it from
1750 * the object and free it back to the free list
1751 */
1752 vm_page_free_unlocked(found_m, FALSE);
1753 }
1754 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, FALSE, FALSE, FALSE, NULL);
1755 }
1756
1757 /*
1758 * vm_page_remove: [ internal use only ]
1759 *
1760 * Removes the given mem entry from the object/offset-page
1761 * table and the object page list.
1762 *
1763 * The object must be locked.
1764 */
1765
1766 void
1767 vm_page_remove(
1768 vm_page_t mem,
1769 boolean_t remove_from_hash)
1770 {
1771 vm_page_bucket_t *bucket;
1772 vm_page_t this;
1773 lck_spin_t *bucket_lock;
1774 int hash_id;
1775 task_t owner;
1776 vm_object_t m_object;
1777
1778 m_object = VM_PAGE_OBJECT(mem);
1779
1780 XPR(XPR_VM_PAGE,
1781 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1782 m_object, mem->offset,
1783 mem, 0,0);
1784
1785 vm_object_lock_assert_exclusive(m_object);
1786 assert(mem->tabled);
1787 assert(!mem->cleaning);
1788 assert(!mem->laundry);
1789
1790 if (VM_PAGE_PAGEABLE(mem)) {
1791 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1792 }
1793 #if 0
1794 /*
1795 * we don't hold the page queue lock
1796 * so this check isn't safe to make
1797 */
1798 VM_PAGE_CHECK(mem);
1799 #endif
1800 if (remove_from_hash == TRUE) {
1801 /*
1802 * Remove from the object/offset hash table
1803 */
1804 hash_id = vm_page_hash(m_object, mem->offset);
1805 bucket = &vm_page_buckets[hash_id];
1806 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1807
1808 lck_spin_lock(bucket_lock);
1809
1810 if ((this = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list))) == mem) {
1811 /* optimize for common case */
1812
1813 bucket->page_list = mem->next_m;
1814 } else {
1815 vm_page_packed_t *prev;
1816
1817 for (prev = &this->next_m;
1818 (this = (vm_page_t)(VM_PAGE_UNPACK_PTR(*prev))) != mem;
1819 prev = &this->next_m)
1820 continue;
1821 *prev = this->next_m;
1822 }
1823 #if MACH_PAGE_HASH_STATS
1824 bucket->cur_count--;
1825 #endif /* MACH_PAGE_HASH_STATS */
1826 mem->hashed = FALSE;
1827 this->next_m = VM_PAGE_PACK_PTR(NULL);
1828 lck_spin_unlock(bucket_lock);
1829 }
1830 /*
1831 * Now remove from the object's list of backed pages.
1832 */
1833
1834 vm_page_remove_internal(mem);
1835
1836 /*
1837 * And show that the object has one fewer resident
1838 * page.
1839 */
1840
1841 assert(m_object->resident_page_count > 0);
1842 m_object->resident_page_count--;
1843
1844 if (m_object->internal) {
1845 #if DEBUG
1846 assert(vm_page_internal_count);
1847 #endif /* DEBUG */
1848
1849 OSAddAtomic(-1, &vm_page_internal_count);
1850 } else {
1851 assert(vm_page_external_count);
1852 OSAddAtomic(-1, &vm_page_external_count);
1853
1854 if (mem->xpmapped) {
1855 assert(vm_page_xpmapped_external_count);
1856 OSAddAtomic(-1, &vm_page_xpmapped_external_count);
1857 }
1858 }
1859 if (!m_object->internal && (m_object->objq.next || m_object->objq.prev)) {
1860 if (m_object->resident_page_count == 0)
1861 vm_object_cache_remove(m_object);
1862 }
1863
1864 if (VM_PAGE_WIRED(mem)) {
1865 assert(mem->wire_count > 0);
1866 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
1867 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
1868 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
1869 }
1870 assert(m_object->resident_page_count >=
1871 m_object->wired_page_count);
1872 if (mem->reusable) {
1873 assert(m_object->reusable_page_count > 0);
1874 m_object->reusable_page_count--;
1875 assert(m_object->reusable_page_count <=
1876 m_object->resident_page_count);
1877 mem->reusable = FALSE;
1878 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1879 vm_page_stats_reusable.reused_remove++;
1880 } else if (m_object->all_reusable) {
1881 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1882 vm_page_stats_reusable.reused_remove++;
1883 }
1884
1885 if (m_object->purgable == VM_PURGABLE_DENY) {
1886 owner = TASK_NULL;
1887 } else {
1888 owner = m_object->vo_purgeable_owner;
1889 }
1890 if (owner &&
1891 (m_object->purgable == VM_PURGABLE_NONVOLATILE ||
1892 VM_PAGE_WIRED(mem))) {
1893 /* less non-volatile bytes */
1894 ledger_debit(owner->ledger,
1895 task_ledgers.purgeable_nonvolatile,
1896 PAGE_SIZE);
1897 /* less footprint */
1898 ledger_debit(owner->ledger,
1899 task_ledgers.phys_footprint,
1900 PAGE_SIZE);
1901 } else if (owner &&
1902 (m_object->purgable == VM_PURGABLE_VOLATILE ||
1903 m_object->purgable == VM_PURGABLE_EMPTY)) {
1904 assert(! VM_PAGE_WIRED(mem));
1905 /* less volatile bytes */
1906 ledger_debit(owner->ledger,
1907 task_ledgers.purgeable_volatile,
1908 PAGE_SIZE);
1909 }
1910 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
1911 if (VM_PAGE_WIRED(mem)) {
1912 assert(vm_page_purgeable_wired_count > 0);
1913 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1914 } else {
1915 assert(vm_page_purgeable_count > 0);
1916 OSAddAtomic(-1, &vm_page_purgeable_count);
1917 }
1918 }
1919
1920 if (m_object->set_cache_attr == TRUE)
1921 pmap_set_cache_attributes(VM_PAGE_GET_PHYS_PAGE(mem), 0);
1922
1923 mem->tabled = FALSE;
1924 mem->vm_page_object = 0;
1925 mem->offset = (vm_object_offset_t) -1;
1926 }
1927
1928
1929 /*
1930 * vm_page_lookup:
1931 *
1932 * Returns the page associated with the object/offset
1933 * pair specified; if none is found, VM_PAGE_NULL is returned.
1934 *
1935 * The object must be locked. No side effects.
1936 */
1937
1938 #define VM_PAGE_HASH_LOOKUP_THRESHOLD 10
1939
1940 #if DEBUG_VM_PAGE_LOOKUP
1941
1942 struct {
1943 uint64_t vpl_total;
1944 uint64_t vpl_empty_obj;
1945 uint64_t vpl_bucket_NULL;
1946 uint64_t vpl_hit_hint;
1947 uint64_t vpl_hit_hint_next;
1948 uint64_t vpl_hit_hint_prev;
1949 uint64_t vpl_fast;
1950 uint64_t vpl_slow;
1951 uint64_t vpl_hit;
1952 uint64_t vpl_miss;
1953
1954 uint64_t vpl_fast_elapsed;
1955 uint64_t vpl_slow_elapsed;
1956 } vm_page_lookup_stats __attribute__((aligned(8)));
1957
1958 #endif
1959
1960 #define KDP_VM_PAGE_WALK_MAX 1000
1961
1962 vm_page_t
1963 kdp_vm_page_lookup(
1964 vm_object_t object,
1965 vm_object_offset_t offset)
1966 {
1967 vm_page_t cur_page;
1968 int num_traversed = 0;
1969
1970 if (not_in_kdp) {
1971 panic("kdp_vm_page_lookup done outside of kernel debugger");
1972 }
1973
1974 vm_page_queue_iterate(&object->memq, cur_page, vm_page_t, listq) {
1975 if (cur_page->offset == offset) {
1976 return cur_page;
1977 }
1978 num_traversed++;
1979
1980 if (num_traversed >= KDP_VM_PAGE_WALK_MAX) {
1981 return VM_PAGE_NULL;
1982 }
1983 }
1984
1985 return VM_PAGE_NULL;
1986 }
1987
1988 vm_page_t
1989 vm_page_lookup(
1990 vm_object_t object,
1991 vm_object_offset_t offset)
1992 {
1993 vm_page_t mem;
1994 vm_page_bucket_t *bucket;
1995 vm_page_queue_entry_t qe;
1996 lck_spin_t *bucket_lock = NULL;
1997 int hash_id;
1998 #if DEBUG_VM_PAGE_LOOKUP
1999 uint64_t start, elapsed;
2000
2001 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_total);
2002 #endif
2003 vm_object_lock_assert_held(object);
2004
2005 if (object->resident_page_count == 0) {
2006 #if DEBUG_VM_PAGE_LOOKUP
2007 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_empty_obj);
2008 #endif
2009 return (VM_PAGE_NULL);
2010 }
2011
2012 mem = object->memq_hint;
2013
2014 if (mem != VM_PAGE_NULL) {
2015 assert(VM_PAGE_OBJECT(mem) == object);
2016
2017 if (mem->offset == offset) {
2018 #if DEBUG_VM_PAGE_LOOKUP
2019 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint);
2020 #endif
2021 return (mem);
2022 }
2023 qe = (vm_page_queue_entry_t)vm_page_queue_next(&mem->listq);
2024
2025 if (! vm_page_queue_end(&object->memq, qe)) {
2026 vm_page_t next_page;
2027
2028 next_page = (vm_page_t)((uintptr_t)qe);
2029 assert(VM_PAGE_OBJECT(next_page) == object);
2030
2031 if (next_page->offset == offset) {
2032 object->memq_hint = next_page; /* new hint */
2033 #if DEBUG_VM_PAGE_LOOKUP
2034 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_next);
2035 #endif
2036 return (next_page);
2037 }
2038 }
2039 qe = (vm_page_queue_entry_t)vm_page_queue_prev(&mem->listq);
2040
2041 if (! vm_page_queue_end(&object->memq, qe)) {
2042 vm_page_t prev_page;
2043
2044 prev_page = (vm_page_t)((uintptr_t)qe);
2045 assert(VM_PAGE_OBJECT(prev_page) == object);
2046
2047 if (prev_page->offset == offset) {
2048 object->memq_hint = prev_page; /* new hint */
2049 #if DEBUG_VM_PAGE_LOOKUP
2050 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_prev);
2051 #endif
2052 return (prev_page);
2053 }
2054 }
2055 }
2056 /*
2057 * Search the hash table for this object/offset pair
2058 */
2059 hash_id = vm_page_hash(object, offset);
2060 bucket = &vm_page_buckets[hash_id];
2061
2062 /*
2063 * since we hold the object lock, we are guaranteed that no
2064 * new pages can be inserted into this object... this in turn
2065 * guarantees that the page we're looking for can't exist
2066 * if the bucket it hashes to is currently NULL even when looked
2067 * at outside the scope of the hash bucket lock... this is a
2068 * really cheap optimization to avoid taking the lock
2069 */
2070 if (!bucket->page_list) {
2071 #if DEBUG_VM_PAGE_LOOKUP
2072 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_bucket_NULL);
2073 #endif
2074 return (VM_PAGE_NULL);
2075 }
2076
2077 #if DEBUG_VM_PAGE_LOOKUP
2078 start = mach_absolute_time();
2079 #endif
2080 if (object->resident_page_count <= VM_PAGE_HASH_LOOKUP_THRESHOLD) {
2081 /*
2082 * on average, it's roughly 3 times faster to run a short memq list
2083 * than to take the spin lock and go through the hash list
2084 */
2085 mem = (vm_page_t)vm_page_queue_first(&object->memq);
2086
2087 while (!vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem)) {
2088
2089 if (mem->offset == offset)
2090 break;
2091
2092 mem = (vm_page_t)vm_page_queue_next(&mem->listq);
2093 }
2094 if (vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem))
2095 mem = NULL;
2096 } else {
2097 vm_page_object_t packed_object;
2098
2099 packed_object = VM_PAGE_PACK_OBJECT(object);
2100
2101 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
2102
2103 lck_spin_lock(bucket_lock);
2104
2105 for (mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
2106 mem != VM_PAGE_NULL;
2107 mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->next_m))) {
2108 #if 0
2109 /*
2110 * we don't hold the page queue lock
2111 * so this check isn't safe to make
2112 */
2113 VM_PAGE_CHECK(mem);
2114 #endif
2115 if ((mem->vm_page_object == packed_object) && (mem->offset == offset))
2116 break;
2117 }
2118 lck_spin_unlock(bucket_lock);
2119 }
2120
2121 #if DEBUG_VM_PAGE_LOOKUP
2122 elapsed = mach_absolute_time() - start;
2123
2124 if (bucket_lock) {
2125 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_slow);
2126 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_slow_elapsed);
2127 } else {
2128 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_fast);
2129 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_fast_elapsed);
2130 }
2131 if (mem != VM_PAGE_NULL)
2132 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit);
2133 else
2134 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_miss);
2135 #endif
2136 if (mem != VM_PAGE_NULL) {
2137 assert(VM_PAGE_OBJECT(mem) == object);
2138
2139 object->memq_hint = mem;
2140 }
2141 return (mem);
2142 }
2143
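/*
 * Illustrative sketch of a typical vm_page_lookup() caller: a hypothetical
 * helper that simply reports whether a page is resident at "offset".
 * It assumes the caller already holds the object lock, as required above.
 */
#if 0
static boolean_t
example_page_is_resident(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_object_lock_assert_held(object);

	return (vm_page_lookup(object, offset) != VM_PAGE_NULL);
}
#endif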
2144
2145 /*
2146 * vm_page_rename:
2147 *
2148 * Move the given memory entry from its
2149 * current object to the specified target object/offset.
2150 *
2151 * Both the original and the new object must be locked.
2152 */
2153 void
2154 vm_page_rename(
2155 vm_page_t mem,
2156 vm_object_t new_object,
2157 vm_object_offset_t new_offset)
2158 {
2159 boolean_t internal_to_external, external_to_internal;
2160 vm_tag_t tag;
2161 vm_object_t m_object;
2162
2163 m_object = VM_PAGE_OBJECT(mem);
2164
2165 assert(m_object != new_object);
2166 assert(m_object);
2167
2168 XPR(XPR_VM_PAGE,
2169 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
2170 new_object, new_offset,
2171 mem, 0,0);
2172
2173 /*
2174 * Changes to mem->object require the page queues lock because
2175 * the pageout daemon uses that lock to get the object.
2176 */
2177 vm_page_lockspin_queues();
2178
2179 internal_to_external = FALSE;
2180 external_to_internal = FALSE;
2181
2182 if (mem->vm_page_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q) {
2183 /*
2184 * it's much easier to get the vm_page_pageable_xxx accounting correct
2185 * if we first move the page to the active queue... it's going to end
2186 * up there anyway, and we don't call vm_page_rename() frequently enough
2187 * for this to matter.
2188 */
2189 vm_page_queues_remove(mem, FALSE);
2190 vm_page_activate(mem);
2191 }
2192 if (VM_PAGE_PAGEABLE(mem)) {
2193 if (m_object->internal && !new_object->internal) {
2194 internal_to_external = TRUE;
2195 }
2196 if (!m_object->internal && new_object->internal) {
2197 external_to_internal = TRUE;
2198 }
2199 }
2200
2201 tag = m_object->wire_tag;
2202 vm_page_remove(mem, TRUE);
2203 vm_page_insert_internal(mem, new_object, new_offset, tag, TRUE, TRUE, FALSE, FALSE, NULL);
2204
2205 if (internal_to_external) {
2206 vm_page_pageable_internal_count--;
2207 vm_page_pageable_external_count++;
2208 } else if (external_to_internal) {
2209 vm_page_pageable_external_count--;
2210 vm_page_pageable_internal_count++;
2211 }
2212
2213 vm_page_unlock_queues();
2214 }
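
/*
 * Illustrative sketch of a vm_page_rename() caller: a hypothetical helper
 * that moves a resident page into another object, assuming both the source
 * and destination objects are locked exclusively.
 */
#if 0
static void
example_move_page(
	vm_page_t		m,
	vm_object_t		dst_object,
	vm_object_offset_t	dst_offset)
{
	vm_object_lock_assert_exclusive(VM_PAGE_OBJECT(m));
	vm_object_lock_assert_exclusive(dst_object);

	vm_page_rename(m, dst_object, dst_offset);
}
#endif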
2215
2216 /*
2217 * vm_page_init:
2218 *
2219 * Initialize the fields in a new page.
2220 * This takes a structure with random values and initializes it
2221 * so that it can be given to vm_page_release or vm_page_insert.
2222 */
2223 void
2224 vm_page_init(
2225 vm_page_t mem,
2226 ppnum_t phys_page,
2227 boolean_t lopage)
2228 {
2229 assert(phys_page);
2230
2231 #if DEBUG
2232 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
2233 if (!(pmap_valid_page(phys_page))) {
2234 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
2235 }
2236 }
2237 #endif
2238 *mem = vm_page_template;
2239
2240 VM_PAGE_SET_PHYS_PAGE(mem, phys_page);
2241 #if 0
2242 /*
2243 * we're leaving this turned off for now... currently pages
2244 * come off the free list and are either immediately dirtied/referenced
2245 * due to zero-fill or COW faults, or are used to read or write files...
2246 * in the file I/O case, the UPL mechanism takes care of clearing
2247 * the state of the HW ref/mod bits in a somewhat fragile way.
2248 * Since we may change the way this works in the future (to toughen it up),
2249 * I'm leaving this as a reminder of where these bits could get cleared
2250 */
2251
2252 /*
2253 * make sure both the h/w referenced and modified bits are
2254 * clear at this point... we are especially dependent on
2255 * not finding a 'stale' h/w modified in a number of spots
2256 * once this page goes back into use
2257 */
2258 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
2259 #endif
2260 mem->lopage = lopage;
2261 }
2262
2263 /*
2264 * vm_page_grab_fictitious:
2265 *
2266 * Remove a fictitious page from the free list.
2267 * Returns VM_PAGE_NULL if there are no free pages.
2268 */
2269 int c_vm_page_grab_fictitious = 0;
2270 int c_vm_page_grab_fictitious_failed = 0;
2271 int c_vm_page_release_fictitious = 0;
2272 int c_vm_page_more_fictitious = 0;
2273
2274 vm_page_t
2275 vm_page_grab_fictitious_common(
2276 ppnum_t phys_addr)
2277 {
2278 vm_page_t m;
2279
2280 if ((m = (vm_page_t)zget(vm_page_zone))) {
2281
2282 vm_page_init(m, phys_addr, FALSE);
2283 m->fictitious = TRUE;
2284
2285 c_vm_page_grab_fictitious++;
2286 } else
2287 c_vm_page_grab_fictitious_failed++;
2288
2289 return m;
2290 }
2291
2292 vm_page_t
2293 vm_page_grab_fictitious(void)
2294 {
2295 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
2296 }
2297
2298 int vm_guard_count;
2299
2300
2301 vm_page_t
2302 vm_page_grab_guard(void)
2303 {
2304 vm_page_t page;
2305 page = vm_page_grab_fictitious_common(vm_page_guard_addr);
2306 if (page) OSAddAtomic(1, &vm_guard_count);
2307 return page;
2308 }
2309
2310
2311 /*
2312 * vm_page_release_fictitious:
2313 *
2314 * Release a fictitious page to the zone pool
2315 */
2316 void
2317 vm_page_release_fictitious(
2318 vm_page_t m)
2319 {
2320 assert((m->vm_page_q_state == VM_PAGE_NOT_ON_Q) || (m->vm_page_q_state == VM_PAGE_IS_WIRED));
2321 assert(m->fictitious);
2322 assert(VM_PAGE_GET_PHYS_PAGE(m) == vm_page_fictitious_addr ||
2323 VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr);
2324
2325
2326 if (VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr) OSAddAtomic(-1, &vm_guard_count);
2327
2328 c_vm_page_release_fictitious++;
2329
2330 zfree(vm_page_zone, m);
2331 }
2332
2333 /*
2334 * vm_page_more_fictitious:
2335 *
2336 * Add more fictitious pages to the zone.
2337 * Allowed to block. This routine is tightly coupled
2338 * to the zones code, for several reasons:
2339 * 1. we need to carve some page structures out of physical
2340 * memory before zones work, so they _cannot_ come from
2341 * the zone_map.
2342 * 2. the zone needs to be collectable in order to prevent
2343 * growth without bound. These structures are used by
2344 * the device pager (by the hundreds and thousands), as
2345 * private pages for pageout, and as blocking pages for
2346 * pagein. Temporary bursts in demand should not result in
2347 * permanent allocation of a resource.
2348 * 3. To smooth allocation humps, we allocate single pages
2349 * with kernel_memory_allocate(), and cram them into the
2350 * zone.
2351 */
2352
2353 void vm_page_more_fictitious(void)
2354 {
2355 vm_offset_t addr;
2356 kern_return_t retval;
2357
2358 c_vm_page_more_fictitious++;
2359
2360 /*
2361 * Allocate a single page from the zone_map. Do not wait if no physical
2362 * pages are immediately available, and do not zero the space. We need
2363 * our own blocking lock here to prevent multiple simultaneous
2364 * requests from piling up on the zone_map lock. Exactly one
2365 * (of our) threads should be potentially waiting on the map lock.
2366 * If the winner is not vm-privileged, then the page allocation will fail,
2367 * and it will temporarily block here in the vm_page_wait().
2368 */
2369 lck_mtx_lock(&vm_page_alloc_lock);
2370 /*
2371 * If another thread allocated space, just bail out now.
2372 */
2373 if (zone_free_count(vm_page_zone) > 5) {
2374 /*
2375 * The number "5" is a small number that is larger than the
2376 * number of fictitious pages that any single caller will
2377 * attempt to allocate. Otherwise, a thread will attempt to
2378 * acquire a fictitious page (vm_page_grab_fictitious), fail,
2379 * release all of the resources and locks already acquired,
2380 * and then call this routine. This routine finds the pages
2381 * that the caller released, so it fails to allocate new space.
2382 * The process repeats infinitely. The largest known number
2383 * of fictitious pages required in this manner is 2. 5 is
2384 * simply a somewhat larger number.
2385 */
2386 lck_mtx_unlock(&vm_page_alloc_lock);
2387 return;
2388 }
2389
2390 retval = kernel_memory_allocate(zone_map,
2391 &addr, PAGE_SIZE, 0,
2392 KMA_KOBJECT|KMA_NOPAGEWAIT, VM_KERN_MEMORY_ZONE);
2393 if (retval != KERN_SUCCESS) {
2394 /*
2395 * No page was available. Drop the
2396 * lock to give another thread a chance at it, and
2397 * wait for the pageout daemon to make progress.
2398 */
2399 lck_mtx_unlock(&vm_page_alloc_lock);
2400 vm_page_wait(THREAD_UNINT);
2401 return;
2402 }
2403
2404 zcram(vm_page_zone, addr, PAGE_SIZE);
2405
2406 lck_mtx_unlock(&vm_page_alloc_lock);
2407 }
2408
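/*
 * Illustrative sketch of the retry protocol described above: a hypothetical
 * caller that fails to grab a fictitious page releases its own resources,
 * replenishes the zone via vm_page_more_fictitious() (which may block), and
 * tries again.
 */
#if 0
static vm_page_t
example_grab_fictitious_retry(void)
{
	vm_page_t	m;

	for (;;) {
		m = vm_page_grab_fictitious();
		if (m != VM_PAGE_NULL)
			return m;
		vm_page_more_fictitious();	/* may block while refilling the zone */
	}
}
#endif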
2409
2410 /*
2411 * vm_pool_low():
2412 *
2413 * Return true if it is not likely that a non-vm_privileged thread
2414 * can get memory without blocking. Advisory only, since the
2415 * situation may change under us.
2416 */
2417 int
2418 vm_pool_low(void)
2419 {
2420 /* No locking, at worst we will fib. */
2421 return( vm_page_free_count <= vm_page_free_reserved );
2422 }
2423
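/*
 * Illustrative sketch: because vm_pool_low() is advisory, a hypothetical
 * caller uses it only to defer optional work, never as a guarantee that a
 * later allocation will succeed or fail.
 */
#if 0
static void
example_do_optional_work(void)
{
	if (vm_pool_low()) {
		/* a non-privileged page grab would likely block right now */
		return;
	}
	/* ... best-effort, non-critical page allocations go here ... */
}
#endif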
2424
2425 #if CONFIG_BACKGROUND_QUEUE
2426
2427 void
2428 vm_page_update_background_state(vm_page_t mem)
2429 {
2430 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2431 return;
2432
2433 if (mem->vm_page_in_background == FALSE)
2434 return;
2435
2436 #if BACKGROUNDQ_BASED_ON_QOS
2437 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS) <= THREAD_QOS_LEGACY)
2438 return;
2439 #else
2440 task_t my_task;
2441
2442 my_task = current_task();
2443
2444 if (my_task) {
2445 if (proc_get_effective_task_policy(my_task, TASK_POLICY_DARWIN_BG))
2446 return;
2447 }
2448 #endif
2449 vm_page_lockspin_queues();
2450
2451 mem->vm_page_in_background = FALSE;
2452 vm_page_background_promoted_count++;
2453
2454 vm_page_remove_from_backgroundq(mem);
2455
2456 vm_page_unlock_queues();
2457 }
2458
2459
2460 void
2461 vm_page_assign_background_state(vm_page_t mem)
2462 {
2463 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2464 return;
2465
2466 #if BACKGROUNDQ_BASED_ON_QOS
2467 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS) <= THREAD_QOS_LEGACY)
2468 mem->vm_page_in_background = TRUE;
2469 else
2470 mem->vm_page_in_background = FALSE;
2471 #else
2472 task_t my_task;
2473
2474 my_task = current_task();
2475
2476 if (my_task)
2477 mem->vm_page_in_background = proc_get_effective_task_policy(my_task, TASK_POLICY_DARWIN_BG);
2478 #endif
2479 }
2480
2481
2482 void
2483 vm_page_remove_from_backgroundq(
2484 vm_page_t mem)
2485 {
2486 vm_object_t m_object;
2487
2488 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2489
2490 if (mem->vm_page_on_backgroundq) {
2491 vm_page_queue_remove(&vm_page_queue_background, mem, vm_page_t, vm_page_backgroundq);
2492
2493 mem->vm_page_backgroundq.next = 0;
2494 mem->vm_page_backgroundq.prev = 0;
2495 mem->vm_page_on_backgroundq = FALSE;
2496
2497 vm_page_background_count--;
2498
2499 m_object = VM_PAGE_OBJECT(mem);
2500
2501 if (m_object->internal)
2502 vm_page_background_internal_count--;
2503 else
2504 vm_page_background_external_count--;
2505 } else {
2506 assert(VM_PAGE_UNPACK_PTR(mem->vm_page_backgroundq.next) == (uintptr_t)NULL &&
2507 VM_PAGE_UNPACK_PTR(mem->vm_page_backgroundq.prev) == (uintptr_t)NULL);
2508 }
2509 }
2510
2511
2512 void
2513 vm_page_add_to_backgroundq(
2514 vm_page_t mem,
2515 boolean_t first)
2516 {
2517 vm_object_t m_object;
2518
2519 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2520
2521 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2522 return;
2523
2524 if (mem->vm_page_on_backgroundq == FALSE) {
2525
2526 m_object = VM_PAGE_OBJECT(mem);
2527
2528 if (vm_page_background_exclude_external && !m_object->internal)
2529 return;
2530
2531 if (first == TRUE)
2532 vm_page_queue_enter_first(&vm_page_queue_background, mem, vm_page_t, vm_page_backgroundq);
2533 else
2534 vm_page_queue_enter(&vm_page_queue_background, mem, vm_page_t, vm_page_backgroundq);
2535 mem->vm_page_on_backgroundq = TRUE;
2536
2537 vm_page_background_count++;
2538
2539 if (m_object->internal)
2540 vm_page_background_internal_count++;
2541 else
2542 vm_page_background_external_count++;
2543 }
2544 }
2545
2546 #endif
2547
2548 /*
2549 * this is an interface to support bring-up of drivers
2550 * on platforms with physical memory > 4G...
2551 */
2552 int vm_himemory_mode = 2;
2553
2554
2555 /*
2556 * this interface exists to support hardware controllers
2557 * incapable of generating DMAs with more than 32 bits
2558 * of address on platforms with physical memory > 4G...
2559 */
2560 unsigned int vm_lopages_allocated_q = 0;
2561 unsigned int vm_lopages_allocated_cpm_success = 0;
2562 unsigned int vm_lopages_allocated_cpm_failed = 0;
2563 vm_page_queue_head_t vm_lopage_queue_free __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
2564
2565 vm_page_t
2566 vm_page_grablo(void)
2567 {
2568 vm_page_t mem;
2569
2570 if (vm_lopage_needed == FALSE)
2571 return (vm_page_grab());
2572
2573 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2574
2575 if ( !vm_page_queue_empty(&vm_lopage_queue_free)) {
2576 vm_page_queue_remove_first(&vm_lopage_queue_free,
2577 mem,
2578 vm_page_t,
2579 pageq);
2580 assert(vm_lopage_free_count);
2581 assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
2582 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
2583
2584 vm_lopage_free_count--;
2585 vm_lopages_allocated_q++;
2586
2587 if (vm_lopage_free_count < vm_lopage_lowater)
2588 vm_lopage_refill = TRUE;
2589
2590 lck_mtx_unlock(&vm_page_queue_free_lock);
2591
2592 #if CONFIG_BACKGROUND_QUEUE
2593 vm_page_assign_background_state(mem);
2594 #endif
2595 } else {
2596 lck_mtx_unlock(&vm_page_queue_free_lock);
2597
2598 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
2599
2600 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2601 vm_lopages_allocated_cpm_failed++;
2602 lck_mtx_unlock(&vm_page_queue_free_lock);
2603
2604 return (VM_PAGE_NULL);
2605 }
2606 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
2607
2608 mem->busy = TRUE;
2609
2610 vm_page_lockspin_queues();
2611
2612 mem->gobbled = FALSE;
2613 vm_page_gobble_count--;
2614 vm_page_wire_count--;
2615
2616 vm_lopages_allocated_cpm_success++;
2617 vm_page_unlock_queues();
2618 }
2619 assert(mem->busy);
2620 assert(!mem->pmapped);
2621 assert(!mem->wpmapped);
2622 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2623
2624 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2625
2626 return (mem);
2627 }
2628
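/*
 * Illustrative sketch: a hypothetical driver path that needs a page
 * addressable by a 32-bit DMA engine grabs it through vm_page_grablo()
 * and reports a shortage if the low-memory pool is exhausted.
 */
#if 0
static kern_return_t
example_grab_dma32_page(vm_page_t *mp)
{
	vm_page_t	m;

	m = vm_page_grablo();
	if (m == VM_PAGE_NULL)
		return KERN_RESOURCE_SHORTAGE;

	*mp = m;	/* busy, not on any queue */
	return KERN_SUCCESS;
}
#endif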
2629
2630 /*
2631 * vm_page_grab:
2632 *
2633 * first try to grab a page from the per-cpu free list...
2634 * this must be done while pre-emption is disabled... if
2635 * a page is available, we're done...
2636 * if no page is available, grab the vm_page_queue_free_lock
2637 * and see if current number of free pages would allow us
2638 * to grab at least 1... if not, return VM_PAGE_NULL as before...
2639 * if there are pages available, disable preemption and
2640 * recheck the state of the per-cpu free list... we could
2641 * have been preempted and moved to a different cpu, or
2642 * some other thread could have re-filled it... if still
2643 * empty, figure out how many pages we can steal from the
2644 * global free queue and move to the per-cpu queue...
2645 * return 1 of these pages when done... only wake up the
2646 * pageout_scan thread if we moved pages from the global
2647 * list... no need for the wakeup if we've satisfied the
2648 * request from the per-cpu queue.
2649 */
2650
2651 #if CONFIG_SECLUDED_MEMORY
2652 vm_page_t vm_page_grab_secluded(void);
2653 #endif /* CONFIG_SECLUDED_MEMORY */
2654
2655 vm_page_t
2656 vm_page_grab(void)
2657 {
2658 return vm_page_grab_options(0);
2659 }
2660
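/*
 * Illustrative sketch of the common caller pattern for vm_page_grab():
 * a hypothetical helper retries through vm_page_wait() until a page is
 * available, giving up only if the interruptible wait is broken.
 */
#if 0
static vm_page_t
example_grab_page_wait(void)
{
	vm_page_t	m;

	while ((m = vm_page_grab()) == VM_PAGE_NULL) {
		if (!vm_page_wait(THREAD_INTERRUPTIBLE))
			break;		/* interrupted out of the wait */
	}
	return m;
}
#endif
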
2661 #if HIBERNATION
2662 boolean_t hibernate_rebuild_needed = FALSE;
2663 #endif /* HIBERNATION */
2664
2665 vm_page_t
2666 vm_page_grab_options(
2667 int grab_options)
2668 {
2669 vm_page_t mem;
2670
2671 disable_preemption();
2672
2673 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2674 return_page_from_cpu_list:
2675 assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
2676
2677 #if HIBERNATION
2678 if (hibernate_rebuild_needed) {
2679 panic("%s:%d should not modify cpu->free_pages while hibernating", __FUNCTION__, __LINE__);
2680 }
2681 #endif /* HIBERNATION */
2682 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2683 PROCESSOR_DATA(current_processor(), free_pages) = mem->snext;
2684
2685 enable_preemption();
2686 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2687 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
2688
2689 assert(mem->listq.next == 0 && mem->listq.prev == 0);
2690 assert(mem->tabled == FALSE);
2691 assert(mem->vm_page_object == 0);
2692 assert(!mem->laundry);
2693 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
2694 assert(mem->busy);
2695 assert(!mem->pmapped);
2696 assert(!mem->wpmapped);
2697 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2698
2699 #if CONFIG_BACKGROUND_QUEUE
2700 vm_page_assign_background_state(mem);
2701 #endif
2702 return mem;
2703 }
2704 enable_preemption();
2705
2706
2707 /*
2708 * Optionally produce warnings if the wire or gobble
2709 * counts exceed some threshold.
2710 */
2711 #if VM_PAGE_WIRE_COUNT_WARNING
2712 if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) {
2713 printf("mk: vm_page_grab(): high wired page count of %d\n",
2714 vm_page_wire_count);
2715 }
2716 #endif
2717 #if VM_PAGE_GOBBLE_COUNT_WARNING
2718 if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) {
2719 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
2720 vm_page_gobble_count);
2721 }
2722 #endif
2723
2724 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2725
2726 /*
2727 * Only let privileged threads (involved in pageout)
2728 * dip into the reserved pool.
2729 */
2730 if ((vm_page_free_count < vm_page_free_reserved) &&
2731 !(current_thread()->options & TH_OPT_VMPRIV)) {
2732 /* no page for us in the free queue... */
2733 lck_mtx_unlock(&vm_page_queue_free_lock);
2734 mem = VM_PAGE_NULL;
2735
2736 #if CONFIG_SECLUDED_MEMORY
2737 /* ... but can we try and grab from the secluded queue? */
2738 if (vm_page_secluded_count > 0 &&
2739 ((grab_options & VM_PAGE_GRAB_SECLUDED) ||
2740 task_can_use_secluded_mem(current_task()))) {
2741 mem = vm_page_grab_secluded();
2742 if (grab_options & VM_PAGE_GRAB_SECLUDED) {
2743 vm_page_secluded.grab_for_iokit++;
2744 if (mem) {
2745 vm_page_secluded.grab_for_iokit_success++;
2746 }
2747 }
2748 if (mem) {
2749 VM_CHECK_MEMORYSTATUS;
2750 return mem;
2751 }
2752 }
2753 #else /* CONFIG_SECLUDED_MEMORY */
2754 (void) grab_options;
2755 #endif /* CONFIG_SECLUDED_MEMORY */
2756 }
2757 else {
2758 vm_page_t head;
2759 vm_page_t tail;
2760 unsigned int pages_to_steal;
2761 unsigned int color;
2762 unsigned int clump_end, sub_count;
2763
2764 while ( vm_page_free_count == 0 ) {
2765
2766 lck_mtx_unlock(&vm_page_queue_free_lock);
2767 /*
2768 * must be a privileged thread to be
2769 * in this state since a non-privileged
2770 * thread would have bailed if we were
2771 * under the vm_page_free_reserved mark
2772 */
2773 VM_PAGE_WAIT();
2774 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2775 }
2776
2777 disable_preemption();
2778
2779 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2780 lck_mtx_unlock(&vm_page_queue_free_lock);
2781
2782 /*
2783 * we got preempted and moved to another processor
2784 * or we got preempted and someone else ran and filled the cache
2785 */
2786 goto return_page_from_cpu_list;
2787 }
2788 if (vm_page_free_count <= vm_page_free_reserved)
2789 pages_to_steal = 1;
2790 else {
2791 if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved))
2792 pages_to_steal = vm_free_magazine_refill_limit;
2793 else
2794 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
2795 }
2796 color = PROCESSOR_DATA(current_processor(), start_color);
2797 head = tail = NULL;
2798
2799 vm_page_free_count -= pages_to_steal;
2800 clump_end = sub_count = 0;
2801
2802 while (pages_to_steal--) {
2803
2804 while (vm_page_queue_empty(&vm_page_queue_free[color].qhead))
2805 color = (color + 1) & vm_color_mask;
2806 #if defined(__x86_64__)
2807 vm_page_queue_remove_first_with_clump(&vm_page_queue_free[color].qhead,
2808 mem,
2809 vm_page_t,
2810 pageq,
2811 clump_end);
2812 #else
2813 vm_page_queue_remove_first(&vm_page_queue_free[color].qhead,
2814 mem,
2815 vm_page_t,
2816 pageq);
2817 #endif
2818
2819 assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_Q);
2820
2821 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2822
2823 #if defined(__arm__) || defined(__arm64__)
2824 color = (color + 1) & vm_color_mask;
2825 #else
2826
2827 #if DEVELOPMENT || DEBUG
2828
2829 sub_count++;
2830 if (clump_end) {
2831 vm_clump_update_stats(sub_count);
2832 sub_count = 0;
2833 color = (color + 1) & vm_color_mask;
2834 }
2835 #else
2836 if (clump_end) color = (color + 1) & vm_color_mask;
2837
2838 #endif /* if DEVELOPMENT || DEBUG */
2839
2840 #endif /* if defined(__arm__) || defined(__arm64__) */
2841
2842 if (head == NULL)
2843 head = mem;
2844 else
2845 tail->snext = mem;
2846 tail = mem;
2847
2848 assert(mem->listq.next == 0 && mem->listq.prev == 0);
2849 assert(mem->tabled == FALSE);
2850 assert(mem->vm_page_object == 0);
2851 assert(!mem->laundry);
2852
2853 mem->vm_page_q_state = VM_PAGE_ON_FREE_LOCAL_Q;
2854
2855 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
2856 assert(mem->busy);
2857 assert(!mem->pmapped);
2858 assert(!mem->wpmapped);
2859 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2860 }
2861 #if defined (__x86_64__) && (DEVELOPMENT || DEBUG)
2862 vm_clump_update_stats(sub_count);
2863 #endif
2864 lck_mtx_unlock(&vm_page_queue_free_lock);
2865
2866 #if HIBERNATION
2867 if (hibernate_rebuild_needed) {
2868 panic("%s:%d should not modify cpu->free_pages while hibernating", __FUNCTION__, __LINE__);
2869 }
2870 #endif /* HIBERNATION */
2871 PROCESSOR_DATA(current_processor(), free_pages) = head->snext;
2872 PROCESSOR_DATA(current_processor(), start_color) = color;
2873
2874 /*
2875 * satisfy this request
2876 */
2877 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2878 mem = head;
2879 assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
2880
2881 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2882 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
2883
2884 enable_preemption();
2885 }
2886 /*
2887 * Decide if we should poke the pageout daemon.
2888 * We do this if the free count is less than the low
2889 * water mark, or if the free count is less than the high
2890 * water mark (but above the low water mark) and the inactive
2891 * count is less than its target.
2892 *
2893 * We don't have the counts locked ... if they change a little,
2894 * it doesn't really matter.
2895 */
2896 if ((vm_page_free_count < vm_page_free_min) ||
2897 ((vm_page_free_count < vm_page_free_target) &&
2898 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
2899 thread_wakeup((event_t) &vm_page_free_wanted);
2900
2901 VM_CHECK_MEMORYSTATUS;
2902
2903 if (mem) {
2904 // dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
2905
2906 #if CONFIG_BACKGROUND_QUEUE
2907 vm_page_assign_background_state(mem);
2908 #endif
2909 }
2910 return mem;
2911 }
2912
2913 #if CONFIG_SECLUDED_MEMORY
2914 vm_page_t
2915 vm_page_grab_secluded(void)
2916 {
2917 vm_page_t mem;
2918 vm_object_t object;
2919 int refmod_state;
2920
2921 if (vm_page_secluded_count == 0) {
2922 /* no secluded pages to grab... */
2923 return VM_PAGE_NULL;
2924 }
2925
2926 /* secluded queue is protected by the VM page queue lock */
2927 vm_page_lock_queues();
2928
2929 if (vm_page_secluded_count == 0) {
2930 /* no secluded pages to grab... */
2931 vm_page_unlock_queues();
2932 return VM_PAGE_NULL;
2933 }
2934
2935 #if 00
2936 /* can we grab from the secluded queue? */
2937 if (vm_page_secluded_count > vm_page_secluded_target ||
2938 (vm_page_secluded_count > 0 &&
2939 task_can_use_secluded_mem(current_task()))) {
2940 /* OK */
2941 } else {
2942 /* can't grab from secluded queue... */
2943 vm_page_unlock_queues();
2944 return VM_PAGE_NULL;
2945 }
2946 #endif
2947
2948 /* we can grab a page from secluded queue! */
2949 assert((vm_page_secluded_count_free +
2950 vm_page_secluded_count_inuse) ==
2951 vm_page_secluded_count);
2952 if (current_task()->task_can_use_secluded_mem) {
2953 assert(num_tasks_can_use_secluded_mem > 0);
2954 }
2955 assert(!vm_page_queue_empty(&vm_page_queue_secluded));
2956 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2957 mem = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
2958 assert(mem->vm_page_q_state == VM_PAGE_ON_SECLUDED_Q);
2959 vm_page_queues_remove(mem, TRUE);
2960
2961 object = VM_PAGE_OBJECT(mem);
2962
2963 assert(!mem->fictitious);
2964 assert(!VM_PAGE_WIRED(mem));
2965 if (object == VM_OBJECT_NULL) {
2966 /* free for grab! */
2967 vm_page_unlock_queues();
2968 vm_page_secluded.grab_success_free++;
2969
2970 assert(mem->busy);
2971 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
2972 assert(VM_PAGE_OBJECT(mem) == VM_OBJECT_NULL);
2973 assert(mem->pageq.next == 0);
2974 assert(mem->pageq.prev == 0);
2975 assert(mem->listq.next == 0);
2976 assert(mem->listq.prev == 0);
2977 #if CONFIG_BACKGROUND_QUEUE
2978 assert(mem->vm_page_on_backgroundq == 0);
2979 assert(mem->vm_page_backgroundq.next == 0);
2980 assert(mem->vm_page_backgroundq.prev == 0);
2981 #endif /* CONFIG_BACKGROUND_QUEUE */
2982 return mem;
2983 }
2984
2985 assert(!object->internal);
2986 // vm_page_pageable_external_count--;
2987
2988 if (!vm_object_lock_try(object)) {
2989 // printf("SECLUDED: page %p: object %p locked\n", mem, object);
2990 vm_page_secluded.grab_failure_locked++;
2991 reactivate_secluded_page:
2992 vm_page_activate(mem);
2993 vm_page_unlock_queues();
2994 return VM_PAGE_NULL;
2995 }
2996 if (mem->busy ||
2997 mem->cleaning ||
2998 mem->laundry) {
2999 /* can't steal page in this state... */
3000 vm_object_unlock(object);
3001 vm_page_secluded.grab_failure_state++;
3002 goto reactivate_secluded_page;
3003 }
3004
3005 mem->busy = TRUE;
3006 refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
3007 if (refmod_state & VM_MEM_REFERENCED) {
3008 mem->reference = TRUE;
3009 }
3010 if (refmod_state & VM_MEM_MODIFIED) {
3011 SET_PAGE_DIRTY(mem, FALSE);
3012 }
3013 if (mem->dirty || mem->precious) {
3014 /* can't grab a dirty page; re-activate */
3015 // printf("SECLUDED: dirty page %p\n", mem);
3016 PAGE_WAKEUP_DONE(mem);
3017 vm_page_secluded.grab_failure_dirty++;
3018 vm_object_unlock(object);
3019 goto reactivate_secluded_page;
3020 }
3021 if (mem->reference) {
3022 /* it's been used but we do need to grab a page... */
3023 }
3024
3025 vm_page_unlock_queues();
3026
3027 /* finish what vm_page_free() would have done... */
3028 vm_page_free_prepare_object(mem, TRUE);
3029 vm_object_unlock(object);
3030 object = VM_OBJECT_NULL;
3031 if (vm_page_free_verify) {
3032 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
3033 }
3034 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
3035 vm_page_secluded.grab_success_other++;
3036
3037 assert(mem->busy);
3038 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
3039 assert(VM_PAGE_OBJECT(mem) == VM_OBJECT_NULL);
3040 assert(mem->pageq.next == 0);
3041 assert(mem->pageq.prev == 0);
3042 assert(mem->listq.next == 0);
3043 assert(mem->listq.prev == 0);
3044 #if CONFIG_BACKGROUND_QUEUE
3045 assert(mem->vm_page_on_backgroundq == 0);
3046 assert(mem->vm_page_backgroundq.next == 0);
3047 assert(mem->vm_page_backgroundq.prev == 0);
3048 #endif /* CONFIG_BACKGROUND_QUEUE */
3049
3050 return mem;
3051 }
3052 #endif /* CONFIG_SECLUDED_MEMORY */
3053
3054 /*
3055 * vm_page_release:
3056 *
3057 * Return a page to the free list.
3058 */
3059
3060 void
3061 vm_page_release(
3062 vm_page_t mem,
3063 boolean_t page_queues_locked)
3064 {
3065 unsigned int color;
3066 int need_wakeup = 0;
3067 int need_priv_wakeup = 0;
3068 #if CONFIG_SECLUDED_MEMORY
3069 int need_secluded_wakeup = 0;
3070 #endif /* CONFIG_SECLUDED_MEMORY */
3071
3072 if (page_queues_locked) {
3073 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3074 } else {
3075 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
3076 }
3077
3078 assert(!mem->private && !mem->fictitious);
3079 if (vm_page_free_verify) {
3080 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
3081 }
3082 // dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
3083
3084 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
3085
3086 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3087
3088 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
3089 assert(mem->busy);
3090 assert(!mem->laundry);
3091 assert(mem->vm_page_object == 0);
3092 assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
3093 assert(mem->listq.next == 0 && mem->listq.prev == 0);
3094 #if CONFIG_BACKGROUND_QUEUE
3095 assert(mem->vm_page_backgroundq.next == 0 &&
3096 mem->vm_page_backgroundq.prev == 0 &&
3097 mem->vm_page_on_backgroundq == FALSE);
3098 #endif
3099 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
3100 vm_lopage_free_count < vm_lopage_free_limit &&
3101 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3102 /*
3103 * this exists to support hardware controllers
3104 * incapable of generating DMAs with more than 32 bits
3105 * of address on platforms with physical memory > 4G...
3106 */
3107 vm_page_queue_enter_first(&vm_lopage_queue_free,
3108 mem,
3109 vm_page_t,
3110 pageq);
3111 vm_lopage_free_count++;
3112
3113 if (vm_lopage_free_count >= vm_lopage_free_limit)
3114 vm_lopage_refill = FALSE;
3115
3116 mem->vm_page_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
3117 mem->lopage = TRUE;
3118 #if CONFIG_SECLUDED_MEMORY
3119 } else if (vm_page_free_count > vm_page_free_reserved &&
3120 vm_page_secluded_count < vm_page_secluded_target &&
3121 num_tasks_can_use_secluded_mem == 0) {
3122 /*
3123 * XXX FBDP TODO: also avoid refilling secluded queue
3124 * when some IOKit objects are already grabbing from it...
3125 */
3126 if (!page_queues_locked) {
3127 if (!vm_page_trylock_queues()) {
3128 /* take locks in right order */
3129 lck_mtx_unlock(&vm_page_queue_free_lock);
3130 vm_page_lock_queues();
3131 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3132 }
3133 }
3134 mem->lopage = FALSE;
3135 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3136 vm_page_queue_enter_first(&vm_page_queue_secluded,
3137 mem,
3138 vm_page_t,
3139 pageq);
3140 mem->vm_page_q_state = VM_PAGE_ON_SECLUDED_Q;
3141 vm_page_secluded_count++;
3142 vm_page_secluded_count_free++;
3143 if (!page_queues_locked) {
3144 vm_page_unlock_queues();
3145 }
3146 LCK_MTX_ASSERT(&vm_page_queue_free_lock, LCK_MTX_ASSERT_OWNED);
3147 if (vm_page_free_wanted_secluded > 0) {
3148 vm_page_free_wanted_secluded--;
3149 need_secluded_wakeup = 1;
3150 }
3151 #endif /* CONFIG_SECLUDED_MEMORY */
3152 } else {
3153 mem->lopage = FALSE;
3154 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
3155
3156 color = VM_PAGE_GET_COLOR(mem);
3157 #if defined(__x86_64__)
3158 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead,
3159 mem,
3160 vm_page_t,
3161 pageq);
3162 #else
3163 vm_page_queue_enter(&vm_page_queue_free[color].qhead,
3164 mem,
3165 vm_page_t,
3166 pageq);
3167 #endif
3168 vm_page_free_count++;
3169 /*
3170 * Check if we should wake up someone waiting for page.
3171 * But don't bother waking them unless they can allocate.
3172 *
3173 * We wakeup only one thread, to prevent starvation.
3174 * Because the scheduling system handles wait queues FIFO,
3175 * if we wake up all waiting threads, one greedy thread
3176 * can starve multiple niceguy threads. When the threads
3177 * all wake up, the greedy thread runs first, grabs the page,
3178 * and waits for another page. It will be the first to run
3179 * when the next page is freed.
3180 *
3181 * However, there is a slight danger here.
3182 * The thread we wake might not use the free page.
3183 * Then the other threads could wait indefinitely
3184 * while the page goes unused. To forestall this,
3185 * the pageout daemon will keep making free pages
3186 * as long as vm_page_free_wanted is non-zero.
3187 */
3188
3189 assert(vm_page_free_count > 0);
3190 if (vm_page_free_wanted_privileged > 0) {
3191 vm_page_free_wanted_privileged--;
3192 need_priv_wakeup = 1;
3193 #if CONFIG_SECLUDED_MEMORY
3194 } else if (vm_page_free_wanted_secluded > 0 &&
3195 vm_page_free_count > vm_page_free_reserved) {
3196 vm_page_free_wanted_secluded--;
3197 need_secluded_wakeup = 1;
3198 #endif /* CONFIG_SECLUDED_MEMORY */
3199 } else if (vm_page_free_wanted > 0 &&
3200 vm_page_free_count > vm_page_free_reserved) {
3201 vm_page_free_wanted--;
3202 need_wakeup = 1;
3203 }
3204 }
3205 lck_mtx_unlock(&vm_page_queue_free_lock);
3206
3207 if (need_priv_wakeup)
3208 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
3209 #if CONFIG_SECLUDED_MEMORY
3210 else if (need_secluded_wakeup)
3211 thread_wakeup_one((event_t) &vm_page_free_wanted_secluded);
3212 #endif /* CONFIG_SECLUDED_MEMORY */
3213 else if (need_wakeup)
3214 thread_wakeup_one((event_t) &vm_page_free_count);
3215
3216 VM_CHECK_MEMORYSTATUS;
3217 }
3218
3219 /*
3220 * This version of vm_page_release() is used only at startup
3221 * when we are single-threaded and pages are being released
3222 * for the first time. Hence, no locking or unnecessary checks are made.
3223 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
3224 */
3225 void
3226 vm_page_release_startup(
3227 vm_page_t mem)
3228 {
3229 vm_page_queue_t queue_free;
3230
3231 if (vm_lopage_free_count < vm_lopage_free_limit &&
3232 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3233 mem->lopage = TRUE;
3234 mem->vm_page_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
3235 vm_lopage_free_count++;
3236 queue_free = &vm_lopage_queue_free;
3237 #if CONFIG_SECLUDED_MEMORY
3238 } else if (vm_page_secluded_count < vm_page_secluded_target) {
3239 mem->lopage = FALSE;
3240 mem->vm_page_q_state = VM_PAGE_ON_SECLUDED_Q;
3241 vm_page_secluded_count++;
3242 vm_page_secluded_count_free++;
3243 queue_free = &vm_page_queue_secluded;
3244 #endif /* CONFIG_SECLUDED_MEMORY */
3245 } else {
3246 mem->lopage = FALSE;
3247 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
3248 vm_page_free_count++;
3249 queue_free = &vm_page_queue_free[VM_PAGE_GET_COLOR(mem)].qhead;
3250 }
3251 if (mem->vm_page_q_state == VM_PAGE_ON_FREE_Q) {
3252 #if defined(__x86_64__)
3253 vm_page_queue_enter_clump(queue_free, mem, vm_page_t, pageq);
3254 #else
3255 vm_page_queue_enter(queue_free, mem, vm_page_t, pageq);
3256 #endif
3257 } else
3258 vm_page_queue_enter_first(queue_free, mem, vm_page_t, pageq);
3259 }
3260
3261 /*
3262 * vm_page_wait:
3263 *
3264 * Wait for a page to become available.
3265 * If there are plenty of free pages, then we don't sleep.
3266 *
3267 * Returns:
3268 * TRUE: There may be another page, try again
3269 * FALSE: We were interrupted out of our wait, don't try again
3270 */
3271
3272 boolean_t
3273 vm_page_wait(
3274 int interruptible )
3275 {
3276 /*
3277 * We can't use vm_page_free_reserved to make this
3278 * determination. Consider: some thread might
3279 * need to allocate two pages. The first allocation
3280 * succeeds, the second fails. After the first page is freed,
3281 * a call to vm_page_wait must really block.
3282 */
3283 kern_return_t wait_result;
3284 int need_wakeup = 0;
3285 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
3286
3287 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3288
3289 if (is_privileged && vm_page_free_count) {
3290 lck_mtx_unlock(&vm_page_queue_free_lock);
3291 return TRUE;
3292 }
3293
3294 if (vm_page_free_count >= vm_page_free_target) {
3295 lck_mtx_unlock(&vm_page_queue_free_lock);
3296 return TRUE;
3297 }
3298
3299 if (is_privileged) {
3300 if (vm_page_free_wanted_privileged++ == 0)
3301 need_wakeup = 1;
3302 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
3303 #if CONFIG_SECLUDED_MEMORY
3304 } else if (secluded_for_apps &&
3305 task_can_use_secluded_mem(current_task())) {
3306 #if 00
3307 /* XXX FBDP: need pageq lock for this... */
3308 /* XXX FBDP: might wait even if pages available, */
3309 /* XXX FBDP: hopefully not for too long... */
3310 if (vm_page_secluded_count > 0) {
3311 lck_mtx_unlock(&vm_page_queue_free_lock);
3312 return TRUE;
3313 }
3314 #endif
3315 if (vm_page_free_wanted_secluded++ == 0) {
3316 need_wakeup = 1;
3317 }
3318 wait_result = assert_wait(
3319 (event_t)&vm_page_free_wanted_secluded,
3320 interruptible);
3321 #endif /* CONFIG_SECLUDED_MEMORY */
3322 } else {
3323 if (vm_page_free_wanted++ == 0)
3324 need_wakeup = 1;
3325 wait_result = assert_wait((event_t)&vm_page_free_count,
3326 interruptible);
3327 }
3328 lck_mtx_unlock(&vm_page_queue_free_lock);
3329 counter(c_vm_page_wait_block++);
3330
3331 if (need_wakeup)
3332 thread_wakeup((event_t)&vm_page_free_wanted);
3333
3334 if (wait_result == THREAD_WAITING) {
3335 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
3336 vm_page_free_wanted_privileged,
3337 vm_page_free_wanted,
3338 #if CONFIG_SECLUDED_MEMORY
3339 vm_page_free_wanted_secluded,
3340 #else /* CONFIG_SECLUDED_MEMORY */
3341 0,
3342 #endif /* CONFIG_SECLUDED_MEMORY */
3343 0);
3344 wait_result = thread_block(THREAD_CONTINUE_NULL);
3345 VM_DEBUG_EVENT(vm_page_wait_block,
3346 VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
3347 }
3348
3349 return (wait_result == THREAD_AWAKENED);
3350 }
3351
3352 /*
3353 * vm_page_alloc:
3354 *
3355 * Allocate and return a memory cell associated
3356 * with this VM object/offset pair.
3357 *
3358 * Object must be locked.
3359 */
3360
3361 vm_page_t
3362 vm_page_alloc(
3363 vm_object_t object,
3364 vm_object_offset_t offset)
3365 {
3366 vm_page_t mem;
3367 int grab_options;
3368
3369 vm_object_lock_assert_exclusive(object);
3370 grab_options = 0;
3371 #if CONFIG_SECLUDED_MEMORY
3372 if (object->can_grab_secluded) {
3373 grab_options |= VM_PAGE_GRAB_SECLUDED;
3374 }
3375 #endif /* CONFIG_SECLUDED_MEMORY */
3376 mem = vm_page_grab_options(grab_options);
3377 if (mem == VM_PAGE_NULL)
3378 return VM_PAGE_NULL;
3379
3380 vm_page_insert(mem, object, offset);
3381
3382 return(mem);
3383 }
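
/*
 * Illustrative sketch of the usual vm_page_alloc() retry pattern: a
 * hypothetical helper drops the object lock while waiting for free memory,
 * then re-takes it and retries.  A real caller must also re-validate the
 * object/offset state after re-acquiring the lock, since another thread
 * may have raced in.
 */
#if 0
static vm_page_t
example_alloc_page_wait(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t	m;

	while ((m = vm_page_alloc(object, offset)) == VM_PAGE_NULL) {
		vm_object_unlock(object);
		VM_PAGE_WAIT();
		vm_object_lock(object);
	}
	return m;
}
#endif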
3384
3385 /*
3386 * vm_page_alloc_guard:
3387 *
3388 * Allocate a fictitious page which will be used
3389 * as a guard page. The page will be inserted into
3390 * the object and returned to the caller.
3391 */
3392
3393 vm_page_t
3394 vm_page_alloc_guard(
3395 vm_object_t object,
3396 vm_object_offset_t offset)
3397 {
3398 vm_page_t mem;
3399
3400 vm_object_lock_assert_exclusive(object);
3401 mem = vm_page_grab_guard();
3402 if (mem == VM_PAGE_NULL)
3403 return VM_PAGE_NULL;
3404
3405 vm_page_insert(mem, object, offset);
3406
3407 return(mem);
3408 }
3409
3410
3411 counter(unsigned int c_laundry_pages_freed = 0;)
3412
3413 /*
3414 * vm_page_free_prepare:
3415 *
3416 * Removes page from any queue it may be on
3417 * and disassociates it from its VM object.
3418 *
3419 * Object and page queues must be locked prior to entry.
3420 */
3421 static void
3422 vm_page_free_prepare(
3423 vm_page_t mem)
3424 {
3425 vm_page_free_prepare_queues(mem);
3426 vm_page_free_prepare_object(mem, TRUE);
3427 }
3428
3429
3430 void
3431 vm_page_free_prepare_queues(
3432 vm_page_t mem)
3433 {
3434 vm_object_t m_object;
3435
3436 VM_PAGE_CHECK(mem);
3437
3438 assert(mem->vm_page_q_state != VM_PAGE_ON_FREE_Q);
3439 assert(!mem->cleaning);
3440 m_object = VM_PAGE_OBJECT(mem);
3441
3442 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3443 if (m_object) {
3444 vm_object_lock_assert_exclusive(m_object);
3445 }
3446 if (mem->laundry) {
3447 /*
3448 * We may have to free a page while it's being laundered
3449 * if we lost its pager (due to a forced unmount, for example).
3450 * We need to call vm_pageout_steal_laundry() before removing
3451 * the page from its VM object, so that we can remove it
3452 * from its pageout queue and adjust the laundry accounting.
3453 */
3454 vm_pageout_steal_laundry(mem, TRUE);
3455 counter(++c_laundry_pages_freed);
3456 }
3457
3458 vm_page_queues_remove(mem, TRUE);
3459
3460 if (VM_PAGE_WIRED(mem)) {
3461 assert(mem->wire_count > 0);
3462
3463 if (m_object) {
3464
3465 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
3466 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
3467 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
3468
3469 assert(m_object->resident_page_count >=
3470 m_object->wired_page_count);
3471
3472 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
3473 OSAddAtomic(+1, &vm_page_purgeable_count);
3474 assert(vm_page_purgeable_wired_count > 0);
3475 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
3476 }
3477 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
3478 m_object->purgable == VM_PURGABLE_EMPTY) &&
3479 m_object->vo_purgeable_owner != TASK_NULL) {
3480 task_t owner;
3481
3482 owner = m_object->vo_purgeable_owner;
3483 /*
3484 * While wired, this page was accounted
3485 * as "non-volatile" but it should now
3486 * be accounted as "volatile".
3487 */
3488 /* one less "non-volatile"... */
3489 ledger_debit(owner->ledger,
3490 task_ledgers.purgeable_nonvolatile,
3491 PAGE_SIZE);
3492 /* ... and "phys_footprint" */
3493 ledger_debit(owner->ledger,
3494 task_ledgers.phys_footprint,
3495 PAGE_SIZE);
3496 /* one more "volatile" */
3497 ledger_credit(owner->ledger,
3498 task_ledgers.purgeable_volatile,
3499 PAGE_SIZE);
3500 }
3501 }
3502 if (!mem->private && !mem->fictitious)
3503 vm_page_wire_count--;
3504
3505 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
3506 mem->wire_count = 0;
3507 assert(!mem->gobbled);
3508 } else if (mem->gobbled) {
3509 if (!mem->private && !mem->fictitious)
3510 vm_page_wire_count--;
3511 vm_page_gobble_count--;
3512 }
3513 }
3514
3515
3516 void
3517 vm_page_free_prepare_object(
3518 vm_page_t mem,
3519 boolean_t remove_from_hash)
3520 {
3521 if (mem->tabled)
3522 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
3523
3524 PAGE_WAKEUP(mem); /* clears wanted */
3525
3526 if (mem->private) {
3527 mem->private = FALSE;
3528 mem->fictitious = TRUE;
3529 VM_PAGE_SET_PHYS_PAGE(mem, vm_page_fictitious_addr);
3530 }
3531 if ( !mem->fictitious) {
3532 assert(mem->pageq.next == 0);
3533 assert(mem->pageq.prev == 0);
3534 assert(mem->listq.next == 0);
3535 assert(mem->listq.prev == 0);
3536 #if CONFIG_BACKGROUND_QUEUE
3537 assert(mem->vm_page_backgroundq.next == 0);
3538 assert(mem->vm_page_backgroundq.prev == 0);
3539 #endif /* CONFIG_BACKGROUND_QUEUE */
3540 assert(mem->next_m == 0);
3541 vm_page_init(mem, VM_PAGE_GET_PHYS_PAGE(mem), mem->lopage);
3542 }
3543 }
3544
3545
3546 /*
3547 * vm_page_free:
3548 *
3549 * Returns the given page to the free list,
3550 * disassociating it from any VM object.
3551 *
3552 * Object and page queues must be locked prior to entry.
3553 */
3554 void
3555 vm_page_free(
3556 vm_page_t mem)
3557 {
3558 vm_page_free_prepare(mem);
3559
3560 if (mem->fictitious) {
3561 vm_page_release_fictitious(mem);
3562 } else {
3563 vm_page_release(mem,
3564 TRUE); /* page queues are locked */
3565 }
3566 }
3567
3568
3569 void
3570 vm_page_free_unlocked(
3571 vm_page_t mem,
3572 boolean_t remove_from_hash)
3573 {
3574 vm_page_lockspin_queues();
3575 vm_page_free_prepare_queues(mem);
3576 vm_page_unlock_queues();
3577
3578 vm_page_free_prepare_object(mem, remove_from_hash);
3579
3580 if (mem->fictitious) {
3581 vm_page_release_fictitious(mem);
3582 } else {
3583 vm_page_release(mem, FALSE); /* page queues are not locked */
3584 }
3585 }
3586
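/*
 * Illustrative sketch of the two freeing paths above, assuming "m" is a
 * tabled page whose object is locked exclusively by the caller:
 * vm_page_free() requires the page queues lock to already be held, while
 * vm_page_free_unlocked() takes and drops it internally.
 */
#if 0
static void
example_free_resident_page(
	vm_page_t	m,
	boolean_t	queues_locked)
{
	if (queues_locked) {
		vm_page_free(m);		/* page queues lock already held */
	} else {
		vm_page_free_unlocked(m, TRUE);	/* takes the queues lock itself */
	}
}
#endif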
3587
3588 /*
3589 * Free a list of pages. The list can be up to several hundred pages,
3590 * as batched up by vm_pageout_scan().
3591 * The big win is not having to take the free list lock once
3592 * per page.
3593 *
3594 * The VM page queues lock (vm_page_queue_lock) should NOT be held.
3595 * The VM page free queues lock (vm_page_queue_free_lock) should NOT be held.
3596 */
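
/*
 * Illustrative sketch of how a caller batches pages for vm_page_free_list()
 * below: pages already removed from the paging queues are chained through
 * their "snext" field and handed over in one call, so the free-list lock is
 * taken once per batch rather than once per page.
 */
#if 0
static void
example_free_page_batch(vm_page_t *pages, unsigned int count)
{
	vm_page_t	local_freeq = VM_PAGE_NULL;
	unsigned int	i;

	for (i = 0; i < count; i++) {
		pages[i]->snext = local_freeq;
		local_freeq = pages[i];
	}
	if (local_freeq != VM_PAGE_NULL)
		vm_page_free_list(local_freeq, TRUE);
}
#endif
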
3597 void
3598 vm_page_free_list(
3599 vm_page_t freeq,
3600 boolean_t prepare_object)
3601 {
3602 vm_page_t mem;
3603 vm_page_t nxt;
3604 vm_page_t local_freeq;
3605 int pg_count;
3606
3607 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
3608 LCK_MTX_ASSERT(&vm_page_queue_free_lock, LCK_MTX_ASSERT_NOTOWNED);
3609
3610 while (freeq) {
3611
3612 pg_count = 0;
3613 local_freeq = VM_PAGE_NULL;
3614 mem = freeq;
3615
3616 /*
3617 * break up the processing into smaller chunks so
3618 * that we can 'pipeline' the pages onto the
3619 * free list w/o introducing too much
3620 * contention on the global free queue lock
3621 */
3622 while (mem && pg_count < 64) {
3623
3624 assert((mem->vm_page_q_state == VM_PAGE_NOT_ON_Q) ||
3625 (mem->vm_page_q_state == VM_PAGE_IS_WIRED));
3626 #if CONFIG_BACKGROUND_QUEUE
3627 assert(mem->vm_page_backgroundq.next == 0 &&
3628 mem->vm_page_backgroundq.prev == 0 &&
3629 mem->vm_page_on_backgroundq == FALSE);
3630 #endif
3631 nxt = mem->snext;
3632 mem->snext = NULL;
3633 assert(mem->pageq.prev == 0);
3634
3635 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
3636 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
3637 }
3638 if (prepare_object == TRUE)
3639 vm_page_free_prepare_object(mem, TRUE);
3640
3641 if (!mem->fictitious) {
3642 assert(mem->busy);
3643
3644 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
3645 vm_lopage_free_count < vm_lopage_free_limit &&
3646 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3647 vm_page_release(mem, FALSE); /* page queues are not locked */
3648 #if CONFIG_SECLUDED_MEMORY
3649 } else if (vm_page_secluded_count < vm_page_secluded_target &&
3650 num_tasks_can_use_secluded_mem == 0) {
3651 vm_page_release(mem,
3652 FALSE); /* page queues are not locked */
3653 #endif /* CONFIG_SECLUDED_MEMORY */
3654 } else {
3655 /*
3656 * IMPORTANT: we can't set the page "free" here
3657 * because that would make the page eligible for
3658 * a physically-contiguous allocation (see
3659 * vm_page_find_contiguous()) right away (we don't
3660 * hold the vm_page_queue_free lock). That would
3661 * cause trouble because the page is not actually
3662 * in the free queue yet...
3663 */
3664 mem->snext = local_freeq;
3665 local_freeq = mem;
3666 pg_count++;
3667
3668 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
3669 }
3670 } else {
3671 assert(VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_fictitious_addr ||
3672 VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_guard_addr);
3673 vm_page_release_fictitious(mem);
3674 }
3675 mem = nxt;
3676 }
3677 freeq = mem;
3678
3679 if ( (mem = local_freeq) ) {
3680 unsigned int avail_free_count;
3681 unsigned int need_wakeup = 0;
3682 unsigned int need_priv_wakeup = 0;
3683 #if CONFIG_SECLUDED_MEMORY
3684 unsigned int need_wakeup_secluded = 0;
3685 #endif /* CONFIG_SECLUDED_MEMORY */
3686
3687 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3688
3689 while (mem) {
3690 int color;
3691
3692 nxt = mem->snext;
3693
3694 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
3695 assert(mem->busy);
3696 mem->lopage = FALSE;
3697 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
3698
3699 color = VM_PAGE_GET_COLOR(mem);
3700 #if defined(__x86_64__)
3701 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead,
3702 mem,
3703 vm_page_t,
3704 pageq);
3705 #else
3706 vm_page_queue_enter(&vm_page_queue_free[color].qhead,
3707 mem,
3708 vm_page_t,
3709 pageq);
3710 #endif
3711 mem = nxt;
3712 }
3713 vm_page_free_count += pg_count;
3714 avail_free_count = vm_page_free_count;
3715
3716 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
3717
3718 if (avail_free_count < vm_page_free_wanted_privileged) {
3719 need_priv_wakeup = avail_free_count;
3720 vm_page_free_wanted_privileged -= avail_free_count;
3721 avail_free_count = 0;
3722 } else {
3723 need_priv_wakeup = vm_page_free_wanted_privileged;
3724 avail_free_count -= vm_page_free_wanted_privileged;
3725 vm_page_free_wanted_privileged = 0;
3726 }
3727 }
3728 #if CONFIG_SECLUDED_MEMORY
3729 if (vm_page_free_wanted_secluded > 0 &&
3730 avail_free_count > vm_page_free_reserved) {
3731 unsigned int available_pages;
3732 available_pages = (avail_free_count -
3733 vm_page_free_reserved);
3734 if (available_pages <
3735 vm_page_free_wanted_secluded) {
3736 need_wakeup_secluded = available_pages;
3737 vm_page_free_wanted_secluded -=
3738 available_pages;
3739 avail_free_count -= available_pages;
3740 } else {
3741 need_wakeup_secluded =
3742 vm_page_free_wanted_secluded;
3743 avail_free_count -=
3744 vm_page_free_wanted_secluded;
3745 vm_page_free_wanted_secluded = 0;
3746 }
3747 }
3748 #endif /* CONFIG_SECLUDED_MEMORY */
3749 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
3750 unsigned int available_pages;
3751
3752 available_pages = avail_free_count - vm_page_free_reserved;
3753
3754 if (available_pages >= vm_page_free_wanted) {
3755 need_wakeup = vm_page_free_wanted;
3756 vm_page_free_wanted = 0;
3757 } else {
3758 need_wakeup = available_pages;
3759 vm_page_free_wanted -= available_pages;
3760 }
3761 }
3762 lck_mtx_unlock(&vm_page_queue_free_lock);
3763
3764 if (need_priv_wakeup != 0) {
3765 /*
3766 * There shouldn't be that many VM-privileged threads,
3767 * so let's wake them all up, even if we don't quite
3768 * have enough pages to satisfy them all.
3769 */
3770 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
3771 }
3772 #if CONFIG_SECLUDED_MEMORY
3773 if (need_wakeup_secluded != 0 &&
3774 vm_page_free_wanted_secluded == 0) {
3775 thread_wakeup((event_t)
3776 &vm_page_free_wanted_secluded);
3777 } else {
3778 for (;
3779 need_wakeup_secluded != 0;
3780 need_wakeup_secluded--) {
3781 thread_wakeup_one(
3782 (event_t)
3783 &vm_page_free_wanted_secluded);
3784 }
3785 }
3786 #endif /* CONFIG_SECLUDED_MEMORY */
3787 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
3788 /*
3789 * We don't expect to have any more waiters
3790 * after this, so let's wake them all up at
3791 * once.
3792 */
3793 thread_wakeup((event_t) &vm_page_free_count);
3794 } else for (; need_wakeup != 0; need_wakeup--) {
3795 /*
3796 * Wake up one waiter per page we just released.
3797 */
3798 thread_wakeup_one((event_t) &vm_page_free_count);
3799 }
3800
3801 VM_CHECK_MEMORYSTATUS;
3802 }
3803 }
3804 }
3805
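/*
 * Editor's note -- illustrative sketch only, not part of this file.  Callers
 * of vm_page_free_list() chain their pages through the "snext" field and pass
 * the head of that singly-linked list; neither the page queues lock nor the
 * free queue lock may be held.  The example_free_batch() helper below is
 * hypothetical, compiled out as an illustration:
 */
#if 0	/* illustration only */
static void
example_free_batch(vm_page_t *pages, unsigned int count)
{
	vm_page_t	freeq = VM_PAGE_NULL;
	unsigned int	i;

	for (i = 0; i < count; i++) {
		pages[i]->snext = freeq;	/* build the snext chain */
		freeq = pages[i];
	}
	/* TRUE: also run vm_page_free_prepare_object() on each page */
	vm_page_free_list(freeq, TRUE);
}
#endif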
3806
3807 /*
3808 * vm_page_wire:
3809 *
3810 * Mark this page as wired down by yet
3811 * another map, removing it from paging queues
3812 * as necessary.
3813 *
3814 * The page's object and the page queues must be locked.
3815 */
3816
3817
3818 void
3819 vm_page_wire(
3820 vm_page_t mem,
3821 vm_tag_t tag,
3822 boolean_t check_memorystatus)
3823 {
3824 vm_object_t m_object;
3825
3826 m_object = VM_PAGE_OBJECT(mem);
3827
3828 // dbgLog(current_thread(), mem->offset, m_object, 1); /* (TEST/DEBUG) */
3829
3830 VM_PAGE_CHECK(mem);
3831 if (m_object) {
3832 vm_object_lock_assert_exclusive(m_object);
3833 } else {
3834 /*
3835 * In theory, the page should be in an object before it
3836 * gets wired, since we need to hold the object lock
3837 * to update some fields in the page structure.
3838 * However, some code (i386 pmap, for example) might want
3839 * to wire a page before it gets inserted into an object.
3840 * That's somewhat OK, as long as nobody else can get to
3841 * that page and update it at the same time.
3842 */
3843 }
3844 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3845 if ( !VM_PAGE_WIRED(mem)) {
3846
3847 if (mem->laundry)
3848 vm_pageout_steal_laundry(mem, TRUE);
3849
3850 vm_page_queues_remove(mem, TRUE);
3851
3852 assert(mem->wire_count == 0);
3853 mem->vm_page_q_state = VM_PAGE_IS_WIRED;
3854
3855 if (m_object) {
3856
3857 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
3858 VM_OBJECT_WIRED_PAGE_ADD(m_object, mem);
3859 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, tag);
3860
3861 assert(m_object->resident_page_count >=
3862 m_object->wired_page_count);
3863 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
3864 assert(vm_page_purgeable_count > 0);
3865 OSAddAtomic(-1, &vm_page_purgeable_count);
3866 OSAddAtomic(1, &vm_page_purgeable_wired_count);
3867 }
3868 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
3869 m_object->purgable == VM_PURGABLE_EMPTY) &&
3870 m_object->vo_purgeable_owner != TASK_NULL) {
3871 task_t owner;
3872
3873 owner = m_object->vo_purgeable_owner;
3874 /* less volatile bytes */
3875 ledger_debit(owner->ledger,
3876 task_ledgers.purgeable_volatile,
3877 PAGE_SIZE);
3878 /* more not-quite-volatile bytes */
3879 ledger_credit(owner->ledger,
3880 task_ledgers.purgeable_nonvolatile,
3881 PAGE_SIZE);
3882 /* more footprint */
3883 ledger_credit(owner->ledger,
3884 task_ledgers.phys_footprint,
3885 PAGE_SIZE);
3886 }
3887 if (m_object->all_reusable) {
3888 /*
3889 * Wired pages are not counted as "re-usable"
3890 * in "all_reusable" VM objects, so nothing
3891 * to do here.
3892 */
3893 } else if (mem->reusable) {
3894 /*
3895 * This page is not "re-usable" when it's
3896 * wired, so adjust its state and the
3897 * accounting.
3898 */
3899 vm_object_reuse_pages(m_object,
3900 mem->offset,
3901 mem->offset+PAGE_SIZE_64,
3902 FALSE);
3903 }
3904 }
3905 assert(!mem->reusable);
3906
3907 if (!mem->private && !mem->fictitious && !mem->gobbled)
3908 vm_page_wire_count++;
3909 if (mem->gobbled)
3910 vm_page_gobble_count--;
3911 mem->gobbled = FALSE;
3912
3913 if (check_memorystatus == TRUE) {
3914 VM_CHECK_MEMORYSTATUS;
3915 }
3916 }
3917 assert(!mem->gobbled);
3918 assert(mem->vm_page_q_state == VM_PAGE_IS_WIRED);
3919 mem->wire_count++;
3920 if (__improbable(mem->wire_count == 0)) {
3921 panic("vm_page_wire(%p): wire_count overflow", mem);
3922 }
3923 VM_PAGE_CHECK(mem);
3924 }
3925
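/*
 * Editor's note -- illustrative sketch only, not part of this file.
 * vm_page_wire() and vm_page_unwire() must be called with the page's object
 * locked exclusively and the page queues lock held; wirings nest through
 * wire_count.  The helper below is hypothetical, and VM_KERN_MEMORY_NONE is
 * just a placeholder for whatever tag a real caller would use:
 */
#if 0	/* illustration only */
static void
example_wire_then_unwire(vm_object_t object, vm_page_t mem)
{
	vm_object_lock(object);
	vm_page_lockspin_queues();
	vm_page_wire(mem, VM_KERN_MEMORY_NONE, TRUE);	/* TRUE: run the memorystatus check */
	/* ... use the wired page ... */
	vm_page_unwire(mem, TRUE);	/* TRUE: re-queue the page once fully unwired */
	vm_page_unlock_queues();
	vm_object_unlock(object);
}
#endif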
3926 /*
3927 * vm_page_unwire:
3928 *
3929 * Release one wiring of this page, potentially
3930 * enabling it to be paged again.
3931 *
3932 * The page's object and the page queues must be locked.
3933 */
3934 void
3935 vm_page_unwire(
3936 vm_page_t mem,
3937 boolean_t queueit)
3938 {
3939 vm_object_t m_object;
3940
3941 m_object = VM_PAGE_OBJECT(mem);
3942
3943 // dbgLog(current_thread(), mem->offset, m_object, 0); /* (TEST/DEBUG) */
3944
3945 VM_PAGE_CHECK(mem);
3946 assert(VM_PAGE_WIRED(mem));
3947 assert(mem->wire_count > 0);
3948 assert(!mem->gobbled);
3949 assert(m_object != VM_OBJECT_NULL);
3950 vm_object_lock_assert_exclusive(m_object);
3951 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3952 if (--mem->wire_count == 0) {
3953
3954 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
3955
3956 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
3957 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
3958 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
3959 if (!mem->private && !mem->fictitious) {
3960 vm_page_wire_count--;
3961 }
3962
3963 assert(m_object->resident_page_count >=
3964 m_object->wired_page_count);
3965 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
3966 OSAddAtomic(+1, &vm_page_purgeable_count);
3967 assert(vm_page_purgeable_wired_count > 0);
3968 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
3969 }
3970 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
3971 m_object->purgable == VM_PURGABLE_EMPTY) &&
3972 m_object->vo_purgeable_owner != TASK_NULL) {
3973 task_t owner;
3974
3975 owner = m_object->vo_purgeable_owner;
3976 /* more volatile bytes */
3977 ledger_credit(owner->ledger,
3978 task_ledgers.purgeable_volatile,
3979 PAGE_SIZE);
3980 /* less not-quite-volatile bytes */
3981 ledger_debit(owner->ledger,
3982 task_ledgers.purgeable_nonvolatile,
3983 PAGE_SIZE);
3984 /* less footprint */
3985 ledger_debit(owner->ledger,
3986 task_ledgers.phys_footprint,
3987 PAGE_SIZE);
3988 }
3989 assert(m_object != kernel_object);
3990 assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
3991
3992 if (queueit == TRUE) {
3993 if (m_object->purgable == VM_PURGABLE_EMPTY) {
3994 vm_page_deactivate(mem);
3995 } else {
3996 vm_page_activate(mem);
3997 }
3998 }
3999
4000 VM_CHECK_MEMORYSTATUS;
4001
4002 }
4003 VM_PAGE_CHECK(mem);
4004 }
4005
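/*
 * Editor's note: for a page of a volatile (or empty) purgeable object with an
 * owning task, vm_page_wire() and vm_page_unwire() above keep the ledgers in
 * sync symmetrically -- taking the page's first wiring moves one page's worth
 * of bytes from "purgeable_volatile" to "purgeable_nonvolatile" and adds it to
 * "phys_footprint"; dropping the last wiring reverses all three.
 */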
4006 /*
4007 * vm_page_deactivate:
4008 *
4009 * Returns the given page to the inactive list,
4010 * indicating that no physical maps have access
4011 * to this page. [Used by the physical mapping system.]
4012 *
4013 * The page queues must be locked.
4014 */
4015 void
4016 vm_page_deactivate(
4017 vm_page_t m)
4018 {
4019 vm_page_deactivate_internal(m, TRUE);
4020 }
4021
4022
4023 void
4024 vm_page_deactivate_internal(
4025 vm_page_t m,
4026 boolean_t clear_hw_reference)
4027 {
4028 vm_object_t m_object;
4029
4030 m_object = VM_PAGE_OBJECT(m);
4031
4032 VM_PAGE_CHECK(m);
4033 assert(m_object != kernel_object);
4034 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4035
4036 // dbgLog(VM_PAGE_GET_PHYS_PAGE(m), vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
4037 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4038 /*
4039 * This page is no longer very interesting. If it was
4040 * interesting (active or inactive/referenced), then we
4041 * clear the reference bit and (re)enter it in the
4042 * inactive queue. Note wired pages should not have
4043 * their reference bit cleared.
4044 */
4045 assert ( !(m->absent && !m->unusual));
4046
4047 if (m->gobbled) { /* can this happen? */
4048 assert( !VM_PAGE_WIRED(m));
4049
4050 if (!m->private && !m->fictitious)
4051 vm_page_wire_count--;
4052 vm_page_gobble_count--;
4053 m->gobbled = FALSE;
4054 }
4055 /*
4056 * if this page is currently on the pageout queue, we can't do the
4057 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4058 * and we can't remove it manually since we would need the object lock
4059 * (which is not required here) to decrement the activity_in_progress
4060 * reference which is held on the object while the page is in the pageout queue...
4061 * just let the normal laundry processing proceed
4062 */
4063 if (m->laundry || m->private || m->fictitious ||
4064 (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4065 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
4066 VM_PAGE_WIRED(m)) {
4067 return;
4068 }
4069 if (!m->absent && clear_hw_reference == TRUE)
4070 pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));
4071
4072 m->reference = FALSE;
4073 m->no_cache = FALSE;
4074
4075 if ( !VM_PAGE_INACTIVE(m)) {
4076 vm_page_queues_remove(m, FALSE);
4077
4078 if (!VM_DYNAMIC_PAGING_ENABLED() &&
4079 m->dirty && m_object->internal &&
4080 (m_object->purgable == VM_PURGABLE_DENY ||
4081 m_object->purgable == VM_PURGABLE_NONVOLATILE ||
4082 m_object->purgable == VM_PURGABLE_VOLATILE)) {
4083 vm_page_check_pageable_safe(m);
4084 vm_page_queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
4085 m->vm_page_q_state = VM_PAGE_ON_THROTTLED_Q;
4086 vm_page_throttled_count++;
4087 } else {
4088 if (m_object->named && m_object->ref_count == 1) {
4089 vm_page_speculate(m, FALSE);
4090 #if DEVELOPMENT || DEBUG
4091 vm_page_speculative_recreated++;
4092 #endif
4093 } else {
4094 vm_page_enqueue_inactive(m, FALSE);
4095 }
4096 }
4097 }
4098 }
4099
4100 /*
4101 * vm_page_enqueue_cleaned
4102 *
4103 * Put the page on the cleaned queue, mark it cleaned, etc.
4104 * Being on the cleaned queue (i.e. having vm_page_q_state set to
4105 * VM_PAGE_ON_INACTIVE_CLEANED_Q) does ** NOT ** guarantee that the page is clean!
4106 *
4107 * Call with the queues lock held.
4108 */
4109
4110 void vm_page_enqueue_cleaned(vm_page_t m)
4111 {
4112 vm_object_t m_object;
4113
4114 m_object = VM_PAGE_OBJECT(m);
4115
4116 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4117 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4118 assert( !(m->absent && !m->unusual));
4119
4120 if (VM_PAGE_WIRED(m)) {
4121 return;
4122 }
4123
4124 if (m->gobbled) {
4125 if (!m->private && !m->fictitious)
4126 vm_page_wire_count--;
4127 vm_page_gobble_count--;
4128 m->gobbled = FALSE;
4129 }
4130 /*
4131 * if this page is currently on the pageout queue, we can't do the
4132 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4133 * and we can't remove it manually since we would need the object lock
4134 * (which is not required here) to decrement the activity_in_progress
4135 * reference which is held on the object while the page is in the pageout queue...
4136 * just let the normal laundry processing proceed
4137 */
4138 if (m->laundry || m->private || m->fictitious ||
4139 (m->vm_page_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) ||
4140 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
4141 return;
4142 }
4143 vm_page_queues_remove(m, FALSE);
4144
4145 vm_page_check_pageable_safe(m);
4146 vm_page_queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
4147 m->vm_page_q_state = VM_PAGE_ON_INACTIVE_CLEANED_Q;
4148 vm_page_cleaned_count++;
4149
4150 vm_page_inactive_count++;
4151 if (m_object->internal) {
4152 vm_page_pageable_internal_count++;
4153 } else {
4154 vm_page_pageable_external_count++;
4155 }
4156 #if CONFIG_BACKGROUND_QUEUE
4157 if (m->vm_page_in_background)
4158 vm_page_add_to_backgroundq(m, TRUE);
4159 #endif
4160 vm_pageout_enqueued_cleaned++;
4161 }
4162
4163 /*
4164 * vm_page_activate:
4165 *
4166 * Put the specified page on the active list (if appropriate).
4167 *
4168 * The page queues must be locked.
4169 */
4170
4171 void
4172 vm_page_activate(
4173 vm_page_t m)
4174 {
4175 vm_object_t m_object;
4176
4177 m_object = VM_PAGE_OBJECT(m);
4178
4179 VM_PAGE_CHECK(m);
4180 #ifdef FIXME_4778297
4181 assert(m_object != kernel_object);
4182 #endif
4183 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4184 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4185 assert( !(m->absent && !m->unusual));
4186
4187 if (m->gobbled) {
4188 assert( !VM_PAGE_WIRED(m));
4189 if (!m->private && !m->fictitious)
4190 vm_page_wire_count--;
4191 vm_page_gobble_count--;
4192 m->gobbled = FALSE;
4193 }
4194 /*
4195 * if this page is currently on the pageout queue, we can't do the
4196 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4197 * and we can't remove it manually since we would need the object lock
4198 * (which is not required here) to decrement the activity_in_progress
4199 * reference which is held on the object while the page is in the pageout queue...
4200 * just let the normal laundry processing proceed
4201 */
4202 if (m->laundry || m->private || m->fictitious ||
4203 (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4204 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q))
4205 return;
4206
4207 #if DEBUG
4208 if (m->vm_page_q_state == VM_PAGE_ON_ACTIVE_Q)
4209 panic("vm_page_activate: already active");
4210 #endif
4211
4212 if (m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
4213 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
4214 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
4215 }
4216
4217 vm_page_queues_remove(m, FALSE);
4218
4219 if ( !VM_PAGE_WIRED(m)) {
4220 vm_page_check_pageable_safe(m);
4221 if (!VM_DYNAMIC_PAGING_ENABLED() &&
4222 m->dirty && m_object->internal &&
4223 (m_object->purgable == VM_PURGABLE_DENY ||
4224 m_object->purgable == VM_PURGABLE_NONVOLATILE ||
4225 m_object->purgable == VM_PURGABLE_VOLATILE)) {
4226 vm_page_queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
4227 m->vm_page_q_state = VM_PAGE_ON_THROTTLED_Q;
4228 vm_page_throttled_count++;
4229 } else {
4230 #if CONFIG_SECLUDED_MEMORY
4231 if (secluded_for_filecache &&
4232 vm_page_secluded_target != 0 &&
4233 num_tasks_can_use_secluded_mem == 0 &&
4234 m_object->eligible_for_secluded) {
4235 vm_page_queue_enter(&vm_page_queue_secluded, m,
4236 vm_page_t, pageq);
4237 m->vm_page_q_state = VM_PAGE_ON_SECLUDED_Q;
4238 vm_page_secluded_count++;
4239 vm_page_secluded_count_inuse++;
4240 assert(!m_object->internal);
4241 // vm_page_pageable_external_count++;
4242 } else
4243 #endif /* CONFIG_SECLUDED_MEMORY */
4244 vm_page_enqueue_active(m, FALSE);
4245 }
4246 m->reference = TRUE;
4247 m->no_cache = FALSE;
4248 }
4249 VM_PAGE_CHECK(m);
4250 }
4251
4252
4253 /*
4254 * vm_page_speculate:
4255 *
4256 * Put the specified page on the speculative list (if appropriate).
4257 *
4258 * The page queues must be locked.
4259 */
4260 void
4261 vm_page_speculate(
4262 vm_page_t m,
4263 boolean_t new)
4264 {
4265 struct vm_speculative_age_q *aq;
4266 vm_object_t m_object;
4267
4268 m_object = VM_PAGE_OBJECT(m);
4269
4270 VM_PAGE_CHECK(m);
4271 vm_page_check_pageable_safe(m);
4272
4273 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4274 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4275 assert( !(m->absent && !m->unusual));
4276 assert(m_object->internal == FALSE);
4277
4278 /*
4279 * if this page is currently on the pageout queue, we can't do the
4280 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4281 * and we can't remove it manually since we would need the object lock
4282 * (which is not required here) to decrement the activity_in_progress
4283 * reference which is held on the object while the page is in the pageout queue...
4284 * just let the normal laundry processing proceed
4285 */
4286 if (m->laundry || m->private || m->fictitious ||
4287 (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4288 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q))
4289 return;
4290
4291 vm_page_queues_remove(m, FALSE);
4292
4293 if ( !VM_PAGE_WIRED(m)) {
4294 mach_timespec_t ts;
4295 clock_sec_t sec;
4296 clock_nsec_t nsec;
4297
4298 clock_get_system_nanotime(&sec, &nsec);
4299 ts.tv_sec = (unsigned int) sec;
4300 ts.tv_nsec = nsec;
4301
4302 if (vm_page_speculative_count == 0) {
4303
4304 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4305 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4306
4307 aq = &vm_page_queue_speculative[speculative_age_index];
4308
4309 /*
4310 * set the timer to begin a new group
4311 */
4312 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
4313 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
4314
4315 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
4316 } else {
4317 aq = &vm_page_queue_speculative[speculative_age_index];
4318
4319 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
4320
4321 speculative_age_index++;
4322
4323 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
4324 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4325 if (speculative_age_index == speculative_steal_index) {
4326 speculative_steal_index = speculative_age_index + 1;
4327
4328 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
4329 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4330 }
4331 aq = &vm_page_queue_speculative[speculative_age_index];
4332
4333 if (!vm_page_queue_empty(&aq->age_q))
4334 vm_page_speculate_ageit(aq);
4335
4336 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
4337 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
4338
4339 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
4340 }
4341 }
4342 vm_page_enqueue_tail(&aq->age_q, &m->pageq);
4343 m->vm_page_q_state = VM_PAGE_ON_SPECULATIVE_Q;
4344 vm_page_speculative_count++;
4345 vm_page_pageable_external_count++;
4346
4347 if (new == TRUE) {
4348 vm_object_lock_assert_exclusive(m_object);
4349
4350 m_object->pages_created++;
4351 #if DEVELOPMENT || DEBUG
4352 vm_page_speculative_created++;
4353 #endif
4354 }
4355 }
4356 VM_PAGE_CHECK(m);
4357 }
4358
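/*
 * Editor's note: each speculative aging bin used above is stamped with an
 * expiry time of "now + vm_page_speculative_q_age_ms" (split into tv_sec and
 * tv_nsec).  Once that deadline passes, the next call to vm_page_speculate()
 * advances speculative_age_index (wrapping between the MIN and MAX bins) and,
 * if the newly selected bin still holds pages, drains it into the AGED bin
 * via vm_page_speculate_ageit() below before stamping it with a new deadline.
 */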
4359
4360 /*
4361 * move pages from the specified aging bin to
4362 * the speculative bin that pageout_scan claims from
4363 *
4364 * The page queues must be locked.
4365 */
4366 void
4367 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
4368 {
4369 struct vm_speculative_age_q *sq;
4370 vm_page_t t;
4371
4372 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
4373
4374 if (vm_page_queue_empty(&sq->age_q)) {
4375 sq->age_q.next = aq->age_q.next;
4376 sq->age_q.prev = aq->age_q.prev;
4377
4378 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.next);
4379 t->pageq.prev = VM_PAGE_PACK_PTR(&sq->age_q);
4380
4381 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
4382 t->pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);
4383 } else {
4384 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
4385 t->pageq.next = aq->age_q.next;
4386
4387 t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.next);
4388 t->pageq.prev = sq->age_q.prev;
4389
4390 t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.prev);
4391 t->pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);
4392
4393 sq->age_q.prev = aq->age_q.prev;
4394 }
4395 vm_page_queue_init(&aq->age_q);
4396 }
4397
4398
4399 void
4400 vm_page_lru(
4401 vm_page_t m)
4402 {
4403 VM_PAGE_CHECK(m);
4404 assert(VM_PAGE_OBJECT(m) != kernel_object);
4405 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4406
4407 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4408 /*
4409 * if this page is currently on the pageout queue, we can't do the
4410 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4411 * and we can't remove it manually since we would need the object lock
4412 * (which is not required here) to decrement the activity_in_progress
4413 * reference which is held on the object while the page is in the pageout queue...
4414 * just let the normal laundry processing proceed
4415 */
4416 if (m->laundry || m->private ||
4417 (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4418 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
4419 VM_PAGE_WIRED(m))
4420 return;
4421
4422 m->no_cache = FALSE;
4423
4424 vm_page_queues_remove(m, FALSE);
4425
4426 vm_page_enqueue_inactive(m, FALSE);
4427 }
4428
4429
4430 void
4431 vm_page_reactivate_all_throttled(void)
4432 {
4433 vm_page_t first_throttled, last_throttled;
4434 vm_page_t first_active;
4435 vm_page_t m;
4436 int extra_active_count;
4437 int extra_internal_count, extra_external_count;
4438 vm_object_t m_object;
4439
4440 if (!VM_DYNAMIC_PAGING_ENABLED())
4441 return;
4442
4443 extra_active_count = 0;
4444 extra_internal_count = 0;
4445 extra_external_count = 0;
4446 vm_page_lock_queues();
4447 if (! vm_page_queue_empty(&vm_page_queue_throttled)) {
4448 /*
4449 * Switch "throttled" pages to "active".
4450 */
4451 vm_page_queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
4452 VM_PAGE_CHECK(m);
4453 assert(m->vm_page_q_state == VM_PAGE_ON_THROTTLED_Q);
4454
4455 m_object = VM_PAGE_OBJECT(m);
4456
4457 extra_active_count++;
4458 if (m_object->internal) {
4459 extra_internal_count++;
4460 } else {
4461 extra_external_count++;
4462 }
4463
4464 m->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
4465 VM_PAGE_CHECK(m);
4466 #if CONFIG_BACKGROUND_QUEUE
4467 if (m->vm_page_in_background)
4468 vm_page_add_to_backgroundq(m, FALSE);
4469 #endif
4470 }
4471
4472 /*
4473 * Transfer the entire throttled queue to the regular LRU page queues.
4474 * We insert it at the head of the active queue, so that these pages
4475 * get re-evaluated by the LRU algorithm first, since they've been
4476 * completely out of it until now.
4477 */
4478 first_throttled = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
4479 last_throttled = (vm_page_t) vm_page_queue_last(&vm_page_queue_throttled);
4480 first_active = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
4481 if (vm_page_queue_empty(&vm_page_queue_active)) {
4482 vm_page_queue_active.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
4483 } else {
4484 first_active->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
4485 }
4486 vm_page_queue_active.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_throttled);
4487 first_throttled->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active);
4488 last_throttled->pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active);
4489
4490 #if DEBUG
4491 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
4492 #endif
4493 vm_page_queue_init(&vm_page_queue_throttled);
4494 /*
4495 * Adjust the global page counts.
4496 */
4497 vm_page_active_count += extra_active_count;
4498 vm_page_pageable_internal_count += extra_internal_count;
4499 vm_page_pageable_external_count += extra_external_count;
4500 vm_page_throttled_count = 0;
4501 }
4502 assert(vm_page_throttled_count == 0);
4503 assert(vm_page_queue_empty(&vm_page_queue_throttled));
4504 vm_page_unlock_queues();
4505 }
4506
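/*
 * Editor's note: the pointer surgery above splices the entire throttled queue
 * onto the head of the active queue in O(1) -- only the head and tail links
 * are rewritten (via VM_PAGE_CONVERT_TO_QUEUE_ENTRY, since the queue links may
 * be stored in packed form) instead of requeueing pages one at a time.
 * vm_page_reactivate_local() below uses the same idiom for the local queues.
 */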
4507
4508 /*
4509 * move pages from the indicated local queue to the global active queue
4510 * it's OK to fail if we're below the hard limit and force == FALSE;
4511 * the nolocks == TRUE case is to allow this function to be run on
4512 * the hibernate path
4513 */
4514
4515 void
4516 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
4517 {
4518 struct vpl *lq;
4519 vm_page_t first_local, last_local;
4520 vm_page_t first_active;
4521 vm_page_t m;
4522 uint32_t count = 0;
4523
4524 if (vm_page_local_q == NULL)
4525 return;
4526
4527 lq = &vm_page_local_q[lid].vpl_un.vpl;
4528
4529 if (nolocks == FALSE) {
4530 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
4531 if ( !vm_page_trylockspin_queues())
4532 return;
4533 } else
4534 vm_page_lockspin_queues();
4535
4536 VPL_LOCK(&lq->vpl_lock);
4537 }
4538 if (lq->vpl_count) {
4539 /*
4540 * Switch "local" pages to "active".
4541 */
4542 assert(!vm_page_queue_empty(&lq->vpl_queue));
4543
4544 vm_page_queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
4545 VM_PAGE_CHECK(m);
4546 vm_page_check_pageable_safe(m);
4547 assert(m->vm_page_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q);
4548 assert(!m->fictitious);
4549
4550 if (m->local_id != lid)
4551 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
4552
4553 m->local_id = 0;
4554 m->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
4555 VM_PAGE_CHECK(m);
4556 #if CONFIG_BACKGROUND_QUEUE
4557 if (m->vm_page_in_background)
4558 vm_page_add_to_backgroundq(m, FALSE);
4559 #endif
4560 count++;
4561 }
4562 if (count != lq->vpl_count)
4563 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
4564
4565 /*
4566 * Transfer the entire local queue to the regular LRU page queues.
4567 */
4568 first_local = (vm_page_t) vm_page_queue_first(&lq->vpl_queue);
4569 last_local = (vm_page_t) vm_page_queue_last(&lq->vpl_queue);
4570 first_active = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
4571
4572 if (vm_page_queue_empty(&vm_page_queue_active)) {
4573 vm_page_queue_active.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
4574 } else {
4575 first_active->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
4576 }
4577 vm_page_queue_active.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
4578 first_local->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active);
4579 last_local->pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active);
4580
4581 vm_page_queue_init(&lq->vpl_queue);
4582 /*
4583 * Adjust the global page counts.
4584 */
4585 vm_page_active_count += lq->vpl_count;
4586 vm_page_pageable_internal_count += lq->vpl_internal_count;
4587 vm_page_pageable_external_count += lq->vpl_external_count;
4588 lq->vpl_count = 0;
4589 lq->vpl_internal_count = 0;
4590 lq->vpl_external_count = 0;
4591 }
4592 assert(vm_page_queue_empty(&lq->vpl_queue));
4593
4594 if (nolocks == FALSE) {
4595 VPL_UNLOCK(&lq->vpl_lock);
4596 vm_page_unlock_queues();
4597 }
4598 }
4599
4600 /*
4601 * vm_page_part_zero_fill:
4602 *
4603 * Zero-fill a part of the page.
4604 */
4605 #define PMAP_ZERO_PART_PAGE_IMPLEMENTED
4606 void
4607 vm_page_part_zero_fill(
4608 vm_page_t m,
4609 vm_offset_t m_pa,
4610 vm_size_t len)
4611 {
4612
4613 #if 0
4614 /*
4615 * we don't hold the page queue lock
4616 * so this check isn't safe to make
4617 */
4618 VM_PAGE_CHECK(m);
4619 #endif
4620
4621 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
4622 pmap_zero_part_page(VM_PAGE_GET_PHYS_PAGE(m), m_pa, len);
4623 #else
4624 vm_page_t tmp;
4625 while (1) {
4626 tmp = vm_page_grab();
4627 if (tmp == VM_PAGE_NULL) {
4628 vm_page_wait(THREAD_UNINT);
4629 continue;
4630 }
4631 break;
4632 }
4633 vm_page_zero_fill(tmp);
4634 if(m_pa != 0) {
4635 vm_page_part_copy(m, 0, tmp, 0, m_pa);
4636 }
4637 if((m_pa + len) < PAGE_SIZE) {
4638 vm_page_part_copy(m, m_pa + len, tmp,
4639 m_pa + len, PAGE_SIZE - (m_pa + len));
4640 }
4641 vm_page_copy(tmp,m);
4642 VM_PAGE_FREE(tmp);
4643 #endif
4644
4645 }
4646
4647 /*
4648 * vm_page_zero_fill:
4649 *
4650 * Zero-fill the specified page.
4651 */
4652 void
4653 vm_page_zero_fill(
4654 vm_page_t m)
4655 {
4656 XPR(XPR_VM_PAGE,
4657 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
4658 VM_PAGE_OBJECT(m), m->offset, m, 0,0);
4659 #if 0
4660 /*
4661 * we don't hold the page queue lock
4662 * so this check isn't safe to make
4663 */
4664 VM_PAGE_CHECK(m);
4665 #endif
4666
4667 // dbgTrace(0xAEAEAEAE, VM_PAGE_GET_PHYS_PAGE(m), 0); /* (BRINGUP) */
4668 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
4669 }
4670
4671 /*
4672 * vm_page_part_copy:
4673 *
4674 * copy part of one page to another
4675 */
4676
4677 void
4678 vm_page_part_copy(
4679 vm_page_t src_m,
4680 vm_offset_t src_pa,
4681 vm_page_t dst_m,
4682 vm_offset_t dst_pa,
4683 vm_size_t len)
4684 {
4685 #if 0
4686 /*
4687 * we don't hold the page queue lock
4688 * so this check isn't safe to make
4689 */
4690 VM_PAGE_CHECK(src_m);
4691 VM_PAGE_CHECK(dst_m);
4692 #endif
4693 pmap_copy_part_page(VM_PAGE_GET_PHYS_PAGE(src_m), src_pa,
4694 VM_PAGE_GET_PHYS_PAGE(dst_m), dst_pa, len);
4695 }
4696
4697 /*
4698 * vm_page_copy:
4699 *
4700 * Copy one page to another
4701 */
4702
4703 int vm_page_copy_cs_validations = 0;
4704 int vm_page_copy_cs_tainted = 0;
4705
4706 void
4707 vm_page_copy(
4708 vm_page_t src_m,
4709 vm_page_t dest_m)
4710 {
4711 vm_object_t src_m_object;
4712
4713 src_m_object = VM_PAGE_OBJECT(src_m);
4714
4715 XPR(XPR_VM_PAGE,
4716 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
4717 src_m_object, src_m->offset,
4718 VM_PAGE_OBJECT(dest_m), dest_m->offset,
4719 0);
4720 #if 0
4721 /*
4722 * we don't hold the page queue lock
4723 * so this check isn't safe to make
4724 */
4725 VM_PAGE_CHECK(src_m);
4726 VM_PAGE_CHECK(dest_m);
4727 #endif
4728 vm_object_lock_assert_held(src_m_object);
4729
4730 if (src_m_object != VM_OBJECT_NULL &&
4731 src_m_object->code_signed) {
4732 /*
4733 * We're copying a page from a code-signed object.
4734 * Whoever ends up mapping the copy page might care about
4735 * the original page's integrity, so let's validate the
4736 * source page now.
4737 */
4738 vm_page_copy_cs_validations++;
4739 vm_page_validate_cs(src_m);
4740 #if DEVELOPMENT || DEBUG
4741 DTRACE_VM4(codesigned_copy,
4742 vm_object_t, src_m_object,
4743 vm_object_offset_t, src_m->offset,
4744 int, src_m->cs_validated,
4745 int, src_m->cs_tainted);
4746 #endif /* DEVELOPMENT || DEBUG */
4747
4748 }
4749
4750 if (vm_page_is_slideable(src_m)) {
4751 boolean_t was_busy = src_m->busy;
4752 src_m->busy = TRUE;
4753 (void) vm_page_slide(src_m, 0);
4754 assert(src_m->busy);
4755 if (!was_busy) {
4756 PAGE_WAKEUP_DONE(src_m);
4757 }
4758 }
4759
4760 /*
4761 * Propagate the cs_tainted bit to the copy page. Do not propagate
4762 * the cs_validated bit.
4763 */
4764 dest_m->cs_tainted = src_m->cs_tainted;
4765 if (dest_m->cs_tainted) {
4766 vm_page_copy_cs_tainted++;
4767 }
4768 dest_m->slid = src_m->slid;
4769 dest_m->error = src_m->error; /* sliding src_m might have failed... */
4770 pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(src_m), VM_PAGE_GET_PHYS_PAGE(dest_m));
4771 }
4772
4773 #if MACH_ASSERT
4774 static void
4775 _vm_page_print(
4776 vm_page_t p)
4777 {
4778 printf("vm_page %p: \n", p);
4779 printf(" pageq: next=%p prev=%p\n",
4780 (vm_page_t)VM_PAGE_UNPACK_PTR(p->pageq.next),
4781 (vm_page_t)VM_PAGE_UNPACK_PTR(p->pageq.prev));
4782 printf(" listq: next=%p prev=%p\n",
4783 (vm_page_t)(VM_PAGE_UNPACK_PTR(p->listq.next)),
4784 (vm_page_t)(VM_PAGE_UNPACK_PTR(p->listq.prev)));
4785 printf(" next=%p\n", (vm_page_t)(VM_PAGE_UNPACK_PTR(p->next_m)));
4786 printf(" object=%p offset=0x%llx\n",VM_PAGE_OBJECT(p), p->offset);
4787 printf(" wire_count=%u\n", p->wire_count);
4788 printf(" q_state=%u\n", p->vm_page_q_state);
4789
4790 printf(" %slaundry, %sref, %sgobbled, %sprivate\n",
4791 (p->laundry ? "" : "!"),
4792 (p->reference ? "" : "!"),
4793 (p->gobbled ? "" : "!"),
4794 (p->private ? "" : "!"));
4795 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
4796 (p->busy ? "" : "!"),
4797 (p->wanted ? "" : "!"),
4798 (p->tabled ? "" : "!"),
4799 (p->fictitious ? "" : "!"),
4800 (p->pmapped ? "" : "!"),
4801 (p->wpmapped ? "" : "!"));
4802 printf(" %sfree_when_done, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
4803 (p->free_when_done ? "" : "!"),
4804 (p->absent ? "" : "!"),
4805 (p->error ? "" : "!"),
4806 (p->dirty ? "" : "!"),
4807 (p->cleaning ? "" : "!"),
4808 (p->precious ? "" : "!"),
4809 (p->clustered ? "" : "!"));
4810 printf(" %soverwriting, %srestart, %sunusual\n",
4811 (p->overwriting ? "" : "!"),
4812 (p->restart ? "" : "!"),
4813 (p->unusual ? "" : "!"));
4814 printf(" %scs_validated, %scs_tainted, %scs_nx, %sno_cache\n",
4815 (p->cs_validated ? "" : "!"),
4816 (p->cs_tainted ? "" : "!"),
4817 (p->cs_nx ? "" : "!"),
4818 (p->no_cache ? "" : "!"));
4819
4820 printf("phys_page=0x%x\n", VM_PAGE_GET_PHYS_PAGE(p));
4821 }
4822
4823 /*
4824 * Check that the list of pages is ordered by
4825 * ascending physical address and has no holes.
4826 */
4827 static int
4828 vm_page_verify_contiguous(
4829 vm_page_t pages,
4830 unsigned int npages)
4831 {
4832 vm_page_t m;
4833 unsigned int page_count;
4834 vm_offset_t prev_addr;
4835
4836 prev_addr = VM_PAGE_GET_PHYS_PAGE(pages);
4837 page_count = 1;
4838 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
4839 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
4840 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
4841 m, (long)prev_addr, VM_PAGE_GET_PHYS_PAGE(m));
4842 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
4843 panic("vm_page_verify_contiguous: not contiguous!");
4844 }
4845 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
4846 ++page_count;
4847 }
4848 if (page_count != npages) {
4849 printf("pages %p actual count 0x%x but requested 0x%x\n",
4850 pages, page_count, npages);
4851 panic("vm_page_verify_contiguous: count error");
4852 }
4853 return 1;
4854 }
4855
4856
4857 /*
4858 * Check the free lists for proper length etc.
4859 */
4860 static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
4861 static unsigned int
4862 vm_page_verify_free_list(
4863 vm_page_queue_head_t *vm_page_queue,
4864 unsigned int color,
4865 vm_page_t look_for_page,
4866 boolean_t expect_page)
4867 {
4868 unsigned int npages;
4869 vm_page_t m;
4870 vm_page_t prev_m;
4871 boolean_t found_page;
4872
4873 if (! vm_page_verify_this_free_list_enabled)
4874 return 0;
4875
4876 found_page = FALSE;
4877 npages = 0;
4878 prev_m = (vm_page_t)((uintptr_t)vm_page_queue);
4879
4880 vm_page_queue_iterate(vm_page_queue,
4881 m,
4882 vm_page_t,
4883 pageq) {
4884
4885 if (m == look_for_page) {
4886 found_page = TRUE;
4887 }
4888 if ((vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.prev) != prev_m)
4889 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
4890 color, npages, m, (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.prev), prev_m);
4891 if ( ! m->busy )
4892 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
4893 color, npages, m);
4894 if (color != (unsigned int) -1) {
4895 if (VM_PAGE_GET_COLOR(m) != color)
4896 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
4897 color, npages, m, VM_PAGE_GET_COLOR(m), color);
4898 if (m->vm_page_q_state != VM_PAGE_ON_FREE_Q)
4899 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p - expecting q_state == VM_PAGE_ON_FREE_Q, found %d\n",
4900 color, npages, m, m->vm_page_q_state);
4901 } else {
4902 if (m->vm_page_q_state != VM_PAGE_ON_FREE_LOCAL_Q)
4903 panic("vm_page_verify_free_list(npages=%u): local page %p - expecting q_state == VM_PAGE_ON_FREE_LOCAL_Q, found %d\n",
4904 npages, m, m->vm_page_q_state);
4905 }
4906 ++npages;
4907 prev_m = m;
4908 }
4909 if (look_for_page != VM_PAGE_NULL) {
4910 unsigned int other_color;
4911
4912 if (expect_page && !found_page) {
4913 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
4914 color, npages, look_for_page, VM_PAGE_GET_PHYS_PAGE(look_for_page));
4915 _vm_page_print(look_for_page);
4916 for (other_color = 0;
4917 other_color < vm_colors;
4918 other_color++) {
4919 if (other_color == color)
4920 continue;
4921 vm_page_verify_free_list(&vm_page_queue_free[other_color].qhead,
4922 other_color, look_for_page, FALSE);
4923 }
4924 if (color == (unsigned int) -1) {
4925 vm_page_verify_free_list(&vm_lopage_queue_free,
4926 (unsigned int) -1, look_for_page, FALSE);
4927 }
4928 panic("vm_page_verify_free_list(color=%u)\n", color);
4929 }
4930 if (!expect_page && found_page) {
4931 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
4932 color, npages, look_for_page, VM_PAGE_GET_PHYS_PAGE(look_for_page));
4933 }
4934 }
4935 return npages;
4936 }
4937
4938 static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
4939 static void
4940 vm_page_verify_free_lists( void )
4941 {
4942 unsigned int color, npages, nlopages;
4943 boolean_t toggle = TRUE;
4944
4945 if (! vm_page_verify_all_free_lists_enabled)
4946 return;
4947
4948 npages = 0;
4949
4950 lck_mtx_lock(&vm_page_queue_free_lock);
4951
4952 if (vm_page_verify_this_free_list_enabled == TRUE) {
4953 /*
4954 * This variable has been set globally for extra checking of
4955 * each free list Q. Since we didn't set it, we don't own it
4956 * and we shouldn't toggle it.
4957 */
4958 toggle = FALSE;
4959 }
4960
4961 if (toggle == TRUE) {
4962 vm_page_verify_this_free_list_enabled = TRUE;
4963 }
4964
4965 for( color = 0; color < vm_colors; color++ ) {
4966 npages += vm_page_verify_free_list(&vm_page_queue_free[color].qhead,
4967 color, VM_PAGE_NULL, FALSE);
4968 }
4969 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
4970 (unsigned int) -1,
4971 VM_PAGE_NULL, FALSE);
4972 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
4973 panic("vm_page_verify_free_lists: "
4974 "npages %u free_count %d nlopages %u lo_free_count %u",
4975 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
4976
4977 if (toggle == TRUE) {
4978 vm_page_verify_this_free_list_enabled = FALSE;
4979 }
4980
4981 lck_mtx_unlock(&vm_page_queue_free_lock);
4982 }
4983
4984 #endif /* MACH_ASSERT */
4985
4986
4987
4988 #if __arm64__
4989 /*
4990 * 1 or more clients (currently only SEP) ask for a large contiguous chunk of memory
4991 * after the system has 'aged'. To ensure that other allocation requests don't mess
4992 * with the chances of that request being satisfied, we pre-allocate a single contiguous
4993 * 10MB buffer and hand it out to the first request of >= 4MB.
4994 */
4995
4996 kern_return_t cpm_preallocate_early(void);
4997
4998 vm_page_t cpm_preallocated_pages_list = NULL;
4999 boolean_t preallocated_buffer_available = FALSE;
5000
5001 #define PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT ((10 * 1024 * 1024) / PAGE_SIZE_64) /* 10 MB */
5002 #define MIN_CONTIG_PAGES_REQUEST_FOR_PREALLOCATED_BUFFER ((4 * 1024 *1024) / PAGE_SIZE_64) /* 4 MB */
5003
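/*
 * Editor's note: the two counts above are expressed in pages, so their
 * concrete values depend on PAGE_SIZE_64 -- e.g. 2560 and 1024 pages with
 * 4KB pages, or 640 and 256 pages with 16KB pages.
 */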
5004 kern_return_t
5005 cpm_preallocate_early(void)
5006 {
5007
5008 kern_return_t kr = KERN_SUCCESS;
5009 vm_map_size_t prealloc_size = (PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT * PAGE_SIZE_64);
5010
5011 printf("cpm_preallocate_early called to preallocate contiguous buffer of %llu pages\n", PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT);
5012
5013 kr = cpm_allocate(CAST_DOWN(vm_size_t, prealloc_size), &cpm_preallocated_pages_list, 0, 0, TRUE, 0);
5014
5015 if (kr != KERN_SUCCESS) {
5016 printf("cpm_allocate for preallocated contig buffer failed with %d.\n", kr);
5017 } else {
5018 preallocated_buffer_available = TRUE;
5019 }
5020
5021 return kr;
5022 }
5023 #endif /* __arm64__ */
5024
5025
5026 extern boolean_t (* volatile consider_buffer_cache_collect)(int);
5027
5028 /*
5029 * CONTIGUOUS PAGE ALLOCATION
5030 *
5031 * Find a region large enough to contain at least n pages
5032 * of contiguous physical memory.
5033 *
5034 * This is done by traversing the vm_page_t array in a linear fashion
5035 * we assume that the vm_page_t array has the available physical pages in an
5036 * ordered, ascending list... this is currently true of all our implementations
5037 * and must remain so... there can be 'holes' in the array... we also can
5038 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
5039 * which used to happen via 'vm_page_convert'... that function was no longer
5040 * being called and was removed...
5041 *
5042 * The basic flow consists of stabilizing some of the interesting state of
5043 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
5044 * sweep at the beginning of the array looking for pages that meet our criteria
5045 * for a 'stealable' page... currently we are pretty conservative... if the page
5046 * meets these criteria and is physically contiguous to the previous page in the 'run'
5047 * we keep developing it. If we hit a page that doesn't fit, we reset our state
5048 * and start to develop a new run... if at this point we've already considered
5049 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
5050 * and mutex_pause (which will yield the processor), to keep the latency low with
5051 * respect to other threads trying to acquire free pages (or move pages from q to q),
5052 * and then continue from the spot we left off... we only make 1 pass through the
5053 * array. Once we have a 'run' that is long enough, we'll go into the loop
5054 * which steals the pages from the queues they're currently on... pages on the free
5055 * queue can be stolen directly... pages that are on any of the other queues
5056 * must be removed from the object they are tabled on... this requires taking the
5057 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
5058 * or if the state of the page behind the vm_object lock is no longer viable, we'll
5059 * dump the pages we've currently stolen back to the free list, and pick up our
5060 * scan from the point where we aborted the 'current' run.
5061 *
5062 *
5063 * Requirements:
5064 * - neither vm_page_queue nor vm_free_list lock can be held on entry
5065 *
5066 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
5067 *
5068 * Algorithm:
5069 */
5070
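/*
 * Editor's note -- a condensed, informal outline of the scan described above
 * (a sketch of the code below, not an additional specification):
 *
 *	restart from idx_last_contig_page_found;
 *	for each vm_pages[page_idx]:
 *		if the page is in a transient/unstealable state or breaks
 *		    physical contiguity with the previous page:
 *			RESET_STATE_OF_RUN();
 *		else:
 *			extend the current run (npages++);
 *		if considered > MAX_CONSIDERED_BEFORE_YIELD and no run is
 *		    in progress:
 *			drop both locks, mutex_pause(), retake the locks;
 *	if a run of contig_pages was found:
 *		pass 1: pull the run's free pages off the free queues;
 *		pass 2 (back to front): steal or substitute the in-use pages,
 *			aborting the run (and freeing what was stolen so far)
 *			if an object can't be locked or a page went unstable;
 *	else if we haven't wrapped yet:
 *		wrap to index 0 and scan the array once more;
 */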
5071 #define MAX_CONSIDERED_BEFORE_YIELD 1000
5072
5073
5074 #define RESET_STATE_OF_RUN() \
5075 MACRO_BEGIN \
5076 prevcontaddr = -2; \
5077 start_pnum = -1; \
5078 free_considered = 0; \
5079 substitute_needed = 0; \
5080 npages = 0; \
5081 MACRO_END
5082
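/*
 * Editor's note: prevcontaddr is reset to -2 rather than -1 so that the
 * contiguity test below, "VM_PAGE_GET_PHYS_PAGE(m) == prevcontaddr + 1",
 * can never match right after a reset -- with -1 the test would spuriously
 * match physical page 0.
 */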
5083 /*
5084 * Can we steal in-use (i.e. not free) pages when searching for
5085 * physically-contiguous pages?
5086 */
5087 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
5088
5089 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
5090 #if DEBUG
5091 int vm_page_find_contig_debug = 0;
5092 #endif
5093
5094 static vm_page_t
5095 vm_page_find_contiguous(
5096 unsigned int contig_pages,
5097 ppnum_t max_pnum,
5098 ppnum_t pnum_mask,
5099 boolean_t wire,
5100 int flags)
5101 {
5102 vm_page_t m = NULL;
5103 ppnum_t prevcontaddr = 0;
5104 ppnum_t start_pnum = 0;
5105 unsigned int npages = 0, considered = 0, scanned = 0;
5106 unsigned int page_idx = 0, start_idx = 0, last_idx = 0, orig_last_idx = 0;
5107 unsigned int idx_last_contig_page_found = 0;
5108 int free_considered = 0, free_available = 0;
5109 int substitute_needed = 0;
5110 boolean_t wrapped, zone_gc_called = FALSE;
5111 kern_return_t kr;
5112 #if DEBUG
5113 clock_sec_t tv_start_sec = 0, tv_end_sec = 0;
5114 clock_usec_t tv_start_usec = 0, tv_end_usec = 0;
5115 #endif
5116
5117 int yielded = 0;
5118 int dumped_run = 0;
5119 int stolen_pages = 0;
5120 int compressed_pages = 0;
5121
5122
5123 if (contig_pages == 0)
5124 return VM_PAGE_NULL;
5125
5126 full_scan_again:
5127
5128 #if MACH_ASSERT
5129 vm_page_verify_free_lists();
5130 #endif
5131 #if DEBUG
5132 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
5133 #endif
5134 PAGE_REPLACEMENT_ALLOWED(TRUE);
5135
5136 vm_page_lock_queues();
5137
5138 #if __arm64__
5139 if (preallocated_buffer_available) {
5140
5141 if ((contig_pages >= MIN_CONTIG_PAGES_REQUEST_FOR_PREALLOCATED_BUFFER) && (contig_pages <= PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT)) {
5142
5143 m = cpm_preallocated_pages_list;
5144
5145 start_idx = (unsigned int) (m - &vm_pages[0]);
5146
5147 if (wire == FALSE) {
5148
5149 last_idx = start_idx;
5150
5151 for(npages = 0; npages < contig_pages; npages++, last_idx++) {
5152
5153 assert(vm_pages[last_idx].gobbled == FALSE);
5154
5155 vm_pages[last_idx].gobbled = TRUE;
5156 vm_page_gobble_count++;
5157
5158 assert(1 == vm_pages[last_idx].wire_count);
5159 /*
5160 * Gobbled pages are counted as wired pages, so there is no need to drop
5161 * the global wired page count; adjusting just this page's wire_count is enough.
5162 */
5163 vm_pages[last_idx].wire_count--;
5164 vm_pages[last_idx].vm_page_q_state = VM_PAGE_NOT_ON_Q;
5165 }
5166
5167 }
5168
5169 last_idx = start_idx + contig_pages - 1;
5170
5171 vm_pages[last_idx].snext = NULL;
5172
5173 printf("Using preallocated buffer: Requested size (pages):%d... index range: %d-%d...freeing %llu pages\n", contig_pages, start_idx, last_idx, PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT - contig_pages);
5174
5175 last_idx += 1;
5176 for(npages = contig_pages; npages < PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT; npages++, last_idx++) {
5177
5178 VM_PAGE_ZERO_PAGEQ_ENTRY(&vm_pages[last_idx]);
5179 vm_page_free(&vm_pages[last_idx]);
5180 }
5181
5182 cpm_preallocated_pages_list = NULL;
5183 preallocated_buffer_available = FALSE;
5184
5185 goto done_scanning;
5186 }
5187 }
5188 #endif /* __arm64__ */
5189
5190 lck_mtx_lock(&vm_page_queue_free_lock);
5191
5192 RESET_STATE_OF_RUN();
5193
5194 scanned = 0;
5195 considered = 0;
5196 free_available = vm_page_free_count - vm_page_free_reserved;
5197
5198 wrapped = FALSE;
5199
5200 if(flags & KMA_LOMEM)
5201 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
5202 else
5203 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
5204
5205 orig_last_idx = idx_last_contig_page_found;
5206 last_idx = orig_last_idx;
5207
5208 for (page_idx = last_idx, start_idx = last_idx;
5209 npages < contig_pages && page_idx < vm_pages_count;
5210 page_idx++) {
5211 retry:
5212 if (wrapped &&
5213 npages == 0 &&
5214 page_idx >= orig_last_idx) {
5215 /*
5216 * We're back where we started and we haven't
5217 * found any suitable contiguous range. Let's
5218 * give up.
5219 */
5220 break;
5221 }
5222 scanned++;
5223 m = &vm_pages[page_idx];
5224
5225 assert(!m->fictitious);
5226 assert(!m->private);
5227
5228 if (max_pnum && VM_PAGE_GET_PHYS_PAGE(m) > max_pnum) {
5229 /* no more low pages... */
5230 break;
5231 }
5232 if (!npages && ((VM_PAGE_GET_PHYS_PAGE(m) & pnum_mask) != 0)) {
5233 /*
5234 * not aligned
5235 */
5236 RESET_STATE_OF_RUN();
5237
5238 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
5239 m->laundry || m->wanted ||
5240 m->cleaning || m->overwriting || m->free_when_done) {
5241 /*
5242 * page is in a transient state
5243 * or a state we don't want to deal
5244 * with, so don't consider it which
5245 * means starting a new run
5246 */
5247 RESET_STATE_OF_RUN();
5248
5249 } else if ((m->vm_page_q_state == VM_PAGE_NOT_ON_Q) ||
5250 (m->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q) ||
5251 (m->vm_page_q_state == VM_PAGE_ON_FREE_LOPAGE_Q) ||
5252 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
5253 /*
5254 * page needs to be on one of our queues (other than the pageout or special free queues)
5255 * or it needs to belong to the compressor pool (which is now indicated
5256 * by vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR and falls out
5257 * from the check for VM_PAGE_NOT_ON_Q)
5258 * in order for it to be stable behind the
5259 * locks we hold at this point...
5260 * if not, don't consider it which
5261 * means starting a new run
5262 */
5263 RESET_STATE_OF_RUN();
5264
5265 } else if ((m->vm_page_q_state != VM_PAGE_ON_FREE_Q) && (!m->tabled || m->busy)) {
5266 /*
5267 * pages on the free list are always 'busy'
5268 * so we couldn't test for 'busy' in the check
5269 * for the transient states... pages that are
5270 * 'free' are never 'tabled', so we also couldn't
5271 * test for 'tabled'. So we check here to make
5272 * sure that a non-free page is not busy and is
5273 * tabled on an object...
5274 * if not, don't consider it which
5275 * means starting a new run
5276 */
5277 RESET_STATE_OF_RUN();
5278
5279 } else {
5280 if (VM_PAGE_GET_PHYS_PAGE(m) != prevcontaddr + 1) {
5281 if ((VM_PAGE_GET_PHYS_PAGE(m) & pnum_mask) != 0) {
5282 RESET_STATE_OF_RUN();
5283 goto did_consider;
5284 } else {
5285 npages = 1;
5286 start_idx = page_idx;
5287 start_pnum = VM_PAGE_GET_PHYS_PAGE(m);
5288 }
5289 } else {
5290 npages++;
5291 }
5292 prevcontaddr = VM_PAGE_GET_PHYS_PAGE(m);
5293
5294 VM_PAGE_CHECK(m);
5295 if (m->vm_page_q_state == VM_PAGE_ON_FREE_Q) {
5296 free_considered++;
5297 } else {
5298 /*
5299 * This page is not free.
5300 * If we can't steal used pages,
5301 * we have to give up this run
5302 * and keep looking.
5303 * Otherwise, we might need to
5304 * move the contents of this page
5305 * into a substitute page.
5306 */
5307 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
5308 if (m->pmapped || m->dirty || m->precious) {
5309 substitute_needed++;
5310 }
5311 #else
5312 RESET_STATE_OF_RUN();
5313 #endif
5314 }
5315
5316 if ((free_considered + substitute_needed) > free_available) {
5317 /*
5318 * if we let this run continue
5319 * we will end up dropping the vm_page_free_count
5320 * below the reserve limit... we need to abort
5321 * this run, but we can at least re-consider this
5322 * page... thus the jump back to 'retry'
5323 */
5324 RESET_STATE_OF_RUN();
5325
5326 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
5327 considered++;
5328 goto retry;
5329 }
5330 /*
5331 * free_available == 0
5332 * so can't consider any free pages... if
5333 * we went to retry in this case, we'd
5334 * get stuck looking at the same page
5335 * w/o making any forward progress
5336 * we also want to take this path if we've already
5337 * reached our limit that controls the lock latency
5338 */
5339 }
5340 }
5341 did_consider:
5342 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
5343
5344 PAGE_REPLACEMENT_ALLOWED(FALSE);
5345
5346 lck_mtx_unlock(&vm_page_queue_free_lock);
5347 vm_page_unlock_queues();
5348
5349 mutex_pause(0);
5350
5351 PAGE_REPLACEMENT_ALLOWED(TRUE);
5352
5353 vm_page_lock_queues();
5354 lck_mtx_lock(&vm_page_queue_free_lock);
5355
5356 RESET_STATE_OF_RUN();
5357 /*
5358 * reset our free page limit since we
5359 * dropped the lock protecting the vm_page_free_queue
5360 */
5361 free_available = vm_page_free_count - vm_page_free_reserved;
5362 considered = 0;
5363
5364 yielded++;
5365
5366 goto retry;
5367 }
5368 considered++;
5369 }
5370 m = VM_PAGE_NULL;
5371
5372 if (npages != contig_pages) {
5373 if (!wrapped) {
5374 /*
5375 * We didn't find a contiguous range but we didn't
5376 * start from the very first page.
5377 * Start again from the very first page.
5378 */
5379 RESET_STATE_OF_RUN();
5380 if( flags & KMA_LOMEM)
5381 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
5382 else
5383 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
5384 last_idx = 0;
5385 page_idx = last_idx;
5386 wrapped = TRUE;
5387 goto retry;
5388 }
5389 lck_mtx_unlock(&vm_page_queue_free_lock);
5390 } else {
5391 vm_page_t m1;
5392 vm_page_t m2;
5393 unsigned int cur_idx;
5394 unsigned int tmp_start_idx;
5395 vm_object_t locked_object = VM_OBJECT_NULL;
5396 boolean_t abort_run = FALSE;
5397
5398 assert(page_idx - start_idx == contig_pages);
5399
5400 tmp_start_idx = start_idx;
5401
5402 /*
5403 * first pass through to pull the free pages
5404 * off of the free queue so that in case we
5405 * need substitute pages, we won't grab any
5406 * of the free pages in the run... we'll clear
5407 * the 'free' bit in the 2nd pass, and even in
5408 * an abort_run case, we'll collect all of the
5409 * free pages in this run and return them to the free list
5410 */
5411 while (start_idx < page_idx) {
5412
5413 m1 = &vm_pages[start_idx++];
5414
5415 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
5416 assert(m1->vm_page_q_state == VM_PAGE_ON_FREE_Q);
5417 #endif
5418
5419 if (m1->vm_page_q_state == VM_PAGE_ON_FREE_Q) {
5420 unsigned int color;
5421
5422 color = VM_PAGE_GET_COLOR(m1);
5423 #if MACH_ASSERT
5424 vm_page_verify_free_list(&vm_page_queue_free[color].qhead, color, m1, TRUE);
5425 #endif
5426 vm_page_queue_remove(&vm_page_queue_free[color].qhead,
5427 m1,
5428 vm_page_t,
5429 pageq);
5430
5431 VM_PAGE_ZERO_PAGEQ_ENTRY(m1);
5432 #if MACH_ASSERT
5433 vm_page_verify_free_list(&vm_page_queue_free[color].qhead, color, VM_PAGE_NULL, FALSE);
5434 #endif
5435 /*
5436 * Clear the "free" bit so that this page
5437 * does not get considered for another
5438 * concurrent physically-contiguous allocation.
5439 */
5440 m1->vm_page_q_state = VM_PAGE_NOT_ON_Q;
5441 assert(m1->busy);
5442
5443 vm_page_free_count--;
5444 }
5445 }
5446 if( flags & KMA_LOMEM)
5447 vm_page_lomem_find_contiguous_last_idx = page_idx;
5448 else
5449 vm_page_find_contiguous_last_idx = page_idx;
5450
5451 /*
5452 * we can drop the free queue lock at this point since
5453 * we've pulled any 'free' candidates off of the list
5454 * we need it dropped so that we can do a vm_page_grab
5455 * when substituting for pmapped/dirty pages
5456 */
5457 lck_mtx_unlock(&vm_page_queue_free_lock);
5458
5459 start_idx = tmp_start_idx;
5460 cur_idx = page_idx - 1;
5461
5462 while (start_idx++ < page_idx) {
5463 /*
5464 * must go through the list from back to front
5465 * so that the page list is created in the
5466 * correct order - low -> high phys addresses
5467 */
5468 m1 = &vm_pages[cur_idx--];
5469
5470 if (m1->vm_page_object == 0) {
5471 /*
5472 * page has already been removed from
5473 * the free list in the 1st pass
5474 */
5475 assert(m1->vm_page_q_state == VM_PAGE_NOT_ON_Q);
5476 assert(m1->offset == (vm_object_offset_t) -1);
5477 assert(m1->busy);
5478 assert(!m1->wanted);
5479 assert(!m1->laundry);
5480 } else {
5481 vm_object_t object;
5482 int refmod;
5483 boolean_t disconnected, reusable;
5484
5485 if (abort_run == TRUE)
5486 continue;
5487
5488 assert(m1->vm_page_q_state != VM_PAGE_NOT_ON_Q);
5489
5490 object = VM_PAGE_OBJECT(m1);
5491
5492 if (object != locked_object) {
5493 if (locked_object) {
5494 vm_object_unlock(locked_object);
5495 locked_object = VM_OBJECT_NULL;
5496 }
5497 if (vm_object_lock_try(object))
5498 locked_object = object;
5499 }
5500 if (locked_object == VM_OBJECT_NULL ||
5501 (VM_PAGE_WIRED(m1) || m1->gobbled ||
5502 m1->laundry || m1->wanted ||
5503 m1->cleaning || m1->overwriting || m1->free_when_done || m1->busy) ||
5504 (m1->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
5505
5506 if (locked_object) {
5507 vm_object_unlock(locked_object);
5508 locked_object = VM_OBJECT_NULL;
5509 }
5510 tmp_start_idx = cur_idx;
5511 abort_run = TRUE;
5512 continue;
5513 }
5514
5515 disconnected = FALSE;
5516 reusable = FALSE;
5517
5518 if ((m1->reusable ||
5519 object->all_reusable) &&
5520 (m1->vm_page_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q) &&
5521 !m1->dirty &&
5522 !m1->reference) {
5523 /* reusable page... */
5524 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1));
5525 disconnected = TRUE;
5526 if (refmod == 0) {
5527 /*
5528 * ... not reused: can steal
5529 * without relocating contents.
5530 */
5531 reusable = TRUE;
5532 }
5533 }
5534
5535 if ((m1->pmapped &&
5536 ! reusable) ||
5537 m1->dirty ||
5538 m1->precious) {
5539 vm_object_offset_t offset;
5540
5541 m2 = vm_page_grab();
5542
5543 if (m2 == VM_PAGE_NULL) {
5544 if (locked_object) {
5545 vm_object_unlock(locked_object);
5546 locked_object = VM_OBJECT_NULL;
5547 }
5548 tmp_start_idx = cur_idx;
5549 abort_run = TRUE;
5550 continue;
5551 }
5552 if (! disconnected) {
5553 if (m1->pmapped)
5554 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1));
5555 else
5556 refmod = 0;
5557 }
5558
5559 /* copy the page's contents */
5560 pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(m1), VM_PAGE_GET_PHYS_PAGE(m2));
5561 /* copy the page's state */
5562 assert(!VM_PAGE_WIRED(m1));
5563 assert(m1->vm_page_q_state != VM_PAGE_ON_FREE_Q);
5564 assert(m1->vm_page_q_state != VM_PAGE_ON_PAGEOUT_Q);
5565 assert(!m1->laundry);
5566 m2->reference = m1->reference;
5567 assert(!m1->gobbled);
5568 assert(!m1->private);
5569 m2->no_cache = m1->no_cache;
5570 m2->xpmapped = 0;
5571 assert(!m1->busy);
5572 assert(!m1->wanted);
5573 assert(!m1->fictitious);
5574 m2->pmapped = m1->pmapped; /* should flush cache ? */
5575 m2->wpmapped = m1->wpmapped;
5576 assert(!m1->free_when_done);
5577 m2->absent = m1->absent;
5578 m2->error = m1->error;
5579 m2->dirty = m1->dirty;
5580 assert(!m1->cleaning);
5581 m2->precious = m1->precious;
5582 m2->clustered = m1->clustered;
5583 assert(!m1->overwriting);
5584 m2->restart = m1->restart;
5585 m2->unusual = m1->unusual;
5586 m2->cs_validated = m1->cs_validated;
5587 m2->cs_tainted = m1->cs_tainted;
5588 m2->cs_nx = m1->cs_nx;
5589
5590 /*
5591 * If m1 had really been reusable,
5592 * we would have just stolen it, so
5593 * let's not propagate its "reusable"
5594 * bit and assert that m2 is not
5595 * marked as "reusable".
5596 */
5597 // m2->reusable = m1->reusable;
5598 assert(!m2->reusable);
5599
5600 // assert(!m1->lopage);
5601 m2->slid = m1->slid;
5602
5603 if (m1->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR)
5604 m2->vm_page_q_state = VM_PAGE_USED_BY_COMPRESSOR;
5605
5606 /*
5607 * page may need to be flushed if
5608 * it is marshalled into a UPL
5609 * that is going to be used by a device
5610 * that doesn't support coherency
5611 */
5612 m2->written_by_kernel = TRUE;
5613
5614 /*
5615 * make sure we clear the ref/mod state
5616 * from the pmap layer... else we risk
5617 * inheriting state from the last time
5618 * this page was used...
5619 */
5620 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(m2), VM_MEM_MODIFIED | VM_MEM_REFERENCED);
5621
5622 if (refmod & VM_MEM_REFERENCED)
5623 m2->reference = TRUE;
5624 if (refmod & VM_MEM_MODIFIED) {
5625 SET_PAGE_DIRTY(m2, TRUE);
5626 }
5627 offset = m1->offset;
5628
5629 /*
5630 * completely cleans up the state
5631 * of the page so that it is ready
5632 * to be put onto the free list... for
5633 * our purpose here it now looks like it
5634 * just came off of the free list
5635 */
5636 vm_page_free_prepare(m1);
5637
5638 /*
5639 * now put the substitute page
5640 * on the object
5641 */
5642 vm_page_insert_internal(m2, locked_object, offset, VM_KERN_MEMORY_NONE, TRUE, TRUE, FALSE, FALSE, NULL);
5643
5644 if (m2->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
5645 m2->pmapped = TRUE;
5646 m2->wpmapped = TRUE;
5647
5648 PMAP_ENTER(kernel_pmap, m2->offset, m2,
5649 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE, kr);
5650
5651 assert(kr == KERN_SUCCESS);
5652
5653 compressed_pages++;
5654
5655 } else {
5656 if (m2->reference)
5657 vm_page_activate(m2);
5658 else
5659 vm_page_deactivate(m2);
5660 }
5661 PAGE_WAKEUP_DONE(m2);
5662
5663 } else {
5664 assert(m1->vm_page_q_state != VM_PAGE_USED_BY_COMPRESSOR);
5665
5666 /*
5667 * completely cleans up the state
5668 * of the page so that it is ready
5669 * to be put onto the free list... for
5670 * our purpose here it now looks like it
5671 * just came off of the free list
5672 */
5673 vm_page_free_prepare(m1);
5674 }
5675
5676 stolen_pages++;
5677
5678 }
5679 #if CONFIG_BACKGROUND_QUEUE
5680 vm_page_assign_background_state(m1);
5681 #endif
5682 VM_PAGE_ZERO_PAGEQ_ENTRY(m1);
5683 m1->snext = m;
5684 m = m1;
5685 }
5686 if (locked_object) {
5687 vm_object_unlock(locked_object);
5688 locked_object = VM_OBJECT_NULL;
5689 }
5690
5691 if (abort_run == TRUE) {
5692 /*
5693 * want the index of the last
5694 * page in this run that was
5695 * successfully 'stolen', so back
5696 * it up 1 for the auto-decrement on use
5697 * and 1 more to bump back over this page
5698 */
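			/*
			 * concretely: the 2nd pass fetched each page with
			 * "m1 = &vm_pages[cur_idx--]", so if we bailed on, say,
			 * vm_pages[37], cur_idx had already dropped to 36 when it
			 * was stashed in tmp_start_idx... tmp_start_idx + 2 == 38
			 * then restarts the scan on the first page past the one
			 * we gave up on
			 */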
5699 page_idx = tmp_start_idx + 2;
5700 if (page_idx >= vm_pages_count) {
5701 if (wrapped) {
5702 if (m != VM_PAGE_NULL) {
5703 vm_page_unlock_queues();
5704 vm_page_free_list(m, FALSE);
5705 vm_page_lock_queues();
5706 m = VM_PAGE_NULL;
5707 }
5708 dumped_run++;
5709 goto done_scanning;
5710 }
5711 page_idx = last_idx = 0;
5712 wrapped = TRUE;
5713 }
5714 abort_run = FALSE;
5715
5716 /*
5717 * We didn't find a contiguous range but we didn't
5718 * start from the very first page.
5719 * Start again from the very first page.
5720 */
5721 RESET_STATE_OF_RUN();
5722
5723 if( flags & KMA_LOMEM)
5724 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
5725 else
5726 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
5727
5728 last_idx = page_idx;
5729
5730 if (m != VM_PAGE_NULL) {
5731 vm_page_unlock_queues();
5732 vm_page_free_list(m, FALSE);
5733 vm_page_lock_queues();
5734 m = VM_PAGE_NULL;
5735 }
5736 dumped_run++;
5737
5738 lck_mtx_lock(&vm_page_queue_free_lock);
5739 /*
5740 * reset our free page limit since we
5741 * dropped the lock protecting the vm_page_free_queue
5742 */
5743 free_available = vm_page_free_count - vm_page_free_reserved;
5744 goto retry;
5745 }
5746
5747 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
5748
5749 assert(m1->vm_page_q_state == VM_PAGE_NOT_ON_Q);
5750 assert(m1->wire_count == 0);
5751
5752 if (wire == TRUE) {
5753 m1->wire_count++;
5754 m1->vm_page_q_state = VM_PAGE_IS_WIRED;
5755 } else
5756 m1->gobbled = TRUE;
5757 }
5758 if (wire == FALSE)
5759 vm_page_gobble_count += npages;
5760
5761 /*
5762 * gobbled pages are also counted as wired pages
5763 */
5764 vm_page_wire_count += npages;
5765
5766 assert(vm_page_verify_contiguous(m, npages));
5767 }
5768 done_scanning:
5769 PAGE_REPLACEMENT_ALLOWED(FALSE);
5770
5771 vm_page_unlock_queues();
5772
5773 #if DEBUG
5774 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
5775
5776 tv_end_sec -= tv_start_sec;
5777 if (tv_end_usec < tv_start_usec) {
5778 tv_end_sec--;
5779 tv_end_usec += 1000000;
5780 }
5781 tv_end_usec -= tv_start_usec;
5782 if (tv_end_usec >= 1000000) {
5783 tv_end_sec++;
5784 tv_end_usec -= 1000000;
5785 }
5786 if (vm_page_find_contig_debug) {
5787 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
5788 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
5789 (long)tv_end_sec, tv_end_usec, orig_last_idx,
5790 scanned, yielded, dumped_run, stolen_pages, compressed_pages);
5791 }
5792
5793 #endif
5794 #if MACH_ASSERT
5795 vm_page_verify_free_lists();
5796 #endif
5797 if (m == NULL && zone_gc_called == FALSE) {
5798 printf("%s(num=%d,low=%d): found %d pages at 0x%llx...scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages... wired count is %d\n",
5799 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
5800 scanned, yielded, dumped_run, stolen_pages, compressed_pages, vm_page_wire_count);
5801
5802 if (consider_buffer_cache_collect != NULL) {
5803 (void)(*consider_buffer_cache_collect)(1);
5804 }
5805
5806 consider_zone_gc(FALSE);
5807
5808 zone_gc_called = TRUE;
5809
5810 printf("vm_page_find_contiguous: zone_gc called... wired count is %d\n", vm_page_wire_count);
5811 goto full_scan_again;
5812 }
5813
5814 return m;
5815 }
5816
5817 /*
5818 * Allocate a list of contiguous, wired pages.
5819 */
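/*
 * A minimal usage sketch (not an actual call site in this file; the zero
 * values for max_pnum and pnum_mask are assumed here to mean "no physical
 * placement restriction"):
 *
 *	vm_page_t	pages;
 *	kern_return_t	kr;
 *
 *	kr = cpm_allocate(4 * PAGE_SIZE, &pages, 0, 0, TRUE, 0);
 *	if (kr == KERN_SUCCESS) {
 *		// "pages" is linked through snext in ascending physical
 *		// address order and can be walked with NEXT_PAGE()
 *	}
 */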
5820 kern_return_t
5821 cpm_allocate(
5822 vm_size_t size,
5823 vm_page_t *list,
5824 ppnum_t max_pnum,
5825 ppnum_t pnum_mask,
5826 boolean_t wire,
5827 int flags)
5828 {
5829 vm_page_t pages;
5830 unsigned int npages;
5831
5832 if (size % PAGE_SIZE != 0)
5833 return KERN_INVALID_ARGUMENT;
5834
5835 npages = (unsigned int) (size / PAGE_SIZE);
5836 if (npages != size / PAGE_SIZE) {
5837 /* 32-bit overflow */
5838 return KERN_INVALID_ARGUMENT;
5839 }
5840
5841 /*
5842 * Obtain a pointer to a subset of the free
5843 * list large enough to satisfy the request;
5844 * the region will be physically contiguous.
5845 */
5846 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
5847
5848 if (pages == VM_PAGE_NULL)
5849 return KERN_NO_SPACE;
5850 /*
5851 * determine need for wakeups
5852 */
5853 if ((vm_page_free_count < vm_page_free_min) ||
5854 ((vm_page_free_count < vm_page_free_target) &&
5855 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
5856 thread_wakeup((event_t) &vm_page_free_wanted);
5857
5858 VM_CHECK_MEMORYSTATUS;
5859
5860 /*
5861 * The CPM pages should now be available and
5862 * ordered by ascending physical address.
5863 */
5864 assert(vm_page_verify_contiguous(pages, npages));
5865
5866 *list = pages;
5867 return KERN_SUCCESS;
5868 }
5869
5870
5871 unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
5872
5873 /*
5874 * when working on a 'run' of pages, it is necessary to hold
5875 * the vm_page_queue_lock (a hot global lock) for certain operations
5876 * on the page... however, the majority of the work can be done
5877 * while merely holding the object lock... in fact there are certain
5878 * collections of pages that don't require any work brokered by the
5879 * vm_page_queue_lock... to mitigate the time spent behind the global
5880 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
5881 * while doing all of the work that doesn't require the vm_page_queue_lock...
5882 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
5883 * necessary work for each page... we will grab the busy bit on the page
5884 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
5885 * if it can't immediately take the vm_page_queue_lock in order to compete
5886 * for the locks in the same order that vm_pageout_scan takes them.
5887 * the operation names are modeled after the names of the routines that
5888 * would otherwise be called directly, which keeps the deferred changes
5889 * obvious when reading the original loop
5890 */
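/*
 * A sketch of the intended caller pattern (illustrative only; real callers
 * size dw_array and pick dw_mask bits to suit, and DEFAULT_DELAYED_WORK_LIMIT
 * is used here just as a plausible batch size):
 *
 *	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
 *	struct vm_page_delayed_work	*dwp = &dw_array[0];
 *	int				dw_count = 0;
 *
 *	// with the object locked, but NOT the vm_page_queue_lock, for each page m:
 *	dwp->dw_m = m;
 *	dwp->dw_mask = DW_vm_page_activate | DW_PAGE_WAKEUP;
 *	dwp++;
 *	dw_count++;
 *	if (dw_count >= vm_max_delayed_work_limit) {
 *		vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
 *		dwp = &dw_array[0];
 *		dw_count = 0;
 *	}
 */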
5891
5892 void
5893 vm_page_do_delayed_work(
5894 vm_object_t object,
5895 vm_tag_t tag,
5896 struct vm_page_delayed_work *dwp,
5897 int dw_count)
5898 {
5899 int j;
5900 vm_page_t m;
5901 vm_page_t local_free_q = VM_PAGE_NULL;
5902
5903 /*
5904 * pageout_scan takes the vm_page_lock_queues first
5905 * then tries for the object lock... to avoid what
5906 * is effectively a lock inversion, we'll go to the
5907 * trouble of taking them in that same order... otherwise
5908 * if this object contains the majority of the pages resident
5909 * in the UBC (or a small set of large objects actively being
5910 * worked on contain the majority of the pages), we could
5911 * cause the pageout_scan thread to 'starve' in its attempt
5912 * to find pages to move to the free queue, since it has to
5913 * successfully acquire the object lock of any candidate page
5914 * before it can steal/clean it.
5915 */
5916 if (!vm_page_trylockspin_queues()) {
5917 vm_object_unlock(object);
5918
5919 vm_page_lockspin_queues();
5920
5921 for (j = 0; ; j++) {
5922 if (!vm_object_lock_avoid(object) &&
5923 _vm_object_lock_try(object))
5924 break;
5925 vm_page_unlock_queues();
5926 mutex_pause(j);
5927 vm_page_lockspin_queues();
5928 }
5929 }
5930 for (j = 0; j < dw_count; j++, dwp++) {
5931
5932 m = dwp->dw_m;
5933
5934 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
5935 vm_pageout_throttle_up(m);
5936 #if CONFIG_PHANTOM_CACHE
5937 if (dwp->dw_mask & DW_vm_phantom_cache_update)
5938 vm_phantom_cache_update(m);
5939 #endif
5940 if (dwp->dw_mask & DW_vm_page_wire)
5941 vm_page_wire(m, tag, FALSE);
5942 else if (dwp->dw_mask & DW_vm_page_unwire) {
5943 boolean_t queueit;
5944
5945 queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;
5946
5947 vm_page_unwire(m, queueit);
5948 }
5949 if (dwp->dw_mask & DW_vm_page_free) {
5950 vm_page_free_prepare_queues(m);
5951
5952 assert(m->pageq.next == 0 && m->pageq.prev == 0);
5953 /*
5954 * Add this page to our list of reclaimed pages,
5955 * to be freed later.
5956 */
5957 m->snext = local_free_q;
5958 local_free_q = m;
5959 } else {
5960 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
5961 vm_page_deactivate_internal(m, FALSE);
5962 else if (dwp->dw_mask & DW_vm_page_activate) {
5963 if (m->vm_page_q_state != VM_PAGE_ON_ACTIVE_Q) {
5964 vm_page_activate(m);
5965 }
5966 }
5967 else if (dwp->dw_mask & DW_vm_page_speculate)
5968 vm_page_speculate(m, TRUE);
5969 else if (dwp->dw_mask & DW_enqueue_cleaned) {
5970 /*
5971 * if we didn't hold the object lock and did this,
5972 * we might disconnect the page, then someone might
5973 * soft fault it back in, then we would put it on the
5974 * cleaned queue, and so we would have a referenced (maybe even dirty)
5975 * page on that queue, which we don't want
5976 */
5977 int refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
5978
5979 if ((refmod_state & VM_MEM_REFERENCED)) {
5980 /*
5981 * this page has been touched since it got cleaned; let's activate it
5982 * if it hasn't already been
5983 */
5984 vm_pageout_enqueued_cleaned++;
5985 vm_pageout_cleaned_reactivated++;
5986 vm_pageout_cleaned_commit_reactivated++;
5987
5988 if (m->vm_page_q_state != VM_PAGE_ON_ACTIVE_Q)
5989 vm_page_activate(m);
5990 } else {
5991 m->reference = FALSE;
5992 vm_page_enqueue_cleaned(m);
5993 }
5994 }
5995 else if (dwp->dw_mask & DW_vm_page_lru)
5996 vm_page_lru(m);
5997 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
5998 if (m->vm_page_q_state != VM_PAGE_ON_PAGEOUT_Q)
5999 vm_page_queues_remove(m, TRUE);
6000 }
6001 if (dwp->dw_mask & DW_set_reference)
6002 m->reference = TRUE;
6003 else if (dwp->dw_mask & DW_clear_reference)
6004 m->reference = FALSE;
6005
6006 if (dwp->dw_mask & DW_move_page) {
6007 if (m->vm_page_q_state != VM_PAGE_ON_PAGEOUT_Q) {
6008 vm_page_queues_remove(m, FALSE);
6009
6010 assert(VM_PAGE_OBJECT(m) != kernel_object);
6011
6012 vm_page_enqueue_inactive(m, FALSE);
6013 }
6014 }
6015 if (dwp->dw_mask & DW_clear_busy)
6016 m->busy = FALSE;
6017
6018 if (dwp->dw_mask & DW_PAGE_WAKEUP)
6019 PAGE_WAKEUP(m);
6020 }
6021 }
6022 vm_page_unlock_queues();
6023
6024 if (local_free_q)
6025 vm_page_free_list(local_free_q, TRUE);
6026
6027 VM_CHECK_MEMORYSTATUS;
6028
6029 }
6030
6031 kern_return_t
6032 vm_page_alloc_list(
6033 int page_count,
6034 int flags,
6035 vm_page_t *list)
6036 {
6037 vm_page_t lo_page_list = VM_PAGE_NULL;
6038 vm_page_t mem;
6039 int i;
6040
6041 if ( !(flags & KMA_LOMEM))
6042 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
6043
6044 for (i = 0; i < page_count; i++) {
6045
6046 mem = vm_page_grablo();
6047
6048 if (mem == VM_PAGE_NULL) {
6049 if (lo_page_list)
6050 vm_page_free_list(lo_page_list, FALSE);
6051
6052 *list = VM_PAGE_NULL;
6053
6054 return (KERN_RESOURCE_SHORTAGE);
6055 }
6056 mem->snext = lo_page_list;
6057 lo_page_list = mem;
6058 }
6059 *list = lo_page_list;
6060
6061 return (KERN_SUCCESS);
6062 }
6063
6064 void
6065 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
6066 {
6067 page->offset = offset;
6068 }
6069
6070 vm_page_t
6071 vm_page_get_next(vm_page_t page)
6072 {
6073 return (page->snext);
6074 }
6075
6076 vm_object_offset_t
6077 vm_page_get_offset(vm_page_t page)
6078 {
6079 return (page->offset);
6080 }
6081
6082 ppnum_t
6083 vm_page_get_phys_page(vm_page_t page)
6084 {
6085 return (VM_PAGE_GET_PHYS_PAGE(page));
6086 }
6087
6088
6089 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
6090
6091 #if HIBERNATION
6092
6093 static vm_page_t hibernate_gobble_queue;
6094
6095 static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
6096 static int hibernate_flush_dirty_pages(int);
6097 static int hibernate_flush_queue(vm_page_queue_head_t *, int);
6098
6099 void hibernate_flush_wait(void);
6100 void hibernate_mark_in_progress(void);
6101 void hibernate_clear_in_progress(void);
6102
6103 void hibernate_free_range(int, int);
6104 void hibernate_hash_insert_page(vm_page_t);
6105 uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
6106 void hibernate_rebuild_vm_structs(void);
6107 uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
6108 ppnum_t hibernate_lookup_paddr(unsigned int);
6109
6110 struct hibernate_statistics {
6111 int hibernate_considered;
6112 int hibernate_reentered_on_q;
6113 int hibernate_found_dirty;
6114 int hibernate_skipped_cleaning;
6115 int hibernate_skipped_transient;
6116 int hibernate_skipped_precious;
6117 int hibernate_skipped_external;
6118 int hibernate_queue_nolock;
6119 int hibernate_queue_paused;
6120 int hibernate_throttled;
6121 int hibernate_throttle_timeout;
6122 int hibernate_drained;
6123 int hibernate_drain_timeout;
6124 int cd_lock_failed;
6125 int cd_found_precious;
6126 int cd_found_wired;
6127 int cd_found_busy;
6128 int cd_found_unusual;
6129 int cd_found_cleaning;
6130 int cd_found_laundry;
6131 int cd_found_dirty;
6132 int cd_found_xpmapped;
6133 int cd_skipped_xpmapped;
6134 int cd_local_free;
6135 int cd_total_free;
6136 int cd_vm_page_wire_count;
6137 int cd_vm_struct_pages_unneeded;
6138 int cd_pages;
6139 int cd_discarded;
6140 int cd_count_wire;
6141 } hibernate_stats;
6142
6143
6144 /*
6145 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
6146 * so that we don't overrun the estimated image size, which would
6147 * result in a hibernation failure.
6148 */
6149 #define HIBERNATE_XPMAPPED_LIMIT 40000
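/* at a 4K page size, 40000 pages is roughly 156 MB of uncompressed page data */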
6150
6151
6152 static int
6153 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
6154 {
6155 wait_result_t wait_result;
6156
6157 vm_page_lock_queues();
6158
6159 while ( !vm_page_queue_empty(&q->pgo_pending) ) {
6160
6161 q->pgo_draining = TRUE;
6162
6163 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
6164
6165 vm_page_unlock_queues();
6166
6167 wait_result = thread_block(THREAD_CONTINUE_NULL);
6168
6169 if (wait_result == THREAD_TIMED_OUT && !vm_page_queue_empty(&q->pgo_pending)) {
6170 hibernate_stats.hibernate_drain_timeout++;
6171
6172 if (q == &vm_pageout_queue_external)
6173 return (0);
6174
6175 return (1);
6176 }
6177 vm_page_lock_queues();
6178
6179 hibernate_stats.hibernate_drained++;
6180 }
6181 vm_page_unlock_queues();
6182
6183 return (0);
6184 }
6185
6186
6187 boolean_t hibernate_skip_external = FALSE;
6188
6189 static int
6190 hibernate_flush_queue(vm_page_queue_head_t *q, int qcount)
6191 {
6192 vm_page_t m;
6193 vm_object_t l_object = NULL;
6194 vm_object_t m_object = NULL;
6195 int refmod_state = 0;
6196 int try_failed_count = 0;
6197 int retval = 0;
6198 int current_run = 0;
6199 struct vm_pageout_queue *iq;
6200 struct vm_pageout_queue *eq;
6201 struct vm_pageout_queue *tq;
6202
6203 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START,
6204 VM_KERNEL_UNSLIDE_OR_PERM(q), qcount);
6205
6206 iq = &vm_pageout_queue_internal;
6207 eq = &vm_pageout_queue_external;
6208
6209 vm_page_lock_queues();
6210
6211 while (qcount && !vm_page_queue_empty(q)) {
6212
6213 if (current_run++ == 1000) {
6214 if (hibernate_should_abort()) {
6215 retval = 1;
6216 break;
6217 }
6218 current_run = 0;
6219 }
6220
6221 m = (vm_page_t) vm_page_queue_first(q);
6222 m_object = VM_PAGE_OBJECT(m);
6223
6224 /*
6225 * check to see if we currently are working
6226 * with the same object... if so, we've
6227 * already got the lock
6228 */
6229 if (m_object != l_object) {
6230 /*
6231 * the object associated with candidate page is
6232 * different from the one we were just working
6233 * with... dump the lock if we still own it
6234 */
6235 if (l_object != NULL) {
6236 vm_object_unlock(l_object);
6237 l_object = NULL;
6238 }
6239 /*
6240 * Try to lock object; since we've already got the
6241 * page queues lock, we can only 'try' for this one.
6242 * if the 'try' fails, we need to do a mutex_pause
6243 * to allow the owner of the object lock a chance to
6244 * run...
6245 */
6246 if ( !vm_object_lock_try_scan(m_object)) {
6247
6248 if (try_failed_count > 20) {
6249 hibernate_stats.hibernate_queue_nolock++;
6250
6251 goto reenter_pg_on_q;
6252 }
6253
6254 vm_page_unlock_queues();
6255 mutex_pause(try_failed_count++);
6256 vm_page_lock_queues();
6257
6258 hibernate_stats.hibernate_queue_paused++;
6259 continue;
6260 } else {
6261 l_object = m_object;
6262 }
6263 }
6264 if ( !m_object->alive || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
6265 /*
6266 * page is not to be cleaned
6267 * put it back on its queue (at the tail)
6268 */
6269 if (m->cleaning)
6270 hibernate_stats.hibernate_skipped_cleaning++;
6271 else
6272 hibernate_stats.hibernate_skipped_transient++;
6273
6274 goto reenter_pg_on_q;
6275 }
6276 if (m_object->copy == VM_OBJECT_NULL) {
6277 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
6278 /*
6279 * let the normal hibernate image path
6280 * deal with these
6281 */
6282 goto reenter_pg_on_q;
6283 }
6284 }
6285 if ( !m->dirty && m->pmapped) {
6286 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
6287
6288 if ((refmod_state & VM_MEM_MODIFIED)) {
6289 SET_PAGE_DIRTY(m, FALSE);
6290 }
6291 } else
6292 refmod_state = 0;
6293
6294 if ( !m->dirty) {
6295 /*
6296 * page is not to be cleaned
6297 * put it back on its queue (at the tail)
6298 */
6299 if (m->precious)
6300 hibernate_stats.hibernate_skipped_precious++;
6301
6302 goto reenter_pg_on_q;
6303 }
6304
6305 if (hibernate_skip_external == TRUE && !m_object->internal) {
6306
6307 hibernate_stats.hibernate_skipped_external++;
6308
6309 goto reenter_pg_on_q;
6310 }
6311 tq = NULL;
6312
6313 if (m_object->internal) {
6314 if (VM_PAGE_Q_THROTTLED(iq))
6315 tq = iq;
6316 } else if (VM_PAGE_Q_THROTTLED(eq))
6317 tq = eq;
6318
6319 if (tq != NULL) {
6320 wait_result_t wait_result;
6321 int wait_count = 5;
6322
6323 if (l_object != NULL) {
6324 vm_object_unlock(l_object);
6325 l_object = NULL;
6326 }
6327
6328 while (retval == 0) {
6329
6330 tq->pgo_throttled = TRUE;
6331
6332 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
6333
6334 vm_page_unlock_queues();
6335
6336 wait_result = thread_block(THREAD_CONTINUE_NULL);
6337
6338 vm_page_lock_queues();
6339
6340 if (wait_result != THREAD_TIMED_OUT)
6341 break;
6342 if (!VM_PAGE_Q_THROTTLED(tq))
6343 break;
6344
6345 if (hibernate_should_abort())
6346 retval = 1;
6347
6348 if (--wait_count == 0) {
6349
6350 hibernate_stats.hibernate_throttle_timeout++;
6351
6352 if (tq == eq) {
6353 hibernate_skip_external = TRUE;
6354 break;
6355 }
6356 retval = 1;
6357 }
6358 }
6359 if (retval)
6360 break;
6361
6362 hibernate_stats.hibernate_throttled++;
6363
6364 continue;
6365 }
6366 /*
6367 * we've already factored out pages in the laundry which
6368 * means this page can't be on the pageout queue so it's
6369 * safe to do the vm_page_queues_remove
6370 */
6371 vm_page_queues_remove(m, TRUE);
6372
6373 if (m_object->internal == TRUE)
6374 pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m), PMAP_OPTIONS_COMPRESSOR, NULL);
6375
6376 vm_pageout_cluster(m);
6377
6378 hibernate_stats.hibernate_found_dirty++;
6379
6380 goto next_pg;
6381
6382 reenter_pg_on_q:
6383 vm_page_queue_remove(q, m, vm_page_t, pageq);
6384 vm_page_queue_enter(q, m, vm_page_t, pageq);
6385
6386 hibernate_stats.hibernate_reentered_on_q++;
6387 next_pg:
6388 hibernate_stats.hibernate_considered++;
6389
6390 qcount--;
6391 try_failed_count = 0;
6392 }
6393 if (l_object != NULL) {
6394 vm_object_unlock(l_object);
6395 l_object = NULL;
6396 }
6397
6398 vm_page_unlock_queues();
6399
6400 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
6401
6402 return (retval);
6403 }
6404
6405
6406 static int
6407 hibernate_flush_dirty_pages(int pass)
6408 {
6409 struct vm_speculative_age_q *aq;
6410 uint32_t i;
6411
6412 if (vm_page_local_q) {
6413 for (i = 0; i < vm_page_local_q_count; i++)
6414 vm_page_reactivate_local(i, TRUE, FALSE);
6415 }
6416
6417 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
6418 int qcount;
6419 vm_page_t m;
6420
6421 aq = &vm_page_queue_speculative[i];
6422
6423 if (vm_page_queue_empty(&aq->age_q))
6424 continue;
6425 qcount = 0;
6426
6427 vm_page_lockspin_queues();
6428
6429 vm_page_queue_iterate(&aq->age_q,
6430 m,
6431 vm_page_t,
6432 pageq)
6433 {
6434 qcount++;
6435 }
6436 vm_page_unlock_queues();
6437
6438 if (qcount) {
6439 if (hibernate_flush_queue(&aq->age_q, qcount))
6440 return (1);
6441 }
6442 }
6443 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
6444 return (1);
6445 /* XXX FBDP TODO: flush secluded queue */
6446 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
6447 return (1);
6448 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
6449 return (1);
6450 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
6451 return (1);
6452
6453 if (pass == 1)
6454 vm_compressor_record_warmup_start();
6455
6456 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
6457 if (pass == 1)
6458 vm_compressor_record_warmup_end();
6459 return (1);
6460 }
6461 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
6462 if (pass == 1)
6463 vm_compressor_record_warmup_end();
6464 return (1);
6465 }
6466 if (pass == 1)
6467 vm_compressor_record_warmup_end();
6468
6469 if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
6470 return (1);
6471
6472 return (0);
6473 }
6474
6475
6476 void
6477 hibernate_reset_stats()
6478 {
6479 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
6480 }
6481
6482
6483 int
6484 hibernate_flush_memory()
6485 {
6486 int retval;
6487
6488 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
6489
6490 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
6491
6492 hibernate_cleaning_in_progress = TRUE;
6493 hibernate_skip_external = FALSE;
6494
6495 if ((retval = hibernate_flush_dirty_pages(1)) == 0) {
6496
6497 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
6498
6499 vm_compressor_flush();
6500
6501 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
6502
6503 if (consider_buffer_cache_collect != NULL) {
6504 unsigned int orig_wire_count;
6505
6506 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6507 orig_wire_count = vm_page_wire_count;
6508
6509 (void)(*consider_buffer_cache_collect)(1);
6510 consider_zone_gc(FALSE);
6511
6512 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
6513
6514 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
6515 }
6516 }
6517 hibernate_cleaning_in_progress = FALSE;
6518
6519 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
6520
6521 if (retval)
6522 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);
6523
6524
6525 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
6526 hibernate_stats.hibernate_considered,
6527 hibernate_stats.hibernate_reentered_on_q,
6528 hibernate_stats.hibernate_found_dirty);
6529 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
6530 hibernate_stats.hibernate_skipped_cleaning,
6531 hibernate_stats.hibernate_skipped_transient,
6532 hibernate_stats.hibernate_skipped_precious,
6533 hibernate_stats.hibernate_skipped_external,
6534 hibernate_stats.hibernate_queue_nolock);
6535 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
6536 hibernate_stats.hibernate_queue_paused,
6537 hibernate_stats.hibernate_throttled,
6538 hibernate_stats.hibernate_throttle_timeout,
6539 hibernate_stats.hibernate_drained,
6540 hibernate_stats.hibernate_drain_timeout);
6541
6542 return (retval);
6543 }
6544
6545
6546 static void
6547 hibernate_page_list_zero(hibernate_page_list_t *list)
6548 {
6549 uint32_t bank;
6550 hibernate_bitmap_t * bitmap;
6551
6552 bitmap = &list->bank_bitmap[0];
6553 for (bank = 0; bank < list->bank_count; bank++)
6554 {
6555 uint32_t last_bit;
6556
6557 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
6558 // set out-of-bound bits at end of bitmap.
6559 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
6560 if (last_bit)
6561 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
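		/*
		 * e.g. a bank covering 40 pages has last_bit == 8, so the mask
		 * 0xFFFFFFFF >> 8 == 0x00FFFFFF: the 24 low-order bits of the
		 * final word, which correspond to no page in this bank, are set
		 * so those positions are never treated as "needs saving".
		 */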
6562
6563 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
6564 }
6565 }
6566
6567 void
6568 hibernate_free_gobble_pages(void)
6569 {
6570 vm_page_t m, next;
6571 uint32_t count = 0;
6572
6573 m = (vm_page_t) hibernate_gobble_queue;
6574 while(m)
6575 {
6576 next = m->snext;
6577 vm_page_free(m);
6578 count++;
6579 m = next;
6580 }
6581 hibernate_gobble_queue = VM_PAGE_NULL;
6582
6583 if (count)
6584 HIBLOG("Freed %d pages\n", count);
6585 }
6586
6587 static boolean_t
6588 hibernate_consider_discard(vm_page_t m, boolean_t preflight)
6589 {
6590 vm_object_t object = NULL;
6591 int refmod_state;
6592 boolean_t discard = FALSE;
6593
6594 do
6595 {
6596 if (m->private)
6597 panic("hibernate_consider_discard: private");
6598
6599 object = VM_PAGE_OBJECT(m);
6600
6601 if (!vm_object_lock_try(object)) {
6602 object = NULL;
6603 if (!preflight) hibernate_stats.cd_lock_failed++;
6604 break;
6605 }
6606 if (VM_PAGE_WIRED(m)) {
6607 if (!preflight) hibernate_stats.cd_found_wired++;
6608 break;
6609 }
6610 if (m->precious) {
6611 if (!preflight) hibernate_stats.cd_found_precious++;
6612 break;
6613 }
6614 if (m->busy || !object->alive) {
6615 /*
6616 * Somebody is playing with this page.
6617 */
6618 if (!preflight) hibernate_stats.cd_found_busy++;
6619 break;
6620 }
6621 if (m->absent || m->unusual || m->error) {
6622 /*
6623 * If it's unusual in any way, ignore it
6624 */
6625 if (!preflight) hibernate_stats.cd_found_unusual++;
6626 break;
6627 }
6628 if (m->cleaning) {
6629 if (!preflight) hibernate_stats.cd_found_cleaning++;
6630 break;
6631 }
6632 if (m->laundry) {
6633 if (!preflight) hibernate_stats.cd_found_laundry++;
6634 break;
6635 }
6636 if (!m->dirty)
6637 {
6638 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
6639
6640 if (refmod_state & VM_MEM_REFERENCED)
6641 m->reference = TRUE;
6642 if (refmod_state & VM_MEM_MODIFIED) {
6643 SET_PAGE_DIRTY(m, FALSE);
6644 }
6645 }
6646
6647 /*
6648 * If it's clean or purgeable we can discard the page on wakeup.
6649 */
6650 discard = (!m->dirty)
6651 || (VM_PURGABLE_VOLATILE == object->purgable)
6652 || (VM_PURGABLE_EMPTY == object->purgable);
6653
6654
6655 if (discard == FALSE) {
6656 if (!preflight)
6657 hibernate_stats.cd_found_dirty++;
6658 } else if (m->xpmapped && m->reference && !object->internal) {
6659 if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
6660 if (!preflight)
6661 hibernate_stats.cd_found_xpmapped++;
6662 discard = FALSE;
6663 } else {
6664 if (!preflight)
6665 hibernate_stats.cd_skipped_xpmapped++;
6666 }
6667 }
6668 }
6669 while (FALSE);
6670
6671 if (object)
6672 vm_object_unlock(object);
6673
6674 return (discard);
6675 }
6676
6677
6678 static void
6679 hibernate_discard_page(vm_page_t m)
6680 {
6681 vm_object_t m_object;
6682
6683 if (m->absent || m->unusual || m->error)
6684 /*
6685 * If it's unusual in any way, ignore it
6686 */
6687 return;
6688
6689 m_object = VM_PAGE_OBJECT(m);
6690
6691 #if MACH_ASSERT || DEBUG
6692 if (!vm_object_lock_try(m_object))
6693 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
6694 #else
6695 /* No need to lock page queue for token delete, hibernate_vm_unlock()
6696 makes sure these locks are uncontended before sleep */
6697 #endif /* MACH_ASSERT || DEBUG */
6698
6699 if (m->pmapped == TRUE)
6700 {
6701 __unused int refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
6702 }
6703
6704 if (m->laundry)
6705 panic("hibernate_discard_page(%p) laundry", m);
6706 if (m->private)
6707 panic("hibernate_discard_page(%p) private", m);
6708 if (m->fictitious)
6709 panic("hibernate_discard_page(%p) fictitious", m);
6710
6711 if (VM_PURGABLE_VOLATILE == m_object->purgable)
6712 {
6713 /* object should be on a queue */
6714 assert((m_object->objq.next != NULL) && (m_object->objq.prev != NULL));
6715 purgeable_q_t old_queue = vm_purgeable_object_remove(m_object);
6716 assert(old_queue);
6717 if (m_object->purgeable_when_ripe) {
6718 vm_purgeable_token_delete_first(old_queue);
6719 }
6720 vm_object_lock_assert_exclusive(m_object);
6721 m_object->purgable = VM_PURGABLE_EMPTY;
6722
6723 /*
6724 * Purgeable ledgers: pages of VOLATILE and EMPTY objects are
6725 * accounted in the "volatile" ledger, so no change here.
6726 * We have to update vm_page_purgeable_count, though, since we're
6727 * effectively purging this object.
6728 */
6729 unsigned int delta;
6730 assert(m_object->resident_page_count >= m_object->wired_page_count);
6731 delta = (m_object->resident_page_count - m_object->wired_page_count);
6732 assert(vm_page_purgeable_count >= delta);
6733 assert(delta > 0);
6734 OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
6735 }
6736
6737 vm_page_free(m);
6738
6739 #if MACH_ASSERT || DEBUG
6740 vm_object_unlock(m_object);
6741 #endif /* MACH_ASSERT || DEBUG */
6742 }
6743
6744 /*
6745 Grab locks for hibernate_page_list_setall()
6746 */
6747 void
6748 hibernate_vm_lock_queues(void)
6749 {
6750 vm_object_lock(compressor_object);
6751 vm_page_lock_queues();
6752 lck_mtx_lock(&vm_page_queue_free_lock);
6753 lck_mtx_lock(&vm_purgeable_queue_lock);
6754
6755 if (vm_page_local_q) {
6756 uint32_t i;
6757 for (i = 0; i < vm_page_local_q_count; i++) {
6758 struct vpl *lq;
6759 lq = &vm_page_local_q[i].vpl_un.vpl;
6760 VPL_LOCK(&lq->vpl_lock);
6761 }
6762 }
6763 }
6764
6765 void
6766 hibernate_vm_unlock_queues(void)
6767 {
6768 if (vm_page_local_q) {
6769 uint32_t i;
6770 for (i = 0; i < vm_page_local_q_count; i++) {
6771 struct vpl *lq;
6772 lq = &vm_page_local_q[i].vpl_un.vpl;
6773 VPL_UNLOCK(&lq->vpl_lock);
6774 }
6775 }
6776 lck_mtx_unlock(&vm_purgeable_queue_lock);
6777 lck_mtx_unlock(&vm_page_queue_free_lock);
6778 vm_page_unlock_queues();
6779 vm_object_unlock(compressor_object);
6780 }
6781
6782 /*
6783 A zero bit in the bitmaps => the page needs to be saved. All pages default to being saved;
6784 pages known to the VM to not need saving are subtracted.
6785 Wired pages to be saved are tracked in page_list_wired, pageable pages in page_list.
6786 */
6787
6788 void
6789 hibernate_page_list_setall(hibernate_page_list_t * page_list,
6790 hibernate_page_list_t * page_list_wired,
6791 hibernate_page_list_t * page_list_pal,
6792 boolean_t preflight,
6793 boolean_t will_discard,
6794 uint32_t * pagesOut)
6795 {
6796 uint64_t start, end, nsec;
6797 vm_page_t m;
6798 vm_page_t next;
6799 uint32_t pages = page_list->page_count;
6800 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
6801 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
6802 uint32_t count_wire = pages;
6803 uint32_t count_discard_active = 0;
6804 uint32_t count_discard_inactive = 0;
6805 uint32_t count_discard_cleaned = 0;
6806 uint32_t count_discard_purgeable = 0;
6807 uint32_t count_discard_speculative = 0;
6808 uint32_t count_discard_vm_struct_pages = 0;
6809 uint32_t i;
6810 uint32_t bank;
6811 hibernate_bitmap_t * bitmap;
6812 hibernate_bitmap_t * bitmap_wired;
6813 boolean_t discard_all;
6814 boolean_t discard;
6815
6816 HIBLOG("hibernate_page_list_setall(preflight %d) start\n", preflight);
6817
6818 if (preflight) {
6819 page_list = NULL;
6820 page_list_wired = NULL;
6821 page_list_pal = NULL;
6822 discard_all = FALSE;
6823 } else {
6824 discard_all = will_discard;
6825 }
6826
6827 #if MACH_ASSERT || DEBUG
6828 if (!preflight)
6829 {
6830 assert(hibernate_vm_locks_are_safe());
6831 vm_page_lock_queues();
6832 if (vm_page_local_q) {
6833 for (i = 0; i < vm_page_local_q_count; i++) {
6834 struct vpl *lq;
6835 lq = &vm_page_local_q[i].vpl_un.vpl;
6836 VPL_LOCK(&lq->vpl_lock);
6837 }
6838 }
6839 }
6840 #endif /* MACH_ASSERT || DEBUG */
6841
6842
6843 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
6844
6845 clock_get_uptime(&start);
6846
6847 if (!preflight) {
6848 hibernate_page_list_zero(page_list);
6849 hibernate_page_list_zero(page_list_wired);
6850 hibernate_page_list_zero(page_list_pal);
6851
6852 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
6853 hibernate_stats.cd_pages = pages;
6854 }
6855
6856 if (vm_page_local_q) {
6857 for (i = 0; i < vm_page_local_q_count; i++)
6858 vm_page_reactivate_local(i, TRUE, !preflight);
6859 }
6860
6861 if (preflight) {
6862 vm_object_lock(compressor_object);
6863 vm_page_lock_queues();
6864 lck_mtx_lock(&vm_page_queue_free_lock);
6865 }
6866
6867 m = (vm_page_t) hibernate_gobble_queue;
6868 while (m)
6869 {
6870 pages--;
6871 count_wire--;
6872 if (!preflight) {
6873 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6874 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6875 }
6876 m = m->snext;
6877 }
6878
6879 if (!preflight) for( i = 0; i < real_ncpus; i++ )
6880 {
6881 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
6882 {
6883 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = m->snext)
6884 {
6885 assert(m->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
6886
6887 pages--;
6888 count_wire--;
6889 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6890 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6891
6892 hibernate_stats.cd_local_free++;
6893 hibernate_stats.cd_total_free++;
6894 }
6895 }
6896 }
6897
6898 for( i = 0; i < vm_colors; i++ )
6899 {
6900 vm_page_queue_iterate(&vm_page_queue_free[i].qhead,
6901 m,
6902 vm_page_t,
6903 pageq)
6904 {
6905 assert(m->vm_page_q_state == VM_PAGE_ON_FREE_Q);
6906
6907 pages--;
6908 count_wire--;
6909 if (!preflight) {
6910 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6911 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6912
6913 hibernate_stats.cd_total_free++;
6914 }
6915 }
6916 }
6917
6918 vm_page_queue_iterate(&vm_lopage_queue_free,
6919 m,
6920 vm_page_t,
6921 pageq)
6922 {
6923 assert(m->vm_page_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
6924
6925 pages--;
6926 count_wire--;
6927 if (!preflight) {
6928 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6929 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6930
6931 hibernate_stats.cd_total_free++;
6932 }
6933 }
6934
6935 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
6936 while (m && !vm_page_queue_end(&vm_page_queue_throttled, (vm_page_queue_entry_t)m))
6937 {
6938 assert(m->vm_page_q_state == VM_PAGE_ON_THROTTLED_Q);
6939
6940 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
6941 discard = FALSE;
6942 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
6943 && hibernate_consider_discard(m, preflight))
6944 {
6945 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6946 count_discard_inactive++;
6947 discard = discard_all;
6948 }
6949 else
6950 count_throttled++;
6951 count_wire--;
6952 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6953
6954 if (discard) hibernate_discard_page(m);
6955 m = next;
6956 }
6957
6958 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
6959 while (m && !vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t)m))
6960 {
6961 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
6962
6963 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
6964 discard = FALSE;
6965 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
6966 && hibernate_consider_discard(m, preflight))
6967 {
6968 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6969 if (m->dirty)
6970 count_discard_purgeable++;
6971 else
6972 count_discard_inactive++;
6973 discard = discard_all;
6974 }
6975 else
6976 count_anonymous++;
6977 count_wire--;
6978 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6979 if (discard) hibernate_discard_page(m);
6980 m = next;
6981 }
6982
6983 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
6984 while (m && !vm_page_queue_end(&vm_page_queue_cleaned, (vm_page_queue_entry_t)m))
6985 {
6986 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
6987
6988 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
6989 discard = FALSE;
6990 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
6991 && hibernate_consider_discard(m, preflight))
6992 {
6993 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6994 if (m->dirty)
6995 count_discard_purgeable++;
6996 else
6997 count_discard_cleaned++;
6998 discard = discard_all;
6999 }
7000 else
7001 count_cleaned++;
7002 count_wire--;
7003 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7004 if (discard) hibernate_discard_page(m);
7005 m = next;
7006 }
7007
7008 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
7009 while (m && !vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t)m))
7010 {
7011 assert(m->vm_page_q_state == VM_PAGE_ON_ACTIVE_Q);
7012
7013 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
7014 discard = FALSE;
7015 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
7016 && hibernate_consider_discard(m, preflight))
7017 {
7018 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7019 if (m->dirty)
7020 count_discard_purgeable++;
7021 else
7022 count_discard_active++;
7023 discard = discard_all;
7024 }
7025 else
7026 count_active++;
7027 count_wire--;
7028 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7029 if (discard) hibernate_discard_page(m);
7030 m = next;
7031 }
7032
7033 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
7034 while (m && !vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t)m))
7035 {
7036 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
7037
7038 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
7039 discard = FALSE;
7040 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
7041 && hibernate_consider_discard(m, preflight))
7042 {
7043 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7044 if (m->dirty)
7045 count_discard_purgeable++;
7046 else
7047 count_discard_inactive++;
7048 discard = discard_all;
7049 }
7050 else
7051 count_inactive++;
7052 count_wire--;
7053 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7054 if (discard) hibernate_discard_page(m);
7055 m = next;
7056 }
7057 /* XXX FBDP TODO: secluded queue */
7058
7059 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
7060 {
7061 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_speculative[i].age_q);
7062 while (m && !vm_page_queue_end(&vm_page_queue_speculative[i].age_q, (vm_page_queue_entry_t)m))
7063 {
7064 assert(m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q);
7065
7066 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
7067 discard = FALSE;
7068 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
7069 && hibernate_consider_discard(m, preflight))
7070 {
7071 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7072 count_discard_speculative++;
7073 discard = discard_all;
7074 }
7075 else
7076 count_speculative++;
7077 count_wire--;
7078 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7079 if (discard) hibernate_discard_page(m);
7080 m = next;
7081 }
7082 }
7083
7084 vm_page_queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
7085 {
7086 assert(m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR);
7087
7088 count_compressor++;
7089 count_wire--;
7090 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7091 }
7092
7093 if (preflight == FALSE && discard_all == TRUE) {
7094 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START);
7095
7096 HIBLOG("hibernate_teardown started\n");
7097 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
7098 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
7099
7100 pages -= count_discard_vm_struct_pages;
7101 count_wire -= count_discard_vm_struct_pages;
7102
7103 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
7104
7105 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_END);
7106 }
7107
7108 if (!preflight) {
7109 // pull wired from hibernate_bitmap
7110 bitmap = &page_list->bank_bitmap[0];
7111 bitmap_wired = &page_list_wired->bank_bitmap[0];
7112 for (bank = 0; bank < page_list->bank_count; bank++)
7113 {
7114 for (i = 0; i < bitmap->bitmapwords; i++)
7115 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
7116 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
7117 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
7118 }
7119 }
7120
7121 // machine dependent adjustments
7122 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
7123
7124 if (!preflight) {
7125 hibernate_stats.cd_count_wire = count_wire;
7126 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
7127 count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
7128 }
7129
7130 clock_get_uptime(&end);
7131 absolutetime_to_nanoseconds(end - start, &nsec);
7132 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
7133
7134 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
7135 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
7136 discard_all ? "did" : "could",
7137 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
7138
7139 if (hibernate_stats.cd_skipped_xpmapped)
7140 HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);
7141
7142 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
7143
7144 if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
7145
7146 #if MACH_ASSERT || DEBUG
7147 if (!preflight)
7148 {
7149 if (vm_page_local_q) {
7150 for (i = 0; i < vm_page_local_q_count; i++) {
7151 struct vpl *lq;
7152 lq = &vm_page_local_q[i].vpl_un.vpl;
7153 VPL_UNLOCK(&lq->vpl_lock);
7154 }
7155 }
7156 vm_page_unlock_queues();
7157 }
7158 #endif /* MACH_ASSERT || DEBUG */
7159
7160 if (preflight) {
7161 lck_mtx_unlock(&vm_page_queue_free_lock);
7162 vm_page_unlock_queues();
7163 vm_object_unlock(compressor_object);
7164 }
7165
7166 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
7167 }
7168
7169 void
7170 hibernate_page_list_discard(hibernate_page_list_t * page_list)
7171 {
7172 uint64_t start, end, nsec;
7173 vm_page_t m;
7174 vm_page_t next;
7175 uint32_t i;
7176 uint32_t count_discard_active = 0;
7177 uint32_t count_discard_inactive = 0;
7178 uint32_t count_discard_purgeable = 0;
7179 uint32_t count_discard_cleaned = 0;
7180 uint32_t count_discard_speculative = 0;
7181
7182
7183 #if MACH_ASSERT || DEBUG
7184 vm_page_lock_queues();
7185 if (vm_page_local_q) {
7186 for (i = 0; i < vm_page_local_q_count; i++) {
7187 struct vpl *lq;
7188 lq = &vm_page_local_q[i].vpl_un.vpl;
7189 VPL_LOCK(&lq->vpl_lock);
7190 }
7191 }
7192 #endif /* MACH_ASSERT || DEBUG */
7193
7194 clock_get_uptime(&start);
7195
7196 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
7197 while (m && !vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t)m))
7198 {
7199 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
7200
7201 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
7202 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7203 {
7204 if (m->dirty)
7205 count_discard_purgeable++;
7206 else
7207 count_discard_inactive++;
7208 hibernate_discard_page(m);
7209 }
7210 m = next;
7211 }
7212
7213 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
7214 {
7215 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_speculative[i].age_q);
7216 while (m && !vm_page_queue_end(&vm_page_queue_speculative[i].age_q, (vm_page_queue_entry_t)m))
7217 {
7218 assert(m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q);
7219
7220 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
7221 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7222 {
7223 count_discard_speculative++;
7224 hibernate_discard_page(m);
7225 }
7226 m = next;
7227 }
7228 }
7229
7230 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
7231 while (m && !vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t)m))
7232 {
7233 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
7234
7235 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
7236 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7237 {
7238 if (m->dirty)
7239 count_discard_purgeable++;
7240 else
7241 count_discard_inactive++;
7242 hibernate_discard_page(m);
7243 }
7244 m = next;
7245 }
7246 /* XXX FBDP TODO: secluded queue */
7247
7248 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
7249 while (m && !vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t)m))
7250 {
7251 assert(m->vm_page_q_state == VM_PAGE_ON_ACTIVE_Q);
7252
7253 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
7254 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7255 {
7256 if (m->dirty)
7257 count_discard_purgeable++;
7258 else
7259 count_discard_active++;
7260 hibernate_discard_page(m);
7261 }
7262 m = next;
7263 }
7264
7265 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
7266 while (m && !vm_page_queue_end(&vm_page_queue_cleaned, (vm_page_queue_entry_t)m))
7267 {
7268 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
7269
7270 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
7271 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7272 {
7273 if (m->dirty)
7274 count_discard_purgeable++;
7275 else
7276 count_discard_cleaned++;
7277 hibernate_discard_page(m);
7278 }
7279 m = next;
7280 }
7281
7282 #if MACH_ASSERT || DEBUG
7283 if (vm_page_local_q) {
7284 for (i = 0; i < vm_page_local_q_count; i++) {
7285 struct vpl *lq;
7286 lq = &vm_page_local_q[i].vpl_un.vpl;
7287 VPL_UNLOCK(&lq->vpl_lock);
7288 }
7289 }
7290 vm_page_unlock_queues();
7291 #endif /* MACH_ASSERT || DEBUG */
7292
7293 clock_get_uptime(&end);
7294 absolutetime_to_nanoseconds(end - start, &nsec);
7295 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
7296 nsec / 1000000ULL,
7297 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
7298 }
7299
7300 boolean_t hibernate_paddr_map_inited = FALSE;
7301 unsigned int hibernate_teardown_last_valid_compact_indx = -1;
7302 vm_page_t hibernate_rebuild_hash_list = NULL;
7303
7304 unsigned int hibernate_teardown_found_tabled_pages = 0;
7305 unsigned int hibernate_teardown_found_created_pages = 0;
7306 unsigned int hibernate_teardown_found_free_pages = 0;
7307 unsigned int hibernate_teardown_vm_page_free_count;
7308
7309
7310 struct ppnum_mapping {
7311 struct ppnum_mapping *ppnm_next;
7312 ppnum_t ppnm_base_paddr;
7313 unsigned int ppnm_sindx;
7314 unsigned int ppnm_eindx;
7315 };
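/*
 * Each ppnum_mapping records one run of vm_pages[] entries whose physical
 * page numbers are consecutive.  For example, if vm_pages[100..149] describe
 * physical pages 0x8000..0x8031, the run is recorded as { ppnm_base_paddr =
 * 0x8000, ppnm_sindx = 100, ppnm_eindx = 150 } (eindx is exclusive), and
 * hibernate_lookup_paddr(120) returns 0x8000 + (120 - 100) == 0x8014.
 */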
7316
7317 struct ppnum_mapping *ppnm_head;
7318 struct ppnum_mapping *ppnm_last_found = NULL;
7319
7320
7321 void
7322 hibernate_create_paddr_map()
7323 {
7324 unsigned int i;
7325 ppnum_t next_ppnum_in_run = 0;
7326 struct ppnum_mapping *ppnm = NULL;
7327
7328 if (hibernate_paddr_map_inited == FALSE) {
7329
7330 for (i = 0; i < vm_pages_count; i++) {
7331
7332 if (ppnm)
7333 ppnm->ppnm_eindx = i;
7334
7335 if (ppnm == NULL || VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) != next_ppnum_in_run) {
7336
7337 ppnm = kalloc(sizeof(struct ppnum_mapping));
7338
7339 ppnm->ppnm_next = ppnm_head;
7340 ppnm_head = ppnm;
7341
7342 ppnm->ppnm_sindx = i;
7343 ppnm->ppnm_base_paddr = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]);
7344 }
7345 next_ppnum_in_run = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) + 1;
7346 }
7347 ppnm->ppnm_eindx++;
7348
7349 hibernate_paddr_map_inited = TRUE;
7350 }
7351 }
7352
7353 ppnum_t
7354 hibernate_lookup_paddr(unsigned int indx)
7355 {
7356 struct ppnum_mapping *ppnm = NULL;
7357
7358 ppnm = ppnm_last_found;
7359
7360 if (ppnm) {
7361 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
7362 goto done;
7363 }
7364 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
7365
7366 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
7367 ppnm_last_found = ppnm;
7368 break;
7369 }
7370 }
7371 if (ppnm == NULL)
7372 panic("hibernate_lookup_paddr of %d failed\n", indx);
7373 done:
7374 return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
7375 }
7376
7377
7378 uint32_t
7379 hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
7380 {
7381 addr64_t saddr_aligned;
7382 addr64_t eaddr_aligned;
7383 addr64_t addr;
7384 ppnum_t paddr;
7385 unsigned int mark_as_unneeded_pages = 0;
7386
7387 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
7388 eaddr_aligned = eaddr & ~PAGE_MASK_64;
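	/*
	 * e.g. with 4K pages, saddr 0x1800 rounds up to 0x2000 and eaddr
	 * 0x5400 rounds down to 0x5000, so only the fully covered pages at
	 * 0x2000, 0x3000 and 0x4000 (3 pages) are marked as unneeded.
	 */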
7389
7390 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
7391
7392 paddr = pmap_find_phys(kernel_pmap, addr);
7393
7394 assert(paddr);
7395
7396 hibernate_page_bitset(page_list, TRUE, paddr);
7397 hibernate_page_bitset(page_list_wired, TRUE, paddr);
7398
7399 mark_as_unneeded_pages++;
7400 }
7401 return (mark_as_unneeded_pages);
7402 }
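
/*
 * hibernate_mark_as_unneeded() only marks pages that lie entirely
 * inside [saddr, eaddr): the start is rounded up to a page boundary
 * and the end rounded down.  A minimal sketch of that rounding,
 * assuming a hypothetical 4K page size; illustration only, not part
 * of the build.
 */
#if 0
#include <assert.h>
#include <stdint.h>

#define SKETCH_PAGE_SIZE 4096ULL
#define SKETCH_PAGE_MASK (SKETCH_PAGE_SIZE - 1)

/* count the whole pages strictly contained in [saddr, eaddr) */
static uint64_t
whole_pages_within(uint64_t saddr, uint64_t eaddr)
{
	uint64_t saddr_aligned = (saddr + SKETCH_PAGE_MASK) & ~SKETCH_PAGE_MASK;	/* round up */
	uint64_t eaddr_aligned = eaddr & ~SKETCH_PAGE_MASK;				/* round down */

	if (eaddr_aligned <= saddr_aligned)
		return 0;
	return (eaddr_aligned - saddr_aligned) / SKETCH_PAGE_SIZE;
}

static void
whole_pages_within_check(void)
{
	assert(whole_pages_within(0x1000, 0x3000) == 2);	/* two full pages */
	assert(whole_pages_within(0x1001, 0x3000) == 1);	/* partial first page dropped */
	assert(whole_pages_within(0x1001, 0x1fff) == 0);	/* no whole page in range */
}
#endif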
7403
7404
7405 void
7406 hibernate_hash_insert_page(vm_page_t mem)
7407 {
7408 vm_page_bucket_t *bucket;
7409 int hash_id;
7410 vm_object_t m_object;
7411
7412 m_object = VM_PAGE_OBJECT(mem);
7413
7414 assert(mem->hashed);
7415 assert(m_object);
7416 assert(mem->offset != (vm_object_offset_t) -1);
7417
7418 /*
7419 * Insert it into the vm_object/offset hash table
7420 */
7421 hash_id = vm_page_hash(m_object, mem->offset);
7422 bucket = &vm_page_buckets[hash_id];
7423
7424 mem->next_m = bucket->page_list;
7425 bucket->page_list = VM_PAGE_PACK_PTR(mem);
7426 }
7427
7428
7429 void
7430 hibernate_free_range(int sindx, int eindx)
7431 {
7432 vm_page_t mem;
7433 unsigned int color;
7434
7435 while (sindx < eindx) {
7436 mem = &vm_pages[sindx];
7437
7438 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
7439
7440 mem->lopage = FALSE;
7441 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
7442
7443 color = VM_PAGE_GET_COLOR(mem);
7444 #if defined(__x86_64__)
7445 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead,
7446 mem,
7447 vm_page_t,
7448 pageq);
7449 #else
7450 vm_page_queue_enter(&vm_page_queue_free[color].qhead,
7451 mem,
7452 vm_page_t,
7453 pageq);
7454 #endif
7455 vm_page_free_count++;
7456
7457 sindx++;
7458 }
7459 }
7460
7461
7462 extern void hibernate_rebuild_pmap_structs(void);
7463
7464 void
7465 hibernate_rebuild_vm_structs(void)
7466 {
7467 int i, cindx, sindx, eindx;
7468 vm_page_t mem, tmem, mem_next;
7469 AbsoluteTime startTime, endTime;
7470 uint64_t nsec;
7471
7472 if (hibernate_rebuild_needed == FALSE)
7473 return;
7474
7475 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START);
7476 HIBLOG("hibernate_rebuild started\n");
7477
7478 clock_get_uptime(&startTime);
7479
7480 hibernate_rebuild_pmap_structs();
7481
7482 bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
7483 eindx = vm_pages_count;
7484
7485 /*
7486 * Mark all the vm_pages[] that have not been initialized yet as being
7487 * transient. This is needed to ensure that the buddy page search is correct.
7488 * Without this, random data in these vm_pages[] entries can trip up the buddy search
7489 */
7490 for (i = hibernate_teardown_last_valid_compact_indx+1; i < eindx; ++i)
7491 vm_pages[i].vm_page_q_state = VM_PAGE_NOT_ON_Q;
7492
7493 for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
7494
7495 mem = &vm_pages[cindx];
7496 assert(mem->vm_page_q_state != VM_PAGE_ON_FREE_Q);
7497 /*
7498 * hibernate_teardown_vm_structs leaves the location where
7499 * this vm_page_t must be located in "next_m".
7500 */
7501 tmem = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->next_m));
7502 mem->next_m = VM_PAGE_PACK_PTR(NULL);
7503
7504 sindx = (int)(tmem - &vm_pages[0]);
7505
7506 if (mem != tmem) {
7507 /*
7508 * this vm_page_t was moved by hibernate_teardown_vm_structs,
7509 * so move it back to its real location
7510 */
7511 *tmem = *mem;
7512 mem = tmem;
7513 }
7514 if (mem->hashed)
7515 hibernate_hash_insert_page(mem);
7516 /*
7517 * the 'hole' between this vm_page_t and the previous
7518 * vm_page_t we moved needs to be initialized as
7519 * a range of free vm_page_t's
7520 */
7521 hibernate_free_range(sindx + 1, eindx);
7522
7523 eindx = sindx;
7524 }
7525 if (sindx)
7526 hibernate_free_range(0, sindx);
7527
7528 assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);
7529
7530 /*
7531 * process the list of vm_page_t's that were entered in the hash,
7532 * but were not located in the vm_pages array... these are
7533 * vm_page_t's that were created on the fly (i.e. fictitious)
7534 */
7535 for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
7536 mem_next = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->next_m));
7537
7538 mem->next_m = 0;
7539 hibernate_hash_insert_page(mem);
7540 }
7541 hibernate_rebuild_hash_list = NULL;
7542
7543 clock_get_uptime(&endTime);
7544 SUB_ABSOLUTETIME(&endTime, &startTime);
7545 absolutetime_to_nanoseconds(endTime, &nsec);
7546
7547 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);
7548
7549 hibernate_rebuild_needed = FALSE;
7550
7551 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END);
7552 }
7553
7554
7555 extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);
7556
7557 uint32_t
7558 hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
7559 {
7560 unsigned int i;
7561 unsigned int compact_target_indx;
7562 vm_page_t mem, mem_next;
7563 vm_page_bucket_t *bucket;
7564 unsigned int mark_as_unneeded_pages = 0;
7565 unsigned int unneeded_vm_page_bucket_pages = 0;
7566 unsigned int unneeded_vm_pages_pages = 0;
7567 unsigned int unneeded_pmap_pages = 0;
7568 addr64_t start_of_unneeded = 0;
7569 addr64_t end_of_unneeded = 0;
7570
7571
7572 if (hibernate_should_abort())
7573 return (0);
7574
7575 hibernate_rebuild_needed = TRUE;
7576
7577 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
7578 vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
7579 vm_page_cleaned_count, compressor_object->resident_page_count);
7580
7581 for (i = 0; i < vm_page_bucket_count; i++) {
7582
7583 bucket = &vm_page_buckets[i];
7584
7585 for (mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list)); mem != VM_PAGE_NULL; mem = mem_next) {
7586 assert(mem->hashed);
7587
7588 mem_next = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->next_m));
7589
7590 if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
7591 mem->next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
7592 hibernate_rebuild_hash_list = mem;
7593 }
7594 }
7595 }
7596 unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
7597 mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;
7598
7599 hibernate_teardown_vm_page_free_count = vm_page_free_count;
7600
7601 compact_target_indx = 0;
7602
7603 for (i = 0; i < vm_pages_count; i++) {
7604
7605 mem = &vm_pages[i];
7606
7607 if (mem->vm_page_q_state == VM_PAGE_ON_FREE_Q) {
7608 unsigned int color;
7609
7610 assert(mem->busy);
7611 assert(!mem->lopage);
7612
7613 color = VM_PAGE_GET_COLOR(mem);
7614
7615 vm_page_queue_remove(&vm_page_queue_free[color].qhead,
7616 mem,
7617 vm_page_t,
7618 pageq);
7619
7620 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
7621
7622 vm_page_free_count--;
7623
7624 hibernate_teardown_found_free_pages++;
7625
7626 if (vm_pages[compact_target_indx].vm_page_q_state != VM_PAGE_ON_FREE_Q)
7627 compact_target_indx = i;
7628 } else {
7629 /*
7630 * record this vm_page_t's original location
7631 * we need this even if it doesn't get moved
7632 * as an indicator to the rebuild function that
7633 * we don't have to move it
7634 */
7635 mem->next_m = VM_PAGE_PACK_PTR(mem);
7636
7637 if (vm_pages[compact_target_indx].vm_page_q_state == VM_PAGE_ON_FREE_Q) {
7638 /*
7639 * we've got a hole to fill, so
7640 * move this vm_page_t to its new home
7641 */
7642 vm_pages[compact_target_indx] = *mem;
7643 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
7644
7645 hibernate_teardown_last_valid_compact_indx = compact_target_indx;
7646 compact_target_indx++;
7647 } else
7648 hibernate_teardown_last_valid_compact_indx = i;
7649 }
7650 }
7651 unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
7652 (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
7653 mark_as_unneeded_pages += unneeded_vm_pages_pages;
7654
7655 hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);
7656
7657 if (start_of_unneeded) {
7658 unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
7659 mark_as_unneeded_pages += unneeded_pmap_pages;
7660 }
7661 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);
7662
7663 return (mark_as_unneeded_pages);
7664 }
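
/*
 * How teardown and rebuild fit together, as a minimal sketch: teardown
 * compacts the live entries toward the front of the array, remembering
 * each entry's original slot (the kernel packs that into next_m), so
 * the tail of the array can be handed off as unneeded; rebuild walks
 * the compacted prefix backwards and copies every entry home again.
 * The int array, home[] side table and names are hypothetical
 * simplifications; illustration only, not part of the build.
 */
#if 0
#include <stddef.h>

#define NPAGES 8

static int    pages[NPAGES] = { 11, 0, 22, 0, 0, 33, 44, 0 };	/* 0 == free slot */
static size_t home[NPAGES];		/* original index of the entry now stored here */
static size_t last_valid_compact;	/* last index of the compacted prefix */

static void
teardown_compact(void)
{
	size_t target = 0;

	for (size_t i = 0; i < NPAGES; i++) {
		if (pages[i] == 0)
			continue;			/* free: leave the hole for now */
		home[i] = i;				/* record the original location */
		if (pages[target] == 0) {		/* hole to fill at 'target' */
			pages[target] = pages[i];
			home[target] = i;
			pages[i] = 0;
			last_valid_compact = target++;
		} else {
			last_valid_compact = i;
			target = i + 1;
		}
	}
	/* everything past last_valid_compact is now reclaimable */
}

static void
rebuild_restore(void)
{
	for (size_t c = last_valid_compact + 1; c-- > 0; ) {
		size_t orig = home[c];

		if (orig != c) {
			pages[orig] = pages[c];		/* move it back to its real slot */
			pages[c] = 0;
		}
	}
}
#endif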
7665
7666
7667 #endif /* HIBERNATION */
7668
7669 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
7670
7671 #include <mach_vm_debug.h>
7672 #if MACH_VM_DEBUG
7673
7674 #include <mach_debug/hash_info.h>
7675 #include <vm/vm_debug.h>
7676
7677 /*
7678 * Routine: vm_page_info
7679 * Purpose:
7680 * Return information about the global VP table.
7681 * Fills the buffer with as much information as possible
7682 * and returns the desired size of the buffer.
7683 * Conditions:
7684 * Nothing locked. The caller should provide
7685 * possibly-pageable memory.
7686 */
7687
7688 unsigned int
7689 vm_page_info(
7690 hash_info_bucket_t *info,
7691 unsigned int count)
7692 {
7693 unsigned int i;
7694 lck_spin_t *bucket_lock;
7695
7696 if (vm_page_bucket_count < count)
7697 count = vm_page_bucket_count;
7698
7699 for (i = 0; i < count; i++) {
7700 vm_page_bucket_t *bucket = &vm_page_buckets[i];
7701 unsigned int bucket_count = 0;
7702 vm_page_t m;
7703
7704 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
7705 lck_spin_lock(bucket_lock);
7706
7707 for (m = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
7708 m != VM_PAGE_NULL;
7709 m = (vm_page_t)(VM_PAGE_UNPACK_PTR(m->next_m)))
7710 bucket_count++;
7711
7712 lck_spin_unlock(bucket_lock);
7713
7714 /* don't touch pageable memory while holding locks */
7715 info[i].hib_count = bucket_count;
7716 }
7717
7718 return vm_page_bucket_count;
7719 }
7720 #endif /* MACH_VM_DEBUG */
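
/*
 * vm_page_info() above takes the bucket's striped spin lock (one lock
 * covers BUCKETS_PER_LOCK consecutive buckets), counts the chain, and
 * only writes into the possibly-pageable output buffer after dropping
 * the lock.  A minimal user-space sketch of that lock striping, using
 * pthread mutexes and hypothetical names; illustration only, not part
 * of the build.
 */
#if 0
#include <pthread.h>
#include <stddef.h>

#define SKETCH_BUCKET_COUNT	64
#define SKETCH_BUCKETS_PER_LOCK	16	/* one lock per 16 consecutive buckets */

struct node { struct node *next; };

static struct node	*buckets[SKETCH_BUCKET_COUNT];
static pthread_mutex_t	 bucket_locks[SKETCH_BUCKET_COUNT / SKETCH_BUCKETS_PER_LOCK] = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
};

/* count the chain in one bucket under its shared, striped lock */
static unsigned int
bucket_chain_length(unsigned int i)
{
	pthread_mutex_t *lock = &bucket_locks[i / SKETCH_BUCKETS_PER_LOCK];
	unsigned int count = 0;

	pthread_mutex_lock(lock);
	for (struct node *n = buckets[i]; n != NULL; n = n->next)
		count++;
	pthread_mutex_unlock(lock);

	return count;	/* caller copies this out with no locks held */
}
#endif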
7721
7722 #if VM_PAGE_BUCKETS_CHECK
7723 void
7724 vm_page_buckets_check(void)
7725 {
7726 unsigned int i;
7727 vm_page_t p;
7728 unsigned int p_hash;
7729 vm_page_bucket_t *bucket;
7730 lck_spin_t *bucket_lock;
7731
7732 if (!vm_page_buckets_check_ready) {
7733 return;
7734 }
7735
7736 #if HIBERNATION
7737 if (hibernate_rebuild_needed ||
7738 hibernate_rebuild_hash_list) {
7739 panic("BUCKET_CHECK: hibernation in progress: "
7740 "rebuild_needed=%d rebuild_hash_list=%p\n",
7741 hibernate_rebuild_needed,
7742 hibernate_rebuild_hash_list);
7743 }
7744 #endif /* HIBERNATION */
7745
7746 #if VM_PAGE_FAKE_BUCKETS
7747 char *cp;
7748 for (cp = (char *) vm_page_fake_buckets_start;
7749 cp < (char *) vm_page_fake_buckets_end;
7750 cp++) {
7751 if (*cp != 0x5a) {
7752 panic("BUCKET_CHECK: corruption at %p in fake buckets "
7753 "[0x%llx:0x%llx]\n",
7754 cp,
7755 (uint64_t) vm_page_fake_buckets_start,
7756 (uint64_t) vm_page_fake_buckets_end);
7757 }
7758 }
7759 #endif /* VM_PAGE_FAKE_BUCKETS */
7760
7761 for (i = 0; i < vm_page_bucket_count; i++) {
7762 vm_object_t p_object;
7763
7764 bucket = &vm_page_buckets[i];
7765 if (!bucket->page_list) {
7766 continue;
7767 }
7768
7769 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
7770 lck_spin_lock(bucket_lock);
7771 p = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
7772
7773 while (p != VM_PAGE_NULL) {
7774 p_object = VM_PAGE_OBJECT(p);
7775 p_hash = vm_page_hash(p_object, p->offset);
7776 if (!p->hashed) {
7777 panic("BUCKET_CHECK: page %p (%p,0x%llx) "
7778 "hash %d in bucket %d at %p "
7779 "is not hashed\n",
7780 p, p_object, p->offset,
7781 p_hash, i, bucket);
7782 }
7783
7784 if (p_hash != i) {
7785 panic("BUCKET_CHECK: corruption in bucket %d "
7786 "at %p: page %p object %p offset 0x%llx "
7787 "hash %d\n",
7788 i, bucket, p, p_object, p->offset,
7789 p_hash);
7790 }
7791 p = (vm_page_t)(VM_PAGE_UNPACK_PTR(p->next_m));
7792 }
7793 lck_spin_unlock(bucket_lock);
7794 }
7795
7796 // printf("BUCKET_CHECK: checked buckets\n");
7797 }
7798 #endif /* VM_PAGE_BUCKETS_CHECK */
7799
7800 /*
7801 * 'vm_fault_enter' will place newly created pages (zero-fill and COW) onto the
7802 * local queues if they exist... it's the only spot in the system where we add pages
7803 * to those queues... once on those queues, those pages can only move to one of the
7804 * global page queues or the free queues... they NEVER move from local q to local q.
7805 * the 'local' state is stable when vm_page_queues_remove is called since we're behind
7806 * the global vm_page_queue_lock at this point... we still need to take the local lock
7807 * in case this operation is being run on a different CPU than the one that owns the local queue,
7808 * but we don't have to worry about the page moving to a global queue or becoming wired
7809 * while we're grabbing the local lock since those operations would require the global
7810 * vm_page_queue_lock to be held, and we already own it.
7811 *
7812 * this is why it's safe to utilize the wire_count field in the vm_page_t as the local_id...
7813 * 'wired' and local are ALWAYS mutually exclusive conditions.
7814 */
7815
7816 #if CONFIG_BACKGROUND_QUEUE
7817 void
7818 vm_page_queues_remove(vm_page_t mem, boolean_t remove_from_backgroundq)
7819 #else
7820 void
7821 vm_page_queues_remove(vm_page_t mem, boolean_t __unused remove_from_backgroundq)
7822 #endif
7823 {
7824 boolean_t was_pageable = TRUE;
7825 vm_object_t m_object;
7826
7827 m_object = VM_PAGE_OBJECT(mem);
7828
7829 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
7830
7831 if (mem->vm_page_q_state == VM_PAGE_NOT_ON_Q)
7832 {
7833 assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
7834 #if CONFIG_BACKGROUND_QUEUE
7835 if (remove_from_backgroundq == TRUE) {
7836 vm_page_remove_from_backgroundq(mem);
7837 }
7838 if (mem->vm_page_on_backgroundq) {
7839 assert(mem->vm_page_backgroundq.next != 0);
7840 assert(mem->vm_page_backgroundq.prev != 0);
7841 } else {
7842 assert(mem->vm_page_backgroundq.next == 0);
7843 assert(mem->vm_page_backgroundq.prev == 0);
7844 }
7845 #endif /* CONFIG_BACKGROUND_QUEUE */
7846 return;
7847 }
7848
7849 if (mem->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR)
7850 {
7851 assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
7852 #if CONFIG_BACKGROUND_QUEUE
7853 assert(mem->vm_page_backgroundq.next == 0 &&
7854 mem->vm_page_backgroundq.prev == 0 &&
7855 mem->vm_page_on_backgroundq == FALSE);
7856 #endif
7857 return;
7858 }
7859 if (mem->vm_page_q_state == VM_PAGE_IS_WIRED) {
7860 /*
7861 * might put these guys on a list for debugging purposes
7862 * if we do, we'll need to remove this assert
7863 */
7864 assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
7865 #if CONFIG_BACKGROUND_QUEUE
7866 assert(mem->vm_page_backgroundq.next == 0 &&
7867 mem->vm_page_backgroundq.prev == 0 &&
7868 mem->vm_page_on_backgroundq == FALSE);
7869 #endif
7870 return;
7871 }
7872
7873 assert(m_object != compressor_object);
7874 assert(m_object != kernel_object);
7875 assert(m_object != vm_submap_object);
7876 assert(!mem->fictitious);
7877
7878 switch(mem->vm_page_q_state) {
7879
7880 case VM_PAGE_ON_ACTIVE_LOCAL_Q:
7881 {
7882 struct vpl *lq;
7883
7884 lq = &vm_page_local_q[mem->local_id].vpl_un.vpl;
7885 VPL_LOCK(&lq->vpl_lock);
7886 vm_page_queue_remove(&lq->vpl_queue,
7887 mem, vm_page_t, pageq);
7888 mem->local_id = 0;
7889 lq->vpl_count--;
7890 if (m_object->internal) {
7891 lq->vpl_internal_count--;
7892 } else {
7893 lq->vpl_external_count--;
7894 }
7895 VPL_UNLOCK(&lq->vpl_lock);
7896 was_pageable = FALSE;
7897 break;
7898 }
7899 case VM_PAGE_ON_ACTIVE_Q:
7900 {
7901 vm_page_queue_remove(&vm_page_queue_active,
7902 mem, vm_page_t, pageq);
7903 vm_page_active_count--;
7904 break;
7905 }
7906
7907 case VM_PAGE_ON_INACTIVE_INTERNAL_Q:
7908 {
7909 assert(m_object->internal == TRUE);
7910
7911 vm_page_inactive_count--;
7912 vm_page_queue_remove(&vm_page_queue_anonymous,
7913 mem, vm_page_t, pageq);
7914 vm_page_anonymous_count--;
7915 vm_purgeable_q_advance_all();
7916 break;
7917 }
7918
7919 case VM_PAGE_ON_INACTIVE_EXTERNAL_Q:
7920 {
7921 assert(m_object->internal == FALSE);
7922
7923 vm_page_inactive_count--;
7924 vm_page_queue_remove(&vm_page_queue_inactive,
7925 mem, vm_page_t, pageq);
7926 vm_purgeable_q_advance_all();
7927 break;
7928 }
7929
7930 case VM_PAGE_ON_INACTIVE_CLEANED_Q:
7931 {
7932 assert(m_object->internal == FALSE);
7933
7934 vm_page_inactive_count--;
7935 vm_page_queue_remove(&vm_page_queue_cleaned,
7936 mem, vm_page_t, pageq);
7937 vm_page_cleaned_count--;
7938 break;
7939 }
7940
7941 case VM_PAGE_ON_THROTTLED_Q:
7942 {
7943 assert(m_object->internal == TRUE);
7944
7945 vm_page_queue_remove(&vm_page_queue_throttled,
7946 mem, vm_page_t, pageq);
7947 vm_page_throttled_count--;
7948 was_pageable = FALSE;
7949 break;
7950 }
7951
7952 case VM_PAGE_ON_SPECULATIVE_Q:
7953 {
7954 assert(m_object->internal == FALSE);
7955
7956 vm_page_remque(&mem->pageq);
7957 vm_page_speculative_count--;
7958 break;
7959 }
7960
7961 #if CONFIG_SECLUDED_MEMORY
7962 case VM_PAGE_ON_SECLUDED_Q:
7963 {
7964 vm_page_queue_remove(&vm_page_queue_secluded,
7965 mem, vm_page_t, pageq);
7966 vm_page_secluded_count--;
7967 if (m_object == VM_OBJECT_NULL) {
7968 vm_page_secluded_count_free--;
7969 was_pageable = FALSE;
7970 } else {
7971 assert(!m_object->internal);
7972 vm_page_secluded_count_inuse--;
7973 was_pageable = FALSE;
7974 // was_pageable = TRUE;
7975 }
7976 break;
7977 }
7978 #endif /* CONFIG_SECLUDED_MEMORY */
7979
7980 default:
7981 {
7982 /*
7983 * if (mem->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)
7984 * NOTE: vm_page_queues_remove does not deal with removing pages from the pageout queue...
7985 * the caller is responsible for determining if the page is on that queue, and if so, must
7986 * either first remove it (it needs both the page queues lock and the object lock to do
7987 * this via vm_pageout_steal_laundry), or avoid the call to vm_page_queues_remove
7988 *
7989 * we also don't expect to encounter VM_PAGE_ON_FREE_Q, VM_PAGE_ON_FREE_LOCAL_Q, VM_PAGE_ON_FREE_LOPAGE_Q
7990 * or any of the undefined states
7991 */
7992 panic("vm_page_queues_remove - bad page q_state (%p, %d)\n", mem, mem->vm_page_q_state);
7993 break;
7994 }
7995
7996 }
7997 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
7998 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
7999
8000 #if CONFIG_BACKGROUND_QUEUE
8001 if (remove_from_backgroundq == TRUE)
8002 vm_page_remove_from_backgroundq(mem);
8003 #endif
8004 if (was_pageable) {
8005 if (m_object->internal) {
8006 vm_page_pageable_internal_count--;
8007 } else {
8008 vm_page_pageable_external_count--;
8009 }
8010 }
8011 }
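
/*
 * A minimal sketch of the lock ordering described in the comment above
 * vm_page_queues_remove(): the caller already holds the global queue
 * lock, and the per-CPU local lock is taken only around the local-queue
 * unlink, so the page cannot move to a global queue or become wired in
 * the meantime.  pthread mutexes and all names here are hypothetical
 * stand-ins; illustration only, not part of the build.
 */
#if 0
#include <pthread.h>
#include <stdbool.h>

struct local_q {
	pthread_mutex_t	lock;
	int		count;
};

static pthread_mutex_t	global_queue_lock = PTHREAD_MUTEX_INITIALIZER;
static struct local_q	local_queues[2] = {
	{ PTHREAD_MUTEX_INITIALIZER, 0 },
	{ PTHREAD_MUTEX_INITIALIZER, 0 },
};

struct page {
	bool	on_local_q;
	int	local_id;	/* which local queue; valid only while on_local_q */
};

/* caller must already hold global_queue_lock */
static void
remove_from_local_queue(struct page *p)
{
	struct local_q *lq = &local_queues[p->local_id];

	/* lock order: global queue lock first (held by caller), then the local lock */
	pthread_mutex_lock(&lq->lock);
	lq->count--;
	p->on_local_q = false;
	p->local_id = 0;
	pthread_mutex_unlock(&lq->lock);
}

static void
queues_remove(struct page *p)
{
	pthread_mutex_lock(&global_queue_lock);
	if (p->on_local_q)
		remove_from_local_queue(p);
	pthread_mutex_unlock(&global_queue_lock);
}
#endif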
8012
8013 void
8014 vm_page_remove_internal(vm_page_t page)
8015 {
8016 vm_object_t __object = VM_PAGE_OBJECT(page);
8017 if (page == __object->memq_hint) {
8018 vm_page_t __new_hint;
8019 vm_page_queue_entry_t __qe;
8020 __qe = (vm_page_queue_entry_t)vm_page_queue_next(&page->listq);
8021 if (vm_page_queue_end(&__object->memq, __qe)) {
8022 __qe = (vm_page_queue_entry_t)vm_page_queue_prev(&page->listq);
8023 if (vm_page_queue_end(&__object->memq, __qe)) {
8024 __qe = NULL;
8025 }
8026 }
8027 __new_hint = (vm_page_t)((uintptr_t) __qe);
8028 __object->memq_hint = __new_hint;
8029 }
8030 vm_page_queue_remove(&__object->memq, page, vm_page_t, listq);
8031 #if CONFIG_SECLUDED_MEMORY
8032 if (__object->eligible_for_secluded) {
8033 vm_page_secluded.eligible_for_secluded--;
8034 }
8035 #endif /* CONFIG_SECLUDED_MEMORY */
8036 }
8037
8038 void
8039 vm_page_enqueue_inactive(vm_page_t mem, boolean_t first)
8040 {
8041 vm_object_t m_object;
8042
8043 m_object = VM_PAGE_OBJECT(mem);
8044
8045 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
8046 assert(!mem->fictitious);
8047 assert(!mem->laundry);
8048 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
8049 vm_page_check_pageable_safe(mem);
8050
8051 if (m_object->internal) {
8052 mem->vm_page_q_state = VM_PAGE_ON_INACTIVE_INTERNAL_Q;
8053
8054 if (first == TRUE)
8055 vm_page_queue_enter_first(&vm_page_queue_anonymous, mem, vm_page_t, pageq);
8056 else
8057 vm_page_queue_enter(&vm_page_queue_anonymous, mem, vm_page_t, pageq);
8058
8059 vm_page_anonymous_count++;
8060 vm_page_pageable_internal_count++;
8061 } else {
8062 mem->vm_page_q_state = VM_PAGE_ON_INACTIVE_EXTERNAL_Q;
8063
8064 if (first == TRUE)
8065 vm_page_queue_enter_first(&vm_page_queue_inactive, mem, vm_page_t, pageq);
8066 else
8067 vm_page_queue_enter(&vm_page_queue_inactive, mem, vm_page_t, pageq);
8068
8069 vm_page_pageable_external_count++;
8070 }
8071 vm_page_inactive_count++;
8072 token_new_pagecount++;
8073
8074 #if CONFIG_BACKGROUND_QUEUE
8075 if (mem->vm_page_in_background)
8076 vm_page_add_to_backgroundq(mem, FALSE);
8077 #endif
8078 }
8079
8080 void
8081 vm_page_enqueue_active(vm_page_t mem, boolean_t first)
8082 {
8083 vm_object_t m_object;
8084
8085 m_object = VM_PAGE_OBJECT(mem);
8086
8087 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
8088 assert(!mem->fictitious);
8089 assert(!mem->laundry);
8090 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
8091 vm_page_check_pageable_safe(mem);
8092
8093 mem->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
8094 if (first == TRUE)
8095 vm_page_queue_enter_first(&vm_page_queue_active, mem, vm_page_t, pageq);
8096 else
8097 vm_page_queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq);
8098 vm_page_active_count++;
8099
8100 if (m_object->internal) {
8101 vm_page_pageable_internal_count++;
8102 } else {
8103 vm_page_pageable_external_count++;
8104 }
8105
8106 #if CONFIG_BACKGROUND_QUEUE
8107 if (mem->vm_page_in_background)
8108 vm_page_add_to_backgroundq(mem, FALSE);
8109 #endif
8110 }
8111
8112 /*
8113 * Pages from special kernel objects shouldn't
8114 * be placed on pageable queues.
8115 */
8116 void
8117 vm_page_check_pageable_safe(vm_page_t page)
8118 {
8119 vm_object_t page_object;
8120
8121 page_object = VM_PAGE_OBJECT(page);
8122
8123 if (page_object == kernel_object) {
7124 panic("vm_page_check_pageable_safe: trying to add page " \
8125 "from kernel object (%p) to pageable queue", kernel_object);
8126 }
8127
8128 if (page_object == compressor_object) {
7129 panic("vm_page_check_pageable_safe: trying to add page " \
8130 "from compressor object (%p) to pageable queue", compressor_object);
8131 }
8132
8133 if (page_object == vm_submap_object) {
7134 panic("vm_page_check_pageable_safe: trying to add page " \
8135 "from submap object (%p) to pageable queue", vm_submap_object);
8136 }
8137 }
8138
8139 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
8140 * wired page diagnose
8141 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
8142
8143 #include <libkern/OSKextLibPrivate.h>
8144
8145 #define KA_SIZE(namelen, subtotalscount) \
8146 (sizeof(struct vm_allocation_site) + (namelen) + 1 + ((subtotalscount) * sizeof(struct vm_allocation_total)))
8147
8148 #define KA_NAME(alloc) \
8149 ((char *)(&(alloc)->subtotals[(alloc->subtotalscount)]))
8150
8151 #define KA_NAME_LEN(alloc) \
8152 (VM_TAG_NAME_LEN_MAX & (alloc->flags >> VM_TAG_NAME_LEN_SHIFT))
8153
8154 vm_tag_t
8155 vm_tag_bt(void)
8156 {
8157 uintptr_t* frameptr;
8158 uintptr_t* frameptr_next;
8159 uintptr_t retaddr;
8160 uintptr_t kstackb, kstackt;
8161 const vm_allocation_site_t * site;
8162 thread_t cthread;
8163 kern_allocation_name_t name;
8164
8165 cthread = current_thread();
8166 if (__improbable(cthread == NULL)) return VM_KERN_MEMORY_OSFMK;
8167
8168 if ((name = thread_get_kernel_state(cthread)->allocation_name))
8169 {
8170 if (!name->tag) vm_tag_alloc(name);
8171 return name->tag;
8172 }
8173
8174 kstackb = cthread->kernel_stack;
8175 kstackt = kstackb + kernel_stack_size;
8176
8177 /* Load stack frame pointer (EBP on x86) into frameptr */
8178 frameptr = __builtin_frame_address(0);
8179 site = NULL;
8180 while (frameptr != NULL)
8181 {
8182 /* Verify thread stack bounds */
8183 if (((uintptr_t)(frameptr + 2) > kstackt) || ((uintptr_t)frameptr < kstackb)) break;
8184
8185 /* Next frame pointer is pointed to by the previous one */
8186 frameptr_next = (uintptr_t*) *frameptr;
8187
8188 /* Pull return address from one spot above the frame pointer */
8189 retaddr = *(frameptr + 1);
8190
8191
8192 if ((retaddr < vm_kernel_stext) || (retaddr > vm_kernel_top))
8193 {
8194 site = OSKextGetAllocationSiteForCaller(retaddr);
8195 break;
8196 }
8197 frameptr = frameptr_next;
8198 }
8199
8200 return (site ? site->tag : VM_KERN_MEMORY_NONE);
8201 }
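
/*
 * A user-space sketch of the frame-pointer walk used by vm_tag_bt():
 * each frame stores the caller's frame pointer at fp[0] and the return
 * address at fp[1].  This assumes the compiler keeps frame pointers
 * (-fno-omit-frame-pointer) and bounds the walk by a fixed depth plus a
 * "stacks grow down" check instead of the kernel stack limits; all
 * names are hypothetical and this is illustration only, not part of
 * the build.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

static void __attribute__((noinline))
print_backtrace(void)
{
	uintptr_t *fp = __builtin_frame_address(0);

	for (int depth = 0; fp != NULL && depth < 8; depth++) {
		uintptr_t *next_fp = (uintptr_t *) fp[0];	/* saved caller frame pointer */
		uintptr_t  retaddr = fp[1];			/* saved return address */

		if (retaddr == 0)
			break;
		printf("frame %d: return address %p\n", depth, (void *) retaddr);

		if (next_fp <= fp)	/* caller frames live at higher addresses */
			break;
		fp = next_fp;
	}
}
#endif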
8202
8203 static uint64_t free_tag_bits[VM_MAX_TAG_VALUE/64];
8204
8205 void
8206 vm_tag_alloc_locked(vm_allocation_site_t * site, vm_allocation_site_t ** releasesiteP)
8207 {
8208 vm_tag_t tag;
8209 uint64_t avail;
8210 uint32_t idx;
8211 vm_allocation_site_t * prev;
8212
8213 if (site->tag) return;
8214
8215 idx = 0;
8216 while (TRUE)
8217 {
8218 avail = free_tag_bits[idx];
8219 if (avail)
8220 {
8221 tag = __builtin_clzll(avail);
8222 avail &= ~(1ULL << (63 - tag));
8223 free_tag_bits[idx] = avail;
8224 tag += (idx << 6);
8225 break;
8226 }
8227 idx++;
8228 if (idx >= ARRAY_COUNT(free_tag_bits))
8229 {
8230 for (idx = 0; idx < ARRAY_COUNT(vm_allocation_sites); idx++)
8231 {
8232 prev = vm_allocation_sites[idx];
8233 if (!prev) continue;
8234 if (!KA_NAME_LEN(prev)) continue;
8235 if (!prev->tag) continue;
8236 if (prev->total) continue;
8237 if (1 != prev->refcount) continue;
8238
8239 assert(idx == prev->tag);
8240 tag = idx;
8241 prev->tag = VM_KERN_MEMORY_NONE;
8242 *releasesiteP = prev;
8243 break;
8244 }
8245 if (idx >= ARRAY_COUNT(vm_allocation_sites))
8246 {
8247 tag = VM_KERN_MEMORY_ANY;
8248 }
8249 break;
8250 }
8251 }
8252 site->tag = tag;
8253
8254 OSAddAtomic16(1, &site->refcount);
8255
8256 if (VM_KERN_MEMORY_ANY != tag) vm_allocation_sites[tag] = site;
8257
8258 if (tag > vm_allocation_tag_highest) vm_allocation_tag_highest = tag;
8259 }
8260
8261 static void
8262 vm_tag_free_locked(vm_tag_t tag)
8263 {
8264 uint64_t avail;
8265 uint32_t idx;
8266 uint64_t bit;
8267
8268 if (VM_KERN_MEMORY_ANY == tag) return;
8269
8270 idx = (tag >> 6);
8271 avail = free_tag_bits[idx];
8272 tag &= 63;
8273 bit = (1ULL << (63 - tag));
8274 assert(!(avail & bit));
8275 free_tag_bits[idx] = (avail | bit);
8276 }
8277
8278 static void
8279 vm_tag_init(void)
8280 {
8281 vm_tag_t tag;
8282 for (tag = VM_KERN_MEMORY_FIRST_DYNAMIC; tag < VM_KERN_MEMORY_ANY; tag++)
8283 {
8284 vm_tag_free_locked(tag);
8285 }
8286
8287 for (tag = VM_KERN_MEMORY_ANY + 1; tag < VM_MAX_TAG_VALUE; tag++)
8288 {
8289 vm_tag_free_locked(tag);
8290 }
8291 }
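
/*
 * The free_tag_bits[] words above treat bit 63 of each word as the
 * lowest-numbered tag in that word, which is why __builtin_clzll()
 * picks the smallest free tag.  A minimal standalone sketch of that
 * bitmap allocator with hypothetical names; illustration only, not
 * part of the build.
 */
#if 0
#include <assert.h>
#include <stdint.h>

#define SKETCH_MAX_IDS 128

static uint64_t sketch_free_bits[SKETCH_MAX_IDS / 64];

static void
sketch_id_free(uint32_t id)
{
	uint64_t bit = 1ULL << (63 - (id & 63));

	assert(id < SKETCH_MAX_IDS);
	assert(!(sketch_free_bits[id >> 6] & bit));	/* must not already be free */
	sketch_free_bits[id >> 6] |= bit;
}

static int
sketch_id_alloc(uint32_t *out)
{
	for (uint32_t idx = 0; idx < SKETCH_MAX_IDS / 64; idx++) {
		uint64_t avail = sketch_free_bits[idx];

		if (avail) {
			uint32_t id = (uint32_t) __builtin_clzll(avail);	/* highest set bit == smallest id */
			sketch_free_bits[idx] = avail & ~(1ULL << (63 - id));
			*out = id + (idx << 6);
			return 1;
		}
	}
	return 0;	/* exhausted */
}
#endif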
8292
8293 vm_tag_t
8294 vm_tag_alloc(vm_allocation_site_t * site)
8295 {
8296 vm_tag_t tag;
8297 vm_allocation_site_t * releasesite;
8298
8299 if (VM_TAG_BT & site->flags)
8300 {
8301 tag = vm_tag_bt();
8302 if (VM_KERN_MEMORY_NONE != tag) return (tag);
8303 }
8304
8305 if (!site->tag)
8306 {
8307 releasesite = NULL;
8308 lck_spin_lock(&vm_allocation_sites_lock);
8309 vm_tag_alloc_locked(site, &releasesite);
8310 lck_spin_unlock(&vm_allocation_sites_lock);
8311 if (releasesite) kern_allocation_name_release(releasesite);
8312 }
8313
8314 return (site->tag);
8315 }
8316
8317 void
8318 vm_tag_update_size(vm_tag_t tag, int64_t delta)
8319 {
8320 vm_allocation_site_t * allocation;
8321 uint64_t prior;
8322
8323 assert(VM_KERN_MEMORY_NONE != tag);
8324 assert(tag < VM_MAX_TAG_VALUE);
8325
8326 allocation = vm_allocation_sites[tag];
8327 assert(allocation);
8328
8329 if (delta < 0) {
8330 assertf(allocation->total >= ((uint64_t)-delta), "tag %d, site %p", tag, allocation);
8331 }
8332 prior = OSAddAtomic64(delta, &allocation->total);
8333
8334 #if DEBUG || DEVELOPMENT
8335
8336 uint64_t new, peak;
8337 new = prior + delta;
8338 do
8339 {
8340 peak = allocation->peak;
8341 if (new <= peak) break;
8342 }
8343 while (!OSCompareAndSwap64(peak, new, &allocation->peak));
8344
8345 #endif /* DEBUG || DEVELOPMENT */
8346
8347 if (tag < VM_KERN_MEMORY_FIRST_DYNAMIC) return;
8348
8349 if (!prior && !allocation->tag) vm_tag_alloc(allocation);
8350 }
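
/*
 * The peak tracking above is a lock-free monotonic max: reread the
 * current peak and compare-and-swap in the new total until either the
 * swap succeeds or someone else has already published a larger value.
 * A minimal sketch of the same pattern with C11 atomics and
 * hypothetical names; illustration only, not part of the build.
 */
#if 0
#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint64_t sketch_total;
static _Atomic uint64_t sketch_peak;

static void
sketch_update_size(int64_t delta)
{
	uint64_t prior = atomic_fetch_add(&sketch_total, (uint64_t) delta);
	uint64_t now = prior + (uint64_t) delta;
	uint64_t seen = atomic_load(&sketch_peak);

	/* a failed CAS reloads 'seen'; retry only while we are still above it */
	while (now > seen &&
	       !atomic_compare_exchange_weak(&sketch_peak, &seen, now))
		;
}
#endif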
8351
8352 void
8353 kern_allocation_update_size(kern_allocation_name_t allocation, int64_t delta)
8354 {
8355 uint64_t prior;
8356
8357 if (delta < 0) {
8358 assertf(allocation->total >= ((uint64_t)-delta), "name %p", allocation);
8359 }
8360 prior = OSAddAtomic64(delta, &allocation->total);
8361
8362 #if DEBUG || DEVELOPMENT
8363
8364 uint64_t new, peak;
8365 new = prior + delta;
8366 do
8367 {
8368 peak = allocation->peak;
8369 if (new <= peak) break;
8370 }
8371 while (!OSCompareAndSwap64(peak, new, &allocation->peak));
8372
8373 #endif /* DEBUG || DEVELOPMENT */
8374
8375 if (!prior && !allocation->tag) vm_tag_alloc(allocation);
8376 }
8377
8378 #if VM_MAX_TAG_ZONES
8379
8380 void
8381 vm_allocation_zones_init(void)
8382 {
8383 kern_return_t ret;
8384 vm_offset_t addr;
8385 vm_size_t size;
8386
8387 size = VM_MAX_TAG_VALUE * sizeof(vm_allocation_zone_total_t **)
8388 + 2 * VM_MAX_TAG_ZONES * sizeof(vm_allocation_zone_total_t);
8389
8390 ret = kernel_memory_allocate(kernel_map,
8391 &addr, round_page(size), 0,
8392 KMA_ZERO, VM_KERN_MEMORY_DIAG);
8393 assert(KERN_SUCCESS == ret);
8394
8395 vm_allocation_zone_totals = (vm_allocation_zone_total_t **) addr;
8396 addr += VM_MAX_TAG_VALUE * sizeof(vm_allocation_zone_total_t **);
8397
8398 // prepopulate VM_KERN_MEMORY_DIAG & VM_KERN_MEMORY_KALLOC so allocations
8399 // in vm_tag_update_zone_size() won't recurse
8400 vm_allocation_zone_totals[VM_KERN_MEMORY_DIAG] = (vm_allocation_zone_total_t *) addr;
8401 addr += VM_MAX_TAG_ZONES * sizeof(vm_allocation_zone_total_t);
8402 vm_allocation_zone_totals[VM_KERN_MEMORY_KALLOC] = (vm_allocation_zone_total_t *) addr;
8403 }
8404
8405 void
8406 vm_tag_will_update_zone(vm_tag_t tag, uint32_t zidx)
8407 {
8408 vm_allocation_zone_total_t * zone;
8409
8410 assert(VM_KERN_MEMORY_NONE != tag);
8411 assert(tag < VM_MAX_TAG_VALUE);
8412
8413 if (zidx >= VM_MAX_TAG_ZONES) return;
8414
8415 zone = vm_allocation_zone_totals[tag];
8416 if (!zone)
8417 {
8418 zone = kalloc_tag(VM_MAX_TAG_ZONES * sizeof(*zone), VM_KERN_MEMORY_DIAG);
8419 if (!zone) return;
8420 bzero(zone, VM_MAX_TAG_ZONES * sizeof(*zone));
8421 if (!OSCompareAndSwapPtr(NULL, zone, &vm_allocation_zone_totals[tag]))
8422 {
8423 kfree(zone, VM_MAX_TAG_ZONES * sizeof(*zone));
8424 }
8425 }
8426 }
8427
8428 void
8429 vm_tag_update_zone_size(vm_tag_t tag, uint32_t zidx, int64_t delta, int64_t dwaste)
8430 {
8431 vm_allocation_zone_total_t * zone;
8432 uint32_t new;
8433
8434 assert(VM_KERN_MEMORY_NONE != tag);
8435 assert(tag < VM_MAX_TAG_VALUE);
8436
8437 if (zidx >= VM_MAX_TAG_ZONES) return;
8438
8439 zone = vm_allocation_zone_totals[tag];
8440 assert(zone);
8441 zone += zidx;
8442
8443 /* the zone is locked */
8444 if (delta < 0)
8445 {
8446 assertf(zone->total >= ((uint64_t)-delta), "zidx %d, tag %d, %p", zidx, tag, zone);
8447 zone->total += delta;
8448 }
8449 else
8450 {
8451 zone->total += delta;
8452 if (zone->total > zone->peak) zone->peak = zone->total;
8453 if (dwaste)
8454 {
8455 new = zone->waste;
8456 if (zone->wastediv < 65536) zone->wastediv++;
8457 else new -= (new >> 16);
8458 __assert_only bool ov = os_add_overflow(new, dwaste, &new);
8459 assert(!ov);
8460 zone->waste = new;
8461 }
8462 }
8463 }
8464
8465 #endif /* VM_MAX_TAG_ZONES */
8466
8467 void
8468 kern_allocation_update_subtotal(kern_allocation_name_t allocation, uint32_t subtag, int64_t delta)
8469 {
8470 kern_allocation_name_t other;
8471 struct vm_allocation_total * total;
8472 uint32_t subidx;
8473
8474 subidx = 0;
8475 assert(VM_KERN_MEMORY_NONE != subtag);
8476 for (; subidx < allocation->subtotalscount; subidx++)
8477 {
8478 if (VM_KERN_MEMORY_NONE == allocation->subtotals[subidx].tag)
8479 {
8480 allocation->subtotals[subidx].tag = subtag;
8481 break;
8482 }
8483 if (subtag == allocation->subtotals[subidx].tag) break;
8484 }
8485 assert(subidx < allocation->subtotalscount);
8486 if (subidx >= allocation->subtotalscount) return;
8487
8488 total = &allocation->subtotals[subidx];
8489 other = vm_allocation_sites[subtag];
8490 assert(other);
8491
8492 if (delta < 0)
8493 {
8494 assertf(total->total >= ((uint64_t)-delta), "name %p", allocation);
8495 OSAddAtomic64(delta, &total->total);
8496 assertf(other->mapped >= ((uint64_t)-delta), "other %p", other);
8497 OSAddAtomic64(delta, &other->mapped);
8498 }
8499 else
8500 {
8501 OSAddAtomic64(delta, &other->mapped);
8502 OSAddAtomic64(delta, &total->total);
8503 }
8504 }
8505
8506 const char *
8507 kern_allocation_get_name(kern_allocation_name_t allocation)
8508 {
8509 return (KA_NAME(allocation));
8510 }
8511
8512 kern_allocation_name_t
8513 kern_allocation_name_allocate(const char * name, uint32_t subtotalscount)
8514 {
8515 uint32_t namelen;
8516
8517 namelen = (uint32_t) strnlen(name, MACH_MEMORY_INFO_NAME_MAX_LEN - 1);
8518
8519 kern_allocation_name_t allocation;
8520 allocation = kalloc(KA_SIZE(namelen, subtotalscount));
8521 bzero(allocation, KA_SIZE(namelen, subtotalscount));
8522
8523 allocation->refcount = 1;
8524 allocation->subtotalscount = subtotalscount;
8525 allocation->flags = (namelen << VM_TAG_NAME_LEN_SHIFT);
8526 strlcpy(KA_NAME(allocation), name, namelen + 1);
8527
8528 return (allocation);
8529 }
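
/*
 * KA_SIZE()/KA_NAME() above pack the header, the subtotals array and
 * the NUL-terminated name into a single allocation, with the name
 * stored immediately after the last subtotal.  A minimal user-space
 * sketch of that layout using a flexible array member; the names are
 * hypothetical and this is illustration only, not part of the build.
 */
#if 0
#include <stdlib.h>
#include <string.h>

struct sketch_subtotal {
	unsigned int	tag;
	long long	total;
};

struct sketch_alloc_name {
	unsigned int		refcount;
	unsigned int		subtotalscount;
	struct sketch_subtotal	subtotals[];	/* the name string follows this array */
};

#define SKETCH_KA_SIZE(namelen, nsubs) \
	(sizeof(struct sketch_alloc_name) + (nsubs) * sizeof(struct sketch_subtotal) + (namelen) + 1)

#define SKETCH_KA_NAME(a) \
	((char *) &(a)->subtotals[(a)->subtotalscount])

static struct sketch_alloc_name *
sketch_name_allocate(const char *name, unsigned int nsubs)
{
	size_t namelen = strlen(name);
	struct sketch_alloc_name *a = calloc(1, SKETCH_KA_SIZE(namelen, nsubs));

	if (a == NULL)
		return NULL;
	a->refcount = 1;
	a->subtotalscount = nsubs;
	memcpy(SKETCH_KA_NAME(a), name, namelen + 1);	/* copy including the NUL */
	return a;
}
#endif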
8530
8531 void
8532 kern_allocation_name_release(kern_allocation_name_t allocation)
8533 {
8534 assert(allocation->refcount > 0);
8535 if (1 == OSAddAtomic16(-1, &allocation->refcount))
8536 {
8537 kfree(allocation, KA_SIZE(KA_NAME_LEN(allocation), allocation->subtotalscount));
8538 }
8539 }
8540
8541 vm_tag_t
8542 kern_allocation_name_get_vm_tag(kern_allocation_name_t allocation)
8543 {
8544 return (vm_tag_alloc(allocation));
8545 }
8546
8547 static void
8548 vm_page_count_object(mach_memory_info_t * info, unsigned int __unused num_info, vm_object_t object)
8549 {
8550 if (!object->wired_page_count) return;
8551 if (object != kernel_object)
8552 {
8553 assert(object->wire_tag < num_info);
8554 info[object->wire_tag].size += ptoa_64(object->wired_page_count);
8555 }
8556 }
8557
8558 typedef void (*vm_page_iterate_proc)(mach_memory_info_t * info,
8559 unsigned int num_info, vm_object_t object);
8560
8561 static void
8562 vm_page_iterate_purgeable_objects(mach_memory_info_t * info, unsigned int num_info,
8563 vm_page_iterate_proc proc, purgeable_q_t queue,
8564 int group)
8565 {
8566 vm_object_t object;
8567
8568 for (object = (vm_object_t) queue_first(&queue->objq[group]);
8569 !queue_end(&queue->objq[group], (queue_entry_t) object);
8570 object = (vm_object_t) queue_next(&object->objq))
8571 {
8572 proc(info, num_info, object);
8573 }
8574 }
8575
8576 static void
8577 vm_page_iterate_objects(mach_memory_info_t * info, unsigned int num_info,
8578 vm_page_iterate_proc proc)
8579 {
8580 purgeable_q_t volatile_q;
8581 queue_head_t * nonvolatile_q;
8582 vm_object_t object;
8583 int group;
8584
8585 lck_spin_lock(&vm_objects_wired_lock);
8586 queue_iterate(&vm_objects_wired,
8587 object,
8588 vm_object_t,
8589 objq)
8590 {
8591 proc(info, num_info, object);
8592 }
8593 lck_spin_unlock(&vm_objects_wired_lock);
8594
8595 lck_mtx_lock(&vm_purgeable_queue_lock);
8596 nonvolatile_q = &purgeable_nonvolatile_queue;
8597 for (object = (vm_object_t) queue_first(nonvolatile_q);
8598 !queue_end(nonvolatile_q, (queue_entry_t) object);
8599 object = (vm_object_t) queue_next(&object->objq))
8600 {
8601 proc(info, num_info, object);
8602 }
8603
8604 volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE];
8605 vm_page_iterate_purgeable_objects(info, num_info, proc, volatile_q, 0);
8606
8607 volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
8608 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
8609 {
8610 vm_page_iterate_purgeable_objects(info, num_info, proc, volatile_q, group);
8611 }
8612
8613 volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
8614 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
8615 {
8616 vm_page_iterate_purgeable_objects(info, num_info, proc, volatile_q, group);
8617 }
8618 lck_mtx_unlock(&vm_purgeable_queue_lock);
8619 }
8620
8621 static uint64_t
8622 process_account(mach_memory_info_t * info, unsigned int num_info, uint64_t zones_collectable_bytes, boolean_t iterated)
8623 {
8624 size_t namelen;
8625 unsigned int idx, count, nextinfo;
8626 vm_allocation_site_t * site;
8627 lck_spin_lock(&vm_allocation_sites_lock);
8628
8629 for (idx = 0; idx <= vm_allocation_tag_highest; idx++)
8630 {
8631 site = vm_allocation_sites[idx];
8632 if (!site) continue;
8633 info[idx].mapped = site->mapped;
8634 info[idx].tag = site->tag;
8635 if (!iterated)
8636 {
8637 info[idx].size = site->total;
8638 #if DEBUG || DEVELOPMENT
8639 info[idx].peak = site->peak;
8640 #endif /* DEBUG || DEVELOPMENT */
8641 }
8642 else
8643 {
8644 if (!site->subtotalscount && (site->total != info[idx].size))
8645 {
8646 printf("tag mismatch[%d] 0x%qx, iter 0x%qx\n", idx, site->total, info[idx].size);
8647 info[idx].size = site->total;
8648 }
8649 }
8650 }
8651
8652 nextinfo = (vm_allocation_tag_highest + 1);
8653 count = nextinfo;
8654 if (count >= num_info) count = num_info;
8655
8656 for (idx = 0; idx < count; idx++)
8657 {
8658 site = vm_allocation_sites[idx];
8659 if (!site) continue;
8660 info[idx].flags |= VM_KERN_SITE_WIRED;
8661 if (idx < VM_KERN_MEMORY_FIRST_DYNAMIC)
8662 {
8663 info[idx].site = idx;
8664 info[idx].flags |= VM_KERN_SITE_TAG;
8665 if (VM_KERN_MEMORY_ZONE == idx)
8666 {
8667 info[idx].flags |= VM_KERN_SITE_HIDE;
8668 info[idx].flags &= ~VM_KERN_SITE_WIRED;
8669 info[idx].collectable_bytes = zones_collectable_bytes;
8670 }
8671 }
8672 else if ((namelen = (VM_TAG_NAME_LEN_MAX & (site->flags >> VM_TAG_NAME_LEN_SHIFT))))
8673 {
8674 info[idx].site = 0;
8675 info[idx].flags |= VM_KERN_SITE_NAMED;
8676 if (namelen > sizeof(info[idx].name)) namelen = sizeof(info[idx].name);
8677 strncpy(&info[idx].name[0], KA_NAME(site), namelen);
8678 }
8679 else if (VM_TAG_KMOD & site->flags)
8680 {
8681 info[idx].site = OSKextGetKmodIDForSite(site, NULL, 0);
8682 info[idx].flags |= VM_KERN_SITE_KMOD;
8683 }
8684 else
8685 {
8686 info[idx].site = VM_KERNEL_UNSLIDE(site);
8687 info[idx].flags |= VM_KERN_SITE_KERNEL;
8688 }
8689 #if VM_MAX_TAG_ZONES
8690 vm_allocation_zone_total_t * zone;
8691 unsigned int zidx;
8692 vm_size_t elem_size;
8693
8694 if (vm_allocation_zone_totals
8695 && (zone = vm_allocation_zone_totals[idx])
8696 && (nextinfo < num_info))
8697 {
8698 for (zidx = 0; zidx < VM_MAX_TAG_ZONES; zidx++)
8699 {
8700 if (!zone[zidx].peak) continue;
8701 info[nextinfo] = info[idx];
8702 info[nextinfo].zone = zone_index_from_tag_index(zidx, &elem_size);
8703 info[nextinfo].flags &= ~VM_KERN_SITE_WIRED;
8704 info[nextinfo].flags |= VM_KERN_SITE_ZONE;
8705 info[nextinfo].size = zone[zidx].total;
8706 info[nextinfo].peak = zone[zidx].peak;
8707 info[nextinfo].mapped = 0;
8708 if (zone[zidx].wastediv)
8709 {
8710 info[nextinfo].collectable_bytes = ((zone[zidx].waste * zone[zidx].total / elem_size) / zone[zidx].wastediv);
8711 }
8712 nextinfo++;
8713 }
8714 }
8715 #endif /* VM_MAX_TAG_ZONES */
8716 if (site->subtotalscount)
8717 {
8718 uint64_t mapped, mapcost, take;
8719 uint32_t sub;
8720 vm_tag_t alloctag;
8721
8722 info[idx].size = site->total;
8723 mapped = info[idx].size;
8724 info[idx].mapped = mapped;
8725 mapcost = 0;
8726 for (sub = 0; sub < site->subtotalscount; sub++)
8727 {
8728 alloctag = site->subtotals[sub].tag;
8729 assert(alloctag < num_info);
8730 if (info[alloctag].name[0]) continue;
8731 take = info[alloctag].mapped;
8732 if (take > info[alloctag].size) take = info[alloctag].size;
8733 if (take > mapped) take = mapped;
8734 info[alloctag].mapped -= take;
8735 info[alloctag].size -= take;
8736 mapped -= take;
8737 mapcost += take;
8738 }
8739 info[idx].size = mapcost;
8740 }
8741 }
8742 lck_spin_unlock(&vm_allocation_sites_lock);
8743
8744 return (0);
8745 }
8746
8747 uint32_t
8748 vm_page_diagnose_estimate(void)
8749 {
8750 vm_allocation_site_t * site;
8751 uint32_t count;
8752 uint32_t idx;
8753
8754 lck_spin_lock(&vm_allocation_sites_lock);
8755 for (count = idx = 0; idx < VM_MAX_TAG_VALUE; idx++)
8756 {
8757 site = vm_allocation_sites[idx];
8758 if (!site) continue;
8759 count++;
8760 #if VM_MAX_TAG_ZONES
8761 if (vm_allocation_zone_totals)
8762 {
8763 vm_allocation_zone_total_t * zone;
8764 zone = vm_allocation_zone_totals[idx];
8765 if (!zone) continue;
8766 for (uint32_t zidx = 0; zidx < VM_MAX_TAG_ZONES; zidx++) if (zone[zidx].peak) count++;
8767 }
8768 #endif
8769 }
8770 lck_spin_unlock(&vm_allocation_sites_lock);
8771
8772 /* some slop for new tags created */
8773 count += 8;
8774 count += VM_KERN_COUNTER_COUNT;
8775
8776 return (count);
8777 }
8778
8779
8780 kern_return_t
8781 vm_page_diagnose(mach_memory_info_t * info, unsigned int num_info, uint64_t zones_collectable_bytes)
8782 {
8783 uint64_t wired_size;
8784 uint64_t wired_managed_size;
8785 uint64_t wired_reserved_size;
8786 uint64_t booter_size;
8787 boolean_t iterate;
8788 mach_memory_info_t * counts;
8789
8790 bzero(info, num_info * sizeof(mach_memory_info_t));
8791
8792 if (!vm_page_wire_count_initial) return (KERN_ABORTED);
8793
8794 #if CONFIG_EMBEDDED
8795 wired_size = ptoa_64(vm_page_wire_count);
8796 wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count);
8797 #else
8798 wired_size = ptoa_64(vm_page_wire_count + vm_lopage_free_count + vm_page_throttled_count);
8799 wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count + vm_page_throttled_count);
8800 #endif
8801 wired_managed_size = ptoa_64(vm_page_wire_count - vm_page_wire_count_initial);
8802
8803 booter_size = ml_get_booter_memory_size();
8804 wired_size += booter_size;
8805
8806 assert(num_info >= VM_KERN_COUNTER_COUNT);
8807 num_info -= VM_KERN_COUNTER_COUNT;
8808 counts = &info[num_info];
8809
8810 #define SET_COUNT(xcount, xsize, xflags) \
8811 counts[xcount].tag = VM_MAX_TAG_VALUE + xcount; \
8812 counts[xcount].site = (xcount); \
8813 counts[xcount].size = (xsize); \
8814 counts[xcount].mapped = (xsize); \
8815 counts[xcount].flags = VM_KERN_SITE_COUNTER | xflags;
8816
8817 SET_COUNT(VM_KERN_COUNT_MANAGED, ptoa_64(vm_page_pages), 0);
8818 SET_COUNT(VM_KERN_COUNT_WIRED, wired_size, 0);
8819 SET_COUNT(VM_KERN_COUNT_WIRED_MANAGED, wired_managed_size, 0);
8820 SET_COUNT(VM_KERN_COUNT_RESERVED, wired_reserved_size, VM_KERN_SITE_WIRED);
8821 SET_COUNT(VM_KERN_COUNT_STOLEN, ptoa_64(vm_page_stolen_count), VM_KERN_SITE_WIRED);
8822 SET_COUNT(VM_KERN_COUNT_LOPAGE, ptoa_64(vm_lopage_free_count), VM_KERN_SITE_WIRED);
8823 SET_COUNT(VM_KERN_COUNT_WIRED_BOOT, ptoa_64(vm_page_wire_count_on_boot), 0);
8824 SET_COUNT(VM_KERN_COUNT_BOOT_STOLEN, booter_size, VM_KERN_SITE_WIRED);
8825
8826 #define SET_MAP(xcount, xsize, xfree, xlargest) \
8827 counts[xcount].site = (xcount); \
8828 counts[xcount].size = (xsize); \
8829 counts[xcount].mapped = (xsize); \
8830 counts[xcount].free = (xfree); \
8831 counts[xcount].largest = (xlargest); \
8832 counts[xcount].flags = VM_KERN_SITE_COUNTER;
8833
8834 vm_map_size_t map_size, map_free, map_largest;
8835
8836 vm_map_sizes(kernel_map, &map_size, &map_free, &map_largest);
8837 SET_MAP(VM_KERN_COUNT_MAP_KERNEL, map_size, map_free, map_largest);
8838
8839 vm_map_sizes(zone_map, &map_size, &map_free, &map_largest);
8840 SET_MAP(VM_KERN_COUNT_MAP_ZONE, map_size, map_free, map_largest);
8841
8842 vm_map_sizes(kalloc_map, &map_size, &map_free, &map_largest);
8843 SET_MAP(VM_KERN_COUNT_MAP_KALLOC, map_size, map_free, map_largest);
8844
8845 iterate = !VM_TAG_ACTIVE_UPDATE;
8846 if (iterate)
8847 {
8848 enum { kMaxKernelDepth = 1 };
8849 vm_map_t maps [kMaxKernelDepth];
8850 vm_map_entry_t entries[kMaxKernelDepth];
8851 vm_map_t map;
8852 vm_map_entry_t entry;
8853 vm_object_offset_t offset;
8854 vm_page_t page;
8855 int stackIdx, count;
8856
8857 vm_page_iterate_objects(info, num_info, &vm_page_count_object);
8858
8859 map = kernel_map;
8860 stackIdx = 0;
8861 while (map)
8862 {
8863 vm_map_lock(map);
8864 for (entry = map->hdr.links.next; map; entry = entry->links.next)
8865 {
8866 if (entry->is_sub_map)
8867 {
8868 assert(stackIdx < kMaxKernelDepth);
8869 maps[stackIdx] = map;
8870 entries[stackIdx] = entry;
8871 stackIdx++;
8872 map = VME_SUBMAP(entry);
8873 entry = NULL;
8874 break;
8875 }
8876 if (VME_OBJECT(entry) == kernel_object)
8877 {
8878 count = 0;
8879 vm_object_lock(VME_OBJECT(entry));
8880 for (offset = entry->links.start; offset < entry->links.end; offset += page_size)
8881 {
8882 page = vm_page_lookup(VME_OBJECT(entry), offset);
8883 if (page && VM_PAGE_WIRED(page)) count++;
8884 }
8885 vm_object_unlock(VME_OBJECT(entry));
8886
8887 if (count)
8888 {
8889 assert(VME_ALIAS(entry) != VM_KERN_MEMORY_NONE);
8890 assert(VME_ALIAS(entry) < num_info);
8891 info[VME_ALIAS(entry)].size += ptoa_64(count);
8892 }
8893 }
8894 while (map && (entry == vm_map_last_entry(map)))
8895 {
8896 vm_map_unlock(map);
8897 if (!stackIdx) map = NULL;
8898 else
8899 {
8900 --stackIdx;
8901 map = maps[stackIdx];
8902 entry = entries[stackIdx];
8903 }
8904 }
8905 }
8906 }
8907 }
8908
8909 process_account(info, num_info, zones_collectable_bytes, iterate);
8910
8911 return (KERN_SUCCESS);
8912 }
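
/*
 * The iteration in vm_page_diagnose() descends one level into submaps
 * with a small explicit stack (kMaxKernelDepth) instead of recursing.
 * A minimal sketch of that pattern over a toy nested "map" (an array
 * of entries terminated by a NULL name); all names are hypothetical
 * and this is illustration only, not part of the build.
 */
#if 0
#include <stdio.h>

struct sketch_entry {
	const char		*name;		/* NULL terminates a map */
	struct sketch_entry	*submap;	/* non-NULL: descend into this nested map */
};

#define SKETCH_MAX_DEPTH 1			/* mirrors kMaxKernelDepth */

static void
sketch_walk_map(struct sketch_entry *top)
{
	struct sketch_entry	*stack[SKETCH_MAX_DEPTH];
	int			 depth = 0;
	struct sketch_entry	*e = top;

	while (e != NULL) {
		if (e->name == NULL) {			/* end of the current map */
			e = (depth > 0) ? stack[--depth] + 1 : NULL;	/* pop, resume after the submap entry */
			continue;
		}
		if (e->submap != NULL && depth < SKETCH_MAX_DEPTH) {
			stack[depth++] = e;		/* remember where to come back to */
			e = e->submap;			/* descend */
			continue;
		}
		printf("entry: %s\n", e->name);		/* visit a leaf entry */
		e++;
	}
}
#endif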
8913
8914 #if DEBUG || DEVELOPMENT
8915
8916 kern_return_t
8917 vm_kern_allocation_info(uintptr_t addr, vm_size_t * size, vm_tag_t * tag, vm_size_t * zone_size)
8918 {
8919 kern_return_t ret;
8920 vm_size_t zsize;
8921 vm_map_t map;
8922 vm_map_entry_t entry;
8923
8924 zsize = zone_element_info((void *) addr, tag);
8925 if (zsize)
8926 {
8927 *zone_size = *size = zsize;
8928 return (KERN_SUCCESS);
8929 }
8930
8931 *zone_size = 0;
8932 ret = KERN_INVALID_ADDRESS;
8933 for (map = kernel_map; map; )
8934 {
8935 vm_map_lock(map);
8936 if (!vm_map_lookup_entry(map, addr, &entry)) break;
8937 if (entry->is_sub_map)
8938 {
8939 if (map != kernel_map) break;
8940 map = VME_SUBMAP(entry);
8941 continue;
8942 }
8943 if (entry->vme_start != addr) break;
8944 *tag = VME_ALIAS(entry);
8945 *size = (entry->vme_end - addr);
8946 ret = KERN_SUCCESS;
8947 break;
8948 }
8949 if (map != kernel_map) vm_map_unlock(map);
8950 vm_map_unlock(kernel_map);
8951
8952 return (ret);
8953 }
8954
8955 #endif /* DEBUG || DEVELOPMENT */
8956
8957 uint32_t
8958 vm_tag_get_kext(vm_tag_t tag, char * name, vm_size_t namelen)
8959 {
8960 vm_allocation_site_t * site;
8961 uint32_t kmodId;
8962
8963 kmodId = 0;
8964 lck_spin_lock(&vm_allocation_sites_lock);
8965 if ((site = vm_allocation_sites[tag]))
8966 {
8967 if (VM_TAG_KMOD & site->flags)
8968 {
8969 kmodId = OSKextGetKmodIDForSite(site, name, namelen);
8970 }
8971 }
8972 lck_spin_unlock(&vm_allocation_sites_lock);
8973
8974 return (kmodId);
8975 }