apple/xnu.git: osfmk/vm/vm_resident.c (xnu-4903.231.4)
1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67 #include <libkern/OSDebug.h>
68
69 #include <mach/clock_types.h>
70 #include <mach/vm_prot.h>
71 #include <mach/vm_statistics.h>
72 #include <mach/sdt.h>
73 #include <kern/counters.h>
74 #include <kern/sched_prim.h>
75 #include <kern/policy_internal.h>
76 #include <kern/task.h>
77 #include <kern/thread.h>
78 #include <kern/kalloc.h>
79 #include <kern/zalloc.h>
80 #include <kern/xpr.h>
81 #include <kern/ledger.h>
82 #include <vm/pmap.h>
83 #include <vm/vm_init.h>
84 #include <vm/vm_map.h>
85 #include <vm/vm_page.h>
86 #include <vm/vm_pageout.h>
87 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
88 #include <kern/misc_protos.h>
89 #include <zone_debug.h>
90 #include <mach_debug/zone_info.h>
91 #include <vm/cpm.h>
92 #include <pexpert/pexpert.h>
93 #include <san/kasan.h>
94
95 #include <vm/vm_protos.h>
96 #include <vm/memory_object.h>
97 #include <vm/vm_purgeable_internal.h>
98 #include <vm/vm_compressor.h>
99
100 #if CONFIG_PHANTOM_CACHE
101 #include <vm/vm_phantom_cache.h>
102 #endif
103
104 #include <IOKit/IOHibernatePrivate.h>
105
106 #include <sys/kdebug.h>
107
108
109
110 char vm_page_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
111 char vm_page_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
112 char vm_page_non_speculative_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
113 char vm_page_active_or_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
114
115 #if CONFIG_SECLUDED_MEMORY
116 struct vm_page_secluded_data vm_page_secluded;
117 void secluded_suppression_init(void);
118 #endif /* CONFIG_SECLUDED_MEMORY */
119
120 boolean_t hibernate_cleaning_in_progress = FALSE;
121 boolean_t vm_page_free_verify = TRUE;
122
123 uint32_t vm_lopage_free_count = 0;
124 uint32_t vm_lopage_free_limit = 0;
125 uint32_t vm_lopage_lowater = 0;
126 boolean_t vm_lopage_refill = FALSE;
127 boolean_t vm_lopage_needed = FALSE;
128
129 lck_mtx_ext_t vm_page_queue_lock_ext;
130 lck_mtx_ext_t vm_page_queue_free_lock_ext;
131 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
132
133 int speculative_age_index = 0;
134 int speculative_steal_index = 0;
135 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
136
137 boolean_t hibernation_vmqueues_inspection = FALSE; /* Tracks if the hibernation code is looking at the VM queues.
138 * Updated and checked behind the vm_page_queues_lock. */
139
140 __private_extern__ void vm_page_init_lck_grp(void);
141
142 static void vm_page_free_prepare(vm_page_t page);
143 static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
144
145 static void vm_tag_init(void);
146
147 uint64_t vm_min_kernel_and_kext_address = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
148 uint32_t vm_packed_from_vm_pages_array_mask = VM_PACKED_FROM_VM_PAGES_ARRAY;
149 uint32_t vm_packed_pointer_shift = VM_PACKED_POINTER_SHIFT;
150
151 /*
152 * Associated with each page of user-allocatable memory is a
153 * page structure.
154 */
155
156 /*
157 * These variables record the values returned by vm_page_bootstrap,
158 * for debugging purposes. The implementation of pmap_steal_memory
159 * and pmap_startup here also uses them internally.
160 */
161
162 vm_offset_t virtual_space_start;
163 vm_offset_t virtual_space_end;
164 uint32_t vm_page_pages;
165
166 /*
167 * The vm_page_lookup() routine, which provides for fast
168 * (virtual memory object, offset) to page lookup, employs
169 * the following hash table. The vm_page_{insert,remove}
170 * routines install and remove associations in the table.
171 * [This table is often called the virtual-to-physical,
172 * or VP, table.]
173 */
174 typedef struct {
175 vm_page_packed_t page_list;
176 #if MACH_PAGE_HASH_STATS
177 int cur_count; /* current count */
178 int hi_count; /* high water mark */
179 #endif /* MACH_PAGE_HASH_STATS */
180 } vm_page_bucket_t;
181
182
183 #define BUCKETS_PER_LOCK 16
184
185 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
186 unsigned int vm_page_bucket_count = 0; /* How big is array? */
187 unsigned int vm_page_hash_mask; /* Mask for hash function */
188 unsigned int vm_page_hash_shift; /* Shift for hash function */
189 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
190 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
191
192 #ifndef VM_TAG_ACTIVE_UPDATE
193 #error VM_TAG_ACTIVE_UPDATE
194 #endif
195 #ifndef VM_MAX_TAG_ZONES
196 #error VM_MAX_TAG_ZONES
197 #endif
198
199 boolean_t vm_tag_active_update = VM_TAG_ACTIVE_UPDATE;
200 lck_spin_t *vm_page_bucket_locks;
201 lck_spin_t vm_objects_wired_lock;
202 lck_spin_t vm_allocation_sites_lock;
203
204 vm_allocation_site_t vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC + 1];
205 vm_allocation_site_t * vm_allocation_sites[VM_MAX_TAG_VALUE];
206 #if VM_MAX_TAG_ZONES
207 vm_allocation_zone_total_t ** vm_allocation_zone_totals;
208 #endif /* VM_MAX_TAG_ZONES */
209
210 vm_tag_t vm_allocation_tag_highest;
211
212 #if VM_PAGE_BUCKETS_CHECK
213 boolean_t vm_page_buckets_check_ready = FALSE;
214 #if VM_PAGE_FAKE_BUCKETS
215 vm_page_bucket_t *vm_page_fake_buckets; /* decoy buckets */
216 vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
217 #endif /* VM_PAGE_FAKE_BUCKETS */
218 #endif /* VM_PAGE_BUCKETS_CHECK */
219
220
221
222 #if MACH_PAGE_HASH_STATS
223 /* This routine is only for debug. It is intended to be called by
224 * hand by a developer using a kernel debugger. This routine prints
225 * out vm_page_hash table statistics to the kernel debug console.
226 */
227 void
228 hash_debug(void)
229 {
230 int i;
231 int numbuckets = 0;
232 int highsum = 0;
233 int maxdepth = 0;
234
235 for (i = 0; i < vm_page_bucket_count; i++) {
236 if (vm_page_buckets[i].hi_count) {
237 numbuckets++;
238 highsum += vm_page_buckets[i].hi_count;
239 if (vm_page_buckets[i].hi_count > maxdepth)
240 maxdepth = vm_page_buckets[i].hi_count;
241 }
242 }
243 printf("Total number of buckets: %d\n", vm_page_bucket_count);
244 printf("Number used buckets: %d = %d%%\n",
245 numbuckets, 100*numbuckets/vm_page_bucket_count);
246 printf("Number unused buckets: %d = %d%%\n",
247 vm_page_bucket_count - numbuckets,
248 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
249 printf("Sum of bucket max depth: %d\n", highsum);
250 printf("Average bucket depth: %d.%2d\n",
251 highsum/vm_page_bucket_count,
252 highsum%vm_page_bucket_count);
253 printf("Maximum bucket depth: %d\n", maxdepth);
254 }
255 #endif /* MACH_PAGE_HASH_STATS */
256
257 /*
258 * The virtual page size is currently implemented as a runtime
259 * variable, but is constant once initialized using vm_set_page_size.
260 * This initialization must be done in the machine-dependent
261 * bootstrap sequence, before calling other machine-independent
262 * initializations.
263 *
264 * All references to the virtual page size outside this
265 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
266 * constants.
267 */
268 #if defined(__arm__) || defined(__arm64__)
269 vm_size_t page_size;
270 vm_size_t page_mask;
271 int page_shift;
272 #else
273 vm_size_t page_size = PAGE_SIZE;
274 vm_size_t page_mask = PAGE_MASK;
275 int page_shift = PAGE_SHIFT;
276 #endif
277
278 /*
279 * Resident page structures are initialized from
280 * a template (see vm_page_alloc).
281 *
282 * When adding a new field to the virtual memory
283 * object structure, be sure to add initialization
284 * (see vm_page_bootstrap).
285 */
286 struct vm_page vm_page_template;
287
288 vm_page_t vm_pages = VM_PAGE_NULL;
289 vm_page_t vm_page_array_beginning_addr;
290 vm_page_t vm_page_array_ending_addr;
291 vm_page_t vm_page_array_boundary;
292
293 unsigned int vm_pages_count = 0;
294 ppnum_t vm_page_lowest = 0;
295
296 /*
297 * Resident pages that represent real memory
298 * are allocated from a set of free lists,
299 * one per color.
300 */
301 unsigned int vm_colors;
302 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
303 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
304 unsigned int vm_free_magazine_refill_limit = 0;
305
306
307 struct vm_page_queue_free_head {
308 vm_page_queue_head_t qhead;
309 } __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
310
311 struct vm_page_queue_free_head vm_page_queue_free[MAX_COLORS];
312
313
314 unsigned int vm_page_free_wanted;
315 unsigned int vm_page_free_wanted_privileged;
316 #if CONFIG_SECLUDED_MEMORY
317 unsigned int vm_page_free_wanted_secluded;
318 #endif /* CONFIG_SECLUDED_MEMORY */
319 unsigned int vm_page_free_count;
320
321 /*
322 * Occasionally, the virtual memory system uses
323 * resident page structures that do not refer to
324 * real pages, for example to leave a page with
325 * important state information in the VP table.
326 *
327 * These page structures are allocated the way
328 * most other kernel structures are.
329 */
330 zone_t vm_page_array_zone;
331 zone_t vm_page_zone;
332 vm_locks_array_t vm_page_locks;
333 decl_lck_mtx_data(,vm_page_alloc_lock)
334 lck_mtx_ext_t vm_page_alloc_lock_ext;
335
336 unsigned int vm_page_local_q_count = 0;
337 unsigned int vm_page_local_q_soft_limit = 250;
338 unsigned int vm_page_local_q_hard_limit = 500;
339 struct vplq *vm_page_local_q = NULL;
340
341 /* N.B. Guard and fictitious pages must not
342 * be assigned a zero phys_page value.
343 */
344 /*
345 * Fictitious pages don't have a physical address,
346 * but we must initialize phys_page to something.
347 * For debugging, this should be a strange value
348 * that the pmap module can recognize in assertions.
349 */
350 const ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
351
352 /*
353 * Guard pages are not accessible so they don't
354 * need a physical address, but we need to enter
355 * one in the pmap.
356 * Let's make it recognizable and make sure that
357 * we don't use a real physical page with that
358 * physical address.
359 */
360 const ppnum_t vm_page_guard_addr = (ppnum_t) -2;
361
362 /*
363 * Resident page structures are also chained on
364 * queues that are used by the page replacement
365 * system (pageout daemon). These queues are
366 * defined here, but are shared by the pageout
367 * module. The inactive queue is broken into
368 * file-backed and anonymous queues for convenience, as the
369 * pageout daemon often assigns a higher
370 * importance to anonymous pages (it is less likely to pick them).
371 */
372 vm_page_queue_head_t vm_page_queue_active __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
373 vm_page_queue_head_t vm_page_queue_inactive __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
374 #if CONFIG_SECLUDED_MEMORY
375 vm_page_queue_head_t vm_page_queue_secluded __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
376 #endif /* CONFIG_SECLUDED_MEMORY */
377 vm_page_queue_head_t vm_page_queue_anonymous __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT))); /* inactive memory queue for anonymous pages */
378 vm_page_queue_head_t vm_page_queue_throttled __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
379
380 queue_head_t vm_objects_wired;
381
382 void vm_update_darkwake_mode(boolean_t);
383
384 #if CONFIG_BACKGROUND_QUEUE
385 vm_page_queue_head_t vm_page_queue_background __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
386 uint32_t vm_page_background_target;
387 uint32_t vm_page_background_target_snapshot;
388 uint32_t vm_page_background_count;
389 uint64_t vm_page_background_promoted_count;
390
391 uint32_t vm_page_background_internal_count;
392 uint32_t vm_page_background_external_count;
393
394 uint32_t vm_page_background_mode;
395 uint32_t vm_page_background_exclude_external;
396 #endif
397
398 unsigned int vm_page_active_count;
399 unsigned int vm_page_inactive_count;
400 #if CONFIG_SECLUDED_MEMORY
401 unsigned int vm_page_secluded_count;
402 unsigned int vm_page_secluded_count_free;
403 unsigned int vm_page_secluded_count_inuse;
404 #endif /* CONFIG_SECLUDED_MEMORY */
405 unsigned int vm_page_anonymous_count;
406 unsigned int vm_page_throttled_count;
407 unsigned int vm_page_speculative_count;
408
409 unsigned int vm_page_wire_count;
410 unsigned int vm_page_wire_count_on_boot = 0;
411 unsigned int vm_page_stolen_count;
412 unsigned int vm_page_wire_count_initial;
413 unsigned int vm_page_pages_initial;
414 unsigned int vm_page_gobble_count = 0;
415
416 #define VM_PAGE_WIRE_COUNT_WARNING 0
417 #define VM_PAGE_GOBBLE_COUNT_WARNING 0
418
419 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
420 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
421 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
422
423 unsigned int vm_page_xpmapped_external_count = 0;
424 unsigned int vm_page_external_count = 0;
425 unsigned int vm_page_internal_count = 0;
426 unsigned int vm_page_pageable_external_count = 0;
427 unsigned int vm_page_pageable_internal_count = 0;
428
429 #if DEVELOPMENT || DEBUG
430 unsigned int vm_page_speculative_recreated = 0;
431 unsigned int vm_page_speculative_created = 0;
432 unsigned int vm_page_speculative_used = 0;
433 #endif
434
435 vm_page_queue_head_t vm_page_queue_cleaned __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
436
437 unsigned int vm_page_cleaned_count = 0;
438
439 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
440 ppnum_t max_valid_low_ppnum = 0xffffffff;
441
442
443 /*
444 * Several page replacement parameters are also
445 * shared with this module, so that page allocation
446 * (done here in vm_page_alloc) can trigger the
447 * pageout daemon.
448 */
449 unsigned int vm_page_free_target = 0;
450 unsigned int vm_page_free_min = 0;
451 unsigned int vm_page_throttle_limit = 0;
452 unsigned int vm_page_inactive_target = 0;
453 #if CONFIG_SECLUDED_MEMORY
454 unsigned int vm_page_secluded_target = 0;
455 #endif /* CONFIG_SECLUDED_MEMORY */
456 unsigned int vm_page_anonymous_min = 0;
457 unsigned int vm_page_free_reserved = 0;
458
459
460 /*
461 * The VM system has a couple of heuristics for deciding
462 * that pages are "uninteresting" and should be placed
463 * on the inactive queue as likely candidates for replacement.
464 * These variables let the heuristics be controlled at run-time
465 * to make experimentation easier.
466 */
467
468 boolean_t vm_page_deactivate_hint = TRUE;
469
470 struct vm_page_stats_reusable vm_page_stats_reusable;
471
472 /*
473 * vm_set_page_size:
474 *
475 * Sets the page size, perhaps based upon the memory
476 * size. Must be called before any use of page-size
477 * dependent functions.
478 *
479 * Sets page_shift and page_mask from page_size.
480 */
481 void
482 vm_set_page_size(void)
483 {
484 page_size = PAGE_SIZE;
485 page_mask = PAGE_MASK;
486 page_shift = PAGE_SHIFT;
487
488 if ((page_mask & page_size) != 0)
489 panic("vm_set_page_size: page size not a power of two");
490
491 for (page_shift = 0; ; page_shift++)
492 if ((1U << page_shift) == page_size)
493 break;
494 }
495
496 #if defined (__x86_64__)
497
498 #define MAX_CLUMP_SIZE 16
499 #define DEFAULT_CLUMP_SIZE 4
500
501 unsigned int vm_clump_size, vm_clump_mask, vm_clump_shift, vm_clump_promote_threshold;
502
503 #if DEVELOPMENT || DEBUG
504 unsigned long vm_clump_stats[MAX_CLUMP_SIZE+1];
505 unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
506
507 static inline void vm_clump_update_stats(unsigned int c) {
508 assert(c<=vm_clump_size);
509 if(c>0 && c<=vm_clump_size) vm_clump_stats[c]+=c;
510 vm_clump_allocs+=c;
511 }
512 #endif /* if DEVELOPMENT || DEBUG */
513
514 /* Called once to setup the VM clump knobs */
515 static void
516 vm_page_setup_clump( void )
517 {
518 unsigned int override, n;
519
520 vm_clump_size = DEFAULT_CLUMP_SIZE;
521 if ( PE_parse_boot_argn("clump_size", &override, sizeof (override)) ) vm_clump_size = override;
522
523 if(vm_clump_size > MAX_CLUMP_SIZE) panic("vm_page_setup_clump:: clump_size is too large!");
524 if(vm_clump_size < 1) panic("vm_page_setup_clump:: clump_size must be >= 1");
525 if((vm_clump_size & (vm_clump_size-1)) != 0) panic("vm_page_setup_clump:: clump_size must be a power of 2");
526
527 vm_clump_promote_threshold = vm_clump_size;
528 vm_clump_mask = vm_clump_size - 1;
529 for(vm_clump_shift=0, n=vm_clump_size; n>1; n>>=1, vm_clump_shift++);
530
531 #if DEVELOPMENT || DEBUG
532 bzero(vm_clump_stats, sizeof(vm_clump_stats));
533 vm_clump_allocs = vm_clump_inserts = vm_clump_inrange = vm_clump_promotes = 0;
534 #endif /* if DEVELOPMENT || DEBUG */
535 }
536
537 #endif /* #if defined (__x86_64__) */
538
539 #define COLOR_GROUPS_TO_STEAL 4
540
541 /* Called once during startup, once the cache geometry is known.
542 */
543 static void
544 vm_page_set_colors( void )
545 {
546 unsigned int n, override;
547
548 #if defined (__x86_64__)
549 /* adjust #colors because we need to color outside the clump boundary */
550 vm_cache_geometry_colors >>= vm_clump_shift;
551 #endif
552 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
553 n = override;
554 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
555 n = vm_cache_geometry_colors;
556 else n = DEFAULT_COLORS; /* use default if all else fails */
557
558 if ( n == 0 )
559 n = 1;
560 if ( n > MAX_COLORS )
561 n = MAX_COLORS;
562
563 /* the count must be a power of 2 */
564 if ( ( n & (n - 1)) != 0 )
565 n = DEFAULT_COLORS; /* use default if all else fails */
566
567 vm_colors = n;
568 vm_color_mask = n - 1;
569
570 vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
571
572 #if defined (__x86_64__)
573 /* adjust for reduction in colors due to clumping and multiple cores */
574 if (real_ncpus)
575 vm_free_magazine_refill_limit *= (vm_clump_size * real_ncpus);
576 #endif
577 }
578
579
580 lck_grp_t vm_page_lck_grp_free;
581 lck_grp_t vm_page_lck_grp_queue;
582 lck_grp_t vm_page_lck_grp_local;
583 lck_grp_t vm_page_lck_grp_purge;
584 lck_grp_t vm_page_lck_grp_alloc;
585 lck_grp_t vm_page_lck_grp_bucket;
586 lck_grp_attr_t vm_page_lck_grp_attr;
587 lck_attr_t vm_page_lck_attr;
588
589
590 __private_extern__ void
591 vm_page_init_lck_grp(void)
592 {
593 /*
594 * initialize the vm_page lock world
595 */
596 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
597 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
598 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
599 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
600 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
601 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
602 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
603 lck_attr_setdefault(&vm_page_lck_attr);
604 lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
605
606 vm_compressor_init_locks();
607 }
608
609 #define ROUNDUP_NEXTP2(X) (1U << (32 - __builtin_clz((X) - 1)))
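/*
 * Illustrative sketch (not part of the original source): ROUNDUP_NEXTP2
 * rounds a non-zero request up to the next power of two by counting
 * leading zero bits.  For example, 3072 - 1 == 0xBFF has 20 leading zeros
 * in a 32-bit word, so the result is 1U << (32 - 20) == 4096; an exact
 * power of two maps to itself.  X must be greater than 1, since
 * __builtin_clz(0) is undefined.
 */
#if 0	/* example only, not built */
static void
roundup_nextp2_demo(void)
{
	assert(ROUNDUP_NEXTP2(3072) == 4096);	/* rounds up */
	assert(ROUNDUP_NEXTP2(4096) == 4096);	/* already a power of two */
	assert(ROUNDUP_NEXTP2(2) == 2);
}
#endif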
610
611 void
612 vm_page_init_local_q()
613 {
614 unsigned int num_cpus;
615 unsigned int i;
616 struct vplq *t_local_q;
617
618 num_cpus = ml_get_max_cpus();
619
620 /*
621 * no point in this for a uni-processor system
622 */
623 if (num_cpus >= 2) {
624 #if KASAN
625 /* KASAN breaks the expectation of a size-aligned object by adding a
626 * redzone, so explicitly align. */
627 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq) + VM_PACKED_POINTER_ALIGNMENT);
628 t_local_q = (void *)(((uintptr_t)t_local_q + (VM_PACKED_POINTER_ALIGNMENT-1)) & ~(VM_PACKED_POINTER_ALIGNMENT-1));
629 #else
630 /* round the size up to the nearest power of two */
631 t_local_q = (struct vplq *)kalloc(ROUNDUP_NEXTP2(num_cpus * sizeof(struct vplq)));
632 #endif
633
634 for (i = 0; i < num_cpus; i++) {
635 struct vpl *lq;
636
637 lq = &t_local_q[i].vpl_un.vpl;
638 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
639 vm_page_queue_init(&lq->vpl_queue);
640 lq->vpl_count = 0;
641 lq->vpl_internal_count = 0;
642 lq->vpl_external_count = 0;
643 }
644 vm_page_local_q_count = num_cpus;
645
646 vm_page_local_q = (struct vplq *)t_local_q;
647 }
648 }
649
650 /*
651 * vm_init_before_launchd
652 *
653 * This should be called right before launchd is loaded.
654 */
655 void
656 vm_init_before_launchd()
657 {
658 vm_page_wire_count_on_boot = vm_page_wire_count;
659 }
660
661
662 /*
663 * vm_page_bootstrap:
664 *
665 * Initializes the resident memory module.
666 *
667 * Allocates memory for the page cells, and
668 * for the object/offset-to-page hash table headers.
669 * Each page cell is initialized and placed on the free list.
670 * Returns the range of available kernel virtual memory.
671 */
672
673 void
674 vm_page_bootstrap(
675 vm_offset_t *startp,
676 vm_offset_t *endp)
677 {
678 vm_page_t m;
679 unsigned int i;
680 unsigned int log1;
681 unsigned int log2;
682 unsigned int size;
683
684 /*
685 * Initialize the vm_page template.
686 */
687
688 m = &vm_page_template;
689 bzero(m, sizeof (*m));
690
691 #if CONFIG_BACKGROUND_QUEUE
692 m->vmp_backgroundq.next = 0;
693 m->vmp_backgroundq.prev = 0;
694 m->vmp_in_background = FALSE;
695 m->vmp_on_backgroundq = FALSE;
696 #endif
697
698 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
699 m->vmp_listq.next = 0;
700 m->vmp_listq.prev = 0;
701 m->vmp_next_m = 0;
702
703 m->vmp_object = 0; /* reset later */
704 m->vmp_offset = (vm_object_offset_t) -1; /* reset later */
705
706 m->vmp_wire_count = 0;
707 m->vmp_q_state = VM_PAGE_NOT_ON_Q;
708 m->vmp_laundry = FALSE;
709 m->vmp_reference = FALSE;
710 m->vmp_gobbled = FALSE;
711 m->vmp_private = FALSE;
712 m->vmp_unused_page_bits = 0;
713
714 #if !defined(__arm__) && !defined(__arm64__)
715 VM_PAGE_SET_PHYS_PAGE(m, 0); /* reset later */
716 #endif
717 m->vmp_busy = TRUE;
718 m->vmp_wanted = FALSE;
719 m->vmp_tabled = FALSE;
720 m->vmp_hashed = FALSE;
721 m->vmp_fictitious = FALSE;
722 m->vmp_pmapped = FALSE;
723 m->vmp_wpmapped = FALSE;
724 m->vmp_free_when_done = FALSE;
725 m->vmp_absent = FALSE;
726 m->vmp_error = FALSE;
727 m->vmp_dirty = FALSE;
728 m->vmp_cleaning = FALSE;
729 m->vmp_precious = FALSE;
730 m->vmp_clustered = FALSE;
731 m->vmp_overwriting = FALSE;
732 m->vmp_restart = FALSE;
733 m->vmp_unusual = FALSE;
734 m->vmp_cs_validated = FALSE;
735 m->vmp_cs_tainted = FALSE;
736 m->vmp_cs_nx = FALSE;
737 m->vmp_no_cache = FALSE;
738 m->vmp_reusable = FALSE;
739 m->vmp_xpmapped = FALSE;
740 m->vmp_written_by_kernel = FALSE;
741 m->vmp_unused_object_bits = 0;
742
743 /*
744 * Initialize the page queues.
745 */
746 vm_page_init_lck_grp();
747
748 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
749 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
750 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
751
752 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
753 int group;
754
755 purgeable_queues[i].token_q_head = 0;
756 purgeable_queues[i].token_q_tail = 0;
757 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
758 queue_init(&purgeable_queues[i].objq[group]);
759
760 purgeable_queues[i].type = i;
761 purgeable_queues[i].new_pages = 0;
762 #if MACH_ASSERT
763 purgeable_queues[i].debug_count_tokens = 0;
764 purgeable_queues[i].debug_count_objects = 0;
765 #endif
766 };
767 purgeable_nonvolatile_count = 0;
768 queue_init(&purgeable_nonvolatile_queue);
769
770 for (i = 0; i < MAX_COLORS; i++ )
771 vm_page_queue_init(&vm_page_queue_free[i].qhead);
772
773 vm_page_queue_init(&vm_lopage_queue_free);
774 vm_page_queue_init(&vm_page_queue_active);
775 vm_page_queue_init(&vm_page_queue_inactive);
776 #if CONFIG_SECLUDED_MEMORY
777 vm_page_queue_init(&vm_page_queue_secluded);
778 #endif /* CONFIG_SECLUDED_MEMORY */
779 vm_page_queue_init(&vm_page_queue_cleaned);
780 vm_page_queue_init(&vm_page_queue_throttled);
781 vm_page_queue_init(&vm_page_queue_anonymous);
782 queue_init(&vm_objects_wired);
783
784 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
785 vm_page_queue_init(&vm_page_queue_speculative[i].age_q);
786
787 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
788 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
789 }
790 #if CONFIG_BACKGROUND_QUEUE
791 vm_page_queue_init(&vm_page_queue_background);
792
793 vm_page_background_count = 0;
794 vm_page_background_internal_count = 0;
795 vm_page_background_external_count = 0;
796 vm_page_background_promoted_count = 0;
797
798 vm_page_background_target = (unsigned int)(atop_64(max_mem) / 25);
799
800 if (vm_page_background_target > VM_PAGE_BACKGROUND_TARGET_MAX)
801 vm_page_background_target = VM_PAGE_BACKGROUND_TARGET_MAX;
802
803 vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
804 vm_page_background_exclude_external = 0;
805
806 PE_parse_boot_argn("vm_page_bg_mode", &vm_page_background_mode, sizeof(vm_page_background_mode));
807 PE_parse_boot_argn("vm_page_bg_exclude_external", &vm_page_background_exclude_external, sizeof(vm_page_background_exclude_external));
808 PE_parse_boot_argn("vm_page_bg_target", &vm_page_background_target, sizeof(vm_page_background_target));
809
810 if (vm_page_background_mode > VM_PAGE_BG_LEVEL_1)
811 vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
812 #endif
813 vm_page_free_wanted = 0;
814 vm_page_free_wanted_privileged = 0;
815 #if CONFIG_SECLUDED_MEMORY
816 vm_page_free_wanted_secluded = 0;
817 #endif /* CONFIG_SECLUDED_MEMORY */
818
819 #if defined (__x86_64__)
820 /* this must be called before vm_page_set_colors() */
821 vm_page_setup_clump();
822 #endif
823
824 vm_page_set_colors();
825
826 bzero(vm_page_inactive_states, sizeof(vm_page_inactive_states));
827 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
828 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
829 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
830
831 bzero(vm_page_pageable_states, sizeof(vm_page_pageable_states));
832 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
833 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
834 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
835 vm_page_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
836 vm_page_pageable_states[VM_PAGE_ON_SPECULATIVE_Q] = 1;
837 vm_page_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
838 #if CONFIG_SECLUDED_MEMORY
839 vm_page_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
840 #endif /* CONFIG_SECLUDED_MEMORY */
841
842 bzero(vm_page_non_speculative_pageable_states, sizeof(vm_page_non_speculative_pageable_states));
843 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
844 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
845 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
846 vm_page_non_speculative_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
847 vm_page_non_speculative_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
848 #if CONFIG_SECLUDED_MEMORY
849 vm_page_non_speculative_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
850 #endif /* CONFIG_SECLUDED_MEMORY */
851
852 bzero(vm_page_active_or_inactive_states, sizeof(vm_page_active_or_inactive_states));
853 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
854 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
855 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
856 vm_page_active_or_inactive_states[VM_PAGE_ON_ACTIVE_Q] = 1;
857 #if CONFIG_SECLUDED_MEMORY
858 vm_page_active_or_inactive_states[VM_PAGE_ON_SECLUDED_Q] = 1;
859 #endif /* CONFIG_SECLUDED_MEMORY */
860
861 for (i = 0; i < VM_KERN_MEMORY_FIRST_DYNAMIC; i++)
862 {
863 vm_allocation_sites_static[i].refcount = 2;
864 vm_allocation_sites_static[i].tag = i;
865 vm_allocation_sites[i] = &vm_allocation_sites_static[i];
866 }
867 vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC].refcount = 2;
868 vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC].tag = VM_KERN_MEMORY_ANY;
869 vm_allocation_sites[VM_KERN_MEMORY_ANY] = &vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC];
870
871 /*
872 * Steal memory for the map and zone subsystems.
873 */
874 #if CONFIG_GZALLOC
875 gzalloc_configure();
876 #endif
877 kernel_debug_string_early("vm_map_steal_memory");
878 vm_map_steal_memory();
879
880 /*
881 * Allocate (and initialize) the virtual-to-physical
882 * table hash buckets.
883 *
884 * The number of buckets should be a power of two to
885 * get a good hash function. The following computation
886 * chooses the first power of two that is greater
887 * than the number of physical pages in the system.
888 */
889
890 if (vm_page_bucket_count == 0) {
891 unsigned int npages = pmap_free_pages();
892
893 vm_page_bucket_count = 1;
894 while (vm_page_bucket_count < npages)
895 vm_page_bucket_count <<= 1;
896 }
897 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
898
899 vm_page_hash_mask = vm_page_bucket_count - 1;
900
901 /*
902 * Calculate object shift value for hashing algorithm:
903 * O = log2(sizeof(struct vm_object))
904 * B = log2(vm_page_bucket_count)
905 * hash shifts the object left by
906 * B/2 - O
907 */
908 size = vm_page_bucket_count;
909 for (log1 = 0; size > 1; log1++)
910 size /= 2;
911 size = sizeof(struct vm_object);
912 for (log2 = 0; size > 1; log2++)
913 size /= 2;
914 vm_page_hash_shift = log1/2 - log2 + 1;
915
916 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
917 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
918 vm_page_bucket_hash |= 1; /* Low bit must always be set to ensure a unique series */
919
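/*
 * Worked example (values assumed for illustration): with 2^20 buckets,
 * log1 == 20, and if sizeof(struct vm_object) were 256 bytes, log2 == 8,
 * giving vm_page_hash_shift = 20/2 - 8 + 1 = 3.  The multiplier becomes
 * vm_page_bucket_hash = (1 << 10) | (1 << 5) | 1 = 0x421, i.e. roughly the
 * square root and fourth root of the table size, with the low bit forced on.
 */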
920 if (vm_page_hash_mask & vm_page_bucket_count)
921 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
922
923 #if VM_PAGE_BUCKETS_CHECK
924 #if VM_PAGE_FAKE_BUCKETS
925 /*
926 * Allocate a decoy set of page buckets, to detect
927 * any stomping there.
928 */
929 vm_page_fake_buckets = (vm_page_bucket_t *)
930 pmap_steal_memory(vm_page_bucket_count *
931 sizeof(vm_page_bucket_t));
932 vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
933 vm_page_fake_buckets_end =
934 vm_map_round_page((vm_page_fake_buckets_start +
935 (vm_page_bucket_count *
936 sizeof (vm_page_bucket_t))),
937 PAGE_MASK);
938 char *cp;
939 for (cp = (char *)vm_page_fake_buckets_start;
940 cp < (char *)vm_page_fake_buckets_end;
941 cp++) {
942 *cp = 0x5a;
943 }
944 #endif /* VM_PAGE_FAKE_BUCKETS */
945 #endif /* VM_PAGE_BUCKETS_CHECK */
946
947 kernel_debug_string_early("vm_page_buckets");
948 vm_page_buckets = (vm_page_bucket_t *)
949 pmap_steal_memory(vm_page_bucket_count *
950 sizeof(vm_page_bucket_t));
951
952 kernel_debug_string_early("vm_page_bucket_locks");
953 vm_page_bucket_locks = (lck_spin_t *)
954 pmap_steal_memory(vm_page_bucket_lock_count *
955 sizeof(lck_spin_t));
956
957 for (i = 0; i < vm_page_bucket_count; i++) {
958 vm_page_bucket_t *bucket = &vm_page_buckets[i];
959
960 bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
961 #if MACH_PAGE_HASH_STATS
962 bucket->cur_count = 0;
963 bucket->hi_count = 0;
964 #endif /* MACH_PAGE_HASH_STATS */
965 }
966
967 for (i = 0; i < vm_page_bucket_lock_count; i++)
968 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
969
970 lck_spin_init(&vm_objects_wired_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
971 lck_spin_init(&vm_allocation_sites_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
972 vm_tag_init();
973
974 #if VM_PAGE_BUCKETS_CHECK
975 vm_page_buckets_check_ready = TRUE;
976 #endif /* VM_PAGE_BUCKETS_CHECK */
977
978 /*
979 * Machine-dependent code allocates the resident page table.
980 * It uses vm_page_init to initialize the page frames.
981 * The code also returns to us the virtual space available
982 * to the kernel. We don't trust the pmap module
983 * to get the alignment right.
984 */
985
986 kernel_debug_string_early("pmap_startup");
987 pmap_startup(&virtual_space_start, &virtual_space_end);
988 virtual_space_start = round_page(virtual_space_start);
989 virtual_space_end = trunc_page(virtual_space_end);
990
991 *startp = virtual_space_start;
992 *endp = virtual_space_end;
993
994 /*
995 * Compute the initial "wire" count.
996 * Up until now, the pages which have been set aside are not under
997 * the VM system's control, so although they aren't explicitly
998 * wired, they nonetheless can't be moved. At this moment,
999 * all VM managed pages are "free", courtesy of pmap_startup.
1000 */
1001 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
1002 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
1003 #if CONFIG_SECLUDED_MEMORY
1004 vm_page_wire_count -= vm_page_secluded_count;
1005 #endif
1006 vm_page_wire_count_initial = vm_page_wire_count;
1007 vm_page_pages_initial = vm_page_pages;
1008
1009 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
1010 vm_page_free_count, vm_page_wire_count);
1011
1012 kernel_debug_string_early("vm_page_bootstrap complete");
1013 simple_lock_init(&vm_paging_lock, 0);
1014 }
1015
1016 #ifndef MACHINE_PAGES
1017 /*
1018 * We implement pmap_steal_memory and pmap_startup with the help
1019 * of two simpler functions, pmap_virtual_space and pmap_next_page.
1020 */
1021
1022 void *
1023 pmap_steal_memory(
1024 vm_size_t size)
1025 {
1026 kern_return_t kr;
1027 vm_offset_t addr, vaddr;
1028 ppnum_t phys_page;
1029
1030 /*
1031 * We round the size up to a multiple of the pointer size.
1032 */
1033
1034 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
1035
1036 /*
1037 * If this is the first call to pmap_steal_memory,
1038 * we have to initialize ourself.
1039 */
1040
1041 if (virtual_space_start == virtual_space_end) {
1042 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
1043
1044 /*
1045 * The initial values must be aligned properly, and
1046 * we don't trust the pmap module to do it right.
1047 */
1048
1049 virtual_space_start = round_page(virtual_space_start);
1050 virtual_space_end = trunc_page(virtual_space_end);
1051 }
1052
1053 /*
1054 * Allocate virtual memory for this request.
1055 */
1056
1057 addr = virtual_space_start;
1058 virtual_space_start += size;
1059
1060 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
1061
1062 /*
1063 * Allocate and map physical pages to back new virtual pages.
1064 */
1065
1066 for (vaddr = round_page(addr);
1067 vaddr < addr + size;
1068 vaddr += PAGE_SIZE) {
1069
1070 if (!pmap_next_page_hi(&phys_page))
1071 panic("pmap_steal_memory() size: 0x%llx\n", (uint64_t)size);
1072
1073 /*
1074 * XXX Logically, these mappings should be wired,
1075 * but some pmap modules barf if they are.
1076 */
1077 #if defined(__LP64__)
1078 #ifdef __arm64__
1079 /* ARM64_TODO: verify that we really don't need this */
1080 #else
1081 pmap_pre_expand(kernel_pmap, vaddr);
1082 #endif
1083 #endif
1084
1085 kr = pmap_enter(kernel_pmap, vaddr, phys_page,
1086 VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
1087 VM_WIMG_USE_DEFAULT, FALSE);
1088
1089 if (kr != KERN_SUCCESS) {
1090 panic("pmap_steal_memory() pmap_enter failed, vaddr=%#lx, phys_page=%u",
1091 (unsigned long)vaddr, phys_page);
1092 }
1093
1094 /*
1095 * Account for newly stolen memory
1096 */
1097 vm_page_wire_count++;
1098 vm_page_stolen_count++;
1099 }
1100
1101 #if KASAN
1102 kasan_notify_address(round_page(addr), size);
1103 #endif
1104 return (void *) addr;
1105 }
1106
1107 #if CONFIG_SECLUDED_MEMORY
1108 /* boot-args to control secluded memory */
1109 unsigned int secluded_mem_mb = 0; /* # of MBs of RAM to seclude */
1110 int secluded_for_iokit = 1; /* IOKit can use secluded memory */
1111 int secluded_for_apps = 1; /* apps can use secluded memory */
1112 int secluded_for_filecache = 2; /* filecache can use secluded memory */
1113 #if 11
1114 int secluded_for_fbdp = 0;
1115 #endif
1116 uint64_t secluded_shutoff_trigger = 0;
1117 #endif /* CONFIG_SECLUDED_MEMORY */
1118
1119
1120 #if defined(__arm__) || defined(__arm64__)
1121 extern void patch_low_glo_vm_page_info(void *, void *, uint32_t);
1122 unsigned int vm_first_phys_ppnum = 0;
1123 #endif
1124
1125
1126 void vm_page_release_startup(vm_page_t mem);
1127 void
1128 pmap_startup(
1129 vm_offset_t *startp,
1130 vm_offset_t *endp)
1131 {
1132 unsigned int i, npages, pages_initialized, fill, fillval;
1133 ppnum_t phys_page;
1134 addr64_t tmpaddr;
1135
1136 #if defined(__LP64__)
1137 /*
1138 * make sure we are aligned on a 64 byte boundary
1139 * for VM_PAGE_PACK_PTR (it clips off the low-order
1140 * 6 bits of the pointer)
1141 */
1142 if (virtual_space_start != virtual_space_end)
1143 virtual_space_start = round_page(virtual_space_start);
1144 #endif
1145
1146 /*
1147 * We calculate how many page frames we will have
1148 * and then allocate the page structures in one chunk.
1149 */
1150
1151 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
1152 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
1153 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Each frame costs PAGE_SIZE of memory plus one vm_page_t to describe it */
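/*
 * Worked example (sizes assumed for illustration): with 4 KB pages and an
 * 80-byte struct vm_page, 1 GB of remaining memory yields roughly
 * 2^30 / (4096 + 80) ~= 257,000 describable frames, slightly fewer than the
 * 262,144 raw frames, since the vm_pages array itself is carved out of the
 * same memory by pmap_steal_memory() below.
 */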
1154
1155 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
1156
1157 /*
1158 * Initialize the page frames.
1159 */
1160 kernel_debug_string_early("Initialize the page frames");
1161
1162 vm_page_array_beginning_addr = &vm_pages[0];
1163 vm_page_array_ending_addr = &vm_pages[npages];
1164
1165 for (i = 0, pages_initialized = 0; i < npages; i++) {
1166 if (!pmap_next_page(&phys_page))
1167 break;
1168 #if defined(__arm__) || defined(__arm64__)
1169 if (pages_initialized == 0) {
1170 vm_first_phys_ppnum = phys_page;
1171 patch_low_glo_vm_page_info((void *)vm_page_array_beginning_addr, (void *)vm_page_array_ending_addr, vm_first_phys_ppnum);
1172 }
1173 assert((i + vm_first_phys_ppnum) == phys_page);
1174 #endif
1175 if (pages_initialized == 0 || phys_page < vm_page_lowest)
1176 vm_page_lowest = phys_page;
1177
1178 vm_page_init(&vm_pages[i], phys_page, FALSE);
1179 vm_page_pages++;
1180 pages_initialized++;
1181 }
1182 vm_pages_count = pages_initialized;
1183 vm_page_array_boundary = &vm_pages[pages_initialized];
1184
1185 #if defined(__LP64__)
1186
1187 if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0]))) != &vm_pages[0])
1188 panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);
1189
1190 if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1]))) != &vm_pages[vm_pages_count-1])
1191 panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
1192 #endif
1193 kernel_debug_string_early("page fill/release");
1194 /*
1195 * Check if we want to initialize pages to a known value
1196 */
1197 fill = 0; /* Assume no fill */
1198 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
1199 #if DEBUG
1200 /* This slows down booting the DEBUG kernel, particularly on
1201 * large memory systems, but is worthwhile in deterministically
1202 * trapping uninitialized memory usage.
1203 */
1204 if (fill == 0) {
1205 fill = 1;
1206 fillval = 0xDEB8F177;
1207 }
1208 #endif
1209 if (fill)
1210 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
1211
1212 #if CONFIG_SECLUDED_MEMORY
1213 /* default: no secluded mem */
1214 secluded_mem_mb = 0;
1215 if (max_mem > 1*1024*1024*1024) {
1216 /* default to 90MB for devices with > 1GB of RAM */
1217 secluded_mem_mb = 90;
1218 }
1219 /* override with value from device tree, if provided */
1220 PE_get_default("kern.secluded_mem_mb",
1221 &secluded_mem_mb, sizeof(secluded_mem_mb));
1222 /* override with value from boot-args, if provided */
1223 PE_parse_boot_argn("secluded_mem_mb",
1224 &secluded_mem_mb,
1225 sizeof (secluded_mem_mb));
1226
1227 vm_page_secluded_target = (unsigned int)
1228 ((secluded_mem_mb * 1024ULL * 1024ULL) / PAGE_SIZE);
1229 PE_parse_boot_argn("secluded_for_iokit",
1230 &secluded_for_iokit,
1231 sizeof (secluded_for_iokit));
1232 PE_parse_boot_argn("secluded_for_apps",
1233 &secluded_for_apps,
1234 sizeof (secluded_for_apps));
1235 PE_parse_boot_argn("secluded_for_filecache",
1236 &secluded_for_filecache,
1237 sizeof (secluded_for_filecache));
1238 #if 11
1239 PE_parse_boot_argn("secluded_for_fbdp",
1240 &secluded_for_fbdp,
1241 sizeof (secluded_for_fbdp));
1242 #endif
1243
1244 /*
1245 * On small devices, allow a large app to effectively suppress
1246 * secluded memory until it exits.
1247 */
1248 if (max_mem <= 1 * 1024 * 1024 * 1024 && vm_page_secluded_target != 0) {
1249
1250 /*
1251 * Get an amount from boot-args, else use 500MB.
1252 * 500MB was chosen from a Peace daemon tentpole test which used munch
1253 * to induce jetsam thrashing of false idle daemons.
1254 */
1255 int secluded_shutoff_mb;
1256 if (PE_parse_boot_argn("secluded_shutoff_mb", &secluded_shutoff_mb,
1257 sizeof (secluded_shutoff_mb)))
1258 secluded_shutoff_trigger = (uint64_t)secluded_shutoff_mb * 1024 * 1024;
1259 else
1260 secluded_shutoff_trigger = 500 * 1024 * 1024;
1261
1262 if (secluded_shutoff_trigger != 0)
1263 secluded_suppression_init();
1264 }
1265
1266 #endif /* CONFIG_SECLUDED_MEMORY */
1267
1268 /*
1269 * By default release pages in reverse order so that physical pages
1270 * initially get allocated in ascending addresses. This keeps
1271 * the devices (which must address physical memory) happy if
1272 * they require several consecutive pages.
1273 *
1274 * For debugging, you can reverse this ordering and/or fill
1275 * all pages with a known value.
1276 */
1277 if (vm_himemory_mode == 2) {
1278 for (i = 0; i < pages_initialized; i++) {
1279 if (fill)
1280 fillPage(VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]), fillval);
1281 vm_page_release_startup(&vm_pages[i]);
1282 }
1283 } else {
1284 for (i = pages_initialized; i-- > 0; ) {
1285 if (fill)
1286 fillPage(VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]), fillval);
1287 vm_page_release_startup(&vm_pages[i]);
1288 }
1289 }
1290
1291 VM_CHECK_MEMORYSTATUS;
1292
1293 #if 0
1294 {
1295 vm_page_t xx, xxo, xxl;
1296 int i, j, k, l;
1297
1298 j = 0; /* (BRINGUP) */
1299 xxl = 0;
1300
1301 for( i = 0; i < vm_colors; i++ ) {
1302 queue_iterate(&vm_page_queue_free[i].qhead,
1303 xx,
1304 vm_page_t,
1305 vmp_pageq) { /* BRINGUP */
1306 j++; /* (BRINGUP) */
1307 if(j > vm_page_free_count) { /* (BRINGUP) */
1308 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
1309 }
1310
1311 l = vm_page_free_count - j; /* (BRINGUP) */
1312 k = 0; /* (BRINGUP) */
1313
1314 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
1315
1316 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i].qhead; xxo = xxo->pageq.next) { /* (BRINGUP) */
1317 k++;
1318 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
1319 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
1320 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
1321 }
1322 }
1323
1324 xxl = xx;
1325 }
1326 }
1327
1328 if(j != vm_page_free_count) { /* (BRINGUP) */
1329 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
1330 }
1331 }
1332 #endif
1333
1334
1335 /*
1336 * We have to re-align virtual_space_start,
1337 * because pmap_steal_memory has been using it.
1338 */
1339
1340 virtual_space_start = round_page(virtual_space_start);
1341
1342 *startp = virtual_space_start;
1343 *endp = virtual_space_end;
1344 }
1345 #endif /* MACHINE_PAGES */
1346
1347 /*
1348 * Routine: vm_page_module_init
1349 * Purpose:
1350 * Second initialization pass, to be done after
1351 * the basic VM system is ready.
1352 */
1353 void
1354 vm_page_module_init(void)
1355 {
1356 uint64_t vm_page_zone_pages, vm_page_array_zone_data_size;
1357 vm_size_t vm_page_with_ppnum_size;
1358
1359 vm_page_array_zone = zinit((vm_size_t) sizeof(struct vm_page),
1360 0, PAGE_SIZE, "vm pages array");
1361
1362 zone_change(vm_page_array_zone, Z_CALLERACCT, FALSE);
1363 zone_change(vm_page_array_zone, Z_EXPAND, FALSE);
1364 zone_change(vm_page_array_zone, Z_EXHAUST, TRUE);
1365 zone_change(vm_page_array_zone, Z_FOREIGN, TRUE);
1366 zone_change(vm_page_array_zone, Z_GZALLOC_EXEMPT, TRUE);
1367 /*
1368 * Adjust zone statistics to account for the real pages allocated
1369 * in vm_page_create(). [Q: is this really what we want?]
1370 */
1371 vm_page_array_zone->count += vm_page_pages;
1372 vm_page_array_zone->sum_count += vm_page_pages;
1373 vm_page_array_zone_data_size = vm_page_pages * vm_page_array_zone->elem_size;
1374 vm_page_array_zone->cur_size += vm_page_array_zone_data_size;
1375 vm_page_zone_pages = ((round_page(vm_page_array_zone_data_size)) / PAGE_SIZE);
1376 OSAddAtomic64(vm_page_zone_pages, &(vm_page_array_zone->page_count));
1377 /* since zone accounts for these, take them out of stolen */
1378 VM_PAGE_MOVE_STOLEN(vm_page_zone_pages);
1379
1380 vm_page_with_ppnum_size = (sizeof(struct vm_page_with_ppnum) + (VM_PACKED_POINTER_ALIGNMENT-1)) & ~(VM_PACKED_POINTER_ALIGNMENT - 1);
1381
1382 vm_page_zone = zinit(vm_page_with_ppnum_size,
1383 0, PAGE_SIZE, "vm pages");
1384
1385 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
1386 zone_change(vm_page_zone, Z_EXPAND, FALSE);
1387 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
1388 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
1389 zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
1390 zone_change(vm_page_zone, Z_ALIGNMENT_REQUIRED, TRUE);
1391 }
1392
1393 /*
1394 * Routine: vm_page_create
1395 * Purpose:
1396 * After the VM system is up, machine-dependent code
1397 * may stumble across more physical memory. For example,
1398 * memory that it was reserving for a frame buffer.
1399 * vm_page_create turns this memory into available pages.
1400 */
1401
1402 void
1403 vm_page_create(
1404 ppnum_t start,
1405 ppnum_t end)
1406 {
1407 ppnum_t phys_page;
1408 vm_page_t m;
1409
1410 for (phys_page = start;
1411 phys_page < end;
1412 phys_page++) {
1413 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
1414 == VM_PAGE_NULL)
1415 vm_page_more_fictitious();
1416
1417 m->vmp_fictitious = FALSE;
1418 pmap_clear_noencrypt(phys_page);
1419
1420 vm_page_pages++;
1421 vm_page_release(m, FALSE);
1422 }
1423 }
1424
1425 /*
1426 * vm_page_hash:
1427 *
1428 * Distributes the object/offset key pair among hash buckets.
1429 *
1430 * NOTE: The bucket count must be a power of 2
1431 */
1432 #define vm_page_hash(object, offset) (\
1433 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
1434 & vm_page_hash_mask)
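/*
 * Illustrative sketch (not part of the original source): a standalone,
 * user-space analogue of vm_page_hash() with assumed values for the
 * multiplier and mask, showing how the object pointer and the page index
 * of the offset are mixed before masking down to a bucket index.
 */
#if 0	/* example only, not built */
static uint32_t example_bucket_hash = 0x421;		/* assumed; see vm_page_bootstrap() */
static uint32_t example_hash_mask = (1u << 20) - 1;	/* assumed 2^20 buckets */

static uint32_t
example_vm_page_hash(uintptr_t object, uint64_t offset)
{
	uint32_t page_index = (uint32_t)(offset >> 12);	/* atop_64() for 4 KB pages */

	return (((uint32_t)(object * example_bucket_hash)) +
	    (page_index ^ example_bucket_hash)) & example_hash_mask;
}

static void
example_vm_page_hash_demo(void)
{
	uintptr_t obj = (uintptr_t)0x12345600;
	uint64_t off;

	/* consecutive pages of the same object land in different buckets */
	for (off = 0; off < 4 * 4096; off += 4096)
		printf("offset 0x%llx -> bucket %u\n",
		    (unsigned long long)off, example_vm_page_hash(obj, off));
}
#endif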
1435
1436
1437 /*
1438 * vm_page_insert: [ internal use only ]
1439 *
1440 * Inserts the given mem entry into the object/offset-page
1441 * table and object list.
1442 *
1443 * The object must be locked.
1444 */
1445 void
1446 vm_page_insert(
1447 vm_page_t mem,
1448 vm_object_t object,
1449 vm_object_offset_t offset)
1450 {
1451 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, TRUE, FALSE, FALSE, NULL);
1452 }
1453
1454 void
1455 vm_page_insert_wired(
1456 vm_page_t mem,
1457 vm_object_t object,
1458 vm_object_offset_t offset,
1459 vm_tag_t tag)
1460 {
1461 vm_page_insert_internal(mem, object, offset, tag, FALSE, TRUE, FALSE, FALSE, NULL);
1462 }
1463
1464 void
1465 vm_page_insert_internal(
1466 vm_page_t mem,
1467 vm_object_t object,
1468 vm_object_offset_t offset,
1469 vm_tag_t tag,
1470 boolean_t queues_lock_held,
1471 boolean_t insert_in_hash,
1472 boolean_t batch_pmap_op,
1473 boolean_t batch_accounting,
1474 uint64_t *delayed_ledger_update)
1475 {
1476 vm_page_bucket_t *bucket;
1477 lck_spin_t *bucket_lock;
1478 int hash_id;
1479 task_t owner;
1480 int ledger_idx_volatile;
1481 int ledger_idx_nonvolatile;
1482 int ledger_idx_volatile_compressed;
1483 int ledger_idx_nonvolatile_compressed;
1484 boolean_t do_footprint;
1485
1486 XPR(XPR_VM_PAGE,
1487 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1488 object, offset, mem, 0,0);
1489 #if 0
1490 /*
1491 * we may not hold the page queue lock
1492 * so this check isn't safe to make
1493 */
1494 VM_PAGE_CHECK(mem);
1495 #endif
1496
1497 assert(page_aligned(offset));
1498
1499 assert(!VM_PAGE_WIRED(mem) || mem->vmp_private || mem->vmp_fictitious || (tag != VM_KERN_MEMORY_NONE));
1500
1501 /* the vm_submap_object is only a placeholder for submaps */
1502 assert(object != vm_submap_object);
1503
1504 vm_object_lock_assert_exclusive(object);
1505 LCK_MTX_ASSERT(&vm_page_queue_lock,
1506 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1507 : LCK_MTX_ASSERT_NOTOWNED);
1508
1509 if (queues_lock_held == FALSE)
1510 assert(!VM_PAGE_PAGEABLE(mem));
1511
1512 if (insert_in_hash == TRUE) {
1513 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1514 if (mem->vmp_tabled || mem->vmp_object)
1515 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1516 "already in (obj=%p,off=0x%llx)",
1517 mem, object, offset, VM_PAGE_OBJECT(mem), mem->vmp_offset);
1518 #endif
1519 if (object->internal && (offset >= object->vo_size)) {
1520 panic("vm_page_insert_internal: (page=%p,obj=%p,off=0x%llx,size=0x%llx) inserted at offset past object bounds",
1521 mem, object, offset, object->vo_size);
1522 }
1523
1524 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1525
1526 /*
1527 * Record the object/offset pair in this page
1528 */
1529
1530 mem->vmp_object = VM_PAGE_PACK_OBJECT(object);
1531 mem->vmp_offset = offset;
1532
1533 #if CONFIG_SECLUDED_MEMORY
1534 if (object->eligible_for_secluded) {
1535 vm_page_secluded.eligible_for_secluded++;
1536 }
1537 #endif /* CONFIG_SECLUDED_MEMORY */
1538
1539 /*
1540 * Insert it into the object_object/offset hash table
1541 */
1542 hash_id = vm_page_hash(object, offset);
1543 bucket = &vm_page_buckets[hash_id];
1544 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1545
1546 lck_spin_lock(bucket_lock);
1547
1548 mem->vmp_next_m = bucket->page_list;
1549 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1550 assert(mem == (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list)));
1551
1552 #if MACH_PAGE_HASH_STATS
1553 if (++bucket->cur_count > bucket->hi_count)
1554 bucket->hi_count = bucket->cur_count;
1555 #endif /* MACH_PAGE_HASH_STATS */
1556 mem->vmp_hashed = TRUE;
1557 lck_spin_unlock(bucket_lock);
1558 }
1559
1560 {
1561 unsigned int cache_attr;
1562
1563 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1564
1565 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1566 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1567 }
1568 }
1569 /*
1570 * Now link into the object's list of backed pages.
1571 */
1572 vm_page_queue_enter(&object->memq, mem, vm_page_t, vmp_listq);
1573 object->memq_hint = mem;
1574 mem->vmp_tabled = TRUE;
1575
1576 /*
1577 * Show that the object has one more resident page.
1578 */
1579
1580 object->resident_page_count++;
1581 if (VM_PAGE_WIRED(mem)) {
1582 assert(mem->vmp_wire_count > 0);
1583 VM_OBJECT_WIRED_PAGE_UPDATE_START(object);
1584 VM_OBJECT_WIRED_PAGE_ADD(object, mem);
1585 VM_OBJECT_WIRED_PAGE_UPDATE_END(object, tag);
1586 }
1587 assert(object->resident_page_count >= object->wired_page_count);
1588
1589 if (batch_accounting == FALSE) {
1590 if (object->internal) {
1591 OSAddAtomic(1, &vm_page_internal_count);
1592 } else {
1593 OSAddAtomic(1, &vm_page_external_count);
1594 }
1595 }
1596
1597 /*
1598 * It wouldn't make sense to insert a "reusable" page in
1599 * an object (the page would have been marked "reusable" only
1600 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1601 * in the object at that time).
1602 * But a page could be inserted in an "all_reusable" object, if
1603 * something faults it in (a vm_read() from another task or a
1604 * "use-after-free" issue in user space, for example). It can
1605 * also happen if we're relocating a page from that object to
1606 * a different physical page during a physically-contiguous
1607 * allocation.
1608 */
1609 assert(!mem->vmp_reusable);
1610 if (object->all_reusable) {
1611 OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1612 }
1613
1614 if (object->purgable == VM_PURGABLE_DENY &&
1615 ! object->vo_ledger_tag) {
1616 owner = TASK_NULL;
1617 } else {
1618 owner = VM_OBJECT_OWNER(object);
1619 vm_object_ledger_tag_ledgers(object,
1620 &ledger_idx_volatile,
1621 &ledger_idx_nonvolatile,
1622 &ledger_idx_volatile_compressed,
1623 &ledger_idx_nonvolatile_compressed,
1624 &do_footprint);
1625 }
1626 if (owner &&
1627 (object->purgable == VM_PURGABLE_NONVOLATILE ||
1628 object->purgable == VM_PURGABLE_DENY ||
1629 VM_PAGE_WIRED(mem))) {
1630
1631 if (delayed_ledger_update)
1632 *delayed_ledger_update += PAGE_SIZE;
1633 else {
1634 /* more non-volatile bytes */
1635 ledger_credit(owner->ledger,
1636 ledger_idx_nonvolatile,
1637 PAGE_SIZE);
1638 if (do_footprint) {
1639 /* more footprint */
1640 ledger_credit(owner->ledger,
1641 task_ledgers.phys_footprint,
1642 PAGE_SIZE);
1643 }
1644 }
1645
1646 } else if (owner &&
1647 (object->purgable == VM_PURGABLE_VOLATILE ||
1648 object->purgable == VM_PURGABLE_EMPTY)) {
1649 assert(! VM_PAGE_WIRED(mem));
1650 /* more volatile bytes */
1651 ledger_credit(owner->ledger,
1652 ledger_idx_volatile,
1653 PAGE_SIZE);
1654 }
1655
1656 if (object->purgable == VM_PURGABLE_VOLATILE) {
1657 if (VM_PAGE_WIRED(mem)) {
1658 OSAddAtomic(+1, &vm_page_purgeable_wired_count);
1659 } else {
1660 OSAddAtomic(+1, &vm_page_purgeable_count);
1661 }
1662 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1663 mem->vmp_q_state == VM_PAGE_ON_THROTTLED_Q) {
1664 /*
1665 * This page belongs to a purged VM object but hasn't
1666 * been purged (because it was "busy").
1667 * It's in the "throttled" queue and hence not
1668 * visible to vm_pageout_scan(). Move it to a pageable
1669 * queue, so that it can eventually be reclaimed, instead
1670 * of lingering in the "empty" object.
1671 */
1672 if (queues_lock_held == FALSE)
1673 vm_page_lockspin_queues();
1674 vm_page_deactivate(mem);
1675 if (queues_lock_held == FALSE)
1676 vm_page_unlock_queues();
1677 }
1678
1679 #if VM_OBJECT_TRACKING_OP_MODIFIED
1680 if (vm_object_tracking_inited &&
1681 object->internal &&
1682 object->resident_page_count == 0 &&
1683 object->pager == NULL &&
1684 object->shadow != NULL &&
1685 object->shadow->copy == object) {
1686 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
1687 int numsaved = 0;
1688
1689 numsaved = OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
1690 btlog_add_entry(vm_object_tracking_btlog,
1691 object,
1692 VM_OBJECT_TRACKING_OP_MODIFIED,
1693 bt,
1694 numsaved);
1695 }
1696 #endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
1697 }
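
/*
 * Illustrative sketch (not part of the build): the ledger choice made
 * above when a page enters an object, reduced to a hypothetical helper
 * with made-up names.  Wired or non-volatile pages are billed as
 * "non-volatile" (and also count toward the footprint when requested);
 * volatile/empty purgeable pages are billed as "volatile" and are never
 * wired at this point.  Objects with no accountable owner are not billed.
 */
#if 0 /* illustrative only */
enum demo_purgable {
	DEMO_NONVOLATILE, DEMO_VOLATILE, DEMO_EMPTY, DEMO_DENY
};

enum demo_ledger {
	DEMO_LEDGER_NONE, DEMO_LEDGER_NONVOLATILE, DEMO_LEDGER_VOLATILE
};

static enum demo_ledger
demo_ledger_for_insert(enum demo_purgable state, int wired, int has_owner)
{
	if (!has_owner)
		return DEMO_LEDGER_NONE;
	/* wired pages and non-volatile/denied objects bill as non-volatile */
	if (state == DEMO_NONVOLATILE || state == DEMO_DENY || wired)
		return DEMO_LEDGER_NONVOLATILE;
	/* volatile and empty purgeable objects bill as volatile */
	if (state == DEMO_VOLATILE || state == DEMO_EMPTY)
		return DEMO_LEDGER_VOLATILE;
	return DEMO_LEDGER_NONE;
}
#endif /* illustrative only */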
1698
1699 /*
1700 * vm_page_replace:
1701 *
1702 * Exactly like vm_page_insert, except that we first
1703 * remove any existing page at the given offset in object.
1704 *
1705 * The object must be locked.
1706 */
1707 void
1708 vm_page_replace(
1709 vm_page_t mem,
1710 vm_object_t object,
1711 vm_object_offset_t offset)
1712 {
1713 vm_page_bucket_t *bucket;
1714 vm_page_t found_m = VM_PAGE_NULL;
1715 lck_spin_t *bucket_lock;
1716 int hash_id;
1717
1718 #if 0
1719 /*
1720 * we don't hold the page queue lock
1721 * so this check isn't safe to make
1722 */
1723 VM_PAGE_CHECK(mem);
1724 #endif
1725 vm_object_lock_assert_exclusive(object);
1726 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1727 if (mem->vmp_tabled || mem->vmp_object)
1728 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1729 "already in (obj=%p,off=0x%llx)",
1730 mem, object, offset, VM_PAGE_OBJECT(mem), mem->vmp_offset);
1731 #endif
1732 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1733
1734 assert(!VM_PAGE_PAGEABLE(mem));
1735
1736 /*
1737 * Record the object/offset pair in this page
1738 */
1739 mem->vmp_object = VM_PAGE_PACK_OBJECT(object);
1740 mem->vmp_offset = offset;
1741
1742 /*
1743 * Insert it into the object/offset hash table,
1744 * replacing any page that might have been there.
1745 */
1746
1747 hash_id = vm_page_hash(object, offset);
1748 bucket = &vm_page_buckets[hash_id];
1749 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1750
1751 lck_spin_lock(bucket_lock);
1752
1753 if (bucket->page_list) {
1754 vm_page_packed_t *mp = &bucket->page_list;
1755 vm_page_t m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp));
1756
1757 do {
1758 /*
1759 * compare packed object pointers
1760 */
1761 if (m->vmp_object == mem->vmp_object && m->vmp_offset == offset) {
1762 /*
1763 * Remove old page from hash list
1764 */
1765 *mp = m->vmp_next_m;
1766 m->vmp_hashed = FALSE;
1767 m->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
1768
1769 found_m = m;
1770 break;
1771 }
1772 mp = &m->vmp_next_m;
1773 } while ((m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp))));
1774
1775 mem->vmp_next_m = bucket->page_list;
1776 } else {
1777 mem->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
1778 }
1779 /*
1780 * insert new page at head of hash list
1781 */
1782 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1783 mem->vmp_hashed = TRUE;
1784
1785 lck_spin_unlock(bucket_lock);
1786
1787 if (found_m) {
1788 /*
1789 * there was already a page at the specified
1790 * offset for this object... remove it from
1791 * the object and free it back to the free list
1792 */
1793 vm_page_free_unlocked(found_m, FALSE);
1794 }
1795 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, FALSE, FALSE, FALSE, NULL);
1796 }
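
/*
 * Illustrative sketch (not part of the build): the pointer-to-link walk
 * used in the hash-chain replacement loop of vm_page_replace() above,
 * shown on a plain singly linked list with hypothetical names.  Keeping
 * a pointer to the previous "next" field lets the head and interior
 * cases share one code path; the kernel version additionally packs the
 * pointers, which is stripped out here.
 */
#if 0 /* illustrative only */
#include <stddef.h>

struct demo_node {
	struct demo_node	*next;
	int			key;
};

static void
demo_unlink(struct demo_node **head, struct demo_node *victim)
{
	struct demo_node **linkp;

	for (linkp = head; *linkp != NULL; linkp = &(*linkp)->next) {
		if (*linkp == victim) {
			*linkp = victim->next;	/* splice it out */
			victim->next = NULL;
			return;
		}
	}
}
#endif /* illustrative only */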
1797
1798 /*
1799 * vm_page_remove: [ internal use only ]
1800 *
1801 * Removes the given mem entry from the object/offset-page
1802 * table and the object page list.
1803 *
1804 * The object must be locked.
1805 */
1806
1807 void
1808 vm_page_remove(
1809 vm_page_t mem,
1810 boolean_t remove_from_hash)
1811 {
1812 vm_page_bucket_t *bucket;
1813 vm_page_t this;
1814 lck_spin_t *bucket_lock;
1815 int hash_id;
1816 task_t owner;
1817 vm_object_t m_object;
1818 int ledger_idx_volatile;
1819 int ledger_idx_nonvolatile;
1820 int ledger_idx_volatile_compressed;
1821 int ledger_idx_nonvolatile_compressed;
1822 int do_footprint;
1823
1824 m_object = VM_PAGE_OBJECT(mem);
1825
1826 XPR(XPR_VM_PAGE,
1827 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1828 m_object, mem->vmp_offset,
1829 mem, 0,0);
1830
1831 vm_object_lock_assert_exclusive(m_object);
1832 assert(mem->vmp_tabled);
1833 assert(!mem->vmp_cleaning);
1834 assert(!mem->vmp_laundry);
1835
1836 if (VM_PAGE_PAGEABLE(mem)) {
1837 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1838 }
1839 #if 0
1840 /*
1841 * we don't hold the page queue lock
1842 * so this check isn't safe to make
1843 */
1844 VM_PAGE_CHECK(mem);
1845 #endif
1846 if (remove_from_hash == TRUE) {
1847 /*
1848 * Remove from the object/offset hash table
1849 */
1850 hash_id = vm_page_hash(m_object, mem->vmp_offset);
1851 bucket = &vm_page_buckets[hash_id];
1852 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1853
1854 lck_spin_lock(bucket_lock);
1855
1856 if ((this = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list))) == mem) {
1857 /* optimize for common case */
1858
1859 bucket->page_list = mem->vmp_next_m;
1860 } else {
1861 vm_page_packed_t *prev;
1862
1863 for (prev = &this->vmp_next_m;
1864 (this = (vm_page_t)(VM_PAGE_UNPACK_PTR(*prev))) != mem;
1865 prev = &this->vmp_next_m)
1866 continue;
1867 *prev = this->vmp_next_m;
1868 }
1869 #if MACH_PAGE_HASH_STATS
1870 bucket->cur_count--;
1871 #endif /* MACH_PAGE_HASH_STATS */
1872 mem->vmp_hashed = FALSE;
1873 this->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
1874 lck_spin_unlock(bucket_lock);
1875 }
1876 /*
1877 * Now remove from the object's list of backed pages.
1878 */
1879
1880 vm_page_remove_internal(mem);
1881
1882 /*
1883 * And show that the object has one fewer resident
1884 * page.
1885 */
1886
1887 assert(m_object->resident_page_count > 0);
1888 m_object->resident_page_count--;
1889
1890 if (m_object->internal) {
1891 #if DEBUG
1892 assert(vm_page_internal_count);
1893 #endif /* DEBUG */
1894
1895 OSAddAtomic(-1, &vm_page_internal_count);
1896 } else {
1897 assert(vm_page_external_count);
1898 OSAddAtomic(-1, &vm_page_external_count);
1899
1900 if (mem->vmp_xpmapped) {
1901 assert(vm_page_xpmapped_external_count);
1902 OSAddAtomic(-1, &vm_page_xpmapped_external_count);
1903 }
1904 }
1905 if (!m_object->internal &&
1906 m_object->cached_list.next &&
1907 m_object->cached_list.prev) {
1908 if (m_object->resident_page_count == 0)
1909 vm_object_cache_remove(m_object);
1910 }
1911
1912 if (VM_PAGE_WIRED(mem)) {
1913 assert(mem->vmp_wire_count > 0);
1914 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
1915 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
1916 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
1917 }
1918 assert(m_object->resident_page_count >=
1919 m_object->wired_page_count);
1920 if (mem->vmp_reusable) {
1921 assert(m_object->reusable_page_count > 0);
1922 m_object->reusable_page_count--;
1923 assert(m_object->reusable_page_count <=
1924 m_object->resident_page_count);
1925 mem->vmp_reusable = FALSE;
1926 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1927 vm_page_stats_reusable.reused_remove++;
1928 } else if (m_object->all_reusable) {
1929 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1930 vm_page_stats_reusable.reused_remove++;
1931 }
1932
1933 if (m_object->purgable == VM_PURGABLE_DENY &&
1934 ! m_object->vo_ledger_tag) {
1935 owner = TASK_NULL;
1936 } else {
1937 owner = VM_OBJECT_OWNER(m_object);
1938 vm_object_ledger_tag_ledgers(m_object,
1939 &ledger_idx_volatile,
1940 &ledger_idx_nonvolatile,
1941 &ledger_idx_volatile_compressed,
1942 &ledger_idx_nonvolatile_compressed,
1943 &do_footprint);
1944 }
1945 if (owner &&
1946 (m_object->purgable == VM_PURGABLE_NONVOLATILE ||
1947 m_object->purgable == VM_PURGABLE_DENY ||
1948 VM_PAGE_WIRED(mem))) {
1949 /* less non-volatile bytes */
1950 ledger_debit(owner->ledger,
1951 ledger_idx_nonvolatile,
1952 PAGE_SIZE);
1953 if (do_footprint) {
1954 /* less footprint */
1955 ledger_debit(owner->ledger,
1956 task_ledgers.phys_footprint,
1957 PAGE_SIZE);
1958 }
1959 } else if (owner &&
1960 (m_object->purgable == VM_PURGABLE_VOLATILE ||
1961 m_object->purgable == VM_PURGABLE_EMPTY)) {
1962 assert(! VM_PAGE_WIRED(mem));
1963 /* less volatile bytes */
1964 ledger_debit(owner->ledger,
1965 ledger_idx_volatile,
1966 PAGE_SIZE);
1967 }
1968 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
1969 if (VM_PAGE_WIRED(mem)) {
1970 assert(vm_page_purgeable_wired_count > 0);
1971 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1972 } else {
1973 assert(vm_page_purgeable_count > 0);
1974 OSAddAtomic(-1, &vm_page_purgeable_count);
1975 }
1976 }
1977
1978 if (m_object->set_cache_attr == TRUE)
1979 pmap_set_cache_attributes(VM_PAGE_GET_PHYS_PAGE(mem), 0);
1980
1981 mem->vmp_tabled = FALSE;
1982 mem->vmp_object = 0;
1983 mem->vmp_offset = (vm_object_offset_t) -1;
1984 }
1985
1986
1987 /*
1988 * vm_page_lookup:
1989 *
1990 * Returns the page associated with the object/offset
1991 * pair specified; if none is found, VM_PAGE_NULL is returned.
1992 *
1993 * The object must be locked. No side effects.
1994 */
1995
1996 #define VM_PAGE_HASH_LOOKUP_THRESHOLD 10
1997
1998 #if DEBUG_VM_PAGE_LOOKUP
1999
2000 struct {
2001 uint64_t vpl_total;
2002 uint64_t vpl_empty_obj;
2003 uint64_t vpl_bucket_NULL;
2004 uint64_t vpl_hit_hint;
2005 uint64_t vpl_hit_hint_next;
2006 uint64_t vpl_hit_hint_prev;
2007 uint64_t vpl_fast;
2008 uint64_t vpl_slow;
2009 uint64_t vpl_hit;
2010 uint64_t vpl_miss;
2011
2012 uint64_t vpl_fast_elapsed;
2013 uint64_t vpl_slow_elapsed;
2014 } vm_page_lookup_stats __attribute__((aligned(8)));
2015
2016 #endif
2017
2018 #define KDP_VM_PAGE_WALK_MAX 1000
2019
2020 vm_page_t
2021 kdp_vm_page_lookup(
2022 vm_object_t object,
2023 vm_object_offset_t offset)
2024 {
2025 vm_page_t cur_page;
2026 int num_traversed = 0;
2027
2028 if (not_in_kdp) {
2029 panic("panic: kdp_vm_page_lookup done outside of kernel debugger");
2030 }
2031
2032 vm_page_queue_iterate(&object->memq, cur_page, vm_page_t, vmp_listq) {
2033 if (cur_page->vmp_offset == offset) {
2034 return cur_page;
2035 }
2036 num_traversed++;
2037
2038 if (num_traversed >= KDP_VM_PAGE_WALK_MAX) {
2039 return VM_PAGE_NULL;
2040 }
2041 }
2042
2043 return VM_PAGE_NULL;
2044 }
2045
2046 vm_page_t
2047 vm_page_lookup(
2048 vm_object_t object,
2049 vm_object_offset_t offset)
2050 {
2051 vm_page_t mem;
2052 vm_page_bucket_t *bucket;
2053 vm_page_queue_entry_t qe;
2054 lck_spin_t *bucket_lock = NULL;
2055 int hash_id;
2056 #if DEBUG_VM_PAGE_LOOKUP
2057 uint64_t start, elapsed;
2058
2059 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_total);
2060 #endif
2061 vm_object_lock_assert_held(object);
2062
2063 if (object->resident_page_count == 0) {
2064 #if DEBUG_VM_PAGE_LOOKUP
2065 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_empty_obj);
2066 #endif
2067 return (VM_PAGE_NULL);
2068 }
2069
2070 mem = object->memq_hint;
2071
2072 if (mem != VM_PAGE_NULL) {
2073 assert(VM_PAGE_OBJECT(mem) == object);
2074
2075 if (mem->vmp_offset == offset) {
2076 #if DEBUG_VM_PAGE_LOOKUP
2077 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint);
2078 #endif
2079 return (mem);
2080 }
2081 qe = (vm_page_queue_entry_t)vm_page_queue_next(&mem->vmp_listq);
2082
2083 if (! vm_page_queue_end(&object->memq, qe)) {
2084 vm_page_t next_page;
2085
2086 next_page = (vm_page_t)((uintptr_t)qe);
2087 assert(VM_PAGE_OBJECT(next_page) == object);
2088
2089 if (next_page->vmp_offset == offset) {
2090 object->memq_hint = next_page; /* new hint */
2091 #if DEBUG_VM_PAGE_LOOKUP
2092 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_next);
2093 #endif
2094 return (next_page);
2095 }
2096 }
2097 qe = (vm_page_queue_entry_t)vm_page_queue_prev(&mem->vmp_listq);
2098
2099 if (! vm_page_queue_end(&object->memq, qe)) {
2100 vm_page_t prev_page;
2101
2102 prev_page = (vm_page_t)((uintptr_t)qe);
2103 assert(VM_PAGE_OBJECT(prev_page) == object);
2104
2105 if (prev_page->vmp_offset == offset) {
2106 object->memq_hint = prev_page; /* new hint */
2107 #if DEBUG_VM_PAGE_LOOKUP
2108 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_prev);
2109 #endif
2110 return (prev_page);
2111 }
2112 }
2113 }
2114 /*
2115 * Search the hash table for this object/offset pair
2116 */
2117 hash_id = vm_page_hash(object, offset);
2118 bucket = &vm_page_buckets[hash_id];
2119
2120 /*
2121 * since we hold the object lock, we are guaranteed that no
2122 * new pages can be inserted into this object... this in turn
2123 * guarantees that the page we're looking for can't exist
2124 * if the bucket it hashes to is currently NULL even when looked
2125 * at outside the scope of the hash bucket lock... this is a
2126 * really cheap optimization to avoid taking the lock
2127 */
2128 if (!bucket->page_list) {
2129 #if DEBUG_VM_PAGE_LOOKUP
2130 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_bucket_NULL);
2131 #endif
2132 return (VM_PAGE_NULL);
2133 }
2134
2135 #if DEBUG_VM_PAGE_LOOKUP
2136 start = mach_absolute_time();
2137 #endif
2138 if (object->resident_page_count <= VM_PAGE_HASH_LOOKUP_THRESHOLD) {
2139 /*
2140 * on average, it's roughly 3 times faster to run a short memq list
2141 * than to take the spin lock and go through the hash list
2142 */
2143 mem = (vm_page_t)vm_page_queue_first(&object->memq);
2144
2145 while (!vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem)) {
2146
2147 if (mem->vmp_offset == offset)
2148 break;
2149
2150 mem = (vm_page_t)vm_page_queue_next(&mem->vmp_listq);
2151 }
2152 if (vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem))
2153 mem = NULL;
2154 } else {
2155 vm_page_object_t packed_object;
2156
2157 packed_object = VM_PAGE_PACK_OBJECT(object);
2158
2159 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
2160
2161 lck_spin_lock(bucket_lock);
2162
2163 for (mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
2164 mem != VM_PAGE_NULL;
2165 mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->vmp_next_m))) {
2166 #if 0
2167 /*
2168 * we don't hold the page queue lock
2169 * so this check isn't safe to make
2170 */
2171 VM_PAGE_CHECK(mem);
2172 #endif
2173 if ((mem->vmp_object == packed_object) && (mem->vmp_offset == offset))
2174 break;
2175 }
2176 lck_spin_unlock(bucket_lock);
2177 }
2178
2179 #if DEBUG_VM_PAGE_LOOKUP
2180 elapsed = mach_absolute_time() - start;
2181
2182 if (bucket_lock) {
2183 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_slow);
2184 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_slow_elapsed);
2185 } else {
2186 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_fast);
2187 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_fast_elapsed);
2188 }
2189 if (mem != VM_PAGE_NULL)
2190 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit);
2191 else
2192 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_miss);
2193 #endif
2194 if (mem != VM_PAGE_NULL) {
2195 assert(VM_PAGE_OBJECT(mem) == object);
2196
2197 object->memq_hint = mem;
2198 }
2199 return (mem);
2200 }
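
/*
 * Illustrative sketch (not part of the build): a minimal user-space
 * analog of the memq_hint strategy used by vm_page_lookup() above.  The
 * types and names below (struct demo_page, demo_lookup, ...) are
 * hypothetical; the point is the shape of the optimization: check the
 * cached hint and its immediate neighbors first, fall back to a full
 * scan (the kernel switches to the hash table instead), and refresh the
 * hint on every successful hit.
 */
#if 0 /* illustrative only */
#include <stddef.h>

struct demo_page {
	struct demo_page	*next;
	struct demo_page	*prev;
	unsigned long long	offset;
};

struct demo_object {
	struct demo_page	*head;	/* list of resident pages */
	struct demo_page	*hint;	/* last page found by lookup */
};

static struct demo_page *
demo_lookup(struct demo_object *obj, unsigned long long offset)
{
	struct demo_page *p = obj->hint;

	/* fast path: the hint itself, then its two neighbors */
	if (p != NULL) {
		if (p->offset == offset)
			return p;
		if (p->next != NULL && p->next->offset == offset) {
			obj->hint = p->next;
			return obj->hint;
		}
		if (p->prev != NULL && p->prev->offset == offset) {
			obj->hint = p->prev;
			return obj->hint;
		}
	}
	/* slow path: linear scan; the kernel uses a hash bucket here */
	for (p = obj->head; p != NULL; p = p->next) {
		if (p->offset == offset) {
			obj->hint = p;
			return p;
		}
	}
	return NULL;
}
#endif /* illustrative only */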
2201
2202
2203 /*
2204 * vm_page_rename:
2205 *
2206 * Move the given memory entry from its
2207 * current object to the specified target object/offset.
2208 *
2209 * The object must be locked.
2210 */
2211 void
2212 vm_page_rename(
2213 vm_page_t mem,
2214 vm_object_t new_object,
2215 vm_object_offset_t new_offset)
2216 {
2217 boolean_t internal_to_external, external_to_internal;
2218 vm_tag_t tag;
2219 vm_object_t m_object;
2220
2221 m_object = VM_PAGE_OBJECT(mem);
2222
2223 assert(m_object != new_object);
2224 assert(m_object);
2225
2226 XPR(XPR_VM_PAGE,
2227 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
2228 new_object, new_offset,
2229 mem, 0,0);
2230
2231 /*
2232 * Changes to mem->vmp_object require the page lock because
2233 * the pageout daemon uses that lock to get the object.
2234 */
2235 vm_page_lockspin_queues();
2236
2237 internal_to_external = FALSE;
2238 external_to_internal = FALSE;
2239
2240 if (mem->vmp_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q) {
2241 /*
2242 * it's much easier to get the vm_page_pageable_xxx accounting correct
2243 * if we first move the page to the active queue... it's going to end
2244 * up there anyway, and we don't call vm_page_rename frequently enough
2245 * for this to matter.
2246 */
2247 vm_page_queues_remove(mem, FALSE);
2248 vm_page_activate(mem);
2249 }
2250 if (VM_PAGE_PAGEABLE(mem)) {
2251 if (m_object->internal && !new_object->internal) {
2252 internal_to_external = TRUE;
2253 }
2254 if (!m_object->internal && new_object->internal) {
2255 external_to_internal = TRUE;
2256 }
2257 }
2258
2259 tag = m_object->wire_tag;
2260 vm_page_remove(mem, TRUE);
2261 vm_page_insert_internal(mem, new_object, new_offset, tag, TRUE, TRUE, FALSE, FALSE, NULL);
2262
2263 if (internal_to_external) {
2264 vm_page_pageable_internal_count--;
2265 vm_page_pageable_external_count++;
2266 } else if (external_to_internal) {
2267 vm_page_pageable_external_count--;
2268 vm_page_pageable_internal_count++;
2269 }
2270
2271 vm_page_unlock_queues();
2272 }
2273
2274 /*
2275 * vm_page_init:
2276 *
2277 * Initialize the fields in a new page.
2278 * This takes a structure with random values and initializes it
2279 * so that it can be given to vm_page_release or vm_page_insert.
2280 */
2281 void
2282 vm_page_init(
2283 vm_page_t mem,
2284 ppnum_t phys_page,
2285 boolean_t lopage)
2286 {
2287 assert(phys_page);
2288
2289 #if DEBUG
2290 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
2291 if (!(pmap_valid_page(phys_page))) {
2292 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
2293 }
2294 }
2295 #endif
2296 *mem = vm_page_template;
2297
2298 VM_PAGE_SET_PHYS_PAGE(mem, phys_page);
2299 #if 0
2300 /*
2301 * we're leaving this turned off for now... currently pages
2302 * come off the free list and are either immediately dirtied/referenced
2303 * due to zero-fill or COW faults, or are used to read or write files...
2304 * in the file I/O case, the UPL mechanism takes care of clearing
2305 * the state of the HW ref/mod bits in a somewhat fragile way.
2306 * Since we may change the way this works in the future (to toughen it up),
2307 * I'm leaving this as a reminder of where these bits could get cleared
2308 */
2309
2310 /*
2311 * make sure both the h/w referenced and modified bits are
2312 * clear at this point... we are especially dependent on
2313 * not finding a 'stale' h/w modified in a number of spots
2314 * once this page goes back into use
2315 */
2316 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
2317 #endif
2318 mem->vmp_lopage = lopage;
2319 }
2320
2321 /*
2322 * vm_page_grab_fictitious:
2323 *
2324 * Remove a fictitious page from the free list.
2325 * Returns VM_PAGE_NULL if there are no free pages.
2326 */
2327 int c_vm_page_grab_fictitious = 0;
2328 int c_vm_page_grab_fictitious_failed = 0;
2329 int c_vm_page_release_fictitious = 0;
2330 int c_vm_page_more_fictitious = 0;
2331
2332 vm_page_t
2333 vm_page_grab_fictitious_common(
2334 ppnum_t phys_addr)
2335 {
2336 vm_page_t m;
2337
2338 if ((m = (vm_page_t)zget(vm_page_zone))) {
2339
2340 vm_page_init(m, phys_addr, FALSE);
2341 m->vmp_fictitious = TRUE;
2342
2343 c_vm_page_grab_fictitious++;
2344 } else
2345 c_vm_page_grab_fictitious_failed++;
2346
2347 return m;
2348 }
2349
2350 vm_page_t
2351 vm_page_grab_fictitious(void)
2352 {
2353 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
2354 }
2355
2356 int vm_guard_count;
2357
2358
2359 vm_page_t
2360 vm_page_grab_guard(void)
2361 {
2362 vm_page_t page;
2363 page = vm_page_grab_fictitious_common(vm_page_guard_addr);
2364 if (page) OSAddAtomic(1, &vm_guard_count);
2365 return page;
2366 }
2367
2368
2369 /*
2370 * vm_page_release_fictitious:
2371 *
2372 * Release a fictitious page to the zone pool
2373 */
2374 void
2375 vm_page_release_fictitious(
2376 vm_page_t m)
2377 {
2378 assert((m->vmp_q_state == VM_PAGE_NOT_ON_Q) || (m->vmp_q_state == VM_PAGE_IS_WIRED));
2379 assert(m->vmp_fictitious);
2380 assert(VM_PAGE_GET_PHYS_PAGE(m) == vm_page_fictitious_addr ||
2381 VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr);
2382
2383
2384 if (VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr) OSAddAtomic(-1, &vm_guard_count);
2385
2386 c_vm_page_release_fictitious++;
2387
2388 zfree(vm_page_zone, m);
2389 }
2390
2391 /*
2392 * vm_page_more_fictitious:
2393 *
2394 * Add more fictitious pages to the zone.
2395 * Allowed to block. This routine is way intimate
2396 * with the zones code, for several reasons:
2397 * 1. we need to carve some page structures out of physical
2398 * memory before zones work, so they _cannot_ come from
2399 * the zone_map.
2400 * 2. the zone needs to be collectable in order to prevent
2401 * growth without bound. These structures are used by
2402 * the device pager (by the hundreds and thousands), as
2403 * private pages for pageout, and as blocking pages for
2404 * pagein. Temporary bursts in demand should not result in
2405 * permanent allocation of a resource.
2406 * 3. To smooth allocation humps, we allocate single pages
2407 * with kernel_memory_allocate(), and cram them into the
2408 * zone.
2409 */
2410
2411 void vm_page_more_fictitious(void)
2412 {
2413 vm_offset_t addr;
2414 kern_return_t retval;
2415
2416 c_vm_page_more_fictitious++;
2417
2418 /*
2419 * Allocate a single page from the zone_map. Do not wait if no physical
2420 * pages are immediately available, and do not zero the space. We need
2421 * our own blocking lock here to prevent having multiple,
2422 * simultaneous requests from piling up on the zone_map lock. Exactly
2423 * one (of our) threads should be potentially waiting on the map lock.
2424 * If the winner is not vm-privileged, then the page allocation will fail,
2425 * and it will temporarily block here in the vm_page_wait().
2426 */
2427 lck_mtx_lock(&vm_page_alloc_lock);
2428 /*
2429 * If another thread allocated space, just bail out now.
2430 */
2431 if (zone_free_count(vm_page_zone) > 5) {
2432 /*
2433 * The number "5" is a small number that is larger than the
2434 * number of fictitious pages that any single caller will
2435 * attempt to allocate. Otherwise, a thread will attempt to
2436 * acquire a fictitious page (vm_page_grab_fictitious), fail,
2437 * release all of the resources and locks already acquired,
2438 * and then call this routine. This routine finds the pages
2439 * that the caller released, so it fails to allocate new space.
2440 * The process repeats infinitely. The largest known number
2441 * of fictitious pages required in this manner is 2. 5 is
2442 * simply a somewhat larger number.
2443 */
2444 lck_mtx_unlock(&vm_page_alloc_lock);
2445 return;
2446 }
2447
2448 retval = kernel_memory_allocate(zone_map,
2449 &addr, PAGE_SIZE, 0,
2450 KMA_KOBJECT|KMA_NOPAGEWAIT, VM_KERN_MEMORY_ZONE);
2451 if (retval != KERN_SUCCESS) {
2452 /*
2453 * No page was available. Drop the
2454 * lock to give another thread a chance at it, and
2455 * wait for the pageout daemon to make progress.
2456 */
2457 lck_mtx_unlock(&vm_page_alloc_lock);
2458 vm_page_wait(THREAD_UNINT);
2459 return;
2460 }
2461
2462 zcram(vm_page_zone, addr, PAGE_SIZE);
2463
2464 lck_mtx_unlock(&vm_page_alloc_lock);
2465 }
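
/*
 * Illustrative sketch (not part of the build): the "single refiller"
 * pattern used by vm_page_more_fictitious() above, shown with a
 * hypothetical user-space pool.  A caller that lost the race finds the
 * pool already refilled after taking the lock and bails out, so only
 * one thread at a time pays for the allocation.
 */
#if 0 /* illustrative only */
#include <pthread.h>
#include <stdlib.h>

#define POOL_REFILL_SLACK 5	/* mirrors the "5" threshold above */

struct demo_pool {
	pthread_mutex_t	lock;
	void		*free_list[64];
	int		free_count;
};

static int
demo_pool_refill(struct demo_pool *pool)
{
	void *chunk;

	pthread_mutex_lock(&pool->lock);
	if (pool->free_count > POOL_REFILL_SLACK) {
		/* somebody else already refilled; nothing to do */
		pthread_mutex_unlock(&pool->lock);
		return 0;
	}
	chunk = malloc(4096);	/* the kernel uses kernel_memory_allocate() */
	if (chunk == NULL) {
		pthread_mutex_unlock(&pool->lock);
		return -1;	/* caller should wait and retry */
	}
	pool->free_list[pool->free_count++] = chunk;
	pthread_mutex_unlock(&pool->lock);
	return 0;
}
#endif /* illustrative only */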
2466
2467
2468 /*
2469 * vm_pool_low():
2470 *
2471 * Return true if it is not likely that a non-vm_privileged thread
2472 * can get memory without blocking. Advisory only, since the
2473 * situation may change under us.
2474 */
2475 int
2476 vm_pool_low(void)
2477 {
2478 /* No locking, at worst we will fib. */
2479 return( vm_page_free_count <= vm_page_free_reserved );
2480 }
2481
2482 boolean_t vm_darkwake_mode = FALSE;
2483
2484 /*
2485 * vm_update_darkwake_mode():
2486 *
2487 * Tells the VM that the system is in / out of darkwake.
2488 *
2489 * Today, the VM only lowers/raises the background queue target
2490 * so as to favor consuming more/less background pages when
2491 * darkwake is ON/OFF.
2492 *
2493 * We might need to do more things in the future.
2494 */
2495
2496 void
2497 vm_update_darkwake_mode(boolean_t darkwake_mode)
2498 {
2499 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
2500
2501 vm_page_lockspin_queues();
2502
2503 if (vm_darkwake_mode == darkwake_mode) {
2504 /*
2505 * No change.
2506 */
2507 vm_page_unlock_queues();
2508 return;
2509 }
2510
2511 vm_darkwake_mode = darkwake_mode;
2512
2513 if (vm_darkwake_mode == TRUE) {
2514 #if CONFIG_BACKGROUND_QUEUE
2515
2516 /* save background target to restore later */
2517 vm_page_background_target_snapshot = vm_page_background_target;
2518
2519 /* target is set to 0...no protection for background pages */
2520 vm_page_background_target = 0;
2521
2522 #endif /* CONFIG_BACKGROUND_QUEUE */
2523
2524 } else if (vm_darkwake_mode == FALSE) {
2525 #if CONFIG_BACKGROUND_QUEUE
2526
2527 if (vm_page_background_target_snapshot) {
2528 vm_page_background_target = vm_page_background_target_snapshot;
2529 }
2530 #endif /* CONFIG_BACKGROUND_QUEUE */
2531 }
2532 vm_page_unlock_queues();
2533 }
2534
2535 #if CONFIG_BACKGROUND_QUEUE
2536
2537 void
2538 vm_page_update_background_state(vm_page_t mem)
2539 {
2540 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2541 return;
2542
2543 if (mem->vmp_in_background == FALSE)
2544 return;
2545
2546 task_t my_task = current_task();
2547
2548 if (my_task) {
2549 if (task_get_darkwake_mode(my_task)) {
2550 return;
2551 }
2552 }
2553
2554 #if BACKGROUNDQ_BASED_ON_QOS
2555 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS) <= THREAD_QOS_LEGACY)
2556 return;
2557 #else
2558 if (my_task) {
2559 if (proc_get_effective_task_policy(my_task, TASK_POLICY_DARWIN_BG))
2560 return;
2561 }
2562 #endif
2563 vm_page_lockspin_queues();
2564
2565 mem->vmp_in_background = FALSE;
2566 vm_page_background_promoted_count++;
2567
2568 vm_page_remove_from_backgroundq(mem);
2569
2570 vm_page_unlock_queues();
2571 }
2572
2573
2574 void
2575 vm_page_assign_background_state(vm_page_t mem)
2576 {
2577 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2578 return;
2579
2580 task_t my_task = current_task();
2581
2582 if (my_task) {
2583 if (task_get_darkwake_mode(my_task)) {
2584 mem->vmp_in_background = TRUE;
2585 return;
2586 }
2587 }
2588
2589 #if BACKGROUNDQ_BASED_ON_QOS
2590 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS) <= THREAD_QOS_LEGACY)
2591 mem->vmp_in_background = TRUE;
2592 else
2593 mem->vmp_in_background = FALSE;
2594 #else
2595 if (my_task)
2596 mem->vmp_in_background = proc_get_effective_task_policy(my_task, TASK_POLICY_DARWIN_BG);
2597 #endif
2598 }
2599
2600
2601 void
2602 vm_page_remove_from_backgroundq(
2603 vm_page_t mem)
2604 {
2605 vm_object_t m_object;
2606
2607 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2608
2609 if (mem->vmp_on_backgroundq) {
2610 vm_page_queue_remove(&vm_page_queue_background, mem, vm_page_t, vmp_backgroundq);
2611
2612 mem->vmp_backgroundq.next = 0;
2613 mem->vmp_backgroundq.prev = 0;
2614 mem->vmp_on_backgroundq = FALSE;
2615
2616 vm_page_background_count--;
2617
2618 m_object = VM_PAGE_OBJECT(mem);
2619
2620 if (m_object->internal)
2621 vm_page_background_internal_count--;
2622 else
2623 vm_page_background_external_count--;
2624 } else {
2625 assert(VM_PAGE_UNPACK_PTR(mem->vmp_backgroundq.next) == (uintptr_t)NULL &&
2626 VM_PAGE_UNPACK_PTR(mem->vmp_backgroundq.prev) == (uintptr_t)NULL);
2627 }
2628 }
2629
2630
2631 void
2632 vm_page_add_to_backgroundq(
2633 vm_page_t mem,
2634 boolean_t first)
2635 {
2636 vm_object_t m_object;
2637
2638 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2639
2640 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2641 return;
2642
2643 if (mem->vmp_on_backgroundq == FALSE) {
2644
2645 m_object = VM_PAGE_OBJECT(mem);
2646
2647 if (vm_page_background_exclude_external && !m_object->internal)
2648 return;
2649
2650 if (first == TRUE)
2651 vm_page_queue_enter_first(&vm_page_queue_background, mem, vm_page_t, vmp_backgroundq);
2652 else
2653 vm_page_queue_enter(&vm_page_queue_background, mem, vm_page_t, vmp_backgroundq);
2654 mem->vmp_on_backgroundq = TRUE;
2655
2656 vm_page_background_count++;
2657
2658 if (m_object->internal)
2659 vm_page_background_internal_count++;
2660 else
2661 vm_page_background_external_count++;
2662 }
2663 }
2664
2665 #endif /* CONFIG_BACKGROUND_QUEUE */
2666
2667 /*
2668 * this is an interface to support bring-up of drivers
2669 * on platforms with physical memory > 4G...
2670 */
2671 int vm_himemory_mode = 2;
2672
2673
2674 /*
2675 * this interface exists to support hardware controllers
2676 * incapable of generating DMAs with more than 32 bits
2677 * of address on platforms with physical memory > 4G...
2678 */
2679 unsigned int vm_lopages_allocated_q = 0;
2680 unsigned int vm_lopages_allocated_cpm_success = 0;
2681 unsigned int vm_lopages_allocated_cpm_failed = 0;
2682 vm_page_queue_head_t vm_lopage_queue_free __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
2683
2684 vm_page_t
2685 vm_page_grablo(void)
2686 {
2687 vm_page_t mem;
2688
2689 if (vm_lopage_needed == FALSE)
2690 return (vm_page_grab());
2691
2692 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2693
2694 if ( !vm_page_queue_empty(&vm_lopage_queue_free)) {
2695 vm_page_queue_remove_first(&vm_lopage_queue_free,
2696 mem,
2697 vm_page_t,
2698 vmp_pageq);
2699 assert(vm_lopage_free_count);
2700 assert(mem->vmp_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
2701 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
2702
2703 vm_lopage_free_count--;
2704 vm_lopages_allocated_q++;
2705
2706 if (vm_lopage_free_count < vm_lopage_lowater)
2707 vm_lopage_refill = TRUE;
2708
2709 lck_mtx_unlock(&vm_page_queue_free_lock);
2710
2711 #if CONFIG_BACKGROUND_QUEUE
2712 vm_page_assign_background_state(mem);
2713 #endif
2714 } else {
2715 lck_mtx_unlock(&vm_page_queue_free_lock);
2716
2717 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
2718
2719 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2720 vm_lopages_allocated_cpm_failed++;
2721 lck_mtx_unlock(&vm_page_queue_free_lock);
2722
2723 return (VM_PAGE_NULL);
2724 }
2725 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
2726
2727 mem->vmp_busy = TRUE;
2728
2729 vm_page_lockspin_queues();
2730
2731 mem->vmp_gobbled = FALSE;
2732 vm_page_gobble_count--;
2733 vm_page_wire_count--;
2734
2735 vm_lopages_allocated_cpm_success++;
2736 vm_page_unlock_queues();
2737 }
2738 assert(mem->vmp_busy);
2739 assert(!mem->vmp_pmapped);
2740 assert(!mem->vmp_wpmapped);
2741 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2742
2743 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2744
2745 disable_preemption();
2746 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2747 VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, 0, 1, 0, 0);
2748 enable_preemption();
2749
2750 return (mem);
2751 }
2752
2753
2754 /*
2755 * vm_page_grab:
2756 *
2757 * first try to grab a page from the per-cpu free list...
2758 * this must be done while pre-emption is disabled... if
2759 * a page is available, we're done...
2760 * if no page is available, grab the vm_page_queue_free_lock
2761 * and see if current number of free pages would allow us
2762 * to grab at least 1... if not, return VM_PAGE_NULL as before...
2763 * if there are pages available, disable preemption and
2764 * recheck the state of the per-cpu free list... we could
2765 * have been preempted and moved to a different cpu, or
2766 * some other thread could have re-filled it... if still
2767 * empty, figure out how many pages we can steal from the
2768 * global free queue and move to the per-cpu queue...
2769 * return 1 of these pages when done... only wakeup the
2770 * pageout_scan thread if we moved pages from the global
2771 * list... no need for the wakeup if we've satisfied the
2772 * request from the per-cpu queue.
2773 */
2774
2775 #if CONFIG_SECLUDED_MEMORY
2776 vm_page_t vm_page_grab_secluded(void);
2777 #endif /* CONFIG_SECLUDED_MEMORY */
2778
2779 vm_page_t
2780 vm_page_grab(void)
2781 {
2782 return vm_page_grab_options(0);
2783 }
2784
2785 #if HIBERNATION
2786 boolean_t hibernate_rebuild_needed = FALSE;
2787 #endif /* HIBERNATION */
2788
2789 vm_page_t
2790 vm_page_grab_options(
2791 int grab_options)
2792 {
2793 vm_page_t mem;
2794
2795 disable_preemption();
2796
2797 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2798 return_page_from_cpu_list:
2799 assert(mem->vmp_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
2800
2801 #if HIBERNATION
2802 if (hibernate_rebuild_needed) {
2803 panic("%s:%d should not modify cpu->free_pages while hibernating", __FUNCTION__, __LINE__);
2804 }
2805 #endif /* HIBERNATION */
2806 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2807 PROCESSOR_DATA(current_processor(), free_pages) = mem->vmp_snext;
2808 VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, grab_options, 0, 0, 0);
2809
2810 enable_preemption();
2811 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2812 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
2813
2814 assert(mem->vmp_listq.next == 0 && mem->vmp_listq.prev == 0);
2815 assert(mem->vmp_tabled == FALSE);
2816 assert(mem->vmp_object == 0);
2817 assert(!mem->vmp_laundry);
2818 assertf(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)), "page = 0x%llx", (uint64_t)VM_PAGE_GET_PHYS_PAGE(mem));
2819 assert(mem->vmp_busy);
2820 assert(!mem->vmp_pmapped);
2821 assert(!mem->vmp_wpmapped);
2822 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2823
2824 #if CONFIG_BACKGROUND_QUEUE
2825 vm_page_assign_background_state(mem);
2826 #endif
2827 return mem;
2828 }
2829 enable_preemption();
2830
2831
2832 /*
2833 * Optionally produce warnings if the wire or gobble
2834 * counts exceed some threshold.
2835 */
2836 #if VM_PAGE_WIRE_COUNT_WARNING
2837 if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) {
2838 printf("mk: vm_page_grab(): high wired page count of %d\n",
2839 vm_page_wire_count);
2840 }
2841 #endif
2842 #if VM_PAGE_GOBBLE_COUNT_WARNING
2843 if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) {
2844 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
2845 vm_page_gobble_count);
2846 }
2847 #endif
2848
2849 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2850
2851 /*
2852 * Only let privileged threads (involved in pageout)
2853 * dip into the reserved pool.
2854 */
2855 if ((vm_page_free_count < vm_page_free_reserved) &&
2856 !(current_thread()->options & TH_OPT_VMPRIV)) {
2857 /* no page for us in the free queue... */
2858 lck_mtx_unlock(&vm_page_queue_free_lock);
2859 mem = VM_PAGE_NULL;
2860
2861 #if CONFIG_SECLUDED_MEMORY
2862 /* ... but can we try and grab from the secluded queue? */
2863 if (vm_page_secluded_count > 0 &&
2864 ((grab_options & VM_PAGE_GRAB_SECLUDED) ||
2865 task_can_use_secluded_mem(current_task(), TRUE))) {
2866 mem = vm_page_grab_secluded();
2867 if (grab_options & VM_PAGE_GRAB_SECLUDED) {
2868 vm_page_secluded.grab_for_iokit++;
2869 if (mem) {
2870 vm_page_secluded.grab_for_iokit_success++;
2871 }
2872 }
2873 if (mem) {
2874 VM_CHECK_MEMORYSTATUS;
2875
2876 disable_preemption();
2877 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2878 VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, grab_options, 0, 0, 0);
2879 enable_preemption();
2880
2881 return mem;
2882 }
2883 }
2884 #else /* CONFIG_SECLUDED_MEMORY */
2885 (void) grab_options;
2886 #endif /* CONFIG_SECLUDED_MEMORY */
2887 }
2888 else {
2889 vm_page_t head;
2890 vm_page_t tail;
2891 unsigned int pages_to_steal;
2892 unsigned int color;
2893 unsigned int clump_end, sub_count;
2894
2895 while ( vm_page_free_count == 0 ) {
2896
2897 lck_mtx_unlock(&vm_page_queue_free_lock);
2898 /*
2899 * must be a privileged thread to be
2900 * in this state since a non-privileged
2901 * thread would have bailed if we were
2902 * under the vm_page_free_reserved mark
2903 */
2904 VM_PAGE_WAIT();
2905 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2906 }
2907
2908 disable_preemption();
2909
2910 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2911 lck_mtx_unlock(&vm_page_queue_free_lock);
2912
2913 /*
2914 * we got preempted and moved to another processor
2915 * or we got preempted and someone else ran and filled the cache
2916 */
2917 goto return_page_from_cpu_list;
2918 }
2919 if (vm_page_free_count <= vm_page_free_reserved)
2920 pages_to_steal = 1;
2921 else {
2922 if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved))
2923 pages_to_steal = vm_free_magazine_refill_limit;
2924 else
2925 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
2926 }
2927 color = PROCESSOR_DATA(current_processor(), start_color);
2928 head = tail = NULL;
2929
2930 vm_page_free_count -= pages_to_steal;
2931 clump_end = sub_count = 0;
2932
2933 while (pages_to_steal--) {
2934
2935 while (vm_page_queue_empty(&vm_page_queue_free[color].qhead))
2936 color = (color + 1) & vm_color_mask;
2937 #if defined(__x86_64__)
2938 vm_page_queue_remove_first_with_clump(&vm_page_queue_free[color].qhead,
2939 mem,
2940 vm_page_t,
2941 vmp_pageq,
2942 clump_end);
2943 #else
2944 vm_page_queue_remove_first(&vm_page_queue_free[color].qhead,
2945 mem,
2946 vm_page_t,
2947 vmp_pageq);
2948 #endif
2949
2950 assert(mem->vmp_q_state == VM_PAGE_ON_FREE_Q);
2951
2952 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2953
2954 #if defined(__arm__) || defined(__arm64__)
2955 color = (color + 1) & vm_color_mask;
2956 #else
2957
2958 #if DEVELOPMENT || DEBUG
2959
2960 sub_count++;
2961 if (clump_end) {
2962 vm_clump_update_stats(sub_count);
2963 sub_count = 0;
2964 color = (color + 1) & vm_color_mask;
2965 }
2966 #else
2967 if (clump_end) color = (color + 1) & vm_color_mask;
2968
2969 #endif /* if DEVELOPMENT || DEBUG */
2970
2971 #endif /* if defined(__arm__) || defined(__arm64__) */
2972
2973 if (head == NULL)
2974 head = mem;
2975 else
2976 tail->vmp_snext = mem;
2977 tail = mem;
2978
2979 assert(mem->vmp_listq.next == 0 && mem->vmp_listq.prev == 0);
2980 assert(mem->vmp_tabled == FALSE);
2981 assert(mem->vmp_object == 0);
2982 assert(!mem->vmp_laundry);
2983
2984 mem->vmp_q_state = VM_PAGE_ON_FREE_LOCAL_Q;
2985
2986 assertf(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)), "page = 0x%llx", (uint64_t)VM_PAGE_GET_PHYS_PAGE(mem));
2987 assert(mem->vmp_busy);
2988 assert(!mem->vmp_pmapped);
2989 assert(!mem->vmp_wpmapped);
2990 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2991 }
2992 #if defined (__x86_64__) && (DEVELOPMENT || DEBUG)
2993 vm_clump_update_stats(sub_count);
2994 #endif
2995 lck_mtx_unlock(&vm_page_queue_free_lock);
2996
2997 #if HIBERNATION
2998 if (hibernate_rebuild_needed) {
2999 panic("%s:%d should not modify cpu->free_pages while hibernating", __FUNCTION__, __LINE__);
3000 }
3001 #endif /* HIBERNATION */
3002 PROCESSOR_DATA(current_processor(), free_pages) = head->vmp_snext;
3003 PROCESSOR_DATA(current_processor(), start_color) = color;
3004
3005 /*
3006 * satisfy this request
3007 */
3008 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
3009 VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, grab_options, 0, 0, 0);
3010 mem = head;
3011 assert(mem->vmp_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
3012
3013 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
3014 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
3015
3016 enable_preemption();
3017 }
3018 /*
3019 * Decide if we should poke the pageout daemon.
3020 * We do this if the free count is less than the low
3021 * water mark, or if the free count is less than the high
3022 * water mark (but above the low water mark) and the inactive
3023 * count is less than its target.
3024 *
3025 * We don't have the counts locked ... if they change a little,
3026 * it doesn't really matter.
3027 */
3028 if (vm_page_free_count < vm_page_free_min)
3029 thread_wakeup((event_t) &vm_page_free_wanted);
3030
3031 VM_CHECK_MEMORYSTATUS;
3032
3033 if (mem) {
3034 // dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
3035
3036 #if CONFIG_BACKGROUND_QUEUE
3037 vm_page_assign_background_state(mem);
3038 #endif
3039 }
3040 return mem;
3041 }
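
/*
 * Illustrative sketch (not part of the build): a stripped-down,
 * hypothetical user-space version of the per-CPU "magazine" scheme
 * described in the block comment above vm_page_grab().  A thread-local
 * cache stands in for the per-processor free list; when it runs dry we
 * take the global lock once and steal a small batch instead of paying
 * for the lock on every allocation.
 */
#if 0 /* illustrative only */
#include <pthread.h>
#include <stddef.h>

#define DEMO_BATCH	16

struct demo_page {
	struct demo_page	*snext;
};

static pthread_mutex_t	demo_free_lock = PTHREAD_MUTEX_INITIALIZER;
static struct demo_page	*demo_global_free;	/* global free list */

static __thread struct demo_page *demo_local_free;	/* "per-CPU" cache */

static struct demo_page *
demo_grab(void)
{
	struct demo_page *m, *head = NULL, *tail = NULL;
	unsigned int n;

	/* fast path: no lock at all */
	if ((m = demo_local_free) != NULL) {
		demo_local_free = m->snext;
		m->snext = NULL;
		return m;
	}
	/* slow path: refill the local cache from the global list */
	pthread_mutex_lock(&demo_free_lock);
	for (n = 0; n < DEMO_BATCH && demo_global_free != NULL; n++) {
		m = demo_global_free;
		demo_global_free = m->snext;
		m->snext = NULL;
		if (head == NULL)
			head = m;
		else
			tail->snext = m;
		tail = m;
	}
	pthread_mutex_unlock(&demo_free_lock);

	if (head == NULL)
		return NULL;		/* caller would block in vm_page_wait() */
	m = head;
	demo_local_free = m->snext;	/* hand back one, cache the rest */
	m->snext = NULL;
	return m;
}
#endif /* illustrative only */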
3042
3043 #if CONFIG_SECLUDED_MEMORY
3044 vm_page_t
3045 vm_page_grab_secluded(void)
3046 {
3047 vm_page_t mem;
3048 vm_object_t object;
3049 int refmod_state;
3050
3051 if (vm_page_secluded_count == 0) {
3052 /* no secluded pages to grab... */
3053 return VM_PAGE_NULL;
3054 }
3055
3056 /* secluded queue is protected by the VM page queue lock */
3057 vm_page_lock_queues();
3058
3059 if (vm_page_secluded_count == 0) {
3060 /* no secluded pages to grab... */
3061 vm_page_unlock_queues();
3062 return VM_PAGE_NULL;
3063 }
3064
3065 #if 00
3066 /* can we grab from the secluded queue? */
3067 if (vm_page_secluded_count > vm_page_secluded_target ||
3068 (vm_page_secluded_count > 0 &&
3069 task_can_use_secluded_mem(current_task(), TRUE))) {
3070 /* OK */
3071 } else {
3072 /* can't grab from secluded queue... */
3073 vm_page_unlock_queues();
3074 return VM_PAGE_NULL;
3075 }
3076 #endif
3077
3078 /* we can grab a page from secluded queue! */
3079 assert((vm_page_secluded_count_free +
3080 vm_page_secluded_count_inuse) ==
3081 vm_page_secluded_count);
3082 if (current_task()->task_can_use_secluded_mem) {
3083 assert(num_tasks_can_use_secluded_mem > 0);
3084 }
3085 assert(!vm_page_queue_empty(&vm_page_queue_secluded));
3086 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3087 mem = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
3088 assert(mem->vmp_q_state == VM_PAGE_ON_SECLUDED_Q);
3089 vm_page_queues_remove(mem, TRUE);
3090
3091 object = VM_PAGE_OBJECT(mem);
3092
3093 assert(!mem->vmp_fictitious);
3094 assert(!VM_PAGE_WIRED(mem));
3095 if (object == VM_OBJECT_NULL) {
3096 /* free for grab! */
3097 vm_page_unlock_queues();
3098 vm_page_secluded.grab_success_free++;
3099
3100 assert(mem->vmp_busy);
3101 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
3102 assert(VM_PAGE_OBJECT(mem) == VM_OBJECT_NULL);
3103 assert(mem->vmp_pageq.next == 0);
3104 assert(mem->vmp_pageq.prev == 0);
3105 assert(mem->vmp_listq.next == 0);
3106 assert(mem->vmp_listq.prev == 0);
3107 #if CONFIG_BACKGROUND_QUEUE
3108 assert(mem->vmp_on_backgroundq == 0);
3109 assert(mem->vmp_backgroundq.next == 0);
3110 assert(mem->vmp_backgroundq.prev == 0);
3111 #endif /* CONFIG_BACKGROUND_QUEUE */
3112 return mem;
3113 }
3114
3115 assert(!object->internal);
3116 // vm_page_pageable_external_count--;
3117
3118 if (!vm_object_lock_try(object)) {
3119 // printf("SECLUDED: page %p: object %p locked\n", mem, object);
3120 vm_page_secluded.grab_failure_locked++;
3121 reactivate_secluded_page:
3122 vm_page_activate(mem);
3123 vm_page_unlock_queues();
3124 return VM_PAGE_NULL;
3125 }
3126 if (mem->vmp_busy ||
3127 mem->vmp_cleaning ||
3128 mem->vmp_laundry) {
3129 /* can't steal page in this state... */
3130 vm_object_unlock(object);
3131 vm_page_secluded.grab_failure_state++;
3132 goto reactivate_secluded_page;
3133 }
3134
3135 mem->vmp_busy = TRUE;
3136 refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
3137 if (refmod_state & VM_MEM_REFERENCED) {
3138 mem->vmp_reference = TRUE;
3139 }
3140 if (refmod_state & VM_MEM_MODIFIED) {
3141 SET_PAGE_DIRTY(mem, FALSE);
3142 }
3143 if (mem->vmp_dirty || mem->vmp_precious) {
3144 /* can't grab a dirty page; re-activate */
3145 // printf("SECLUDED: dirty page %p\n", mem);
3146 PAGE_WAKEUP_DONE(mem);
3147 vm_page_secluded.grab_failure_dirty++;
3148 vm_object_unlock(object);
3149 goto reactivate_secluded_page;
3150 }
3151 if (mem->vmp_reference) {
3152 /* it's been used but we do need to grab a page... */
3153 }
3154
3155 vm_page_unlock_queues();
3156
3157 /* finish what vm_page_free() would have done... */
3158 vm_page_free_prepare_object(mem, TRUE);
3159 vm_object_unlock(object);
3160 object = VM_OBJECT_NULL;
3161 if (vm_page_free_verify) {
3162 assertf(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)), "page = 0x%llx", (uint64_t)VM_PAGE_GET_PHYS_PAGE(mem));
3163 }
3164 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
3165 vm_page_secluded.grab_success_other++;
3166
3167 assert(mem->vmp_busy);
3168 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
3169 assert(VM_PAGE_OBJECT(mem) == VM_OBJECT_NULL);
3170 assert(mem->vmp_pageq.next == 0);
3171 assert(mem->vmp_pageq.prev == 0);
3172 assert(mem->vmp_listq.next == 0);
3173 assert(mem->vmp_listq.prev == 0);
3174 #if CONFIG_BACKGROUND_QUEUE
3175 assert(mem->vmp_on_backgroundq == 0);
3176 assert(mem->vmp_backgroundq.next == 0);
3177 assert(mem->vmp_backgroundq.prev == 0);
3178 #endif /* CONFIG_BACKGROUND_QUEUE */
3179
3180 return mem;
3181 }
3182 #endif /* CONFIG_SECLUDED_MEMORY */
3183
3184 /*
3185 * vm_page_release:
3186 *
3187 * Return a page to the free list.
3188 */
3189
3190 void
3191 vm_page_release(
3192 vm_page_t mem,
3193 boolean_t page_queues_locked)
3194 {
3195 unsigned int color;
3196 int need_wakeup = 0;
3197 int need_priv_wakeup = 0;
3198 #if CONFIG_SECLUDED_MEMORY
3199 int need_secluded_wakeup = 0;
3200 #endif /* CONFIG_SECLUDED_MEMORY */
3201
3202 if (page_queues_locked) {
3203 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3204 } else {
3205 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
3206 }
3207
3208 assert(!mem->vmp_private && !mem->vmp_fictitious);
3209 if (vm_page_free_verify) {
3210 assertf(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)), "page = 0x%llx", (uint64_t)VM_PAGE_GET_PHYS_PAGE(mem));
3211 }
3212 // dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
3213
3214 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
3215
3216 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3217
3218 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
3219 assert(mem->vmp_busy);
3220 assert(!mem->vmp_laundry);
3221 assert(mem->vmp_object == 0);
3222 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
3223 assert(mem->vmp_listq.next == 0 && mem->vmp_listq.prev == 0);
3224 #if CONFIG_BACKGROUND_QUEUE
3225 assert(mem->vmp_backgroundq.next == 0 &&
3226 mem->vmp_backgroundq.prev == 0 &&
3227 mem->vmp_on_backgroundq == FALSE);
3228 #endif
3229 if ((mem->vmp_lopage == TRUE || vm_lopage_refill == TRUE) &&
3230 vm_lopage_free_count < vm_lopage_free_limit &&
3231 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3232 /*
3233 * this exists to support hardware controllers
3234 * incapable of generating DMAs with more than 32 bits
3235 * of address on platforms with physical memory > 4G...
3236 */
3237 vm_page_queue_enter_first(&vm_lopage_queue_free,
3238 mem,
3239 vm_page_t,
3240 vmp_pageq);
3241 vm_lopage_free_count++;
3242
3243 if (vm_lopage_free_count >= vm_lopage_free_limit)
3244 vm_lopage_refill = FALSE;
3245
3246 mem->vmp_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
3247 mem->vmp_lopage = TRUE;
3248 #if CONFIG_SECLUDED_MEMORY
3249 } else if (vm_page_free_count > vm_page_free_reserved &&
3250 vm_page_secluded_count < vm_page_secluded_target &&
3251 num_tasks_can_use_secluded_mem == 0) {
3252 /*
3253 * XXX FBDP TODO: also avoid refilling secluded queue
3254 * when some IOKit objects are already grabbing from it...
3255 */
3256 if (!page_queues_locked) {
3257 if (!vm_page_trylock_queues()) {
3258 /* take locks in right order */
3259 lck_mtx_unlock(&vm_page_queue_free_lock);
3260 vm_page_lock_queues();
3261 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3262 }
3263 }
3264 mem->vmp_lopage = FALSE;
3265 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3266 vm_page_queue_enter_first(&vm_page_queue_secluded,
3267 mem,
3268 vm_page_t,
3269 vmp_pageq);
3270 mem->vmp_q_state = VM_PAGE_ON_SECLUDED_Q;
3271 vm_page_secluded_count++;
3272 vm_page_secluded_count_free++;
3273 if (!page_queues_locked) {
3274 vm_page_unlock_queues();
3275 }
3276 LCK_MTX_ASSERT(&vm_page_queue_free_lock, LCK_MTX_ASSERT_OWNED);
3277 if (vm_page_free_wanted_secluded > 0) {
3278 vm_page_free_wanted_secluded--;
3279 need_secluded_wakeup = 1;
3280 }
3281 #endif /* CONFIG_SECLUDED_MEMORY */
3282 } else {
3283 mem->vmp_lopage = FALSE;
3284 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
3285
3286 color = VM_PAGE_GET_COLOR(mem);
3287 #if defined(__x86_64__)
3288 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead,
3289 mem,
3290 vm_page_t,
3291 vmp_pageq);
3292 #else
3293 vm_page_queue_enter(&vm_page_queue_free[color].qhead,
3294 mem,
3295 vm_page_t,
3296 vmp_pageq);
3297 #endif
3298 vm_page_free_count++;
3299 /*
3300 * Check if we should wake up someone waiting for page.
3301 * But don't bother waking them unless they can allocate.
3302 *
3303 * We wakeup only one thread, to prevent starvation.
3304 * Because the scheduling system handles wait queues FIFO,
3305 * if we wakeup all waiting threads, one greedy thread
3306 * can starve multiple niceguy threads. When the threads
3307 * all wake up, the greedy thread runs first, grabs the page,
3308 * and waits for another page. It will be the first to run
3309 * when the next page is freed.
3310 *
3311 * However, there is a slight danger here.
3312 * The thread we wake might not use the free page.
3313 * Then the other threads could wait indefinitely
3314 * while the page goes unused. To forestall this,
3315 * the pageout daemon will keep making free pages
3316 * as long as vm_page_free_wanted is non-zero.
3317 */
3318
3319 assert(vm_page_free_count > 0);
3320 if (vm_page_free_wanted_privileged > 0) {
3321 vm_page_free_wanted_privileged--;
3322 need_priv_wakeup = 1;
3323 #if CONFIG_SECLUDED_MEMORY
3324 } else if (vm_page_free_wanted_secluded > 0 &&
3325 vm_page_free_count > vm_page_free_reserved) {
3326 vm_page_free_wanted_secluded--;
3327 need_secluded_wakeup = 1;
3328 #endif /* CONFIG_SECLUDED_MEMORY */
3329 } else if (vm_page_free_wanted > 0 &&
3330 vm_page_free_count > vm_page_free_reserved) {
3331 vm_page_free_wanted--;
3332 need_wakeup = 1;
3333 }
3334 }
3335 vm_pageout_vminfo.vm_page_pages_freed++;
3336
3337 VM_DEBUG_CONSTANT_EVENT(vm_page_release, VM_PAGE_RELEASE, DBG_FUNC_NONE, 1, 0, 0, 0);
3338
3339 lck_mtx_unlock(&vm_page_queue_free_lock);
3340
3341 if (need_priv_wakeup)
3342 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
3343 #if CONFIG_SECLUDED_MEMORY
3344 else if (need_secluded_wakeup)
3345 thread_wakeup_one((event_t) &vm_page_free_wanted_secluded);
3346 #endif /* CONFIG_SECLUDED_MEMORY */
3347 else if (need_wakeup)
3348 thread_wakeup_one((event_t) &vm_page_free_count);
3349
3350 VM_CHECK_MEMORYSTATUS;
3351 }
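
/*
 * Illustrative sketch (not part of the build): the wake-one policy
 * spelled out in the comment inside vm_page_release() above, shown with
 * hypothetical pthread primitives.  Releasing a page signals a single
 * waiter rather than broadcasting, so one greedy consumer cannot absorb
 * every wakeup while the remaining waiters starve behind it.
 */
#if 0 /* illustrative only */
#include <pthread.h>

static pthread_mutex_t	demo_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	demo_page_freed = PTHREAD_COND_INITIALIZER;
static unsigned int	demo_free_count;
static unsigned int	demo_free_wanted;	/* threads currently waiting */

static void
demo_release_one(void)
{
	int need_wakeup = 0;

	pthread_mutex_lock(&demo_lock);
	demo_free_count++;
	if (demo_free_wanted > 0)
		need_wakeup = 1;	/* there is at least one waiter */
	pthread_mutex_unlock(&demo_lock);

	if (need_wakeup)
		pthread_cond_signal(&demo_page_freed);	/* wake exactly one, not _broadcast */
}

static void
demo_wait_for_page(void)
{
	pthread_mutex_lock(&demo_lock);
	demo_free_wanted++;
	while (demo_free_count == 0)
		pthread_cond_wait(&demo_page_freed, &demo_lock);
	demo_free_wanted--;
	demo_free_count--;
	pthread_mutex_unlock(&demo_lock);
}
#endif /* illustrative only */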
3352
3353 /*
3354 * This version of vm_page_release() is used only at startup
3355 * when we are single-threaded and pages are being released
3356 * for the first time. Hence, no locking or unnecessary checks are made.
3357 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
3358 */
3359 void
3360 vm_page_release_startup(
3361 vm_page_t mem)
3362 {
3363 vm_page_queue_t queue_free;
3364
3365 if (vm_lopage_free_count < vm_lopage_free_limit &&
3366 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3367 mem->vmp_lopage = TRUE;
3368 mem->vmp_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
3369 vm_lopage_free_count++;
3370 queue_free = &vm_lopage_queue_free;
3371 #if CONFIG_SECLUDED_MEMORY
3372 } else if (vm_page_secluded_count < vm_page_secluded_target) {
3373 mem->vmp_lopage = FALSE;
3374 mem->vmp_q_state = VM_PAGE_ON_SECLUDED_Q;
3375 vm_page_secluded_count++;
3376 vm_page_secluded_count_free++;
3377 queue_free = &vm_page_queue_secluded;
3378 #endif /* CONFIG_SECLUDED_MEMORY */
3379 } else {
3380 mem->vmp_lopage = FALSE;
3381 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
3382 vm_page_free_count++;
3383 queue_free = &vm_page_queue_free[VM_PAGE_GET_COLOR(mem)].qhead;
3384 }
3385 if (mem->vmp_q_state == VM_PAGE_ON_FREE_Q) {
3386 #if defined(__x86_64__)
3387 vm_page_queue_enter_clump(queue_free, mem, vm_page_t, vmp_pageq);
3388 #else
3389 vm_page_queue_enter(queue_free, mem, vm_page_t, vmp_pageq);
3390 #endif
3391 } else
3392 vm_page_queue_enter_first(queue_free, mem, vm_page_t, vmp_pageq);
3393 }
3394
3395 /*
3396 * vm_page_wait:
3397 *
3398 * Wait for a page to become available.
3399 * If there are plenty of free pages, then we don't sleep.
3400 *
3401 * Returns:
3402 * TRUE: There may be another page, try again
3403 * FALSE: We were interrupted out of our wait, don't try again
3404 */
3405
3406 boolean_t
3407 vm_page_wait(
3408 int interruptible )
3409 {
3410 /*
3411 * We can't use vm_page_free_reserved to make this
3412 * determination. Consider: some thread might
3413 * need to allocate two pages. The first allocation
3414 * succeeds, the second fails. After the first page is freed,
3415 * a call to vm_page_wait must really block.
3416 */
3417 kern_return_t wait_result;
3418 int need_wakeup = 0;
3419 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
3420
3421 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3422
3423 if (is_privileged && vm_page_free_count) {
3424 lck_mtx_unlock(&vm_page_queue_free_lock);
3425 return TRUE;
3426 }
3427
3428 if (vm_page_free_count >= vm_page_free_target) {
3429 lck_mtx_unlock(&vm_page_queue_free_lock);
3430 return TRUE;
3431 }
3432
3433 if (is_privileged) {
3434 if (vm_page_free_wanted_privileged++ == 0)
3435 need_wakeup = 1;
3436 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
3437 #if CONFIG_SECLUDED_MEMORY
3438 } else if (secluded_for_apps &&
3439 task_can_use_secluded_mem(current_task(), FALSE)) {
3440 #if 00
3441 /* XXX FBDP: need pageq lock for this... */
3442 /* XXX FBDP: might wait even if pages available, */
3443 /* XXX FBDP: hopefully not for too long... */
3444 if (vm_page_secluded_count > 0) {
3445 lck_mtx_unlock(&vm_page_queue_free_lock);
3446 return TRUE;
3447 }
3448 #endif
3449 if (vm_page_free_wanted_secluded++ == 0) {
3450 need_wakeup = 1;
3451 }
3452 wait_result = assert_wait(
3453 (event_t)&vm_page_free_wanted_secluded,
3454 interruptible);
3455 #endif /* CONFIG_SECLUDED_MEMORY */
3456 } else {
3457 if (vm_page_free_wanted++ == 0)
3458 need_wakeup = 1;
3459 wait_result = assert_wait((event_t)&vm_page_free_count,
3460 interruptible);
3461 }
3462 lck_mtx_unlock(&vm_page_queue_free_lock);
3463 counter(c_vm_page_wait_block++);
3464
3465 if (need_wakeup)
3466 thread_wakeup((event_t)&vm_page_free_wanted);
3467
3468 if (wait_result == THREAD_WAITING) {
3469 VM_DEBUG_CONSTANT_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
3470 vm_page_free_wanted_privileged,
3471 vm_page_free_wanted,
3472 #if CONFIG_SECLUDED_MEMORY
3473 vm_page_free_wanted_secluded,
3474 #else /* CONFIG_SECLUDED_MEMORY */
3475 0,
3476 #endif /* CONFIG_SECLUDED_MEMORY */
3477 0);
3478 wait_result = thread_block(THREAD_CONTINUE_NULL);
3479 VM_DEBUG_CONSTANT_EVENT(vm_page_wait_block,
3480 VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
3481 }
3482
3483 return (wait_result == THREAD_AWAKENED);
3484 }
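
/*
 * Illustrative sketch (not part of the build): the retry loop that
 * callers of vm_page_grab()/vm_page_wait() typically use.  The helper
 * name below is hypothetical; the point is that vm_page_wait() returning
 * FALSE means the wait was interrupted and the caller should give up
 * rather than spin.
 */
#if 0 /* illustrative only */
static vm_page_t
demo_grab_or_wait(int interruptible)
{
	vm_page_t m;

	while ((m = vm_page_grab()) == VM_PAGE_NULL) {
		if (!vm_page_wait(interruptible)) {
			/* interrupted out of the wait; don't retry */
			return VM_PAGE_NULL;
		}
	}
	return m;
}
#endif /* illustrative only */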
3485
3486 /*
3487 * vm_page_alloc:
3488 *
3489 * Allocate and return a memory cell associated
3490 * with this VM object/offset pair.
3491 *
3492 * Object must be locked.
3493 */
3494
3495 vm_page_t
3496 vm_page_alloc(
3497 vm_object_t object,
3498 vm_object_offset_t offset)
3499 {
3500 vm_page_t mem;
3501 int grab_options;
3502
3503 vm_object_lock_assert_exclusive(object);
3504 grab_options = 0;
3505 #if CONFIG_SECLUDED_MEMORY
3506 if (object->can_grab_secluded) {
3507 grab_options |= VM_PAGE_GRAB_SECLUDED;
3508 }
3509 #endif /* CONFIG_SECLUDED_MEMORY */
3510 mem = vm_page_grab_options(grab_options);
3511 if (mem == VM_PAGE_NULL)
3512 return VM_PAGE_NULL;
3513
3514 vm_page_insert(mem, object, offset);
3515
3516 return(mem);
3517 }
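
/*
 * Illustrative sketch (not part of the build): the locking contract for
 * vm_page_alloc() above.  The object must be held exclusively across the
 * call; the helper name here is hypothetical and only shows the shape of
 * a typical caller.
 */
#if 0 /* illustrative only */
static vm_page_t
demo_alloc_at(vm_object_t object, vm_object_offset_t offset)
{
	vm_page_t m;

	vm_object_lock(object);		/* exclusive object lock required */
	m = vm_page_alloc(object, offset);
	vm_object_unlock(object);

	/* VM_PAGE_NULL if no page was available; otherwise the page is
	 * still flagged busy and the caller clears that when done */
	return m;
}
#endif /* illustrative only */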
3518
3519 /*
3520 * vm_page_alloc_guard:
3521 *
3522 * Allocate a fictitious page which will be used
3523 * as a guard page. The page will be inserted into
3524 * the object and returned to the caller.
3525 */
3526
3527 vm_page_t
3528 vm_page_alloc_guard(
3529 vm_object_t object,
3530 vm_object_offset_t offset)
3531 {
3532 vm_page_t mem;
3533
3534 vm_object_lock_assert_exclusive(object);
3535 mem = vm_page_grab_guard();
3536 if (mem == VM_PAGE_NULL)
3537 return VM_PAGE_NULL;
3538
3539 vm_page_insert(mem, object, offset);
3540
3541 return(mem);
3542 }
3543
3544
3545 counter(unsigned int c_laundry_pages_freed = 0;)
3546
3547 /*
3548 * vm_page_free_prepare:
3549 *
3550 * Removes page from any queue it may be on
3551 * and disassociates it from its VM object.
3552 *
3553 * Object and page queues must be locked prior to entry.
3554 */
3555 static void
3556 vm_page_free_prepare(
3557 vm_page_t mem)
3558 {
3559 vm_page_free_prepare_queues(mem);
3560 vm_page_free_prepare_object(mem, TRUE);
3561 }
3562
3563
3564 void
3565 vm_page_free_prepare_queues(
3566 vm_page_t mem)
3567 {
3568 vm_object_t m_object;
3569
3570 VM_PAGE_CHECK(mem);
3571
3572 assert(mem->vmp_q_state != VM_PAGE_ON_FREE_Q);
3573 assert(!mem->vmp_cleaning);
3574 m_object = VM_PAGE_OBJECT(mem);
3575
3576 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3577 if (m_object) {
3578 vm_object_lock_assert_exclusive(m_object);
3579 }
3580 if (mem->vmp_laundry) {
3581 /*
3582 * We may have to free a page while it's being laundered
3583 * if we lost its pager (due to a forced unmount, for example).
3584 * We need to call vm_pageout_steal_laundry() before removing
3585 * the page from its VM object, so that we can remove it
3586 * from its pageout queue and adjust the laundry accounting
3587 */
3588 vm_pageout_steal_laundry(mem, TRUE);
3589 counter(++c_laundry_pages_freed);
3590 }
3591
3592 vm_page_queues_remove(mem, TRUE);
3593
3594 if (VM_PAGE_WIRED(mem)) {
3595 assert(mem->vmp_wire_count > 0);
3596
3597 if (m_object) {
3598
3599 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
3600 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
3601 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
3602
3603 assert(m_object->resident_page_count >=
3604 m_object->wired_page_count);
3605
3606 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
3607 OSAddAtomic(+1, &vm_page_purgeable_count);
3608 assert(vm_page_purgeable_wired_count > 0);
3609 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
3610 }
3611 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
3612 m_object->purgable == VM_PURGABLE_EMPTY) &&
3613 m_object->vo_owner != TASK_NULL) {
3614 task_t owner;
3615 int ledger_idx_volatile;
3616 int ledger_idx_nonvolatile;
3617 int ledger_idx_volatile_compressed;
3618 int ledger_idx_nonvolatile_compressed;
3619 boolean_t do_footprint;
3620
3621 owner = VM_OBJECT_OWNER(m_object);
3622 vm_object_ledger_tag_ledgers(
3623 m_object,
3624 &ledger_idx_volatile,
3625 &ledger_idx_nonvolatile,
3626 &ledger_idx_volatile_compressed,
3627 &ledger_idx_nonvolatile_compressed,
3628 &do_footprint);
3629 /*
3630 * While wired, this page was accounted
3631 * as "non-volatile" but it should now
3632 * be accounted as "volatile".
3633 */
3634 /* one less "non-volatile"... */
3635 ledger_debit(owner->ledger,
3636 ledger_idx_nonvolatile,
3637 PAGE_SIZE);
3638 if (do_footprint) {
3639 /* ... and "phys_footprint" */
3640 ledger_debit(owner->ledger,
3641 task_ledgers.phys_footprint,
3642 PAGE_SIZE);
3643 }
3644 /* one more "volatile" */
3645 ledger_credit(owner->ledger,
3646 ledger_idx_volatile,
3647 PAGE_SIZE);
3648 }
3649 }
3650 if (!mem->vmp_private && !mem->vmp_fictitious)
3651 vm_page_wire_count--;
3652
3653 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
3654 mem->vmp_wire_count = 0;
3655 assert(!mem->vmp_gobbled);
3656 } else if (mem->vmp_gobbled) {
3657 if (!mem->vmp_private && !mem->vmp_fictitious)
3658 vm_page_wire_count--;
3659 vm_page_gobble_count--;
3660 }
3661 }
3662
3663
3664 void
3665 vm_page_free_prepare_object(
3666 vm_page_t mem,
3667 boolean_t remove_from_hash)
3668 {
3669 if (mem->vmp_tabled)
3670 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
3671
3672 PAGE_WAKEUP(mem); /* clears wanted */
3673
3674 if (mem->vmp_private) {
3675 mem->vmp_private = FALSE;
3676 mem->vmp_fictitious = TRUE;
3677 VM_PAGE_SET_PHYS_PAGE(mem, vm_page_fictitious_addr);
3678 }
3679 if ( !mem->vmp_fictitious) {
3680 assert(mem->vmp_pageq.next == 0);
3681 assert(mem->vmp_pageq.prev == 0);
3682 assert(mem->vmp_listq.next == 0);
3683 assert(mem->vmp_listq.prev == 0);
3684 #if CONFIG_BACKGROUND_QUEUE
3685 assert(mem->vmp_backgroundq.next == 0);
3686 assert(mem->vmp_backgroundq.prev == 0);
3687 #endif /* CONFIG_BACKGROUND_QUEUE */
3688 assert(mem->vmp_next_m == 0);
3689 vm_page_init(mem, VM_PAGE_GET_PHYS_PAGE(mem), mem->vmp_lopage);
3690 }
3691 }
3692
3693
3694 /*
3695 * vm_page_free:
3696 *
3697 * Returns the given page to the free list,
3698 * disassociating it from any VM object.
3699 *
3700 * Object and page queues must be locked prior to entry.
3701 */
3702 void
3703 vm_page_free(
3704 vm_page_t mem)
3705 {
3706 vm_page_free_prepare(mem);
3707
3708 if (mem->vmp_fictitious) {
3709 vm_page_release_fictitious(mem);
3710 } else {
3711 vm_page_release(mem,
3712 TRUE); /* page queues are locked */
3713 }
3714 }
3715
3716
3717 void
3718 vm_page_free_unlocked(
3719 vm_page_t mem,
3720 boolean_t remove_from_hash)
3721 {
3722 vm_page_lockspin_queues();
3723 vm_page_free_prepare_queues(mem);
3724 vm_page_unlock_queues();
3725
3726 vm_page_free_prepare_object(mem, remove_from_hash);
3727
3728 if (mem->vmp_fictitious) {
3729 vm_page_release_fictitious(mem);
3730 } else {
3731 vm_page_release(mem, FALSE); /* page queues are not locked */
3732 }
3733 }
3734
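/*
 * Locking sketch (illustrative only): vm_page_free() above expects both the
 * page's object lock and the page queues lock to be held by the caller,
 * whereas vm_page_free_unlocked() takes the page queues lock itself and only
 * relies on the caller for the object lock.  With "m" and "m2" as
 * hypothetical pages whose objects are already locked:
 *
 *	vm_page_lock_queues();
 *	vm_page_free(m);
 *	vm_page_unlock_queues();
 *
 *	vm_page_free_unlocked(m2, TRUE);	// TRUE: also remove from the hash
 */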
3735
3736 /*
3737 * Free a list of pages. The list can be up to several hundred pages,
3738 * as batched up by vm_pageout_scan().
3739 * The big win is not having to take the free list lock once
3740 * per page.
3741 *
3742 * The VM page queues lock (vm_page_queue_lock) should NOT be held.
3743 * The VM page free queues lock (vm_page_queue_free_lock) should NOT be held.
3744 */
3745 void
3746 vm_page_free_list(
3747 vm_page_t freeq,
3748 boolean_t prepare_object)
3749 {
3750 vm_page_t mem;
3751 vm_page_t nxt;
3752 vm_page_t local_freeq;
3753 int pg_count;
3754
3755 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
3756 LCK_MTX_ASSERT(&vm_page_queue_free_lock, LCK_MTX_ASSERT_NOTOWNED);
3757
3758 while (freeq) {
3759
3760 pg_count = 0;
3761 local_freeq = VM_PAGE_NULL;
3762 mem = freeq;
3763
3764 /*
3765 * break up the processing into smaller chunks so
3766 * that we can 'pipeline' the pages onto the
3767 * free list w/o introducing too much
3768 * contention on the global free queue lock
3769 */
3770 while (mem && pg_count < 64) {
3771
3772 assert((mem->vmp_q_state == VM_PAGE_NOT_ON_Q) ||
3773 (mem->vmp_q_state == VM_PAGE_IS_WIRED));
3774 #if CONFIG_BACKGROUND_QUEUE
3775 assert(mem->vmp_backgroundq.next == 0 &&
3776 mem->vmp_backgroundq.prev == 0 &&
3777 mem->vmp_on_backgroundq == FALSE);
3778 #endif
3779 nxt = mem->vmp_snext;
3780 mem->vmp_snext = NULL;
3781 assert(mem->vmp_pageq.prev == 0);
3782
3783 if (vm_page_free_verify && !mem->vmp_fictitious && !mem->vmp_private) {
3784 assertf(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)), "page = 0x%llx", (uint64_t)VM_PAGE_GET_PHYS_PAGE(mem));
3785 }
3786 if (prepare_object == TRUE)
3787 vm_page_free_prepare_object(mem, TRUE);
3788
3789 if (!mem->vmp_fictitious) {
3790 assert(mem->vmp_busy);
3791
3792 if ((mem->vmp_lopage == TRUE || vm_lopage_refill == TRUE) &&
3793 vm_lopage_free_count < vm_lopage_free_limit &&
3794 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3795 vm_page_release(mem, FALSE); /* page queues are not locked */
3796 #if CONFIG_SECLUDED_MEMORY
3797 } else if (vm_page_secluded_count < vm_page_secluded_target &&
3798 num_tasks_can_use_secluded_mem == 0) {
3799 vm_page_release(mem,
3800 FALSE); /* page queues are not locked */
3801 #endif /* CONFIG_SECLUDED_MEMORY */
3802 } else {
3803 /*
3804 * IMPORTANT: we can't set the page "free" here
3805 * because that would make the page eligible for
3806 * a physically-contiguous allocation (see
3807 * vm_page_find_contiguous()) right away (we don't
3808 * hold the vm_page_queue_free lock). That would
3809 * cause trouble because the page is not actually
3810 * in the free queue yet...
3811 */
3812 mem->vmp_snext = local_freeq;
3813 local_freeq = mem;
3814 pg_count++;
3815
3816 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
3817 }
3818 } else {
3819 assert(VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_fictitious_addr ||
3820 VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_guard_addr);
3821 vm_page_release_fictitious(mem);
3822 }
3823 mem = nxt;
3824 }
3825 freeq = mem;
3826
3827 if ( (mem = local_freeq) ) {
3828 unsigned int avail_free_count;
3829 unsigned int need_wakeup = 0;
3830 unsigned int need_priv_wakeup = 0;
3831 #if CONFIG_SECLUDED_MEMORY
3832 unsigned int need_wakeup_secluded = 0;
3833 #endif /* CONFIG_SECLUDED_MEMORY */
3834
3835 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3836
3837 while (mem) {
3838 int color;
3839
3840 nxt = mem->vmp_snext;
3841
3842 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
3843 assert(mem->vmp_busy);
3844 mem->vmp_lopage = FALSE;
3845 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
3846
3847 color = VM_PAGE_GET_COLOR(mem);
3848 #if defined(__x86_64__)
3849 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead,
3850 mem,
3851 vm_page_t,
3852 vmp_pageq);
3853 #else
3854 vm_page_queue_enter(&vm_page_queue_free[color].qhead,
3855 mem,
3856 vm_page_t,
3857 vmp_pageq);
3858 #endif
3859 mem = nxt;
3860 }
3861 vm_pageout_vminfo.vm_page_pages_freed += pg_count;
3862 vm_page_free_count += pg_count;
3863 avail_free_count = vm_page_free_count;
3864
3865 VM_DEBUG_CONSTANT_EVENT(vm_page_release, VM_PAGE_RELEASE, DBG_FUNC_NONE, pg_count, 0, 0, 0);
3866
3867 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
3868
3869 if (avail_free_count < vm_page_free_wanted_privileged) {
3870 need_priv_wakeup = avail_free_count;
3871 vm_page_free_wanted_privileged -= avail_free_count;
3872 avail_free_count = 0;
3873 } else {
3874 need_priv_wakeup = vm_page_free_wanted_privileged;
3875 avail_free_count -= vm_page_free_wanted_privileged;
3876 vm_page_free_wanted_privileged = 0;
3877 }
3878 }
3879 #if CONFIG_SECLUDED_MEMORY
3880 if (vm_page_free_wanted_secluded > 0 &&
3881 avail_free_count > vm_page_free_reserved) {
3882 unsigned int available_pages;
3883 available_pages = (avail_free_count -
3884 vm_page_free_reserved);
3885 if (available_pages <
3886 vm_page_free_wanted_secluded) {
3887 need_wakeup_secluded = available_pages;
3888 vm_page_free_wanted_secluded -=
3889 available_pages;
3890 avail_free_count -= available_pages;
3891 } else {
3892 need_wakeup_secluded =
3893 vm_page_free_wanted_secluded;
3894 avail_free_count -=
3895 vm_page_free_wanted_secluded;
3896 vm_page_free_wanted_secluded = 0;
3897 }
3898 }
3899 #endif /* CONFIG_SECLUDED_MEMORY */
3900 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
3901 unsigned int available_pages;
3902
3903 available_pages = avail_free_count - vm_page_free_reserved;
3904
3905 if (available_pages >= vm_page_free_wanted) {
3906 need_wakeup = vm_page_free_wanted;
3907 vm_page_free_wanted = 0;
3908 } else {
3909 need_wakeup = available_pages;
3910 vm_page_free_wanted -= available_pages;
3911 }
3912 }
3913 lck_mtx_unlock(&vm_page_queue_free_lock);
3914
3915 if (need_priv_wakeup != 0) {
3916 /*
3917 * There shouldn't be that many VM-privileged threads,
3918 * so let's wake them all up, even if we don't quite
3919 * have enough pages to satisfy them all.
3920 */
3921 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
3922 }
3923 #if CONFIG_SECLUDED_MEMORY
3924 if (need_wakeup_secluded != 0 &&
3925 vm_page_free_wanted_secluded == 0) {
3926 thread_wakeup((event_t)
3927 &vm_page_free_wanted_secluded);
3928 } else {
3929 for (;
3930 need_wakeup_secluded != 0;
3931 need_wakeup_secluded--) {
3932 thread_wakeup_one(
3933 (event_t)
3934 &vm_page_free_wanted_secluded);
3935 }
3936 }
3937 #endif /* CONFIG_SECLUDED_MEMORY */
3938 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
3939 /*
3940 * We don't expect to have any more waiters
3941 * after this, so let's wake them all up at
3942 * once.
3943 */
3944 thread_wakeup((event_t) &vm_page_free_count);
3945 } else for (; need_wakeup != 0; need_wakeup--) {
3946 /*
3947 * Wake up one waiter per page we just released.
3948 */
3949 thread_wakeup_one((event_t) &vm_page_free_count);
3950 }
3951
3952 VM_CHECK_MEMORYSTATUS;
3953 }
3954 }
3955 }
3956
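/*
 * Illustrative sketch (hypothetical caller): a batch is built by chaining
 * pages through vmp_snext and handed over in a single call, so the free-list
 * lock is taken once per batch instead of once per page:
 *
 *	vm_page_t local_freeq = VM_PAGE_NULL;
 *
 *	// for each page "m" being retired:
 *	m->vmp_snext = local_freeq;
 *	local_freeq = m;
 *
 *	// then, with no page queue locks held:
 *	if (local_freeq != VM_PAGE_NULL)
 *		vm_page_free_list(local_freeq, TRUE);	// TRUE: prepare/remove from object
 */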
3957
3958 /*
3959 * vm_page_wire:
3960 *
3961 * Mark this page as wired down by yet
3962 * another map, removing it from paging queues
3963 * as necessary.
3964 *
3965 * The page's object and the page queues must be locked.
3966 */
3967
3968
3969 void
3970 vm_page_wire(
3971 vm_page_t mem,
3972 vm_tag_t tag,
3973 boolean_t check_memorystatus)
3974 {
3975 vm_object_t m_object;
3976
3977 m_object = VM_PAGE_OBJECT(mem);
3978
3979 // dbgLog(current_thread(), mem->vmp_offset, m_object, 1); /* (TEST/DEBUG) */
3980
3981 VM_PAGE_CHECK(mem);
3982 if (m_object) {
3983 vm_object_lock_assert_exclusive(m_object);
3984 } else {
3985 /*
3986 * In theory, the page should be in an object before it
3987 * gets wired, since we need to hold the object lock
3988 * to update some fields in the page structure.
3989 * However, some code (i386 pmap, for example) might want
3990 * to wire a page before it gets inserted into an object.
3991 * That's somewhat OK, as long as nobody else can get to
3992 * that page and update it at the same time.
3993 */
3994 }
3995 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3996 if ( !VM_PAGE_WIRED(mem)) {
3997
3998 if (mem->vmp_laundry)
3999 vm_pageout_steal_laundry(mem, TRUE);
4000
4001 vm_page_queues_remove(mem, TRUE);
4002
4003 assert(mem->vmp_wire_count == 0);
4004 mem->vmp_q_state = VM_PAGE_IS_WIRED;
4005
4006 if (m_object) {
4007
4008 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
4009 VM_OBJECT_WIRED_PAGE_ADD(m_object, mem);
4010 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, tag);
4011
4012 assert(m_object->resident_page_count >=
4013 m_object->wired_page_count);
4014 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
4015 assert(vm_page_purgeable_count > 0);
4016 OSAddAtomic(-1, &vm_page_purgeable_count);
4017 OSAddAtomic(1, &vm_page_purgeable_wired_count);
4018 }
4019 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
4020 m_object->purgable == VM_PURGABLE_EMPTY) &&
4021 m_object->vo_owner != TASK_NULL) {
4022 task_t owner;
4023 int ledger_idx_volatile;
4024 int ledger_idx_nonvolatile;
4025 int ledger_idx_volatile_compressed;
4026 int ledger_idx_nonvolatile_compressed;
4027 boolean_t do_footprint;
4028
4029 owner = VM_OBJECT_OWNER(m_object);
4030 vm_object_ledger_tag_ledgers(
4031 m_object,
4032 &ledger_idx_volatile,
4033 &ledger_idx_nonvolatile,
4034 &ledger_idx_volatile_compressed,
4035 &ledger_idx_nonvolatile_compressed,
4036 &do_footprint);
4037 /* less volatile bytes */
4038 ledger_debit(owner->ledger,
4039 ledger_idx_volatile,
4040 PAGE_SIZE);
4041 /* more not-quite-volatile bytes */
4042 ledger_credit(owner->ledger,
4043 ledger_idx_nonvolatile,
4044 PAGE_SIZE);
4045 if (do_footprint) {
4046 /* more footprint */
4047 ledger_credit(owner->ledger,
4048 task_ledgers.phys_footprint,
4049 PAGE_SIZE);
4050 }
4051 }
4052 if (m_object->all_reusable) {
4053 /*
4054 * Wired pages are not counted as "re-usable"
4055 * in "all_reusable" VM objects, so nothing
4056 * to do here.
4057 */
4058 } else if (mem->vmp_reusable) {
4059 /*
4060 * This page is not "re-usable" when it's
4061 * wired, so adjust its state and the
4062 * accounting.
4063 */
4064 vm_object_reuse_pages(m_object,
4065 mem->vmp_offset,
4066 mem->vmp_offset+PAGE_SIZE_64,
4067 FALSE);
4068 }
4069 }
4070 assert(!mem->vmp_reusable);
4071
4072 if (!mem->vmp_private && !mem->vmp_fictitious && !mem->vmp_gobbled)
4073 vm_page_wire_count++;
4074 if (mem->vmp_gobbled)
4075 vm_page_gobble_count--;
4076 mem->vmp_gobbled = FALSE;
4077
4078 if (check_memorystatus == TRUE) {
4079 VM_CHECK_MEMORYSTATUS;
4080 }
4081 }
4082 assert(!mem->vmp_gobbled);
4083 assert(mem->vmp_q_state == VM_PAGE_IS_WIRED);
4084 mem->vmp_wire_count++;
4085 if (__improbable(mem->vmp_wire_count == 0)) {
4086 panic("vm_page_wire(%p): wire_count overflow", mem);
4087 }
4088 VM_PAGE_CHECK(mem);
4089 }
4090
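/*
 * Illustrative pairing sketch (hypothetical caller; "tag" is whatever
 * vm_tag_t the wiring should be accounted against).  Both vm_page_wire() and
 * vm_page_unwire() expect the page's object lock and the page queues lock to
 * be held:
 *
 *	vm_page_lockspin_queues();
 *	vm_page_wire(m, tag, TRUE);		// TRUE: re-check memorystatus
 *	vm_page_unlock_queues();
 *
 *	// ... later, drop that wiring and put the page back on a paging queue ...
 *	vm_page_lockspin_queues();
 *	vm_page_unwire(m, TRUE);
 *	vm_page_unlock_queues();
 */
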
4091 /*
4092 * vm_page_unwire:
4093 *
4094 * Release one wiring of this page, potentially
4095 * enabling it to be paged again.
4096 *
4097 * The page's object and the page queues must be locked.
4098 */
4099 void
4100 vm_page_unwire(
4101 vm_page_t mem,
4102 boolean_t queueit)
4103 {
4104 vm_object_t m_object;
4105
4106 m_object = VM_PAGE_OBJECT(mem);
4107
4108 // dbgLog(current_thread(), mem->vmp_offset, m_object, 0); /* (TEST/DEBUG) */
4109
4110 VM_PAGE_CHECK(mem);
4111 assert(VM_PAGE_WIRED(mem));
4112 assert(mem->vmp_wire_count > 0);
4113 assert(!mem->vmp_gobbled);
4114 assert(m_object != VM_OBJECT_NULL);
4115 vm_object_lock_assert_exclusive(m_object);
4116 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4117 if (--mem->vmp_wire_count == 0) {
4118
4119 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
4120
4121 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
4122 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
4123 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
4124 if (!mem->vmp_private && !mem->vmp_fictitious) {
4125 vm_page_wire_count--;
4126 }
4127
4128 assert(m_object->resident_page_count >=
4129 m_object->wired_page_count);
4130 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
4131 OSAddAtomic(+1, &vm_page_purgeable_count);
4132 assert(vm_page_purgeable_wired_count > 0);
4133 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
4134 }
4135 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
4136 m_object->purgable == VM_PURGABLE_EMPTY) &&
4137 m_object->vo_owner != TASK_NULL) {
4138 task_t owner;
4139 int ledger_idx_volatile;
4140 int ledger_idx_nonvolatile;
4141 int ledger_idx_volatile_compressed;
4142 int ledger_idx_nonvolatile_compressed;
4143 boolean_t do_footprint;
4144
4145 owner = VM_OBJECT_OWNER(m_object);
4146 vm_object_ledger_tag_ledgers(
4147 m_object,
4148 &ledger_idx_volatile,
4149 &ledger_idx_nonvolatile,
4150 &ledger_idx_volatile_compressed,
4151 &ledger_idx_nonvolatile_compressed,
4152 &do_footprint);
4153 /* more volatile bytes */
4154 ledger_credit(owner->ledger,
4155 ledger_idx_volatile,
4156 PAGE_SIZE);
4157 /* less not-quite-volatile bytes */
4158 ledger_debit(owner->ledger,
4159 ledger_idx_nonvolatile,
4160 PAGE_SIZE);
4161 if (do_footprint) {
4162 /* less footprint */
4163 ledger_debit(owner->ledger,
4164 task_ledgers.phys_footprint,
4165 PAGE_SIZE);
4166 }
4167 }
4168 assert(m_object != kernel_object);
4169 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
4170
4171 if (queueit == TRUE) {
4172 if (m_object->purgable == VM_PURGABLE_EMPTY) {
4173 vm_page_deactivate(mem);
4174 } else {
4175 vm_page_activate(mem);
4176 }
4177 }
4178
4179 VM_CHECK_MEMORYSTATUS;
4180
4181 }
4182 VM_PAGE_CHECK(mem);
4183 }
4184
4185 /*
4186 * vm_page_deactivate:
4187 *
4188 * Returns the given page to the inactive list,
4189 * indicating that no physical maps have access
4190 * to this page. [Used by the physical mapping system.]
4191 *
4192 * The page queues must be locked.
4193 */
4194 void
4195 vm_page_deactivate(
4196 vm_page_t m)
4197 {
4198 vm_page_deactivate_internal(m, TRUE);
4199 }
4200
4201
4202 void
4203 vm_page_deactivate_internal(
4204 vm_page_t m,
4205 boolean_t clear_hw_reference)
4206 {
4207 vm_object_t m_object;
4208
4209 m_object = VM_PAGE_OBJECT(m);
4210
4211 VM_PAGE_CHECK(m);
4212 assert(m_object != kernel_object);
4213 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4214
4215 // dbgLog(VM_PAGE_GET_PHYS_PAGE(m), vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
4216 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4217 /*
4218 * This page is no longer very interesting. If it was
4219 * interesting (active or inactive/referenced), then we
4220 * clear the reference bit and (re)enter it in the
4221 * inactive queue. Note wired pages should not have
4222 * their reference bit cleared.
4223 */
4224 assert ( !(m->vmp_absent && !m->vmp_unusual));
4225
4226 if (m->vmp_gobbled) { /* can this happen? */
4227 assert( !VM_PAGE_WIRED(m));
4228
4229 if (!m->vmp_private && !m->vmp_fictitious)
4230 vm_page_wire_count--;
4231 vm_page_gobble_count--;
4232 m->vmp_gobbled = FALSE;
4233 }
4234 /*
4235 * if this page is currently on the pageout queue, we can't do the
4236 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4237 * and we can't remove it manually since we would need the object lock
4238 * (which is not required here) to decrement the activity_in_progress
4239 * reference which is held on the object while the page is in the pageout queue...
4240 * just let the normal laundry processing proceed
4241 */
4242 if (m->vmp_laundry || m->vmp_private || m->vmp_fictitious ||
4243 (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4244 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
4245 VM_PAGE_WIRED(m)) {
4246 return;
4247 }
4248 if (!m->vmp_absent && clear_hw_reference == TRUE)
4249 pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));
4250
4251 m->vmp_reference = FALSE;
4252 m->vmp_no_cache = FALSE;
4253
4254 if ( !VM_PAGE_INACTIVE(m)) {
4255 vm_page_queues_remove(m, FALSE);
4256
4257 if (!VM_DYNAMIC_PAGING_ENABLED() &&
4258 m->vmp_dirty && m_object->internal &&
4259 (m_object->purgable == VM_PURGABLE_DENY ||
4260 m_object->purgable == VM_PURGABLE_NONVOLATILE ||
4261 m_object->purgable == VM_PURGABLE_VOLATILE)) {
4262 vm_page_check_pageable_safe(m);
4263 vm_page_queue_enter(&vm_page_queue_throttled, m, vm_page_t, vmp_pageq);
4264 m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;
4265 vm_page_throttled_count++;
4266 } else {
4267 if (m_object->named && m_object->ref_count == 1) {
4268 vm_page_speculate(m, FALSE);
4269 #if DEVELOPMENT || DEBUG
4270 vm_page_speculative_recreated++;
4271 #endif
4272 } else {
4273 vm_page_enqueue_inactive(m, FALSE);
4274 }
4275 }
4276 }
4277 }
4278
4279 /*
4280 * vm_page_enqueue_cleaned
4281 *
4282 * Put the page on the cleaned queue, mark it cleaned, etc.
4283 * Being on the cleaned queue (i.e. vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
4284 * does ** NOT ** guarantee that the page is clean!
4285 *
4286 * Call with the queues lock held.
4287 */
4288
4289 void vm_page_enqueue_cleaned(vm_page_t m)
4290 {
4291 vm_object_t m_object;
4292
4293 m_object = VM_PAGE_OBJECT(m);
4294
4295 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4296 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4297 assert( !(m->vmp_absent && !m->vmp_unusual));
4298
4299 if (VM_PAGE_WIRED(m)) {
4300 return;
4301 }
4302
4303 if (m->vmp_gobbled) {
4304 if (!m->vmp_private && !m->vmp_fictitious)
4305 vm_page_wire_count--;
4306 vm_page_gobble_count--;
4307 m->vmp_gobbled = FALSE;
4308 }
4309 /*
4310 * if this page is currently on the pageout queue, we can't do the
4311 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4312 * and we can't remove it manually since we would need the object lock
4313 * (which is not required here) to decrement the activity_in_progress
4314 * reference which is held on the object while the page is in the pageout queue...
4315 * just let the normal laundry processing proceed
4316 */
4317 if (m->vmp_laundry || m->vmp_private || m->vmp_fictitious ||
4318 (m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) ||
4319 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
4320 return;
4321 }
4322 vm_page_queues_remove(m, FALSE);
4323
4324 vm_page_check_pageable_safe(m);
4325 vm_page_queue_enter(&vm_page_queue_cleaned, m, vm_page_t, vmp_pageq);
4326 m->vmp_q_state = VM_PAGE_ON_INACTIVE_CLEANED_Q;
4327 vm_page_cleaned_count++;
4328
4329 vm_page_inactive_count++;
4330 if (m_object->internal) {
4331 vm_page_pageable_internal_count++;
4332 } else {
4333 vm_page_pageable_external_count++;
4334 }
4335 #if CONFIG_BACKGROUND_QUEUE
4336 if (m->vmp_in_background)
4337 vm_page_add_to_backgroundq(m, TRUE);
4338 #endif
4339 VM_PAGEOUT_DEBUG(vm_pageout_enqueued_cleaned, 1);
4340 }
4341
4342 /*
4343 * vm_page_activate:
4344 *
4345 * Put the specified page on the active list (if appropriate).
4346 *
4347 * The page queues must be locked.
4348 */
4349
4350 void
4351 vm_page_activate(
4352 vm_page_t m)
4353 {
4354 vm_object_t m_object;
4355
4356 m_object = VM_PAGE_OBJECT(m);
4357
4358 VM_PAGE_CHECK(m);
4359 #ifdef FIXME_4778297
4360 assert(m_object != kernel_object);
4361 #endif
4362 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4363 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4364 assert( !(m->vmp_absent && !m->vmp_unusual));
4365
4366 if (m->vmp_gobbled) {
4367 assert( !VM_PAGE_WIRED(m));
4368 if (!m->vmp_private && !m->vmp_fictitious)
4369 vm_page_wire_count--;
4370 vm_page_gobble_count--;
4371 m->vmp_gobbled = FALSE;
4372 }
4373 /*
4374 * if this page is currently on the pageout queue, we can't do the
4375 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4376 * and we can't remove it manually since we would need the object lock
4377 * (which is not required here) to decrement the activity_in_progress
4378 * reference which is held on the object while the page is in the pageout queue...
4379 * just let the normal laundry processing proceed
4380 */
4381 if (m->vmp_laundry || m->vmp_private || m->vmp_fictitious ||
4382 (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4383 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q))
4384 return;
4385
4386 #if DEBUG
4387 if (m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q)
4388 panic("vm_page_activate: already active");
4389 #endif
4390
4391 if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
4392 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
4393 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
4394 }
4395
4396 vm_page_queues_remove(m, FALSE);
4397
4398 if ( !VM_PAGE_WIRED(m)) {
4399 vm_page_check_pageable_safe(m);
4400 if (!VM_DYNAMIC_PAGING_ENABLED() &&
4401 m->vmp_dirty && m_object->internal &&
4402 (m_object->purgable == VM_PURGABLE_DENY ||
4403 m_object->purgable == VM_PURGABLE_NONVOLATILE ||
4404 m_object->purgable == VM_PURGABLE_VOLATILE)) {
4405 vm_page_queue_enter(&vm_page_queue_throttled, m, vm_page_t, vmp_pageq);
4406 m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;
4407 vm_page_throttled_count++;
4408 } else {
4409 #if CONFIG_SECLUDED_MEMORY
4410 if (secluded_for_filecache &&
4411 vm_page_secluded_target != 0 &&
4412 num_tasks_can_use_secluded_mem == 0 &&
4413 m_object->eligible_for_secluded) {
4414 vm_page_queue_enter(&vm_page_queue_secluded, m,
4415 vm_page_t, vmp_pageq);
4416 m->vmp_q_state = VM_PAGE_ON_SECLUDED_Q;
4417 vm_page_secluded_count++;
4418 vm_page_secluded_count_inuse++;
4419 assert(!m_object->internal);
4420 // vm_page_pageable_external_count++;
4421 } else
4422 #endif /* CONFIG_SECLUDED_MEMORY */
4423 vm_page_enqueue_active(m, FALSE);
4424 }
4425 m->vmp_reference = TRUE;
4426 m->vmp_no_cache = FALSE;
4427 }
4428 VM_PAGE_CHECK(m);
4429 }
4430
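/*
 * Illustrative sketch (hypothetical caller): the queue-transition helpers in
 * this area (vm_page_deactivate, vm_page_enqueue_cleaned, vm_page_activate)
 * all assert that the page queues lock is held, so a typical call site is:
 *
 *	vm_page_lock_queues();
 *	if (recently_referenced)		// assumed caller-side policy flag
 *		vm_page_activate(m);
 *	else
 *		vm_page_deactivate(m);
 *	vm_page_unlock_queues();
 */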
4431
4432 /*
4433 * vm_page_speculate:
4434 *
4435 * Put the specified page on the speculative list (if appropriate).
4436 *
4437 * The page queues must be locked.
4438 */
4439 void
4440 vm_page_speculate(
4441 vm_page_t m,
4442 boolean_t new)
4443 {
4444 struct vm_speculative_age_q *aq;
4445 vm_object_t m_object;
4446
4447 m_object = VM_PAGE_OBJECT(m);
4448
4449 VM_PAGE_CHECK(m);
4450 vm_page_check_pageable_safe(m);
4451
4452 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4453 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4454 assert( !(m->vmp_absent && !m->vmp_unusual));
4455 assert(m_object->internal == FALSE);
4456
4457 /*
4458 * if this page is currently on the pageout queue, we can't do the
4459 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4460 * and we can't remove it manually since we would need the object lock
4461 * (which is not required here) to decrement the activity_in_progress
4462 * reference which is held on the object while the page is in the pageout queue...
4463 * just let the normal laundry processing proceed
4464 */
4465 if (m->vmp_laundry || m->vmp_private || m->vmp_fictitious ||
4466 (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4467 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q))
4468 return;
4469
4470 vm_page_queues_remove(m, FALSE);
4471
4472 if ( !VM_PAGE_WIRED(m)) {
4473 mach_timespec_t ts;
4474 clock_sec_t sec;
4475 clock_nsec_t nsec;
4476
4477 clock_get_system_nanotime(&sec, &nsec);
4478 ts.tv_sec = (unsigned int) sec;
4479 ts.tv_nsec = nsec;
4480
4481 if (vm_page_speculative_count == 0) {
4482
4483 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4484 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4485
4486 aq = &vm_page_queue_speculative[speculative_age_index];
4487
4488 /*
4489 * set the timer to begin a new group
4490 */
4491 aq->age_ts.tv_sec = vm_pageout_state.vm_page_speculative_q_age_ms / 1000;
4492 aq->age_ts.tv_nsec = (vm_pageout_state.vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
4493 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
4494 } else {
4495 aq = &vm_page_queue_speculative[speculative_age_index];
4496
4497 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
4498
4499 speculative_age_index++;
4500
4501 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
4502 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4503 if (speculative_age_index == speculative_steal_index) {
4504 speculative_steal_index = speculative_age_index + 1;
4505
4506 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
4507 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4508 }
4509 aq = &vm_page_queue_speculative[speculative_age_index];
4510
4511 if (!vm_page_queue_empty(&aq->age_q))
4512 vm_page_speculate_ageit(aq);
4513
4514 aq->age_ts.tv_sec = vm_pageout_state.vm_page_speculative_q_age_ms / 1000;
4515 aq->age_ts.tv_nsec = (vm_pageout_state.vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
4516 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
4517 }
4518 }
4519 vm_page_enqueue_tail(&aq->age_q, &m->vmp_pageq);
4520 m->vmp_q_state = VM_PAGE_ON_SPECULATIVE_Q;
4521 vm_page_speculative_count++;
4522 vm_page_pageable_external_count++;
4523
4524 if (new == TRUE) {
4525 vm_object_lock_assert_exclusive(m_object);
4526
4527 m_object->pages_created++;
4528 #if DEVELOPMENT || DEBUG
4529 vm_page_speculative_created++;
4530 #endif
4531 }
4532 }
4533 VM_PAGE_CHECK(m);
4534 }
4535
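/*
 * Worked example of the aging-bucket timer set up above (illustrative): with
 * vm_page_speculative_q_age_ms set to, say, 500, the split into seconds and
 * nanoseconds yields
 *
 *	age_ts.tv_sec  = 500 / 1000                          = 0
 *	age_ts.tv_nsec = (500 % 1000) * 1000 * NSEC_PER_USEC = 500,000,000
 *
 * i.e. half a second, which ADD_MACH_TIMESPEC then adds to the current system
 * time, so the bucket "expires" 500 ms from now and a new bucket is started.
 */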
4536
4537 /*
4538 * move pages from the specified aging bin to
4539 * the speculative bin that pageout_scan claims from
4540 *
4541 * The page queues must be locked.
4542 */
4543 void
4544 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
4545 {
4546 struct vm_speculative_age_q *sq;
4547 vm_page_t t;
4548
4549 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
4550
4551 if (vm_page_queue_empty(&sq->age_q)) {
4552 sq->age_q.next = aq->age_q.next;
4553 sq->age_q.prev = aq->age_q.prev;
4554
4555 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.next);
4556 t->vmp_pageq.prev = VM_PAGE_PACK_PTR(&sq->age_q);
4557
4558 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
4559 t->vmp_pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);
4560 } else {
4561 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
4562 t->vmp_pageq.next = aq->age_q.next;
4563
4564 t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.next);
4565 t->vmp_pageq.prev = sq->age_q.prev;
4566
4567 t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.prev);
4568 t->vmp_pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);
4569
4570 sq->age_q.prev = aq->age_q.prev;
4571 }
4572 vm_page_queue_init(&aq->age_q);
4573 }
4574
4575
4576 void
4577 vm_page_lru(
4578 vm_page_t m)
4579 {
4580 VM_PAGE_CHECK(m);
4581 assert(VM_PAGE_OBJECT(m) != kernel_object);
4582 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4583
4584 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4585
4586 if (m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q) {
4587 /*
4588 * we don't need to do all the other work that
4589 * vm_page_queues_remove and vm_page_enqueue_inactive
4590 * bring along for the ride
4591 */
4592 assert(!m->vmp_laundry);
4593 assert(!m->vmp_private);
4594
4595 m->vmp_no_cache = FALSE;
4596
4597 vm_page_queue_remove(&vm_page_queue_inactive, m, vm_page_t, vmp_pageq);
4598 vm_page_queue_enter(&vm_page_queue_inactive, m, vm_page_t, vmp_pageq);
4599
4600 return;
4601 }
4602 /*
4603 * if this page is currently on the pageout queue, we can't do the
4604 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4605 * and we can't remove it manually since we would need the object lock
4606 * (which is not required here) to decrement the activity_in_progress
4607 * reference which is held on the object while the page is in the pageout queue...
4608 * just let the normal laundry processing proceed
4609 */
4610 if (m->vmp_laundry || m->vmp_private ||
4611 (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4612 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
4613 VM_PAGE_WIRED(m))
4614 return;
4615
4616 m->vmp_no_cache = FALSE;
4617
4618 vm_page_queues_remove(m, FALSE);
4619
4620 vm_page_enqueue_inactive(m, FALSE);
4621 }
4622
4623
4624 void
4625 vm_page_reactivate_all_throttled(void)
4626 {
4627 vm_page_t first_throttled, last_throttled;
4628 vm_page_t first_active;
4629 vm_page_t m;
4630 int extra_active_count;
4631 int extra_internal_count, extra_external_count;
4632 vm_object_t m_object;
4633
4634 if (!VM_DYNAMIC_PAGING_ENABLED())
4635 return;
4636
4637 extra_active_count = 0;
4638 extra_internal_count = 0;
4639 extra_external_count = 0;
4640 vm_page_lock_queues();
4641 if (! vm_page_queue_empty(&vm_page_queue_throttled)) {
4642 /*
4643 * Switch "throttled" pages to "active".
4644 */
4645 vm_page_queue_iterate(&vm_page_queue_throttled, m, vm_page_t, vmp_pageq) {
4646 VM_PAGE_CHECK(m);
4647 assert(m->vmp_q_state == VM_PAGE_ON_THROTTLED_Q);
4648
4649 m_object = VM_PAGE_OBJECT(m);
4650
4651 extra_active_count++;
4652 if (m_object->internal) {
4653 extra_internal_count++;
4654 } else {
4655 extra_external_count++;
4656 }
4657
4658 m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
4659 VM_PAGE_CHECK(m);
4660 #if CONFIG_BACKGROUND_QUEUE
4661 if (m->vmp_in_background)
4662 vm_page_add_to_backgroundq(m, FALSE);
4663 #endif
4664 }
4665
4666 /*
4667 * Transfer the entire throttled queue to the regular LRU page queues.
4668 * We insert it at the head of the active queue, so that these pages
4669 * get re-evaluated by the LRU algorithm first, since they've been
4670 * completely out of it until now.
4671 */
4672 first_throttled = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
4673 last_throttled = (vm_page_t) vm_page_queue_last(&vm_page_queue_throttled);
4674 first_active = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
4675 if (vm_page_queue_empty(&vm_page_queue_active)) {
4676 vm_page_queue_active.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
4677 } else {
4678 first_active->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
4679 }
4680 vm_page_queue_active.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_throttled);
4681 first_throttled->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active);
4682 last_throttled->vmp_pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active);
4683
4684 #if DEBUG
4685 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
4686 #endif
4687 vm_page_queue_init(&vm_page_queue_throttled);
4688 /*
4689 * Adjust the global page counts.
4690 */
4691 vm_page_active_count += extra_active_count;
4692 vm_page_pageable_internal_count += extra_internal_count;
4693 vm_page_pageable_external_count += extra_external_count;
4694 vm_page_throttled_count = 0;
4695 }
4696 assert(vm_page_throttled_count == 0);
4697 assert(vm_page_queue_empty(&vm_page_queue_throttled));
4698 vm_page_unlock_queues();
4699 }
4700
4701
4702 /*
4703 * move pages from the indicated local queue to the global active queue
4704 * it's OK to fail if we're below the hard limit and force == FALSE
4705 * the nolocks == TRUE case allows this function to be run on
4706 * the hibernate path
4707 */
4708
4709 void
4710 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
4711 {
4712 struct vpl *lq;
4713 vm_page_t first_local, last_local;
4714 vm_page_t first_active;
4715 vm_page_t m;
4716 uint32_t count = 0;
4717
4718 if (vm_page_local_q == NULL)
4719 return;
4720
4721 lq = &vm_page_local_q[lid].vpl_un.vpl;
4722
4723 if (nolocks == FALSE) {
4724 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
4725 if ( !vm_page_trylockspin_queues())
4726 return;
4727 } else
4728 vm_page_lockspin_queues();
4729
4730 VPL_LOCK(&lq->vpl_lock);
4731 }
4732 if (lq->vpl_count) {
4733 /*
4734 * Switch "local" pages to "active".
4735 */
4736 assert(!vm_page_queue_empty(&lq->vpl_queue));
4737
4738 vm_page_queue_iterate(&lq->vpl_queue, m, vm_page_t, vmp_pageq) {
4739 VM_PAGE_CHECK(m);
4740 vm_page_check_pageable_safe(m);
4741 assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q);
4742 assert(!m->vmp_fictitious);
4743
4744 if (m->vmp_local_id != lid)
4745 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
4746
4747 m->vmp_local_id = 0;
4748 m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
4749 VM_PAGE_CHECK(m);
4750 #if CONFIG_BACKGROUND_QUEUE
4751 if (m->vmp_in_background)
4752 vm_page_add_to_backgroundq(m, FALSE);
4753 #endif
4754 count++;
4755 }
4756 if (count != lq->vpl_count)
4757 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
4758
4759 /*
4760 * Transfer the entire local queue to the regular LRU page queues.
4761 */
4762 first_local = (vm_page_t) vm_page_queue_first(&lq->vpl_queue);
4763 last_local = (vm_page_t) vm_page_queue_last(&lq->vpl_queue);
4764 first_active = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
4765
4766 if (vm_page_queue_empty(&vm_page_queue_active)) {
4767 vm_page_queue_active.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
4768 } else {
4769 first_active->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
4770 }
4771 vm_page_queue_active.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
4772 first_local->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active);
4773 last_local->vmp_pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active);
4774
4775 vm_page_queue_init(&lq->vpl_queue);
4776 /*
4777 * Adjust the global page counts.
4778 */
4779 vm_page_active_count += lq->vpl_count;
4780 vm_page_pageable_internal_count += lq->vpl_internal_count;
4781 vm_page_pageable_external_count += lq->vpl_external_count;
4782 lq->vpl_count = 0;
4783 lq->vpl_internal_count = 0;
4784 lq->vpl_external_count = 0;
4785 }
4786 assert(vm_page_queue_empty(&lq->vpl_queue));
4787
4788 if (nolocks == FALSE) {
4789 VPL_UNLOCK(&lq->vpl_lock);
4790
4791 vm_page_balance_inactive(count / 4);
4792 vm_page_unlock_queues();
4793 }
4794 }
4795
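/*
 * Illustrative sketch (hypothetical caller): a CPU opportunistically draining
 * its own local queue would call
 *
 *	vm_page_reactivate_local(cpu_lid, FALSE, FALSE);
 *
 * where "cpu_lid" is that CPU's local queue id; with force == FALSE the call
 * may simply return if the queue is still under vm_page_local_q_hard_limit
 * and the queues lock can't be taken without spinning.  The hibernate path
 * passes nolocks == TRUE, in which case no locks are taken at all.
 */
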
4796 /*
4797 * vm_page_part_zero_fill:
4798 *
4799 * Zero-fill a part of the page.
4800 */
4801 #define PMAP_ZERO_PART_PAGE_IMPLEMENTED
4802 void
4803 vm_page_part_zero_fill(
4804 vm_page_t m,
4805 vm_offset_t m_pa,
4806 vm_size_t len)
4807 {
4808
4809 #if 0
4810 /*
4811 * we don't hold the page queue lock
4812 * so this check isn't safe to make
4813 */
4814 VM_PAGE_CHECK(m);
4815 #endif
4816
4817 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
4818 pmap_zero_part_page(VM_PAGE_GET_PHYS_PAGE(m), m_pa, len);
4819 #else
4820 vm_page_t tmp;
4821 while (1) {
4822 tmp = vm_page_grab();
4823 if (tmp == VM_PAGE_NULL) {
4824 vm_page_wait(THREAD_UNINT);
4825 continue;
4826 }
4827 break;
4828 }
4829 vm_page_zero_fill(tmp);
4830 if(m_pa != 0) {
4831 vm_page_part_copy(m, 0, tmp, 0, m_pa);
4832 }
4833 if((m_pa + len) < PAGE_SIZE) {
4834 vm_page_part_copy(m, m_pa + len, tmp,
4835 m_pa + len, PAGE_SIZE - (m_pa + len));
4836 }
4837 vm_page_copy(tmp,m);
4838 VM_PAGE_FREE(tmp);
4839 #endif
4840
4841 }
4842
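/*
 * Illustrative sketch (hypothetical caller): a typical use is zeroing the
 * unused tail of a page whose first "valid_len" bytes were just filled, e.g.
 * after a short read from a pager.  "valid_len" is an assumed local; the
 * offset/length pair simply selects the byte range of the page to clear:
 *
 *	if (valid_len < PAGE_SIZE)
 *		vm_page_part_zero_fill(m, valid_len, PAGE_SIZE - valid_len);
 */
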
4843 /*
4844 * vm_page_zero_fill:
4845 *
4846 * Zero-fill the specified page.
4847 */
4848 void
4849 vm_page_zero_fill(
4850 vm_page_t m)
4851 {
4852 XPR(XPR_VM_PAGE,
4853 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
4854 VM_PAGE_OBJECT(m), m->vmp_offset, m, 0,0);
4855 #if 0
4856 /*
4857 * we don't hold the page queue lock
4858 * so this check isn't safe to make
4859 */
4860 VM_PAGE_CHECK(m);
4861 #endif
4862
4863 // dbgTrace(0xAEAEAEAE, VM_PAGE_GET_PHYS_PAGE(m), 0); /* (BRINGUP) */
4864 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
4865 }
4866
4867 /*
4868 * vm_page_part_copy:
4869 *
4870 * copy part of one page to another
4871 */
4872
4873 void
4874 vm_page_part_copy(
4875 vm_page_t src_m,
4876 vm_offset_t src_pa,
4877 vm_page_t dst_m,
4878 vm_offset_t dst_pa,
4879 vm_size_t len)
4880 {
4881 #if 0
4882 /*
4883 * we don't hold the page queue lock
4884 * so this check isn't safe to make
4885 */
4886 VM_PAGE_CHECK(src_m);
4887 VM_PAGE_CHECK(dst_m);
4888 #endif
4889 pmap_copy_part_page(VM_PAGE_GET_PHYS_PAGE(src_m), src_pa,
4890 VM_PAGE_GET_PHYS_PAGE(dst_m), dst_pa, len);
4891 }
4892
4893 /*
4894 * vm_page_copy:
4895 *
4896 * Copy one page to another
4897 */
4898
4899 int vm_page_copy_cs_validations = 0;
4900 int vm_page_copy_cs_tainted = 0;
4901
4902 void
4903 vm_page_copy(
4904 vm_page_t src_m,
4905 vm_page_t dest_m)
4906 {
4907 vm_object_t src_m_object;
4908
4909 src_m_object = VM_PAGE_OBJECT(src_m);
4910
4911 XPR(XPR_VM_PAGE,
4912 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
4913 src_m_object, src_m->vmp_offset,
4914 VM_PAGE_OBJECT(dest_m), dest_m->vmp_offset,
4915 0);
4916 #if 0
4917 /*
4918 * we don't hold the page queue lock
4919 * so this check isn't safe to make
4920 */
4921 VM_PAGE_CHECK(src_m);
4922 VM_PAGE_CHECK(dest_m);
4923 #endif
4924 vm_object_lock_assert_held(src_m_object);
4925
4926 if (src_m_object != VM_OBJECT_NULL &&
4927 src_m_object->code_signed) {
4928 /*
4929 * We're copying a page from a code-signed object.
4930 * Whoever ends up mapping the copy page might care about
4931 * the original page's integrity, so let's validate the
4932 * source page now.
4933 */
4934 vm_page_copy_cs_validations++;
4935 vm_page_validate_cs(src_m);
4936 #if DEVELOPMENT || DEBUG
4937 DTRACE_VM4(codesigned_copy,
4938 vm_object_t, src_m_object,
4939 vm_object_offset_t, src_m->vmp_offset,
4940 int, src_m->vmp_cs_validated,
4941 int, src_m->vmp_cs_tainted);
4942 #endif /* DEVELOPMENT || DEBUG */
4943
4944 }
4945
4946 /*
4947 * Propagate the cs_tainted bit to the copy page. Do not propagate
4948 * the cs_validated bit.
4949 */
4950 dest_m->vmp_cs_tainted = src_m->vmp_cs_tainted;
4951 if (dest_m->vmp_cs_tainted) {
4952 vm_page_copy_cs_tainted++;
4953 }
4954 dest_m->vmp_error = src_m->vmp_error; /* sliding src_m might have failed... */
4955 pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(src_m), VM_PAGE_GET_PHYS_PAGE(dest_m));
4956 }
4957
4958 #if MACH_ASSERT
4959 static void
4960 _vm_page_print(
4961 vm_page_t p)
4962 {
4963 printf("vm_page %p: \n", p);
4964 printf(" pageq: next=%p prev=%p\n",
4965 (vm_page_t)VM_PAGE_UNPACK_PTR(p->vmp_pageq.next),
4966 (vm_page_t)VM_PAGE_UNPACK_PTR(p->vmp_pageq.prev));
4967 printf(" listq: next=%p prev=%p\n",
4968 (vm_page_t)(VM_PAGE_UNPACK_PTR(p->vmp_listq.next)),
4969 (vm_page_t)(VM_PAGE_UNPACK_PTR(p->vmp_listq.prev)));
4970 printf(" next=%p\n", (vm_page_t)(VM_PAGE_UNPACK_PTR(p->vmp_next_m)));
4971 printf(" object=%p offset=0x%llx\n",VM_PAGE_OBJECT(p), p->vmp_offset);
4972 printf(" wire_count=%u\n", p->vmp_wire_count);
4973 printf(" q_state=%u\n", p->vmp_q_state);
4974
4975 printf(" %slaundry, %sref, %sgobbled, %sprivate\n",
4976 (p->vmp_laundry ? "" : "!"),
4977 (p->vmp_reference ? "" : "!"),
4978 (p->vmp_gobbled ? "" : "!"),
4979 (p->vmp_private ? "" : "!"));
4980 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
4981 (p->vmp_busy ? "" : "!"),
4982 (p->vmp_wanted ? "" : "!"),
4983 (p->vmp_tabled ? "" : "!"),
4984 (p->vmp_fictitious ? "" : "!"),
4985 (p->vmp_pmapped ? "" : "!"),
4986 (p->vmp_wpmapped ? "" : "!"));
4987 printf(" %sfree_when_done, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
4988 (p->vmp_free_when_done ? "" : "!"),
4989 (p->vmp_absent ? "" : "!"),
4990 (p->vmp_error ? "" : "!"),
4991 (p->vmp_dirty ? "" : "!"),
4992 (p->vmp_cleaning ? "" : "!"),
4993 (p->vmp_precious ? "" : "!"),
4994 (p->vmp_clustered ? "" : "!"));
4995 printf(" %soverwriting, %srestart, %sunusual\n",
4996 (p->vmp_overwriting ? "" : "!"),
4997 (p->vmp_restart ? "" : "!"),
4998 (p->vmp_unusual ? "" : "!"));
4999 printf(" %scs_validated, %scs_tainted, %scs_nx, %sno_cache\n",
5000 (p->vmp_cs_validated ? "" : "!"),
5001 (p->vmp_cs_tainted ? "" : "!"),
5002 (p->vmp_cs_nx ? "" : "!"),
5003 (p->vmp_no_cache ? "" : "!"));
5004
5005 printf("phys_page=0x%x\n", VM_PAGE_GET_PHYS_PAGE(p));
5006 }
5007
5008 /*
5009 * Check that the list of pages is ordered by
5010 * ascending physical address and has no holes.
5011 */
5012 static int
5013 vm_page_verify_contiguous(
5014 vm_page_t pages,
5015 unsigned int npages)
5016 {
5017 vm_page_t m;
5018 unsigned int page_count;
5019 vm_offset_t prev_addr;
5020
5021 prev_addr = VM_PAGE_GET_PHYS_PAGE(pages);
5022 page_count = 1;
5023 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
5024 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
5025 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
5026 m, (long)prev_addr, VM_PAGE_GET_PHYS_PAGE(m));
5027 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
5028 panic("vm_page_verify_contiguous: not contiguous!");
5029 }
5030 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
5031 ++page_count;
5032 }
5033 if (page_count != npages) {
5034 printf("pages %p actual count 0x%x but requested 0x%x\n",
5035 pages, page_count, npages);
5036 panic("vm_page_verify_contiguous: count error");
5037 }
5038 return 1;
5039 }
5040
5041
5042 /*
5043 * Check the free lists for proper length etc.
5044 */
5045 static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
5046 static unsigned int
5047 vm_page_verify_free_list(
5048 vm_page_queue_head_t *vm_page_queue,
5049 unsigned int color,
5050 vm_page_t look_for_page,
5051 boolean_t expect_page)
5052 {
5053 unsigned int npages;
5054 vm_page_t m;
5055 vm_page_t prev_m;
5056 boolean_t found_page;
5057
5058 if (! vm_page_verify_this_free_list_enabled)
5059 return 0;
5060
5061 found_page = FALSE;
5062 npages = 0;
5063 prev_m = (vm_page_t)((uintptr_t)vm_page_queue);
5064
5065 vm_page_queue_iterate(vm_page_queue,
5066 m,
5067 vm_page_t,
5068 vmp_pageq) {
5069
5070 if (m == look_for_page) {
5071 found_page = TRUE;
5072 }
5073 if ((vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.prev) != prev_m)
5074 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
5075 color, npages, m, (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.prev), prev_m);
5076 if ( ! m->vmp_busy )
5077 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
5078 color, npages, m);
5079 if (color != (unsigned int) -1) {
5080 if (VM_PAGE_GET_COLOR(m) != color)
5081 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
5082 color, npages, m, VM_PAGE_GET_COLOR(m), color);
5083 if (m->vmp_q_state != VM_PAGE_ON_FREE_Q)
5084 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p - expecting q_state == VM_PAGE_ON_FREE_Q, found %d\n",
5085 color, npages, m, m->vmp_q_state);
5086 } else {
5087 if (m->vmp_q_state != VM_PAGE_ON_FREE_LOCAL_Q)
5088 panic("vm_page_verify_free_list(npages=%u): local page %p - expecting q_state == VM_PAGE_ON_FREE_LOCAL_Q, found %d\n",
5089 npages, m, m->vmp_q_state);
5090 }
5091 ++npages;
5092 prev_m = m;
5093 }
5094 if (look_for_page != VM_PAGE_NULL) {
5095 unsigned int other_color;
5096
5097 if (expect_page && !found_page) {
5098 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
5099 color, npages, look_for_page, VM_PAGE_GET_PHYS_PAGE(look_for_page));
5100 _vm_page_print(look_for_page);
5101 for (other_color = 0;
5102 other_color < vm_colors;
5103 other_color++) {
5104 if (other_color == color)
5105 continue;
5106 vm_page_verify_free_list(&vm_page_queue_free[other_color].qhead,
5107 other_color, look_for_page, FALSE);
5108 }
5109 if (color == (unsigned int) -1) {
5110 vm_page_verify_free_list(&vm_lopage_queue_free,
5111 (unsigned int) -1, look_for_page, FALSE);
5112 }
5113 panic("vm_page_verify_free_list(color=%u)\n", color);
5114 }
5115 if (!expect_page && found_page) {
5116 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
5117 color, npages, look_for_page, VM_PAGE_GET_PHYS_PAGE(look_for_page));
5118 }
5119 }
5120 return npages;
5121 }
5122
5123 static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
5124 static void
5125 vm_page_verify_free_lists( void )
5126 {
5127 unsigned int color, npages, nlopages;
5128 boolean_t toggle = TRUE;
5129
5130 if (! vm_page_verify_all_free_lists_enabled)
5131 return;
5132
5133 npages = 0;
5134
5135 lck_mtx_lock(&vm_page_queue_free_lock);
5136
5137 if (vm_page_verify_this_free_list_enabled == TRUE) {
5138 /*
5139 * This variable has been set globally for extra checking of
5140 * each free list Q. Since we didn't set it, we don't own it
5141 * and we shouldn't toggle it.
5142 */
5143 toggle = FALSE;
5144 }
5145
5146 if (toggle == TRUE) {
5147 vm_page_verify_this_free_list_enabled = TRUE;
5148 }
5149
5150 for( color = 0; color < vm_colors; color++ ) {
5151 npages += vm_page_verify_free_list(&vm_page_queue_free[color].qhead,
5152 color, VM_PAGE_NULL, FALSE);
5153 }
5154 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
5155 (unsigned int) -1,
5156 VM_PAGE_NULL, FALSE);
5157 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
5158 panic("vm_page_verify_free_lists: "
5159 "npages %u free_count %d nlopages %u lo_free_count %u",
5160 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
5161
5162 if (toggle == TRUE) {
5163 vm_page_verify_this_free_list_enabled = FALSE;
5164 }
5165
5166 lck_mtx_unlock(&vm_page_queue_free_lock);
5167 }
5168
5169 #endif /* MACH_ASSERT */
5170
5171
5172
5173 #if __arm64__
5174 /*
5175 * One or more clients (currently only SEP) ask for a large contiguous chunk of memory
5176 * after the system has 'aged'. To ensure that other allocation requests don't mess
5177 * with the chances of that request being satisfied, we pre-allocate a single contiguous
5178 * 10MB buffer and hand it out to the first request of >= 4MB.
5179 */
5180
5181 kern_return_t cpm_preallocate_early(void);
5182
5183 vm_page_t cpm_preallocated_pages_list = NULL;
5184 boolean_t preallocated_buffer_available = FALSE;
5185
5186 #define PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT ((10 * 1024 * 1024) / PAGE_SIZE_64) /* 10 MB */
5187 #define MIN_CONTIG_PAGES_REQUEST_FOR_PREALLOCATED_BUFFER ((4 * 1024 * 1024) / PAGE_SIZE_64) /* 4 MB */
5188
5189 kern_return_t
5190 cpm_preallocate_early(void)
5191 {
5192
5193 kern_return_t kr = KERN_SUCCESS;
5194 vm_map_size_t prealloc_size = (PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT * PAGE_SIZE_64);
5195
5196 printf("cpm_preallocate_early called to preallocate contiguous buffer of %llu pages\n", PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT);
5197
5198 kr = cpm_allocate(CAST_DOWN(vm_size_t, prealloc_size), &cpm_preallocated_pages_list, 0, 0, TRUE, 0);
5199
5200 if (kr != KERN_SUCCESS) {
5201 printf("cpm_allocate for preallocated contig buffer failed with %d.\n", kr);
5202 } else {
5203 preallocated_buffer_available = TRUE;
5204 }
5205
5206 return kr;
5207 }
5208 #endif /* __arm64__ */
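
/*
 * For reference (illustrative arithmetic only): with 16KB pages the constants
 * above work out to
 *
 *	PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT           = (10 * 1024 * 1024) / 16384 = 640 pages
 *	MIN_CONTIG_PAGES_REQUEST_FOR_PREALLOCATED_BUFFER = (4 * 1024 * 1024)  / 16384 = 256 pages
 *
 * and with 4KB pages to 2560 and 1024 pages respectively.  The first request
 * in that range is satisfied from the preallocated buffer, any pages beyond
 * the requested size are freed back to the VM, and the buffer is then marked
 * unavailable (see the preallocated-buffer path in vm_page_find_contiguous()).
 */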
5209
5210
5211 extern boolean_t (* volatile consider_buffer_cache_collect)(int);
5212
5213 /*
5214 * CONTIGUOUS PAGE ALLOCATION
5215 *
5216 * Find a region large enough to contain at least n pages
5217 * of contiguous physical memory.
5218 *
5219 * This is done by traversing the vm_page_t array in a linear fashion.
5220 * We assume that the vm_page_t array has the available physical pages in an
5221 * ordered, ascending list... this is currently true of all our implementations
5222 * and must remain so... there can be 'holes' in the array... we also can
5223 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed,
5224 * which used to happen via 'vm_page_convert'... that function was no longer
5225 * being called and was removed...
5226 *
5227 * The basic flow consists of stabilizing some of the interesting state of
5228 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
5229 * sweep at the beginning of the array looking for pages that meet our criteria
5230 * for a 'stealable' page... currently we are pretty conservative... if the page
5231 * meets these criteria and is physically contiguous to the previous page in the 'run',
5232 * we keep developing it. If we hit a page that doesn't fit, we reset our state
5233 * and start to develop a new run... if at this point we've already considered
5234 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
5235 * and mutex_pause (which will yield the processor), to keep the latency low w/r
5236 * to other threads trying to acquire free pages (or move pages from q to q),
5237 * and then continue from the spot we left off... we only make 1 pass through the
5238 * array. Once we have a 'run' that is long enough, we'll go into the loop
5239 * which steals the pages from the queues they're currently on... pages on the free
5240 * queue can be stolen directly... pages that are on any of the other queues
5241 * must be removed from the object they are tabled on... this requires taking the
5242 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
5243 * or if the state of the page behind the vm_object lock is no longer viable, we'll
5244 * dump the pages we've currently stolen back to the free list, and pick up our
5245 * scan from the point where we aborted the 'current' run.
5246 *
5247 *
5248 * Requirements:
5249 * - neither vm_page_queue nor vm_free_list lock can be held on entry
5250 *
5251 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
5252 *
5253 * Algorithm:
5254 */
5255
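/*
 * Illustrative note on the alignment parameters (assumption: a hypothetical
 * caller wanting a 2MB-aligned run with 16KB pages): such a caller passes
 *
 *	pnum_mask = (2MB / PAGE_SIZE) - 1 = 128 - 1 = 0x7f
 *
 * and the scan below only starts a run on a page whose physical page number
 * satisfies (phys_page & pnum_mask) == 0.  max_pnum, when non-zero, caps the
 * search to physical pages at or below that number, e.g. for low-memory
 * (KMA_LOMEM-style) requests.
 */
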
5256 #define MAX_CONSIDERED_BEFORE_YIELD 1000
5257
5258
5259 #define RESET_STATE_OF_RUN() \
5260 MACRO_BEGIN \
5261 prevcontaddr = -2; \
5262 start_pnum = -1; \
5263 free_considered = 0; \
5264 substitute_needed = 0; \
5265 npages = 0; \
5266 MACRO_END
5267
5268 /*
5269 * Can we steal in-use (i.e. not free) pages when searching for
5270 * physically-contiguous pages ?
5271 */
5272 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
5273
5274 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
5275 #if DEBUG
5276 int vm_page_find_contig_debug = 0;
5277 #endif
5278
5279 static vm_page_t
5280 vm_page_find_contiguous(
5281 unsigned int contig_pages,
5282 ppnum_t max_pnum,
5283 ppnum_t pnum_mask,
5284 boolean_t wire,
5285 int flags)
5286 {
5287 vm_page_t m = NULL;
5288 ppnum_t prevcontaddr = 0;
5289 ppnum_t start_pnum = 0;
5290 unsigned int npages = 0, considered = 0, scanned = 0;
5291 unsigned int page_idx = 0, start_idx = 0, last_idx = 0, orig_last_idx = 0;
5292 unsigned int idx_last_contig_page_found = 0;
5293 int free_considered = 0, free_available = 0;
5294 int substitute_needed = 0;
5295 boolean_t wrapped, zone_gc_called = FALSE;
5296 kern_return_t kr;
5297 #if DEBUG
5298 clock_sec_t tv_start_sec = 0, tv_end_sec = 0;
5299 clock_usec_t tv_start_usec = 0, tv_end_usec = 0;
5300 #endif
5301
5302 int yielded = 0;
5303 int dumped_run = 0;
5304 int stolen_pages = 0;
5305 int compressed_pages = 0;
5306
5307
5308 if (contig_pages == 0)
5309 return VM_PAGE_NULL;
5310
5311 full_scan_again:
5312
5313 #if MACH_ASSERT
5314 vm_page_verify_free_lists();
5315 #endif
5316 #if DEBUG
5317 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
5318 #endif
5319 PAGE_REPLACEMENT_ALLOWED(TRUE);
5320
5321 vm_page_lock_queues();
5322
5323 #if __arm64__
5324 if (preallocated_buffer_available) {
5325
5326 if ((contig_pages >= MIN_CONTIG_PAGES_REQUEST_FOR_PREALLOCATED_BUFFER) && (contig_pages <= PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT)) {
5327
5328 m = cpm_preallocated_pages_list;
5329
5330 start_idx = (unsigned int) (m - &vm_pages[0]);
5331
5332 if (wire == FALSE) {
5333
5334 last_idx = start_idx;
5335
5336 for(npages = 0; npages < contig_pages; npages++, last_idx++) {
5337
5338 assert(vm_pages[last_idx].vmp_gobbled == FALSE);
5339
5340 vm_pages[last_idx].vmp_gobbled = TRUE;
5341 vm_page_gobble_count++;
5342
5343 assert(1 == vm_pages[last_idx].vmp_wire_count);
5344 /*
5345 * Gobbled pages are counted as wired pages, so there is no need to adjust
5346 * the global wired page count; dropping just the page's wire count is enough.
5347 */
5348 vm_pages[last_idx].vmp_wire_count--;
5349 vm_pages[last_idx].vmp_q_state = VM_PAGE_NOT_ON_Q;
5350 }
5351
5352 }
5353
5354 last_idx = start_idx + contig_pages - 1;
5355
5356 vm_pages[last_idx].vmp_snext = NULL;
5357
5358 printf("Using preallocated buffer: Requested size (pages):%d... index range: %d-%d...freeing %llu pages\n", contig_pages, start_idx, last_idx, PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT - contig_pages);
5359
5360 last_idx += 1;
5361 for(npages = contig_pages; npages < PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT; npages++, last_idx++) {
5362
5363 VM_PAGE_ZERO_PAGEQ_ENTRY(&vm_pages[last_idx]);
5364 vm_page_free(&vm_pages[last_idx]);
5365 }
5366
5367 cpm_preallocated_pages_list = NULL;
5368 preallocated_buffer_available = FALSE;
5369
5370 goto done_scanning;
5371 }
5372 }
5373 #endif /* __arm64__ */
5374
5375 lck_mtx_lock(&vm_page_queue_free_lock);
5376
5377 RESET_STATE_OF_RUN();
5378
5379 scanned = 0;
5380 considered = 0;
5381 free_available = vm_page_free_count - vm_page_free_reserved;
5382
5383 wrapped = FALSE;
5384
5385 if(flags & KMA_LOMEM)
5386 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
5387 else
5388 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
5389
5390 orig_last_idx = idx_last_contig_page_found;
5391 last_idx = orig_last_idx;
5392
5393 for (page_idx = last_idx, start_idx = last_idx;
5394 npages < contig_pages && page_idx < vm_pages_count;
5395 page_idx++) {
5396 retry:
5397 if (wrapped &&
5398 npages == 0 &&
5399 page_idx >= orig_last_idx) {
5400 /*
5401 * We're back where we started and we haven't
5402 * found any suitable contiguous range. Let's
5403 * give up.
5404 */
5405 break;
5406 }
5407 scanned++;
5408 m = &vm_pages[page_idx];
5409
5410 assert(!m->vmp_fictitious);
5411 assert(!m->vmp_private);
5412
5413 if (max_pnum && VM_PAGE_GET_PHYS_PAGE(m) > max_pnum) {
5414 /* no more low pages... */
5415 break;
5416 }
5417 if (!npages && ((VM_PAGE_GET_PHYS_PAGE(m) & pnum_mask) != 0)) {
5418 /*
5419 * not aligned
5420 */
5421 RESET_STATE_OF_RUN();
5422
5423 } else if (VM_PAGE_WIRED(m) || m->vmp_gobbled ||
5424 m->vmp_laundry || m->vmp_wanted ||
5425 m->vmp_cleaning || m->vmp_overwriting || m->vmp_free_when_done) {
5426 /*
5427 * page is in a transient state
5428 * or a state we don't want to deal
5429 * with, so don't consider it which
5430 * means starting a new run
5431 */
5432 RESET_STATE_OF_RUN();
5433
5434 } else if ((m->vmp_q_state == VM_PAGE_NOT_ON_Q) ||
5435 (m->vmp_q_state == VM_PAGE_ON_FREE_LOCAL_Q) ||
5436 (m->vmp_q_state == VM_PAGE_ON_FREE_LOPAGE_Q) ||
5437 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
5438 /*
5439 * page needs to be on one of our queues (other than the pageout or special free queues)
5440 * or it needs to belong to the compressor pool (which is now indicated
5441 * by vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR and falls out
5442 * from the check for VM_PAGE_NOT_ON_Q)
5443 * in order for it to be stable behind the
5444 * locks we hold at this point...
5445 * if not, don't consider it which
5446 * means starting a new run
5447 */
5448 RESET_STATE_OF_RUN();
5449
5450 } else if ((m->vmp_q_state != VM_PAGE_ON_FREE_Q) && (!m->vmp_tabled || m->vmp_busy)) {
5451 /*
5452 * pages on the free list are always 'busy'
5453 * so we couldn't test for 'busy' in the check
5454 * for the transient states... pages that are
5455 * 'free' are never 'tabled', so we also couldn't
5456 * test for 'tabled'. So we check here to make
5457 * sure that a non-free page is not busy and is
5458 * tabled on an object...
5459 * if not, don't consider it which
5460 * means starting a new run
5461 */
5462 RESET_STATE_OF_RUN();
5463
5464 } else {
5465 if (VM_PAGE_GET_PHYS_PAGE(m) != prevcontaddr + 1) {
5466 if ((VM_PAGE_GET_PHYS_PAGE(m) & pnum_mask) != 0) {
5467 RESET_STATE_OF_RUN();
5468 goto did_consider;
5469 } else {
5470 npages = 1;
5471 start_idx = page_idx;
5472 start_pnum = VM_PAGE_GET_PHYS_PAGE(m);
5473 }
5474 } else {
5475 npages++;
5476 }
5477 prevcontaddr = VM_PAGE_GET_PHYS_PAGE(m);
5478
5479 VM_PAGE_CHECK(m);
5480 if (m->vmp_q_state == VM_PAGE_ON_FREE_Q) {
5481 free_considered++;
5482 } else {
5483 /*
5484 * This page is not free.
5485 * If we can't steal used pages,
5486 * we have to give up this run
5487 * and keep looking.
5488 * Otherwise, we might need to
5489 * move the contents of this page
5490 * into a substitute page.
5491 */
5492 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
5493 if (m->vmp_pmapped || m->vmp_dirty || m->vmp_precious) {
5494 substitute_needed++;
5495 }
5496 #else
5497 RESET_STATE_OF_RUN();
5498 #endif
5499 }
5500
5501 if ((free_considered + substitute_needed) > free_available) {
5502 /*
5503 * if we let this run continue
5504 * we will end up dropping the vm_page_free_count
5505 * below the reserve limit... we need to abort
5506 * this run, but we can at least re-consider this
5507 * page... thus the jump back to 'retry'
5508 */
5509 RESET_STATE_OF_RUN();
5510
5511 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
5512 considered++;
5513 goto retry;
5514 }
5515 /*
5516 * free_available == 0
5517 * so we can't consider any free pages... if
5518 * we went to retry in this case, we'd
5519 * get stuck looking at the same page
5520 * w/o making any forward progress.
5521 * We also want to take this path if we've already
5522 * reached our limit that controls the lock latency
5523 */
5524 }
5525 }
5526 did_consider:
5527 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
5528
5529 PAGE_REPLACEMENT_ALLOWED(FALSE);
5530
5531 lck_mtx_unlock(&vm_page_queue_free_lock);
5532 vm_page_unlock_queues();
5533
5534 mutex_pause(0);
5535
5536 PAGE_REPLACEMENT_ALLOWED(TRUE);
5537
5538 vm_page_lock_queues();
5539 lck_mtx_lock(&vm_page_queue_free_lock);
5540
5541 RESET_STATE_OF_RUN();
5542 /*
5543 * reset our free page limit since we
5544 * dropped the lock protecting the vm_page_free_queue
5545 */
5546 free_available = vm_page_free_count - vm_page_free_reserved;
5547 considered = 0;
5548
5549 yielded++;
5550
5551 goto retry;
5552 }
5553 considered++;
5554 }
5555 m = VM_PAGE_NULL;
5556
5557 if (npages != contig_pages) {
5558 if (!wrapped) {
5559 /*
5560 * We didn't find a contiguous range but we didn't
5561 * start from the very first page.
5562 * Start again from the very first page.
5563 */
5564 RESET_STATE_OF_RUN();
5565 if( flags & KMA_LOMEM)
5566 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
5567 else
5568 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
5569 last_idx = 0;
5570 page_idx = last_idx;
5571 wrapped = TRUE;
5572 goto retry;
5573 }
5574 lck_mtx_unlock(&vm_page_queue_free_lock);
5575 } else {
5576 vm_page_t m1;
5577 vm_page_t m2;
5578 unsigned int cur_idx;
5579 unsigned int tmp_start_idx;
5580 vm_object_t locked_object = VM_OBJECT_NULL;
5581 boolean_t abort_run = FALSE;
5582
5583 assert(page_idx - start_idx == contig_pages);
5584
5585 tmp_start_idx = start_idx;
5586
5587 /*
5588 * first pass through to pull the free pages
5589 * off of the free queue so that in case we
5590 * need substitute pages, we won't grab any
5591 * of the free pages in the run... we'll clear
5592 * the 'free' bit in the 2nd pass, and even in
5593 * an abort_run case, we'll collect all of the
5594 * free pages in this run and return them to the free list
5595 */
5596 while (start_idx < page_idx) {
5597
5598 m1 = &vm_pages[start_idx++];
5599
5600 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
5601 assert(m1->vmp_q_state == VM_PAGE_ON_FREE_Q);
5602 #endif
5603
5604 if (m1->vmp_q_state == VM_PAGE_ON_FREE_Q) {
5605 unsigned int color;
5606
5607 color = VM_PAGE_GET_COLOR(m1);
5608 #if MACH_ASSERT
5609 vm_page_verify_free_list(&vm_page_queue_free[color].qhead, color, m1, TRUE);
5610 #endif
5611 vm_page_queue_remove(&vm_page_queue_free[color].qhead,
5612 m1,
5613 vm_page_t,
5614 vmp_pageq);
5615
5616 VM_PAGE_ZERO_PAGEQ_ENTRY(m1);
5617 #if MACH_ASSERT
5618 vm_page_verify_free_list(&vm_page_queue_free[color].qhead, color, VM_PAGE_NULL, FALSE);
5619 #endif
5620 /*
5621 * Clear the "free" bit so that this page
5622 * does not get considered for another
5623 * concurrent physically-contiguous allocation.
5624 */
5625 m1->vmp_q_state = VM_PAGE_NOT_ON_Q;
5626 assert(m1->vmp_busy);
5627
5628 vm_page_free_count--;
5629 }
5630 }
5631 if( flags & KMA_LOMEM)
5632 vm_page_lomem_find_contiguous_last_idx = page_idx;
5633 else
5634 vm_page_find_contiguous_last_idx = page_idx;
5635
5636 /*
5637 * we can drop the free queue lock at this point since
5638 * we've pulled any 'free' candidates off of the list...
5639 * we need it dropped so that we can do a vm_page_grab
5640 * when substituting for pmapped/dirty pages
5641 */
5642 lck_mtx_unlock(&vm_page_queue_free_lock);
5643
5644 start_idx = tmp_start_idx;
5645 cur_idx = page_idx - 1;
5646
5647 while (start_idx++ < page_idx) {
5648 /*
5649 * must go through the list from back to front
5650 * so that the page list is created in the
5651 * correct order - low -> high phys addresses
5652 */
5653 m1 = &vm_pages[cur_idx--];
5654
5655 if (m1->vmp_object == 0) {
5656 /*
5657 * page has already been removed from
5658 * the free list in the 1st pass
5659 */
5660 assert(m1->vmp_q_state == VM_PAGE_NOT_ON_Q);
5661 assert(m1->vmp_offset == (vm_object_offset_t) -1);
5662 assert(m1->vmp_busy);
5663 assert(!m1->vmp_wanted);
5664 assert(!m1->vmp_laundry);
5665 } else {
5666 vm_object_t object;
5667 int refmod;
5668 boolean_t disconnected, reusable;
5669
5670 if (abort_run == TRUE)
5671 continue;
5672
5673 assert(m1->vmp_q_state != VM_PAGE_NOT_ON_Q);
5674
5675 object = VM_PAGE_OBJECT(m1);
5676
5677 if (object != locked_object) {
5678 if (locked_object) {
5679 vm_object_unlock(locked_object);
5680 locked_object = VM_OBJECT_NULL;
5681 }
5682 if (vm_object_lock_try(object))
5683 locked_object = object;
5684 }
5685 if (locked_object == VM_OBJECT_NULL ||
5686 (VM_PAGE_WIRED(m1) || m1->vmp_gobbled ||
5687 m1->vmp_laundry || m1->vmp_wanted ||
5688 m1->vmp_cleaning || m1->vmp_overwriting || m1->vmp_free_when_done || m1->vmp_busy) ||
5689 (m1->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
5690
5691 if (locked_object) {
5692 vm_object_unlock(locked_object);
5693 locked_object = VM_OBJECT_NULL;
5694 }
5695 tmp_start_idx = cur_idx;
5696 abort_run = TRUE;
5697 continue;
5698 }
5699
5700 disconnected = FALSE;
5701 reusable = FALSE;
5702
5703 if ((m1->vmp_reusable ||
5704 object->all_reusable) &&
5705 (m1->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q) &&
5706 !m1->vmp_dirty &&
5707 !m1->vmp_reference) {
5708 /* reusable page... */
5709 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1));
5710 disconnected = TRUE;
5711 if (refmod == 0) {
5712 /*
5713 * ... not reused: can steal
5714 * without relocating contents.
5715 */
5716 reusable = TRUE;
5717 }
5718 }
5719
5720 if ((m1->vmp_pmapped &&
5721 ! reusable) ||
5722 m1->vmp_dirty ||
5723 m1->vmp_precious) {
5724 vm_object_offset_t offset;
5725
5726 m2 = vm_page_grab();
5727
5728 if (m2 == VM_PAGE_NULL) {
5729 if (locked_object) {
5730 vm_object_unlock(locked_object);
5731 locked_object = VM_OBJECT_NULL;
5732 }
5733 tmp_start_idx = cur_idx;
5734 abort_run = TRUE;
5735 continue;
5736 }
5737 if (! disconnected) {
5738 if (m1->vmp_pmapped)
5739 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1));
5740 else
5741 refmod = 0;
5742 }
5743
5744 /* copy the page's contents */
5745 pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(m1), VM_PAGE_GET_PHYS_PAGE(m2));
5746 /* copy the page's state */
5747 assert(!VM_PAGE_WIRED(m1));
5748 assert(m1->vmp_q_state != VM_PAGE_ON_FREE_Q);
5749 assert(m1->vmp_q_state != VM_PAGE_ON_PAGEOUT_Q);
5750 assert(!m1->vmp_laundry);
5751 m2->vmp_reference = m1->vmp_reference;
5752 assert(!m1->vmp_gobbled);
5753 assert(!m1->vmp_private);
5754 m2->vmp_no_cache = m1->vmp_no_cache;
5755 m2->vmp_xpmapped = 0;
5756 assert(!m1->vmp_busy);
5757 assert(!m1->vmp_wanted);
5758 assert(!m1->vmp_fictitious);
5759 m2->vmp_pmapped = m1->vmp_pmapped; /* should flush cache ? */
5760 m2->vmp_wpmapped = m1->vmp_wpmapped;
5761 assert(!m1->vmp_free_when_done);
5762 m2->vmp_absent = m1->vmp_absent;
5763 m2->vmp_error = m1->vmp_error;
5764 m2->vmp_dirty = m1->vmp_dirty;
5765 assert(!m1->vmp_cleaning);
5766 m2->vmp_precious = m1->vmp_precious;
5767 m2->vmp_clustered = m1->vmp_clustered;
5768 assert(!m1->vmp_overwriting);
5769 m2->vmp_restart = m1->vmp_restart;
5770 m2->vmp_unusual = m1->vmp_unusual;
5771 m2->vmp_cs_validated = m1->vmp_cs_validated;
5772 m2->vmp_cs_tainted = m1->vmp_cs_tainted;
5773 m2->vmp_cs_nx = m1->vmp_cs_nx;
5774
5775 /*
5776 * If m1 had really been reusable,
5777 * we would have just stolen it, so
5778 * let's not propagate its "reusable"
5779 * bit and assert that m2 is not
5780 * marked as "reusable".
5781 */
5782 // m2->vmp_reusable = m1->vmp_reusable;
5783 assert(!m2->vmp_reusable);
5784
5785 // assert(!m1->vmp_lopage);
5786
5787 if (m1->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR)
5788 m2->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
5789
5790 /*
5791 * page may need to be flushed if
5792 * it is marshalled into a UPL
5793 * that is going to be used by a device
5794 * that doesn't support coherency
5795 */
5796 m2->vmp_written_by_kernel = TRUE;
5797
5798 /*
5799 * make sure we clear the ref/mod state
5800 * from the pmap layer... else we risk
5801 * inheriting state from the last time
5802 * this page was used...
5803 */
5804 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(m2), VM_MEM_MODIFIED | VM_MEM_REFERENCED);
5805
5806 if (refmod & VM_MEM_REFERENCED)
5807 m2->vmp_reference = TRUE;
5808 if (refmod & VM_MEM_MODIFIED) {
5809 SET_PAGE_DIRTY(m2, TRUE);
5810 }
5811 offset = m1->vmp_offset;
5812
5813 /*
5814 * completely cleans up the state
5815 * of the page so that it is ready
5816 * to be put onto the free list, or
5817 * for this purpose it looks like it
5818 * just came off of the free list
5819 */
5820 vm_page_free_prepare(m1);
5821
5822 /*
5823 * now put the substitute page
5824 * on the object
5825 */
5826 vm_page_insert_internal(m2, locked_object, offset, VM_KERN_MEMORY_NONE, TRUE, TRUE, FALSE, FALSE, NULL);
5827
5828 if (m2->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
5829 m2->vmp_pmapped = TRUE;
5830 m2->vmp_wpmapped = TRUE;
5831
5832 PMAP_ENTER(kernel_pmap, m2->vmp_offset, m2,
5833 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE, kr);
5834
5835 assert(kr == KERN_SUCCESS);
5836
5837 compressed_pages++;
5838
5839 } else {
5840 if (m2->vmp_reference)
5841 vm_page_activate(m2);
5842 else
5843 vm_page_deactivate(m2);
5844 }
5845 PAGE_WAKEUP_DONE(m2);
5846
5847 } else {
5848 assert(m1->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR);
5849
5850 /*
5851 * completely cleans up the state
5852 * of the page so that it is ready
5853 * to be put onto the free list, or
5854 * for this purpose it looks like it
5855 * just came off of the free list
5856 */
5857 vm_page_free_prepare(m1);
5858 }
5859
5860 stolen_pages++;
5861
5862 }
5863 #if CONFIG_BACKGROUND_QUEUE
5864 vm_page_assign_background_state(m1);
5865 #endif
5866 VM_PAGE_ZERO_PAGEQ_ENTRY(m1);
5867 m1->vmp_snext = m;
5868 m = m1;
5869 }
5870 if (locked_object) {
5871 vm_object_unlock(locked_object);
5872 locked_object = VM_OBJECT_NULL;
5873 }
5874
5875 if (abort_run == TRUE) {
5876 /*
5877 * want the index of the last
5878 * page in this run that was
5879 * successfully 'stolen', so back
5880 * it up 1 for the auto-decrement on use
5881 * and 1 more to bump back over this page
5882 */
5883 page_idx = tmp_start_idx + 2;
5884 if (page_idx >= vm_pages_count) {
5885 if (wrapped) {
5886 if (m != VM_PAGE_NULL) {
5887 vm_page_unlock_queues();
5888 vm_page_free_list(m, FALSE);
5889 vm_page_lock_queues();
5890 m = VM_PAGE_NULL;
5891 }
5892 dumped_run++;
5893 goto done_scanning;
5894 }
5895 page_idx = last_idx = 0;
5896 wrapped = TRUE;
5897 }
5898 abort_run = FALSE;
5899
5900 /*
5901 * We couldn't complete this run (one of its
5902 * pages couldn't be stolen or substituted), so
5903 * resume the scan just past the page that stopped us.
5904 */
5905 RESET_STATE_OF_RUN();
5906
5907 if( flags & KMA_LOMEM)
5908 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
5909 else
5910 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
5911
5912 last_idx = page_idx;
5913
5914 if (m != VM_PAGE_NULL) {
5915 vm_page_unlock_queues();
5916 vm_page_free_list(m, FALSE);
5917 vm_page_lock_queues();
5918 m = VM_PAGE_NULL;
5919 }
5920 dumped_run++;
5921
5922 lck_mtx_lock(&vm_page_queue_free_lock);
5923 /*
5924 * reset our free page limit since we
5925 * dropped the lock protecting the vm_page_free_queue
5926 */
5927 free_available = vm_page_free_count - vm_page_free_reserved;
5928 goto retry;
5929 }
5930
5931 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
5932
5933 assert(m1->vmp_q_state == VM_PAGE_NOT_ON_Q);
5934 assert(m1->vmp_wire_count == 0);
5935
5936 if (wire == TRUE) {
5937 m1->vmp_wire_count++;
5938 m1->vmp_q_state = VM_PAGE_IS_WIRED;
5939 } else
5940 m1->vmp_gobbled = TRUE;
5941 }
5942 if (wire == FALSE)
5943 vm_page_gobble_count += npages;
5944
5945 /*
5946 * gobbled pages are also counted as wired pages
5947 */
5948 vm_page_wire_count += npages;
5949
5950 assert(vm_page_verify_contiguous(m, npages));
5951 }
5952 done_scanning:
5953 PAGE_REPLACEMENT_ALLOWED(FALSE);
5954
5955 vm_page_unlock_queues();
5956
5957 #if DEBUG
5958 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
5959
5960 tv_end_sec -= tv_start_sec;
5961 if (tv_end_usec < tv_start_usec) {
5962 tv_end_sec--;
5963 tv_end_usec += 1000000;
5964 }
5965 tv_end_usec -= tv_start_usec;
5966 if (tv_end_usec >= 1000000) {
5967 tv_end_sec++;
5968 tv_end_usec -= 1000000;
5969 }
5970 if (vm_page_find_contig_debug) {
5971 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
5972 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
5973 (long)tv_end_sec, tv_end_usec, orig_last_idx,
5974 scanned, yielded, dumped_run, stolen_pages, compressed_pages);
5975 }
5976
5977 #endif
5978 #if MACH_ASSERT
5979 vm_page_verify_free_lists();
5980 #endif
5981 if (m == NULL && zone_gc_called == FALSE) {
5982 printf("%s(num=%d,low=%d): found %d pages at 0x%llx...scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages... wired count is %d\n",
5983 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
5984 scanned, yielded, dumped_run, stolen_pages, compressed_pages, vm_page_wire_count);
5985
5986 if (consider_buffer_cache_collect != NULL) {
5987 (void)(*consider_buffer_cache_collect)(1);
5988 }
5989
5990 consider_zone_gc(FALSE);
5991
5992 zone_gc_called = TRUE;
5993
5994 printf("vm_page_find_contiguous: zone_gc called... wired count is %d\n", vm_page_wire_count);
5995 goto full_scan_again;
5996 }
5997
5998 return m;
5999 }
6000
6001 /*
6002 * Allocate a list of contiguous, wired pages.
6003 */
6004 kern_return_t
6005 cpm_allocate(
6006 vm_size_t size,
6007 vm_page_t *list,
6008 ppnum_t max_pnum,
6009 ppnum_t pnum_mask,
6010 boolean_t wire,
6011 int flags)
6012 {
6013 vm_page_t pages;
6014 unsigned int npages;
6015
6016 if (size % PAGE_SIZE != 0)
6017 return KERN_INVALID_ARGUMENT;
6018
6019 npages = (unsigned int) (size / PAGE_SIZE);
6020 if (npages != size / PAGE_SIZE) {
6021 /* 32-bit overflow */
6022 return KERN_INVALID_ARGUMENT;
6023 }
6024
6025 /*
6026 * Obtain a pointer to a subset of the free
6027 * list large enough to satisfy the request;
6028 * the region will be physically contiguous.
6029 */
6030 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
6031
6032 if (pages == VM_PAGE_NULL)
6033 return KERN_NO_SPACE;
6034 /*
6035 * determine need for wakeups
6036 */
6037 if (vm_page_free_count < vm_page_free_min)
6038 thread_wakeup((event_t) &vm_page_free_wanted);
6039
6040 VM_CHECK_MEMORYSTATUS;
6041
6042 /*
6043 * The CPM pages should now be available and
6044 * ordered by ascending physical address.
6045 */
6046 assert(vm_page_verify_contiguous(pages, npages));
6047
6048 *list = pages;
6049 return KERN_SUCCESS;
6050 }
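/*
 * Illustrative caller sketch for cpm_allocate() (not from this file; the
 * caller-side names 'contig_size', 'page_list', 'max_pnum' and 'pnum_mask'
 * are assumed purely for illustration):
 *
 *	vm_page_t	page_list;
 *	kern_return_t	kr;
 *
 *	kr = cpm_allocate(contig_size, &page_list, max_pnum, pnum_mask, TRUE, 0);
 *	if (kr != KERN_SUCCESS)
 *		return kr;
 *	// page_list now holds wired pages linked via vmp_snext,
 *	// ordered by ascending physical address
 */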
6051
6052
6053 unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
6054
6055 /*
6056 * when working on a 'run' of pages, it is necessary to hold
6057 * the vm_page_queue_lock (a hot global lock) for certain operations
6058 * on the page... however, the majority of the work can be done
6059 * while merely holding the object lock... in fact there are certain
6060 * collections of pages that don't require any work brokered by the
6061 * vm_page_queue_lock... to mitigate the time spent behind the global
6062 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
6063 * while doing all of the work that doesn't require the vm_page_queue_lock...
6064 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
6065 * necessary work for each page... we will grab the busy bit on the page
6066 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
6067 * if it can't immediately take the vm_page_queue_lock in order to compete
6068 * for the locks in the same order that vm_pageout_scan takes them.
6069 * the operation names are modeled after the names of the routines that
6070 * need to be called in order to make the changes very obvious in the
6071 * original loop
6072 */
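/*
 * A minimal caller-side sketch of the 2 pass pattern described above
 * (illustrative only; the local array/limit handling shown here is an
 * assumption, not code from this file):
 *
 *	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
 *	struct vm_page_delayed_work	*dwp = &dw_array[0];
 *	int				dw_count = 0;
 *
 *	// pass 1: under the object lock only, record what each page needs
 *	for each interesting page 'm' of 'object' {
 *		dwp->dw_m = m;
 *		dwp->dw_mask = DW_vm_page_activate | DW_clear_busy | DW_PAGE_WAKEUP;
 *		dwp++;
 *		dw_count++;
 *
 *		if (dw_count >= vm_max_delayed_work_limit) {
 *			// pass 2: take the vm_page_queue_lock once for the whole batch
 *			vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
 *			dwp = &dw_array[0];
 *			dw_count = 0;
 *		}
 *	}
 *	if (dw_count)
 *		vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
 */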
6073
6074 void
6075 vm_page_do_delayed_work(
6076 vm_object_t object,
6077 vm_tag_t tag,
6078 struct vm_page_delayed_work *dwp,
6079 int dw_count)
6080 {
6081 int j;
6082 vm_page_t m;
6083 vm_page_t local_free_q = VM_PAGE_NULL;
6084
6085 /*
6086 * pageout_scan takes the vm_page_lock_queues first
6087 * then tries for the object lock... to avoid what
6088 * is effectively a lock inversion, we'll go to the
6089 * trouble of taking them in that same order... otherwise
6090 * if this object contains the majority of the pages resident
6091 * in the UBC (or a small set of large objects actively being
6092 * worked on contain the majority of the pages), we could
6093 * cause the pageout_scan thread to 'starve' in its attempt
6094 * to find pages to move to the free queue, since it has to
6095 * successfully acquire the object lock of any candidate page
6096 * before it can steal/clean it.
6097 */
6098 if (!vm_page_trylockspin_queues()) {
6099 vm_object_unlock(object);
6100
6101 vm_page_lockspin_queues();
6102
6103 for (j = 0; ; j++) {
6104 if (!vm_object_lock_avoid(object) &&
6105 _vm_object_lock_try(object))
6106 break;
6107 vm_page_unlock_queues();
6108 mutex_pause(j);
6109 vm_page_lockspin_queues();
6110 }
6111 }
6112 for (j = 0; j < dw_count; j++, dwp++) {
6113
6114 m = dwp->dw_m;
6115
6116 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
6117 vm_pageout_throttle_up(m);
6118 #if CONFIG_PHANTOM_CACHE
6119 if (dwp->dw_mask & DW_vm_phantom_cache_update)
6120 vm_phantom_cache_update(m);
6121 #endif
6122 if (dwp->dw_mask & DW_vm_page_wire)
6123 vm_page_wire(m, tag, FALSE);
6124 else if (dwp->dw_mask & DW_vm_page_unwire) {
6125 boolean_t queueit;
6126
6127 queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;
6128
6129 vm_page_unwire(m, queueit);
6130 }
6131 if (dwp->dw_mask & DW_vm_page_free) {
6132 vm_page_free_prepare_queues(m);
6133
6134 assert(m->vmp_pageq.next == 0 && m->vmp_pageq.prev == 0);
6135 /*
6136 * Add this page to our list of reclaimed pages,
6137 * to be freed later.
6138 */
6139 m->vmp_snext = local_free_q;
6140 local_free_q = m;
6141 } else {
6142 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
6143 vm_page_deactivate_internal(m, FALSE);
6144 else if (dwp->dw_mask & DW_vm_page_activate) {
6145 if (m->vmp_q_state != VM_PAGE_ON_ACTIVE_Q) {
6146 vm_page_activate(m);
6147 }
6148 }
6149 else if (dwp->dw_mask & DW_vm_page_speculate)
6150 vm_page_speculate(m, TRUE);
6151 else if (dwp->dw_mask & DW_enqueue_cleaned) {
6152 /*
6153 * if we didn't hold the object lock and did this,
6154 * we might disconnect the page, then someone might
6155 * soft fault it back in, then we would put it on the
6156 * cleaned queue, and so we would have a referenced (maybe even dirty)
6157 * page on that queue, which we don't want
6158 */
6159 int refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
6160
6161 if ((refmod_state & VM_MEM_REFERENCED)) {
6162 /*
6163 * this page has been touched since it got cleaned; let's activate it
6164 * if it hasn't already been
6165 */
6166 VM_PAGEOUT_DEBUG(vm_pageout_enqueued_cleaned, 1);
6167 VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reactivated, 1);
6168
6169 if (m->vmp_q_state != VM_PAGE_ON_ACTIVE_Q)
6170 vm_page_activate(m);
6171 } else {
6172 m->vmp_reference = FALSE;
6173 vm_page_enqueue_cleaned(m);
6174 }
6175 }
6176 else if (dwp->dw_mask & DW_vm_page_lru)
6177 vm_page_lru(m);
6178 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
6179 if (m->vmp_q_state != VM_PAGE_ON_PAGEOUT_Q)
6180 vm_page_queues_remove(m, TRUE);
6181 }
6182 if (dwp->dw_mask & DW_set_reference)
6183 m->vmp_reference = TRUE;
6184 else if (dwp->dw_mask & DW_clear_reference)
6185 m->vmp_reference = FALSE;
6186
6187 if (dwp->dw_mask & DW_move_page) {
6188 if (m->vmp_q_state != VM_PAGE_ON_PAGEOUT_Q) {
6189 vm_page_queues_remove(m, FALSE);
6190
6191 assert(VM_PAGE_OBJECT(m) != kernel_object);
6192
6193 vm_page_enqueue_inactive(m, FALSE);
6194 }
6195 }
6196 if (dwp->dw_mask & DW_clear_busy)
6197 m->vmp_busy = FALSE;
6198
6199 if (dwp->dw_mask & DW_PAGE_WAKEUP)
6200 PAGE_WAKEUP(m);
6201 }
6202 }
6203 vm_page_unlock_queues();
6204
6205 if (local_free_q)
6206 vm_page_free_list(local_free_q, TRUE);
6207
6208 VM_CHECK_MEMORYSTATUS;
6209
6210 }
6211
6212 kern_return_t
6213 vm_page_alloc_list(
6214 int page_count,
6215 int flags,
6216 vm_page_t *list)
6217 {
6218 vm_page_t lo_page_list = VM_PAGE_NULL;
6219 vm_page_t mem;
6220 int i;
6221
6222 if ( !(flags & KMA_LOMEM))
6223 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
6224
6225 for (i = 0; i < page_count; i++) {
6226
6227 mem = vm_page_grablo();
6228
6229 if (mem == VM_PAGE_NULL) {
6230 if (lo_page_list)
6231 vm_page_free_list(lo_page_list, FALSE);
6232
6233 *list = VM_PAGE_NULL;
6234
6235 return (KERN_RESOURCE_SHORTAGE);
6236 }
6237 mem->vmp_snext = lo_page_list;
6238 lo_page_list = mem;
6239 }
6240 *list = lo_page_list;
6241
6242 return (KERN_SUCCESS);
6243 }
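/*
 * Illustrative usage sketch for vm_page_alloc_list() (assumed caller-side
 * names 'n', 'list' and 'p'; KMA_LOMEM is mandatory for this routine):
 *
 *	vm_page_t	list, p;
 *
 *	if (vm_page_alloc_list(n, KMA_LOMEM, &list) == KERN_SUCCESS) {
 *		for (p = list; p != VM_PAGE_NULL; p = vm_page_get_next(p)) {
 *			// each entry is a low-memory page; its physical page
 *			// number is available via vm_page_get_phys_page(p)
 *		}
 *		vm_page_free_list(list, FALSE);	// release when done
 *	}
 */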
6244
6245 void
6246 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
6247 {
6248 page->vmp_offset = offset;
6249 }
6250
6251 vm_page_t
6252 vm_page_get_next(vm_page_t page)
6253 {
6254 return (page->vmp_snext);
6255 }
6256
6257 vm_object_offset_t
6258 vm_page_get_offset(vm_page_t page)
6259 {
6260 return (page->vmp_offset);
6261 }
6262
6263 ppnum_t
6264 vm_page_get_phys_page(vm_page_t page)
6265 {
6266 return (VM_PAGE_GET_PHYS_PAGE(page));
6267 }
6268
6269
6270 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
6271
6272 #if HIBERNATION
6273
6274 static vm_page_t hibernate_gobble_queue;
6275
6276 static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
6277 static int hibernate_flush_dirty_pages(int);
6278 static int hibernate_flush_queue(vm_page_queue_head_t *, int);
6279
6280 void hibernate_flush_wait(void);
6281 void hibernate_mark_in_progress(void);
6282 void hibernate_clear_in_progress(void);
6283
6284 void hibernate_free_range(int, int);
6285 void hibernate_hash_insert_page(vm_page_t);
6286 uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
6287 void hibernate_rebuild_vm_structs(void);
6288 uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
6289 ppnum_t hibernate_lookup_paddr(unsigned int);
6290
6291 struct hibernate_statistics {
6292 int hibernate_considered;
6293 int hibernate_reentered_on_q;
6294 int hibernate_found_dirty;
6295 int hibernate_skipped_cleaning;
6296 int hibernate_skipped_transient;
6297 int hibernate_skipped_precious;
6298 int hibernate_skipped_external;
6299 int hibernate_queue_nolock;
6300 int hibernate_queue_paused;
6301 int hibernate_throttled;
6302 int hibernate_throttle_timeout;
6303 int hibernate_drained;
6304 int hibernate_drain_timeout;
6305 int cd_lock_failed;
6306 int cd_found_precious;
6307 int cd_found_wired;
6308 int cd_found_busy;
6309 int cd_found_unusual;
6310 int cd_found_cleaning;
6311 int cd_found_laundry;
6312 int cd_found_dirty;
6313 int cd_found_xpmapped;
6314 int cd_skipped_xpmapped;
6315 int cd_local_free;
6316 int cd_total_free;
6317 int cd_vm_page_wire_count;
6318 int cd_vm_struct_pages_unneeded;
6319 int cd_pages;
6320 int cd_discarded;
6321 int cd_count_wire;
6322 } hibernate_stats;
6323
6324
6325 /*
6326 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
6327 * so that we don't overrun the estimated image size, which would
6328 * result in a hibernation failure.
6329 */
6330 #define HIBERNATE_XPMAPPED_LIMIT 40000
6331
6332
6333 static int
6334 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
6335 {
6336 wait_result_t wait_result;
6337
6338 vm_page_lock_queues();
6339
6340 while ( !vm_page_queue_empty(&q->pgo_pending) ) {
6341
6342 q->pgo_draining = TRUE;
6343
6344 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
6345
6346 vm_page_unlock_queues();
6347
6348 wait_result = thread_block(THREAD_CONTINUE_NULL);
6349
6350 if (wait_result == THREAD_TIMED_OUT && !vm_page_queue_empty(&q->pgo_pending)) {
6351 hibernate_stats.hibernate_drain_timeout++;
6352
6353 if (q == &vm_pageout_queue_external)
6354 return (0);
6355
6356 return (1);
6357 }
6358 vm_page_lock_queues();
6359
6360 hibernate_stats.hibernate_drained++;
6361 }
6362 vm_page_unlock_queues();
6363
6364 return (0);
6365 }
6366
6367
6368 boolean_t hibernate_skip_external = FALSE;
6369
6370 static int
6371 hibernate_flush_queue(vm_page_queue_head_t *q, int qcount)
6372 {
6373 vm_page_t m;
6374 vm_object_t l_object = NULL;
6375 vm_object_t m_object = NULL;
6376 int refmod_state = 0;
6377 int try_failed_count = 0;
6378 int retval = 0;
6379 int current_run = 0;
6380 struct vm_pageout_queue *iq;
6381 struct vm_pageout_queue *eq;
6382 struct vm_pageout_queue *tq;
6383
6384 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START,
6385 VM_KERNEL_UNSLIDE_OR_PERM(q), qcount);
6386
6387 iq = &vm_pageout_queue_internal;
6388 eq = &vm_pageout_queue_external;
6389
6390 vm_page_lock_queues();
6391
6392 while (qcount && !vm_page_queue_empty(q)) {
6393
6394 if (current_run++ == 1000) {
6395 if (hibernate_should_abort()) {
6396 retval = 1;
6397 break;
6398 }
6399 current_run = 0;
6400 }
6401
6402 m = (vm_page_t) vm_page_queue_first(q);
6403 m_object = VM_PAGE_OBJECT(m);
6404
6405 /*
6406 * check to see if we currently are working
6407 * with the same object... if so, we've
6408 * already got the lock
6409 */
6410 if (m_object != l_object) {
6411 /*
6412 * the object associated with candidate page is
6413 * different from the one we were just working
6414 * with... dump the lock if we still own it
6415 */
6416 if (l_object != NULL) {
6417 vm_object_unlock(l_object);
6418 l_object = NULL;
6419 }
6420 /*
6421 * Try to lock object; since we've already got the
6422 * page queues lock, we can only 'try' for this one.
6423 * If the 'try' fails, we need to do a mutex_pause
6424 * to allow the owner of the object lock a chance to
6425 * run...
6426 */
6427 if ( !vm_object_lock_try_scan(m_object)) {
6428
6429 if (try_failed_count > 20) {
6430 hibernate_stats.hibernate_queue_nolock++;
6431
6432 goto reenter_pg_on_q;
6433 }
6434
6435 vm_page_unlock_queues();
6436 mutex_pause(try_failed_count++);
6437 vm_page_lock_queues();
6438
6439 hibernate_stats.hibernate_queue_paused++;
6440 continue;
6441 } else {
6442 l_object = m_object;
6443 }
6444 }
6445 if ( !m_object->alive || m->vmp_cleaning || m->vmp_laundry || m->vmp_busy || m->vmp_absent || m->vmp_error) {
6446 /*
6447 * page is not to be cleaned
6448 * put it back on the head of its queue
6449 */
6450 if (m->vmp_cleaning)
6451 hibernate_stats.hibernate_skipped_cleaning++;
6452 else
6453 hibernate_stats.hibernate_skipped_transient++;
6454
6455 goto reenter_pg_on_q;
6456 }
6457 if (m_object->copy == VM_OBJECT_NULL) {
6458 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
6459 /*
6460 * let the normal hibernate image path
6461 * deal with these
6462 */
6463 goto reenter_pg_on_q;
6464 }
6465 }
6466 if ( !m->vmp_dirty && m->vmp_pmapped) {
6467 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
6468
6469 if ((refmod_state & VM_MEM_MODIFIED)) {
6470 SET_PAGE_DIRTY(m, FALSE);
6471 }
6472 } else
6473 refmod_state = 0;
6474
6475 if ( !m->vmp_dirty) {
6476 /*
6477 * page is not to be cleaned
6478 * put it back on the head of its queue
6479 */
6480 if (m->vmp_precious)
6481 hibernate_stats.hibernate_skipped_precious++;
6482
6483 goto reenter_pg_on_q;
6484 }
6485
6486 if (hibernate_skip_external == TRUE && !m_object->internal) {
6487
6488 hibernate_stats.hibernate_skipped_external++;
6489
6490 goto reenter_pg_on_q;
6491 }
6492 tq = NULL;
6493
6494 if (m_object->internal) {
6495 if (VM_PAGE_Q_THROTTLED(iq))
6496 tq = iq;
6497 } else if (VM_PAGE_Q_THROTTLED(eq))
6498 tq = eq;
6499
6500 if (tq != NULL) {
6501 wait_result_t wait_result;
6502 int wait_count = 5;
6503
6504 if (l_object != NULL) {
6505 vm_object_unlock(l_object);
6506 l_object = NULL;
6507 }
6508
6509 while (retval == 0) {
6510
6511 tq->pgo_throttled = TRUE;
6512
6513 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
6514
6515 vm_page_unlock_queues();
6516
6517 wait_result = thread_block(THREAD_CONTINUE_NULL);
6518
6519 vm_page_lock_queues();
6520
6521 if (wait_result != THREAD_TIMED_OUT)
6522 break;
6523 if (!VM_PAGE_Q_THROTTLED(tq))
6524 break;
6525
6526 if (hibernate_should_abort())
6527 retval = 1;
6528
6529 if (--wait_count == 0) {
6530
6531 hibernate_stats.hibernate_throttle_timeout++;
6532
6533 if (tq == eq) {
6534 hibernate_skip_external = TRUE;
6535 break;
6536 }
6537 retval = 1;
6538 }
6539 }
6540 if (retval)
6541 break;
6542
6543 hibernate_stats.hibernate_throttled++;
6544
6545 continue;
6546 }
6547 /*
6548 * we've already factored out pages in the laundry which
6549 * means this page can't be on the pageout queue so it's
6550 * safe to do the vm_page_queues_remove
6551 */
6552 vm_page_queues_remove(m, TRUE);
6553
6554 if (m_object->internal == TRUE)
6555 pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m), PMAP_OPTIONS_COMPRESSOR, NULL);
6556
6557 vm_pageout_cluster(m);
6558
6559 hibernate_stats.hibernate_found_dirty++;
6560
6561 goto next_pg;
6562
6563 reenter_pg_on_q:
6564 vm_page_queue_remove(q, m, vm_page_t, vmp_pageq);
6565 vm_page_queue_enter(q, m, vm_page_t, vmp_pageq);
6566
6567 hibernate_stats.hibernate_reentered_on_q++;
6568 next_pg:
6569 hibernate_stats.hibernate_considered++;
6570
6571 qcount--;
6572 try_failed_count = 0;
6573 }
6574 if (l_object != NULL) {
6575 vm_object_unlock(l_object);
6576 l_object = NULL;
6577 }
6578
6579 vm_page_unlock_queues();
6580
6581 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
6582
6583 return (retval);
6584 }
6585
6586
6587 static int
6588 hibernate_flush_dirty_pages(int pass)
6589 {
6590 struct vm_speculative_age_q *aq;
6591 uint32_t i;
6592
6593 if (vm_page_local_q) {
6594 for (i = 0; i < vm_page_local_q_count; i++)
6595 vm_page_reactivate_local(i, TRUE, FALSE);
6596 }
6597
6598 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
6599 int qcount;
6600 vm_page_t m;
6601
6602 aq = &vm_page_queue_speculative[i];
6603
6604 if (vm_page_queue_empty(&aq->age_q))
6605 continue;
6606 qcount = 0;
6607
6608 vm_page_lockspin_queues();
6609
6610 vm_page_queue_iterate(&aq->age_q,
6611 m,
6612 vm_page_t,
6613 vmp_pageq)
6614 {
6615 qcount++;
6616 }
6617 vm_page_unlock_queues();
6618
6619 if (qcount) {
6620 if (hibernate_flush_queue(&aq->age_q, qcount))
6621 return (1);
6622 }
6623 }
6624 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
6625 return (1);
6626 /* XXX FBDP TODO: flush secluded queue */
6627 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
6628 return (1);
6629 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
6630 return (1);
6631 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
6632 return (1);
6633
6634 if (pass == 1)
6635 vm_compressor_record_warmup_start();
6636
6637 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
6638 if (pass == 1)
6639 vm_compressor_record_warmup_end();
6640 return (1);
6641 }
6642 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
6643 if (pass == 1)
6644 vm_compressor_record_warmup_end();
6645 return (1);
6646 }
6647 if (pass == 1)
6648 vm_compressor_record_warmup_end();
6649
6650 if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
6651 return (1);
6652
6653 return (0);
6654 }
6655
6656
6657 void
6658 hibernate_reset_stats()
6659 {
6660 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
6661 }
6662
6663
6664 int
6665 hibernate_flush_memory()
6666 {
6667 int retval;
6668
6669 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
6670
6671 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
6672
6673 hibernate_cleaning_in_progress = TRUE;
6674 hibernate_skip_external = FALSE;
6675
6676 if ((retval = hibernate_flush_dirty_pages(1)) == 0) {
6677
6678 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
6679
6680 vm_compressor_flush();
6681
6682 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
6683
6684 if (consider_buffer_cache_collect != NULL) {
6685 unsigned int orig_wire_count;
6686
6687 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6688 orig_wire_count = vm_page_wire_count;
6689
6690 (void)(*consider_buffer_cache_collect)(1);
6691 consider_zone_gc(FALSE);
6692
6693 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
6694
6695 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
6696 }
6697 }
6698 hibernate_cleaning_in_progress = FALSE;
6699
6700 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
6701
6702 if (retval)
6703 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);
6704
6705
6706 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
6707 hibernate_stats.hibernate_considered,
6708 hibernate_stats.hibernate_reentered_on_q,
6709 hibernate_stats.hibernate_found_dirty);
6710 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
6711 hibernate_stats.hibernate_skipped_cleaning,
6712 hibernate_stats.hibernate_skipped_transient,
6713 hibernate_stats.hibernate_skipped_precious,
6714 hibernate_stats.hibernate_skipped_external,
6715 hibernate_stats.hibernate_queue_nolock);
6716 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
6717 hibernate_stats.hibernate_queue_paused,
6718 hibernate_stats.hibernate_throttled,
6719 hibernate_stats.hibernate_throttle_timeout,
6720 hibernate_stats.hibernate_drained,
6721 hibernate_stats.hibernate_drain_timeout);
6722
6723 return (retval);
6724 }
6725
6726
6727 static void
6728 hibernate_page_list_zero(hibernate_page_list_t *list)
6729 {
6730 uint32_t bank;
6731 hibernate_bitmap_t * bitmap;
6732
6733 bitmap = &list->bank_bitmap[0];
6734 for (bank = 0; bank < list->bank_count; bank++)
6735 {
6736 uint32_t last_bit;
6737
6738 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
6739 // set out-of-bound bits at end of bitmap.
6740 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
6741 if (last_bit)
6742 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
6743
6744 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
6745 }
6746 }
6747
6748 void
6749 hibernate_free_gobble_pages(void)
6750 {
6751 vm_page_t m, next;
6752 uint32_t count = 0;
6753
6754 m = (vm_page_t) hibernate_gobble_queue;
6755 while(m)
6756 {
6757 next = m->vmp_snext;
6758 vm_page_free(m);
6759 count++;
6760 m = next;
6761 }
6762 hibernate_gobble_queue = VM_PAGE_NULL;
6763
6764 if (count)
6765 HIBLOG("Freed %d pages\n", count);
6766 }
6767
6768 static boolean_t
6769 hibernate_consider_discard(vm_page_t m, boolean_t preflight)
6770 {
6771 vm_object_t object = NULL;
6772 int refmod_state;
6773 boolean_t discard = FALSE;
6774
6775 do
6776 {
6777 if (m->vmp_private)
6778 panic("hibernate_consider_discard: private");
6779
6780 object = VM_PAGE_OBJECT(m);
6781
6782 if (!vm_object_lock_try(object)) {
6783 object = NULL;
6784 if (!preflight) hibernate_stats.cd_lock_failed++;
6785 break;
6786 }
6787 if (VM_PAGE_WIRED(m)) {
6788 if (!preflight) hibernate_stats.cd_found_wired++;
6789 break;
6790 }
6791 if (m->vmp_precious) {
6792 if (!preflight) hibernate_stats.cd_found_precious++;
6793 break;
6794 }
6795 if (m->vmp_busy || !object->alive) {
6796 /*
6797 * Somebody is playing with this page.
6798 */
6799 if (!preflight) hibernate_stats.cd_found_busy++;
6800 break;
6801 }
6802 if (m->vmp_absent || m->vmp_unusual || m->vmp_error) {
6803 /*
6804 * If it's unusual in any way, ignore it
6805 */
6806 if (!preflight) hibernate_stats.cd_found_unusual++;
6807 break;
6808 }
6809 if (m->vmp_cleaning) {
6810 if (!preflight) hibernate_stats.cd_found_cleaning++;
6811 break;
6812 }
6813 if (m->vmp_laundry) {
6814 if (!preflight) hibernate_stats.cd_found_laundry++;
6815 break;
6816 }
6817 if (!m->vmp_dirty)
6818 {
6819 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
6820
6821 if (refmod_state & VM_MEM_REFERENCED)
6822 m->vmp_reference = TRUE;
6823 if (refmod_state & VM_MEM_MODIFIED) {
6824 SET_PAGE_DIRTY(m, FALSE);
6825 }
6826 }
6827
6828 /*
6829 * If it's clean or purgeable we can discard the page on wakeup.
6830 */
6831 discard = (!m->vmp_dirty)
6832 || (VM_PURGABLE_VOLATILE == object->purgable)
6833 || (VM_PURGABLE_EMPTY == object->purgable);
6834
6835
6836 if (discard == FALSE) {
6837 if (!preflight)
6838 hibernate_stats.cd_found_dirty++;
6839 } else if (m->vmp_xpmapped && m->vmp_reference && !object->internal) {
6840 if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
6841 if (!preflight)
6842 hibernate_stats.cd_found_xpmapped++;
6843 discard = FALSE;
6844 } else {
6845 if (!preflight)
6846 hibernate_stats.cd_skipped_xpmapped++;
6847 }
6848 }
6849 }
6850 while (FALSE);
6851
6852 if (object)
6853 vm_object_unlock(object);
6854
6855 return (discard);
6856 }
6857
6858
6859 static void
6860 hibernate_discard_page(vm_page_t m)
6861 {
6862 vm_object_t m_object;
6863
6864 if (m->vmp_absent || m->vmp_unusual || m->vmp_error)
6865 /*
6866 * If it's unusual in any way, ignore
6867 */
6868 return;
6869
6870 m_object = VM_PAGE_OBJECT(m);
6871
6872 #if MACH_ASSERT || DEBUG
6873 if (!vm_object_lock_try(m_object))
6874 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
6875 #else
6876 /* No need to lock page queue for token delete, hibernate_vm_unlock()
6877 makes sure these locks are uncontended before sleep */
6878 #endif /* MACH_ASSERT || DEBUG */
6879
6880 if (m->vmp_pmapped == TRUE)
6881 {
6882 __unused int refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
6883 }
6884
6885 if (m->vmp_laundry)
6886 panic("hibernate_discard_page(%p) laundry", m);
6887 if (m->vmp_private)
6888 panic("hibernate_discard_page(%p) private", m);
6889 if (m->vmp_fictitious)
6890 panic("hibernate_discard_page(%p) fictitious", m);
6891
6892 if (VM_PURGABLE_VOLATILE == m_object->purgable)
6893 {
6894 /* object should be on a queue */
6895 assert((m_object->objq.next != NULL) && (m_object->objq.prev != NULL));
6896 purgeable_q_t old_queue = vm_purgeable_object_remove(m_object);
6897 assert(old_queue);
6898 if (m_object->purgeable_when_ripe) {
6899 vm_purgeable_token_delete_first(old_queue);
6900 }
6901 vm_object_lock_assert_exclusive(m_object);
6902 m_object->purgable = VM_PURGABLE_EMPTY;
6903
6904 /*
6905 * Purgeable ledgers: pages of VOLATILE and EMPTY objects are
6906 * accounted in the "volatile" ledger, so no change here.
6907 * We have to update vm_page_purgeable_count, though, since we're
6908 * effectively purging this object.
6909 */
6910 unsigned int delta;
6911 assert(m_object->resident_page_count >= m_object->wired_page_count);
6912 delta = (m_object->resident_page_count - m_object->wired_page_count);
6913 assert(vm_page_purgeable_count >= delta);
6914 assert(delta > 0);
6915 OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
6916 }
6917
6918 vm_page_free(m);
6919
6920 #if MACH_ASSERT || DEBUG
6921 vm_object_unlock(m_object);
6922 #endif /* MACH_ASSERT || DEBUG */
6923 }
6924
6925 /*
6926 Grab locks for hibernate_page_list_setall()
6927 */
6928 void
6929 hibernate_vm_lock_queues(void)
6930 {
6931 vm_object_lock(compressor_object);
6932 vm_page_lock_queues();
6933 lck_mtx_lock(&vm_page_queue_free_lock);
6934 lck_mtx_lock(&vm_purgeable_queue_lock);
6935
6936 if (vm_page_local_q) {
6937 uint32_t i;
6938 for (i = 0; i < vm_page_local_q_count; i++) {
6939 struct vpl *lq;
6940 lq = &vm_page_local_q[i].vpl_un.vpl;
6941 VPL_LOCK(&lq->vpl_lock);
6942 }
6943 }
6944 }
6945
6946 void
6947 hibernate_vm_unlock_queues(void)
6948 {
6949 if (vm_page_local_q) {
6950 uint32_t i;
6951 for (i = 0; i < vm_page_local_q_count; i++) {
6952 struct vpl *lq;
6953 lq = &vm_page_local_q[i].vpl_un.vpl;
6954 VPL_UNLOCK(&lq->vpl_lock);
6955 }
6956 }
6957 lck_mtx_unlock(&vm_purgeable_queue_lock);
6958 lck_mtx_unlock(&vm_page_queue_free_lock);
6959 vm_page_unlock_queues();
6960 vm_object_unlock(compressor_object);
6961 }
6962
6963 /*
6964 Bits zero in the bitmaps => page needs to be saved. All pages default to being saved;
6965 pages known to the VM not to need saving are subtracted.
6966 Wired pages to be saved are present in page_list_wired, pageable in page_list.
6967 */
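/*
 * Sketch of that convention (illustrative fragment only, assuming the
 * bitset/bittst helpers used elsewhere in this file): a set bit means
 * "does not need saving", so marking and later acting on a page looks like:
 *
 *	hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));	// page can be skipped
 *	...
 *	if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
 *		hibernate_discard_page(m);	// it was marked discardable
 */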
6968
6969 void
6970 hibernate_page_list_setall(hibernate_page_list_t * page_list,
6971 hibernate_page_list_t * page_list_wired,
6972 hibernate_page_list_t * page_list_pal,
6973 boolean_t preflight,
6974 boolean_t will_discard,
6975 uint32_t * pagesOut)
6976 {
6977 uint64_t start, end, nsec;
6978 vm_page_t m;
6979 vm_page_t next;
6980 uint32_t pages = page_list->page_count;
6981 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
6982 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
6983 uint32_t count_wire = pages;
6984 uint32_t count_discard_active = 0;
6985 uint32_t count_discard_inactive = 0;
6986 uint32_t count_discard_cleaned = 0;
6987 uint32_t count_discard_purgeable = 0;
6988 uint32_t count_discard_speculative = 0;
6989 uint32_t count_discard_vm_struct_pages = 0;
6990 uint32_t i;
6991 uint32_t bank;
6992 hibernate_bitmap_t * bitmap;
6993 hibernate_bitmap_t * bitmap_wired;
6994 boolean_t discard_all;
6995 boolean_t discard;
6996
6997 HIBLOG("hibernate_page_list_setall(preflight %d) start\n", preflight);
6998
6999 if (preflight) {
7000 page_list = NULL;
7001 page_list_wired = NULL;
7002 page_list_pal = NULL;
7003 discard_all = FALSE;
7004 } else {
7005 discard_all = will_discard;
7006 }
7007
7008 #if MACH_ASSERT || DEBUG
7009 if (!preflight)
7010 {
7011 assert(hibernate_vm_locks_are_safe());
7012 vm_page_lock_queues();
7013 if (vm_page_local_q) {
7014 for (i = 0; i < vm_page_local_q_count; i++) {
7015 struct vpl *lq;
7016 lq = &vm_page_local_q[i].vpl_un.vpl;
7017 VPL_LOCK(&lq->vpl_lock);
7018 }
7019 }
7020 }
7021 #endif /* MACH_ASSERT || DEBUG */
7022
7023
7024 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
7025
7026 clock_get_uptime(&start);
7027
7028 if (!preflight) {
7029 hibernate_page_list_zero(page_list);
7030 hibernate_page_list_zero(page_list_wired);
7031 hibernate_page_list_zero(page_list_pal);
7032
7033 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
7034 hibernate_stats.cd_pages = pages;
7035 }
7036
7037 if (vm_page_local_q) {
7038 for (i = 0; i < vm_page_local_q_count; i++)
7039 vm_page_reactivate_local(i, TRUE, !preflight);
7040 }
7041
7042 if (preflight) {
7043 vm_object_lock(compressor_object);
7044 vm_page_lock_queues();
7045 lck_mtx_lock(&vm_page_queue_free_lock);
7046 }
7047
7048 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
7049
7050 hibernation_vmqueues_inspection = TRUE;
7051
7052 m = (vm_page_t) hibernate_gobble_queue;
7053 while (m)
7054 {
7055 pages--;
7056 count_wire--;
7057 if (!preflight) {
7058 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7059 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7060 }
7061 m = m->vmp_snext;
7062 }
7063
7064 if (!preflight) for( i = 0; i < real_ncpus; i++ )
7065 {
7066 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
7067 {
7068 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = m->vmp_snext)
7069 {
7070 assert(m->vmp_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
7071
7072 pages--;
7073 count_wire--;
7074 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7075 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7076
7077 hibernate_stats.cd_local_free++;
7078 hibernate_stats.cd_total_free++;
7079 }
7080 }
7081 }
7082
7083 for( i = 0; i < vm_colors; i++ )
7084 {
7085 vm_page_queue_iterate(&vm_page_queue_free[i].qhead,
7086 m,
7087 vm_page_t,
7088 vmp_pageq)
7089 {
7090 assert(m->vmp_q_state == VM_PAGE_ON_FREE_Q);
7091
7092 pages--;
7093 count_wire--;
7094 if (!preflight) {
7095 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7096 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7097
7098 hibernate_stats.cd_total_free++;
7099 }
7100 }
7101 }
7102
7103 vm_page_queue_iterate(&vm_lopage_queue_free,
7104 m,
7105 vm_page_t,
7106 vmp_pageq)
7107 {
7108 assert(m->vmp_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
7109
7110 pages--;
7111 count_wire--;
7112 if (!preflight) {
7113 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7114 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7115
7116 hibernate_stats.cd_total_free++;
7117 }
7118 }
7119
7120 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
7121 while (m && !vm_page_queue_end(&vm_page_queue_throttled, (vm_page_queue_entry_t)m))
7122 {
7123 assert(m->vmp_q_state == VM_PAGE_ON_THROTTLED_Q);
7124
7125 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7126 discard = FALSE;
7127 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
7128 && hibernate_consider_discard(m, preflight))
7129 {
7130 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7131 count_discard_inactive++;
7132 discard = discard_all;
7133 }
7134 else
7135 count_throttled++;
7136 count_wire--;
7137 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7138
7139 if (discard) hibernate_discard_page(m);
7140 m = next;
7141 }
7142
7143 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
7144 while (m && !vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t)m))
7145 {
7146 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
7147
7148 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7149 discard = FALSE;
7150 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
7151 && hibernate_consider_discard(m, preflight))
7152 {
7153 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7154 if (m->vmp_dirty)
7155 count_discard_purgeable++;
7156 else
7157 count_discard_inactive++;
7158 discard = discard_all;
7159 }
7160 else
7161 count_anonymous++;
7162 count_wire--;
7163 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7164 if (discard) hibernate_discard_page(m);
7165 m = next;
7166 }
7167
7168 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
7169 while (m && !vm_page_queue_end(&vm_page_queue_cleaned, (vm_page_queue_entry_t)m))
7170 {
7171 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
7172
7173 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7174 discard = FALSE;
7175 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
7176 && hibernate_consider_discard(m, preflight))
7177 {
7178 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7179 if (m->vmp_dirty)
7180 count_discard_purgeable++;
7181 else
7182 count_discard_cleaned++;
7183 discard = discard_all;
7184 }
7185 else
7186 count_cleaned++;
7187 count_wire--;
7188 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7189 if (discard) hibernate_discard_page(m);
7190 m = next;
7191 }
7192
7193 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
7194 while (m && !vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t)m))
7195 {
7196 assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q);
7197
7198 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7199 discard = FALSE;
7200 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
7201 && hibernate_consider_discard(m, preflight))
7202 {
7203 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7204 if (m->vmp_dirty)
7205 count_discard_purgeable++;
7206 else
7207 count_discard_active++;
7208 discard = discard_all;
7209 }
7210 else
7211 count_active++;
7212 count_wire--;
7213 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7214 if (discard) hibernate_discard_page(m);
7215 m = next;
7216 }
7217
7218 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
7219 while (m && !vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t)m))
7220 {
7221 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
7222
7223 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7224 discard = FALSE;
7225 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
7226 && hibernate_consider_discard(m, preflight))
7227 {
7228 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7229 if (m->vmp_dirty)
7230 count_discard_purgeable++;
7231 else
7232 count_discard_inactive++;
7233 discard = discard_all;
7234 }
7235 else
7236 count_inactive++;
7237 count_wire--;
7238 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7239 if (discard) hibernate_discard_page(m);
7240 m = next;
7241 }
7242 /* XXX FBDP TODO: secluded queue */
7243
7244 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
7245 {
7246 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_speculative[i].age_q);
7247 while (m && !vm_page_queue_end(&vm_page_queue_speculative[i].age_q, (vm_page_queue_entry_t)m))
7248 {
7249 assert(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q);
7250 assertf(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q,
7251 "Bad page: %p (0x%x:0x%x) on queue %d has state: %d (Discard: %d, Preflight: %d)",
7252 m, m->vmp_pageq.next, m->vmp_pageq.prev, i, m->vmp_q_state, discard, preflight);
7253
7254 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7255 discard = FALSE;
7256 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
7257 && hibernate_consider_discard(m, preflight))
7258 {
7259 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7260 count_discard_speculative++;
7261 discard = discard_all;
7262 }
7263 else
7264 count_speculative++;
7265 count_wire--;
7266 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7267 if (discard) hibernate_discard_page(m);
7268 m = next;
7269 }
7270 }
7271
7272 vm_page_queue_iterate(&compressor_object->memq, m, vm_page_t, vmp_listq)
7273 {
7274 assert(m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR);
7275
7276 count_compressor++;
7277 count_wire--;
7278 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7279 }
7280
7281 if (preflight == FALSE && discard_all == TRUE) {
7282 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START);
7283
7284 HIBLOG("hibernate_teardown started\n");
7285 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
7286 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
7287
7288 pages -= count_discard_vm_struct_pages;
7289 count_wire -= count_discard_vm_struct_pages;
7290
7291 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
7292
7293 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_END);
7294 }
7295
7296 if (!preflight) {
7297 // pull wired from hibernate_bitmap
7298 bitmap = &page_list->bank_bitmap[0];
7299 bitmap_wired = &page_list_wired->bank_bitmap[0];
7300 for (bank = 0; bank < page_list->bank_count; bank++)
7301 {
7302 for (i = 0; i < bitmap->bitmapwords; i++)
7303 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
7304 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
7305 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
7306 }
7307 }
7308
7309 // machine dependent adjustments
7310 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
7311
7312 if (!preflight) {
7313 hibernate_stats.cd_count_wire = count_wire;
7314 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
7315 count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
7316 }
7317
7318 clock_get_uptime(&end);
7319 absolutetime_to_nanoseconds(end - start, &nsec);
7320 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
7321
7322 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
7323 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
7324 discard_all ? "did" : "could",
7325 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
7326
7327 if (hibernate_stats.cd_skipped_xpmapped)
7328 HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);
7329
7330 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
7331
7332 if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
7333
7334 hibernation_vmqueues_inspection = FALSE;
7335
7336 #if MACH_ASSERT || DEBUG
7337 if (!preflight)
7338 {
7339 if (vm_page_local_q) {
7340 for (i = 0; i < vm_page_local_q_count; i++) {
7341 struct vpl *lq;
7342 lq = &vm_page_local_q[i].vpl_un.vpl;
7343 VPL_UNLOCK(&lq->vpl_lock);
7344 }
7345 }
7346 vm_page_unlock_queues();
7347 }
7348 #endif /* MACH_ASSERT || DEBUG */
7349
7350 if (preflight) {
7351 lck_mtx_unlock(&vm_page_queue_free_lock);
7352 vm_page_unlock_queues();
7353 vm_object_unlock(compressor_object);
7354 }
7355
7356 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
7357 }
7358
7359 void
7360 hibernate_page_list_discard(hibernate_page_list_t * page_list)
7361 {
7362 uint64_t start, end, nsec;
7363 vm_page_t m;
7364 vm_page_t next;
7365 uint32_t i;
7366 uint32_t count_discard_active = 0;
7367 uint32_t count_discard_inactive = 0;
7368 uint32_t count_discard_purgeable = 0;
7369 uint32_t count_discard_cleaned = 0;
7370 uint32_t count_discard_speculative = 0;
7371
7372
7373 #if MACH_ASSERT || DEBUG
7374 vm_page_lock_queues();
7375 if (vm_page_local_q) {
7376 for (i = 0; i < vm_page_local_q_count; i++) {
7377 struct vpl *lq;
7378 lq = &vm_page_local_q[i].vpl_un.vpl;
7379 VPL_LOCK(&lq->vpl_lock);
7380 }
7381 }
7382 #endif /* MACH_ASSERT || DEBUG */
7383
7384 clock_get_uptime(&start);
7385
7386 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
7387 while (m && !vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t)m))
7388 {
7389 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
7390
7391 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7392 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7393 {
7394 if (m->vmp_dirty)
7395 count_discard_purgeable++;
7396 else
7397 count_discard_inactive++;
7398 hibernate_discard_page(m);
7399 }
7400 m = next;
7401 }
7402
7403 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
7404 {
7405 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_speculative[i].age_q);
7406 while (m && !vm_page_queue_end(&vm_page_queue_speculative[i].age_q, (vm_page_queue_entry_t)m))
7407 {
7408 assert(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q);
7409
7410 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7411 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7412 {
7413 count_discard_speculative++;
7414 hibernate_discard_page(m);
7415 }
7416 m = next;
7417 }
7418 }
7419
7420 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
7421 while (m && !vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t)m))
7422 {
7423 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
7424
7425 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7426 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7427 {
7428 if (m->vmp_dirty)
7429 count_discard_purgeable++;
7430 else
7431 count_discard_inactive++;
7432 hibernate_discard_page(m);
7433 }
7434 m = next;
7435 }
7436 /* XXX FBDP TODO: secluded queue */
7437
7438 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
7439 while (m && !vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t)m))
7440 {
7441 assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q);
7442
7443 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7444 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7445 {
7446 if (m->vmp_dirty)
7447 count_discard_purgeable++;
7448 else
7449 count_discard_active++;
7450 hibernate_discard_page(m);
7451 }
7452 m = next;
7453 }
7454
7455 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
7456 while (m && !vm_page_queue_end(&vm_page_queue_cleaned, (vm_page_queue_entry_t)m))
7457 {
7458 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
7459
7460 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7461 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
7462 {
7463 if (m->vmp_dirty)
7464 count_discard_purgeable++;
7465 else
7466 count_discard_cleaned++;
7467 hibernate_discard_page(m);
7468 }
7469 m = next;
7470 }
7471
7472 #if MACH_ASSERT || DEBUG
7473 if (vm_page_local_q) {
7474 for (i = 0; i < vm_page_local_q_count; i++) {
7475 struct vpl *lq;
7476 lq = &vm_page_local_q[i].vpl_un.vpl;
7477 VPL_UNLOCK(&lq->vpl_lock);
7478 }
7479 }
7480 vm_page_unlock_queues();
7481 #endif /* MACH_ASSERT || DEBUG */
7482
7483 clock_get_uptime(&end);
7484 absolutetime_to_nanoseconds(end - start, &nsec);
7485 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
7486 nsec / 1000000ULL,
7487 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
7488 }
7489
7490 boolean_t hibernate_paddr_map_inited = FALSE;
7491 unsigned int hibernate_teardown_last_valid_compact_indx = -1;
7492 vm_page_t hibernate_rebuild_hash_list = NULL;
7493
7494 unsigned int hibernate_teardown_found_tabled_pages = 0;
7495 unsigned int hibernate_teardown_found_created_pages = 0;
7496 unsigned int hibernate_teardown_found_free_pages = 0;
7497 unsigned int hibernate_teardown_vm_page_free_count;
7498
7499
7500 struct ppnum_mapping {
7501 struct ppnum_mapping *ppnm_next;
7502 ppnum_t ppnm_base_paddr;
7503 unsigned int ppnm_sindx;
7504 unsigned int ppnm_eindx;
7505 };
7506
7507 struct ppnum_mapping *ppnm_head;
7508 struct ppnum_mapping *ppnm_last_found = NULL;
7509
7510
7511 void
7512 hibernate_create_paddr_map()
7513 {
7514 unsigned int i;
7515 ppnum_t next_ppnum_in_run = 0;
7516 struct ppnum_mapping *ppnm = NULL;
7517
7518 if (hibernate_paddr_map_inited == FALSE) {
7519
7520 for (i = 0; i < vm_pages_count; i++) {
7521
7522 if (ppnm)
7523 ppnm->ppnm_eindx = i;
7524
7525 if (ppnm == NULL || VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) != next_ppnum_in_run) {
7526
7527 ppnm = kalloc(sizeof(struct ppnum_mapping));
7528
7529 ppnm->ppnm_next = ppnm_head;
7530 ppnm_head = ppnm;
7531
7532 ppnm->ppnm_sindx = i;
7533 ppnm->ppnm_base_paddr = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]);
7534 }
7535 next_ppnum_in_run = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) + 1;
7536 }
7537 ppnm->ppnm_eindx++;
7538
7539 hibernate_paddr_map_inited = TRUE;
7540 }
7541 }
7542
7543 ppnum_t
7544 hibernate_lookup_paddr(unsigned int indx)
7545 {
7546 struct ppnum_mapping *ppnm = NULL;
7547
7548 ppnm = ppnm_last_found;
7549
7550 if (ppnm) {
7551 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
7552 goto done;
7553 }
7554 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
7555
7556 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
7557 ppnm_last_found = ppnm;
7558 break;
7559 }
7560 }
7561 if (ppnm == NULL)
7562 panic("hibernate_lookup_paddr of %d failed\n", indx);
7563 done:
7564 return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
7565 }
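/*
 * Editor's note: illustrative sketch only, not part of the original source.
 * The ppnum_mapping list is a run-length encoding of the vm_pages[] index
 * space: each node covers one physically contiguous run, so a lookup is just
 * base + offset within the run, exactly as hibernate_lookup_paddr() computes
 * above.  The index and ppnum values below are made up for illustration.
 */
#if 0 /* illustrative only */
static void
hibernate_paddr_map_example(void)
{
	struct ppnum_mapping run;

	run.ppnm_next       = NULL;
	run.ppnm_sindx      = 100;	/* run covers vm_pages[100..200) */
	run.ppnm_eindx      = 200;
	run.ppnm_base_paddr = 0x8000;	/* physical page of vm_pages[100] */

	/* same arithmetic as hibernate_lookup_paddr() */
	ppnum_t pp = run.ppnm_base_paddr + (150 - run.ppnm_sindx);	/* 0x8032 */
	(void) pp;
}
#endif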
7566
7567
7568 uint32_t
7569 hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
7570 {
7571 addr64_t saddr_aligned;
7572 addr64_t eaddr_aligned;
7573 addr64_t addr;
7574 ppnum_t paddr;
7575 unsigned int mark_as_unneeded_pages = 0;
7576
7577 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
7578 eaddr_aligned = eaddr & ~PAGE_MASK_64;
7579
7580 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
7581
7582 paddr = pmap_find_phys(kernel_pmap, addr);
7583
7584 assert(paddr);
7585
7586 hibernate_page_bitset(page_list, TRUE, paddr);
7587 hibernate_page_bitset(page_list_wired, TRUE, paddr);
7588
7589 mark_as_unneeded_pages++;
7590 }
7591 return (mark_as_unneeded_pages);
7592 }
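/*
 * Editor's note: worked example only, not part of the original source.
 * Because saddr is rounded up and eaddr is rounded down to page boundaries,
 * only pages fully contained in [saddr, eaddr) are marked.  With 4K pages,
 * saddr = 0x1010 and eaddr = 0x4ff0 align to [0x2000, 0x4000), so exactly
 * the two pages at 0x2000 and 0x3000 are set in both bitmaps and the
 * function returns 2.
 */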
7593
7594
7595 void
7596 hibernate_hash_insert_page(vm_page_t mem)
7597 {
7598 vm_page_bucket_t *bucket;
7599 int hash_id;
7600 vm_object_t m_object;
7601
7602 m_object = VM_PAGE_OBJECT(mem);
7603
7604 assert(mem->vmp_hashed);
7605 assert(m_object);
7606 assert(mem->vmp_offset != (vm_object_offset_t) -1);
7607
7608 /*
7609 * Insert it into the object/offset hash table
7610 */
7611 hash_id = vm_page_hash(m_object, mem->vmp_offset);
7612 bucket = &vm_page_buckets[hash_id];
7613
7614 mem->vmp_next_m = bucket->page_list;
7615 bucket->page_list = VM_PAGE_PACK_PTR(mem);
7616 }
7617
7618
7619 void
7620 hibernate_free_range(int sindx, int eindx)
7621 {
7622 vm_page_t mem;
7623 unsigned int color;
7624
7625 while (sindx < eindx) {
7626 mem = &vm_pages[sindx];
7627
7628 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
7629
7630 mem->vmp_lopage = FALSE;
7631 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
7632
7633 color = VM_PAGE_GET_COLOR(mem);
7634 #if defined(__x86_64__)
7635 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead,
7636 mem,
7637 vm_page_t,
7638 vmp_pageq);
7639 #else
7640 vm_page_queue_enter(&vm_page_queue_free[color].qhead,
7641 mem,
7642 vm_page_t,
7643 vmp_pageq);
7644 #endif
7645 vm_page_free_count++;
7646
7647 sindx++;
7648 }
7649 }
7650
7651
7652 extern void hibernate_rebuild_pmap_structs(void);
7653
7654 void
7655 hibernate_rebuild_vm_structs(void)
7656 {
7657 int i, cindx, sindx, eindx;
7658 vm_page_t mem, tmem, mem_next;
7659 AbsoluteTime startTime, endTime;
7660 uint64_t nsec;
7661
7662 if (hibernate_rebuild_needed == FALSE)
7663 return;
7664
7665 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START);
7666 HIBLOG("hibernate_rebuild started\n");
7667
7668 clock_get_uptime(&startTime);
7669
7670 hibernate_rebuild_pmap_structs();
7671
7672 bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
7673 eindx = vm_pages_count;
7674
7675 /*
7676 * Mark all the vm_pages[] that have not been initialized yet as being
7677 * transient. This is needed to ensure that the buddy page search is correct.
7678 * Without this, random data in these vm_pages[] can trip up the buddy search.
7679 */
7680 for (i = hibernate_teardown_last_valid_compact_indx+1; i < eindx; ++i)
7681 vm_pages[i].vmp_q_state = VM_PAGE_NOT_ON_Q;
7682
7683 for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
7684
7685 mem = &vm_pages[cindx];
7686 assert(mem->vmp_q_state != VM_PAGE_ON_FREE_Q);
7687 /*
7688 * hibernate_teardown_vm_structs leaves the location where
7689 * this vm_page_t must be located in "vmp_next_m".
7690 */
7691 tmem = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->vmp_next_m));
7692 mem->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
7693
7694 sindx = (int)(tmem - &vm_pages[0]);
7695
7696 if (mem != tmem) {
7697 /*
7698 * this vm_page_t was moved by hibernate_teardown_vm_structs,
7699 * so move it back to its real location
7700 */
7701 *tmem = *mem;
7702 mem = tmem;
7703 }
7704 if (mem->vmp_hashed)
7705 hibernate_hash_insert_page(mem);
7706 /*
7707 * the 'hole' between this vm_page_t and the previous
7708 * vm_page_t we moved needs to be initialized as
7709 * a range of free vm_page_t's
7710 */
7711 hibernate_free_range(sindx + 1, eindx);
7712
7713 eindx = sindx;
7714 }
7715 if (sindx)
7716 hibernate_free_range(0, sindx);
7717
7718 assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);
7719
7720 /*
7721 * process the list of vm_page_t's that were entered in the hash,
7722 * but were not located in the vm_pages array... these are
7723 * vm_page_t's that were created on the fly (i.e. fictitious)
7724 */
7725 for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
7726 mem_next = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->vmp_next_m));
7727
7728 mem->vmp_next_m = 0;
7729 hibernate_hash_insert_page(mem);
7730 }
7731 hibernate_rebuild_hash_list = NULL;
7732
7733 clock_get_uptime(&endTime);
7734 SUB_ABSOLUTETIME(&endTime, &startTime);
7735 absolutetime_to_nanoseconds(endTime, &nsec);
7736
7737 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);
7738
7739 hibernate_rebuild_needed = FALSE;
7740
7741 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END);
7742 }
7743
7744
7745 extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);
7746
7747 uint32_t
7748 hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
7749 {
7750 unsigned int i;
7751 unsigned int compact_target_indx;
7752 vm_page_t mem, mem_next;
7753 vm_page_bucket_t *bucket;
7754 unsigned int mark_as_unneeded_pages = 0;
7755 unsigned int unneeded_vm_page_bucket_pages = 0;
7756 unsigned int unneeded_vm_pages_pages = 0;
7757 unsigned int unneeded_pmap_pages = 0;
7758 addr64_t start_of_unneeded = 0;
7759 addr64_t end_of_unneeded = 0;
7760
7761
7762 if (hibernate_should_abort())
7763 return (0);
7764
7765 hibernate_rebuild_needed = TRUE;
7766
7767 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
7768 vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
7769 vm_page_cleaned_count, compressor_object->resident_page_count);
7770
7771 for (i = 0; i < vm_page_bucket_count; i++) {
7772
7773 bucket = &vm_page_buckets[i];
7774
7775 for (mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list)); mem != VM_PAGE_NULL; mem = mem_next) {
7776 assert(mem->vmp_hashed);
7777
7778 mem_next = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->vmp_next_m));
7779
7780 if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
7781 mem->vmp_next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
7782 hibernate_rebuild_hash_list = mem;
7783 }
7784 }
7785 }
7786 unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
7787 mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;
7788
7789 hibernate_teardown_vm_page_free_count = vm_page_free_count;
7790
7791 compact_target_indx = 0;
7792
7793 for (i = 0; i < vm_pages_count; i++) {
7794
7795 mem = &vm_pages[i];
7796
7797 if (mem->vmp_q_state == VM_PAGE_ON_FREE_Q) {
7798 unsigned int color;
7799
7800 assert(mem->vmp_busy);
7801 assert(!mem->vmp_lopage);
7802
7803 color = VM_PAGE_GET_COLOR(mem);
7804
7805 vm_page_queue_remove(&vm_page_queue_free[color].qhead,
7806 mem,
7807 vm_page_t,
7808 vmp_pageq);
7809
7810 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
7811
7812 vm_page_free_count--;
7813
7814 hibernate_teardown_found_free_pages++;
7815
7816 if (vm_pages[compact_target_indx].vmp_q_state != VM_PAGE_ON_FREE_Q)
7817 compact_target_indx = i;
7818 } else {
7819 /*
7820 * record this vm_page_t's original location
7821 * we need this even if it doesn't get moved
7822 * as an indicator to the rebuild function that
7823 * we don't have to move it
7824 */
7825 mem->vmp_next_m = VM_PAGE_PACK_PTR(mem);
7826
7827 if (vm_pages[compact_target_indx].vmp_q_state == VM_PAGE_ON_FREE_Q) {
7828 /*
7829 * we've got a hole to fill, so
7830 * move this vm_page_t to its new home
7831 */
7832 vm_pages[compact_target_indx] = *mem;
7833 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
7834
7835 hibernate_teardown_last_valid_compact_indx = compact_target_indx;
7836 compact_target_indx++;
7837 } else
7838 hibernate_teardown_last_valid_compact_indx = i;
7839 }
7840 }
7841 unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
7842 (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
7843 mark_as_unneeded_pages += unneeded_vm_pages_pages;
7844
7845 hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);
7846
7847 if (start_of_unneeded) {
7848 unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
7849 mark_as_unneeded_pages += unneeded_pmap_pages;
7850 }
7851 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);
7852
7853 return (mark_as_unneeded_pages);
7854 }
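/*
 * Editor's note: illustrative summary only, not part of the original source.
 * Teardown/rebuild of vm_pages[] is a simple compaction scheme:
 *
 *	before teardown:  [ used | FREE | used | used | FREE | FREE | used ]
 *	after  teardown:  [ used | used | used | used |      unneeded      ]
 *
 * Each non-free entry is slid left into the first free slot, with its
 * original location remembered in vmp_next_m, and everything past
 * hibernate_teardown_last_valid_compact_indx is marked unneeded so it is
 * not written to the hibernation image.  hibernate_rebuild_vm_structs()
 * walks the compacted entries backwards, copies each one back to the slot
 * recorded in vmp_next_m, and re-creates the free runs in the resulting
 * holes via hibernate_free_range().
 */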
7855
7856
7857 #endif /* HIBERNATION */
7858
7859 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
7860
7861 #include <mach_vm_debug.h>
7862 #if MACH_VM_DEBUG
7863
7864 #include <mach_debug/hash_info.h>
7865 #include <vm/vm_debug.h>
7866
7867 /*
7868 * Routine: vm_page_info
7869 * Purpose:
7870 * Return information about the global VP table.
7871 * Fills the buffer with as much information as possible
7872 * and returns the desired size of the buffer.
7873 * Conditions:
7874 * Nothing locked. The caller should provide
7875 * possibly-pageable memory.
7876 */
7877
7878 unsigned int
7879 vm_page_info(
7880 hash_info_bucket_t *info,
7881 unsigned int count)
7882 {
7883 unsigned int i;
7884 lck_spin_t *bucket_lock;
7885
7886 if (vm_page_bucket_count < count)
7887 count = vm_page_bucket_count;
7888
7889 for (i = 0; i < count; i++) {
7890 vm_page_bucket_t *bucket = &vm_page_buckets[i];
7891 unsigned int bucket_count = 0;
7892 vm_page_t m;
7893
7894 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
7895 lck_spin_lock(bucket_lock);
7896
7897 for (m = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
7898 m != VM_PAGE_NULL;
7899 m = (vm_page_t)(VM_PAGE_UNPACK_PTR(m->vmp_next_m)))
7900 bucket_count++;
7901
7902 lck_spin_unlock(bucket_lock);
7903
7904 /* don't touch pageable memory while holding locks */
7905 info[i].hib_count = bucket_count;
7906 }
7907
7908 return vm_page_bucket_count;
7909 }
7910 #endif /* MACH_VM_DEBUG */
7911
7912 #if VM_PAGE_BUCKETS_CHECK
7913 void
7914 vm_page_buckets_check(void)
7915 {
7916 unsigned int i;
7917 vm_page_t p;
7918 unsigned int p_hash;
7919 vm_page_bucket_t *bucket;
7920 lck_spin_t *bucket_lock;
7921
7922 if (!vm_page_buckets_check_ready) {
7923 return;
7924 }
7925
7926 #if HIBERNATION
7927 if (hibernate_rebuild_needed ||
7928 hibernate_rebuild_hash_list) {
7929 panic("BUCKET_CHECK: hibernation in progress: "
7930 "rebuild_needed=%d rebuild_hash_list=%p\n",
7931 hibernate_rebuild_needed,
7932 hibernate_rebuild_hash_list);
7933 }
7934 #endif /* HIBERNATION */
7935
7936 #if VM_PAGE_FAKE_BUCKETS
7937 char *cp;
7938 for (cp = (char *) vm_page_fake_buckets_start;
7939 cp < (char *) vm_page_fake_buckets_end;
7940 cp++) {
7941 if (*cp != 0x5a) {
7942 panic("BUCKET_CHECK: corruption at %p in fake buckets "
7943 "[0x%llx:0x%llx]\n",
7944 cp,
7945 (uint64_t) vm_page_fake_buckets_start,
7946 (uint64_t) vm_page_fake_buckets_end);
7947 }
7948 }
7949 #endif /* VM_PAGE_FAKE_BUCKETS */
7950
7951 for (i = 0; i < vm_page_bucket_count; i++) {
7952 vm_object_t p_object;
7953
7954 bucket = &vm_page_buckets[i];
7955 if (!bucket->page_list) {
7956 continue;
7957 }
7958
7959 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
7960 lck_spin_lock(bucket_lock);
7961 p = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
7962
7963 while (p != VM_PAGE_NULL) {
7964 p_object = VM_PAGE_OBJECT(p);
7965
7966 if (!p->vmp_hashed) {
7967 panic("BUCKET_CHECK: page %p (%p,0x%llx) "
7968 "hash %d in bucket %d at %p "
7969 "is not hashed\n",
7970 p, p_object, p->vmp_offset,
7971 p_hash, i, bucket);
7972 }
7973 p_hash = vm_page_hash(p_object, p->vmp_offset);
7974 if (p_hash != i) {
7975 panic("BUCKET_CHECK: corruption in bucket %d "
7976 "at %p: page %p object %p offset 0x%llx "
7977 "hash %d\n",
7978 i, bucket, p, p_object, p->vmp_offset,
7979 p_hash);
7980 }
7981 p = (vm_page_t)(VM_PAGE_UNPACK_PTR(p->vmp_next_m));
7982 }
7983 lck_spin_unlock(bucket_lock);
7984 }
7985
7986 // printf("BUCKET_CHECK: checked buckets\n");
7987 }
7988 #endif /* VM_PAGE_BUCKETS_CHECK */
7989
7990 /*
7991 * 'vm_fault_enter' will place newly created pages (zero-fill and COW) onto the
7992 * local queues if they exist... it's the only spot in the system where we add pages
7993 * to those queues... once on those queues, those pages can only move to one of the
7994 * global page queues or the free queues... they NEVER move from local q to local q.
7995 * the 'local' state is stable when vm_page_queues_remove is called since we're behind
7996 * the global vm_page_queue_lock at this point... we still need to take the local lock
7997 * in case this operation is being run on a different CPU than the local queue's identity,
7998 * but we don't have to worry about the page moving to a global queue or becoming wired
7999 * while we're grabbing the local lock since those operations would require the global
8000 * vm_page_queue_lock to be held, and we already own it.
8001 *
8002 * this is why it's safe to utilize the wire_count field in the vm_page_t as the local_id...
8003 * 'wired' and local are ALWAYS mutually exclusive conditions.
8004 */
8005
8006 #if CONFIG_BACKGROUND_QUEUE
8007 void
8008 vm_page_queues_remove(vm_page_t mem, boolean_t remove_from_backgroundq)
8009 #else
8010 void
8011 vm_page_queues_remove(vm_page_t mem, boolean_t __unused remove_from_backgroundq)
8012 #endif
8013 {
8014 boolean_t was_pageable = TRUE;
8015 vm_object_t m_object;
8016
8017 m_object = VM_PAGE_OBJECT(mem);
8018
8019 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
8020
8021 if (mem->vmp_q_state == VM_PAGE_NOT_ON_Q)
8022 {
8023 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
8024 #if CONFIG_BACKGROUND_QUEUE
8025 if (remove_from_backgroundq == TRUE) {
8026 vm_page_remove_from_backgroundq(mem);
8027 }
8028 if (mem->vmp_on_backgroundq) {
8029 assert(mem->vmp_backgroundq.next != 0);
8030 assert(mem->vmp_backgroundq.prev != 0);
8031 } else {
8032 assert(mem->vmp_backgroundq.next == 0);
8033 assert(mem->vmp_backgroundq.prev == 0);
8034 }
8035 #endif /* CONFIG_BACKGROUND_QUEUE */
8036 return;
8037 }
8038
8039 if (mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR)
8040 {
8041 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
8042 #if CONFIG_BACKGROUND_QUEUE
8043 assert(mem->vmp_backgroundq.next == 0 &&
8044 mem->vmp_backgroundq.prev == 0 &&
8045 mem->vmp_on_backgroundq == FALSE);
8046 #endif
8047 return;
8048 }
8049 if (mem->vmp_q_state == VM_PAGE_IS_WIRED) {
8050 /*
8051 * might put these guys on a list for debugging purposes
8052 * if we do, we'll need to remove this assert
8053 */
8054 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
8055 #if CONFIG_BACKGROUND_QUEUE
8056 assert(mem->vmp_backgroundq.next == 0 &&
8057 mem->vmp_backgroundq.prev == 0 &&
8058 mem->vmp_on_backgroundq == FALSE);
8059 #endif
8060 return;
8061 }
8062
8063 assert(m_object != compressor_object);
8064 assert(m_object != kernel_object);
8065 assert(m_object != vm_submap_object);
8066 assert(!mem->vmp_fictitious);
8067
8068 switch(mem->vmp_q_state) {
8069
8070 case VM_PAGE_ON_ACTIVE_LOCAL_Q:
8071 {
8072 struct vpl *lq;
8073
8074 lq = &vm_page_local_q[mem->vmp_local_id].vpl_un.vpl;
8075 VPL_LOCK(&lq->vpl_lock);
8076 vm_page_queue_remove(&lq->vpl_queue,
8077 mem, vm_page_t, vmp_pageq);
8078 mem->vmp_local_id = 0;
8079 lq->vpl_count--;
8080 if (m_object->internal) {
8081 lq->vpl_internal_count--;
8082 } else {
8083 lq->vpl_external_count--;
8084 }
8085 VPL_UNLOCK(&lq->vpl_lock);
8086 was_pageable = FALSE;
8087 break;
8088 }
8089 case VM_PAGE_ON_ACTIVE_Q:
8090 {
8091 vm_page_queue_remove(&vm_page_queue_active,
8092 mem, vm_page_t, vmp_pageq);
8093 vm_page_active_count--;
8094 break;
8095 }
8096
8097 case VM_PAGE_ON_INACTIVE_INTERNAL_Q:
8098 {
8099 assert(m_object->internal == TRUE);
8100
8101 vm_page_inactive_count--;
8102 vm_page_queue_remove(&vm_page_queue_anonymous,
8103 mem, vm_page_t, vmp_pageq);
8104 vm_page_anonymous_count--;
8105
8106 vm_purgeable_q_advance_all();
8107 vm_page_balance_inactive(3);
8108 break;
8109 }
8110
8111 case VM_PAGE_ON_INACTIVE_EXTERNAL_Q:
8112 {
8113 assert(m_object->internal == FALSE);
8114
8115 vm_page_inactive_count--;
8116 vm_page_queue_remove(&vm_page_queue_inactive,
8117 mem, vm_page_t, vmp_pageq);
8118 vm_purgeable_q_advance_all();
8119 vm_page_balance_inactive(3);
8120 break;
8121 }
8122
8123 case VM_PAGE_ON_INACTIVE_CLEANED_Q:
8124 {
8125 assert(m_object->internal == FALSE);
8126
8127 vm_page_inactive_count--;
8128 vm_page_queue_remove(&vm_page_queue_cleaned,
8129 mem, vm_page_t, vmp_pageq);
8130 vm_page_cleaned_count--;
8131 vm_page_balance_inactive(3);
8132 break;
8133 }
8134
8135 case VM_PAGE_ON_THROTTLED_Q:
8136 {
8137 assert(m_object->internal == TRUE);
8138
8139 vm_page_queue_remove(&vm_page_queue_throttled,
8140 mem, vm_page_t, vmp_pageq);
8141 vm_page_throttled_count--;
8142 was_pageable = FALSE;
8143 break;
8144 }
8145
8146 case VM_PAGE_ON_SPECULATIVE_Q:
8147 {
8148 assert(m_object->internal == FALSE);
8149
8150 vm_page_remque(&mem->vmp_pageq);
8151 vm_page_speculative_count--;
8152 vm_page_balance_inactive(3);
8153 break;
8154 }
8155
8156 #if CONFIG_SECLUDED_MEMORY
8157 case VM_PAGE_ON_SECLUDED_Q:
8158 {
8159 vm_page_queue_remove(&vm_page_queue_secluded,
8160 mem, vm_page_t, vmp_pageq);
8161 vm_page_secluded_count--;
8162 if (m_object == VM_OBJECT_NULL) {
8163 vm_page_secluded_count_free--;
8164 was_pageable = FALSE;
8165 } else {
8166 assert(!m_object->internal);
8167 vm_page_secluded_count_inuse--;
8168 was_pageable = FALSE;
8169 // was_pageable = TRUE;
8170 }
8171 break;
8172 }
8173 #endif /* CONFIG_SECLUDED_MEMORY */
8174
8175 default:
8176 {
8177 /*
8178 * if (mem->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)
8179 * NOTE: vm_page_queues_remove does not deal with removing pages from the pageout queue...
8180 * the caller is responsible for determining if the page is on that queue, and if so, must
8181 * either first remove it (it needs both the page queues lock and the object lock to do
8182 * this via vm_pageout_steal_laundry), or avoid the call to vm_page_queues_remove
8183 *
8184 * we also don't expect to encounter VM_PAGE_ON_FREE_Q, VM_PAGE_ON_FREE_LOCAL_Q, VM_PAGE_ON_FREE_LOPAGE_Q
8185 * or any of the undefined states
8186 */
8187 panic("vm_page_queues_remove - bad page q_state (%p, %d)\n", mem, mem->vmp_q_state);
8188 break;
8189 }
8190
8191 }
8192 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
8193 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
8194
8195 #if CONFIG_BACKGROUND_QUEUE
8196 if (remove_from_backgroundq == TRUE)
8197 vm_page_remove_from_backgroundq(mem);
8198 #endif
8199 if (was_pageable) {
8200 if (m_object->internal) {
8201 vm_page_pageable_internal_count--;
8202 } else {
8203 vm_page_pageable_external_count--;
8204 }
8205 }
8206 }
8207
8208 void
8209 vm_page_remove_internal(vm_page_t page)
8210 {
8211 vm_object_t __object = VM_PAGE_OBJECT(page);
8212 if (page == __object->memq_hint) {
8213 vm_page_t __new_hint;
8214 vm_page_queue_entry_t __qe;
8215 __qe = (vm_page_queue_entry_t)vm_page_queue_next(&page->vmp_listq);
8216 if (vm_page_queue_end(&__object->memq, __qe)) {
8217 __qe = (vm_page_queue_entry_t)vm_page_queue_prev(&page->vmp_listq);
8218 if (vm_page_queue_end(&__object->memq, __qe)) {
8219 __qe = NULL;
8220 }
8221 }
8222 __new_hint = (vm_page_t)((uintptr_t) __qe);
8223 __object->memq_hint = __new_hint;
8224 }
8225 vm_page_queue_remove(&__object->memq, page, vm_page_t, vmp_listq);
8226 #if CONFIG_SECLUDED_MEMORY
8227 if (__object->eligible_for_secluded) {
8228 vm_page_secluded.eligible_for_secluded--;
8229 }
8230 #endif /* CONFIG_SECLUDED_MEMORY */
8231 }
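/*
 * Editor's note: illustrative gloss only, not part of the original source.
 * When the page being removed is the object's current memq_hint, the hint is
 * re-seated on a neighbour (next if there is one, else prev, else NULL) so
 * that hinted lookups on this object keep their fast path instead of
 * pointing at a page that is no longer on the memq.
 */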
8232
8233 void
8234 vm_page_enqueue_inactive(vm_page_t mem, boolean_t first)
8235 {
8236 vm_object_t m_object;
8237
8238 m_object = VM_PAGE_OBJECT(mem);
8239
8240 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
8241 assert(!mem->vmp_fictitious);
8242 assert(!mem->vmp_laundry);
8243 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
8244 vm_page_check_pageable_safe(mem);
8245
8246 if (m_object->internal) {
8247 mem->vmp_q_state = VM_PAGE_ON_INACTIVE_INTERNAL_Q;
8248
8249 if (first == TRUE)
8250 vm_page_queue_enter_first(&vm_page_queue_anonymous, mem, vm_page_t, vmp_pageq);
8251 else
8252 vm_page_queue_enter(&vm_page_queue_anonymous, mem, vm_page_t, vmp_pageq);
8253
8254 vm_page_anonymous_count++;
8255 vm_page_pageable_internal_count++;
8256 } else {
8257 mem->vmp_q_state = VM_PAGE_ON_INACTIVE_EXTERNAL_Q;
8258
8259 if (first == TRUE)
8260 vm_page_queue_enter_first(&vm_page_queue_inactive, mem, vm_page_t, vmp_pageq);
8261 else
8262 vm_page_queue_enter(&vm_page_queue_inactive, mem, vm_page_t, vmp_pageq);
8263
8264 vm_page_pageable_external_count++;
8265 }
8266 vm_page_inactive_count++;
8267 token_new_pagecount++;
8268
8269 #if CONFIG_BACKGROUND_QUEUE
8270 if (mem->vmp_in_background)
8271 vm_page_add_to_backgroundq(mem, FALSE);
8272 #endif
8273 }
8274
8275 void
8276 vm_page_enqueue_active(vm_page_t mem, boolean_t first)
8277 {
8278 vm_object_t m_object;
8279
8280 m_object = VM_PAGE_OBJECT(mem);
8281
8282 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
8283 assert(!mem->vmp_fictitious);
8284 assert(!mem->vmp_laundry);
8285 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
8286 vm_page_check_pageable_safe(mem);
8287
8288 mem->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
8289 if (first == TRUE)
8290 vm_page_queue_enter_first(&vm_page_queue_active, mem, vm_page_t, vmp_pageq);
8291 else
8292 vm_page_queue_enter(&vm_page_queue_active, mem, vm_page_t, vmp_pageq);
8293 vm_page_active_count++;
8294
8295 if (m_object->internal) {
8296 vm_page_pageable_internal_count++;
8297 } else {
8298 vm_page_pageable_external_count++;
8299 }
8300
8301 #if CONFIG_BACKGROUND_QUEUE
8302 if (mem->vmp_in_background)
8303 vm_page_add_to_backgroundq(mem, FALSE);
8304 #endif
8305 vm_page_balance_inactive(3);
8306 }
8307
8308 /*
8309 * Pages from special kernel objects shouldn't
8310 * be placed on pageable queues.
8311 */
8312 void
8313 vm_page_check_pageable_safe(vm_page_t page)
8314 {
8315 vm_object_t page_object;
8316
8317 page_object = VM_PAGE_OBJECT(page);
8318
8319 if (page_object == kernel_object) {
8320 panic("vm_page_check_pageable_safe: trying to add page" \
8321 "from kernel object (%p) to pageable queue", kernel_object);
8322 }
8323
8324 if (page_object == compressor_object) {
8325 panic("vm_page_check_pageable_safe: trying to add page" \
8326 "from compressor object (%p) to pageable queue", compressor_object);
8327 }
8328
8329 if (page_object == vm_submap_object) {
8330 panic("vm_page_check_pageable_safe: trying to add page" \
8331 "from submap object (%p) to pageable queue", vm_submap_object);
8332 }
8333 }
8334
8335 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
8336 * wired page diagnose
8337 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
8338
8339 #include <libkern/OSKextLibPrivate.h>
8340
8341 #define KA_SIZE(namelen, subtotalscount) \
8342 (sizeof(struct vm_allocation_site) + (namelen) + 1 + ((subtotalscount) * sizeof(struct vm_allocation_total)))
8343
8344 #define KA_NAME(alloc) \
8345 ((char *)(&(alloc)->subtotals[(alloc->subtotalscount)]))
8346
8347 #define KA_NAME_LEN(alloc) \
8348 (VM_TAG_NAME_LEN_MAX & (alloc->flags >> VM_TAG_NAME_LEN_SHIFT))
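/*
 * Editor's note: illustrative layout sketch only, not part of the original
 * source.  A named allocation site is one variable-length kalloc'd block:
 *
 *	+---------------------------+---------------------------+-------------+
 *	| struct vm_allocation_site | subtotalscount subtotals  | name + '\0' |
 *	+---------------------------+---------------------------+-------------+
 *
 * KA_SIZE(namelen, subtotalscount) is the size of that whole block,
 * KA_NAME(alloc) points just past the subtotals array (which is why
 * kern_allocation_name_allocate() below can strlcpy() the name there), and
 * KA_NAME_LEN(alloc) recovers the name length stashed in the flags field.
 */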
8349
8350 vm_tag_t
8351 vm_tag_bt(void)
8352 {
8353 uintptr_t* frameptr;
8354 uintptr_t* frameptr_next;
8355 uintptr_t retaddr;
8356 uintptr_t kstackb, kstackt;
8357 const vm_allocation_site_t * site;
8358 thread_t cthread;
8359 kern_allocation_name_t name;
8360
8361 cthread = current_thread();
8362 if (__improbable(cthread == NULL)) return VM_KERN_MEMORY_OSFMK;
8363
8364 if ((name = thread_get_kernel_state(cthread)->allocation_name))
8365 {
8366 if (!name->tag) vm_tag_alloc(name);
8367 return name->tag;
8368 }
8369
8370 kstackb = cthread->kernel_stack;
8371 kstackt = kstackb + kernel_stack_size;
8372
8373 /* Load stack frame pointer (EBP on x86) into frameptr */
8374 frameptr = __builtin_frame_address(0);
8375 site = NULL;
8376 while (frameptr != NULL)
8377 {
8378 /* Verify thread stack bounds */
8379 if (((uintptr_t)(frameptr + 2) > kstackt) || ((uintptr_t)frameptr < kstackb)) break;
8380
8381 /* Next frame pointer is pointed to by the previous one */
8382 frameptr_next = (uintptr_t*) *frameptr;
8383
8384 /* Pull return address from one spot above the frame pointer */
8385 retaddr = *(frameptr + 1);
8386
8387
8388 if (((retaddr < vm_kernel_builtinkmod_text_end) && (retaddr >= vm_kernel_builtinkmod_text))
8389 || (retaddr < vm_kernel_stext) || (retaddr > vm_kernel_top))
8390 {
8391 site = OSKextGetAllocationSiteForCaller(retaddr);
8392 break;
8393 }
8394 frameptr = frameptr_next;
8395 }
8396
8397 return (site ? site->tag : VM_KERN_MEMORY_NONE);
8398 }
8399
8400 static uint64_t free_tag_bits[VM_MAX_TAG_VALUE/64];
8401
8402 void
8403 vm_tag_alloc_locked(vm_allocation_site_t * site, vm_allocation_site_t ** releasesiteP)
8404 {
8405 vm_tag_t tag;
8406 uint64_t avail;
8407 uint32_t idx;
8408 vm_allocation_site_t * prev;
8409
8410 if (site->tag) return;
8411
8412 idx = 0;
8413 while (TRUE)
8414 {
8415 avail = free_tag_bits[idx];
8416 if (avail)
8417 {
8418 tag = __builtin_clzll(avail);
8419 avail &= ~(1ULL << (63 - tag));
8420 free_tag_bits[idx] = avail;
8421 tag += (idx << 6);
8422 break;
8423 }
8424 idx++;
8425 if (idx >= ARRAY_COUNT(free_tag_bits))
8426 {
8427 for (idx = 0; idx < ARRAY_COUNT(vm_allocation_sites); idx++)
8428 {
8429 prev = vm_allocation_sites[idx];
8430 if (!prev) continue;
8431 if (!KA_NAME_LEN(prev)) continue;
8432 if (!prev->tag) continue;
8433 if (prev->total) continue;
8434 if (1 != prev->refcount) continue;
8435
8436 assert(idx == prev->tag);
8437 tag = idx;
8438 prev->tag = VM_KERN_MEMORY_NONE;
8439 *releasesiteP = prev;
8440 break;
8441 }
8442 if (idx >= ARRAY_COUNT(vm_allocation_sites))
8443 {
8444 tag = VM_KERN_MEMORY_ANY;
8445 }
8446 break;
8447 }
8448 }
8449 site->tag = tag;
8450
8451 OSAddAtomic16(1, &site->refcount);
8452
8453 if (VM_KERN_MEMORY_ANY != tag) vm_allocation_sites[tag] = site;
8454
8455 if (tag > vm_allocation_tag_highest) vm_allocation_tag_highest = tag;
8456 }
8457
8458 static void
8459 vm_tag_free_locked(vm_tag_t tag)
8460 {
8461 uint64_t avail;
8462 uint32_t idx;
8463 uint64_t bit;
8464
8465 if (VM_KERN_MEMORY_ANY == tag) return;
8466
8467 idx = (tag >> 6);
8468 avail = free_tag_bits[idx];
8469 tag &= 63;
8470 bit = (1ULL << (63 - tag));
8471 assert(!(avail & bit));
8472 free_tag_bits[idx] = (avail | bit);
8473 }
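/*
 * Editor's note: illustrative gloss only, not part of the original source.
 * free_tag_bits[] is a "tag is free" bitmap: word (tag >> 6), bit
 * (1ULL << (63 - (tag & 63))), so bit 63 of word 0 corresponds to tag 0 and
 * the __builtin_clzll() in vm_tag_alloc_locked() hands out the lowest free
 * tag in a word.  e.g. freeing tag 70 sets bit (1ULL << 57) in
 * free_tag_bits[1]; an allocation scanning word 1 then sees clzll == 6 and
 * returns 6 + (1 << 6) == 70.
 */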
8474
8475 static void
8476 vm_tag_init(void)
8477 {
8478 vm_tag_t tag;
8479 for (tag = VM_KERN_MEMORY_FIRST_DYNAMIC; tag < VM_KERN_MEMORY_ANY; tag++)
8480 {
8481 vm_tag_free_locked(tag);
8482 }
8483
8484 for (tag = VM_KERN_MEMORY_ANY + 1; tag < VM_MAX_TAG_VALUE; tag++)
8485 {
8486 vm_tag_free_locked(tag);
8487 }
8488 }
8489
8490 vm_tag_t
8491 vm_tag_alloc(vm_allocation_site_t * site)
8492 {
8493 vm_tag_t tag;
8494 vm_allocation_site_t * releasesite;
8495
8496 if (VM_TAG_BT & site->flags)
8497 {
8498 tag = vm_tag_bt();
8499 if (VM_KERN_MEMORY_NONE != tag) return (tag);
8500 }
8501
8502 if (!site->tag)
8503 {
8504 releasesite = NULL;
8505 lck_spin_lock(&vm_allocation_sites_lock);
8506 vm_tag_alloc_locked(site, &releasesite);
8507 lck_spin_unlock(&vm_allocation_sites_lock);
8508 if (releasesite) kern_allocation_name_release(releasesite);
8509 }
8510
8511 return (site->tag);
8512 }
8513
8514 void
8515 vm_tag_update_size(vm_tag_t tag, int64_t delta)
8516 {
8517 vm_allocation_site_t * allocation;
8518 uint64_t prior;
8519
8520 assert(VM_KERN_MEMORY_NONE != tag);
8521 assert(tag < VM_MAX_TAG_VALUE);
8522
8523 allocation = vm_allocation_sites[tag];
8524 assert(allocation);
8525
8526 if (delta < 0) {
8527 assertf(allocation->total >= ((uint64_t)-delta), "tag %d, site %p", tag, allocation);
8528 }
8529 prior = OSAddAtomic64(delta, &allocation->total);
8530
8531 #if DEBUG || DEVELOPMENT
8532
8533 uint64_t new, peak;
8534 new = prior + delta;
8535 do
8536 {
8537 peak = allocation->peak;
8538 if (new <= peak) break;
8539 }
8540 while (!OSCompareAndSwap64(peak, new, &allocation->peak));
8541
8542 #endif /* DEBUG || DEVELOPMENT */
8543
8544 if (tag < VM_KERN_MEMORY_FIRST_DYNAMIC) return;
8545
8546 if (!prior && !allocation->tag) vm_tag_alloc(allocation);
8547 }
8548
8549 void
8550 kern_allocation_update_size(kern_allocation_name_t allocation, int64_t delta)
8551 {
8552 uint64_t prior;
8553
8554 if (delta < 0) {
8555 assertf(allocation->total >= ((uint64_t)-delta), "name %p", allocation);
8556 }
8557 prior = OSAddAtomic64(delta, &allocation->total);
8558
8559 #if DEBUG || DEVELOPMENT
8560
8561 uint64_t new, peak;
8562 new = prior + delta;
8563 do
8564 {
8565 peak = allocation->peak;
8566 if (new <= peak) break;
8567 }
8568 while (!OSCompareAndSwap64(peak, new, &allocation->peak));
8569
8570 #endif /* DEBUG || DEVELOPMENT */
8571
8572 if (!prior && !allocation->tag) vm_tag_alloc(allocation);
8573 }
8574
8575 #if VM_MAX_TAG_ZONES
8576
8577 void
8578 vm_allocation_zones_init(void)
8579 {
8580 kern_return_t ret;
8581 vm_offset_t addr;
8582 vm_size_t size;
8583
8584 size = VM_MAX_TAG_VALUE * sizeof(vm_allocation_zone_total_t **)
8585 + 2 * VM_MAX_TAG_ZONES * sizeof(vm_allocation_zone_total_t);
8586
8587 ret = kernel_memory_allocate(kernel_map,
8588 &addr, round_page(size), 0,
8589 KMA_ZERO, VM_KERN_MEMORY_DIAG);
8590 assert(KERN_SUCCESS == ret);
8591
8592 vm_allocation_zone_totals = (vm_allocation_zone_total_t **) addr;
8593 addr += VM_MAX_TAG_VALUE * sizeof(vm_allocation_zone_total_t **);
8594
8595 // prepopulate VM_KERN_MEMORY_DIAG & VM_KERN_MEMORY_KALLOC so allocations
8596 // in vm_tag_update_zone_size() won't recurse
8597 vm_allocation_zone_totals[VM_KERN_MEMORY_DIAG] = (vm_allocation_zone_total_t *) addr;
8598 addr += VM_MAX_TAG_ZONES * sizeof(vm_allocation_zone_total_t);
8599 vm_allocation_zone_totals[VM_KERN_MEMORY_KALLOC] = (vm_allocation_zone_total_t *) addr;
8600 }
8601
8602 void
8603 vm_tag_will_update_zone(vm_tag_t tag, uint32_t zidx)
8604 {
8605 vm_allocation_zone_total_t * zone;
8606
8607 assert(VM_KERN_MEMORY_NONE != tag);
8608 assert(tag < VM_MAX_TAG_VALUE);
8609
8610 if (zidx >= VM_MAX_TAG_ZONES) return;
8611
8612 zone = vm_allocation_zone_totals[tag];
8613 if (!zone)
8614 {
8615 zone = kalloc_tag(VM_MAX_TAG_ZONES * sizeof(*zone), VM_KERN_MEMORY_DIAG);
8616 if (!zone) return;
8617 bzero(zone, VM_MAX_TAG_ZONES * sizeof(*zone));
8618 if (!OSCompareAndSwapPtr(NULL, zone, &vm_allocation_zone_totals[tag]))
8619 {
8620 kfree(zone, VM_MAX_TAG_ZONES * sizeof(*zone));
8621 }
8622 }
8623 }
8624
8625 void
8626 vm_tag_update_zone_size(vm_tag_t tag, uint32_t zidx, int64_t delta, int64_t dwaste)
8627 {
8628 vm_allocation_zone_total_t * zone;
8629 uint32_t new;
8630
8631 assert(VM_KERN_MEMORY_NONE != tag);
8632 assert(tag < VM_MAX_TAG_VALUE);
8633
8634 if (zidx >= VM_MAX_TAG_ZONES) return;
8635
8636 zone = vm_allocation_zone_totals[tag];
8637 assert(zone);
8638 zone += zidx;
8639
8640 /* the zone is locked */
8641 if (delta < 0)
8642 {
8643 assertf(zone->total >= ((uint64_t)-delta), "zidx %d, tag %d, %p", zidx, tag, zone);
8644 zone->total += delta;
8645 }
8646 else
8647 {
8648 zone->total += delta;
8649 if (zone->total > zone->peak) zone->peak = zone->total;
8650 if (dwaste)
8651 {
8652 new = zone->waste;
8653 if (zone->wastediv < 65536) zone->wastediv++;
8654 else new -= (new >> 16);
8655 __assert_only bool ov = os_add_overflow(new, dwaste, &new);
8656 assert(!ov);
8657 zone->waste = new;
8658 }
8659 }
8660 }
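/*
 * Editor's note: illustrative gloss only, not part of the original source.
 * zone->waste / zone->wastediv approximates the average per-element waste:
 * wastediv counts samples until it saturates at 65536, after which waste
 * decays by 1/65536 per sample before the new dwaste is added, i.e. an
 * exponential moving average.  process_account() later converts this back
 * into bytes with (waste * total / elem_size) / wastediv, since
 * total / elem_size is the current element count for the zone.
 */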
8661
8662 #endif /* VM_MAX_TAG_ZONES */
8663
8664 void
8665 kern_allocation_update_subtotal(kern_allocation_name_t allocation, uint32_t subtag, int64_t delta)
8666 {
8667 kern_allocation_name_t other;
8668 struct vm_allocation_total * total;
8669 uint32_t subidx;
8670
8671 subidx = 0;
8672 assert(VM_KERN_MEMORY_NONE != subtag);
8673 for (; subidx < allocation->subtotalscount; subidx++)
8674 {
8675 if (VM_KERN_MEMORY_NONE == allocation->subtotals[subidx].tag)
8676 {
8677 allocation->subtotals[subidx].tag = subtag;
8678 break;
8679 }
8680 if (subtag == allocation->subtotals[subidx].tag) break;
8681 }
8682 assert(subidx < allocation->subtotalscount);
8683 if (subidx >= allocation->subtotalscount) return;
8684
8685 total = &allocation->subtotals[subidx];
8686 other = vm_allocation_sites[subtag];
8687 assert(other);
8688
8689 if (delta < 0)
8690 {
8691 assertf(total->total >= ((uint64_t)-delta), "name %p", allocation);
8692 OSAddAtomic64(delta, &total->total);
8693 assertf(other->mapped >= ((uint64_t)-delta), "other %p", other);
8694 OSAddAtomic64(delta, &other->mapped);
8695 }
8696 else
8697 {
8698 OSAddAtomic64(delta, &other->mapped);
8699 OSAddAtomic64(delta, &total->total);
8700 }
8701 }
8702
8703 const char *
8704 kern_allocation_get_name(kern_allocation_name_t allocation)
8705 {
8706 return (KA_NAME(allocation));
8707 }
8708
8709 kern_allocation_name_t
8710 kern_allocation_name_allocate(const char * name, uint32_t subtotalscount)
8711 {
8712 uint32_t namelen;
8713
8714 namelen = (uint32_t) strnlen(name, MACH_MEMORY_INFO_NAME_MAX_LEN - 1);
8715
8716 kern_allocation_name_t allocation;
8717 allocation = kalloc(KA_SIZE(namelen, subtotalscount));
8718 bzero(allocation, KA_SIZE(namelen, subtotalscount));
8719
8720 allocation->refcount = 1;
8721 allocation->subtotalscount = subtotalscount;
8722 allocation->flags = (namelen << VM_TAG_NAME_LEN_SHIFT);
8723 strlcpy(KA_NAME(allocation), name, namelen + 1);
8724
8725 return (allocation);
8726 }
8727
8728 void
8729 kern_allocation_name_release(kern_allocation_name_t allocation)
8730 {
8731 assert(allocation->refcount > 0);
8732 if (1 == OSAddAtomic16(-1, &allocation->refcount))
8733 {
8734 kfree(allocation, KA_SIZE(KA_NAME_LEN(allocation), allocation->subtotalscount));
8735 }
8736 }
8737
8738 vm_tag_t
8739 kern_allocation_name_get_vm_tag(kern_allocation_name_t allocation)
8740 {
8741 return (vm_tag_alloc(allocation));
8742 }
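/*
 * Editor's note: illustrative usage sketch only, not part of the original
 * source.  A subsystem that wants its wired memory reported under its own
 * name would typically allocate a kern_allocation_name once, charge and
 * discharge bytes as it maps and unmaps memory, and drop its reference when
 * done.  The name string and sizes below are made up for illustration.
 */
#if 0 /* illustrative only */
static void
wired_accounting_example(void)
{
	kern_allocation_name_t name;

	name = kern_allocation_name_allocate("com.example.driver", 0);

	kern_allocation_update_size(name, 4 * (int64_t)PAGE_SIZE);	/* charge */
	kern_allocation_update_size(name, -4 * (int64_t)PAGE_SIZE);	/* discharge */

	kern_allocation_name_release(name);
}
#endif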
8743
8744 #if ! VM_TAG_ACTIVE_UPDATE
8745 static void
8746 vm_page_count_object(mach_memory_info_t * info, unsigned int __unused num_info, vm_object_t object)
8747 {
8748 if (!object->wired_page_count) return;
8749 if (object != kernel_object)
8750 {
8751 assert(object->wire_tag < num_info);
8752 info[object->wire_tag].size += ptoa_64(object->wired_page_count);
8753 }
8754 }
8755
8756 typedef void (*vm_page_iterate_proc)(mach_memory_info_t * info,
8757 unsigned int num_info, vm_object_t object);
8758
8759 static void
8760 vm_page_iterate_purgeable_objects(mach_memory_info_t * info, unsigned int num_info,
8761 vm_page_iterate_proc proc, purgeable_q_t queue,
8762 int group)
8763 {
8764 vm_object_t object;
8765
8766 for (object = (vm_object_t) queue_first(&queue->objq[group]);
8767 !queue_end(&queue->objq[group], (queue_entry_t) object);
8768 object = (vm_object_t) queue_next(&object->objq))
8769 {
8770 proc(info, num_info, object);
8771 }
8772 }
8773
8774 static void
8775 vm_page_iterate_objects(mach_memory_info_t * info, unsigned int num_info,
8776 vm_page_iterate_proc proc)
8777 {
8778 vm_object_t object;
8779
8780 lck_spin_lock(&vm_objects_wired_lock);
8781 queue_iterate(&vm_objects_wired,
8782 object,
8783 vm_object_t,
8784 wired_objq)
8785 {
8786 proc(info, num_info, object);
8787 }
8788 lck_spin_unlock(&vm_objects_wired_lock);
8789 }
8790 #endif /* ! VM_TAG_ACTIVE_UPDATE */
8791
8792 static uint64_t
8793 process_account(mach_memory_info_t * info, unsigned int num_info, uint64_t zones_collectable_bytes, boolean_t iterated)
8794 {
8795 size_t namelen;
8796 unsigned int idx, count, nextinfo;
8797 vm_allocation_site_t * site;
8798 lck_spin_lock(&vm_allocation_sites_lock);
8799
8800 for (idx = 0; idx <= vm_allocation_tag_highest; idx++)
8801 {
8802 site = vm_allocation_sites[idx];
8803 if (!site) continue;
8804 info[idx].mapped = site->mapped;
8805 info[idx].tag = site->tag;
8806 if (!iterated)
8807 {
8808 info[idx].size = site->total;
8809 #if DEBUG || DEVELOPMENT
8810 info[idx].peak = site->peak;
8811 #endif /* DEBUG || DEVELOPMENT */
8812 }
8813 else
8814 {
8815 if (!site->subtotalscount && (site->total != info[idx].size))
8816 {
8817 printf("tag mismatch[%d] 0x%qx, iter 0x%qx\n", idx, site->total, info[idx].size);
8818 info[idx].size = site->total;
8819 }
8820 }
8821 }
8822
8823 nextinfo = (vm_allocation_tag_highest + 1);
8824 count = nextinfo;
8825 if (count >= num_info) count = num_info;
8826
8827 for (idx = 0; idx < count; idx++)
8828 {
8829 site = vm_allocation_sites[idx];
8830 if (!site) continue;
8831 info[idx].flags |= VM_KERN_SITE_WIRED;
8832 if (idx < VM_KERN_MEMORY_FIRST_DYNAMIC)
8833 {
8834 info[idx].site = idx;
8835 info[idx].flags |= VM_KERN_SITE_TAG;
8836 if (VM_KERN_MEMORY_ZONE == idx)
8837 {
8838 info[idx].flags |= VM_KERN_SITE_HIDE;
8839 info[idx].flags &= ~VM_KERN_SITE_WIRED;
8840 info[idx].collectable_bytes = zones_collectable_bytes;
8841 }
8842 }
8843 else if ((namelen = (VM_TAG_NAME_LEN_MAX & (site->flags >> VM_TAG_NAME_LEN_SHIFT))))
8844 {
8845 info[idx].site = 0;
8846 info[idx].flags |= VM_KERN_SITE_NAMED;
8847 if (namelen > sizeof(info[idx].name)) namelen = sizeof(info[idx].name);
8848 strncpy(&info[idx].name[0], KA_NAME(site), namelen);
8849 }
8850 else if (VM_TAG_KMOD & site->flags)
8851 {
8852 info[idx].site = OSKextGetKmodIDForSite(site, NULL, 0);
8853 info[idx].flags |= VM_KERN_SITE_KMOD;
8854 }
8855 else
8856 {
8857 info[idx].site = VM_KERNEL_UNSLIDE(site);
8858 info[idx].flags |= VM_KERN_SITE_KERNEL;
8859 }
8860 #if VM_MAX_TAG_ZONES
8861 vm_allocation_zone_total_t * zone;
8862 unsigned int zidx;
8863 vm_size_t elem_size;
8864
8865 if (vm_allocation_zone_totals
8866 && (zone = vm_allocation_zone_totals[idx])
8867 && (nextinfo < num_info))
8868 {
8869 for (zidx = 0; zidx < VM_MAX_TAG_ZONES; zidx++)
8870 {
8871 if (!zone[zidx].peak) continue;
8872 info[nextinfo] = info[idx];
8873 info[nextinfo].zone = zone_index_from_tag_index(zidx, &elem_size);
8874 info[nextinfo].flags &= ~VM_KERN_SITE_WIRED;
8875 info[nextinfo].flags |= VM_KERN_SITE_ZONE;
8876 info[nextinfo].size = zone[zidx].total;
8877 info[nextinfo].peak = zone[zidx].peak;
8878 info[nextinfo].mapped = 0;
8879 if (zone[zidx].wastediv)
8880 {
8881 info[nextinfo].collectable_bytes = ((zone[zidx].waste * zone[zidx].total / elem_size) / zone[zidx].wastediv);
8882 }
8883 nextinfo++;
8884 }
8885 }
8886 #endif /* VM_MAX_TAG_ZONES */
8887 if (site->subtotalscount)
8888 {
8889 uint64_t mapped, mapcost, take;
8890 uint32_t sub;
8891 vm_tag_t alloctag;
8892
8893 info[idx].size = site->total;
8894 mapped = info[idx].size;
8895 info[idx].mapped = mapped;
8896 mapcost = 0;
8897 for (sub = 0; sub < site->subtotalscount; sub++)
8898 {
8899 alloctag = site->subtotals[sub].tag;
8900 assert(alloctag < num_info);
8901 if (info[alloctag].name[0]) continue;
8902 take = info[alloctag].mapped;
8903 if (take > info[alloctag].size) take = info[alloctag].size;
8904 if (take > mapped) take = mapped;
8905 info[alloctag].mapped -= take;
8906 info[alloctag].size -= take;
8907 mapped -= take;
8908 mapcost += take;
8909 }
8910 info[idx].size = mapcost;
8911 }
8912 }
8913 lck_spin_unlock(&vm_allocation_sites_lock);
8914
8915 return (0);
8916 }
8917
8918 uint32_t
8919 vm_page_diagnose_estimate(void)
8920 {
8921 vm_allocation_site_t * site;
8922 uint32_t count;
8923 uint32_t idx;
8924
8925 lck_spin_lock(&vm_allocation_sites_lock);
8926 for (count = idx = 0; idx < VM_MAX_TAG_VALUE; idx++)
8927 {
8928 site = vm_allocation_sites[idx];
8929 if (!site) continue;
8930 count++;
8931 #if VM_MAX_TAG_ZONES
8932 if (vm_allocation_zone_totals)
8933 {
8934 vm_allocation_zone_total_t * zone;
8935 zone = vm_allocation_zone_totals[idx];
8936 if (!zone) continue;
8937 for (uint32_t zidx = 0; zidx < VM_MAX_TAG_ZONES; zidx++) if (zone[zidx].peak) count++;
8938 }
8939 #endif
8940 }
8941 lck_spin_unlock(&vm_allocation_sites_lock);
8942
8943 /* some slop for new tags created */
8944 count += 8;
8945 count += VM_KERN_COUNTER_COUNT;
8946
8947 return (count);
8948 }
8949
8950
8951 kern_return_t
8952 vm_page_diagnose(mach_memory_info_t * info, unsigned int num_info, uint64_t zones_collectable_bytes)
8953 {
8954 uint64_t wired_size;
8955 uint64_t wired_managed_size;
8956 uint64_t wired_reserved_size;
8957 uint64_t booter_size;
8958 boolean_t iterate;
8959 mach_memory_info_t * counts;
8960
8961 bzero(info, num_info * sizeof(mach_memory_info_t));
8962
8963 if (!vm_page_wire_count_initial) return (KERN_ABORTED);
8964
8965 #if CONFIG_EMBEDDED
8966 wired_size = ptoa_64(vm_page_wire_count);
8967 wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count);
8968 #else
8969 wired_size = ptoa_64(vm_page_wire_count + vm_lopage_free_count + vm_page_throttled_count);
8970 wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count + vm_page_throttled_count);
8971 #endif
8972 wired_managed_size = ptoa_64(vm_page_wire_count - vm_page_wire_count_initial);
8973
8974 booter_size = ml_get_booter_memory_size();
8975 wired_size += booter_size;
8976
8977 assert(num_info >= VM_KERN_COUNTER_COUNT);
8978 num_info -= VM_KERN_COUNTER_COUNT;
8979 counts = &info[num_info];
8980
8981 #define SET_COUNT(xcount, xsize, xflags) \
8982 counts[xcount].tag = VM_MAX_TAG_VALUE + xcount; \
8983 counts[xcount].site = (xcount); \
8984 counts[xcount].size = (xsize); \
8985 counts[xcount].mapped = (xsize); \
8986 counts[xcount].flags = VM_KERN_SITE_COUNTER | xflags;
8987
8988 SET_COUNT(VM_KERN_COUNT_MANAGED, ptoa_64(vm_page_pages), 0);
8989 SET_COUNT(VM_KERN_COUNT_WIRED, wired_size, 0);
8990 SET_COUNT(VM_KERN_COUNT_WIRED_MANAGED, wired_managed_size, 0);
8991 SET_COUNT(VM_KERN_COUNT_RESERVED, wired_reserved_size, VM_KERN_SITE_WIRED);
8992 SET_COUNT(VM_KERN_COUNT_STOLEN, ptoa_64(vm_page_stolen_count), VM_KERN_SITE_WIRED);
8993 SET_COUNT(VM_KERN_COUNT_LOPAGE, ptoa_64(vm_lopage_free_count), VM_KERN_SITE_WIRED);
8994 SET_COUNT(VM_KERN_COUNT_WIRED_BOOT, ptoa_64(vm_page_wire_count_on_boot), 0);
8995 SET_COUNT(VM_KERN_COUNT_BOOT_STOLEN, booter_size, VM_KERN_SITE_WIRED);
8996
8997 #define SET_MAP(xcount, xsize, xfree, xlargest) \
8998 counts[xcount].site = (xcount); \
8999 counts[xcount].size = (xsize); \
9000 counts[xcount].mapped = (xsize); \
9001 counts[xcount].free = (xfree); \
9002 counts[xcount].largest = (xlargest); \
9003 counts[xcount].flags = VM_KERN_SITE_COUNTER;
9004
9005 vm_map_size_t map_size, map_free, map_largest;
9006
9007 vm_map_sizes(kernel_map, &map_size, &map_free, &map_largest);
9008 SET_MAP(VM_KERN_COUNT_MAP_KERNEL, map_size, map_free, map_largest);
9009
9010 vm_map_sizes(zone_map, &map_size, &map_free, &map_largest);
9011 SET_MAP(VM_KERN_COUNT_MAP_ZONE, map_size, map_free, map_largest);
9012
9013 vm_map_sizes(kalloc_map, &map_size, &map_free, &map_largest);
9014 SET_MAP(VM_KERN_COUNT_MAP_KALLOC, map_size, map_free, map_largest);
9015
9016 iterate = !VM_TAG_ACTIVE_UPDATE;
9017 if (iterate)
9018 {
9019 enum { kMaxKernelDepth = 1 };
9020 vm_map_t maps [kMaxKernelDepth];
9021 vm_map_entry_t entries[kMaxKernelDepth];
9022 vm_map_t map;
9023 vm_map_entry_t entry;
9024 vm_object_offset_t offset;
9025 vm_page_t page;
9026 int stackIdx, count;
9027
9028 #if ! VM_TAG_ACTIVE_UPDATE
9029 vm_page_iterate_objects(info, num_info, &vm_page_count_object);
9030 #endif /* ! VM_TAG_ACTIVE_UPDATE */
9031
9032 map = kernel_map;
9033 stackIdx = 0;
9034 while (map)
9035 {
9036 vm_map_lock(map);
9037 for (entry = map->hdr.links.next; map; entry = entry->links.next)
9038 {
9039 if (entry->is_sub_map)
9040 {
9041 assert(stackIdx < kMaxKernelDepth);
9042 maps[stackIdx] = map;
9043 entries[stackIdx] = entry;
9044 stackIdx++;
9045 map = VME_SUBMAP(entry);
9046 entry = NULL;
9047 break;
9048 }
9049 if (VME_OBJECT(entry) == kernel_object)
9050 {
9051 count = 0;
9052 vm_object_lock(VME_OBJECT(entry));
9053 for (offset = entry->links.start; offset < entry->links.end; offset += page_size)
9054 {
9055 page = vm_page_lookup(VME_OBJECT(entry), offset);
9056 if (page && VM_PAGE_WIRED(page)) count++;
9057 }
9058 vm_object_unlock(VME_OBJECT(entry));
9059
9060 if (count)
9061 {
9062 assert(VME_ALIAS(entry) != VM_KERN_MEMORY_NONE);
9063 assert(VME_ALIAS(entry) < num_info);
9064 info[VME_ALIAS(entry)].size += ptoa_64(count);
9065 }
9066 }
9067 while (map && (entry == vm_map_last_entry(map)))
9068 {
9069 vm_map_unlock(map);
9070 if (!stackIdx) map = NULL;
9071 else
9072 {
9073 --stackIdx;
9074 map = maps[stackIdx];
9075 entry = entries[stackIdx];
9076 }
9077 }
9078 }
9079 }
9080 }
9081
9082 process_account(info, num_info, zones_collectable_bytes, iterate);
9083
9084 return (KERN_SUCCESS);
9085 }
9086
9087 #if DEBUG || DEVELOPMENT
9088
9089 kern_return_t
9090 vm_kern_allocation_info(uintptr_t addr, vm_size_t * size, vm_tag_t * tag, vm_size_t * zone_size)
9091 {
9092 kern_return_t ret;
9093 vm_size_t zsize;
9094 vm_map_t map;
9095 vm_map_entry_t entry;
9096
9097 zsize = zone_element_info((void *) addr, tag);
9098 if (zsize)
9099 {
9100 *zone_size = *size = zsize;
9101 return (KERN_SUCCESS);
9102 }
9103
9104 *zone_size = 0;
9105 ret = KERN_INVALID_ADDRESS;
9106 for (map = kernel_map; map; )
9107 {
9108 vm_map_lock(map);
9109 if (!vm_map_lookup_entry(map, addr, &entry)) break;
9110 if (entry->is_sub_map)
9111 {
9112 if (map != kernel_map) break;
9113 map = VME_SUBMAP(entry);
9114 continue;
9115 }
9116 if (entry->vme_start != addr) break;
9117 *tag = VME_ALIAS(entry);
9118 *size = (entry->vme_end - addr);
9119 ret = KERN_SUCCESS;
9120 break;
9121 }
9122 if (map != kernel_map) vm_map_unlock(map);
9123 vm_map_unlock(kernel_map);
9124
9125 return (ret);
9126 }
9127
9128 #endif /* DEBUG || DEVELOPMENT */
9129
9130 uint32_t
9131 vm_tag_get_kext(vm_tag_t tag, char * name, vm_size_t namelen)
9132 {
9133 vm_allocation_site_t * site;
9134 uint32_t kmodId;
9135
9136 kmodId = 0;
9137 lck_spin_lock(&vm_allocation_sites_lock);
9138 if ((site = vm_allocation_sites[tag]))
9139 {
9140 if (VM_TAG_KMOD & site->flags)
9141 {
9142 kmodId = OSKextGetKmodIDForSite(site, name, namelen);
9143 }
9144 }
9145 lck_spin_unlock(&vm_allocation_sites_lock);
9146
9147 return (kmodId);
9148 }
9149
9150
9151 #if CONFIG_SECLUDED_MEMORY
9152 /*
9153 * Note that there's no locking around other accesses to vm_page_secluded_target.
9154 * That should be OK, since these are the only places where it can be changed after
9155 * initialization. Other users (like vm_pageout) may see the wrong value briefly,
9156 * but will eventually get the correct value. This brief mismatch is OK as pageout
9157 * and page freeing will auto-adjust the vm_page_secluded_count to match the target
9158 * over time.
9159 */
9160 unsigned int vm_page_secluded_suppress_cnt = 0;
9161 unsigned int vm_page_secluded_save_target;
9162
9163
9164 lck_grp_attr_t secluded_suppress_slock_grp_attr;
9165 lck_grp_t secluded_suppress_slock_grp;
9166 lck_attr_t secluded_suppress_slock_attr;
9167 lck_spin_t secluded_suppress_slock;
9168
9169 void
9170 secluded_suppression_init(void)
9171 {
9172 lck_grp_attr_setdefault(&secluded_suppress_slock_grp_attr);
9173 lck_grp_init(&secluded_suppress_slock_grp,
9174 "secluded_suppress_slock", &secluded_suppress_slock_grp_attr);
9175 lck_attr_setdefault(&secluded_suppress_slock_attr);
9176 lck_spin_init(&secluded_suppress_slock,
9177 &secluded_suppress_slock_grp, &secluded_suppress_slock_attr);
9178 }
9179
9180 void
9181 start_secluded_suppression(task_t task)
9182 {
9183 if (task->task_suppressed_secluded)
9184 return;
9185 lck_spin_lock(&secluded_suppress_slock);
9186 if (!task->task_suppressed_secluded && vm_page_secluded_suppress_cnt++ == 0) {
9187 task->task_suppressed_secluded = TRUE;
9188 vm_page_secluded_save_target = vm_page_secluded_target;
9189 vm_page_secluded_target = 0;
9190 }
9191 lck_spin_unlock(&secluded_suppress_slock);
9192 }
9193
9194 void
9195 stop_secluded_suppression(task_t task)
9196 {
9197 lck_spin_lock(&secluded_suppress_slock);
9198 if (task->task_suppressed_secluded && --vm_page_secluded_suppress_cnt == 0) {
9199 task->task_suppressed_secluded = FALSE;
9200 vm_page_secluded_target = vm_page_secluded_save_target;
9201 }
9202 lck_spin_unlock(&secluded_suppress_slock);
9203 }
9204
9205 #endif /* CONFIG_SECLUDED_MEMORY */