apple/xnu xnu-4903.241.1: osfmk/vm/vm_resident.c
1c79356b 1/*
b0d623f7 2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
91447636 65#include <debug.h>
2d21ac55 66#include <libkern/OSAtomic.h>
3e170ce0 67#include <libkern/OSDebug.h>
91447636 68
9bccf70c 69#include <mach/clock_types.h>
70#include <mach/vm_prot.h>
71#include <mach/vm_statistics.h>
2d21ac55 72#include <mach/sdt.h>
73#include <kern/counters.h>
74#include <kern/sched_prim.h>
39037602 75#include <kern/policy_internal.h>
76#include <kern/task.h>
77#include <kern/thread.h>
b0d623f7 78#include <kern/kalloc.h>
79#include <kern/zalloc.h>
80#include <kern/xpr.h>
fe8ab488 81#include <kern/ledger.h>
82#include <vm/pmap.h>
83#include <vm/vm_init.h>
84#include <vm/vm_map.h>
85#include <vm/vm_page.h>
86#include <vm/vm_pageout.h>
87#include <vm/vm_kern.h> /* kernel_memory_allocate() */
88#include <kern/misc_protos.h>
89#include <zone_debug.h>
3e170ce0 90#include <mach_debug/zone_info.h>
1c79356b 91#include <vm/cpm.h>
6d2010ae 92#include <pexpert/pexpert.h>
5ba3f43e 93#include <san/kasan.h>
55e303ae 94
91447636 95#include <vm/vm_protos.h>
96#include <vm/memory_object.h>
97#include <vm/vm_purgeable_internal.h>
39236c6e 98#include <vm/vm_compressor.h>
2d21ac55 99
100#if CONFIG_PHANTOM_CACHE
101#include <vm/vm_phantom_cache.h>
102#endif
103
104#include <IOKit/IOHibernatePrivate.h>
105
106#include <sys/kdebug.h>
107
39037602 108
d9a64523 109
110char vm_page_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
111char vm_page_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
112char vm_page_non_speculative_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
113char vm_page_active_or_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
114
115#if CONFIG_SECLUDED_MEMORY
116struct vm_page_secluded_data vm_page_secluded;
d9a64523 117void secluded_suppression_init(void);
118#endif /* CONFIG_SECLUDED_MEMORY */
119
316670eb 120boolean_t hibernate_cleaning_in_progress = FALSE;
121boolean_t vm_page_free_verify = TRUE;
122
123uint32_t vm_lopage_free_count = 0;
124uint32_t vm_lopage_free_limit = 0;
125uint32_t vm_lopage_lowater = 0;
126boolean_t vm_lopage_refill = FALSE;
127boolean_t vm_lopage_needed = FALSE;
128
129lck_mtx_ext_t vm_page_queue_lock_ext;
130lck_mtx_ext_t vm_page_queue_free_lock_ext;
131lck_mtx_ext_t vm_purgeable_queue_lock_ext;
2d21ac55 132
133int speculative_age_index = 0;
134int speculative_steal_index = 0;
135struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
136
137boolean_t hibernation_vmqueues_inspection = FALSE; /* Tracks if the hibernation code is looking at the VM queues.
138 * Updated and checked behind the vm_page_queues_lock. */
0b4e3aa0 139
140__private_extern__ void vm_page_init_lck_grp(void);
141
142static void vm_page_free_prepare(vm_page_t page);
143static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
144
3e170ce0 145static void vm_tag_init(void);
b0d623f7 146
3e170ce0 147uint64_t vm_min_kernel_and_kext_address = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
148uint32_t vm_packed_from_vm_pages_array_mask = VM_PACKED_FROM_VM_PAGES_ARRAY;
149uint32_t vm_packed_pointer_shift = VM_PACKED_POINTER_SHIFT;
b0d623f7 150
151/*
152 * Associated with each page of user-allocatable memory is a
153 * page structure.
154 */
155
156/*
157 * These variables record the values returned by vm_page_bootstrap,
158 * for debugging purposes. The implementation of pmap_steal_memory
159 * and pmap_startup here also uses them internally.
160 */
161
162vm_offset_t virtual_space_start;
163vm_offset_t virtual_space_end;
7ddcb079 164uint32_t vm_page_pages;
165
166/*
167 * The vm_page_lookup() routine, which provides for fast
168 * (virtual memory object, offset) to page lookup, employs
169 * the following hash table. The vm_page_{insert,remove}
170 * routines install and remove associations in the table.
171 * [This table is often called the virtual-to-physical,
172 * or VP, table.]
173 */
174typedef struct {
fe8ab488 175 vm_page_packed_t page_list;
176#if MACH_PAGE_HASH_STATS
177 int cur_count; /* current count */
178 int hi_count; /* high water mark */
179#endif /* MACH_PAGE_HASH_STATS */
180} vm_page_bucket_t;
181
182
183#define BUCKETS_PER_LOCK 16
184
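/*
 * Illustrative sketch (editorial addition, not in the original source):
 * with BUCKETS_PER_LOCK == 16, a bucket's spinlock is found by integer
 * division of the hash index, so 16 consecutive buckets share one lock.
 * The names below mirror the fields used later in vm_page_insert_internal():
 *
 *	hash_id     = vm_page_hash(object, offset);
 *	bucket      = &vm_page_buckets[hash_id];
 *	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
 */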
185vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
186unsigned int vm_page_bucket_count = 0; /* How big is array? */
187unsigned int vm_page_hash_mask; /* Mask for hash function */
188unsigned int vm_page_hash_shift; /* Shift for hash function */
2d21ac55 189uint32_t vm_page_bucket_hash; /* Basic bucket hash */
190unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
191
192#ifndef VM_TAG_ACTIVE_UPDATE
193#error VM_TAG_ACTIVE_UPDATE
194#endif
195#ifndef VM_MAX_TAG_ZONES
196#error VM_MAX_TAG_ZONES
197#endif
198
199boolean_t vm_tag_active_update = VM_TAG_ACTIVE_UPDATE;
b0d623f7 200lck_spin_t *vm_page_bucket_locks;
201lck_spin_t vm_objects_wired_lock;
202lck_spin_t vm_allocation_sites_lock;
1c79356b 203
204vm_allocation_site_t vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC + 1];
205vm_allocation_site_t * vm_allocation_sites[VM_MAX_TAG_VALUE];
206#if VM_MAX_TAG_ZONES
207vm_allocation_zone_total_t ** vm_allocation_zone_totals;
208#endif /* VM_MAX_TAG_ZONES */
209
210vm_tag_t vm_allocation_tag_highest;
211
212#if VM_PAGE_BUCKETS_CHECK
213boolean_t vm_page_buckets_check_ready = FALSE;
214#if VM_PAGE_FAKE_BUCKETS
215vm_page_bucket_t *vm_page_fake_buckets; /* decoy buckets */
216vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
217#endif /* VM_PAGE_FAKE_BUCKETS */
218#endif /* VM_PAGE_BUCKETS_CHECK */
91447636 219
220
221
222#if MACH_PAGE_HASH_STATS
223/* This routine is only for debug. It is intended to be called by
224 * hand by a developer using a kernel debugger. This routine prints
225 * out vm_page_hash table statistics to the kernel debug console.
226 */
227void
228hash_debug(void)
229{
230 int i;
231 int numbuckets = 0;
232 int highsum = 0;
233 int maxdepth = 0;
234
235 for (i = 0; i < vm_page_bucket_count; i++) {
236 if (vm_page_buckets[i].hi_count) {
237 numbuckets++;
238 highsum += vm_page_buckets[i].hi_count;
239 if (vm_page_buckets[i].hi_count > maxdepth)
240 maxdepth = vm_page_buckets[i].hi_count;
241 }
242 }
243 printf("Total number of buckets: %d\n", vm_page_bucket_count);
244 printf("Number used buckets: %d = %d%%\n",
245 numbuckets, 100*numbuckets/vm_page_bucket_count);
246 printf("Number unused buckets: %d = %d%%\n",
247 vm_page_bucket_count - numbuckets,
248 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
249 printf("Sum of bucket max depth: %d\n", highsum);
250 printf("Average bucket depth: %d.%2d\n",
251 highsum/vm_page_bucket_count,
252 highsum%vm_page_bucket_count);
253 printf("Maximum bucket depth: %d\n", maxdepth);
254}
255#endif /* MACH_PAGE_HASH_STATS */
256
257/*
258 * The virtual page size is currently implemented as a runtime
259 * variable, but is constant once initialized using vm_set_page_size.
260 * This initialization must be done in the machine-dependent
261 * bootstrap sequence, before calling other machine-independent
262 * initializations.
263 *
264 * All references to the virtual page size outside this
265 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
266 * constants.
267 */
268#if defined(__arm__) || defined(__arm64__)
269vm_size_t page_size;
270vm_size_t page_mask;
271int page_shift;
272#else
273vm_size_t page_size = PAGE_SIZE;
274vm_size_t page_mask = PAGE_MASK;
2d21ac55 275int page_shift = PAGE_SHIFT;
5ba3f43e 276#endif
277
278/*
279 * Resident page structures are initialized from
280 * a template (see vm_page_alloc).
281 *
282 * When adding a new field to the virtual memory
283 * object structure, be sure to add initialization
284 * (see vm_page_bootstrap).
285 */
286struct vm_page vm_page_template;
287
2d21ac55 288vm_page_t vm_pages = VM_PAGE_NULL;
289vm_page_t vm_page_array_beginning_addr;
290vm_page_t vm_page_array_ending_addr;
5ba3f43e 291vm_page_t vm_page_array_boundary;
39037602 292
2d21ac55 293unsigned int vm_pages_count = 0;
0b4c1975 294ppnum_t vm_page_lowest = 0;
2d21ac55 295
296/*
297 * Resident pages that represent real memory
298 * are allocated from a set of free lists,
299 * one per color.
1c79356b 300 */
301unsigned int vm_colors;
302unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
303unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
fe8ab488 304unsigned int vm_free_magazine_refill_limit = 0;
305
306
307struct vm_page_queue_free_head {
308 vm_page_queue_head_t qhead;
309} __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
310
311struct vm_page_queue_free_head vm_page_queue_free[MAX_COLORS];
312
313
1c79356b 314unsigned int vm_page_free_wanted;
2d21ac55 315unsigned int vm_page_free_wanted_privileged;
316#if CONFIG_SECLUDED_MEMORY
317unsigned int vm_page_free_wanted_secluded;
318#endif /* CONFIG_SECLUDED_MEMORY */
91447636 319unsigned int vm_page_free_count;
1c79356b 320
321/*
322 * Occasionally, the virtual memory system uses
323 * resident page structures that do not refer to
324 * real pages, for example to leave a page with
325 * important state information in the VP table.
326 *
327 * These page structures are allocated the way
328 * most other kernel structures are.
329 */
39037602 330zone_t vm_page_array_zone;
1c79356b 331zone_t vm_page_zone;
332vm_locks_array_t vm_page_locks;
333decl_lck_mtx_data(,vm_page_alloc_lock)
334lck_mtx_ext_t vm_page_alloc_lock_ext;
335
336unsigned int vm_page_local_q_count = 0;
337unsigned int vm_page_local_q_soft_limit = 250;
338unsigned int vm_page_local_q_hard_limit = 500;
339struct vplq *vm_page_local_q = NULL;
340
341/* N.B. Guard and fictitious pages must not
342 * be assigned a zero phys_page value.
343 */
344/*
345 * Fictitious pages don't have a physical address,
55e303ae 346 * but we must initialize phys_page to something.
347 * For debugging, this should be a strange value
348 * that the pmap module can recognize in assertions.
349 */
5ba3f43e 350const ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
1c79356b 351
352/*
353 * Guard pages are not accessible so they don't
354 * need a physical address, but we need to enter
355 * one in the pmap.
356 * Let's make it recognizable and make sure that
357 * we don't use a real physical page with that
358 * physical address.
359 */
5ba3f43e 360const ppnum_t vm_page_guard_addr = (ppnum_t) -2;
2d21ac55 361
362/*
363 * Resident page structures are also chained on
364 * queues that are used by the page replacement
365 * system (pageout daemon). These queues are
366 * defined here, but are shared by the pageout
9bccf70c 367 * module. The inactive queue is broken into
39236c6e 368 * file-backed and anonymous queues for convenience, as the
9bccf70c 369 * pageout daemon often assigns a higher
39236c6e 370 * importance to anonymous pages (they are less likely to be picked).
1c79356b 371 */
372vm_page_queue_head_t vm_page_queue_active __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
373vm_page_queue_head_t vm_page_queue_inactive __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
374#if CONFIG_SECLUDED_MEMORY
375vm_page_queue_head_t vm_page_queue_secluded __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
376#endif /* CONFIG_SECLUDED_MEMORY */
377vm_page_queue_head_t vm_page_queue_anonymous __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT))); /* inactive memory queue for anonymous pages */
378vm_page_queue_head_t vm_page_queue_throttled __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
2d21ac55 379
380queue_head_t vm_objects_wired;
381
382void vm_update_darkwake_mode(boolean_t);
383
384#if CONFIG_BACKGROUND_QUEUE
385vm_page_queue_head_t vm_page_queue_background __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
39037602 386uint32_t vm_page_background_target;
d9a64523 387uint32_t vm_page_background_target_snapshot;
388uint32_t vm_page_background_count;
389uint64_t vm_page_background_promoted_count;
390
391uint32_t vm_page_background_internal_count;
392uint32_t vm_page_background_external_count;
393
394uint32_t vm_page_background_mode;
395uint32_t vm_page_background_exclude_external;
396#endif
397
398unsigned int vm_page_active_count;
399unsigned int vm_page_inactive_count;
400#if CONFIG_SECLUDED_MEMORY
401unsigned int vm_page_secluded_count;
402unsigned int vm_page_secluded_count_free;
403unsigned int vm_page_secluded_count_inuse;
404#endif /* CONFIG_SECLUDED_MEMORY */
316670eb 405unsigned int vm_page_anonymous_count;
406unsigned int vm_page_throttled_count;
407unsigned int vm_page_speculative_count;
3e170ce0 408
91447636 409unsigned int vm_page_wire_count;
5ba3f43e 410unsigned int vm_page_wire_count_on_boot = 0;
3e170ce0 411unsigned int vm_page_stolen_count;
0b4c1975 412unsigned int vm_page_wire_count_initial;
3e170ce0 413unsigned int vm_page_pages_initial;
91447636 414unsigned int vm_page_gobble_count = 0;
415
416#define VM_PAGE_WIRE_COUNT_WARNING 0
417#define VM_PAGE_GOBBLE_COUNT_WARNING 0
418
419unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
b0d623f7 420unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
91447636 421uint64_t vm_page_purged_count = 0; /* total count of purged pages */
1c79356b 422
fe8ab488 423unsigned int vm_page_xpmapped_external_count = 0;
424unsigned int vm_page_external_count = 0;
425unsigned int vm_page_internal_count = 0;
426unsigned int vm_page_pageable_external_count = 0;
427unsigned int vm_page_pageable_internal_count = 0;
428
b0d623f7 429#if DEVELOPMENT || DEBUG
430unsigned int vm_page_speculative_recreated = 0;
431unsigned int vm_page_speculative_created = 0;
432unsigned int vm_page_speculative_used = 0;
b0d623f7 433#endif
2d21ac55 434
39037602 435vm_page_queue_head_t vm_page_queue_cleaned __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
436
437unsigned int vm_page_cleaned_count = 0;
316670eb 438
0c530ab8 439uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
0b4c1975 440ppnum_t max_valid_low_ppnum = 0xffffffff;
441
442
443/*
444 * Several page replacement parameters are also
445 * shared with this module, so that page allocation
446 * (done here in vm_page_alloc) can trigger the
447 * pageout daemon.
448 */
449unsigned int vm_page_free_target = 0;
450unsigned int vm_page_free_min = 0;
b0d623f7 451unsigned int vm_page_throttle_limit = 0;
91447636 452unsigned int vm_page_inactive_target = 0;
453#if CONFIG_SECLUDED_MEMORY
454unsigned int vm_page_secluded_target = 0;
455#endif /* CONFIG_SECLUDED_MEMORY */
39236c6e 456unsigned int vm_page_anonymous_min = 0;
91447636 457unsigned int vm_page_free_reserved = 0;
1c79356b 458
316670eb 459
460/*
461 * The VM system has a couple of heuristics for deciding
462 * that pages are "uninteresting" and should be placed
463 * on the inactive queue as likely candidates for replacement.
464 * These variables let the heuristics be controlled at run-time
465 * to make experimentation easier.
466 */
467
468boolean_t vm_page_deactivate_hint = TRUE;
469
470struct vm_page_stats_reusable vm_page_stats_reusable;
471
472/*
473 * vm_set_page_size:
474 *
475 * Sets the page size, perhaps based upon the memory
476 * size. Must be called before any use of page-size
477 * dependent functions.
478 *
479 * Sets page_shift and page_mask from page_size.
480 */
481void
482vm_set_page_size(void)
483{
484 page_size = PAGE_SIZE;
485 page_mask = PAGE_MASK;
486 page_shift = PAGE_SHIFT;
487
488 if ((page_mask & page_size) != 0)
489 panic("vm_set_page_size: page size not a power of two");
490
491 for (page_shift = 0; ; page_shift++)
91447636 492 if ((1U << page_shift) == page_size)
1c79356b 493 break;
494}
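/*
 * Equivalent closed form (illustrative sketch only, assuming page_size is a
 * power of two, which the panic above guarantees): the shift is the index of
 * the single set bit, e.g. page_size == 4096 gives page_shift == 12 and
 * page_mask == 0xFFF.
 *
 *	page_shift = __builtin_ctz((unsigned int) page_size);
 */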
495
496#if defined (__x86_64__)
497
498#define MAX_CLUMP_SIZE 16
499#define DEFAULT_CLUMP_SIZE 4
500
501unsigned int vm_clump_size, vm_clump_mask, vm_clump_shift, vm_clump_promote_threshold;
502
503#if DEVELOPMENT || DEBUG
504unsigned long vm_clump_stats[MAX_CLUMP_SIZE+1];
505unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
506
507static inline void vm_clump_update_stats(unsigned int c) {
508 assert(c<=vm_clump_size);
509 if(c>0 && c<=vm_clump_size) vm_clump_stats[c]+=c;
510 vm_clump_allocs+=c;
511}
512#endif /* if DEVELOPMENT || DEBUG */
513
514/* Called once to setup the VM clump knobs */
515static void
516vm_page_setup_clump( void )
517{
518 unsigned int override, n;
519
520 vm_clump_size = DEFAULT_CLUMP_SIZE;
521 if ( PE_parse_boot_argn("clump_size", &override, sizeof (override)) ) vm_clump_size = override;
522
523 if(vm_clump_size > MAX_CLUMP_SIZE) panic("vm_page_setup_clump:: clump_size is too large!");
524 if(vm_clump_size < 1) panic("vm_page_setup_clump:: clump_size must be >= 1");
525 if((vm_clump_size & (vm_clump_size-1)) != 0) panic("vm_page_setup_clump:: clump_size must be a power of 2");
526
527 vm_clump_promote_threshold = vm_clump_size;
528 vm_clump_mask = vm_clump_size - 1;
529 for(vm_clump_shift=0, n=vm_clump_size; n>1; n>>=1, vm_clump_shift++);
530
531#if DEVELOPMENT || DEBUG
532 bzero(vm_clump_stats, sizeof(vm_clump_stats));
533 vm_clump_allocs = vm_clump_inserts = vm_clump_inrange = vm_clump_promotes = 0;
534#endif /* if DEVELOPMENT || DEBUG */
535}
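/*
 * Worked example with the default knobs (illustrative, not in the original
 * source): vm_clump_size == 4 pages gives vm_clump_mask == 3,
 * vm_clump_shift == 2 and vm_clump_promote_threshold == 4, so a clump spans
 * 4 * PAGE_SIZE bytes of physical memory.  A boot-arg such as clump_size=8
 * would also be accepted, since 8 is a power of two no larger than
 * MAX_CLUMP_SIZE (16).
 */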
fe8ab488 536
537#endif /* #if defined (__x86_64__) */
538
539#define COLOR_GROUPS_TO_STEAL 4
540
541/* Called once during statup, once the cache geometry is known.
542 */
543static void
544vm_page_set_colors( void )
545{
546 unsigned int n, override;
547
548#if defined (__x86_64__)
549 /* adjust #colors because we need to color outside the clump boundary */
550 vm_cache_geometry_colors >>= vm_clump_shift;
551#endif
593a1d5f 552 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
553 n = override;
554 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
555 n = vm_cache_geometry_colors;
556 else n = DEFAULT_COLORS; /* use default if all else fails */
557
558 if ( n == 0 )
559 n = 1;
560 if ( n > MAX_COLORS )
561 n = MAX_COLORS;
562
563 /* the count must be a power of 2 */
b0d623f7 564 if ( ( n & (n - 1)) != 0 )
5ba3f43e 565 n = DEFAULT_COLORS; /* use default if all else fails */
566
567 vm_colors = n;
568 vm_color_mask = n - 1;
569
570 vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
571
572#if defined (__x86_64__)
573 /* adjust for reduction in colors due to clumping and multiple cores */
574 if (real_ncpus)
575 vm_free_magazine_refill_limit *= (vm_clump_size * real_ncpus);
576#endif
577}
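/*
 * Worked example (illustrative, the numbers are assumptions): if the platform
 * code reports 32 cache colors, the x86_64 clump adjustment above
 * (vm_clump_shift == 2) reduces that to vm_colors == 8 and vm_color_mask == 7;
 * vm_free_magazine_refill_limit then starts at 8 * COLOR_GROUPS_TO_STEAL == 32
 * pages before being scaled by vm_clump_size * real_ncpus on x86_64.
 */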
578
579
580lck_grp_t vm_page_lck_grp_free;
581lck_grp_t vm_page_lck_grp_queue;
582lck_grp_t vm_page_lck_grp_local;
583lck_grp_t vm_page_lck_grp_purge;
584lck_grp_t vm_page_lck_grp_alloc;
585lck_grp_t vm_page_lck_grp_bucket;
586lck_grp_attr_t vm_page_lck_grp_attr;
587lck_attr_t vm_page_lck_attr;
588
589
590__private_extern__ void
591vm_page_init_lck_grp(void)
592{
593 /*
594 * initialze the vm_page lock world
595 */
596 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
597 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
598 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
599 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
600 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
601 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
602 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
603 lck_attr_setdefault(&vm_page_lck_attr);
316670eb 604 lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
605
606 vm_compressor_init_locks();
607}
608
609#define ROUNDUP_NEXTP2(X) (1U << (32 - __builtin_clz((X) - 1)))
610
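/*
 * ROUNDUP_NEXTP2 example (illustrative): for X == 40, __builtin_clz(39) == 26
 * on a 32-bit int, so the macro yields 1U << 6 == 64; a value that is already
 * a power of two maps to itself (ROUNDUP_NEXTP2(64) == 64).  The argument must
 * be at least 2, since __builtin_clz(0) is undefined.
 */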
611void
612vm_page_init_local_q()
613{
614 unsigned int num_cpus;
615 unsigned int i;
616 struct vplq *t_local_q;
617
618 num_cpus = ml_get_max_cpus();
619
620 /*
621 * no point in this for a uni-processor system
622 */
623 if (num_cpus >= 2) {
624#if KASAN
625 /* KASAN breaks the expectation of a size-aligned object by adding a
5c9f4661 626 * redzone, so explicitly align. */
627 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq) + VM_PACKED_POINTER_ALIGNMENT);
628 t_local_q = (void *)(((uintptr_t)t_local_q + (VM_PACKED_POINTER_ALIGNMENT-1)) & ~(VM_PACKED_POINTER_ALIGNMENT-1));
629#else
630 /* round the size up to the nearest power of two */
631 t_local_q = (struct vplq *)kalloc(ROUNDUP_NEXTP2(num_cpus * sizeof(struct vplq)));
5ba3f43e 632#endif
633
634 for (i = 0; i < num_cpus; i++) {
635 struct vpl *lq;
636
637 lq = &t_local_q[i].vpl_un.vpl;
638 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
39037602 639 vm_page_queue_init(&lq->vpl_queue);
b0d623f7 640 lq->vpl_count = 0;
641 lq->vpl_internal_count = 0;
642 lq->vpl_external_count = 0;
643 }
644 vm_page_local_q_count = num_cpus;
645
646 vm_page_local_q = (struct vplq *)t_local_q;
647 }
648}
649
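/*
 * The KASAN path above uses the usual align-up idiom: assuming
 * VM_PACKED_POINTER_ALIGNMENT == 64 (an assumption for illustration), a raw
 * kalloc() result of 0x1010 becomes (0x1010 + 63) & ~63 == 0x1040, the first
 * 64-byte aligned address at or above the allocation.
 */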
650/*
651 * vm_init_before_launchd
652 *
653 * This should be called right before launchd is loaded.
654 */
655void
656vm_init_before_launchd()
657{
658 vm_page_wire_count_on_boot = vm_page_wire_count;
659}
660
b0d623f7 661
662/*
663 * vm_page_bootstrap:
664 *
665 * Initializes the resident memory module.
666 *
667 * Allocates memory for the page cells, and
668 * for the object/offset-to-page hash table headers.
669 * Each page cell is initialized and placed on the free list.
670 * Returns the range of available kernel virtual memory.
671 */
672
673void
674vm_page_bootstrap(
675 vm_offset_t *startp,
676 vm_offset_t *endp)
677{
39037602 678 vm_page_t m;
91447636 679 unsigned int i;
680 unsigned int log1;
681 unsigned int log2;
682 unsigned int size;
683
684 /*
685 * Initialize the vm_page template.
686 */
687
688 m = &vm_page_template;
b0d623f7 689 bzero(m, sizeof (*m));
1c79356b 690
39037602 691#if CONFIG_BACKGROUND_QUEUE
692 m->vmp_backgroundq.next = 0;
693 m->vmp_backgroundq.prev = 0;
694 m->vmp_in_background = FALSE;
695 m->vmp_on_backgroundq = FALSE;
696#endif
697
698 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
699 m->vmp_listq.next = 0;
700 m->vmp_listq.prev = 0;
701 m->vmp_next_m = 0;
91447636 702
703 m->vmp_object = 0; /* reset later */
704 m->vmp_offset = (vm_object_offset_t) -1; /* reset later */
b0d623f7 705
706 m->vmp_wire_count = 0;
707 m->vmp_q_state = VM_PAGE_NOT_ON_Q;
708 m->vmp_laundry = FALSE;
709 m->vmp_reference = FALSE;
710 m->vmp_gobbled = FALSE;
711 m->vmp_private = FALSE;
712 m->vmp_unused_page_bits = 0;
b0d623f7 713
5ba3f43e 714#if !defined(__arm__) && !defined(__arm64__)
39037602 715 VM_PAGE_SET_PHYS_PAGE(m, 0); /* reset later */
5ba3f43e 716#endif
717 m->vmp_busy = TRUE;
718 m->vmp_wanted = FALSE;
719 m->vmp_tabled = FALSE;
720 m->vmp_hashed = FALSE;
721 m->vmp_fictitious = FALSE;
722 m->vmp_pmapped = FALSE;
723 m->vmp_wpmapped = FALSE;
724 m->vmp_free_when_done = FALSE;
725 m->vmp_absent = FALSE;
726 m->vmp_error = FALSE;
727 m->vmp_dirty = FALSE;
728 m->vmp_cleaning = FALSE;
729 m->vmp_precious = FALSE;
730 m->vmp_clustered = FALSE;
731 m->vmp_overwriting = FALSE;
732 m->vmp_restart = FALSE;
733 m->vmp_unusual = FALSE;
734 m->vmp_cs_validated = FALSE;
735 m->vmp_cs_tainted = FALSE;
736 m->vmp_cs_nx = FALSE;
737 m->vmp_no_cache = FALSE;
738 m->vmp_reusable = FALSE;
739 m->vmp_xpmapped = FALSE;
740 m->vmp_written_by_kernel = FALSE;
741 m->vmp_unused_object_bits = 0;
1c79356b 742
743 /*
744 * Initialize the page queues.
745 */
746 vm_page_init_lck_grp();
747
748 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
749 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
750 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
751
752 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
753 int group;
754
755 purgeable_queues[i].token_q_head = 0;
756 purgeable_queues[i].token_q_tail = 0;
757 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
758 queue_init(&purgeable_queues[i].objq[group]);
759
760 purgeable_queues[i].type = i;
761 purgeable_queues[i].new_pages = 0;
762#if MACH_ASSERT
763 purgeable_queues[i].debug_count_tokens = 0;
764 purgeable_queues[i].debug_count_objects = 0;
765#endif
766 };
767 purgeable_nonvolatile_count = 0;
768 queue_init(&purgeable_nonvolatile_queue);
769
770 for (i = 0; i < MAX_COLORS; i++ )
771 vm_page_queue_init(&vm_page_queue_free[i].qhead);
772
773 vm_page_queue_init(&vm_lopage_queue_free);
774 vm_page_queue_init(&vm_page_queue_active);
775 vm_page_queue_init(&vm_page_queue_inactive);
776#if CONFIG_SECLUDED_MEMORY
777 vm_page_queue_init(&vm_page_queue_secluded);
778#endif /* CONFIG_SECLUDED_MEMORY */
779 vm_page_queue_init(&vm_page_queue_cleaned);
780 vm_page_queue_init(&vm_page_queue_throttled);
781 vm_page_queue_init(&vm_page_queue_anonymous);
3e170ce0 782 queue_init(&vm_objects_wired);
1c79356b 783
2d21ac55 784 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
39037602 785 vm_page_queue_init(&vm_page_queue_speculative[i].age_q);
786
787 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
788 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
789 }
790#if CONFIG_BACKGROUND_QUEUE
791 vm_page_queue_init(&vm_page_queue_background);
792
793 vm_page_background_count = 0;
794 vm_page_background_internal_count = 0;
795 vm_page_background_external_count = 0;
796 vm_page_background_promoted_count = 0;
797
798 vm_page_background_target = (unsigned int)(atop_64(max_mem) / 25);
799
800 if (vm_page_background_target > VM_PAGE_BACKGROUND_TARGET_MAX)
801 vm_page_background_target = VM_PAGE_BACKGROUND_TARGET_MAX;
802
803 vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
804 vm_page_background_exclude_external = 0;
805
806 PE_parse_boot_argn("vm_page_bg_mode", &vm_page_background_mode, sizeof(vm_page_background_mode));
807 PE_parse_boot_argn("vm_page_bg_exclude_external", &vm_page_background_exclude_external, sizeof(vm_page_background_exclude_external));
808 PE_parse_boot_argn("vm_page_bg_target", &vm_page_background_target, sizeof(vm_page_background_target));
39037602 809
5ba3f43e 810 if (vm_page_background_mode > VM_PAGE_BG_LEVEL_1)
39037602 811 vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
39037602 812#endif
1c79356b 813 vm_page_free_wanted = 0;
2d21ac55 814 vm_page_free_wanted_privileged = 0;
815#if CONFIG_SECLUDED_MEMORY
816 vm_page_free_wanted_secluded = 0;
817#endif /* CONFIG_SECLUDED_MEMORY */
2d21ac55 818
819#if defined (__x86_64__)
820 /* this must be called before vm_page_set_colors() */
821 vm_page_setup_clump();
822#endif
823
824 vm_page_set_colors();
825
826 bzero(vm_page_inactive_states, sizeof(vm_page_inactive_states));
827 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
828 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
829 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
830
831 bzero(vm_page_pageable_states, sizeof(vm_page_pageable_states));
832 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
833 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
834 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
835 vm_page_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
836 vm_page_pageable_states[VM_PAGE_ON_SPECULATIVE_Q] = 1;
837 vm_page_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
838#if CONFIG_SECLUDED_MEMORY
839 vm_page_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
840#endif /* CONFIG_SECLUDED_MEMORY */
841
842 bzero(vm_page_non_speculative_pageable_states, sizeof(vm_page_non_speculative_pageable_states));
843 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
844 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
845 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
846 vm_page_non_speculative_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
847 vm_page_non_speculative_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
848#if CONFIG_SECLUDED_MEMORY
849 vm_page_non_speculative_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
850#endif /* CONFIG_SECLUDED_MEMORY */
851
852 bzero(vm_page_active_or_inactive_states, sizeof(vm_page_active_or_inactive_states));
853 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
854 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
855 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
856 vm_page_active_or_inactive_states[VM_PAGE_ON_ACTIVE_Q] = 1;
857#if CONFIG_SECLUDED_MEMORY
858 vm_page_active_or_inactive_states[VM_PAGE_ON_SECLUDED_Q] = 1;
859#endif /* CONFIG_SECLUDED_MEMORY */
860
861 for (i = 0; i < VM_KERN_MEMORY_FIRST_DYNAMIC; i++)
862 {
863 vm_allocation_sites_static[i].refcount = 2;
864 vm_allocation_sites_static[i].tag = i;
865 vm_allocation_sites[i] = &vm_allocation_sites_static[i];
866 }
867 vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC].refcount = 2;
868 vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC].tag = VM_KERN_MEMORY_ANY;
869 vm_allocation_sites[VM_KERN_MEMORY_ANY] = &vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC];
870
871 /*
872 * Steal memory for the map and zone subsystems.
873 */
874#if CONFIG_GZALLOC
875 gzalloc_configure();
876#endif
877 kernel_debug_string_early("vm_map_steal_memory");
316670eb 878 vm_map_steal_memory();
879
880 /*
881 * Allocate (and initialize) the virtual-to-physical
882 * table hash buckets.
883 *
884 * The number of buckets should be a power of two to
885 * get a good hash function. The following computation
886 * chooses the first power of two that is greater
887 * than the number of physical pages in the system.
888 */
889
890 if (vm_page_bucket_count == 0) {
891 unsigned int npages = pmap_free_pages();
892
893 vm_page_bucket_count = 1;
894 while (vm_page_bucket_count < npages)
895 vm_page_bucket_count <<= 1;
896 }
b0d623f7 897 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
898
899 vm_page_hash_mask = vm_page_bucket_count - 1;
900
901 /*
902 * Calculate object shift value for hashing algorithm:
903 * O = log2(sizeof(struct vm_object))
904 * B = log2(vm_page_bucket_count)
905 * hash shifts the object left by
906 * B/2 - O
907 */
908 size = vm_page_bucket_count;
909 for (log1 = 0; size > 1; log1++)
910 size /= 2;
911 size = sizeof(struct vm_object);
912 for (log2 = 0; size > 1; log2++)
913 size /= 2;
914 vm_page_hash_shift = log1/2 - log2 + 1;
915
916 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
917 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
918 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to ensure a unique series */
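	/*
	 * Worked example (illustrative, the object size is an assumption): for
	 * roughly one million physical pages, vm_page_bucket_count is rounded
	 * up to 1 << 20, so log1 == 20; if sizeof(struct vm_object) were 256
	 * bytes, log2 == 8 and vm_page_hash_shift == 20/2 - 8 + 1 == 3, while
	 * vm_page_bucket_hash == (1 << 10) | (1 << 5) | 1 == 0x421.
	 */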
919
920 if (vm_page_hash_mask & vm_page_bucket_count)
921 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
922
923#if VM_PAGE_BUCKETS_CHECK
924#if VM_PAGE_FAKE_BUCKETS
925 /*
926 * Allocate a decoy set of page buckets, to detect
927 * any stomping there.
928 */
929 vm_page_fake_buckets = (vm_page_bucket_t *)
930 pmap_steal_memory(vm_page_bucket_count *
931 sizeof(vm_page_bucket_t));
932 vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
933 vm_page_fake_buckets_end =
934 vm_map_round_page((vm_page_fake_buckets_start +
935 (vm_page_bucket_count *
936 sizeof (vm_page_bucket_t))),
937 PAGE_MASK);
938 char *cp;
939 for (cp = (char *)vm_page_fake_buckets_start;
940 cp < (char *)vm_page_fake_buckets_end;
941 cp++) {
942 *cp = 0x5a;
943 }
944#endif /* VM_PAGE_FAKE_BUCKETS */
945#endif /* VM_PAGE_BUCKETS_CHECK */
946
39037602 947 kernel_debug_string_early("vm_page_buckets");
948 vm_page_buckets = (vm_page_bucket_t *)
949 pmap_steal_memory(vm_page_bucket_count *
950 sizeof(vm_page_bucket_t));
951
39037602 952 kernel_debug_string_early("vm_page_bucket_locks");
953 vm_page_bucket_locks = (lck_spin_t *)
954 pmap_steal_memory(vm_page_bucket_lock_count *
955 sizeof(lck_spin_t));
956
1c79356b 957 for (i = 0; i < vm_page_bucket_count; i++) {
39037602 958 vm_page_bucket_t *bucket = &vm_page_buckets[i];
1c79356b 959
fe8ab488 960 bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
961#if MACH_PAGE_HASH_STATS
962 bucket->cur_count = 0;
963 bucket->hi_count = 0;
964#endif /* MACH_PAGE_HASH_STATS */
965 }
966
967 for (i = 0; i < vm_page_bucket_lock_count; i++)
968 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
969
970 lck_spin_init(&vm_objects_wired_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
971 lck_spin_init(&vm_allocation_sites_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
972 vm_tag_init();
973
974#if VM_PAGE_BUCKETS_CHECK
975 vm_page_buckets_check_ready = TRUE;
976#endif /* VM_PAGE_BUCKETS_CHECK */
977
978 /*
979 * Machine-dependent code allocates the resident page table.
980 * It uses vm_page_init to initialize the page frames.
981 * The code also returns to us the virtual space available
982 * to the kernel. We don't trust the pmap module
983 * to get the alignment right.
984 */
985
39037602 986 kernel_debug_string_early("pmap_startup");
1c79356b 987 pmap_startup(&virtual_space_start, &virtual_space_end);
988 virtual_space_start = round_page(virtual_space_start);
989 virtual_space_end = trunc_page(virtual_space_end);
990
991 *startp = virtual_space_start;
992 *endp = virtual_space_end;
993
994 /*
995 * Compute the initial "wire" count.
996 * Up until now, the pages which have been set aside are not under
997 * the VM system's control, so although they aren't explicitly
998 * wired, they nonetheless can't be moved. At this moment,
999 * all VM managed pages are "free", courtesy of pmap_startup.
1000 */
b0d623f7 1001 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
0b4c1975 1002 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
1003#if CONFIG_SECLUDED_MEMORY
1004 vm_page_wire_count -= vm_page_secluded_count;
1005#endif
0b4c1975 1006 vm_page_wire_count_initial = vm_page_wire_count;
3e170ce0 1007 vm_page_pages_initial = vm_page_pages;
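	/*
	 * Illustrative arithmetic (editorial, numbers are assumptions): on a
	 * machine with max_mem == 8 GB and 4 KB pages, atop_64(max_mem) ==
	 * 2,097,152; if pmap_startup released, say, 2,000,000 pages to the
	 * free list, the remaining ~97,000 frames were consumed during early
	 * bootstrap and show up here as the initial vm_page_wire_count.
	 */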
91447636 1008
1009 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
1010 vm_page_free_count, vm_page_wire_count);
1011
39037602 1012 kernel_debug_string_early("vm_page_bootstrap complete");
91447636 1013 simple_lock_init(&vm_paging_lock, 0);
1014}
1015
1016#ifndef MACHINE_PAGES
1017/*
1018 * We implement pmap_steal_memory and pmap_startup with the help
1019 * of two simpler functions, pmap_virtual_space and pmap_next_page.
1020 */
1021
91447636 1022void *
1023pmap_steal_memory(
1024 vm_size_t size)
1025{
5ba3f43e 1026 kern_return_t kr;
55e303ae 1027 vm_offset_t addr, vaddr;
5ba3f43e 1028 ppnum_t phys_page;
1029
1030 /*
1031 * We round the size to a round multiple.
1032 */
1033
1034 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
1035
1036 /*
1037 * If this is the first call to pmap_steal_memory,
1038 * we have to initialize ourself.
1039 */
1040
1041 if (virtual_space_start == virtual_space_end) {
1042 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
1043
1044 /*
1045 * The initial values must be aligned properly, and
1046 * we don't trust the pmap module to do it right.
1047 */
1048
1049 virtual_space_start = round_page(virtual_space_start);
1050 virtual_space_end = trunc_page(virtual_space_end);
1051 }
1052
1053 /*
1054 * Allocate virtual memory for this request.
1055 */
1056
1057 addr = virtual_space_start;
1058 virtual_space_start += size;
1059
6d2010ae 1060 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
1061
1062 /*
1063 * Allocate and map physical pages to back new virtual pages.
1064 */
1065
91447636 1066 for (vaddr = round_page(addr);
1067 vaddr < addr + size;
1068 vaddr += PAGE_SIZE) {
b0d623f7 1069
0b4c1975 1070 if (!pmap_next_page_hi(&phys_page))
39037602 1071 panic("pmap_steal_memory() size: 0x%llx\n", (uint64_t)size);
1072
1073 /*
1074 * XXX Logically, these mappings should be wired,
1075 * but some pmap modules barf if they are.
1076 */
b0d623f7 1077#if defined(__LP64__)
1078#ifdef __arm64__
1079 /* ARM64_TODO: verify that we really don't need this */
1080#else
1081 pmap_pre_expand(kernel_pmap, vaddr);
1082#endif
1083#endif
1084
1085 kr = pmap_enter(kernel_pmap, vaddr, phys_page,
1086 VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
1087 VM_WIMG_USE_DEFAULT, FALSE);
1088
1089 if (kr != KERN_SUCCESS) {
1090 panic("pmap_steal_memory() pmap_enter failed, vaddr=%#lx, phys_page=%u",
1091 (unsigned long)vaddr, phys_page);
1092 }
1c79356b 1093
1094 /*
1095 * Account for newly stolen memory
1096 */
1097 vm_page_wire_count++;
3e170ce0 1098 vm_page_stolen_count++;
1099 }
1100
1101#if KASAN
1102 kasan_notify_address(round_page(addr), size);
1103#endif
91447636 1104 return (void *) addr;
1105}
1106
1107#if CONFIG_SECLUDED_MEMORY
1108/* boot-args to control secluded memory */
1109unsigned int secluded_mem_mb = 0; /* # of MBs of RAM to seclude */
1110int secluded_for_iokit = 1; /* IOKit can use secluded memory */
1111int secluded_for_apps = 1; /* apps can use secluded memory */
1112int secluded_for_filecache = 2; /* filecache can use secluded memory */
1113#if 11
1114int secluded_for_fbdp = 0;
1115#endif
d9a64523 1116uint64_t secluded_shutoff_trigger = 0;
1117#endif /* CONFIG_SECLUDED_MEMORY */
1118
1119
1120#if defined(__arm__) || defined(__arm64__)
1121extern void patch_low_glo_vm_page_info(void *, void *, uint32_t);
1122unsigned int vm_first_phys_ppnum = 0;
1123#endif
1124
1125
fe8ab488 1126void vm_page_release_startup(vm_page_t mem);
1127void
1128pmap_startup(
1129 vm_offset_t *startp,
1130 vm_offset_t *endp)
1131{
55e303ae 1132 unsigned int i, npages, pages_initialized, fill, fillval;
1133 ppnum_t phys_page;
1134 addr64_t tmpaddr;
1c79356b 1135
fe8ab488 1136#if defined(__LP64__)
1137 /*
1138 * make sure we are aligned on a 64 byte boundary
1139 * for VM_PAGE_PACK_PTR (it clips off the low-order
1140 * 6 bits of the pointer)
1141 */
1142 if (virtual_space_start != virtual_space_end)
1143 virtual_space_start = round_page(virtual_space_start);
1144#endif
1145
1146 /*
1147 * We calculate how many page frames we will have
1148 * and then allocate the page structures in one chunk.
1149 */
1150
55e303ae 1151 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
b0d623f7 1152 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
2d21ac55 1153 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */
1c79356b 1154
2d21ac55 1155 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
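	/*
	 * Sizing note (illustrative arithmetic): dividing by
	 * (PAGE_SIZE + sizeof(*vm_pages)) leaves room for each page frame plus
	 * its struct vm_page.  For example, with 4 GB remaining, 4 KB pages and
	 * an assumed 80-byte struct vm_page, npages is roughly 4 GB / 4176 ~=
	 * 1,028,000 instead of the 1,048,576 raw frames, since about 2% of the
	 * memory is consumed by the vm_pages array itself.
	 */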
1156
1157 /*
1158 * Initialize the page frames.
1159 */
1160 kernel_debug_string_early("Initialize the page frames");
1161
1162 vm_page_array_beginning_addr = &vm_pages[0];
1163 vm_page_array_ending_addr = &vm_pages[npages];
1164
1c79356b 1165 for (i = 0, pages_initialized = 0; i < npages; i++) {
55e303ae 1166 if (!pmap_next_page(&phys_page))
1c79356b 1167 break;
1168#if defined(__arm__) || defined(__arm64__)
1169 if (pages_initialized == 0) {
1170 vm_first_phys_ppnum = phys_page;
1171 patch_low_glo_vm_page_info((void *)vm_page_array_beginning_addr, (void *)vm_page_array_ending_addr, vm_first_phys_ppnum);
1172 }
1173 assert((i + vm_first_phys_ppnum) == phys_page);
1174#endif
1175 if (pages_initialized == 0 || phys_page < vm_page_lowest)
1176 vm_page_lowest = phys_page;
1c79356b 1177
0b4c1975 1178 vm_page_init(&vm_pages[i], phys_page, FALSE);
1179 vm_page_pages++;
1180 pages_initialized++;
1181 }
2d21ac55 1182 vm_pages_count = pages_initialized;
5ba3f43e 1183 vm_page_array_boundary = &vm_pages[pages_initialized];
1c79356b 1184
1185#if defined(__LP64__)
1186
39037602 1187 if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0]))) != &vm_pages[0])
1188 panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);
1189
39037602 1190 if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1]))) != &vm_pages[vm_pages_count-1])
1191 panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
1192#endif
39037602 1193 kernel_debug_string_early("page fill/release");
1194 /*
1195 * Check if we want to initialize pages to a known value
1196 */
1197 fill = 0; /* Assume no fill */
593a1d5f 1198 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
1199#if DEBUG
1200 /* This slows down booting the DEBUG kernel, particularly on
1201 * large memory systems, but is worthwhile in deterministically
1202 * trapping uninitialized memory usage.
1203 */
1204 if (fill == 0) {
1205 fill = 1;
1206 fillval = 0xDEB8F177;
1207 }
1208#endif
1209 if (fill)
1210 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
1211
1212#if CONFIG_SECLUDED_MEMORY
1213 /* default: no secluded mem */
1214 secluded_mem_mb = 0;
1215 if (max_mem > 1*1024*1024*1024) {
1216 /* default to 90MB for devices with > 1GB of RAM */
1217 secluded_mem_mb = 90;
1218 }
1219 /* override with value from device tree, if provided */
1220 PE_get_default("kern.secluded_mem_mb",
1221 &secluded_mem_mb, sizeof(secluded_mem_mb));
1222 /* override with value from boot-args, if provided */
1223 PE_parse_boot_argn("secluded_mem_mb",
1224 &secluded_mem_mb,
1225 sizeof (secluded_mem_mb));
1226
1227 vm_page_secluded_target = (unsigned int)
1228 ((secluded_mem_mb * 1024ULL * 1024ULL) / PAGE_SIZE);
1229 PE_parse_boot_argn("secluded_for_iokit",
1230 &secluded_for_iokit,
1231 sizeof (secluded_for_iokit));
1232 PE_parse_boot_argn("secluded_for_apps",
1233 &secluded_for_apps,
1234 sizeof (secluded_for_apps));
1235 PE_parse_boot_argn("secluded_for_filecache",
1236 &secluded_for_filecache,
1237 sizeof (secluded_for_filecache));
1238#if 11
1239 PE_parse_boot_argn("secluded_for_fbdp",
1240 &secluded_for_fbdp,
1241 sizeof (secluded_for_fbdp));
1242#endif
39037602 1243
1244 /*
1245 * On small devices, allow a large app to effectively suppress
1246 * secluded memory until it exits.
1247 */
1248 if (max_mem <= 1 * 1024 * 1024 * 1024 && vm_page_secluded_target != 0) {
1249
1250 /*
1251 * Get an amount from boot-args, else use 500MB.
1252 * 500MB was chosen from a Peace daemon tentpole test which used munch
1253 * to induce jetsam thrashing of false idle daemons.
1254 */
1255 int secluded_shutoff_mb;
1256 if (PE_parse_boot_argn("secluded_shutoff_mb", &secluded_shutoff_mb,
1257 sizeof (secluded_shutoff_mb)))
1258 secluded_shutoff_trigger = (uint64_t)secluded_shutoff_mb * 1024 * 1024;
1259 else
1260 secluded_shutoff_trigger = 500 * 1024 * 1024;
1261
1262 if (secluded_shutoff_trigger != 0)
1263 secluded_suppression_init();
0c530ab8 1264 }
1265
1266#endif /* CONFIG_SECLUDED_MEMORY */
0c530ab8 1267
1c79356b 1268 /*
d9a64523 1269 * By default release pages in reverse order so that physical pages
1270 * initially get allocated in ascending addresses. This keeps
1271 * the devices (which must address physical memory) happy if
1272 * they require several consecutive pages.
1273 *
1274 * For debugging, you can reverse this ordering and/or fill
1275 * all pages with a known value.
1c79356b 1276 */
1277 if (vm_himemory_mode == 2) {
1278 for (i = 0; i < pages_initialized; i++) {
1279 if (fill)
1280 fillPage(VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]), fillval);
1281 vm_page_release_startup(&vm_pages[i]);
1282 }
1283 } else {
1284 for (i = pages_initialized; i-- > 0; ) {
1285 if (fill)
1286 fillPage(VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]), fillval);
1287 vm_page_release_startup(&vm_pages[i]);
1288 }
1289 }
1290
1291 VM_CHECK_MEMORYSTATUS;
1292
1293#if 0
1294 {
1295 vm_page_t xx, xxo, xxl;
2d21ac55 1296 int i, j, k, l;
1297
1298 j = 0; /* (BRINGUP) */
1299 xxl = 0;
1300
2d21ac55 1301 for( i = 0; i < vm_colors; i++ ) {
39037602 1302 queue_iterate(&vm_page_queue_free[i].qhead,
1303 xx,
1304 vm_page_t,
d9a64523 1305 vmp_pageq) { /* BRINGUP */
1306 j++; /* (BRINGUP) */
1307 if(j > vm_page_free_count) { /* (BRINGUP) */
1308 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
55e303ae 1309 }
1310
1311 l = vm_page_free_count - j; /* (BRINGUP) */
1312 k = 0; /* (BRINGUP) */
1313
1314 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
1315
39037602 1316 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i].qhead; xxo = xxo->pageq.next) { /* (BRINGUP) */
1317 k++;
1318 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
1319 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
1320 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
1321 }
1322 }
1323
1324 xxl = xx;
55e303ae
A
1325 }
1326 }
1327
1328 if(j != vm_page_free_count) { /* (BRINGUP) */
1329 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
1330 }
1331 }
1332#endif
1333
1334
1335 /*
1336 * We have to re-align virtual_space_start,
1337 * because pmap_steal_memory has been using it.
1338 */
1339
b0d623f7 1340 virtual_space_start = round_page(virtual_space_start);
1341
1342 *startp = virtual_space_start;
1343 *endp = virtual_space_end;
1344}
1345#endif /* MACHINE_PAGES */
1346
1347/*
1348 * Routine: vm_page_module_init
1349 * Purpose:
1350 * Second initialization pass, to be done after
1351 * the basic VM system is ready.
1352 */
1353void
1354vm_page_module_init(void)
1355{
1356 uint64_t vm_page_zone_pages, vm_page_array_zone_data_size;
1357 vm_size_t vm_page_with_ppnum_size;
1c79356b 1358
1359 vm_page_array_zone = zinit((vm_size_t) sizeof(struct vm_page),
1360 0, PAGE_SIZE, "vm pages array");
1c79356b 1361
1362 zone_change(vm_page_array_zone, Z_CALLERACCT, FALSE);
1363 zone_change(vm_page_array_zone, Z_EXPAND, FALSE);
1364 zone_change(vm_page_array_zone, Z_EXHAUST, TRUE);
1365 zone_change(vm_page_array_zone, Z_FOREIGN, TRUE);
1366 zone_change(vm_page_array_zone, Z_GZALLOC_EXEMPT, TRUE);
1367 /*
1368 * Adjust zone statistics to account for the real pages allocated
1369 * in vm_page_create(). [Q: is this really what we want?]
1370 */
1371 vm_page_array_zone->count += vm_page_pages;
1372 vm_page_array_zone->sum_count += vm_page_pages;
1373 vm_page_array_zone_data_size = vm_page_pages * vm_page_array_zone->elem_size;
1374 vm_page_array_zone->cur_size += vm_page_array_zone_data_size;
1375 vm_page_zone_pages = ((round_page(vm_page_array_zone_data_size)) / PAGE_SIZE);
1376 OSAddAtomic64(vm_page_zone_pages, &(vm_page_array_zone->page_count));
1377 /* since zone accounts for these, take them out of stolen */
1378 VM_PAGE_MOVE_STOLEN(vm_page_zone_pages);
1379
1380 vm_page_with_ppnum_size = (sizeof(struct vm_page_with_ppnum) + (VM_PACKED_POINTER_ALIGNMENT-1)) & ~(VM_PACKED_POINTER_ALIGNMENT - 1);
1381
1382 vm_page_zone = zinit(vm_page_with_ppnum_size,
1383 0, PAGE_SIZE, "vm pages");
1384
1385 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
1386 zone_change(vm_page_zone, Z_EXPAND, FALSE);
1387 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
1388 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
1389 zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
5ba3f43e 1390 zone_change(vm_page_zone, Z_ALIGNMENT_REQUIRED, TRUE);
1391}
1392
1393/*
1394 * Routine: vm_page_create
1395 * Purpose:
1396 * After the VM system is up, machine-dependent code
1397 * may stumble across more physical memory. For example,
1398 * memory that it was reserving for a frame buffer.
1399 * vm_page_create turns this memory into available pages.
1400 */
1401
1402void
1403vm_page_create(
1404 ppnum_t start,
1405 ppnum_t end)
1c79356b 1406{
1407 ppnum_t phys_page;
1408 vm_page_t m;
1c79356b 1409
1410 for (phys_page = start;
1411 phys_page < end;
1412 phys_page++) {
6d2010ae 1413 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
1414 == VM_PAGE_NULL)
1415 vm_page_more_fictitious();
1416
d9a64523 1417 m->vmp_fictitious = FALSE;
0b4c1975 1418 pmap_clear_noencrypt(phys_page);
6d2010ae 1419
1c79356b 1420 vm_page_pages++;
39037602 1421 vm_page_release(m, FALSE);
1422 }
1423}
1424
1425/*
1426 * vm_page_hash:
1427 *
1428 * Distributes the object/offset key pair among hash buckets.
1429 *
55e303ae 1430 * NOTE: The bucket count must be a power of 2
1431 */
1432#define vm_page_hash(object, offset) (\
b0d623f7 1433 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
1434 & vm_page_hash_mask)
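/*
 * Because vm_page_bucket_count is a power of two, vm_page_hash_mask ==
 * vm_page_bucket_count - 1 and the final "& vm_page_hash_mask" is equivalent
 * to taking the sum modulo the bucket count.  Illustrative use (mirrors
 * vm_page_insert_internal() below):
 *
 *	hash_id = vm_page_hash(object, offset);
 *	bucket  = &vm_page_buckets[hash_id];
 */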
1435
2d21ac55 1436
1437/*
1438 * vm_page_insert: [ internal use only ]
1439 *
1440 * Inserts the given mem entry into the object/object-page
1441 * table and object list.
1442 *
1443 * The object must be locked.
1444 */
1445void
1446vm_page_insert(
1447 vm_page_t mem,
1448 vm_object_t object,
1449 vm_object_offset_t offset)
1450{
1451 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, TRUE, FALSE, FALSE, NULL);
1452}
1453
1454void
1455vm_page_insert_wired(
1456 vm_page_t mem,
1457 vm_object_t object,
1458 vm_object_offset_t offset,
1459 vm_tag_t tag)
1460{
1461 vm_page_insert_internal(mem, object, offset, tag, FALSE, TRUE, FALSE, FALSE, NULL);
1462}
1463
4a3eedf9 1464void
2d21ac55
A
1465vm_page_insert_internal(
1466 vm_page_t mem,
1467 vm_object_t object,
1468 vm_object_offset_t offset,
3e170ce0 1469 vm_tag_t tag,
b0d623f7 1470 boolean_t queues_lock_held,
316670eb 1471 boolean_t insert_in_hash,
1472 boolean_t batch_pmap_op,
1473 boolean_t batch_accounting,
1474 uint64_t *delayed_ledger_update)
1c79356b 1475{
1476 vm_page_bucket_t *bucket;
1477 lck_spin_t *bucket_lock;
1478 int hash_id;
1479 task_t owner;
1480 int ledger_idx_volatile;
1481 int ledger_idx_nonvolatile;
1482 int ledger_idx_volatile_compressed;
1483 int ledger_idx_nonvolatile_compressed;
1484 boolean_t do_footprint;
1485
1486 XPR(XPR_VM_PAGE,
1487 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
b0d623f7 1488 object, offset, mem, 0,0);
1489#if 0
1490 /*
1491 * we may not hold the page queue lock
1492 * so this check isn't safe to make
1493 */
1c79356b 1494 VM_PAGE_CHECK(mem);
316670eb 1495#endif
1c79356b 1496
1497 assert(page_aligned(offset));
1498
d9a64523 1499 assert(!VM_PAGE_WIRED(mem) || mem->vmp_private || mem->vmp_fictitious || (tag != VM_KERN_MEMORY_NONE));
3e170ce0 1500
1501 /* the vm_submap_object is only a placeholder for submaps */
1502 assert(object != vm_submap_object);
1503
1504 vm_object_lock_assert_exclusive(object);
39037602 1505 LCK_MTX_ASSERT(&vm_page_queue_lock,
1506 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1507 : LCK_MTX_ASSERT_NOTOWNED);
5ba3f43e 1508
1509 if (queues_lock_held == FALSE)
1510 assert(!VM_PAGE_PAGEABLE(mem));
3e170ce0 1511
b0d623f7 1512 if (insert_in_hash == TRUE) {
15129b1c 1513#if DEBUG || VM_PAGE_CHECK_BUCKETS
d9a64523 1514 if (mem->vmp_tabled || mem->vmp_object)
1515 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1516 "already in (obj=%p,off=0x%llx)",
d9a64523 1517 mem, object, offset, VM_PAGE_OBJECT(mem), mem->vmp_offset);
91447636 1518#endif
1519 if (object->internal && (offset >= object->vo_size)) {
1520 panic("vm_page_insert_internal: (page=%p,obj=%p,off=0x%llx,size=0x%llx) inserted at offset past object bounds",
1521 mem, object, offset, object->vo_size);
1522 }
1523
1524 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1525
1526 /*
1527 * Record the object/offset pair in this page
1528 */
1c79356b 1529
1530 mem->vmp_object = VM_PAGE_PACK_OBJECT(object);
1531 mem->vmp_offset = offset;
1c79356b 1532
1533#if CONFIG_SECLUDED_MEMORY
1534 if (object->eligible_for_secluded) {
1535 vm_page_secluded.eligible_for_secluded++;
1536 }
1537#endif /* CONFIG_SECLUDED_MEMORY */
1538
1539 /*
1540 * Insert it into the object/offset hash table
1541 */
1542 hash_id = vm_page_hash(object, offset);
1543 bucket = &vm_page_buckets[hash_id];
1544 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1545
1546 lck_spin_lock(bucket_lock);
1c79356b 1547
d9a64523 1548 mem->vmp_next_m = bucket->page_list;
fe8ab488 1549 bucket->page_list = VM_PAGE_PACK_PTR(mem);
39037602 1550 assert(mem == (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list)));
fe8ab488 1551
1c79356b 1552#if MACH_PAGE_HASH_STATS
b0d623f7
A
1553 if (++bucket->cur_count > bucket->hi_count)
1554 bucket->hi_count = bucket->cur_count;
1c79356b 1555#endif /* MACH_PAGE_HASH_STATS */
d9a64523 1556 mem->vmp_hashed = TRUE;
b0d623f7
A
1557 lck_spin_unlock(bucket_lock);
1558 }
6d2010ae 1559
316670eb
A
1560 {
1561 unsigned int cache_attr;
6d2010ae
A
1562
1563 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1564
1565 if (cache_attr != VM_WIMG_USE_DEFAULT) {
316670eb 1566 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
6d2010ae
A
1567 }
1568 }
1c79356b
A
1569 /*
1570 * Now link into the object's list of backed pages.
1571 */
d9a64523 1572 vm_page_queue_enter(&object->memq, mem, vm_page_t, vmp_listq);
3e170ce0 1573 object->memq_hint = mem;
d9a64523 1574 mem->vmp_tabled = TRUE;
1c79356b
A
1575
1576 /*
1577 * Show that the object has one more resident page.
1578 */
1579
1580 object->resident_page_count++;
b0d623f7 1581 if (VM_PAGE_WIRED(mem)) {
d9a64523 1582 assert(mem->vmp_wire_count > 0);
5ba3f43e
A
1583 VM_OBJECT_WIRED_PAGE_UPDATE_START(object);
1584 VM_OBJECT_WIRED_PAGE_ADD(object, mem);
1585 VM_OBJECT_WIRED_PAGE_UPDATE_END(object, tag);
b0d623f7
A
1586 }
1587 assert(object->resident_page_count >= object->wired_page_count);
91447636 1588
3e170ce0
A
1589 if (batch_accounting == FALSE) {
1590 if (object->internal) {
1591 OSAddAtomic(1, &vm_page_internal_count);
1592 } else {
1593 OSAddAtomic(1, &vm_page_external_count);
1594 }
39236c6e
A
1595 }
1596
1597 /*
1598 * It wouldn't make sense to insert a "reusable" page in
1599 * an object (the page would have been marked "reusable" only
1600 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1601 * in the object at that time).
1602 * But a page could be inserted in an "all_reusable" object, if
1603 * something faults it in (a vm_read() from another task or a
1604 * "use-after-free" issue in user space, for example). It can
1605 * also happen if we're relocating a page from that object to
1606 * a different physical page during a physically-contiguous
1607 * allocation.
1608 */
d9a64523 1609 assert(!mem->vmp_reusable);
39037602 1610 if (object->all_reusable) {
39236c6e
A
1611 OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1612 }
2d21ac55 1613
d9a64523
A
1614 if (object->purgable == VM_PURGABLE_DENY &&
1615 ! object->vo_ledger_tag) {
fe8ab488
A
1616 owner = TASK_NULL;
1617 } else {
d9a64523
A
1618 owner = VM_OBJECT_OWNER(object);
1619 vm_object_ledger_tag_ledgers(object,
1620 &ledger_idx_volatile,
1621 &ledger_idx_nonvolatile,
1622 &ledger_idx_volatile_compressed,
1623 &ledger_idx_nonvolatile_compressed,
1624 &do_footprint);
fe8ab488
A
1625 }
1626 if (owner &&
1627 (object->purgable == VM_PURGABLE_NONVOLATILE ||
d9a64523 1628 object->purgable == VM_PURGABLE_DENY ||
fe8ab488 1629 VM_PAGE_WIRED(mem))) {
3e170ce0
A
1630
1631 if (delayed_ledger_update)
1632 *delayed_ledger_update += PAGE_SIZE;
1633 else {
1634 /* more non-volatile bytes */
1635 ledger_credit(owner->ledger,
d9a64523 1636 ledger_idx_nonvolatile,
3e170ce0 1637 PAGE_SIZE);
d9a64523
A
1638 if (do_footprint) {
1639 /* more footprint */
1640 ledger_credit(owner->ledger,
1641 task_ledgers.phys_footprint,
1642 PAGE_SIZE);
1643 }
3e170ce0 1644 }
fe8ab488
A
1645
1646 } else if (owner &&
1647 (object->purgable == VM_PURGABLE_VOLATILE ||
1648 object->purgable == VM_PURGABLE_EMPTY)) {
1649 assert(! VM_PAGE_WIRED(mem));
1650 /* more volatile bytes */
1651 ledger_credit(owner->ledger,
d9a64523 1652 ledger_idx_volatile,
fe8ab488
A
1653 PAGE_SIZE);
1654 }
1655
b0d623f7
A
1656 if (object->purgable == VM_PURGABLE_VOLATILE) {
1657 if (VM_PAGE_WIRED(mem)) {
fe8ab488 1658 OSAddAtomic(+1, &vm_page_purgeable_wired_count);
b0d623f7 1659 } else {
fe8ab488 1660 OSAddAtomic(+1, &vm_page_purgeable_count);
b0d623f7 1661 }
593a1d5f 1662 } else if (object->purgable == VM_PURGABLE_EMPTY &&
d9a64523 1663 mem->vmp_q_state == VM_PAGE_ON_THROTTLED_Q) {
b0d623f7
A
1664 /*
1665 * This page belongs to a purged VM object but hasn't
1666 * been purged (because it was "busy").
1667 * It's in the "throttled" queue and hence not
1668 * visible to vm_pageout_scan(). Move it to a pageable
1669 * queue, so that it can eventually be reclaimed, instead
1670 * of lingering in the "empty" object.
1671 */
593a1d5f 1672 if (queues_lock_held == FALSE)
b0d623f7 1673 vm_page_lockspin_queues();
593a1d5f 1674 vm_page_deactivate(mem);
2d21ac55
A
1675 if (queues_lock_held == FALSE)
1676 vm_page_unlock_queues();
91447636 1677 }
fe8ab488
A
1678
1679#if VM_OBJECT_TRACKING_OP_MODIFIED
1680 if (vm_object_tracking_inited &&
1681 object->internal &&
1682 object->resident_page_count == 0 &&
1683 object->pager == NULL &&
1684 object->shadow != NULL &&
1685 object->shadow->copy == object) {
1686 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
1687 int numsaved = 0;
1688
1689 numsaved = OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
1690 btlog_add_entry(vm_object_tracking_btlog,
1691 object,
1692 VM_OBJECT_TRACKING_OP_MODIFIED,
1693 bt,
1694 numsaved);
1695 }
1696#endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
1c79356b
A
1697}
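/*
 * Editor's illustrative sketch -- not part of vm_resident.c.  It models, in
 * plain userspace C, the bucket-chaining step performed by
 * vm_page_insert_internal() above: hash the (object, offset) pair to a
 * bucket and push the page onto the head of that bucket's singly linked
 * list (the real code additionally takes the spin lock covering the
 * bucket's group and stores packed pointers).  All model_* names are
 * invented for illustration and do not exist in XNU.
 */
#if 0	/* editor's sketch only -- never built */
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

#define MODEL_BUCKET_COUNT	1024u	/* power of two, like the real bucket count */

struct model_page {
	struct model_page	*next_m;	/* hash chain link	*/
	const void		*object;	/* owning object	*/
	uint64_t		 offset;	/* offset in object	*/
};

struct model_bucket {
	struct model_page	*page_list;	/* head of chain	*/
};

static struct model_bucket model_buckets[MODEL_BUCKET_COUNT];

static unsigned int
model_page_hash(const void *object, uint64_t offset)
{
	/* cheap stand-in for vm_page_hash(): mix object identity and page index */
	uintptr_t mix = (uintptr_t)object ^ (uintptr_t)(offset >> 12);

	return (unsigned int)(mix & (MODEL_BUCKET_COUNT - 1));
}

static void
model_page_insert(struct model_page *mem, const void *object, uint64_t offset)
{
	struct model_bucket *bucket = &model_buckets[model_page_hash(object, offset)];

	/* record the object/offset pair in the page, then link at the head */
	mem->object = object;
	mem->offset = offset;
	mem->next_m = bucket->page_list;
	bucket->page_list = mem;
}

int
main(void)
{
	static int dummy_object;
	struct model_page *p = calloc(1, sizeof(*p));

	model_page_insert(p, &dummy_object, 0x4000);
	assert(model_buckets[model_page_hash(&dummy_object, 0x4000)].page_list == p);
	free(p);
	return (0);
}
#endif	/* editor's sketch */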
1698
1699/*
1700 * vm_page_replace:
1701 *
1702 * Exactly like vm_page_insert, except that we first
1703 * remove any existing page at the given offset in object.
1704 *
b0d623f7 1705 * The object must be locked.
1c79356b 1706 */
1c79356b
A
1707void
1708vm_page_replace(
39037602
A
1709 vm_page_t mem,
1710 vm_object_t object,
1711 vm_object_offset_t offset)
1c79356b 1712{
0c530ab8
A
1713 vm_page_bucket_t *bucket;
1714 vm_page_t found_m = VM_PAGE_NULL;
b0d623f7
A
1715 lck_spin_t *bucket_lock;
1716 int hash_id;
1c79356b 1717
316670eb
A
1718#if 0
1719 /*
1720 * we don't hold the page queue lock
1721 * so this check isn't safe to make
1722 */
1c79356b 1723 VM_PAGE_CHECK(mem);
316670eb 1724#endif
2d21ac55 1725 vm_object_lock_assert_exclusive(object);
15129b1c 1726#if DEBUG || VM_PAGE_CHECK_BUCKETS
d9a64523 1727 if (mem->vmp_tabled || mem->vmp_object)
91447636
A
1728 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1729 "already in (obj=%p,off=0x%llx)",
d9a64523 1730 mem, object, offset, VM_PAGE_OBJECT(mem), mem->vmp_offset);
91447636 1731#endif
39037602
A
1732 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1733
1734 assert(!VM_PAGE_PAGEABLE(mem));
1735
1c79356b
A
1736 /*
1737 * Record the object/offset pair in this page
1738 */
d9a64523
A
1739 mem->vmp_object = VM_PAGE_PACK_OBJECT(object);
1740 mem->vmp_offset = offset;
1c79356b
A
1741
1742 /*
1743 * Insert it into the object/offset hash table,
1744 * replacing any page that might have been there.
1745 */
1746
b0d623f7
A
1747 hash_id = vm_page_hash(object, offset);
1748 bucket = &vm_page_buckets[hash_id];
1749 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1750
1751 lck_spin_lock(bucket_lock);
0c530ab8 1752
fe8ab488
A
1753 if (bucket->page_list) {
1754 vm_page_packed_t *mp = &bucket->page_list;
39037602 1755 vm_page_t m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp));
0c530ab8 1756
1c79356b 1757 do {
39037602
A
1758 /*
1759 * compare packed object pointers
1760 */
d9a64523 1761 if (m->vmp_object == mem->vmp_object && m->vmp_offset == offset) {
1c79356b 1762 /*
0c530ab8 1763 * Remove old page from hash list
1c79356b 1764 */
d9a64523
A
1765 *mp = m->vmp_next_m;
1766 m->vmp_hashed = FALSE;
1767 m->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
1c79356b 1768
0c530ab8 1769 found_m = m;
1c79356b
A
1770 break;
1771 }
d9a64523 1772 mp = &m->vmp_next_m;
39037602 1773 } while ((m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp))));
0c530ab8 1774
d9a64523 1775 mem->vmp_next_m = bucket->page_list;
1c79356b 1776 } else {
d9a64523 1777 mem->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
1c79356b 1778 }
0c530ab8
A
1779 /*
1780 * insert new page at head of hash list
1781 */
fe8ab488 1782 bucket->page_list = VM_PAGE_PACK_PTR(mem);
d9a64523 1783 mem->vmp_hashed = TRUE;
0c530ab8 1784
b0d623f7 1785 lck_spin_unlock(bucket_lock);
1c79356b 1786
0c530ab8
A
1787 if (found_m) {
1788 /*
1789 * there was already a page at the specified
1790 * offset for this object... remove it from
1791 * the object and free it back to the free list
1792 */
b0d623f7 1793 vm_page_free_unlocked(found_m, FALSE);
91447636 1794 }
3e170ce0 1795 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, FALSE, FALSE, FALSE, NULL);
1c79356b
A
1796}
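/*
 * Editor's illustrative sketch -- not part of vm_resident.c.  vm_page_replace()
 * above walks the hash chain with a pointer-to-pointer ("mp") so the matching
 * entry can be unlinked without special-casing the bucket head; the
 * stand-alone model below shows only that idiom.  The chain_node and
 * chain_unlink names are invented for illustration.
 */
#if 0	/* editor's sketch only -- never built */
#include <assert.h>
#include <stddef.h>

struct chain_node {
	struct chain_node	*next;
	int			 key;
};

/* unlink and return the first node whose key matches, or NULL */
static struct chain_node *
chain_unlink(struct chain_node **head, int key)
{
	struct chain_node **mp = head;
	struct chain_node  *m;

	while ((m = *mp) != NULL) {
		if (m->key == key) {
			*mp = m->next;		/* splice it out	*/
			m->next = NULL;
			return (m);
		}
		mp = &m->next;			/* advance the link slot */
	}
	return (NULL);
}

int
main(void)
{
	struct chain_node c = { NULL, 3 }, b = { &c, 2 }, a = { &b, 1 };
	struct chain_node *head = &a;

	assert(chain_unlink(&head, 2) == &b);	/* middle node	*/
	assert(head == &a && a.next == &c);	/* chain intact	*/
	assert(chain_unlink(&head, 1) == &a);	/* head node	*/
	assert(head == &c);
	return (0);
}
#endif	/* editor's sketch */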
1797
1798/*
1799 * vm_page_remove: [ internal use only ]
1800 *
1801 * Removes the given mem entry from the object/offset-page
1802 * table and the object page list.
1803 *
b0d623f7 1804 * The object must be locked.
1c79356b
A
1805 */
1806
1807void
1808vm_page_remove(
b0d623f7
A
1809 vm_page_t mem,
1810 boolean_t remove_from_hash)
1c79356b 1811{
b0d623f7
A
1812 vm_page_bucket_t *bucket;
1813 vm_page_t this;
1814 lck_spin_t *bucket_lock;
1815 int hash_id;
fe8ab488 1816 task_t owner;
39037602 1817 vm_object_t m_object;
d9a64523
A
1818 int ledger_idx_volatile;
1819 int ledger_idx_nonvolatile;
1820 int ledger_idx_volatile_compressed;
1821 int ledger_idx_nonvolatile_compressed;
1822 int do_footprint;
39037602
A
1823
1824 m_object = VM_PAGE_OBJECT(mem);
1c79356b
A
1825
1826 XPR(XPR_VM_PAGE,
1827 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
d9a64523 1828 m_object, mem->vmp_offset,
b0d623f7
A
1829 mem, 0,0);
1830
39037602 1831 vm_object_lock_assert_exclusive(m_object);
d9a64523
A
1832 assert(mem->vmp_tabled);
1833 assert(!mem->vmp_cleaning);
1834 assert(!mem->vmp_laundry);
39037602
A
1835
1836 if (VM_PAGE_PAGEABLE(mem)) {
1837 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1838 }
316670eb
A
1839#if 0
1840 /*
1841 * we don't hold the page queue lock
1842 * so this check isn't safe to make
1843 */
1c79356b 1844 VM_PAGE_CHECK(mem);
316670eb 1845#endif
b0d623f7
A
1846 if (remove_from_hash == TRUE) {
1847 /*
1848 * Remove from the object/offset hash table
1849 */
d9a64523 1850 hash_id = vm_page_hash(m_object, mem->vmp_offset);
b0d623f7
A
1851 bucket = &vm_page_buckets[hash_id];
1852 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
91447636 1853
b0d623f7 1854 lck_spin_lock(bucket_lock);
1c79356b 1855
39037602 1856 if ((this = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list))) == mem) {
b0d623f7 1857 /* optimize for common case */
1c79356b 1858
d9a64523 1859 bucket->page_list = mem->vmp_next_m;
b0d623f7 1860 } else {
fe8ab488 1861 vm_page_packed_t *prev;
1c79356b 1862
d9a64523 1863 for (prev = &this->vmp_next_m;
39037602 1864 (this = (vm_page_t)(VM_PAGE_UNPACK_PTR(*prev))) != mem;
d9a64523 1865 prev = &this->vmp_next_m)
b0d623f7 1866 continue;
d9a64523 1867 *prev = this->vmp_next_m;
b0d623f7 1868 }
1c79356b 1869#if MACH_PAGE_HASH_STATS
b0d623f7 1870 bucket->cur_count--;
1c79356b 1871#endif /* MACH_PAGE_HASH_STATS */
d9a64523
A
1872 mem->vmp_hashed = FALSE;
1873 this->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
b0d623f7
A
1874 lck_spin_unlock(bucket_lock);
1875 }
1c79356b
A
1876 /*
1877 * Now remove from the object's list of backed pages.
1878 */
1879
3e170ce0 1880 vm_page_remove_internal(mem);
1c79356b
A
1881
1882 /*
1883 * And show that the object has one fewer resident
1884 * page.
1885 */
1886
39037602
A
1887 assert(m_object->resident_page_count > 0);
1888 m_object->resident_page_count--;
6d2010ae 1889
39037602 1890 if (m_object->internal) {
fe8ab488 1891#if DEBUG
39236c6e 1892 assert(vm_page_internal_count);
fe8ab488
A
1893#endif /* DEBUG */
1894
39236c6e
A
1895 OSAddAtomic(-1, &vm_page_internal_count);
1896 } else {
1897 assert(vm_page_external_count);
1898 OSAddAtomic(-1, &vm_page_external_count);
fe8ab488 1899
d9a64523 1900 if (mem->vmp_xpmapped) {
fe8ab488
A
1901 assert(vm_page_xpmapped_external_count);
1902 OSAddAtomic(-1, &vm_page_xpmapped_external_count);
1903 }
39236c6e 1904 }
d9a64523
A
1905 if (!m_object->internal &&
1906 m_object->cached_list.next &&
1907 m_object->cached_list.prev) {
39037602
A
1908 if (m_object->resident_page_count == 0)
1909 vm_object_cache_remove(m_object);
6d2010ae
A
1910 }
1911
b0d623f7 1912 if (VM_PAGE_WIRED(mem)) {
d9a64523 1913 assert(mem->vmp_wire_count > 0);
5ba3f43e
A
1914 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
1915 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
1916 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
b0d623f7 1917 }
39037602
A
1918 assert(m_object->resident_page_count >=
1919 m_object->wired_page_count);
d9a64523 1920 if (mem->vmp_reusable) {
39037602
A
1921 assert(m_object->reusable_page_count > 0);
1922 m_object->reusable_page_count--;
1923 assert(m_object->reusable_page_count <=
1924 m_object->resident_page_count);
d9a64523 1925 mem->vmp_reusable = FALSE;
b0d623f7
A
1926 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1927 vm_page_stats_reusable.reused_remove++;
39037602 1928 } else if (m_object->all_reusable) {
b0d623f7
A
1929 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1930 vm_page_stats_reusable.reused_remove++;
1931 }
1c79356b 1932
d9a64523
A
1933 if (m_object->purgable == VM_PURGABLE_DENY &&
1934 ! m_object->vo_ledger_tag) {
fe8ab488
A
1935 owner = TASK_NULL;
1936 } else {
d9a64523
A
1937 owner = VM_OBJECT_OWNER(m_object);
1938 vm_object_ledger_tag_ledgers(m_object,
1939 &ledger_idx_volatile,
1940 &ledger_idx_nonvolatile,
1941 &ledger_idx_volatile_compressed,
1942 &ledger_idx_nonvolatile_compressed,
1943 &do_footprint);
fe8ab488
A
1944 }
1945 if (owner &&
39037602 1946 (m_object->purgable == VM_PURGABLE_NONVOLATILE ||
d9a64523 1947 m_object->purgable == VM_PURGABLE_DENY ||
fe8ab488
A
1948 VM_PAGE_WIRED(mem))) {
1949 /* less non-volatile bytes */
1950 ledger_debit(owner->ledger,
d9a64523 1951 ledger_idx_nonvolatile,
fe8ab488 1952 PAGE_SIZE);
d9a64523
A
1953 if (do_footprint) {
1954 /* less footprint */
1955 ledger_debit(owner->ledger,
1956 task_ledgers.phys_footprint,
1957 PAGE_SIZE);
1958 }
fe8ab488 1959 } else if (owner &&
39037602
A
1960 (m_object->purgable == VM_PURGABLE_VOLATILE ||
1961 m_object->purgable == VM_PURGABLE_EMPTY)) {
fe8ab488
A
1962 assert(! VM_PAGE_WIRED(mem));
1963 /* less volatile bytes */
1964 ledger_debit(owner->ledger,
d9a64523 1965 ledger_idx_volatile,
fe8ab488
A
1966 PAGE_SIZE);
1967 }
39037602 1968 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
b0d623f7
A
1969 if (VM_PAGE_WIRED(mem)) {
1970 assert(vm_page_purgeable_wired_count > 0);
1971 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1972 } else {
1973 assert(vm_page_purgeable_count > 0);
1974 OSAddAtomic(-1, &vm_page_purgeable_count);
1975 }
91447636 1976 }
5ba3f43e 1977
39037602
A
1978 if (m_object->set_cache_attr == TRUE)
1979 pmap_set_cache_attributes(VM_PAGE_GET_PHYS_PAGE(mem), 0);
6d2010ae 1980
d9a64523
A
1981 mem->vmp_tabled = FALSE;
1982 mem->vmp_object = 0;
1983 mem->vmp_offset = (vm_object_offset_t) -1;
1c79356b
A
1984}
1985
b0d623f7 1986
1c79356b
A
1987/*
1988 * vm_page_lookup:
1989 *
1990 * Returns the page associated with the object/offset
1991 * pair specified; if none is found, VM_PAGE_NULL is returned.
1992 *
1993 * The object must be locked. No side effects.
1994 */
1995
3e170ce0
A
1996#define VM_PAGE_HASH_LOOKUP_THRESHOLD 10
1997
1998#if DEBUG_VM_PAGE_LOOKUP
2d21ac55 1999
3e170ce0
A
2000struct {
2001 uint64_t vpl_total;
2002 uint64_t vpl_empty_obj;
2003 uint64_t vpl_bucket_NULL;
2004 uint64_t vpl_hit_hint;
2005 uint64_t vpl_hit_hint_next;
2006 uint64_t vpl_hit_hint_prev;
2007 uint64_t vpl_fast;
2008 uint64_t vpl_slow;
2009 uint64_t vpl_hit;
2010 uint64_t vpl_miss;
2011
2012 uint64_t vpl_fast_elapsed;
2013 uint64_t vpl_slow_elapsed;
2014} vm_page_lookup_stats __attribute__((aligned(8)));
2015
2016#endif
2017
2018#define KDP_VM_PAGE_WALK_MAX 1000
2019
2020vm_page_t
2021kdp_vm_page_lookup(
2022 vm_object_t object,
2023 vm_object_offset_t offset)
2024{
2025 vm_page_t cur_page;
2026 int num_traversed = 0;
2027
2028 if (not_in_kdp) {
2029 panic("panic: kdp_vm_page_lookup done outside of kernel debugger");
2030 }
2031
d9a64523
A
2032 vm_page_queue_iterate(&object->memq, cur_page, vm_page_t, vmp_listq) {
2033 if (cur_page->vmp_offset == offset) {
3e170ce0
A
2034 return cur_page;
2035 }
2036 num_traversed++;
2037
2038 if (num_traversed >= KDP_VM_PAGE_WALK_MAX) {
2039 return VM_PAGE_NULL;
2040 }
2041 }
2042
2043 return VM_PAGE_NULL;
2044}
91447636 2045
1c79356b
A
2046vm_page_t
2047vm_page_lookup(
b0d623f7
A
2048 vm_object_t object,
2049 vm_object_offset_t offset)
1c79356b 2050{
b0d623f7
A
2051 vm_page_t mem;
2052 vm_page_bucket_t *bucket;
39037602 2053 vm_page_queue_entry_t qe;
3e170ce0 2054 lck_spin_t *bucket_lock = NULL;
b0d623f7 2055 int hash_id;
3e170ce0
A
2056#if DEBUG_VM_PAGE_LOOKUP
2057 uint64_t start, elapsed;
91447636 2058
3e170ce0
A
2059 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_total);
2060#endif
2d21ac55 2061 vm_object_lock_assert_held(object);
3e170ce0
A
2062
2063 if (object->resident_page_count == 0) {
2064#if DEBUG_VM_PAGE_LOOKUP
2065 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_empty_obj);
2066#endif
2067 return (VM_PAGE_NULL);
2068 }
2069
91447636 2070 mem = object->memq_hint;
2d21ac55 2071
91447636 2072 if (mem != VM_PAGE_NULL) {
39037602 2073 assert(VM_PAGE_OBJECT(mem) == object);
2d21ac55 2074
d9a64523 2075 if (mem->vmp_offset == offset) {
3e170ce0
A
2076#if DEBUG_VM_PAGE_LOOKUP
2077 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint);
2078#endif
2079 return (mem);
91447636 2080 }
d9a64523 2081 qe = (vm_page_queue_entry_t)vm_page_queue_next(&mem->vmp_listq);
2d21ac55 2082
39037602 2083 if (! vm_page_queue_end(&object->memq, qe)) {
91447636
A
2084 vm_page_t next_page;
2085
39037602
A
2086 next_page = (vm_page_t)((uintptr_t)qe);
2087 assert(VM_PAGE_OBJECT(next_page) == object);
2d21ac55 2088
d9a64523 2089 if (next_page->vmp_offset == offset) {
91447636 2090 object->memq_hint = next_page; /* new hint */
3e170ce0
A
2091#if DEBUG_VM_PAGE_LOOKUP
2092 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_next);
2093#endif
2094 return (next_page);
91447636
A
2095 }
2096 }
d9a64523 2097 qe = (vm_page_queue_entry_t)vm_page_queue_prev(&mem->vmp_listq);
2d21ac55 2098
39037602 2099 if (! vm_page_queue_end(&object->memq, qe)) {
91447636
A
2100 vm_page_t prev_page;
2101
39037602
A
2102 prev_page = (vm_page_t)((uintptr_t)qe);
2103 assert(VM_PAGE_OBJECT(prev_page) == object);
2d21ac55 2104
d9a64523 2105 if (prev_page->vmp_offset == offset) {
91447636 2106 object->memq_hint = prev_page; /* new hint */
3e170ce0
A
2107#if DEBUG_VM_PAGE_LOOKUP
2108 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_prev);
2109#endif
2110 return (prev_page);
91447636
A
2111 }
2112 }
2113 }
1c79356b 2114 /*
2d21ac55 2115 * Search the hash table for this object/offset pair
1c79356b 2116 */
b0d623f7
A
2117 hash_id = vm_page_hash(object, offset);
2118 bucket = &vm_page_buckets[hash_id];
1c79356b 2119
2d21ac55
A
2120 /*
2121 * since we hold the object lock, we are guaranteed that no
2122 * new pages can be inserted into this object... this in turn
2123 * guarantees that the page we're looking for can't exist
2124 * if the bucket it hashes to is currently NULL even when looked
2125 * at outside the scope of the hash bucket lock... this is a
2126 * really cheap optimization to avoid taking the lock
2127 */
fe8ab488 2128 if (!bucket->page_list) {
3e170ce0
A
2129#if DEBUG_VM_PAGE_LOOKUP
2130 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_bucket_NULL);
2131#endif
2d21ac55
A
2132 return (VM_PAGE_NULL);
2133 }
0c530ab8 2134
3e170ce0
A
2135#if DEBUG_VM_PAGE_LOOKUP
2136 start = mach_absolute_time();
2137#endif
2138 if (object->resident_page_count <= VM_PAGE_HASH_LOOKUP_THRESHOLD) {
316670eb 2139 /*
3e170ce0
A
2140 * on average, it's roughly 3 times faster to run a short memq list
2141 * than to take the spin lock and go through the hash list
316670eb 2142 */
39037602 2143 mem = (vm_page_t)vm_page_queue_first(&object->memq);
3e170ce0 2144
39037602 2145 while (!vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem)) {
3e170ce0 2146
d9a64523 2147 if (mem->vmp_offset == offset)
3e170ce0
A
2148 break;
2149
d9a64523 2150 mem = (vm_page_t)vm_page_queue_next(&mem->vmp_listq);
3e170ce0 2151 }
39037602 2152 if (vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem))
3e170ce0
A
2153 mem = NULL;
2154 } else {
39037602
A
2155 vm_page_object_t packed_object;
2156
2157 packed_object = VM_PAGE_PACK_OBJECT(object);
3e170ce0
A
2158
2159 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
2160
2161 lck_spin_lock(bucket_lock);
2162
39037602
A
2163 for (mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
2164 mem != VM_PAGE_NULL;
d9a64523 2165 mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->vmp_next_m))) {
3e170ce0
A
2166#if 0
2167 /*
2168 * we don't hold the page queue lock
2169 * so this check isn't safe to make
2170 */
2171 VM_PAGE_CHECK(mem);
316670eb 2172#endif
d9a64523 2173 if ((mem->vmp_object == packed_object) && (mem->vmp_offset == offset))
3e170ce0
A
2174 break;
2175 }
2176 lck_spin_unlock(bucket_lock);
1c79356b 2177 }
55e303ae 2178
3e170ce0
A
2179#if DEBUG_VM_PAGE_LOOKUP
2180 elapsed = mach_absolute_time() - start;
2181
2182 if (bucket_lock) {
2183 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_slow);
2184 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_slow_elapsed);
2185 } else {
2186 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_fast);
2187 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_fast_elapsed);
2188 }
2189 if (mem != VM_PAGE_NULL)
2190 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit);
2191 else
2192 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_miss);
2193#endif
91447636 2194 if (mem != VM_PAGE_NULL) {
39037602 2195 assert(VM_PAGE_OBJECT(mem) == object);
91447636 2196
3e170ce0
A
2197 object->memq_hint = mem;
2198 }
2199 return (mem);
91447636
A
2200}
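/*
 * Editor's illustrative sketch -- not part of vm_resident.c.  vm_page_lookup()
 * above first probes the per-object "memq_hint" (the page found by the last
 * lookup) and its immediate neighbours before falling back to either a short
 * linear walk of the memq or the hash buckets.  The model below shows just
 * that hint probe on a plain doubly linked list; the lk_page and lk_object
 * names are invented for illustration.
 */
#if 0	/* editor's sketch only -- never built */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct lk_page {
	struct lk_page	*next;
	struct lk_page	*prev;
	uint64_t	 offset;
};

struct lk_object {
	struct lk_page	*memq;		/* head of the resident-page list	*/
	struct lk_page	*memq_hint;	/* page returned by the last lookup	*/
};

static struct lk_page *
lk_lookup(struct lk_object *object, uint64_t offset)
{
	struct lk_page *hint = object->memq_hint;
	struct lk_page *m;

	/* cheap probes around the hint before doing any real searching */
	if (hint != NULL) {
		if (hint->offset == offset)
			return (hint);
		if (hint->next != NULL && hint->next->offset == offset)
			return (object->memq_hint = hint->next);
		if (hint->prev != NULL && hint->prev->offset == offset)
			return (object->memq_hint = hint->prev);
	}
	/* fall back to a full walk (the kernel may use the hash buckets instead) */
	for (m = object->memq; m != NULL; m = m->next) {
		if (m->offset == offset)
			return (object->memq_hint = m);
	}
	return (NULL);
}

int
main(void)
{
	struct lk_page p0 = { NULL, NULL, 0x0000 };
	struct lk_page p1 = { NULL, NULL, 0x1000 };
	struct lk_object obj = { &p0, NULL };

	p0.next = &p1; p1.prev = &p0;

	assert(lk_lookup(&obj, 0x1000) == &p1);	/* full walk, hint now p1	*/
	assert(lk_lookup(&obj, 0x0000) == &p0);	/* satisfied by hint->prev	*/
	assert(lk_lookup(&obj, 0x2000) == NULL);
	return (0);
}
#endif	/* editor's sketch */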
2201
2202
1c79356b
A
2203/*
2204 * vm_page_rename:
2205 *
2206 * Move the given memory entry from its
2207 * current object to the specified target object/offset.
2208 *
2209 * The object must be locked.
2210 */
2211void
2212vm_page_rename(
39037602
A
2213 vm_page_t mem,
2214 vm_object_t new_object,
5ba3f43e 2215 vm_object_offset_t new_offset)
1c79356b 2216{
39037602
A
2217 boolean_t internal_to_external, external_to_internal;
2218 vm_tag_t tag;
2219 vm_object_t m_object;
39236c6e 2220
39037602 2221 m_object = VM_PAGE_OBJECT(mem);
2d21ac55 2222
39037602
A
2223 assert(m_object != new_object);
2224 assert(m_object);
3e170ce0 2225
b0d623f7
A
2226 XPR(XPR_VM_PAGE,
2227 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
2228 new_object, new_offset,
2229 mem, 0,0);
2230
1c79356b 2231 /*
d9a64523 2232 * Changes to mem->vmp_object require the page lock because
1c79356b
A
2233 * the pageout daemon uses that lock to get the object.
2234 */
b0d623f7 2235 vm_page_lockspin_queues();
1c79356b 2236
39236c6e
A
2237 internal_to_external = FALSE;
2238 external_to_internal = FALSE;
2239
d9a64523 2240 if (mem->vmp_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q) {
39236c6e
A
2241 /*
2242 * it's much easier to get the vm_page_pageable_xxx accounting correct
2243 * if we first move the page to the active queue... it's going to end
2244 * up there anyway, and we don't call vm_page_rename frequently enough
2245 * for this to matter.
2246 */
39037602 2247 vm_page_queues_remove(mem, FALSE);
39236c6e
A
2248 vm_page_activate(mem);
2249 }
39037602
A
2250 if (VM_PAGE_PAGEABLE(mem)) {
2251 if (m_object->internal && !new_object->internal) {
39236c6e
A
2252 internal_to_external = TRUE;
2253 }
39037602 2254 if (!m_object->internal && new_object->internal) {
39236c6e
A
2255 external_to_internal = TRUE;
2256 }
2257 }
2258
39037602 2259 tag = m_object->wire_tag;
b0d623f7 2260 vm_page_remove(mem, TRUE);
3e170ce0 2261 vm_page_insert_internal(mem, new_object, new_offset, tag, TRUE, TRUE, FALSE, FALSE, NULL);
1c79356b 2262
39236c6e
A
2263 if (internal_to_external) {
2264 vm_page_pageable_internal_count--;
2265 vm_page_pageable_external_count++;
2266 } else if (external_to_internal) {
2267 vm_page_pageable_external_count--;
2268 vm_page_pageable_internal_count++;
2269 }
2270
1c79356b
A
2271 vm_page_unlock_queues();
2272}
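/*
 * Editor's illustrative sketch -- not part of vm_resident.c.  When
 * vm_page_rename() moves a pageable page between an internal (anonymous)
 * object and an external (file-backed) one, one pageable counter has to be
 * decremented and the other incremented, which is what the
 * internal_to_external / external_to_internal flags above arrange.  The
 * sketch restates that transfer in isolation; all names are invented for
 * illustration.
 */
#if 0	/* editor's sketch only -- never built */
#include <assert.h>
#include <stdbool.h>

static long pageable_internal_count;
static long pageable_external_count;

static void
model_rename_accounting(bool old_internal, bool new_internal)
{
	if (old_internal == new_internal)
		return;			/* same kind of object: nothing moves	*/
	if (old_internal) {		/* internal -> external			*/
		pageable_internal_count--;
		pageable_external_count++;
	} else {			/* external -> internal			*/
		pageable_external_count--;
		pageable_internal_count++;
	}
}

int
main(void)
{
	pageable_internal_count = 1;
	pageable_external_count = 0;

	model_rename_accounting(true, false);	/* page moves to a file-backed object */
	assert(pageable_internal_count == 0 && pageable_external_count == 1);
	return (0);
}
#endif	/* editor's sketch */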
2273
2274/*
2275 * vm_page_init:
2276 *
2277 * Initialize the fields in a new page.
2278 * This takes a structure with random values and initializes it
2279 * so that it can be given to vm_page_release or vm_page_insert.
2280 */
2281void
2282vm_page_init(
2283 vm_page_t mem,
0b4c1975
A
2284 ppnum_t phys_page,
2285 boolean_t lopage)
1c79356b 2286{
91447636 2287 assert(phys_page);
7ddcb079
A
2288
2289#if DEBUG
2290 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
2291 if (!(pmap_valid_page(phys_page))) {
2292 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
2293 }
2294 }
2295#endif
1c79356b 2296 *mem = vm_page_template;
39037602
A
2297
2298 VM_PAGE_SET_PHYS_PAGE(mem, phys_page);
6d2010ae
A
2299#if 0
2300 /*
2301 * we're leaving this turned off for now... currently pages
2302 * come off the free list and are either immediately dirtied/referenced
2303 * due to zero-fill or COW faults, or are used to read or write files...
2304 * in the file I/O case, the UPL mechanism takes care of clearing
2305 * the state of the HW ref/mod bits in a somewhat fragile way.
2306 * Since we may change the way this works in the future (to toughen it up),
2307 * I'm leaving this as a reminder of where these bits could get cleared
2308 */
2309
2310 /*
2311 * make sure both the h/w referenced and modified bits are
2312 * clear at this point... we are especially dependent on
2313 * not finding a 'stale' h/w modified in a number of spots
2314 * once this page goes back into use
2315 */
2316 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
2317#endif
d9a64523 2318 mem->vmp_lopage = lopage;
1c79356b
A
2319}
2320
2321/*
2322 * vm_page_grab_fictitious:
2323 *
2324 * Remove a fictitious page from the free list.
2325 * Returns VM_PAGE_NULL if there are no free pages.
2326 */
2327int c_vm_page_grab_fictitious = 0;
6d2010ae 2328int c_vm_page_grab_fictitious_failed = 0;
1c79356b
A
2329int c_vm_page_release_fictitious = 0;
2330int c_vm_page_more_fictitious = 0;
2331
2332vm_page_t
2d21ac55 2333vm_page_grab_fictitious_common(
b0d623f7 2334 ppnum_t phys_addr)
1c79356b 2335{
6d2010ae
A
2336 vm_page_t m;
2337
2338 if ((m = (vm_page_t)zget(vm_page_zone))) {
1c79356b 2339
0b4c1975 2340 vm_page_init(m, phys_addr, FALSE);
d9a64523 2341 m->vmp_fictitious = TRUE;
1c79356b 2342
6d2010ae
A
2343 c_vm_page_grab_fictitious++;
2344 } else
2345 c_vm_page_grab_fictitious_failed++;
2346
1c79356b
A
2347 return m;
2348}
2349
2d21ac55
A
2350vm_page_t
2351vm_page_grab_fictitious(void)
2352{
2353 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
2354}
2355
5ba3f43e
A
2356int vm_guard_count;
2357
2358
2d21ac55
A
2359vm_page_t
2360vm_page_grab_guard(void)
2361{
5ba3f43e
A
2362 vm_page_t page;
2363 page = vm_page_grab_fictitious_common(vm_page_guard_addr);
2364 if (page) OSAddAtomic(1, &vm_guard_count);
2365 return page;
2d21ac55
A
2366}
2367
6d2010ae 2368
1c79356b
A
2369/*
2370 * vm_page_release_fictitious:
2371 *
6d2010ae 2372 * Release a fictitious page to the zone pool
1c79356b 2373 */
1c79356b
A
2374void
2375vm_page_release_fictitious(
6d2010ae 2376 vm_page_t m)
1c79356b 2377{
d9a64523
A
2378 assert((m->vmp_q_state == VM_PAGE_NOT_ON_Q) || (m->vmp_q_state == VM_PAGE_IS_WIRED));
2379 assert(m->vmp_fictitious);
39037602
A
2380 assert(VM_PAGE_GET_PHYS_PAGE(m) == vm_page_fictitious_addr ||
2381 VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr);
1c79356b 2382
5ba3f43e
A
2383
2384	if (VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr) OSAddAtomic(-1, &vm_guard_count);
2385
1c79356b 2386 c_vm_page_release_fictitious++;
6d2010ae 2387
91447636 2388 zfree(vm_page_zone, m);
1c79356b
A
2389}
2390
2391/*
2392 * vm_page_more_fictitious:
2393 *
6d2010ae 2394 * Add more fictitious pages to the zone.
1c79356b
A
2395 * Allowed to block. This routine is way intimate
2396 * with the zones code, for several reasons:
2397 * 1. we need to carve some page structures out of physical
2398 * memory before zones work, so they _cannot_ come from
2399 * the zone_map.
2400 * 2. the zone needs to be collectable in order to prevent
2401 * growth without bound. These structures are used by
2402 * the device pager (by the hundreds and thousands), as
2403 * private pages for pageout, and as blocking pages for
2404 * pagein. Temporary bursts in demand should not result in
2405 * permanent allocation of a resource.
2406 * 3. To smooth allocation humps, we allocate single pages
2407 * with kernel_memory_allocate(), and cram them into the
6d2010ae 2408 * zone.
1c79356b
A
2409 */
2410
2411void vm_page_more_fictitious(void)
2412{
6d2010ae
A
2413 vm_offset_t addr;
2414 kern_return_t retval;
1c79356b
A
2415
2416 c_vm_page_more_fictitious++;
2417
1c79356b
A
2418 /*
2419 * Allocate a single page from the zone_map. Do not wait if no physical
2420 * pages are immediately available, and do not zero the space. We need
2421 * our own blocking lock here to prevent having multiple,
2422 * simultaneous requests from piling up on the zone_map lock. Exactly
2423 * one (of our) threads should be potentially waiting on the map lock.
2424 * If winner is not vm-privileged, then the page allocation will fail,
2425 * and it will temporarily block here in the vm_page_wait().
2426 */
b0d623f7 2427 lck_mtx_lock(&vm_page_alloc_lock);
1c79356b
A
2428 /*
2429 * If another thread allocated space, just bail out now.
2430 */
2431 if (zone_free_count(vm_page_zone) > 5) {
2432 /*
2433 * The number "5" is a small number that is larger than the
2434 * number of fictitious pages that any single caller will
2435 * attempt to allocate. Otherwise, a thread will attempt to
2436 * acquire a fictitious page (vm_page_grab_fictitious), fail,
2437 * release all of the resources and locks already acquired,
2438 * and then call this routine. This routine finds the pages
2439 * that the caller released, so fails to allocate new space.
2440 * The process repeats infinitely. The largest known number
2441 * of fictitious pages required in this manner is 2. 5 is
2442 * simply a somewhat larger number.
2443 */
b0d623f7 2444 lck_mtx_unlock(&vm_page_alloc_lock);
1c79356b
A
2445 return;
2446 }
2447
91447636 2448 retval = kernel_memory_allocate(zone_map,
5ba3f43e 2449 &addr, PAGE_SIZE, 0,
3e170ce0 2450 KMA_KOBJECT|KMA_NOPAGEWAIT, VM_KERN_MEMORY_ZONE);
91447636 2451 if (retval != KERN_SUCCESS) {
1c79356b 2452 /*
6d2010ae 2453 * No page was available. Drop the
1c79356b
A
2454 * lock to give another thread a chance at it, and
2455 * wait for the pageout daemon to make progress.
2456 */
b0d623f7 2457 lck_mtx_unlock(&vm_page_alloc_lock);
1c79356b
A
2458 vm_page_wait(THREAD_UNINT);
2459 return;
2460 }
39236c6e 2461
7ddcb079 2462 zcram(vm_page_zone, addr, PAGE_SIZE);
6d2010ae 2463
b0d623f7 2464 lck_mtx_unlock(&vm_page_alloc_lock);
1c79356b
A
2465}
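/*
 * Editor's illustrative sketch -- not part of vm_resident.c.  It restates the
 * pattern used by vm_page_more_fictitious() above: serialize refills behind a
 * private lock and, once the lock is held, re-check whether another thread
 * has already replenished the pool so that callers retrying in a loop do not
 * keep allocating.  The threshold of 5 mirrors the comment above; the
 * model_* names and the pthread-based locking are invented for illustration.
 */
#if 0	/* editor's sketch only -- never built */
#include <assert.h>
#include <pthread.h>

#define MODEL_REFILL_THRESHOLD	5	/* larger than any single caller needs	*/
#define MODEL_REFILL_BATCH	8

static pthread_mutex_t	model_alloc_lock = PTHREAD_MUTEX_INITIALIZER;
static int		model_free_count;

static void
model_more_pages(void)
{
	pthread_mutex_lock(&model_alloc_lock);

	/* if another thread already refilled the pool, just bail out */
	if (model_free_count > MODEL_REFILL_THRESHOLD) {
		pthread_mutex_unlock(&model_alloc_lock);
		return;
	}
	/* the kernel allocates one page here and crams it into the zone */
	model_free_count += MODEL_REFILL_BATCH;

	pthread_mutex_unlock(&model_alloc_lock);
}

int
main(void)
{
	model_more_pages();			/* first caller refills			*/
	assert(model_free_count == MODEL_REFILL_BATCH);
	model_more_pages();			/* second caller finds enough and bails	*/
	assert(model_free_count == MODEL_REFILL_BATCH);
	return (0);
}
#endif	/* editor's sketch */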
2466
1c79356b
A
2467
2468/*
2469 * vm_pool_low():
2470 *
2471 * Return true if it is not likely that a non-vm_privileged thread
2472 * can get memory without blocking. Advisory only, since the
2473 * situation may change under us.
2474 */
2475int
2476vm_pool_low(void)
2477{
2478 /* No locking, at worst we will fib. */
b0d623f7 2479 return( vm_page_free_count <= vm_page_free_reserved );
1c79356b
A
2480}
2481
d9a64523
A
2482boolean_t vm_darkwake_mode = FALSE;
2483
2484/*
2485 * vm_update_darkwake_mode():
2486 *
2487 * Tells the VM that the system is in / out of darkwake.
2488 *
2489 * Today, the VM only lowers/raises the background queue target
2490 * so as to favor consuming more/less background pages when
2491 * darkwake is ON/OFF.
2492 *
2493 * We might need to do more things in the future.
2494 */
2495
2496void
2497vm_update_darkwake_mode(boolean_t darkwake_mode)
2498{
2499 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
2500
2501 vm_page_lockspin_queues();
2502
2503 if (vm_darkwake_mode == darkwake_mode) {
2504 /*
2505 * No change.
2506 */
2507 vm_page_unlock_queues();
2508 return;
2509 }
2510
2511 vm_darkwake_mode = darkwake_mode;
2512
2513 if (vm_darkwake_mode == TRUE) {
2514#if CONFIG_BACKGROUND_QUEUE
2515
2516 /* save background target to restore later */
2517 vm_page_background_target_snapshot = vm_page_background_target;
2518
2519 /* target is set to 0...no protection for background pages */
2520 vm_page_background_target = 0;
2521
2522#endif /* CONFIG_BACKGROUND_QUEUE */
2523
2524 } else if (vm_darkwake_mode == FALSE) {
2525#if CONFIG_BACKGROUND_QUEUE
2526
2527 if (vm_page_background_target_snapshot) {
2528 vm_page_background_target = vm_page_background_target_snapshot;
2529 }
2530#endif /* CONFIG_BACKGROUND_QUEUE */
2531 }
2532 vm_page_unlock_queues();
2533}
0c530ab8 2534
39037602
A
2535#if CONFIG_BACKGROUND_QUEUE
2536
2537void
2538vm_page_update_background_state(vm_page_t mem)
2539{
2540 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2541 return;
2542
d9a64523 2543 if (mem->vmp_in_background == FALSE)
39037602
A
2544 return;
2545
d9a64523
A
2546 task_t my_task = current_task();
2547
2548 if (my_task) {
2549 if (task_get_darkwake_mode(my_task)) {
2550 return;
2551 }
2552 }
2553
39037602
A
2554#if BACKGROUNDQ_BASED_ON_QOS
2555 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS) <= THREAD_QOS_LEGACY)
2556 return;
2557#else
39037602
A
2558 if (my_task) {
2559 if (proc_get_effective_task_policy(my_task, TASK_POLICY_DARWIN_BG))
2560 return;
2561 }
2562#endif
2563 vm_page_lockspin_queues();
2564
d9a64523 2565 mem->vmp_in_background = FALSE;
39037602
A
2566 vm_page_background_promoted_count++;
2567
2568 vm_page_remove_from_backgroundq(mem);
2569
2570 vm_page_unlock_queues();
2571}
2572
2573
2574void
2575vm_page_assign_background_state(vm_page_t mem)
2576{
2577 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2578 return;
2579
d9a64523
A
2580 task_t my_task = current_task();
2581
2582 if (my_task) {
2583 if (task_get_darkwake_mode(my_task)) {
2584 mem->vmp_in_background = TRUE;
2585 return;
2586 }
2587 }
2588
39037602
A
2589#if BACKGROUNDQ_BASED_ON_QOS
2590 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS) <= THREAD_QOS_LEGACY)
d9a64523 2591 mem->vmp_in_background = TRUE;
39037602 2592 else
d9a64523 2593 mem->vmp_in_background = FALSE;
39037602 2594#else
39037602 2595 if (my_task)
d9a64523 2596 mem->vmp_in_background = proc_get_effective_task_policy(my_task, TASK_POLICY_DARWIN_BG);
39037602
A
2597#endif
2598}
2599
2600
2601void
2602vm_page_remove_from_backgroundq(
2603 vm_page_t mem)
2604{
2605 vm_object_t m_object;
2606
2607 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2608
d9a64523
A
2609 if (mem->vmp_on_backgroundq) {
2610 vm_page_queue_remove(&vm_page_queue_background, mem, vm_page_t, vmp_backgroundq);
39037602 2611
d9a64523
A
2612 mem->vmp_backgroundq.next = 0;
2613 mem->vmp_backgroundq.prev = 0;
2614 mem->vmp_on_backgroundq = FALSE;
39037602
A
2615
2616 vm_page_background_count--;
2617
2618 m_object = VM_PAGE_OBJECT(mem);
2619
2620 if (m_object->internal)
2621 vm_page_background_internal_count--;
2622 else
2623 vm_page_background_external_count--;
2624 } else {
d9a64523
A
2625 assert(VM_PAGE_UNPACK_PTR(mem->vmp_backgroundq.next) == (uintptr_t)NULL &&
2626 VM_PAGE_UNPACK_PTR(mem->vmp_backgroundq.prev) == (uintptr_t)NULL);
39037602
A
2627 }
2628}
2629
2630
2631void
2632vm_page_add_to_backgroundq(
2633 vm_page_t mem,
2634 boolean_t first)
2635{
2636 vm_object_t m_object;
2637
2638 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2639
2640 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2641 return;
2642
d9a64523 2643 if (mem->vmp_on_backgroundq == FALSE) {
39037602
A
2644
2645 m_object = VM_PAGE_OBJECT(mem);
2646
2647 if (vm_page_background_exclude_external && !m_object->internal)
2648 return;
2649
2650 if (first == TRUE)
d9a64523 2651 vm_page_queue_enter_first(&vm_page_queue_background, mem, vm_page_t, vmp_backgroundq);
39037602 2652 else
d9a64523
A
2653 vm_page_queue_enter(&vm_page_queue_background, mem, vm_page_t, vmp_backgroundq);
2654 mem->vmp_on_backgroundq = TRUE;
39037602
A
2655
2656 vm_page_background_count++;
2657
2658 if (m_object->internal)
2659 vm_page_background_internal_count++;
2660 else
2661 vm_page_background_external_count++;
2662 }
2663}
2664
d9a64523 2665#endif /* CONFIG_BACKGROUND_QUEUE */
0c530ab8
A
2666
2667/*
2668 * this is an interface to support bring-up of drivers
2669 * on platforms with physical memory > 4G...
2670 */
fe8ab488 2671int vm_himemory_mode = 2;
0c530ab8
A
2672
2673
2674/*
2675 * this interface exists to support hardware controllers
2676 * incapable of generating DMAs with more than 32 bits
2677 * of address on platforms with physical memory > 4G...
2678 */
0b4c1975
A
2679unsigned int vm_lopages_allocated_q = 0;
2680unsigned int vm_lopages_allocated_cpm_success = 0;
2681unsigned int vm_lopages_allocated_cpm_failed = 0;
39037602 2682vm_page_queue_head_t vm_lopage_queue_free __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
0c530ab8
A
2683
2684vm_page_t
2685vm_page_grablo(void)
2686{
0b4c1975 2687 vm_page_t mem;
0c530ab8 2688
0b4c1975 2689 if (vm_lopage_needed == FALSE)
0c530ab8
A
2690 return (vm_page_grab());
2691
b0d623f7 2692 lck_mtx_lock_spin(&vm_page_queue_free_lock);
0c530ab8 2693
39037602
A
2694 if ( !vm_page_queue_empty(&vm_lopage_queue_free)) {
2695 vm_page_queue_remove_first(&vm_lopage_queue_free,
0b4c1975
A
2696 mem,
2697 vm_page_t,
d9a64523 2698 vmp_pageq);
0b4c1975 2699 assert(vm_lopage_free_count);
d9a64523
A
2700 assert(mem->vmp_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
2701 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
0c530ab8 2702
0b4c1975
A
2703 vm_lopage_free_count--;
2704 vm_lopages_allocated_q++;
2705
2706 if (vm_lopage_free_count < vm_lopage_lowater)
2707 vm_lopage_refill = TRUE;
0c530ab8 2708
0b4c1975 2709 lck_mtx_unlock(&vm_page_queue_free_lock);
39037602
A
2710
2711#if CONFIG_BACKGROUND_QUEUE
2712 vm_page_assign_background_state(mem);
2713#endif
2d21ac55 2714 } else {
0b4c1975
A
2715 lck_mtx_unlock(&vm_page_queue_free_lock);
2716
2717 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
2718
2719 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2720 vm_lopages_allocated_cpm_failed++;
2721 lck_mtx_unlock(&vm_page_queue_free_lock);
2722
2723 return (VM_PAGE_NULL);
2724 }
d9a64523 2725 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
39037602 2726
d9a64523 2727 mem->vmp_busy = TRUE;
0b4c1975
A
2728
2729 vm_page_lockspin_queues();
2730
d9a64523 2731 mem->vmp_gobbled = FALSE;
0b4c1975
A
2732 vm_page_gobble_count--;
2733 vm_page_wire_count--;
2734
2735 vm_lopages_allocated_cpm_success++;
2736 vm_page_unlock_queues();
0c530ab8 2737 }
d9a64523
A
2738 assert(mem->vmp_busy);
2739 assert(!mem->vmp_pmapped);
2740 assert(!mem->vmp_wpmapped);
39037602 2741 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
0b4c1975 2742
39037602 2743 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
0c530ab8 2744
d9a64523
A
2745 disable_preemption();
2746 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2747 VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, 0, 1, 0, 0);
2748 enable_preemption();
2749
0c530ab8
A
2750 return (mem);
2751}
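/*
 * Editor's illustrative sketch -- not part of vm_resident.c.  vm_page_grablo()
 * above serves hardware that can only generate 32-bit DMA addresses: it hands
 * out pages from a dedicated low-page free queue and, when that queue is
 * empty, falls back to a contiguous allocation constrained below 4GB.  The
 * sketch below only shows the addressability test for a 4KB page number;
 * the model_* names are invented for illustration.
 */
#if 0	/* editor's sketch only -- never built */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define MODEL_PAGE_SHIFT	12		/* 4KB pages */

/* true if every byte of the page lies below the 4GB DMA limit */
static bool
model_page_is_dma32(uint64_t phys_page_num)
{
	uint64_t last_byte = ((phys_page_num + 1) << MODEL_PAGE_SHIFT) - 1;

	return (last_byte <= UINT32_MAX);
}

int
main(void)
{
	assert(model_page_is_dma32(0x12345));		/* well below 4GB	*/
	assert(model_page_is_dma32(0xFFFFF));		/* last page under 4GB	*/
	assert(!model_page_is_dma32(0x100000));		/* first page above 4GB	*/
	return (0);
}
#endif	/* editor's sketch */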
2752
6d2010ae 2753
1c79356b
A
2754/*
2755 * vm_page_grab:
2756 *
2d21ac55
A
2757 * first try to grab a page from the per-cpu free list...
2758 * this must be done while pre-emption is disabled... if
2759 * a page is available, we're done...
2760 * if no page is available, grab the vm_page_queue_free_lock
2761 * and see if current number of free pages would allow us
2762 * to grab at least 1... if not, return VM_PAGE_NULL as before...
2763 * if there are pages available, disable preemption and
2764 * recheck the state of the per-cpu free list... we could
2765 * have been preempted and moved to a different cpu, or
2766 * some other thread could have re-filled it... if still
2767 * empty, figure out how many pages we can steal from the
2768 * global free queue and move to the per-cpu queue...
2769 * return 1 of these pages when done... only wakeup the
2770 * pageout_scan thread if we moved pages from the global
2771 * list... no need for the wakeup if we've satisfied the
2772 * request from the per-cpu queue.
1c79356b
A
2773 */
2774
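/*
 * Editor's illustrative sketch -- not part of vm_resident.c.  It models the
 * strategy described in the comment above: vm_page_grab() first tries the
 * per-cpu free list and, only when that is empty, takes the global free-queue
 * lock, steals a batch of pages, keeps one, and parks the rest on the per-cpu
 * list.  Locking, colors and clumps are omitted; the model_* names are
 * invented for illustration.
 */
#if 0	/* editor's sketch only -- never built */
#include <assert.h>
#include <stddef.h>

struct model_page {
	struct model_page	*snext;
};

static struct model_page	*model_cpu_free_list;		/* this cpu's magazine		*/
static struct model_page	*model_global_free_list;	/* lock-protected in the kernel	*/
static unsigned int		 model_global_free_count;

#define MODEL_REFILL_LIMIT	4	/* stand-in for vm_free_magazine_refill_limit */

static struct model_page *
model_page_grab(void)
{
	struct model_page *mem;
	unsigned int to_steal;

	/* fast path: the per-cpu list satisfies the request with no lock */
	if ((mem = model_cpu_free_list) != NULL) {
		model_cpu_free_list = mem->snext;
		return (mem);
	}
	/* slow path: steal a batch from the global queue (lock held here) */
	to_steal = (model_global_free_count < MODEL_REFILL_LIMIT) ?
	    model_global_free_count : MODEL_REFILL_LIMIT;
	if (to_steal == 0)
		return (NULL);

	while (to_steal-- > 0) {
		struct model_page *p = model_global_free_list;

		model_global_free_list = p->snext;
		model_global_free_count--;
		p->snext = model_cpu_free_list;		/* park on the per-cpu list */
		model_cpu_free_list = p;
	}
	/* hand back one of the stolen pages */
	mem = model_cpu_free_list;
	model_cpu_free_list = mem->snext;
	return (mem);
}

int
main(void)
{
	struct model_page pages[3] = { { &pages[1] }, { &pages[2] }, { NULL } };

	model_global_free_list = &pages[0];
	model_global_free_count = 3;

	assert(model_page_grab() != NULL);	/* refills the per-cpu list		*/
	assert(model_cpu_free_list != NULL);	/* leftovers parked locally		*/
	assert(model_page_grab() != NULL);	/* served without touching the global list */
	assert(model_global_free_count == 0);
	return (0);
}
#endif	/* editor's sketch */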
39037602
A
2775#if CONFIG_SECLUDED_MEMORY
2776vm_page_t vm_page_grab_secluded(void);
2777#endif /* CONFIG_SECLUDED_MEMORY */
1c79356b
A
2778
2779vm_page_t
39037602 2780vm_page_grab(void)
1c79356b 2781{
39037602
A
2782 return vm_page_grab_options(0);
2783}
2d21ac55 2784
5ba3f43e
A
2785#if HIBERNATION
2786boolean_t hibernate_rebuild_needed = FALSE;
2787#endif /* HIBERNATION */
2788
39037602
A
2789vm_page_t
2790vm_page_grab_options(
2791 int grab_options)
2792{
2793 vm_page_t mem;
2d21ac55
A
2794
2795 disable_preemption();
2796
2797 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2798return_page_from_cpu_list:
d9a64523 2799 assert(mem->vmp_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
39037602 2800
5ba3f43e
A
2801#if HIBERNATION
2802 if (hibernate_rebuild_needed) {
2803 panic("%s:%d should not modify cpu->free_pages while hibernating", __FUNCTION__, __LINE__);
2804 }
2805#endif /* HIBERNATION */
2d21ac55 2806 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
d9a64523
A
2807 PROCESSOR_DATA(current_processor(), free_pages) = mem->vmp_snext;
2808 VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, grab_options, 0, 0, 0);
2d21ac55
A
2809
2810 enable_preemption();
39037602 2811 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
d9a64523
A
2812 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
2813
2814 assert(mem->vmp_listq.next == 0 && mem->vmp_listq.prev == 0);
2815 assert(mem->vmp_tabled == FALSE);
2816 assert(mem->vmp_object == 0);
2817 assert(!mem->vmp_laundry);
2818 assertf(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)), "page = 0x%llx", (uint64_t)VM_PAGE_GET_PHYS_PAGE(mem));
2819 assert(mem->vmp_busy);
2820 assert(!mem->vmp_pmapped);
2821 assert(!mem->vmp_wpmapped);
39037602 2822 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2d21ac55 2823
39037602
A
2824#if CONFIG_BACKGROUND_QUEUE
2825 vm_page_assign_background_state(mem);
2826#endif
2d21ac55
A
2827 return mem;
2828 }
2829 enable_preemption();
2830
1c79356b 2831
1c79356b
A
2832 /*
2833 * Optionally produce warnings if the wire or gobble
2834 * counts exceed some threshold.
2835 */
fe8ab488
A
2836#if VM_PAGE_WIRE_COUNT_WARNING
2837 if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) {
1c79356b
A
2838 printf("mk: vm_page_grab(): high wired page count of %d\n",
2839 vm_page_wire_count);
1c79356b 2840 }
fe8ab488
A
2841#endif
2842#if VM_PAGE_GOBBLE_COUNT_WARNING
2843 if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) {
1c79356b
A
2844 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
2845 vm_page_gobble_count);
1c79356b 2846 }
fe8ab488 2847#endif
39037602 2848
b0d623f7
A
2849 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2850
1c79356b
A
2851 /*
2852 * Only let privileged threads (involved in pageout)
2853 * dip into the reserved pool.
2854 */
1c79356b 2855 if ((vm_page_free_count < vm_page_free_reserved) &&
91447636 2856 !(current_thread()->options & TH_OPT_VMPRIV)) {
39037602 2857 /* no page for us in the free queue... */
b0d623f7 2858 lck_mtx_unlock(&vm_page_queue_free_lock);
1c79356b 2859 mem = VM_PAGE_NULL;
39037602
A
2860
2861#if CONFIG_SECLUDED_MEMORY
2862 /* ... but can we try and grab from the secluded queue? */
2863 if (vm_page_secluded_count > 0 &&
2864 ((grab_options & VM_PAGE_GRAB_SECLUDED) ||
d9a64523 2865 task_can_use_secluded_mem(current_task(), TRUE))) {
39037602
A
2866 mem = vm_page_grab_secluded();
2867 if (grab_options & VM_PAGE_GRAB_SECLUDED) {
2868 vm_page_secluded.grab_for_iokit++;
2869 if (mem) {
2870 vm_page_secluded.grab_for_iokit_success++;
2871 }
2872 }
2873 if (mem) {
2874 VM_CHECK_MEMORYSTATUS;
d9a64523
A
2875
2876 disable_preemption();
2877 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2878 VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, grab_options, 0, 0, 0);
2879 enable_preemption();
2880
39037602
A
2881 return mem;
2882 }
2883 }
2884#else /* CONFIG_SECLUDED_MEMORY */
2885 (void) grab_options;
2886#endif /* CONFIG_SECLUDED_MEMORY */
1c79356b 2887 }
2d21ac55
A
2888 else {
2889 vm_page_t head;
2890 vm_page_t tail;
2891 unsigned int pages_to_steal;
2892 unsigned int color;
5ba3f43e 2893 unsigned int clump_end, sub_count;
1c79356b 2894
2d21ac55 2895 while ( vm_page_free_count == 0 ) {
1c79356b 2896
b0d623f7 2897 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
2898 /*
2899 * must be a privileged thread to be
2900 * in this state since a non-privileged
2901 * thread would have bailed if we were
2902 * under the vm_page_free_reserved mark
2903 */
2904 VM_PAGE_WAIT();
b0d623f7 2905 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2d21ac55
A
2906 }
2907
2908 disable_preemption();
2909
2910 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
b0d623f7 2911 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
2912
2913 /*
2914 * we got preempted and moved to another processor
2915 * or we got preempted and someone else ran and filled the cache
2916 */
2917 goto return_page_from_cpu_list;
2918 }
2919 if (vm_page_free_count <= vm_page_free_reserved)
2920 pages_to_steal = 1;
2921 else {
fe8ab488
A
2922 if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved))
2923 pages_to_steal = vm_free_magazine_refill_limit;
2924 else
2d21ac55
A
2925 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
2926 }
2927 color = PROCESSOR_DATA(current_processor(), start_color);
2928 head = tail = NULL;
2929
fe8ab488 2930 vm_page_free_count -= pages_to_steal;
5ba3f43e 2931 clump_end = sub_count = 0;
fe8ab488 2932
2d21ac55 2933 while (pages_to_steal--) {
2d21ac55 2934
39037602 2935 while (vm_page_queue_empty(&vm_page_queue_free[color].qhead))
2d21ac55 2936 color = (color + 1) & vm_color_mask;
5ba3f43e
A
2937#if defined(__x86_64__)
2938 vm_page_queue_remove_first_with_clump(&vm_page_queue_free[color].qhead,
2939 mem,
2940 vm_page_t,
d9a64523 2941 vmp_pageq,
5ba3f43e
A
2942 clump_end);
2943#else
39037602 2944 vm_page_queue_remove_first(&vm_page_queue_free[color].qhead,
5ba3f43e
A
2945 mem,
2946 vm_page_t,
d9a64523 2947 vmp_pageq);
5ba3f43e
A
2948#endif
2949
d9a64523 2950 assert(mem->vmp_q_state == VM_PAGE_ON_FREE_Q);
6d2010ae 2951
39037602 2952 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
5ba3f43e
A
2953
2954#if defined(__arm__) || defined(__arm64__)
2d21ac55 2955 color = (color + 1) & vm_color_mask;
5ba3f43e
A
2956#else
2957
2958#if DEVELOPMENT || DEBUG
2959
2960 sub_count++;
2961 if (clump_end) {
2962 vm_clump_update_stats(sub_count);
2963 sub_count = 0;
2964 color = (color + 1) & vm_color_mask;
2965 }
2966#else
2967 if (clump_end) color = (color + 1) & vm_color_mask;
2968
2969#endif /* if DEVELOPMENT || DEBUG */
2970
2971#endif /* if defined(__arm__) || defined(__arm64__) */
2d21ac55
A
2972
2973 if (head == NULL)
2974 head = mem;
2975 else
d9a64523 2976 tail->vmp_snext = mem;
2d21ac55
A
2977 tail = mem;
2978
d9a64523
A
2979 assert(mem->vmp_listq.next == 0 && mem->vmp_listq.prev == 0);
2980 assert(mem->vmp_tabled == FALSE);
2981 assert(mem->vmp_object == 0);
2982 assert(!mem->vmp_laundry);
2d21ac55 2983
d9a64523 2984 mem->vmp_q_state = VM_PAGE_ON_FREE_LOCAL_Q;
39037602 2985
d9a64523
A
2986 assertf(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)), "page = 0x%llx", (uint64_t)VM_PAGE_GET_PHYS_PAGE(mem));
2987 assert(mem->vmp_busy);
2988 assert(!mem->vmp_pmapped);
2989 assert(!mem->vmp_wpmapped);
39037602 2990 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2d21ac55 2991 }
5ba3f43e
A
2992#if defined (__x86_64__) && (DEVELOPMENT || DEBUG)
2993 vm_clump_update_stats(sub_count);
2994#endif
fe8ab488
A
2995 lck_mtx_unlock(&vm_page_queue_free_lock);
2996
5ba3f43e
A
2997#if HIBERNATION
2998 if (hibernate_rebuild_needed) {
2999 panic("%s:%d should not modify cpu->free_pages while hibernating", __FUNCTION__, __LINE__);
3000 }
3001#endif /* HIBERNATION */
d9a64523 3002 PROCESSOR_DATA(current_processor(), free_pages) = head->vmp_snext;
2d21ac55
A
3003 PROCESSOR_DATA(current_processor(), start_color) = color;
3004
3005 /*
3006 * satisfy this request
3007 */
3008 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
d9a64523 3009 VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, grab_options, 0, 0, 0);
2d21ac55 3010 mem = head;
d9a64523 3011 assert(mem->vmp_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
39037602
A
3012
3013 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
d9a64523 3014 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
91447636 3015
2d21ac55
A
3016 enable_preemption();
3017 }
1c79356b
A
3018 /*
3019 * Decide if we should poke the pageout daemon.
3020 * We do this if the free count is less than the low
3021 * water mark, or if the free count is less than the high
3022 * water mark (but above the low water mark) and the inactive
3023 * count is less than its target.
3024 *
3025 * We don't have the counts locked ... if they change a little,
3026 * it doesn't really matter.
3027 */
d9a64523 3028 if (vm_page_free_count < vm_page_free_min)
316670eb 3029 thread_wakeup((event_t) &vm_page_free_wanted);
2d21ac55 3030
6d2010ae 3031 VM_CHECK_MEMORYSTATUS;
39037602
A
3032
3033 if (mem) {
3034// dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
3035
3036#if CONFIG_BACKGROUND_QUEUE
3037 vm_page_assign_background_state(mem);
3038#endif
3039 }
3040 return mem;
3041}
3042
3043#if CONFIG_SECLUDED_MEMORY
3044vm_page_t
3045vm_page_grab_secluded(void)
3046{
3047 vm_page_t mem;
3048 vm_object_t object;
3049 int refmod_state;
3050
3051 if (vm_page_secluded_count == 0) {
3052 /* no secluded pages to grab... */
3053 return VM_PAGE_NULL;
3054 }
3055
3056 /* secluded queue is protected by the VM page queue lock */
3057 vm_page_lock_queues();
3058
3059 if (vm_page_secluded_count == 0) {
3060 /* no secluded pages to grab... */
3061 vm_page_unlock_queues();
3062 return VM_PAGE_NULL;
3063 }
3064
3065#if 00
3066 /* can we grab from the secluded queue? */
3067 if (vm_page_secluded_count > vm_page_secluded_target ||
3068 (vm_page_secluded_count > 0 &&
d9a64523 3069 task_can_use_secluded_mem(current_task(), TRUE))) {
39037602
A
3070 /* OK */
3071 } else {
3072 /* can't grab from secluded queue... */
3073 vm_page_unlock_queues();
3074 return VM_PAGE_NULL;
3075 }
3076#endif
3077
3078 /* we can grab a page from secluded queue! */
3079 assert((vm_page_secluded_count_free +
3080 vm_page_secluded_count_inuse) ==
3081 vm_page_secluded_count);
3082 if (current_task()->task_can_use_secluded_mem) {
3083 assert(num_tasks_can_use_secluded_mem > 0);
3084 }
3085 assert(!vm_page_queue_empty(&vm_page_queue_secluded));
3086 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
5ba3f43e 3087 mem = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
d9a64523 3088 assert(mem->vmp_q_state == VM_PAGE_ON_SECLUDED_Q);
d190cdc3 3089 vm_page_queues_remove(mem, TRUE);
39037602
A
3090
3091 object = VM_PAGE_OBJECT(mem);
3092
d9a64523 3093 assert(!mem->vmp_fictitious);
39037602
A
3094 assert(!VM_PAGE_WIRED(mem));
3095 if (object == VM_OBJECT_NULL) {
3096 /* free for grab! */
39037602
A
3097 vm_page_unlock_queues();
3098 vm_page_secluded.grab_success_free++;
d190cdc3 3099
d9a64523
A
3100 assert(mem->vmp_busy);
3101 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
d190cdc3 3102 assert(VM_PAGE_OBJECT(mem) == VM_OBJECT_NULL);
d9a64523
A
3103 assert(mem->vmp_pageq.next == 0);
3104 assert(mem->vmp_pageq.prev == 0);
3105 assert(mem->vmp_listq.next == 0);
3106 assert(mem->vmp_listq.prev == 0);
d190cdc3 3107#if CONFIG_BACKGROUND_QUEUE
d9a64523
A
3108 assert(mem->vmp_on_backgroundq == 0);
3109 assert(mem->vmp_backgroundq.next == 0);
3110 assert(mem->vmp_backgroundq.prev == 0);
d190cdc3 3111#endif /* CONFIG_BACKGROUND_QUEUE */
39037602
A
3112 return mem;
3113 }
3114
39037602
A
3115 assert(!object->internal);
3116// vm_page_pageable_external_count--;
3117
3118 if (!vm_object_lock_try(object)) {
3119// printf("SECLUDED: page %p: object %p locked\n", mem, object);
3120 vm_page_secluded.grab_failure_locked++;
3121 reactivate_secluded_page:
3122 vm_page_activate(mem);
3123 vm_page_unlock_queues();
3124 return VM_PAGE_NULL;
3125 }
d9a64523
A
3126 if (mem->vmp_busy ||
3127 mem->vmp_cleaning ||
3128 mem->vmp_laundry) {
39037602
A
3129 /* can't steal page in this state... */
3130 vm_object_unlock(object);
3131 vm_page_secluded.grab_failure_state++;
3132 goto reactivate_secluded_page;
3133 }
3134
d9a64523 3135 mem->vmp_busy = TRUE;
39037602
A
3136 refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
3137 if (refmod_state & VM_MEM_REFERENCED) {
d9a64523 3138 mem->vmp_reference = TRUE;
39037602
A
3139 }
3140 if (refmod_state & VM_MEM_MODIFIED) {
3141 SET_PAGE_DIRTY(mem, FALSE);
3142 }
d9a64523 3143 if (mem->vmp_dirty || mem->vmp_precious) {
39037602
A
3144 /* can't grab a dirty page; re-activate */
3145// printf("SECLUDED: dirty page %p\n", mem);
743345f9 3146 PAGE_WAKEUP_DONE(mem);
39037602
A
3147 vm_page_secluded.grab_failure_dirty++;
3148 vm_object_unlock(object);
3149 goto reactivate_secluded_page;
3150 }
d9a64523 3151 if (mem->vmp_reference) {
39037602
A
3152 /* it's been used but we do need to grab a page... */
3153 }
743345f9 3154
39037602
A
3155 vm_page_unlock_queues();
3156
3157 /* finish what vm_page_free() would have done... */
3158 vm_page_free_prepare_object(mem, TRUE);
3159 vm_object_unlock(object);
3160 object = VM_OBJECT_NULL;
3161 if (vm_page_free_verify) {
d9a64523 3162 assertf(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)), "page = 0x%llx", (uint64_t)VM_PAGE_GET_PHYS_PAGE(mem));
39037602
A
3163 }
3164 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
39037602 3165 vm_page_secluded.grab_success_other++;
1c79356b 3166
d9a64523
A
3167 assert(mem->vmp_busy);
3168 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
d190cdc3 3169 assert(VM_PAGE_OBJECT(mem) == VM_OBJECT_NULL);
d9a64523
A
3170 assert(mem->vmp_pageq.next == 0);
3171 assert(mem->vmp_pageq.prev == 0);
3172 assert(mem->vmp_listq.next == 0);
3173 assert(mem->vmp_listq.prev == 0);
d190cdc3 3174#if CONFIG_BACKGROUND_QUEUE
d9a64523
A
3175 assert(mem->vmp_on_backgroundq == 0);
3176 assert(mem->vmp_backgroundq.next == 0);
3177 assert(mem->vmp_backgroundq.prev == 0);
d190cdc3
A
3178#endif /* CONFIG_BACKGROUND_QUEUE */
3179
1c79356b
A
3180 return mem;
3181}
39037602 3182#endif /* CONFIG_SECLUDED_MEMORY */
1c79356b
A
3183
3184/*
3185 * vm_page_release:
3186 *
3187 * Return a page to the free list.
3188 */
3189
3190void
3191vm_page_release(
39037602
A
3192 vm_page_t mem,
3193 boolean_t page_queues_locked)
1c79356b 3194{
2d21ac55 3195 unsigned int color;
b0d623f7
A
3196 int need_wakeup = 0;
3197 int need_priv_wakeup = 0;
39037602
A
3198#if CONFIG_SECLUDED_MEMORY
3199 int need_secluded_wakeup = 0;
3200#endif /* CONFIG_SECLUDED_MEMORY */
55e303ae 3201
39037602
A
3202 if (page_queues_locked) {
3203 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3204 } else {
3205 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
3206 }
6d2010ae 3207
d9a64523 3208 assert(!mem->vmp_private && !mem->vmp_fictitious);
b0d623f7 3209 if (vm_page_free_verify) {
d9a64523 3210 assertf(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)), "page = 0x%llx", (uint64_t)VM_PAGE_GET_PHYS_PAGE(mem));
b0d623f7 3211 }
39037602 3212// dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
1c79356b 3213
39037602 3214 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
7ddcb079 3215
b0d623f7 3216 lck_mtx_lock_spin(&vm_page_queue_free_lock);
6d2010ae 3217
d9a64523
A
3218 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
3219 assert(mem->vmp_busy);
3220 assert(!mem->vmp_laundry);
3221 assert(mem->vmp_object == 0);
3222 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
3223 assert(mem->vmp_listq.next == 0 && mem->vmp_listq.prev == 0);
39037602 3224#if CONFIG_BACKGROUND_QUEUE
d9a64523
A
3225 assert(mem->vmp_backgroundq.next == 0 &&
3226 mem->vmp_backgroundq.prev == 0 &&
3227 mem->vmp_on_backgroundq == FALSE);
39037602 3228#endif
d9a64523 3229 if ((mem->vmp_lopage == TRUE || vm_lopage_refill == TRUE) &&
0b4c1975 3230 vm_lopage_free_count < vm_lopage_free_limit &&
39037602 3231 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
0c530ab8
A
3232 /*
3233 * this exists to support hardware controllers
3234 * incapable of generating DMAs with more than 32 bits
3235 * of address on platforms with physical memory > 4G...
3236 */
39037602
A
3237 vm_page_queue_enter_first(&vm_lopage_queue_free,
3238 mem,
3239 vm_page_t,
d9a64523 3240 vmp_pageq);
0c530ab8 3241 vm_lopage_free_count++;
0b4c1975
A
3242
3243 if (vm_lopage_free_count >= vm_lopage_free_limit)
3244 vm_lopage_refill = FALSE;
3245
d9a64523
A
3246 mem->vmp_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
3247 mem->vmp_lopage = TRUE;
39037602
A
3248#if CONFIG_SECLUDED_MEMORY
3249 } else if (vm_page_free_count > vm_page_free_reserved &&
3250 vm_page_secluded_count < vm_page_secluded_target &&
3251 num_tasks_can_use_secluded_mem == 0) {
3252 /*
3253 * XXX FBDP TODO: also avoid refilling secluded queue
3254 * when some IOKit objects are already grabbing from it...
3255 */
3256 if (!page_queues_locked) {
3257 if (!vm_page_trylock_queues()) {
3258 /* take locks in right order */
3259 lck_mtx_unlock(&vm_page_queue_free_lock);
3260 vm_page_lock_queues();
3261 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3262 }
3263 }
d9a64523 3264 mem->vmp_lopage = FALSE;
39037602
A
3265 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3266 vm_page_queue_enter_first(&vm_page_queue_secluded,
3267 mem,
3268 vm_page_t,
d9a64523
A
3269 vmp_pageq);
3270 mem->vmp_q_state = VM_PAGE_ON_SECLUDED_Q;
39037602
A
3271 vm_page_secluded_count++;
3272 vm_page_secluded_count_free++;
3273 if (!page_queues_locked) {
3274 vm_page_unlock_queues();
3275 }
3276 LCK_MTX_ASSERT(&vm_page_queue_free_lock, LCK_MTX_ASSERT_OWNED);
3277 if (vm_page_free_wanted_secluded > 0) {
3278 vm_page_free_wanted_secluded--;
3279 need_secluded_wakeup = 1;
3280 }
3281#endif /* CONFIG_SECLUDED_MEMORY */
3282 } else {
d9a64523
A
3283 mem->vmp_lopage = FALSE;
3284 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
0b4c1975 3285
5ba3f43e
A
3286 color = VM_PAGE_GET_COLOR(mem);
3287#if defined(__x86_64__)
3288 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead,
39037602
A
3289 mem,
3290 vm_page_t,
d9a64523 3291 vmp_pageq);
5ba3f43e
A
3292#else
3293 vm_page_queue_enter(&vm_page_queue_free[color].qhead,
3294 mem,
3295 vm_page_t,
d9a64523 3296 vmp_pageq);
5ba3f43e 3297#endif
0c530ab8
A
3298 vm_page_free_count++;
3299 /*
 3300 * Check if we should wake up someone waiting for a page.
 3301 * But don't bother waking them unless they can allocate.
 3302 *
 3303 * We wake up only one thread, to prevent starvation.
 3304 * Because the scheduling system handles wait queues FIFO,
 3305 * if we wake up all waiting threads, one greedy thread
 3306 * can starve multiple nice-guy threads. When the threads
 3307 * all wake up, the greedy thread runs first, grabs the page,
3308 * and waits for another page. It will be the first to run
3309 * when the next page is freed.
3310 *
3311 * However, there is a slight danger here.
3312 * The thread we wake might not use the free page.
3313 * Then the other threads could wait indefinitely
3314 * while the page goes unused. To forestall this,
3315 * the pageout daemon will keep making free pages
3316 * as long as vm_page_free_wanted is non-zero.
3317 */
1c79356b 3318
b0d623f7
A
3319 assert(vm_page_free_count > 0);
3320 if (vm_page_free_wanted_privileged > 0) {
2d21ac55 3321 vm_page_free_wanted_privileged--;
b0d623f7 3322 need_priv_wakeup = 1;
39037602
A
3323#if CONFIG_SECLUDED_MEMORY
3324 } else if (vm_page_free_wanted_secluded > 0 &&
3325 vm_page_free_count > vm_page_free_reserved) {
3326 vm_page_free_wanted_secluded--;
3327 need_secluded_wakeup = 1;
3328#endif /* CONFIG_SECLUDED_MEMORY */
b0d623f7
A
3329 } else if (vm_page_free_wanted > 0 &&
3330 vm_page_free_count > vm_page_free_reserved) {
0c530ab8 3331 vm_page_free_wanted--;
b0d623f7 3332 need_wakeup = 1;
0c530ab8 3333 }
1c79356b 3334 }
d9a64523
A
3335 vm_pageout_vminfo.vm_page_pages_freed++;
3336
3337 VM_DEBUG_CONSTANT_EVENT(vm_page_release, VM_PAGE_RELEASE, DBG_FUNC_NONE, 1, 0, 0, 0);
3338
b0d623f7
A
3339 lck_mtx_unlock(&vm_page_queue_free_lock);
3340
3341 if (need_priv_wakeup)
3342 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
39037602
A
3343#if CONFIG_SECLUDED_MEMORY
3344 else if (need_secluded_wakeup)
3345 thread_wakeup_one((event_t) &vm_page_free_wanted_secluded);
3346#endif /* CONFIG_SECLUDED_MEMORY */
b0d623f7
A
3347 else if (need_wakeup)
3348 thread_wakeup_one((event_t) &vm_page_free_count);
2d21ac55 3349
6d2010ae 3350 VM_CHECK_MEMORYSTATUS;
1c79356b
A
3351}
3352
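
The two lock disciplines accepted by vm_page_release() above are easy to get wrong. A minimal sketch of both call patterns (hypothetical callers, assuming they live in this file's kernel context; only functions already used in this file appear):

static void
example_release_without_queues_lock(vm_page_t mem)
{
	/* caller must NOT hold vm_page_queue_lock on this path */
	vm_page_release(mem, FALSE);
}

static void
example_release_with_queues_lock(vm_page_t mem)
{
	vm_page_lock_queues();
	/* ... other page-queue manipulation ... */
	vm_page_release(mem, TRUE);	/* page queues are locked */
	vm_page_unlock_queues();
}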
fe8ab488
A
3353/*
3354 * This version of vm_page_release() is used only at startup
3355 * when we are single-threaded and pages are being released
3356 * for the first time. Hence, no locking or unnecessary checks are made.
3357 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
3358 */
3359void
3360vm_page_release_startup(
39037602 3361 vm_page_t mem)
fe8ab488 3362{
39037602 3363 vm_page_queue_t queue_free;
fe8ab488
A
3364
3365 if (vm_lopage_free_count < vm_lopage_free_limit &&
39037602 3366 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
d9a64523
A
3367 mem->vmp_lopage = TRUE;
3368 mem->vmp_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
fe8ab488
A
3369 vm_lopage_free_count++;
3370 queue_free = &vm_lopage_queue_free;
39037602
A
3371#if CONFIG_SECLUDED_MEMORY
3372 } else if (vm_page_secluded_count < vm_page_secluded_target) {
d9a64523
A
3373 mem->vmp_lopage = FALSE;
3374 mem->vmp_q_state = VM_PAGE_ON_SECLUDED_Q;
39037602
A
3375 vm_page_secluded_count++;
3376 vm_page_secluded_count_free++;
3377 queue_free = &vm_page_queue_secluded;
3378#endif /* CONFIG_SECLUDED_MEMORY */
3379 } else {
d9a64523
A
3380 mem->vmp_lopage = FALSE;
3381 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
fe8ab488 3382 vm_page_free_count++;
5ba3f43e 3383 queue_free = &vm_page_queue_free[VM_PAGE_GET_COLOR(mem)].qhead;
fe8ab488 3384 }
d9a64523 3385 if (mem->vmp_q_state == VM_PAGE_ON_FREE_Q) {
5ba3f43e 3386#if defined(__x86_64__)
d9a64523 3387 vm_page_queue_enter_clump(queue_free, mem, vm_page_t, vmp_pageq);
5ba3f43e 3388#else
d9a64523 3389 vm_page_queue_enter(queue_free, mem, vm_page_t, vmp_pageq);
5ba3f43e
A
3390#endif
3391 } else
d9a64523 3392 vm_page_queue_enter_first(queue_free, mem, vm_page_t, vmp_pageq);
fe8ab488
A
3393}
3394
1c79356b
A
3395/*
3396 * vm_page_wait:
3397 *
3398 * Wait for a page to become available.
3399 * If there are plenty of free pages, then we don't sleep.
3400 *
3401 * Returns:
3402 * TRUE: There may be another page, try again
3403 * FALSE: We were interrupted out of our wait, don't try again
3404 */
3405
3406boolean_t
3407vm_page_wait(
3408 int interruptible )
3409{
3410 /*
3411 * We can't use vm_page_free_reserved to make this
3412 * determination. Consider: some thread might
3413 * need to allocate two pages. The first allocation
3414 * succeeds, the second fails. After the first page is freed,
3415 * a call to vm_page_wait must really block.
3416 */
9bccf70c 3417 kern_return_t wait_result;
9bccf70c 3418 int need_wakeup = 0;
2d21ac55 3419 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
1c79356b 3420
b0d623f7 3421 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2d21ac55
A
3422
3423 if (is_privileged && vm_page_free_count) {
b0d623f7 3424 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
3425 return TRUE;
3426 }
2d21ac55 3427
39037602 3428 if (vm_page_free_count >= vm_page_free_target) {
b0d623f7 3429 lck_mtx_unlock(&vm_page_queue_free_lock);
39037602
A
3430 return TRUE;
3431 }
9bccf70c 3432
39037602
A
3433 if (is_privileged) {
3434 if (vm_page_free_wanted_privileged++ == 0)
3435 need_wakeup = 1;
3436 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
3437#if CONFIG_SECLUDED_MEMORY
3438 } else if (secluded_for_apps &&
d9a64523 3439 task_can_use_secluded_mem(current_task(), FALSE)) {
39037602
A
3440#if 00
3441 /* XXX FBDP: need pageq lock for this... */
3442 /* XXX FBDP: might wait even if pages available, */
3443 /* XXX FBDP: hopefully not for too long... */
3444 if (vm_page_secluded_count > 0) {
3445 lck_mtx_unlock(&vm_page_queue_free_lock);
3446 return TRUE;
39236c6e 3447 }
39037602
A
3448#endif
3449 if (vm_page_free_wanted_secluded++ == 0) {
3450 need_wakeup = 1;
3451 }
3452 wait_result = assert_wait(
3453 (event_t)&vm_page_free_wanted_secluded,
3454 interruptible);
3455#endif /* CONFIG_SECLUDED_MEMORY */
1c79356b 3456 } else {
39037602
A
3457 if (vm_page_free_wanted++ == 0)
3458 need_wakeup = 1;
3459 wait_result = assert_wait((event_t)&vm_page_free_count,
3460 interruptible);
3461 }
3462 lck_mtx_unlock(&vm_page_queue_free_lock);
3463 counter(c_vm_page_wait_block++);
3464
3465 if (need_wakeup)
3466 thread_wakeup((event_t)&vm_page_free_wanted);
3467
3468 if (wait_result == THREAD_WAITING) {
d9a64523 3469 VM_DEBUG_CONSTANT_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
39037602
A
3470 vm_page_free_wanted_privileged,
3471 vm_page_free_wanted,
3472#if CONFIG_SECLUDED_MEMORY
3473 vm_page_free_wanted_secluded,
3474#else /* CONFIG_SECLUDED_MEMORY */
3475 0,
3476#endif /* CONFIG_SECLUDED_MEMORY */
3477 0);
3478 wait_result = thread_block(THREAD_CONTINUE_NULL);
d9a64523
A
3479 VM_DEBUG_CONSTANT_EVENT(vm_page_wait_block,
3480 VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
1c79356b 3481 }
39037602
A
3482
3483 return (wait_result == THREAD_AWAKENED);
1c79356b
A
3484}
3485
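
vm_page_wait()'s TRUE/FALSE return is meant to drive a retry loop around an allocation attempt, as the fallback code later in this file does. A sketch of that loop (hypothetical helper; vm_page_grab() and THREAD_UNINT are used exactly as elsewhere in this file):

static vm_page_t
example_grab_or_wait(void)
{
	vm_page_t m;

	for (;;) {
		m = vm_page_grab();
		if (m != VM_PAGE_NULL)
			return m;		/* got a page */
		if (!vm_page_wait(THREAD_UNINT))
			return VM_PAGE_NULL;	/* wait was interrupted; don't retry */
	}
}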
3486/*
3487 * vm_page_alloc:
3488 *
3489 * Allocate and return a memory cell associated
3490 * with this VM object/offset pair.
3491 *
3492 * Object must be locked.
3493 */
3494
3495vm_page_t
3496vm_page_alloc(
3497 vm_object_t object,
3498 vm_object_offset_t offset)
3499{
39037602
A
3500 vm_page_t mem;
3501 int grab_options;
1c79356b 3502
2d21ac55 3503 vm_object_lock_assert_exclusive(object);
39037602
A
3504 grab_options = 0;
3505#if CONFIG_SECLUDED_MEMORY
3506 if (object->can_grab_secluded) {
3507 grab_options |= VM_PAGE_GRAB_SECLUDED;
3508 }
3509#endif /* CONFIG_SECLUDED_MEMORY */
3510 mem = vm_page_grab_options(grab_options);
1c79356b
A
3511 if (mem == VM_PAGE_NULL)
3512 return VM_PAGE_NULL;
3513
3514 vm_page_insert(mem, object, offset);
3515
3516 return(mem);
3517}
3518
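
Since vm_page_alloc() asserts the exclusive object lock and can return VM_PAGE_NULL when memory is tight, callers typically combine it with vm_page_wait(). A hedged sketch (hypothetical helper; vm_object_lock()/vm_object_unlock() are the standard VM object locking calls and are assumed here):

static vm_page_t
example_alloc_into_object(vm_object_t object, vm_object_offset_t offset)
{
	vm_page_t m;

	vm_object_lock(object);
	while ((m = vm_page_alloc(object, offset)) == VM_PAGE_NULL) {
		/* drop the object lock before sleeping for memory */
		vm_object_unlock(object);
		if (!vm_page_wait(THREAD_UNINT))
			return VM_PAGE_NULL;
		vm_object_lock(object);
	}
	vm_object_unlock(object);
	return m;	/* inserted at (object, offset), still vmp_busy */
}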
2d21ac55
A
3519/*
3520 * vm_page_alloc_guard:
3521 *
b0d623f7 3522 * Allocate a fictitious page which will be used
2d21ac55
A
3523 * as a guard page. The page will be inserted into
3524 * the object and returned to the caller.
3525 */
3526
3527vm_page_t
3528vm_page_alloc_guard(
3529 vm_object_t object,
3530 vm_object_offset_t offset)
3531{
39037602 3532 vm_page_t mem;
2d21ac55
A
3533
3534 vm_object_lock_assert_exclusive(object);
3535 mem = vm_page_grab_guard();
3536 if (mem == VM_PAGE_NULL)
3537 return VM_PAGE_NULL;
3538
3539 vm_page_insert(mem, object, offset);
3540
3541 return(mem);
3542}
3543
3544
1c79356b
A
3545counter(unsigned int c_laundry_pages_freed = 0;)
3546
1c79356b 3547/*
6d2010ae 3548 * vm_page_free_prepare:
1c79356b 3549 *
6d2010ae
A
3550 * Removes page from any queue it may be on
3551 * and disassociates it from its VM object.
1c79356b
A
3552 *
3553 * Object and page queues must be locked prior to entry.
3554 */
b0d623f7 3555static void
2d21ac55 3556vm_page_free_prepare(
6d2010ae 3557 vm_page_t mem)
b0d623f7
A
3558{
3559 vm_page_free_prepare_queues(mem);
3560 vm_page_free_prepare_object(mem, TRUE);
3561}
3562
3563
3564void
3565vm_page_free_prepare_queues(
3566 vm_page_t mem)
1c79356b 3567{
39037602
A
3568 vm_object_t m_object;
3569
2d21ac55 3570 VM_PAGE_CHECK(mem);
39037602 3571
d9a64523
A
3572 assert(mem->vmp_q_state != VM_PAGE_ON_FREE_Q);
3573 assert(!mem->vmp_cleaning);
39037602 3574 m_object = VM_PAGE_OBJECT(mem);
fe8ab488 3575
39037602
A
3576 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3577 if (m_object) {
3578 vm_object_lock_assert_exclusive(m_object);
b0d623f7 3579 }
d9a64523 3580 if (mem->vmp_laundry) {
2d21ac55
A
3581 /*
3582 * We may have to free a page while it's being laundered
3583 * if we lost its pager (due to a forced unmount, for example).
316670eb
A
3584 * We need to call vm_pageout_steal_laundry() before removing
3585 * the page from its VM object, so that we can remove it
3586 * from its pageout queue and adjust the laundry accounting
2d21ac55 3587 */
316670eb 3588 vm_pageout_steal_laundry(mem, TRUE);
2d21ac55
A
3589 counter(++c_laundry_pages_freed);
3590 }
39236c6e 3591
39037602 3592 vm_page_queues_remove(mem, TRUE);
b0d623f7
A
3593
3594 if (VM_PAGE_WIRED(mem)) {
d9a64523 3595 assert(mem->vmp_wire_count > 0);
39037602
A
3596
3597 if (m_object) {
5ba3f43e
A
3598
3599 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
3600 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
3601 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
3e170ce0 3602
39037602
A
3603 assert(m_object->resident_page_count >=
3604 m_object->wired_page_count);
6d2010ae 3605
39037602 3606 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
6d2010ae
A
3607 OSAddAtomic(+1, &vm_page_purgeable_count);
3608 assert(vm_page_purgeable_wired_count > 0);
3609 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
3610 }
39037602
A
3611 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
3612 m_object->purgable == VM_PURGABLE_EMPTY) &&
d9a64523
A
3613 m_object->vo_owner != TASK_NULL) {
3614 task_t owner;
3615 int ledger_idx_volatile;
3616 int ledger_idx_nonvolatile;
3617 int ledger_idx_volatile_compressed;
3618 int ledger_idx_nonvolatile_compressed;
3619 boolean_t do_footprint;
3620
3621 owner = VM_OBJECT_OWNER(m_object);
3622 vm_object_ledger_tag_ledgers(
3623 m_object,
3624 &ledger_idx_volatile,
3625 &ledger_idx_nonvolatile,
3626 &ledger_idx_volatile_compressed,
3627 &ledger_idx_nonvolatile_compressed,
3628 &do_footprint);
fe8ab488
A
3629 /*
3630 * While wired, this page was accounted
3631 * as "non-volatile" but it should now
3632 * be accounted as "volatile".
3633 */
3634 /* one less "non-volatile"... */
3635 ledger_debit(owner->ledger,
d9a64523 3636 ledger_idx_nonvolatile,
fe8ab488 3637 PAGE_SIZE);
d9a64523
A
3638 if (do_footprint) {
3639 /* ... and "phys_footprint" */
3640 ledger_debit(owner->ledger,
3641 task_ledgers.phys_footprint,
3642 PAGE_SIZE);
3643 }
fe8ab488
A
3644 /* one more "volatile" */
3645 ledger_credit(owner->ledger,
d9a64523 3646 ledger_idx_volatile,
fe8ab488
A
3647 PAGE_SIZE);
3648 }
b0d623f7 3649 }
d9a64523 3650 if (!mem->vmp_private && !mem->vmp_fictitious)
1c79356b 3651 vm_page_wire_count--;
39037602 3652
d9a64523
A
3653 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
3654 mem->vmp_wire_count = 0;
3655 assert(!mem->vmp_gobbled);
3656 } else if (mem->vmp_gobbled) {
3657 if (!mem->vmp_private && !mem->vmp_fictitious)
1c79356b
A
3658 vm_page_wire_count--;
3659 vm_page_gobble_count--;
3660 }
b0d623f7
A
3661}
3662
3663
3664void
3665vm_page_free_prepare_object(
3666 vm_page_t mem,
3667 boolean_t remove_from_hash)
3668{
d9a64523 3669 if (mem->vmp_tabled)
b0d623f7 3670 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
1c79356b 3671
b0d623f7 3672 PAGE_WAKEUP(mem); /* clears wanted */
1c79356b 3673
d9a64523
A
3674 if (mem->vmp_private) {
3675 mem->vmp_private = FALSE;
3676 mem->vmp_fictitious = TRUE;
39037602 3677 VM_PAGE_SET_PHYS_PAGE(mem, vm_page_fictitious_addr);
1c79356b 3678 }
d9a64523
A
3679 if ( !mem->vmp_fictitious) {
3680 assert(mem->vmp_pageq.next == 0);
3681 assert(mem->vmp_pageq.prev == 0);
3682 assert(mem->vmp_listq.next == 0);
3683 assert(mem->vmp_listq.prev == 0);
d190cdc3 3684#if CONFIG_BACKGROUND_QUEUE
d9a64523
A
3685 assert(mem->vmp_backgroundq.next == 0);
3686 assert(mem->vmp_backgroundq.prev == 0);
d190cdc3 3687#endif /* CONFIG_BACKGROUND_QUEUE */
d9a64523
A
3688 assert(mem->vmp_next_m == 0);
3689 vm_page_init(mem, VM_PAGE_GET_PHYS_PAGE(mem), mem->vmp_lopage);
1c79356b
A
3690 }
3691}
3692
b0d623f7 3693
6d2010ae
A
3694/*
3695 * vm_page_free:
3696 *
3697 * Returns the given page to the free list,
 3698 * disassociating it from any VM object.
3699 *
3700 * Object and page queues must be locked prior to entry.
3701 */
2d21ac55
A
3702void
3703vm_page_free(
3704 vm_page_t mem)
3705{
b0d623f7 3706 vm_page_free_prepare(mem);
6d2010ae 3707
d9a64523 3708 if (mem->vmp_fictitious) {
b0d623f7
A
3709 vm_page_release_fictitious(mem);
3710 } else {
39037602
A
3711 vm_page_release(mem,
3712 TRUE); /* page queues are locked */
b0d623f7
A
3713 }
3714}
3715
3716
3717void
3718vm_page_free_unlocked(
3719 vm_page_t mem,
3720 boolean_t remove_from_hash)
3721{
3722 vm_page_lockspin_queues();
3723 vm_page_free_prepare_queues(mem);
3724 vm_page_unlock_queues();
3725
3726 vm_page_free_prepare_object(mem, remove_from_hash);
3727
d9a64523 3728 if (mem->vmp_fictitious) {
2d21ac55
A
3729 vm_page_release_fictitious(mem);
3730 } else {
39037602 3731 vm_page_release(mem, FALSE); /* page queues are not locked */
2d21ac55
A
3732 }
3733}
55e303ae 3734
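
A sketch contrasting the two free paths above (hypothetical callers). vm_page_free() expects both the object lock and the page queues lock to be held; vm_page_free_unlocked() takes and drops the page queues lock itself but still relies on the caller holding the object lock exclusively:

static void
example_free_locks_held(vm_page_t mem)
{
	/* object lock + page queues lock already held by the caller */
	vm_page_free(mem);
}

static void
example_free_queues_unlocked(vm_page_t mem)
{
	/* object lock held; page queues lock NOT held */
	vm_page_free_unlocked(mem, TRUE);	/* TRUE: also remove from the page hash */
}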
316670eb 3735
2d21ac55
A
3736/*
3737 * Free a list of pages. The list can be up to several hundred pages,
3738 * as blocked up by vm_pageout_scan().
b0d623f7 3739 * The big win is not having to take the free list lock once
316670eb 3740 * per page.
d190cdc3
A
3741 *
3742 * The VM page queues lock (vm_page_queue_lock) should NOT be held.
3743 * The VM page free queues lock (vm_page_queue_free_lock) should NOT be held.
2d21ac55 3744 */
55e303ae
A
3745void
3746vm_page_free_list(
316670eb 3747 vm_page_t freeq,
b0d623f7 3748 boolean_t prepare_object)
55e303ae 3749{
316670eb 3750 vm_page_t mem;
2d21ac55 3751 vm_page_t nxt;
316670eb
A
3752 vm_page_t local_freeq;
3753 int pg_count;
2d21ac55 3754
d190cdc3
A
3755 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
3756 LCK_MTX_ASSERT(&vm_page_queue_free_lock, LCK_MTX_ASSERT_NOTOWNED);
3757
316670eb 3758 while (freeq) {
55e303ae 3759
316670eb
A
3760 pg_count = 0;
3761 local_freeq = VM_PAGE_NULL;
3762 mem = freeq;
b0d623f7 3763
316670eb
A
3764 /*
3765 * break up the processing into smaller chunks so
3766 * that we can 'pipeline' the pages onto the
3767 * free list w/o introducing too much
3768 * contention on the global free queue lock
3769 */
3770 while (mem && pg_count < 64) {
3771
d9a64523
A
3772 assert((mem->vmp_q_state == VM_PAGE_NOT_ON_Q) ||
3773 (mem->vmp_q_state == VM_PAGE_IS_WIRED));
39037602 3774#if CONFIG_BACKGROUND_QUEUE
d9a64523
A
3775 assert(mem->vmp_backgroundq.next == 0 &&
3776 mem->vmp_backgroundq.prev == 0 &&
3777 mem->vmp_on_backgroundq == FALSE);
39037602 3778#endif
d9a64523
A
3779 nxt = mem->vmp_snext;
3780 mem->vmp_snext = NULL;
3781 assert(mem->vmp_pageq.prev == 0);
316670eb 3782
d9a64523
A
3783 if (vm_page_free_verify && !mem->vmp_fictitious && !mem->vmp_private) {
3784 assertf(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)), "page = 0x%llx", (uint64_t)VM_PAGE_GET_PHYS_PAGE(mem));
316670eb
A
3785 }
3786 if (prepare_object == TRUE)
3787 vm_page_free_prepare_object(mem, TRUE);
b0d623f7 3788
d9a64523
A
3789 if (!mem->vmp_fictitious) {
3790 assert(mem->vmp_busy);
55e303ae 3791
d9a64523 3792 if ((mem->vmp_lopage == TRUE || vm_lopage_refill == TRUE) &&
316670eb 3793 vm_lopage_free_count < vm_lopage_free_limit &&
39037602
A
3794 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3795 vm_page_release(mem, FALSE); /* page queues are not locked */
3796#if CONFIG_SECLUDED_MEMORY
3797 } else if (vm_page_secluded_count < vm_page_secluded_target &&
3798 num_tasks_can_use_secluded_mem == 0) {
3799 vm_page_release(mem,
3800 FALSE); /* page queues are not locked */
3801#endif /* CONFIG_SECLUDED_MEMORY */
316670eb
A
3802 } else {
3803 /*
3804 * IMPORTANT: we can't set the page "free" here
3805 * because that would make the page eligible for
3806 * a physically-contiguous allocation (see
3807 * vm_page_find_contiguous()) right away (we don't
3808 * hold the vm_page_queue_free lock). That would
3809 * cause trouble because the page is not actually
3810 * in the free queue yet...
3811 */
d9a64523 3812 mem->vmp_snext = local_freeq;
316670eb
A
3813 local_freeq = mem;
3814 pg_count++;
935ed37a 3815
39037602 3816 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
935ed37a 3817 }
316670eb 3818 } else {
39037602
A
3819 assert(VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_fictitious_addr ||
3820 VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_guard_addr);
316670eb 3821 vm_page_release_fictitious(mem);
2d21ac55 3822 }
316670eb 3823 mem = nxt;
55e303ae 3824 }
316670eb
A
3825 freeq = mem;
3826
3827 if ( (mem = local_freeq) ) {
3828 unsigned int avail_free_count;
3829 unsigned int need_wakeup = 0;
3830 unsigned int need_priv_wakeup = 0;
39037602
A
3831#if CONFIG_SECLUDED_MEMORY
3832 unsigned int need_wakeup_secluded = 0;
3833#endif /* CONFIG_SECLUDED_MEMORY */
2d21ac55 3834
316670eb 3835 lck_mtx_lock_spin(&vm_page_queue_free_lock);
55e303ae 3836
316670eb
A
3837 while (mem) {
3838 int color;
3839
d9a64523 3840 nxt = mem->vmp_snext;
2d21ac55 3841
d9a64523
A
3842 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
3843 assert(mem->vmp_busy);
3844 mem->vmp_lopage = FALSE;
3845 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
39037602 3846
5ba3f43e
A
3847 color = VM_PAGE_GET_COLOR(mem);
3848#if defined(__x86_64__)
3849 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead,
39037602
A
3850 mem,
3851 vm_page_t,
d9a64523 3852 vmp_pageq);
5ba3f43e
A
3853#else
3854 vm_page_queue_enter(&vm_page_queue_free[color].qhead,
3855 mem,
3856 vm_page_t,
d9a64523 3857 vmp_pageq);
5ba3f43e 3858#endif
316670eb 3859 mem = nxt;
2d21ac55 3860 }
d9a64523 3861 vm_pageout_vminfo.vm_page_pages_freed += pg_count;
316670eb
A
3862 vm_page_free_count += pg_count;
3863 avail_free_count = vm_page_free_count;
3864
d9a64523
A
3865 VM_DEBUG_CONSTANT_EVENT(vm_page_release, VM_PAGE_RELEASE, DBG_FUNC_NONE, pg_count, 0, 0, 0);
3866
316670eb
A
3867 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
3868
3869 if (avail_free_count < vm_page_free_wanted_privileged) {
3870 need_priv_wakeup = avail_free_count;
3871 vm_page_free_wanted_privileged -= avail_free_count;
3872 avail_free_count = 0;
3873 } else {
3874 need_priv_wakeup = vm_page_free_wanted_privileged;
316670eb 3875 avail_free_count -= vm_page_free_wanted_privileged;
39037602 3876 vm_page_free_wanted_privileged = 0;
316670eb 3877 }
b0d623f7 3878 }
39037602
A
3879#if CONFIG_SECLUDED_MEMORY
3880 if (vm_page_free_wanted_secluded > 0 &&
3881 avail_free_count > vm_page_free_reserved) {
3882 unsigned int available_pages;
3883 available_pages = (avail_free_count -
3884 vm_page_free_reserved);
3885 if (available_pages <
3886 vm_page_free_wanted_secluded) {
3887 need_wakeup_secluded = available_pages;
3888 vm_page_free_wanted_secluded -=
3889 available_pages;
3890 avail_free_count -= available_pages;
3891 } else {
3892 need_wakeup_secluded =
3893 vm_page_free_wanted_secluded;
3894 avail_free_count -=
3895 vm_page_free_wanted_secluded;
3896 vm_page_free_wanted_secluded = 0;
3897 }
3898 }
3899#endif /* CONFIG_SECLUDED_MEMORY */
316670eb
A
3900 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
3901 unsigned int available_pages;
55e303ae 3902
316670eb 3903 available_pages = avail_free_count - vm_page_free_reserved;
55e303ae 3904
316670eb
A
3905 if (available_pages >= vm_page_free_wanted) {
3906 need_wakeup = vm_page_free_wanted;
3907 vm_page_free_wanted = 0;
3908 } else {
3909 need_wakeup = available_pages;
3910 vm_page_free_wanted -= available_pages;
3911 }
3912 }
3913 lck_mtx_unlock(&vm_page_queue_free_lock);
55e303ae 3914
316670eb
A
3915 if (need_priv_wakeup != 0) {
3916 /*
3917 * There shouldn't be that many VM-privileged threads,
3918 * so let's wake them all up, even if we don't quite
3919 * have enough pages to satisfy them all.
3920 */
3921 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
3922 }
39037602
A
3923#if CONFIG_SECLUDED_MEMORY
3924 if (need_wakeup_secluded != 0 &&
3925 vm_page_free_wanted_secluded == 0) {
3926 thread_wakeup((event_t)
3927 &vm_page_free_wanted_secluded);
3928 } else {
3929 for (;
3930 need_wakeup_secluded != 0;
3931 need_wakeup_secluded--) {
3932 thread_wakeup_one(
3933 (event_t)
3934 &vm_page_free_wanted_secluded);
3935 }
3936 }
3937#endif /* CONFIG_SECLUDED_MEMORY */
316670eb
A
3938 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
3939 /*
3940 * We don't expect to have any more waiters
3941 * after this, so let's wake them all up at
3942 * once.
3943 */
3944 thread_wakeup((event_t) &vm_page_free_count);
3945 } else for (; need_wakeup != 0; need_wakeup--) {
3946 /*
3947 * Wake up one waiter per page we just released.
3948 */
3949 thread_wakeup_one((event_t) &vm_page_free_count);
55e303ae 3950 }
2d21ac55 3951
316670eb 3952 VM_CHECK_MEMORYSTATUS;
b0d623f7 3953 }
55e303ae
A
3954 }
3955}
3956
3957
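
vm_page_free_list() consumes a singly linked chain built through vmp_snext, the same way vm_pageout_scan() blocks pages up. A minimal sketch of building and handing off such a chain (hypothetical helper; the pages are assumed busy and already off the paging queues, and neither page lock may be held, per the asserts above):

static void
example_free_page_array(vm_page_t *pages, unsigned int count)
{
	vm_page_t head = VM_PAGE_NULL;
	unsigned int i;

	for (i = 0; i < count; i++) {
		pages[i]->vmp_snext = head;	/* chain through vmp_snext */
		head = pages[i];
	}
	/* TRUE: run vm_page_free_prepare_object() on each page as well */
	vm_page_free_list(head, TRUE);
}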
1c79356b
A
3958/*
3959 * vm_page_wire:
3960 *
3961 * Mark this page as wired down by yet
3962 * another map, removing it from paging queues
3963 * as necessary.
3964 *
3965 * The page's object and the page queues must be locked.
3966 */
3e170ce0
A
3967
3968
1c79356b
A
3969void
3970vm_page_wire(
39037602 3971 vm_page_t mem,
3e170ce0
A
3972 vm_tag_t tag,
3973 boolean_t check_memorystatus)
1c79356b 3974{
39037602
A
3975 vm_object_t m_object;
3976
3977 m_object = VM_PAGE_OBJECT(mem);
1c79356b 3978
d9a64523 3979// dbgLog(current_thread(), mem->vmp_offset, m_object, 1); /* (TEST/DEBUG) */
1c79356b
A
3980
3981 VM_PAGE_CHECK(mem);
39037602
A
3982 if (m_object) {
3983 vm_object_lock_assert_exclusive(m_object);
b0d623f7
A
3984 } else {
3985 /*
3986 * In theory, the page should be in an object before it
3987 * gets wired, since we need to hold the object lock
3988 * to update some fields in the page structure.
3989 * However, some code (i386 pmap, for example) might want
3990 * to wire a page before it gets inserted into an object.
3991 * That's somewhat OK, as long as nobody else can get to
3992 * that page and update it at the same time.
3993 */
3994 }
39037602 3995 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
b0d623f7 3996 if ( !VM_PAGE_WIRED(mem)) {
316670eb 3997
d9a64523 3998 if (mem->vmp_laundry)
39037602
A
3999 vm_pageout_steal_laundry(mem, TRUE);
4000
4001 vm_page_queues_remove(mem, TRUE);
4002
d9a64523
A
4003 assert(mem->vmp_wire_count == 0);
4004 mem->vmp_q_state = VM_PAGE_IS_WIRED;
b0d623f7 4005
39037602 4006 if (m_object) {
3e170ce0 4007
5ba3f43e
A
4008 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
4009 VM_OBJECT_WIRED_PAGE_ADD(m_object, mem);
4010 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, tag);
3e170ce0 4011
39037602
A
4012 assert(m_object->resident_page_count >=
4013 m_object->wired_page_count);
4014 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
b0d623f7
A
4015 assert(vm_page_purgeable_count > 0);
4016 OSAddAtomic(-1, &vm_page_purgeable_count);
4017 OSAddAtomic(1, &vm_page_purgeable_wired_count);
4018 }
39037602
A
4019 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
4020 m_object->purgable == VM_PURGABLE_EMPTY) &&
d9a64523
A
4021 m_object->vo_owner != TASK_NULL) {
4022 task_t owner;
4023 int ledger_idx_volatile;
4024 int ledger_idx_nonvolatile;
4025 int ledger_idx_volatile_compressed;
4026 int ledger_idx_nonvolatile_compressed;
4027 boolean_t do_footprint;
4028
4029 owner = VM_OBJECT_OWNER(m_object);
4030 vm_object_ledger_tag_ledgers(
4031 m_object,
4032 &ledger_idx_volatile,
4033 &ledger_idx_nonvolatile,
4034 &ledger_idx_volatile_compressed,
4035 &ledger_idx_nonvolatile_compressed,
4036 &do_footprint);
fe8ab488
A
4037 /* less volatile bytes */
4038 ledger_debit(owner->ledger,
d9a64523 4039 ledger_idx_volatile,
fe8ab488
A
4040 PAGE_SIZE);
4041 /* more not-quite-volatile bytes */
4042 ledger_credit(owner->ledger,
d9a64523 4043 ledger_idx_nonvolatile,
fe8ab488 4044 PAGE_SIZE);
d9a64523
A
4045 if (do_footprint) {
4046 /* more footprint */
4047 ledger_credit(owner->ledger,
4048 task_ledgers.phys_footprint,
4049 PAGE_SIZE);
4050 }
fe8ab488 4051 }
39037602 4052 if (m_object->all_reusable) {
b0d623f7
A
4053 /*
4054 * Wired pages are not counted as "re-usable"
4055 * in "all_reusable" VM objects, so nothing
4056 * to do here.
4057 */
d9a64523 4058 } else if (mem->vmp_reusable) {
b0d623f7
A
4059 /*
4060 * This page is not "re-usable" when it's
4061 * wired, so adjust its state and the
4062 * accounting.
4063 */
39037602 4064 vm_object_reuse_pages(m_object,
d9a64523
A
4065 mem->vmp_offset,
4066 mem->vmp_offset+PAGE_SIZE_64,
b0d623f7
A
4067 FALSE);
4068 }
4069 }
d9a64523 4070 assert(!mem->vmp_reusable);
b0d623f7 4071
d9a64523 4072 if (!mem->vmp_private && !mem->vmp_fictitious && !mem->vmp_gobbled)
1c79356b 4073 vm_page_wire_count++;
d9a64523 4074 if (mem->vmp_gobbled)
1c79356b 4075 vm_page_gobble_count--;
d9a64523 4076 mem->vmp_gobbled = FALSE;
593a1d5f 4077
3e170ce0
A
4078 if (check_memorystatus == TRUE) {
4079 VM_CHECK_MEMORYSTATUS;
4080 }
1c79356b 4081 }
d9a64523
A
4082 assert(!mem->vmp_gobbled);
4083 assert(mem->vmp_q_state == VM_PAGE_IS_WIRED);
4084 mem->vmp_wire_count++;
4085 if (__improbable(mem->vmp_wire_count == 0)) {
39037602
A
4086 panic("vm_page_wire(%p): wire_count overflow", mem);
4087 }
b0d623f7 4088 VM_PAGE_CHECK(mem);
1c79356b
A
4089}
4090
1c79356b
A
4091/*
4092 * vm_page_unwire:
4093 *
4094 * Release one wiring of this page, potentially
4095 * enabling it to be paged again.
4096 *
4097 * The page's object and the page queues must be locked.
4098 */
4099void
4100vm_page_unwire(
0b4c1975
A
4101 vm_page_t mem,
4102 boolean_t queueit)
1c79356b 4103{
39037602
A
4104 vm_object_t m_object;
4105
4106 m_object = VM_PAGE_OBJECT(mem);
1c79356b 4107
d9a64523 4108// dbgLog(current_thread(), mem->vmp_offset, m_object, 0); /* (TEST/DEBUG) */
1c79356b
A
4109
4110 VM_PAGE_CHECK(mem);
b0d623f7 4111 assert(VM_PAGE_WIRED(mem));
d9a64523
A
4112 assert(mem->vmp_wire_count > 0);
4113 assert(!mem->vmp_gobbled);
39037602
A
4114 assert(m_object != VM_OBJECT_NULL);
4115 vm_object_lock_assert_exclusive(m_object);
4116 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
d9a64523 4117 if (--mem->vmp_wire_count == 0) {
5ba3f43e 4118
d9a64523 4119 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
39037602 4120
5ba3f43e
A
4121 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
4122 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
4123 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
d9a64523 4124 if (!mem->vmp_private && !mem->vmp_fictitious) {
4bd07ac2
A
4125 vm_page_wire_count--;
4126 }
5ba3f43e 4127
39037602
A
4128 assert(m_object->resident_page_count >=
4129 m_object->wired_page_count);
4130 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
b0d623f7
A
4131 OSAddAtomic(+1, &vm_page_purgeable_count);
4132 assert(vm_page_purgeable_wired_count > 0);
4133 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
4134 }
39037602
A
4135 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
4136 m_object->purgable == VM_PURGABLE_EMPTY) &&
d9a64523
A
4137 m_object->vo_owner != TASK_NULL) {
4138 task_t owner;
4139 int ledger_idx_volatile;
4140 int ledger_idx_nonvolatile;
4141 int ledger_idx_volatile_compressed;
4142 int ledger_idx_nonvolatile_compressed;
4143 boolean_t do_footprint;
4144
4145 owner = VM_OBJECT_OWNER(m_object);
4146 vm_object_ledger_tag_ledgers(
4147 m_object,
4148 &ledger_idx_volatile,
4149 &ledger_idx_nonvolatile,
4150 &ledger_idx_volatile_compressed,
4151 &ledger_idx_nonvolatile_compressed,
4152 &do_footprint);
fe8ab488
A
4153 /* more volatile bytes */
4154 ledger_credit(owner->ledger,
d9a64523 4155 ledger_idx_volatile,
fe8ab488
A
4156 PAGE_SIZE);
4157 /* less not-quite-volatile bytes */
4158 ledger_debit(owner->ledger,
d9a64523 4159 ledger_idx_nonvolatile,
fe8ab488 4160 PAGE_SIZE);
d9a64523
A
4161 if (do_footprint) {
4162 /* less footprint */
4163 ledger_debit(owner->ledger,
4164 task_ledgers.phys_footprint,
4165 PAGE_SIZE);
4166 }
fe8ab488 4167 }
39037602 4168 assert(m_object != kernel_object);
d9a64523 4169 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
0b4c1975
A
4170
4171 if (queueit == TRUE) {
39037602 4172 if (m_object->purgable == VM_PURGABLE_EMPTY) {
0b4c1975
A
4173 vm_page_deactivate(mem);
4174 } else {
4175 vm_page_activate(mem);
4176 }
2d21ac55 4177 }
593a1d5f 4178
6d2010ae
A
4179 VM_CHECK_MEMORYSTATUS;
4180
1c79356b 4181 }
b0d623f7 4182 VM_PAGE_CHECK(mem);
1c79356b
A
4183}
4184
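
A sketch of the wire/unwire pairing and its locking (hypothetical caller; the page is assumed to be resident in an object, and VM_KERN_MEMORY_NONE is assumed here as a placeholder wire tag):

static void
example_wire_then_unwire(vm_page_t mem)
{
	vm_object_t object = VM_PAGE_OBJECT(mem);

	vm_object_lock(object);
	vm_page_lockspin_queues();

	vm_page_wire(mem, VM_KERN_MEMORY_NONE, TRUE);	/* tag, run memorystatus check */
	/* ... page stays resident while vmp_wire_count > 0 ... */
	vm_page_unwire(mem, TRUE);	/* TRUE: requeue when the wire count drops to 0 */

	vm_page_unlock_queues();
	vm_object_unlock(object);
}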
4185/*
4186 * vm_page_deactivate:
4187 *
4188 * Returns the given page to the inactive list,
4189 * indicating that no physical maps have access
4190 * to this page. [Used by the physical mapping system.]
4191 *
4192 * The page queues must be locked.
4193 */
4194void
4195vm_page_deactivate(
b0d623f7
A
4196 vm_page_t m)
4197{
4198 vm_page_deactivate_internal(m, TRUE);
4199}
4200
4201
4202void
4203vm_page_deactivate_internal(
4204 vm_page_t m,
4205 boolean_t clear_hw_reference)
1c79356b 4206{
39037602
A
4207 vm_object_t m_object;
4208
4209 m_object = VM_PAGE_OBJECT(m);
2d21ac55 4210
1c79356b 4211 VM_PAGE_CHECK(m);
39037602
A
4212 assert(m_object != kernel_object);
4213 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
1c79356b 4214
39037602
A
4215// dbgLog(VM_PAGE_GET_PHYS_PAGE(m), vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
4216 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1c79356b
A
4217 /*
4218 * This page is no longer very interesting. If it was
4219 * interesting (active or inactive/referenced), then we
4220 * clear the reference bit and (re)enter it in the
4221 * inactive queue. Note wired pages should not have
4222 * their reference bit cleared.
4223 */
d9a64523 4224 assert ( !(m->vmp_absent && !m->vmp_unusual));
0b4c1975 4225
d9a64523 4226 if (m->vmp_gobbled) { /* can this happen? */
b0d623f7 4227 assert( !VM_PAGE_WIRED(m));
2d21ac55 4228
d9a64523 4229 if (!m->vmp_private && !m->vmp_fictitious)
1c79356b
A
4230 vm_page_wire_count--;
4231 vm_page_gobble_count--;
d9a64523 4232 m->vmp_gobbled = FALSE;
1c79356b 4233 }
316670eb
A
4234 /*
4235 * if this page is currently on the pageout queue, we can't do the
3e170ce0 4236 * vm_page_queues_remove (which doesn't handle the pageout queue case)
316670eb
A
4237 * and we can't remove it manually since we would need the object lock
4238 * (which is not required here) to decrement the activity_in_progress
4239 * reference which is held on the object while the page is in the pageout queue...
4240 * just let the normal laundry processing proceed
39037602 4241 */
d9a64523
A
4242 if (m->vmp_laundry || m->vmp_private || m->vmp_fictitious ||
4243 (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4244 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
39037602
A
4245 VM_PAGE_WIRED(m)) {
4246 return;
4247 }
d9a64523 4248 if (!m->vmp_absent && clear_hw_reference == TRUE)
39037602 4249 pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));
2d21ac55 4250
d9a64523
A
4251 m->vmp_reference = FALSE;
4252 m->vmp_no_cache = FALSE;
2d21ac55 4253
39037602
A
4254 if ( !VM_PAGE_INACTIVE(m)) {
4255 vm_page_queues_remove(m, FALSE);
0b4e3aa0 4256
39037602 4257 if (!VM_DYNAMIC_PAGING_ENABLED() &&
d9a64523 4258 m->vmp_dirty && m_object->internal &&
39037602
A
4259 (m_object->purgable == VM_PURGABLE_DENY ||
4260 m_object->purgable == VM_PURGABLE_NONVOLATILE ||
4261 m_object->purgable == VM_PURGABLE_VOLATILE)) {
3e170ce0 4262 vm_page_check_pageable_safe(m);
d9a64523
A
4263 vm_page_queue_enter(&vm_page_queue_throttled, m, vm_page_t, vmp_pageq);
4264 m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;
2d21ac55 4265 vm_page_throttled_count++;
9bccf70c 4266 } else {
39037602 4267 if (m_object->named && m_object->ref_count == 1) {
2d21ac55 4268 vm_page_speculate(m, FALSE);
b0d623f7 4269#if DEVELOPMENT || DEBUG
2d21ac55 4270 vm_page_speculative_recreated++;
b0d623f7 4271#endif
2d21ac55 4272 } else {
3e170ce0 4273 vm_page_enqueue_inactive(m, FALSE);
2d21ac55 4274 }
9bccf70c 4275 }
1c79356b
A
4276 }
4277}
4278
316670eb
A
4279/*
4280 * vm_page_enqueue_cleaned
4281 *
4282 * Put the page on the cleaned queue, mark it cleaned, etc.
 4283 * Being on the cleaned queue (and having vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
4284 * does ** NOT ** guarantee that the page is clean!
4285 *
4286 * Call with the queues lock held.
4287 */
4288
4289void vm_page_enqueue_cleaned(vm_page_t m)
4290{
39037602
A
4291 vm_object_t m_object;
4292
4293 m_object = VM_PAGE_OBJECT(m);
4294
4295 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4296 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
d9a64523 4297 assert( !(m->vmp_absent && !m->vmp_unusual));
5ba3f43e
A
4298
4299 if (VM_PAGE_WIRED(m)) {
4300 return;
4301 }
316670eb 4302
d9a64523
A
4303 if (m->vmp_gobbled) {
4304 if (!m->vmp_private && !m->vmp_fictitious)
316670eb
A
4305 vm_page_wire_count--;
4306 vm_page_gobble_count--;
d9a64523 4307 m->vmp_gobbled = FALSE;
316670eb
A
4308 }
4309 /*
4310 * if this page is currently on the pageout queue, we can't do the
3e170ce0 4311 * vm_page_queues_remove (which doesn't handle the pageout queue case)
316670eb
A
4312 * and we can't remove it manually since we would need the object lock
4313 * (which is not required here) to decrement the activity_in_progress
4314 * reference which is held on the object while the page is in the pageout queue...
4315 * just let the normal laundry processing proceed
4316 */
d9a64523
A
4317 if (m->vmp_laundry || m->vmp_private || m->vmp_fictitious ||
4318 (m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) ||
4319 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
39037602
A
4320 return;
4321 }
4322 vm_page_queues_remove(m, FALSE);
316670eb 4323
3e170ce0 4324 vm_page_check_pageable_safe(m);
d9a64523
A
4325 vm_page_queue_enter(&vm_page_queue_cleaned, m, vm_page_t, vmp_pageq);
4326 m->vmp_q_state = VM_PAGE_ON_INACTIVE_CLEANED_Q;
316670eb
A
4327 vm_page_cleaned_count++;
4328
316670eb 4329 vm_page_inactive_count++;
39037602 4330 if (m_object->internal) {
39236c6e
A
4331 vm_page_pageable_internal_count++;
4332 } else {
4333 vm_page_pageable_external_count++;
4334 }
39037602 4335#if CONFIG_BACKGROUND_QUEUE
d9a64523 4336 if (m->vmp_in_background)
39037602
A
4337 vm_page_add_to_backgroundq(m, TRUE);
4338#endif
d9a64523 4339 VM_PAGEOUT_DEBUG(vm_pageout_enqueued_cleaned, 1);
316670eb
A
4340}
4341
1c79356b
A
4342/*
4343 * vm_page_activate:
4344 *
4345 * Put the specified page on the active list (if appropriate).
4346 *
4347 * The page queues must be locked.
4348 */
4349
4350void
4351vm_page_activate(
39037602 4352 vm_page_t m)
1c79356b 4353{
39037602
A
4354 vm_object_t m_object;
4355
4356 m_object = VM_PAGE_OBJECT(m);
4357
1c79356b 4358 VM_PAGE_CHECK(m);
2d21ac55 4359#ifdef FIXME_4778297
39037602 4360 assert(m_object != kernel_object);
91447636 4361#endif
39037602
A
4362 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4363 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
d9a64523 4364 assert( !(m->vmp_absent && !m->vmp_unusual));
0b4c1975 4365
d9a64523 4366 if (m->vmp_gobbled) {
b0d623f7 4367 assert( !VM_PAGE_WIRED(m));
d9a64523 4368 if (!m->vmp_private && !m->vmp_fictitious)
1c79356b
A
4369 vm_page_wire_count--;
4370 vm_page_gobble_count--;
d9a64523 4371 m->vmp_gobbled = FALSE;
1c79356b 4372 }
316670eb
A
4373 /*
4374 * if this page is currently on the pageout queue, we can't do the
3e170ce0 4375 * vm_page_queues_remove (which doesn't handle the pageout queue case)
316670eb
A
4376 * and we can't remove it manually since we would need the object lock
4377 * (which is not required here) to decrement the activity_in_progress
4378 * reference which is held on the object while the page is in the pageout queue...
4379 * just let the normal laundry processing proceed
4380 */
d9a64523
A
4381 if (m->vmp_laundry || m->vmp_private || m->vmp_fictitious ||
4382 (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4383 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q))
1c79356b
A
4384 return;
4385
2d21ac55 4386#if DEBUG
d9a64523 4387 if (m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q)
2d21ac55
A
4388 panic("vm_page_activate: already active");
4389#endif
4390
d9a64523 4391 if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
2d21ac55
A
4392 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
4393 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
4394 }
316670eb 4395
39037602 4396 vm_page_queues_remove(m, FALSE);
2d21ac55 4397
b0d623f7 4398 if ( !VM_PAGE_WIRED(m)) {
3e170ce0 4399 vm_page_check_pageable_safe(m);
39037602 4400 if (!VM_DYNAMIC_PAGING_ENABLED() &&
d9a64523 4401 m->vmp_dirty && m_object->internal &&
39037602
A
4402 (m_object->purgable == VM_PURGABLE_DENY ||
4403 m_object->purgable == VM_PURGABLE_NONVOLATILE ||
4404 m_object->purgable == VM_PURGABLE_VOLATILE)) {
d9a64523
A
4405 vm_page_queue_enter(&vm_page_queue_throttled, m, vm_page_t, vmp_pageq);
4406 m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;
2d21ac55 4407 vm_page_throttled_count++;
9bccf70c 4408 } else {
39037602
A
4409#if CONFIG_SECLUDED_MEMORY
4410 if (secluded_for_filecache &&
4411 vm_page_secluded_target != 0 &&
4412 num_tasks_can_use_secluded_mem == 0 &&
5ba3f43e 4413 m_object->eligible_for_secluded) {
39037602 4414 vm_page_queue_enter(&vm_page_queue_secluded, m,
d9a64523
A
4415 vm_page_t, vmp_pageq);
4416 m->vmp_q_state = VM_PAGE_ON_SECLUDED_Q;
39037602
A
4417 vm_page_secluded_count++;
4418 vm_page_secluded_count_inuse++;
4419 assert(!m_object->internal);
4420// vm_page_pageable_external_count++;
4421 } else
4422#endif /* CONFIG_SECLUDED_MEMORY */
4423 vm_page_enqueue_active(m, FALSE);
9bccf70c 4424 }
d9a64523
A
4425 m->vmp_reference = TRUE;
4426 m->vmp_no_cache = FALSE;
1c79356b 4427 }
b0d623f7 4428 VM_PAGE_CHECK(m);
2d21ac55
A
4429}
4430
4431
4432/*
4433 * vm_page_speculate:
4434 *
4435 * Put the specified page on the speculative list (if appropriate).
4436 *
4437 * The page queues must be locked.
4438 */
4439void
4440vm_page_speculate(
4441 vm_page_t m,
4442 boolean_t new)
4443{
4444 struct vm_speculative_age_q *aq;
39037602
A
4445 vm_object_t m_object;
4446
4447 m_object = VM_PAGE_OBJECT(m);
2d21ac55
A
4448
4449 VM_PAGE_CHECK(m);
3e170ce0
A
4450 vm_page_check_pageable_safe(m);
4451
39037602
A
4452 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4453 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
d9a64523 4454 assert( !(m->vmp_absent && !m->vmp_unusual));
39037602 4455 assert(m_object->internal == FALSE);
b0d623f7 4456
316670eb
A
4457 /*
4458 * if this page is currently on the pageout queue, we can't do the
3e170ce0 4459 * vm_page_queues_remove (which doesn't handle the pageout queue case)
316670eb
A
4460 * and we can't remove it manually since we would need the object lock
4461 * (which is not required here) to decrement the activity_in_progress
4462 * reference which is held on the object while the page is in the pageout queue...
4463 * just let the normal laundry processing proceed
4464 */
d9a64523
A
4465 if (m->vmp_laundry || m->vmp_private || m->vmp_fictitious ||
4466 (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4467 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q))
6d2010ae 4468 return;
0b4c1975 4469
39037602 4470 vm_page_queues_remove(m, FALSE);
b0d623f7
A
4471
4472 if ( !VM_PAGE_WIRED(m)) {
2d21ac55 4473 mach_timespec_t ts;
b0d623f7
A
4474 clock_sec_t sec;
4475 clock_nsec_t nsec;
2d21ac55 4476
b0d623f7
A
4477 clock_get_system_nanotime(&sec, &nsec);
4478 ts.tv_sec = (unsigned int) sec;
4479 ts.tv_nsec = nsec;
2d21ac55
A
4480
4481 if (vm_page_speculative_count == 0) {
4482
4483 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4484 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4485
4486 aq = &vm_page_queue_speculative[speculative_age_index];
4487
4488 /*
4489 * set the timer to begin a new group
4490 */
d9a64523
A
4491 aq->age_ts.tv_sec = vm_pageout_state.vm_page_speculative_q_age_ms / 1000;
4492 aq->age_ts.tv_nsec = (vm_pageout_state.vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
2d21ac55
A
4493 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
4494 } else {
4495 aq = &vm_page_queue_speculative[speculative_age_index];
4496
4497 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
4498
4499 speculative_age_index++;
4500
4501 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
4502 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4503 if (speculative_age_index == speculative_steal_index) {
4504 speculative_steal_index = speculative_age_index + 1;
4505
4506 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
4507 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4508 }
4509 aq = &vm_page_queue_speculative[speculative_age_index];
4510
39037602 4511 if (!vm_page_queue_empty(&aq->age_q))
2d21ac55
A
4512 vm_page_speculate_ageit(aq);
4513
d9a64523
A
4514 aq->age_ts.tv_sec = vm_pageout_state.vm_page_speculative_q_age_ms / 1000;
4515 aq->age_ts.tv_nsec = (vm_pageout_state.vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
2d21ac55
A
4516 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
4517 }
4518 }
d9a64523
A
4519 vm_page_enqueue_tail(&aq->age_q, &m->vmp_pageq);
4520 m->vmp_q_state = VM_PAGE_ON_SPECULATIVE_Q;
2d21ac55 4521 vm_page_speculative_count++;
39037602 4522 vm_page_pageable_external_count++;
2d21ac55
A
4523
4524 if (new == TRUE) {
39037602 4525 vm_object_lock_assert_exclusive(m_object);
6d2010ae 4526
39037602 4527 m_object->pages_created++;
b0d623f7 4528#if DEVELOPMENT || DEBUG
2d21ac55 4529 vm_page_speculative_created++;
b0d623f7 4530#endif
2d21ac55
A
4531 }
4532 }
b0d623f7 4533 VM_PAGE_CHECK(m);
2d21ac55
A
4534}
4535
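
The aging math in vm_page_speculate() splits vm_pageout_state.vm_page_speculative_q_age_ms into a mach_timespec_t. A worked example with a hypothetical value:

/*
 * With vm_page_speculative_q_age_ms == 1500 (hypothetical value):
 *   age_ts.tv_sec  = 1500 / 1000                          = 1 second
 *   age_ts.tv_nsec = (1500 % 1000) * 1000 * NSEC_PER_USEC = 500000000 ns
 * ADD_MACH_TIMESPEC() then adds the current system time, so the bin
 * expires 1.5 seconds from now, after which a new aging bin is started.
 */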
4536
4537/*
4538 * move pages from the specified aging bin to
4539 * the speculative bin that pageout_scan claims from
4540 *
4541 * The page queues must be locked.
4542 */
4543void
4544vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
4545{
4546 struct vm_speculative_age_q *sq;
4547 vm_page_t t;
4548
4549 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
4550
39037602 4551 if (vm_page_queue_empty(&sq->age_q)) {
2d21ac55
A
4552 sq->age_q.next = aq->age_q.next;
4553 sq->age_q.prev = aq->age_q.prev;
4554
39037602 4555 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.next);
d9a64523 4556 t->vmp_pageq.prev = VM_PAGE_PACK_PTR(&sq->age_q);
2d21ac55 4557
39037602 4558 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
d9a64523 4559 t->vmp_pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);
2d21ac55 4560 } else {
39037602 4561 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
d9a64523 4562 t->vmp_pageq.next = aq->age_q.next;
2d21ac55 4563
39037602 4564 t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.next);
d9a64523 4565 t->vmp_pageq.prev = sq->age_q.prev;
2d21ac55 4566
39037602 4567 t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.prev);
d9a64523 4568 t->vmp_pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);
2d21ac55
A
4569
4570 sq->age_q.prev = aq->age_q.prev;
1c79356b 4571 }
39037602 4572 vm_page_queue_init(&aq->age_q);
2d21ac55
A
4573}
4574
4575
4576void
4577vm_page_lru(
4578 vm_page_t m)
4579{
4580 VM_PAGE_CHECK(m);
39037602
A
4581 assert(VM_PAGE_OBJECT(m) != kernel_object);
4582 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
2d21ac55 4583
39037602 4584 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
d9a64523
A
4585
4586 if (m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q) {
4587 /*
4588 * we don't need to do all the other work that
4589 * vm_page_queues_remove and vm_page_enqueue_inactive
4590 * bring along for the ride
4591 */
4592 assert(!m->vmp_laundry);
4593 assert(!m->vmp_private);
4594
4595 m->vmp_no_cache = FALSE;
4596
4597 vm_page_queue_remove(&vm_page_queue_inactive, m, vm_page_t, vmp_pageq);
4598 vm_page_queue_enter(&vm_page_queue_inactive, m, vm_page_t, vmp_pageq);
4599
4600 return;
4601 }
316670eb
A
4602 /*
4603 * if this page is currently on the pageout queue, we can't do the
3e170ce0 4604 * vm_page_queues_remove (which doesn't handle the pageout queue case)
316670eb
A
4605 * and we can't remove it manually since we would need the object lock
4606 * (which is not required here) to decrement the activity_in_progress
4607 * reference which is held on the object while the page is in the pageout queue...
4608 * just let the normal laundry processing proceed
4609 */
d9a64523
A
4610 if (m->vmp_laundry || m->vmp_private ||
4611 (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4612 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
39037602 4613 VM_PAGE_WIRED(m))
2d21ac55
A
4614 return;
4615
d9a64523 4616 m->vmp_no_cache = FALSE;
2d21ac55 4617
39037602 4618 vm_page_queues_remove(m, FALSE);
2d21ac55 4619
3e170ce0 4620 vm_page_enqueue_inactive(m, FALSE);
1c79356b
A
4621}
4622
2d21ac55 4623
b0d623f7
A
4624void
4625vm_page_reactivate_all_throttled(void)
4626{
4627 vm_page_t first_throttled, last_throttled;
4628 vm_page_t first_active;
4629 vm_page_t m;
4630 int extra_active_count;
39236c6e 4631 int extra_internal_count, extra_external_count;
39037602 4632 vm_object_t m_object;
b0d623f7 4633
39037602 4634 if (!VM_DYNAMIC_PAGING_ENABLED())
6d2010ae
A
4635 return;
4636
b0d623f7 4637 extra_active_count = 0;
39236c6e
A
4638 extra_internal_count = 0;
4639 extra_external_count = 0;
b0d623f7 4640 vm_page_lock_queues();
39037602 4641 if (! vm_page_queue_empty(&vm_page_queue_throttled)) {
b0d623f7
A
4642 /*
4643 * Switch "throttled" pages to "active".
4644 */
d9a64523 4645 vm_page_queue_iterate(&vm_page_queue_throttled, m, vm_page_t, vmp_pageq) {
b0d623f7 4646 VM_PAGE_CHECK(m);
d9a64523 4647 assert(m->vmp_q_state == VM_PAGE_ON_THROTTLED_Q);
39037602
A
4648
4649 m_object = VM_PAGE_OBJECT(m);
6d2010ae
A
4650
4651 extra_active_count++;
39037602 4652 if (m_object->internal) {
39236c6e
A
4653 extra_internal_count++;
4654 } else {
4655 extra_external_count++;
4656 }
6d2010ae 4657
d9a64523 4658 m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
b0d623f7 4659 VM_PAGE_CHECK(m);
39037602 4660#if CONFIG_BACKGROUND_QUEUE
d9a64523 4661 if (m->vmp_in_background)
39037602
A
4662 vm_page_add_to_backgroundq(m, FALSE);
4663#endif
b0d623f7
A
4664 }
4665
4666 /*
 4667 * Transfer the entire throttled queue to the regular LRU page queues.
4668 * We insert it at the head of the active queue, so that these pages
4669 * get re-evaluated by the LRU algorithm first, since they've been
4670 * completely out of it until now.
4671 */
39037602
A
4672 first_throttled = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
4673 last_throttled = (vm_page_t) vm_page_queue_last(&vm_page_queue_throttled);
4674 first_active = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
4675 if (vm_page_queue_empty(&vm_page_queue_active)) {
4676 vm_page_queue_active.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
b0d623f7 4677 } else {
d9a64523 4678 first_active->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
b0d623f7 4679 }
39037602 4680 vm_page_queue_active.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_throttled);
d9a64523
A
4681 first_throttled->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active);
4682 last_throttled->vmp_pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active);
b0d623f7
A
4683
4684#if DEBUG
4685 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
4686#endif
39037602 4687 vm_page_queue_init(&vm_page_queue_throttled);
b0d623f7
A
4688 /*
4689 * Adjust the global page counts.
4690 */
4691 vm_page_active_count += extra_active_count;
39236c6e
A
4692 vm_page_pageable_internal_count += extra_internal_count;
4693 vm_page_pageable_external_count += extra_external_count;
b0d623f7
A
4694 vm_page_throttled_count = 0;
4695 }
4696 assert(vm_page_throttled_count == 0);
39037602 4697 assert(vm_page_queue_empty(&vm_page_queue_throttled));
b0d623f7
A
4698 vm_page_unlock_queues();
4699}
4700
4701
4702/*
4703 * move pages from the indicated local queue to the global active queue
 4704 * it's ok to fail if we're below the hard limit and force == FALSE
4705 * the nolocks == TRUE case is to allow this function to be run on
4706 * the hibernate path
4707 */
4708
4709void
4710vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
4711{
4712 struct vpl *lq;
4713 vm_page_t first_local, last_local;
4714 vm_page_t first_active;
4715 vm_page_t m;
4716 uint32_t count = 0;
4717
4718 if (vm_page_local_q == NULL)
4719 return;
4720
4721 lq = &vm_page_local_q[lid].vpl_un.vpl;
4722
4723 if (nolocks == FALSE) {
4724 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
4725 if ( !vm_page_trylockspin_queues())
4726 return;
4727 } else
4728 vm_page_lockspin_queues();
4729
4730 VPL_LOCK(&lq->vpl_lock);
4731 }
4732 if (lq->vpl_count) {
4733 /*
4734 * Switch "local" pages to "active".
4735 */
39037602 4736 assert(!vm_page_queue_empty(&lq->vpl_queue));
b0d623f7 4737
d9a64523 4738 vm_page_queue_iterate(&lq->vpl_queue, m, vm_page_t, vmp_pageq) {
b0d623f7 4739 VM_PAGE_CHECK(m);
3e170ce0 4740 vm_page_check_pageable_safe(m);
d9a64523
A
4741 assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q);
4742 assert(!m->vmp_fictitious);
b0d623f7 4743
d9a64523 4744 if (m->vmp_local_id != lid)
b0d623f7
A
4745 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
4746
d9a64523
A
4747 m->vmp_local_id = 0;
4748 m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
b0d623f7 4749 VM_PAGE_CHECK(m);
39037602 4750#if CONFIG_BACKGROUND_QUEUE
d9a64523 4751 if (m->vmp_in_background)
39037602
A
4752 vm_page_add_to_backgroundq(m, FALSE);
4753#endif
b0d623f7
A
4754 count++;
4755 }
4756 if (count != lq->vpl_count)
4757 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
4758
4759 /*
 4760 * Transfer the entire local queue to the regular LRU page queues.
4761 */
39037602
A
4762 first_local = (vm_page_t) vm_page_queue_first(&lq->vpl_queue);
4763 last_local = (vm_page_t) vm_page_queue_last(&lq->vpl_queue);
4764 first_active = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
b0d623f7 4765
39037602
A
4766 if (vm_page_queue_empty(&vm_page_queue_active)) {
4767 vm_page_queue_active.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
b0d623f7 4768 } else {
d9a64523 4769 first_active->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
b0d623f7 4770 }
39037602 4771 vm_page_queue_active.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
d9a64523
A
4772 first_local->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active);
4773 last_local->vmp_pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active);
b0d623f7 4774
39037602 4775 vm_page_queue_init(&lq->vpl_queue);
b0d623f7
A
4776 /*
4777 * Adjust the global page counts.
4778 */
4779 vm_page_active_count += lq->vpl_count;
39236c6e
A
4780 vm_page_pageable_internal_count += lq->vpl_internal_count;
4781 vm_page_pageable_external_count += lq->vpl_external_count;
b0d623f7 4782 lq->vpl_count = 0;
39236c6e
A
4783 lq->vpl_internal_count = 0;
4784 lq->vpl_external_count = 0;
b0d623f7 4785 }
39037602 4786 assert(vm_page_queue_empty(&lq->vpl_queue));
b0d623f7
A
4787
4788 if (nolocks == FALSE) {
4789 VPL_UNLOCK(&lq->vpl_lock);
d9a64523
A
4790
4791 vm_page_balance_inactive(count / 4);
b0d623f7
A
4792 vm_page_unlock_queues();
4793 }
4794}
4795
1c79356b
A
4796/*
4797 * vm_page_part_zero_fill:
4798 *
4799 * Zero-fill a part of the page.
4800 */
39236c6e 4801#define PMAP_ZERO_PART_PAGE_IMPLEMENTED
1c79356b
A
4802void
4803vm_page_part_zero_fill(
4804 vm_page_t m,
4805 vm_offset_t m_pa,
4806 vm_size_t len)
4807{
1c79356b 4808
316670eb
A
4809#if 0
4810 /*
4811 * we don't hold the page queue lock
4812 * so this check isn't safe to make
4813 */
1c79356b 4814 VM_PAGE_CHECK(m);
316670eb
A
4815#endif
4816
1c79356b 4817#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
39037602 4818 pmap_zero_part_page(VM_PAGE_GET_PHYS_PAGE(m), m_pa, len);
1c79356b 4819#else
39236c6e 4820 vm_page_t tmp;
1c79356b
A
4821 while (1) {
4822 tmp = vm_page_grab();
4823 if (tmp == VM_PAGE_NULL) {
4824 vm_page_wait(THREAD_UNINT);
4825 continue;
4826 }
4827 break;
4828 }
4829 vm_page_zero_fill(tmp);
4830 if(m_pa != 0) {
4831 vm_page_part_copy(m, 0, tmp, 0, m_pa);
4832 }
4833 if((m_pa + len) < PAGE_SIZE) {
4834 vm_page_part_copy(m, m_pa + len, tmp,
4835 m_pa + len, PAGE_SIZE - (m_pa + len));
4836 }
4837 vm_page_copy(tmp,m);
b0d623f7 4838 VM_PAGE_FREE(tmp);
1c79356b
A
4839#endif
4840
4841}
4842
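
A typical use of vm_page_part_zero_fill() is zeroing the tail of a page after a short read. A minimal sketch (hypothetical helper; 'valid' is the number of bytes actually filled):

static void
example_zero_page_tail(vm_page_t m, vm_size_t valid)
{
	if (valid < PAGE_SIZE) {
		/* zero from 'valid' to the end of the page */
		vm_page_part_zero_fill(m, valid, PAGE_SIZE - valid);
	}
}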
4843/*
4844 * vm_page_zero_fill:
4845 *
4846 * Zero-fill the specified page.
4847 */
4848void
4849vm_page_zero_fill(
4850 vm_page_t m)
4851{
4852 XPR(XPR_VM_PAGE,
39037602 4853 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
d9a64523 4854 VM_PAGE_OBJECT(m), m->vmp_offset, m, 0,0);
316670eb
A
4855#if 0
4856 /*
4857 * we don't hold the page queue lock
4858 * so this check isn't safe to make
4859 */
1c79356b 4860 VM_PAGE_CHECK(m);
316670eb 4861#endif
1c79356b 4862
39037602
A
4863// dbgTrace(0xAEAEAEAE, VM_PAGE_GET_PHYS_PAGE(m), 0); /* (BRINGUP) */
4864 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
1c79356b
A
4865}
4866
4867/*
4868 * vm_page_part_copy:
4869 *
4870 * copy part of one page to another
4871 */
4872
4873void
4874vm_page_part_copy(
4875 vm_page_t src_m,
4876 vm_offset_t src_pa,
4877 vm_page_t dst_m,
4878 vm_offset_t dst_pa,
4879 vm_size_t len)
4880{
316670eb
A
4881#if 0
4882 /*
4883 * we don't hold the page queue lock
4884 * so this check isn't safe to make
4885 */
1c79356b
A
4886 VM_PAGE_CHECK(src_m);
4887 VM_PAGE_CHECK(dst_m);
316670eb 4888#endif
39037602
A
4889 pmap_copy_part_page(VM_PAGE_GET_PHYS_PAGE(src_m), src_pa,
4890 VM_PAGE_GET_PHYS_PAGE(dst_m), dst_pa, len);
1c79356b
A
4891}
4892
4893/*
4894 * vm_page_copy:
4895 *
4896 * Copy one page to another
4897 */
4898
2d21ac55
A
4899int vm_page_copy_cs_validations = 0;
4900int vm_page_copy_cs_tainted = 0;
4901
1c79356b
A
4902void
4903vm_page_copy(
4904 vm_page_t src_m,
4905 vm_page_t dest_m)
4906{
39037602
A
4907 vm_object_t src_m_object;
4908
4909 src_m_object = VM_PAGE_OBJECT(src_m);
4910
1c79356b 4911 XPR(XPR_VM_PAGE,
39037602 4912 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
d9a64523
A
4913 src_m_object, src_m->vmp_offset,
4914 VM_PAGE_OBJECT(dest_m), dest_m->vmp_offset,
39037602 4915 0);
316670eb
A
4916#if 0
4917 /*
4918 * we don't hold the page queue lock
4919 * so this check isn't safe to make
4920 */
1c79356b
A
4921 VM_PAGE_CHECK(src_m);
4922 VM_PAGE_CHECK(dest_m);
316670eb 4923#endif
39037602 4924 vm_object_lock_assert_held(src_m_object);
1c79356b 4925
39037602
A
4926 if (src_m_object != VM_OBJECT_NULL &&
4927 src_m_object->code_signed) {
2d21ac55 4928 /*
4a3eedf9 4929 * We're copying a page from a code-signed object.
2d21ac55
A
4930 * Whoever ends up mapping the copy page might care about
4931 * the original page's integrity, so let's validate the
4932 * source page now.
4933 */
4934 vm_page_copy_cs_validations++;
4935 vm_page_validate_cs(src_m);
39037602
A
4936#if DEVELOPMENT || DEBUG
4937 DTRACE_VM4(codesigned_copy,
4938 vm_object_t, src_m_object,
d9a64523
A
4939 vm_object_offset_t, src_m->vmp_offset,
4940 int, src_m->vmp_cs_validated,
4941 int, src_m->vmp_cs_tainted);
39037602
A
4942#endif /* DEVELOPMENT || DEBUG */
4943
2d21ac55 4944 }
6d2010ae 4945
2d21ac55 4946 /*
b0d623f7
A
4947 * Propagate the cs_tainted bit to the copy page. Do not propagate
4948 * the cs_validated bit.
2d21ac55 4949 */
d9a64523
A
4950 dest_m->vmp_cs_tainted = src_m->vmp_cs_tainted;
4951 if (dest_m->vmp_cs_tainted) {
2d21ac55
A
4952 vm_page_copy_cs_tainted++;
4953 }
d9a64523 4954 dest_m->vmp_error = src_m->vmp_error; /* sliding src_m might have failed... */
39037602 4955 pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(src_m), VM_PAGE_GET_PHYS_PAGE(dest_m));
1c79356b
A
4956}
4957
2d21ac55 4958#if MACH_ASSERT
b0d623f7
A
4959static void
4960_vm_page_print(
4961 vm_page_t p)
4962{
4963 printf("vm_page %p: \n", p);
39037602 4964 printf(" pageq: next=%p prev=%p\n",
d9a64523
A
4965 (vm_page_t)VM_PAGE_UNPACK_PTR(p->vmp_pageq.next),
4966 (vm_page_t)VM_PAGE_UNPACK_PTR(p->vmp_pageq.prev));
39037602 4967 printf(" listq: next=%p prev=%p\n",
d9a64523
A
4968 (vm_page_t)(VM_PAGE_UNPACK_PTR(p->vmp_listq.next)),
4969 (vm_page_t)(VM_PAGE_UNPACK_PTR(p->vmp_listq.prev)));
4970 printf(" next=%p\n", (vm_page_t)(VM_PAGE_UNPACK_PTR(p->vmp_next_m)));
4971 printf(" object=%p offset=0x%llx\n",VM_PAGE_OBJECT(p), p->vmp_offset);
4972 printf(" wire_count=%u\n", p->vmp_wire_count);
4973 printf(" q_state=%u\n", p->vmp_q_state);
b0d623f7 4974
39037602 4975 printf(" %slaundry, %sref, %sgobbled, %sprivate\n",
d9a64523
A
4976 (p->vmp_laundry ? "" : "!"),
4977 (p->vmp_reference ? "" : "!"),
4978 (p->vmp_gobbled ? "" : "!"),
4979 (p->vmp_private ? "" : "!"));
b0d623f7 4980 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
d9a64523
A
4981 (p->vmp_busy ? "" : "!"),
4982 (p->vmp_wanted ? "" : "!"),
4983 (p->vmp_tabled ? "" : "!"),
4984 (p->vmp_fictitious ? "" : "!"),
4985 (p->vmp_pmapped ? "" : "!"),
4986 (p->vmp_wpmapped ? "" : "!"));
39037602 4987 printf(" %sfree_when_done, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
d9a64523
A
4988 (p->vmp_free_when_done ? "" : "!"),
4989 (p->vmp_absent ? "" : "!"),
4990 (p->vmp_error ? "" : "!"),
4991 (p->vmp_dirty ? "" : "!"),
4992 (p->vmp_cleaning ? "" : "!"),
4993 (p->vmp_precious ? "" : "!"),
4994 (p->vmp_clustered ? "" : "!"));
5ba3f43e 4995 printf(" %soverwriting, %srestart, %sunusual\n",
d9a64523
A
4996 (p->vmp_overwriting ? "" : "!"),
4997 (p->vmp_restart ? "" : "!"),
4998 (p->vmp_unusual ? "" : "!"));
c18c124e 4999 printf(" %scs_validated, %scs_tainted, %scs_nx, %sno_cache\n",
d9a64523
A
5000 (p->vmp_cs_validated ? "" : "!"),
5001 (p->vmp_cs_tainted ? "" : "!"),
5002 (p->vmp_cs_nx ? "" : "!"),
5003 (p->vmp_no_cache ? "" : "!"));
b0d623f7 5004
39037602 5005 printf("phys_page=0x%x\n", VM_PAGE_GET_PHYS_PAGE(p));
b0d623f7
A
5006}
5007
1c79356b
A
5008/*
5009 * Check that the list of pages is ordered by
5010 * ascending physical address and has no holes.
5011 */
2d21ac55 5012static int
1c79356b
A
5013vm_page_verify_contiguous(
5014 vm_page_t pages,
5015 unsigned int npages)
5016{
39037602 5017 vm_page_t m;
1c79356b 5018 unsigned int page_count;
91447636 5019 vm_offset_t prev_addr;
1c79356b 5020
39037602 5021 prev_addr = VM_PAGE_GET_PHYS_PAGE(pages);
1c79356b
A
5022 page_count = 1;
5023 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
39037602 5024 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
b0d623f7 5025 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
39037602 5026 m, (long)prev_addr, VM_PAGE_GET_PHYS_PAGE(m));
6d2010ae 5027 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
1c79356b
A
5028 panic("vm_page_verify_contiguous: not contiguous!");
5029 }
39037602 5030 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
1c79356b
A
5031 ++page_count;
5032 }
5033 if (page_count != npages) {
2d21ac55 5034 printf("pages %p actual count 0x%x but requested 0x%x\n",
1c79356b
A
5035 pages, page_count, npages);
5036 panic("vm_page_verify_contiguous: count error");
5037 }
5038 return 1;
5039}
1c79356b
A
5040
5041
2d21ac55
A
5042/*
5043 * Check the free lists for proper length etc.
5044 */
fe8ab488 5045static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
b0d623f7
A
5046static unsigned int
5047vm_page_verify_free_list(
39037602 5048 vm_page_queue_head_t *vm_page_queue,
b0d623f7
A
5049 unsigned int color,
5050 vm_page_t look_for_page,
5051 boolean_t expect_page)
5052{
5053 unsigned int npages;
5054 vm_page_t m;
5055 vm_page_t prev_m;
5056 boolean_t found_page;
5057
fe8ab488
A
5058 if (! vm_page_verify_this_free_list_enabled)
5059 return 0;
5060
b0d623f7
A
5061 found_page = FALSE;
5062 npages = 0;
39037602
A
5063 prev_m = (vm_page_t)((uintptr_t)vm_page_queue);
5064
5065 vm_page_queue_iterate(vm_page_queue,
5066 m,
5067 vm_page_t,
d9a64523 5068 vmp_pageq) {
6d2010ae 5069
b0d623f7
A
5070 if (m == look_for_page) {
5071 found_page = TRUE;
5072 }
d9a64523 5073 if ((vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.prev) != prev_m)
b0d623f7 5074 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
d9a64523
A
5075 color, npages, m, (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.prev), prev_m);
5076 if ( ! m->vmp_busy )
b0d623f7
A
5077 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
5078 color, npages, m);
6d2010ae 5079 if (color != (unsigned int) -1) {
5ba3f43e 5080 if (VM_PAGE_GET_COLOR(m) != color)
6d2010ae 5081 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
5ba3f43e 5082 color, npages, m, VM_PAGE_GET_COLOR(m), color);
d9a64523 5083 if (m->vmp_q_state != VM_PAGE_ON_FREE_Q)
39037602 5084 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p - expecting q_state == VM_PAGE_ON_FREE_Q, found %d\n",
d9a64523 5085 color, npages, m, m->vmp_q_state);
39037602 5086 } else {
d9a64523 5087 if (m->vmp_q_state != VM_PAGE_ON_FREE_LOCAL_Q)
39037602 5088 panic("vm_page_verify_free_list(npages=%u): local page %p - expecting q_state == VM_PAGE_ON_FREE_LOCAL_Q, found %d\n",
d9a64523 5089 npages, m, m->vmp_q_state);
6d2010ae 5090 }
b0d623f7
A
5091 ++npages;
5092 prev_m = m;
5093 }
5094 if (look_for_page != VM_PAGE_NULL) {
5095 unsigned int other_color;
5096
5097 if (expect_page && !found_page) {
5098 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
39037602 5099 color, npages, look_for_page, VM_PAGE_GET_PHYS_PAGE(look_for_page));
b0d623f7
A
5100 _vm_page_print(look_for_page);
5101 for (other_color = 0;
5102 other_color < vm_colors;
5103 other_color++) {
5104 if (other_color == color)
5105 continue;
39037602 5106 vm_page_verify_free_list(&vm_page_queue_free[other_color].qhead,
6d2010ae 5107 other_color, look_for_page, FALSE);
b0d623f7 5108 }
6d2010ae 5109 if (color == (unsigned int) -1) {
d1ecb069
A
5110 vm_page_verify_free_list(&vm_lopage_queue_free,
5111 (unsigned int) -1, look_for_page, FALSE);
5112 }
b0d623f7
A
5113 panic("vm_page_verify_free_list(color=%u)\n", color);
5114 }
5115 if (!expect_page && found_page) {
5116 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
39037602 5117 color, npages, look_for_page, VM_PAGE_GET_PHYS_PAGE(look_for_page));
b0d623f7
A
5118 }
5119 }
5120 return npages;
5121}
5122
fe8ab488 5123static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
2d21ac55
A
5124static void
5125vm_page_verify_free_lists( void )
5126{
d1ecb069 5127 unsigned int color, npages, nlopages;
fe8ab488 5128 boolean_t toggle = TRUE;
b0d623f7 5129
fe8ab488 5130 if (! vm_page_verify_all_free_lists_enabled)
b0d623f7
A
5131 return;
5132
2d21ac55 5133 npages = 0;
b0d623f7
A
5134
5135 lck_mtx_lock(&vm_page_queue_free_lock);
fe8ab488
A
5136
5137 if (vm_page_verify_this_free_list_enabled == TRUE) {
5138 /*
5139 * This variable has been set globally for extra checking of
5140 * each free list Q. Since we didn't set it, we don't own it
5141 * and we shouldn't toggle it.
5142 */
5143 toggle = FALSE;
5144 }
5145
5146 if (toggle == TRUE) {
5147 vm_page_verify_this_free_list_enabled = TRUE;
5148 }
2d21ac55
A
5149
5150 for( color = 0; color < vm_colors; color++ ) {
39037602 5151 npages += vm_page_verify_free_list(&vm_page_queue_free[color].qhead,
6d2010ae 5152 color, VM_PAGE_NULL, FALSE);
2d21ac55 5153 }
d1ecb069
A
5154 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
5155 (unsigned int) -1,
5156 VM_PAGE_NULL, FALSE);
5157 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
5158 panic("vm_page_verify_free_lists: "
5159 "npages %u free_count %d nlopages %u lo_free_count %u",
5160 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
6d2010ae 5161
fe8ab488
A
5162 if (toggle == TRUE) {
5163 vm_page_verify_this_free_list_enabled = FALSE;
5164 }
5165
b0d623f7 5166 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55 5167}
2d21ac55 5168
b0d623f7 5169#endif /* MACH_ASSERT */
2d21ac55 5170
91447636 5171
3e170ce0 5172
5ba3f43e
A
5173#if __arm64__
5174/*
5175 * 1 or more clients (currently only SEP) ask for a large contiguous chunk of memory
5176 * after the system has 'aged'. To ensure that other allocation requests don't mess
5177 * with the chances of that request being satisfied, we pre-allocate a single contiguous
5178 * 10MB buffer and hand it out to the first request of >= 4MB.
5179 */
5180
5181kern_return_t cpm_preallocate_early(void);
5182
5183vm_page_t cpm_preallocated_pages_list = NULL;
5184boolean_t preallocated_buffer_available = FALSE;
5185
5186#define PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT ((10 * 1024 * 1024) / PAGE_SIZE_64) /* 10 MB */
5187#define MIN_CONTIG_PAGES_REQUEST_FOR_PREALLOCATED_BUFFER ((4 * 1024 *1024) / PAGE_SIZE_64) /* 4 MB */
5188
5189kern_return_t
5190cpm_preallocate_early(void)
5191{
5192
5193 kern_return_t kr = KERN_SUCCESS;
5194 vm_map_size_t prealloc_size = (PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT * PAGE_SIZE_64);
5195
5196 printf("cpm_preallocate_early called to preallocate contiguous buffer of %llu pages\n", PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT);
5197
5198 kr = cpm_allocate(CAST_DOWN(vm_size_t, prealloc_size), &cpm_preallocated_pages_list, 0, 0, TRUE, 0);
5199
5200 if (kr != KERN_SUCCESS) {
5201 printf("cpm_allocate for preallocated contig buffer failed with %d.\n", kr);
5202 } else {
5203 preallocated_buffer_available = TRUE;
5204 }
5205
5206 return kr;
5207}
5208#endif /* __arm64__ */
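/*
 * Editor's note -- illustrative arithmetic only, not part of the original
 * source: assuming the common 16KB arm64 page size, the preallocated run
 * above spans 10MB / 16KB = 640 pages and the hand-out threshold is
 * 4MB / 16KB = 256 pages.  A later vm_page_find_contiguous() request for,
 * say, 320 contiguous pages (5MB) falls inside [256, 640], so it is served
 * from this buffer and the unused 640 - 320 = 320 trailing pages are
 * returned to the free list (see the preallocated-buffer branch in
 * vm_page_find_contiguous() below).
 */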
3e170ce0
A
5209
5210
5211extern boolean_t (* volatile consider_buffer_cache_collect)(int);
5212
1c79356b 5213/*
2d21ac55 5214 * CONTIGUOUS PAGE ALLOCATION
2d21ac55
A
5215 *
5216 * Find a region large enough to contain at least n pages
1c79356b
A
5217 * of contiguous physical memory.
5218 *
2d21ac55
A
5219 * This is done by traversing the vm_page_t array in a linear fashion...
5220 * we assume that the vm_page_t array has the available physical pages in an
5221 * ordered, ascending list... this is currently true of all our implementations
5222 * and must remain so... there can be 'holes' in the array... we also can
5223 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
5224 * which used to happen via 'vm_page_convert'... that function was no longer
5225 * being called and was removed...
5226 *
5227 * The basic flow consists of stabilizing some of the interesting state of
5228 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
5229 * sweep at the beginning of the array looking for pages that meet our criteria
5230 * for a 'stealable' page... currently we are pretty conservative... if the page
5231 * meets this criteria and is physically contiguous to the previous page in the 'run'
5232 * we keep developing it. If we hit a page that doesn't fit, we reset our state
5233 * and start to develop a new run... if at this point we've already considered
5234 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
5235 * and mutex_pause (which will yield the processor), to keep the latency low w/r
5236 * to other threads trying to acquire free pages (or move pages from q to q),
5237 * and then continue from the spot we left off... we only make 1 pass through the
5238 * array. Once we have a 'run' that is long enough, we'll go into the loop
5239 * which steals the pages from the queues they're currently on... pages on the free
5240 * queue can be stolen directly... pages that are on any of the other queues
5241 * must be removed from the object they are tabled on... this requires taking the
5242 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
5243 * or if the state of the page behind the vm_object lock is no longer viable, we'll
5244 * dump the pages we've currently stolen back to the free list, and pick up our
5245 * scan from the point where we aborted the 'current' run.
5246 *
5247 *
1c79356b 5248 * Requirements:
2d21ac55 5249 * - neither vm_page_queue nor vm_free_list lock can be held on entry
1c79356b 5250 *
2d21ac55 5251 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
1c79356b 5252 *
e5568f75 5253 * Algorithm:
1c79356b 5254 */
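/*
 * Editor's sketch, not part of the original source: the run-building
 * invariant described above, reduced to a toy, self-contained form that
 * scans a plain array of physical page numbers.  The function name and
 * parameters are hypothetical; the real routine below layers page-state
 * checks, lock dropping/yielding and page stealing on top of this skeleton.
 */
#if 0	/* example only */
static unsigned int
example_find_run(const ppnum_t *pnums, unsigned int count,
		 unsigned int want, ppnum_t pnum_mask)
{
	unsigned int	idx, start = 0, npages = 0;
	ppnum_t		prev = 0;

	for (idx = 0; idx < count && npages < want; idx++) {
		if (npages == 0 && (pnums[idx] & pnum_mask) != 0)
			continue;			/* bad alignment for a run start */

		if (npages != 0 && pnums[idx] != prev + 1) {
			npages = 0;			/* contiguity broken... reset the run */
			if ((pnums[idx] & pnum_mask) != 0)
				continue;		/* and this page can't start a new one */
		}
		if (npages == 0)
			start = idx;			/* remember where this run begins */
		prev = pnums[idx];
		npages++;
	}
	return (npages == want) ? start : (unsigned int) -1;
}
#endif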
2d21ac55
A
5255
5256#define MAX_CONSIDERED_BEFORE_YIELD 1000
5257
5258
5259#define RESET_STATE_OF_RUN() \
5260 MACRO_BEGIN \
5261 prevcontaddr = -2; \
b0d623f7 5262 start_pnum = -1; \
2d21ac55
A
5263 free_considered = 0; \
5264 substitute_needed = 0; \
5265 npages = 0; \
5266 MACRO_END
5267
b0d623f7
A
5268/*
5269 * Can we steal in-use (i.e. not free) pages when searching for
5270 * physically-contiguous pages ?
5271 */
5272#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
5273
5274static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
5275#if DEBUG
5276int vm_page_find_contig_debug = 0;
5277#endif
2d21ac55 5278
1c79356b
A
5279static vm_page_t
5280vm_page_find_contiguous(
2d21ac55
A
5281 unsigned int contig_pages,
5282 ppnum_t max_pnum,
b0d623f7
A
5283 ppnum_t pnum_mask,
5284 boolean_t wire,
5285 int flags)
1c79356b 5286{
2d21ac55 5287 vm_page_t m = NULL;
5ba3f43e
A
5288 ppnum_t prevcontaddr = 0;
5289 ppnum_t start_pnum = 0;
5290 unsigned int npages = 0, considered = 0, scanned = 0;
5291 unsigned int page_idx = 0, start_idx = 0, last_idx = 0, orig_last_idx = 0;
b0d623f7 5292 unsigned int idx_last_contig_page_found = 0;
5ba3f43e
A
5293 int free_considered = 0, free_available = 0;
5294 int substitute_needed = 0;
3e170ce0 5295 boolean_t wrapped, zone_gc_called = FALSE;
5ba3f43e 5296 kern_return_t kr;
593a1d5f 5297#if DEBUG
5ba3f43e
A
5298 clock_sec_t tv_start_sec = 0, tv_end_sec = 0;
5299 clock_usec_t tv_start_usec = 0, tv_end_usec = 0;
593a1d5f 5300#endif
3e170ce0 5301
2d21ac55
A
5302 int yielded = 0;
5303 int dumped_run = 0;
5304 int stolen_pages = 0;
39236c6e 5305 int compressed_pages = 0;
3e170ce0 5306
1c79356b 5307
2d21ac55 5308 if (contig_pages == 0)
1c79356b
A
5309 return VM_PAGE_NULL;
5310
3e170ce0
A
5311full_scan_again:
5312
2d21ac55
A
5313#if MACH_ASSERT
5314 vm_page_verify_free_lists();
593a1d5f
A
5315#endif
5316#if DEBUG
2d21ac55
A
5317 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
5318#endif
39236c6e
A
5319 PAGE_REPLACEMENT_ALLOWED(TRUE);
5320
2d21ac55 5321 vm_page_lock_queues();
3e170ce0 5322
5ba3f43e
A
5323#if __arm64__
5324 if (preallocated_buffer_available) {
5325
5326 if ((contig_pages >= MIN_CONTIG_PAGES_REQUEST_FOR_PREALLOCATED_BUFFER) && (contig_pages <= PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT)) {
5327
5328 m = cpm_preallocated_pages_list;
5329
5330 start_idx = (unsigned int) (m - &vm_pages[0]);
5331
5332 if (wire == FALSE) {
5333
5334 last_idx = start_idx;
5335
5336 for(npages = 0; npages < contig_pages; npages++, last_idx++) {
5337
d9a64523 5338 assert(vm_pages[last_idx].vmp_gobbled == FALSE);
5ba3f43e 5339
d9a64523 5340 vm_pages[last_idx].vmp_gobbled = TRUE;
5ba3f43e
A
5341 vm_page_gobble_count++;
5342
d9a64523 5343 assert(1 == vm_pages[last_idx].vmp_wire_count);
5ba3f43e
A
5344 /*
5345 * Gobbled pages are counted as wired pages. So no need to drop
5346 * the global wired page count. Just the page's wire count is fine.
5347 */
d9a64523
A
5348 vm_pages[last_idx].vmp_wire_count--;
5349 vm_pages[last_idx].vmp_q_state = VM_PAGE_NOT_ON_Q;
5ba3f43e
A
5350 }
5351
5352 }
5353
5354 last_idx = start_idx + contig_pages - 1;
5355
d9a64523 5356 vm_pages[last_idx].vmp_snext = NULL;
5ba3f43e
A
5357
5358 printf("Using preallocated buffer: Requested size (pages):%d... index range: %d-%d...freeing %llu pages\n", contig_pages, start_idx, last_idx, PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT - contig_pages);
5359
5360 last_idx += 1;
5361 for(npages = contig_pages; npages < PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT; npages++, last_idx++) {
5362
5363 VM_PAGE_ZERO_PAGEQ_ENTRY(&vm_pages[last_idx]);
5364 vm_page_free(&vm_pages[last_idx]);
5365 }
5366
5367 cpm_preallocated_pages_list = NULL;
5368 preallocated_buffer_available = FALSE;
5369
5370 goto done_scanning;
5371 }
5372 }
5373#endif /* __arm64__ */
3e170ce0 5374
b0d623f7 5375 lck_mtx_lock(&vm_page_queue_free_lock);
2d21ac55
A
5376
5377 RESET_STATE_OF_RUN();
1c79356b 5378
b0d623f7 5379 scanned = 0;
2d21ac55
A
5380 considered = 0;
5381 free_available = vm_page_free_count - vm_page_free_reserved;
e5568f75 5382
b0d623f7
A
5383 wrapped = FALSE;
5384
5385 if(flags & KMA_LOMEM)
5386 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
5387 else
5388 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
5389
5390 orig_last_idx = idx_last_contig_page_found;
5391 last_idx = orig_last_idx;
5392
5393 for (page_idx = last_idx, start_idx = last_idx;
2d21ac55
A
5394 npages < contig_pages && page_idx < vm_pages_count;
5395 page_idx++) {
b0d623f7
A
5396retry:
5397 if (wrapped &&
5398 npages == 0 &&
5399 page_idx >= orig_last_idx) {
5400 /*
5401 * We're back where we started and we haven't
5402 * found any suitable contiguous range. Let's
5403 * give up.
5404 */
5405 break;
5406 }
5407 scanned++;
2d21ac55 5408 m = &vm_pages[page_idx];
e5568f75 5409
d9a64523
A
5410 assert(!m->vmp_fictitious);
5411 assert(!m->vmp_private);
b0d623f7 5412
39037602 5413 if (max_pnum && VM_PAGE_GET_PHYS_PAGE(m) > max_pnum) {
2d21ac55
A
5414 /* no more low pages... */
5415 break;
e5568f75 5416 }
39037602 5417 if (!npages && ((VM_PAGE_GET_PHYS_PAGE(m) & pnum_mask) != 0)) {
b0d623f7
A
5418 /*
5419 * not aligned
5420 */
5421 RESET_STATE_OF_RUN();
5422
d9a64523
A
5423 } else if (VM_PAGE_WIRED(m) || m->vmp_gobbled ||
5424 m->vmp_laundry || m->vmp_wanted ||
5425 m->vmp_cleaning || m->vmp_overwriting || m->vmp_free_when_done) {
2d21ac55
A
5426 /*
5427 * page is in a transient state
5428 * or a state we don't want to deal
5429 * with, so don't consider it which
5430 * means starting a new run
5431 */
5432 RESET_STATE_OF_RUN();
1c79356b 5433
d9a64523
A
5434 } else if ((m->vmp_q_state == VM_PAGE_NOT_ON_Q) ||
5435 (m->vmp_q_state == VM_PAGE_ON_FREE_LOCAL_Q) ||
5436 (m->vmp_q_state == VM_PAGE_ON_FREE_LOPAGE_Q) ||
5437 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
2d21ac55 5438 /*
39037602
A
5439 * page needs to be on one of our queues (other than the pageout or special free queues)
5440 * or it needs to belong to the compressor pool (which is now indicated
d9a64523 5441 * by vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR and falls out
39037602 5442 * from the check for VM_PAGE_NOT_ON_Q)
2d21ac55
A
5443 * in order for it to be stable behind the
5444 * locks we hold at this point...
5445 * if not, don't consider it which
5446 * means starting a new run
5447 */
5448 RESET_STATE_OF_RUN();
5449
d9a64523 5450 } else if ((m->vmp_q_state != VM_PAGE_ON_FREE_Q) && (!m->vmp_tabled || m->vmp_busy)) {
2d21ac55
A
5451 /*
5452 * pages on the free list are always 'busy'
5453 * so we couldn't test for 'busy' in the check
5454 * for the transient states... pages that are
5455 * 'free' are never 'tabled', so we also couldn't
5456 * test for 'tabled'. So we check here to make
5457 * sure that a non-free page is not busy and is
5458 * tabled on an object...
5459 * if not, don't consider it which
5460 * means starting a new run
5461 */
5462 RESET_STATE_OF_RUN();
5463
5464 } else {
39037602
A
5465 if (VM_PAGE_GET_PHYS_PAGE(m) != prevcontaddr + 1) {
5466 if ((VM_PAGE_GET_PHYS_PAGE(m) & pnum_mask) != 0) {
b0d623f7
A
5467 RESET_STATE_OF_RUN();
5468 goto did_consider;
5469 } else {
5470 npages = 1;
5471 start_idx = page_idx;
39037602 5472 start_pnum = VM_PAGE_GET_PHYS_PAGE(m);
b0d623f7 5473 }
2d21ac55
A
5474 } else {
5475 npages++;
e5568f75 5476 }
39037602 5477 prevcontaddr = VM_PAGE_GET_PHYS_PAGE(m);
b0d623f7
A
5478
5479 VM_PAGE_CHECK(m);
d9a64523 5480 if (m->vmp_q_state == VM_PAGE_ON_FREE_Q) {
2d21ac55 5481 free_considered++;
b0d623f7
A
5482 } else {
5483 /*
5484 * This page is not free.
5485 * If we can't steal used pages,
5486 * we have to give up this run
5487 * and keep looking.
5488 * Otherwise, we might need to
5489 * move the contents of this page
5490 * into a substitute page.
5491 */
5492#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
d9a64523 5493 if (m->vmp_pmapped || m->vmp_dirty || m->vmp_precious) {
b0d623f7
A
5494 substitute_needed++;
5495 }
5496#else
5497 RESET_STATE_OF_RUN();
5498#endif
2d21ac55 5499 }
b0d623f7 5500
2d21ac55
A
5501 if ((free_considered + substitute_needed) > free_available) {
5502 /*
5503 * if we let this run continue
5504 * we will end up dropping the vm_page_free_count
5505 * below the reserve limit... we need to abort
5506 * this run, but we can at least re-consider this
5507 * page... thus the jump back to 'retry'
5508 */
5509 RESET_STATE_OF_RUN();
5510
5511 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
5512 considered++;
5513 goto retry;
e5568f75 5514 }
2d21ac55
A
5515 /*
5516 * free_available == 0
5517 * so can't consider any free pages... if
5518 * we went to retry in this case, we'd
5519 * get stuck looking at the same page
5520 * w/o making any forward progress
5521 * we also want to take this path if we've already
5522 * reached our limit that controls the lock latency
5523 */
e5568f75 5524 }
2d21ac55 5525 }
b0d623f7 5526did_consider:
2d21ac55 5527 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
39236c6e
A
5528
5529 PAGE_REPLACEMENT_ALLOWED(FALSE);
5530
b0d623f7 5531 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55 5532 vm_page_unlock_queues();
e5568f75 5533
2d21ac55
A
5534 mutex_pause(0);
5535
39236c6e
A
5536 PAGE_REPLACEMENT_ALLOWED(TRUE);
5537
2d21ac55 5538 vm_page_lock_queues();
b0d623f7 5539 lck_mtx_lock(&vm_page_queue_free_lock);
2d21ac55
A
5540
5541 RESET_STATE_OF_RUN();
1c79356b 5542 /*
2d21ac55
A
5543 * reset our free page limit since we
5544 * dropped the lock protecting the vm_page_free_queue
1c79356b 5545 */
2d21ac55
A
5546 free_available = vm_page_free_count - vm_page_free_reserved;
5547 considered = 0;
3e170ce0 5548
2d21ac55 5549 yielded++;
3e170ce0 5550
2d21ac55
A
5551 goto retry;
5552 }
5553 considered++;
5554 }
5555 m = VM_PAGE_NULL;
5556
b0d623f7
A
5557 if (npages != contig_pages) {
5558 if (!wrapped) {
5559 /*
5560 * We didn't find a contiguous range but we didn't
5561 * start from the very first page.
5562 * Start again from the very first page.
5563 */
5564 RESET_STATE_OF_RUN();
5565 if( flags & KMA_LOMEM)
5566 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
5567 else
5568 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
5569 last_idx = 0;
5570 page_idx = last_idx;
5571 wrapped = TRUE;
5572 goto retry;
5573 }
5574 lck_mtx_unlock(&vm_page_queue_free_lock);
5575 } else {
2d21ac55
A
5576 vm_page_t m1;
5577 vm_page_t m2;
5578 unsigned int cur_idx;
5579 unsigned int tmp_start_idx;
5580 vm_object_t locked_object = VM_OBJECT_NULL;
5581 boolean_t abort_run = FALSE;
5582
b0d623f7
A
5583 assert(page_idx - start_idx == contig_pages);
5584
2d21ac55
A
5585 tmp_start_idx = start_idx;
5586
5587 /*
5588 * first pass through to pull the free pages
5589 * off of the free queue so that in case we
5590 * need substitute pages, we won't grab any
5591 * of the free pages in the run... we'll clear
5592 * the 'free' bit in the 2nd pass, and even in
5593 * an abort_run case, we'll collect all of the
5594 * free pages in this run and return them to the free list
5595 */
5596 while (start_idx < page_idx) {
5597
5598 m1 = &vm_pages[start_idx++];
5599
b0d623f7 5600#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
d9a64523 5601 assert(m1->vmp_q_state == VM_PAGE_ON_FREE_Q);
b0d623f7
A
5602#endif
5603
d9a64523 5604 if (m1->vmp_q_state == VM_PAGE_ON_FREE_Q) {
0b4c1975 5605 unsigned int color;
2d21ac55 5606
5ba3f43e 5607 color = VM_PAGE_GET_COLOR(m1);
b0d623f7 5608#if MACH_ASSERT
39037602 5609 vm_page_verify_free_list(&vm_page_queue_free[color].qhead, color, m1, TRUE);
b0d623f7 5610#endif
39037602
A
5611 vm_page_queue_remove(&vm_page_queue_free[color].qhead,
5612 m1,
5613 vm_page_t,
d9a64523 5614 vmp_pageq);
39037602
A
5615
5616 VM_PAGE_ZERO_PAGEQ_ENTRY(m1);
0b4c1975 5617#if MACH_ASSERT
39037602 5618 vm_page_verify_free_list(&vm_page_queue_free[color].qhead, color, VM_PAGE_NULL, FALSE);
0b4c1975 5619#endif
b0d623f7
A
5620 /*
5621 * Clear the "free" bit so that this page
5622 * does not get considered for another
5623 * concurrent physically-contiguous allocation.
5624 */
d9a64523
A
5625 m1->vmp_q_state = VM_PAGE_NOT_ON_Q;
5626 assert(m1->vmp_busy);
0b4c1975
A
5627
5628 vm_page_free_count--;
2d21ac55
A
5629 }
5630 }
b0d623f7
A
5631 if( flags & KMA_LOMEM)
5632 vm_page_lomem_find_contiguous_last_idx = page_idx;
5633 else
5634 vm_page_find_contiguous_last_idx = page_idx;
5635
2d21ac55
A
5636 /*
5637 * we can drop the free queue lock at this point since
5638 * we've pulled any 'free' candidates off of the list
5639 * we need it dropped so that we can do a vm_page_grab
5641 * when substituting for pmapped/dirty pages
5641 */
b0d623f7 5642 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
5643
5644 start_idx = tmp_start_idx;
5645 cur_idx = page_idx - 1;
5646
5647 while (start_idx++ < page_idx) {
5648 /*
5649 * must go through the list from back to front
5650 * so that the page list is created in the
5651 * correct order - low -> high phys addresses
5652 */
5653 m1 = &vm_pages[cur_idx--];
5654
d9a64523 5655 if (m1->vmp_object == 0) {
2d21ac55 5656 /*
b0d623f7 5657 * page has already been removed from
2d21ac55
A
5658 * the free list in the 1st pass
5659 */
d9a64523
A
5660 assert(m1->vmp_q_state == VM_PAGE_NOT_ON_Q);
5661 assert(m1->vmp_offset == (vm_object_offset_t) -1);
5662 assert(m1->vmp_busy);
5663 assert(!m1->vmp_wanted);
5664 assert(!m1->vmp_laundry);
e5568f75 5665 } else {
2d21ac55 5666 vm_object_t object;
39236c6e
A
5667 int refmod;
5668 boolean_t disconnected, reusable;
2d21ac55
A
5669
5670 if (abort_run == TRUE)
5671 continue;
5672
d9a64523 5673 assert(m1->vmp_q_state != VM_PAGE_NOT_ON_Q);
39037602
A
5674
5675 object = VM_PAGE_OBJECT(m1);
2d21ac55
A
5676
5677 if (object != locked_object) {
5678 if (locked_object) {
5679 vm_object_unlock(locked_object);
5680 locked_object = VM_OBJECT_NULL;
5681 }
5682 if (vm_object_lock_try(object))
5683 locked_object = object;
5684 }
5685 if (locked_object == VM_OBJECT_NULL ||
d9a64523
A
5686 (VM_PAGE_WIRED(m1) || m1->vmp_gobbled ||
5687 m1->vmp_laundry || m1->vmp_wanted ||
5688 m1->vmp_cleaning || m1->vmp_overwriting || m1->vmp_free_when_done || m1->vmp_busy) ||
5689 (m1->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
2d21ac55
A
5690
5691 if (locked_object) {
5692 vm_object_unlock(locked_object);
5693 locked_object = VM_OBJECT_NULL;
5694 }
5695 tmp_start_idx = cur_idx;
5696 abort_run = TRUE;
5697 continue;
5698 }
39236c6e
A
5699
5700 disconnected = FALSE;
5701 reusable = FALSE;
5702
d9a64523 5703 if ((m1->vmp_reusable ||
39037602 5704 object->all_reusable) &&
d9a64523
A
5705 (m1->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q) &&
5706 !m1->vmp_dirty &&
5707 !m1->vmp_reference) {
39236c6e 5708 /* reusable page... */
39037602 5709 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1));
39236c6e
A
5710 disconnected = TRUE;
5711 if (refmod == 0) {
5712 /*
5713 * ... not reused: can steal
5714 * without relocating contents.
5715 */
5716 reusable = TRUE;
5717 }
5718 }
5719
d9a64523 5720 if ((m1->vmp_pmapped &&
39236c6e 5721 ! reusable) ||
d9a64523
A
5722 m1->vmp_dirty ||
5723 m1->vmp_precious) {
2d21ac55
A
5724 vm_object_offset_t offset;
5725
5726 m2 = vm_page_grab();
5727
5728 if (m2 == VM_PAGE_NULL) {
5729 if (locked_object) {
5730 vm_object_unlock(locked_object);
5731 locked_object = VM_OBJECT_NULL;
5732 }
5733 tmp_start_idx = cur_idx;
5734 abort_run = TRUE;
5735 continue;
5736 }
39236c6e 5737 if (! disconnected) {
d9a64523 5738 if (m1->vmp_pmapped)
39037602 5739 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1));
39236c6e
A
5740 else
5741 refmod = 0;
5742 }
5743
5744 /* copy the page's contents */
39037602 5745 pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(m1), VM_PAGE_GET_PHYS_PAGE(m2));
39236c6e
A
5746 /* copy the page's state */
5747 assert(!VM_PAGE_WIRED(m1));
d9a64523
A
5748 assert(m1->vmp_q_state != VM_PAGE_ON_FREE_Q);
5749 assert(m1->vmp_q_state != VM_PAGE_ON_PAGEOUT_Q);
5750 assert(!m1->vmp_laundry);
5751 m2->vmp_reference = m1->vmp_reference;
5752 assert(!m1->vmp_gobbled);
5753 assert(!m1->vmp_private);
5754 m2->vmp_no_cache = m1->vmp_no_cache;
5755 m2->vmp_xpmapped = 0;
5756 assert(!m1->vmp_busy);
5757 assert(!m1->vmp_wanted);
5758 assert(!m1->vmp_fictitious);
5759 m2->vmp_pmapped = m1->vmp_pmapped; /* should flush cache ? */
5760 m2->vmp_wpmapped = m1->vmp_wpmapped;
5761 assert(!m1->vmp_free_when_done);
5762 m2->vmp_absent = m1->vmp_absent;
5763 m2->vmp_error = m1->vmp_error;
5764 m2->vmp_dirty = m1->vmp_dirty;
5765 assert(!m1->vmp_cleaning);
5766 m2->vmp_precious = m1->vmp_precious;
5767 m2->vmp_clustered = m1->vmp_clustered;
5768 assert(!m1->vmp_overwriting);
5769 m2->vmp_restart = m1->vmp_restart;
5770 m2->vmp_unusual = m1->vmp_unusual;
5771 m2->vmp_cs_validated = m1->vmp_cs_validated;
5772 m2->vmp_cs_tainted = m1->vmp_cs_tainted;
5773 m2->vmp_cs_nx = m1->vmp_cs_nx;
39236c6e
A
5774
5775 /*
5776 * If m1 had really been reusable,
5777 * we would have just stolen it, so
5778 * let's not propagate its "reusable"
5779 * bit and assert that m2 is not
5780 * marked as "reusable".
5781 */
d9a64523
A
5782 // m2->vmp_reusable = m1->vmp_reusable;
5783 assert(!m2->vmp_reusable);
39236c6e 5784
d9a64523 5785 // assert(!m1->vmp_lopage);
39037602 5786
d9a64523
A
5787 if (m1->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR)
5788 m2->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
39236c6e 5789
15129b1c
A
5790 /*
5791 * page may need to be flushed if
5792 * it is marshalled into a UPL
5793 * that is going to be used by a device
5794 * that doesn't support coherency
5795 */
d9a64523 5796 m2->vmp_written_by_kernel = TRUE;
15129b1c 5797
39236c6e
A
5798 /*
5799 * make sure we clear the ref/mod state
5800 * from the pmap layer... else we risk
5801 * inheriting state from the last time
5802 * this page was used...
5803 */
39037602 5804 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(m2), VM_MEM_MODIFIED | VM_MEM_REFERENCED);
2d21ac55
A
5805
5806 if (refmod & VM_MEM_REFERENCED)
d9a64523 5807 m2->vmp_reference = TRUE;
316670eb
A
5808 if (refmod & VM_MEM_MODIFIED) {
5809 SET_PAGE_DIRTY(m2, TRUE);
5810 }
d9a64523 5811 offset = m1->vmp_offset;
2d21ac55
A
5812
5813 /*
5814 * completely cleans up the state
5815 * of the page so that it is ready
5816 * to be put onto the free list, or
5817 * for this purpose it looks like it
5818 * just came off of the free list
5819 */
5820 vm_page_free_prepare(m1);
5821
5822 /*
39236c6e
A
5823 * now put the substitute page
5824 * on the object
2d21ac55 5825 */
3e170ce0 5826 vm_page_insert_internal(m2, locked_object, offset, VM_KERN_MEMORY_NONE, TRUE, TRUE, FALSE, FALSE, NULL);
2d21ac55 5827
d9a64523
A
5828 if (m2->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
5829 m2->vmp_pmapped = TRUE;
5830 m2->vmp_wpmapped = TRUE;
2d21ac55 5831
d9a64523 5832 PMAP_ENTER(kernel_pmap, m2->vmp_offset, m2,
5ba3f43e
A
5833 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE, kr);
5834
5835 assert(kr == KERN_SUCCESS);
3e170ce0 5836
39236c6e 5837 compressed_pages++;
3e170ce0 5838
39236c6e 5839 } else {
d9a64523 5840 if (m2->vmp_reference)
39236c6e
A
5841 vm_page_activate(m2);
5842 else
5843 vm_page_deactivate(m2);
5844 }
2d21ac55
A
5845 PAGE_WAKEUP_DONE(m2);
5846
5847 } else {
d9a64523 5848 assert(m1->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR);
39236c6e 5849
2d21ac55
A
5850 /*
5851 * completely cleans up the state
5852 * of the page so that it is ready
5853 * to be put onto the free list, or
5854 * for this purpose it looks like it
5855 * just came off of the free list
5856 */
5857 vm_page_free_prepare(m1);
5858 }
3e170ce0 5859
2d21ac55 5860 stolen_pages++;
3e170ce0 5861
1c79356b 5862 }
39037602
A
5863#if CONFIG_BACKGROUND_QUEUE
5864 vm_page_assign_background_state(m1);
5865#endif
5866 VM_PAGE_ZERO_PAGEQ_ENTRY(m1);
d9a64523 5867 m1->vmp_snext = m;
2d21ac55 5868 m = m1;
e5568f75 5869 }
2d21ac55
A
5870 if (locked_object) {
5871 vm_object_unlock(locked_object);
5872 locked_object = VM_OBJECT_NULL;
1c79356b
A
5873 }
5874
2d21ac55 5875 if (abort_run == TRUE) {
2d21ac55
A
5876 /*
5877 * want the index of the last
5878 * page in this run that was
5879 * successfully 'stolen', so back
5880 * it up 1 for the auto-decrement on use
5881 * and 1 more to bump back over this page
5882 */
5883 page_idx = tmp_start_idx + 2;
b0d623f7 5884 if (page_idx >= vm_pages_count) {
d190cdc3
A
5885 if (wrapped) {
5886 if (m != VM_PAGE_NULL) {
5887 vm_page_unlock_queues();
5888 vm_page_free_list(m, FALSE);
5889 vm_page_lock_queues();
5890 m = VM_PAGE_NULL;
5891 }
5892 dumped_run++;
b0d623f7 5893 goto done_scanning;
d190cdc3 5894 }
b0d623f7
A
5895 page_idx = last_idx = 0;
5896 wrapped = TRUE;
5897 }
5898 abort_run = FALSE;
5899
2d21ac55 5900 /*
b0d623f7
A
5901 * We didn't find a contiguous range but we didn't
5902 * start from the very first page.
5903 * Start again from the very first page.
2d21ac55 5904 */
b0d623f7
A
5905 RESET_STATE_OF_RUN();
5906
5907 if( flags & KMA_LOMEM)
5908 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
5909 else
5910 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
5911
5912 last_idx = page_idx;
2d21ac55 5913
d190cdc3
A
5914 if (m != VM_PAGE_NULL) {
5915 vm_page_unlock_queues();
5916 vm_page_free_list(m, FALSE);
5917 vm_page_lock_queues();
5918 m = VM_PAGE_NULL;
5919 }
5920 dumped_run++;
5921
b0d623f7
A
5922 lck_mtx_lock(&vm_page_queue_free_lock);
5923 /*
5924 * reset our free page limit since we
5925 * dropped the lock protecting the vm_page_free_queue
5926 */
5927 free_available = vm_page_free_count - vm_page_free_reserved;
2d21ac55
A
5928 goto retry;
5929 }
e5568f75 5930
e5568f75 5931 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
2d21ac55 5932
d9a64523
A
5933 assert(m1->vmp_q_state == VM_PAGE_NOT_ON_Q);
5934 assert(m1->vmp_wire_count == 0);
39037602
A
5935
5936 if (wire == TRUE) {
d9a64523
A
5937 m1->vmp_wire_count++;
5938 m1->vmp_q_state = VM_PAGE_IS_WIRED;
39037602 5939 } else
d9a64523 5940 m1->vmp_gobbled = TRUE;
e5568f75 5941 }
2d21ac55
A
5942 if (wire == FALSE)
5943 vm_page_gobble_count += npages;
5944
5945 /*
5946 * gobbled pages are also counted as wired pages
5947 */
e5568f75 5948 vm_page_wire_count += npages;
e5568f75 5949
2d21ac55
A
5950 assert(vm_page_verify_contiguous(m, npages));
5951 }
5952done_scanning:
39236c6e
A
5953 PAGE_REPLACEMENT_ALLOWED(FALSE);
5954
2d21ac55
A
5955 vm_page_unlock_queues();
5956
593a1d5f 5957#if DEBUG
2d21ac55
A
5958 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
5959
5960 tv_end_sec -= tv_start_sec;
5961 if (tv_end_usec < tv_start_usec) {
5962 tv_end_sec--;
5963 tv_end_usec += 1000000;
1c79356b 5964 }
2d21ac55
A
5965 tv_end_usec -= tv_start_usec;
5966 if (tv_end_usec >= 1000000) {
5967 tv_end_sec++;
5968 tv_end_usec -= 1000000;
5969 }
b0d623f7 5970 if (vm_page_find_contig_debug) {
39236c6e
A
5971 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
5972 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
5973 (long)tv_end_sec, tv_end_usec, orig_last_idx,
5974 scanned, yielded, dumped_run, stolen_pages, compressed_pages);
b0d623f7 5975 }
e5568f75 5976
593a1d5f
A
5977#endif
5978#if MACH_ASSERT
2d21ac55
A
5979 vm_page_verify_free_lists();
5980#endif
3e170ce0
A
5981 if (m == NULL && zone_gc_called == FALSE) {
5982 printf("%s(num=%d,low=%d): found %d pages at 0x%llx...scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages... wired count is %d\n",
5983 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
5984 scanned, yielded, dumped_run, stolen_pages, compressed_pages, vm_page_wire_count);
5985
5986 if (consider_buffer_cache_collect != NULL) {
5987 (void)(*consider_buffer_cache_collect)(1);
5988 }
5989
5ba3f43e 5990 consider_zone_gc(FALSE);
3e170ce0
A
5991
5992 zone_gc_called = TRUE;
5993
5994 printf("vm_page_find_contiguous: zone_gc called... wired count is %d\n", vm_page_wire_count);
5995 goto full_scan_again;
5996 }
5997
e5568f75 5998 return m;
1c79356b
A
5999}
6000
6001/*
6002 * Allocate a list of contiguous, wired pages.
6003 */
6004kern_return_t
6005cpm_allocate(
6006 vm_size_t size,
6007 vm_page_t *list,
2d21ac55 6008 ppnum_t max_pnum,
b0d623f7
A
6009 ppnum_t pnum_mask,
6010 boolean_t wire,
6011 int flags)
1c79356b 6012{
91447636
A
6013 vm_page_t pages;
6014 unsigned int npages;
1c79356b 6015
6d2010ae 6016 if (size % PAGE_SIZE != 0)
1c79356b
A
6017 return KERN_INVALID_ARGUMENT;
6018
b0d623f7
A
6019 npages = (unsigned int) (size / PAGE_SIZE);
6020 if (npages != size / PAGE_SIZE) {
6021 /* 32-bit overflow */
6022 return KERN_INVALID_ARGUMENT;
6023 }
1c79356b 6024
1c79356b
A
6025 /*
6026 * Obtain a pointer to a subset of the free
6027 * list large enough to satisfy the request;
6028 * the region will be physically contiguous.
6029 */
b0d623f7 6030 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
e5568f75 6031
2d21ac55 6032 if (pages == VM_PAGE_NULL)
1c79356b 6033 return KERN_NO_SPACE;
1c79356b 6034 /*
2d21ac55 6035 * determine need for wakeups
1c79356b 6036 */
d9a64523
A
6037 if (vm_page_free_count < vm_page_free_min)
6038 thread_wakeup((event_t) &vm_page_free_wanted);
2d21ac55 6039
6d2010ae
A
6040 VM_CHECK_MEMORYSTATUS;
6041
1c79356b
A
6042 /*
6043 * The CPM pages should now be available and
6044 * ordered by ascending physical address.
6045 */
6046 assert(vm_page_verify_contiguous(pages, npages));
6047
6048 *list = pages;
6049 return KERN_SUCCESS;
6050}
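/*
 * Editor's note: illustrative caller sketch, not part of the original source.
 * It only shows the calling convention of cpm_allocate(); the function name
 * and the 2MB size are hypothetical.  A max_pnum of 0 means "no physical
 * ceiling" and a pnum_mask of 0 means "no alignment constraint", matching
 * the way cpm_preallocate_early() invokes it above.
 */
#if 0	/* example only */
static kern_return_t
example_grab_contiguous_2mb(vm_page_t *out_pages)
{
	kern_return_t	kr;

	/* size must be a whole number of pages */
	kr = cpm_allocate((vm_size_t)(2 * 1024 * 1024),	/* size in bytes */
			  out_pages,			/* returned page list */
			  0,				/* max_pnum: no ceiling */
			  0,				/* pnum_mask: no alignment */
			  TRUE,				/* wire the pages */
			  0);				/* flags */

	/* on success, *out_pages is ordered by ascending physical address */
	return kr;
}
#endif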
6d2010ae
A
6051
6052
6053unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
6054
6055/*
6056 * when working on a 'run' of pages, it is necessary to hold
6057 * the vm_page_queue_lock (a hot global lock) for certain operations
6058 * on the page... however, the majority of the work can be done
6059 * while merely holding the object lock... in fact there are certain
6060 * collections of pages that don't require any work brokered by the
6061 * vm_page_queue_lock... to mitigate the time spent behind the global
6062 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
6063 * while doing all of the work that doesn't require the vm_page_queue_lock...
6064 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
6065 * necessary work for each page... we will grab the busy bit on the page
6066 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
6067 * if it can't immediately take the vm_page_queue_lock in order to compete
6068 * for the locks in the same order that vm_pageout_scan takes them.
6069 * the operation names are modeled after the names of the routines that
6070 * need to be called in order to make the changes very obvious in the
6071 * original loop
6072 */
6073
6074void
6075vm_page_do_delayed_work(
6076 vm_object_t object,
3e170ce0 6077 vm_tag_t tag,
6d2010ae
A
6078 struct vm_page_delayed_work *dwp,
6079 int dw_count)
6080{
6081 int j;
6082 vm_page_t m;
6083 vm_page_t local_free_q = VM_PAGE_NULL;
6d2010ae
A
6084
6085 /*
6086 * pageout_scan takes the vm_page_lock_queues first
6087 * then tries for the object lock... to avoid what
6088 * is effectively a lock inversion, we'll go to the
6089 * trouble of taking them in that same order... otherwise
6090 * if this object contains the majority of the pages resident
6091 * in the UBC (or a small set of large objects actively being
6092 * worked on contain the majority of the pages), we could
6093 * cause the pageout_scan thread to 'starve' in its attempt
6094 * to find pages to move to the free queue, since it has to
6095 * successfully acquire the object lock of any candidate page
6096 * before it can steal/clean it.
6097 */
6098 if (!vm_page_trylockspin_queues()) {
6099 vm_object_unlock(object);
6100
6101 vm_page_lockspin_queues();
6102
6103 for (j = 0; ; j++) {
6104 if (!vm_object_lock_avoid(object) &&
6105 _vm_object_lock_try(object))
6106 break;
6107 vm_page_unlock_queues();
6108 mutex_pause(j);
6109 vm_page_lockspin_queues();
6110 }
6d2010ae
A
6111 }
6112 for (j = 0; j < dw_count; j++, dwp++) {
6113
6114 m = dwp->dw_m;
6115
6d2010ae
A
6116 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
6117 vm_pageout_throttle_up(m);
fe8ab488
A
6118#if CONFIG_PHANTOM_CACHE
6119 if (dwp->dw_mask & DW_vm_phantom_cache_update)
6120 vm_phantom_cache_update(m);
6121#endif
6d2010ae 6122 if (dwp->dw_mask & DW_vm_page_wire)
3e170ce0 6123 vm_page_wire(m, tag, FALSE);
6d2010ae
A
6124 else if (dwp->dw_mask & DW_vm_page_unwire) {
6125 boolean_t queueit;
6126
fe8ab488 6127 queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;
6d2010ae
A
6128
6129 vm_page_unwire(m, queueit);
6130 }
6131 if (dwp->dw_mask & DW_vm_page_free) {
6132 vm_page_free_prepare_queues(m);
6133
d9a64523 6134 assert(m->vmp_pageq.next == 0 && m->vmp_pageq.prev == 0);
6d2010ae
A
6135 /*
6136 * Add this page to our list of reclaimed pages,
6137 * to be freed later.
6138 */
d9a64523 6139 m->vmp_snext = local_free_q;
6d2010ae
A
6140 local_free_q = m;
6141 } else {
6142 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
6143 vm_page_deactivate_internal(m, FALSE);
6144 else if (dwp->dw_mask & DW_vm_page_activate) {
d9a64523 6145 if (m->vmp_q_state != VM_PAGE_ON_ACTIVE_Q) {
6d2010ae
A
6146 vm_page_activate(m);
6147 }
6148 }
6149 else if (dwp->dw_mask & DW_vm_page_speculate)
6150 vm_page_speculate(m, TRUE);
316670eb
A
6151 else if (dwp->dw_mask & DW_enqueue_cleaned) {
6152 /*
6153 * if we didn't hold the object lock and did this,
6154 * we might disconnect the page, then someone might
6155 * soft fault it back in, then we would put it on the
6156 * cleaned queue, and so we would have a referenced (maybe even dirty)
6157 * page on that queue, which we don't want
6158 */
39037602 6159 int refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
316670eb
A
6160
6161 if ((refmod_state & VM_MEM_REFERENCED)) {
6162 /*
6163 * this page has been touched since it got cleaned; let's activate it
6164 * if it hasn't already been
6165 */
d9a64523
A
6166 VM_PAGEOUT_DEBUG(vm_pageout_enqueued_cleaned, 1);
6167 VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reactivated, 1);
316670eb 6168
d9a64523 6169 if (m->vmp_q_state != VM_PAGE_ON_ACTIVE_Q)
316670eb
A
6170 vm_page_activate(m);
6171 } else {
d9a64523 6172 m->vmp_reference = FALSE;
316670eb
A
6173 vm_page_enqueue_cleaned(m);
6174 }
6175 }
6d2010ae
A
6176 else if (dwp->dw_mask & DW_vm_page_lru)
6177 vm_page_lru(m);
316670eb 6178 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
d9a64523 6179 if (m->vmp_q_state != VM_PAGE_ON_PAGEOUT_Q)
39037602 6180 vm_page_queues_remove(m, TRUE);
316670eb 6181 }
6d2010ae 6182 if (dwp->dw_mask & DW_set_reference)
d9a64523 6183 m->vmp_reference = TRUE;
6d2010ae 6184 else if (dwp->dw_mask & DW_clear_reference)
d9a64523 6185 m->vmp_reference = FALSE;
6d2010ae
A
6186
6187 if (dwp->dw_mask & DW_move_page) {
d9a64523 6188 if (m->vmp_q_state != VM_PAGE_ON_PAGEOUT_Q) {
39037602 6189 vm_page_queues_remove(m, FALSE);
6d2010ae 6190
39037602 6191 assert(VM_PAGE_OBJECT(m) != kernel_object);
6d2010ae 6192
3e170ce0 6193 vm_page_enqueue_inactive(m, FALSE);
316670eb 6194 }
6d2010ae
A
6195 }
6196 if (dwp->dw_mask & DW_clear_busy)
d9a64523 6197 m->vmp_busy = FALSE;
6d2010ae
A
6198
6199 if (dwp->dw_mask & DW_PAGE_WAKEUP)
6200 PAGE_WAKEUP(m);
6201 }
6202 }
6203 vm_page_unlock_queues();
6204
6205 if (local_free_q)
6206 vm_page_free_list(local_free_q, TRUE);
6207
6208 VM_CHECK_MEMORYSTATUS;
6209
6210}
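/*
 * Editor's note: simplified caller sketch, not part of the original source.
 * It illustrates the two-pass pattern described above: while holding only the
 * object lock, batch per-page operations into a vm_page_delayed_work array,
 * then hand the whole batch to vm_page_do_delayed_work() so the page-queue
 * lock is taken once per batch.  The function name is hypothetical and the
 * pages are assumed to belong to 'object' and to be in a freeable state.
 */
#if 0	/* example only */
static void
example_batch_free(vm_object_t object, vm_page_t *pages, int count)
{
	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
	int				dw_count = 0;
	int				i;

	vm_object_lock(object);

	for (i = 0; i < count; i++) {
		/* per-page work that needs only the object lock goes here */

		dw_array[dw_count].dw_m = pages[i];
		dw_array[dw_count].dw_mask = DW_vm_page_free;
		dw_count++;

		if (dw_count == DEFAULT_DELAYED_WORK_LIMIT) {
			/* flush the batch under the page-queue lock */
			vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE,
						&dw_array[0], dw_count);
			dw_count = 0;
		}
	}
	if (dw_count)
		vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE,
					&dw_array[0], dw_count);

	vm_object_unlock(object);
}
#endif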
6211
0b4c1975
A
6212kern_return_t
6213vm_page_alloc_list(
6214 int page_count,
6215 int flags,
6216 vm_page_t *list)
6217{
6218 vm_page_t lo_page_list = VM_PAGE_NULL;
6219 vm_page_t mem;
6220 int i;
6221
6222 if ( !(flags & KMA_LOMEM))
6223 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
6224
6225 for (i = 0; i < page_count; i++) {
6226
6227 mem = vm_page_grablo();
6228
6229 if (mem == VM_PAGE_NULL) {
6230 if (lo_page_list)
6231 vm_page_free_list(lo_page_list, FALSE);
6232
6233 *list = VM_PAGE_NULL;
6234
6235 return (KERN_RESOURCE_SHORTAGE);
6236 }
d9a64523 6237 mem->vmp_snext = lo_page_list;
0b4c1975
A
6238 lo_page_list = mem;
6239 }
6240 *list = lo_page_list;
6241
6242 return (KERN_SUCCESS);
6243}
6244
6245void
6246vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
6247{
d9a64523 6248 page->vmp_offset = offset;
0b4c1975
A
6249}
6250
6251vm_page_t
6252vm_page_get_next(vm_page_t page)
6253{
d9a64523 6254 return (page->vmp_snext);
0b4c1975
A
6255}
6256
6257vm_object_offset_t
6258vm_page_get_offset(vm_page_t page)
6259{
d9a64523 6260 return (page->vmp_offset);
0b4c1975
A
6261}
6262
6263ppnum_t
6264vm_page_get_phys_page(vm_page_t page)
6265{
39037602 6266 return (VM_PAGE_GET_PHYS_PAGE(page));
0b4c1975
A
6267}
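/*
 * Editor's note: hypothetical usage sketch, not part of the original source.
 * vm_page_alloc_list() must be passed KMA_LOMEM and returns a singly linked
 * list of low-memory pages, which can be walked with vm_page_get_next() and
 * released with vm_page_free_list().
 */
#if 0	/* example only */
	vm_page_t	lo_list;
	vm_page_t	p;

	if (vm_page_alloc_list(8, KMA_LOMEM, &lo_list) == KERN_SUCCESS) {
		for (p = lo_list; p != VM_PAGE_NULL; p = vm_page_get_next(p))
			printf("low page at phys 0x%x\n", vm_page_get_phys_page(p));

		vm_page_free_list(lo_list, FALSE);
	}
#endif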
6268
6269
b0d623f7
A
6270/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
6271
d1ecb069
A
6272#if HIBERNATION
6273
b0d623f7
A
6274static vm_page_t hibernate_gobble_queue;
6275
0b4c1975 6276static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
39236c6e 6277static int hibernate_flush_dirty_pages(int);
39037602 6278static int hibernate_flush_queue(vm_page_queue_head_t *, int);
0b4c1975
A
6279
6280void hibernate_flush_wait(void);
6281void hibernate_mark_in_progress(void);
6282void hibernate_clear_in_progress(void);
6283
39236c6e
A
6284void hibernate_free_range(int, int);
6285void hibernate_hash_insert_page(vm_page_t);
6286uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
6287void hibernate_rebuild_vm_structs(void);
6288uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
6289ppnum_t hibernate_lookup_paddr(unsigned int);
0b4c1975
A
6290
6291struct hibernate_statistics {
6292 int hibernate_considered;
6293 int hibernate_reentered_on_q;
6294 int hibernate_found_dirty;
6295 int hibernate_skipped_cleaning;
6296 int hibernate_skipped_transient;
6297 int hibernate_skipped_precious;
39236c6e 6298 int hibernate_skipped_external;
0b4c1975
A
6299 int hibernate_queue_nolock;
6300 int hibernate_queue_paused;
6301 int hibernate_throttled;
6302 int hibernate_throttle_timeout;
6303 int hibernate_drained;
6304 int hibernate_drain_timeout;
6305 int cd_lock_failed;
6306 int cd_found_precious;
6307 int cd_found_wired;
6308 int cd_found_busy;
6309 int cd_found_unusual;
6310 int cd_found_cleaning;
6311 int cd_found_laundry;
6312 int cd_found_dirty;
39236c6e 6313 int cd_found_xpmapped;
8a3053a0 6314 int cd_skipped_xpmapped;
0b4c1975
A
6315 int cd_local_free;
6316 int cd_total_free;
6317 int cd_vm_page_wire_count;
39236c6e 6318 int cd_vm_struct_pages_unneeded;
0b4c1975
A
6319 int cd_pages;
6320 int cd_discarded;
6321 int cd_count_wire;
6322} hibernate_stats;
6323
6324
8a3053a0
A
6325/*
6326 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
6327 * so that we don't overrun the estimated image size, which would
6328 * result in a hibernation failure.
6329 */
6330#define HIBERNATE_XPMAPPED_LIMIT 40000
6331
0b4c1975
A
6332
6333static int
6334hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
6335{
6336 wait_result_t wait_result;
6337
6338 vm_page_lock_queues();
6339
39037602 6340 while ( !vm_page_queue_empty(&q->pgo_pending) ) {
0b4c1975
A
6341
6342 q->pgo_draining = TRUE;
6343
6344 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
6345
6346 vm_page_unlock_queues();
6347
6348 wait_result = thread_block(THREAD_CONTINUE_NULL);
6349
39037602 6350 if (wait_result == THREAD_TIMED_OUT && !vm_page_queue_empty(&q->pgo_pending)) {
0b4c1975 6351 hibernate_stats.hibernate_drain_timeout++;
39236c6e
A
6352
6353 if (q == &vm_pageout_queue_external)
6354 return (0);
6355
0b4c1975
A
6356 return (1);
6357 }
6358 vm_page_lock_queues();
6359
6360 hibernate_stats.hibernate_drained++;
6361 }
6362 vm_page_unlock_queues();
6363
6364 return (0);
6365}
6366
0b4c1975 6367
39236c6e
A
6368boolean_t hibernate_skip_external = FALSE;
6369
0b4c1975 6370static int
39037602 6371hibernate_flush_queue(vm_page_queue_head_t *q, int qcount)
0b4c1975
A
6372{
6373 vm_page_t m;
6374 vm_object_t l_object = NULL;
6375 vm_object_t m_object = NULL;
6376 int refmod_state = 0;
6377 int try_failed_count = 0;
6378 int retval = 0;
6379 int current_run = 0;
6380 struct vm_pageout_queue *iq;
6381 struct vm_pageout_queue *eq;
6382 struct vm_pageout_queue *tq;
6383
5ba3f43e
A
6384 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START,
6385 VM_KERNEL_UNSLIDE_OR_PERM(q), qcount);
0b4c1975 6386
0b4c1975
A
6387 iq = &vm_pageout_queue_internal;
6388 eq = &vm_pageout_queue_external;
6389
6390 vm_page_lock_queues();
6391
39037602 6392 while (qcount && !vm_page_queue_empty(q)) {
0b4c1975
A
6393
6394 if (current_run++ == 1000) {
6395 if (hibernate_should_abort()) {
6396 retval = 1;
6397 break;
6398 }
6399 current_run = 0;
6400 }
6401
39037602
A
6402 m = (vm_page_t) vm_page_queue_first(q);
6403 m_object = VM_PAGE_OBJECT(m);
0b4c1975
A
6404
6405 /*
6406 * check to see if we currently are working
6407 * with the same object... if so, we've
6408 * already got the lock
6409 */
6410 if (m_object != l_object) {
6411 /*
6412 * the object associated with candidate page is
6413 * different from the one we were just working
6414 * with... dump the lock if we still own it
6415 */
6416 if (l_object != NULL) {
6417 vm_object_unlock(l_object);
6418 l_object = NULL;
6419 }
6420 /*
6421 * Try to lock object; since we've already got the
6422 * page queues lock, we can only 'try' for this one.
6423 * if the 'try' fails, we need to do a mutex_pause
6424 * to allow the owner of the object lock a chance to
6425 * run...
6426 */
6427 if ( !vm_object_lock_try_scan(m_object)) {
6428
6429 if (try_failed_count > 20) {
6430 hibernate_stats.hibernate_queue_nolock++;
6431
6432 goto reenter_pg_on_q;
6433 }
0b4c1975
A
6434
6435 vm_page_unlock_queues();
6436 mutex_pause(try_failed_count++);
6437 vm_page_lock_queues();
6438
6439 hibernate_stats.hibernate_queue_paused++;
6440 continue;
6441 } else {
6442 l_object = m_object;
0b4c1975
A
6443 }
6444 }
d9a64523 6445 if ( !m_object->alive || m->vmp_cleaning || m->vmp_laundry || m->vmp_busy || m->vmp_absent || m->vmp_error) {
0b4c1975
A
6446 /*
6447 * page is not to be cleaned
6448 * put it back on the head of its queue
6449 */
d9a64523 6450 if (m->vmp_cleaning)
0b4c1975
A
6451 hibernate_stats.hibernate_skipped_cleaning++;
6452 else
6453 hibernate_stats.hibernate_skipped_transient++;
6454
6455 goto reenter_pg_on_q;
6456 }
0b4c1975
A
6457 if (m_object->copy == VM_OBJECT_NULL) {
6458 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
6459 /*
6460 * let the normal hibernate image path
6461 * deal with these
6462 */
6463 goto reenter_pg_on_q;
6464 }
6465 }
d9a64523 6466 if ( !m->vmp_dirty && m->vmp_pmapped) {
39037602 6467 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
0b4c1975 6468
316670eb
A
6469 if ((refmod_state & VM_MEM_MODIFIED)) {
6470 SET_PAGE_DIRTY(m, FALSE);
6471 }
0b4c1975
A
6472 } else
6473 refmod_state = 0;
6474
d9a64523 6475 if ( !m->vmp_dirty) {
0b4c1975
A
6476 /*
6477 * page is not to be cleaned
6478 * put it back on the head of its queue
6479 */
d9a64523 6480 if (m->vmp_precious)
0b4c1975
A
6481 hibernate_stats.hibernate_skipped_precious++;
6482
6483 goto reenter_pg_on_q;
6484 }
39236c6e
A
6485
6486 if (hibernate_skip_external == TRUE && !m_object->internal) {
6487
6488 hibernate_stats.hibernate_skipped_external++;
6489
6490 goto reenter_pg_on_q;
6491 }
0b4c1975
A
6492 tq = NULL;
6493
6494 if (m_object->internal) {
6495 if (VM_PAGE_Q_THROTTLED(iq))
6496 tq = iq;
6497 } else if (VM_PAGE_Q_THROTTLED(eq))
6498 tq = eq;
6499
6500 if (tq != NULL) {
6501 wait_result_t wait_result;
6502 int wait_count = 5;
6503
6504 if (l_object != NULL) {
6505 vm_object_unlock(l_object);
6506 l_object = NULL;
6507 }
0b4c1975 6508
0b4c1975
A
6509 while (retval == 0) {
6510
39236c6e
A
6511 tq->pgo_throttled = TRUE;
6512
0b4c1975
A
6513 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
6514
316670eb 6515 vm_page_unlock_queues();
0b4c1975 6516
316670eb 6517 wait_result = thread_block(THREAD_CONTINUE_NULL);
0b4c1975
A
6518
6519 vm_page_lock_queues();
6520
39236c6e
A
6521 if (wait_result != THREAD_TIMED_OUT)
6522 break;
6523 if (!VM_PAGE_Q_THROTTLED(tq))
6524 break;
6525
0b4c1975
A
6526 if (hibernate_should_abort())
6527 retval = 1;
6528
0b4c1975 6529 if (--wait_count == 0) {
39236c6e 6530
316670eb 6531 hibernate_stats.hibernate_throttle_timeout++;
39236c6e
A
6532
6533 if (tq == eq) {
6534 hibernate_skip_external = TRUE;
6535 break;
6536 }
316670eb
A
6537 retval = 1;
6538 }
0b4c1975
A
6539 }
6540 if (retval)
6541 break;
6542
6543 hibernate_stats.hibernate_throttled++;
6544
6545 continue;
6546 }
316670eb
A
6547 /*
6548 * we've already factored out pages in the laundry which
6549 * means this page can't be on the pageout queue so it's
3e170ce0 6550 * safe to do the vm_page_queues_remove
316670eb 6551 */
39037602 6552 vm_page_queues_remove(m, TRUE);
0b4c1975 6553
39037602
A
6554 if (m_object->internal == TRUE)
6555 pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m), PMAP_OPTIONS_COMPRESSOR, NULL);
39236c6e 6556
5ba3f43e 6557 vm_pageout_cluster(m);
0b4c1975
A
6558
6559 hibernate_stats.hibernate_found_dirty++;
6560
6561 goto next_pg;
6562
6563reenter_pg_on_q:
d9a64523
A
6564 vm_page_queue_remove(q, m, vm_page_t, vmp_pageq);
6565 vm_page_queue_enter(q, m, vm_page_t, vmp_pageq);
0b4c1975
A
6566
6567 hibernate_stats.hibernate_reentered_on_q++;
6568next_pg:
6569 hibernate_stats.hibernate_considered++;
6570
6571 qcount--;
6572 try_failed_count = 0;
6573 }
6574 if (l_object != NULL) {
6575 vm_object_unlock(l_object);
6576 l_object = NULL;
6577 }
0b4c1975
A
6578
6579 vm_page_unlock_queues();
6580
6581 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
6582
6583 return (retval);
6584}
6585
6586
6587static int
39236c6e 6588hibernate_flush_dirty_pages(int pass)
0b4c1975
A
6589{
6590 struct vm_speculative_age_q *aq;
6591 uint32_t i;
6592
0b4c1975
A
6593 if (vm_page_local_q) {
6594 for (i = 0; i < vm_page_local_q_count; i++)
6595 vm_page_reactivate_local(i, TRUE, FALSE);
6596 }
6597
6598 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
6599 int qcount;
6600 vm_page_t m;
6601
6602 aq = &vm_page_queue_speculative[i];
6603
39037602 6604 if (vm_page_queue_empty(&aq->age_q))
0b4c1975
A
6605 continue;
6606 qcount = 0;
6607
6608 vm_page_lockspin_queues();
6609
39037602 6610 vm_page_queue_iterate(&aq->age_q,
0b4c1975
A
6611 m,
6612 vm_page_t,
d9a64523 6613 vmp_pageq)
0b4c1975
A
6614 {
6615 qcount++;
6616 }
6617 vm_page_unlock_queues();
6618
6619 if (qcount) {
6620 if (hibernate_flush_queue(&aq->age_q, qcount))
6621 return (1);
6622 }
6623 }
316670eb 6624 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
0b4c1975 6625 return (1);
39037602 6626 /* XXX FBDP TODO: flush secluded queue */
316670eb
A
6627 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
6628 return (1);
6629 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
0b4c1975 6630 return (1);
0b4c1975
A
6631 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
6632 return (1);
0b4c1975 6633
39037602 6634 if (pass == 1)
39236c6e
A
6635 vm_compressor_record_warmup_start();
6636
6637 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
39037602 6638 if (pass == 1)
39236c6e
A
6639 vm_compressor_record_warmup_end();
6640 return (1);
6641 }
6642 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
39037602 6643 if (pass == 1)
39236c6e
A
6644 vm_compressor_record_warmup_end();
6645 return (1);
6646 }
39037602 6647 if (pass == 1)
39236c6e
A
6648 vm_compressor_record_warmup_end();
6649
6650 if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
6651 return (1);
6652
6653 return (0);
6654}
0b4c1975 6655
0b4c1975 6656
fe8ab488
A
6657void
6658hibernate_reset_stats()
6659{
6660 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
6661}
6662
6663
0b4c1975
A
6664int
6665hibernate_flush_memory()
6666{
6667 int retval;
6668
39037602
A
6669 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
6670
0b4c1975
A
6671 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
6672
39236c6e
A
6673 hibernate_cleaning_in_progress = TRUE;
6674 hibernate_skip_external = FALSE;
6675
6676 if ((retval = hibernate_flush_dirty_pages(1)) == 0) {
6677
39037602 6678 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
0b4c1975 6679
39037602 6680 vm_compressor_flush();
0b4c1975 6681
39037602 6682 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
39236c6e 6683
fe8ab488 6684 if (consider_buffer_cache_collect != NULL) {
39236c6e
A
6685 unsigned int orig_wire_count;
6686
6687 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6688 orig_wire_count = vm_page_wire_count;
0b4c1975 6689
0b4c1975 6690 (void)(*consider_buffer_cache_collect)(1);
5ba3f43e 6691 consider_zone_gc(FALSE);
0b4c1975 6692
39236c6e
A
6693 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
6694
6695 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
0b4c1975
A
6696 }
6697 }
39236c6e
A
6698 hibernate_cleaning_in_progress = FALSE;
6699
0b4c1975
A
6700 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
6701
39037602 6702 if (retval)
39236c6e
A
6703 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);
6704
6705
0b4c1975
A
6706 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
6707 hibernate_stats.hibernate_considered,
6708 hibernate_stats.hibernate_reentered_on_q,
6709 hibernate_stats.hibernate_found_dirty);
39236c6e 6710 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
0b4c1975
A
6711 hibernate_stats.hibernate_skipped_cleaning,
6712 hibernate_stats.hibernate_skipped_transient,
6713 hibernate_stats.hibernate_skipped_precious,
39236c6e 6714 hibernate_stats.hibernate_skipped_external,
0b4c1975
A
6715 hibernate_stats.hibernate_queue_nolock);
6716 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
6717 hibernate_stats.hibernate_queue_paused,
6718 hibernate_stats.hibernate_throttled,
6719 hibernate_stats.hibernate_throttle_timeout,
6720 hibernate_stats.hibernate_drained,
6721 hibernate_stats.hibernate_drain_timeout);
6722
6723 return (retval);
6724}
6725
6d2010ae 6726
b0d623f7
A
6727static void
6728hibernate_page_list_zero(hibernate_page_list_t *list)
6729{
6730 uint32_t bank;
6731 hibernate_bitmap_t * bitmap;
6732
6733 bitmap = &list->bank_bitmap[0];
6734 for (bank = 0; bank < list->bank_count; bank++)
6735 {
6736 uint32_t last_bit;
6737
6738 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
6739 // set out-of-bound bits at end of bitmap.
6740 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
6741 if (last_bit)
6742 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
6743
6744 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
6745 }
6746}
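
hibernate_page_list_zero() leaves every in-range bit at zero ("needs saving") and then sets the unused low-order bits of each bank's final word so that out-of-range page numbers can never look like pages to save. A stand-alone model of that tail mask follows; the most-significant-bit-first bit order is an assumption inferred from the right shift, not something stated in this file.

#include <stdint.h>
#include <stdio.h>

/* Model of the end-of-bank mask: with MSB-first bit numbering, the valid pages
 * occupy the top 'last_bit' bits of the final 32-bit word, so 0xFFFFFFFF >>
 * last_bit leaves ones only in the out-of-range tail. */
int
main(void)
{
	uint32_t first_page = 0, last_page = 40;			/* 41 pages -> 2 bitmap words */
	uint32_t last_bit = ((last_page - first_page + 1) & 31);	/* 9 valid bits in last word */
	uint32_t tail = last_bit ? (0xFFFFFFFF >> last_bit) : 0;

	printf("tail mask = 0x%08x\n", tail);				/* 0x007fffff: 23 unused bits set */
	return (0);
}
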
6747
b0d623f7
A
6748void
6749hibernate_free_gobble_pages(void)
6750{
6751 vm_page_t m, next;
6752 uint32_t count = 0;
6753
6754 m = (vm_page_t) hibernate_gobble_queue;
6755 while(m)
6756 {
d9a64523 6757 next = m->vmp_snext;
b0d623f7
A
6758 vm_page_free(m);
6759 count++;
6760 m = next;
6761 }
6762 hibernate_gobble_queue = VM_PAGE_NULL;
6763
6764 if (count)
6765 HIBLOG("Freed %d pages\n", count);
6766}
6767
6768static boolean_t
db609669 6769hibernate_consider_discard(vm_page_t m, boolean_t preflight)
b0d623f7
A
6770{
6771 vm_object_t object = NULL;
6772 int refmod_state;
6773 boolean_t discard = FALSE;
6774
6775 do
6776 {
d9a64523 6777 if (m->vmp_private)
b0d623f7
A
6778 panic("hibernate_consider_discard: private");
6779
39037602
A
6780 object = VM_PAGE_OBJECT(m);
6781
6782 if (!vm_object_lock_try(object)) {
6783 object = NULL;
db609669 6784 if (!preflight) hibernate_stats.cd_lock_failed++;
b0d623f7 6785 break;
0b4c1975 6786 }
0b4c1975 6787 if (VM_PAGE_WIRED(m)) {
db609669 6788 if (!preflight) hibernate_stats.cd_found_wired++;
b0d623f7 6789 break;
0b4c1975 6790 }
d9a64523 6791 if (m->vmp_precious) {
db609669 6792 if (!preflight) hibernate_stats.cd_found_precious++;
b0d623f7 6793 break;
0b4c1975 6794 }
d9a64523 6795 if (m->vmp_busy || !object->alive) {
b0d623f7
A
6796 /*
6797 * Somebody is playing with this page.
6798 */
db609669 6799 if (!preflight) hibernate_stats.cd_found_busy++;
6d2010ae 6800 break;
0b4c1975 6801 }
d9a64523 6802 if (m->vmp_absent || m->vmp_unusual || m->vmp_error) {
b0d623f7
A
6803 /*
6804 * If it's unusual in any way, ignore it

6805 */
db609669 6806 if (!preflight) hibernate_stats.cd_found_unusual++;
b0d623f7 6807 break;
0b4c1975 6808 }
d9a64523 6809 if (m->vmp_cleaning) {
db609669 6810 if (!preflight) hibernate_stats.cd_found_cleaning++;
b0d623f7 6811 break;
0b4c1975 6812 }
d9a64523 6813 if (m->vmp_laundry) {
db609669 6814 if (!preflight) hibernate_stats.cd_found_laundry++;
b0d623f7 6815 break;
0b4c1975 6816 }
d9a64523 6817 if (!m->vmp_dirty)
b0d623f7 6818 {
39037602 6819 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
b0d623f7
A
6820
6821 if (refmod_state & VM_MEM_REFERENCED)
d9a64523 6822 m->vmp_reference = TRUE;
316670eb
A
6823 if (refmod_state & VM_MEM_MODIFIED) {
6824 SET_PAGE_DIRTY(m, FALSE);
6825 }
b0d623f7
A
6826 }
6827
6828 /*
6829 * If it's clean or purgeable we can discard the page on wakeup.
6830 */
d9a64523 6831 discard = (!m->vmp_dirty)
b0d623f7 6832 || (VM_PURGABLE_VOLATILE == object->purgable)
0b4c1975
A
6833 || (VM_PURGABLE_EMPTY == object->purgable);
6834
39236c6e
A
6835
6836 if (discard == FALSE) {
6837 if (!preflight)
6838 hibernate_stats.cd_found_dirty++;
d9a64523 6839 } else if (m->vmp_xpmapped && m->vmp_reference && !object->internal) {
8a3053a0
A
6840 if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
6841 if (!preflight)
6842 hibernate_stats.cd_found_xpmapped++;
6843 discard = FALSE;
6844 } else {
6845 if (!preflight)
6846 hibernate_stats.cd_skipped_xpmapped++;
6847 }
39236c6e 6848 }
b0d623f7
A
6849 }
6850 while (FALSE);
6851
6852 if (object)
6853 vm_object_unlock(object);
6854
6855 return (discard);
6856}
6857
6858
6859static void
6860hibernate_discard_page(vm_page_t m)
6861{
39037602
A
6862 vm_object_t m_object;
6863
d9a64523 6864 if (m->vmp_absent || m->vmp_unusual || m->vmp_error)
b0d623f7
A
6865 /*
6866 * If it's unusual in any way, ignore
6867 */
6868 return;
6869
39037602
A
6870 m_object = VM_PAGE_OBJECT(m);
6871
fe8ab488 6872#if MACH_ASSERT || DEBUG
39037602 6873 if (!vm_object_lock_try(m_object))
316670eb
A
6874 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
6875#else
6876 /* No need to lock page queue for token delete, hibernate_vm_unlock()
6877 makes sure these locks are uncontended before sleep */
fe8ab488 6878#endif /* MACH_ASSERT || DEBUG */
316670eb 6879
d9a64523 6880 if (m->vmp_pmapped == TRUE)
b0d623f7 6881 {
39037602 6882 __unused int refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
b0d623f7
A
6883 }
6884
d9a64523 6885 if (m->vmp_laundry)
b0d623f7 6886 panic("hibernate_discard_page(%p) laundry", m);
d9a64523 6887 if (m->vmp_private)
b0d623f7 6888 panic("hibernate_discard_page(%p) private", m);
d9a64523 6889 if (m->vmp_fictitious)
b0d623f7
A
6890 panic("hibernate_discard_page(%p) fictitious", m);
6891
39037602 6892 if (VM_PURGABLE_VOLATILE == m_object->purgable)
b0d623f7
A
6893 {
6894 /* object should be on a queue */
39037602
A
6895 assert((m_object->objq.next != NULL) && (m_object->objq.prev != NULL));
6896 purgeable_q_t old_queue = vm_purgeable_object_remove(m_object);
b0d623f7 6897 assert(old_queue);
39037602 6898 if (m_object->purgeable_when_ripe) {
39236c6e
A
6899 vm_purgeable_token_delete_first(old_queue);
6900 }
39037602
A
6901 vm_object_lock_assert_exclusive(m_object);
6902 m_object->purgable = VM_PURGABLE_EMPTY;
fe8ab488
A
6903
6904 /*
6905 * Purgeable ledgers: pages of VOLATILE and EMPTY objects are
6906 * accounted in the "volatile" ledger, so no change here.
6907 * We have to update vm_page_purgeable_count, though, since we're
6908 * effectively purging this object.
6909 */
6910 unsigned int delta;
39037602
A
6911 assert(m_object->resident_page_count >= m_object->wired_page_count);
6912 delta = (m_object->resident_page_count - m_object->wired_page_count);
fe8ab488
A
6913 assert(vm_page_purgeable_count >= delta);
6914 assert(delta > 0);
6915 OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
b0d623f7
A
6916 }
6917
6918 vm_page_free(m);
316670eb 6919
fe8ab488 6920#if MACH_ASSERT || DEBUG
39037602 6921 vm_object_unlock(m_object);
fe8ab488 6922#endif /* MACH_ASSERT || DEBUG */
b0d623f7
A
6923}
6924
db609669
A
6925/*
6926 Grab locks for hibernate_page_list_setall()
6927*/
6928void
6929hibernate_vm_lock_queues(void)
6930{
39236c6e 6931 vm_object_lock(compressor_object);
db609669
A
6932 vm_page_lock_queues();
6933 lck_mtx_lock(&vm_page_queue_free_lock);
813fb2f6 6934 lck_mtx_lock(&vm_purgeable_queue_lock);
db609669
A
6935
6936 if (vm_page_local_q) {
6937 uint32_t i;
6938 for (i = 0; i < vm_page_local_q_count; i++) {
6939 struct vpl *lq;
6940 lq = &vm_page_local_q[i].vpl_un.vpl;
6941 VPL_LOCK(&lq->vpl_lock);
6942 }
6943 }
6944}
6945
6946void
6947hibernate_vm_unlock_queues(void)
6948{
6949 if (vm_page_local_q) {
6950 uint32_t i;
6951 for (i = 0; i < vm_page_local_q_count; i++) {
6952 struct vpl *lq;
6953 lq = &vm_page_local_q[i].vpl_un.vpl;
6954 VPL_UNLOCK(&lq->vpl_lock);
6955 }
6956 }
813fb2f6 6957 lck_mtx_unlock(&vm_purgeable_queue_lock);
db609669
A
6958 lck_mtx_unlock(&vm_page_queue_free_lock);
6959 vm_page_unlock_queues();
39236c6e 6960 vm_object_unlock(compressor_object);
db609669
A
6961}
6962
b0d623f7
A
6963/*
6964 Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
6965 pages known to VM to not need saving are subtracted.
6966 Wired pages to be saved are present in page_list_wired, pageable in page_list.
6967*/
6968
6969void
6970hibernate_page_list_setall(hibernate_page_list_t * page_list,
6971 hibernate_page_list_t * page_list_wired,
6d2010ae 6972 hibernate_page_list_t * page_list_pal,
39236c6e
A
6973 boolean_t preflight,
6974 boolean_t will_discard,
b0d623f7
A
6975 uint32_t * pagesOut)
6976{
6977 uint64_t start, end, nsec;
6978 vm_page_t m;
39236c6e 6979 vm_page_t next;
b0d623f7 6980 uint32_t pages = page_list->page_count;
39236c6e 6981 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
316670eb 6982 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
b0d623f7
A
6983 uint32_t count_wire = pages;
6984 uint32_t count_discard_active = 0;
6985 uint32_t count_discard_inactive = 0;
316670eb 6986 uint32_t count_discard_cleaned = 0;
b0d623f7
A
6987 uint32_t count_discard_purgeable = 0;
6988 uint32_t count_discard_speculative = 0;
39236c6e 6989 uint32_t count_discard_vm_struct_pages = 0;
b0d623f7
A
6990 uint32_t i;
6991 uint32_t bank;
6992 hibernate_bitmap_t * bitmap;
6993 hibernate_bitmap_t * bitmap_wired;
39236c6e
A
6994 boolean_t discard_all;
6995 boolean_t discard;
b0d623f7 6996
3e170ce0 6997 HIBLOG("hibernate_page_list_setall(preflight %d) start\n", preflight);
b0d623f7 6998
db609669
A
6999 if (preflight) {
7000 page_list = NULL;
7001 page_list_wired = NULL;
7002 page_list_pal = NULL;
39236c6e
A
7003 discard_all = FALSE;
7004 } else {
7005 discard_all = will_discard;
db609669 7006 }
0b4c1975 7007
fe8ab488 7008#if MACH_ASSERT || DEBUG
39236c6e
A
7009 if (!preflight)
7010 {
5ba3f43e 7011 assert(hibernate_vm_locks_are_safe());
316670eb
A
7012 vm_page_lock_queues();
7013 if (vm_page_local_q) {
7014 for (i = 0; i < vm_page_local_q_count; i++) {
7015 struct vpl *lq;
7016 lq = &vm_page_local_q[i].vpl_un.vpl;
7017 VPL_LOCK(&lq->vpl_lock);
7018 }
7019 }
39236c6e 7020 }
fe8ab488 7021#endif /* MACH_ASSERT || DEBUG */
316670eb
A
7022
7023
0b4c1975 7024 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
b0d623f7
A
7025
7026 clock_get_uptime(&start);
7027
db609669
A
7028 if (!preflight) {
7029 hibernate_page_list_zero(page_list);
7030 hibernate_page_list_zero(page_list_wired);
7031 hibernate_page_list_zero(page_list_pal);
7032
7033 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
7034 hibernate_stats.cd_pages = pages;
7035 }
0b4c1975 7036
b0d623f7
A
7037 if (vm_page_local_q) {
7038 for (i = 0; i < vm_page_local_q_count; i++)
db609669
A
7039 vm_page_reactivate_local(i, TRUE, !preflight);
7040 }
7041
7042 if (preflight) {
39236c6e 7043 vm_object_lock(compressor_object);
db609669
A
7044 vm_page_lock_queues();
7045 lck_mtx_lock(&vm_page_queue_free_lock);
b0d623f7
A
7046 }
7047
e8c3f781
A
7048 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
7049
7050 hibernation_vmqueues_inspection = TRUE;
7051
b0d623f7 7052 m = (vm_page_t) hibernate_gobble_queue;
39236c6e 7053 while (m)
b0d623f7
A
7054 {
7055 pages--;
7056 count_wire--;
db609669 7057 if (!preflight) {
39037602
A
7058 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7059 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
db609669 7060 }
d9a64523 7061 m = m->vmp_snext;
b0d623f7 7062 }
6d2010ae 7063
db609669 7064 if (!preflight) for( i = 0; i < real_ncpus; i++ )
0b4c1975
A
7065 {
7066 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
7067 {
d9a64523 7068 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = m->vmp_snext)
0b4c1975 7069 {
d9a64523 7070 assert(m->vmp_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
39037602 7071
0b4c1975
A
7072 pages--;
7073 count_wire--;
39037602
A
7074 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7075 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
0b4c1975
A
7076
7077 hibernate_stats.cd_local_free++;
7078 hibernate_stats.cd_total_free++;
7079 }
7080 }
7081 }
6d2010ae 7082
b0d623f7
A
7083 for( i = 0; i < vm_colors; i++ )
7084 {
39037602
A
7085 vm_page_queue_iterate(&vm_page_queue_free[i].qhead,
7086 m,
7087 vm_page_t,
d9a64523 7088 vmp_pageq)
b0d623f7 7089 {
d9a64523 7090 assert(m->vmp_q_state == VM_PAGE_ON_FREE_Q);
39037602 7091
b0d623f7
A
7092 pages--;
7093 count_wire--;
db609669 7094 if (!preflight) {
39037602
A
7095 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7096 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
db609669
A
7097
7098 hibernate_stats.cd_total_free++;
7099 }
b0d623f7
A
7100 }
7101 }
7102
39037602
A
7103 vm_page_queue_iterate(&vm_lopage_queue_free,
7104 m,
7105 vm_page_t,
d9a64523 7106 vmp_pageq)
b0d623f7 7107 {
d9a64523 7108 assert(m->vmp_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
39037602 7109
b0d623f7
A
7110 pages--;
7111 count_wire--;
db609669 7112 if (!preflight) {
39037602
A
7113 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7114 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
db609669
A
7115
7116 hibernate_stats.cd_total_free++;
7117 }
b0d623f7
A
7118 }
7119
39037602
A
7120 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
7121 while (m && !vm_page_queue_end(&vm_page_queue_throttled, (vm_page_queue_entry_t)m))
b0d623f7 7122 {
d9a64523 7123 assert(m->vmp_q_state == VM_PAGE_ON_THROTTLED_Q);
39037602 7124
d9a64523 7125 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
39236c6e 7126 discard = FALSE;
b0d623f7 7127 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
db609669 7128 && hibernate_consider_discard(m, preflight))
b0d623f7 7129 {
39037602 7130 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
b0d623f7 7131 count_discard_inactive++;
39236c6e 7132 discard = discard_all;
b0d623f7
A
7133 }
7134 else
7135 count_throttled++;
7136 count_wire--;
39037602 7137 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
39236c6e
A
7138
7139 if (discard) hibernate_discard_page(m);
7140 m = next;
b0d623f7
A
7141 }
7142
39037602
A
7143 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
7144 while (m && !vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t)m))
b0d623f7 7145 {
d9a64523 7146 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
39037602 7147
d9a64523 7148 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
39236c6e 7149 discard = FALSE;
b0d623f7 7150 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
db609669 7151 && hibernate_consider_discard(m, preflight))
b0d623f7 7152 {
39037602 7153 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
d9a64523 7154 if (m->vmp_dirty)
b0d623f7
A
7155 count_discard_purgeable++;
7156 else
7157 count_discard_inactive++;
39236c6e 7158 discard = discard_all;
b0d623f7
A
7159 }
7160 else
39236c6e 7161 count_anonymous++;
b0d623f7 7162 count_wire--;
39037602 7163 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
39236c6e
A
7164 if (discard) hibernate_discard_page(m);
7165 m = next;
b0d623f7
A
7166 }
7167
39037602
A
7168 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
7169 while (m && !vm_page_queue_end(&vm_page_queue_cleaned, (vm_page_queue_entry_t)m))
b0d623f7 7170 {
d9a64523 7171 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
39037602 7172
d9a64523 7173 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
39236c6e 7174 discard = FALSE;
b0d623f7 7175 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
db609669 7176 && hibernate_consider_discard(m, preflight))
b0d623f7 7177 {
39037602 7178 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
d9a64523 7179 if (m->vmp_dirty)
b0d623f7
A
7180 count_discard_purgeable++;
7181 else
8a3053a0 7182 count_discard_cleaned++;
39236c6e 7183 discard = discard_all;
b0d623f7
A
7184 }
7185 else
8a3053a0 7186 count_cleaned++;
b0d623f7 7187 count_wire--;
39037602 7188 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
39236c6e
A
7189 if (discard) hibernate_discard_page(m);
7190 m = next;
b0d623f7
A
7191 }
7192
39037602
A
7193 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
7194 while (m && !vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t)m))
8a3053a0 7195 {
d9a64523 7196 assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q);
39037602 7197
d9a64523 7198 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
8a3053a0
A
7199 discard = FALSE;
7200 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
7201 && hibernate_consider_discard(m, preflight))
7202 {
39037602 7203 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
d9a64523 7204 if (m->vmp_dirty)
8a3053a0
A
7205 count_discard_purgeable++;
7206 else
7207 count_discard_active++;
7208 discard = discard_all;
7209 }
7210 else
7211 count_active++;
7212 count_wire--;
39037602 7213 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
8a3053a0
A
7214 if (discard) hibernate_discard_page(m);
7215 m = next;
7216 }
7217
39037602
A
7218 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
7219 while (m && !vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t)m))
316670eb 7220 {
d9a64523 7221 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
39037602 7222
d9a64523 7223 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
39236c6e 7224 discard = FALSE;
316670eb 7225 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
db609669 7226 && hibernate_consider_discard(m, preflight))
316670eb 7227 {
39037602 7228 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
d9a64523 7229 if (m->vmp_dirty)
316670eb
A
7230 count_discard_purgeable++;
7231 else
8a3053a0 7232 count_discard_inactive++;
39236c6e 7233 discard = discard_all;
316670eb
A
7234 }
7235 else
8a3053a0 7236 count_inactive++;
316670eb 7237 count_wire--;
39037602 7238 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
39236c6e
A
7239 if (discard) hibernate_discard_page(m);
7240 m = next;
316670eb 7241 }
39037602 7242 /* XXX FBDP TODO: secluded queue */
316670eb 7243
b0d623f7
A
7244 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
7245 {
39037602
A
7246 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_speculative[i].age_q);
7247 while (m && !vm_page_queue_end(&vm_page_queue_speculative[i].age_q, (vm_page_queue_entry_t)m))
39236c6e 7248 {
d9a64523
A
7249 assert(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q);
7250 assertf(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q,
7251 "Bad page: %p (0x%x:0x%x) on queue %d has state: %d (Discard: %d, Preflight: %d)",
7252 m, m->vmp_pageq.next, m->vmp_pageq.prev, i, m->vmp_q_state, discard, preflight);
39037602 7253
d9a64523 7254 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
39236c6e
A
7255 discard = FALSE;
7256 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
7257 && hibernate_consider_discard(m, preflight))
7258 {
39037602 7259 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
39236c6e
A
7260 count_discard_speculative++;
7261 discard = discard_all;
7262 }
7263 else
7264 count_speculative++;
7265 count_wire--;
39037602 7266 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
39236c6e
A
7267 if (discard) hibernate_discard_page(m);
7268 m = next;
7269 }
b0d623f7
A
7270 }
7271
d9a64523 7272 vm_page_queue_iterate(&compressor_object->memq, m, vm_page_t, vmp_listq)
39236c6e 7273 {
d9a64523 7274 assert(m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR);
39037602 7275
39236c6e
A
7276 count_compressor++;
7277 count_wire--;
39037602 7278 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
39236c6e
A
7279 }
7280
7281 if (preflight == FALSE && discard_all == TRUE) {
5ba3f43e 7282 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START);
39236c6e
A
7283
7284 HIBLOG("hibernate_teardown started\n");
7285 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
7286 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
7287
7288 pages -= count_discard_vm_struct_pages;
7289 count_wire -= count_discard_vm_struct_pages;
7290
7291 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
7292
5ba3f43e 7293 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_END);
b0d623f7
A
7294 }
7295
db609669
A
7296 if (!preflight) {
7297 // pull wired from hibernate_bitmap
7298 bitmap = &page_list->bank_bitmap[0];
7299 bitmap_wired = &page_list_wired->bank_bitmap[0];
7300 for (bank = 0; bank < page_list->bank_count; bank++)
7301 {
7302 for (i = 0; i < bitmap->bitmapwords; i++)
7303 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
7304 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
7305 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
7306 }
b0d623f7
A
7307 }
7308
7309 // machine dependent adjustments
db609669 7310 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
b0d623f7 7311
db609669
A
7312 if (!preflight) {
7313 hibernate_stats.cd_count_wire = count_wire;
39236c6e
A
7314 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
7315 count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
db609669 7316 }
0b4c1975 7317
b0d623f7
A
7318 clock_get_uptime(&end);
7319 absolutetime_to_nanoseconds(end - start, &nsec);
7320 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
7321
39236c6e
A
7322 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
7323 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
7324 discard_all ? "did" : "could",
316670eb 7325 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
b0d623f7 7326
8a3053a0
A
7327 if (hibernate_stats.cd_skipped_xpmapped)
7328 HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);
7329
316670eb
A
7330 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
7331
39236c6e
A
7332 if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
7333
e8c3f781
A
7334 hibernation_vmqueues_inspection = FALSE;
7335
fe8ab488 7336#if MACH_ASSERT || DEBUG
39236c6e
A
7337 if (!preflight)
7338 {
316670eb
A
7339 if (vm_page_local_q) {
7340 for (i = 0; i < vm_page_local_q_count; i++) {
7341 struct vpl *lq;
7342 lq = &vm_page_local_q[i].vpl_un.vpl;
7343 VPL_UNLOCK(&lq->vpl_lock);
7344 }
7345 }
7346 vm_page_unlock_queues();
39236c6e 7347 }
fe8ab488 7348#endif /* MACH_ASSERT || DEBUG */
0b4c1975 7349
db609669
A
7350 if (preflight) {
7351 lck_mtx_unlock(&vm_page_queue_free_lock);
7352 vm_page_unlock_queues();
39236c6e 7353 vm_object_unlock(compressor_object);
db609669
A
7354 }
7355
0b4c1975 7356 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
b0d623f7
A
7357}
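
The "pull wired from hibernate_bitmap" loop above ORs the complement of the wired bitmap into the pageable bitmap, so any page the wired pass will write (bit still zero in page_list_wired) is marked "do not save" in page_list and is not written twice. A one-word model of that merge, with made-up bit patterns:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* bit == 0 means "save this page" (see the comment above
	 * hibernate_page_list_setall); example words are made up */
	uint32_t page_list_word  = 0xF0F0F0F0;	/* pageable view */
	uint32_t page_list_wired = 0xFF00FF00;	/* wired view: zero bits saved as wired */

	page_list_word |= ~page_list_wired;	/* wired-saved pages drop out of the pageable list */

	printf("0x%08x\n", page_list_word);	/* 0xf0fff0ff */
	return (0);
}
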
7358
7359void
7360hibernate_page_list_discard(hibernate_page_list_t * page_list)
7361{
7362 uint64_t start, end, nsec;
7363 vm_page_t m;
7364 vm_page_t next;
7365 uint32_t i;
7366 uint32_t count_discard_active = 0;
7367 uint32_t count_discard_inactive = 0;
7368 uint32_t count_discard_purgeable = 0;
316670eb 7369 uint32_t count_discard_cleaned = 0;
b0d623f7
A
7370 uint32_t count_discard_speculative = 0;
7371
39236c6e 7372
fe8ab488 7373#if MACH_ASSERT || DEBUG
316670eb
A
7374 vm_page_lock_queues();
7375 if (vm_page_local_q) {
7376 for (i = 0; i < vm_page_local_q_count; i++) {
7377 struct vpl *lq;
7378 lq = &vm_page_local_q[i].vpl_un.vpl;
7379 VPL_LOCK(&lq->vpl_lock);
7380 }
7381 }
fe8ab488 7382#endif /* MACH_ASSERT || DEBUG */
316670eb 7383
b0d623f7
A
7384 clock_get_uptime(&start);
7385
39037602
A
7386 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
7387 while (m && !vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t)m))
b0d623f7 7388 {
d9a64523 7389 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
39037602 7390
d9a64523 7391 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
39037602 7392 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
b0d623f7 7393 {
d9a64523 7394 if (m->vmp_dirty)
b0d623f7
A
7395 count_discard_purgeable++;
7396 else
7397 count_discard_inactive++;
7398 hibernate_discard_page(m);
7399 }
7400 m = next;
7401 }
7402
7403 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
7404 {
39037602
A
7405 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_speculative[i].age_q);
7406 while (m && !vm_page_queue_end(&vm_page_queue_speculative[i].age_q, (vm_page_queue_entry_t)m))
b0d623f7 7407 {
d9a64523 7408 assert(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q);
39037602 7409
d9a64523 7410 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
39037602 7411 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
b0d623f7
A
7412 {
7413 count_discard_speculative++;
7414 hibernate_discard_page(m);
7415 }
7416 m = next;
7417 }
7418 }
7419
39037602
A
7420 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
7421 while (m && !vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t)m))
b0d623f7 7422 {
d9a64523 7423 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
39037602 7424
d9a64523 7425 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
39037602 7426 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
b0d623f7 7427 {
d9a64523 7428 if (m->vmp_dirty)
b0d623f7
A
7429 count_discard_purgeable++;
7430 else
7431 count_discard_inactive++;
7432 hibernate_discard_page(m);
7433 }
7434 m = next;
7435 }
39037602 7436 /* XXX FBDP TODO: secluded queue */
b0d623f7 7437
39037602
A
7438 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
7439 while (m && !vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t)m))
b0d623f7 7440 {
d9a64523 7441 assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q);
39037602 7442
d9a64523 7443 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
39037602 7444 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
b0d623f7 7445 {
d9a64523 7446 if (m->vmp_dirty)
b0d623f7
A
7447 count_discard_purgeable++;
7448 else
7449 count_discard_active++;
7450 hibernate_discard_page(m);
7451 }
7452 m = next;
7453 }
7454
39037602
A
7455 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
7456 while (m && !vm_page_queue_end(&vm_page_queue_cleaned, (vm_page_queue_entry_t)m))
316670eb 7457 {
d9a64523 7458 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
39037602 7459
d9a64523 7460 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
39037602 7461 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
316670eb 7462 {
d9a64523 7463 if (m->vmp_dirty)
316670eb
A
7464 count_discard_purgeable++;
7465 else
7466 count_discard_cleaned++;
7467 hibernate_discard_page(m);
7468 }
7469 m = next;
7470 }
7471
fe8ab488 7472#if MACH_ASSERT || DEBUG
316670eb
A
7473 if (vm_page_local_q) {
7474 for (i = 0; i < vm_page_local_q_count; i++) {
7475 struct vpl *lq;
7476 lq = &vm_page_local_q[i].vpl_un.vpl;
7477 VPL_UNLOCK(&lq->vpl_lock);
7478 }
7479 }
7480 vm_page_unlock_queues();
fe8ab488 7481#endif /* MACH_ASSERT || DEBUG */
316670eb 7482
b0d623f7
A
7483 clock_get_uptime(&end);
7484 absolutetime_to_nanoseconds(end - start, &nsec);
316670eb 7485 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
b0d623f7 7486 nsec / 1000000ULL,
316670eb 7487 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
b0d623f7
A
7488}
7489
39236c6e 7490boolean_t hibernate_paddr_map_inited = FALSE;
39236c6e
A
7491unsigned int hibernate_teardown_last_valid_compact_indx = -1;
7492vm_page_t hibernate_rebuild_hash_list = NULL;
7493
7494unsigned int hibernate_teardown_found_tabled_pages = 0;
7495unsigned int hibernate_teardown_found_created_pages = 0;
7496unsigned int hibernate_teardown_found_free_pages = 0;
7497unsigned int hibernate_teardown_vm_page_free_count;
7498
7499
7500struct ppnum_mapping {
7501 struct ppnum_mapping *ppnm_next;
7502 ppnum_t ppnm_base_paddr;
7503 unsigned int ppnm_sindx;
7504 unsigned int ppnm_eindx;
7505};
7506
7507struct ppnum_mapping *ppnm_head;
7508struct ppnum_mapping *ppnm_last_found = NULL;
7509
7510
7511void
7512hibernate_create_paddr_map()
7513{
7514 unsigned int i;
7515 ppnum_t next_ppnum_in_run = 0;
7516 struct ppnum_mapping *ppnm = NULL;
7517
7518 if (hibernate_paddr_map_inited == FALSE) {
7519
7520 for (i = 0; i < vm_pages_count; i++) {
7521
7522 if (ppnm)
7523 ppnm->ppnm_eindx = i;
7524
39037602 7525 if (ppnm == NULL || VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) != next_ppnum_in_run) {
39236c6e
A
7526
7527 ppnm = kalloc(sizeof(struct ppnum_mapping));
7528
7529 ppnm->ppnm_next = ppnm_head;
7530 ppnm_head = ppnm;
7531
7532 ppnm->ppnm_sindx = i;
39037602 7533 ppnm->ppnm_base_paddr = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]);
39236c6e 7534 }
39037602 7535 next_ppnum_in_run = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) + 1;
39236c6e
A
7536 }
7537 ppnm->ppnm_eindx++;
7538
7539 hibernate_paddr_map_inited = TRUE;
7540 }
7541}
7542
7543ppnum_t
7544hibernate_lookup_paddr(unsigned int indx)
7545{
7546 struct ppnum_mapping *ppnm = NULL;
7547
7548 ppnm = ppnm_last_found;
7549
7550 if (ppnm) {
7551 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
7552 goto done;
7553 }
7554 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
7555
7556 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
7557 ppnm_last_found = ppnm;
7558 break;
7559 }
7560 }
7561 if (ppnm == NULL)
7562 panic("hibernate_lookup_paddr of %d failed\n", indx);
7563done:
7564 return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
7565}
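
hibernate_create_paddr_map() records vm_pages[] as runs of physically contiguous page numbers, and hibernate_lookup_paddr() maps an array index back to a physical page by finding the run that covers it. The stand-alone model below mirrors that translation with two made-up runs; the struct and the arithmetic follow the ppnum_mapping code above.

#include <assert.h>
#include <stdio.h>

/* Simplified ppnum_mapping: array indices [sindx, eindx) whose physical
 * pages start at base_paddr and are contiguous. */
struct run {
	unsigned int base_paddr;
	unsigned int sindx;
	unsigned int eindx;
};

static unsigned int
lookup_paddr_model(const struct run *runs, int nruns, unsigned int indx)
{
	for (int i = 0; i < nruns; i++) {
		if (indx >= runs[i].sindx && indx < runs[i].eindx)
			return (runs[i].base_paddr + (indx - runs[i].sindx));
	}
	assert(0 && "index not covered by any run");
	return (0);
}

int
main(void)
{
	/* made-up runs: indices 0..99 start at ppn 0x1000, 100..149 at 0x8000 */
	struct run runs[] = {
		{ 0x1000,   0, 100 },
		{ 0x8000, 100, 150 },
	};

	printf("0x%x\n", lookup_paddr_model(runs, 2,   5));	/* 0x1005 */
	printf("0x%x\n", lookup_paddr_model(runs, 2, 120));	/* 0x8014 */
	return (0);
}
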
7566
7567
7568uint32_t
7569hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
7570{
7571 addr64_t saddr_aligned;
7572 addr64_t eaddr_aligned;
7573 addr64_t addr;
7574 ppnum_t paddr;
7575 unsigned int mark_as_unneeded_pages = 0;
7576
7577 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
7578 eaddr_aligned = eaddr & ~PAGE_MASK_64;
7579
7580 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
7581
7582 paddr = pmap_find_phys(kernel_pmap, addr);
7583
7584 assert(paddr);
7585
7586 hibernate_page_bitset(page_list, TRUE, paddr);
7587 hibernate_page_bitset(page_list_wired, TRUE, paddr);
7588
7589 mark_as_unneeded_pages++;
7590 }
7591 return (mark_as_unneeded_pages);
7592}
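
hibernate_mark_as_unneeded() rounds the start address up and the end address down to page boundaries, so only pages that lie entirely inside [saddr, eaddr) are marked. A quick stand-alone check of that arithmetic, assuming 4 KB pages for the example:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE_EX	4096ULL			/* 4 KB assumed for the example */
#define PAGE_MASK_EX	(PAGE_SIZE_EX - 1)

int
main(void)
{
	uint64_t saddr = 0x10000800, eaddr = 0x10004200;
	uint64_t s = (saddr + PAGE_MASK_EX) & ~PAGE_MASK_EX;	/* 0x10001000 */
	uint64_t e = eaddr & ~PAGE_MASK_EX;			/* 0x10004000 */

	/* only the 3 fully contained pages are marked unneeded */
	printf("pages marked: %llu\n", (unsigned long long)((e - s) / PAGE_SIZE_EX));
	return (0);
}
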
7593
7594
7595void
7596hibernate_hash_insert_page(vm_page_t mem)
7597{
7598 vm_page_bucket_t *bucket;
7599 int hash_id;
39037602
A
7600 vm_object_t m_object;
7601
7602 m_object = VM_PAGE_OBJECT(mem);
39236c6e 7603
d9a64523 7604 assert(mem->vmp_hashed);
39037602 7605 assert(m_object);
d9a64523 7606 assert(mem->vmp_offset != (vm_object_offset_t) -1);
39236c6e
A
7607
7608 /*
7609 * Insert it into the object_object/offset hash table
7610 */
d9a64523 7611 hash_id = vm_page_hash(m_object, mem->vmp_offset);
39236c6e
A
7612 bucket = &vm_page_buckets[hash_id];
7613
d9a64523 7614 mem->vmp_next_m = bucket->page_list;
fe8ab488 7615 bucket->page_list = VM_PAGE_PACK_PTR(mem);
39236c6e
A
7616}
7617
7618
7619void
7620hibernate_free_range(int sindx, int eindx)
7621{
7622 vm_page_t mem;
7623 unsigned int color;
7624
7625 while (sindx < eindx) {
7626 mem = &vm_pages[sindx];
7627
7628 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
7629
d9a64523
A
7630 mem->vmp_lopage = FALSE;
7631 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
39236c6e 7632
5ba3f43e
A
7633 color = VM_PAGE_GET_COLOR(mem);
7634#if defined(__x86_64__)
7635 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead,
39037602
A
7636 mem,
7637 vm_page_t,
d9a64523 7638 vmp_pageq);
5ba3f43e
A
7639#else
7640 vm_page_queue_enter(&vm_page_queue_free[color].qhead,
7641 mem,
7642 vm_page_t,
d9a64523 7643 vmp_pageq);
5ba3f43e 7644#endif
39236c6e
A
7645 vm_page_free_count++;
7646
7647 sindx++;
7648 }
7649}
7650
7651
7652extern void hibernate_rebuild_pmap_structs(void);
7653
7654void
7655hibernate_rebuild_vm_structs(void)
7656{
5ba3f43e 7657 int i, cindx, sindx, eindx;
39236c6e
A
7658 vm_page_t mem, tmem, mem_next;
7659 AbsoluteTime startTime, endTime;
7660 uint64_t nsec;
7661
7662 if (hibernate_rebuild_needed == FALSE)
7663 return;
7664
5ba3f43e 7665 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START);
39236c6e
A
7666 HIBLOG("hibernate_rebuild started\n");
7667
7668 clock_get_uptime(&startTime);
7669
7670 hibernate_rebuild_pmap_structs();
7671
7672 bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
7673 eindx = vm_pages_count;
7674
5ba3f43e
A
7675 /*
7676 * Mark all the vm_pages[] that have not been initialized yet as being
7677 * transient. This is needed to ensure that the buddy page search is correct.
7678 * Without this, random data in these vm_pages[] can trip the buddy search.
7679 */
7680 for (i = hibernate_teardown_last_valid_compact_indx+1; i < eindx; ++i)
d9a64523 7681 vm_pages[i].vmp_q_state = VM_PAGE_NOT_ON_Q;
5ba3f43e 7682
39236c6e
A
7683 for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
7684
7685 mem = &vm_pages[cindx];
d9a64523 7686 assert(mem->vmp_q_state != VM_PAGE_ON_FREE_Q);
39236c6e
A
7687 /*
7688 * hibernate_teardown_vm_structs leaves the location where
7689 * this vm_page_t must be located in "vmp_next_m".
7690 */
d9a64523
A
7691 tmem = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->vmp_next_m));
7692 mem->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
39236c6e
A
7693
7694 sindx = (int)(tmem - &vm_pages[0]);
7695
7696 if (mem != tmem) {
7697 /*
7698 * this vm_page_t was moved by hibernate_teardown_vm_structs,
7699 * so move it back to its real location
7700 */
7701 *tmem = *mem;
7702 mem = tmem;
7703 }
d9a64523 7704 if (mem->vmp_hashed)
39236c6e
A
7705 hibernate_hash_insert_page(mem);
7706 /*
7707 * the 'hole' between this vm_page_t and the previous
7708 * vm_page_t we moved needs to be initialized as
7709 * a range of free vm_page_t's
7710 */
7711 hibernate_free_range(sindx + 1, eindx);
7712
7713 eindx = sindx;
7714 }
7715 if (sindx)
7716 hibernate_free_range(0, sindx);
7717
7718 assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);
7719
7720 /*
15129b1c 7721 * process the list of vm_page_t's that were entered in the hash,
39236c6e
A
7722 * but were not located in the vm_pages array... these are
7723 * vm_page_t's that were created on the fly (i.e. fictitious)
7724 */
7725 for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
d9a64523 7726 mem_next = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->vmp_next_m));
39236c6e 7727
d9a64523 7728 mem->vmp_next_m = 0;
39236c6e
A
7729 hibernate_hash_insert_page(mem);
7730 }
7731 hibernate_rebuild_hash_list = NULL;
7732
7733 clock_get_uptime(&endTime);
7734 SUB_ABSOLUTETIME(&endTime, &startTime);
7735 absolutetime_to_nanoseconds(endTime, &nsec);
7736
7737 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);
7738
7739 hibernate_rebuild_needed = FALSE;
7740
5ba3f43e 7741 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END);
39236c6e
A
7742}
7743
7744
7745extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);
7746
7747uint32_t
7748hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
7749{
7750 unsigned int i;
7751 unsigned int compact_target_indx;
7752 vm_page_t mem, mem_next;
7753 vm_page_bucket_t *bucket;
7754 unsigned int mark_as_unneeded_pages = 0;
7755 unsigned int unneeded_vm_page_bucket_pages = 0;
7756 unsigned int unneeded_vm_pages_pages = 0;
7757 unsigned int unneeded_pmap_pages = 0;
7758 addr64_t start_of_unneeded = 0;
7759 addr64_t end_of_unneeded = 0;
7760
7761
7762 if (hibernate_should_abort())
7763 return (0);
7764
5ba3f43e
A
7765 hibernate_rebuild_needed = TRUE;
7766
39236c6e
A
7767 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
7768 vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
7769 vm_page_cleaned_count, compressor_object->resident_page_count);
7770
7771 for (i = 0; i < vm_page_bucket_count; i++) {
7772
7773 bucket = &vm_page_buckets[i];
7774
39037602 7775 for (mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list)); mem != VM_PAGE_NULL; mem = mem_next) {
d9a64523 7776 assert(mem->vmp_hashed);
39236c6e 7777
d9a64523 7778 mem_next = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->vmp_next_m));
39236c6e
A
7779
7780 if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
d9a64523 7781 mem->vmp_next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
39236c6e
A
7782 hibernate_rebuild_hash_list = mem;
7783 }
7784 }
7785 }
7786 unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
7787 mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;
7788
7789 hibernate_teardown_vm_page_free_count = vm_page_free_count;
7790
7791 compact_target_indx = 0;
7792
7793 for (i = 0; i < vm_pages_count; i++) {
7794
7795 mem = &vm_pages[i];
7796
d9a64523 7797 if (mem->vmp_q_state == VM_PAGE_ON_FREE_Q) {
39236c6e
A
7798 unsigned int color;
7799
d9a64523
A
7800 assert(mem->vmp_busy);
7801 assert(!mem->vmp_lopage);
39236c6e 7802
5ba3f43e 7803 color = VM_PAGE_GET_COLOR(mem);
39037602
A
7804
7805 vm_page_queue_remove(&vm_page_queue_free[color].qhead,
7806 mem,
7807 vm_page_t,
d9a64523 7808 vmp_pageq);
39236c6e 7809
39037602 7810 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
39236c6e
A
7811
7812 vm_page_free_count--;
7813
7814 hibernate_teardown_found_free_pages++;
7815
d9a64523 7816 if (vm_pages[compact_target_indx].vmp_q_state != VM_PAGE_ON_FREE_Q)
39236c6e
A
7817 compact_target_indx = i;
7818 } else {
7819 /*
7820 * record this vm_page_t's original location
7821 * we need this even if it doesn't get moved
7822 * as an indicator to the rebuild function that
7823 * we don't have to move it
7824 */
d9a64523 7825 mem->vmp_next_m = VM_PAGE_PACK_PTR(mem);
39236c6e 7826
d9a64523 7827 if (vm_pages[compact_target_indx].vmp_q_state == VM_PAGE_ON_FREE_Q) {
39236c6e
A
7828 /*
7829 * we've got a hole to fill, so
7830 * move this vm_page_t to its new home
7831 */
7832 vm_pages[compact_target_indx] = *mem;
d9a64523 7833 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
39236c6e
A
7834
7835 hibernate_teardown_last_valid_compact_indx = compact_target_indx;
7836 compact_target_indx++;
7837 } else
7838 hibernate_teardown_last_valid_compact_indx = i;
7839 }
7840 }
7841 unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
7842 (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
7843 mark_as_unneeded_pages += unneeded_vm_pages_pages;
7844
7845 hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);
7846
7847 if (start_of_unneeded) {
7848 unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
7849 mark_as_unneeded_pages += unneeded_pmap_pages;
7850 }
7851 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);
7852
39236c6e
A
7853 return (mark_as_unneeded_pages);
7854}
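
The compaction loop above slides every in-use vm_page_t down into the lowest free slot, frees the vacated slot, and stashes the page's original location in vmp_next_m so hibernate_rebuild_vm_structs() can put it back after wake. The toy model below reproduces that compaction on a character array; 'F' marks a free slot and came_from[] plays the role of the vmp_next_m bookkeeping.

#include <stdio.h>

#define N 8

int
main(void)
{
	char pages[N + 1] = "AFBFFCDE";		/* live records with free ('F') holes */
	int  came_from[N];			/* original index of each compacted slot */
	int  target = 0, last_valid = -1;

	for (int i = 0; i < N; i++) {
		if (pages[i] == 'F')
			continue;		/* free slot: leave the hole for now */
		if (pages[target] == 'F') {	/* hole below us: move the record down */
			pages[target] = pages[i];
			pages[i] = 'F';
			came_from[target] = i;
			last_valid = target++;
		} else {			/* already compacted: record it in place */
			came_from[i] = i;
			last_valid = i;
			target = i + 1;
		}
	}
	printf("compacted: %s, last valid index %d\n", pages, last_valid);	/* ABCDEFFF, 4 */
	printf("slot 1 came from index %d\n", came_from[1]);			/* 2 */
	return (0);
}
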
7855
7856
d1ecb069
A
7857#endif /* HIBERNATION */
7858
b0d623f7 7859/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
1c79356b
A
7860
7861#include <mach_vm_debug.h>
7862#if MACH_VM_DEBUG
7863
7864#include <mach_debug/hash_info.h>
7865#include <vm/vm_debug.h>
7866
7867/*
7868 * Routine: vm_page_info
7869 * Purpose:
7870 * Return information about the global VP table.
7871 * Fills the buffer with as much information as possible
7872 * and returns the desired size of the buffer.
7873 * Conditions:
7874 * Nothing locked. The caller should provide
7875 * possibly-pageable memory.
7876 */
7877
7878unsigned int
7879vm_page_info(
7880 hash_info_bucket_t *info,
7881 unsigned int count)
7882{
91447636 7883 unsigned int i;
b0d623f7 7884 lck_spin_t *bucket_lock;
1c79356b
A
7885
7886 if (vm_page_bucket_count < count)
7887 count = vm_page_bucket_count;
7888
7889 for (i = 0; i < count; i++) {
7890 vm_page_bucket_t *bucket = &vm_page_buckets[i];
7891 unsigned int bucket_count = 0;
7892 vm_page_t m;
7893
b0d623f7
A
7894 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
7895 lck_spin_lock(bucket_lock);
7896
39037602
A
7897 for (m = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
7898 m != VM_PAGE_NULL;
d9a64523 7899 m = (vm_page_t)(VM_PAGE_UNPACK_PTR(m->vmp_next_m)))
1c79356b 7900 bucket_count++;
b0d623f7
A
7901
7902 lck_spin_unlock(bucket_lock);
1c79356b
A
7903
7904 /* don't touch pageable memory while holding locks */
7905 info[i].hib_count = bucket_count;
7906 }
7907
7908 return vm_page_bucket_count;
7909}
7910#endif /* MACH_VM_DEBUG */
15129b1c
A
7911
7912#if VM_PAGE_BUCKETS_CHECK
7913void
7914vm_page_buckets_check(void)
7915{
7916 unsigned int i;
7917 vm_page_t p;
7918 unsigned int p_hash;
7919 vm_page_bucket_t *bucket;
7920 lck_spin_t *bucket_lock;
7921
7922 if (!vm_page_buckets_check_ready) {
7923 return;
7924 }
7925
7926#if HIBERNATION
7927 if (hibernate_rebuild_needed ||
7928 hibernate_rebuild_hash_list) {
7929 panic("BUCKET_CHECK: hibernation in progress: "
7930 "rebuild_needed=%d rebuild_hash_list=%p\n",
7931 hibernate_rebuild_needed,
7932 hibernate_rebuild_hash_list);
7933 }
7934#endif /* HIBERNATION */
7935
7936#if VM_PAGE_FAKE_BUCKETS
7937 char *cp;
7938 for (cp = (char *) vm_page_fake_buckets_start;
7939 cp < (char *) vm_page_fake_buckets_end;
7940 cp++) {
7941 if (*cp != 0x5a) {
7942 panic("BUCKET_CHECK: corruption at %p in fake buckets "
7943 "[0x%llx:0x%llx]\n",
7944 cp,
fe8ab488
A
7945 (uint64_t) vm_page_fake_buckets_start,
7946 (uint64_t) vm_page_fake_buckets_end);
15129b1c
A
7947 }
7948 }
7949#endif /* VM_PAGE_FAKE_BUCKETS */
7950
7951 for (i = 0; i < vm_page_bucket_count; i++) {
39037602
A
7952 vm_object_t p_object;
7953
15129b1c 7954 bucket = &vm_page_buckets[i];
fe8ab488 7955 if (!bucket->page_list) {
15129b1c
A
7956 continue;
7957 }
7958
7959 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
7960 lck_spin_lock(bucket_lock);
39037602
A
7961 p = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
7962
15129b1c 7963 while (p != VM_PAGE_NULL) {
39037602
A
7964 p_object = VM_PAGE_OBJECT(p);
7965
d9a64523 7966 if (!p->vmp_hashed) {
15129b1c
A
7967 panic("BUCKET_CHECK: page %p (%p,0x%llx) "
7968 "hash %d in bucket %d at %p "
7969 "is not hashed\n",
d9a64523 7970 p, p_object, p->vmp_offset,
15129b1c
A
7971 p_hash, i, bucket);
7972 }
d9a64523 7973 p_hash = vm_page_hash(p_object, p->vmp_offset);
15129b1c
A
7974 if (p_hash != i) {
7975 panic("BUCKET_CHECK: corruption in bucket %d "
7976 "at %p: page %p object %p offset 0x%llx "
7977 "hash %d\n",
d9a64523 7978 i, bucket, p, p_object, p->vmp_offset,
15129b1c
A
7979 p_hash);
7980 }
d9a64523 7981 p = (vm_page_t)(VM_PAGE_UNPACK_PTR(p->vmp_next_m));
15129b1c
A
7982 }
7983 lck_spin_unlock(bucket_lock);
7984 }
7985
7986// printf("BUCKET_CHECK: checked buckets\n");
7987}
7988#endif /* VM_PAGE_BUCKETS_CHECK */
3e170ce0
A
7989
7990/*
7991 * 'vm_fault_enter' will place newly created pages (zero-fill and COW) onto the
7992 * local queues if they exist... it's the only spot in the system where we add pages
7993 * to those queues... once on those queues, those pages can only move to one of the
7994 * global page queues or the free queues... they NEVER move from local q to local q.
7995 * the 'local' state is stable when vm_page_queues_remove is called since we're behind
7996 * the global vm_page_queue_lock at this point... we still need to take the local lock
7997 * in case this operation is being run on a different CPU than the local queue's identity,
7998 * but we don't have to worry about the page moving to a global queue or becoming wired
7999 * while we're grabbing the local lock since those operations would require the global
8000 * vm_page_queue_lock to be held, and we already own it.
8001 *
8002 * this is why it's safe to utilize the wire_count field in the vm_page_t as the local_id...
8003 * 'wired' and local are ALWAYS mutually exclusive conditions.
8004 */
39037602
A
8005
8006#if CONFIG_BACKGROUND_QUEUE
8007void
8008vm_page_queues_remove(vm_page_t mem, boolean_t remove_from_backgroundq)
8009#else
3e170ce0 8010void
39037602
A
8011vm_page_queues_remove(vm_page_t mem, boolean_t __unused remove_from_backgroundq)
8012#endif
3e170ce0 8013{
39037602
A
8014 boolean_t was_pageable = TRUE;
8015 vm_object_t m_object;
3e170ce0 8016
39037602
A
8017 m_object = VM_PAGE_OBJECT(mem);
8018
8019 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
8020
d9a64523 8021 if (mem->vmp_q_state == VM_PAGE_NOT_ON_Q)
39037602 8022 {
d9a64523 8023 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
39037602 8024#if CONFIG_BACKGROUND_QUEUE
743345f9
A
8025 if (remove_from_backgroundq == TRUE) {
8026 vm_page_remove_from_backgroundq(mem);
39037602 8027 }
d9a64523
A
8028 if (mem->vmp_on_backgroundq) {
8029 assert(mem->vmp_backgroundq.next != 0);
8030 assert(mem->vmp_backgroundq.prev != 0);
743345f9 8031 } else {
d9a64523
A
8032 assert(mem->vmp_backgroundq.next == 0);
8033 assert(mem->vmp_backgroundq.prev == 0);
743345f9
A
8034 }
8035#endif /* CONFIG_BACKGROUND_QUEUE */
39037602
A
8036 return;
8037 }
d190cdc3 8038
d9a64523 8039 if (mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR)
39037602 8040 {
d9a64523 8041 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
39037602 8042#if CONFIG_BACKGROUND_QUEUE
d9a64523
A
8043 assert(mem->vmp_backgroundq.next == 0 &&
8044 mem->vmp_backgroundq.prev == 0 &&
8045 mem->vmp_on_backgroundq == FALSE);
39037602
A
8046#endif
8047 return;
8048 }
d9a64523 8049 if (mem->vmp_q_state == VM_PAGE_IS_WIRED) {
39037602
A
8050 /*
8051 * might put these guys on a list for debugging purposes
8052 * if we do, we'll need to remove this assert
8053 */
d9a64523 8054 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
39037602 8055#if CONFIG_BACKGROUND_QUEUE
d9a64523
A
8056 assert(mem->vmp_backgroundq.next == 0 &&
8057 mem->vmp_backgroundq.prev == 0 &&
8058 mem->vmp_on_backgroundq == FALSE);
39037602
A
8059#endif
8060 return;
8061 }
8062
8063 assert(m_object != compressor_object);
8064 assert(m_object != kernel_object);
8065 assert(m_object != vm_submap_object);
d9a64523 8066 assert(!mem->vmp_fictitious);
39037602 8067
d9a64523 8068 switch(mem->vmp_q_state) {
39037602
A
8069
8070 case VM_PAGE_ON_ACTIVE_LOCAL_Q:
8071 {
3e170ce0 8072 struct vpl *lq;
39037602 8073
d9a64523 8074 lq = &vm_page_local_q[mem->vmp_local_id].vpl_un.vpl;
3e170ce0 8075 VPL_LOCK(&lq->vpl_lock);
39037602 8076 vm_page_queue_remove(&lq->vpl_queue,
d9a64523
A
8077 mem, vm_page_t, vmp_pageq);
8078 mem->vmp_local_id = 0;
3e170ce0 8079 lq->vpl_count--;
39037602 8080 if (m_object->internal) {
3e170ce0
A
8081 lq->vpl_internal_count--;
8082 } else {
8083 lq->vpl_external_count--;
8084 }
8085 VPL_UNLOCK(&lq->vpl_lock);
8086 was_pageable = FALSE;
39037602 8087 break;
3e170ce0 8088 }
39037602
A
8089 case VM_PAGE_ON_ACTIVE_Q:
8090 {
8091 vm_page_queue_remove(&vm_page_queue_active,
d9a64523 8092 mem, vm_page_t, vmp_pageq);
3e170ce0 8093 vm_page_active_count--;
39037602 8094 break;
3e170ce0
A
8095 }
8096
39037602
A
8097 case VM_PAGE_ON_INACTIVE_INTERNAL_Q:
8098 {
8099 assert(m_object->internal == TRUE);
8100
3e170ce0 8101 vm_page_inactive_count--;
39037602 8102 vm_page_queue_remove(&vm_page_queue_anonymous,
d9a64523 8103 mem, vm_page_t, vmp_pageq);
39037602 8104 vm_page_anonymous_count--;
d9a64523 8105
39037602 8106 vm_purgeable_q_advance_all();
d9a64523 8107 vm_page_balance_inactive(3);
39037602
A
8108 break;
8109 }
8110
8111 case VM_PAGE_ON_INACTIVE_EXTERNAL_Q:
8112 {
8113 assert(m_object->internal == FALSE);
8114
8115 vm_page_inactive_count--;
8116 vm_page_queue_remove(&vm_page_queue_inactive,
d9a64523 8117 mem, vm_page_t, vmp_pageq);
39037602 8118 vm_purgeable_q_advance_all();
d9a64523 8119 vm_page_balance_inactive(3);
39037602
A
8120 break;
8121 }
8122
8123 case VM_PAGE_ON_INACTIVE_CLEANED_Q:
8124 {
8125 assert(m_object->internal == FALSE);
8126
8127 vm_page_inactive_count--;
8128 vm_page_queue_remove(&vm_page_queue_cleaned,
d9a64523 8129 mem, vm_page_t, vmp_pageq);
39037602 8130 vm_page_cleaned_count--;
d9a64523 8131 vm_page_balance_inactive(3);
39037602
A
8132 break;
8133 }
8134
8135 case VM_PAGE_ON_THROTTLED_Q:
8136 {
8137 assert(m_object->internal == TRUE);
8138
8139 vm_page_queue_remove(&vm_page_queue_throttled,
d9a64523 8140 mem, vm_page_t, vmp_pageq);
3e170ce0
A
8141 vm_page_throttled_count--;
8142 was_pageable = FALSE;
39037602 8143 break;
3e170ce0
A
8144 }
8145
39037602
A
8146 case VM_PAGE_ON_SPECULATIVE_Q:
8147 {
8148 assert(m_object->internal == FALSE);
8149
d9a64523 8150 vm_page_remque(&mem->vmp_pageq);
3e170ce0 8151 vm_page_speculative_count--;
d9a64523 8152 vm_page_balance_inactive(3);
39037602
A
8153 break;
8154 }
8155
8156#if CONFIG_SECLUDED_MEMORY
8157 case VM_PAGE_ON_SECLUDED_Q:
8158 {
8159 vm_page_queue_remove(&vm_page_queue_secluded,
d9a64523 8160 mem, vm_page_t, vmp_pageq);
39037602
A
8161 vm_page_secluded_count--;
8162 if (m_object == VM_OBJECT_NULL) {
8163 vm_page_secluded_count_free--;
8164 was_pageable = FALSE;
8165 } else {
8166 assert(!m_object->internal);
8167 vm_page_secluded_count_inuse--;
8168 was_pageable = FALSE;
8169// was_pageable = TRUE;
8170 }
8171 break;
8172 }
8173#endif /* CONFIG_SECLUDED_MEMORY */
8174
8175 default:
8176 {
8177 /*
d9a64523 8178 * if (mem->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)
39037602
A
8179 * NOTE: vm_page_queues_remove does not deal with removing pages from the pageout queue...
8180 * the caller is responsible for determining if the page is on that queue, and if so, must
8181 * either first remove it (it needs both the page queues lock and the object lock to do
8182 * this via vm_pageout_steal_laundry), or avoid the call to vm_page_queues_remove
8183 *
8184 * we also don't expect to encounter VM_PAGE_ON_FREE_Q, VM_PAGE_ON_FREE_LOCAL_Q, VM_PAGE_ON_FREE_LOPAGE_Q
8185 * or any of the undefined states
8186 */
d9a64523 8187 panic("vm_page_queues_remove - bad page q_state (%p, %d)\n", mem, mem->vmp_q_state);
39037602 8188 break;
3e170ce0
A
8189 }
8190
3e170ce0 8191 }
39037602 8192 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
d9a64523 8193 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
3e170ce0 8194
39037602
A
8195#if CONFIG_BACKGROUND_QUEUE
8196 if (remove_from_backgroundq == TRUE)
8197 vm_page_remove_from_backgroundq(mem);
8198#endif
3e170ce0 8199 if (was_pageable) {
39037602 8200 if (m_object->internal) {
3e170ce0
A
8201 vm_page_pageable_internal_count--;
8202 } else {
8203 vm_page_pageable_external_count--;
8204 }
8205 }
8206}
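
vm_page_queues_remove() asserts that the global vm_page_queue_lock is already held and, as the comment above spells out, it refuses to handle pages sitting on the pageout queue. The sketch below illustrates the expected calling pattern using only calls that appear in this file; it is a description of the preconditions, not a new xnu routine.

static void
vm_page_queues_remove_usage_sketch(vm_page_t m)
{
	vm_page_lock_queues();			/* vm_page_queues_remove asserts this lock is owned */

	if (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) {
		/* caller's job: steal the page from the laundry first (which also
		 * needs the object lock) or skip the removal entirely */
		vm_page_unlock_queues();
		return;
	}
	vm_page_queues_remove(m, TRUE);		/* TRUE: also drop it from the background queue */

	vm_page_unlock_queues();
}
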
8207
8208void
8209vm_page_remove_internal(vm_page_t page)
8210{
39037602 8211 vm_object_t __object = VM_PAGE_OBJECT(page);
3e170ce0
A
8212 if (page == __object->memq_hint) {
8213 vm_page_t __new_hint;
39037602 8214 vm_page_queue_entry_t __qe;
d9a64523 8215 __qe = (vm_page_queue_entry_t)vm_page_queue_next(&page->vmp_listq);
39037602 8216 if (vm_page_queue_end(&__object->memq, __qe)) {
d9a64523 8217 __qe = (vm_page_queue_entry_t)vm_page_queue_prev(&page->vmp_listq);
39037602 8218 if (vm_page_queue_end(&__object->memq, __qe)) {
3e170ce0
A
8219 __qe = NULL;
8220 }
8221 }
39037602 8222 __new_hint = (vm_page_t)((uintptr_t) __qe);
3e170ce0
A
8223 __object->memq_hint = __new_hint;
8224 }
d9a64523 8225 vm_page_queue_remove(&__object->memq, page, vm_page_t, vmp_listq);
39037602
A
8226#if CONFIG_SECLUDED_MEMORY
8227 if (__object->eligible_for_secluded) {
8228 vm_page_secluded.eligible_for_secluded--;
8229 }
8230#endif /* CONFIG_SECLUDED_MEMORY */
3e170ce0
A
8231}
8232
8233void
8234vm_page_enqueue_inactive(vm_page_t mem, boolean_t first)
8235{
39037602
A
8236 vm_object_t m_object;
8237
8238 m_object = VM_PAGE_OBJECT(mem);
8239
8240 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
d9a64523
A
8241 assert(!mem->vmp_fictitious);
8242 assert(!mem->vmp_laundry);
8243 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
3e170ce0 8244 vm_page_check_pageable_safe(mem);
39037602 8245
39037602 8246 if (m_object->internal) {
d9a64523 8247 mem->vmp_q_state = VM_PAGE_ON_INACTIVE_INTERNAL_Q;
39037602 8248
3e170ce0 8249 if (first == TRUE)
d9a64523 8250 vm_page_queue_enter_first(&vm_page_queue_anonymous, mem, vm_page_t, vmp_pageq);
3e170ce0 8251 else
d9a64523 8252 vm_page_queue_enter(&vm_page_queue_anonymous, mem, vm_page_t, vmp_pageq);
39037602 8253
3e170ce0
A
8254 vm_page_anonymous_count++;
8255 vm_page_pageable_internal_count++;
8256 } else {
d9a64523 8257 mem->vmp_q_state = VM_PAGE_ON_INACTIVE_EXTERNAL_Q;
39037602 8258
3e170ce0 8259 if (first == TRUE)
d9a64523 8260 vm_page_queue_enter_first(&vm_page_queue_inactive, mem, vm_page_t, vmp_pageq);
3e170ce0 8261 else
d9a64523 8262 vm_page_queue_enter(&vm_page_queue_inactive, mem, vm_page_t, vmp_pageq);
39037602 8263
3e170ce0
A
8264 vm_page_pageable_external_count++;
8265 }
3e170ce0
A
8266 vm_page_inactive_count++;
8267 token_new_pagecount++;
39037602
A
8268
8269#if CONFIG_BACKGROUND_QUEUE
d9a64523 8270 if (mem->vmp_in_background)
39037602
A
8271 vm_page_add_to_backgroundq(mem, FALSE);
8272#endif
8273}
8274
8275void
8276vm_page_enqueue_active(vm_page_t mem, boolean_t first)
8277{
8278 vm_object_t m_object;
8279
8280 m_object = VM_PAGE_OBJECT(mem);
8281
8282 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
d9a64523
A
8283 assert(!mem->vmp_fictitious);
8284 assert(!mem->vmp_laundry);
8285 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
39037602
A
8286 vm_page_check_pageable_safe(mem);
8287
d9a64523 8288 mem->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
39037602 8289 if (first == TRUE)
d9a64523 8290 vm_page_queue_enter_first(&vm_page_queue_active, mem, vm_page_t, vmp_pageq);
39037602 8291 else
d9a64523 8292 vm_page_queue_enter(&vm_page_queue_active, mem, vm_page_t, vmp_pageq);
39037602
A
8293 vm_page_active_count++;
8294
8295 if (m_object->internal) {
8296 vm_page_pageable_internal_count++;
8297 } else {
8298 vm_page_pageable_external_count++;
8299 }
8300
8301#if CONFIG_BACKGROUND_QUEUE
d9a64523 8302 if (mem->vmp_in_background)
39037602
A
8303 vm_page_add_to_backgroundq(mem, FALSE);
8304#endif
d9a64523 8305 vm_page_balance_inactive(3);
3e170ce0
A
8306}
8307
8308/*
8309 * Pages from special kernel objects shouldn't
8310 * be placed on pageable queues.
8311 */
8312void
8313vm_page_check_pageable_safe(vm_page_t page)
8314{
39037602
A
8315 vm_object_t page_object;
8316
8317 page_object = VM_PAGE_OBJECT(page);
8318
8319 if (page_object == kernel_object) {
3e170ce0
A
8320 panic("vm_page_check_pageable_safe: trying to add page " \
8321 "from kernel object (%p) to pageable queue", kernel_object);
8322 }
8323
39037602 8324 if (page_object == compressor_object) {
3e170ce0
A
8325 panic("vm_page_check_pageable_safe: trying to add page " \
8326 "from compressor object (%p) to pageable queue", compressor_object);
8327 }
8328
39037602 8329 if (page_object == vm_submap_object) {
3e170ce0
A
8330 panic("vm_page_check_pageable_safe: trying to add page " \
8331 "from submap object (%p) to pageable queue", vm_submap_object);
8332 }
8333}
8334
8335/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
8336 * wired page diagnose
8337 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
8338
8339#include <libkern/OSKextLibPrivate.h>
8340
5ba3f43e
A
8341#define KA_SIZE(namelen, subtotalscount) \
8342 (sizeof(struct vm_allocation_site) + (namelen) + 1 + ((subtotalscount) * sizeof(struct vm_allocation_total)))
8343
8344#define KA_NAME(alloc) \
8345 ((char *)(&(alloc)->subtotals[(alloc->subtotalscount)]))
8346
8347#define KA_NAME_LEN(alloc) \
8348 (VM_TAG_NAME_LEN_MAX & (alloc->flags >> VM_TAG_NAME_LEN_SHIFT))
3e170ce0
A
8349
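/*
 * Worked example of the layout these macros encode (assuming subtotals[] is a
 * trailing array inside struct vm_allocation_site, as KA_NAME() implies): a
 * site record is the fixed header, then `subtotalscount` entries of
 * struct vm_allocation_total, then the NUL-terminated name.  For a made-up
 * 18-character name and two subtotal slots:
 *
 *   KA_SIZE(18, 2) == sizeof(struct vm_allocation_site)
 *                   + 18 + 1                                  (name + NUL)
 *                   + 2 * sizeof(struct vm_allocation_total)  (subtotal slots)
 *
 * and KA_NAME() returns the byte immediately after the last subtotal, which is
 * where kern_allocation_name_allocate() below copies the name.
 */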
8350vm_tag_t
8351vm_tag_bt(void)
8352{
8353 uintptr_t* frameptr;
8354 uintptr_t* frameptr_next;
8355 uintptr_t retaddr;
8356 uintptr_t kstackb, kstackt;
8357 const vm_allocation_site_t * site;
8358 thread_t cthread;
5ba3f43e 8359 kern_allocation_name_t name;
3e170ce0
A
8360
8361 cthread = current_thread();
8362 if (__improbable(cthread == NULL)) return VM_KERN_MEMORY_OSFMK;
8363
5ba3f43e
A
8364 if ((name = thread_get_kernel_state(cthread)->allocation_name))
8365 {
8366 if (!name->tag) vm_tag_alloc(name);
8367 return name->tag;
8368 }
8369
3e170ce0
A
8370 kstackb = cthread->kernel_stack;
8371 kstackt = kstackb + kernel_stack_size;
8372
8373 /* Load the stack frame pointer (EBP on x86, RBP on x86_64) into frameptr */
8374 frameptr = __builtin_frame_address(0);
8375 site = NULL;
8376 while (frameptr != NULL)
8377 {
8378 /* Verify thread stack bounds */
8379 if (((uintptr_t)(frameptr + 2) > kstackt) || ((uintptr_t)frameptr < kstackb)) break;
8380
8381 /* Next frame pointer is pointed to by the previous one */
8382 frameptr_next = (uintptr_t*) *frameptr;
8383
8384 /* Pull return address from one spot above the frame pointer */
8385 retaddr = *(frameptr + 1);
8386
5ba3f43e 8387
d9a64523
A
8388 if (((retaddr < vm_kernel_builtinkmod_text_end) && (retaddr >= vm_kernel_builtinkmod_text))
8389 || (retaddr < vm_kernel_stext) || (retaddr > vm_kernel_top))
3e170ce0 8390 {
5ba3f43e
A
8391 site = OSKextGetAllocationSiteForCaller(retaddr);
8392 break;
3e170ce0 8393 }
3e170ce0
A
8394 frameptr = frameptr_next;
8395 }
5ba3f43e 8396
3e170ce0
A
8397 return (site ? site->tag : VM_KERN_MEMORY_NONE);
8398}
8399
5ba3f43e 8400static uint64_t free_tag_bits[VM_MAX_TAG_VALUE/64];
3e170ce0
A
8401
8402void
5ba3f43e 8403vm_tag_alloc_locked(vm_allocation_site_t * site, vm_allocation_site_t ** releasesiteP)
3e170ce0
A
8404{
8405 vm_tag_t tag;
8406 uint64_t avail;
5ba3f43e
A
8407 uint32_t idx;
8408 vm_allocation_site_t * prev;
3e170ce0
A
8409
8410 if (site->tag) return;
8411
8412 idx = 0;
8413 while (TRUE)
8414 {
5ba3f43e
A
8415 avail = free_tag_bits[idx];
8416 if (avail)
8417 {
8418 tag = __builtin_clzll(avail);
8419 avail &= ~(1ULL << (63 - tag));
8420 free_tag_bits[idx] = avail;
8421 tag += (idx << 6);
8422 break;
8423 }
8424 idx++;
8425 if (idx >= ARRAY_COUNT(free_tag_bits))
8426 {
8427 for (idx = 0; idx < ARRAY_COUNT(vm_allocation_sites); idx++)
8428 {
8429 prev = vm_allocation_sites[idx];
8430 if (!prev) continue;
8431 if (!KA_NAME_LEN(prev)) continue;
8432 if (!prev->tag) continue;
8433 if (prev->total) continue;
8434 if (1 != prev->refcount) continue;
8435
8436 assert(idx == prev->tag);
8437 tag = idx;
8438 prev->tag = VM_KERN_MEMORY_NONE;
8439 *releasesiteP = prev;
8440 break;
8441 }
8442 if (idx >= ARRAY_COUNT(vm_allocation_sites))
8443 {
8444 tag = VM_KERN_MEMORY_ANY;
8445 }
8446 break;
8447 }
3e170ce0
A
8448 }
8449 site->tag = tag;
5ba3f43e
A
8450
8451 OSAddAtomic16(1, &site->refcount);
8452
8453 if (VM_KERN_MEMORY_ANY != tag) vm_allocation_sites[tag] = site;
8454
8455 if (tag > vm_allocation_tag_highest) vm_allocation_tag_highest = tag;
3e170ce0
A
8456}
8457
8458static void
8459vm_tag_free_locked(vm_tag_t tag)
8460{
8461 uint64_t avail;
8462 uint32_t idx;
8463 uint64_t bit;
8464
8465 if (VM_KERN_MEMORY_ANY == tag) return;
8466
8467 idx = (tag >> 6);
8468 avail = free_tag_bits[idx];
8469 tag &= 63;
8470 bit = (1ULL << (63 - tag));
8471 assert(!(avail & bit));
8472 free_tag_bits[idx] = (avail | bit);
8473}
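
/*
 * A minimal sketch of the bitmap encoding shared by vm_tag_alloc_locked() and
 * vm_tag_free_locked() above: tag T lives in free_tag_bits[T >> 6] at bit
 * (1ULL << (63 - (T & 63))), with a set bit meaning "free", so
 * __builtin_clzll() on a word yields the lowest-numbered free tag in that
 * word.  The helper name is illustrative only.
 */
#if 0 /* illustrative only */
static inline boolean_t
example_tag_is_free(vm_tag_t tag)
{
	uint64_t word = free_tag_bits[tag >> 6];
	uint64_t bit  = 1ULL << (63 - (tag & 63));

	return ((word & bit) != 0);
}
#endif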
8474
8475static void
8476vm_tag_init(void)
8477{
8478 vm_tag_t tag;
8479 for (tag = VM_KERN_MEMORY_FIRST_DYNAMIC; tag < VM_KERN_MEMORY_ANY; tag++)
8480 {
8481 vm_tag_free_locked(tag);
8482 }
5ba3f43e
A
8483
8484 for (tag = VM_KERN_MEMORY_ANY + 1; tag < VM_MAX_TAG_VALUE; tag++)
8485 {
8486 vm_tag_free_locked(tag);
8487 }
3e170ce0
A
8488}
8489
8490vm_tag_t
8491vm_tag_alloc(vm_allocation_site_t * site)
8492{
8493 vm_tag_t tag;
5ba3f43e 8494 vm_allocation_site_t * releasesite;
3e170ce0
A
8495
8496 if (VM_TAG_BT & site->flags)
8497 {
5ba3f43e
A
8498 tag = vm_tag_bt();
8499 if (VM_KERN_MEMORY_NONE != tag) return (tag);
3e170ce0
A
8500 }
8501
8502 if (!site->tag)
8503 {
5ba3f43e
A
8504 releasesite = NULL;
8505 lck_spin_lock(&vm_allocation_sites_lock);
8506 vm_tag_alloc_locked(site, &releasesite);
8507 lck_spin_unlock(&vm_allocation_sites_lock);
8508 if (releasesite) kern_allocation_name_release(releasesite);
3e170ce0
A
8509 }
8510
8511 return (site->tag);
8512}
8513
5ba3f43e
A
8514void
8515vm_tag_update_size(vm_tag_t tag, int64_t delta)
8516{
8517 vm_allocation_site_t * allocation;
8518 uint64_t prior;
8519
8520 assert(VM_KERN_MEMORY_NONE != tag);
8521 assert(tag < VM_MAX_TAG_VALUE);
8522
8523 allocation = vm_allocation_sites[tag];
8524 assert(allocation);
8525
8526 if (delta < 0) {
8527 assertf(allocation->total >= ((uint64_t)-delta), "tag %d, site %p", tag, allocation);
8528 }
8529 prior = OSAddAtomic64(delta, &allocation->total);
8530
8531#if DEBUG || DEVELOPMENT
8532
8533 uint64_t new, peak;
8534 new = prior + delta;
8535 do
8536 {
8537 peak = allocation->peak;
8538 if (new <= peak) break;
8539 }
8540 while (!OSCompareAndSwap64(peak, new, &allocation->peak));
8541
8542#endif /* DEBUG || DEVELOPMENT */
8543
8544 if (tag < VM_KERN_MEMORY_FIRST_DYNAMIC) return;
8545
8546 if (!prior && !allocation->tag) vm_tag_alloc(allocation);
8547}
8548
8549void
8550kern_allocation_update_size(kern_allocation_name_t allocation, int64_t delta)
8551{
8552 uint64_t prior;
8553
8554 if (delta < 0) {
8555 assertf(allocation->total >= ((uint64_t)-delta), "name %p", allocation);
8556 }
8557 prior = OSAddAtomic64(delta, &allocation->total);
8558
8559#if DEBUG || DEVELOPMENT
8560
8561 uint64_t new, peak;
8562 new = prior + delta;
8563 do
8564 {
8565 peak = allocation->peak;
8566 if (new <= peak) break;
8567 }
8568 while (!OSCompareAndSwap64(peak, new, &allocation->peak));
8569
8570#endif /* DEBUG || DEVELOPMENT */
8571
8572 if (!prior && !allocation->tag) vm_tag_alloc(allocation);
8573}
8574
8575#if VM_MAX_TAG_ZONES
8576
8577void
8578vm_allocation_zones_init(void)
8579{
8580 kern_return_t ret;
8581 vm_offset_t addr;
8582 vm_size_t size;
8583
8584 size = VM_MAX_TAG_VALUE * sizeof(vm_allocation_zone_total_t **)
8585 + 2 * VM_MAX_TAG_ZONES * sizeof(vm_allocation_zone_total_t);
8586
8587 ret = kernel_memory_allocate(kernel_map,
8588 &addr, round_page(size), 0,
8589 KMA_ZERO, VM_KERN_MEMORY_DIAG);
8590 assert(KERN_SUCCESS == ret);
8591
8592 vm_allocation_zone_totals = (vm_allocation_zone_total_t **) addr;
8593 addr += VM_MAX_TAG_VALUE * sizeof(vm_allocation_zone_total_t **);
8594
8595 // prepopulate VM_KERN_MEMORY_DIAG & VM_KERN_MEMORY_KALLOC so allocations
8596 // in vm_tag_update_zone_size() won't recurse
8597 vm_allocation_zone_totals[VM_KERN_MEMORY_DIAG] = (vm_allocation_zone_total_t *) addr;
8598 addr += VM_MAX_TAG_ZONES * sizeof(vm_allocation_zone_total_t);
8599 vm_allocation_zone_totals[VM_KERN_MEMORY_KALLOC] = (vm_allocation_zone_total_t *) addr;
8600}
8601
8602void
8603vm_tag_will_update_zone(vm_tag_t tag, uint32_t zidx)
8604{
8605 vm_allocation_zone_total_t * zone;
8606
8607 assert(VM_KERN_MEMORY_NONE != tag);
8608 assert(tag < VM_MAX_TAG_VALUE);
8609
8610 if (zidx >= VM_MAX_TAG_ZONES) return;
8611
8612 zone = vm_allocation_zone_totals[tag];
8613 if (!zone)
8614 {
8615 zone = kalloc_tag(VM_MAX_TAG_ZONES * sizeof(*zone), VM_KERN_MEMORY_DIAG);
8616 if (!zone) return;
8617 bzero(zone, VM_MAX_TAG_ZONES * sizeof(*zone));
8618 if (!OSCompareAndSwapPtr(NULL, zone, &vm_allocation_zone_totals[tag]))
8619 {
8620 kfree(zone, VM_MAX_TAG_ZONES * sizeof(*zone));
8621 }
8622 }
8623}
8624
8625void
8626vm_tag_update_zone_size(vm_tag_t tag, uint32_t zidx, int64_t delta, int64_t dwaste)
8627{
8628 vm_allocation_zone_total_t * zone;
8629 uint32_t new;
8630
8631 assert(VM_KERN_MEMORY_NONE != tag);
8632 assert(tag < VM_MAX_TAG_VALUE);
8633
8634 if (zidx >= VM_MAX_TAG_ZONES) return;
8635
8636 zone = vm_allocation_zone_totals[tag];
8637 assert(zone);
8638 zone += zidx;
8639
8640 /* the zone is locked */
8641 if (delta < 0)
8642 {
8643 assertf(zone->total >= ((uint64_t)-delta), "zidx %d, tag %d, %p", zidx, tag, zone);
8644 zone->total += delta;
8645 }
8646 else
8647 {
8648 zone->total += delta;
8649 if (zone->total > zone->peak) zone->peak = zone->total;
8650 if (dwaste)
8651 {
8652 new = zone->waste;
8653 if (zone->wastediv < 65536) zone->wastediv++;
8654 else new -= (new >> 16);
8655 __assert_only bool ov = os_add_overflow(new, dwaste, &new);
8656 assert(!ov);
8657 zone->waste = new;
8658 }
8659 }
8660}
8661
8662#endif /* VM_MAX_TAG_ZONES */
8663
8664void
8665kern_allocation_update_subtotal(kern_allocation_name_t allocation, uint32_t subtag, int64_t delta)
8666{
8667 kern_allocation_name_t other;
8668 struct vm_allocation_total * total;
8669 uint32_t subidx;
8670
8671 subidx = 0;
8672 assert(VM_KERN_MEMORY_NONE != subtag);
8673 for (; subidx < allocation->subtotalscount; subidx++)
8674 {
8675 if (VM_KERN_MEMORY_NONE == allocation->subtotals[subidx].tag)
8676 {
8677 allocation->subtotals[subidx].tag = subtag;
8678 break;
8679 }
8680 if (subtag == allocation->subtotals[subidx].tag) break;
8681 }
8682 assert(subidx < allocation->subtotalscount);
8683 if (subidx >= allocation->subtotalscount) return;
8684
8685 total = &allocation->subtotals[subidx];
8686 other = vm_allocation_sites[subtag];
8687 assert(other);
8688
8689 if (delta < 0)
8690 {
8691 assertf(total->total >= ((uint64_t)-delta), "name %p", allocation);
8692 OSAddAtomic64(delta, &total->total);
8693 assertf(other->mapped >= ((uint64_t)-delta), "other %p", other);
8694 OSAddAtomic64(delta, &other->mapped);
8695 }
8696 else
8697 {
8698 OSAddAtomic64(delta, &other->mapped);
8699 OSAddAtomic64(delta, &total->total);
8700 }
8701}
8702
8703const char *
8704kern_allocation_get_name(kern_allocation_name_t allocation)
8705{
8706 return (KA_NAME(allocation));
8707}
8708
8709kern_allocation_name_t
8710kern_allocation_name_allocate(const char * name, uint32_t subtotalscount)
8711{
8712 uint32_t namelen;
8713
8714 namelen = (uint32_t) strnlen(name, MACH_MEMORY_INFO_NAME_MAX_LEN - 1);
8715
8716 kern_allocation_name_t allocation;
8717 allocation = kalloc(KA_SIZE(namelen, subtotalscount));
8718 bzero(allocation, KA_SIZE(namelen, subtotalscount));
8719
8720 allocation->refcount = 1;
8721 allocation->subtotalscount = subtotalscount;
8722 allocation->flags = (namelen << VM_TAG_NAME_LEN_SHIFT);
8723 strlcpy(KA_NAME(allocation), name, namelen + 1);
8724
8725 return (allocation);
8726}
8727
8728void
8729kern_allocation_name_release(kern_allocation_name_t allocation)
8730{
8731 assert(allocation->refcount > 0);
8732 if (1 == OSAddAtomic16(-1, &allocation->refcount))
8733 {
8734 kfree(allocation, KA_SIZE(KA_NAME_LEN(allocation), allocation->subtotalscount));
8735 }
8736}
8737
8738vm_tag_t
8739kern_allocation_name_get_vm_tag(kern_allocation_name_t allocation)
8740{
8741 return (vm_tag_alloc(allocation));
8742}
8743
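/*
 * A minimal usage sketch of the kern_allocation_name_* interface defined above
 * (the name string and subtotal count are made up): allocate a named
 * accounting record, derive a vm_tag_t from it, account some bytes against it,
 * and drop the reference when done.
 */
#if 0 /* illustrative only */
static void
example_named_accounting(void)
{
	kern_allocation_name_t name;
	vm_tag_t               tag;

	name = kern_allocation_name_allocate("com.example.subsystem", 2);
	tag  = kern_allocation_name_get_vm_tag(name);
	(void) tag;

	/* account one page in, then back out, against this name */
	kern_allocation_update_size(name, PAGE_SIZE);
	kern_allocation_update_size(name, -(int64_t)PAGE_SIZE);

	kern_allocation_name_release(name);
}
#endif
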
d9a64523 8744#if ! VM_TAG_ACTIVE_UPDATE
3e170ce0 8745static void
5ba3f43e 8746vm_page_count_object(mach_memory_info_t * info, unsigned int __unused num_info, vm_object_t object)
3e170ce0
A
8747{
8748 if (!object->wired_page_count) return;
8749 if (object != kernel_object)
8750 {
5ba3f43e
A
8751 assert(object->wire_tag < num_info);
8752 info[object->wire_tag].size += ptoa_64(object->wired_page_count);
3e170ce0
A
8753 }
8754}
8755
5ba3f43e
A
8756typedef void (*vm_page_iterate_proc)(mach_memory_info_t * info,
8757 unsigned int num_info, vm_object_t object);
3e170ce0
A
8758
8759static void
5ba3f43e 8760vm_page_iterate_purgeable_objects(mach_memory_info_t * info, unsigned int num_info,
3e170ce0
A
8761 vm_page_iterate_proc proc, purgeable_q_t queue,
8762 int group)
8763{
8764 vm_object_t object;
8765
8766 for (object = (vm_object_t) queue_first(&queue->objq[group]);
5ba3f43e
A
8767 !queue_end(&queue->objq[group], (queue_entry_t) object);
8768 object = (vm_object_t) queue_next(&object->objq))
3e170ce0 8769 {
5ba3f43e 8770 proc(info, num_info, object);
3e170ce0
A
8771 }
8772}
8773
8774static void
5ba3f43e 8775vm_page_iterate_objects(mach_memory_info_t * info, unsigned int num_info,
3e170ce0
A
8776 vm_page_iterate_proc proc)
8777{
3e170ce0 8778 vm_object_t object;
3e170ce0
A
8779
8780 lck_spin_lock(&vm_objects_wired_lock);
8781 queue_iterate(&vm_objects_wired,
8782 object,
8783 vm_object_t,
d9a64523 8784 wired_objq)
3e170ce0 8785 {
5ba3f43e 8786 proc(info, num_info, object);
3e170ce0
A
8787 }
8788 lck_spin_unlock(&vm_objects_wired_lock);
3e170ce0 8789}
d9a64523 8790#endif /* ! VM_TAG_ACTIVE_UPDATE */
3e170ce0
A
8791
8792static uint64_t
5ba3f43e 8793process_account(mach_memory_info_t * info, unsigned int num_info, uint64_t zones_collectable_bytes, boolean_t iterated)
3e170ce0 8794{
5ba3f43e
A
8795 size_t namelen;
8796 unsigned int idx, count, nextinfo;
3e170ce0 8797 vm_allocation_site_t * site;
5ba3f43e 8798 lck_spin_lock(&vm_allocation_sites_lock);
3e170ce0 8799
5ba3f43e 8800 for (idx = 0; idx <= vm_allocation_tag_highest; idx++)
3e170ce0 8801 {
5ba3f43e
A
8802 site = vm_allocation_sites[idx];
8803 if (!site) continue;
8804 info[idx].mapped = site->mapped;
8805 info[idx].tag = site->tag;
8806 if (!iterated)
8807 {
8808 info[idx].size = site->total;
8809#if DEBUG || DEVELOPMENT
8810 info[idx].peak = site->peak;
8811#endif /* DEBUG || DEVELOPMENT */
8812 }
8813 else
8814 {
8815 if (!site->subtotalscount && (site->total != info[idx].size))
8816 {
8817 printf("tag mismatch[%d] 0x%qx, iter 0x%qx\n", idx, site->total, info[idx].size);
8818 info[idx].size = site->total;
8819 }
8820 }
8821 }
8822
8823 nextinfo = (vm_allocation_tag_highest + 1);
8824 count = nextinfo;
8825 if (count >= num_info) count = num_info;
8826
8827 for (idx = 0; idx < count; idx++)
8828 {
8829 site = vm_allocation_sites[idx];
8830 if (!site) continue;
8831 info[idx].flags |= VM_KERN_SITE_WIRED;
8832 if (idx < VM_KERN_MEMORY_FIRST_DYNAMIC)
8833 {
8834 info[idx].site = idx;
8835 info[idx].flags |= VM_KERN_SITE_TAG;
8836 if (VM_KERN_MEMORY_ZONE == idx)
8837 {
8838 info[idx].flags |= VM_KERN_SITE_HIDE;
8839 info[idx].flags &= ~VM_KERN_SITE_WIRED;
8840 info[idx].collectable_bytes = zones_collectable_bytes;
8841 }
8842 }
8843 else if ((namelen = (VM_TAG_NAME_LEN_MAX & (site->flags >> VM_TAG_NAME_LEN_SHIFT))))
8844 {
8845 info[idx].site = 0;
8846 info[idx].flags |= VM_KERN_SITE_NAMED;
8847 if (namelen > sizeof(info[idx].name)) namelen = sizeof(info[idx].name);
8848 strncpy(&info[idx].name[0], KA_NAME(site), namelen);
8849 }
8850 else if (VM_TAG_KMOD & site->flags)
3e170ce0 8851 {
5ba3f43e
A
8852 info[idx].site = OSKextGetKmodIDForSite(site, NULL, 0);
8853 info[idx].flags |= VM_KERN_SITE_KMOD;
3e170ce0
A
8854 }
8855 else
8856 {
5ba3f43e
A
8857 info[idx].site = VM_KERNEL_UNSLIDE(site);
8858 info[idx].flags |= VM_KERN_SITE_KERNEL;
3e170ce0 8859 }
5ba3f43e
A
8860#if VM_MAX_TAG_ZONES
8861 vm_allocation_zone_total_t * zone;
8862 unsigned int zidx;
8863 vm_size_t elem_size;
8864
8865 if (vm_allocation_zone_totals
8866 && (zone = vm_allocation_zone_totals[idx])
8867 && (nextinfo < num_info))
8868 {
8869 for (zidx = 0; zidx < VM_MAX_TAG_ZONES; zidx++)
8870 {
8871 if (!zone[zidx].peak) continue;
8872 info[nextinfo] = info[idx];
8873 info[nextinfo].zone = zone_index_from_tag_index(zidx, &elem_size);
8874 info[nextinfo].flags &= ~VM_KERN_SITE_WIRED;
8875 info[nextinfo].flags |= VM_KERN_SITE_ZONE;
8876 info[nextinfo].size = zone[zidx].total;
8877 info[nextinfo].peak = zone[zidx].peak;
8878 info[nextinfo].mapped = 0;
8879 if (zone[zidx].wastediv)
8880 {
8881 info[nextinfo].collectable_bytes = ((zone[zidx].waste * zone[zidx].total / elem_size) / zone[zidx].wastediv);
8882 }
8883 nextinfo++;
8884 }
8885 }
8886#endif /* VM_MAX_TAG_ZONES */
8887 if (site->subtotalscount)
8888 {
8889 uint64_t mapped, mapcost, take;
8890 uint32_t sub;
8891 vm_tag_t alloctag;
8892
8893 info[idx].size = site->total;
8894 mapped = info[idx].size;
8895 info[idx].mapped = mapped;
8896 mapcost = 0;
8897 for (sub = 0; sub < site->subtotalscount; sub++)
8898 {
8899 alloctag = site->subtotals[sub].tag;
8900 assert(alloctag < num_info);
8901 if (info[alloctag].name[0]) continue;
8902 take = info[alloctag].mapped;
8903 if (take > info[alloctag].size) take = info[alloctag].size;
8904 if (take > mapped) take = mapped;
8905 info[alloctag].mapped -= take;
8906 info[alloctag].size -= take;
8907 mapped -= take;
8908 mapcost += take;
8909 }
8910 info[idx].size = mapcost;
8911 }
3e170ce0
A
8912 }
8913 lck_spin_unlock(&vm_allocation_sites_lock);
5ba3f43e
A
8914
8915 return (0);
8916}
8917
8918uint32_t
8919vm_page_diagnose_estimate(void)
8920{
8921 vm_allocation_site_t * site;
8922 uint32_t count;
8923 uint32_t idx;
8924
8925 lck_spin_lock(&vm_allocation_sites_lock);
8926 for (count = idx = 0; idx < VM_MAX_TAG_VALUE; idx++)
8927 {
8928 site = vm_allocation_sites[idx];
8929 if (!site) continue;
8930 count++;
8931#if VM_MAX_TAG_ZONES
8932 if (vm_allocation_zone_totals)
8933 {
8934 vm_allocation_zone_total_t * zone;
8935 zone = vm_allocation_zone_totals[idx];
8936 if (!zone) continue;
8937 for (uint32_t zidx = 0; zidx < VM_MAX_TAG_ZONES; zidx++) if (zone[zidx].peak) count++;
8938 }
8939#endif
3e170ce0 8940 }
5ba3f43e 8941 lck_spin_unlock(&vm_allocation_sites_lock);
39037602 8942
5ba3f43e
A
8943 /* some slop for new tags created */
8944 count += 8;
8945 count += VM_KERN_COUNTER_COUNT;
8946
8947 return (count);
3e170ce0
A
8948}
8949
5ba3f43e 8950
3e170ce0 8951kern_return_t
5ba3f43e 8952vm_page_diagnose(mach_memory_info_t * info, unsigned int num_info, uint64_t zones_collectable_bytes)
3e170ce0 8953{
3e170ce0
A
8954 uint64_t wired_size;
8955 uint64_t wired_managed_size;
8956 uint64_t wired_reserved_size;
5ba3f43e
A
8957 uint64_t booter_size;
8958 boolean_t iterate;
3e170ce0
A
8959 mach_memory_info_t * counts;
8960
5ba3f43e 8961 bzero(info, num_info * sizeof(mach_memory_info_t));
3e170ce0 8962
39037602
A
8963 if (!vm_page_wire_count_initial) return (KERN_ABORTED);
8964
5ba3f43e
A
8965#if CONFIG_EMBEDDED
8966 wired_size = ptoa_64(vm_page_wire_count);
8967 wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count);
8968#else
3e170ce0
A
8969 wired_size = ptoa_64(vm_page_wire_count + vm_lopage_free_count + vm_page_throttled_count);
8970 wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count + vm_page_throttled_count);
5ba3f43e 8971#endif
3e170ce0
A
8972 wired_managed_size = ptoa_64(vm_page_wire_count - vm_page_wire_count_initial);
8973
5ba3f43e
A
8974 booter_size = ml_get_booter_memory_size();
8975 wired_size += booter_size;
8976
8977 assert(num_info >= VM_KERN_COUNTER_COUNT);
8978 num_info -= VM_KERN_COUNTER_COUNT;
8979 counts = &info[num_info];
3e170ce0 8980
5ba3f43e
A
8981#define SET_COUNT(xcount, xsize, xflags) \
8982 counts[xcount].tag = VM_MAX_TAG_VALUE + xcount; \
8983 counts[xcount].site = (xcount); \
8984 counts[xcount].size = (xsize); \
8985 counts[xcount].mapped = (xsize); \
3e170ce0
A
8986 counts[xcount].flags = VM_KERN_SITE_COUNTER | xflags;
8987
8988 SET_COUNT(VM_KERN_COUNT_MANAGED, ptoa_64(vm_page_pages), 0);
8989 SET_COUNT(VM_KERN_COUNT_WIRED, wired_size, 0);
8990 SET_COUNT(VM_KERN_COUNT_WIRED_MANAGED, wired_managed_size, 0);
8991 SET_COUNT(VM_KERN_COUNT_RESERVED, wired_reserved_size, VM_KERN_SITE_WIRED);
8992 SET_COUNT(VM_KERN_COUNT_STOLEN, ptoa_64(vm_page_stolen_count), VM_KERN_SITE_WIRED);
8993 SET_COUNT(VM_KERN_COUNT_LOPAGE, ptoa_64(vm_lopage_free_count), VM_KERN_SITE_WIRED);
5ba3f43e
A
8994 SET_COUNT(VM_KERN_COUNT_WIRED_BOOT, ptoa_64(vm_page_wire_count_on_boot), 0);
8995 SET_COUNT(VM_KERN_COUNT_BOOT_STOLEN, booter_size, VM_KERN_SITE_WIRED);
3e170ce0 8996
5ba3f43e 8997#define SET_MAP(xcount, xsize, xfree, xlargest) \
3e170ce0
A
8998 counts[xcount].site = (xcount); \
8999 counts[xcount].size = (xsize); \
5ba3f43e 9000 counts[xcount].mapped = (xsize); \
3e170ce0
A
9001 counts[xcount].free = (xfree); \
9002 counts[xcount].largest = (xlargest); \
9003 counts[xcount].flags = VM_KERN_SITE_COUNTER;
9004
9005 vm_map_size_t map_size, map_free, map_largest;
9006
9007 vm_map_sizes(kernel_map, &map_size, &map_free, &map_largest);
9008 SET_MAP(VM_KERN_COUNT_MAP_KERNEL, map_size, map_free, map_largest);
9009
9010 vm_map_sizes(zone_map, &map_size, &map_free, &map_largest);
9011 SET_MAP(VM_KERN_COUNT_MAP_ZONE, map_size, map_free, map_largest);
9012
9013 vm_map_sizes(kalloc_map, &map_size, &map_free, &map_largest);
9014 SET_MAP(VM_KERN_COUNT_MAP_KALLOC, map_size, map_free, map_largest);
9015
5ba3f43e
A
9016 iterate = !VM_TAG_ACTIVE_UPDATE;
9017 if (iterate)
3e170ce0 9018 {
5ba3f43e
A
9019 enum { kMaxKernelDepth = 1 };
9020 vm_map_t maps [kMaxKernelDepth];
9021 vm_map_entry_t entries[kMaxKernelDepth];
9022 vm_map_t map;
9023 vm_map_entry_t entry;
9024 vm_object_offset_t offset;
9025 vm_page_t page;
9026 int stackIdx, count;
9027
d9a64523 9028#if ! VM_TAG_ACTIVE_UPDATE
5ba3f43e 9029 vm_page_iterate_objects(info, num_info, &vm_page_count_object);
d9a64523 9030#endif /* ! VM_TAG_ACTIVE_UPDATE */
5ba3f43e
A
9031
9032 map = kernel_map;
9033 stackIdx = 0;
9034 while (map)
3e170ce0 9035 {
5ba3f43e
A
9036 vm_map_lock(map);
9037 for (entry = map->hdr.links.next; map; entry = entry->links.next)
9038 {
9039 if (entry->is_sub_map)
9040 {
9041 assert(stackIdx < kMaxKernelDepth);
9042 maps[stackIdx] = map;
9043 entries[stackIdx] = entry;
9044 stackIdx++;
9045 map = VME_SUBMAP(entry);
9046 entry = NULL;
9047 break;
9048 }
9049 if (VME_OBJECT(entry) == kernel_object)
9050 {
9051 count = 0;
9052 vm_object_lock(VME_OBJECT(entry));
9053 for (offset = entry->links.start; offset < entry->links.end; offset += page_size)
9054 {
9055 page = vm_page_lookup(VME_OBJECT(entry), offset);
9056 if (page && VM_PAGE_WIRED(page)) count++;
9057 }
9058 vm_object_unlock(VME_OBJECT(entry));
3e170ce0 9059
5ba3f43e
A
9060 if (count)
9061 {
9062 assert(VME_ALIAS(entry) != VM_KERN_MEMORY_NONE);
9063 assert(VME_ALIAS(entry) < num_info);
9064 info[VME_ALIAS(entry)].size += ptoa_64(count);
9065 }
9066 }
9067 while (map && (entry == vm_map_last_entry(map)))
9068 {
9069 vm_map_unlock(map);
9070 if (!stackIdx) map = NULL;
9071 else
9072 {
9073 --stackIdx;
9074 map = maps[stackIdx];
9075 entry = entries[stackIdx];
9076 }
9077 }
9078 }
3e170ce0 9079 }
3e170ce0
A
9080 }
9081
5ba3f43e 9082 process_account(info, num_info, zones_collectable_bytes, iterate);
3e170ce0
A
9083
9084 return (KERN_SUCCESS);
9085}
39037602 9086
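/*
 * A minimal sketch of how a caller might drive the two routines above: size
 * the info array with vm_page_diagnose_estimate(), then fill it with
 * vm_page_diagnose().  The kalloc()-based buffer management here is
 * illustrative; the in-tree caller allocates its buffer elsewhere.
 */
#if 0 /* illustrative only */
static kern_return_t
example_collect_memory_info(void)
{
	unsigned int         num_info = vm_page_diagnose_estimate();
	mach_memory_info_t * info;
	kern_return_t        kr;

	info = kalloc(num_info * sizeof(mach_memory_info_t));
	if (info == NULL)
		return (KERN_RESOURCE_SHORTAGE);

	kr = vm_page_diagnose(info, num_info, 0 /* zones_collectable_bytes */);

	/* ... consume info[0 .. num_info - 1] ... */

	kfree(info, num_info * sizeof(mach_memory_info_t));
	return (kr);
}
#endif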
39037602
A
9087#if DEBUG || DEVELOPMENT
9088
39037602 9089kern_return_t
5ba3f43e 9090vm_kern_allocation_info(uintptr_t addr, vm_size_t * size, vm_tag_t * tag, vm_size_t * zone_size)
39037602 9091{
5ba3f43e
A
9092 kern_return_t ret;
9093 vm_size_t zsize;
9094 vm_map_t map;
9095 vm_map_entry_t entry;
39037602 9096
5ba3f43e
A
9097 zsize = zone_element_info((void *) addr, tag);
9098 if (zsize)
39037602 9099 {
5ba3f43e
A
9100 *zone_size = *size = zsize;
9101 return (KERN_SUCCESS);
39037602
A
9102 }
9103
5ba3f43e
A
9104 *zone_size = 0;
9105 ret = KERN_INVALID_ADDRESS;
9106 for (map = kernel_map; map; )
9107 {
9108 vm_map_lock(map);
9109 if (!vm_map_lookup_entry(map, addr, &entry)) break;
9110 if (entry->is_sub_map)
9111 {
9112 if (map != kernel_map) break;
9113 map = VME_SUBMAP(entry);
9114 continue;
9115 }
9116 if (entry->vme_start != addr) break;
9117 *tag = VME_ALIAS(entry);
9118 *size = (entry->vme_end - addr);
9119 ret = KERN_SUCCESS;
9120 break;
9121 }
9122 if (map != kernel_map) vm_map_unlock(map);
9123 vm_map_unlock(kernel_map);
39037602 9124
5ba3f43e 9125 return (ret);
39037602
A
9126}
9127
5ba3f43e 9128#endif /* DEBUG || DEVELOPMENT */
39037602 9129
5ba3f43e
A
9130uint32_t
9131vm_tag_get_kext(vm_tag_t tag, char * name, vm_size_t namelen)
9132{
9133 vm_allocation_site_t * site;
9134 uint32_t kmodId;
39037602 9135
5ba3f43e
A
9136 kmodId = 0;
9137 lck_spin_lock(&vm_allocation_sites_lock);
9138 if ((site = vm_allocation_sites[tag]))
39037602 9139 {
5ba3f43e 9140 if (VM_TAG_KMOD & site->flags)
39037602 9141 {
5ba3f43e 9142 kmodId = OSKextGetKmodIDForSite(site, name, namelen);
39037602
A
9143 }
9144 }
5ba3f43e 9145 lck_spin_unlock(&vm_allocation_sites_lock);
39037602 9146
5ba3f43e 9147 return (kmodId);
39037602 9148}
d9a64523
A
9149
9150
9151#if CONFIG_SECLUDED_MEMORY
9152/*
9153 * Note that there's no locking around other accesses to vm_page_secluded_target.
9154 * That should be OK, since these are the only places where it can be changed after
9155 * initialization. Other users (like vm_pageout) may see the wrong value briefly,
9156 * but will eventually get the correct value. This brief mismatch is OK as pageout
9157 * and page freeing will auto-adjust the vm_page_secluded_count to match the target
9158 * over time.
9159 */
9160unsigned int vm_page_secluded_suppress_cnt = 0;
9161unsigned int vm_page_secluded_save_target;
9162
9163
9164lck_grp_attr_t secluded_suppress_slock_grp_attr;
9165lck_grp_t secluded_suppress_slock_grp;
9166lck_attr_t secluded_suppress_slock_attr;
9167lck_spin_t secluded_suppress_slock;
9168
9169void
9170secluded_suppression_init(void)
9171{
9172 lck_grp_attr_setdefault(&secluded_suppress_slock_grp_attr);
9173 lck_grp_init(&secluded_suppress_slock_grp,
9174 "secluded_suppress_slock", &secluded_suppress_slock_grp_attr);
9175 lck_attr_setdefault(&secluded_suppress_slock_attr);
9176 lck_spin_init(&secluded_suppress_slock,
9177 &secluded_suppress_slock_grp, &secluded_suppress_slock_attr);
9178}
9179
9180void
9181start_secluded_suppression(task_t task)
9182{
9183 if (task->task_suppressed_secluded)
9184 return;
9185 lck_spin_lock(&secluded_suppress_slock);
9186 if (!task->task_suppressed_secluded && vm_page_secluded_suppress_cnt++ == 0) {
9187 task->task_suppressed_secluded = TRUE;
9188 vm_page_secluded_save_target = vm_page_secluded_target;
9189 vm_page_secluded_target = 0;
9190 }
9191 lck_spin_unlock(&secluded_suppress_slock);
9192}
9193
9194void
9195stop_secluded_suppression(task_t task)
9196{
9197 lck_spin_lock(&secluded_suppress_slock);
9198 if (task->task_suppressed_secluded && --vm_page_secluded_suppress_cnt == 0) {
9199 task->task_suppressed_secluded = FALSE;
9200 vm_page_secluded_target = vm_page_secluded_save_target;
9201 }
9202 lck_spin_unlock(&secluded_suppress_slock);
9203}
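
/*
 * A minimal call-pattern sketch for the two routines above (the trigger is
 * made up): bracket the period during which a task needs the secluded pool
 * disabled; the count in vm_page_secluded_suppress_cnt restores the saved
 * target only when the last suppressor stops.
 */
#if 0 /* illustrative only */
static void
example_suppress_secluded_around_work(task_t task)
{
	start_secluded_suppression(task);
	/* ... work that must not compete with the secluded pool ... */
	stop_secluded_suppression(task);
}
#endif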
9204
9205#endif /* CONFIG_SECLUDED_MEMORY */