[apple/xnu.git] osfmk/vm/vm_resident.c (blame view, release xnu-7195.101.1)
1c79356b 1/*
f427ee49 2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
0a7de745 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
0a7de745 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
0a7de745 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
0a7de745 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
0a7de745 31/*
1c79356b
A
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
0a7de745 35 *
1c79356b
A
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
0a7de745 41 *
1c79356b
A
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
0a7de745 45 *
1c79356b 46 * Carnegie Mellon requests users of this software to return to
0a7de745 47 *
1c79356b
A
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
0a7de745 52 *
1c79356b
A
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
91447636 65#include <debug.h>
2d21ac55 66#include <libkern/OSAtomic.h>
3e170ce0 67#include <libkern/OSDebug.h>
91447636 68
9bccf70c 69#include <mach/clock_types.h>
1c79356b
A
70#include <mach/vm_prot.h>
71#include <mach/vm_statistics.h>
2d21ac55 72#include <mach/sdt.h>
c3c9b80d 73#include <kern/counter.h>
f427ee49 74#include <kern/host_statistics.h>
1c79356b 75#include <kern/sched_prim.h>
39037602 76#include <kern/policy_internal.h>
1c79356b
A
77#include <kern/task.h>
78#include <kern/thread.h>
b0d623f7 79#include <kern/kalloc.h>
f427ee49 80#include <kern/zalloc_internal.h>
fe8ab488 81#include <kern/ledger.h>
1c79356b
A
82#include <vm/pmap.h>
83#include <vm/vm_init.h>
84#include <vm/vm_map.h>
85#include <vm/vm_page.h>
86#include <vm/vm_pageout.h>
0a7de745 87#include <vm/vm_kern.h> /* kernel_memory_allocate() */
1c79356b 88#include <kern/misc_protos.h>
3e170ce0 89#include <mach_debug/zone_info.h>
1c79356b 90#include <vm/cpm.h>
6d2010ae 91#include <pexpert/pexpert.h>
5ba3f43e 92#include <san/kasan.h>
55e303ae 93
91447636 94#include <vm/vm_protos.h>
2d21ac55
A
95#include <vm/memory_object.h>
96#include <vm/vm_purgeable_internal.h>
39236c6e 97#include <vm/vm_compressor.h>
cb323159
A
98#if defined (__x86_64__)
99#include <i386/misc_protos.h>
100#endif
2d21ac55 101
fe8ab488
A
102#if CONFIG_PHANTOM_CACHE
103#include <vm/vm_phantom_cache.h>
104#endif
105
f427ee49 106#if HIBERNATION
b0d623f7 107#include <IOKit/IOHibernatePrivate.h>
f427ee49
A
108#include <machine/pal_hibernate.h>
109#endif /* HIBERNATION */
b0d623f7 110
b0d623f7
A
111#include <sys/kdebug.h>
112
cb323159
A
113#if defined(HAS_APPLE_PAC)
114#include <ptrauth.h>
115#endif
f427ee49
A
116#if defined(__arm64__)
117#include <arm/cpu_internal.h>
118#endif /* defined(__arm64__) */
39037602 119
0a7de745
A
120#if MACH_ASSERT
121
122#define ASSERT_PMAP_FREE(mem) pmap_assert_free(VM_PAGE_GET_PHYS_PAGE(mem))
123
124#else /* MACH_ASSERT */
125
126#define ASSERT_PMAP_FREE(mem) /* nothing */
d9a64523 127
0a7de745
A
128#endif /* MACH_ASSERT */
129
cb323159
A
130extern boolean_t vm_pageout_running;
131extern thread_t vm_pageout_scan_thread;
132extern boolean_t vps_dynamic_priority_enabled;
133
0a7de745
A
134char vm_page_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
135char vm_page_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
136char vm_page_non_speculative_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
137char vm_page_active_or_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
39037602
A
138
139#if CONFIG_SECLUDED_MEMORY
140struct vm_page_secluded_data vm_page_secluded;
141#endif /* CONFIG_SECLUDED_MEMORY */
142
f427ee49
A
143#if DEVELOPMENT || DEBUG
144extern struct memory_object_pager_ops shared_region_pager_ops;
145unsigned int shared_region_pagers_resident_count = 0;
146unsigned int shared_region_pagers_resident_peak = 0;
147#endif /* DEVELOPMENT || DEBUG */
148
149int PERCPU_DATA(start_color);
150vm_page_t PERCPU_DATA(free_pages);
0a7de745
A
151boolean_t hibernate_cleaning_in_progress = FALSE;
152boolean_t vm_page_free_verify = TRUE;
b0d623f7 153
0a7de745
A
154uint32_t vm_lopage_free_count = 0;
155uint32_t vm_lopage_free_limit = 0;
156uint32_t vm_lopage_lowater = 0;
157boolean_t vm_lopage_refill = FALSE;
158boolean_t vm_lopage_needed = FALSE;
0b4c1975 159
0a7de745
A
160lck_mtx_ext_t vm_page_queue_lock_ext;
161lck_mtx_ext_t vm_page_queue_free_lock_ext;
162lck_mtx_ext_t vm_purgeable_queue_lock_ext;
2d21ac55 163
0a7de745
A
164int speculative_age_index = 0;
165int speculative_steal_index = 0;
2d21ac55
A
166struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
167
0a7de745
A
168boolean_t hibernation_vmqueues_inspection = FALSE; /* Tracks if the hibernation code is looking at the VM queues.
169 * Updated and checked behind the vm_page_queues_lock. */
0b4e3aa0 170
0a7de745 171static void vm_page_free_prepare(vm_page_t page);
c3c9b80d 172static vm_page_t vm_page_grab_fictitious_common(ppnum_t, boolean_t);
6d2010ae 173
3e170ce0 174static void vm_tag_init(void);
b0d623f7 175
f427ee49 176/* for debugging purposes */
c3c9b80d
A
177SECURITY_READ_ONLY_EARLY(uint32_t) vm_packed_from_vm_pages_array_mask =
178 VM_PAGE_PACKED_FROM_ARRAY;
f427ee49
A
179SECURITY_READ_ONLY_EARLY(vm_packing_params_t) vm_page_packing_params =
180 VM_PACKING_PARAMS(VM_PAGE_PACKED_PTR);
b0d623f7 181
1c79356b
A
182/*
183 * Associated with each page of user-allocatable memory is a
184 * page structure.
185 */
186
187/*
188 * These variables record the values returned by vm_page_bootstrap,
189 * for debugging purposes. The implementation of pmap_steal_memory
190 * and pmap_startup here also uses them internally.
191 */
192
193vm_offset_t virtual_space_start;
194vm_offset_t virtual_space_end;
0a7de745 195uint32_t vm_page_pages;
1c79356b
A
196
197/*
198 * The vm_page_lookup() routine, which provides for fast
199 * (virtual memory object, offset) to page lookup, employs
200 * the following hash table. The vm_page_{insert,remove}
201 * routines install and remove associations in the table.
202 * [This table is often called the virtual-to-physical,
203 * or VP, table.]
204 */
205typedef struct {
fe8ab488 206 vm_page_packed_t page_list;
0a7de745
A
207#if MACH_PAGE_HASH_STATS
208 int cur_count; /* current count */
209 int hi_count; /* high water mark */
1c79356b
A
210#endif /* MACH_PAGE_HASH_STATS */
211} vm_page_bucket_t;
212
b0d623f7 213
0a7de745 214#define BUCKETS_PER_LOCK 16
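/*
 * Illustrative sketch (not taken verbatim from the lookup/insert paths):
 * the bucket array is sharded across a much smaller array of spin locks,
 * so a page hashed to bucket `hash_id` is expected to be guarded by
 *
 *     lck_spin_t *bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
 *
 * i.e. BUCKETS_PER_LOCK (16) consecutive buckets share one lock, trading a
 * little contention for a far smaller lock array.
 */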
b0d623f7 215
c3c9b80d
A
216SECURITY_READ_ONLY_LATE(vm_page_bucket_t *) vm_page_buckets; /* Array of buckets */
217SECURITY_READ_ONLY_LATE(unsigned int) vm_page_bucket_count = 0; /* How big is array? */
218SECURITY_READ_ONLY_LATE(unsigned int) vm_page_hash_mask; /* Mask for hash function */
219SECURITY_READ_ONLY_LATE(unsigned int) vm_page_hash_shift; /* Shift for hash function */
220SECURITY_READ_ONLY_LATE(uint32_t) vm_page_bucket_hash; /* Basic bucket hash */
221SECURITY_READ_ONLY_LATE(unsigned int) vm_page_bucket_lock_count = 0; /* How big is array of locks? */
b0d623f7 222
5ba3f43e
A
223#ifndef VM_TAG_ACTIVE_UPDATE
224#error VM_TAG_ACTIVE_UPDATE
225#endif
226#ifndef VM_MAX_TAG_ZONES
227#error VM_MAX_TAG_ZONES
228#endif
229
c3c9b80d
A
230/* for debugging */
231SECURITY_READ_ONLY_LATE(bool) vm_tag_active_update = VM_TAG_ACTIVE_UPDATE;
232SECURITY_READ_ONLY_LATE(lck_spin_t *) vm_page_bucket_locks;
1c79356b 233
5ba3f43e
A
234vm_allocation_site_t vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC + 1];
235vm_allocation_site_t * vm_allocation_sites[VM_MAX_TAG_VALUE];
236#if VM_MAX_TAG_ZONES
c3c9b80d 237static vm_allocation_zone_total_t **vm_allocation_zone_totals;
5ba3f43e
A
238#endif /* VM_MAX_TAG_ZONES */
239
240vm_tag_t vm_allocation_tag_highest;
241
15129b1c
A
242#if VM_PAGE_BUCKETS_CHECK
243boolean_t vm_page_buckets_check_ready = FALSE;
244#if VM_PAGE_FAKE_BUCKETS
0a7de745 245vm_page_bucket_t *vm_page_fake_buckets; /* decoy buckets */
15129b1c
A
246vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
247#endif /* VM_PAGE_FAKE_BUCKETS */
248#endif /* VM_PAGE_BUCKETS_CHECK */
91447636 249
0a7de745 250#if MACH_PAGE_HASH_STATS
1c79356b
A
251/* This routine is only for debug. It is intended to be called by
252 * hand by a developer using a kernel debugger. This routine prints
253 * out vm_page_hash table statistics to the kernel debug console.
254 */
255void
256hash_debug(void)
257{
0a7de745
A
258 int i;
259 int numbuckets = 0;
260 int highsum = 0;
261 int maxdepth = 0;
1c79356b
A
262
263 for (i = 0; i < vm_page_bucket_count; i++) {
264 if (vm_page_buckets[i].hi_count) {
265 numbuckets++;
266 highsum += vm_page_buckets[i].hi_count;
0a7de745 267 if (vm_page_buckets[i].hi_count > maxdepth) {
1c79356b 268 maxdepth = vm_page_buckets[i].hi_count;
0a7de745 269 }
1c79356b
A
270 }
271 }
272 printf("Total number of buckets: %d\n", vm_page_bucket_count);
273 printf("Number used buckets: %d = %d%%\n",
0a7de745 274 numbuckets, 100 * numbuckets / vm_page_bucket_count);
1c79356b 275 printf("Number unused buckets: %d = %d%%\n",
0a7de745
A
276 vm_page_bucket_count - numbuckets,
277 100 * (vm_page_bucket_count - numbuckets) / vm_page_bucket_count);
1c79356b
A
278 printf("Sum of bucket max depth: %d\n", highsum);
279 printf("Average bucket depth: %d.%2d\n",
0a7de745
A
280 highsum / vm_page_bucket_count,
281 highsum % vm_page_bucket_count);
1c79356b
A
282 printf("Maximum bucket depth: %d\n", maxdepth);
283}
284#endif /* MACH_PAGE_HASH_STATS */
285
286/*
287 * The virtual page size is currently implemented as a runtime
288 * variable, but is constant once initialized using vm_set_page_size.
289 * This initialization must be done in the machine-dependent
290 * bootstrap sequence, before calling other machine-independent
291 * initializations.
292 *
293 * All references to the virtual page size outside this
294 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
295 * constants.
296 */
5ba3f43e 297#if defined(__arm__) || defined(__arm64__)
0a7de745
A
298vm_size_t page_size;
299vm_size_t page_mask;
300int page_shift;
5ba3f43e 301#else
0a7de745
A
302vm_size_t page_size = PAGE_SIZE;
303vm_size_t page_mask = PAGE_MASK;
304int page_shift = PAGE_SHIFT;
5ba3f43e 305#endif
1c79356b 306
f427ee49
A
307SECURITY_READ_ONLY_LATE(vm_page_t) vm_pages = VM_PAGE_NULL;
308SECURITY_READ_ONLY_LATE(vm_page_t) vm_page_array_beginning_addr;
309vm_page_t vm_page_array_ending_addr;
39037602 310
0a7de745 311unsigned int vm_pages_count = 0;
2d21ac55 312
1c79356b
A
313/*
314 * Resident pages that represent real memory
2d21ac55
A
315 * are allocated from a set of free lists,
316 * one per color.
1c79356b 317 */
0a7de745
A
318unsigned int vm_colors;
319unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
320unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
321unsigned int vm_free_magazine_refill_limit = 0;
39037602
A
322
323
324struct vm_page_queue_free_head {
0a7de745 325 vm_page_queue_head_t qhead;
f427ee49 326} VM_PAGE_PACKED_ALIGNED;
39037602 327
0a7de745 328struct vm_page_queue_free_head vm_page_queue_free[MAX_COLORS];
39037602
A
329
330
0a7de745
A
331unsigned int vm_page_free_wanted;
332unsigned int vm_page_free_wanted_privileged;
39037602 333#if CONFIG_SECLUDED_MEMORY
0a7de745 334unsigned int vm_page_free_wanted_secluded;
39037602 335#endif /* CONFIG_SECLUDED_MEMORY */
0a7de745 336unsigned int vm_page_free_count;
1c79356b 337
1c79356b
A
338/*
339 * Occasionally, the virtual memory system uses
340 * resident page structures that do not refer to
341 * real pages, for example to leave a page with
342 * important state information in the VP table.
343 *
344 * These page structures are allocated the way
345 * most other kernel structures are.
346 */
f427ee49 347SECURITY_READ_ONLY_LATE(zone_t) vm_page_zone;
b0d623f7 348vm_locks_array_t vm_page_locks;
316670eb 349
f427ee49
A
350LCK_ATTR_DECLARE(vm_page_lck_attr, 0, 0);
351LCK_GRP_DECLARE(vm_page_lck_grp_free, "vm_page_free");
352LCK_GRP_DECLARE(vm_page_lck_grp_queue, "vm_page_queue");
353LCK_GRP_DECLARE(vm_page_lck_grp_local, "vm_page_queue_local");
354LCK_GRP_DECLARE(vm_page_lck_grp_purge, "vm_page_purge");
355LCK_GRP_DECLARE(vm_page_lck_grp_alloc, "vm_page_alloc");
356LCK_GRP_DECLARE(vm_page_lck_grp_bucket, "vm_page_bucket");
f427ee49
A
357LCK_SPIN_DECLARE_ATTR(vm_objects_wired_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
358LCK_SPIN_DECLARE_ATTR(vm_allocation_sites_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
359
0a7de745
A
360unsigned int vm_page_local_q_soft_limit = 250;
361unsigned int vm_page_local_q_hard_limit = 500;
f427ee49 362struct vpl *__zpercpu vm_page_local_q;
b0d623f7 363
316670eb
A
364/* N.B. Guard and fictitious pages must not
365 * be assigned a zero phys_page value.
366 */
1c79356b
A
367/*
368 * Fictitious pages don't have a physical address,
55e303ae 369 * but we must initialize phys_page to something.
1c79356b
A
370 * For debugging, this should be a strange value
371 * that the pmap module can recognize in assertions.
372 */
5ba3f43e 373const ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
1c79356b 374
2d21ac55
A
375/*
376 * Guard pages are not accessible so they don't
0a7de745 377 * need a physical address, but we need to enter
2d21ac55
A
378 * one in the pmap.
379 * Let's make it recognizable and make sure that
380 * we don't use a real physical page with that
381 * physical address.
382 */
5ba3f43e 383const ppnum_t vm_page_guard_addr = (ppnum_t) -2;
2d21ac55 384
1c79356b
A
385/*
386 * Resident page structures are also chained on
387 * queues that are used by the page replacement
388 * system (pageout daemon). These queues are
389 * defined here, but are shared by the pageout
0a7de745
A
390 * module. The inactive queue is broken into
391 * file-backed and anonymous queues for convenience, as the
392 * pageout daemon often assigns a higher
39236c6e 393 * importance to anonymous pages (they are less likely to be picked).
1c79356b 394 */
f427ee49
A
395vm_page_queue_head_t vm_page_queue_active VM_PAGE_PACKED_ALIGNED;
396vm_page_queue_head_t vm_page_queue_inactive VM_PAGE_PACKED_ALIGNED;
39037602 397#if CONFIG_SECLUDED_MEMORY
f427ee49 398vm_page_queue_head_t vm_page_queue_secluded VM_PAGE_PACKED_ALIGNED;
39037602 399#endif /* CONFIG_SECLUDED_MEMORY */
f427ee49
A
400vm_page_queue_head_t vm_page_queue_anonymous VM_PAGE_PACKED_ALIGNED; /* inactive memory queue for anonymous pages */
401vm_page_queue_head_t vm_page_queue_throttled VM_PAGE_PACKED_ALIGNED;
2d21ac55 402
0a7de745 403queue_head_t vm_objects_wired;
3e170ce0 404
d9a64523
A
405void vm_update_darkwake_mode(boolean_t);
406
39037602 407#if CONFIG_BACKGROUND_QUEUE
f427ee49 408vm_page_queue_head_t vm_page_queue_background VM_PAGE_PACKED_ALIGNED;
0a7de745
A
409uint32_t vm_page_background_target;
410uint32_t vm_page_background_target_snapshot;
411uint32_t vm_page_background_count;
412uint64_t vm_page_background_promoted_count;
39037602 413
0a7de745
A
414uint32_t vm_page_background_internal_count;
415uint32_t vm_page_background_external_count;
39037602 416
0a7de745
A
417uint32_t vm_page_background_mode;
418uint32_t vm_page_background_exclude_external;
39037602
A
419#endif
420
0a7de745
A
421unsigned int vm_page_active_count;
422unsigned int vm_page_inactive_count;
f427ee49 423unsigned int vm_page_kernelcache_count;
39037602 424#if CONFIG_SECLUDED_MEMORY
0a7de745
A
425unsigned int vm_page_secluded_count;
426unsigned int vm_page_secluded_count_free;
427unsigned int vm_page_secluded_count_inuse;
cb323159 428unsigned int vm_page_secluded_count_over_target;
39037602 429#endif /* CONFIG_SECLUDED_MEMORY */
0a7de745
A
430unsigned int vm_page_anonymous_count;
431unsigned int vm_page_throttled_count;
432unsigned int vm_page_speculative_count;
433
434unsigned int vm_page_wire_count;
435unsigned int vm_page_wire_count_on_boot = 0;
436unsigned int vm_page_stolen_count = 0;
437unsigned int vm_page_wire_count_initial;
438unsigned int vm_page_gobble_count = 0;
cb323159
A
439unsigned int vm_page_kern_lpage_count = 0;
440
441uint64_t booter_size; /* external so it can be found in core dumps */
0a7de745
A
442
443#define VM_PAGE_WIRE_COUNT_WARNING 0
444#define VM_PAGE_GOBBLE_COUNT_WARNING 0
445
446unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
447unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
448uint64_t vm_page_purged_count = 0; /* total count of purged pages */
449
450unsigned int vm_page_xpmapped_external_count = 0;
451unsigned int vm_page_external_count = 0;
452unsigned int vm_page_internal_count = 0;
453unsigned int vm_page_pageable_external_count = 0;
454unsigned int vm_page_pageable_internal_count = 0;
39236c6e 455
b0d623f7 456#if DEVELOPMENT || DEBUG
0a7de745
A
457unsigned int vm_page_speculative_recreated = 0;
458unsigned int vm_page_speculative_created = 0;
459unsigned int vm_page_speculative_used = 0;
b0d623f7 460#endif
2d21ac55 461
f427ee49 462vm_page_queue_head_t vm_page_queue_cleaned VM_PAGE_PACKED_ALIGNED;
316670eb 463
0a7de745 464unsigned int vm_page_cleaned_count = 0;
316670eb 465
0a7de745
A
466uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
467ppnum_t max_valid_low_ppnum = PPNUM_MAX;
0c530ab8
A
468
469
1c79356b
A
470/*
471 * Several page replacement parameters are also
472 * shared with this module, so that page allocation
473 * (done here in vm_page_alloc) can trigger the
474 * pageout daemon.
475 */
0a7de745
A
476unsigned int vm_page_free_target = 0;
477unsigned int vm_page_free_min = 0;
478unsigned int vm_page_throttle_limit = 0;
479unsigned int vm_page_inactive_target = 0;
39037602 480#if CONFIG_SECLUDED_MEMORY
0a7de745 481unsigned int vm_page_secluded_target = 0;
39037602 482#endif /* CONFIG_SECLUDED_MEMORY */
0a7de745
A
483unsigned int vm_page_anonymous_min = 0;
484unsigned int vm_page_free_reserved = 0;
1c79356b 485
316670eb 486
1c79356b
A
487/*
488 * The VM system has a couple of heuristics for deciding
489 * that pages are "uninteresting" and should be placed
490 * on the inactive queue as likely candidates for replacement.
491 * These variables let the heuristics be controlled at run-time
492 * to make experimentation easier.
493 */
494
495boolean_t vm_page_deactivate_hint = TRUE;
496
b0d623f7 497struct vm_page_stats_reusable vm_page_stats_reusable;
0a7de745 498
1c79356b
A
499/*
500 * vm_set_page_size:
501 *
502 * Sets the page size, perhaps based upon the memory
503 * size. Must be called before any use of page-size
504 * dependent functions.
505 *
506 * Sets page_shift and page_mask from page_size.
507 */
508void
509vm_set_page_size(void)
510{
fe8ab488
A
511 page_size = PAGE_SIZE;
512 page_mask = PAGE_MASK;
513 page_shift = PAGE_SHIFT;
1c79356b 514
0a7de745 515 if ((page_mask & page_size) != 0) {
1c79356b 516 panic("vm_set_page_size: page size not a power of two");
0a7de745 517 }
1c79356b 518
0a7de745
A
519 for (page_shift = 0;; page_shift++) {
520 if ((1U << page_shift) == page_size) {
1c79356b 521 break;
0a7de745
A
522 }
523 }
1c79356b
A
524}
525
5ba3f43e
A
526#if defined (__x86_64__)
527
528#define MAX_CLUMP_SIZE 16
529#define DEFAULT_CLUMP_SIZE 4
530
531unsigned int vm_clump_size, vm_clump_mask, vm_clump_shift, vm_clump_promote_threshold;
532
533#if DEVELOPMENT || DEBUG
0a7de745 534unsigned long vm_clump_stats[MAX_CLUMP_SIZE + 1];
5ba3f43e
A
535unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
536
0a7de745
A
537static inline void
538vm_clump_update_stats(unsigned int c)
539{
540 assert(c <= vm_clump_size);
541 if (c > 0 && c <= vm_clump_size) {
542 vm_clump_stats[c] += c;
543 }
544 vm_clump_allocs += c;
5ba3f43e
A
545}
546#endif /* if DEVELOPMENT || DEBUG */
547
548/* Called once to setup the VM clump knobs */
549static void
550vm_page_setup_clump( void )
551{
0a7de745 552 unsigned int override, n;
5ba3f43e 553
0a7de745
A
554 vm_clump_size = DEFAULT_CLUMP_SIZE;
555 if (PE_parse_boot_argn("clump_size", &override, sizeof(override))) {
556 vm_clump_size = override;
557 }
5ba3f43e 558
0a7de745
A
559 if (vm_clump_size > MAX_CLUMP_SIZE) {
560 panic("vm_page_setup_clump:: clump_size is too large!");
561 }
562 if (vm_clump_size < 1) {
563 panic("vm_page_setup_clump:: clump_size must be >= 1");
564 }
565 if ((vm_clump_size & (vm_clump_size - 1)) != 0) {
566 panic("vm_page_setup_clump:: clump_size must be a power of 2");
567 }
5ba3f43e 568
0a7de745
A
569 vm_clump_promote_threshold = vm_clump_size;
570 vm_clump_mask = vm_clump_size - 1;
571 for (vm_clump_shift = 0, n = vm_clump_size; n > 1; n >>= 1, vm_clump_shift++) {
572 ;
573 }
5ba3f43e
A
574
575#if DEVELOPMENT || DEBUG
0a7de745
A
576 bzero(vm_clump_stats, sizeof(vm_clump_stats));
577 vm_clump_allocs = vm_clump_inserts = vm_clump_inrange = vm_clump_promotes = 0;
5ba3f43e
A
578#endif /* if DEVELOPMENT || DEBUG */
579}
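/*
 * Worked example, assuming no "clump_size" boot-arg: with the default
 * clump_size of 4, the code above yields vm_clump_mask = 3,
 * vm_clump_shift = 2 and vm_clump_promote_threshold = 4, which the
 * x86_64 free-list code uses to keep pages in aligned groups of 4.
 */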
fe8ab488 580
0a7de745 581#endif /* #if defined (__x86_64__) */
5ba3f43e 582
0a7de745 583#define COLOR_GROUPS_TO_STEAL 4
2d21ac55
A
584
585/* Called once during startup, once the cache geometry is known.
586 */
587static void
588vm_page_set_colors( void )
589{
0a7de745 590 unsigned int n, override;
5ba3f43e 591
0a7de745 592#if defined (__x86_64__)
5ba3f43e
A
593 /* adjust #colors because we need to color outside the clump boundary */
594 vm_cache_geometry_colors >>= vm_clump_shift;
595#endif
0a7de745
A
596 if (PE_parse_boot_argn("colors", &override, sizeof(override))) { /* colors specified as a boot-arg? */
597 n = override;
598 } else if (vm_cache_geometry_colors) { /* do we know what the cache geometry is? */
2d21ac55 599 n = vm_cache_geometry_colors;
0a7de745
A
600 } else {
601 n = DEFAULT_COLORS; /* use default if all else fails */
602 }
603 if (n == 0) {
2d21ac55 604 n = 1;
0a7de745
A
605 }
606 if (n > MAX_COLORS) {
2d21ac55 607 n = MAX_COLORS;
0a7de745
A
608 }
609
2d21ac55 610 /* the count must be a power of 2 */
0a7de745
A
611 if ((n & (n - 1)) != 0) {
612 n = DEFAULT_COLORS; /* use default if all else fails */
613 }
2d21ac55
A
614 vm_colors = n;
615 vm_color_mask = n - 1;
fe8ab488
A
616
617 vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
5ba3f43e
A
618
619#if defined (__x86_64__)
0a7de745
A
620 /* adjust for reduction in colors due to clumping and multiple cores */
621 if (real_ncpus) {
5ba3f43e 622 vm_free_magazine_refill_limit *= (vm_clump_size * real_ncpus);
0a7de745
A
623 }
624#endif
625}
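/*
 * For illustration: if the hardware reports 32 cache colors and (on
 * x86_64) vm_clump_shift is 2, the effective color count becomes
 * 32 >> 2 = 8, so vm_colors = 8 and vm_color_mask = 7.  The refill
 * limit starts at 8 * COLOR_GROUPS_TO_STEAL = 32 and, on x86_64, is
 * then scaled by (vm_clump_size * real_ncpus).
 */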
626
627/*
628 * During single-threaded early boot we don't initialize all pages.
629 * This avoids some delay during boot. They'll be initialized and
630 * added to the free list as needed, or after we are multithreaded, by
631 * what becomes the pageout thread.
632 */
633static boolean_t fill = FALSE;
634static unsigned int fillval;
635uint_t vm_delayed_count = 0; /* when non-zero, indicates we may have more pages to init */
636ppnum_t delay_above_pnum = PPNUM_MAX;
637
638/*
639 * For x86, the first 8 GB initializes quickly and gives us lots of lowmem + mem above to start off with.
640 * If ARM ever uses delayed page initialization, this value may need to be quite different.
641 */
642#define DEFAULT_DELAY_ABOVE_PHYS_GB (8)
643
644/*
645 * When we have to dip into more delayed pages due to low memory, free up
646 * a large chunk to get things back to normal. This avoids contention on the
647 * delayed code allocating page by page.
648 */
649#define VM_DELAY_PAGE_CHUNK ((1024 * 1024 * 1024) / PAGE_SIZE)
650
651/*
652 * Get and initialize the next delayed page.
653 */
654static vm_page_t
655vm_get_delayed_page(int grab_options)
656{
657 vm_page_t p;
658 ppnum_t pnum;
659
660 /*
661 * Get a new page if we have one.
662 */
663 lck_mtx_lock(&vm_page_queue_free_lock);
664 if (vm_delayed_count == 0) {
665 lck_mtx_unlock(&vm_page_queue_free_lock);
666 return NULL;
667 }
668 if (!pmap_next_page(&pnum)) {
669 vm_delayed_count = 0;
670 lck_mtx_unlock(&vm_page_queue_free_lock);
671 return NULL;
672 }
673
674 assert(vm_delayed_count > 0);
675 --vm_delayed_count;
676
cb323159
A
677#if defined(__x86_64__)
678 /* x86 cluster code requires increasing phys_page in vm_pages[] */
679 if (vm_pages_count > 0) {
680 assert(pnum > vm_pages[vm_pages_count - 1].vmp_phys_page);
681 }
682#endif
0a7de745
A
683 p = &vm_pages[vm_pages_count];
684 assert(p < vm_page_array_ending_addr);
685 vm_page_init(p, pnum, FALSE);
686 ++vm_pages_count;
687 ++vm_page_pages;
688 lck_mtx_unlock(&vm_page_queue_free_lock);
689
690 /*
691 * These pages were initially counted as wired, undo that now.
692 */
693 if (grab_options & VM_PAGE_GRAB_Q_LOCK_HELD) {
694 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
695 } else {
696 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
697 vm_page_lockspin_queues();
698 }
699 --vm_page_wire_count;
700 --vm_page_wire_count_initial;
701 if (vm_page_wire_count_on_boot != 0) {
702 --vm_page_wire_count_on_boot;
703 }
704 if (!(grab_options & VM_PAGE_GRAB_Q_LOCK_HELD)) {
705 vm_page_unlock_queues();
706 }
707
708
709 if (fill) {
710 fillPage(pnum, fillval);
711 }
712 return p;
713}
714
715static void vm_page_module_init_delayed(void);
716
717/*
718 * Free all remaining delayed pages to the free lists.
719 */
720void
721vm_free_delayed_pages(void)
722{
723 vm_page_t p;
724 vm_page_t list = NULL;
725 uint_t cnt = 0;
cb323159
A
726 vm_offset_t start_free_va;
727 int64_t free_size;
0a7de745
A
728
729 while ((p = vm_get_delayed_page(VM_PAGE_GRAB_OPTIONS_NONE)) != NULL) {
730 if (vm_himemory_mode) {
731 vm_page_release(p, FALSE);
732 } else {
733 p->vmp_snext = list;
734 list = p;
735 }
736 ++cnt;
737 }
738
739 /*
740 * Free the pages in reverse order if not in himemory mode,
741 * so the low-memory pages end up first on the free lists (LIFO).
742 */
743 while (list != NULL) {
744 p = list;
745 list = p->vmp_snext;
746 p->vmp_snext = NULL;
747 vm_page_release(p, FALSE);
748 }
749#if DEVELOPMENT || DEBUG
cb323159 750 kprintf("vm_free_delayed_pages: initialized %d free pages\n", cnt);
0a7de745
A
751#endif
752
753 /*
754 * Free up any unused full pages at the end of the vm_pages[] array
755 */
cb323159
A
756 start_free_va = round_page((vm_offset_t)&vm_pages[vm_pages_count]);
757
758#if defined(__x86_64__)
759 /*
760 * Since x86 might have used large pages for vm_pages[], we can't
761 * free starting in the middle of a partially used large page.
762 */
763 if (pmap_query_pagesize(kernel_pmap, start_free_va) == I386_LPGBYTES) {
764 start_free_va = ((start_free_va + I386_LPGMASK) & ~I386_LPGMASK);
765 }
5ba3f43e 766#endif
cb323159
A
767 if (start_free_va < (vm_offset_t)vm_page_array_ending_addr) {
768 free_size = trunc_page((vm_offset_t)vm_page_array_ending_addr - start_free_va);
769 if (free_size > 0) {
770 ml_static_mfree(start_free_va, (vm_offset_t)free_size);
771 vm_page_array_ending_addr = (void *)start_free_va;
0a7de745
A
772
773 /*
774 * Note there's no locking here, as only this thread will ever change this value.
775 * The reader, vm_page_diagnose, doesn't grab any locks for the counts it looks at.
776 */
cb323159
A
777 vm_page_stolen_count -= (free_size >> PAGE_SHIFT);
778
779#if DEVELOPMENT || DEBUG
780 kprintf("Freeing final unused %ld bytes from vm_pages[] at 0x%lx\n",
781 (long)free_size, (long)start_free_va);
782#endif
0a7de745
A
783 }
784 }
785
786
787 /*
788 * now we can create the VM page array zone
789 */
790 vm_page_module_init_delayed();
791}
792
793/*
794 * Try to free up enough delayed pages to match a contiguous memory allocation.
795 */
796static void
797vm_free_delayed_pages_contig(
798 uint_t npages,
799 ppnum_t max_pnum,
800 ppnum_t pnum_mask)
801{
802 vm_page_t p;
803 ppnum_t pnum;
804 uint_t cnt = 0;
805
806 /*
807 * Treat 0 as the absolute max page number.
808 */
809 if (max_pnum == 0) {
810 max_pnum = PPNUM_MAX;
811 }
812
813 /*
814 * Free till we get a properly aligned start page
815 */
816 for (;;) {
817 p = vm_get_delayed_page(VM_PAGE_GRAB_OPTIONS_NONE);
818 if (p == NULL) {
819 return;
820 }
821 pnum = VM_PAGE_GET_PHYS_PAGE(p);
822 vm_page_release(p, FALSE);
823 if (pnum >= max_pnum) {
824 return;
825 }
826 if ((pnum & pnum_mask) == 0) {
827 break;
828 }
829 }
830
831 /*
832 * Having a healthy pool of free pages will help performance. We don't
833 * want to fall back to the delayed code for every page allocation.
834 */
835 if (vm_page_free_count < VM_DELAY_PAGE_CHUNK) {
836 npages += VM_DELAY_PAGE_CHUNK;
837 }
838
839 /*
840 * Now free up the pages
841 */
842 for (cnt = 1; cnt < npages; ++cnt) {
843 p = vm_get_delayed_page(VM_PAGE_GRAB_OPTIONS_NONE);
844 if (p == NULL) {
845 return;
846 }
847 vm_page_release(p, FALSE);
848 }
2d21ac55
A
849}
850
5c9f4661
A
851#define ROUNDUP_NEXTP2(X) (1U << (32 - __builtin_clz((X) - 1)))
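/*
 * ROUNDUP_NEXTP2 rounds a 32-bit value up to the next power of two,
 * e.g. ROUNDUP_NEXTP2(5) == 8 and ROUNDUP_NEXTP2(8) == 8.  Note that
 * __builtin_clz(0) is undefined, so the argument must be >= 2.
 */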
852
b0d623f7 853void
f427ee49 854vm_page_init_local_q(unsigned int num_cpus)
b0d623f7 855{
f427ee49 856 struct vpl *t_local_q;
b0d623f7
A
857
858 /*
859 * no point in this for a uni-processor system
860 */
861 if (num_cpus >= 2) {
f427ee49
A
862 ml_cpu_info_t cpu_info;
863
864 /*
865 * Force the allocation alignment to a cacheline,
866 * because the `vpl` struct has a lock and will be taken
867 * cross CPU so we want to isolate the rest of the per-CPU
868 * data to avoid false sharing due to this lock being taken.
869 */
870
871 ml_cpu_get_info(&cpu_info);
b0d623f7 872
f427ee49
A
873 t_local_q = zalloc_percpu_permanent(sizeof(struct vpl),
874 cpu_info.cache_line_size - 1);
b0d623f7 875
f427ee49 876 zpercpu_foreach(lq, t_local_q) {
b0d623f7 877 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
39037602 878 vm_page_queue_init(&lq->vpl_queue);
b0d623f7 879 }
b0d623f7 880
f427ee49
A
881 /* make the initialization visible to all cores */
882 os_atomic_store(&vm_page_local_q, t_local_q, release);
b0d623f7
A
883 }
884}
885
5ba3f43e
A
886/*
887 * vm_init_before_launchd
888 *
889 * This should be called right before launchd is loaded.
890 */
891void
892vm_init_before_launchd()
893{
0a7de745 894 vm_page_lockspin_queues();
5ba3f43e 895 vm_page_wire_count_on_boot = vm_page_wire_count;
0a7de745 896 vm_page_unlock_queues();
5ba3f43e
A
897}
898
b0d623f7 899
1c79356b
A
900/*
901 * vm_page_bootstrap:
902 *
903 * Initializes the resident memory module.
904 *
905 * Allocates memory for the page cells, and
906 * for the object/offset-to-page hash table headers.
907 * Each page cell is initialized and placed on the free list.
908 * Returns the range of available kernel virtual memory.
909 */
f427ee49 910__startup_func
1c79356b
A
911void
912vm_page_bootstrap(
0a7de745
A
913 vm_offset_t *startp,
914 vm_offset_t *endp)
1c79356b 915{
0a7de745
A
916 unsigned int i;
917 unsigned int log1;
918 unsigned int log2;
919 unsigned int size;
1c79356b 920
1c79356b
A
921 /*
922 * Initialize the page queues.
923 */
0a7de745 924
b0d623f7
A
925 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
926 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
927 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
0a7de745 928
2d21ac55
A
929 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
930 int group;
931
932 purgeable_queues[i].token_q_head = 0;
933 purgeable_queues[i].token_q_tail = 0;
0a7de745
A
934 for (group = 0; group < NUM_VOLATILE_GROUPS; group++) {
935 queue_init(&purgeable_queues[i].objq[group]);
936 }
2d21ac55
A
937
938 purgeable_queues[i].type = i;
939 purgeable_queues[i].new_pages = 0;
940#if MACH_ASSERT
941 purgeable_queues[i].debug_count_tokens = 0;
942 purgeable_queues[i].debug_count_objects = 0;
943#endif
0a7de745
A
944 }
945 ;
fe8ab488
A
946 purgeable_nonvolatile_count = 0;
947 queue_init(&purgeable_nonvolatile_queue);
0a7de745
A
948
949 for (i = 0; i < MAX_COLORS; i++) {
39037602 950 vm_page_queue_init(&vm_page_queue_free[i].qhead);
0a7de745 951 }
39037602
A
952
953 vm_page_queue_init(&vm_lopage_queue_free);
954 vm_page_queue_init(&vm_page_queue_active);
955 vm_page_queue_init(&vm_page_queue_inactive);
956#if CONFIG_SECLUDED_MEMORY
957 vm_page_queue_init(&vm_page_queue_secluded);
958#endif /* CONFIG_SECLUDED_MEMORY */
959 vm_page_queue_init(&vm_page_queue_cleaned);
960 vm_page_queue_init(&vm_page_queue_throttled);
961 vm_page_queue_init(&vm_page_queue_anonymous);
3e170ce0 962 queue_init(&vm_objects_wired);
1c79356b 963
0a7de745 964 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
39037602 965 vm_page_queue_init(&vm_page_queue_speculative[i].age_q);
2d21ac55
A
966
967 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
968 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
969 }
39037602
A
970#if CONFIG_BACKGROUND_QUEUE
971 vm_page_queue_init(&vm_page_queue_background);
972
973 vm_page_background_count = 0;
974 vm_page_background_internal_count = 0;
975 vm_page_background_external_count = 0;
976 vm_page_background_promoted_count = 0;
977
978 vm_page_background_target = (unsigned int)(atop_64(max_mem) / 25);
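	/*
	 * max_mem / 25 pages works out to roughly 4% of physical memory;
	 * e.g. with 8 GB of RAM and 16 KB pages, atop_64(max_mem) is
	 * 524288 pages and the initial target is about 20971 pages,
	 * before the VM_PAGE_BACKGROUND_TARGET_MAX clamp and any
	 * "vm_page_bg_target" boot-arg override below.
	 */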
979
0a7de745 980 if (vm_page_background_target > VM_PAGE_BACKGROUND_TARGET_MAX) {
39037602 981 vm_page_background_target = VM_PAGE_BACKGROUND_TARGET_MAX;
0a7de745 982 }
39037602
A
983
984 vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
985 vm_page_background_exclude_external = 0;
986
987 PE_parse_boot_argn("vm_page_bg_mode", &vm_page_background_mode, sizeof(vm_page_background_mode));
988 PE_parse_boot_argn("vm_page_bg_exclude_external", &vm_page_background_exclude_external, sizeof(vm_page_background_exclude_external));
989 PE_parse_boot_argn("vm_page_bg_target", &vm_page_background_target, sizeof(vm_page_background_target));
39037602 990
0a7de745 991 if (vm_page_background_mode > VM_PAGE_BG_LEVEL_1) {
39037602 992 vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
0a7de745 993 }
39037602 994#endif
1c79356b 995 vm_page_free_wanted = 0;
2d21ac55 996 vm_page_free_wanted_privileged = 0;
39037602
A
997#if CONFIG_SECLUDED_MEMORY
998 vm_page_free_wanted_secluded = 0;
999#endif /* CONFIG_SECLUDED_MEMORY */
0a7de745 1000
5ba3f43e
A
1001#if defined (__x86_64__)
1002 /* this must be called before vm_page_set_colors() */
1003 vm_page_setup_clump();
1004#endif
1005
2d21ac55
A
1006 vm_page_set_colors();
1007
39037602
A
1008 bzero(vm_page_inactive_states, sizeof(vm_page_inactive_states));
1009 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
1010 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
1011 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
1012
1013 bzero(vm_page_pageable_states, sizeof(vm_page_pageable_states));
1014 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
1015 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
1016 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
1017 vm_page_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
1018 vm_page_pageable_states[VM_PAGE_ON_SPECULATIVE_Q] = 1;
1019 vm_page_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
1020#if CONFIG_SECLUDED_MEMORY
1021 vm_page_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
1022#endif /* CONFIG_SECLUDED_MEMORY */
1023
1024 bzero(vm_page_non_speculative_pageable_states, sizeof(vm_page_non_speculative_pageable_states));
1025 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
1026 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
1027 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
1028 vm_page_non_speculative_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
1029 vm_page_non_speculative_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
1030#if CONFIG_SECLUDED_MEMORY
1031 vm_page_non_speculative_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
1032#endif /* CONFIG_SECLUDED_MEMORY */
0a7de745 1033
39037602
A
1034 bzero(vm_page_active_or_inactive_states, sizeof(vm_page_active_or_inactive_states));
1035 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
1036 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
1037 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
1038 vm_page_active_or_inactive_states[VM_PAGE_ON_ACTIVE_Q] = 1;
1039#if CONFIG_SECLUDED_MEMORY
1040 vm_page_active_or_inactive_states[VM_PAGE_ON_SECLUDED_Q] = 1;
1041#endif /* CONFIG_SECLUDED_MEMORY */
1042
f427ee49
A
1043 for (vm_tag_t t = 0; t < VM_KERN_MEMORY_FIRST_DYNAMIC; t++) {
1044 vm_allocation_sites_static[t].refcount = 2;
1045 vm_allocation_sites_static[t].tag = t;
1046 vm_allocation_sites[t] = &vm_allocation_sites_static[t];
5ba3f43e
A
1047 }
1048 vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC].refcount = 2;
1049 vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC].tag = VM_KERN_MEMORY_ANY;
1050 vm_allocation_sites[VM_KERN_MEMORY_ANY] = &vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC];
1c79356b
A
1051
1052 /*
1053 * Steal memory for the map and zone subsystems.
1054 */
f427ee49 1055 kernel_startup_initialize_upto(STARTUP_SUB_PMAP_STEAL);
1c79356b
A
1056
1057 /*
1058 * Allocate (and initialize) the virtual-to-physical
1059 * table hash buckets.
1060 *
1061 * The number of buckets should be a power of two to
1062 * get a good hash function. The following computation
1063 * chooses the first power of two that is greater
1064 * than the number of physical pages in the system.
1065 */
1066
1c79356b
A
1067 if (vm_page_bucket_count == 0) {
1068 unsigned int npages = pmap_free_pages();
1069
1070 vm_page_bucket_count = 1;
0a7de745 1071 while (vm_page_bucket_count < npages) {
1c79356b 1072 vm_page_bucket_count <<= 1;
0a7de745 1073 }
1c79356b 1074 }
b0d623f7 1075 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
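	/*
	 * Example: on a machine with about one million physical pages,
	 * the loop above picks vm_page_bucket_count = 2^20 (1048576),
	 * and with BUCKETS_PER_LOCK == 16 that gives
	 * vm_page_bucket_lock_count = 65536 bucket locks.
	 */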
1c79356b
A
1076
1077 vm_page_hash_mask = vm_page_bucket_count - 1;
1078
1079 /*
1080 * Calculate object shift value for hashing algorithm:
1081 * O = log2(sizeof(struct vm_object))
1082 * B = log2(vm_page_bucket_count)
1083 * hash shifts the object left by
1084 * B/2 - O
1085 */
1086 size = vm_page_bucket_count;
0a7de745 1087 for (log1 = 0; size > 1; log1++) {
1c79356b 1088 size /= 2;
0a7de745 1089 }
1c79356b 1090 size = sizeof(struct vm_object);
0a7de745 1091 for (log2 = 0; size > 1; log2++) {
1c79356b 1092 size /= 2;
0a7de745
A
1093 }
1094 vm_page_hash_shift = log1 / 2 - log2 + 1;
1095
1096 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
1097 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
1098 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to ensure unique series */
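	/*
	 * Worked example (sizeof(struct vm_object) is only assumed here):
	 * with vm_page_bucket_count = 2^20, log1 = 20; if the object were
	 * 256 bytes, log2 = 8 and vm_page_hash_shift = 20/2 - 8 + 1 = 3.
	 * vm_page_bucket_hash then becomes (1 << 10) | (1 << 5) | 1 = 0x421.
	 */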
1c79356b 1099
0a7de745 1100 if (vm_page_hash_mask & vm_page_bucket_count) {
1c79356b 1101 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
0a7de745 1102 }
1c79356b 1103
15129b1c
A
1104#if VM_PAGE_BUCKETS_CHECK
1105#if VM_PAGE_FAKE_BUCKETS
1106 /*
1107 * Allocate a decoy set of page buckets, to detect
1108 * any stomping there.
1109 */
1110 vm_page_fake_buckets = (vm_page_bucket_t *)
0a7de745
A
1111 pmap_steal_memory(vm_page_bucket_count *
1112 sizeof(vm_page_bucket_t));
15129b1c
A
1113 vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
1114 vm_page_fake_buckets_end =
0a7de745
A
1115 vm_map_round_page((vm_page_fake_buckets_start +
1116 (vm_page_bucket_count *
1117 sizeof(vm_page_bucket_t))),
1118 PAGE_MASK);
15129b1c
A
1119 char *cp;
1120 for (cp = (char *)vm_page_fake_buckets_start;
0a7de745
A
1121 cp < (char *)vm_page_fake_buckets_end;
1122 cp++) {
15129b1c
A
1123 *cp = 0x5a;
1124 }
1125#endif /* VM_PAGE_FAKE_BUCKETS */
1126#endif /* VM_PAGE_BUCKETS_CHECK */
1127
39037602 1128 kernel_debug_string_early("vm_page_buckets");
1c79356b 1129 vm_page_buckets = (vm_page_bucket_t *)
0a7de745
A
1130 pmap_steal_memory(vm_page_bucket_count *
1131 sizeof(vm_page_bucket_t));
1c79356b 1132
39037602 1133 kernel_debug_string_early("vm_page_bucket_locks");
b0d623f7 1134 vm_page_bucket_locks = (lck_spin_t *)
0a7de745
A
1135 pmap_steal_memory(vm_page_bucket_lock_count *
1136 sizeof(lck_spin_t));
b0d623f7 1137
1c79356b 1138 for (i = 0; i < vm_page_bucket_count; i++) {
39037602 1139 vm_page_bucket_t *bucket = &vm_page_buckets[i];
1c79356b 1140
fe8ab488 1141 bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
1c79356b
A
1142#if MACH_PAGE_HASH_STATS
1143 bucket->cur_count = 0;
1144 bucket->hi_count = 0;
1145#endif /* MACH_PAGE_HASH_STATS */
1146 }
1147
0a7de745
A
1148 for (i = 0; i < vm_page_bucket_lock_count; i++) {
1149 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
1150 }
b0d623f7 1151
3e170ce0
A
1152 vm_tag_init();
1153
15129b1c
A
1154#if VM_PAGE_BUCKETS_CHECK
1155 vm_page_buckets_check_ready = TRUE;
1156#endif /* VM_PAGE_BUCKETS_CHECK */
1157
1c79356b
A
1158 /*
1159 * Machine-dependent code allocates the resident page table.
1160 * It uses vm_page_init to initialize the page frames.
1161 * The code also returns to us the virtual space available
1162 * to the kernel. We don't trust the pmap module
1163 * to get the alignment right.
1164 */
1165
39037602 1166 kernel_debug_string_early("pmap_startup");
1c79356b 1167 pmap_startup(&virtual_space_start, &virtual_space_end);
91447636
A
1168 virtual_space_start = round_page(virtual_space_start);
1169 virtual_space_end = trunc_page(virtual_space_end);
1c79356b
A
1170
1171 *startp = virtual_space_start;
1172 *endp = virtual_space_end;
1173
1174 /*
1175 * Compute the initial "wire" count.
0a7de745 1176 * Up until now, the pages which have been set aside are not under
1c79356b
A
1177 * the VM system's control, so although they aren't explicitly
1178 * wired, they nonetheless can't be moved. At this moment,
1179 * all VM managed pages are "free", courtesy of pmap_startup.
1180 */
b0d623f7 1181 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
0a7de745
A
1182 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) -
1183 vm_page_free_count - vm_lopage_free_count;
39037602
A
1184#if CONFIG_SECLUDED_MEMORY
1185 vm_page_wire_count -= vm_page_secluded_count;
1186#endif
0b4c1975 1187 vm_page_wire_count_initial = vm_page_wire_count;
91447636 1188
cb323159
A
1189 /* capture this for later use */
1190 booter_size = ml_get_booter_memory_size();
1191
0a7de745
A
1192 printf("vm_page_bootstrap: %d free pages, %d wired pages, (up to %d of which are delayed free)\n",
1193 vm_page_free_count, vm_page_wire_count, vm_delayed_count);
2d21ac55 1194
39037602 1195 kernel_debug_string_early("vm_page_bootstrap complete");
1c79356b
A
1196}
1197
0a7de745 1198#ifndef MACHINE_PAGES
1c79356b 1199/*
cb323159
A
1200 * This is the early boot time allocator for data structures needed to bootstrap the VM system.
1201 * On x86 it will allocate large pages if size is sufficiently large. We don't need to do this
1202 * on ARM yet, due to the combination of a large base page size and smaller RAM devices.
1c79356b 1203 */
cb323159
A
1204static void *
1205pmap_steal_memory_internal(
1206 vm_size_t size,
1207 boolean_t might_free)
1c79356b 1208{
5ba3f43e 1209 kern_return_t kr;
cb323159
A
1210 vm_offset_t addr;
1211 vm_offset_t map_addr;
5ba3f43e 1212 ppnum_t phys_page;
1c79356b
A
1213
1214 /*
cb323159 1215 * Size needs to be aligned to word size.
1c79356b 1216 */
0a7de745 1217 size = (size + sizeof(void *) - 1) & ~(sizeof(void *) - 1);
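	/*
	 * On LP64 kernels sizeof(void *) is 8, so e.g. a 13-byte request
	 * is rounded up to 16 bytes here before any virtual space is
	 * carved out.
	 */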
1c79356b
A
1218
1219 /*
cb323159
A
1220 * On the first call, get the initial values for virtual address space
1221 * and page align them.
1c79356b 1222 */
1c79356b
A
1223 if (virtual_space_start == virtual_space_end) {
1224 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
cb323159
A
1225 virtual_space_start = round_page(virtual_space_start);
1226 virtual_space_end = trunc_page(virtual_space_end);
1c79356b 1227
cb323159 1228#if defined(__x86_64__)
1c79356b 1229 /*
cb323159
A
1230 * Release remaining unused section of preallocated KVA and the 4K page tables
1231 * that map it. This makes the VA available for large page mappings.
1c79356b 1232 */
cb323159
A
1233 Idle_PTs_release(virtual_space_start, virtual_space_end);
1234#endif
1c79356b
A
1235 }
1236
1237 /*
cb323159
A
1238 * Allocate the virtual space for this request. On x86, we'll align to a large page
1239 * address if the size is big enough to back with at least 1 large page.
1c79356b 1240 */
cb323159
A
1241#if defined(__x86_64__)
1242 if (size >= I386_LPGBYTES) {
1243 virtual_space_start = ((virtual_space_start + I386_LPGMASK) & ~I386_LPGMASK);
1244 }
1245#endif
1c79356b
A
1246 addr = virtual_space_start;
1247 virtual_space_start += size;
1248
6d2010ae 1249 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
1c79356b
A
1250
1251 /*
cb323159 1252 * Allocate and map physical pages to back the new virtual space.
1c79356b 1253 */
cb323159
A
1254 map_addr = round_page(addr);
1255 while (map_addr < addr + size) {
1256#if defined(__x86_64__)
1257 /*
1258 * Back with a large page if properly aligned on x86
1259 */
1260 if ((map_addr & I386_LPGMASK) == 0 &&
1261 map_addr + I386_LPGBYTES <= addr + size &&
1262 pmap_pre_expand_large(kernel_pmap, map_addr) == KERN_SUCCESS &&
1263 pmap_next_page_large(&phys_page) == KERN_SUCCESS) {
1264 kr = pmap_enter(kernel_pmap, map_addr, phys_page,
1265 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
1266 VM_WIMG_USE_DEFAULT | VM_MEM_SUPERPAGE, FALSE);
1267
1268 if (kr != KERN_SUCCESS) {
1269 panic("pmap_steal_memory: pmap_enter() large failed, new_addr=%#lx, phys_page=%u",
1270 (unsigned long)map_addr, phys_page);
1271 }
1272 map_addr += I386_LPGBYTES;
1273 vm_page_wire_count += I386_LPGBYTES >> PAGE_SHIFT;
1274 vm_page_stolen_count += I386_LPGBYTES >> PAGE_SHIFT;
1275 vm_page_kern_lpage_count++;
1276 continue;
1277 }
1278#endif
1c79356b 1279
cb323159 1280 if (!pmap_next_page_hi(&phys_page, might_free)) {
39037602 1281 panic("pmap_steal_memory() size: 0x%llx\n", (uint64_t)size);
0a7de745 1282 }
1c79356b 1283
cb323159
A
1284#if defined(__x86_64__)
1285 pmap_pre_expand(kernel_pmap, map_addr);
5ba3f43e
A
1286#endif
1287
cb323159 1288 kr = pmap_enter(kernel_pmap, map_addr, phys_page,
0a7de745
A
1289 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
1290 VM_WIMG_USE_DEFAULT, FALSE);
5ba3f43e
A
1291
1292 if (kr != KERN_SUCCESS) {
cb323159
A
1293 panic("pmap_steal_memory() pmap_enter failed, map_addr=%#lx, phys_page=%u",
1294 (unsigned long)map_addr, phys_page);
5ba3f43e 1295 }
cb323159 1296 map_addr += PAGE_SIZE;
1c79356b 1297
1c79356b
A
1298 /*
1299 * Account for newly stolen memory
1300 */
1301 vm_page_wire_count++;
3e170ce0 1302 vm_page_stolen_count++;
1c79356b
A
1303 }
1304
cb323159
A
1305#if defined(__x86_64__)
1306 /*
1307 * The call with might_free is currently the last use of pmap_steal_memory*().
1308 * Notify the pmap layer to record which high pages were allocated so far.
1309 */
1310 if (might_free) {
1311 pmap_hi_pages_done();
1312 }
1313#endif
5ba3f43e
A
1314#if KASAN
1315 kasan_notify_address(round_page(addr), size);
1316#endif
91447636 1317 return (void *) addr;
1c79356b
A
1318}
1319
cb323159
A
1320void *
1321pmap_steal_memory(
1322 vm_size_t size)
1323{
1324 return pmap_steal_memory_internal(size, FALSE);
1325}
1326
1327void *
1328pmap_steal_freeable_memory(
1329 vm_size_t size)
1330{
1331 return pmap_steal_memory_internal(size, TRUE);
1332}
1333
c3c9b80d
A
1334#if defined(__arm64__)
1335/*
1336 * Retire a page at startup.
1337 * These pages will eventually wind up on the retired_pages_object
1338 * in vm_retire_boot_pages().
1339 */
1340static vm_page_queue_head_t vm_page_queue_retired VM_PAGE_PACKED_ALIGNED;
1341static void
1342vm_page_retire_startup(vm_page_t p)
1343{
1344 p->vmp_q_state = VM_PAGE_NOT_ON_Q;
1345 p->vmp_error = true;
1346 p->vmp_unusual = true;
1347 vm_page_queue_enter(&vm_page_queue_retired, p, vmp_pageq);
1348 printf("To be retired at boot: page at 0x%llx\n", (long long)ptoa(VM_PAGE_GET_PHYS_PAGE(p)));
1349}
1350#endif /* defined(__arm64__) */
1351
39037602
A
1352#if CONFIG_SECLUDED_MEMORY
1353/* boot-args to control secluded memory */
0a7de745
A
1354unsigned int secluded_mem_mb = 0; /* # of MBs of RAM to seclude */
1355int secluded_for_iokit = 1; /* IOKit can use secluded memory */
1356int secluded_for_apps = 1; /* apps can use secluded memory */
1357 int secluded_for_filecache = 2; /* filecache can use secluded memory */
39037602
A
1358#if 11
1359int secluded_for_fbdp = 0;
1360#endif
d9a64523 1361uint64_t secluded_shutoff_trigger = 0;
f427ee49 1362uint64_t secluded_shutoff_headroom = 150 * 1024 * 1024; /* original value from N56 */
39037602
A
1363#endif /* CONFIG_SECLUDED_MEMORY */
1364
1365
5ba3f43e
A
1366#if defined(__arm__) || defined(__arm64__)
1367extern void patch_low_glo_vm_page_info(void *, void *, uint32_t);
1368unsigned int vm_first_phys_ppnum = 0;
1369#endif
39037602 1370
fe8ab488 1371void vm_page_release_startup(vm_page_t mem);
1c79356b
A
1372void
1373pmap_startup(
0a7de745
A
1374 vm_offset_t *startp,
1375 vm_offset_t *endp)
1c79356b 1376{
0a7de745
A
1377 unsigned int i, npages;
1378 ppnum_t phys_page;
1379 uint64_t mem_sz;
1380 uint64_t start_ns;
1381 uint64_t now_ns;
1382 uint_t low_page_count = 0;
1c79356b 1383
fe8ab488 1384#if defined(__LP64__)
fe8ab488
A
1385 /*
1386 * make sure we are aligned on a 64 byte boundary
1387 * for VM_PAGE_PACK_PTR (it clips off the low-order
1388 * 6 bits of the pointer)
1389 */
0a7de745 1390 if (virtual_space_start != virtual_space_end) {
fe8ab488 1391 virtual_space_start = round_page(virtual_space_start);
0a7de745 1392 }
fe8ab488
A
1393#endif
1394
1c79356b 1395 /*
0a7de745
A
1396 * We calculate how many page frames we will have
1397 * and then allocate the page structures in one chunk.
1398 *
1399 * Note that the calculation here doesn't take into account
1400 * the memory needed to map what's being allocated, i.e. the page
1401 * table entries. So the actual number of pages we get will be
1402 * less than this. To do someday: include that in the computation.
c3c9b80d
A
1403 *
1404 * Also for ARM, we don't use the count of free_pages, but rather the
1405 * range from last page to first page (ignore holes due to retired pages).
1c79356b 1406 */
c3c9b80d
A
1407#if defined(__arm__) || defined(__arm64__)
1408 mem_sz = pmap_free_pages_span() * (uint64_t)PAGE_SIZE;
1409#else /* defined(__arm__) || defined(__arm64__) */
0a7de745 1410 mem_sz = pmap_free_pages() * (uint64_t)PAGE_SIZE;
c3c9b80d 1411#endif /* defined(__arm__) || defined(__arm64__) */
0a7de745
A
1412 mem_sz += round_page(virtual_space_start) - virtual_space_start; /* Account for any slop */
1413 npages = (uint_t)(mem_sz / (PAGE_SIZE + sizeof(*vm_pages))); /* scaled to include the vm_page_ts */
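	/*
	 * The divisor includes sizeof(*vm_pages) so the estimate accounts
	 * for the vm_page_t needed per page.  Purely for illustration, if
	 * a vm_page_t were 80 bytes and PAGE_SIZE 16 KB, 4 GB of free
	 * memory would yield roughly 4 GB / (16384 + 80) ~= 260 K entries.
	 */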
1c79356b 1414
cb323159 1415 vm_pages = (vm_page_t) pmap_steal_freeable_memory(npages * sizeof *vm_pages);
1c79356b 1416
0c530ab8
A
1417 /*
1418 * Check if we want to initialize pages to a known value
1419 */
0a7de745
A
1420 if (PE_parse_boot_argn("fill", &fillval, sizeof(fillval))) {
1421 fill = TRUE;
1422 }
1423#if DEBUG
316670eb
A
1424 /* This slows down booting the DEBUG kernel, particularly on
1425 * large memory systems, but is worthwhile in deterministically
1426 * trapping uninitialized memory usage.
1427 */
0a7de745
A
1428 if (!fill) {
1429 fill = TRUE;
316670eb
A
1430 fillval = 0xDEB8F177;
1431 }
1432#endif
0a7de745 1433 if (fill) {
316670eb 1434 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
0a7de745 1435 }
39037602
A
1436
1437#if CONFIG_SECLUDED_MEMORY
0a7de745
A
1438 /*
1439 * Figure out how much secluded memory to have before we start
1440 * releasing pages to the free lists.
1441 * The default, if specified nowhere else, is no secluded mem.
1442 */
39037602 1443 secluded_mem_mb = 0;
0a7de745 1444 if (max_mem > 1 * 1024 * 1024 * 1024) {
39037602
A
1445 /* default to 90MB for devices with > 1GB of RAM */
1446 secluded_mem_mb = 90;
1447 }
1448 /* override with value from device tree, if provided */
1449 PE_get_default("kern.secluded_mem_mb",
0a7de745 1450 &secluded_mem_mb, sizeof(secluded_mem_mb));
39037602
A
1451 /* override with value from boot-args, if provided */
1452 PE_parse_boot_argn("secluded_mem_mb",
0a7de745
A
1453 &secluded_mem_mb,
1454 sizeof(secluded_mem_mb));
39037602
A
1455
1456 vm_page_secluded_target = (unsigned int)
0a7de745 1457 ((secluded_mem_mb * 1024ULL * 1024ULL) / PAGE_SIZE);
39037602 1458 PE_parse_boot_argn("secluded_for_iokit",
0a7de745
A
1459 &secluded_for_iokit,
1460 sizeof(secluded_for_iokit));
39037602 1461 PE_parse_boot_argn("secluded_for_apps",
0a7de745
A
1462 &secluded_for_apps,
1463 sizeof(secluded_for_apps));
39037602 1464 PE_parse_boot_argn("secluded_for_filecache",
0a7de745
A
1465 &secluded_for_filecache,
1466 sizeof(secluded_for_filecache));
39037602
A
1467#if 11
1468 PE_parse_boot_argn("secluded_for_fbdp",
0a7de745
A
1469 &secluded_for_fbdp,
1470 sizeof(secluded_for_fbdp));
39037602 1471#endif
39037602 1472
d9a64523 1473 /*
f427ee49 1474 * Allow a really large app to effectively use secluded memory until it exits.
d9a64523 1475 */
f427ee49 1476 if (vm_page_secluded_target != 0) {
d9a64523 1477 /*
f427ee49
A
1478 * Get an amount from boot-args, else use 1/2 of max_mem.
1479 * 1/2 max_mem was chosen from a Peace daemon tentpole test which
1480 * used munch to induce jetsam thrashing of false idle daemons on N56.
d9a64523
A
1481 */
1482 int secluded_shutoff_mb;
1483 if (PE_parse_boot_argn("secluded_shutoff_mb", &secluded_shutoff_mb,
0a7de745 1484 sizeof(secluded_shutoff_mb))) {
d9a64523 1485 secluded_shutoff_trigger = (uint64_t)secluded_shutoff_mb * 1024 * 1024;
0a7de745 1486 } else {
f427ee49 1487 secluded_shutoff_trigger = max_mem / 2;
0a7de745 1488 }
d9a64523 1489
f427ee49
A
1490 /* ensure the headroom value is sensible and avoid underflows */
1491 assert(secluded_shutoff_trigger == 0 || secluded_shutoff_trigger > secluded_shutoff_headroom);
0c530ab8 1492 }
d9a64523
A
1493
1494#endif /* CONFIG_SECLUDED_MEMORY */
0c530ab8 1495
0a7de745
A
1496#if defined(__x86_64__)
1497
1c79356b 1498 /*
0a7de745 1499 * Decide how much memory we delay freeing at boot time.
1c79356b 1500 */
0a7de745
A
1501 uint32_t delay_above_gb;
1502 if (!PE_parse_boot_argn("delay_above_gb", &delay_above_gb, sizeof(delay_above_gb))) {
1503 delay_above_gb = DEFAULT_DELAY_ABOVE_PHYS_GB;
1504 }
1505
1506 if (delay_above_gb == 0) {
1507 delay_above_pnum = PPNUM_MAX;
d9a64523 1508 } else {
0a7de745 1509 delay_above_pnum = delay_above_gb * (1024 * 1024 * 1024 / PAGE_SIZE);
1c79356b
A
1510 }
1511
0a7de745
A
1512 /* make sure we have sane breathing room: 1G above low memory */
1513 if (delay_above_pnum <= max_valid_low_ppnum) {
1514 delay_above_pnum = max_valid_low_ppnum + ((1024 * 1024 * 1024) >> PAGE_SHIFT);
1515 }
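	/*
	 * Example: with the default DEFAULT_DELAY_ABOVE_PHYS_GB of 8 and
	 * 4 KB pages, delay_above_pnum = 8 * 262144 = 0x200000, so page
	 * initialization is deferred for physical pages above 8 GB
	 * (subject to the low-memory floor applied above).
	 */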
2d21ac55 1516
0a7de745
A
1517 if (delay_above_pnum < PPNUM_MAX) {
1518 printf("pmap_startup() delaying init/free of page nums > 0x%x\n", delay_above_pnum);
55e303ae 1519 }
55e303ae 1520
0a7de745 1521#endif /* defined(__x86_64__) */
55e303ae 1522
1c79356b 1523 /*
0a7de745 1524 * Initialize and release the page frames.
1c79356b 1525 */
f427ee49 1526 kernel_debug_string_early("page_frame_init");
1c79356b 1527
0a7de745
A
1528 vm_page_array_beginning_addr = &vm_pages[0];
1529 vm_page_array_ending_addr = &vm_pages[npages]; /* used by ptr packing/unpacking code */
f427ee49
A
1530#if VM_PAGE_PACKED_FROM_ARRAY
1531 if (npages >= VM_PAGE_PACKED_FROM_ARRAY) {
1532 panic("pmap_startup(): too many pages to support vm_page packing");
1533 }
1534#endif
1c79356b 1535
0a7de745 1536 vm_delayed_count = 0;
c3c9b80d
A
1537#if defined(__arm64__)
1538 vm_page_queue_init(&vm_page_queue_retired);
1539#endif /* defined(__arm64__) */
1c79356b 1540
0a7de745
A
1541 absolutetime_to_nanoseconds(mach_absolute_time(), &start_ns);
1542 vm_pages_count = 0;
1543 for (i = 0; i < npages; i++) {
1544 /* Did we run out of pages? */
1545 if (!pmap_next_page(&phys_page)) {
1546 break;
1547 }
1c79356b 1548
0a7de745
A
1549 if (phys_page < max_valid_low_ppnum) {
1550 ++low_page_count;
1551 }
1c79356b 1552
0a7de745
A
1553 /* Are we at high enough pages to delay the rest? */
1554 if (low_page_count > vm_lopage_free_limit && phys_page > delay_above_pnum) {
1555 vm_delayed_count = pmap_free_pages();
1556 break;
1557 }
1558
1559#if defined(__arm__) || defined(__arm64__)
1560 if (i == 0) {
1561 vm_first_phys_ppnum = phys_page;
1562 patch_low_glo_vm_page_info((void *)vm_page_array_beginning_addr,
1563 (void *)vm_page_array_ending_addr, vm_first_phys_ppnum);
c3c9b80d
A
1564#if defined(__arm64__)
1565 } else {
1566 /*
1567 * pmap_next_page() may skip over pages reported bad by iboot.
1568 */
1569 while (i < phys_page - vm_first_phys_ppnum && i < npages) {
1570 ++vm_pages_count;
1571 vm_page_init(&vm_pages[i], i + vm_first_phys_ppnum, FALSE);
1572 vm_page_retire_startup(&vm_pages[i]);
1573 ++i;
1574 }
1575 if (i >= npages) {
1576 break;
1577 }
1578 assert(i == phys_page - vm_first_phys_ppnum);
1579#endif /* defined(__arm64__) */
0a7de745 1580 }
c3c9b80d 1581#endif /* defined(__arm__) || defined(__arm64__) */
0a7de745 1582
cb323159
A
1583#if defined(__x86_64__)
1584 /* The x86 clump freeing code requires increasing ppn's to work correctly */
1585 if (i > 0) {
1586 assert(phys_page > vm_pages[i - 1].vmp_phys_page);
1587 }
1588#endif
0a7de745
A
1589 ++vm_pages_count;
1590 vm_page_init(&vm_pages[i], phys_page, FALSE);
1591 if (fill) {
1592 fillPage(phys_page, fillval);
1593 }
1594 if (vm_himemory_mode) {
1595 vm_page_release_startup(&vm_pages[i]);
1596 }
1597 }
1598 vm_page_pages = vm_pages_count; /* used to report to user space */
1599
1600 if (!vm_himemory_mode) {
1601 do {
c3c9b80d
A
1602 if (!vm_pages[--i].vmp_error) { /* skip retired pages */
1603 vm_page_release_startup(&vm_pages[i]);
1604 }
0a7de745
A
1605 } while (i != 0);
1606 }
1607
1608 absolutetime_to_nanoseconds(mach_absolute_time(), &now_ns);
1609 printf("pmap_startup() init/release time: %lld microsec\n", (now_ns - start_ns) / NSEC_PER_USEC);
1610 printf("pmap_startup() delayed init/release of %d pages\n", vm_delayed_count);
1611
f427ee49 1612#if defined(__LP64__)
0a7de745
A
1613 if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0]))) != &vm_pages[0]) {
1614 panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);
1615 }
1616
1617 if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count - 1]))) != &vm_pages[vm_pages_count - 1]) {
1618 panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count - 1]);
1619 }
1620#endif
1621
1622 VM_CHECK_MEMORYSTATUS;
1623
1624 /*
1625 * We have to re-align virtual_space_start,
1626 * because pmap_steal_memory has been using it.
1627 */
1628 virtual_space_start = round_page(virtual_space_start);
1629 *startp = virtual_space_start;
1630 *endp = virtual_space_end;
1631}
1632#endif /* MACHINE_PAGES */
1633
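/*
 * Illustrative sketch (not part of xnu): a toy model of the "delay the rest"
 * pattern used by pmap_startup() above. Pages are initialized one at a time
 * until the page number crosses the delay threshold; everything past that
 * point is only counted and left for later, lazy initialization. All
 * example_* names are hypothetical; the kernel version also tracks low
 * pages, retired pages and the optional fill pattern.
 */
#include <stdint.h>
#include <stdio.h>

struct example_page {
	uint32_t ppnum;
	int      initialized;
};

static unsigned int
example_startup_init(struct example_page *pages, unsigned int npages,
    uint32_t delay_above_pnum)
{
	unsigned int i, delayed = 0;

	for (i = 0; i < npages; i++) {
		uint32_t ppnum = i;                    /* pretend ppnums are dense from 0 */

		if (ppnum > delay_above_pnum) {
			delayed = npages - i;              /* everything past here is deferred */
			break;
		}
		pages[i].ppnum = ppnum;
		pages[i].initialized = 1;              /* vm_page_init() + release in the kernel */
	}
	return delayed;
}

int
main(void)
{
	struct example_page pages[16] = {{0, 0}};

	printf("delayed %u of 16 pages\n", example_startup_init(pages, 16, 7));
	return 0;
}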
1634/*
1635 * Create the zone that represents the vm_pages[] array. Nothing ever allocates
1636 * or frees to this zone. It's just here for reporting purposes via the zprint command.
1637 * This needs to be done after all initially delayed pages are put on the free lists.
1638 */
1639static void
1640vm_page_module_init_delayed(void)
1641{
f427ee49
A
1642 (void)zone_create_ext("vm pages array", sizeof(struct vm_page),
1643 ZC_NOGZALLOC, ZONE_ID_ANY, ^(zone_t z) {
1644 uint64_t vm_page_zone_pages, vm_page_array_zone_data_size;
0a7de745 1645
f427ee49
A
1646 zone_set_exhaustible(z, 0);
1647 /*
1648 * Reflect size and usage information for vm_pages[].
1649 */
0a7de745 1650
c3c9b80d
A
1651 z->z_elems_avail = (uint32_t)(vm_page_array_ending_addr - vm_pages);
1652 z->z_elems_free = z->z_elems_avail - vm_pages_count;
f427ee49
A
1653 zpercpu_get_cpu(z->z_stats, 0)->zs_mem_allocated =
1654 vm_pages_count * sizeof(struct vm_page);
1655 vm_page_array_zone_data_size = (uintptr_t)((void *)vm_page_array_ending_addr - (void *)vm_pages);
1656 vm_page_zone_pages = atop(round_page((vm_offset_t)vm_page_array_zone_data_size));
c3c9b80d
A
1657 z->z_wired_cur += vm_page_zone_pages;
1658 z->z_wired_hwm = z->z_wired_cur;
1659 z->z_va_cur = z->z_wired_cur;
f427ee49
A
1660 /* since zone accounts for these, take them out of stolen */
1661 VM_PAGE_MOVE_STOLEN(vm_page_zone_pages);
1662 });
0a7de745
A
1663}
1664
1665/*
1666 * Create the vm_pages zone. This is used for the vm_page structures for the pages
1667 * that are scavenged from other boot-time usages by ml_static_mfree(). As such,
1668 * this needs to happen in early VM bootstrap.
1669 */
f427ee49
A
1670
1671__startup_func
1672static void
0a7de745
A
1673vm_page_module_init(void)
1674{
1675 vm_size_t vm_page_with_ppnum_size;
39037602 1676
0a7de745
A
1677 /*
1678 * Since the pointers to elements in this zone will be packed, they
1679 * must have an appropriately aligned size, not strictly what sizeof() reports.
1680 */
1681 vm_page_with_ppnum_size =
f427ee49
A
1682 (sizeof(struct vm_page_with_ppnum) + (VM_PAGE_PACKED_PTR_ALIGNMENT - 1)) &
1683 ~(VM_PAGE_PACKED_PTR_ALIGNMENT - 1);
1684
1685 vm_page_zone = zone_create_ext("vm pages", vm_page_with_ppnum_size,
c3c9b80d 1686 ZC_NOGZALLOC | ZC_ALIGNMENT_REQUIRED, ZONE_ID_ANY, ^(zone_t z) {
f427ee49 1687#if defined(__LP64__)
c3c9b80d 1688 zone_set_submap_idx(z, Z_SUBMAP_IDX_VA_RESTRICTED);
f427ee49 1689#endif
c3c9b80d
A
1690 /*
1691 * The number "10" is a small number that is larger than the number
1692 * of fictitious pages that any single caller will attempt to allocate
1693 * without blocking.
1694 *
1695 * The largest such number at the moment is kernel_memory_allocate()
1696 * when 2 guard pages are asked for. 10 is simply a somewhat larger number,
1697 * taking into account the 50% hysteresis the zone allocator uses.
1698 *
1699 * Note: this works at all because the zone allocator
1700 * doesn't ever allocate fictitious pages.
1701 */
1702 z->z_elems_rsv = 10;
f427ee49 1703 });
1c79356b 1704}
f427ee49 1705STARTUP(ZALLOC, STARTUP_RANK_SECOND, vm_page_module_init);
1c79356b
A
1706
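/*
 * Illustrative sketch (not part of xnu): the element-size computation in
 * vm_page_module_init() above is the standard power-of-two round-up,
 * (size + (align - 1)) & ~(align - 1). The 64-byte alignment below is a
 * hypothetical stand-in for VM_PAGE_PACKED_PTR_ALIGNMENT.
 */
#include <assert.h>
#include <stddef.h>

static size_t
example_round_up(size_t size, size_t align)
{
	assert(align != 0 && (align & (align - 1)) == 0);   /* power of two */
	return (size + (align - 1)) & ~(align - 1);
}

/* e.g. example_round_up(72, 64) == 128 and example_round_up(64, 64) == 64 */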
1707/*
1708 * Routine: vm_page_create
1709 * Purpose:
1710 * After the VM system is up, machine-dependent code
1711 * may stumble across more physical memory. For example,
1712 * memory that it was reserving for a frame buffer.
1713 * vm_page_create turns this memory into available pages.
1714 */
1715
1716void
1717vm_page_create(
55e303ae
A
1718 ppnum_t start,
1719 ppnum_t end)
1c79356b 1720{
0a7de745
A
1721 ppnum_t phys_page;
1722 vm_page_t m;
1c79356b 1723
55e303ae 1724 for (phys_page = start;
0a7de745
A
1725 phys_page < end;
1726 phys_page++) {
c3c9b80d 1727 m = vm_page_grab_fictitious_common(phys_page, TRUE);
d9a64523 1728 m->vmp_fictitious = FALSE;
0b4c1975 1729 pmap_clear_noencrypt(phys_page);
6d2010ae 1730
0a7de745 1731 lck_mtx_lock(&vm_page_queue_free_lock);
1c79356b 1732 vm_page_pages++;
0a7de745 1733 lck_mtx_unlock(&vm_page_queue_free_lock);
39037602 1734 vm_page_release(m, FALSE);
1c79356b
A
1735 }
1736}
1737
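/*
 * Illustrative usage sketch (not part of xnu): a driver that hands back a
 * physical range it had reserved (for example, a framebuffer it no longer
 * needs) could pass that range to vm_page_create() as [start, end) page
 * numbers. The ppnum values below are made up for illustration.
 */
#if 0	/* example only */
	ppnum_t first = 0x80000;          /* hypothetical first reclaimed page */
	ppnum_t limit = 0x80400;          /* one past the last reclaimed page  */

	vm_page_create(first, limit);     /* the pages become available to the VM */
#endif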
c3c9b80d
A
1738#if defined(__arm64__)
1739/*
1740 * Like vm_page_create(), except we want to immediately retire the page,
1741 * not put it on the free list.
1742 */
1743void
1744vm_page_create_retired(
1745 ppnum_t phys_page)
1746{
1747 vm_page_t m;
1748
1749 m = vm_page_grab_fictitious_common(phys_page, TRUE);
1750 m->vmp_fictitious = FALSE;
1751 pmap_clear_noencrypt(phys_page);
1752 m->vmp_error = true;
1753 m->vmp_unusual = true;
1754 vm_page_lock_queues();
1755 m->vmp_q_state = VM_PAGE_IS_WIRED;
1756 m->vmp_wire_count++;
1757 vm_page_unlock_queues();
1758
1759 lck_mtx_lock(&vm_page_queue_free_lock);
1760 vm_page_pages++;
1761 lck_mtx_unlock(&vm_page_queue_free_lock);
1762
1763 vm_object_lock(retired_pages_object);
1764 vm_page_insert_wired(m, retired_pages_object, ptoa(VM_PAGE_GET_PHYS_PAGE(m)), VM_KERN_MEMORY_RETIRED);
1765 vm_object_unlock(retired_pages_object);
1766 pmap_retire_page(VM_PAGE_GET_PHYS_PAGE(m));
1767}
1768#endif /* defined(__arm64__) */
1769
1c79356b
A
1770/*
1771 * vm_page_hash:
1772 *
1773 * Distributes the object/offset key pair among hash buckets.
1774 *
55e303ae 1775 * NOTE: The bucket count must be a power of 2
1c79356b
A
1776 */
1777#define vm_page_hash(object, offset) (\
b0d623f7 1778 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
1c79356b
A
1779 & vm_page_hash_mask)
1780
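/*
 * Illustrative sketch (not part of xnu): why the bucket count must be a
 * power of 2. The hash is reduced with "& vm_page_hash_mask", where the mask
 * is bucket_count - 1; that only selects every bucket uniformly when the
 * count is a power of two. The example_* names and the multiplier are
 * stand-ins, not the kernel's tuned vm_page_bucket_hash value.
 */
#include <stdint.h>

#define EXAMPLE_BUCKET_COUNT 1024u                  /* must be a power of 2 */
#define EXAMPLE_BUCKET_MASK  (EXAMPLE_BUCKET_COUNT - 1)

static uint32_t
example_page_hash(uintptr_t object, uint64_t page_index)
{
	uint32_t h = (uint32_t)(object * 2654435761u) ^ (uint32_t)page_index;

	return h & EXAMPLE_BUCKET_MASK;   /* equivalent to h % EXAMPLE_BUCKET_COUNT */
}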
2d21ac55 1781
1c79356b
A
1782/*
1783 * vm_page_insert: [ internal use only ]
1784 *
1785 * Inserts the given mem entry into the object/object-page
1786 * table and object list.
1787 *
1788 * The object must be locked.
1789 */
1c79356b
A
1790void
1791vm_page_insert(
0a7de745
A
1792 vm_page_t mem,
1793 vm_object_t object,
1794 vm_object_offset_t offset)
2d21ac55 1795{
3e170ce0
A
1796 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, TRUE, FALSE, FALSE, NULL);
1797}
1798
1799void
1800vm_page_insert_wired(
0a7de745
A
1801 vm_page_t mem,
1802 vm_object_t object,
1803 vm_object_offset_t offset,
3e170ce0
A
1804 vm_tag_t tag)
1805{
1806 vm_page_insert_internal(mem, object, offset, tag, FALSE, TRUE, FALSE, FALSE, NULL);
2d21ac55
A
1807}
1808
4a3eedf9 1809void
2d21ac55 1810vm_page_insert_internal(
0a7de745
A
1811 vm_page_t mem,
1812 vm_object_t object,
1813 vm_object_offset_t offset,
3e170ce0 1814 vm_tag_t tag,
0a7de745
A
1815 boolean_t queues_lock_held,
1816 boolean_t insert_in_hash,
1817 boolean_t batch_pmap_op,
1818 boolean_t batch_accounting,
1819 uint64_t *delayed_ledger_update)
1820{
1821 vm_page_bucket_t *bucket;
1822 lck_spin_t *bucket_lock;
1823 int hash_id;
1824 task_t owner;
1825 int ledger_idx_volatile;
1826 int ledger_idx_nonvolatile;
1827 int ledger_idx_volatile_compressed;
1828 int ledger_idx_nonvolatile_compressed;
1829 boolean_t do_footprint;
1830
316670eb
A
1831#if 0
1832 /*
1833 * we may not hold the page queue lock
1834 * so this check isn't safe to make
1835 */
1c79356b 1836 VM_PAGE_CHECK(mem);
316670eb 1837#endif
1c79356b 1838
f427ee49 1839 assertf(page_aligned(offset), "0x%llx\n", offset);
39236c6e 1840
d9a64523 1841 assert(!VM_PAGE_WIRED(mem) || mem->vmp_private || mem->vmp_fictitious || (tag != VM_KERN_MEMORY_NONE));
3e170ce0 1842
fe8ab488
A
1843 /* the vm_submap_object is only a placeholder for submaps */
1844 assert(object != vm_submap_object);
2d21ac55
A
1845
1846 vm_object_lock_assert_exclusive(object);
39037602 1847 LCK_MTX_ASSERT(&vm_page_queue_lock,
0a7de745
A
1848 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1849 : LCK_MTX_ASSERT_NOTOWNED);
5ba3f43e 1850
0a7de745 1851 if (queues_lock_held == FALSE) {
39037602 1852 assert(!VM_PAGE_PAGEABLE(mem));
0a7de745 1853 }
3e170ce0 1854
b0d623f7 1855 if (insert_in_hash == TRUE) {
f427ee49 1856#if DEBUG || VM_PAGE_BUCKETS_CHECK
0a7de745 1857 if (mem->vmp_tabled || mem->vmp_object) {
b0d623f7 1858 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
0a7de745
A
1859 "already in (obj=%p,off=0x%llx)",
1860 mem, object, offset, VM_PAGE_OBJECT(mem), mem->vmp_offset);
1861 }
91447636 1862#endif
5ba3f43e
A
1863 if (object->internal && (offset >= object->vo_size)) {
1864 panic("vm_page_insert_internal: (page=%p,obj=%p,off=0x%llx,size=0x%llx) inserted at offset past object bounds",
0a7de745 1865 mem, object, offset, object->vo_size);
5ba3f43e
A
1866 }
1867
b0d623f7 1868 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
0a7de745 1869
b0d623f7
A
1870 /*
1871 * Record the object/offset pair in this page
1872 */
1c79356b 1873
d9a64523
A
1874 mem->vmp_object = VM_PAGE_PACK_OBJECT(object);
1875 mem->vmp_offset = offset;
1c79356b 1876
39037602
A
1877#if CONFIG_SECLUDED_MEMORY
1878 if (object->eligible_for_secluded) {
1879 vm_page_secluded.eligible_for_secluded++;
1880 }
1881#endif /* CONFIG_SECLUDED_MEMORY */
1882
b0d623f7
A
1883 /*
1884 * Insert it into the object/offset hash table
1885 */
1886 hash_id = vm_page_hash(object, offset);
1887 bucket = &vm_page_buckets[hash_id];
1888 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
0a7de745
A
1889
1890 lck_spin_lock_grp(bucket_lock, &vm_page_lck_grp_bucket);
1c79356b 1891
d9a64523 1892 mem->vmp_next_m = bucket->page_list;
fe8ab488 1893 bucket->page_list = VM_PAGE_PACK_PTR(mem);
39037602 1894 assert(mem == (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list)));
fe8ab488 1895
1c79356b 1896#if MACH_PAGE_HASH_STATS
0a7de745 1897 if (++bucket->cur_count > bucket->hi_count) {
b0d623f7 1898 bucket->hi_count = bucket->cur_count;
0a7de745 1899 }
1c79356b 1900#endif /* MACH_PAGE_HASH_STATS */
d9a64523 1901 mem->vmp_hashed = TRUE;
b0d623f7
A
1902 lck_spin_unlock(bucket_lock);
1903 }
6d2010ae 1904
0a7de745 1905 {
316670eb 1906 unsigned int cache_attr;
6d2010ae
A
1907
1908 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1909
1910 if (cache_attr != VM_WIMG_USE_DEFAULT) {
316670eb 1911 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
6d2010ae
A
1912 }
1913 }
1c79356b
A
1914 /*
1915 * Now link into the object's list of backed pages.
1916 */
0a7de745 1917 vm_page_queue_enter(&object->memq, mem, vmp_listq);
3e170ce0 1918 object->memq_hint = mem;
d9a64523 1919 mem->vmp_tabled = TRUE;
1c79356b
A
1920
1921 /*
1922 * Show that the object has one more resident page.
1923 */
1924
1925 object->resident_page_count++;
b0d623f7 1926 if (VM_PAGE_WIRED(mem)) {
0a7de745
A
1927 assert(mem->vmp_wire_count > 0);
1928 VM_OBJECT_WIRED_PAGE_UPDATE_START(object);
1929 VM_OBJECT_WIRED_PAGE_ADD(object, mem);
1930 VM_OBJECT_WIRED_PAGE_UPDATE_END(object, tag);
b0d623f7
A
1931 }
1932 assert(object->resident_page_count >= object->wired_page_count);
91447636 1933
f427ee49
A
1934#if DEVELOPMENT || DEBUG
1935 if (object->object_is_shared_cache &&
1936 object->pager != NULL &&
1937 object->pager->mo_pager_ops == &shared_region_pager_ops) {
1938 int new, old;
1939 assert(!object->internal);
1940 new = OSAddAtomic(+1, &shared_region_pagers_resident_count);
1941 do {
1942 old = shared_region_pagers_resident_peak;
1943 } while (old < new &&
1944 !OSCompareAndSwap(old, new, &shared_region_pagers_resident_peak));
1945 }
1946#endif /* DEVELOPMENT || DEBUG */
1947
0a7de745 1948 if (batch_accounting == FALSE) {
3e170ce0
A
1949 if (object->internal) {
1950 OSAddAtomic(1, &vm_page_internal_count);
1951 } else {
1952 OSAddAtomic(1, &vm_page_external_count);
1953 }
39236c6e
A
1954 }
1955
1956 /*
1957 * It wouldn't make sense to insert a "reusable" page in
1958 * an object (the page would have been marked "reusable" only
1959 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1960 * in the object at that time).
1961 * But a page could be inserted in a "all_reusable" object, if
1962 * something faults it in (a vm_read() from another task or a
1963 * "use-after-free" issue in user space, for example). It can
1964 * also happen if we're relocating a page from that object to
1965 * a different physical page during a physically-contiguous
1966 * allocation.
1967 */
d9a64523 1968 assert(!mem->vmp_reusable);
39037602 1969 if (object->all_reusable) {
39236c6e
A
1970 OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1971 }
2d21ac55 1972
d9a64523 1973 if (object->purgable == VM_PURGABLE_DENY &&
0a7de745 1974 !object->vo_ledger_tag) {
fe8ab488
A
1975 owner = TASK_NULL;
1976 } else {
d9a64523
A
1977 owner = VM_OBJECT_OWNER(object);
1978 vm_object_ledger_tag_ledgers(object,
0a7de745
A
1979 &ledger_idx_volatile,
1980 &ledger_idx_nonvolatile,
1981 &ledger_idx_volatile_compressed,
1982 &ledger_idx_nonvolatile_compressed,
1983 &do_footprint);
fe8ab488
A
1984 }
1985 if (owner &&
1986 (object->purgable == VM_PURGABLE_NONVOLATILE ||
0a7de745
A
1987 object->purgable == VM_PURGABLE_DENY ||
1988 VM_PAGE_WIRED(mem))) {
1989 if (delayed_ledger_update) {
3e170ce0 1990 *delayed_ledger_update += PAGE_SIZE;
0a7de745 1991 } else {
3e170ce0
A
1992 /* more non-volatile bytes */
1993 ledger_credit(owner->ledger,
0a7de745
A
1994 ledger_idx_nonvolatile,
1995 PAGE_SIZE);
d9a64523
A
1996 if (do_footprint) {
1997 /* more footprint */
1998 ledger_credit(owner->ledger,
0a7de745
A
1999 task_ledgers.phys_footprint,
2000 PAGE_SIZE);
d9a64523 2001 }
3e170ce0 2002 }
fe8ab488 2003 } else if (owner &&
0a7de745
A
2004 (object->purgable == VM_PURGABLE_VOLATILE ||
2005 object->purgable == VM_PURGABLE_EMPTY)) {
2006 assert(!VM_PAGE_WIRED(mem));
fe8ab488
A
2007 /* more volatile bytes */
2008 ledger_credit(owner->ledger,
0a7de745
A
2009 ledger_idx_volatile,
2010 PAGE_SIZE);
fe8ab488
A
2011 }
2012
b0d623f7
A
2013 if (object->purgable == VM_PURGABLE_VOLATILE) {
2014 if (VM_PAGE_WIRED(mem)) {
fe8ab488 2015 OSAddAtomic(+1, &vm_page_purgeable_wired_count);
b0d623f7 2016 } else {
fe8ab488 2017 OSAddAtomic(+1, &vm_page_purgeable_count);
b0d623f7 2018 }
593a1d5f 2019 } else if (object->purgable == VM_PURGABLE_EMPTY &&
0a7de745 2020 mem->vmp_q_state == VM_PAGE_ON_THROTTLED_Q) {
b0d623f7
A
2021 /*
2022 * This page belongs to a purged VM object but hasn't
2023 * been purged (because it was "busy").
2024 * It's in the "throttled" queue and hence not
2025 * visible to vm_pageout_scan(). Move it to a pageable
2026 * queue, so that it can eventually be reclaimed, instead
2027 * of lingering in the "empty" object.
2028 */
0a7de745 2029 if (queues_lock_held == FALSE) {
b0d623f7 2030 vm_page_lockspin_queues();
0a7de745 2031 }
593a1d5f 2032 vm_page_deactivate(mem);
0a7de745 2033 if (queues_lock_held == FALSE) {
2d21ac55 2034 vm_page_unlock_queues();
0a7de745 2035 }
91447636 2036 }
fe8ab488
A
2037
2038#if VM_OBJECT_TRACKING_OP_MODIFIED
2039 if (vm_object_tracking_inited &&
2040 object->internal &&
2041 object->resident_page_count == 0 &&
2042 object->pager == NULL &&
2043 object->shadow != NULL &&
2044 object->shadow->copy == object) {
2045 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
2046 int numsaved = 0;
2047
0a7de745 2048 numsaved = OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
fe8ab488 2049 btlog_add_entry(vm_object_tracking_btlog,
0a7de745
A
2050 object,
2051 VM_OBJECT_TRACKING_OP_MODIFIED,
2052 bt,
2053 numsaved);
fe8ab488
A
2054 }
2055#endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
1c79356b
A
2056}
2057
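/*
 * Illustrative sketch (not part of xnu): the bucket insertion performed by
 * vm_page_insert_internal() above, reduced to its core: push the new entry
 * at the head of a singly linked per-bucket list while the bucket lock is
 * held. The kernel additionally packs the next pointers (VM_PAGE_PACK_PTR);
 * this sketch uses plain pointers and hypothetical example_* types.
 */
#include <stddef.h>

struct example_hpage {
	struct example_hpage *next;
	unsigned long         key;       /* stands in for the object/offset pair */
};

struct example_bucket {
	struct example_hpage *head;      /* protected by a per-bucket lock */
};

static void
example_bucket_insert(struct example_bucket *bucket, struct example_hpage *p)
{
	/* caller holds the bucket lock, as with lck_spin_lock_grp() above */
	p->next = bucket->head;
	bucket->head = p;
}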
2058/*
2059 * vm_page_replace:
2060 *
2061 * Exactly like vm_page_insert, except that we first
2062 * remove any existing page at the given offset in object.
2063 *
b0d623f7 2064 * The object must be locked.
1c79356b 2065 */
1c79356b
A
2066void
2067vm_page_replace(
0a7de745
A
2068 vm_page_t mem,
2069 vm_object_t object,
2070 vm_object_offset_t offset)
1c79356b 2071{
0c530ab8 2072 vm_page_bucket_t *bucket;
0a7de745
A
2073 vm_page_t found_m = VM_PAGE_NULL;
2074 lck_spin_t *bucket_lock;
2075 int hash_id;
1c79356b 2076
316670eb
A
2077#if 0
2078 /*
2079 * we don't hold the page queue lock
2080 * so this check isn't safe to make
2081 */
1c79356b 2082 VM_PAGE_CHECK(mem);
316670eb 2083#endif
2d21ac55 2084 vm_object_lock_assert_exclusive(object);
f427ee49 2085#if DEBUG || VM_PAGE_BUCKETS_CHECK
0a7de745 2086 if (mem->vmp_tabled || mem->vmp_object) {
91447636 2087 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
0a7de745
A
2088 "already in (obj=%p,off=0x%llx)",
2089 mem, object, offset, VM_PAGE_OBJECT(mem), mem->vmp_offset);
2090 }
91447636 2091#endif
39037602
A
2092 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
2093
2094 assert(!VM_PAGE_PAGEABLE(mem));
2095
1c79356b
A
2096 /*
2097 * Record the object/offset pair in this page
2098 */
d9a64523
A
2099 mem->vmp_object = VM_PAGE_PACK_OBJECT(object);
2100 mem->vmp_offset = offset;
1c79356b
A
2101
2102 /*
2103 * Insert it into the object_object/offset hash table,
2104 * replacing any page that might have been there.
2105 */
2106
b0d623f7
A
2107 hash_id = vm_page_hash(object, offset);
2108 bucket = &vm_page_buckets[hash_id];
2109 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
2110
0a7de745 2111 lck_spin_lock_grp(bucket_lock, &vm_page_lck_grp_bucket);
0c530ab8 2112
fe8ab488
A
2113 if (bucket->page_list) {
2114 vm_page_packed_t *mp = &bucket->page_list;
39037602 2115 vm_page_t m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp));
0c530ab8 2116
1c79356b 2117 do {
39037602
A
2118 /*
2119 * compare packed object pointers
2120 */
d9a64523 2121 if (m->vmp_object == mem->vmp_object && m->vmp_offset == offset) {
1c79356b 2122 /*
0c530ab8 2123 * Remove old page from hash list
1c79356b 2124 */
d9a64523
A
2125 *mp = m->vmp_next_m;
2126 m->vmp_hashed = FALSE;
2127 m->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
1c79356b 2128
0c530ab8 2129 found_m = m;
1c79356b
A
2130 break;
2131 }
d9a64523 2132 mp = &m->vmp_next_m;
39037602 2133 } while ((m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp))));
0c530ab8 2134
d9a64523 2135 mem->vmp_next_m = bucket->page_list;
1c79356b 2136 } else {
d9a64523 2137 mem->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
1c79356b 2138 }
0c530ab8
A
2139 /*
2140 * insert new page at head of hash list
2141 */
fe8ab488 2142 bucket->page_list = VM_PAGE_PACK_PTR(mem);
d9a64523 2143 mem->vmp_hashed = TRUE;
0c530ab8 2144
b0d623f7 2145 lck_spin_unlock(bucket_lock);
1c79356b 2146
0c530ab8 2147 if (found_m) {
0a7de745 2148 /*
0c530ab8
A
2149 * there was already a page at the specified
2150 * offset for this object... remove it from
2151 * the object and free it back to the free list
2152 */
b0d623f7 2153 vm_page_free_unlocked(found_m, FALSE);
91447636 2154 }
3e170ce0 2155 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, FALSE, FALSE, FALSE, NULL);
1c79356b
A
2156}
2157
2158/*
2159 * vm_page_remove: [ internal use only ]
2160 *
2161 * Removes the given mem entry from the object/offset-page
2162 * table and the object page list.
2163 *
b0d623f7 2164 * The object must be locked.
1c79356b
A
2165 */
2166
2167void
2168vm_page_remove(
0a7de745
A
2169 vm_page_t mem,
2170 boolean_t remove_from_hash)
1c79356b 2171{
b0d623f7 2172 vm_page_bucket_t *bucket;
0a7de745
A
2173 vm_page_t this;
2174 lck_spin_t *bucket_lock;
2175 int hash_id;
2176 task_t owner;
2177 vm_object_t m_object;
2178 int ledger_idx_volatile;
2179 int ledger_idx_nonvolatile;
2180 int ledger_idx_volatile_compressed;
2181 int ledger_idx_nonvolatile_compressed;
2182 int do_footprint;
39037602
A
2183
2184 m_object = VM_PAGE_OBJECT(mem);
1c79356b 2185
39037602 2186 vm_object_lock_assert_exclusive(m_object);
d9a64523
A
2187 assert(mem->vmp_tabled);
2188 assert(!mem->vmp_cleaning);
2189 assert(!mem->vmp_laundry);
39037602
A
2190
2191 if (VM_PAGE_PAGEABLE(mem)) {
2192 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2193 }
316670eb
A
2194#if 0
2195 /*
2196 * we don't hold the page queue lock
2197 * so this check isn't safe to make
2198 */
1c79356b 2199 VM_PAGE_CHECK(mem);
316670eb 2200#endif
b0d623f7
A
2201 if (remove_from_hash == TRUE) {
2202 /*
2203 * Remove from the object/offset hash table
2204 */
d9a64523 2205 hash_id = vm_page_hash(m_object, mem->vmp_offset);
b0d623f7
A
2206 bucket = &vm_page_buckets[hash_id];
2207 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
91447636 2208
0a7de745 2209 lck_spin_lock_grp(bucket_lock, &vm_page_lck_grp_bucket);
1c79356b 2210
39037602 2211 if ((this = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list))) == mem) {
b0d623f7 2212 /* optimize for common case */
1c79356b 2213
d9a64523 2214 bucket->page_list = mem->vmp_next_m;
b0d623f7 2215 } else {
0a7de745 2216 vm_page_packed_t *prev;
1c79356b 2217
d9a64523 2218 for (prev = &this->vmp_next_m;
0a7de745
A
2219 (this = (vm_page_t)(VM_PAGE_UNPACK_PTR(*prev))) != mem;
2220 prev = &this->vmp_next_m) {
b0d623f7 2221 continue;
0a7de745 2222 }
d9a64523 2223 *prev = this->vmp_next_m;
b0d623f7 2224 }
1c79356b 2225#if MACH_PAGE_HASH_STATS
b0d623f7 2226 bucket->cur_count--;
1c79356b 2227#endif /* MACH_PAGE_HASH_STATS */
d9a64523
A
2228 mem->vmp_hashed = FALSE;
2229 this->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
b0d623f7
A
2230 lck_spin_unlock(bucket_lock);
2231 }
1c79356b
A
2232 /*
2233 * Now remove from the object's list of backed pages.
2234 */
2235
3e170ce0 2236 vm_page_remove_internal(mem);
1c79356b
A
2237
2238 /*
2239 * And show that the object has one fewer resident
2240 * page.
2241 */
2242
39037602
A
2243 assert(m_object->resident_page_count > 0);
2244 m_object->resident_page_count--;
6d2010ae 2245
f427ee49
A
2246#if DEVELOPMENT || DEBUG
2247 if (m_object->object_is_shared_cache &&
2248 m_object->pager != NULL &&
2249 m_object->pager->mo_pager_ops == &shared_region_pager_ops) {
2250 assert(!m_object->internal);
2251 OSAddAtomic(-1, &shared_region_pagers_resident_count);
2252 }
2253#endif /* DEVELOPMENT || DEBUG */
2254
39037602 2255 if (m_object->internal) {
fe8ab488 2256#if DEBUG
39236c6e 2257 assert(vm_page_internal_count);
fe8ab488
A
2258#endif /* DEBUG */
2259
39236c6e
A
2260 OSAddAtomic(-1, &vm_page_internal_count);
2261 } else {
2262 assert(vm_page_external_count);
2263 OSAddAtomic(-1, &vm_page_external_count);
fe8ab488 2264
d9a64523 2265 if (mem->vmp_xpmapped) {
fe8ab488
A
2266 assert(vm_page_xpmapped_external_count);
2267 OSAddAtomic(-1, &vm_page_xpmapped_external_count);
2268 }
39236c6e 2269 }
d9a64523
A
2270 if (!m_object->internal &&
2271 m_object->cached_list.next &&
2272 m_object->cached_list.prev) {
0a7de745 2273 if (m_object->resident_page_count == 0) {
39037602 2274 vm_object_cache_remove(m_object);
0a7de745 2275 }
6d2010ae
A
2276 }
2277
b0d623f7 2278 if (VM_PAGE_WIRED(mem)) {
d9a64523 2279 assert(mem->vmp_wire_count > 0);
5ba3f43e
A
2280 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
2281 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
2282 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
b0d623f7 2283 }
39037602 2284 assert(m_object->resident_page_count >=
0a7de745 2285 m_object->wired_page_count);
d9a64523 2286 if (mem->vmp_reusable) {
39037602
A
2287 assert(m_object->reusable_page_count > 0);
2288 m_object->reusable_page_count--;
2289 assert(m_object->reusable_page_count <=
0a7de745 2290 m_object->resident_page_count);
d9a64523 2291 mem->vmp_reusable = FALSE;
b0d623f7
A
2292 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
2293 vm_page_stats_reusable.reused_remove++;
39037602 2294 } else if (m_object->all_reusable) {
b0d623f7
A
2295 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
2296 vm_page_stats_reusable.reused_remove++;
2297 }
1c79356b 2298
d9a64523 2299 if (m_object->purgable == VM_PURGABLE_DENY &&
0a7de745 2300 !m_object->vo_ledger_tag) {
fe8ab488
A
2301 owner = TASK_NULL;
2302 } else {
d9a64523
A
2303 owner = VM_OBJECT_OWNER(m_object);
2304 vm_object_ledger_tag_ledgers(m_object,
0a7de745
A
2305 &ledger_idx_volatile,
2306 &ledger_idx_nonvolatile,
2307 &ledger_idx_volatile_compressed,
2308 &ledger_idx_nonvolatile_compressed,
2309 &do_footprint);
fe8ab488
A
2310 }
2311 if (owner &&
39037602 2312 (m_object->purgable == VM_PURGABLE_NONVOLATILE ||
0a7de745
A
2313 m_object->purgable == VM_PURGABLE_DENY ||
2314 VM_PAGE_WIRED(mem))) {
fe8ab488
A
2315 /* less non-volatile bytes */
2316 ledger_debit(owner->ledger,
0a7de745
A
2317 ledger_idx_nonvolatile,
2318 PAGE_SIZE);
d9a64523
A
2319 if (do_footprint) {
2320 /* less footprint */
2321 ledger_debit(owner->ledger,
0a7de745
A
2322 task_ledgers.phys_footprint,
2323 PAGE_SIZE);
d9a64523 2324 }
fe8ab488 2325 } else if (owner &&
0a7de745
A
2326 (m_object->purgable == VM_PURGABLE_VOLATILE ||
2327 m_object->purgable == VM_PURGABLE_EMPTY)) {
2328 assert(!VM_PAGE_WIRED(mem));
fe8ab488
A
2329 /* less volatile bytes */
2330 ledger_debit(owner->ledger,
0a7de745
A
2331 ledger_idx_volatile,
2332 PAGE_SIZE);
fe8ab488 2333 }
39037602 2334 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
b0d623f7
A
2335 if (VM_PAGE_WIRED(mem)) {
2336 assert(vm_page_purgeable_wired_count > 0);
2337 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2338 } else {
2339 assert(vm_page_purgeable_count > 0);
2340 OSAddAtomic(-1, &vm_page_purgeable_count);
2341 }
91447636 2342 }
5ba3f43e 2343
0a7de745 2344 if (m_object->set_cache_attr == TRUE) {
39037602 2345 pmap_set_cache_attributes(VM_PAGE_GET_PHYS_PAGE(mem), 0);
0a7de745 2346 }
6d2010ae 2347
d9a64523
A
2348 mem->vmp_tabled = FALSE;
2349 mem->vmp_object = 0;
2350 mem->vmp_offset = (vm_object_offset_t) -1;
1c79356b
A
2351}
2352
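/*
 * Illustrative sketch (not part of xnu): the hash removal above walks the
 * bucket with a pointer to the previous link ("prev = &this->vmp_next_m"),
 * so unlinking is a single store with no special case for the head. The
 * same idiom with plain pointers and hypothetical example_* names:
 */
#include <stddef.h>

struct example_node {
	struct example_node *next;
};

static int
example_list_remove(struct example_node **head, struct example_node *victim)
{
	struct example_node **prev;

	for (prev = head; *prev != NULL; prev = &(*prev)->next) {
		if (*prev == victim) {
			*prev = victim->next;    /* unlink with a single store */
			victim->next = NULL;
			return 1;
		}
	}
	return 0;                            /* not found */
}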
b0d623f7 2353
1c79356b
A
2354/*
2355 * vm_page_lookup:
2356 *
2357 * Returns the page associated with the object/offset
2358 * pair specified; if none is found, VM_PAGE_NULL is returned.
2359 *
2360 * The object must be locked. No side effects.
2361 */
2362
0a7de745 2363#define VM_PAGE_HASH_LOOKUP_THRESHOLD 10
3e170ce0
A
2364
2365#if DEBUG_VM_PAGE_LOOKUP
2d21ac55 2366
3e170ce0 2367struct {
0a7de745
A
2368 uint64_t vpl_total;
2369 uint64_t vpl_empty_obj;
2370 uint64_t vpl_bucket_NULL;
2371 uint64_t vpl_hit_hint;
2372 uint64_t vpl_hit_hint_next;
2373 uint64_t vpl_hit_hint_prev;
2374 uint64_t vpl_fast;
2375 uint64_t vpl_slow;
2376 uint64_t vpl_hit;
2377 uint64_t vpl_miss;
2378
2379 uint64_t vpl_fast_elapsed;
2380 uint64_t vpl_slow_elapsed;
3e170ce0
A
2381} vm_page_lookup_stats __attribute__((aligned(8)));
2382
2383#endif
2384
0a7de745 2385#define KDP_VM_PAGE_WALK_MAX 1000
3e170ce0
A
2386
2387vm_page_t
2388kdp_vm_page_lookup(
0a7de745
A
2389 vm_object_t object,
2390 vm_object_offset_t offset)
3e170ce0
A
2391{
2392 vm_page_t cur_page;
2393 int num_traversed = 0;
2394
2395 if (not_in_kdp) {
2396 panic("panic: kdp_vm_page_lookup done outside of kernel debugger");
2397 }
2398
0a7de745 2399 vm_page_queue_iterate(&object->memq, cur_page, vmp_listq) {
d9a64523 2400 if (cur_page->vmp_offset == offset) {
3e170ce0
A
2401 return cur_page;
2402 }
2403 num_traversed++;
2404
2405 if (num_traversed >= KDP_VM_PAGE_WALK_MAX) {
2406 return VM_PAGE_NULL;
2407 }
2408 }
2409
2410 return VM_PAGE_NULL;
2411}
91447636 2412
1c79356b
A
2413vm_page_t
2414vm_page_lookup(
0a7de745
A
2415 vm_object_t object,
2416 vm_object_offset_t offset)
1c79356b 2417{
0a7de745 2418 vm_page_t mem;
b0d623f7 2419 vm_page_bucket_t *bucket;
0a7de745
A
2420 vm_page_queue_entry_t qe;
2421 lck_spin_t *bucket_lock = NULL;
2422 int hash_id;
3e170ce0 2423#if DEBUG_VM_PAGE_LOOKUP
0a7de745 2424 uint64_t start, elapsed;
91447636 2425
3e170ce0
A
2426 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_total);
2427#endif
2d21ac55 2428 vm_object_lock_assert_held(object);
f427ee49 2429 assertf(page_aligned(offset), "offset 0x%llx\n", offset);
3e170ce0
A
2430
2431 if (object->resident_page_count == 0) {
2432#if DEBUG_VM_PAGE_LOOKUP
2433 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_empty_obj);
2434#endif
0a7de745 2435 return VM_PAGE_NULL;
3e170ce0
A
2436 }
2437
91447636 2438 mem = object->memq_hint;
2d21ac55 2439
91447636 2440 if (mem != VM_PAGE_NULL) {
39037602 2441 assert(VM_PAGE_OBJECT(mem) == object);
2d21ac55 2442
d9a64523 2443 if (mem->vmp_offset == offset) {
3e170ce0
A
2444#if DEBUG_VM_PAGE_LOOKUP
2445 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint);
2446#endif
0a7de745 2447 return mem;
91447636 2448 }
d9a64523 2449 qe = (vm_page_queue_entry_t)vm_page_queue_next(&mem->vmp_listq);
2d21ac55 2450
0a7de745
A
2451 if (!vm_page_queue_end(&object->memq, qe)) {
2452 vm_page_t next_page;
91447636 2453
39037602
A
2454 next_page = (vm_page_t)((uintptr_t)qe);
2455 assert(VM_PAGE_OBJECT(next_page) == object);
2d21ac55 2456
d9a64523 2457 if (next_page->vmp_offset == offset) {
91447636 2458 object->memq_hint = next_page; /* new hint */
3e170ce0
A
2459#if DEBUG_VM_PAGE_LOOKUP
2460 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_next);
2461#endif
0a7de745 2462 return next_page;
91447636
A
2463 }
2464 }
d9a64523 2465 qe = (vm_page_queue_entry_t)vm_page_queue_prev(&mem->vmp_listq);
2d21ac55 2466
0a7de745 2467 if (!vm_page_queue_end(&object->memq, qe)) {
91447636
A
2468 vm_page_t prev_page;
2469
39037602
A
2470 prev_page = (vm_page_t)((uintptr_t)qe);
2471 assert(VM_PAGE_OBJECT(prev_page) == object);
2d21ac55 2472
d9a64523 2473 if (prev_page->vmp_offset == offset) {
91447636 2474 object->memq_hint = prev_page; /* new hint */
3e170ce0
A
2475#if DEBUG_VM_PAGE_LOOKUP
2476 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_prev);
2477#endif
0a7de745 2478 return prev_page;
91447636
A
2479 }
2480 }
2481 }
1c79356b 2482 /*
2d21ac55 2483 * Search the hash table for this object/offset pair
1c79356b 2484 */
b0d623f7
A
2485 hash_id = vm_page_hash(object, offset);
2486 bucket = &vm_page_buckets[hash_id];
1c79356b 2487
2d21ac55
A
2488 /*
2489 * since we hold the object lock, we are guaranteed that no
2490 * new pages can be inserted into this object... this in turn
2491 * guarantees that the page we're looking for can't exist
2492 * if the bucket it hashes to is currently NULL even when looked
0a7de745 2493 * at outside the scope of the hash bucket lock... this is a
2d21ac55
A
2494 * really cheap optimization to avoid taking the lock
2495 */
fe8ab488 2496 if (!bucket->page_list) {
3e170ce0
A
2497#if DEBUG_VM_PAGE_LOOKUP
2498 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_bucket_NULL);
2499#endif
0a7de745 2500 return VM_PAGE_NULL;
2d21ac55 2501 }
0c530ab8 2502
3e170ce0
A
2503#if DEBUG_VM_PAGE_LOOKUP
2504 start = mach_absolute_time();
2505#endif
2506 if (object->resident_page_count <= VM_PAGE_HASH_LOOKUP_THRESHOLD) {
316670eb 2507 /*
3e170ce0
A
2508 * on average, it's roughly 3 times faster to run a short memq list
2509 * than to take the spin lock and go through the hash list
316670eb 2510 */
39037602 2511 mem = (vm_page_t)vm_page_queue_first(&object->memq);
3e170ce0 2512
39037602 2513 while (!vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem)) {
0a7de745 2514 if (mem->vmp_offset == offset) {
3e170ce0 2515 break;
0a7de745 2516 }
3e170ce0 2517
d9a64523 2518 mem = (vm_page_t)vm_page_queue_next(&mem->vmp_listq);
3e170ce0 2519 }
0a7de745 2520 if (vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem)) {
3e170ce0 2521 mem = NULL;
0a7de745 2522 }
3e170ce0 2523 } else {
0a7de745 2524 vm_page_object_t packed_object;
39037602
A
2525
2526 packed_object = VM_PAGE_PACK_OBJECT(object);
3e170ce0
A
2527
2528 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
2529
0a7de745 2530 lck_spin_lock_grp(bucket_lock, &vm_page_lck_grp_bucket);
3e170ce0 2531
39037602 2532 for (mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
0a7de745
A
2533 mem != VM_PAGE_NULL;
2534 mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->vmp_next_m))) {
3e170ce0
A
2535#if 0
2536 /*
2537 * we don't hold the page queue lock
2538 * so this check isn't safe to make
2539 */
2540 VM_PAGE_CHECK(mem);
316670eb 2541#endif
0a7de745 2542 if ((mem->vmp_object == packed_object) && (mem->vmp_offset == offset)) {
3e170ce0 2543 break;
0a7de745 2544 }
3e170ce0
A
2545 }
2546 lck_spin_unlock(bucket_lock);
1c79356b 2547 }
55e303ae 2548
3e170ce0
A
2549#if DEBUG_VM_PAGE_LOOKUP
2550 elapsed = mach_absolute_time() - start;
2551
2552 if (bucket_lock) {
2553 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_slow);
2554 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_slow_elapsed);
2555 } else {
2556 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_fast);
2557 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_fast_elapsed);
2558 }
0a7de745 2559 if (mem != VM_PAGE_NULL) {
3e170ce0 2560 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit);
0a7de745
A
2561 } else {
2562 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_miss);
2563 }
3e170ce0 2564#endif
91447636 2565 if (mem != VM_PAGE_NULL) {
39037602 2566 assert(VM_PAGE_OBJECT(mem) == object);
91447636 2567
3e170ce0
A
2568 object->memq_hint = mem;
2569 }
0a7de745 2570 return mem;
91447636
A
2571}
2572
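/*
 * Illustrative sketch (not part of xnu): a compressed model of the lookup
 * strategy used by vm_page_lookup() above: try the per-object hint first,
 * walk the short resident list when the object is small, and only fall back
 * to the locked hash search for large objects. The threshold of 10 mirrors
 * VM_PAGE_HASH_LOOKUP_THRESHOLD; the example_* names are hypothetical, and
 * the kernel version also probes the hint's next/prev neighbors.
 */
#include <stddef.h>
#include <stdint.h>

struct example_rpage {
	struct example_rpage *next;           /* object's resident-page list */
	uint64_t              offset;
};

struct example_object {
	struct example_rpage *memq;           /* head of the resident list */
	struct example_rpage *memq_hint;      /* last page found */
	unsigned int          resident_page_count;
};

#define EXAMPLE_LINEAR_THRESHOLD 10

static struct example_rpage *
example_lookup(struct example_object *obj, uint64_t offset)
{
	struct example_rpage *p = obj->memq_hint;

	if (p != NULL && p->offset == offset) {
		return p;                          /* hint hit, no search at all */
	}
	if (obj->resident_page_count <= EXAMPLE_LINEAR_THRESHOLD) {
		for (p = obj->memq; p != NULL; p = p->next) {
			if (p->offset == offset) {
				obj->memq_hint = p;        /* remember for next time */
				return p;
			}
		}
		return NULL;
	}
	/*
	 * Large object: a real implementation would take the bucket lock and
	 * search the object/offset hash here, as vm_page_lookup() does above.
	 */
	return NULL;
}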
2573
1c79356b
A
2574/*
2575 * vm_page_rename:
2576 *
2577 * Move the given memory entry from its
2578 * current object to the specified target object/offset.
2579 *
2580 * The object must be locked.
2581 */
2582void
2583vm_page_rename(
0a7de745
A
2584 vm_page_t mem,
2585 vm_object_t new_object,
2586 vm_object_offset_t new_offset)
1c79356b 2587{
0a7de745
A
2588 boolean_t internal_to_external, external_to_internal;
2589 vm_tag_t tag;
2590 vm_object_t m_object;
39236c6e 2591
39037602 2592 m_object = VM_PAGE_OBJECT(mem);
2d21ac55 2593
39037602 2594 assert(m_object != new_object);
0a7de745 2595 assert(m_object);
3e170ce0 2596
1c79356b 2597 /*
d9a64523 2598 * Changes to mem->vmp_object require the page lock because
1c79356b
A
2599 * the pageout daemon uses that lock to get the object.
2600 */
b0d623f7 2601 vm_page_lockspin_queues();
1c79356b 2602
39236c6e
A
2603 internal_to_external = FALSE;
2604 external_to_internal = FALSE;
2605
d9a64523 2606 if (mem->vmp_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q) {
39236c6e
A
2607 /*
2608 * it's much easier to get the vm_page_pageable_xxx accounting correct
2609 * if we first move the page to the active queue... it's going to end
2610 * up there anyway, and we don't do vm_page_rename's frequently enough
2611 * for this to matter.
2612 */
39037602 2613 vm_page_queues_remove(mem, FALSE);
39236c6e
A
2614 vm_page_activate(mem);
2615 }
39037602
A
2616 if (VM_PAGE_PAGEABLE(mem)) {
2617 if (m_object->internal && !new_object->internal) {
39236c6e
A
2618 internal_to_external = TRUE;
2619 }
39037602 2620 if (!m_object->internal && new_object->internal) {
39236c6e
A
2621 external_to_internal = TRUE;
2622 }
2623 }
2624
39037602 2625 tag = m_object->wire_tag;
0a7de745 2626 vm_page_remove(mem, TRUE);
3e170ce0 2627 vm_page_insert_internal(mem, new_object, new_offset, tag, TRUE, TRUE, FALSE, FALSE, NULL);
1c79356b 2628
39236c6e
A
2629 if (internal_to_external) {
2630 vm_page_pageable_internal_count--;
2631 vm_page_pageable_external_count++;
2632 } else if (external_to_internal) {
2633 vm_page_pageable_external_count--;
2634 vm_page_pageable_internal_count++;
2635 }
2636
1c79356b
A
2637 vm_page_unlock_queues();
2638}
2639
2640/*
2641 * vm_page_init:
2642 *
2643 * Initialize the fields in a new page.
2644 * This takes a structure with random values and initializes it
2645 * so that it can be given to vm_page_release or vm_page_insert.
2646 */
2647void
2648vm_page_init(
0a7de745
A
2649 vm_page_t mem,
2650 ppnum_t phys_page,
2651 boolean_t lopage)
1c79356b 2652{
0a7de745
A
2653 uint_t i;
2654 uintptr_t *p;
2655
91447636 2656 assert(phys_page);
7ddcb079 2657
0a7de745 2658#if DEBUG
7ddcb079
A
2659 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
2660 if (!(pmap_valid_page(phys_page))) {
2661 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
2662 }
2663 }
0a7de745
A
2664#endif /* DEBUG */
2665
2666 /*
2667 * Initialize the fields of the vm_page. If adding any new fields to vm_page,
2668 * try to use initial values which match 0. This minimizes the number of writes
2669 * needed for boot-time initialization.
2670 *
2671 * Kernel bzero() isn't an inline yet, so do it by hand for performance.
2672 */
2673 assert(VM_PAGE_NOT_ON_Q == 0);
2674 assert(sizeof(*mem) % sizeof(uintptr_t) == 0);
2675 for (p = (uintptr_t *)(void *)mem, i = sizeof(*mem) / sizeof(uintptr_t); i != 0; --i) {
2676 *p++ = 0;
2677 }
2678 mem->vmp_offset = (vm_object_offset_t)-1;
2679 mem->vmp_busy = TRUE;
2680 mem->vmp_lopage = lopage;
39037602
A
2681
2682 VM_PAGE_SET_PHYS_PAGE(mem, phys_page);
6d2010ae
A
2683#if 0
2684 /*
2685 * we're leaving this turned off for now... currently pages
2686 * come off the free list and are either immediately dirtied/referenced
2687 * due to zero-fill or COW faults, or are used to read or write files...
2688 * in the file I/O case, the UPL mechanism takes care of clearing
2689 * the state of the HW ref/mod bits in a somewhat fragile way.
2690 * Since we may change the way this works in the future (to toughen it up),
2691 * I'm leaving this as a reminder of where these bits could get cleared
2692 */
2693
2694 /*
2695 * make sure both the h/w referenced and modified bits are
0a7de745 2696 * clear at this point... we are especially dependent on
6d2010ae
A
2697 * not finding a 'stale' h/w modified in a number of spots
2698 * once this page goes back into use
2699 */
2700 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
2701#endif
1c79356b
A
2702}
2703
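/*
 * Illustrative sketch (not part of xnu): vm_page_init() above zeroes the
 * structure by storing uintptr_t-sized words in a loop, relying on
 * sizeof(struct vm_page) being a multiple of sizeof(uintptr_t). The same
 * pattern with a hypothetical structure:
 */
#include <assert.h>
#include <stdint.h>

struct example_record {
	uintptr_t a;
	uintptr_t b;
	uintptr_t c;
};

static void
example_zero_by_words(struct example_record *r)
{
	uintptr_t    *p = (uintptr_t *)(void *)r;
	unsigned int  i;

	assert(sizeof(*r) % sizeof(uintptr_t) == 0);
	for (i = sizeof(*r) / sizeof(uintptr_t); i != 0; --i) {
		*p++ = 0;                 /* one word per store, no call to bzero() */
	}
}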
2704/*
2705 * vm_page_grab_fictitious:
2706 *
2707 * Remove a fictitious page from the free list.
2708 * Returns VM_PAGE_NULL if there are no free pages.
2709 */
1c79356b 2710
c3c9b80d
A
2711static vm_page_t
2712vm_page_grab_fictitious_common(ppnum_t phys_addr, boolean_t canwait)
1c79356b 2713{
c3c9b80d 2714 vm_page_t m;
6d2010ae 2715
c3c9b80d
A
2716 m = zalloc_flags(vm_page_zone, canwait ? Z_WAITOK : Z_NOWAIT);
2717 if (m) {
0b4c1975 2718 vm_page_init(m, phys_addr, FALSE);
d9a64523 2719 m->vmp_fictitious = TRUE;
0a7de745 2720 }
1c79356b
A
2721 return m;
2722}
2723
2d21ac55 2724vm_page_t
c3c9b80d 2725vm_page_grab_fictitious(boolean_t canwait)
2d21ac55 2726{
c3c9b80d 2727 return vm_page_grab_fictitious_common(vm_page_fictitious_addr, canwait);
2d21ac55
A
2728}
2729
5ba3f43e
A
2730int vm_guard_count;
2731
2732
2d21ac55 2733vm_page_t
c3c9b80d 2734vm_page_grab_guard(boolean_t canwait)
2d21ac55 2735{
5ba3f43e 2736 vm_page_t page;
c3c9b80d 2737 page = vm_page_grab_fictitious_common(vm_page_guard_addr, canwait);
0a7de745
A
2738 if (page) {
2739 OSAddAtomic(1, &vm_guard_count);
2740 }
5ba3f43e 2741 return page;
2d21ac55
A
2742}
2743
6d2010ae 2744
1c79356b
A
2745/*
2746 * vm_page_release_fictitious:
2747 *
6d2010ae 2748 * Release a fictitious page to the zone pool
1c79356b 2749 */
1c79356b
A
2750void
2751vm_page_release_fictitious(
6d2010ae 2752 vm_page_t m)
1c79356b 2753{
d9a64523
A
2754 assert((m->vmp_q_state == VM_PAGE_NOT_ON_Q) || (m->vmp_q_state == VM_PAGE_IS_WIRED));
2755 assert(m->vmp_fictitious);
39037602 2756 assert(VM_PAGE_GET_PHYS_PAGE(m) == vm_page_fictitious_addr ||
0a7de745 2757 VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr);
1c79356b 2758
5ba3f43e 2759
0a7de745
A
2760 if (VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr) {
2761 OSAddAtomic(-1, &vm_guard_count);
2762 }
5ba3f43e 2763
91447636 2764 zfree(vm_page_zone, m);
1c79356b
A
2765}
2766
1c79356b
A
2767/*
2768 * vm_pool_low():
2769 *
2770 * Return true if it is not likely that a non-vm_privileged thread
2771 * can get memory without blocking. Advisory only, since the
2772 * situation may change under us.
2773 */
c3c9b80d 2774bool
1c79356b
A
2775vm_pool_low(void)
2776{
2777 /* No locking, at worst we will fib. */
0a7de745 2778 return vm_page_free_count <= vm_page_free_reserved;
1c79356b
A
2779}
2780
d9a64523
A
2781boolean_t vm_darkwake_mode = FALSE;
2782
2783/*
2784 * vm_update_darkwake_mode():
2785 *
2786 * Tells the VM that the system is in / out of darkwake.
2787 *
2788 * Today, the VM only lowers/raises the background queue target
2789 * so as to favor consuming more/less background pages when
2790 * darkwake is ON/OFF.
2791 *
2792 * We might need to do more things in the future.
2793 */
2794
2795void
2796vm_update_darkwake_mode(boolean_t darkwake_mode)
2797{
2798 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
2799
2800 vm_page_lockspin_queues();
2801
2802 if (vm_darkwake_mode == darkwake_mode) {
2803 /*
2804 * No change.
2805 */
2806 vm_page_unlock_queues();
2807 return;
2808 }
2809
2810 vm_darkwake_mode = darkwake_mode;
2811
2812 if (vm_darkwake_mode == TRUE) {
2813#if CONFIG_BACKGROUND_QUEUE
2814
2815 /* save background target to restore later */
2816 vm_page_background_target_snapshot = vm_page_background_target;
2817
2818 /* target is set to 0...no protection for background pages */
2819 vm_page_background_target = 0;
2820
2821#endif /* CONFIG_BACKGROUND_QUEUE */
d9a64523
A
2822 } else if (vm_darkwake_mode == FALSE) {
2823#if CONFIG_BACKGROUND_QUEUE
2824
2825 if (vm_page_background_target_snapshot) {
2826 vm_page_background_target = vm_page_background_target_snapshot;
2827 }
2828#endif /* CONFIG_BACKGROUND_QUEUE */
2829 }
2830 vm_page_unlock_queues();
2831}
0c530ab8 2832
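/*
 * Illustrative sketch (not part of xnu): the save/restore pattern used by
 * vm_update_darkwake_mode() above. Entering darkwake snapshots the
 * background queue target and drops it to zero; leaving darkwake restores
 * the snapshot. The example_* globals are hypothetical stand-ins for the
 * vm_page_background_* variables, and the default value is made up.
 */
static unsigned int example_bg_target = 2000;           /* hypothetical default */
static unsigned int example_bg_target_snapshot = 0;

static void
example_set_darkwake(int entering)
{
	/* the caller serializes updates, as the page-queues lock does above */
	if (entering) {
		example_bg_target_snapshot = example_bg_target;
		example_bg_target = 0;       /* no protection for background pages */
	} else if (example_bg_target_snapshot != 0) {
		example_bg_target = example_bg_target_snapshot;
	}
}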
39037602
A
2833#if CONFIG_BACKGROUND_QUEUE
2834
2835void
2836vm_page_update_background_state(vm_page_t mem)
2837{
0a7de745 2838 if (vm_page_background_mode == VM_PAGE_BG_DISABLED) {
39037602 2839 return;
0a7de745 2840 }
39037602 2841
0a7de745 2842 if (mem->vmp_in_background == FALSE) {
39037602 2843 return;
0a7de745 2844 }
39037602 2845
0a7de745 2846 task_t my_task = current_task();
d9a64523
A
2847
2848 if (my_task) {
2849 if (task_get_darkwake_mode(my_task)) {
2850 return;
2851 }
2852 }
2853
39037602 2854#if BACKGROUNDQ_BASED_ON_QOS
0a7de745 2855 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS) <= THREAD_QOS_LEGACY) {
39037602 2856 return;
0a7de745 2857 }
39037602 2858#else
39037602 2859 if (my_task) {
0a7de745 2860 if (proc_get_effective_task_policy(my_task, TASK_POLICY_DARWIN_BG)) {
39037602 2861 return;
0a7de745 2862 }
39037602
A
2863 }
2864#endif
2865 vm_page_lockspin_queues();
2866
d9a64523 2867 mem->vmp_in_background = FALSE;
39037602
A
2868 vm_page_background_promoted_count++;
2869
2870 vm_page_remove_from_backgroundq(mem);
2871
2872 vm_page_unlock_queues();
2873}
2874
2875
2876void
2877vm_page_assign_background_state(vm_page_t mem)
2878{
0a7de745 2879 if (vm_page_background_mode == VM_PAGE_BG_DISABLED) {
39037602 2880 return;
0a7de745 2881 }
39037602 2882
0a7de745 2883 task_t my_task = current_task();
d9a64523
A
2884
2885 if (my_task) {
2886 if (task_get_darkwake_mode(my_task)) {
2887 mem->vmp_in_background = TRUE;
2888 return;
2889 }
2890 }
2891
39037602 2892#if BACKGROUNDQ_BASED_ON_QOS
0a7de745
A
2893 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS) <= THREAD_QOS_LEGACY) {
2894 mem->vmp_in_background = TRUE;
2895 } else {
2896 mem->vmp_in_background = FALSE;
2897 }
39037602 2898#else
0a7de745 2899 if (my_task) {
d9a64523 2900 mem->vmp_in_background = proc_get_effective_task_policy(my_task, TASK_POLICY_DARWIN_BG);
0a7de745 2901 }
39037602
A
2902#endif
2903}
2904
2905
2906void
2907vm_page_remove_from_backgroundq(
0a7de745 2908 vm_page_t mem)
39037602 2909{
0a7de745 2910 vm_object_t m_object;
39037602
A
2911
2912 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2913
d9a64523 2914 if (mem->vmp_on_backgroundq) {
0a7de745 2915 vm_page_queue_remove(&vm_page_queue_background, mem, vmp_backgroundq);
39037602 2916
d9a64523
A
2917 mem->vmp_backgroundq.next = 0;
2918 mem->vmp_backgroundq.prev = 0;
2919 mem->vmp_on_backgroundq = FALSE;
0a7de745 2920
39037602
A
2921 vm_page_background_count--;
2922
2923 m_object = VM_PAGE_OBJECT(mem);
2924
0a7de745 2925 if (m_object->internal) {
39037602 2926 vm_page_background_internal_count--;
0a7de745 2927 } else {
39037602 2928 vm_page_background_external_count--;
0a7de745 2929 }
39037602 2930 } else {
d9a64523 2931 assert(VM_PAGE_UNPACK_PTR(mem->vmp_backgroundq.next) == (uintptr_t)NULL &&
0a7de745 2932 VM_PAGE_UNPACK_PTR(mem->vmp_backgroundq.prev) == (uintptr_t)NULL);
39037602
A
2933 }
2934}
2935
2936
2937void
2938vm_page_add_to_backgroundq(
0a7de745
A
2939 vm_page_t mem,
2940 boolean_t first)
2941{
2942 vm_object_t m_object;
39037602
A
2943
2944 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2945
0a7de745 2946 if (vm_page_background_mode == VM_PAGE_BG_DISABLED) {
39037602 2947 return;
0a7de745 2948 }
39037602 2949
d9a64523 2950 if (mem->vmp_on_backgroundq == FALSE) {
39037602
A
2951 m_object = VM_PAGE_OBJECT(mem);
2952
0a7de745 2953 if (vm_page_background_exclude_external && !m_object->internal) {
39037602 2954 return;
0a7de745 2955 }
39037602 2956
0a7de745
A
2957 if (first == TRUE) {
2958 vm_page_queue_enter_first(&vm_page_queue_background, mem, vmp_backgroundq);
2959 } else {
2960 vm_page_queue_enter(&vm_page_queue_background, mem, vmp_backgroundq);
2961 }
d9a64523 2962 mem->vmp_on_backgroundq = TRUE;
0a7de745 2963
39037602
A
2964 vm_page_background_count++;
2965
0a7de745 2966 if (m_object->internal) {
39037602 2967 vm_page_background_internal_count++;
0a7de745 2968 } else {
39037602 2969 vm_page_background_external_count++;
0a7de745 2970 }
39037602
A
2971 }
2972}
2973
d9a64523 2974#endif /* CONFIG_BACKGROUND_QUEUE */
0c530ab8
A
2975
2976/*
0a7de745
A
2977 * This can be switched to FALSE to help debug drivers
2978 * that are having problems with memory > 4G.
0c530ab8 2979 */
0a7de745 2980boolean_t vm_himemory_mode = TRUE;
0c530ab8
A
2981
2982/*
2983 * this interface exists to support hardware controllers
2984 * incapable of generating DMAs with more than 32 bits
2985 * of address on platforms with physical memory > 4G...
2986 */
0a7de745
A
2987unsigned int vm_lopages_allocated_q = 0;
2988unsigned int vm_lopages_allocated_cpm_success = 0;
2989unsigned int vm_lopages_allocated_cpm_failed = 0;
f427ee49 2990vm_page_queue_head_t vm_lopage_queue_free VM_PAGE_PACKED_ALIGNED;
0c530ab8
A
2991
2992vm_page_t
2993vm_page_grablo(void)
2994{
0a7de745 2995 vm_page_t mem;
0c530ab8 2996
0a7de745
A
2997 if (vm_lopage_needed == FALSE) {
2998 return vm_page_grab();
2999 }
0c530ab8 3000
b0d623f7 3001 lck_mtx_lock_spin(&vm_page_queue_free_lock);
0c530ab8 3002
0a7de745
A
3003 if (!vm_page_queue_empty(&vm_lopage_queue_free)) {
3004 vm_page_queue_remove_first(&vm_lopage_queue_free, mem, vmp_pageq);
0b4c1975 3005 assert(vm_lopage_free_count);
d9a64523
A
3006 assert(mem->vmp_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
3007 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
0c530ab8 3008
0a7de745 3009 vm_lopage_free_count--;
0b4c1975
A
3010 vm_lopages_allocated_q++;
3011
0a7de745 3012 if (vm_lopage_free_count < vm_lopage_lowater) {
0b4c1975 3013 vm_lopage_refill = TRUE;
0a7de745 3014 }
0c530ab8 3015
0b4c1975 3016 lck_mtx_unlock(&vm_page_queue_free_lock);
39037602
A
3017
3018#if CONFIG_BACKGROUND_QUEUE
3019 vm_page_assign_background_state(mem);
3020#endif
2d21ac55 3021 } else {
0b4c1975
A
3022 lck_mtx_unlock(&vm_page_queue_free_lock);
3023
0a7de745 3024 if (cpm_allocate(PAGE_SIZE, &mem, atop(PPNUM_MAX), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
0b4c1975
A
3025 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3026 vm_lopages_allocated_cpm_failed++;
3027 lck_mtx_unlock(&vm_page_queue_free_lock);
3028
0a7de745 3029 return VM_PAGE_NULL;
0b4c1975 3030 }
d9a64523 3031 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
39037602 3032
d9a64523 3033 mem->vmp_busy = TRUE;
0b4c1975
A
3034
3035 vm_page_lockspin_queues();
0a7de745 3036
d9a64523 3037 mem->vmp_gobbled = FALSE;
0b4c1975
A
3038 vm_page_gobble_count--;
3039 vm_page_wire_count--;
3040
3041 vm_lopages_allocated_cpm_success++;
3042 vm_page_unlock_queues();
0c530ab8 3043 }
d9a64523
A
3044 assert(mem->vmp_busy);
3045 assert(!mem->vmp_pmapped);
3046 assert(!mem->vmp_wpmapped);
39037602 3047 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
0b4c1975 3048
39037602 3049 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
0c530ab8 3050
c3c9b80d 3051 counter_inc(&vm_page_grab_count);
d9a64523 3052 VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, 0, 1, 0, 0);
d9a64523 3053
0a7de745 3054 return mem;
0c530ab8
A
3055}
3056
1c79356b
A
3057/*
3058 * vm_page_grab:
3059 *
2d21ac55
A
3060 * first try to grab a page from the per-cpu free list...
3061 * this must be done while pre-emption is disabled... if
0a7de745 3062 * a page is available, we're done...
2d21ac55
A
3063 * if no page is available, grab the vm_page_queue_free_lock
3064 * and see if current number of free pages would allow us
0a7de745 3065 * to grab at least 1... if not, return VM_PAGE_NULL as before...
2d21ac55 3066 * if there are pages available, disable preemption and
0a7de745 3067 * recheck the state of the per-cpu free list... we could
2d21ac55 3068 * have been preempted and moved to a different cpu, or
0a7de745 3069 * some other thread could have re-filled it... if still
2d21ac55
A
3070 * empty, figure out how many pages we can steal from the
3071 * global free queue and move to the per-cpu queue...
3072 * return 1 of these pages when done... only wake up the
0a7de745 3073 * pageout_scan thread if we moved pages from the global
2d21ac55
A
3074 * list... no need for the wakeup if we've satisfied the
3075 * request from the per-cpu queue.
1c79356b
A
3076 */
3077
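/*
 * Illustrative sketch (not part of xnu): a compact model of the strategy
 * described in the comment above. Take from the per-CPU cache when possible;
 * otherwise steal a batch from the global free list, keep the remainder in
 * the per-CPU cache, and return one page. Locking, free-page colors, the
 * reserved-pool checks and the pageout wakeup are all omitted, and every
 * example_* name is hypothetical.
 */
#include <stddef.h>

struct example_fpage {
	struct example_fpage *snext;
};

static struct example_fpage *example_global_free;     /* global free list */
static struct example_fpage *example_cpu_free;        /* this CPU's cache */

static struct example_fpage *
example_grab(unsigned int batch)
{
	struct example_fpage *m, *head = NULL, *tail = NULL;

	if ((m = example_cpu_free) != NULL) {              /* fast path */
		example_cpu_free = m->snext;
		return m;
	}
	/* slow path: with the free-list lock held, steal up to 'batch' pages */
	while (batch-- > 0 && (m = example_global_free) != NULL) {
		example_global_free = m->snext;
		if (head == NULL) {
			head = m;
		} else {
			tail->snext = m;
		}
		tail = m;
	}
	if (head == NULL) {
		return NULL;                                   /* caller would wait */
	}
	tail->snext = NULL;                                /* terminate the chain */
	example_cpu_free = head->snext;                    /* cache the remainder */
	head->snext = NULL;
	return head;
}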
39037602
A
3078#if CONFIG_SECLUDED_MEMORY
3079vm_page_t vm_page_grab_secluded(void);
3080#endif /* CONFIG_SECLUDED_MEMORY */
1c79356b 3081
0a7de745
A
3082static inline void
3083vm_page_grab_diags(void);
3084
1c79356b 3085vm_page_t
39037602 3086vm_page_grab(void)
1c79356b 3087{
0a7de745 3088 return vm_page_grab_options(VM_PAGE_GRAB_OPTIONS_NONE);
39037602 3089}
2d21ac55 3090
5ba3f43e
A
3091#if HIBERNATION
3092boolean_t hibernate_rebuild_needed = FALSE;
3093#endif /* HIBERNATION */
3094
39037602
A
3095vm_page_t
3096vm_page_grab_options(
3097 int grab_options)
3098{
0a7de745 3099 vm_page_t mem;
2d21ac55
A
3100
3101 disable_preemption();
3102
f427ee49 3103 if ((mem = *PERCPU_GET(free_pages))) {
2d21ac55 3104return_page_from_cpu_list:
d9a64523 3105 assert(mem->vmp_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
39037602 3106
5ba3f43e
A
3107#if HIBERNATION
3108 if (hibernate_rebuild_needed) {
3109 panic("%s:%d should not modify cpu->free_pages while hibernating", __FUNCTION__, __LINE__);
3110 }
3111#endif /* HIBERNATION */
0a7de745
A
3112
3113 vm_page_grab_diags();
f427ee49
A
3114
3115 vm_offset_t pcpu_base = current_percpu_base();
c3c9b80d 3116 counter_inc_preemption_disabled(&vm_page_grab_count);
f427ee49 3117 *PERCPU_GET_WITH_BASE(pcpu_base, free_pages) = mem->vmp_snext;
d9a64523 3118 VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, grab_options, 0, 0, 0);
2d21ac55 3119
0a7de745 3120 enable_preemption();
39037602 3121 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
d9a64523
A
3122 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
3123
3124 assert(mem->vmp_listq.next == 0 && mem->vmp_listq.prev == 0);
3125 assert(mem->vmp_tabled == FALSE);
3126 assert(mem->vmp_object == 0);
3127 assert(!mem->vmp_laundry);
0a7de745 3128 ASSERT_PMAP_FREE(mem);
d9a64523
A
3129 assert(mem->vmp_busy);
3130 assert(!mem->vmp_pmapped);
3131 assert(!mem->vmp_wpmapped);
39037602 3132 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2d21ac55 3133
39037602
A
3134#if CONFIG_BACKGROUND_QUEUE
3135 vm_page_assign_background_state(mem);
3136#endif
2d21ac55
A
3137 return mem;
3138 }
3139 enable_preemption();
3140
1c79356b 3141
1c79356b
A
3142 /*
3143 * Optionally produce warnings if the wire or gobble
3144 * counts exceed some threshold.
3145 */
fe8ab488
A
3146#if VM_PAGE_WIRE_COUNT_WARNING
3147 if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) {
1c79356b 3148 printf("mk: vm_page_grab(): high wired page count of %d\n",
0a7de745 3149 vm_page_wire_count);
1c79356b 3150 }
fe8ab488
A
3151#endif
3152#if VM_PAGE_GOBBLE_COUNT_WARNING
3153 if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) {
1c79356b 3154 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
0a7de745 3155 vm_page_gobble_count);
1c79356b 3156 }
fe8ab488 3157#endif
39037602 3158
0a7de745
A
3159 /*
3160 * If free count is low and we have delayed pages from early boot,
3161 * get one of those instead.
3162 */
3163 if (__improbable(vm_delayed_count > 0 &&
3164 vm_page_free_count <= vm_page_free_target &&
3165 (mem = vm_get_delayed_page(grab_options)) != NULL)) {
3166 return mem;
3167 }
3168
b0d623f7
A
3169 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3170
1c79356b
A
3171 /*
3172 * Only let privileged threads (involved in pageout)
3173 * dip into the reserved pool.
3174 */
1c79356b 3175 if ((vm_page_free_count < vm_page_free_reserved) &&
91447636 3176 !(current_thread()->options & TH_OPT_VMPRIV)) {
39037602 3177 /* no page for us in the free queue... */
b0d623f7 3178 lck_mtx_unlock(&vm_page_queue_free_lock);
1c79356b 3179 mem = VM_PAGE_NULL;
39037602
A
3180
3181#if CONFIG_SECLUDED_MEMORY
3182 /* ... but can we try and grab from the secluded queue? */
3183 if (vm_page_secluded_count > 0 &&
3184 ((grab_options & VM_PAGE_GRAB_SECLUDED) ||
0a7de745 3185 task_can_use_secluded_mem(current_task(), TRUE))) {
39037602
A
3186 mem = vm_page_grab_secluded();
3187 if (grab_options & VM_PAGE_GRAB_SECLUDED) {
3188 vm_page_secluded.grab_for_iokit++;
3189 if (mem) {
3190 vm_page_secluded.grab_for_iokit_success++;
3191 }
3192 }
3193 if (mem) {
3194 VM_CHECK_MEMORYSTATUS;
d9a64523 3195
0a7de745 3196 vm_page_grab_diags();
c3c9b80d 3197 counter_inc(&vm_page_grab_count);
d9a64523 3198 VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, grab_options, 0, 0, 0);
d9a64523 3199
39037602
A
3200 return mem;
3201 }
3202 }
3203#else /* CONFIG_SECLUDED_MEMORY */
3204 (void) grab_options;
3205#endif /* CONFIG_SECLUDED_MEMORY */
0a7de745
A
3206 } else {
3207 vm_page_t head;
3208 vm_page_t tail;
3209 unsigned int pages_to_steal;
3210 unsigned int color;
3211 unsigned int clump_end, sub_count;
1c79356b 3212
0a7de745 3213 while (vm_page_free_count == 0) {
b0d623f7 3214 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
3215 /*
3216 * must be a privileged thread to be
0a7de745 3217 * in this state since a non-privileged
2d21ac55
A
3218 * thread would have bailed if we were
3219 * under the vm_page_free_reserved mark
3220 */
3221 VM_PAGE_WAIT();
b0d623f7 3222 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2d21ac55
A
3223 }
3224
3225 disable_preemption();
3226
f427ee49 3227 if ((mem = *PERCPU_GET(free_pages))) {
b0d623f7 3228 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55 3229
0a7de745 3230 /*
2d21ac55
A
3231 * we got preempted and moved to another processor
3232 * or we got preempted and someone else ran and filled the cache
3233 */
3234 goto return_page_from_cpu_list;
3235 }
0a7de745
A
3236 if (vm_page_free_count <= vm_page_free_reserved) {
3237 pages_to_steal = 1;
3238 } else {
3239 if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved)) {
fe8ab488 3240 pages_to_steal = vm_free_magazine_refill_limit;
0a7de745
A
3241 } else {
3242 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
3243 }
2d21ac55 3244 }
f427ee49 3245 color = *PERCPU_GET(start_color);
2d21ac55
A
3246 head = tail = NULL;
3247
fe8ab488 3248 vm_page_free_count -= pages_to_steal;
5ba3f43e 3249 clump_end = sub_count = 0;
fe8ab488 3250
2d21ac55 3251 while (pages_to_steal--) {
0a7de745
A
3252 while (vm_page_queue_empty(&vm_page_queue_free[color].qhead)) {
3253 color = (color + 1) & vm_color_mask;
3254 }
5ba3f43e
A
3255#if defined(__x86_64__)
3256 vm_page_queue_remove_first_with_clump(&vm_page_queue_free[color].qhead,
0a7de745 3257 mem, clump_end);
5ba3f43e 3258#else
39037602 3259 vm_page_queue_remove_first(&vm_page_queue_free[color].qhead,
0a7de745 3260 mem, vmp_pageq);
5ba3f43e
A
3261#endif
3262
d9a64523 3263 assert(mem->vmp_q_state == VM_PAGE_ON_FREE_Q);
6d2010ae 3264
39037602 3265 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
5ba3f43e
A
3266
3267#if defined(__arm__) || defined(__arm64__)
2d21ac55 3268 color = (color + 1) & vm_color_mask;
5ba3f43e
A
3269#else
3270
3271#if DEVELOPMENT || DEBUG
3272
3273 sub_count++;
3274 if (clump_end) {
3275 vm_clump_update_stats(sub_count);
3276 sub_count = 0;
3277 color = (color + 1) & vm_color_mask;
3278 }
3279#else
0a7de745
A
3280 if (clump_end) {
3281 color = (color + 1) & vm_color_mask;
3282 }
5ba3f43e
A
3283
3284#endif /* if DEVELOPMENT || DEBUG */
3285
3286#endif /* if defined(__arm__) || defined(__arm64__) */
2d21ac55 3287
0a7de745 3288 if (head == NULL) {
2d21ac55 3289 head = mem;
0a7de745
A
3290 } else {
3291 tail->vmp_snext = mem;
3292 }
3293 tail = mem;
2d21ac55 3294
d9a64523
A
3295 assert(mem->vmp_listq.next == 0 && mem->vmp_listq.prev == 0);
3296 assert(mem->vmp_tabled == FALSE);
3297 assert(mem->vmp_object == 0);
3298 assert(!mem->vmp_laundry);
2d21ac55 3299
d9a64523 3300 mem->vmp_q_state = VM_PAGE_ON_FREE_LOCAL_Q;
39037602 3301
0a7de745 3302 ASSERT_PMAP_FREE(mem);
d9a64523
A
3303 assert(mem->vmp_busy);
3304 assert(!mem->vmp_pmapped);
3305 assert(!mem->vmp_wpmapped);
39037602 3306 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2d21ac55 3307 }
5ba3f43e
A
3308#if defined (__x86_64__) && (DEVELOPMENT || DEBUG)
3309 vm_clump_update_stats(sub_count);
3310#endif
fe8ab488
A
3311 lck_mtx_unlock(&vm_page_queue_free_lock);
3312
5ba3f43e
A
3313#if HIBERNATION
3314 if (hibernate_rebuild_needed) {
3315 panic("%s:%d should not modify cpu->free_pages while hibernating", __FUNCTION__, __LINE__);
3316 }
3317#endif /* HIBERNATION */
f427ee49
A
3318 vm_offset_t pcpu_base = current_percpu_base();
3319 *PERCPU_GET_WITH_BASE(pcpu_base, free_pages) = head->vmp_snext;
3320 *PERCPU_GET_WITH_BASE(pcpu_base, start_color) = color;
2d21ac55
A
3321
3322 /*
3323 * satisfy this request
3324 */
0a7de745 3325 vm_page_grab_diags();
c3c9b80d 3326 counter_inc_preemption_disabled(&vm_page_grab_count);
d9a64523 3327 VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, grab_options, 0, 0, 0);
2d21ac55 3328 mem = head;
d9a64523 3329 assert(mem->vmp_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
39037602
A
3330
3331 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
d9a64523 3332 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
91447636 3333
2d21ac55
A
3334 enable_preemption();
3335 }
1c79356b
A
3336 /*
3337 * Decide if we should poke the pageout daemon.
3338 * We do this if the free count is less than the low
cb323159
A
 3339 * water mark. VM Pageout Scan will keep running until
 3340 * the free_count > free_target (and hence above free_min).
3341 * This wakeup is to catch the possibility of the counts
3342 * dropping between VM Pageout Scan parking and this check.
1c79356b
A
3343 *
3344 * We don't have the counts locked ... if they change a little,
3345 * it doesn't really matter.
3346 */
0a7de745 3347 if (vm_page_free_count < vm_page_free_min) {
cb323159
A
3348 lck_mtx_lock(&vm_page_queue_free_lock);
3349 if (vm_pageout_running == FALSE) {
3350 lck_mtx_unlock(&vm_page_queue_free_lock);
3351 thread_wakeup((event_t) &vm_page_free_wanted);
3352 } else {
3353 lck_mtx_unlock(&vm_page_queue_free_lock);
3354 }
0a7de745 3355 }
2d21ac55 3356
6d2010ae 3357 VM_CHECK_MEMORYSTATUS;
39037602
A
3358
3359 if (mem) {
3360// dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
3361
3362#if CONFIG_BACKGROUND_QUEUE
3363 vm_page_assign_background_state(mem);
3364#endif
3365 }
3366 return mem;
3367}
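/*
 * A minimal caller-side sketch (hypothetical helper, assuming the usual
 * vm_page.h declarations, in particular the VM_PAGE_WAIT() wrapper around
 * vm_page_wait()): the common way vm_page_grab() is paired with
 * vm_page_wait().  A non-privileged thread that gets VM_PAGE_NULL back
 * blocks until vm_page_release() frees a page and wakes it, then retries.
 */
static vm_page_t
example_grab_page_blocking(void)
{
        vm_page_t       m;

        while ((m = vm_page_grab()) == VM_PAGE_NULL) {
                VM_PAGE_WAIT();         /* sleep until a page is freed */
        }
        /* the returned page is busy, unqueued and not in any object */
        return m;
}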
3368
3369#if CONFIG_SECLUDED_MEMORY
3370vm_page_t
3371vm_page_grab_secluded(void)
3372{
0a7de745
A
3373 vm_page_t mem;
3374 vm_object_t object;
3375 int refmod_state;
39037602
A
3376
3377 if (vm_page_secluded_count == 0) {
3378 /* no secluded pages to grab... */
3379 return VM_PAGE_NULL;
3380 }
3381
3382 /* secluded queue is protected by the VM page queue lock */
3383 vm_page_lock_queues();
3384
3385 if (vm_page_secluded_count == 0) {
3386 /* no secluded pages to grab... */
3387 vm_page_unlock_queues();
3388 return VM_PAGE_NULL;
3389 }
3390
3391#if 00
3392 /* can we grab from the secluded queue? */
3393 if (vm_page_secluded_count > vm_page_secluded_target ||
3394 (vm_page_secluded_count > 0 &&
0a7de745 3395 task_can_use_secluded_mem(current_task(), TRUE))) {
39037602
A
3396 /* OK */
3397 } else {
3398 /* can't grab from secluded queue... */
3399 vm_page_unlock_queues();
3400 return VM_PAGE_NULL;
3401 }
3402#endif
3403
3404 /* we can grab a page from secluded queue! */
3405 assert((vm_page_secluded_count_free +
0a7de745
A
3406 vm_page_secluded_count_inuse) ==
3407 vm_page_secluded_count);
39037602
A
3408 if (current_task()->task_can_use_secluded_mem) {
3409 assert(num_tasks_can_use_secluded_mem > 0);
3410 }
3411 assert(!vm_page_queue_empty(&vm_page_queue_secluded));
3412 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
5ba3f43e 3413 mem = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
d9a64523 3414 assert(mem->vmp_q_state == VM_PAGE_ON_SECLUDED_Q);
d190cdc3 3415 vm_page_queues_remove(mem, TRUE);
39037602
A
3416
3417 object = VM_PAGE_OBJECT(mem);
3418
d9a64523 3419 assert(!mem->vmp_fictitious);
39037602
A
3420 assert(!VM_PAGE_WIRED(mem));
3421 if (object == VM_OBJECT_NULL) {
3422 /* free for grab! */
39037602
A
3423 vm_page_unlock_queues();
3424 vm_page_secluded.grab_success_free++;
d190cdc3 3425
d9a64523
A
3426 assert(mem->vmp_busy);
3427 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
d190cdc3 3428 assert(VM_PAGE_OBJECT(mem) == VM_OBJECT_NULL);
d9a64523
A
3429 assert(mem->vmp_pageq.next == 0);
3430 assert(mem->vmp_pageq.prev == 0);
3431 assert(mem->vmp_listq.next == 0);
3432 assert(mem->vmp_listq.prev == 0);
d190cdc3 3433#if CONFIG_BACKGROUND_QUEUE
d9a64523
A
3434 assert(mem->vmp_on_backgroundq == 0);
3435 assert(mem->vmp_backgroundq.next == 0);
3436 assert(mem->vmp_backgroundq.prev == 0);
d190cdc3 3437#endif /* CONFIG_BACKGROUND_QUEUE */
39037602
A
3438 return mem;
3439 }
3440
39037602
A
3441 assert(!object->internal);
3442// vm_page_pageable_external_count--;
3443
3444 if (!vm_object_lock_try(object)) {
3445// printf("SECLUDED: page %p: object %p locked\n", mem, object);
3446 vm_page_secluded.grab_failure_locked++;
0a7de745 3447reactivate_secluded_page:
39037602
A
3448 vm_page_activate(mem);
3449 vm_page_unlock_queues();
3450 return VM_PAGE_NULL;
3451 }
d9a64523
A
3452 if (mem->vmp_busy ||
3453 mem->vmp_cleaning ||
3454 mem->vmp_laundry) {
39037602
A
3455 /* can't steal page in this state... */
3456 vm_object_unlock(object);
3457 vm_page_secluded.grab_failure_state++;
3458 goto reactivate_secluded_page;
3459 }
3460
d9a64523 3461 mem->vmp_busy = TRUE;
39037602
A
3462 refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
3463 if (refmod_state & VM_MEM_REFERENCED) {
d9a64523 3464 mem->vmp_reference = TRUE;
39037602
A
3465 }
3466 if (refmod_state & VM_MEM_MODIFIED) {
3467 SET_PAGE_DIRTY(mem, FALSE);
3468 }
d9a64523 3469 if (mem->vmp_dirty || mem->vmp_precious) {
39037602
A
3470 /* can't grab a dirty page; re-activate */
3471// printf("SECLUDED: dirty page %p\n", mem);
743345f9 3472 PAGE_WAKEUP_DONE(mem);
39037602
A
3473 vm_page_secluded.grab_failure_dirty++;
3474 vm_object_unlock(object);
3475 goto reactivate_secluded_page;
3476 }
d9a64523 3477 if (mem->vmp_reference) {
39037602
A
3478 /* it's been used but we do need to grab a page... */
3479 }
743345f9 3480
39037602
A
3481 vm_page_unlock_queues();
3482
3483 /* finish what vm_page_free() would have done... */
3484 vm_page_free_prepare_object(mem, TRUE);
3485 vm_object_unlock(object);
3486 object = VM_OBJECT_NULL;
3487 if (vm_page_free_verify) {
0a7de745 3488 ASSERT_PMAP_FREE(mem);
39037602
A
3489 }
3490 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
39037602 3491 vm_page_secluded.grab_success_other++;
1c79356b 3492
d9a64523
A
3493 assert(mem->vmp_busy);
3494 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
d190cdc3 3495 assert(VM_PAGE_OBJECT(mem) == VM_OBJECT_NULL);
d9a64523
A
3496 assert(mem->vmp_pageq.next == 0);
3497 assert(mem->vmp_pageq.prev == 0);
3498 assert(mem->vmp_listq.next == 0);
3499 assert(mem->vmp_listq.prev == 0);
d190cdc3 3500#if CONFIG_BACKGROUND_QUEUE
d9a64523
A
3501 assert(mem->vmp_on_backgroundq == 0);
3502 assert(mem->vmp_backgroundq.next == 0);
3503 assert(mem->vmp_backgroundq.prev == 0);
d190cdc3
A
3504#endif /* CONFIG_BACKGROUND_QUEUE */
3505
1c79356b
A
3506 return mem;
3507}
cb323159
A
3508
3509uint64_t
3510vm_page_secluded_drain(void)
3511{
3512 vm_page_t local_freeq;
3513 int local_freed;
3514 uint64_t num_reclaimed;
3515 unsigned int saved_secluded_count, saved_secluded_target;
3516
3517 num_reclaimed = 0;
3518 local_freeq = NULL;
3519 local_freed = 0;
3520
3521 vm_page_lock_queues();
3522
3523 saved_secluded_count = vm_page_secluded_count;
3524 saved_secluded_target = vm_page_secluded_target;
3525 vm_page_secluded_target = 0;
3526 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
3527 while (vm_page_secluded_count) {
3528 vm_page_t secluded_page;
3529
3530 assert((vm_page_secluded_count_free +
3531 vm_page_secluded_count_inuse) ==
3532 vm_page_secluded_count);
3533 secluded_page = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
3534 assert(secluded_page->vmp_q_state == VM_PAGE_ON_SECLUDED_Q);
3535
3536 vm_page_queues_remove(secluded_page, FALSE);
3537 assert(!secluded_page->vmp_fictitious);
3538 assert(!VM_PAGE_WIRED(secluded_page));
3539
3540 if (secluded_page->vmp_object == 0) {
3541 /* transfer to free queue */
3542 assert(secluded_page->vmp_busy);
3543 secluded_page->vmp_snext = local_freeq;
3544 local_freeq = secluded_page;
3545 local_freed += 1;
3546 } else {
3547 /* transfer to head of active queue */
3548 vm_page_enqueue_active(secluded_page, FALSE);
3549 secluded_page = VM_PAGE_NULL;
3550 }
3551 num_reclaimed++;
3552 }
3553 vm_page_secluded_target = saved_secluded_target;
3554 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
3555
3556// printf("FBDP %s:%d secluded_count %d->%d, target %d, reclaimed %lld\n", __FUNCTION__, __LINE__, saved_secluded_count, vm_page_secluded_count, vm_page_secluded_target, num_reclaimed);
3557
3558 vm_page_unlock_queues();
3559
3560 if (local_freed) {
3561 vm_page_free_list(local_freeq, TRUE);
3562 local_freeq = NULL;
3563 local_freed = 0;
3564 }
3565
3566 return num_reclaimed;
3567}
39037602 3568#endif /* CONFIG_SECLUDED_MEMORY */
1c79356b 3569
0a7de745
A
3570
3571static inline void
3572vm_page_grab_diags()
3573{
3574#if DEVELOPMENT || DEBUG
3575 task_t task = current_task();
3576 if (task == NULL) {
3577 return;
3578 }
3579
3580 ledger_credit(task->ledger, task_ledgers.pages_grabbed, 1);
3581#endif /* DEVELOPMENT || DEBUG */
3582}
3583
1c79356b
A
3584/*
3585 * vm_page_release:
3586 *
3587 * Return a page to the free list.
3588 */
3589
3590void
3591vm_page_release(
0a7de745
A
3592 vm_page_t mem,
3593 boolean_t page_queues_locked)
1c79356b 3594{
0a7de745
A
3595 unsigned int color;
3596 int need_wakeup = 0;
3597 int need_priv_wakeup = 0;
39037602 3598#if CONFIG_SECLUDED_MEMORY
0a7de745 3599 int need_secluded_wakeup = 0;
39037602 3600#endif /* CONFIG_SECLUDED_MEMORY */
cb323159 3601 event_t wakeup_event = NULL;
55e303ae 3602
39037602
A
3603 if (page_queues_locked) {
3604 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3605 } else {
3606 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
3607 }
6d2010ae 3608
d9a64523 3609 assert(!mem->vmp_private && !mem->vmp_fictitious);
b0d623f7 3610 if (vm_page_free_verify) {
0a7de745 3611 ASSERT_PMAP_FREE(mem);
b0d623f7 3612 }
39037602 3613// dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
1c79356b 3614
39037602 3615 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
7ddcb079 3616
b0d623f7 3617 lck_mtx_lock_spin(&vm_page_queue_free_lock);
6d2010ae 3618
d9a64523
A
3619 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
3620 assert(mem->vmp_busy);
3621 assert(!mem->vmp_laundry);
3622 assert(mem->vmp_object == 0);
3623 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
3624 assert(mem->vmp_listq.next == 0 && mem->vmp_listq.prev == 0);
39037602 3625#if CONFIG_BACKGROUND_QUEUE
d9a64523 3626 assert(mem->vmp_backgroundq.next == 0 &&
0a7de745
A
3627 mem->vmp_backgroundq.prev == 0 &&
3628 mem->vmp_on_backgroundq == FALSE);
3629#endif
d9a64523 3630 if ((mem->vmp_lopage == TRUE || vm_lopage_refill == TRUE) &&
0b4c1975 3631 vm_lopage_free_count < vm_lopage_free_limit &&
39037602 3632 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
0a7de745 3633 /*
0c530ab8
A
3634 * this exists to support hardware controllers
3635 * incapable of generating DMAs with more than 32 bits
3636 * of address on platforms with physical memory > 4G...
3637 */
0a7de745 3638 vm_page_queue_enter_first(&vm_lopage_queue_free, mem, vmp_pageq);
0c530ab8 3639 vm_lopage_free_count++;
0b4c1975 3640
0a7de745 3641 if (vm_lopage_free_count >= vm_lopage_free_limit) {
0b4c1975 3642 vm_lopage_refill = FALSE;
0a7de745 3643 }
0b4c1975 3644
d9a64523
A
3645 mem->vmp_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
3646 mem->vmp_lopage = TRUE;
39037602
A
3647#if CONFIG_SECLUDED_MEMORY
3648 } else if (vm_page_free_count > vm_page_free_reserved &&
0a7de745
A
3649 vm_page_secluded_count < vm_page_secluded_target &&
3650 num_tasks_can_use_secluded_mem == 0) {
39037602
A
3651 /*
3652 * XXX FBDP TODO: also avoid refilling secluded queue
3653 * when some IOKit objects are already grabbing from it...
3654 */
3655 if (!page_queues_locked) {
3656 if (!vm_page_trylock_queues()) {
3657 /* take locks in right order */
3658 lck_mtx_unlock(&vm_page_queue_free_lock);
3659 vm_page_lock_queues();
3660 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3661 }
3662 }
d9a64523 3663 mem->vmp_lopage = FALSE;
39037602 3664 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
0a7de745 3665 vm_page_queue_enter_first(&vm_page_queue_secluded, mem, vmp_pageq);
d9a64523 3666 mem->vmp_q_state = VM_PAGE_ON_SECLUDED_Q;
39037602 3667 vm_page_secluded_count++;
cb323159 3668 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
39037602
A
3669 vm_page_secluded_count_free++;
3670 if (!page_queues_locked) {
3671 vm_page_unlock_queues();
3672 }
3673 LCK_MTX_ASSERT(&vm_page_queue_free_lock, LCK_MTX_ASSERT_OWNED);
3674 if (vm_page_free_wanted_secluded > 0) {
0a7de745 3675 vm_page_free_wanted_secluded--;
39037602
A
3676 need_secluded_wakeup = 1;
3677 }
3678#endif /* CONFIG_SECLUDED_MEMORY */
3679 } else {
d9a64523
A
3680 mem->vmp_lopage = FALSE;
3681 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
0b4c1975 3682
5ba3f43e
A
3683 color = VM_PAGE_GET_COLOR(mem);
3684#if defined(__x86_64__)
0a7de745 3685 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead, mem);
5ba3f43e 3686#else
0a7de745 3687 vm_page_queue_enter(&vm_page_queue_free[color].qhead, mem, vmp_pageq);
5ba3f43e 3688#endif
0c530ab8
A
3689 vm_page_free_count++;
3690 /*
3691 * Check if we should wake up someone waiting for page.
3692 * But don't bother waking them unless they can allocate.
3693 *
 3694 * We wake up only one thread, to prevent starvation.
 3695 * Because the scheduling system handles wait queues FIFO,
 3696 * if we wake up all waiting threads, one greedy thread
 3697 * can starve multiple nice-guy threads. When the threads
 3698 * all wake up, the greedy thread runs first, grabs the page,
3699 * and waits for another page. It will be the first to run
3700 * when the next page is freed.
3701 *
3702 * However, there is a slight danger here.
3703 * The thread we wake might not use the free page.
3704 * Then the other threads could wait indefinitely
3705 * while the page goes unused. To forestall this,
3706 * the pageout daemon will keep making free pages
3707 * as long as vm_page_free_wanted is non-zero.
3708 */
1c79356b 3709
b0d623f7
A
3710 assert(vm_page_free_count > 0);
3711 if (vm_page_free_wanted_privileged > 0) {
0a7de745 3712 vm_page_free_wanted_privileged--;
b0d623f7 3713 need_priv_wakeup = 1;
39037602
A
3714#if CONFIG_SECLUDED_MEMORY
3715 } else if (vm_page_free_wanted_secluded > 0 &&
0a7de745 3716 vm_page_free_count > vm_page_free_reserved) {
39037602
A
3717 vm_page_free_wanted_secluded--;
3718 need_secluded_wakeup = 1;
3719#endif /* CONFIG_SECLUDED_MEMORY */
b0d623f7 3720 } else if (vm_page_free_wanted > 0 &&
0a7de745
A
3721 vm_page_free_count > vm_page_free_reserved) {
3722 vm_page_free_wanted--;
b0d623f7 3723 need_wakeup = 1;
0c530ab8 3724 }
1c79356b 3725 }
d9a64523
A
3726 vm_pageout_vminfo.vm_page_pages_freed++;
3727
3728 VM_DEBUG_CONSTANT_EVENT(vm_page_release, VM_PAGE_RELEASE, DBG_FUNC_NONE, 1, 0, 0, 0);
3729
b0d623f7
A
3730 lck_mtx_unlock(&vm_page_queue_free_lock);
3731
0a7de745 3732 if (need_priv_wakeup) {
cb323159 3733 wakeup_event = &vm_page_free_wanted_privileged;
0a7de745 3734 }
39037602 3735#if CONFIG_SECLUDED_MEMORY
0a7de745 3736 else if (need_secluded_wakeup) {
cb323159 3737 wakeup_event = &vm_page_free_wanted_secluded;
0a7de745 3738 }
39037602 3739#endif /* CONFIG_SECLUDED_MEMORY */
0a7de745 3740 else if (need_wakeup) {
cb323159
A
3741 wakeup_event = &vm_page_free_count;
3742 }
3743
3744 if (wakeup_event) {
3745 if (vps_dynamic_priority_enabled == TRUE) {
3746 thread_t thread_woken = NULL;
3747 wakeup_one_with_inheritor((event_t) wakeup_event, THREAD_AWAKENED, LCK_WAKE_DO_NOT_TRANSFER_PUSH, &thread_woken);
3748 thread_deallocate(thread_woken);
3749 } else {
3750 thread_wakeup_one((event_t) wakeup_event);
3751 }
0a7de745 3752 }
2d21ac55 3753
6d2010ae 3754 VM_CHECK_MEMORYSTATUS;
1c79356b
A
3755}
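/*
 * Sketch of the simplest grab/release pairing (hypothetical helper, not in
 * the original file) for a page that is never inserted into a VM object:
 * vm_page_grab() hands back a busy page with no object and no queue state,
 * which matches the assertions at the top of vm_page_release(); passing
 * FALSE says the caller does not hold the page-queues lock.
 */
static void
example_grab_then_release(void)
{
        vm_page_t       m;

        m = vm_page_grab();
        if (m != VM_PAGE_NULL) {
                /* ... temporary use of VM_PAGE_GET_PHYS_PAGE(m) ... */
                vm_page_release(m, FALSE);      /* page queues not locked */
        }
}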
3756
fe8ab488
A
3757/*
3758 * This version of vm_page_release() is used only at startup
0a7de745 3759 * when we are single-threaded and pages are being released
fe8ab488
A
3760 * for the first time. Hence, no locking or unnecessary checks are made.
3761 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
3762 */
3763void
3764vm_page_release_startup(
0a7de745 3765 vm_page_t mem)
fe8ab488 3766{
0a7de745 3767 vm_page_queue_t queue_free;
fe8ab488
A
3768
3769 if (vm_lopage_free_count < vm_lopage_free_limit &&
39037602 3770 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
d9a64523
A
3771 mem->vmp_lopage = TRUE;
3772 mem->vmp_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
fe8ab488
A
3773 vm_lopage_free_count++;
3774 queue_free = &vm_lopage_queue_free;
39037602
A
3775#if CONFIG_SECLUDED_MEMORY
3776 } else if (vm_page_secluded_count < vm_page_secluded_target) {
d9a64523
A
3777 mem->vmp_lopage = FALSE;
3778 mem->vmp_q_state = VM_PAGE_ON_SECLUDED_Q;
39037602 3779 vm_page_secluded_count++;
cb323159 3780 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
39037602
A
3781 vm_page_secluded_count_free++;
3782 queue_free = &vm_page_queue_secluded;
3783#endif /* CONFIG_SECLUDED_MEMORY */
3784 } else {
d9a64523
A
3785 mem->vmp_lopage = FALSE;
3786 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
fe8ab488 3787 vm_page_free_count++;
5ba3f43e 3788 queue_free = &vm_page_queue_free[VM_PAGE_GET_COLOR(mem)].qhead;
fe8ab488 3789 }
d9a64523 3790 if (mem->vmp_q_state == VM_PAGE_ON_FREE_Q) {
5ba3f43e 3791#if defined(__x86_64__)
0a7de745 3792 vm_page_queue_enter_clump(queue_free, mem);
5ba3f43e 3793#else
0a7de745 3794 vm_page_queue_enter(queue_free, mem, vmp_pageq);
5ba3f43e 3795#endif
0a7de745
A
3796 } else {
3797 vm_page_queue_enter_first(queue_free, mem, vmp_pageq);
3798 }
fe8ab488
A
3799}
3800
1c79356b
A
3801/*
3802 * vm_page_wait:
3803 *
3804 * Wait for a page to become available.
3805 * If there are plenty of free pages, then we don't sleep.
3806 *
3807 * Returns:
3808 * TRUE: There may be another page, try again
3809 * FALSE: We were interrupted out of our wait, don't try again
3810 */
3811
3812boolean_t
3813vm_page_wait(
0a7de745 3814 int interruptible )
1c79356b
A
3815{
3816 /*
3817 * We can't use vm_page_free_reserved to make this
3818 * determination. Consider: some thread might
3819 * need to allocate two pages. The first allocation
3820 * succeeds, the second fails. After the first page is freed,
3821 * a call to vm_page_wait must really block.
3822 */
0a7de745
A
3823 kern_return_t wait_result;
3824 int need_wakeup = 0;
3825 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
cb323159 3826 event_t wait_event = NULL;
1c79356b 3827
b0d623f7 3828 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2d21ac55
A
3829
3830 if (is_privileged && vm_page_free_count) {
b0d623f7 3831 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
3832 return TRUE;
3833 }
2d21ac55 3834
39037602 3835 if (vm_page_free_count >= vm_page_free_target) {
b0d623f7 3836 lck_mtx_unlock(&vm_page_queue_free_lock);
39037602
A
3837 return TRUE;
3838 }
9bccf70c 3839
39037602 3840 if (is_privileged) {
0a7de745 3841 if (vm_page_free_wanted_privileged++ == 0) {
39037602 3842 need_wakeup = 1;
0a7de745 3843 }
cb323159 3844 wait_event = (event_t)&vm_page_free_wanted_privileged;
39037602
A
3845#if CONFIG_SECLUDED_MEMORY
3846 } else if (secluded_for_apps &&
0a7de745 3847 task_can_use_secluded_mem(current_task(), FALSE)) {
39037602
A
3848#if 00
3849 /* XXX FBDP: need pageq lock for this... */
3850 /* XXX FBDP: might wait even if pages available, */
3851 /* XXX FBDP: hopefully not for too long... */
3852 if (vm_page_secluded_count > 0) {
3853 lck_mtx_unlock(&vm_page_queue_free_lock);
3854 return TRUE;
39236c6e 3855 }
39037602
A
3856#endif
3857 if (vm_page_free_wanted_secluded++ == 0) {
3858 need_wakeup = 1;
3859 }
cb323159 3860 wait_event = (event_t)&vm_page_free_wanted_secluded;
39037602 3861#endif /* CONFIG_SECLUDED_MEMORY */
1c79356b 3862 } else {
0a7de745 3863 if (vm_page_free_wanted++ == 0) {
39037602 3864 need_wakeup = 1;
0a7de745 3865 }
cb323159 3866 wait_event = (event_t)&vm_page_free_count;
39037602 3867 }
39037602 3868
cb323159
A
3869 /*
3870 * We don't do a vm_pageout_scan wakeup if we already have
3871 * some waiters because vm_pageout_scan checks for waiters
3872 * before it returns and does so behind the vm_page_queue_free_lock,
3873 * which we own when we bump the waiter counts.
3874 */
3875
3876 if (vps_dynamic_priority_enabled == TRUE) {
3877 /*
3878 * We are waking up vm_pageout_scan here. If it needs
3879 * the vm_page_queue_free_lock before we unlock it
3880 * we'll end up just blocking and incur an extra
3881 * context switch. Could be a perf. issue.
3882 */
3883
cb323159
A
3884 if (need_wakeup) {
3885 thread_wakeup((event_t)&vm_page_free_wanted);
3886 }
3887
3888 /*
3889 * LD: This event is going to get recorded every time because
3890 * we don't get back THREAD_WAITING from lck_mtx_sleep_with_inheritor.
3891 * We just block in that routine.
3892 */
d9a64523 3893 VM_DEBUG_CONSTANT_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
0a7de745
A
3894 vm_page_free_wanted_privileged,
3895 vm_page_free_wanted,
39037602 3896#if CONFIG_SECLUDED_MEMORY
0a7de745 3897 vm_page_free_wanted_secluded,
39037602 3898#else /* CONFIG_SECLUDED_MEMORY */
0a7de745 3899 0,
39037602 3900#endif /* CONFIG_SECLUDED_MEMORY */
0a7de745 3901 0);
cb323159
A
3902 wait_result = lck_mtx_sleep_with_inheritor(&vm_page_queue_free_lock,
3903 LCK_SLEEP_UNLOCK,
3904 wait_event,
3905 vm_pageout_scan_thread,
3906 interruptible,
3907 0);
3908 } else {
3909 wait_result = assert_wait(wait_event, interruptible);
3910
3911 lck_mtx_unlock(&vm_page_queue_free_lock);
cb323159
A
3912
3913 if (need_wakeup) {
3914 thread_wakeup((event_t)&vm_page_free_wanted);
3915 }
3916
3917 if (wait_result == THREAD_WAITING) {
3918 VM_DEBUG_CONSTANT_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
3919 vm_page_free_wanted_privileged,
3920 vm_page_free_wanted,
3921#if CONFIG_SECLUDED_MEMORY
3922 vm_page_free_wanted_secluded,
3923#else /* CONFIG_SECLUDED_MEMORY */
3924 0,
3925#endif /* CONFIG_SECLUDED_MEMORY */
3926 0);
3927 wait_result = thread_block(THREAD_CONTINUE_NULL);
3928 VM_DEBUG_CONSTANT_EVENT(vm_page_wait_block,
3929 VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
3930 }
1c79356b 3931 }
39037602 3932
cb323159 3933 return (wait_result == THREAD_AWAKENED) || (wait_result == THREAD_NOT_WAITING);
1c79356b
A
3934}
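/*
 * Sketch of the interruptible form of vm_page_wait() (hypothetical helper;
 * THREAD_ABORTSAFE and KERN_ABORTED are the usual kernel constants and are
 * assumed here).  A FALSE return means the thread was interrupted out of
 * its wait and should back out instead of retrying.
 */
static kern_return_t
example_grab_page_interruptible(vm_page_t *mp)
{
        vm_page_t       m;

        while ((m = vm_page_grab()) == VM_PAGE_NULL) {
                if (!vm_page_wait(THREAD_ABORTSAFE)) {
                        return KERN_ABORTED;    /* interrupted; do not retry */
                }
        }
        *mp = m;
        return KERN_SUCCESS;
}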
3935
3936/*
3937 * vm_page_alloc:
3938 *
3939 * Allocate and return a memory cell associated
3940 * with this VM object/offset pair.
3941 *
3942 * Object must be locked.
3943 */
3944
3945vm_page_t
3946vm_page_alloc(
0a7de745
A
3947 vm_object_t object,
3948 vm_object_offset_t offset)
1c79356b 3949{
0a7de745
A
3950 vm_page_t mem;
3951 int grab_options;
1c79356b 3952
2d21ac55 3953 vm_object_lock_assert_exclusive(object);
39037602
A
3954 grab_options = 0;
3955#if CONFIG_SECLUDED_MEMORY
3956 if (object->can_grab_secluded) {
3957 grab_options |= VM_PAGE_GRAB_SECLUDED;
3958 }
3959#endif /* CONFIG_SECLUDED_MEMORY */
3960 mem = vm_page_grab_options(grab_options);
0a7de745 3961 if (mem == VM_PAGE_NULL) {
1c79356b 3962 return VM_PAGE_NULL;
0a7de745 3963 }
1c79356b
A
3964
3965 vm_page_insert(mem, object, offset);
3966
0a7de745 3967 return mem;
1c79356b
A
3968}
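/*
 * Sketch of a typical vm_page_alloc() caller (hypothetical helper).  The
 * object must be locked exclusively across the call; the page comes back
 * busy and already inserted at (object, offset), so the caller clears the
 * busy state once it has initialized the page.  VM_PAGE_WAIT() and
 * vm_page_lookup() are assumed from vm_page.h.
 */
static vm_page_t
example_populate_offset(vm_object_t object, vm_object_offset_t offset)
{
        vm_page_t       m;

        vm_object_lock(object);
        while ((m = vm_page_alloc(object, offset)) == VM_PAGE_NULL) {
                /* out of pages: drop the lock, wait, retry */
                vm_object_unlock(object);
                VM_PAGE_WAIT();
                vm_object_lock(object);
                /*
                 * A real caller would re-check vm_page_lookup(object, offset)
                 * here, since another thread may have filled the offset while
                 * the object lock was dropped.
                 */
        }
        /* ... initialize the new page ... */
        PAGE_WAKEUP_DONE(m);            /* clear vmp_busy, wake any waiters */
        vm_object_unlock(object);
        return m;
}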
3969
1c79356b 3970/*
6d2010ae 3971 * vm_page_free_prepare:
1c79356b 3972 *
6d2010ae
A
3973 * Removes page from any queue it may be on
3974 * and disassociates it from its VM object.
1c79356b
A
3975 *
3976 * Object and page queues must be locked prior to entry.
3977 */
b0d623f7 3978static void
2d21ac55 3979vm_page_free_prepare(
0a7de745 3980 vm_page_t mem)
b0d623f7
A
3981{
3982 vm_page_free_prepare_queues(mem);
3983 vm_page_free_prepare_object(mem, TRUE);
3984}
3985
3986
3987void
3988vm_page_free_prepare_queues(
0a7de745 3989 vm_page_t mem)
1c79356b 3990{
0a7de745 3991 vm_object_t m_object;
39037602 3992
2d21ac55 3993 VM_PAGE_CHECK(mem);
39037602 3994
d9a64523
A
3995 assert(mem->vmp_q_state != VM_PAGE_ON_FREE_Q);
3996 assert(!mem->vmp_cleaning);
39037602 3997 m_object = VM_PAGE_OBJECT(mem);
fe8ab488 3998
39037602
A
3999 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4000 if (m_object) {
4001 vm_object_lock_assert_exclusive(m_object);
b0d623f7 4002 }
d9a64523 4003 if (mem->vmp_laundry) {
2d21ac55
A
4004 /*
4005 * We may have to free a page while it's being laundered
4006 * if we lost its pager (due to a forced unmount, for example).
316670eb
A
4007 * We need to call vm_pageout_steal_laundry() before removing
4008 * the page from its VM object, so that we can remove it
4009 * from its pageout queue and adjust the laundry accounting
2d21ac55 4010 */
316670eb 4011 vm_pageout_steal_laundry(mem, TRUE);
2d21ac55 4012 }
0a7de745 4013
39037602 4014 vm_page_queues_remove(mem, TRUE);
b0d623f7
A
4015
4016 if (VM_PAGE_WIRED(mem)) {
d9a64523 4017 assert(mem->vmp_wire_count > 0);
39037602
A
4018
4019 if (m_object) {
5ba3f43e
A
4020 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
4021 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
4022 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
3e170ce0 4023
39037602 4024 assert(m_object->resident_page_count >=
0a7de745 4025 m_object->wired_page_count);
6d2010ae 4026
39037602 4027 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
6d2010ae
A
4028 OSAddAtomic(+1, &vm_page_purgeable_count);
4029 assert(vm_page_purgeable_wired_count > 0);
4030 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
4031 }
39037602 4032 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
0a7de745 4033 m_object->purgable == VM_PURGABLE_EMPTY) &&
d9a64523 4034 m_object->vo_owner != TASK_NULL) {
0a7de745
A
4035 task_t owner;
4036 int ledger_idx_volatile;
4037 int ledger_idx_nonvolatile;
4038 int ledger_idx_volatile_compressed;
4039 int ledger_idx_nonvolatile_compressed;
4040 boolean_t do_footprint;
d9a64523
A
4041
4042 owner = VM_OBJECT_OWNER(m_object);
4043 vm_object_ledger_tag_ledgers(
4044 m_object,
4045 &ledger_idx_volatile,
4046 &ledger_idx_nonvolatile,
4047 &ledger_idx_volatile_compressed,
4048 &ledger_idx_nonvolatile_compressed,
4049 &do_footprint);
fe8ab488
A
4050 /*
4051 * While wired, this page was accounted
4052 * as "non-volatile" but it should now
4053 * be accounted as "volatile".
4054 */
4055 /* one less "non-volatile"... */
4056 ledger_debit(owner->ledger,
0a7de745
A
4057 ledger_idx_nonvolatile,
4058 PAGE_SIZE);
d9a64523
A
4059 if (do_footprint) {
4060 /* ... and "phys_footprint" */
4061 ledger_debit(owner->ledger,
0a7de745
A
4062 task_ledgers.phys_footprint,
4063 PAGE_SIZE);
d9a64523 4064 }
fe8ab488
A
4065 /* one more "volatile" */
4066 ledger_credit(owner->ledger,
0a7de745
A
4067 ledger_idx_volatile,
4068 PAGE_SIZE);
fe8ab488 4069 }
b0d623f7 4070 }
0a7de745 4071 if (!mem->vmp_private && !mem->vmp_fictitious) {
1c79356b 4072 vm_page_wire_count--;
0a7de745 4073 }
39037602 4074
d9a64523
A
4075 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
4076 mem->vmp_wire_count = 0;
4077 assert(!mem->vmp_gobbled);
4078 } else if (mem->vmp_gobbled) {
0a7de745 4079 if (!mem->vmp_private && !mem->vmp_fictitious) {
1c79356b 4080 vm_page_wire_count--;
0a7de745 4081 }
1c79356b
A
4082 vm_page_gobble_count--;
4083 }
b0d623f7
A
4084}
4085
4086
4087void
4088vm_page_free_prepare_object(
0a7de745
A
4089 vm_page_t mem,
4090 boolean_t remove_from_hash)
b0d623f7 4091{
0a7de745
A
4092 if (mem->vmp_tabled) {
4093 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
4094 }
4095 PAGE_WAKEUP(mem); /* clears wanted */
1c79356b 4096
d9a64523
A
4097 if (mem->vmp_private) {
4098 mem->vmp_private = FALSE;
4099 mem->vmp_fictitious = TRUE;
39037602 4100 VM_PAGE_SET_PHYS_PAGE(mem, vm_page_fictitious_addr);
1c79356b 4101 }
0a7de745 4102 if (!mem->vmp_fictitious) {
d9a64523
A
4103 assert(mem->vmp_pageq.next == 0);
4104 assert(mem->vmp_pageq.prev == 0);
4105 assert(mem->vmp_listq.next == 0);
4106 assert(mem->vmp_listq.prev == 0);
d190cdc3 4107#if CONFIG_BACKGROUND_QUEUE
d9a64523
A
4108 assert(mem->vmp_backgroundq.next == 0);
4109 assert(mem->vmp_backgroundq.prev == 0);
d190cdc3 4110#endif /* CONFIG_BACKGROUND_QUEUE */
d9a64523 4111 assert(mem->vmp_next_m == 0);
0a7de745 4112 ASSERT_PMAP_FREE(mem);
d9a64523 4113 vm_page_init(mem, VM_PAGE_GET_PHYS_PAGE(mem), mem->vmp_lopage);
1c79356b
A
4114 }
4115}
4116
b0d623f7 4117
6d2010ae
A
4118/*
4119 * vm_page_free:
4120 *
4121 * Returns the given page to the free list,
4122 * disassociating it with any VM object.
4123 *
4124 * Object and page queues must be locked prior to entry.
4125 */
2d21ac55
A
4126void
4127vm_page_free(
0a7de745 4128 vm_page_t mem)
2d21ac55 4129{
b0d623f7 4130 vm_page_free_prepare(mem);
6d2010ae 4131
d9a64523 4132 if (mem->vmp_fictitious) {
b0d623f7
A
4133 vm_page_release_fictitious(mem);
4134 } else {
39037602 4135 vm_page_release(mem,
0a7de745 4136 TRUE); /* page queues are locked */
b0d623f7
A
4137 }
4138}
4139
4140
4141void
4142vm_page_free_unlocked(
0a7de745
A
4143 vm_page_t mem,
4144 boolean_t remove_from_hash)
b0d623f7
A
4145{
4146 vm_page_lockspin_queues();
4147 vm_page_free_prepare_queues(mem);
4148 vm_page_unlock_queues();
4149
4150 vm_page_free_prepare_object(mem, remove_from_hash);
4151
d9a64523 4152 if (mem->vmp_fictitious) {
2d21ac55
A
4153 vm_page_release_fictitious(mem);
4154 } else {
39037602 4155 vm_page_release(mem, FALSE); /* page queues are not locked */
2d21ac55
A
4156 }
4157}
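/*
 * Sketch of choosing between the two entry points above (hypothetical
 * helper).  vm_page_free() expects both the object lock and the page-queues
 * lock to be held; vm_page_free_unlocked() takes and drops the page-queues
 * lock itself, so it is the right call when only the object lock is held.
 */
static void
example_free_one_page(vm_object_t object, vm_page_t m, boolean_t queues_locked)
{
        vm_object_lock_assert_exclusive(object);

        if (queues_locked) {
                vm_page_free(m);                /* object + page queues locked */
        } else {
                vm_page_free_unlocked(m, TRUE); /* remove_from_hash == TRUE */
        }
}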
55e303ae 4158
316670eb 4159
2d21ac55
A
4160/*
4161 * Free a list of pages. The list can be up to several hundred pages,
4162 * as blocked up by vm_pageout_scan().
b0d623f7 4163 * The big win is not having to take the free list lock once
316670eb 4164 * per page.
d190cdc3
A
4165 *
4166 * The VM page queues lock (vm_page_queue_lock) should NOT be held.
4167 * The VM page free queues lock (vm_page_queue_free_lock) should NOT be held.
2d21ac55 4168 */
55e303ae
A
4169void
4170vm_page_free_list(
0a7de745
A
4171 vm_page_t freeq,
4172 boolean_t prepare_object)
55e303ae 4173{
0a7de745
A
4174 vm_page_t mem;
4175 vm_page_t nxt;
4176 vm_page_t local_freeq;
4177 int pg_count;
2d21ac55 4178
d190cdc3
A
4179 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
4180 LCK_MTX_ASSERT(&vm_page_queue_free_lock, LCK_MTX_ASSERT_NOTOWNED);
4181
316670eb 4182 while (freeq) {
316670eb
A
4183 pg_count = 0;
4184 local_freeq = VM_PAGE_NULL;
4185 mem = freeq;
b0d623f7 4186
316670eb
A
4187 /*
4188 * break up the processing into smaller chunks so
4189 * that we can 'pipeline' the pages onto the
4190 * free list w/o introducing too much
4191 * contention on the global free queue lock
4192 */
4193 while (mem && pg_count < 64) {
d9a64523 4194 assert((mem->vmp_q_state == VM_PAGE_NOT_ON_Q) ||
0a7de745 4195 (mem->vmp_q_state == VM_PAGE_IS_WIRED));
39037602 4196#if CONFIG_BACKGROUND_QUEUE
d9a64523 4197 assert(mem->vmp_backgroundq.next == 0 &&
0a7de745
A
4198 mem->vmp_backgroundq.prev == 0 &&
4199 mem->vmp_on_backgroundq == FALSE);
39037602 4200#endif
d9a64523
A
4201 nxt = mem->vmp_snext;
4202 mem->vmp_snext = NULL;
4203 assert(mem->vmp_pageq.prev == 0);
316670eb 4204
d9a64523 4205 if (vm_page_free_verify && !mem->vmp_fictitious && !mem->vmp_private) {
0a7de745 4206 ASSERT_PMAP_FREE(mem);
316670eb 4207 }
0a7de745 4208 if (prepare_object == TRUE) {
316670eb 4209 vm_page_free_prepare_object(mem, TRUE);
0a7de745 4210 }
b0d623f7 4211
d9a64523
A
4212 if (!mem->vmp_fictitious) {
4213 assert(mem->vmp_busy);
55e303ae 4214
d9a64523 4215 if ((mem->vmp_lopage == TRUE || vm_lopage_refill == TRUE) &&
316670eb 4216 vm_lopage_free_count < vm_lopage_free_limit &&
39037602
A
4217 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
4218 vm_page_release(mem, FALSE); /* page queues are not locked */
4219#if CONFIG_SECLUDED_MEMORY
4220 } else if (vm_page_secluded_count < vm_page_secluded_target &&
0a7de745 4221 num_tasks_can_use_secluded_mem == 0) {
39037602 4222 vm_page_release(mem,
0a7de745 4223 FALSE); /* page queues are not locked */
39037602 4224#endif /* CONFIG_SECLUDED_MEMORY */
316670eb
A
4225 } else {
4226 /*
4227 * IMPORTANT: we can't set the page "free" here
4228 * because that would make the page eligible for
4229 * a physically-contiguous allocation (see
4230 * vm_page_find_contiguous()) right away (we don't
4231 * hold the vm_page_queue_free lock). That would
4232 * cause trouble because the page is not actually
4233 * in the free queue yet...
4234 */
d9a64523 4235 mem->vmp_snext = local_freeq;
316670eb
A
4236 local_freeq = mem;
4237 pg_count++;
935ed37a 4238
39037602 4239 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
935ed37a 4240 }
316670eb 4241 } else {
39037602 4242 assert(VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_fictitious_addr ||
0a7de745 4243 VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_guard_addr);
316670eb 4244 vm_page_release_fictitious(mem);
2d21ac55 4245 }
316670eb 4246 mem = nxt;
55e303ae 4247 }
316670eb
A
4248 freeq = mem;
4249
0a7de745
A
4250 if ((mem = local_freeq)) {
4251 unsigned int avail_free_count;
4252 unsigned int need_wakeup = 0;
4253 unsigned int need_priv_wakeup = 0;
39037602 4254#if CONFIG_SECLUDED_MEMORY
0a7de745 4255 unsigned int need_wakeup_secluded = 0;
39037602 4256#endif /* CONFIG_SECLUDED_MEMORY */
cb323159
A
4257 event_t priv_wakeup_event, secluded_wakeup_event, normal_wakeup_event;
4258 boolean_t priv_wakeup_all, secluded_wakeup_all, normal_wakeup_all;
0a7de745 4259
316670eb 4260 lck_mtx_lock_spin(&vm_page_queue_free_lock);
55e303ae 4261
316670eb 4262 while (mem) {
0a7de745 4263 int color;
316670eb 4264
d9a64523 4265 nxt = mem->vmp_snext;
2d21ac55 4266
d9a64523
A
4267 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
4268 assert(mem->vmp_busy);
4269 mem->vmp_lopage = FALSE;
4270 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
39037602 4271
5ba3f43e
A
4272 color = VM_PAGE_GET_COLOR(mem);
4273#if defined(__x86_64__)
0a7de745 4274 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead, mem);
5ba3f43e
A
4275#else
4276 vm_page_queue_enter(&vm_page_queue_free[color].qhead,
0a7de745 4277 mem, vmp_pageq);
5ba3f43e 4278#endif
316670eb 4279 mem = nxt;
2d21ac55 4280 }
d9a64523 4281 vm_pageout_vminfo.vm_page_pages_freed += pg_count;
316670eb
A
4282 vm_page_free_count += pg_count;
4283 avail_free_count = vm_page_free_count;
4284
d9a64523
A
4285 VM_DEBUG_CONSTANT_EVENT(vm_page_release, VM_PAGE_RELEASE, DBG_FUNC_NONE, pg_count, 0, 0, 0);
4286
316670eb 4287 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
316670eb
A
4288 if (avail_free_count < vm_page_free_wanted_privileged) {
4289 need_priv_wakeup = avail_free_count;
4290 vm_page_free_wanted_privileged -= avail_free_count;
4291 avail_free_count = 0;
4292 } else {
4293 need_priv_wakeup = vm_page_free_wanted_privileged;
316670eb 4294 avail_free_count -= vm_page_free_wanted_privileged;
39037602 4295 vm_page_free_wanted_privileged = 0;
316670eb 4296 }
b0d623f7 4297 }
39037602
A
4298#if CONFIG_SECLUDED_MEMORY
4299 if (vm_page_free_wanted_secluded > 0 &&
4300 avail_free_count > vm_page_free_reserved) {
4301 unsigned int available_pages;
4302 available_pages = (avail_free_count -
0a7de745 4303 vm_page_free_reserved);
39037602
A
4304 if (available_pages <
4305 vm_page_free_wanted_secluded) {
4306 need_wakeup_secluded = available_pages;
4307 vm_page_free_wanted_secluded -=
0a7de745 4308 available_pages;
39037602
A
4309 avail_free_count -= available_pages;
4310 } else {
4311 need_wakeup_secluded =
0a7de745 4312 vm_page_free_wanted_secluded;
39037602 4313 avail_free_count -=
0a7de745 4314 vm_page_free_wanted_secluded;
39037602
A
4315 vm_page_free_wanted_secluded = 0;
4316 }
4317 }
4318#endif /* CONFIG_SECLUDED_MEMORY */
316670eb
A
4319 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
4320 unsigned int available_pages;
55e303ae 4321
316670eb 4322 available_pages = avail_free_count - vm_page_free_reserved;
55e303ae 4323
316670eb
A
4324 if (available_pages >= vm_page_free_wanted) {
4325 need_wakeup = vm_page_free_wanted;
4326 vm_page_free_wanted = 0;
4327 } else {
4328 need_wakeup = available_pages;
4329 vm_page_free_wanted -= available_pages;
4330 }
4331 }
4332 lck_mtx_unlock(&vm_page_queue_free_lock);
55e303ae 4333
cb323159
A
4334 priv_wakeup_event = NULL;
4335 secluded_wakeup_event = NULL;
4336 normal_wakeup_event = NULL;
4337
4338 priv_wakeup_all = FALSE;
4339 secluded_wakeup_all = FALSE;
4340 normal_wakeup_all = FALSE;
4341
4342
316670eb
A
4343 if (need_priv_wakeup != 0) {
4344 /*
4345 * There shouldn't be that many VM-privileged threads,
4346 * so let's wake them all up, even if we don't quite
4347 * have enough pages to satisfy them all.
4348 */
cb323159
A
4349 priv_wakeup_event = (event_t)&vm_page_free_wanted_privileged;
4350 priv_wakeup_all = TRUE;
316670eb 4351 }
39037602
A
4352#if CONFIG_SECLUDED_MEMORY
4353 if (need_wakeup_secluded != 0 &&
4354 vm_page_free_wanted_secluded == 0) {
cb323159
A
4355 secluded_wakeup_event = (event_t)&vm_page_free_wanted_secluded;
4356 secluded_wakeup_all = TRUE;
4357 need_wakeup_secluded = 0;
39037602 4358 } else {
cb323159 4359 secluded_wakeup_event = (event_t)&vm_page_free_wanted_secluded;
39037602
A
4360 }
4361#endif /* CONFIG_SECLUDED_MEMORY */
316670eb
A
4362 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
4363 /*
4364 * We don't expect to have any more waiters
4365 * after this, so let's wake them all up at
4366 * once.
4367 */
cb323159
A
4368 normal_wakeup_event = (event_t) &vm_page_free_count;
4369 normal_wakeup_all = TRUE;
4370 need_wakeup = 0;
0a7de745 4371 } else {
cb323159
A
4372 normal_wakeup_event = (event_t) &vm_page_free_count;
4373 }
4374
4375 if (priv_wakeup_event ||
4376#if CONFIG_SECLUDED_MEMORY
4377 secluded_wakeup_event ||
4378#endif /* CONFIG_SECLUDED_MEMORY */
4379 normal_wakeup_event) {
4380 if (vps_dynamic_priority_enabled == TRUE) {
4381 thread_t thread_woken = NULL;
4382
4383 if (priv_wakeup_all == TRUE) {
4384 wakeup_all_with_inheritor(priv_wakeup_event, THREAD_AWAKENED);
4385 }
4386
4387#if CONFIG_SECLUDED_MEMORY
4388 if (secluded_wakeup_all == TRUE) {
4389 wakeup_all_with_inheritor(secluded_wakeup_event, THREAD_AWAKENED);
4390 }
4391
4392 while (need_wakeup_secluded-- != 0) {
4393 /*
4394 * Wake up one waiter per page we just released.
4395 */
4396 wakeup_one_with_inheritor(secluded_wakeup_event, THREAD_AWAKENED, LCK_WAKE_DO_NOT_TRANSFER_PUSH, &thread_woken);
4397 thread_deallocate(thread_woken);
4398 }
4399#endif /* CONFIG_SECLUDED_MEMORY */
4400
4401 if (normal_wakeup_all == TRUE) {
4402 wakeup_all_with_inheritor(normal_wakeup_event, THREAD_AWAKENED);
4403 }
4404
4405 while (need_wakeup-- != 0) {
4406 /*
4407 * Wake up one waiter per page we just released.
4408 */
4409 wakeup_one_with_inheritor(normal_wakeup_event, THREAD_AWAKENED, LCK_WAKE_DO_NOT_TRANSFER_PUSH, &thread_woken);
4410 thread_deallocate(thread_woken);
4411 }
4412 } else {
0a7de745 4413 /*
cb323159 4414 * Non-priority-aware wakeups.
0a7de745 4415 */
cb323159
A
4416
4417 if (priv_wakeup_all == TRUE) {
4418 thread_wakeup(priv_wakeup_event);
4419 }
4420
4421#if CONFIG_SECLUDED_MEMORY
4422 if (secluded_wakeup_all == TRUE) {
4423 thread_wakeup(secluded_wakeup_event);
4424 }
4425
4426 while (need_wakeup_secluded-- != 0) {
4427 /*
4428 * Wake up one waiter per page we just released.
4429 */
4430 thread_wakeup_one(secluded_wakeup_event);
4431 }
4432
4433#endif /* CONFIG_SECLUDED_MEMORY */
4434 if (normal_wakeup_all == TRUE) {
4435 thread_wakeup(normal_wakeup_event);
4436 }
4437
4438 while (need_wakeup-- != 0) {
4439 /*
4440 * Wake up one waiter per page we just released.
4441 */
4442 thread_wakeup_one(normal_wakeup_event);
4443 }
0a7de745 4444 }
55e303ae 4445 }
2d21ac55 4446
316670eb 4447 VM_CHECK_MEMORYSTATUS;
b0d623f7 4448 }
55e303ae
A
4449 }
4450}
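/*
 * Sketch of the batching pattern vm_page_free_list() is written for
 * (hypothetical helper; the same pattern appears in vm_page_secluded_drain()
 * above).  Each page is assumed to be busy, off the paging queues
 * (vmp_q_state == VM_PAGE_NOT_ON_Q) and already detached from its object;
 * chaining through vmp_snext lets the free-list lock be taken once per
 * batch instead of once per page.
 */
static void
example_free_page_batch(vm_page_t *pages, unsigned int count)
{
        vm_page_t       local_freeq = VM_PAGE_NULL;
        unsigned int    i;

        for (i = 0; i < count; i++) {
                pages[i]->vmp_snext = local_freeq;
                local_freeq = pages[i];
        }
        if (local_freeq != VM_PAGE_NULL) {
                vm_page_free_list(local_freeq, TRUE);   /* prepare_object == TRUE */
        }
}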
4451
4452
1c79356b
A
4453/*
4454 * vm_page_wire:
4455 *
4456 * Mark this page as wired down by yet
4457 * another map, removing it from paging queues
4458 * as necessary.
4459 *
4460 * The page's object and the page queues must be locked.
4461 */
3e170ce0
A
4462
4463
1c79356b
A
4464void
4465vm_page_wire(
39037602 4466 vm_page_t mem,
3e170ce0 4467 vm_tag_t tag,
0a7de745 4468 boolean_t check_memorystatus)
1c79356b 4469{
0a7de745 4470 vm_object_t m_object;
39037602
A
4471
4472 m_object = VM_PAGE_OBJECT(mem);
1c79356b 4473
d9a64523 4474// dbgLog(current_thread(), mem->vmp_offset, m_object, 1); /* (TEST/DEBUG) */
1c79356b
A
4475
4476 VM_PAGE_CHECK(mem);
39037602
A
4477 if (m_object) {
4478 vm_object_lock_assert_exclusive(m_object);
b0d623f7
A
4479 } else {
4480 /*
4481 * In theory, the page should be in an object before it
4482 * gets wired, since we need to hold the object lock
4483 * to update some fields in the page structure.
4484 * However, some code (i386 pmap, for example) might want
4485 * to wire a page before it gets inserted into an object.
4486 * That's somewhat OK, as long as nobody else can get to
4487 * that page and update it at the same time.
4488 */
4489 }
39037602 4490 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
0a7de745
A
4491 if (!VM_PAGE_WIRED(mem)) {
4492 if (mem->vmp_laundry) {
39037602 4493 vm_pageout_steal_laundry(mem, TRUE);
0a7de745 4494 }
39037602
A
4495
4496 vm_page_queues_remove(mem, TRUE);
4497
d9a64523
A
4498 assert(mem->vmp_wire_count == 0);
4499 mem->vmp_q_state = VM_PAGE_IS_WIRED;
b0d623f7 4500
39037602 4501 if (m_object) {
5ba3f43e
A
4502 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
4503 VM_OBJECT_WIRED_PAGE_ADD(m_object, mem);
4504 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, tag);
3e170ce0 4505
39037602 4506 assert(m_object->resident_page_count >=
0a7de745 4507 m_object->wired_page_count);
39037602 4508 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
b0d623f7
A
4509 assert(vm_page_purgeable_count > 0);
4510 OSAddAtomic(-1, &vm_page_purgeable_count);
4511 OSAddAtomic(1, &vm_page_purgeable_wired_count);
4512 }
39037602 4513 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
0a7de745 4514 m_object->purgable == VM_PURGABLE_EMPTY) &&
d9a64523 4515 m_object->vo_owner != TASK_NULL) {
0a7de745
A
4516 task_t owner;
4517 int ledger_idx_volatile;
4518 int ledger_idx_nonvolatile;
4519 int ledger_idx_volatile_compressed;
4520 int ledger_idx_nonvolatile_compressed;
4521 boolean_t do_footprint;
d9a64523
A
4522
4523 owner = VM_OBJECT_OWNER(m_object);
4524 vm_object_ledger_tag_ledgers(
4525 m_object,
4526 &ledger_idx_volatile,
4527 &ledger_idx_nonvolatile,
4528 &ledger_idx_volatile_compressed,
4529 &ledger_idx_nonvolatile_compressed,
4530 &do_footprint);
fe8ab488
A
4531 /* less volatile bytes */
4532 ledger_debit(owner->ledger,
0a7de745
A
4533 ledger_idx_volatile,
4534 PAGE_SIZE);
fe8ab488
A
4535 /* more not-quite-volatile bytes */
4536 ledger_credit(owner->ledger,
0a7de745
A
4537 ledger_idx_nonvolatile,
4538 PAGE_SIZE);
d9a64523
A
4539 if (do_footprint) {
4540 /* more footprint */
4541 ledger_credit(owner->ledger,
0a7de745
A
4542 task_ledgers.phys_footprint,
4543 PAGE_SIZE);
d9a64523 4544 }
fe8ab488 4545 }
39037602 4546 if (m_object->all_reusable) {
b0d623f7
A
4547 /*
4548 * Wired pages are not counted as "re-usable"
4549 * in "all_reusable" VM objects, so nothing
4550 * to do here.
4551 */
d9a64523 4552 } else if (mem->vmp_reusable) {
b0d623f7
A
4553 /*
4554 * This page is not "re-usable" when it's
4555 * wired, so adjust its state and the
4556 * accounting.
4557 */
39037602 4558 vm_object_reuse_pages(m_object,
0a7de745
A
4559 mem->vmp_offset,
4560 mem->vmp_offset + PAGE_SIZE_64,
4561 FALSE);
b0d623f7
A
4562 }
4563 }
d9a64523 4564 assert(!mem->vmp_reusable);
b0d623f7 4565
0a7de745 4566 if (!mem->vmp_private && !mem->vmp_fictitious && !mem->vmp_gobbled) {
1c79356b 4567 vm_page_wire_count++;
0a7de745
A
4568 }
4569 if (mem->vmp_gobbled) {
1c79356b 4570 vm_page_gobble_count--;
0a7de745 4571 }
d9a64523 4572 mem->vmp_gobbled = FALSE;
593a1d5f 4573
3e170ce0
A
4574 if (check_memorystatus == TRUE) {
4575 VM_CHECK_MEMORYSTATUS;
4576 }
1c79356b 4577 }
d9a64523
A
4578 assert(!mem->vmp_gobbled);
4579 assert(mem->vmp_q_state == VM_PAGE_IS_WIRED);
4580 mem->vmp_wire_count++;
4581 if (__improbable(mem->vmp_wire_count == 0)) {
39037602
A
4582 panic("vm_page_wire(%p): wire_count overflow", mem);
4583 }
b0d623f7 4584 VM_PAGE_CHECK(mem);
1c79356b
A
4585}
4586
1c79356b
A
4587/*
4588 * vm_page_unwire:
4589 *
4590 * Release one wiring of this page, potentially
4591 * enabling it to be paged again.
4592 *
4593 * The page's object and the page queues must be locked.
4594 */
4595void
4596vm_page_unwire(
0a7de745
A
4597 vm_page_t mem,
4598 boolean_t queueit)
1c79356b 4599{
0a7de745 4600 vm_object_t m_object;
39037602
A
4601
4602 m_object = VM_PAGE_OBJECT(mem);
1c79356b 4603
d9a64523 4604// dbgLog(current_thread(), mem->vmp_offset, m_object, 0); /* (TEST/DEBUG) */
1c79356b
A
4605
4606 VM_PAGE_CHECK(mem);
b0d623f7 4607 assert(VM_PAGE_WIRED(mem));
d9a64523
A
4608 assert(mem->vmp_wire_count > 0);
4609 assert(!mem->vmp_gobbled);
39037602
A
4610 assert(m_object != VM_OBJECT_NULL);
4611 vm_object_lock_assert_exclusive(m_object);
4612 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
d9a64523 4613 if (--mem->vmp_wire_count == 0) {
d9a64523 4614 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
39037602 4615
5ba3f43e
A
4616 VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
4617 VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
4618 VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
d9a64523 4619 if (!mem->vmp_private && !mem->vmp_fictitious) {
4bd07ac2
A
4620 vm_page_wire_count--;
4621 }
5ba3f43e 4622
39037602 4623 assert(m_object->resident_page_count >=
0a7de745 4624 m_object->wired_page_count);
39037602 4625 if (m_object->purgable == VM_PURGABLE_VOLATILE) {
b0d623f7
A
4626 OSAddAtomic(+1, &vm_page_purgeable_count);
4627 assert(vm_page_purgeable_wired_count > 0);
4628 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
4629 }
39037602 4630 if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
0a7de745 4631 m_object->purgable == VM_PURGABLE_EMPTY) &&
d9a64523 4632 m_object->vo_owner != TASK_NULL) {
0a7de745
A
4633 task_t owner;
4634 int ledger_idx_volatile;
4635 int ledger_idx_nonvolatile;
4636 int ledger_idx_volatile_compressed;
4637 int ledger_idx_nonvolatile_compressed;
4638 boolean_t do_footprint;
d9a64523
A
4639
4640 owner = VM_OBJECT_OWNER(m_object);
4641 vm_object_ledger_tag_ledgers(
4642 m_object,
4643 &ledger_idx_volatile,
4644 &ledger_idx_nonvolatile,
4645 &ledger_idx_volatile_compressed,
4646 &ledger_idx_nonvolatile_compressed,
4647 &do_footprint);
fe8ab488
A
4648 /* more volatile bytes */
4649 ledger_credit(owner->ledger,
0a7de745
A
4650 ledger_idx_volatile,
4651 PAGE_SIZE);
fe8ab488
A
4652 /* less not-quite-volatile bytes */
4653 ledger_debit(owner->ledger,
0a7de745
A
4654 ledger_idx_nonvolatile,
4655 PAGE_SIZE);
d9a64523
A
4656 if (do_footprint) {
4657 /* less footprint */
4658 ledger_debit(owner->ledger,
0a7de745
A
4659 task_ledgers.phys_footprint,
4660 PAGE_SIZE);
d9a64523 4661 }
fe8ab488 4662 }
39037602 4663 assert(m_object != kernel_object);
d9a64523 4664 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
0b4c1975
A
4665
4666 if (queueit == TRUE) {
39037602 4667 if (m_object->purgable == VM_PURGABLE_EMPTY) {
0b4c1975
A
4668 vm_page_deactivate(mem);
4669 } else {
4670 vm_page_activate(mem);
4671 }
2d21ac55 4672 }
593a1d5f 4673
6d2010ae 4674 VM_CHECK_MEMORYSTATUS;
1c79356b 4675 }
b0d623f7 4676 VM_PAGE_CHECK(mem);
1c79356b
A
4677}
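/*
 * Sketch of the wire/unwire pairing under the required locks (hypothetical
 * helper; VM_KERN_MEMORY_OSFMK is just an example vm_tag_t).  Both routines
 * expect the page's object to be locked exclusively and the page queues to
 * be locked.
 */
static void
example_wire_then_unwire(vm_object_t object, vm_page_t m)
{
        vm_object_lock_assert_exclusive(object);

        vm_page_lockspin_queues();
        vm_page_wire(m, VM_KERN_MEMORY_OSFMK, TRUE);    /* check_memorystatus */
        vm_page_unlock_queues();

        /* ... the page cannot be paged out while it stays wired ... */

        vm_page_lockspin_queues();
        vm_page_unwire(m, TRUE);        /* queueit: return it to a pageable queue */
        vm_page_unlock_queues();
}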
4678
4679/*
4680 * vm_page_deactivate:
4681 *
4682 * Returns the given page to the inactive list,
4683 * indicating that no physical maps have access
4684 * to this page. [Used by the physical mapping system.]
4685 *
4686 * The page queues must be locked.
4687 */
4688void
4689vm_page_deactivate(
0a7de745 4690 vm_page_t m)
b0d623f7
A
4691{
4692 vm_page_deactivate_internal(m, TRUE);
4693}
4694
4695
4696void
4697vm_page_deactivate_internal(
0a7de745
A
4698 vm_page_t m,
4699 boolean_t clear_hw_reference)
1c79356b 4700{
0a7de745 4701 vm_object_t m_object;
39037602
A
4702
4703 m_object = VM_PAGE_OBJECT(m);
2d21ac55 4704
1c79356b 4705 VM_PAGE_CHECK(m);
39037602
A
4706 assert(m_object != kernel_object);
4707 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
1c79356b 4708
39037602
A
4709// dbgLog(VM_PAGE_GET_PHYS_PAGE(m), vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
4710 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1c79356b
A
4711 /*
4712 * This page is no longer very interesting. If it was
4713 * interesting (active or inactive/referenced), then we
4714 * clear the reference bit and (re)enter it in the
4715 * inactive queue. Note wired pages should not have
4716 * their reference bit cleared.
4717 */
0a7de745 4718 assert( !(m->vmp_absent && !m->vmp_unusual));
0b4c1975 4719
0a7de745 4720 if (m->vmp_gobbled) { /* can this happen? */
b0d623f7 4721 assert( !VM_PAGE_WIRED(m));
2d21ac55 4722
0a7de745 4723 if (!m->vmp_private && !m->vmp_fictitious) {
1c79356b 4724 vm_page_wire_count--;
0a7de745 4725 }
1c79356b 4726 vm_page_gobble_count--;
d9a64523 4727 m->vmp_gobbled = FALSE;
1c79356b 4728 }
316670eb
A
4729 /*
4730 * if this page is currently on the pageout queue, we can't do the
3e170ce0 4731 * vm_page_queues_remove (which doesn't handle the pageout queue case)
316670eb
A
4732 * and we can't remove it manually since we would need the object lock
4733 * (which is not required here) to decrement the activity_in_progress
4734 * reference which is held on the object while the page is in the pageout queue...
4735 * just let the normal laundry processing proceed
39037602 4736 */
d9a64523
A
4737 if (m->vmp_laundry || m->vmp_private || m->vmp_fictitious ||
4738 (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
4739 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
39037602 4740 VM_PAGE_WIRED(m)) {
0a7de745 4741 return;
39037602 4742 }
0a7de745 4743 if (!m->vmp_absent && clear_hw_reference == TRUE) {
39037602 4744 pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));
0a7de745 4745 }
2d21ac55 4746
d9a64523
A
4747 m->vmp_reference = FALSE;
4748 m->vmp_no_cache = FALSE;
2d21ac55 4749
0a7de745 4750 if (!VM_PAGE_INACTIVE(m)) {
39037602 4751 vm_page_queues_remove(m, FALSE);
0b4e3aa0 4752
39037602 4753 if (!VM_DYNAMIC_PAGING_ENABLED() &&
d9a64523 4754 m->vmp_dirty && m_object->internal &&
39037602 4755 (m_object->purgable == VM_PURGABLE_DENY ||
0a7de745
A
4756 m_object->purgable == VM_PURGABLE_NONVOLATILE ||
4757 m_object->purgable == VM_PURGABLE_VOLATILE)) {
3e170ce0 4758 vm_page_check_pageable_safe(m);
0a7de745 4759 vm_page_queue_enter(&vm_page_queue_throttled, m, vmp_pageq);
d9a64523 4760 m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;
2d21ac55 4761 vm_page_throttled_count++;
9bccf70c 4762 } else {
39037602 4763 if (m_object->named && m_object->ref_count == 1) {
0a7de745 4764 vm_page_speculate(m, FALSE);
b0d623f7 4765#if DEVELOPMENT || DEBUG
2d21ac55 4766 vm_page_speculative_recreated++;
b0d623f7 4767#endif
2d21ac55 4768 } else {
3e170ce0 4769 vm_page_enqueue_inactive(m, FALSE);
2d21ac55 4770 }
9bccf70c 4771 }
1c79356b
A
4772 }
4773}
4774
316670eb
A
4775/*
4776 * vm_page_enqueue_cleaned
4777 *
4778 * Put the page on the cleaned queue, mark it cleaned, etc.
 4779 * Being on the cleaned queue (vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
4780 * does ** NOT ** guarantee that the page is clean!
4781 *
4782 * Call with the queues lock held.
4783 */
4784
0a7de745
A
4785void
4786vm_page_enqueue_cleaned(vm_page_t m)
316670eb 4787{
0a7de745 4788 vm_object_t m_object;
39037602
A
4789
4790 m_object = VM_PAGE_OBJECT(m);
4791
4792 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4793 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
d9a64523 4794 assert( !(m->vmp_absent && !m->vmp_unusual));
5ba3f43e
A
4795
4796 if (VM_PAGE_WIRED(m)) {
4797 return;
4798 }
316670eb 4799
d9a64523 4800 if (m->vmp_gobbled) {
0a7de745 4801 if (!m->vmp_private && !m->vmp_fictitious) {
316670eb 4802 vm_page_wire_count--;
0a7de745 4803 }
316670eb 4804 vm_page_gobble_count--;
d9a64523 4805 m->vmp_gobbled = FALSE;
316670eb
A
4806 }
4807 /*
4808 * if this page is currently on the pageout queue, we can't do the
3e170ce0 4809 * vm_page_queues_remove (which doesn't handle the pageout queue case)
316670eb
A
4810 * and we can't remove it manually since we would need the object lock
4811 * (which is not required here) to decrement the activity_in_progress
4812 * reference which is held on the object while the page is in the pageout queue...
4813 * just let the normal laundry processing proceed
4814 */
d9a64523
A
4815 if (m->vmp_laundry || m->vmp_private || m->vmp_fictitious ||
4816 (m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) ||
4817 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
0a7de745 4818 return;
39037602
A
4819 }
4820 vm_page_queues_remove(m, FALSE);
316670eb 4821
3e170ce0 4822 vm_page_check_pageable_safe(m);
0a7de745 4823 vm_page_queue_enter(&vm_page_queue_cleaned, m, vmp_pageq);
d9a64523 4824 m->vmp_q_state = VM_PAGE_ON_INACTIVE_CLEANED_Q;
316670eb
A
4825 vm_page_cleaned_count++;
4826
316670eb 4827 vm_page_inactive_count++;
39037602 4828 if (m_object->internal) {
39236c6e
A
4829 vm_page_pageable_internal_count++;
4830 } else {
4831 vm_page_pageable_external_count++;
4832 }
39037602 4833#if CONFIG_BACKGROUND_QUEUE
0a7de745 4834 if (m->vmp_in_background) {
39037602 4835 vm_page_add_to_backgroundq(m, TRUE);
0a7de745 4836 }
39037602 4837#endif
d9a64523 4838 VM_PAGEOUT_DEBUG(vm_pageout_enqueued_cleaned, 1);
316670eb
A
4839}
4840
1c79356b
A
4841/*
4842 * vm_page_activate:
4843 *
4844 * Put the specified page on the active list (if appropriate).
4845 *
4846 * The page queues must be locked.
4847 */
4848
4849void
4850vm_page_activate(
0a7de745 4851 vm_page_t m)
1c79356b 4852{
0a7de745 4853 vm_object_t m_object;
39037602
A
4854
4855 m_object = VM_PAGE_OBJECT(m);
4856
1c79356b 4857 VM_PAGE_CHECK(m);
0a7de745 4858#ifdef FIXME_4778297
39037602 4859 assert(m_object != kernel_object);
91447636 4860#endif
39037602
A
4861 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4862 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
d9a64523 4863 assert( !(m->vmp_absent && !m->vmp_unusual));
0b4c1975 4864
d9a64523 4865 if (m->vmp_gobbled) {
b0d623f7 4866 assert( !VM_PAGE_WIRED(m));
0a7de745 4867 if (!m->vmp_private && !m->vmp_fictitious) {
1c79356b 4868 vm_page_wire_count--;
0a7de745 4869 }
1c79356b 4870 vm_page_gobble_count--;
d9a64523 4871 m->vmp_gobbled = FALSE;
1c79356b 4872 }
316670eb
A
4873 /*
4874 * if this page is currently on the pageout queue, we can't do the
3e170ce0 4875 * vm_page_queues_remove (which doesn't handle the pageout queue case)
316670eb
A
4876 * and we can't remove it manually since we would need the object lock
4877 * (which is not required here) to decrement the activity_in_progress
4878 * reference which is held on the object while the page is in the pageout queue...
4879 * just let the normal laundry processing proceed
4880 */
d9a64523
A
4881 if (m->vmp_laundry || m->vmp_private || m->vmp_fictitious ||
4882 (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
0a7de745 4883 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
1c79356b 4884 return;
0a7de745 4885 }
1c79356b 4886
2d21ac55 4887#if DEBUG
0a7de745
A
4888 if (m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q) {
4889 panic("vm_page_activate: already active");
4890 }
2d21ac55
A
4891#endif
4892
d9a64523 4893 if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
2d21ac55
A
4894 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
4895 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
4896 }
0a7de745 4897
39037602 4898 vm_page_queues_remove(m, FALSE);
2d21ac55 4899
0a7de745 4900 if (!VM_PAGE_WIRED(m)) {
3e170ce0 4901 vm_page_check_pageable_safe(m);
0a7de745
A
4902 if (!VM_DYNAMIC_PAGING_ENABLED() &&
4903 m->vmp_dirty && m_object->internal &&
39037602 4904 (m_object->purgable == VM_PURGABLE_DENY ||
0a7de745
A
4905 m_object->purgable == VM_PURGABLE_NONVOLATILE ||
4906 m_object->purgable == VM_PURGABLE_VOLATILE)) {
4907 vm_page_queue_enter(&vm_page_queue_throttled, m, vmp_pageq);
d9a64523 4908 m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;
2d21ac55 4909 vm_page_throttled_count++;
9bccf70c 4910 } else {
39037602
A
4911#if CONFIG_SECLUDED_MEMORY
4912 if (secluded_for_filecache &&
4913 vm_page_secluded_target != 0 &&
4914 num_tasks_can_use_secluded_mem == 0 &&
5ba3f43e 4915 m_object->eligible_for_secluded) {
0a7de745 4916 vm_page_queue_enter(&vm_page_queue_secluded, m, vmp_pageq);
d9a64523 4917 m->vmp_q_state = VM_PAGE_ON_SECLUDED_Q;
39037602 4918 vm_page_secluded_count++;
cb323159 4919 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
39037602
A
4920 vm_page_secluded_count_inuse++;
4921 assert(!m_object->internal);
4922// vm_page_pageable_external_count++;
4923 } else
4924#endif /* CONFIG_SECLUDED_MEMORY */
4925 vm_page_enqueue_active(m, FALSE);
9bccf70c 4926 }
d9a64523
A
4927 m->vmp_reference = TRUE;
4928 m->vmp_no_cache = FALSE;
1c79356b 4929 }
b0d623f7 4930 VM_PAGE_CHECK(m);
2d21ac55
A
4931}
4932
4933
4934/*
4935 * vm_page_speculate:
4936 *
4937 * Put the specified page on the speculative list (if appropriate).
4938 *
4939 * The page queues must be locked.
4940 */
4941void
4942vm_page_speculate(
0a7de745
A
4943 vm_page_t m,
4944 boolean_t new)
2d21ac55 4945{
0a7de745
A
4946 struct vm_speculative_age_q *aq;
4947 vm_object_t m_object;
39037602
A
4948
4949 m_object = VM_PAGE_OBJECT(m);
2d21ac55
A
4950
4951 VM_PAGE_CHECK(m);
3e170ce0
A
4952 vm_page_check_pageable_safe(m);
4953
39037602
A
4954 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
4955 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
d9a64523 4956 assert( !(m->vmp_absent && !m->vmp_unusual));
39037602 4957 assert(m_object->internal == FALSE);
b0d623f7 4958
316670eb
A
4959 /*
4960 * if this page is currently on the pageout queue, we can't do the
3e170ce0 4961 * vm_page_queues_remove (which doesn't handle the pageout queue case)
316670eb
A
4962 * and we can't remove it manually since we would need the object lock
4963 * (which is not required here) to decrement the activity_in_progress
4964 * reference which is held on the object while the page is in the pageout queue...
4965 * just let the normal laundry processing proceed
4966 */
d9a64523
A
4967 if (m->vmp_laundry || m->vmp_private || m->vmp_fictitious ||
4968 (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
0a7de745 4969 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
6d2010ae 4970 return;
0a7de745 4971 }
0b4c1975 4972
39037602 4973 vm_page_queues_remove(m, FALSE);
b0d623f7 4974
0a7de745
A
4975 if (!VM_PAGE_WIRED(m)) {
4976 mach_timespec_t ts;
b0d623f7
A
4977 clock_sec_t sec;
4978 clock_nsec_t nsec;
2d21ac55 4979
0a7de745 4980 clock_get_system_nanotime(&sec, &nsec);
b0d623f7
A
4981 ts.tv_sec = (unsigned int) sec;
4982 ts.tv_nsec = nsec;
2d21ac55
A
4983
4984 if (vm_page_speculative_count == 0) {
2d21ac55
A
4985 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4986 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
4987
4988 aq = &vm_page_queue_speculative[speculative_age_index];
4989
0a7de745 4990 /*
2d21ac55
A
4991 * set the timer to begin a new group
4992 */
d9a64523
A
4993 aq->age_ts.tv_sec = vm_pageout_state.vm_page_speculative_q_age_ms / 1000;
4994 aq->age_ts.tv_nsec = (vm_pageout_state.vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
cb323159 4995
2d21ac55
A
4996 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
4997 } else {
4998 aq = &vm_page_queue_speculative[speculative_age_index];
4999
5000 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
0a7de745 5001 speculative_age_index++;
2d21ac55 5002
0a7de745
A
5003 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
5004 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
5005 }
2d21ac55 5006 if (speculative_age_index == speculative_steal_index) {
0a7de745 5007 speculative_steal_index = speculative_age_index + 1;
2d21ac55 5008
0a7de745
A
5009 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
5010 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
5011 }
2d21ac55
A
5012 }
5013 aq = &vm_page_queue_speculative[speculative_age_index];
5014
0a7de745
A
5015 if (!vm_page_queue_empty(&aq->age_q)) {
5016 vm_page_speculate_ageit(aq);
5017 }
2d21ac55 5018
d9a64523
A
5019 aq->age_ts.tv_sec = vm_pageout_state.vm_page_speculative_q_age_ms / 1000;
5020 aq->age_ts.tv_nsec = (vm_pageout_state.vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
cb323159 5021
2d21ac55
A
5022 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
5023 }
5024 }
d9a64523
A
5025 vm_page_enqueue_tail(&aq->age_q, &m->vmp_pageq);
5026 m->vmp_q_state = VM_PAGE_ON_SPECULATIVE_Q;
2d21ac55 5027 vm_page_speculative_count++;
39037602 5028 vm_page_pageable_external_count++;
2d21ac55
A
5029
5030 if (new == TRUE) {
39037602 5031 vm_object_lock_assert_exclusive(m_object);
6d2010ae 5032
0a7de745 5033 m_object->pages_created++;
b0d623f7 5034#if DEVELOPMENT || DEBUG
2d21ac55 5035 vm_page_speculative_created++;
b0d623f7 5036#endif
2d21ac55
A
5037 }
5038 }
b0d623f7 5039 VM_PAGE_CHECK(m);
2d21ac55
A
5040}
5041
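/*
 * Illustrative sketch, not part of xnu: a standalone, user-space analogue of
 * the aging-bucket scheme used by vm_page_speculate() above.  New speculative
 * pages land in the "current" bucket until its deadline expires, at which
 * point the code advances to the next bucket in the ring and arms a fresh
 * deadline.  The bucket contents, the clock source and all names below are
 * simplified stand-ins, not kernel APIs.
 */
#include <stddef.h>
#include <time.h>

#define AGE_BUCKETS  4
#define BUCKET_AGE_S 5               /* plays the role of vm_page_speculative_q_age_ms */

struct age_bucket {
	time_t deadline;             /* when this bucket stops accepting new pages */
	size_t npages;
};

static struct age_bucket spec_buckets[AGE_BUCKETS];
static unsigned          spec_cur;
static size_t            spec_total;

static void
speculative_add(time_t now)
{
	struct age_bucket *b;

	if (spec_total == 0) {
		/* queue went empty: restart at bucket 0 and arm a new group */
		spec_cur = 0;
		b = &spec_buckets[spec_cur];
		b->deadline = now + BUCKET_AGE_S;
	} else {
		b = &spec_buckets[spec_cur];
		if (now >= b->deadline) {
			/*
			 * current group has aged out: move to the next bucket in the
			 * ring and re-arm it (the real code first merges a non-empty
			 * bucket into the "aged" bin via vm_page_speculate_ageit()).
			 */
			spec_cur = (spec_cur + 1) % AGE_BUCKETS;
			b = &spec_buckets[spec_cur];
			b->npages = 0;       /* modelled as already merged away */
			b->deadline = now + BUCKET_AGE_S;
		}
	}
	b->npages++;
	spec_total++;
}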
5042
5043/*
5044 * move pages from the specified aging bin to
5045 * the speculative bin that pageout_scan claims from
5046 *
5047 * The page queues must be locked.
5048 */
5049void
5050vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
5051{
0a7de745
A
5052 struct vm_speculative_age_q *sq;
5053 vm_page_t t;
2d21ac55
A
5054
5055 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
5056
39037602 5057 if (vm_page_queue_empty(&sq->age_q)) {
0a7de745 5058 sq->age_q.next = aq->age_q.next;
2d21ac55 5059 sq->age_q.prev = aq->age_q.prev;
0a7de745 5060
39037602 5061 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.next);
d9a64523 5062 t->vmp_pageq.prev = VM_PAGE_PACK_PTR(&sq->age_q);
2d21ac55 5063
39037602 5064 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
d9a64523 5065 t->vmp_pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);
2d21ac55 5066 } else {
0a7de745 5067 t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
d9a64523 5068 t->vmp_pageq.next = aq->age_q.next;
0a7de745 5069
39037602 5070 t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.next);
d9a64523 5071 t->vmp_pageq.prev = sq->age_q.prev;
2d21ac55 5072
39037602 5073 t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.prev);
d9a64523 5074 t->vmp_pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);
2d21ac55
A
5075
5076 sq->age_q.prev = aq->age_q.prev;
1c79356b 5077 }
39037602 5078 vm_page_queue_init(&aq->age_q);
2d21ac55
A
5079}
5080
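/*
 * Illustrative sketch, not part of xnu: the O(1) circular-queue splice that
 * vm_page_speculate_ageit() above performs, restated with plain pointers.
 * Every node of 'src' moves to the tail of 'dst' without walking the list,
 * and 'src' is left empty.  The real queues use packed pointers
 * (VM_PAGE_PACK_PTR / VM_PAGE_UNPACK_PTR) but the pointer surgery is the same.
 */
#include <stdbool.h>

struct qnode {
	struct qnode *next;
	struct qnode *prev;
};

static void
q_init(struct qnode *head)
{
	head->next = head->prev = head;        /* empty circular queue: head points at itself */
}

static bool
q_empty(const struct qnode *head)
{
	return head->next == head;
}

/* append all of src's nodes to the tail of dst, leaving src empty */
static void
q_splice_tail(struct qnode *dst, struct qnode *src)
{
	if (q_empty(src)) {
		return;
	}
	src->next->prev = dst->prev;           /* first src node now follows dst's old tail */
	dst->prev->next = src->next;           /* (works for an empty dst too: dst->prev == dst) */
	src->prev->next = dst;                 /* last src node becomes dst's new tail... */
	dst->prev = src->prev;                 /* ...and dst records it */
	q_init(src);                           /* src is now empty, as after vm_page_queue_init() */
}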
5081
5082void
5083vm_page_lru(
0a7de745 5084 vm_page_t m)
2d21ac55
A
5085{
5086 VM_PAGE_CHECK(m);
39037602
A
5087 assert(VM_PAGE_OBJECT(m) != kernel_object);
5088 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
2d21ac55 5089
39037602 5090 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
d9a64523
A
5091
5092 if (m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q) {
0a7de745
A
5093 /*
5094 * we don't need to do all the other work that
d9a64523
A
5095 * vm_page_queues_remove and vm_page_enqueue_inactive
5096 * bring along for the ride
5097 */
0a7de745 5098 assert(!m->vmp_laundry);
d9a64523 5099 assert(!m->vmp_private);
0a7de745 5100
d9a64523
A
5101 m->vmp_no_cache = FALSE;
5102
0a7de745
A
5103 vm_page_queue_remove(&vm_page_queue_inactive, m, vmp_pageq);
5104 vm_page_queue_enter(&vm_page_queue_inactive, m, vmp_pageq);
d9a64523
A
5105
5106 return;
5107 }
316670eb
A
5108 /*
5109 * if this page is currently on the pageout queue, we can't do the
3e170ce0 5110 * vm_page_queues_remove (which doesn't handle the pageout queue case)
316670eb
A
5111 * and we can't remove it manually since we would need the object lock
5112 * (which is not required here) to decrement the activity_in_progress
5113 * reference which is held on the object while the page is in the pageout queue...
5114 * just let the normal laundry processing proceed
5115 */
d9a64523
A
5116 if (m->vmp_laundry || m->vmp_private ||
5117 (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
5118 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
0a7de745 5119 VM_PAGE_WIRED(m)) {
2d21ac55 5120 return;
0a7de745 5121 }
2d21ac55 5122
d9a64523 5123 m->vmp_no_cache = FALSE;
2d21ac55 5124
39037602 5125 vm_page_queues_remove(m, FALSE);
2d21ac55 5126
3e170ce0 5127 vm_page_enqueue_inactive(m, FALSE);
1c79356b
A
5128}
5129
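/*
 * Illustrative sketch, not part of xnu: the fast path of vm_page_lru() above
 * reduces to "unlink the node and re-insert it at the tail of the same
 * circular queue", which puts the page last in line for reclaim.  Plain
 * pointers stand in for the packed vmp_pageq links.
 */
struct lru_node {
	struct lru_node *next;
	struct lru_node *prev;
};

static void
lru_touch(struct lru_node *head, struct lru_node *n)
{
	/* unlink from the current position */
	n->prev->next = n->next;
	n->next->prev = n->prev;
	/* re-insert just before the sentinel, i.e. at the tail of the queue */
	n->prev = head->prev;
	n->next = head;
	head->prev->next = n;
	head->prev = n;
}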
2d21ac55 5130
b0d623f7
A
5131void
5132vm_page_reactivate_all_throttled(void)
5133{
0a7de745
A
5134 vm_page_t first_throttled, last_throttled;
5135 vm_page_t first_active;
5136 vm_page_t m;
5137 int extra_active_count;
5138 int extra_internal_count, extra_external_count;
5139 vm_object_t m_object;
5140
5141 if (!VM_DYNAMIC_PAGING_ENABLED()) {
6d2010ae 5142 return;
0a7de745 5143 }
6d2010ae 5144
b0d623f7 5145 extra_active_count = 0;
39236c6e
A
5146 extra_internal_count = 0;
5147 extra_external_count = 0;
b0d623f7 5148 vm_page_lock_queues();
0a7de745 5149 if (!vm_page_queue_empty(&vm_page_queue_throttled)) {
b0d623f7
A
5150 /*
5151 * Switch "throttled" pages to "active".
5152 */
0a7de745 5153 vm_page_queue_iterate(&vm_page_queue_throttled, m, vmp_pageq) {
b0d623f7 5154 VM_PAGE_CHECK(m);
d9a64523 5155 assert(m->vmp_q_state == VM_PAGE_ON_THROTTLED_Q);
39037602
A
5156
5157 m_object = VM_PAGE_OBJECT(m);
6d2010ae
A
5158
5159 extra_active_count++;
39037602 5160 if (m_object->internal) {
39236c6e
A
5161 extra_internal_count++;
5162 } else {
5163 extra_external_count++;
5164 }
6d2010ae 5165
d9a64523 5166 m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
b0d623f7 5167 VM_PAGE_CHECK(m);
39037602 5168#if CONFIG_BACKGROUND_QUEUE
0a7de745 5169 if (m->vmp_in_background) {
39037602 5170 vm_page_add_to_backgroundq(m, FALSE);
0a7de745 5171 }
39037602 5172#endif
b0d623f7
A
5173 }
5174
5175 /*
5176 * Transfer the entire throttled queue to the regular LRU page queues.
5177 * We insert it at the head of the active queue, so that these pages
5178 * get re-evaluated by the LRU algorithm first, since they've been
5179 * completely out of it until now.
5180 */
39037602
A
5181 first_throttled = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
5182 last_throttled = (vm_page_t) vm_page_queue_last(&vm_page_queue_throttled);
5183 first_active = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
5184 if (vm_page_queue_empty(&vm_page_queue_active)) {
5185 vm_page_queue_active.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
b0d623f7 5186 } else {
d9a64523 5187 first_active->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
b0d623f7 5188 }
39037602 5189 vm_page_queue_active.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_throttled);
d9a64523
A
5190 first_throttled->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active);
5191 last_throttled->vmp_pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active);
b0d623f7
A
5192
5193#if DEBUG
5194 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
5195#endif
39037602 5196 vm_page_queue_init(&vm_page_queue_throttled);
b0d623f7
A
5197 /*
5198 * Adjust the global page counts.
5199 */
5200 vm_page_active_count += extra_active_count;
39236c6e
A
5201 vm_page_pageable_internal_count += extra_internal_count;
5202 vm_page_pageable_external_count += extra_external_count;
b0d623f7
A
5203 vm_page_throttled_count = 0;
5204 }
5205 assert(vm_page_throttled_count == 0);
39037602 5206 assert(vm_page_queue_empty(&vm_page_queue_throttled));
b0d623f7
A
5207 vm_page_unlock_queues();
5208}
5209
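/*
 * Illustrative sketch, not part of xnu: the pattern used by
 * vm_page_reactivate_all_throttled() above, restated over a simple
 * singly-linked list.  One walk relabels every page and tallies the
 * per-class counts; the whole list is then spliced onto the head of the
 * active list in O(1), and only at that point are the global counters
 * updated.  All names are simplified stand-ins.
 */
#include <stddef.h>
#include <stdbool.h>

struct tpage {
	struct tpage *next;
	bool          internal;
	int           state;                           /* 0 = throttled, 1 = active */
};

static size_t sk_active_count, sk_internal_count, sk_external_count;

/* returns the new head of the active list */
static struct tpage *
reactivate_all(struct tpage *throttled_head, struct tpage *active_head)
{
	size_t n = 0, n_int = 0, n_ext = 0;
	struct tpage *p, *tail = NULL;

	for (p = throttled_head; p != NULL; p = p->next) {
		p->state = 1;                          /* throttled -> active */
		if (p->internal) {
			n_int++;
		} else {
			n_ext++;
		}
		n++;
		tail = p;
	}
	if (tail == NULL) {
		return active_head;                    /* nothing was throttled */
	}
	tail->next = active_head;                      /* O(1) splice at the head of "active" */
	sk_active_count += n;                          /* fold the tallies in once */
	sk_internal_count += n_int;
	sk_external_count += n_ext;
	return throttled_head;
}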
5210
5211/*
5212 * move pages from the indicated local queue to the global active queue
5213 * it's ok to fail if we're below the hard limit and force == FALSE
5214 * the nolocks == TRUE case is to allow this function to be run on
5215 * the hibernate path
5216 */
5217
5218void
5219vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
5220{
0a7de745
A
5221 struct vpl *lq;
5222 vm_page_t first_local, last_local;
5223 vm_page_t first_active;
5224 vm_page_t m;
5225 uint32_t count = 0;
b0d623f7 5226
0a7de745 5227 if (vm_page_local_q == NULL) {
b0d623f7 5228 return;
0a7de745 5229 }
b0d623f7 5230
f427ee49 5231 lq = zpercpu_get_cpu(vm_page_local_q, lid);
b0d623f7
A
5232
5233 if (nolocks == FALSE) {
5234 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
0a7de745 5235 if (!vm_page_trylockspin_queues()) {
b0d623f7 5236 return;
0a7de745
A
5237 }
5238 } else {
b0d623f7 5239 vm_page_lockspin_queues();
0a7de745 5240 }
b0d623f7
A
5241
5242 VPL_LOCK(&lq->vpl_lock);
5243 }
5244 if (lq->vpl_count) {
5245 /*
5246 * Switch "local" pages to "active".
5247 */
39037602 5248 assert(!vm_page_queue_empty(&lq->vpl_queue));
b0d623f7 5249
0a7de745 5250 vm_page_queue_iterate(&lq->vpl_queue, m, vmp_pageq) {
b0d623f7 5251 VM_PAGE_CHECK(m);
3e170ce0 5252 vm_page_check_pageable_safe(m);
d9a64523
A
5253 assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q);
5254 assert(!m->vmp_fictitious);
b0d623f7 5255
0a7de745 5256 if (m->vmp_local_id != lid) {
b0d623f7 5257 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
0a7de745
A
5258 }
5259
d9a64523
A
5260 m->vmp_local_id = 0;
5261 m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
b0d623f7 5262 VM_PAGE_CHECK(m);
39037602 5263#if CONFIG_BACKGROUND_QUEUE
0a7de745 5264 if (m->vmp_in_background) {
39037602 5265 vm_page_add_to_backgroundq(m, FALSE);
0a7de745 5266 }
39037602 5267#endif
b0d623f7
A
5268 count++;
5269 }
0a7de745 5270 if (count != lq->vpl_count) {
b0d623f7 5271 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
0a7de745 5272 }
b0d623f7
A
5273
5274 /*
5275 * Transfer the entire local queue to the regular LRU page queues.
5276 */
39037602
A
5277 first_local = (vm_page_t) vm_page_queue_first(&lq->vpl_queue);
5278 last_local = (vm_page_t) vm_page_queue_last(&lq->vpl_queue);
5279 first_active = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
b0d623f7 5280
39037602
A
5281 if (vm_page_queue_empty(&vm_page_queue_active)) {
5282 vm_page_queue_active.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
b0d623f7 5283 } else {
d9a64523 5284 first_active->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
b0d623f7 5285 }
39037602 5286 vm_page_queue_active.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
d9a64523
A
5287 first_local->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active);
5288 last_local->vmp_pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active);
b0d623f7 5289
39037602 5290 vm_page_queue_init(&lq->vpl_queue);
b0d623f7
A
5291 /*
5292 * Adjust the global page counts.
5293 */
5294 vm_page_active_count += lq->vpl_count;
39236c6e
A
5295 vm_page_pageable_internal_count += lq->vpl_internal_count;
5296 vm_page_pageable_external_count += lq->vpl_external_count;
b0d623f7 5297 lq->vpl_count = 0;
39236c6e
A
5298 lq->vpl_internal_count = 0;
5299 lq->vpl_external_count = 0;
b0d623f7 5300 }
39037602 5301 assert(vm_page_queue_empty(&lq->vpl_queue));
b0d623f7
A
5302
5303 if (nolocks == FALSE) {
5304 VPL_UNLOCK(&lq->vpl_lock);
d9a64523
A
5305
5306 vm_page_balance_inactive(count / 4);
b0d623f7
A
5307 vm_page_unlock_queues();
5308 }
5309}
5310
1c79356b
A
5311/*
5312 * vm_page_part_zero_fill:
5313 *
5314 * Zero-fill a part of the page.
5315 */
39236c6e 5316#define PMAP_ZERO_PART_PAGE_IMPLEMENTED
1c79356b
A
5317void
5318vm_page_part_zero_fill(
0a7de745
A
5319 vm_page_t m,
5320 vm_offset_t m_pa,
5321 vm_size_t len)
1c79356b 5322{
316670eb
A
5323#if 0
5324 /*
5325 * we don't hold the page queue lock
5326 * so this check isn't safe to make
5327 */
1c79356b 5328 VM_PAGE_CHECK(m);
316670eb
A
5329#endif
5330
1c79356b 5331#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
39037602 5332 pmap_zero_part_page(VM_PAGE_GET_PHYS_PAGE(m), m_pa, len);
1c79356b 5333#else
0a7de745 5334 vm_page_t tmp;
1c79356b 5335 while (1) {
0a7de745 5336 tmp = vm_page_grab();
1c79356b
A
5337 if (tmp == VM_PAGE_NULL) {
5338 vm_page_wait(THREAD_UNINT);
5339 continue;
5340 }
0a7de745 5341 break;
1c79356b
A
5342 }
5343 vm_page_zero_fill(tmp);
0a7de745 5344 if (m_pa != 0) {
1c79356b
A
5345 vm_page_part_copy(m, 0, tmp, 0, m_pa);
5346 }
0a7de745
A
5347 if ((m_pa + len) < PAGE_SIZE) {
5348 vm_page_part_copy(m, m_pa + len, tmp,
5349 m_pa + len, PAGE_SIZE - (m_pa + len));
1c79356b 5350 }
0a7de745
A
5351 vm_page_copy(tmp, m);
5352 VM_PAGE_FREE(tmp);
1c79356b 5353#endif
1c79356b
A
5354}
5355
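/*
 * Illustrative sketch, not part of xnu: a standalone, user-space analogue of
 * the fallback path in vm_page_part_zero_fill() above, for the case where the
 * pmap layer cannot zero a sub-range of a physical page directly: build the
 * result in a scratch page, then copy it back.  The byte buffers stand in for
 * vm_page_t / pmap operations; assumes off + len <= ZF_PAGE_SIZE.
 */
#include <string.h>
#include <stddef.h>

#define ZF_PAGE_SIZE 4096

static void
part_zero_fill_fallback(unsigned char *page, size_t off, size_t len)
{
	unsigned char tmp[ZF_PAGE_SIZE];

	memset(tmp, 0, sizeof(tmp));                       /* vm_page_zero_fill(tmp) analogue */
	if (off != 0) {
		memcpy(tmp, page, off);                    /* preserve the bytes before the hole */
	}
	if (off + len < ZF_PAGE_SIZE) {
		memcpy(tmp + off + len, page + off + len,
		    ZF_PAGE_SIZE - (off + len));           /* preserve the bytes after the hole */
	}
	memcpy(page, tmp, ZF_PAGE_SIZE);                   /* vm_page_copy(tmp, m) analogue */
}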
5356/*
5357 * vm_page_zero_fill:
5358 *
5359 * Zero-fill the specified page.
5360 */
5361void
5362vm_page_zero_fill(
0a7de745 5363 vm_page_t m)
1c79356b 5364{
316670eb
A
5365#if 0
5366 /*
5367 * we don't hold the page queue lock
5368 * so this check isn't safe to make
5369 */
1c79356b 5370 VM_PAGE_CHECK(m);
316670eb 5371#endif
1c79356b 5372
39037602
A
5373// dbgTrace(0xAEAEAEAE, VM_PAGE_GET_PHYS_PAGE(m), 0); /* (BRINGUP) */
5374 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
1c79356b
A
5375}
5376
5377/*
5378 * vm_page_part_copy:
5379 *
5380 * copy part of one page to another
5381 */
5382
5383void
5384vm_page_part_copy(
0a7de745
A
5385 vm_page_t src_m,
5386 vm_offset_t src_pa,
5387 vm_page_t dst_m,
5388 vm_offset_t dst_pa,
5389 vm_size_t len)
1c79356b 5390{
316670eb
A
5391#if 0
5392 /*
5393 * we don't hold the page queue lock
5394 * so this check isn't safe to make
5395 */
1c79356b
A
5396 VM_PAGE_CHECK(src_m);
5397 VM_PAGE_CHECK(dst_m);
316670eb 5398#endif
39037602 5399 pmap_copy_part_page(VM_PAGE_GET_PHYS_PAGE(src_m), src_pa,
0a7de745 5400 VM_PAGE_GET_PHYS_PAGE(dst_m), dst_pa, len);
1c79356b
A
5401}
5402
5403/*
5404 * vm_page_copy:
5405 *
5406 * Copy one page to another
5407 */
5408
2d21ac55
A
5409int vm_page_copy_cs_validations = 0;
5410int vm_page_copy_cs_tainted = 0;
5411
1c79356b
A
5412void
5413vm_page_copy(
0a7de745
A
5414 vm_page_t src_m,
5415 vm_page_t dest_m)
1c79356b 5416{
0a7de745 5417 vm_object_t src_m_object;
39037602
A
5418
5419 src_m_object = VM_PAGE_OBJECT(src_m);
5420
316670eb
A
5421#if 0
5422 /*
5423 * we don't hold the page queue lock
5424 * so this check isn't safe to make
5425 */
1c79356b
A
5426 VM_PAGE_CHECK(src_m);
5427 VM_PAGE_CHECK(dest_m);
316670eb 5428#endif
39037602 5429 vm_object_lock_assert_held(src_m_object);
1c79356b 5430
39037602
A
5431 if (src_m_object != VM_OBJECT_NULL &&
5432 src_m_object->code_signed) {
2d21ac55 5433 /*
4a3eedf9 5434 * We're copying a page from a code-signed object.
2d21ac55
A
5435 * Whoever ends up mapping the copy page might care about
5436 * the original page's integrity, so let's validate the
5437 * source page now.
5438 */
5439 vm_page_copy_cs_validations++;
f427ee49 5440 vm_page_validate_cs(src_m, PAGE_SIZE, 0);
39037602
A
5441#if DEVELOPMENT || DEBUG
5442 DTRACE_VM4(codesigned_copy,
0a7de745
A
5443 vm_object_t, src_m_object,
5444 vm_object_offset_t, src_m->vmp_offset,
5445 int, src_m->vmp_cs_validated,
5446 int, src_m->vmp_cs_tainted);
39037602 5447#endif /* DEVELOPMENT || DEBUG */
2d21ac55 5448 }
6d2010ae 5449
2d21ac55 5450 /*
b0d623f7
A
5451 * Propagate the cs_tainted bit to the copy page. Do not propagate
5452 * the cs_validated bit.
2d21ac55 5453 */
d9a64523 5454 dest_m->vmp_cs_tainted = src_m->vmp_cs_tainted;
f427ee49 5455 dest_m->vmp_cs_nx = src_m->vmp_cs_nx;
d9a64523 5456 if (dest_m->vmp_cs_tainted) {
2d21ac55
A
5457 vm_page_copy_cs_tainted++;
5458 }
d9a64523 5459 dest_m->vmp_error = src_m->vmp_error; /* sliding src_m might have failed... */
39037602 5460 pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(src_m), VM_PAGE_GET_PHYS_PAGE(dest_m));
1c79356b
A
5461}
5462
2d21ac55 5463#if MACH_ASSERT
b0d623f7
A
5464static void
5465_vm_page_print(
0a7de745 5466 vm_page_t p)
b0d623f7
A
5467{
5468 printf("vm_page %p: \n", p);
39037602 5469 printf(" pageq: next=%p prev=%p\n",
0a7de745
A
5470 (vm_page_t)VM_PAGE_UNPACK_PTR(p->vmp_pageq.next),
5471 (vm_page_t)VM_PAGE_UNPACK_PTR(p->vmp_pageq.prev));
39037602 5472 printf(" listq: next=%p prev=%p\n",
0a7de745
A
5473 (vm_page_t)(VM_PAGE_UNPACK_PTR(p->vmp_listq.next)),
5474 (vm_page_t)(VM_PAGE_UNPACK_PTR(p->vmp_listq.prev)));
d9a64523 5475 printf(" next=%p\n", (vm_page_t)(VM_PAGE_UNPACK_PTR(p->vmp_next_m)));
0a7de745 5476 printf(" object=%p offset=0x%llx\n", VM_PAGE_OBJECT(p), p->vmp_offset);
d9a64523
A
5477 printf(" wire_count=%u\n", p->vmp_wire_count);
5478 printf(" q_state=%u\n", p->vmp_q_state);
b0d623f7 5479
39037602 5480 printf(" %slaundry, %sref, %sgobbled, %sprivate\n",
0a7de745
A
5481 (p->vmp_laundry ? "" : "!"),
5482 (p->vmp_reference ? "" : "!"),
5483 (p->vmp_gobbled ? "" : "!"),
5484 (p->vmp_private ? "" : "!"));
b0d623f7 5485 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
0a7de745
A
5486 (p->vmp_busy ? "" : "!"),
5487 (p->vmp_wanted ? "" : "!"),
5488 (p->vmp_tabled ? "" : "!"),
5489 (p->vmp_fictitious ? "" : "!"),
5490 (p->vmp_pmapped ? "" : "!"),
5491 (p->vmp_wpmapped ? "" : "!"));
39037602 5492 printf(" %sfree_when_done, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
0a7de745
A
5493 (p->vmp_free_when_done ? "" : "!"),
5494 (p->vmp_absent ? "" : "!"),
5495 (p->vmp_error ? "" : "!"),
5496 (p->vmp_dirty ? "" : "!"),
5497 (p->vmp_cleaning ? "" : "!"),
5498 (p->vmp_precious ? "" : "!"),
5499 (p->vmp_clustered ? "" : "!"));
5ba3f43e 5500 printf(" %soverwriting, %srestart, %sunusual\n",
0a7de745
A
5501 (p->vmp_overwriting ? "" : "!"),
5502 (p->vmp_restart ? "" : "!"),
5503 (p->vmp_unusual ? "" : "!"));
f427ee49
A
5504 printf(" cs_validated=%d, cs_tainted=%d, cs_nx=%d, %sno_cache\n",
5505 p->vmp_cs_validated,
5506 p->vmp_cs_tainted,
5507 p->vmp_cs_nx,
0a7de745 5508 (p->vmp_no_cache ? "" : "!"));
b0d623f7 5509
39037602 5510 printf("phys_page=0x%x\n", VM_PAGE_GET_PHYS_PAGE(p));
b0d623f7
A
5511}
5512
1c79356b
A
5513/*
5514 * Check that the list of pages is ordered by
5515 * ascending physical address and has no holes.
5516 */
2d21ac55 5517static int
1c79356b 5518vm_page_verify_contiguous(
0a7de745
A
5519 vm_page_t pages,
5520 unsigned int npages)
1c79356b 5521{
0a7de745
A
5522 vm_page_t m;
5523 unsigned int page_count;
5524 vm_offset_t prev_addr;
1c79356b 5525
39037602 5526 prev_addr = VM_PAGE_GET_PHYS_PAGE(pages);
1c79356b
A
5527 page_count = 1;
5528 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
39037602 5529 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
b0d623f7 5530 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
0a7de745 5531 m, (long)prev_addr, VM_PAGE_GET_PHYS_PAGE(m));
6d2010ae 5532 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
1c79356b
A
5533 panic("vm_page_verify_contiguous: not contiguous!");
5534 }
39037602 5535 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
1c79356b
A
5536 ++page_count;
5537 }
5538 if (page_count != npages) {
2d21ac55 5539 printf("pages %p actual count 0x%x but requested 0x%x\n",
0a7de745 5540 pages, page_count, npages);
1c79356b
A
5541 panic("vm_page_verify_contiguous: count error");
5542 }
5543 return 1;
5544}
1c79356b
A
5545
5546
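/*
 * Illustrative sketch, not part of xnu: the invariant enforced by
 * vm_page_verify_contiguous() above, restated over a simple singly-linked
 * chain of page frame numbers.  Every page must be exactly one frame above
 * its predecessor and the chain length must match the requested count.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>

struct chain_page {
	struct chain_page *next;
	uint32_t           pfn;
};

static bool
chain_is_contiguous(const struct chain_page *head, size_t expect)
{
	size_t   count = 0;
	uint32_t prev = 0;

	for (const struct chain_page *p = head; p != NULL; p = p->next) {
		if (count != 0 && p->pfn != prev + 1) {
			return false;           /* hole, or pages out of order */
		}
		prev = p->pfn;
		count++;
	}
	return count == expect;                 /* length must match the request */
}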
2d21ac55
A
5547/*
5548 * Check the free lists for proper length etc.
5549 */
fe8ab488 5550static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
b0d623f7
A
5551static unsigned int
5552vm_page_verify_free_list(
0a7de745
A
5553 vm_page_queue_head_t *vm_page_queue,
5554 unsigned int color,
5555 vm_page_t look_for_page,
5556 boolean_t expect_page)
b0d623f7 5557{
0a7de745
A
5558 unsigned int npages;
5559 vm_page_t m;
5560 vm_page_t prev_m;
5561 boolean_t found_page;
b0d623f7 5562
0a7de745 5563 if (!vm_page_verify_this_free_list_enabled) {
fe8ab488 5564 return 0;
0a7de745 5565 }
fe8ab488 5566
b0d623f7
A
5567 found_page = FALSE;
5568 npages = 0;
39037602
A
5569 prev_m = (vm_page_t)((uintptr_t)vm_page_queue);
5570
0a7de745 5571 vm_page_queue_iterate(vm_page_queue, m, vmp_pageq) {
b0d623f7
A
5572 if (m == look_for_page) {
5573 found_page = TRUE;
5574 }
0a7de745 5575 if ((vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.prev) != prev_m) {
b0d623f7 5576 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
0a7de745
A
5577 color, npages, m, (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.prev), prev_m);
5578 }
5579 if (!m->vmp_busy) {
b0d623f7 5580 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
0a7de745
A
5581 color, npages, m);
5582 }
6d2010ae 5583 if (color != (unsigned int) -1) {
0a7de745 5584 if (VM_PAGE_GET_COLOR(m) != color) {
6d2010ae 5585 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
0a7de745
A
5586 color, npages, m, VM_PAGE_GET_COLOR(m), color);
5587 }
5588 if (m->vmp_q_state != VM_PAGE_ON_FREE_Q) {
39037602 5589 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p - expecting q_state == VM_PAGE_ON_FREE_Q, found %d\n",
0a7de745
A
5590 color, npages, m, m->vmp_q_state);
5591 }
39037602 5592 } else {
0a7de745 5593 if (m->vmp_q_state != VM_PAGE_ON_FREE_LOCAL_Q) {
39037602 5594 panic("vm_page_verify_free_list(npages=%u): local page %p - expecting q_state == VM_PAGE_ON_FREE_LOCAL_Q, found %d\n",
0a7de745
A
5595 npages, m, m->vmp_q_state);
5596 }
6d2010ae 5597 }
b0d623f7
A
5598 ++npages;
5599 prev_m = m;
5600 }
5601 if (look_for_page != VM_PAGE_NULL) {
5602 unsigned int other_color;
5603
5604 if (expect_page && !found_page) {
5605 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
0a7de745 5606 color, npages, look_for_page, VM_PAGE_GET_PHYS_PAGE(look_for_page));
b0d623f7
A
5607 _vm_page_print(look_for_page);
5608 for (other_color = 0;
0a7de745
A
5609 other_color < vm_colors;
5610 other_color++) {
5611 if (other_color == color) {
b0d623f7 5612 continue;
0a7de745 5613 }
39037602 5614 vm_page_verify_free_list(&vm_page_queue_free[other_color].qhead,
0a7de745 5615 other_color, look_for_page, FALSE);
b0d623f7 5616 }
6d2010ae 5617 if (color == (unsigned int) -1) {
d1ecb069 5618 vm_page_verify_free_list(&vm_lopage_queue_free,
0a7de745 5619 (unsigned int) -1, look_for_page, FALSE);
d1ecb069 5620 }
b0d623f7
A
5621 panic("vm_page_verify_free_list(color=%u)\n", color);
5622 }
5623 if (!expect_page && found_page) {
5624 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
0a7de745 5625 color, npages, look_for_page, VM_PAGE_GET_PHYS_PAGE(look_for_page));
b0d623f7
A
5626 }
5627 }
5628 return npages;
5629}
5630
fe8ab488 5631static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
2d21ac55
A
5632static void
5633vm_page_verify_free_lists( void )
5634{
0a7de745
A
5635 unsigned int color, npages, nlopages;
5636 boolean_t toggle = TRUE;
b0d623f7 5637
0a7de745 5638 if (!vm_page_verify_all_free_lists_enabled) {
b0d623f7 5639 return;
0a7de745 5640 }
b0d623f7 5641
2d21ac55 5642 npages = 0;
b0d623f7
A
5643
5644 lck_mtx_lock(&vm_page_queue_free_lock);
0a7de745 5645
fe8ab488
A
5646 if (vm_page_verify_this_free_list_enabled == TRUE) {
5647 /*
5648 * This variable has been set globally for extra checking of
5649 * each free list Q. Since we didn't set it, we don't own it
5650 * and we shouldn't toggle it.
5651 */
5652 toggle = FALSE;
5653 }
5654
5655 if (toggle == TRUE) {
5656 vm_page_verify_this_free_list_enabled = TRUE;
5657 }
2d21ac55 5658
0a7de745 5659 for (color = 0; color < vm_colors; color++) {
39037602 5660 npages += vm_page_verify_free_list(&vm_page_queue_free[color].qhead,
0a7de745 5661 color, VM_PAGE_NULL, FALSE);
2d21ac55 5662 }
d1ecb069 5663 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
0a7de745
A
5664 (unsigned int) -1,
5665 VM_PAGE_NULL, FALSE);
5666 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count) {
d1ecb069 5667 panic("vm_page_verify_free_lists: "
0a7de745
A
5668 "npages %u free_count %d nlopages %u lo_free_count %u",
5669 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
5670 }
6d2010ae 5671
fe8ab488
A
5672 if (toggle == TRUE) {
5673 vm_page_verify_this_free_list_enabled = FALSE;
5674 }
5675
b0d623f7 5676 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55 5677}
2d21ac55 5678
0a7de745 5679#endif /* MACH_ASSERT */
2d21ac55 5680
91447636 5681
0a7de745 5682extern boolean_t(*volatile consider_buffer_cache_collect)(int);
3e170ce0 5683
1c79356b 5684/*
2d21ac55 5685 * CONTIGUOUS PAGE ALLOCATION
2d21ac55
A
5686 *
5687 * Find a region large enough to contain at least n pages
1c79356b
A
5688 * of contiguous physical memory.
5689 *
2d21ac55
A
5690 * This is done by traversing the vm_page_t array in a linear fashion
5691 * we assume that the vm_page_t array has the available physical pages in an
5692 * ordered, ascending list... this is currently true of all our implementations
0a7de745 5693 * and must remain so... there can be 'holes' in the array... we also can
2d21ac55 5694 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
0a7de745
A
5695 * which used to happen via 'vm_page_convert'... that function was no longer
5696 * being called and was removed...
5697 *
5698 * The basic flow consists of stabilizing some of the interesting state of
2d21ac55
A
5699 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
5700 * sweep at the beginning of the array looking for pages that meet our criteria
5701 * for a 'stealable' page... currently we are pretty conservative... if the page
5702 * meets this criteria and is physically contiguous to the previous page in the 'run'
0a7de745 5703 * we keep developing it. If we hit a page that doesn't fit, we reset our state
2d21ac55 5704 * and start to develop a new run... if at this point we've already considered
0a7de745
A
5705 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
5706 * and mutex_pause (which will yield the processor), to keep the latency low w/r
2d21ac55
A
5707 * to other threads trying to acquire free pages (or move pages from q to q),
5708 * and then continue from the spot we left off... we only make 1 pass through the
5709 * array. Once we have a 'run' that is long enough, we'll go into the loop
0a7de745 5710 * which steals the pages from the queues they're currently on... pages on the free
2d21ac55
A
5711 * queue can be stolen directly... pages that are on any of the other queues
5712 * must be removed from the object they are tabled on... this requires taking the
0a7de745 5713 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
2d21ac55
A
5714 * or if the state of the page behind the vm_object lock is no longer viable, we'll
5715 * dump the pages we've currently stolen back to the free list, and pick up our
5716 * scan from the point where we aborted the 'current' run.
5717 *
5718 *
1c79356b 5719 * Requirements:
2d21ac55 5720 * - neither vm_page_queue nor vm_free_list lock can be held on entry
1c79356b 5721 *
2d21ac55 5722 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
1c79356b 5723 *
e5568f75 5724 * Algorithm:
1c79356b 5725 */
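/*
 * Illustrative sketch, not part of xnu: a standalone, user-space analogue of
 * the linear "run building" scan described above.  The page array, the
 * 'stealable' predicate and all names below are simplified stand-ins; the
 * real routine, vm_page_find_contiguous(), follows further down in this file.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>

struct fake_page {
	uint32_t pfn;        /* physical page number (ascending order, may have holes) */
	bool     stealable;  /* stands in for the free/unwired/clean checks */
};

/* returns the starting index of a run of 'want' contiguous stealable pages, or -1 */
static long
find_contiguous_run(const struct fake_page *pages, size_t count, size_t want)
{
	size_t   npages = 0;                  /* length of the run developed so far */
	size_t   start = 0;
	uint32_t prev_pfn = 0;

	if (want == 0) {
		return -1;
	}
	for (size_t i = 0; i < count; i++) {
		if (!pages[i].stealable) {
			npages = 0;           /* analogue of RESET_STATE_OF_RUN() */
			continue;
		}
		if (npages == 0 || pages[i].pfn != prev_pfn + 1) {
			npages = 1;           /* not contiguous: start a new run here */
			start = i;
		} else {
			npages++;             /* physically contiguous: keep developing the run */
		}
		prev_pfn = pages[i].pfn;
		if (npages == want) {
			return (long)start;
		}
	}
	return -1;                            /* single pass, no run long enough */
}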
2d21ac55 5726
0a7de745 5727#define MAX_CONSIDERED_BEFORE_YIELD 1000
2d21ac55
A
5728
5729
0a7de745
A
5730#define RESET_STATE_OF_RUN() \
5731 MACRO_BEGIN \
5732 prevcontaddr = -2; \
5733 start_pnum = -1; \
5734 free_considered = 0; \
5735 substitute_needed = 0; \
5736 npages = 0; \
5737 MACRO_END
2d21ac55 5738
b0d623f7
A
5739/*
5740 * Can we steal in-use (i.e. not free) pages when searching for
5741 * physically-contiguous pages ?
5742 */
5743#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
5744
0a7de745 5745static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
b0d623f7
A
5746#if DEBUG
5747int vm_page_find_contig_debug = 0;
5748#endif
2d21ac55 5749
1c79356b
A
5750static vm_page_t
5751vm_page_find_contiguous(
0a7de745
A
5752 unsigned int contig_pages,
5753 ppnum_t max_pnum,
b0d623f7 5754 ppnum_t pnum_mask,
0a7de745
A
5755 boolean_t wire,
5756 int flags)
5757{
5758 vm_page_t m = NULL;
5759 ppnum_t prevcontaddr = 0;
5760 ppnum_t start_pnum = 0;
5761 unsigned int npages = 0, considered = 0, scanned = 0;
5762 unsigned int page_idx = 0, start_idx = 0, last_idx = 0, orig_last_idx = 0;
5763 unsigned int idx_last_contig_page_found = 0;
5764 int free_considered = 0, free_available = 0;
5765 int substitute_needed = 0;
c3c9b80d
A
5766 int zone_gc_called = 0;
5767 boolean_t wrapped;
0a7de745 5768 kern_return_t kr;
593a1d5f 5769#if DEBUG
0a7de745
A
5770 clock_sec_t tv_start_sec = 0, tv_end_sec = 0;
5771 clock_usec_t tv_start_usec = 0, tv_end_usec = 0;
593a1d5f 5772#endif
3e170ce0 5773
0a7de745
A
5774 int yielded = 0;
5775 int dumped_run = 0;
5776 int stolen_pages = 0;
5777 int compressed_pages = 0;
3e170ce0 5778
1c79356b 5779
0a7de745 5780 if (contig_pages == 0) {
1c79356b 5781 return VM_PAGE_NULL;
0a7de745 5782 }
1c79356b 5783
3e170ce0
A
5784full_scan_again:
5785
2d21ac55
A
5786#if MACH_ASSERT
5787 vm_page_verify_free_lists();
593a1d5f
A
5788#endif
5789#if DEBUG
2d21ac55
A
5790 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
5791#endif
39236c6e
A
5792 PAGE_REPLACEMENT_ALLOWED(TRUE);
5793
0a7de745
A
5794 /*
5795 * If there are still delayed pages, try to free up some that match.
5796 */
5797 if (__improbable(vm_delayed_count != 0 && contig_pages != 0)) {
5798 vm_free_delayed_pages_contig(contig_pages, max_pnum, pnum_mask);
5ba3f43e 5799 }
3e170ce0 5800
0a7de745 5801 vm_page_lock_queues();
b0d623f7 5802 lck_mtx_lock(&vm_page_queue_free_lock);
2d21ac55
A
5803
5804 RESET_STATE_OF_RUN();
1c79356b 5805
b0d623f7 5806 scanned = 0;
2d21ac55
A
5807 considered = 0;
5808 free_available = vm_page_free_count - vm_page_free_reserved;
e5568f75 5809
b0d623f7 5810 wrapped = FALSE;
0a7de745
A
5811
5812 if (flags & KMA_LOMEM) {
b0d623f7 5813 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
0a7de745 5814 } else {
b0d623f7 5815 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
0a7de745 5816 }
b0d623f7
A
5817
5818 orig_last_idx = idx_last_contig_page_found;
5819 last_idx = orig_last_idx;
5820
5821 for (page_idx = last_idx, start_idx = last_idx;
0a7de745
A
5822 npages < contig_pages && page_idx < vm_pages_count;
5823 page_idx++) {
b0d623f7
A
5824retry:
5825 if (wrapped &&
5826 npages == 0 &&
5827 page_idx >= orig_last_idx) {
5828 /*
5829 * We're back where we started and we haven't
5830 * found any suitable contiguous range. Let's
5831 * give up.
5832 */
5833 break;
5834 }
5835 scanned++;
2d21ac55 5836 m = &vm_pages[page_idx];
e5568f75 5837
d9a64523
A
5838 assert(!m->vmp_fictitious);
5839 assert(!m->vmp_private);
b0d623f7 5840
39037602 5841 if (max_pnum && VM_PAGE_GET_PHYS_PAGE(m) > max_pnum) {
2d21ac55
A
5842 /* no more low pages... */
5843 break;
e5568f75 5844 }
39037602 5845 if (!npages && ((VM_PAGE_GET_PHYS_PAGE(m) & pnum_mask) != 0)) {
b0d623f7
A
5846 /*
5847 * not aligned
5848 */
5849 RESET_STATE_OF_RUN();
d9a64523 5850 } else if (VM_PAGE_WIRED(m) || m->vmp_gobbled ||
0a7de745
A
5851 m->vmp_laundry || m->vmp_wanted ||
5852 m->vmp_cleaning || m->vmp_overwriting || m->vmp_free_when_done) {
2d21ac55
A
5853 /*
5854 * page is in a transient state
5855 * or a state we don't want to deal
5856 * with, so don't consider it which
5857 * means starting a new run
5858 */
5859 RESET_STATE_OF_RUN();
d9a64523 5860 } else if ((m->vmp_q_state == VM_PAGE_NOT_ON_Q) ||
0a7de745
A
5861 (m->vmp_q_state == VM_PAGE_ON_FREE_LOCAL_Q) ||
5862 (m->vmp_q_state == VM_PAGE_ON_FREE_LOPAGE_Q) ||
5863 (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
2d21ac55 5864 /*
39037602
A
5865 * page needs to be on one of our queues (other than the pageout or special free queues)
5866 * or it needs to belong to the compressor pool (which is now indicated
d9a64523 5867 * by vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR and falls out
39037602 5868 * from the check for VM_PAGE_NOT_ON_Q)
2d21ac55
A
5869 * in order for it to be stable behind the
5870 * locks we hold at this point...
5871 * if not, don't consider it which
5872 * means starting a new run
5873 */
5874 RESET_STATE_OF_RUN();
d9a64523 5875 } else if ((m->vmp_q_state != VM_PAGE_ON_FREE_Q) && (!m->vmp_tabled || m->vmp_busy)) {
2d21ac55
A
5876 /*
5877 * pages on the free list are always 'busy'
5878 * so we couldn't test for 'busy' in the check
5879 * for the transient states... pages that are
5880 * 'free' are never 'tabled', so we also couldn't
5881 * test for 'tabled'. So we check here to make
5882 * sure that a non-free page is not busy and is
0a7de745 5883 * tabled on an object...
2d21ac55
A
5884 * if not, don't consider it which
5885 * means starting a new run
5886 */
5887 RESET_STATE_OF_RUN();
2d21ac55 5888 } else {
39037602
A
5889 if (VM_PAGE_GET_PHYS_PAGE(m) != prevcontaddr + 1) {
5890 if ((VM_PAGE_GET_PHYS_PAGE(m) & pnum_mask) != 0) {
b0d623f7
A
5891 RESET_STATE_OF_RUN();
5892 goto did_consider;
5893 } else {
5894 npages = 1;
5895 start_idx = page_idx;
39037602 5896 start_pnum = VM_PAGE_GET_PHYS_PAGE(m);
b0d623f7 5897 }
2d21ac55
A
5898 } else {
5899 npages++;
e5568f75 5900 }
39037602 5901 prevcontaddr = VM_PAGE_GET_PHYS_PAGE(m);
0a7de745 5902
b0d623f7 5903 VM_PAGE_CHECK(m);
d9a64523 5904 if (m->vmp_q_state == VM_PAGE_ON_FREE_Q) {
2d21ac55 5905 free_considered++;
b0d623f7
A
5906 } else {
5907 /*
5908 * This page is not free.
5909 * If we can't steal used pages,
5910 * we have to give up this run
5911 * and keep looking.
5912 * Otherwise, we might need to
5913 * move the contents of this page
5914 * into a substitute page.
5915 */
5916#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
d9a64523 5917 if (m->vmp_pmapped || m->vmp_dirty || m->vmp_precious) {
b0d623f7
A
5918 substitute_needed++;
5919 }
5920#else
5921 RESET_STATE_OF_RUN();
5922#endif
2d21ac55 5923 }
b0d623f7 5924
0a7de745 5925 if ((free_considered + substitute_needed) > free_available) {
2d21ac55
A
5926 /*
5927 * if we let this run continue
5928 * we will end up dropping the vm_page_free_count
5929 * below the reserve limit... we need to abort
5930 * this run, but we can at least re-consider this
5931 * page... thus the jump back to 'retry'
5932 */
5933 RESET_STATE_OF_RUN();
5934
5935 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
5936 considered++;
5937 goto retry;
e5568f75 5938 }
2d21ac55
A
5939 /*
5940 * free_available == 0
5941 * so can't consider any free pages... if
5942 * we went to retry in this case, we'd
5943 * get stuck looking at the same page
5944 * w/o making any forward progress
5945 * we also want to take this path if we've already
5946 * reached our limit that controls the lock latency
5947 */
e5568f75 5948 }
2d21ac55 5949 }
b0d623f7 5950did_consider:
2d21ac55 5951 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
39236c6e
A
5952 PAGE_REPLACEMENT_ALLOWED(FALSE);
5953
b0d623f7 5954 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55 5955 vm_page_unlock_queues();
e5568f75 5956
2d21ac55
A
5957 mutex_pause(0);
5958
39236c6e
A
5959 PAGE_REPLACEMENT_ALLOWED(TRUE);
5960
2d21ac55 5961 vm_page_lock_queues();
b0d623f7 5962 lck_mtx_lock(&vm_page_queue_free_lock);
2d21ac55
A
5963
5964 RESET_STATE_OF_RUN();
1c79356b 5965 /*
2d21ac55
A
5966 * reset our free page limit since we
5967 * dropped the lock protecting the vm_page_free_queue
1c79356b 5968 */
2d21ac55
A
5969 free_available = vm_page_free_count - vm_page_free_reserved;
5970 considered = 0;
3e170ce0 5971
2d21ac55 5972 yielded++;
3e170ce0 5973
2d21ac55
A
5974 goto retry;
5975 }
5976 considered++;
5977 }
5978 m = VM_PAGE_NULL;
5979
b0d623f7
A
5980 if (npages != contig_pages) {
5981 if (!wrapped) {
5982 /*
5983 * We didn't find a contiguous range but we didn't
5984 * start from the very first page.
5985 * Start again from the very first page.
5986 */
5987 RESET_STATE_OF_RUN();
0a7de745 5988 if (flags & KMA_LOMEM) {
b0d623f7 5989 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
0a7de745 5990 } else {
b0d623f7 5991 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
0a7de745 5992 }
b0d623f7
A
5993 last_idx = 0;
5994 page_idx = last_idx;
5995 wrapped = TRUE;
5996 goto retry;
5997 }
5998 lck_mtx_unlock(&vm_page_queue_free_lock);
5999 } else {
0a7de745
A
6000 vm_page_t m1;
6001 vm_page_t m2;
6002 unsigned int cur_idx;
6003 unsigned int tmp_start_idx;
6004 vm_object_t locked_object = VM_OBJECT_NULL;
6005 boolean_t abort_run = FALSE;
6006
b0d623f7
A
6007 assert(page_idx - start_idx == contig_pages);
6008
2d21ac55
A
6009 tmp_start_idx = start_idx;
6010
6011 /*
6012 * first pass through to pull the free pages
6013 * off of the free queue so that in case we
0a7de745 6014 * need substitute pages, we won't grab any
2d21ac55
A
6015 * of the free pages in the run... we'll clear
6016 * the 'free' bit in the 2nd pass, and even in
6017 * an abort_run case, we'll collect all of the
6018 * free pages in this run and return them to the free list
6019 */
6020 while (start_idx < page_idx) {
2d21ac55
A
6021 m1 = &vm_pages[start_idx++];
6022
b0d623f7 6023#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
d9a64523 6024 assert(m1->vmp_q_state == VM_PAGE_ON_FREE_Q);
b0d623f7
A
6025#endif
6026
d9a64523 6027 if (m1->vmp_q_state == VM_PAGE_ON_FREE_Q) {
0b4c1975 6028 unsigned int color;
2d21ac55 6029
5ba3f43e 6030 color = VM_PAGE_GET_COLOR(m1);
b0d623f7 6031#if MACH_ASSERT
39037602 6032 vm_page_verify_free_list(&vm_page_queue_free[color].qhead, color, m1, TRUE);
b0d623f7 6033#endif
0a7de745 6034 vm_page_queue_remove(&vm_page_queue_free[color].qhead, m1, vmp_pageq);
39037602
A
6035
6036 VM_PAGE_ZERO_PAGEQ_ENTRY(m1);
0b4c1975 6037#if MACH_ASSERT
39037602 6038 vm_page_verify_free_list(&vm_page_queue_free[color].qhead, color, VM_PAGE_NULL, FALSE);
0b4c1975 6039#endif
b0d623f7
A
6040 /*
6041 * Clear the "free" bit so that this page
6042 * does not get considered for another
6043 * concurrent physically-contiguous allocation.
6044 */
d9a64523
A
6045 m1->vmp_q_state = VM_PAGE_NOT_ON_Q;
6046 assert(m1->vmp_busy);
0b4c1975
A
6047
6048 vm_page_free_count--;
2d21ac55
A
6049 }
6050 }
0a7de745 6051 if (flags & KMA_LOMEM) {
b0d623f7 6052 vm_page_lomem_find_contiguous_last_idx = page_idx;
0a7de745 6053 } else {
b0d623f7 6054 vm_page_find_contiguous_last_idx = page_idx;
0a7de745
A
6055 }
6056
2d21ac55
A
6057 /*
6058 * we can drop the free queue lock at this point since
6059 * we've pulled any 'free' candidates off of the list
6060 * we need it dropped so that we can do a vm_page_grab
6061 * when substituting for pmapped/dirty pages
6062 */
b0d623f7 6063 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
6064
6065 start_idx = tmp_start_idx;
6066 cur_idx = page_idx - 1;
6067
6068 while (start_idx++ < page_idx) {
6069 /*
6070 * must go through the list from back to front
6071 * so that the page list is created in the
6072 * correct order - low -> high phys addresses
6073 */
6074 m1 = &vm_pages[cur_idx--];
6075
d9a64523 6076 if (m1->vmp_object == 0) {
2d21ac55 6077 /*
b0d623f7 6078 * page has already been removed from
2d21ac55
A
6079 * the free list in the 1st pass
6080 */
d9a64523
A
6081 assert(m1->vmp_q_state == VM_PAGE_NOT_ON_Q);
6082 assert(m1->vmp_offset == (vm_object_offset_t) -1);
6083 assert(m1->vmp_busy);
6084 assert(!m1->vmp_wanted);
6085 assert(!m1->vmp_laundry);
e5568f75 6086 } else {
2d21ac55 6087 vm_object_t object;
39236c6e
A
6088 int refmod;
6089 boolean_t disconnected, reusable;
2d21ac55 6090
0a7de745 6091 if (abort_run == TRUE) {
2d21ac55 6092 continue;
0a7de745 6093 }
2d21ac55 6094
d9a64523 6095 assert(m1->vmp_q_state != VM_PAGE_NOT_ON_Q);
39037602
A
6096
6097 object = VM_PAGE_OBJECT(m1);
2d21ac55
A
6098
6099 if (object != locked_object) {
6100 if (locked_object) {
6101 vm_object_unlock(locked_object);
6102 locked_object = VM_OBJECT_NULL;
6103 }
0a7de745 6104 if (vm_object_lock_try(object)) {
2d21ac55 6105 locked_object = object;
0a7de745 6106 }
2d21ac55 6107 }
0a7de745 6108 if (locked_object == VM_OBJECT_NULL ||
d9a64523 6109 (VM_PAGE_WIRED(m1) || m1->vmp_gobbled ||
0a7de745
A
6110 m1->vmp_laundry || m1->vmp_wanted ||
6111 m1->vmp_cleaning || m1->vmp_overwriting || m1->vmp_free_when_done || m1->vmp_busy) ||
d9a64523 6112 (m1->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
2d21ac55
A
6113 if (locked_object) {
6114 vm_object_unlock(locked_object);
6115 locked_object = VM_OBJECT_NULL;
6116 }
6117 tmp_start_idx = cur_idx;
6118 abort_run = TRUE;
6119 continue;
6120 }
39236c6e
A
6121
6122 disconnected = FALSE;
6123 reusable = FALSE;
6124
d9a64523 6125 if ((m1->vmp_reusable ||
0a7de745 6126 object->all_reusable) &&
d9a64523
A
6127 (m1->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q) &&
6128 !m1->vmp_dirty &&
6129 !m1->vmp_reference) {
39236c6e 6130 /* reusable page... */
39037602 6131 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1));
39236c6e
A
6132 disconnected = TRUE;
6133 if (refmod == 0) {
6134 /*
6135 * ... not reused: can steal
6136 * without relocating contents.
6137 */
6138 reusable = TRUE;
6139 }
6140 }
6141
d9a64523 6142 if ((m1->vmp_pmapped &&
0a7de745 6143 !reusable) ||
d9a64523
A
6144 m1->vmp_dirty ||
6145 m1->vmp_precious) {
2d21ac55
A
6146 vm_object_offset_t offset;
6147
0a7de745 6148 m2 = vm_page_grab_options(VM_PAGE_GRAB_Q_LOCK_HELD);
2d21ac55
A
6149
6150 if (m2 == VM_PAGE_NULL) {
6151 if (locked_object) {
6152 vm_object_unlock(locked_object);
6153 locked_object = VM_OBJECT_NULL;
6154 }
6155 tmp_start_idx = cur_idx;
6156 abort_run = TRUE;
6157 continue;
6158 }
0a7de745
A
6159 if (!disconnected) {
6160 if (m1->vmp_pmapped) {
39037602 6161 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1));
0a7de745 6162 } else {
39236c6e 6163 refmod = 0;
0a7de745 6164 }
39236c6e
A
6165 }
6166
6167 /* copy the page's contents */
39037602 6168 pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(m1), VM_PAGE_GET_PHYS_PAGE(m2));
39236c6e
A
6169 /* copy the page's state */
6170 assert(!VM_PAGE_WIRED(m1));
d9a64523
A
6171 assert(m1->vmp_q_state != VM_PAGE_ON_FREE_Q);
6172 assert(m1->vmp_q_state != VM_PAGE_ON_PAGEOUT_Q);
6173 assert(!m1->vmp_laundry);
0a7de745 6174 m2->vmp_reference = m1->vmp_reference;
d9a64523
A
6175 assert(!m1->vmp_gobbled);
6176 assert(!m1->vmp_private);
0a7de745
A
6177 m2->vmp_no_cache = m1->vmp_no_cache;
6178 m2->vmp_xpmapped = 0;
d9a64523
A
6179 assert(!m1->vmp_busy);
6180 assert(!m1->vmp_wanted);
6181 assert(!m1->vmp_fictitious);
0a7de745
A
6182 m2->vmp_pmapped = m1->vmp_pmapped; /* should flush cache ? */
6183 m2->vmp_wpmapped = m1->vmp_wpmapped;
d9a64523 6184 assert(!m1->vmp_free_when_done);
0a7de745
A
6185 m2->vmp_absent = m1->vmp_absent;
6186 m2->vmp_error = m1->vmp_error;
6187 m2->vmp_dirty = m1->vmp_dirty;
d9a64523 6188 assert(!m1->vmp_cleaning);
0a7de745
A
6189 m2->vmp_precious = m1->vmp_precious;
6190 m2->vmp_clustered = m1->vmp_clustered;
d9a64523 6191 assert(!m1->vmp_overwriting);
0a7de745
A
6192 m2->vmp_restart = m1->vmp_restart;
6193 m2->vmp_unusual = m1->vmp_unusual;
d9a64523 6194 m2->vmp_cs_validated = m1->vmp_cs_validated;
0a7de745
A
6195 m2->vmp_cs_tainted = m1->vmp_cs_tainted;
6196 m2->vmp_cs_nx = m1->vmp_cs_nx;
39236c6e
A
6197
6198 /*
6199 * If m1 had really been reusable,
6200 * we would have just stolen it, so
6201 * let's not propagate its "reusable"
6202 * bit and assert that m2 is not
6203 * marked as "reusable".
6204 */
d9a64523
A
6205 // m2->vmp_reusable = m1->vmp_reusable;
6206 assert(!m2->vmp_reusable);
39236c6e 6207
d9a64523 6208 // assert(!m1->vmp_lopage);
39037602 6209
0a7de745 6210 if (m1->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
d9a64523 6211 m2->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
0a7de745 6212 }
39236c6e 6213
15129b1c
A
6214 /*
6215 * page may need to be flushed if
6216 * it is marshalled into a UPL
6217 * that is going to be used by a device
6218 * that doesn't support coherency
6219 */
d9a64523 6220 m2->vmp_written_by_kernel = TRUE;
15129b1c 6221
39236c6e
A
6222 /*
6223 * make sure we clear the ref/mod state
6224 * from the pmap layer... else we risk
6225 * inheriting state from the last time
6226 * this page was used...
6227 */
39037602 6228 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(m2), VM_MEM_MODIFIED | VM_MEM_REFERENCED);
2d21ac55 6229
0a7de745 6230 if (refmod & VM_MEM_REFERENCED) {
d9a64523 6231 m2->vmp_reference = TRUE;
0a7de745 6232 }
316670eb
A
6233 if (refmod & VM_MEM_MODIFIED) {
6234 SET_PAGE_DIRTY(m2, TRUE);
6235 }
d9a64523 6236 offset = m1->vmp_offset;
2d21ac55
A
6237
6238 /*
6239 * completely cleans up the state
6240 * of the page so that it is ready
6241 * to be put onto the free list, or
6242 * for this purpose it looks like it
6243 * just came off of the free list
6244 */
6245 vm_page_free_prepare(m1);
6246
6247 /*
39236c6e
A
6248 * now put the substitute page
6249 * on the object
2d21ac55 6250 */
3e170ce0 6251 vm_page_insert_internal(m2, locked_object, offset, VM_KERN_MEMORY_NONE, TRUE, TRUE, FALSE, FALSE, NULL);
2d21ac55 6252
d9a64523
A
6253 if (m2->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
6254 m2->vmp_pmapped = TRUE;
6255 m2->vmp_wpmapped = TRUE;
2d21ac55 6256
f427ee49 6257 PMAP_ENTER(kernel_pmap, (vm_map_offset_t)m2->vmp_offset, m2,
0a7de745 6258 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE, kr);
5ba3f43e
A
6259
6260 assert(kr == KERN_SUCCESS);
3e170ce0 6261
39236c6e 6262 compressed_pages++;
39236c6e 6263 } else {
0a7de745 6264 if (m2->vmp_reference) {
39236c6e 6265 vm_page_activate(m2);
0a7de745 6266 } else {
39236c6e 6267 vm_page_deactivate(m2);
0a7de745 6268 }
39236c6e 6269 }
2d21ac55 6270 PAGE_WAKEUP_DONE(m2);
2d21ac55 6271 } else {
d9a64523 6272 assert(m1->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR);
39236c6e 6273
2d21ac55
A
6274 /*
6275 * completely cleans up the state
6276 * of the page so that it is ready
6277 * to be put onto the free list, or
6278 * for this purpose it looks like it
6279 * just came off of the free list
6280 */
6281 vm_page_free_prepare(m1);
6282 }
3e170ce0 6283
2d21ac55 6284 stolen_pages++;
1c79356b 6285 }
39037602
A
6286#if CONFIG_BACKGROUND_QUEUE
6287 vm_page_assign_background_state(m1);
6288#endif
6289 VM_PAGE_ZERO_PAGEQ_ENTRY(m1);
d9a64523 6290 m1->vmp_snext = m;
2d21ac55 6291 m = m1;
e5568f75 6292 }
2d21ac55
A
6293 if (locked_object) {
6294 vm_object_unlock(locked_object);
6295 locked_object = VM_OBJECT_NULL;
1c79356b
A
6296 }
6297
2d21ac55 6298 if (abort_run == TRUE) {
2d21ac55
A
6299 /*
6300 * want the index of the last
6301 * page in this run that was
6302 * successfully 'stolen', so back
6303 * it up 1 for the auto-decrement on use
6304 * and 1 more to bump back over this page
6305 */
6306 page_idx = tmp_start_idx + 2;
b0d623f7 6307 if (page_idx >= vm_pages_count) {
d190cdc3
A
6308 if (wrapped) {
6309 if (m != VM_PAGE_NULL) {
6310 vm_page_unlock_queues();
6311 vm_page_free_list(m, FALSE);
6312 vm_page_lock_queues();
6313 m = VM_PAGE_NULL;
6314 }
6315 dumped_run++;
b0d623f7 6316 goto done_scanning;
d190cdc3 6317 }
b0d623f7
A
6318 page_idx = last_idx = 0;
6319 wrapped = TRUE;
6320 }
6321 abort_run = FALSE;
0a7de745 6322
2d21ac55 6323 /*
b0d623f7
A
6324 * We didn't find a contiguous range but we didn't
6325 * start from the very first page.
6326 * Start again from the very first page.
2d21ac55 6327 */
b0d623f7 6328 RESET_STATE_OF_RUN();
0a7de745
A
6329
6330 if (flags & KMA_LOMEM) {
b0d623f7 6331 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
0a7de745 6332 } else {
b0d623f7 6333 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
0a7de745
A
6334 }
6335
b0d623f7 6336 last_idx = page_idx;
0a7de745 6337
d190cdc3
A
6338 if (m != VM_PAGE_NULL) {
6339 vm_page_unlock_queues();
6340 vm_page_free_list(m, FALSE);
6341 vm_page_lock_queues();
6342 m = VM_PAGE_NULL;
6343 }
6344 dumped_run++;
6345
b0d623f7
A
6346 lck_mtx_lock(&vm_page_queue_free_lock);
6347 /*
0a7de745
A
6348 * reset our free page limit since we
6349 * dropped the lock protecting the vm_page_free_queue
6350 */
b0d623f7 6351 free_available = vm_page_free_count - vm_page_free_reserved;
2d21ac55
A
6352 goto retry;
6353 }
e5568f75 6354
e5568f75 6355 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
d9a64523
A
6356 assert(m1->vmp_q_state == VM_PAGE_NOT_ON_Q);
6357 assert(m1->vmp_wire_count == 0);
39037602
A
6358
6359 if (wire == TRUE) {
d9a64523
A
6360 m1->vmp_wire_count++;
6361 m1->vmp_q_state = VM_PAGE_IS_WIRED;
0a7de745 6362 } else {
d9a64523 6363 m1->vmp_gobbled = TRUE;
0a7de745 6364 }
e5568f75 6365 }
0a7de745 6366 if (wire == FALSE) {
2d21ac55 6367 vm_page_gobble_count += npages;
0a7de745 6368 }
2d21ac55
A
6369
6370 /*
6371 * gobbled pages are also counted as wired pages
6372 */
e5568f75 6373 vm_page_wire_count += npages;
e5568f75 6374
0a7de745 6375 assert(vm_page_verify_contiguous(m, npages));
2d21ac55
A
6376 }
6377done_scanning:
39236c6e
A
6378 PAGE_REPLACEMENT_ALLOWED(FALSE);
6379
2d21ac55
A
6380 vm_page_unlock_queues();
6381
593a1d5f 6382#if DEBUG
2d21ac55
A
6383 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
6384
6385 tv_end_sec -= tv_start_sec;
6386 if (tv_end_usec < tv_start_usec) {
6387 tv_end_sec--;
6388 tv_end_usec += 1000000;
1c79356b 6389 }
2d21ac55
A
6390 tv_end_usec -= tv_start_usec;
6391 if (tv_end_usec >= 1000000) {
6392 tv_end_sec++;
6393 tv_end_usec -= 1000000;
6394 }
b0d623f7 6395 if (vm_page_find_contig_debug) {
39236c6e 6396 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
0a7de745
A
6397 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
6398 (long)tv_end_sec, tv_end_usec, orig_last_idx,
6399 scanned, yielded, dumped_run, stolen_pages, compressed_pages);
b0d623f7 6400 }
e5568f75 6401
593a1d5f
A
6402#endif
6403#if MACH_ASSERT
2d21ac55
A
6404 vm_page_verify_free_lists();
6405#endif
c3c9b80d 6406 if (m == NULL && zone_gc_called < 2) {
3e170ce0 6407 printf("%s(num=%d,low=%d): found %d pages at 0x%llx...scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages... wired count is %d\n",
0a7de745
A
6408 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
6409 scanned, yielded, dumped_run, stolen_pages, compressed_pages, vm_page_wire_count);
3e170ce0
A
6410
6411 if (consider_buffer_cache_collect != NULL) {
6412 (void)(*consider_buffer_cache_collect)(1);
6413 }
6414
c3c9b80d 6415 zone_gc(zone_gc_called ? ZONE_GC_DRAIN : ZONE_GC_TRIM);
3e170ce0 6416
c3c9b80d 6417 zone_gc_called++;
3e170ce0
A
6418
6419 printf("vm_page_find_contiguous: zone_gc called... wired count is %d\n", vm_page_wire_count);
6420 goto full_scan_again;
6421 }
6422
e5568f75 6423 return m;
1c79356b
A
6424}
6425
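/*
 * Illustrative sketch, not part of xnu: a standalone analogue of the
 * substitution step inside vm_page_find_contiguous() above.  When a wanted
 * physical page is in use, its contents and relevant state are copied into a
 * freshly allocated page, the new page takes the old page's place at the same
 * "object/offset" (an array slot here), and the displaced page joins the
 * contiguous run.  All types and names are simplified stand-ins.
 */
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>

#define SUB_PAGE_SIZE 4096

struct soft_page {
	unsigned char data[SUB_PAGE_SIZE];
	bool          dirty;
	bool          referenced;
};

/* 'slot' is where the victim currently lives; returns the displaced victim, or NULL to abort */
static struct soft_page *
substitute_page(struct soft_page **slot)
{
	struct soft_page *victim = *slot;
	struct soft_page *fresh = malloc(sizeof(*fresh));

	if (fresh == NULL) {
		return NULL;                                      /* caller must abort this run */
	}
	memcpy(fresh->data, victim->data, SUB_PAGE_SIZE);         /* pmap_copy_page() analogue */
	fresh->dirty = victim->dirty;                             /* carry over the page state */
	fresh->referenced = victim->referenced;
	*slot = fresh;                                            /* substitute takes the old offset */
	return victim;                                            /* victim is now free to be stolen */
}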
6426/*
6427 * Allocate a list of contiguous, wired pages.
6428 */
6429kern_return_t
6430cpm_allocate(
0a7de745
A
6431 vm_size_t size,
6432 vm_page_t *list,
6433 ppnum_t max_pnum,
6434 ppnum_t pnum_mask,
6435 boolean_t wire,
6436 int flags)
1c79356b 6437{
0a7de745
A
6438 vm_page_t pages;
6439 unsigned int npages;
1c79356b 6440
0a7de745 6441 if (size % PAGE_SIZE != 0) {
1c79356b 6442 return KERN_INVALID_ARGUMENT;
0a7de745 6443 }
1c79356b 6444
b0d623f7
A
6445 npages = (unsigned int) (size / PAGE_SIZE);
6446 if (npages != size / PAGE_SIZE) {
6447 /* 32-bit overflow */
6448 return KERN_INVALID_ARGUMENT;
6449 }
1c79356b 6450
1c79356b
A
6451 /*
6452 * Obtain a pointer to a subset of the free
6453 * list large enough to satisfy the request;
6454 * the region will be physically contiguous.
6455 */
b0d623f7 6456 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
e5568f75 6457
0a7de745 6458 if (pages == VM_PAGE_NULL) {
1c79356b 6459 return KERN_NO_SPACE;
0a7de745 6460 }
1c79356b 6461 /*
2d21ac55 6462 * determine need for wakeups
1c79356b 6463 */
0a7de745 6464 if (vm_page_free_count < vm_page_free_min) {
cb323159
A
6465 lck_mtx_lock(&vm_page_queue_free_lock);
6466 if (vm_pageout_running == FALSE) {
6467 lck_mtx_unlock(&vm_page_queue_free_lock);
6468 thread_wakeup((event_t) &vm_page_free_wanted);
6469 } else {
6470 lck_mtx_unlock(&vm_page_queue_free_lock);
6471 }
0a7de745
A
6472 }
6473
6d2010ae 6474 VM_CHECK_MEMORYSTATUS;
0a7de745 6475
1c79356b
A
6476 /*
6477 * The CPM pages should now be available and
6478 * ordered by ascending physical address.
6479 */
6480 assert(vm_page_verify_contiguous(pages, npages));
6481
6482 *list = pages;
6483 return KERN_SUCCESS;
6484}
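
/*
 * A minimal caller sketch for cpm_allocate() (illustrative only: `my_max_pnum`
 * and `my_pnum_mask` stand for whatever physical constraint the caller needs,
 * and the zero `flags` value is an assumption made for the example):
 *
 *	vm_page_t	run;
 *	kern_return_t	kr;
 *
 *	kr = cpm_allocate(4 * PAGE_SIZE, &run, my_max_pnum, my_pnum_mask, TRUE, 0);
 *	if (kr == KERN_SUCCESS) {
 *		// four wired pages, physically contiguous and ordered by ascending
 *		// physical address; the run is chained through vmp_snext, so it can
 *		// be walked with vm_page_get_next()/vm_page_get_phys_page()
 *	}
 */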
6d2010ae
A
6485
6486
6487unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
6488
6489/*
0a7de745 6490 * when working on a 'run' of pages, it is necessary to hold
6d2010ae
A
6491 * the vm_page_queue_lock (a hot global lock) for certain operations
6492 * on the page... however, the majority of the work can be done
6493 * while merely holding the object lock... in fact there are certain
6494 * collections of pages that don't require any work brokered by the
6495 * vm_page_queue_lock... to mitigate the time spent behind the global
6496 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
6497 * while doing all of the work that doesn't require the vm_page_queue_lock...
6498 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
6499 * necessary work for each page... we will grab the busy bit on the page
6500 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
6501 * if it can't immediately take the vm_page_queue_lock in order to compete
6502 * for the locks in the same order that vm_pageout_scan takes them.
6503 * the operation names are modeled after the names of the routines that
6504 * need to be called in order to make the changes very obvious in the
6505 * original loop
6506 */
6507
6508void
6509vm_page_do_delayed_work(
0a7de745 6510 vm_object_t object,
3e170ce0 6511 vm_tag_t tag,
6d2010ae 6512 struct vm_page_delayed_work *dwp,
0a7de745 6513 int dw_count)
6d2010ae 6514{
0a7de745
A
6515 int j;
6516 vm_page_t m;
6517 vm_page_t local_free_q = VM_PAGE_NULL;
6d2010ae
A
6518
6519 /*
6520 * pageout_scan takes the vm_page_lock_queues first
6521 * then tries for the object lock... to avoid what
6522 * is effectively a lock inversion, we'll go to the
6523 * trouble of taking them in that same order... otherwise
6524 * if this object contains the majority of the pages resident
6525 * in the UBC (or a small set of large objects actively being
6526 * worked on contain the majority of the pages), we could
6527 * cause the pageout_scan thread to 'starve' in its attempt
6528 * to find pages to move to the free queue, since it has to
6529 * successfully acquire the object lock of any candidate page
6530 * before it can steal/clean it.
6531 */
6532 if (!vm_page_trylockspin_queues()) {
6533 vm_object_unlock(object);
6534
cb323159
A
6535 /*
6536 * "Turnstile enabled vm_pageout_scan" can be runnable
6537 * for a very long time without getting on a core.
6538 * If this is a higher priority thread it could be
6539 * waiting here for a very long time respecting the fact
6540 * that pageout_scan would like its object after VPS does
6541 * a mutex_pause(0).
6542 * So we cap the number of yields in the vm_object_lock_avoid()
6543 * case to a single mutex_pause(0) which will give vm_pageout_scan
6544 * 10us to run and grab the object if needed.
6545 */
6d2010ae
A
6546 vm_page_lockspin_queues();
6547
0a7de745 6548 for (j = 0;; j++) {
cb323159
A
6549 if ((!vm_object_lock_avoid(object) ||
6550 (vps_dynamic_priority_enabled && (j > 0))) &&
0a7de745 6551 _vm_object_lock_try(object)) {
6d2010ae 6552 break;
0a7de745 6553 }
6d2010ae
A
6554 vm_page_unlock_queues();
6555 mutex_pause(j);
6556 vm_page_lockspin_queues();
6557 }
6d2010ae
A
6558 }
6559 for (j = 0; j < dw_count; j++, dwp++) {
6d2010ae
A
6560 m = dwp->dw_m;
6561
0a7de745 6562 if (dwp->dw_mask & DW_vm_pageout_throttle_up) {
6d2010ae 6563 vm_pageout_throttle_up(m);
0a7de745 6564 }
fe8ab488 6565#if CONFIG_PHANTOM_CACHE
0a7de745 6566 if (dwp->dw_mask & DW_vm_phantom_cache_update) {
fe8ab488 6567 vm_phantom_cache_update(m);
0a7de745 6568 }
fe8ab488 6569#endif
0a7de745 6570 if (dwp->dw_mask & DW_vm_page_wire) {
3e170ce0 6571 vm_page_wire(m, tag, FALSE);
0a7de745
A
6572 } else if (dwp->dw_mask & DW_vm_page_unwire) {
6573 boolean_t queueit;
6d2010ae 6574
fe8ab488 6575 queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;
6d2010ae
A
6576
6577 vm_page_unwire(m, queueit);
6578 }
6579 if (dwp->dw_mask & DW_vm_page_free) {
6580 vm_page_free_prepare_queues(m);
6581
d9a64523 6582 assert(m->vmp_pageq.next == 0 && m->vmp_pageq.prev == 0);
6d2010ae
A
6583 /*
6584 * Add this page to our list of reclaimed pages,
6585 * to be freed later.
6586 */
d9a64523 6587 m->vmp_snext = local_free_q;
6d2010ae
A
6588 local_free_q = m;
6589 } else {
0a7de745 6590 if (dwp->dw_mask & DW_vm_page_deactivate_internal) {
6d2010ae 6591 vm_page_deactivate_internal(m, FALSE);
0a7de745 6592 } else if (dwp->dw_mask & DW_vm_page_activate) {
d9a64523 6593 if (m->vmp_q_state != VM_PAGE_ON_ACTIVE_Q) {
6d2010ae
A
6594 vm_page_activate(m);
6595 }
0a7de745 6596 } else if (dwp->dw_mask & DW_vm_page_speculate) {
6d2010ae 6597 vm_page_speculate(m, TRUE);
0a7de745 6598 } else if (dwp->dw_mask & DW_enqueue_cleaned) {
316670eb
A
6599 /*
6600 * if we didn't hold the object lock and did this,
6601 * we might disconnect the page, then someone might
6602 * soft fault it back in, then we would put it on the
6603 * cleaned queue, and so we would have a referenced (maybe even dirty)
6604 * page on that queue, which we don't want
6605 */
39037602 6606 int refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
316670eb
A
6607
6608 if ((refmod_state & VM_MEM_REFERENCED)) {
6609 /*
6610 * this page has been touched since it got cleaned; let's activate it
6611 * if it hasn't already been
6612 */
0a7de745 6613 VM_PAGEOUT_DEBUG(vm_pageout_enqueued_cleaned, 1);
d9a64523 6614 VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reactivated, 1);
316670eb 6615
0a7de745 6616 if (m->vmp_q_state != VM_PAGE_ON_ACTIVE_Q) {
316670eb 6617 vm_page_activate(m);
0a7de745 6618 }
316670eb 6619 } else {
d9a64523 6620 m->vmp_reference = FALSE;
316670eb
A
6621 vm_page_enqueue_cleaned(m);
6622 }
0a7de745 6623 } else if (dwp->dw_mask & DW_vm_page_lru) {
6d2010ae 6624 vm_page_lru(m);
0a7de745
A
6625 } else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
6626 if (m->vmp_q_state != VM_PAGE_ON_PAGEOUT_Q) {
39037602 6627 vm_page_queues_remove(m, TRUE);
0a7de745 6628 }
316670eb 6629 }
0a7de745 6630 if (dwp->dw_mask & DW_set_reference) {
d9a64523 6631 m->vmp_reference = TRUE;
0a7de745 6632 } else if (dwp->dw_mask & DW_clear_reference) {
d9a64523 6633 m->vmp_reference = FALSE;
0a7de745 6634 }
6d2010ae
A
6635
6636 if (dwp->dw_mask & DW_move_page) {
d9a64523 6637 if (m->vmp_q_state != VM_PAGE_ON_PAGEOUT_Q) {
39037602 6638 vm_page_queues_remove(m, FALSE);
6d2010ae 6639
39037602 6640 assert(VM_PAGE_OBJECT(m) != kernel_object);
6d2010ae 6641
3e170ce0 6642 vm_page_enqueue_inactive(m, FALSE);
316670eb 6643 }
6d2010ae 6644 }
0a7de745 6645 if (dwp->dw_mask & DW_clear_busy) {
d9a64523 6646 m->vmp_busy = FALSE;
0a7de745 6647 }
6d2010ae 6648
0a7de745 6649 if (dwp->dw_mask & DW_PAGE_WAKEUP) {
6d2010ae 6650 PAGE_WAKEUP(m);
0a7de745 6651 }
6d2010ae
A
6652 }
6653 }
6654 vm_page_unlock_queues();
6655
0a7de745 6656 if (local_free_q) {
6d2010ae 6657 vm_page_free_list(local_free_q, TRUE);
0a7de745 6658 }
6d2010ae 6659
0a7de745 6660 VM_CHECK_MEMORYSTATUS;
6d2010ae
A
6661}
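
/*
 * A minimal sketch of the two-pass pattern described above (illustrative only;
 * the local array, the deactivate-only work mask and the VM_KERN_MEMORY_NONE
 * tag are assumptions for the example, and real callers also manage the page's
 * busy bit as noted in the block comment above):
 *
 *	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
 *	struct vm_page_delayed_work	*dwp = &dw_array[0];
 *	int				dw_count = 0;
 *	vm_page_t			m;
 *
 *	vm_object_lock(object);
 *	vm_page_queue_iterate(&object->memq, m, vmp_listq) {
 *		// pass 1: do the work that only needs the object lock, then queue the rest
 *		dwp->dw_m = m;
 *		dwp->dw_mask = DW_vm_page_deactivate_internal | DW_clear_reference;
 *		dwp++;
 *		if (++dw_count >= DEFAULT_DELAYED_WORK_LIMIT) {
 *			// pass 2: one trip behind the vm_page_queue_lock for the whole batch
 *			vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
 *			dwp = &dw_array[0];
 *			dw_count = 0;
 *		}
 *	}
 *	if (dw_count) {
 *		vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
 *	}
 *	vm_object_unlock(object);
 */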
6662
0b4c1975
A
6663kern_return_t
6664vm_page_alloc_list(
c3c9b80d
A
6665 int page_count,
6666 kma_flags_t flags,
6667 vm_page_t *list)
0b4c1975 6668{
c3c9b80d 6669 vm_page_t page_list = VM_PAGE_NULL;
0a7de745 6670 vm_page_t mem;
c3c9b80d
A
6671 kern_return_t kr = KERN_SUCCESS;
6672 int page_grab_count = 0;
6673 mach_vm_size_t map_size = ptoa_64(page_count);
6674#if DEVELOPMENT || DEBUG
6675 task_t task = current_task();
6676#endif /* DEVELOPMENT || DEBUG */
0b4c1975 6677
c3c9b80d
A
6678 for (int i = 0; i < page_count; i++) {
6679 for (;;) {
6680 if (flags & KMA_LOMEM) {
6681 mem = vm_page_grablo();
6682 } else {
6683 mem = vm_page_grab();
6684 }
0b4c1975 6685
c3c9b80d
A
6686 if (mem != VM_PAGE_NULL) {
6687 break;
6688 }
0b4c1975 6689
c3c9b80d
A
6690 if (flags & KMA_NOPAGEWAIT) {
6691 kr = KERN_RESOURCE_SHORTAGE;
6692 goto out;
6693 }
6694 if ((flags & KMA_LOMEM) && (vm_lopage_needed == TRUE)) {
6695 kr = KERN_RESOURCE_SHORTAGE;
6696 goto out;
0a7de745 6697 }
0b4c1975 6698
c3c9b80d
A
6699 /* VM privileged threads should have waited in vm_page_grab() and not get here. */
6700 assert(!(current_thread()->options & TH_OPT_VMPRIV));
0b4c1975 6701
c3c9b80d
A
6702 uint64_t unavailable = (vm_page_wire_count + vm_page_free_target) * PAGE_SIZE;
6703 if (unavailable > max_mem || map_size > (max_mem - unavailable)) {
6704 kr = KERN_RESOURCE_SHORTAGE;
6705 goto out;
6706 }
6707 VM_PAGE_WAIT();
0b4c1975 6708 }
c3c9b80d
A
6709
6710 page_grab_count++;
6711 mem->vmp_snext = page_list;
6712 page_list = mem;
0b4c1975 6713 }
0b4c1975 6714
c3c9b80d
A
6715 if (KMA_ZERO & flags) {
6716 for (mem = page_list; mem; mem = mem->vmp_snext) {
6717 vm_page_zero_fill(mem);
6718 }
6719 }
6720
6721out:
6722#if DEBUG || DEVELOPMENT
6723 if (task != NULL) {
6724 ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_grab_count);
6725 }
6726#endif
6727
6728 if (kr == KERN_SUCCESS) {
6729 *list = page_list;
6730 } else {
6731 vm_page_free_list(page_list, FALSE);
6732 }
6733
6734 return kr;
0b4c1975
A
6735}
6736
6737void
6738vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
6739{
d9a64523 6740 page->vmp_offset = offset;
0b4c1975
A
6741}
6742
6743vm_page_t
6744vm_page_get_next(vm_page_t page)
6745{
0a7de745 6746 return page->vmp_snext;
0b4c1975
A
6747}
6748
6749vm_object_offset_t
6750vm_page_get_offset(vm_page_t page)
6751{
0a7de745 6752 return page->vmp_offset;
0b4c1975
A
6753}
6754
6755ppnum_t
6756vm_page_get_phys_page(vm_page_t page)
6757{
0a7de745 6758 return VM_PAGE_GET_PHYS_PAGE(page);
0b4c1975 6759}
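
/*
 * A minimal sketch tying vm_page_alloc_list() and the accessors above together
 * (illustrative only; the page count and flag combination are assumptions for
 * the example):
 *
 *	vm_page_t	list, p;
 *	kern_return_t	kr;
 *
 *	kr = vm_page_alloc_list(8, KMA_ZERO | KMA_NOPAGEWAIT, &list);
 *	if (kr == KERN_SUCCESS) {
 *		for (p = list; p != VM_PAGE_NULL; p = vm_page_get_next(p)) {
 *			// each page is zero-filled and not yet entered into any object
 *			(void) vm_page_get_phys_page(p);
 *		}
 *		vm_page_free_list(list, FALSE);
 *	}
 */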
0a7de745
A
6760
6761
b0d623f7
A
6762/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
6763
d1ecb069
A
6764#if HIBERNATION
6765
b0d623f7
A
6766static vm_page_t hibernate_gobble_queue;
6767
0b4c1975 6768static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
39236c6e 6769static int hibernate_flush_dirty_pages(int);
39037602 6770static int hibernate_flush_queue(vm_page_queue_head_t *, int);
0b4c1975
A
6771
6772void hibernate_flush_wait(void);
6773void hibernate_mark_in_progress(void);
6774void hibernate_clear_in_progress(void);
6775
0a7de745
A
6776void hibernate_free_range(int, int);
6777void hibernate_hash_insert_page(vm_page_t);
6778uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
0a7de745
A
6779uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
6780ppnum_t hibernate_lookup_paddr(unsigned int);
0b4c1975
A
6781
6782struct hibernate_statistics {
6783 int hibernate_considered;
6784 int hibernate_reentered_on_q;
6785 int hibernate_found_dirty;
6786 int hibernate_skipped_cleaning;
6787 int hibernate_skipped_transient;
6788 int hibernate_skipped_precious;
39236c6e 6789 int hibernate_skipped_external;
0b4c1975
A
6790 int hibernate_queue_nolock;
6791 int hibernate_queue_paused;
6792 int hibernate_throttled;
6793 int hibernate_throttle_timeout;
6794 int hibernate_drained;
6795 int hibernate_drain_timeout;
6796 int cd_lock_failed;
6797 int cd_found_precious;
6798 int cd_found_wired;
6799 int cd_found_busy;
6800 int cd_found_unusual;
6801 int cd_found_cleaning;
6802 int cd_found_laundry;
6803 int cd_found_dirty;
39236c6e 6804 int cd_found_xpmapped;
8a3053a0 6805 int cd_skipped_xpmapped;
0b4c1975
A
6806 int cd_local_free;
6807 int cd_total_free;
6808 int cd_vm_page_wire_count;
39236c6e 6809 int cd_vm_struct_pages_unneeded;
0b4c1975
A
6810 int cd_pages;
6811 int cd_discarded;
6812 int cd_count_wire;
6813} hibernate_stats;
6814
6815
8a3053a0
A
6816/*
6817 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
6818 * so that we don't overrun the estimated image size, which would
6819 * result in a hibernation failure.
2a1bd2d3
A
6820 *
6821 * We use a size value instead of pages because we don't want to take up more space
6822 * on disk if the system has a 16K page size vs 4K. Also, we are not guaranteed
6823 * to have that additional space available.
6824 *
6825 * Since this was set at 40000 pages on X86 we are going to use 160MB as our
6826 * xpmapped size.
8a3053a0 6827 */
2a1bd2d3 6828#define HIBERNATE_XPMAPPED_LIMIT ((160 * 1024 * 1024ULL) / PAGE_SIZE)
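
/*
 * For reference (simple arithmetic on the constant above): 160MB works out to
 * 40960 pages on a 4K-page system (in line with the historical 40000-page x86
 * setting) and 10240 pages on a 16K-page system, so the on-disk budget stays
 * the same regardless of page size.
 */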
8a3053a0 6829
0b4c1975
A
6830
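/*
 * Wait for a pageout queue to drain before hibernation: each pass marks the
 * queue as draining and blocks for up to 5 seconds waiting for the laundry to
 * complete. A timeout on the internal queue is treated as a failure (returns 1);
 * a stalled external queue is tolerated (returns 0).
 */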
6831static int
6832hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
6833{
0a7de745 6834 wait_result_t wait_result;
0b4c1975
A
6835
6836 vm_page_lock_queues();
6837
0a7de745 6838 while (!vm_page_queue_empty(&q->pgo_pending)) {
0b4c1975
A
6839 q->pgo_draining = TRUE;
6840
0a7de745 6841 assert_wait_timeout((event_t) (&q->pgo_laundry + 1), THREAD_INTERRUPTIBLE, 5000, 1000 * NSEC_PER_USEC);
0b4c1975
A
6842
6843 vm_page_unlock_queues();
6844
6845 wait_result = thread_block(THREAD_CONTINUE_NULL);
6846
39037602 6847 if (wait_result == THREAD_TIMED_OUT && !vm_page_queue_empty(&q->pgo_pending)) {
0b4c1975 6848 hibernate_stats.hibernate_drain_timeout++;
0a7de745
A
6849
6850 if (q == &vm_pageout_queue_external) {
6851 return 0;
6852 }
6853
6854 return 1;
0b4c1975
A
6855 }
6856 vm_page_lock_queues();
6857
6858 hibernate_stats.hibernate_drained++;
6859 }
6860 vm_page_unlock_queues();
6861
0a7de745 6862 return 0;
0b4c1975
A
6863}
6864
0b4c1975 6865
39236c6e
A
6866boolean_t hibernate_skip_external = FALSE;
6867
0b4c1975 6868static int
39037602 6869hibernate_flush_queue(vm_page_queue_head_t *q, int qcount)
0b4c1975 6870{
0a7de745
A
6871 vm_page_t m;
6872 vm_object_t l_object = NULL;
6873 vm_object_t m_object = NULL;
6874 int refmod_state = 0;
6875 int try_failed_count = 0;
6876 int retval = 0;
6877 int current_run = 0;
6878 struct vm_pageout_queue *iq;
6879 struct vm_pageout_queue *eq;
6880 struct vm_pageout_queue *tq;
0b4c1975 6881
5ba3f43e 6882 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START,
0a7de745 6883 VM_KERNEL_UNSLIDE_OR_PERM(q), qcount);
0b4c1975 6884
0b4c1975
A
6885 iq = &vm_pageout_queue_internal;
6886 eq = &vm_pageout_queue_external;
6887
6888 vm_page_lock_queues();
6889
39037602 6890 while (qcount && !vm_page_queue_empty(q)) {
0b4c1975
A
6891 if (current_run++ == 1000) {
6892 if (hibernate_should_abort()) {
6893 retval = 1;
6894 break;
6895 }
6896 current_run = 0;
6897 }
6898
39037602
A
6899 m = (vm_page_t) vm_page_queue_first(q);
6900 m_object = VM_PAGE_OBJECT(m);
0b4c1975
A
6901
6902 /*
6903 * check to see if we currently are working
6904 * with the same object... if so, we've
6905 * already got the lock
6906 */
6907 if (m_object != l_object) {
0a7de745
A
6908 /*
6909 * the object associated with candidate page is
0b4c1975
A
6910 * different from the one we were just working
6911 * with... dump the lock if we still own it
6912 */
0a7de745
A
6913 if (l_object != NULL) {
6914 vm_object_unlock(l_object);
0b4c1975
A
6915 l_object = NULL;
6916 }
6917 /*
 6918 * Try to lock object; since we've already got the
6919 * page queues lock, we can only 'try' for this one.
6920 * if the 'try' fails, we need to do a mutex_pause
6921 * to allow the owner of the object lock a chance to
0a7de745 6922 * run...
0b4c1975 6923 */
0a7de745 6924 if (!vm_object_lock_try_scan(m_object)) {
0b4c1975
A
6925 if (try_failed_count > 20) {
6926 hibernate_stats.hibernate_queue_nolock++;
6927
6928 goto reenter_pg_on_q;
6929 }
0b4c1975
A
6930
6931 vm_page_unlock_queues();
6932 mutex_pause(try_failed_count++);
6933 vm_page_lock_queues();
6934
6935 hibernate_stats.hibernate_queue_paused++;
6936 continue;
6937 } else {
6938 l_object = m_object;
0b4c1975
A
6939 }
6940 }
0a7de745 6941 if (!m_object->alive || m->vmp_cleaning || m->vmp_laundry || m->vmp_busy || m->vmp_absent || m->vmp_error) {
0b4c1975
A
6942 /*
6943 * page is not to be cleaned
6944 * put it back on the head of its queue
6945 */
0a7de745 6946 if (m->vmp_cleaning) {
0b4c1975 6947 hibernate_stats.hibernate_skipped_cleaning++;
0a7de745 6948 } else {
0b4c1975 6949 hibernate_stats.hibernate_skipped_transient++;
0a7de745 6950 }
0b4c1975
A
6951
6952 goto reenter_pg_on_q;
6953 }
0b4c1975
A
6954 if (m_object->copy == VM_OBJECT_NULL) {
6955 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
6956 /*
6957 * let the normal hibernate image path
6958 * deal with these
6959 */
6960 goto reenter_pg_on_q;
6961 }
6962 }
0a7de745
A
6963 if (!m->vmp_dirty && m->vmp_pmapped) {
6964 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
0b4c1975 6965
316670eb
A
6966 if ((refmod_state & VM_MEM_MODIFIED)) {
6967 SET_PAGE_DIRTY(m, FALSE);
6968 }
0a7de745 6969 } else {
0b4c1975 6970 refmod_state = 0;
0a7de745 6971 }
0b4c1975 6972
0a7de745 6973 if (!m->vmp_dirty) {
0b4c1975
A
6974 /*
6975 * page is not to be cleaned
6976 * put it back on the head of its queue
6977 */
0a7de745 6978 if (m->vmp_precious) {
0b4c1975 6979 hibernate_stats.hibernate_skipped_precious++;
0a7de745 6980 }
0b4c1975
A
6981
6982 goto reenter_pg_on_q;
6983 }
39236c6e
A
6984
6985 if (hibernate_skip_external == TRUE && !m_object->internal) {
39236c6e 6986 hibernate_stats.hibernate_skipped_external++;
0a7de745 6987
39236c6e
A
6988 goto reenter_pg_on_q;
6989 }
0b4c1975
A
6990 tq = NULL;
6991
6992 if (m_object->internal) {
0a7de745 6993 if (VM_PAGE_Q_THROTTLED(iq)) {
0b4c1975 6994 tq = iq;
0a7de745
A
6995 }
6996 } else if (VM_PAGE_Q_THROTTLED(eq)) {
0b4c1975 6997 tq = eq;
0a7de745 6998 }
0b4c1975
A
6999
7000 if (tq != NULL) {
0a7de745
A
7001 wait_result_t wait_result;
7002 int wait_count = 5;
0b4c1975 7003
0a7de745
A
7004 if (l_object != NULL) {
7005 vm_object_unlock(l_object);
0b4c1975
A
7006 l_object = NULL;
7007 }
0b4c1975 7008
0b4c1975 7009 while (retval == 0) {
39236c6e
A
7010 tq->pgo_throttled = TRUE;
7011
0a7de745 7012 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000 * NSEC_PER_USEC);
0b4c1975 7013
316670eb 7014 vm_page_unlock_queues();
0b4c1975 7015
316670eb 7016 wait_result = thread_block(THREAD_CONTINUE_NULL);
0b4c1975
A
7017
7018 vm_page_lock_queues();
7019
0a7de745
A
7020 if (wait_result != THREAD_TIMED_OUT) {
7021 break;
7022 }
7023 if (!VM_PAGE_Q_THROTTLED(tq)) {
39236c6e 7024 break;
0a7de745 7025 }
39236c6e 7026
0a7de745 7027 if (hibernate_should_abort()) {
0b4c1975 7028 retval = 1;
0a7de745 7029 }
0b4c1975 7030
0b4c1975 7031 if (--wait_count == 0) {
316670eb 7032 hibernate_stats.hibernate_throttle_timeout++;
39236c6e
A
7033
7034 if (tq == eq) {
7035 hibernate_skip_external = TRUE;
7036 break;
7037 }
316670eb
A
7038 retval = 1;
7039 }
0b4c1975 7040 }
0a7de745 7041 if (retval) {
0b4c1975 7042 break;
0a7de745 7043 }
0b4c1975
A
7044
7045 hibernate_stats.hibernate_throttled++;
7046
7047 continue;
7048 }
316670eb
A
7049 /*
7050 * we've already factored out pages in the laundry which
7051 * means this page can't be on the pageout queue so it's
3e170ce0 7052 * safe to do the vm_page_queues_remove
316670eb 7053 */
39037602 7054 vm_page_queues_remove(m, TRUE);
0b4c1975 7055
0a7de745 7056 if (m_object->internal == TRUE) {
39037602 7057 pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m), PMAP_OPTIONS_COMPRESSOR, NULL);
0a7de745 7058 }
39236c6e 7059
5ba3f43e 7060 vm_pageout_cluster(m);
0b4c1975
A
7061
7062 hibernate_stats.hibernate_found_dirty++;
7063
7064 goto next_pg;
7065
7066reenter_pg_on_q:
0a7de745
A
7067 vm_page_queue_remove(q, m, vmp_pageq);
7068 vm_page_queue_enter(q, m, vmp_pageq);
0b4c1975
A
7069
7070 hibernate_stats.hibernate_reentered_on_q++;
7071next_pg:
7072 hibernate_stats.hibernate_considered++;
7073
7074 qcount--;
7075 try_failed_count = 0;
7076 }
7077 if (l_object != NULL) {
7078 vm_object_unlock(l_object);
7079 l_object = NULL;
7080 }
0b4c1975
A
7081
7082 vm_page_unlock_queues();
7083
7084 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
7085
0a7de745 7086 return retval;
0b4c1975
A
7087}
7088
7089
7090static int
39236c6e 7091hibernate_flush_dirty_pages(int pass)
0b4c1975 7092{
0a7de745
A
7093 struct vm_speculative_age_q *aq;
7094 uint32_t i;
0b4c1975 7095
0b4c1975 7096 if (vm_page_local_q) {
f427ee49
A
7097 zpercpu_foreach_cpu(lid) {
7098 vm_page_reactivate_local(lid, TRUE, FALSE);
0a7de745 7099 }
0b4c1975
A
7100 }
7101
7102 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
0a7de745
A
7103 int qcount;
7104 vm_page_t m;
0b4c1975
A
7105
7106 aq = &vm_page_queue_speculative[i];
7107
0a7de745 7108 if (vm_page_queue_empty(&aq->age_q)) {
0b4c1975 7109 continue;
0a7de745 7110 }
0b4c1975
A
7111 qcount = 0;
7112
7113 vm_page_lockspin_queues();
7114
0a7de745 7115 vm_page_queue_iterate(&aq->age_q, m, vmp_pageq) {
0b4c1975
A
7116 qcount++;
7117 }
7118 vm_page_unlock_queues();
7119
7120 if (qcount) {
0a7de745
A
7121 if (hibernate_flush_queue(&aq->age_q, qcount)) {
7122 return 1;
7123 }
0b4c1975
A
7124 }
7125 }
0a7de745
A
7126 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count)) {
7127 return 1;
7128 }
39037602 7129 /* XXX FBDP TODO: flush secluded queue */
0a7de745
A
7130 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count)) {
7131 return 1;
7132 }
7133 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count)) {
7134 return 1;
7135 }
7136 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
7137 return 1;
7138 }
7139
7140 if (pass == 1) {
39236c6e 7141 vm_compressor_record_warmup_start();
0a7de745 7142 }
39236c6e
A
7143
7144 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
0a7de745 7145 if (pass == 1) {
39236c6e 7146 vm_compressor_record_warmup_end();
0a7de745
A
7147 }
7148 return 1;
39236c6e
A
7149 }
7150 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
0a7de745 7151 if (pass == 1) {
39236c6e 7152 vm_compressor_record_warmup_end();
0a7de745
A
7153 }
7154 return 1;
39236c6e 7155 }
0a7de745 7156 if (pass == 1) {
39236c6e 7157 vm_compressor_record_warmup_end();
0a7de745 7158 }
39236c6e 7159
0a7de745
A
7160 if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external)) {
7161 return 1;
7162 }
39236c6e 7163
0a7de745 7164 return 0;
39236c6e 7165}
0b4c1975 7166
0b4c1975 7167
fe8ab488
A
7168void
7169hibernate_reset_stats()
7170{
7171 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
7172}
7173
7174
0b4c1975
A
7175int
7176hibernate_flush_memory()
7177{
0a7de745 7178 int retval;
0b4c1975 7179
39037602
A
7180 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
7181
0b4c1975
A
7182 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
7183
39236c6e
A
7184 hibernate_cleaning_in_progress = TRUE;
7185 hibernate_skip_external = FALSE;
7186
7187 if ((retval = hibernate_flush_dirty_pages(1)) == 0) {
39037602 7188 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
0b4c1975 7189
39037602 7190 vm_compressor_flush();
0b4c1975 7191
39037602 7192 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
39236c6e 7193
fe8ab488 7194 if (consider_buffer_cache_collect != NULL) {
39236c6e
A
7195 unsigned int orig_wire_count;
7196
7197 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
7198 orig_wire_count = vm_page_wire_count;
0a7de745 7199
0b4c1975 7200 (void)(*consider_buffer_cache_collect)(1);
c3c9b80d 7201 zone_gc(ZONE_GC_DRAIN);
0b4c1975 7202
39236c6e
A
7203 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
7204
7205 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
0b4c1975
A
7206 }
7207 }
39236c6e
A
7208 hibernate_cleaning_in_progress = FALSE;
7209
0b4c1975
A
7210 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
7211
0a7de745 7212 if (retval) {
39236c6e 7213 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);
0a7de745 7214 }
39236c6e
A
7215
7216
0a7de745
A
7217 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
7218 hibernate_stats.hibernate_considered,
7219 hibernate_stats.hibernate_reentered_on_q,
7220 hibernate_stats.hibernate_found_dirty);
7221 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
7222 hibernate_stats.hibernate_skipped_cleaning,
7223 hibernate_stats.hibernate_skipped_transient,
7224 hibernate_stats.hibernate_skipped_precious,
7225 hibernate_stats.hibernate_skipped_external,
7226 hibernate_stats.hibernate_queue_nolock);
7227 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
7228 hibernate_stats.hibernate_queue_paused,
7229 hibernate_stats.hibernate_throttled,
7230 hibernate_stats.hibernate_throttle_timeout,
7231 hibernate_stats.hibernate_drained,
7232 hibernate_stats.hibernate_drain_timeout);
0b4c1975 7233
0a7de745 7234 return retval;
0b4c1975
A
7235}
7236
6d2010ae 7237
b0d623f7
A
7238static void
7239hibernate_page_list_zero(hibernate_page_list_t *list)
7240{
0a7de745
A
7241 uint32_t bank;
7242 hibernate_bitmap_t * bitmap;
7243
7244 bitmap = &list->bank_bitmap[0];
7245 for (bank = 0; bank < list->bank_count; bank++) {
7246 uint32_t last_bit;
7247
7248 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
7249 // set out-of-bound bits at end of bitmap.
7250 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
7251 if (last_bit) {
7252 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
7253 }
b0d623f7 7254
0a7de745
A
7255 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
7256 }
b0d623f7
A
7257}
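
/*
 * Worked example of the tail fixup above, assuming the MSB-first bit layout
 * used by hibernate_page_bitset(): a bank spanning 70 pages needs 3 bitmap
 * words; last_bit = 70 & 31 = 6, so the final word becomes
 * 0xFFFFFFFF >> 6 = 0x03FFFFFF, setting the 26 out-of-range bit positions
 * ("no need to save") while leaving the 6 valid page bits zero.
 */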
7258
b0d623f7
A
7259void
7260hibernate_free_gobble_pages(void)
7261{
0a7de745
A
7262 vm_page_t m, next;
7263 uint32_t count = 0;
7264
7265 m = (vm_page_t) hibernate_gobble_queue;
7266 while (m) {
7267 next = m->vmp_snext;
7268 vm_page_free(m);
7269 count++;
7270 m = next;
7271 }
7272 hibernate_gobble_queue = VM_PAGE_NULL;
b0d623f7 7273
0a7de745
A
7274 if (count) {
7275 HIBLOG("Freed %d pages\n", count);
7276 }
b0d623f7
A
7277}
7278
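/*
 * Decide whether a page's contents can be left out of the hibernation image
 * and discarded on wakeup: it must be clean (or belong to a volatile/empty
 * purgeable object) and must not be wired, busy, precious, absent, unusual,
 * in error, being cleaned or in the laundry. Clean, referenced, xpmapped
 * pages of external objects are kept in the image anyway, up to
 * HIBERNATE_XPMAPPED_LIMIT of them. When 'preflight' is set, only the
 * decision is made and the cd_* statistics are left untouched.
 */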
0a7de745 7279static boolean_t
db609669 7280hibernate_consider_discard(vm_page_t m, boolean_t preflight)
b0d623f7 7281{
0a7de745
A
7282 vm_object_t object = NULL;
7283 int refmod_state;
7284 boolean_t discard = FALSE;
b0d623f7 7285
0a7de745
A
 7286 do {
7287 if (m->vmp_private) {
7288 panic("hibernate_consider_discard: private");
7289 }
b0d623f7 7290
0a7de745 7291 object = VM_PAGE_OBJECT(m);
39037602 7292
0a7de745
A
7293 if (!vm_object_lock_try(object)) {
7294 object = NULL;
7295 if (!preflight) {
7296 hibernate_stats.cd_lock_failed++;
7297 }
7298 break;
7299 }
7300 if (VM_PAGE_WIRED(m)) {
7301 if (!preflight) {
7302 hibernate_stats.cd_found_wired++;
7303 }
7304 break;
7305 }
7306 if (m->vmp_precious) {
7307 if (!preflight) {
7308 hibernate_stats.cd_found_precious++;
7309 }
7310 break;
7311 }
7312 if (m->vmp_busy || !object->alive) {
7313 /*
7314 * Somebody is playing with this page.
7315 */
7316 if (!preflight) {
7317 hibernate_stats.cd_found_busy++;
7318 }
7319 break;
7320 }
7321 if (m->vmp_absent || m->vmp_unusual || m->vmp_error) {
7322 /*
 7323 * If it's unusual in any way, ignore it
7324 */
7325 if (!preflight) {
7326 hibernate_stats.cd_found_unusual++;
7327 }
7328 break;
7329 }
7330 if (m->vmp_cleaning) {
7331 if (!preflight) {
7332 hibernate_stats.cd_found_cleaning++;
7333 }
7334 break;
7335 }
7336 if (m->vmp_laundry) {
7337 if (!preflight) {
7338 hibernate_stats.cd_found_laundry++;
7339 }
7340 break;
7341 }
7342 if (!m->vmp_dirty) {
7343 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
7344
7345 if (refmod_state & VM_MEM_REFERENCED) {
7346 m->vmp_reference = TRUE;
7347 }
7348 if (refmod_state & VM_MEM_MODIFIED) {
7349 SET_PAGE_DIRTY(m, FALSE);
7350 }
7351 }
7352
7353 /*
7354 * If it's clean or purgeable we can discard the page on wakeup.
7355 */
7356 discard = (!m->vmp_dirty)
b0d623f7 7357 || (VM_PURGABLE_VOLATILE == object->purgable)
0a7de745 7358 || (VM_PURGABLE_EMPTY == object->purgable);
0b4c1975 7359
39236c6e 7360
0a7de745
A
7361 if (discard == FALSE) {
7362 if (!preflight) {
7363 hibernate_stats.cd_found_dirty++;
7364 }
7365 } else if (m->vmp_xpmapped && m->vmp_reference && !object->internal) {
7366 if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
7367 if (!preflight) {
7368 hibernate_stats.cd_found_xpmapped++;
7369 }
7370 discard = FALSE;
7371 } else {
7372 if (!preflight) {
7373 hibernate_stats.cd_skipped_xpmapped++;
7374 }
7375 }
8a3053a0 7376 }
 0a7de745 7377 } while (FALSE);
b0d623f7 7378
0a7de745
A
7379 if (object) {
7380 vm_object_unlock(object);
7381 }
b0d623f7 7382
0a7de745 7383 return discard;
b0d623f7
A
7384}
7385
7386
7387static void
7388hibernate_discard_page(vm_page_t m)
7389{
0a7de745 7390 vm_object_t m_object;
39037602 7391
0a7de745
A
7392 if (m->vmp_absent || m->vmp_unusual || m->vmp_error) {
7393 /*
 7394 * If it's unusual in any way, ignore it
7395 */
7396 return;
7397 }
b0d623f7 7398
0a7de745 7399 m_object = VM_PAGE_OBJECT(m);
39037602 7400
fe8ab488 7401#if MACH_ASSERT || DEBUG
0a7de745
A
7402 if (!vm_object_lock_try(m_object)) {
7403 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
7404 }
316670eb 7405#else
0a7de745
A
7406 /* No need to lock page queue for token delete, hibernate_vm_unlock()
7407 * makes sure these locks are uncontended before sleep */
fe8ab488 7408#endif /* MACH_ASSERT || DEBUG */
316670eb 7409
0a7de745
A
7410 if (m->vmp_pmapped == TRUE) {
7411 __unused int refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
39236c6e 7412 }
fe8ab488 7413
0a7de745
A
7414 if (m->vmp_laundry) {
7415 panic("hibernate_discard_page(%p) laundry", m);
7416 }
7417 if (m->vmp_private) {
7418 panic("hibernate_discard_page(%p) private", m);
7419 }
7420 if (m->vmp_fictitious) {
7421 panic("hibernate_discard_page(%p) fictitious", m);
7422 }
7423
7424 if (VM_PURGABLE_VOLATILE == m_object->purgable) {
7425 /* object should be on a queue */
7426 assert((m_object->objq.next != NULL) && (m_object->objq.prev != NULL));
7427 purgeable_q_t old_queue = vm_purgeable_object_remove(m_object);
7428 assert(old_queue);
7429 if (m_object->purgeable_when_ripe) {
7430 vm_purgeable_token_delete_first(old_queue);
7431 }
7432 vm_object_lock_assert_exclusive(m_object);
7433 m_object->purgable = VM_PURGABLE_EMPTY;
7434
7435 /*
7436 * Purgeable ledgers: pages of VOLATILE and EMPTY objects are
7437 * accounted in the "volatile" ledger, so no change here.
7438 * We have to update vm_page_purgeable_count, though, since we're
7439 * effectively purging this object.
7440 */
7441 unsigned int delta;
7442 assert(m_object->resident_page_count >= m_object->wired_page_count);
7443 delta = (m_object->resident_page_count - m_object->wired_page_count);
7444 assert(vm_page_purgeable_count >= delta);
7445 assert(delta > 0);
7446 OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
7447 }
7448
7449 vm_page_free(m);
316670eb 7450
fe8ab488 7451#if MACH_ASSERT || DEBUG
0a7de745
A
7452 vm_object_unlock(m_object);
7453#endif /* MACH_ASSERT || DEBUG */
b0d623f7
A
7454}
7455
db609669 7456/*
0a7de745
A
7457 * Grab locks for hibernate_page_list_setall()
7458 */
db609669
A
7459void
7460hibernate_vm_lock_queues(void)
7461{
0a7de745
A
7462 vm_object_lock(compressor_object);
7463 vm_page_lock_queues();
7464 lck_mtx_lock(&vm_page_queue_free_lock);
7465 lck_mtx_lock(&vm_purgeable_queue_lock);
db609669 7466
0a7de745 7467 if (vm_page_local_q) {
f427ee49 7468 zpercpu_foreach(lq, vm_page_local_q) {
0a7de745
A
7469 VPL_LOCK(&lq->vpl_lock);
7470 }
db609669 7471 }
db609669
A
7472}
7473
7474void
7475hibernate_vm_unlock_queues(void)
7476{
0a7de745 7477 if (vm_page_local_q) {
f427ee49 7478 zpercpu_foreach(lq, vm_page_local_q) {
0a7de745
A
7479 VPL_UNLOCK(&lq->vpl_lock);
7480 }
db609669 7481 }
0a7de745
A
7482 lck_mtx_unlock(&vm_purgeable_queue_lock);
7483 lck_mtx_unlock(&vm_page_queue_free_lock);
7484 vm_page_unlock_queues();
7485 vm_object_unlock(compressor_object);
db609669
A
7486}
7487
b0d623f7 7488/*
0a7de745
A
 7489 * Bits zero in the bitmaps => page needs to be saved. All pages default to being saved;
7490 * pages known to VM to not need saving are subtracted.
7491 * Wired pages to be saved are present in page_list_wired, pageable in page_list.
7492 */
b0d623f7
A
7493
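/*
 * An illustrative walk-through of the bookkeeping below: a page on a free
 * queue gets its bit set in both bitmaps (nothing to save); a resident
 * pageable page that survives the discard checks gets only its
 * page_list_wired bit set (it will be saved as pageable); a wired page is
 * left alone in both bitmaps and is picked up by the final
 * "pull wired from hibernate_bitmap" pass, which ORs ~page_list_wired into
 * page_list so the page is saved from the wired list only.
 */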
7494void
7495hibernate_page_list_setall(hibernate_page_list_t * page_list,
0a7de745
A
7496 hibernate_page_list_t * page_list_wired,
7497 hibernate_page_list_t * page_list_pal,
7498 boolean_t preflight,
7499 boolean_t will_discard,
7500 uint32_t * pagesOut)
7501{
7502 uint64_t start, end, nsec;
7503 vm_page_t m;
7504 vm_page_t next;
7505 uint32_t pages = page_list->page_count;
7506 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
7507 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
7508 uint32_t count_wire = pages;
7509 uint32_t count_discard_active = 0;
7510 uint32_t count_discard_inactive = 0;
7511 uint32_t count_discard_cleaned = 0;
7512 uint32_t count_discard_purgeable = 0;
7513 uint32_t count_discard_speculative = 0;
7514 uint32_t count_discard_vm_struct_pages = 0;
7515 uint32_t i;
7516 uint32_t bank;
7517 hibernate_bitmap_t * bitmap;
7518 hibernate_bitmap_t * bitmap_wired;
7519 boolean_t discard_all;
7520 boolean_t discard;
7521
7522 HIBLOG("hibernate_page_list_setall(preflight %d) start\n", preflight);
7523
7524 if (preflight) {
7525 page_list = NULL;
7526 page_list_wired = NULL;
7527 page_list_pal = NULL;
39236c6e 7528 discard_all = FALSE;
0a7de745 7529 } else {
39236c6e 7530 discard_all = will_discard;
0a7de745 7531 }
0b4c1975 7532
fe8ab488 7533#if MACH_ASSERT || DEBUG
0a7de745
A
7534 if (!preflight) {
7535 assert(hibernate_vm_locks_are_safe());
7536 vm_page_lock_queues();
7537 if (vm_page_local_q) {
f427ee49 7538 zpercpu_foreach(lq, vm_page_local_q) {
0a7de745
A
7539 VPL_LOCK(&lq->vpl_lock);
7540 }
7541 }
7542 }
7543#endif /* MACH_ASSERT || DEBUG */
7544
7545
7546 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
7547
7548 clock_get_uptime(&start);
7549
7550 if (!preflight) {
7551 hibernate_page_list_zero(page_list);
7552 hibernate_page_list_zero(page_list_wired);
7553 hibernate_page_list_zero(page_list_pal);
7554
7555 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
7556 hibernate_stats.cd_pages = pages;
7557 }
7558
316670eb 7559 if (vm_page_local_q) {
f427ee49
A
7560 zpercpu_foreach_cpu(lid) {
7561 vm_page_reactivate_local(lid, TRUE, !preflight);
0a7de745
A
7562 }
7563 }
7564
7565 if (preflight) {
7566 vm_object_lock(compressor_object);
7567 vm_page_lock_queues();
7568 lck_mtx_lock(&vm_page_queue_free_lock);
7569 }
7570
7571 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
7572
7573 hibernation_vmqueues_inspection = TRUE;
7574
7575 m = (vm_page_t) hibernate_gobble_queue;
7576 while (m) {
7577 pages--;
7578 count_wire--;
7579 if (!preflight) {
7580 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7581 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7582 }
7583 m = m->vmp_snext;
7584 }
7585
7586 if (!preflight) {
f427ee49
A
7587 percpu_foreach(free_pages_head, free_pages) {
7588 for (m = *free_pages_head; m; m = m->vmp_snext) {
7589 assert(m->vmp_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
0a7de745 7590
f427ee49
A
7591 pages--;
7592 count_wire--;
7593 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7594 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
0a7de745 7595
f427ee49
A
7596 hibernate_stats.cd_local_free++;
7597 hibernate_stats.cd_total_free++;
0a7de745
A
7598 }
7599 }
7600 }
7601
7602 for (i = 0; i < vm_colors; i++) {
7603 vm_page_queue_iterate(&vm_page_queue_free[i].qhead, m, vmp_pageq) {
7604 assert(m->vmp_q_state == VM_PAGE_ON_FREE_Q);
7605
7606 pages--;
7607 count_wire--;
7608 if (!preflight) {
7609 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7610 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7611
7612 hibernate_stats.cd_total_free++;
7613 }
7614 }
7615 }
7616
7617 vm_page_queue_iterate(&vm_lopage_queue_free, m, vmp_pageq) {
7618 assert(m->vmp_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
7619
7620 pages--;
7621 count_wire--;
7622 if (!preflight) {
7623 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7624 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7625
7626 hibernate_stats.cd_total_free++;
7627 }
7628 }
7629
7630 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
7631 while (m && !vm_page_queue_end(&vm_page_queue_throttled, (vm_page_queue_entry_t)m)) {
7632 assert(m->vmp_q_state == VM_PAGE_ON_THROTTLED_Q);
7633
7634 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7635 discard = FALSE;
7636 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
7637 && hibernate_consider_discard(m, preflight)) {
7638 if (!preflight) {
7639 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7640 }
7641 count_discard_inactive++;
7642 discard = discard_all;
7643 } else {
7644 count_throttled++;
7645 }
7646 count_wire--;
7647 if (!preflight) {
7648 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7649 }
7650
7651 if (discard) {
7652 hibernate_discard_page(m);
7653 }
7654 m = next;
7655 }
7656
7657 m = (vm_page_t)vm_page_queue_first(&vm_page_queue_anonymous);
7658 while (m && !vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t)m)) {
7659 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
7660
7661 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7662 discard = FALSE;
7663 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) &&
7664 hibernate_consider_discard(m, preflight)) {
7665 if (!preflight) {
7666 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7667 }
7668 if (m->vmp_dirty) {
7669 count_discard_purgeable++;
7670 } else {
7671 count_discard_inactive++;
7672 }
7673 discard = discard_all;
7674 } else {
7675 count_anonymous++;
7676 }
7677 count_wire--;
7678 if (!preflight) {
7679 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7680 }
7681 if (discard) {
7682 hibernate_discard_page(m);
7683 }
7684 m = next;
7685 }
7686
7687 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
7688 while (m && !vm_page_queue_end(&vm_page_queue_cleaned, (vm_page_queue_entry_t)m)) {
7689 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
7690
7691 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7692 discard = FALSE;
7693 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) &&
7694 hibernate_consider_discard(m, preflight)) {
7695 if (!preflight) {
7696 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7697 }
7698 if (m->vmp_dirty) {
7699 count_discard_purgeable++;
7700 } else {
7701 count_discard_cleaned++;
7702 }
7703 discard = discard_all;
7704 } else {
7705 count_cleaned++;
7706 }
7707 count_wire--;
7708 if (!preflight) {
7709 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7710 }
7711 if (discard) {
7712 hibernate_discard_page(m);
7713 }
7714 m = next;
7715 }
7716
7717 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
7718 while (m && !vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t)m)) {
7719 assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q);
7720
7721 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7722 discard = FALSE;
7723 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode) &&
7724 hibernate_consider_discard(m, preflight)) {
7725 if (!preflight) {
7726 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7727 }
7728 if (m->vmp_dirty) {
7729 count_discard_purgeable++;
7730 } else {
7731 count_discard_active++;
7732 }
7733 discard = discard_all;
7734 } else {
7735 count_active++;
7736 }
7737 count_wire--;
7738 if (!preflight) {
7739 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7740 }
7741 if (discard) {
7742 hibernate_discard_page(m);
7743 }
7744 m = next;
7745 }
7746
7747 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
7748 while (m && !vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t)m)) {
7749 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
7750
7751 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7752 discard = FALSE;
7753 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) &&
7754 hibernate_consider_discard(m, preflight)) {
7755 if (!preflight) {
7756 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7757 }
7758 if (m->vmp_dirty) {
7759 count_discard_purgeable++;
7760 } else {
7761 count_discard_inactive++;
7762 }
7763 discard = discard_all;
7764 } else {
7765 count_inactive++;
7766 }
7767 count_wire--;
7768 if (!preflight) {
7769 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7770 }
7771 if (discard) {
7772 hibernate_discard_page(m);
7773 }
7774 m = next;
7775 }
7776 /* XXX FBDP TODO: secluded queue */
7777
7778 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
7779 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_speculative[i].age_q);
7780 while (m && !vm_page_queue_end(&vm_page_queue_speculative[i].age_q, (vm_page_queue_entry_t)m)) {
7781 assertf(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q,
7782 "Bad page: %p (0x%x:0x%x) on queue %d has state: %d (Discard: %d, Preflight: %d)",
7783 m, m->vmp_pageq.next, m->vmp_pageq.prev, i, m->vmp_q_state, discard, preflight);
7784
7785 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7786 discard = FALSE;
7787 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) &&
7788 hibernate_consider_discard(m, preflight)) {
7789 if (!preflight) {
7790 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7791 }
7792 count_discard_speculative++;
7793 discard = discard_all;
7794 } else {
7795 count_speculative++;
7796 }
7797 count_wire--;
7798 if (!preflight) {
7799 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7800 }
7801 if (discard) {
7802 hibernate_discard_page(m);
7803 }
7804 m = next;
7805 }
316670eb 7806 }
316670eb 7807
0a7de745
A
7808 vm_page_queue_iterate(&compressor_object->memq, m, vmp_listq) {
7809 assert(m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR);
316670eb 7810
0a7de745
A
7811 count_compressor++;
7812 count_wire--;
7813 if (!preflight) {
7814 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
7815 }
7816 }
b0d623f7 7817
0a7de745
A
7818 if (preflight == FALSE && discard_all == TRUE) {
7819 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START);
0b4c1975 7820
0a7de745
A
7821 HIBLOG("hibernate_teardown started\n");
7822 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
7823 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
db609669 7824
0a7de745
A
7825 pages -= count_discard_vm_struct_pages;
7826 count_wire -= count_discard_vm_struct_pages;
b0d623f7 7827
0a7de745 7828 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
e8c3f781 7829
0a7de745
A
7830 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_END);
7831 }
e8c3f781 7832
db609669 7833 if (!preflight) {
0a7de745
A
7834 // pull wired from hibernate_bitmap
7835 bitmap = &page_list->bank_bitmap[0];
7836 bitmap_wired = &page_list_wired->bank_bitmap[0];
7837 for (bank = 0; bank < page_list->bank_count; bank++) {
7838 for (i = 0; i < bitmap->bitmapwords; i++) {
7839 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
7840 }
7841 bitmap = (hibernate_bitmap_t *)&bitmap->bitmap[bitmap->bitmapwords];
7842 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
7843 }
db609669 7844 }
39037602 7845
0a7de745
A
7846 // machine dependent adjustments
7847 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
6d2010ae 7848
db609669 7849 if (!preflight) {
0a7de745
A
7850 hibernate_stats.cd_count_wire = count_wire;
7851 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
7852 count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
db609669 7853 }
b0d623f7 7854
0a7de745
A
7855 clock_get_uptime(&end);
7856 absolutetime_to_nanoseconds(end - start, &nsec);
7857 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
0b4c1975 7858
0a7de745
A
7859 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
7860 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
7861 discard_all ? "did" : "could",
7862 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
b0d623f7 7863
0a7de745
A
7864 if (hibernate_stats.cd_skipped_xpmapped) {
7865 HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);
7866 }
8a3053a0 7867
0a7de745 7868 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
316670eb 7869
0a7de745
A
7870 if (preflight && will_discard) {
7871 *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
2a1bd2d3
A
7872 /*
7873 * We try to keep max HIBERNATE_XPMAPPED_LIMIT pages around in the hibernation image
7874 * even if these are clean and so we need to size the hibernation image accordingly.
7875 *
7876 * NB: We have to assume all HIBERNATE_XPMAPPED_LIMIT pages might show up because 'dirty'
7877 * xpmapped pages aren't distinguishable from other 'dirty' pages in preflight. So we might
7878 * only see part of the xpmapped pages if we look at 'cd_found_xpmapped' which solely tracks
7879 * clean xpmapped pages.
7880 *
7881 * Since these pages are all cleaned by the time we are in the post-preflight phase, we might
 7882 * see a much larger number in 'cd_found_xpmapped' now than we did in the preflight phase.
7883 */
7884 *pagesOut += HIBERNATE_XPMAPPED_LIMIT;
0a7de745 7885 }
39236c6e 7886
0a7de745 7887 hibernation_vmqueues_inspection = FALSE;
e8c3f781 7888
fe8ab488 7889#if MACH_ASSERT || DEBUG
0a7de745
A
7890 if (!preflight) {
7891 if (vm_page_local_q) {
f427ee49 7892 zpercpu_foreach(lq, vm_page_local_q) {
0a7de745
A
7893 VPL_UNLOCK(&lq->vpl_lock);
7894 }
7895 }
7896 vm_page_unlock_queues();
316670eb 7897 }
fe8ab488 7898#endif /* MACH_ASSERT || DEBUG */
0b4c1975 7899
0a7de745
A
7900 if (preflight) {
7901 lck_mtx_unlock(&vm_page_queue_free_lock);
7902 vm_page_unlock_queues();
7903 vm_object_unlock(compressor_object);
7904 }
db609669 7905
0a7de745 7906 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
b0d623f7
A
7907}
7908
7909void
7910hibernate_page_list_discard(hibernate_page_list_t * page_list)
7911{
0a7de745
A
7912 uint64_t start, end, nsec;
7913 vm_page_t m;
7914 vm_page_t next;
7915 uint32_t i;
7916 uint32_t count_discard_active = 0;
7917 uint32_t count_discard_inactive = 0;
7918 uint32_t count_discard_purgeable = 0;
7919 uint32_t count_discard_cleaned = 0;
7920 uint32_t count_discard_speculative = 0;
b0d623f7 7921
39236c6e 7922
fe8ab488 7923#if MACH_ASSERT || DEBUG
0a7de745 7924 vm_page_lock_queues();
316670eb 7925 if (vm_page_local_q) {
f427ee49 7926 zpercpu_foreach(lq, vm_page_local_q) {
0a7de745
A
7927 VPL_LOCK(&lq->vpl_lock);
7928 }
316670eb 7929 }
fe8ab488 7930#endif /* MACH_ASSERT || DEBUG */
316670eb 7931
0a7de745
A
7932 clock_get_uptime(&start);
7933
7934 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
7935 while (m && !vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t)m)) {
7936 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
7937
7938 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7939 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m))) {
7940 if (m->vmp_dirty) {
7941 count_discard_purgeable++;
7942 } else {
7943 count_discard_inactive++;
7944 }
7945 hibernate_discard_page(m);
7946 }
7947 m = next;
7948 }
7949
7950 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
7951 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_speculative[i].age_q);
7952 while (m && !vm_page_queue_end(&vm_page_queue_speculative[i].age_q, (vm_page_queue_entry_t)m)) {
7953 assert(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q);
7954
7955 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7956 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m))) {
7957 count_discard_speculative++;
7958 hibernate_discard_page(m);
7959 }
7960 m = next;
7961 }
7962 }
7963
7964 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
7965 while (m && !vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t)m)) {
7966 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
7967
7968 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7969 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m))) {
7970 if (m->vmp_dirty) {
7971 count_discard_purgeable++;
7972 } else {
7973 count_discard_inactive++;
7974 }
7975 hibernate_discard_page(m);
7976 }
7977 m = next;
7978 }
7979 /* XXX FBDP TODO: secluded queue */
7980
7981 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
7982 while (m && !vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t)m)) {
7983 assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q);
7984
7985 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
7986 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m))) {
7987 if (m->vmp_dirty) {
7988 count_discard_purgeable++;
7989 } else {
7990 count_discard_active++;
7991 }
7992 hibernate_discard_page(m);
7993 }
7994 m = next;
7995 }
7996
7997 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
7998 while (m && !vm_page_queue_end(&vm_page_queue_cleaned, (vm_page_queue_entry_t)m)) {
7999 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
8000
8001 next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->vmp_pageq.next);
8002 if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m))) {
8003 if (m->vmp_dirty) {
8004 count_discard_purgeable++;
8005 } else {
8006 count_discard_cleaned++;
8007 }
8008 hibernate_discard_page(m);
8009 }
8010 m = next;
8011 }
316670eb 8012
fe8ab488 8013#if MACH_ASSERT || DEBUG
316670eb 8014 if (vm_page_local_q) {
f427ee49 8015 zpercpu_foreach(lq, vm_page_local_q) {
0a7de745
A
8016 VPL_UNLOCK(&lq->vpl_lock);
8017 }
316670eb 8018 }
0a7de745 8019 vm_page_unlock_queues();
fe8ab488 8020#endif /* MACH_ASSERT || DEBUG */
316670eb 8021
0a7de745
A
8022 clock_get_uptime(&end);
8023 absolutetime_to_nanoseconds(end - start, &nsec);
8024 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
8025 nsec / 1000000ULL,
8026 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
b0d623f7
A
8027}
8028
39236c6e 8029boolean_t hibernate_paddr_map_inited = FALSE;
0a7de745
A
8030unsigned int hibernate_teardown_last_valid_compact_indx = -1;
8031vm_page_t hibernate_rebuild_hash_list = NULL;
39236c6e 8032
0a7de745
A
8033unsigned int hibernate_teardown_found_tabled_pages = 0;
8034unsigned int hibernate_teardown_found_created_pages = 0;
8035unsigned int hibernate_teardown_found_free_pages = 0;
8036unsigned int hibernate_teardown_vm_page_free_count;
39236c6e
A
8037
8038
8039struct ppnum_mapping {
0a7de745
A
8040 struct ppnum_mapping *ppnm_next;
8041 ppnum_t ppnm_base_paddr;
8042 unsigned int ppnm_sindx;
8043 unsigned int ppnm_eindx;
39236c6e
A
8044};
8045
0a7de745
A
8046struct ppnum_mapping *ppnm_head;
8047struct ppnum_mapping *ppnm_last_found = NULL;
39236c6e
A
8048
8049
8050void
f427ee49 8051hibernate_create_paddr_map(void)
39236c6e 8052{
0a7de745
A
8053 unsigned int i;
8054 ppnum_t next_ppnum_in_run = 0;
39236c6e
A
8055 struct ppnum_mapping *ppnm = NULL;
8056
8057 if (hibernate_paddr_map_inited == FALSE) {
39236c6e 8058 for (i = 0; i < vm_pages_count; i++) {
0a7de745 8059 if (ppnm) {
39236c6e 8060 ppnm->ppnm_eindx = i;
0a7de745 8061 }
39236c6e 8062
39037602 8063 if (ppnm == NULL || VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) != next_ppnum_in_run) {
f427ee49 8064 ppnm = zalloc_permanent_type(struct ppnum_mapping);
39236c6e
A
8065
8066 ppnm->ppnm_next = ppnm_head;
8067 ppnm_head = ppnm;
8068
8069 ppnm->ppnm_sindx = i;
39037602 8070 ppnm->ppnm_base_paddr = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]);
39236c6e 8071 }
39037602 8072 next_ppnum_in_run = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) + 1;
39236c6e
A
8073 }
8074 ppnm->ppnm_eindx++;
8075
8076 hibernate_paddr_map_inited = TRUE;
8077 }
8078}
8079
8080ppnum_t
8081hibernate_lookup_paddr(unsigned int indx)
8082{
8083 struct ppnum_mapping *ppnm = NULL;
0a7de745 8084
39236c6e
A
8085 ppnm = ppnm_last_found;
8086
8087 if (ppnm) {
0a7de745 8088 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
39236c6e 8089 goto done;
0a7de745 8090 }
39236c6e
A
8091 }
8092 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
39236c6e
A
8093 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
8094 ppnm_last_found = ppnm;
8095 break;
8096 }
8097 }
0a7de745 8098 if (ppnm == NULL) {
39236c6e 8099 panic("hibernate_lookup_paddr of %d failed\n", indx);
0a7de745 8100 }
39236c6e 8101done:
0a7de745 8102 return ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx);
39236c6e
A
8103}
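
/*
 * Worked example (hypothetical numbers): if vm_pages[100..199] describe a
 * physically contiguous run starting at ppnum 0x8000, the run is recorded as
 * { ppnm_sindx = 100, ppnm_eindx = 200, ppnm_base_paddr = 0x8000 } and
 * hibernate_lookup_paddr(150) returns 0x8000 + (150 - 100) = 0x8032.
 */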
8104
8105
8106uint32_t
8107hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
8108{
0a7de745
A
8109 addr64_t saddr_aligned;
8110 addr64_t eaddr_aligned;
8111 addr64_t addr;
8112 ppnum_t paddr;
8113 unsigned int mark_as_unneeded_pages = 0;
39236c6e
A
8114
8115 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
8116 eaddr_aligned = eaddr & ~PAGE_MASK_64;
8117
8118 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
39236c6e
A
8119 paddr = pmap_find_phys(kernel_pmap, addr);
8120
8121 assert(paddr);
8122
0a7de745 8123 hibernate_page_bitset(page_list, TRUE, paddr);
39236c6e
A
8124 hibernate_page_bitset(page_list_wired, TRUE, paddr);
8125
8126 mark_as_unneeded_pages++;
8127 }
0a7de745 8128 return mark_as_unneeded_pages;
39236c6e
A
8129}
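
/*
 * Worked example (hypothetical addresses, 4K pages): for saddr = 0x1200 and
 * eaddr = 0x5000, saddr rounds up to 0x2000 and eaddr down to 0x5000, so the
 * pages at 0x2000, 0x3000 and 0x4000 are marked unneeded (3 pages); the
 * partially covered page at 0x1000 is conservatively kept in the image.
 */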
8130
8131
8132void
8133hibernate_hash_insert_page(vm_page_t mem)
8134{
8135 vm_page_bucket_t *bucket;
0a7de745
A
8136 int hash_id;
8137 vm_object_t m_object;
39037602
A
8138
8139 m_object = VM_PAGE_OBJECT(mem);
39236c6e 8140
d9a64523 8141 assert(mem->vmp_hashed);
39037602 8142 assert(m_object);
d9a64523 8143 assert(mem->vmp_offset != (vm_object_offset_t) -1);
39236c6e
A
8144
8145 /*
 8146 * Insert it into the object/offset hash table
8147 */
d9a64523 8148 hash_id = vm_page_hash(m_object, mem->vmp_offset);
39236c6e
A
8149 bucket = &vm_page_buckets[hash_id];
8150
d9a64523 8151 mem->vmp_next_m = bucket->page_list;
fe8ab488 8152 bucket->page_list = VM_PAGE_PACK_PTR(mem);
39236c6e
A
8153}
8154
8155
8156void
8157hibernate_free_range(int sindx, int eindx)
8158{
0a7de745
A
8159 vm_page_t mem;
8160 unsigned int color;
39236c6e
A
8161
8162 while (sindx < eindx) {
8163 mem = &vm_pages[sindx];
8164
8165 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
8166
d9a64523
A
8167 mem->vmp_lopage = FALSE;
8168 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
39236c6e 8169
5ba3f43e
A
8170 color = VM_PAGE_GET_COLOR(mem);
8171#if defined(__x86_64__)
0a7de745 8172 vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead, mem);
5ba3f43e 8173#else
0a7de745 8174 vm_page_queue_enter(&vm_page_queue_free[color].qhead, mem, vmp_pageq);
5ba3f43e 8175#endif
39236c6e
A
8176 vm_page_free_count++;
8177
8178 sindx++;
8179 }
8180}
8181
39236c6e
A
8182void
8183hibernate_rebuild_vm_structs(void)
8184{
0a7de745
A
8185 int i, cindx, sindx, eindx;
8186 vm_page_t mem, tmem, mem_next;
8187 AbsoluteTime startTime, endTime;
8188 uint64_t nsec;
39236c6e 8189
0a7de745 8190 if (hibernate_rebuild_needed == FALSE) {
39236c6e 8191 return;
0a7de745 8192 }
39236c6e 8193
5ba3f43e 8194 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START);
39236c6e
A
8195 HIBLOG("hibernate_rebuild started\n");
8196
8197 clock_get_uptime(&startTime);
8198
f427ee49 8199 pal_hib_rebuild_pmap_structs();
39236c6e
A
8200
8201 bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
8202 eindx = vm_pages_count;
8203
5ba3f43e 8204 /*
0a7de745 8205 * Mark all the vm_pages[] that have not been initialized yet as being
5ba3f43e 8206	 * transient. This is needed to ensure that the buddy page search is correct.
0a7de745 8207	 * Without this, random data in these vm_pages[] can trip up the buddy search.
5ba3f43e 8208 */
0a7de745 8209 for (i = hibernate_teardown_last_valid_compact_indx + 1; i < eindx; ++i) {
d9a64523 8210 vm_pages[i].vmp_q_state = VM_PAGE_NOT_ON_Q;
0a7de745 8211 }
5ba3f43e 8212
39236c6e 8213 for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
39236c6e 8214 mem = &vm_pages[cindx];
d9a64523 8215 assert(mem->vmp_q_state != VM_PAGE_ON_FREE_Q);
39236c6e
A
8216 /*
8217 * hibernate_teardown_vm_structs leaves the location where
8218 * this vm_page_t must be located in "next".
8219 */
d9a64523
A
8220 tmem = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->vmp_next_m));
8221 mem->vmp_next_m = VM_PAGE_PACK_PTR(NULL);
39236c6e
A
8222
8223 sindx = (int)(tmem - &vm_pages[0]);
8224
8225 if (mem != tmem) {
8226 /*
8227 * this vm_page_t was moved by hibernate_teardown_vm_structs,
8228 * so move it back to its real location
8229 */
8230 *tmem = *mem;
8231 mem = tmem;
8232 }
0a7de745 8233 if (mem->vmp_hashed) {
39236c6e 8234 hibernate_hash_insert_page(mem);
0a7de745 8235 }
39236c6e
A
8236 /*
8237 * the 'hole' between this vm_page_t and the previous
0a7de745 8238 * vm_page_t we moved needs to be initialized as
39236c6e
A
8239 * a range of free vm_page_t's
8240 */
8241 hibernate_free_range(sindx + 1, eindx);
8242
8243 eindx = sindx;
8244 }
0a7de745 8245 if (sindx) {
39236c6e 8246 hibernate_free_range(0, sindx);
0a7de745 8247 }
39236c6e
A
8248
8249 assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);
8250
8251 /*
15129b1c 8252 * process the list of vm_page_t's that were entered in the hash,
0a7de745 8253	 * but were not located in the vm_pages array... these are
39236c6e
A
8254 * vm_page_t's that were created on the fly (i.e. fictitious)
8255 */
8256 for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
d9a64523 8257 mem_next = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->vmp_next_m));
39236c6e 8258
d9a64523 8259 mem->vmp_next_m = 0;
39236c6e
A
8260 hibernate_hash_insert_page(mem);
8261 }
8262 hibernate_rebuild_hash_list = NULL;
8263
0a7de745
A
8264 clock_get_uptime(&endTime);
8265 SUB_ABSOLUTETIME(&endTime, &startTime);
8266 absolutetime_to_nanoseconds(endTime, &nsec);
39236c6e
A
8267
8268 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);
8269
8270 hibernate_rebuild_needed = FALSE;
8271
5ba3f43e 8272 KDBG(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END);
39236c6e
A
8273}
8274
39236c6e
A
8275uint32_t
8276hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
8277{
0a7de745
A
8278 unsigned int i;
8279 unsigned int compact_target_indx;
8280 vm_page_t mem, mem_next;
39236c6e 8281 vm_page_bucket_t *bucket;
0a7de745
A
8282 unsigned int mark_as_unneeded_pages = 0;
8283 unsigned int unneeded_vm_page_bucket_pages = 0;
8284 unsigned int unneeded_vm_pages_pages = 0;
8285 unsigned int unneeded_pmap_pages = 0;
8286 addr64_t start_of_unneeded = 0;
8287 addr64_t end_of_unneeded = 0;
8288
39236c6e 8289
0a7de745
A
8290 if (hibernate_should_abort()) {
8291 return 0;
8292 }
39236c6e 8293
5ba3f43e
A
8294 hibernate_rebuild_needed = TRUE;
8295
39236c6e 8296 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
0a7de745
A
8297 vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
8298 vm_page_cleaned_count, compressor_object->resident_page_count);
39236c6e
A
8299
8300 for (i = 0; i < vm_page_bucket_count; i++) {
39236c6e
A
8301 bucket = &vm_page_buckets[i];
8302
39037602 8303 for (mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list)); mem != VM_PAGE_NULL; mem = mem_next) {
d9a64523 8304 assert(mem->vmp_hashed);
39236c6e 8305
d9a64523 8306 mem_next = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->vmp_next_m));
39236c6e
A
8307
8308 if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
d9a64523 8309 mem->vmp_next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
39236c6e
A
8310 hibernate_rebuild_hash_list = mem;
8311 }
8312 }
8313 }
8314 unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
8315 mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;
8316
8317 hibernate_teardown_vm_page_free_count = vm_page_free_count;
8318
8319 compact_target_indx = 0;
8320
8321 for (i = 0; i < vm_pages_count; i++) {
39236c6e
A
8322 mem = &vm_pages[i];
8323
d9a64523 8324 if (mem->vmp_q_state == VM_PAGE_ON_FREE_Q) {
39236c6e
A
8325 unsigned int color;
8326
d9a64523
A
8327 assert(mem->vmp_busy);
8328 assert(!mem->vmp_lopage);
39236c6e 8329
5ba3f43e 8330 color = VM_PAGE_GET_COLOR(mem);
39037602 8331
0a7de745 8332 vm_page_queue_remove(&vm_page_queue_free[color].qhead, mem, vmp_pageq);
39236c6e 8333
39037602 8334 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
39236c6e
A
8335
8336 vm_page_free_count--;
8337
8338 hibernate_teardown_found_free_pages++;
8339
0a7de745 8340 if (vm_pages[compact_target_indx].vmp_q_state != VM_PAGE_ON_FREE_Q) {
39236c6e 8341 compact_target_indx = i;
0a7de745 8342 }
39236c6e
A
8343 } else {
8344 /*
8345	 * record this vm_page_t's original location...
8346	 * we need this even if it doesn't get moved,
8347	 * as an indicator to the rebuild function that
8348	 * we don't have to move it
8349 */
d9a64523 8350 mem->vmp_next_m = VM_PAGE_PACK_PTR(mem);
39236c6e 8351
d9a64523 8352 if (vm_pages[compact_target_indx].vmp_q_state == VM_PAGE_ON_FREE_Q) {
39236c6e
A
8353 /*
8354 * we've got a hole to fill, so
8355	 * move this vm_page_t to its new home
8356 */
8357 vm_pages[compact_target_indx] = *mem;
d9a64523 8358 mem->vmp_q_state = VM_PAGE_ON_FREE_Q;
39236c6e
A
8359
8360 hibernate_teardown_last_valid_compact_indx = compact_target_indx;
8361 compact_target_indx++;
0a7de745 8362 } else {
39236c6e 8363 hibernate_teardown_last_valid_compact_indx = i;
0a7de745 8364 }
39236c6e
A
8365 }
8366 }
0a7de745
A
8367 unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx + 1],
8368 (addr64_t)&vm_pages[vm_pages_count - 1], page_list, page_list_wired);
39236c6e
A
8369 mark_as_unneeded_pages += unneeded_vm_pages_pages;
8370
f427ee49 8371 pal_hib_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);
39236c6e
A
8372
8373 if (start_of_unneeded) {
8374 unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
8375 mark_as_unneeded_pages += unneeded_pmap_pages;
8376 }
8377 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);
8378
0a7de745 8379 return mark_as_unneeded_pages;
39236c6e
A
8380}
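
The teardown/rebuild pair above compacts the vm_pages[] array before hibernation — sliding in-use entries down over free slots while recording each entry's original location in vmp_next_m — and later undoes the move, re-freeing the holes. The following standalone sketch shows the same idea on a simple array, under the simplifying assumption that "free" versus "in use" is a single flag; it is an illustration of the algorithm, not the kernel code.

#include <stdbool.h>
#include <stddef.h>

struct ex_entry {
	bool   in_use;
	size_t orig_indx;   /* where this entry lived before compaction */
	int    payload;
};

/* Returns the index of the last valid compacted entry, or (size_t)-1. */
static size_t
ex_compact(struct ex_entry *a, size_t count)
{
	size_t target = 0;
	size_t last_valid = (size_t)-1;

	for (size_t i = 0; i < count; i++) {
		if (!a[i].in_use) {
			continue;               /* free slots become holes */
		}
		a[i].orig_indx = i;             /* record original location */
		if (!a[target].in_use) {        /* hole to fill below us    */
			a[target] = a[i];
			a[i].in_use = false;
			last_valid = target++;
		} else {
			last_valid = i;         /* already compact; stays put */
			if (target == i) {
				target++;
			}
		}
	}
	return last_valid;
}

/* Rebuild walks the compacted entries from the back and puts each one back at
 * its recorded original index, mirroring hibernate_rebuild_vm_structs(). */
static void
ex_rebuild(struct ex_entry *a, size_t last_valid)
{
	if (last_valid == (size_t)-1) {
		return;
	}
	for (size_t c = last_valid + 1; c-- > 0;) {
		size_t s = a[c].orig_indx;

		if (s != c) {
			a[s] = a[c];
			a[c].in_use = false;    /* the hole becomes free again */
		}
	}
}
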
8381
8382
d1ecb069
A
8383#endif /* HIBERNATION */
8384
b0d623f7 8385/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
1c79356b
A
8386
8387#include <mach_vm_debug.h>
0a7de745 8388#if MACH_VM_DEBUG
1c79356b
A
8389
8390#include <mach_debug/hash_info.h>
8391#include <vm/vm_debug.h>
8392
8393/*
8394 * Routine: vm_page_info
8395 * Purpose:
8396 * Return information about the global VP table.
8397 * Fills the buffer with as much information as possible
8398 * and returns the desired size of the buffer.
8399 * Conditions:
8400 * Nothing locked. The caller should provide
8401 * possibly-pageable memory.
8402 */
8403
8404unsigned int
8405vm_page_info(
8406 hash_info_bucket_t *info,
8407 unsigned int count)
8408{
91447636 8409 unsigned int i;
0a7de745 8410 lck_spin_t *bucket_lock;
1c79356b 8411
0a7de745 8412 if (vm_page_bucket_count < count) {
1c79356b 8413 count = vm_page_bucket_count;
0a7de745 8414 }
1c79356b
A
8415
8416 for (i = 0; i < count; i++) {
8417 vm_page_bucket_t *bucket = &vm_page_buckets[i];
8418 unsigned int bucket_count = 0;
8419 vm_page_t m;
8420
b0d623f7 8421 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
0a7de745 8422 lck_spin_lock_grp(bucket_lock, &vm_page_lck_grp_bucket);
b0d623f7 8423
39037602 8424 for (m = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
0a7de745
A
8425 m != VM_PAGE_NULL;
8426 m = (vm_page_t)(VM_PAGE_UNPACK_PTR(m->vmp_next_m))) {
1c79356b 8427 bucket_count++;
0a7de745 8428 }
b0d623f7
A
8429
8430 lck_spin_unlock(bucket_lock);
1c79356b
A
8431
8432 /* don't touch pageable memory while holding locks */
8433 info[i].hib_count = bucket_count;
8434 }
8435
8436 return vm_page_bucket_count;
8437}
0a7de745 8438#endif /* MACH_VM_DEBUG */
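
vm_page_info() walks every bucket under the spin lock that covers its group of BUCKETS_PER_LOCK buckets, counts the chain, and writes into the caller's (possibly pageable) buffer only after dropping the lock. Below is a user-space sketch of that pattern, with pthread mutexes standing in for the kernel spinlocks; the sizes and names are illustrative assumptions.

#include <pthread.h>
#include <stddef.h>

#define EX_BUCKET_COUNT     1024u
#define EX_BUCKETS_PER_LOCK 16u

struct ex_page { struct ex_page *next; };

static struct ex_page  *ex_buckets[EX_BUCKET_COUNT];
static pthread_mutex_t  ex_bucket_locks[EX_BUCKET_COUNT / EX_BUCKETS_PER_LOCK];

static void
ex_locks_init(void)
{
	for (size_t i = 0; i < EX_BUCKET_COUNT / EX_BUCKETS_PER_LOCK; i++) {
		pthread_mutex_init(&ex_bucket_locks[i], NULL);
	}
}

static unsigned
ex_bucket_chain_lengths(unsigned *out, unsigned count)
{
	if (count > EX_BUCKET_COUNT) {
		count = EX_BUCKET_COUNT;
	}
	for (unsigned i = 0; i < count; i++) {
		unsigned chain = 0;
		pthread_mutex_t *lock = &ex_bucket_locks[i / EX_BUCKETS_PER_LOCK];

		pthread_mutex_lock(lock);
		for (struct ex_page *p = ex_buckets[i]; p != NULL; p = p->next) {
			chain++;
		}
		pthread_mutex_unlock(lock);

		/* touch the (possibly pageable) output buffer only after unlock */
		out[i] = chain;
	}
	return EX_BUCKET_COUNT;
}
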
15129b1c
A
8439
8440#if VM_PAGE_BUCKETS_CHECK
8441void
8442vm_page_buckets_check(void)
8443{
8444 unsigned int i;
8445 vm_page_t p;
8446 unsigned int p_hash;
8447 vm_page_bucket_t *bucket;
0a7de745 8448 lck_spin_t *bucket_lock;
15129b1c
A
8449
8450 if (!vm_page_buckets_check_ready) {
8451 return;
8452 }
8453
8454#if HIBERNATION
8455 if (hibernate_rebuild_needed ||
8456 hibernate_rebuild_hash_list) {
8457 panic("BUCKET_CHECK: hibernation in progress: "
0a7de745
A
8458 "rebuild_needed=%d rebuild_hash_list=%p\n",
8459 hibernate_rebuild_needed,
8460 hibernate_rebuild_hash_list);
15129b1c
A
8461 }
8462#endif /* HIBERNATION */
8463
8464#if VM_PAGE_FAKE_BUCKETS
8465 char *cp;
8466 for (cp = (char *) vm_page_fake_buckets_start;
0a7de745
A
8467 cp < (char *) vm_page_fake_buckets_end;
8468 cp++) {
15129b1c
A
8469 if (*cp != 0x5a) {
8470 panic("BUCKET_CHECK: corruption at %p in fake buckets "
0a7de745
A
8471 "[0x%llx:0x%llx]\n",
8472 cp,
8473 (uint64_t) vm_page_fake_buckets_start,
8474 (uint64_t) vm_page_fake_buckets_end);
15129b1c
A
8475 }
8476 }
8477#endif /* VM_PAGE_FAKE_BUCKETS */
8478
8479 for (i = 0; i < vm_page_bucket_count; i++) {
0a7de745 8480 vm_object_t p_object;
39037602 8481
15129b1c 8482 bucket = &vm_page_buckets[i];
fe8ab488 8483 if (!bucket->page_list) {
15129b1c
A
8484 continue;
8485 }
8486
8487 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
0a7de745 8488 lck_spin_lock_grp(bucket_lock, &vm_page_lck_grp_bucket);
39037602
A
8489 p = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
8490
15129b1c 8491 while (p != VM_PAGE_NULL) {
39037602
A
8492 p_object = VM_PAGE_OBJECT(p);
8493
d9a64523 8494 if (!p->vmp_hashed) {
15129b1c 8495 panic("BUCKET_CHECK: page %p (%p,0x%llx) "
0a7de745
A
8496 "hash %d in bucket %d at %p "
8497 "is not hashed\n",
8498 p, p_object, p->vmp_offset,
8499 p_hash, i, bucket);
15129b1c 8500 }
d9a64523 8501 p_hash = vm_page_hash(p_object, p->vmp_offset);
15129b1c
A
8502 if (p_hash != i) {
8503 panic("BUCKET_CHECK: corruption in bucket %d "
0a7de745
A
8504 "at %p: page %p object %p offset 0x%llx "
8505 "hash %d\n",
8506 i, bucket, p, p_object, p->vmp_offset,
8507 p_hash);
15129b1c 8508 }
d9a64523 8509 p = (vm_page_t)(VM_PAGE_UNPACK_PTR(p->vmp_next_m));
15129b1c
A
8510 }
8511 lck_spin_unlock(bucket_lock);
8512 }
8513
8514// printf("BUCKET_CHECK: checked buckets\n");
8515}
8516#endif /* VM_PAGE_BUCKETS_CHECK */
3e170ce0
A
8517
8518/*
8519 * 'vm_fault_enter' will place newly created pages (zero-fill and COW) onto the
8520 * local queues if they exist... it's the only spot in the system where we add pages
8521 * to those queues... once on those queues, those pages can only move to one of the
8522 * global page queues or the free queues... they NEVER move from local q to local q.
8523 * the 'local' state is stable when vm_page_queues_remove is called since we're behind
8524 * the global vm_page_queue_lock at this point... we still need to take the local lock
8525 * in case this operation is being run on a different CPU than the local queue's identity,
8526 * but we don't have to worry about the page moving to a global queue or becoming wired
8527 * while we're grabbing the local lock since those operations would require the global
8528 * vm_page_queue_lock to be held, and we already own it.
8529 *
8530 * this is why it's safe to utilize the wire_count field in the vm_page_t as the local_id...
8531 * 'wired' and local are ALWAYS mutually exclusive conditions.
8532 */
39037602
A
8533
8534#if CONFIG_BACKGROUND_QUEUE
8535void
8536vm_page_queues_remove(vm_page_t mem, boolean_t remove_from_backgroundq)
8537#else
3e170ce0 8538void
39037602
A
8539vm_page_queues_remove(vm_page_t mem, boolean_t __unused remove_from_backgroundq)
8540#endif
3e170ce0 8541{
0a7de745
A
8542 boolean_t was_pageable = TRUE;
8543 vm_object_t m_object;
3e170ce0 8544
39037602
A
8545 m_object = VM_PAGE_OBJECT(mem);
8546
8547 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
8548
0a7de745 8549 if (mem->vmp_q_state == VM_PAGE_NOT_ON_Q) {
d9a64523 8550 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
39037602 8551#if CONFIG_BACKGROUND_QUEUE
743345f9
A
8552 if (remove_from_backgroundq == TRUE) {
8553 vm_page_remove_from_backgroundq(mem);
39037602 8554 }
d9a64523
A
8555 if (mem->vmp_on_backgroundq) {
8556 assert(mem->vmp_backgroundq.next != 0);
8557 assert(mem->vmp_backgroundq.prev != 0);
743345f9 8558 } else {
d9a64523
A
8559 assert(mem->vmp_backgroundq.next == 0);
8560 assert(mem->vmp_backgroundq.prev == 0);
743345f9
A
8561 }
8562#endif /* CONFIG_BACKGROUND_QUEUE */
39037602
A
8563 return;
8564 }
d190cdc3 8565
0a7de745 8566 if (mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
d9a64523 8567 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
39037602 8568#if CONFIG_BACKGROUND_QUEUE
d9a64523 8569 assert(mem->vmp_backgroundq.next == 0 &&
0a7de745
A
8570 mem->vmp_backgroundq.prev == 0 &&
8571 mem->vmp_on_backgroundq == FALSE);
39037602
A
8572#endif
8573 return;
8574 }
d9a64523 8575 if (mem->vmp_q_state == VM_PAGE_IS_WIRED) {
39037602
A
8576 /*
8577 * might put these guys on a list for debugging purposes
8578 * if we do, we'll need to remove this assert
8579 */
d9a64523 8580 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
39037602 8581#if CONFIG_BACKGROUND_QUEUE
d9a64523 8582 assert(mem->vmp_backgroundq.next == 0 &&
0a7de745
A
8583 mem->vmp_backgroundq.prev == 0 &&
8584 mem->vmp_on_backgroundq == FALSE);
39037602
A
8585#endif
8586 return;
8587 }
8588
8589 assert(m_object != compressor_object);
8590 assert(m_object != kernel_object);
8591 assert(m_object != vm_submap_object);
d9a64523 8592 assert(!mem->vmp_fictitious);
39037602 8593
0a7de745 8594 switch (mem->vmp_q_state) {
39037602
A
8595 case VM_PAGE_ON_ACTIVE_LOCAL_Q:
8596 {
0a7de745 8597 struct vpl *lq;
39037602 8598
f427ee49 8599 lq = zpercpu_get_cpu(vm_page_local_q, mem->vmp_local_id);
3e170ce0 8600 VPL_LOCK(&lq->vpl_lock);
0a7de745 8601 vm_page_queue_remove(&lq->vpl_queue, mem, vmp_pageq);
d9a64523 8602 mem->vmp_local_id = 0;
3e170ce0 8603 lq->vpl_count--;
39037602 8604 if (m_object->internal) {
3e170ce0
A
8605 lq->vpl_internal_count--;
8606 } else {
8607 lq->vpl_external_count--;
8608 }
8609 VPL_UNLOCK(&lq->vpl_lock);
8610 was_pageable = FALSE;
39037602 8611 break;
3e170ce0 8612 }
39037602
A
8613 case VM_PAGE_ON_ACTIVE_Q:
8614 {
0a7de745 8615 vm_page_queue_remove(&vm_page_queue_active, mem, vmp_pageq);
3e170ce0 8616 vm_page_active_count--;
39037602 8617 break;
3e170ce0
A
8618 }
8619
39037602
A
8620 case VM_PAGE_ON_INACTIVE_INTERNAL_Q:
8621 {
8622 assert(m_object->internal == TRUE);
8623
3e170ce0 8624 vm_page_inactive_count--;
0a7de745 8625 vm_page_queue_remove(&vm_page_queue_anonymous, mem, vmp_pageq);
39037602 8626 vm_page_anonymous_count--;
d9a64523 8627
39037602 8628 vm_purgeable_q_advance_all();
d9a64523 8629 vm_page_balance_inactive(3);
39037602
A
8630 break;
8631 }
8632
8633 case VM_PAGE_ON_INACTIVE_EXTERNAL_Q:
8634 {
8635 assert(m_object->internal == FALSE);
8636
8637 vm_page_inactive_count--;
0a7de745 8638 vm_page_queue_remove(&vm_page_queue_inactive, mem, vmp_pageq);
39037602 8639 vm_purgeable_q_advance_all();
d9a64523 8640 vm_page_balance_inactive(3);
39037602
A
8641 break;
8642 }
8643
8644 case VM_PAGE_ON_INACTIVE_CLEANED_Q:
8645 {
8646 assert(m_object->internal == FALSE);
8647
8648 vm_page_inactive_count--;
0a7de745 8649 vm_page_queue_remove(&vm_page_queue_cleaned, mem, vmp_pageq);
39037602 8650 vm_page_cleaned_count--;
d9a64523 8651 vm_page_balance_inactive(3);
39037602
A
8652 break;
8653 }
8654
8655 case VM_PAGE_ON_THROTTLED_Q:
8656 {
8657 assert(m_object->internal == TRUE);
8658
0a7de745 8659 vm_page_queue_remove(&vm_page_queue_throttled, mem, vmp_pageq);
3e170ce0
A
8660 vm_page_throttled_count--;
8661 was_pageable = FALSE;
39037602 8662 break;
3e170ce0
A
8663 }
8664
39037602
A
8665 case VM_PAGE_ON_SPECULATIVE_Q:
8666 {
8667 assert(m_object->internal == FALSE);
8668
0a7de745 8669 vm_page_remque(&mem->vmp_pageq);
3e170ce0 8670 vm_page_speculative_count--;
d9a64523 8671 vm_page_balance_inactive(3);
39037602
A
8672 break;
8673 }
8674
8675#if CONFIG_SECLUDED_MEMORY
8676 case VM_PAGE_ON_SECLUDED_Q:
8677 {
0a7de745 8678 vm_page_queue_remove(&vm_page_queue_secluded, mem, vmp_pageq);
39037602 8679 vm_page_secluded_count--;
cb323159 8680 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
39037602
A
8681 if (m_object == VM_OBJECT_NULL) {
8682 vm_page_secluded_count_free--;
8683 was_pageable = FALSE;
8684 } else {
8685 assert(!m_object->internal);
8686 vm_page_secluded_count_inuse--;
8687 was_pageable = FALSE;
8688// was_pageable = TRUE;
8689 }
8690 break;
8691 }
8692#endif /* CONFIG_SECLUDED_MEMORY */
8693
8694 default:
8695 {
8696 /*
d9a64523 8697 * if (mem->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)
0a7de745
A
8698 * NOTE: vm_page_queues_remove does not deal with removing pages from the pageout queue...
8699 * the caller is responsible for determining if the page is on that queue, and if so, must
8700 * either first remove it (it needs both the page queues lock and the object lock to do
8701 * this via vm_pageout_steal_laundry), or avoid the call to vm_page_queues_remove
39037602
A
8702 *
8703 * we also don't expect to encounter VM_PAGE_ON_FREE_Q, VM_PAGE_ON_FREE_LOCAL_Q, VM_PAGE_ON_FREE_LOPAGE_Q
8704 * or any of the undefined states
8705 */
d9a64523 8706 panic("vm_page_queues_remove - bad page q_state (%p, %d)\n", mem, mem->vmp_q_state);
39037602 8707 break;
3e170ce0 8708 }
3e170ce0 8709 }
39037602 8710 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
d9a64523 8711 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
3e170ce0 8712
39037602 8713#if CONFIG_BACKGROUND_QUEUE
0a7de745 8714 if (remove_from_backgroundq == TRUE) {
39037602 8715 vm_page_remove_from_backgroundq(mem);
0a7de745 8716 }
39037602 8717#endif
3e170ce0 8718 if (was_pageable) {
39037602 8719 if (m_object->internal) {
3e170ce0
A
8720 vm_page_pageable_internal_count--;
8721 } else {
8722 vm_page_pageable_external_count--;
8723 }
8724 }
8725}
8726
8727void
8728vm_page_remove_internal(vm_page_t page)
8729{
39037602 8730 vm_object_t __object = VM_PAGE_OBJECT(page);
3e170ce0 8731 if (page == __object->memq_hint) {
0a7de745
A
8732 vm_page_t __new_hint;
8733 vm_page_queue_entry_t __qe;
d9a64523 8734 __qe = (vm_page_queue_entry_t)vm_page_queue_next(&page->vmp_listq);
39037602 8735 if (vm_page_queue_end(&__object->memq, __qe)) {
d9a64523 8736 __qe = (vm_page_queue_entry_t)vm_page_queue_prev(&page->vmp_listq);
39037602 8737 if (vm_page_queue_end(&__object->memq, __qe)) {
3e170ce0
A
8738 __qe = NULL;
8739 }
8740 }
39037602 8741 __new_hint = (vm_page_t)((uintptr_t) __qe);
3e170ce0
A
8742 __object->memq_hint = __new_hint;
8743 }
0a7de745 8744 vm_page_queue_remove(&__object->memq, page, vmp_listq);
39037602
A
8745#if CONFIG_SECLUDED_MEMORY
8746 if (__object->eligible_for_secluded) {
8747 vm_page_secluded.eligible_for_secluded--;
8748 }
8749#endif /* CONFIG_SECLUDED_MEMORY */
3e170ce0
A
8750}
8751
8752void
8753vm_page_enqueue_inactive(vm_page_t mem, boolean_t first)
8754{
0a7de745 8755 vm_object_t m_object;
39037602
A
8756
8757 m_object = VM_PAGE_OBJECT(mem);
8758
8759 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
d9a64523
A
8760 assert(!mem->vmp_fictitious);
8761 assert(!mem->vmp_laundry);
8762 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
3e170ce0 8763 vm_page_check_pageable_safe(mem);
39037602 8764
39037602 8765 if (m_object->internal) {
d9a64523 8766 mem->vmp_q_state = VM_PAGE_ON_INACTIVE_INTERNAL_Q;
39037602 8767
0a7de745
A
8768 if (first == TRUE) {
8769 vm_page_queue_enter_first(&vm_page_queue_anonymous, mem, vmp_pageq);
8770 } else {
8771 vm_page_queue_enter(&vm_page_queue_anonymous, mem, vmp_pageq);
8772 }
39037602 8773
3e170ce0
A
8774 vm_page_anonymous_count++;
8775 vm_page_pageable_internal_count++;
8776 } else {
d9a64523 8777 mem->vmp_q_state = VM_PAGE_ON_INACTIVE_EXTERNAL_Q;
39037602 8778
0a7de745
A
8779 if (first == TRUE) {
8780 vm_page_queue_enter_first(&vm_page_queue_inactive, mem, vmp_pageq);
8781 } else {
8782 vm_page_queue_enter(&vm_page_queue_inactive, mem, vmp_pageq);
8783 }
39037602 8784
3e170ce0
A
8785 vm_page_pageable_external_count++;
8786 }
3e170ce0
A
8787 vm_page_inactive_count++;
8788 token_new_pagecount++;
39037602
A
8789
8790#if CONFIG_BACKGROUND_QUEUE
0a7de745 8791 if (mem->vmp_in_background) {
39037602 8792 vm_page_add_to_backgroundq(mem, FALSE);
0a7de745 8793 }
39037602
A
8794#endif
8795}
8796
8797void
8798vm_page_enqueue_active(vm_page_t mem, boolean_t first)
8799{
0a7de745 8800 vm_object_t m_object;
39037602
A
8801
8802 m_object = VM_PAGE_OBJECT(mem);
8803
8804 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
d9a64523
A
8805 assert(!mem->vmp_fictitious);
8806 assert(!mem->vmp_laundry);
8807 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
39037602
A
8808 vm_page_check_pageable_safe(mem);
8809
d9a64523 8810 mem->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
0a7de745
A
8811 if (first == TRUE) {
8812 vm_page_queue_enter_first(&vm_page_queue_active, mem, vmp_pageq);
8813 } else {
8814 vm_page_queue_enter(&vm_page_queue_active, mem, vmp_pageq);
8815 }
39037602
A
8816 vm_page_active_count++;
8817
8818 if (m_object->internal) {
8819 vm_page_pageable_internal_count++;
8820 } else {
8821 vm_page_pageable_external_count++;
8822 }
8823
8824#if CONFIG_BACKGROUND_QUEUE
0a7de745 8825 if (mem->vmp_in_background) {
39037602 8826 vm_page_add_to_backgroundq(mem, FALSE);
0a7de745 8827 }
39037602 8828#endif
d9a64523 8829 vm_page_balance_inactive(3);
3e170ce0
A
8830}
8831
8832/*
8833 * Pages from special kernel objects shouldn't
8834 * be placed on pageable queues.
8835 */
8836void
8837vm_page_check_pageable_safe(vm_page_t page)
8838{
0a7de745 8839 vm_object_t page_object;
39037602
A
8840
8841 page_object = VM_PAGE_OBJECT(page);
8842
8843 if (page_object == kernel_object) {
3e170ce0 8844	 panic("vm_page_check_pageable_safe: trying to add page " \
0a7de745 8845 "from kernel object (%p) to pageable queue", kernel_object);
3e170ce0
A
8846 }
8847
39037602 8848 if (page_object == compressor_object) {
3e170ce0 8849	 panic("vm_page_check_pageable_safe: trying to add page " \
0a7de745 8850 "from compressor object (%p) to pageable queue", compressor_object);
3e170ce0
A
8851 }
8852
39037602 8853 if (page_object == vm_submap_object) {
3e170ce0 8854	 panic("vm_page_check_pageable_safe: trying to add page " \
0a7de745 8855 "from submap object (%p) to pageable queue", vm_submap_object);
3e170ce0
A
8856 }
8857}
8858
8859/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
0a7de745
A
8860* wired page diagnose
8861* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
3e170ce0
A
8862
8863#include <libkern/OSKextLibPrivate.h>
8864
0a7de745 8865#define KA_SIZE(namelen, subtotalscount) \
5ba3f43e
A
8866 (sizeof(struct vm_allocation_site) + (namelen) + 1 + ((subtotalscount) * sizeof(struct vm_allocation_total)))
8867
0a7de745 8868#define KA_NAME(alloc) \
5ba3f43e
A
8869 ((char *)(&(alloc)->subtotals[(alloc->subtotalscount)]))
8870
0a7de745 8871#define KA_NAME_LEN(alloc) \
5ba3f43e 8872 (VM_TAG_NAME_LEN_MAX & (alloc->flags >> VM_TAG_NAME_LEN_SHIFT))
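
The KA_SIZE()/KA_NAME()/KA_NAME_LEN() macros above describe one variable-sized record: a fixed header, then subtotalscount subtotal slots, then the NUL-terminated name packed directly after them. A simplified layout sketch follows; the struct and helper names are stand-ins, not the kernel's vm_allocation_site definition.

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct ex_total { uint32_t tag; uint64_t total; };

struct ex_site {
	uint16_t        refcount;
	uint16_t        subtotalscount;
	struct ex_total subtotals[];        /* name bytes live right after these */
};

#define EX_SITE_SIZE(namelen, nsub) \
	(sizeof(struct ex_site) + (nsub) * sizeof(struct ex_total) + (namelen) + 1)
#define EX_SITE_NAME(site) \
	((char *)&(site)->subtotals[(site)->subtotalscount])

static struct ex_site *
ex_site_create(const char *name, uint16_t nsub)
{
	size_t namelen = strlen(name);
	struct ex_site *s = calloc(1, EX_SITE_SIZE(namelen, nsub));

	if (s != NULL) {
		s->refcount = 1;
		s->subtotalscount = nsub;
		memcpy(EX_SITE_NAME(s), name, namelen + 1);
	}
	return s;
}
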
3e170ce0 8873
0a7de745 8874vm_tag_t
3e170ce0
A
8875vm_tag_bt(void)
8876{
0a7de745
A
8877 uintptr_t* frameptr;
8878 uintptr_t* frameptr_next;
8879 uintptr_t retaddr;
8880 uintptr_t kstackb, kstackt;
8881 const vm_allocation_site_t * site;
8882 thread_t cthread;
8883 kern_allocation_name_t name;
8884
8885 cthread = current_thread();
8886 if (__improbable(cthread == NULL)) {
8887 return VM_KERN_MEMORY_OSFMK;
8888 }
8889
8890 if ((name = thread_get_kernel_state(cthread)->allocation_name)) {
8891 if (!name->tag) {
8892 vm_tag_alloc(name);
8893 }
8894 return name->tag;
8895 }
8896
8897 kstackb = cthread->kernel_stack;
8898 kstackt = kstackb + kernel_stack_size;
8899
8900 /* Load stack frame pointer (EBP on x86) into frameptr */
8901 frameptr = __builtin_frame_address(0);
8902 site = NULL;
8903 while (frameptr != NULL) {
8904 /* Verify thread stack bounds */
8905 if (((uintptr_t)(frameptr + 2) > kstackt) || ((uintptr_t)frameptr < kstackb)) {
8906 break;
8907 }
8908
8909 /* Next frame pointer is pointed to by the previous one */
8910 frameptr_next = (uintptr_t*) *frameptr;
8911
8912 /* Pull return address from one spot above the frame pointer */
8913 retaddr = *(frameptr + 1);
8914
cb323159
A
8915#if defined(HAS_APPLE_PAC)
8916 retaddr = (uintptr_t) ptrauth_strip((void *)retaddr, ptrauth_key_return_address);
8917#endif
0a7de745
A
8918
8919 if (((retaddr < vm_kernel_builtinkmod_text_end) && (retaddr >= vm_kernel_builtinkmod_text))
8920 || (retaddr < vm_kernel_stext) || (retaddr > vm_kernel_top)) {
8921 site = OSKextGetAllocationSiteForCaller(retaddr);
8922 break;
8923 }
8924 frameptr = frameptr_next;
3e170ce0 8925 }
5ba3f43e 8926
0a7de745 8927 return site ? site->tag : VM_KERN_MEMORY_NONE;
3e170ce0
A
8928}
8929
0a7de745 8930static uint64_t free_tag_bits[VM_MAX_TAG_VALUE / 64];
3e170ce0
A
8931
8932void
5ba3f43e 8933vm_tag_alloc_locked(vm_allocation_site_t * site, vm_allocation_site_t ** releasesiteP)
3e170ce0 8934{
0a7de745
A
8935 vm_tag_t tag;
8936 uint64_t avail;
8937 uint32_t idx;
8938 vm_allocation_site_t * prev;
3e170ce0 8939
0a7de745
A
8940 if (site->tag) {
8941 return;
8942 }
3e170ce0 8943
0a7de745
A
8944 idx = 0;
8945 while (TRUE) {
5ba3f43e 8946 avail = free_tag_bits[idx];
0a7de745 8947 if (avail) {
f427ee49 8948 tag = (vm_tag_t)__builtin_clzll(avail);
0a7de745
A
8949 avail &= ~(1ULL << (63 - tag));
8950 free_tag_bits[idx] = avail;
8951 tag += (idx << 6);
8952 break;
5ba3f43e
A
8953 }
8954 idx++;
0a7de745
A
8955 if (idx >= ARRAY_COUNT(free_tag_bits)) {
8956 for (idx = 0; idx < ARRAY_COUNT(vm_allocation_sites); idx++) {
5ba3f43e 8957 prev = vm_allocation_sites[idx];
0a7de745
A
8958 if (!prev) {
8959 continue;
8960 }
8961 if (!KA_NAME_LEN(prev)) {
8962 continue;
8963 }
8964 if (!prev->tag) {
8965 continue;
8966 }
8967 if (prev->total) {
8968 continue;
8969 }
8970 if (1 != prev->refcount) {
8971 continue;
8972 }
5ba3f43e
A
8973
8974 assert(idx == prev->tag);
f427ee49 8975 tag = (vm_tag_t)idx;
5ba3f43e
A
8976 prev->tag = VM_KERN_MEMORY_NONE;
8977 *releasesiteP = prev;
8978 break;
0a7de745
A
8979 }
8980 if (idx >= ARRAY_COUNT(vm_allocation_sites)) {
5ba3f43e
A
8981 tag = VM_KERN_MEMORY_ANY;
8982 }
0a7de745 8983 break;
5ba3f43e 8984 }
0a7de745
A
8985 }
8986 site->tag = tag;
5ba3f43e 8987
0a7de745 8988 OSAddAtomic16(1, &site->refcount);
5ba3f43e 8989
0a7de745
A
8990 if (VM_KERN_MEMORY_ANY != tag) {
8991 vm_allocation_sites[tag] = site;
8992 }
5ba3f43e 8993
0a7de745
A
8994 if (tag > vm_allocation_tag_highest) {
8995 vm_allocation_tag_highest = tag;
8996 }
3e170ce0
A
8997}
8998
8999static void
9000vm_tag_free_locked(vm_tag_t tag)
9001{
0a7de745
A
9002 uint64_t avail;
9003 uint32_t idx;
9004 uint64_t bit;
3e170ce0 9005
0a7de745
A
9006 if (VM_KERN_MEMORY_ANY == tag) {
9007 return;
9008 }
3e170ce0 9009
0a7de745
A
9010 idx = (tag >> 6);
9011 avail = free_tag_bits[idx];
9012 tag &= 63;
9013 bit = (1ULL << (63 - tag));
9014 assert(!(avail & bit));
9015 free_tag_bits[idx] = (avail | bit);
3e170ce0
A
9016}
9017
9018static void
9019vm_tag_init(void)
9020{
0a7de745
A
9021 vm_tag_t tag;
9022 for (tag = VM_KERN_MEMORY_FIRST_DYNAMIC; tag < VM_KERN_MEMORY_ANY; tag++) {
9023 vm_tag_free_locked(tag);
9024 }
5ba3f43e 9025
0a7de745
A
9026 for (tag = VM_KERN_MEMORY_ANY + 1; tag < VM_MAX_TAG_VALUE; tag++) {
9027 vm_tag_free_locked(tag);
9028 }
3e170ce0
A
9029}
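
vm_tag_alloc_locked()/vm_tag_free_locked() manage the free tags with an array of 64-bit words in which bit (63 - (tag & 63)) set means "free", so __builtin_clzll() on a non-zero word yields the lowest free tag it covers. A standalone sketch of that bitmap scheme is below; EX_MAX_TAG is an arbitrary illustrative size and no locking or site recycling is shown.

#include <stdint.h>

#define EX_MAX_TAG 256u

static uint64_t ex_free_bits[EX_MAX_TAG / 64];

static void
ex_tag_free(uint32_t tag)
{
	ex_free_bits[tag >> 6] |= 1ULL << (63 - (tag & 63));
}

static int
ex_tag_alloc(uint32_t *tag_out)
{
	for (uint32_t idx = 0; idx < EX_MAX_TAG / 64; idx++) {
		uint64_t avail = ex_free_bits[idx];

		if (avail != 0) {
			/* highest set bit corresponds to the lowest-numbered tag */
			uint32_t tag = (uint32_t)__builtin_clzll(avail);

			ex_free_bits[idx] = avail & ~(1ULL << (63 - tag));
			*tag_out = tag + (idx << 6);
			return 0;
		}
	}
	return -1;   /* no free tags */
}
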
9030
9031vm_tag_t
9032vm_tag_alloc(vm_allocation_site_t * site)
9033{
0a7de745
A
9034 vm_tag_t tag;
9035 vm_allocation_site_t * releasesite;
3e170ce0 9036
0a7de745 9037 if (VM_TAG_BT & site->flags) {
5ba3f43e 9038 tag = vm_tag_bt();
0a7de745
A
9039 if (VM_KERN_MEMORY_NONE != tag) {
9040 return tag;
9041 }
9042 }
3e170ce0 9043
0a7de745 9044 if (!site->tag) {
5ba3f43e
A
9045 releasesite = NULL;
9046 lck_spin_lock(&vm_allocation_sites_lock);
9047 vm_tag_alloc_locked(site, &releasesite);
9048 lck_spin_unlock(&vm_allocation_sites_lock);
0a7de745
A
9049 if (releasesite) {
9050 kern_allocation_name_release(releasesite);
9051 }
9052 }
3e170ce0 9053
0a7de745 9054 return site->tag;
3e170ce0
A
9055}
9056
5ba3f43e
A
9057void
9058vm_tag_update_size(vm_tag_t tag, int64_t delta)
9059{
0a7de745
A
9060 vm_allocation_site_t * allocation;
9061 uint64_t prior;
5ba3f43e 9062
0a7de745
A
9063 assert(VM_KERN_MEMORY_NONE != tag);
9064 assert(tag < VM_MAX_TAG_VALUE);
5ba3f43e 9065
0a7de745
A
9066 allocation = vm_allocation_sites[tag];
9067 assert(allocation);
5ba3f43e 9068
0a7de745 9069 if (delta < 0) {
5ba3f43e 9070 assertf(allocation->total >= ((uint64_t)-delta), "tag %d, site %p", tag, allocation);
0a7de745
A
9071 }
9072 prior = OSAddAtomic64(delta, &allocation->total);
5ba3f43e
A
9073
9074#if DEBUG || DEVELOPMENT
9075
0a7de745 9076 uint64_t new, peak;
5ba3f43e 9077 new = prior + delta;
0a7de745
A
9078 do{
9079 peak = allocation->peak;
9080 if (new <= peak) {
9081 break;
9082 }
9083 }while (!OSCompareAndSwap64(peak, new, &allocation->peak));
5ba3f43e
A
9084
9085#endif /* DEBUG || DEVELOPMENT */
9086
0a7de745
A
9087 if (tag < VM_KERN_MEMORY_FIRST_DYNAMIC) {
9088 return;
9089 }
5ba3f43e 9090
0a7de745
A
9091 if (!prior && !allocation->tag) {
9092 vm_tag_alloc(allocation);
9093 }
5ba3f43e
A
9094}
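
The peak tracking in vm_tag_update_size() is an atomic add followed by a compare-and-swap loop that only raises the recorded peak and gives up once another thread has already published a higher value. A sketch of the same pattern using C11 atomics in place of OSAddAtomic64/OSCompareAndSwap64:

#include <stdatomic.h>
#include <stdint.h>

struct ex_alloc_stats {
	_Atomic uint64_t total;
	_Atomic uint64_t peak;
};

static void
ex_update_size(struct ex_alloc_stats *s, int64_t delta)
{
	uint64_t prior = atomic_fetch_add_explicit(&s->total, (uint64_t)delta,
	    memory_order_relaxed);
	uint64_t now = prior + (uint64_t)delta;
	uint64_t peak = atomic_load_explicit(&s->peak, memory_order_relaxed);

	while (now > peak) {
		if (atomic_compare_exchange_weak_explicit(&s->peak, &peak, now,
		    memory_order_relaxed, memory_order_relaxed)) {
			break;
		}
		/* a failed CAS reloads `peak`; retry only while ours is higher */
	}
}
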
9095
9096void
9097kern_allocation_update_size(kern_allocation_name_t allocation, int64_t delta)
9098{
0a7de745 9099 uint64_t prior;
5ba3f43e 9100
0a7de745 9101 if (delta < 0) {
5ba3f43e 9102 assertf(allocation->total >= ((uint64_t)-delta), "name %p", allocation);
0a7de745
A
9103 }
9104 prior = OSAddAtomic64(delta, &allocation->total);
5ba3f43e
A
9105
9106#if DEBUG || DEVELOPMENT
9107
0a7de745 9108 uint64_t new, peak;
5ba3f43e 9109 new = prior + delta;
0a7de745
A
9110 do{
9111 peak = allocation->peak;
9112 if (new <= peak) {
9113 break;
9114 }
9115 }while (!OSCompareAndSwap64(peak, new, &allocation->peak));
5ba3f43e
A
9116
9117#endif /* DEBUG || DEVELOPMENT */
9118
0a7de745
A
9119 if (!prior && !allocation->tag) {
9120 vm_tag_alloc(allocation);
9121 }
5ba3f43e
A
9122}
9123
9124#if VM_MAX_TAG_ZONES
9125
9126void
9127vm_allocation_zones_init(void)
9128{
0a7de745
A
9129 kern_return_t ret;
9130 vm_offset_t addr;
5ba3f43e
A
9131 vm_size_t size;
9132
0a7de745
A
9133 size = VM_MAX_TAG_VALUE * sizeof(vm_allocation_zone_total_t * *)
9134 + 2 * VM_MAX_TAG_ZONES * sizeof(vm_allocation_zone_total_t);
5ba3f43e
A
9135
9136 ret = kernel_memory_allocate(kernel_map,
0a7de745
A
9137 &addr, round_page(size), 0,
9138 KMA_ZERO, VM_KERN_MEMORY_DIAG);
9139 assert(KERN_SUCCESS == ret);
5ba3f43e 9140
0a7de745
A
9141 vm_allocation_zone_totals = (vm_allocation_zone_total_t **) addr;
9142 addr += VM_MAX_TAG_VALUE * sizeof(vm_allocation_zone_total_t * *);
5ba3f43e 9143
0a7de745
A
9144 // prepopulate VM_KERN_MEMORY_DIAG & VM_KERN_MEMORY_KALLOC so allocations
9145 // in vm_tag_update_zone_size() won't recurse
9146 vm_allocation_zone_totals[VM_KERN_MEMORY_DIAG] = (vm_allocation_zone_total_t *) addr;
9147 addr += VM_MAX_TAG_ZONES * sizeof(vm_allocation_zone_total_t);
9148 vm_allocation_zone_totals[VM_KERN_MEMORY_KALLOC] = (vm_allocation_zone_total_t *) addr;
5ba3f43e
A
9149}
9150
c3c9b80d
A
9151__attribute__((noinline))
9152static vm_tag_t
9153vm_tag_zone_stats_alloc(vm_tag_t tag, zalloc_flags_t flags)
5ba3f43e 9154{
c3c9b80d
A
9155 vm_allocation_zone_total_t *stats;
9156 vm_size_t size = sizeof(*stats) * VM_MAX_TAG_ZONES;
5ba3f43e 9157
c3c9b80d
A
9158 stats = kheap_alloc(KHEAP_DATA_BUFFERS, size,
9159 Z_VM_TAG(VM_KERN_MEMORY_DIAG) | Z_ZERO | flags);
9160 if (!stats) {
9161 return VM_KERN_MEMORY_NONE;
9162 }
9163 if (!os_atomic_cmpxchg(&vm_allocation_zone_totals[tag], NULL, stats, release)) {
9164 kheap_free(KHEAP_DATA_BUFFERS, stats, size);
9165 }
9166 return tag;
9167}
9168
9169vm_tag_t
9170vm_tag_will_update_zone(vm_tag_t tag, uint32_t zidx, uint32_t zflags)
9171{
0a7de745
A
9172 assert(VM_KERN_MEMORY_NONE != tag);
9173 assert(tag < VM_MAX_TAG_VALUE);
5ba3f43e 9174
0a7de745 9175 if (zidx >= VM_MAX_TAG_ZONES) {
c3c9b80d 9176 return VM_KERN_MEMORY_NONE;
0a7de745 9177 }
5ba3f43e 9178
c3c9b80d
A
9179 if (__probable(vm_allocation_zone_totals[tag])) {
9180 return tag;
0a7de745 9181 }
c3c9b80d 9182 return vm_tag_zone_stats_alloc(tag, zflags);
5ba3f43e
A
9183}
9184
9185void
c3c9b80d 9186vm_tag_update_zone_size(vm_tag_t tag, uint32_t zidx, long delta)
5ba3f43e 9187{
c3c9b80d
A
9188 vm_allocation_zone_total_t *stats;
9189 vm_size_t value;
5ba3f43e 9190
0a7de745
A
9191 assert(VM_KERN_MEMORY_NONE != tag);
9192 assert(tag < VM_MAX_TAG_VALUE);
5ba3f43e 9193
0a7de745
A
9194 if (zidx >= VM_MAX_TAG_ZONES) {
9195 return;
9196 }
5ba3f43e 9197
c3c9b80d
A
9198 stats = vm_allocation_zone_totals[tag];
9199 assert(stats);
9200 stats += zidx;
5ba3f43e 9201
c3c9b80d 9202 value = os_atomic_add(&stats->vazt_total, delta, relaxed);
0a7de745 9203 if (delta < 0) {
c3c9b80d
A
9204 assertf((long)value >= 0, "zidx %d, tag %d, %p", zidx, tag, stats);
9205 return;
9206 } else if (os_atomic_load(&stats->vazt_peak, relaxed) < value) {
9207 os_atomic_max(&stats->vazt_peak, value, relaxed);
0a7de745 9208 }
5ba3f43e
A
9209}
9210
9211#endif /* VM_MAX_TAG_ZONES */
9212
9213void
9214kern_allocation_update_subtotal(kern_allocation_name_t allocation, uint32_t subtag, int64_t delta)
9215{
0a7de745 9216 kern_allocation_name_t other;
5ba3f43e 9217 struct vm_allocation_total * total;
0a7de745 9218 uint32_t subidx;
5ba3f43e 9219
0a7de745
A
9220 subidx = 0;
9221 assert(VM_KERN_MEMORY_NONE != subtag);
cb323159 9222 lck_spin_lock(&vm_allocation_sites_lock);
0a7de745
A
9223 for (; subidx < allocation->subtotalscount; subidx++) {
9224 if (VM_KERN_MEMORY_NONE == allocation->subtotals[subidx].tag) {
f427ee49 9225 allocation->subtotals[subidx].tag = (vm_tag_t)subtag;
5ba3f43e
A
9226 break;
9227 }
0a7de745
A
9228 if (subtag == allocation->subtotals[subidx].tag) {
9229 break;
9230 }
9231 }
cb323159 9232 lck_spin_unlock(&vm_allocation_sites_lock);
0a7de745
A
9233 assert(subidx < allocation->subtotalscount);
9234 if (subidx >= allocation->subtotalscount) {
9235 return;
5ba3f43e 9236 }
5ba3f43e 9237
0a7de745
A
9238 total = &allocation->subtotals[subidx];
9239 other = vm_allocation_sites[subtag];
9240 assert(other);
5ba3f43e 9241
0a7de745 9242 if (delta < 0) {
5ba3f43e 9243 assertf(total->total >= ((uint64_t)-delta), "name %p", allocation);
5ba3f43e 9244 assertf(other->mapped >= ((uint64_t)-delta), "other %p", other);
0a7de745 9245 }
cb323159
A
9246 OSAddAtomic64(delta, &other->mapped);
9247 OSAddAtomic64(delta, &total->total);
5ba3f43e
A
9248}
9249
9250const char *
9251kern_allocation_get_name(kern_allocation_name_t allocation)
9252{
0a7de745 9253 return KA_NAME(allocation);
5ba3f43e
A
9254}
9255
9256kern_allocation_name_t
f427ee49 9257kern_allocation_name_allocate(const char * name, uint16_t subtotalscount)
5ba3f43e 9258{
f427ee49 9259 uint16_t namelen;
5ba3f43e 9260
f427ee49 9261 namelen = (uint16_t)strnlen(name, MACH_MEMORY_INFO_NAME_MAX_LEN - 1);
5ba3f43e 9262
0a7de745 9263 kern_allocation_name_t allocation;
f427ee49
A
9264 allocation = kheap_alloc(KHEAP_DATA_BUFFERS,
9265 KA_SIZE(namelen, subtotalscount), Z_WAITOK);
0a7de745 9266 bzero(allocation, KA_SIZE(namelen, subtotalscount));
5ba3f43e 9267
0a7de745
A
9268 allocation->refcount = 1;
9269 allocation->subtotalscount = subtotalscount;
f427ee49 9270 allocation->flags = (uint16_t)(namelen << VM_TAG_NAME_LEN_SHIFT);
0a7de745 9271 strlcpy(KA_NAME(allocation), name, namelen + 1);
5ba3f43e 9272
0a7de745 9273 return allocation;
5ba3f43e
A
9274}
9275
9276void
9277kern_allocation_name_release(kern_allocation_name_t allocation)
9278{
0a7de745
A
9279 assert(allocation->refcount > 0);
9280 if (1 == OSAddAtomic16(-1, &allocation->refcount)) {
f427ee49
A
9281 kheap_free(KHEAP_DATA_BUFFERS, allocation,
9282 KA_SIZE(KA_NAME_LEN(allocation), allocation->subtotalscount));
0a7de745 9283 }
5ba3f43e
A
9284}
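
kern_allocation_name_release() relies on the atomic decrement returning the prior value: observing a prior refcount of 1 means this caller dropped the last reference and owns the free. A C11-atomics sketch of that release pattern follows; the explicit fence pairing is an assumption made for the sketch, not a statement about OSAddAtomic16's ordering.

#include <stdatomic.h>
#include <stdlib.h>

struct ex_named_alloc {
	_Atomic int refcount;
	/* ... payload ... */
};

static void
ex_named_alloc_release(struct ex_named_alloc *a)
{
	if (atomic_fetch_sub_explicit(&a->refcount, 1, memory_order_release) == 1) {
		atomic_thread_fence(memory_order_acquire);  /* pair with the release */
		free(a);                                    /* last reference gone   */
	}
}
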
9285
9286vm_tag_t
9287kern_allocation_name_get_vm_tag(kern_allocation_name_t allocation)
9288{
0a7de745 9289 return vm_tag_alloc(allocation);
5ba3f43e
A
9290}
9291
0a7de745
A
9292#if !VM_TAG_ACTIVE_UPDATE
9293static void
5ba3f43e 9294vm_page_count_object(mach_memory_info_t * info, unsigned int __unused num_info, vm_object_t object)
3e170ce0 9295{
0a7de745
A
9296 if (!object->wired_page_count) {
9297 return;
9298 }
9299 if (object != kernel_object) {
5ba3f43e
A
9300 assert(object->wire_tag < num_info);
9301 info[object->wire_tag].size += ptoa_64(object->wired_page_count);
0a7de745 9302 }
3e170ce0
A
9303}
9304
5ba3f43e 9305typedef void (*vm_page_iterate_proc)(mach_memory_info_t * info,
0a7de745 9306 unsigned int num_info, vm_object_t object);
3e170ce0 9307
0a7de745 9308static void
5ba3f43e 9309vm_page_iterate_purgeable_objects(mach_memory_info_t * info, unsigned int num_info,
0a7de745
A
9310 vm_page_iterate_proc proc, purgeable_q_t queue,
9311 int group)
3e170ce0 9312{
0a7de745 9313 vm_object_t object;
3e170ce0 9314
0a7de745
A
9315 for (object = (vm_object_t) queue_first(&queue->objq[group]);
9316 !queue_end(&queue->objq[group], (queue_entry_t) object);
9317 object = (vm_object_t) queue_next(&object->objq)) {
5ba3f43e 9318 proc(info, num_info, object);
0a7de745 9319 }
3e170ce0
A
9320}
9321
0a7de745 9322static void
5ba3f43e 9323vm_page_iterate_objects(mach_memory_info_t * info, unsigned int num_info,
0a7de745 9324 vm_page_iterate_proc proc)
3e170ce0 9325{
0a7de745 9326 vm_object_t object;
3e170ce0 9327
0a7de745
A
9328 lck_spin_lock_grp(&vm_objects_wired_lock, &vm_page_lck_grp_bucket);
9329 queue_iterate(&vm_objects_wired,
9330 object,
9331 vm_object_t,
9332 wired_objq)
9333 {
5ba3f43e 9334 proc(info, num_info, object);
0a7de745
A
9335 }
9336 lck_spin_unlock(&vm_objects_wired_lock);
3e170ce0 9337}
d9a64523 9338#endif /* ! VM_TAG_ACTIVE_UPDATE */
3e170ce0
A
9339
9340static uint64_t
f427ee49
A
9341process_account(mach_memory_info_t * info, unsigned int num_info,
9342 uint64_t zones_collectable_bytes, boolean_t iterated)
3e170ce0 9343{
0a7de745
A
9344 size_t namelen;
9345 unsigned int idx, count, nextinfo;
9346 vm_allocation_site_t * site;
5ba3f43e 9347 lck_spin_lock(&vm_allocation_sites_lock);
3e170ce0 9348
0a7de745 9349 for (idx = 0; idx <= vm_allocation_tag_highest; idx++) {
5ba3f43e 9350 site = vm_allocation_sites[idx];
0a7de745
A
9351 if (!site) {
9352 continue;
9353 }
5ba3f43e
A
9354 info[idx].mapped = site->mapped;
9355 info[idx].tag = site->tag;
0a7de745 9356 if (!iterated) {
5ba3f43e
A
9357 info[idx].size = site->total;
9358#if DEBUG || DEVELOPMENT
9359 info[idx].peak = site->peak;
9360#endif /* DEBUG || DEVELOPMENT */
0a7de745
A
9361 } else {
9362 if (!site->subtotalscount && (site->total != info[idx].size)) {
9363 printf("tag mismatch[%d] 0x%qx, iter 0x%qx\n", idx, site->total, info[idx].size);
9364 info[idx].size = site->total;
9365 }
9366 }
5ba3f43e 9367 info[idx].flags |= VM_KERN_SITE_WIRED;
0a7de745
A
9368 if (idx < VM_KERN_MEMORY_FIRST_DYNAMIC) {
9369 info[idx].site = idx;
9370 info[idx].flags |= VM_KERN_SITE_TAG;
9371 if (VM_KERN_MEMORY_ZONE == idx) {
5ba3f43e
A
9372 info[idx].flags |= VM_KERN_SITE_HIDE;
9373 info[idx].flags &= ~VM_KERN_SITE_WIRED;
9374 info[idx].collectable_bytes = zones_collectable_bytes;
9375 }
0a7de745
A
9376 } else if ((namelen = (VM_TAG_NAME_LEN_MAX & (site->flags >> VM_TAG_NAME_LEN_SHIFT)))) {
9377 info[idx].site = 0;
9378 info[idx].flags |= VM_KERN_SITE_NAMED;
9379 if (namelen > sizeof(info[idx].name)) {
9380 namelen = sizeof(info[idx].name);
9381 }
9382 strncpy(&info[idx].name[0], KA_NAME(site), namelen);
9383 } else if (VM_TAG_KMOD & site->flags) {
9384 info[idx].site = OSKextGetKmodIDForSite(site, NULL, 0);
9385 info[idx].flags |= VM_KERN_SITE_KMOD;
9386 } else {
9387 info[idx].site = VM_KERNEL_UNSLIDE(site);
9388 info[idx].flags |= VM_KERN_SITE_KERNEL;
5ba3f43e 9389 }
0a7de745
A
9390 }
9391
9392 nextinfo = (vm_allocation_tag_highest + 1);
9393 count = nextinfo;
9394 if (count >= num_info) {
9395 count = num_info;
9396 }
9397
9398 for (idx = 0; idx < count; idx++) {
9399 site = vm_allocation_sites[idx];
9400 if (!site) {
9401 continue;
3e170ce0 9402 }
5ba3f43e
A
9403#if VM_MAX_TAG_ZONES
9404 vm_allocation_zone_total_t * zone;
9405 unsigned int zidx;
9406 vm_size_t elem_size;
9407
0a7de745
A
9408 if (vm_allocation_zone_totals
9409 && (zone = vm_allocation_zone_totals[idx])
9410 && (nextinfo < num_info)) {
9411 for (zidx = 0; zidx < VM_MAX_TAG_ZONES; zidx++) {
c3c9b80d 9412 if (!zone[zidx].vazt_peak) {
0a7de745
A
9413 continue;
9414 }
f427ee49
A
9415 info[nextinfo] = info[idx];
9416 info[nextinfo].zone = (uint16_t)zone_index_from_tag_index(zidx, &elem_size);
9417 info[nextinfo].flags &= ~VM_KERN_SITE_WIRED;
9418 info[nextinfo].flags |= VM_KERN_SITE_ZONE;
c3c9b80d
A
9419 info[nextinfo].size = zone[zidx].vazt_total;
9420 info[nextinfo].peak = zone[zidx].vazt_peak;
f427ee49 9421 info[nextinfo].mapped = 0;
0a7de745
A
9422 nextinfo++;
9423 }
9424 }
5ba3f43e 9425#endif /* VM_MAX_TAG_ZONES */
0a7de745 9426 if (site->subtotalscount) {
5ba3f43e
A
9427 uint64_t mapped, mapcost, take;
9428 uint32_t sub;
9429 vm_tag_t alloctag;
9430
0a7de745
A
9431 info[idx].size = site->total;
9432 mapped = info[idx].size;
9433 info[idx].mapped = mapped;
9434 mapcost = 0;
9435 for (sub = 0; sub < site->subtotalscount; sub++) {
5ba3f43e
A
9436 alloctag = site->subtotals[sub].tag;
9437 assert(alloctag < num_info);
0a7de745
A
9438 if (info[alloctag].name[0]) {
9439 continue;
9440 }
9441 take = site->subtotals[sub].total;
9442 if (take > info[alloctag].size) {
9443 take = info[alloctag].size;
9444 }
9445 if (take > mapped) {
9446 take = mapped;
9447 }
5ba3f43e
A
9448 info[alloctag].mapped -= take;
9449 info[alloctag].size -= take;
9450 mapped -= take;
9451 mapcost += take;
0a7de745
A
9452 }
9453 info[idx].size = mapcost;
9454 }
3e170ce0
A
9455 }
9456 lck_spin_unlock(&vm_allocation_sites_lock);
5ba3f43e 9457
0a7de745 9458 return 0;
5ba3f43e
A
9459}
9460
9461uint32_t
9462vm_page_diagnose_estimate(void)
9463{
0a7de745 9464 vm_allocation_site_t * site;
f427ee49 9465 uint32_t count = zone_view_count;
0a7de745 9466 uint32_t idx;
5ba3f43e
A
9467
9468 lck_spin_lock(&vm_allocation_sites_lock);
f427ee49 9469 for (idx = 0; idx < VM_MAX_TAG_VALUE; idx++) {
5ba3f43e 9470 site = vm_allocation_sites[idx];
0a7de745
A
9471 if (!site) {
9472 continue;
9473 }
5ba3f43e
A
9474 count++;
9475#if VM_MAX_TAG_ZONES
0a7de745 9476 if (vm_allocation_zone_totals) {
5ba3f43e
A
9477 vm_allocation_zone_total_t * zone;
9478 zone = vm_allocation_zone_totals[idx];
0a7de745
A
9479 if (!zone) {
9480 continue;
9481 }
9482 for (uint32_t zidx = 0; zidx < VM_MAX_TAG_ZONES; zidx++) {
c3c9b80d 9483 count += (zone[zidx].vazt_peak != 0);
0a7de745 9484 }
5ba3f43e
A
9485 }
9486#endif
0a7de745 9487 }
5ba3f43e 9488 lck_spin_unlock(&vm_allocation_sites_lock);
39037602 9489
0a7de745
A
9490 /* some slop for new tags created */
9491 count += 8;
9492 count += VM_KERN_COUNTER_COUNT;
5ba3f43e 9493
0a7de745 9494 return count;
3e170ce0
A
9495}
9496
f427ee49
A
9497static void
9498vm_page_diagnose_zone_stats(mach_memory_info_t *info, zone_stats_t zstats,
9499 bool percpu)
9500{
9501 zpercpu_foreach(zs, zstats) {
9502 info->size += zs->zs_mem_allocated - zs->zs_mem_freed;
9503 }
9504 if (percpu) {
9505 info->size *= zpercpu_count();
9506 }
9507 info->flags |= VM_KERN_SITE_NAMED | VM_KERN_SITE_ZONE_VIEW;
9508}
9509
9510static void
9511vm_page_diagnose_zone(mach_memory_info_t *info, zone_t z)
9512{
c3c9b80d 9513 vm_page_diagnose_zone_stats(info, z->z_stats, z->z_percpu);
f427ee49
A
9514 snprintf(info->name, sizeof(info->name),
9515 "%s%s[raw]", zone_heap_name(z), z->z_name);
9516}
9517
9518static int
9519vm_page_diagnose_heap(mach_memory_info_t *info, kalloc_heap_t kheap)
9520{
9521 struct kheap_zones *zones = kheap->kh_zones;
9522 int i = 0;
9523
9524 for (; i < zones->max_k_zone; i++) {
9525 vm_page_diagnose_zone(info + i, zones->k_zone[i]);
9526 }
9527
9528 for (kalloc_heap_t kh = zones->views; kh; kh = kh->kh_next, i++) {
9529 vm_page_diagnose_zone_stats(info + i, kh->kh_stats, false);
9530 snprintf(info[i].name, sizeof(info[i].name),
9531 "%skalloc[%s]", kheap->kh_name, kh->kh_name);
9532 }
9533
9534 return i;
9535}
9536
0a7de745 9537kern_return_t
5ba3f43e 9538vm_page_diagnose(mach_memory_info_t * info, unsigned int num_info, uint64_t zones_collectable_bytes)
3e170ce0 9539{
0a7de745
A
9540 uint64_t wired_size;
9541 uint64_t wired_managed_size;
9542 uint64_t wired_reserved_size;
0a7de745
A
9543 boolean_t iterate;
9544 mach_memory_info_t * counts;
f427ee49 9545 uint32_t i;
3e170ce0 9546
0a7de745 9547 bzero(info, num_info * sizeof(mach_memory_info_t));
3e170ce0 9548
0a7de745
A
9549 if (!vm_page_wire_count_initial) {
9550 return KERN_ABORTED;
9551 }
39037602 9552
c3c9b80d 9553#if !XNU_TARGET_OS_OSX
0a7de745
A
9554 wired_size = ptoa_64(vm_page_wire_count);
9555 wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count);
c3c9b80d 9556#else /* !XNU_TARGET_OS_OSX */
0a7de745
A
9557 wired_size = ptoa_64(vm_page_wire_count + vm_lopage_free_count + vm_page_throttled_count);
9558 wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count + vm_page_throttled_count);
c3c9b80d 9559#endif /* !XNU_TARGET_OS_OSX */
0a7de745 9560 wired_managed_size = ptoa_64(vm_page_wire_count - vm_page_wire_count_initial);
3e170ce0 9561
0a7de745 9562 wired_size += booter_size;
5ba3f43e 9563
0a7de745
A
9564 assert(num_info >= VM_KERN_COUNTER_COUNT);
9565 num_info -= VM_KERN_COUNTER_COUNT;
9566 counts = &info[num_info];
3e170ce0 9567
0a7de745 9568#define SET_COUNT(xcount, xsize, xflags) \
5ba3f43e 9569 counts[xcount].tag = VM_MAX_TAG_VALUE + xcount; \
0a7de745
A
9570 counts[xcount].site = (xcount); \
9571 counts[xcount].size = (xsize); \
9572 counts[xcount].mapped = (xsize); \
3e170ce0
A
9573 counts[xcount].flags = VM_KERN_SITE_COUNTER | xflags;
9574
0a7de745
A
9575 SET_COUNT(VM_KERN_COUNT_MANAGED, ptoa_64(vm_page_pages), 0);
9576 SET_COUNT(VM_KERN_COUNT_WIRED, wired_size, 0);
9577 SET_COUNT(VM_KERN_COUNT_WIRED_MANAGED, wired_managed_size, 0);
9578 SET_COUNT(VM_KERN_COUNT_RESERVED, wired_reserved_size, VM_KERN_SITE_WIRED);
9579 SET_COUNT(VM_KERN_COUNT_STOLEN, ptoa_64(vm_page_stolen_count), VM_KERN_SITE_WIRED);
9580 SET_COUNT(VM_KERN_COUNT_LOPAGE, ptoa_64(vm_lopage_free_count), VM_KERN_SITE_WIRED);
9581 SET_COUNT(VM_KERN_COUNT_WIRED_BOOT, ptoa_64(vm_page_wire_count_on_boot), 0);
9582 SET_COUNT(VM_KERN_COUNT_BOOT_STOLEN, booter_size, VM_KERN_SITE_WIRED);
f427ee49 9583 SET_COUNT(VM_KERN_COUNT_WIRED_STATIC_KERNELCACHE, ptoa_64(vm_page_kernelcache_count), 0);
0a7de745
A
9584
9585#define SET_MAP(xcount, xsize, xfree, xlargest) \
9586 counts[xcount].site = (xcount); \
9587 counts[xcount].size = (xsize); \
9588 counts[xcount].mapped = (xsize); \
9589 counts[xcount].free = (xfree); \
9590 counts[xcount].largest = (xlargest); \
3e170ce0
A
9591 counts[xcount].flags = VM_KERN_SITE_COUNTER;
9592
0a7de745 9593 vm_map_size_t map_size, map_free, map_largest;
3e170ce0 9594
0a7de745
A
9595 vm_map_sizes(kernel_map, &map_size, &map_free, &map_largest);
9596 SET_MAP(VM_KERN_COUNT_MAP_KERNEL, map_size, map_free, map_largest);
3e170ce0 9597
f427ee49 9598 zone_map_sizes(&map_size, &map_free, &map_largest);
0a7de745 9599 SET_MAP(VM_KERN_COUNT_MAP_ZONE, map_size, map_free, map_largest);
3e170ce0 9600
0a7de745
A
9601 vm_map_sizes(kalloc_map, &map_size, &map_free, &map_largest);
9602 SET_MAP(VM_KERN_COUNT_MAP_KALLOC, map_size, map_free, map_largest);
3e170ce0 9603
f427ee49
A
9604 assert(num_info >= zone_view_count);
9605 num_info -= zone_view_count;
9606 counts = &info[num_info];
9607 i = 0;
9608
9609 i += vm_page_diagnose_heap(counts + i, KHEAP_DEFAULT);
9610 if (KHEAP_DATA_BUFFERS->kh_heap_id == KHEAP_ID_DATA_BUFFERS) {
9611 i += vm_page_diagnose_heap(counts + i, KHEAP_DATA_BUFFERS);
9612 }
9613 if (KHEAP_KEXT->kh_heap_id == KHEAP_ID_KEXT) {
9614 i += vm_page_diagnose_heap(counts + i, KHEAP_KEXT);
9615 }
9616 assert(i <= zone_view_count);
9617
9618 zone_index_foreach(zidx) {
9619 zone_t z = &zone_array[zidx];
9620 zone_view_t zv = z->z_views;
9621
9622 if (zv == NULL) {
9623 continue;
9624 }
9625
9626 if (z->kalloc_heap == KHEAP_ID_NONE) {
9627 vm_page_diagnose_zone(counts + i, z);
9628 i++;
9629 assert(i <= zone_view_count);
9630 }
9631
9632 for (; zv; zv = zv->zv_next) {
9633 vm_page_diagnose_zone_stats(counts + i, zv->zv_stats,
c3c9b80d 9634 z->z_percpu);
f427ee49
A
9635 snprintf(counts[i].name, sizeof(counts[i].name), "%s%s[%s]",
9636 zone_heap_name(z), z->z_name, zv->zv_name);
9637 i++;
9638 assert(i <= zone_view_count);
9639 }
9640 }
9641
0a7de745
A
9642 iterate = !VM_TAG_ACTIVE_UPDATE;
9643 if (iterate) {
9644 enum { kMaxKernelDepth = 1 };
9645 vm_map_t maps[kMaxKernelDepth];
9646 vm_map_entry_t entries[kMaxKernelDepth];
9647 vm_map_t map;
9648 vm_map_entry_t entry;
9649 vm_object_offset_t offset;
9650 vm_page_t page;
9651 int stackIdx, count;
9652
9653#if !VM_TAG_ACTIVE_UPDATE
9654 vm_page_iterate_objects(info, num_info, &vm_page_count_object);
d9a64523 9655#endif /* ! VM_TAG_ACTIVE_UPDATE */
5ba3f43e 9656
0a7de745
A
9657 map = kernel_map;
9658 stackIdx = 0;
9659 while (map) {
5ba3f43e 9660 vm_map_lock(map);
0a7de745
A
9661 for (entry = map->hdr.links.next; map; entry = entry->links.next) {
9662 if (entry->is_sub_map) {
5ba3f43e
A
9663 assert(stackIdx < kMaxKernelDepth);
9664 maps[stackIdx] = map;
9665 entries[stackIdx] = entry;
9666 stackIdx++;
9667 map = VME_SUBMAP(entry);
9668 entry = NULL;
9669 break;
0a7de745
A
9670 }
9671 if (VME_OBJECT(entry) == kernel_object) {
5ba3f43e
A
9672 count = 0;
9673 vm_object_lock(VME_OBJECT(entry));
0a7de745 9674 for (offset = entry->links.start; offset < entry->links.end; offset += page_size) {
5ba3f43e 9675 page = vm_page_lookup(VME_OBJECT(entry), offset);
0a7de745
A
9676 if (page && VM_PAGE_WIRED(page)) {
9677 count++;
9678 }
5ba3f43e
A
9679 }
9680 vm_object_unlock(VME_OBJECT(entry));
3e170ce0 9681
0a7de745
A
9682 if (count) {
9683 assert(VME_ALIAS(entry) != VM_KERN_MEMORY_NONE);
9684 assert(VME_ALIAS(entry) < num_info);
9685 info[VME_ALIAS(entry)].size += ptoa_64(count);
5ba3f43e 9686 }
0a7de745
A
9687 }
9688 while (map && (entry == vm_map_last_entry(map))) {
5ba3f43e 9689 vm_map_unlock(map);
0a7de745
A
9690 if (!stackIdx) {
9691 map = NULL;
9692 } else {
9693 --stackIdx;
9694 map = maps[stackIdx];
9695 entry = entries[stackIdx];
5ba3f43e 9696 }
0a7de745 9697 }
5ba3f43e 9698 }
0a7de745
A
9699 }
9700 }
9701
9702 process_account(info, num_info, zones_collectable_bytes, iterate);
3e170ce0 9703
0a7de745 9704 return KERN_SUCCESS;
3e170ce0 9705}
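
The iterate branch of vm_page_diagnose() walks kernel_map and its submaps without recursion, pushing the current (map, entry) pair onto small fixed-depth arrays when it descends into a submap and popping when a map is exhausted. The sketch below shows that explicit-stack traversal on a simplified nested-map structure; the types and depth limit are illustrative assumptions, not the kernel's vm_map API.

#include <stddef.h>

struct ex_map;

struct ex_entry {
	struct ex_entry *next;
	struct ex_map   *submap;   /* non-NULL when this entry nests another map */
	size_t           wired_pages;
};

struct ex_map {
	struct ex_entry *entries;  /* singly linked list of entries */
};

#define EX_MAX_DEPTH 4

static size_t
ex_count_wired(struct ex_map *root)
{
	struct ex_map   *maps[EX_MAX_DEPTH];
	struct ex_entry *saved[EX_MAX_DEPTH];
	struct ex_map   *map = root;
	struct ex_entry *entry = root->entries;
	size_t           depth = 0, total = 0;

	while (map != NULL) {
		if (entry == NULL) {               /* this map is exhausted */
			if (depth == 0) {
				map = NULL;        /* done with the root map */
			} else {
				depth--;           /* pop and resume after the saved entry */
				map = maps[depth];
				entry = saved[depth]->next;
			}
			continue;
		}
		if (entry->submap != NULL && depth < EX_MAX_DEPTH) {
			maps[depth] = map;         /* push and descend */
			saved[depth] = entry;
			depth++;
			map = entry->submap;
			entry = map->entries;
			continue;
		}
		total += entry->wired_pages;
		entry = entry->next;
	}
	return total;
}
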
39037602 9706
39037602
A
9707#if DEBUG || DEVELOPMENT
9708
39037602 9709kern_return_t
5ba3f43e 9710vm_kern_allocation_info(uintptr_t addr, vm_size_t * size, vm_tag_t * tag, vm_size_t * zone_size)
39037602 9711{
0a7de745
A
9712 kern_return_t ret;
9713 vm_size_t zsize;
9714 vm_map_t map;
9715 vm_map_entry_t entry;
39037602 9716
0a7de745
A
9717 zsize = zone_element_info((void *) addr, tag);
9718 if (zsize) {
5ba3f43e 9719 *zone_size = *size = zsize;
0a7de745
A
9720 return KERN_SUCCESS;
9721 }
39037602 9722
5ba3f43e 9723 *zone_size = 0;
0a7de745
A
9724 ret = KERN_INVALID_ADDRESS;
9725 for (map = kernel_map; map;) {
5ba3f43e 9726 vm_map_lock(map);
0a7de745
A
9727 if (!vm_map_lookup_entry(map, addr, &entry)) {
9728 break;
9729 }
9730 if (entry->is_sub_map) {
9731 if (map != kernel_map) {
9732 break;
9733 }
5ba3f43e
A
9734 map = VME_SUBMAP(entry);
9735 continue;
0a7de745
A
9736 }
9737 if (entry->vme_start != addr) {
9738 break;
9739 }
f427ee49 9740 *tag = (vm_tag_t)VME_ALIAS(entry);
5ba3f43e
A
9741 *size = (entry->vme_end - addr);
9742 ret = KERN_SUCCESS;
9743 break;
9744 }
0a7de745
A
9745 if (map != kernel_map) {
9746 vm_map_unlock(map);
9747 }
5ba3f43e 9748 vm_map_unlock(kernel_map);
39037602 9749
0a7de745 9750 return ret;
39037602
A
9751}
9752
5ba3f43e 9753#endif /* DEBUG || DEVELOPMENT */
39037602 9754
5ba3f43e
A
9755uint32_t
9756vm_tag_get_kext(vm_tag_t tag, char * name, vm_size_t namelen)
9757{
0a7de745
A
9758 vm_allocation_site_t * site;
9759 uint32_t kmodId;
39037602 9760
0a7de745
A
9761 kmodId = 0;
9762 lck_spin_lock(&vm_allocation_sites_lock);
9763 if ((site = vm_allocation_sites[tag])) {
9764 if (VM_TAG_KMOD & site->flags) {
9765 kmodId = OSKextGetKmodIDForSite(site, name, namelen);
9766 }
9767 }
9768 lck_spin_unlock(&vm_allocation_sites_lock);
39037602 9769
0a7de745 9770 return kmodId;
39037602 9771}
d9a64523
A
9772
9773
9774#if CONFIG_SECLUDED_MEMORY
9775/*
9776 * Note that there's no locking around other accesses to vm_page_secluded_target.
9777 * That should be OK, since these are the only places where it can be changed after
9778 * initialization. Other users (like vm_pageout) may see the wrong value briefly,
9779 * but will eventually get the correct value. This brief mismatch is OK as pageout
9780 * and page freeing will auto-adjust the vm_page_secluded_count to match the target
9781 * over time.
9782 */
9783unsigned int vm_page_secluded_suppress_cnt = 0;
9784unsigned int vm_page_secluded_save_target;
9785
f427ee49
A
9786LCK_GRP_DECLARE(secluded_suppress_slock_grp, "secluded_suppress_slock");
9787LCK_SPIN_DECLARE(secluded_suppress_slock, &secluded_suppress_slock_grp);
d9a64523
A
9788
9789void
9790start_secluded_suppression(task_t task)
9791{
0a7de745 9792 if (task->task_suppressed_secluded) {
d9a64523 9793 return;
0a7de745 9794 }
d9a64523
A
9795 lck_spin_lock(&secluded_suppress_slock);
9796 if (!task->task_suppressed_secluded && vm_page_secluded_suppress_cnt++ == 0) {
9797 task->task_suppressed_secluded = TRUE;
9798 vm_page_secluded_save_target = vm_page_secluded_target;
9799 vm_page_secluded_target = 0;
cb323159 9800 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
d9a64523
A
9801 }
9802 lck_spin_unlock(&secluded_suppress_slock);
9803}
9804
9805void
9806stop_secluded_suppression(task_t task)
9807{
9808 lck_spin_lock(&secluded_suppress_slock);
9809 if (task->task_suppressed_secluded && --vm_page_secluded_suppress_cnt == 0) {
9810 task->task_suppressed_secluded = FALSE;
9811 vm_page_secluded_target = vm_page_secluded_save_target;
cb323159 9812 VM_PAGE_SECLUDED_COUNT_OVER_TARGET_UPDATE();
d9a64523
A
9813 }
9814 lck_spin_unlock(&secluded_suppress_slock);
9815}
9816
9817#endif /* CONFIG_SECLUDED_MEMORY */
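
start_secluded_suppression()/stop_secluded_suppression() implement a save/restore suppression count: the first suppressor saves the current target and zeroes it, later ones only bump the count, and the last one to leave restores the saved value. A simplified user-space sketch of that pattern follows; a pthread mutex stands in for the kernel spinlock, and the per-task task_suppressed_secluded flag is omitted.

#include <pthread.h>

static pthread_mutex_t ex_suppress_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned        ex_suppress_cnt;
static unsigned        ex_target = 128;     /* arbitrary example target */
static unsigned        ex_saved_target;

static void
ex_start_suppression(void)
{
	pthread_mutex_lock(&ex_suppress_lock);
	if (ex_suppress_cnt++ == 0) {
		ex_saved_target = ex_target;    /* first suppressor saves it */
		ex_target = 0;
	}
	pthread_mutex_unlock(&ex_suppress_lock);
}

static void
ex_stop_suppression(void)
{
	pthread_mutex_lock(&ex_suppress_lock);
	if (ex_suppress_cnt > 0 && --ex_suppress_cnt == 0) {
		ex_target = ex_saved_target;    /* last suppressor restores it */
	}
	pthread_mutex_unlock(&ex_suppress_lock);
}
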
c3c9b80d
A
9818
9819/*
9820 * Move the list of retired pages on the vm_page_queue_retired to
9821 * their final resting place on retired_pages_object.
9822 */
9823void
9824vm_retire_boot_pages(void)
9825{
9826#if defined(__arm64__)
9827 vm_page_t p;
9828
9829 vm_object_lock(retired_pages_object);
9830 while (!vm_page_queue_empty(&vm_page_queue_retired)) {
9831 vm_page_queue_remove_first(&vm_page_queue_retired, p, vmp_pageq);
9832 assert(p != NULL);
9833 vm_page_lock_queues();
9834 p->vmp_q_state = VM_PAGE_IS_WIRED;
9835 p->vmp_wire_count++;
9836 vm_page_unlock_queues();
9837 vm_page_insert_wired(p, retired_pages_object, ptoa(VM_PAGE_GET_PHYS_PAGE(p)), VM_KERN_MEMORY_RETIRED);
9838 vm_object_unlock(retired_pages_object);
9839 pmap_retire_page(VM_PAGE_GET_PHYS_PAGE(p));
9840 vm_object_lock(retired_pages_object);
9841 }
9842 vm_object_unlock(retired_pages_object);
9843#endif /* defined(__arm64__) */
9844}
9845
9846/*
9847 * Returns the current number of retired pages, used for sysctl.
9848 */
9849uint32_t
9850vm_retired_pages_count(void)
9851{
9852 return retired_pages_object->resident_page_count;
9853}