]> git.saurik.com Git - apple/xnu.git/blame - osfmk/vm/vm_resident.c
xnu-3789.31.2.tar.gz
[apple/xnu.git] / osfmk / vm / vm_resident.c
CommitLineData
1c79356b 1/*
b0d623f7 2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
91447636 65#include <debug.h>
2d21ac55 66#include <libkern/OSAtomic.h>
3e170ce0 67#include <libkern/OSDebug.h>
91447636 68
9bccf70c 69#include <mach/clock_types.h>
1c79356b
A
70#include <mach/vm_prot.h>
71#include <mach/vm_statistics.h>
2d21ac55 72#include <mach/sdt.h>
1c79356b
A
73#include <kern/counters.h>
74#include <kern/sched_prim.h>
39037602 75#include <kern/policy_internal.h>
1c79356b
A
76#include <kern/task.h>
77#include <kern/thread.h>
b0d623f7 78#include <kern/kalloc.h>
1c79356b
A
79#include <kern/zalloc.h>
80#include <kern/xpr.h>
fe8ab488 81#include <kern/ledger.h>
1c79356b
A
82#include <vm/pmap.h>
83#include <vm/vm_init.h>
84#include <vm/vm_map.h>
85#include <vm/vm_page.h>
86#include <vm/vm_pageout.h>
87#include <vm/vm_kern.h> /* kernel_memory_allocate() */
88#include <kern/misc_protos.h>
89#include <zone_debug.h>
3e170ce0 90#include <mach_debug/zone_info.h>
1c79356b 91#include <vm/cpm.h>
6d2010ae 92#include <pexpert/pexpert.h>
55e303ae 93
91447636 94#include <vm/vm_protos.h>
2d21ac55
A
95#include <vm/memory_object.h>
96#include <vm/vm_purgeable_internal.h>
39236c6e 97#include <vm/vm_compressor.h>
2d21ac55 98
fe8ab488
A
99#if CONFIG_PHANTOM_CACHE
100#include <vm/vm_phantom_cache.h>
101#endif
102
b0d623f7
A
103#include <IOKit/IOHibernatePrivate.h>
104
b0d623f7
A
105#include <sys/kdebug.h>
106
39037602
A
107
108char vm_page_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
109char vm_page_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
110char vm_page_non_speculative_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
111char vm_page_active_or_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
112
113#if CONFIG_SECLUDED_MEMORY
114struct vm_page_secluded_data vm_page_secluded;
115#endif /* CONFIG_SECLUDED_MEMORY */
116
316670eb 117boolean_t hibernate_cleaning_in_progress = FALSE;
b0d623f7
A
118boolean_t vm_page_free_verify = TRUE;
119
6d2010ae
A
120uint32_t vm_lopage_free_count = 0;
121uint32_t vm_lopage_free_limit = 0;
122uint32_t vm_lopage_lowater = 0;
0b4c1975
A
123boolean_t vm_lopage_refill = FALSE;
124boolean_t vm_lopage_needed = FALSE;
125
b0d623f7
A
126lck_mtx_ext_t vm_page_queue_lock_ext;
127lck_mtx_ext_t vm_page_queue_free_lock_ext;
128lck_mtx_ext_t vm_purgeable_queue_lock_ext;
2d21ac55 129
0b4c1975
A
130int speculative_age_index = 0;
131int speculative_steal_index = 0;
2d21ac55
A
132struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
133
0b4e3aa0 134
b0d623f7
A
135__private_extern__ void vm_page_init_lck_grp(void);
136
6d2010ae
A
137static void vm_page_free_prepare(vm_page_t page);
138static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
139
3e170ce0 140static void vm_tag_init(void);
b0d623f7 141
3e170ce0 142uint64_t vm_min_kernel_and_kext_address = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
39037602
A
143uint32_t vm_packed_from_vm_pages_array_mask = VM_PACKED_FROM_VM_PAGES_ARRAY;
144uint32_t vm_packed_pointer_shift = VM_PACKED_POINTER_SHIFT;
b0d623f7 145
1c79356b
A
146/*
147 * Associated with page of user-allocatable memory is a
148 * page structure.
149 */
150
151/*
152 * These variables record the values returned by vm_page_bootstrap,
153 * for debugging purposes. The implementation of pmap_steal_memory
154 * and pmap_startup here also uses them internally.
155 */
156
157vm_offset_t virtual_space_start;
158vm_offset_t virtual_space_end;
7ddcb079 159uint32_t vm_page_pages;
1c79356b
A
160
161/*
162 * The vm_page_lookup() routine, which provides for fast
163 * (virtual memory object, offset) to page lookup, employs
164 * the following hash table. The vm_page_{insert,remove}
165 * routines install and remove associations in the table.
166 * [This table is often called the virtual-to-physical,
167 * or VP, table.]
168 */
169typedef struct {
fe8ab488 170 vm_page_packed_t page_list;
1c79356b
A
171#if MACH_PAGE_HASH_STATS
172 int cur_count; /* current count */
173 int hi_count; /* high water mark */
174#endif /* MACH_PAGE_HASH_STATS */
175} vm_page_bucket_t;
176
b0d623f7
A
177
178#define BUCKETS_PER_LOCK 16
179
1c79356b
A
180vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
181unsigned int vm_page_bucket_count = 0; /* How big is array? */
182unsigned int vm_page_hash_mask; /* Mask for hash function */
183unsigned int vm_page_hash_shift; /* Shift for hash function */
2d21ac55 184uint32_t vm_page_bucket_hash; /* Basic bucket hash */
b0d623f7
A
185unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
186
187lck_spin_t *vm_page_bucket_locks;
3e170ce0
A
188lck_spin_t vm_objects_wired_lock;
189lck_spin_t vm_allocation_sites_lock;
1c79356b 190
15129b1c
A
191#if VM_PAGE_BUCKETS_CHECK
192boolean_t vm_page_buckets_check_ready = FALSE;
193#if VM_PAGE_FAKE_BUCKETS
194vm_page_bucket_t *vm_page_fake_buckets; /* decoy buckets */
195vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
196#endif /* VM_PAGE_FAKE_BUCKETS */
197#endif /* VM_PAGE_BUCKETS_CHECK */
91447636 198
3e170ce0
A
199
200
1c79356b
A
201#if MACH_PAGE_HASH_STATS
202/* This routine is only for debug. It is intended to be called by
203 * hand by a developer using a kernel debugger. This routine prints
204 * out vm_page_hash table statistics to the kernel debug console.
205 */
206void
207hash_debug(void)
208{
209 int i;
210 int numbuckets = 0;
211 int highsum = 0;
212 int maxdepth = 0;
213
214 for (i = 0; i < vm_page_bucket_count; i++) {
215 if (vm_page_buckets[i].hi_count) {
216 numbuckets++;
217 highsum += vm_page_buckets[i].hi_count;
218 if (vm_page_buckets[i].hi_count > maxdepth)
219 maxdepth = vm_page_buckets[i].hi_count;
220 }
221 }
222 printf("Total number of buckets: %d\n", vm_page_bucket_count);
223 printf("Number used buckets: %d = %d%%\n",
224 numbuckets, 100*numbuckets/vm_page_bucket_count);
225 printf("Number unused buckets: %d = %d%%\n",
226 vm_page_bucket_count - numbuckets,
227 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
228 printf("Sum of bucket max depth: %d\n", highsum);
229 printf("Average bucket depth: %d.%2d\n",
230 highsum/vm_page_bucket_count,
231 highsum%vm_page_bucket_count);
232 printf("Maximum bucket depth: %d\n", maxdepth);
233}
234#endif /* MACH_PAGE_HASH_STATS */
235
236/*
237 * The virtual page size is currently implemented as a runtime
238 * variable, but is constant once initialized using vm_set_page_size.
239 * This initialization must be done in the machine-dependent
240 * bootstrap sequence, before calling other machine-independent
241 * initializations.
242 *
243 * All references to the virtual page size outside this
244 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
245 * constants.
246 */
55e303ae
A
247vm_size_t page_size = PAGE_SIZE;
248vm_size_t page_mask = PAGE_MASK;
2d21ac55 249int page_shift = PAGE_SHIFT;
1c79356b
A
250
251/*
252 * Resident page structures are initialized from
253 * a template (see vm_page_alloc).
254 *
255 * When adding a new field to the virtual memory
256 * object structure, be sure to add initialization
257 * (see vm_page_bootstrap).
258 */
259struct vm_page vm_page_template;
260
2d21ac55 261vm_page_t vm_pages = VM_PAGE_NULL;
39037602
A
262vm_page_t vm_page_array_beginning_addr;
263vm_page_t vm_page_array_ending_addr;
264
2d21ac55 265unsigned int vm_pages_count = 0;
0b4c1975 266ppnum_t vm_page_lowest = 0;
2d21ac55 267
1c79356b
A
268/*
269 * Resident pages that represent real memory
2d21ac55
A
270 * are allocated from a set of free lists,
271 * one per color.
1c79356b 272 */
2d21ac55
A
273unsigned int vm_colors;
274unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
275unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
fe8ab488 276unsigned int vm_free_magazine_refill_limit = 0;
39037602
A
277
278
279struct vm_page_queue_free_head {
280 vm_page_queue_head_t qhead;
281} __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
282
283struct vm_page_queue_free_head vm_page_queue_free[MAX_COLORS];
284
285
1c79356b 286unsigned int vm_page_free_wanted;
2d21ac55 287unsigned int vm_page_free_wanted_privileged;
39037602
A
288#if CONFIG_SECLUDED_MEMORY
289unsigned int vm_page_free_wanted_secluded;
290#endif /* CONFIG_SECLUDED_MEMORY */
91447636 291unsigned int vm_page_free_count;
1c79356b 292
1c79356b
A
293/*
294 * Occasionally, the virtual memory system uses
295 * resident page structures that do not refer to
296 * real pages, for example to leave a page with
297 * important state information in the VP table.
298 *
299 * These page structures are allocated the way
300 * most other kernel structures are.
301 */
39037602 302zone_t vm_page_array_zone;
1c79356b 303zone_t vm_page_zone;
b0d623f7
A
304vm_locks_array_t vm_page_locks;
305decl_lck_mtx_data(,vm_page_alloc_lock)
316670eb
A
306lck_mtx_ext_t vm_page_alloc_lock_ext;
307
9bccf70c 308unsigned int io_throttle_zero_fill;
1c79356b 309
b0d623f7
A
310unsigned int vm_page_local_q_count = 0;
311unsigned int vm_page_local_q_soft_limit = 250;
312unsigned int vm_page_local_q_hard_limit = 500;
313struct vplq *vm_page_local_q = NULL;
314
316670eb
A
315/* N.B. Guard and fictitious pages must not
316 * be assigned a zero phys_page value.
317 */
1c79356b
A
318/*
319 * Fictitious pages don't have a physical address,
55e303ae 320 * but we must initialize phys_page to something.
1c79356b
A
321 * For debugging, this should be a strange value
322 * that the pmap module can recognize in assertions.
323 */
b0d623f7 324ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
1c79356b 325
2d21ac55
A
326/*
327 * Guard pages are not accessible so they don't
328 * need a physical address, but we need to enter
329 * one in the pmap.
330 * Let's make it recognizable and make sure that
331 * we don't use a real physical page with that
332 * physical address.
333 */
b0d623f7 334ppnum_t vm_page_guard_addr = (ppnum_t) -2;
2d21ac55 335
1c79356b
A
336/*
337 * Resident page structures are also chained on
338 * queues that are used by the page replacement
339 * system (pageout daemon). These queues are
340 * defined here, but are shared by the pageout
9bccf70c 341 * module. The inactive queue is broken into
39236c6e 342 * file backed and anonymous for convenience as the
9bccf70c 343 * pageout daemon often assignes a higher
39236c6e 344 * importance to anonymous pages (less likely to pick)
1c79356b 345 */
39037602
A
346vm_page_queue_head_t vm_page_queue_active __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
347vm_page_queue_head_t vm_page_queue_inactive __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
348#if CONFIG_SECLUDED_MEMORY
349vm_page_queue_head_t vm_page_queue_secluded __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
350#endif /* CONFIG_SECLUDED_MEMORY */
351vm_page_queue_head_t vm_page_queue_anonymous __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT))); /* inactive memory queue for anonymous pages */
352vm_page_queue_head_t vm_page_queue_throttled __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
2d21ac55 353
3e170ce0
A
354queue_head_t vm_objects_wired;
355
39037602
A
356#if CONFIG_BACKGROUND_QUEUE
357vm_page_queue_head_t vm_page_queue_background __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
358uint32_t vm_page_background_limit;
359uint32_t vm_page_background_target;
360uint32_t vm_page_background_count;
361uint64_t vm_page_background_promoted_count;
362
363uint32_t vm_page_background_internal_count;
364uint32_t vm_page_background_external_count;
365
366uint32_t vm_page_background_mode;
367uint32_t vm_page_background_exclude_external;
368#endif
369
91447636
A
370unsigned int vm_page_active_count;
371unsigned int vm_page_inactive_count;
39037602
A
372#if CONFIG_SECLUDED_MEMORY
373unsigned int vm_page_secluded_count;
374unsigned int vm_page_secluded_count_free;
375unsigned int vm_page_secluded_count_inuse;
376#endif /* CONFIG_SECLUDED_MEMORY */
316670eb 377unsigned int vm_page_anonymous_count;
2d21ac55
A
378unsigned int vm_page_throttled_count;
379unsigned int vm_page_speculative_count;
3e170ce0 380
91447636 381unsigned int vm_page_wire_count;
3e170ce0 382unsigned int vm_page_stolen_count;
0b4c1975 383unsigned int vm_page_wire_count_initial;
3e170ce0 384unsigned int vm_page_pages_initial;
91447636 385unsigned int vm_page_gobble_count = 0;
fe8ab488
A
386
387#define VM_PAGE_WIRE_COUNT_WARNING 0
388#define VM_PAGE_GOBBLE_COUNT_WARNING 0
91447636
A
389
390unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
b0d623f7 391unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
91447636 392uint64_t vm_page_purged_count = 0; /* total count of purged pages */
1c79356b 393
fe8ab488 394unsigned int vm_page_xpmapped_external_count = 0;
39236c6e
A
395unsigned int vm_page_external_count = 0;
396unsigned int vm_page_internal_count = 0;
397unsigned int vm_page_pageable_external_count = 0;
398unsigned int vm_page_pageable_internal_count = 0;
399
b0d623f7 400#if DEVELOPMENT || DEBUG
2d21ac55
A
401unsigned int vm_page_speculative_recreated = 0;
402unsigned int vm_page_speculative_created = 0;
403unsigned int vm_page_speculative_used = 0;
b0d623f7 404#endif
2d21ac55 405
39037602 406vm_page_queue_head_t vm_page_queue_cleaned __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
316670eb
A
407
408unsigned int vm_page_cleaned_count = 0;
409unsigned int vm_pageout_enqueued_cleaned = 0;
410
0c530ab8 411uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
0b4c1975 412ppnum_t max_valid_low_ppnum = 0xffffffff;
0c530ab8
A
413
414
1c79356b
A
415/*
416 * Several page replacement parameters are also
417 * shared with this module, so that page allocation
418 * (done here in vm_page_alloc) can trigger the
419 * pageout daemon.
420 */
91447636
A
421unsigned int vm_page_free_target = 0;
422unsigned int vm_page_free_min = 0;
b0d623f7 423unsigned int vm_page_throttle_limit = 0;
91447636 424unsigned int vm_page_inactive_target = 0;
39037602
A
425#if CONFIG_SECLUDED_MEMORY
426unsigned int vm_page_secluded_target = 0;
427#endif /* CONFIG_SECLUDED_MEMORY */
39236c6e 428unsigned int vm_page_anonymous_min = 0;
2d21ac55 429unsigned int vm_page_inactive_min = 0;
91447636 430unsigned int vm_page_free_reserved = 0;
b0d623f7 431unsigned int vm_page_throttle_count = 0;
1c79356b 432
316670eb 433
1c79356b
A
434/*
435 * The VM system has a couple of heuristics for deciding
436 * that pages are "uninteresting" and should be placed
437 * on the inactive queue as likely candidates for replacement.
438 * These variables let the heuristics be controlled at run-time
439 * to make experimentation easier.
440 */
441
442boolean_t vm_page_deactivate_hint = TRUE;
443
b0d623f7
A
444struct vm_page_stats_reusable vm_page_stats_reusable;
445
1c79356b
A
446/*
447 * vm_set_page_size:
448 *
449 * Sets the page size, perhaps based upon the memory
450 * size. Must be called before any use of page-size
451 * dependent functions.
452 *
453 * Sets page_shift and page_mask from page_size.
454 */
455void
456vm_set_page_size(void)
457{
fe8ab488
A
458 page_size = PAGE_SIZE;
459 page_mask = PAGE_MASK;
460 page_shift = PAGE_SHIFT;
1c79356b
A
461
462 if ((page_mask & page_size) != 0)
463 panic("vm_set_page_size: page size not a power of two");
464
465 for (page_shift = 0; ; page_shift++)
91447636 466 if ((1U << page_shift) == page_size)
1c79356b 467 break;
1c79356b
A
468}
469
fe8ab488
A
470#define COLOR_GROUPS_TO_STEAL 4
471
2d21ac55
A
472
473/* Called once during statup, once the cache geometry is known.
474 */
475static void
476vm_page_set_colors( void )
477{
478 unsigned int n, override;
479
593a1d5f 480 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
2d21ac55
A
481 n = override;
482 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
483 n = vm_cache_geometry_colors;
484 else n = DEFAULT_COLORS; /* use default if all else fails */
485
486 if ( n == 0 )
487 n = 1;
488 if ( n > MAX_COLORS )
489 n = MAX_COLORS;
490
491 /* the count must be a power of 2 */
b0d623f7 492 if ( ( n & (n - 1)) != 0 )
2d21ac55
A
493 panic("vm_page_set_colors");
494
495 vm_colors = n;
496 vm_color_mask = n - 1;
fe8ab488
A
497
498 vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
2d21ac55
A
499}
500
501
b0d623f7
A
502lck_grp_t vm_page_lck_grp_free;
503lck_grp_t vm_page_lck_grp_queue;
504lck_grp_t vm_page_lck_grp_local;
505lck_grp_t vm_page_lck_grp_purge;
506lck_grp_t vm_page_lck_grp_alloc;
507lck_grp_t vm_page_lck_grp_bucket;
508lck_grp_attr_t vm_page_lck_grp_attr;
509lck_attr_t vm_page_lck_attr;
510
511
512__private_extern__ void
513vm_page_init_lck_grp(void)
514{
515 /*
516 * initialze the vm_page lock world
517 */
518 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
519 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
520 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
521 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
522 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
523 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
524 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
525 lck_attr_setdefault(&vm_page_lck_attr);
316670eb 526 lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
39236c6e
A
527
528 vm_compressor_init_locks();
b0d623f7
A
529}
530
531void
532vm_page_init_local_q()
533{
534 unsigned int num_cpus;
535 unsigned int i;
536 struct vplq *t_local_q;
537
538 num_cpus = ml_get_max_cpus();
539
540 /*
541 * no point in this for a uni-processor system
542 */
543 if (num_cpus >= 2) {
544 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
545
546 for (i = 0; i < num_cpus; i++) {
547 struct vpl *lq;
548
549 lq = &t_local_q[i].vpl_un.vpl;
550 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
39037602 551 vm_page_queue_init(&lq->vpl_queue);
b0d623f7 552 lq->vpl_count = 0;
39236c6e
A
553 lq->vpl_internal_count = 0;
554 lq->vpl_external_count = 0;
b0d623f7
A
555 }
556 vm_page_local_q_count = num_cpus;
557
558 vm_page_local_q = (struct vplq *)t_local_q;
559 }
560}
561
562
1c79356b
A
563/*
564 * vm_page_bootstrap:
565 *
566 * Initializes the resident memory module.
567 *
568 * Allocates memory for the page cells, and
569 * for the object/offset-to-page hash table headers.
570 * Each page cell is initialized and placed on the free list.
571 * Returns the range of available kernel virtual memory.
572 */
573
574void
575vm_page_bootstrap(
576 vm_offset_t *startp,
577 vm_offset_t *endp)
578{
39037602 579 vm_page_t m;
91447636 580 unsigned int i;
1c79356b
A
581 unsigned int log1;
582 unsigned int log2;
583 unsigned int size;
584
585 /*
586 * Initialize the vm_page template.
587 */
588
589 m = &vm_page_template;
b0d623f7 590 bzero(m, sizeof (*m));
1c79356b 591
39037602
A
592#if CONFIG_BACKGROUND_QUEUE
593 m->vm_page_backgroundq.next = 0;
594 m->vm_page_backgroundq.prev = 0;
595 m->vm_page_in_background = FALSE;
596 m->vm_page_on_backgroundq = FALSE;
597#endif
598
599 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
600 m->listq.next = 0;
601 m->listq.prev = 0;
602 m->next_m = 0;
91447636 603
39037602 604 m->vm_page_object = 0; /* reset later */
b0d623f7
A
605 m->offset = (vm_object_offset_t) -1; /* reset later */
606
607 m->wire_count = 0;
39037602 608 m->vm_page_q_state = VM_PAGE_NOT_ON_Q;
1c79356b 609 m->laundry = FALSE;
1c79356b 610 m->reference = FALSE;
b0d623f7
A
611 m->gobbled = FALSE;
612 m->private = FALSE;
b0d623f7
A
613 m->__unused_pageq_bits = 0;
614
39037602 615 VM_PAGE_SET_PHYS_PAGE(m, 0); /* reset later */
1c79356b
A
616 m->busy = TRUE;
617 m->wanted = FALSE;
618 m->tabled = FALSE;
15129b1c 619 m->hashed = FALSE;
1c79356b 620 m->fictitious = FALSE;
b0d623f7
A
621 m->pmapped = FALSE;
622 m->wpmapped = FALSE;
39037602 623 m->free_when_done = FALSE;
1c79356b
A
624 m->absent = FALSE;
625 m->error = FALSE;
626 m->dirty = FALSE;
627 m->cleaning = FALSE;
628 m->precious = FALSE;
629 m->clustered = FALSE;
b0d623f7 630 m->overwriting = FALSE;
1c79356b 631 m->restart = FALSE;
b0d623f7 632 m->unusual = FALSE;
91447636 633 m->encrypted = FALSE;
2d21ac55 634 m->encrypted_cleaning = FALSE;
b0d623f7
A
635 m->cs_validated = FALSE;
636 m->cs_tainted = FALSE;
c18c124e 637 m->cs_nx = FALSE;
b0d623f7 638 m->no_cache = FALSE;
b0d623f7 639 m->reusable = FALSE;
6d2010ae 640 m->slid = FALSE;
39236c6e 641 m->xpmapped = FALSE;
15129b1c 642 m->written_by_kernel = FALSE;
b0d623f7 643 m->__unused_object_bits = 0;
1c79356b 644
1c79356b
A
645 /*
646 * Initialize the page queues.
647 */
b0d623f7
A
648 vm_page_init_lck_grp();
649
650 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
651 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
652 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
2d21ac55
A
653
654 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
655 int group;
656
657 purgeable_queues[i].token_q_head = 0;
658 purgeable_queues[i].token_q_tail = 0;
659 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
660 queue_init(&purgeable_queues[i].objq[group]);
661
662 purgeable_queues[i].type = i;
663 purgeable_queues[i].new_pages = 0;
664#if MACH_ASSERT
665 purgeable_queues[i].debug_count_tokens = 0;
666 purgeable_queues[i].debug_count_objects = 0;
667#endif
668 };
fe8ab488
A
669 purgeable_nonvolatile_count = 0;
670 queue_init(&purgeable_nonvolatile_queue);
2d21ac55
A
671
672 for (i = 0; i < MAX_COLORS; i++ )
39037602
A
673 vm_page_queue_init(&vm_page_queue_free[i].qhead);
674
675 vm_page_queue_init(&vm_lopage_queue_free);
676 vm_page_queue_init(&vm_page_queue_active);
677 vm_page_queue_init(&vm_page_queue_inactive);
678#if CONFIG_SECLUDED_MEMORY
679 vm_page_queue_init(&vm_page_queue_secluded);
680#endif /* CONFIG_SECLUDED_MEMORY */
681 vm_page_queue_init(&vm_page_queue_cleaned);
682 vm_page_queue_init(&vm_page_queue_throttled);
683 vm_page_queue_init(&vm_page_queue_anonymous);
3e170ce0 684 queue_init(&vm_objects_wired);
1c79356b 685
2d21ac55 686 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
39037602 687 vm_page_queue_init(&vm_page_queue_speculative[i].age_q);
2d21ac55
A
688
689 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
690 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
691 }
39037602
A
692#if CONFIG_BACKGROUND_QUEUE
693 vm_page_queue_init(&vm_page_queue_background);
694
695 vm_page_background_count = 0;
696 vm_page_background_internal_count = 0;
697 vm_page_background_external_count = 0;
698 vm_page_background_promoted_count = 0;
699
700 vm_page_background_target = (unsigned int)(atop_64(max_mem) / 25);
701
702 if (vm_page_background_target > VM_PAGE_BACKGROUND_TARGET_MAX)
703 vm_page_background_target = VM_PAGE_BACKGROUND_TARGET_MAX;
704 vm_page_background_limit = vm_page_background_target + 256;
705
706 vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
707 vm_page_background_exclude_external = 0;
708
709 PE_parse_boot_argn("vm_page_bg_mode", &vm_page_background_mode, sizeof(vm_page_background_mode));
710 PE_parse_boot_argn("vm_page_bg_exclude_external", &vm_page_background_exclude_external, sizeof(vm_page_background_exclude_external));
711 PE_parse_boot_argn("vm_page_bg_target", &vm_page_background_target, sizeof(vm_page_background_target));
712 PE_parse_boot_argn("vm_page_bg_limit", &vm_page_background_limit, sizeof(vm_page_background_limit));
713
714 if (vm_page_background_mode > VM_PAGE_BG_LEVEL_3)
715 vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
716
717 if (vm_page_background_limit <= vm_page_background_target)
718 vm_page_background_limit = vm_page_background_target + 256;
719#endif
1c79356b 720 vm_page_free_wanted = 0;
2d21ac55 721 vm_page_free_wanted_privileged = 0;
39037602
A
722#if CONFIG_SECLUDED_MEMORY
723 vm_page_free_wanted_secluded = 0;
724#endif /* CONFIG_SECLUDED_MEMORY */
2d21ac55
A
725
726 vm_page_set_colors();
727
39037602
A
728 bzero(vm_page_inactive_states, sizeof(vm_page_inactive_states));
729 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
730 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
731 vm_page_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
732
733 bzero(vm_page_pageable_states, sizeof(vm_page_pageable_states));
734 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
735 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
736 vm_page_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
737 vm_page_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
738 vm_page_pageable_states[VM_PAGE_ON_SPECULATIVE_Q] = 1;
739 vm_page_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
740#if CONFIG_SECLUDED_MEMORY
741 vm_page_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
742#endif /* CONFIG_SECLUDED_MEMORY */
743
744 bzero(vm_page_non_speculative_pageable_states, sizeof(vm_page_non_speculative_pageable_states));
745 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
746 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
747 vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
748 vm_page_non_speculative_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
749 vm_page_non_speculative_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
750#if CONFIG_SECLUDED_MEMORY
751 vm_page_non_speculative_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
752#endif /* CONFIG_SECLUDED_MEMORY */
753
754 bzero(vm_page_active_or_inactive_states, sizeof(vm_page_active_or_inactive_states));
755 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
756 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
757 vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
758 vm_page_active_or_inactive_states[VM_PAGE_ON_ACTIVE_Q] = 1;
759#if CONFIG_SECLUDED_MEMORY
760 vm_page_active_or_inactive_states[VM_PAGE_ON_SECLUDED_Q] = 1;
761#endif /* CONFIG_SECLUDED_MEMORY */
762
1c79356b
A
763
764 /*
765 * Steal memory for the map and zone subsystems.
766 */
39037602
A
767#if CONFIG_GZALLOC
768 gzalloc_configure();
769#endif
770 kernel_debug_string_early("vm_map_steal_memory");
316670eb 771 vm_map_steal_memory();
1c79356b
A
772
773 /*
774 * Allocate (and initialize) the virtual-to-physical
775 * table hash buckets.
776 *
777 * The number of buckets should be a power of two to
778 * get a good hash function. The following computation
779 * chooses the first power of two that is greater
780 * than the number of physical pages in the system.
781 */
782
1c79356b
A
783 if (vm_page_bucket_count == 0) {
784 unsigned int npages = pmap_free_pages();
785
786 vm_page_bucket_count = 1;
787 while (vm_page_bucket_count < npages)
788 vm_page_bucket_count <<= 1;
789 }
b0d623f7 790 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
1c79356b
A
791
792 vm_page_hash_mask = vm_page_bucket_count - 1;
793
794 /*
795 * Calculate object shift value for hashing algorithm:
796 * O = log2(sizeof(struct vm_object))
797 * B = log2(vm_page_bucket_count)
798 * hash shifts the object left by
799 * B/2 - O
800 */
801 size = vm_page_bucket_count;
802 for (log1 = 0; size > 1; log1++)
803 size /= 2;
804 size = sizeof(struct vm_object);
805 for (log2 = 0; size > 1; log2++)
806 size /= 2;
807 vm_page_hash_shift = log1/2 - log2 + 1;
55e303ae
A
808
809 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
810 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
811 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to insure unique series */
1c79356b
A
812
813 if (vm_page_hash_mask & vm_page_bucket_count)
814 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
815
15129b1c
A
816#if VM_PAGE_BUCKETS_CHECK
817#if VM_PAGE_FAKE_BUCKETS
818 /*
819 * Allocate a decoy set of page buckets, to detect
820 * any stomping there.
821 */
822 vm_page_fake_buckets = (vm_page_bucket_t *)
823 pmap_steal_memory(vm_page_bucket_count *
824 sizeof(vm_page_bucket_t));
825 vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
826 vm_page_fake_buckets_end =
827 vm_map_round_page((vm_page_fake_buckets_start +
828 (vm_page_bucket_count *
829 sizeof (vm_page_bucket_t))),
830 PAGE_MASK);
831 char *cp;
832 for (cp = (char *)vm_page_fake_buckets_start;
833 cp < (char *)vm_page_fake_buckets_end;
834 cp++) {
835 *cp = 0x5a;
836 }
837#endif /* VM_PAGE_FAKE_BUCKETS */
838#endif /* VM_PAGE_BUCKETS_CHECK */
839
39037602 840 kernel_debug_string_early("vm_page_buckets");
1c79356b
A
841 vm_page_buckets = (vm_page_bucket_t *)
842 pmap_steal_memory(vm_page_bucket_count *
843 sizeof(vm_page_bucket_t));
844
39037602 845 kernel_debug_string_early("vm_page_bucket_locks");
b0d623f7
A
846 vm_page_bucket_locks = (lck_spin_t *)
847 pmap_steal_memory(vm_page_bucket_lock_count *
848 sizeof(lck_spin_t));
849
1c79356b 850 for (i = 0; i < vm_page_bucket_count; i++) {
39037602 851 vm_page_bucket_t *bucket = &vm_page_buckets[i];
1c79356b 852
fe8ab488 853 bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
1c79356b
A
854#if MACH_PAGE_HASH_STATS
855 bucket->cur_count = 0;
856 bucket->hi_count = 0;
857#endif /* MACH_PAGE_HASH_STATS */
858 }
859
b0d623f7
A
860 for (i = 0; i < vm_page_bucket_lock_count; i++)
861 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
862
3e170ce0
A
863 lck_spin_init(&vm_objects_wired_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
864 lck_spin_init(&vm_allocation_sites_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
865 vm_tag_init();
866
15129b1c
A
867#if VM_PAGE_BUCKETS_CHECK
868 vm_page_buckets_check_ready = TRUE;
869#endif /* VM_PAGE_BUCKETS_CHECK */
870
1c79356b
A
871 /*
872 * Machine-dependent code allocates the resident page table.
873 * It uses vm_page_init to initialize the page frames.
874 * The code also returns to us the virtual space available
875 * to the kernel. We don't trust the pmap module
876 * to get the alignment right.
877 */
878
39037602 879 kernel_debug_string_early("pmap_startup");
1c79356b 880 pmap_startup(&virtual_space_start, &virtual_space_end);
91447636
A
881 virtual_space_start = round_page(virtual_space_start);
882 virtual_space_end = trunc_page(virtual_space_end);
1c79356b
A
883
884 *startp = virtual_space_start;
885 *endp = virtual_space_end;
886
887 /*
888 * Compute the initial "wire" count.
889 * Up until now, the pages which have been set aside are not under
890 * the VM system's control, so although they aren't explicitly
891 * wired, they nonetheless can't be moved. At this moment,
892 * all VM managed pages are "free", courtesy of pmap_startup.
893 */
b0d623f7 894 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
0b4c1975 895 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
39037602
A
896#if CONFIG_SECLUDED_MEMORY
897 vm_page_wire_count -= vm_page_secluded_count;
898#endif
0b4c1975 899 vm_page_wire_count_initial = vm_page_wire_count;
3e170ce0 900 vm_page_pages_initial = vm_page_pages;
91447636 901
2d21ac55
A
902 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
903 vm_page_free_count, vm_page_wire_count);
904
39037602 905 kernel_debug_string_early("vm_page_bootstrap complete");
91447636 906 simple_lock_init(&vm_paging_lock, 0);
1c79356b
A
907}
908
909#ifndef MACHINE_PAGES
910/*
911 * We implement pmap_steal_memory and pmap_startup with the help
912 * of two simpler functions, pmap_virtual_space and pmap_next_page.
913 */
914
/*
 *	pmap_steal_memory:
 *
 *	Early-boot allocator: carves "size" bytes out of the kernel
 *	virtual space tracked by virtual_space_start/end and backs the
 *	range with physical pages handed out by pmap_next_page_hi().
 *	Memory stolen here is never returned; each backing page is
 *	counted as wired and as stolen.
 *
 *	Returns the (pointer-aligned) virtual address of the block.
 */
void *
pmap_steal_memory(
	vm_size_t size)
{
	vm_offset_t addr, vaddr;
	ppnum_t phys_page;

	/*
	 * We round the size to a round multiple.
	 * (Pointer-size alignment, so consecutive steals stay aligned.)
	 */

	size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);

	/*
	 * If this is the first call to pmap_steal_memory,
	 * we have to initialize ourself.
	 * (start == end is the "not yet initialized" sentinel.)
	 */

	if (virtual_space_start == virtual_space_end) {
		pmap_virtual_space(&virtual_space_start, &virtual_space_end);

		/*
		 * The initial values must be aligned properly, and
		 * we don't trust the pmap module to do it right.
		 */

		virtual_space_start = round_page(virtual_space_start);
		virtual_space_end = trunc_page(virtual_space_end);
	}

	/*
	 * Allocate virtual memory for this request.
	 */

	addr = virtual_space_start;
	virtual_space_start += size;

	//kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size);	/* (TEST/DEBUG) */

	/*
	 * Allocate and map physical pages to back new virtual pages.
	 * Running out of physical pages this early is fatal.
	 */

	for (vaddr = round_page(addr);
	     vaddr < addr + size;
	     vaddr += PAGE_SIZE) {

		if (!pmap_next_page_hi(&phys_page))
			panic("pmap_steal_memory() size: 0x%llx\n", (uint64_t)size);

		/*
		 * XXX Logically, these mappings should be wired,
		 * but some pmap modules barf if they are.
		 */
#if defined(__LP64__)
		pmap_pre_expand(kernel_pmap, vaddr);
#endif

		pmap_enter(kernel_pmap, vaddr, phys_page,
			   VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
			   VM_WIMG_USE_DEFAULT, FALSE);
		/*
		 * Account for newly stolen memory
		 */
		vm_page_wire_count++;
		vm_page_stolen_count++;
	}

	return (void *) addr;
}
985
39037602
A
986#if CONFIG_SECLUDED_MEMORY
987/* boot-args to control secluded memory */
988unsigned int secluded_mem_mb = 0; /* # of MBs of RAM to seclude */
989int secluded_for_iokit = 1; /* IOKit can use secluded memory */
990int secluded_for_apps = 1; /* apps can use secluded memory */
991int secluded_for_filecache = 2; /* filecache can use seclude memory */
992#if 11
993int secluded_for_fbdp = 0;
994#endif
995int secluded_aging_policy = SECLUDED_AGING_BEFORE_ACTIVE;
996#endif /* CONFIG_SECLUDED_MEMORY */
997
998
999
1000
fe8ab488 1001void vm_page_release_startup(vm_page_t mem);
1c79356b
A
/*
 *	pmap_startup:
 *
 *	Sizes and steals the vm_pages array, initializes one vm_page per
 *	physical page handed out by pmap_next_page(), optionally fills
 *	pages with a boot-arg pattern, then releases every page to the
 *	free list.  Returns the remaining kernel virtual range through
 *	*startp / *endp.
 */
void
pmap_startup(
	vm_offset_t *startp,
	vm_offset_t *endp)
{
	unsigned int i, npages, pages_initialized, fill, fillval;
	ppnum_t phys_page;
	addr64_t tmpaddr;

#if defined(__LP64__)
	/*
	 * make sure we are aligned on a 64 byte boundary
	 * for VM_PAGE_PACK_PTR (it clips off the low-order
	 * 6 bits of the pointer)
	 */
	if (virtual_space_start != virtual_space_end)
		virtual_space_start = round_page(virtual_space_start);
#endif

	/*
	 * We calculate how many page frames we will have
	 * and then allocate the page structures in one chunk.
	 */

	tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE;	/* Get the amount of memory left */
	tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start);	/* Account for any slop */
	npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages)));	/* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */

	vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);

	/*
	 * Initialize the page frames.
	 */
	kernel_debug_string_early("Initialize the page frames");

	vm_page_array_beginning_addr = &vm_pages[0];
	vm_page_array_ending_addr = &vm_pages[npages];

	/* pmap_next_page() may run dry before npages; track what we actually got */
	for (i = 0, pages_initialized = 0; i < npages; i++) {
		if (!pmap_next_page(&phys_page))
			break;
		if (pages_initialized == 0 || phys_page < vm_page_lowest)
			vm_page_lowest = phys_page;

		vm_page_init(&vm_pages[i], phys_page, FALSE);
		vm_page_pages++;
		pages_initialized++;
	}
	vm_pages_count = pages_initialized;

#if defined(__LP64__)
	/* sanity-check that packed pointers round-trip at both array ends */
	if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0]))) != &vm_pages[0])
		panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);

	if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1]))) != &vm_pages[vm_pages_count-1])
		panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
#endif
	kernel_debug_string_early("page fill/release");
	/*
	 * Check if we want to initialize pages to a known value
	 */
	fill = 0;				/* Assume no fill */
	if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1;	/* Set fill */
#if	DEBUG
	/* This slows down booting the DEBUG kernel, particularly on
	 * large memory systems, but is worthwhile in deterministically
	 * trapping uninitialized memory usage.
	 */
	if (fill == 0) {
		fill = 1;
		fillval = 0xDEB8F177;
	}
#endif
	if (fill)
		kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);

#if CONFIG_SECLUDED_MEMORY
	/* default: no secluded mem */
	secluded_mem_mb = 0;
	if (max_mem > 1*1024*1024*1024) {
		/* default to 90MB for devices with > 1GB of RAM */
		secluded_mem_mb = 90;
	}
	/* override with value from device tree, if provided */
	PE_get_default("kern.secluded_mem_mb",
		       &secluded_mem_mb, sizeof(secluded_mem_mb));
	/* override with value from boot-args, if provided */
	PE_parse_boot_argn("secluded_mem_mb",
			   &secluded_mem_mb,
			   sizeof (secluded_mem_mb));

	vm_page_secluded_target = (unsigned int)
		((secluded_mem_mb * 1024ULL * 1024ULL) / PAGE_SIZE);
	PE_parse_boot_argn("secluded_for_iokit",
			   &secluded_for_iokit,
			   sizeof (secluded_for_iokit));
	PE_parse_boot_argn("secluded_for_apps",
			   &secluded_for_apps,
			   sizeof (secluded_for_apps));
	PE_parse_boot_argn("secluded_for_filecache",
			   &secluded_for_filecache,
			   sizeof (secluded_for_filecache));
#if 11
	PE_parse_boot_argn("secluded_for_fbdp",
			   &secluded_for_fbdp,
			   sizeof (secluded_for_fbdp));
#endif
	PE_parse_boot_argn("secluded_aging_policy",
			   &secluded_aging_policy,
			   sizeof (secluded_aging_policy));
#endif /* CONFIG_SECLUDED_MEMORY */

	// -debug code remove
	if (2 == vm_himemory_mode) {
		// free low -> high so high is preferred
		for (i = 1; i <= pages_initialized; i++) {
			if(fill) fillPage(VM_PAGE_GET_PHYS_PAGE(&vm_pages[i - 1]), fillval);	/* Fill the page with a known value if requested at boot */
			vm_page_release_startup(&vm_pages[i - 1]);
		}
	}
	else
	// debug code remove-
	/* NOTE: the "else" above binds to the for loop below */

	/*
	 * Release pages in reverse order so that physical pages
	 * initially get allocated in ascending addresses. This keeps
	 * the devices (which must address physical memory) happy if
	 * they require several consecutive pages.
	 */
	for (i = pages_initialized; i > 0; i--) {
		if(fill) fillPage(VM_PAGE_GET_PHYS_PAGE(&vm_pages[i - 1]), fillval);	/* Fill the page with a known value if requested at boot */
		vm_page_release_startup(&vm_pages[i - 1]);
	}

	VM_CHECK_MEMORYSTATUS;

#if 0
	{
		vm_page_t xx, xxo, xxl;
		int i, j, k, l;

		j = 0;							/* (BRINGUP) */
		xxl = 0;

		for( i = 0; i < vm_colors; i++ ) {
			queue_iterate(&vm_page_queue_free[i].qhead,
				      xx,
				      vm_page_t,
				      pageq) {				/* BRINGUP */
				j++;					/* (BRINGUP) */
				if(j > vm_page_free_count) {		/* (BRINGUP) */
					panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
				}

				l = vm_page_free_count - j;		/* (BRINGUP) */
				k = 0;					/* (BRINGUP) */

				if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);

				for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i].qhead; xxo = xxo->pageq.next) {	/* (BRINGUP) */
					k++;
					if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
					if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) {	/* (BRINGUP) */
						panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
					}
				}

				xxl = xx;
			}
		}

		if(j != vm_page_free_count) {				/* (BRINGUP) */
			panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
		}
	}
#endif

	/*
	 * We have to re-align virtual_space_start,
	 * because pmap_steal_memory has been using it.
	 */

	virtual_space_start = round_page(virtual_space_start);

	*startp = virtual_space_start;
	*endp = virtual_space_end;
}
1192#endif /* MACHINE_PAGES */
1193
1194/*
1195 * Routine: vm_page_module_init
1196 * Purpose:
1197 * Second initialization pass, to be done after
1198 * the basic VM system is ready.
1199 */
/*
 *	vm_page_module_init:
 *
 *	Second-phase init, run once the zone allocator is up.  Creates
 *	two zones: "vm pages array" (cover zone for the boot-time
 *	vm_pages array, so its memory shows up in zone accounting) and
 *	"vm pages" (for vm_page_with_ppnum structures allocated later).
 *	Both are non-expandable, exhaustible, foreign-page zones exempt
 *	from caller accounting and gzalloc.
 */
void
vm_page_module_init(void)
{
	uint64_t vm_page_zone_pages, vm_page_array_zone_data_size;
	vm_size_t vm_page_with_ppnum_size;

	vm_page_array_zone = zinit((vm_size_t) sizeof(struct vm_page),
			     0, PAGE_SIZE, "vm pages array");

	zone_change(vm_page_array_zone, Z_CALLERACCT, FALSE);
	zone_change(vm_page_array_zone, Z_EXPAND, FALSE);
	zone_change(vm_page_array_zone, Z_EXHAUST, TRUE);
	zone_change(vm_page_array_zone, Z_FOREIGN, TRUE);
	zone_change(vm_page_array_zone, Z_GZALLOC_EXEMPT, TRUE);
	/*
	 * Adjust zone statistics to account for the real pages allocated
	 * in vm_page_create(). [Q: is this really what we want?]
	 */
	vm_page_array_zone->count += vm_page_pages;
	vm_page_array_zone->sum_count += vm_page_pages;
	vm_page_array_zone_data_size = vm_page_pages * vm_page_array_zone->elem_size;
	vm_page_array_zone->cur_size += vm_page_array_zone_data_size;
	vm_page_zone_pages = ((round_page(vm_page_array_zone_data_size)) / PAGE_SIZE);
	OSAddAtomic64(vm_page_zone_pages, &(vm_page_array_zone->page_count));
	/* since zone accounts for these, take them out of stolen */
	VM_PAGE_MOVE_STOLEN(vm_page_zone_pages);

	/* round element size up so packed pointers keep their alignment */
	vm_page_with_ppnum_size = (sizeof(struct vm_page_with_ppnum) + (VM_PACKED_POINTER_ALIGNMENT-1)) & ~(VM_PACKED_POINTER_ALIGNMENT - 1);

	vm_page_zone = zinit(vm_page_with_ppnum_size,
			     0, PAGE_SIZE, "vm pages");

	zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
	zone_change(vm_page_zone, Z_EXPAND, FALSE);
	zone_change(vm_page_zone, Z_EXHAUST, TRUE);
	zone_change(vm_page_zone, Z_FOREIGN, TRUE);
	zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
}
1238
1239/*
1240 * Routine: vm_page_create
1241 * Purpose:
1242 * After the VM system is up, machine-dependent code
1243 * may stumble across more physical memory. For example,
1244 * memory that it was reserving for a frame buffer.
1245 * vm_page_create turns this memory into available pages.
1246 */
1247
1248void
1249vm_page_create(
55e303ae
A
1250 ppnum_t start,
1251 ppnum_t end)
1c79356b 1252{
55e303ae
A
1253 ppnum_t phys_page;
1254 vm_page_t m;
1c79356b 1255
55e303ae
A
1256 for (phys_page = start;
1257 phys_page < end;
1258 phys_page++) {
6d2010ae 1259 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
1c79356b
A
1260 == VM_PAGE_NULL)
1261 vm_page_more_fictitious();
1262
6d2010ae 1263 m->fictitious = FALSE;
0b4c1975 1264 pmap_clear_noencrypt(phys_page);
6d2010ae 1265
1c79356b 1266 vm_page_pages++;
39037602 1267 vm_page_release(m, FALSE);
1c79356b
A
1268 }
1269}
1270
1271/*
1272 * vm_page_hash:
1273 *
1274 * Distributes the object/offset key pair among hash buckets.
1275 *
55e303ae 1276 * NOTE: The bucket count must be a power of 2
1c79356b
A
1277 */
/*
 * Multiplicative hash: mixes the object pointer and the page-granular
 * offset (atop_64) with vm_page_bucket_hash, then masks the sum down
 * to a bucket index.  Depends on vm_page_bucket_count being a power
 * of 2 (see note above) so vm_page_hash_mask is a valid index mask.
 */
#define vm_page_hash(object, offset) (\
	( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
	& vm_page_hash_mask)
1281
2d21ac55 1282
1c79356b
A
1283/*
1284 * vm_page_insert: [ internal use only ]
1285 *
1286 * Inserts the given mem entry into the object/object-page
1287 * table and object list.
1288 *
1289 * The object must be locked.
1290 */
1c79356b
A
/*
 *	vm_page_insert:
 *
 *	Convenience wrapper around vm_page_insert_internal() using the
 *	default flags: no tag (VM_KERN_MEMORY_NONE), queues lock not
 *	held, insert into the hash, no batched pmap op or accounting,
 *	no delayed ledger update.  Caller must hold the object lock.
 */
void
vm_page_insert(
	vm_page_t		mem,
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, TRUE, FALSE, FALSE, NULL);
}
1299
/*
 *	vm_page_insert_wired:
 *
 *	Like vm_page_insert(), but carries an allocation tag so that a
 *	wired page's owner can be attributed (see vm_tag_init / the
 *	wire_tag handling in vm_page_insert_internal).
 */
void
vm_page_insert_wired(
	vm_page_t		mem,
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_tag_t                tag)
{
	vm_page_insert_internal(mem, object, offset, tag, FALSE, TRUE, FALSE, FALSE, NULL);
}
1309
4a3eedf9 1310void
2d21ac55
A
1311vm_page_insert_internal(
1312 vm_page_t mem,
1313 vm_object_t object,
1314 vm_object_offset_t offset,
3e170ce0 1315 vm_tag_t tag,
b0d623f7 1316 boolean_t queues_lock_held,
316670eb 1317 boolean_t insert_in_hash,
3e170ce0
A
1318 boolean_t batch_pmap_op,
1319 boolean_t batch_accounting,
1320 uint64_t *delayed_ledger_update)
1c79356b 1321{
fe8ab488
A
1322 vm_page_bucket_t *bucket;
1323 lck_spin_t *bucket_lock;
1324 int hash_id;
1325 task_t owner;
1c79356b
A
1326
1327 XPR(XPR_VM_PAGE,
1328 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
b0d623f7 1329 object, offset, mem, 0,0);
316670eb
A
1330#if 0
1331 /*
1332 * we may not hold the page queue lock
1333 * so this check isn't safe to make
1334 */
1c79356b 1335 VM_PAGE_CHECK(mem);
316670eb 1336#endif
1c79356b 1337
39236c6e
A
1338 assert(page_aligned(offset));
1339
3e170ce0
A
1340 assert(!VM_PAGE_WIRED(mem) || mem->private || mem->fictitious || (tag != VM_KERN_MEMORY_NONE));
1341
fe8ab488
A
1342 /* the vm_submap_object is only a placeholder for submaps */
1343 assert(object != vm_submap_object);
2d21ac55
A
1344
1345 vm_object_lock_assert_exclusive(object);
39037602 1346 LCK_MTX_ASSERT(&vm_page_queue_lock,
b0d623f7
A
1347 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1348 : LCK_MTX_ASSERT_NOTOWNED);
39037602
A
1349 if (queues_lock_held == FALSE)
1350 assert(!VM_PAGE_PAGEABLE(mem));
3e170ce0 1351
b0d623f7 1352 if (insert_in_hash == TRUE) {
15129b1c 1353#if DEBUG || VM_PAGE_CHECK_BUCKETS
39037602 1354 if (mem->tabled || mem->vm_page_object)
b0d623f7
A
1355 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1356 "already in (obj=%p,off=0x%llx)",
39037602 1357 mem, object, offset, VM_PAGE_OBJECT(mem), mem->offset);
91447636 1358#endif
6d2010ae 1359 assert(!object->internal || offset < object->vo_size);
b0d623f7
A
1360 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1361
1362 /*
1363 * Record the object/offset pair in this page
1364 */
1c79356b 1365
39037602 1366 mem->vm_page_object = VM_PAGE_PACK_OBJECT(object);
b0d623f7 1367 mem->offset = offset;
1c79356b 1368
39037602
A
1369#if CONFIG_SECLUDED_MEMORY
1370 if (object->eligible_for_secluded) {
1371 vm_page_secluded.eligible_for_secluded++;
1372 }
1373#endif /* CONFIG_SECLUDED_MEMORY */
1374
b0d623f7
A
1375 /*
1376 * Insert it into the object_object/offset hash table
1377 */
1378 hash_id = vm_page_hash(object, offset);
1379 bucket = &vm_page_buckets[hash_id];
1380 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1381
1382 lck_spin_lock(bucket_lock);
1c79356b 1383
fe8ab488
A
1384 mem->next_m = bucket->page_list;
1385 bucket->page_list = VM_PAGE_PACK_PTR(mem);
39037602 1386 assert(mem == (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list)));
fe8ab488 1387
1c79356b 1388#if MACH_PAGE_HASH_STATS
b0d623f7
A
1389 if (++bucket->cur_count > bucket->hi_count)
1390 bucket->hi_count = bucket->cur_count;
1c79356b 1391#endif /* MACH_PAGE_HASH_STATS */
15129b1c 1392 mem->hashed = TRUE;
b0d623f7
A
1393 lck_spin_unlock(bucket_lock);
1394 }
6d2010ae 1395
316670eb
A
1396 {
1397 unsigned int cache_attr;
6d2010ae
A
1398
1399 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1400
1401 if (cache_attr != VM_WIMG_USE_DEFAULT) {
316670eb 1402 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
6d2010ae
A
1403 }
1404 }
1c79356b
A
1405 /*
1406 * Now link into the object's list of backed pages.
1407 */
39037602 1408 vm_page_queue_enter(&object->memq, mem, vm_page_t, listq);
3e170ce0 1409 object->memq_hint = mem;
1c79356b
A
1410 mem->tabled = TRUE;
1411
1412 /*
1413 * Show that the object has one more resident page.
1414 */
1415
1416 object->resident_page_count++;
b0d623f7 1417 if (VM_PAGE_WIRED(mem)) {
39037602
A
1418 assert(mem->wire_count > 0);
1419
3e170ce0
A
1420 if (!mem->private && !mem->fictitious)
1421 {
1422 if (!object->wired_page_count)
1423 {
1424 assert(VM_KERN_MEMORY_NONE != tag);
1425 object->wire_tag = tag;
1426 VM_OBJECT_WIRED(object);
1427 }
1428 }
1429 object->wired_page_count++;
b0d623f7
A
1430 }
1431 assert(object->resident_page_count >= object->wired_page_count);
91447636 1432
3e170ce0
A
1433 if (batch_accounting == FALSE) {
1434 if (object->internal) {
1435 OSAddAtomic(1, &vm_page_internal_count);
1436 } else {
1437 OSAddAtomic(1, &vm_page_external_count);
1438 }
39236c6e
A
1439 }
1440
1441 /*
1442 * It wouldn't make sense to insert a "reusable" page in
1443 * an object (the page would have been marked "reusable" only
1444 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1445 * in the object at that time).
1446 * But a page could be inserted in a "all_reusable" object, if
1447 * something faults it in (a vm_read() from another task or a
1448 * "use-after-free" issue in user space, for example). It can
1449 * also happen if we're relocating a page from that object to
1450 * a different physical page during a physically-contiguous
1451 * allocation.
1452 */
b0d623f7 1453 assert(!mem->reusable);
39037602 1454 if (object->all_reusable) {
39236c6e
A
1455 OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1456 }
2d21ac55 1457
fe8ab488
A
1458 if (object->purgable == VM_PURGABLE_DENY) {
1459 owner = TASK_NULL;
1460 } else {
1461 owner = object->vo_purgeable_owner;
1462 }
1463 if (owner &&
1464 (object->purgable == VM_PURGABLE_NONVOLATILE ||
1465 VM_PAGE_WIRED(mem))) {
3e170ce0
A
1466
1467 if (delayed_ledger_update)
1468 *delayed_ledger_update += PAGE_SIZE;
1469 else {
1470 /* more non-volatile bytes */
1471 ledger_credit(owner->ledger,
1472 task_ledgers.purgeable_nonvolatile,
1473 PAGE_SIZE);
1474 /* more footprint */
1475 ledger_credit(owner->ledger,
1476 task_ledgers.phys_footprint,
1477 PAGE_SIZE);
1478 }
fe8ab488
A
1479
1480 } else if (owner &&
1481 (object->purgable == VM_PURGABLE_VOLATILE ||
1482 object->purgable == VM_PURGABLE_EMPTY)) {
1483 assert(! VM_PAGE_WIRED(mem));
1484 /* more volatile bytes */
1485 ledger_credit(owner->ledger,
1486 task_ledgers.purgeable_volatile,
1487 PAGE_SIZE);
1488 }
1489
b0d623f7
A
1490 if (object->purgable == VM_PURGABLE_VOLATILE) {
1491 if (VM_PAGE_WIRED(mem)) {
fe8ab488 1492 OSAddAtomic(+1, &vm_page_purgeable_wired_count);
b0d623f7 1493 } else {
fe8ab488 1494 OSAddAtomic(+1, &vm_page_purgeable_count);
b0d623f7 1495 }
593a1d5f 1496 } else if (object->purgable == VM_PURGABLE_EMPTY &&
39037602 1497 mem->vm_page_q_state == VM_PAGE_ON_THROTTLED_Q) {
b0d623f7
A
1498 /*
1499 * This page belongs to a purged VM object but hasn't
1500 * been purged (because it was "busy").
1501 * It's in the "throttled" queue and hence not
1502 * visible to vm_pageout_scan(). Move it to a pageable
1503 * queue, so that it can eventually be reclaimed, instead
1504 * of lingering in the "empty" object.
1505 */
593a1d5f 1506 if (queues_lock_held == FALSE)
b0d623f7 1507 vm_page_lockspin_queues();
593a1d5f 1508 vm_page_deactivate(mem);
2d21ac55
A
1509 if (queues_lock_held == FALSE)
1510 vm_page_unlock_queues();
91447636 1511 }
fe8ab488
A
1512
1513#if VM_OBJECT_TRACKING_OP_MODIFIED
1514 if (vm_object_tracking_inited &&
1515 object->internal &&
1516 object->resident_page_count == 0 &&
1517 object->pager == NULL &&
1518 object->shadow != NULL &&
1519 object->shadow->copy == object) {
1520 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
1521 int numsaved = 0;
1522
1523 numsaved =OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
1524 btlog_add_entry(vm_object_tracking_btlog,
1525 object,
1526 VM_OBJECT_TRACKING_OP_MODIFIED,
1527 bt,
1528 numsaved);
1529 }
1530#endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
1c79356b
A
1531}
1532
1533/*
1534 * vm_page_replace:
1535 *
1536 * Exactly like vm_page_insert, except that we first
1537 * remove any existing page at the given offset in object.
1538 *
b0d623f7 1539 * The object must be locked.
1c79356b 1540 */
1c79356b
A
/*
 *	vm_page_replace:
 *
 *	Exactly like vm_page_insert, except that we first remove any
 *	existing page at the given offset in object: the hash bucket is
 *	walked under its spin lock, any matching page is unhashed and
 *	later freed, and "mem" is pushed at the head of the bucket
 *	before the normal insertion (with insert_in_hash == FALSE).
 *
 *	The object must be locked exclusive; the page queues lock must
 *	NOT be held.
 */
void
vm_page_replace(
	vm_page_t		mem,
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_bucket_t *bucket;
	vm_page_t	 found_m = VM_PAGE_NULL;
	lck_spin_t	*bucket_lock;
	int		hash_id;

#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(mem);
#endif
	vm_object_lock_assert_exclusive(object);
#if DEBUG || VM_PAGE_CHECK_BUCKETS
	if (mem->tabled || mem->vm_page_object)
		panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
		      "already in (obj=%p,off=0x%llx)",
		      mem, object, offset, VM_PAGE_OBJECT(mem), mem->offset);
#endif
	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);

	assert(!VM_PAGE_PAGEABLE(mem));

	/*
	 * Record the object/offset pair in this page
	 */
	mem->vm_page_object = VM_PAGE_PACK_OBJECT(object);
	mem->offset = offset;

	/*
	 * Insert it into the object_object/offset hash table,
	 * replacing any page that might have been there.
	 */

	hash_id = vm_page_hash(object, offset);
	bucket = &vm_page_buckets[hash_id];
	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

	lck_spin_lock(bucket_lock);

	if (bucket->page_list) {
		/* mp tracks the packed link slot pointing at m, for unlinking */
		vm_page_packed_t *mp = &bucket->page_list;
		vm_page_t m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp));

		do {
			/*
			 * compare packed object pointers
			 */
			if (m->vm_page_object == mem->vm_page_object && m->offset == offset) {
				/*
				 * Remove old page from hash list
				 */
				*mp = m->next_m;
				m->hashed = FALSE;
				m->next_m = VM_PAGE_PACK_PTR(NULL);

				found_m = m;
				break;
			}
			mp = &m->next_m;
		} while ((m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp))));

		mem->next_m = bucket->page_list;
	} else {
		mem->next_m = VM_PAGE_PACK_PTR(NULL);
	}
	/*
	 * insert new page at head of hash list
	 */
	bucket->page_list = VM_PAGE_PACK_PTR(mem);
	mem->hashed = TRUE;

	lck_spin_unlock(bucket_lock);

	if (found_m) {
	        /*
		 * there was already a page at the specified
		 * offset for this object... remove it from
		 * the object and free it back to the free list
		 */
		vm_page_free_unlocked(found_m, FALSE);
	}
	/* insert_in_hash == FALSE: we already linked mem into the hash above */
	vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, FALSE, FALSE, FALSE, NULL);
}
1631
1632/*
1633 * vm_page_remove: [ internal use only ]
1634 *
1635 * Removes the given mem entry from the object/offset-page
1636 * table and the object page list.
1637 *
b0d623f7 1638 * The object must be locked.
1c79356b
A
1639 */
1640
/*
 *	vm_page_remove:
 *
 *	Removes the given mem entry from the object/offset-page table
 *	(unless remove_from_hash is FALSE) and from the object's page
 *	list, then reverses all the accounting vm_page_insert_internal()
 *	did: resident/wired counts, internal/external counters,
 *	reusable stats, purgeable counters and task ledgers.
 *
 *	The object must be locked exclusive; for pageable pages the
 *	page queues lock must be held as well.
 */
void
vm_page_remove(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	vm_page_bucket_t *bucket;
	vm_page_t	this;
	lck_spin_t	*bucket_lock;
	int		hash_id;
	task_t		owner;
	vm_object_t	m_object;

	m_object = VM_PAGE_OBJECT(mem);

	XPR(XPR_VM_PAGE,
	    "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
	    m_object, mem->offset,
	    mem, 0,0);

	vm_object_lock_assert_exclusive(m_object);
	assert(mem->tabled);
	assert(!mem->cleaning);
	assert(!mem->laundry);

	if (VM_PAGE_PAGEABLE(mem)) {
		LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	}
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(mem);
#endif
	if (remove_from_hash == TRUE) {
		/*
		 *	Remove from the object_object/offset hash table
		 */
		hash_id = vm_page_hash(m_object, mem->offset);
		bucket = &vm_page_buckets[hash_id];
		bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

		lck_spin_lock(bucket_lock);

		if ((this = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list))) == mem) {
			/* optimize for common case */

			bucket->page_list = mem->next_m;
		} else {
			vm_page_packed_t	*prev;

			for (prev = &this->next_m;
			     (this = (vm_page_t)(VM_PAGE_UNPACK_PTR(*prev))) != mem;
			     prev = &this->next_m)
				continue;
			*prev = this->next_m;
		}
#if     MACH_PAGE_HASH_STATS
		bucket->cur_count--;
#endif /* MACH_PAGE_HASH_STATS */
		mem->hashed = FALSE;
		/* both branches above leave this == mem; clear its stale link */
		this->next_m = VM_PAGE_PACK_PTR(NULL);
		lck_spin_unlock(bucket_lock);
	}
	/*
	 *	Now remove from the object's list of backed pages.
	 */

	vm_page_remove_internal(mem);

	/*
	 *	And show that the object has one fewer resident
	 *	page.
	 */

	assert(m_object->resident_page_count > 0);
	m_object->resident_page_count--;

	if (m_object->internal) {
#if DEBUG
		assert(vm_page_internal_count);
#endif /* DEBUG */

		OSAddAtomic(-1, &vm_page_internal_count);
	} else {
		assert(vm_page_external_count);
		OSAddAtomic(-1, &vm_page_external_count);

		if (mem->xpmapped) {
			assert(vm_page_xpmapped_external_count);
			OSAddAtomic(-1, &vm_page_xpmapped_external_count);
		}
	}
	/* external object on the cache list and now empty: drop it from the cache */
	if (!m_object->internal && (m_object->objq.next || m_object->objq.prev)) {
		if (m_object->resident_page_count == 0)
			vm_object_cache_remove(m_object);
	}

	if (VM_PAGE_WIRED(mem)) {
		assert(mem->wire_count > 0);
		assert(m_object->wired_page_count > 0);
		m_object->wired_page_count--;
		if (!m_object->wired_page_count) {
		    VM_OBJECT_UNWIRED(m_object);
		}
	}
	assert(m_object->resident_page_count >=
	       m_object->wired_page_count);
	if (mem->reusable) {
		assert(m_object->reusable_page_count > 0);
		m_object->reusable_page_count--;
		assert(m_object->reusable_page_count <=
		       m_object->resident_page_count);
		mem->reusable = FALSE;
		OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
		vm_page_stats_reusable.reused_remove++;
	} else if (m_object->all_reusable) {
		OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
		vm_page_stats_reusable.reused_remove++;
	}

	if (m_object->purgable == VM_PURGABLE_DENY) {
		owner = TASK_NULL;
	} else {
		owner = m_object->vo_purgeable_owner;
	}
	if (owner &&
	    (m_object->purgable == VM_PURGABLE_NONVOLATILE ||
	     VM_PAGE_WIRED(mem))) {
		/* less non-volatile bytes */
		ledger_debit(owner->ledger,
			     task_ledgers.purgeable_nonvolatile,
			     PAGE_SIZE);
		/* less footprint */
		ledger_debit(owner->ledger,
			     task_ledgers.phys_footprint,
			     PAGE_SIZE);
	} else if (owner &&
		   (m_object->purgable == VM_PURGABLE_VOLATILE ||
		    m_object->purgable == VM_PURGABLE_EMPTY)) {
		assert(! VM_PAGE_WIRED(mem));
		/* less volatile bytes */
		ledger_debit(owner->ledger,
			     task_ledgers.purgeable_volatile,
			     PAGE_SIZE);
	}
	if (m_object->purgable == VM_PURGABLE_VOLATILE) {
		if (VM_PAGE_WIRED(mem)) {
			assert(vm_page_purgeable_wired_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_wired_count);
		} else {
			assert(vm_page_purgeable_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_count);
		}
	}
	if (m_object->set_cache_attr == TRUE)
		pmap_set_cache_attributes(VM_PAGE_GET_PHYS_PAGE(mem), 0);

	/* leave the page obviously "not in any object" */
	mem->tabled = FALSE;
	mem->vm_page_object = 0;
	mem->offset = (vm_object_offset_t) -1;
}
1803
b0d623f7 1804
1c79356b
A
1805/*
1806 * vm_page_lookup:
1807 *
1808 * Returns the page associated with the object/offset
1809 * pair specified; if none is found, VM_PAGE_NULL is returned.
1810 *
1811 * The object must be locked. No side effects.
1812 */
1813
3e170ce0
A
/*
 * Objects with at most this many resident pages are searched by
 * walking their short memq list directly in vm_page_lookup(),
 * which avoids taking a hash-bucket spin lock (see comment there).
 */
#define VM_PAGE_HASH_LOOKUP_THRESHOLD	10

#if DEBUG_VM_PAGE_LOOKUP

/*
 * Instrumentation for vm_page_lookup(), updated with atomic adds.
 * Compiled in only when DEBUG_VM_PAGE_LOOKUP is enabled.
 */
struct {
	uint64_t vpl_total;		/* total lookups issued */
	uint64_t vpl_empty_obj;		/* object had no resident pages */
	uint64_t vpl_bucket_NULL;	/* hash bucket was empty */
	uint64_t vpl_hit_hint;		/* memq_hint matched directly */
	uint64_t vpl_hit_hint_next;	/* page after the hint matched */
	uint64_t vpl_hit_hint_prev;	/* page before the hint matched */
	uint64_t vpl_fast;		/* short memq-list walks */
	uint64_t vpl_slow;		/* locked hash-chain walks */
	uint64_t vpl_hit;
	uint64_t vpl_miss;

	uint64_t vpl_fast_elapsed;	/* cumulative time, mach abs units */
	uint64_t vpl_slow_elapsed;
} vm_page_lookup_stats __attribute__((aligned(8)));

#endif

/* upper bound on pages examined by kdp_vm_page_lookup() in the debugger */
#define KDP_VM_PAGE_WALK_MAX	1000
1837
1838vm_page_t
1839kdp_vm_page_lookup(
1840 vm_object_t object,
1841 vm_object_offset_t offset)
1842{
1843 vm_page_t cur_page;
1844 int num_traversed = 0;
1845
1846 if (not_in_kdp) {
1847 panic("panic: kdp_vm_page_lookup done outside of kernel debugger");
1848 }
1849
39037602 1850 vm_page_queue_iterate(&object->memq, cur_page, vm_page_t, listq) {
3e170ce0
A
1851 if (cur_page->offset == offset) {
1852 return cur_page;
1853 }
1854 num_traversed++;
1855
1856 if (num_traversed >= KDP_VM_PAGE_WALK_MAX) {
1857 return VM_PAGE_NULL;
1858 }
1859 }
1860
1861 return VM_PAGE_NULL;
1862}
91447636 1863
1c79356b
A
1864vm_page_t
1865vm_page_lookup(
b0d623f7
A
1866 vm_object_t object,
1867 vm_object_offset_t offset)
1c79356b 1868{
b0d623f7
A
1869 vm_page_t mem;
1870 vm_page_bucket_t *bucket;
39037602 1871 vm_page_queue_entry_t qe;
3e170ce0 1872 lck_spin_t *bucket_lock = NULL;
b0d623f7 1873 int hash_id;
3e170ce0
A
1874#if DEBUG_VM_PAGE_LOOKUP
1875 uint64_t start, elapsed;
91447636 1876
3e170ce0
A
1877 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_total);
1878#endif
2d21ac55 1879 vm_object_lock_assert_held(object);
3e170ce0
A
1880
1881 if (object->resident_page_count == 0) {
1882#if DEBUG_VM_PAGE_LOOKUP
1883 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_empty_obj);
1884#endif
1885 return (VM_PAGE_NULL);
1886 }
1887
91447636 1888 mem = object->memq_hint;
2d21ac55 1889
91447636 1890 if (mem != VM_PAGE_NULL) {
39037602 1891 assert(VM_PAGE_OBJECT(mem) == object);
2d21ac55 1892
91447636 1893 if (mem->offset == offset) {
3e170ce0
A
1894#if DEBUG_VM_PAGE_LOOKUP
1895 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint);
1896#endif
1897 return (mem);
91447636 1898 }
39037602 1899 qe = (vm_page_queue_entry_t)vm_page_queue_next(&mem->listq);
2d21ac55 1900
39037602 1901 if (! vm_page_queue_end(&object->memq, qe)) {
91447636
A
1902 vm_page_t next_page;
1903
39037602
A
1904 next_page = (vm_page_t)((uintptr_t)qe);
1905 assert(VM_PAGE_OBJECT(next_page) == object);
2d21ac55 1906
91447636 1907 if (next_page->offset == offset) {
91447636 1908 object->memq_hint = next_page; /* new hint */
3e170ce0
A
1909#if DEBUG_VM_PAGE_LOOKUP
1910 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_next);
1911#endif
1912 return (next_page);
91447636
A
1913 }
1914 }
39037602 1915 qe = (vm_page_queue_entry_t)vm_page_queue_prev(&mem->listq);
2d21ac55 1916
39037602 1917 if (! vm_page_queue_end(&object->memq, qe)) {
91447636
A
1918 vm_page_t prev_page;
1919
39037602
A
1920 prev_page = (vm_page_t)((uintptr_t)qe);
1921 assert(VM_PAGE_OBJECT(prev_page) == object);
2d21ac55 1922
91447636 1923 if (prev_page->offset == offset) {
91447636 1924 object->memq_hint = prev_page; /* new hint */
3e170ce0
A
1925#if DEBUG_VM_PAGE_LOOKUP
1926 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_prev);
1927#endif
1928 return (prev_page);
91447636
A
1929 }
1930 }
1931 }
1c79356b 1932 /*
2d21ac55 1933 * Search the hash table for this object/offset pair
1c79356b 1934 */
b0d623f7
A
1935 hash_id = vm_page_hash(object, offset);
1936 bucket = &vm_page_buckets[hash_id];
1c79356b 1937
2d21ac55
A
1938 /*
1939 * since we hold the object lock, we are guaranteed that no
1940 * new pages can be inserted into this object... this in turn
1941 * guarantess that the page we're looking for can't exist
1942 * if the bucket it hashes to is currently NULL even when looked
1943 * at outside the scope of the hash bucket lock... this is a
1944 * really cheap optimiztion to avoid taking the lock
1945 */
fe8ab488 1946 if (!bucket->page_list) {
3e170ce0
A
1947#if DEBUG_VM_PAGE_LOOKUP
1948 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_bucket_NULL);
1949#endif
2d21ac55
A
1950 return (VM_PAGE_NULL);
1951 }
0c530ab8 1952
3e170ce0
A
1953#if DEBUG_VM_PAGE_LOOKUP
1954 start = mach_absolute_time();
1955#endif
1956 if (object->resident_page_count <= VM_PAGE_HASH_LOOKUP_THRESHOLD) {
316670eb 1957 /*
3e170ce0
A
1958 * on average, it's roughly 3 times faster to run a short memq list
1959 * than to take the spin lock and go through the hash list
316670eb 1960 */
39037602 1961 mem = (vm_page_t)vm_page_queue_first(&object->memq);
3e170ce0 1962
39037602 1963 while (!vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem)) {
3e170ce0
A
1964
1965 if (mem->offset == offset)
1966 break;
1967
39037602 1968 mem = (vm_page_t)vm_page_queue_next(&mem->listq);
3e170ce0 1969 }
39037602 1970 if (vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem))
3e170ce0
A
1971 mem = NULL;
1972 } else {
39037602
A
1973 vm_page_object_t packed_object;
1974
1975 packed_object = VM_PAGE_PACK_OBJECT(object);
3e170ce0
A
1976
1977 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1978
1979 lck_spin_lock(bucket_lock);
1980
39037602
A
1981 for (mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
1982 mem != VM_PAGE_NULL;
1983 mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->next_m))) {
3e170ce0
A
1984#if 0
1985 /*
1986 * we don't hold the page queue lock
1987 * so this check isn't safe to make
1988 */
1989 VM_PAGE_CHECK(mem);
316670eb 1990#endif
39037602 1991 if ((mem->vm_page_object == packed_object) && (mem->offset == offset))
3e170ce0
A
1992 break;
1993 }
1994 lck_spin_unlock(bucket_lock);
1c79356b 1995 }
55e303ae 1996
3e170ce0
A
1997#if DEBUG_VM_PAGE_LOOKUP
1998 elapsed = mach_absolute_time() - start;
1999
2000 if (bucket_lock) {
2001 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_slow);
2002 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_slow_elapsed);
2003 } else {
2004 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_fast);
2005 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_fast_elapsed);
2006 }
2007 if (mem != VM_PAGE_NULL)
2008 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit);
2009 else
2010 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_miss);
2011#endif
91447636 2012 if (mem != VM_PAGE_NULL) {
39037602 2013 assert(VM_PAGE_OBJECT(mem) == object);
91447636 2014
3e170ce0
A
2015 object->memq_hint = mem;
2016 }
2017 return (mem);
91447636
A
2018}
2019
2020
1c79356b
A
2021/*
2022 * vm_page_rename:
2023 *
2024 * Move the given memory entry from its
2025 * current object to the specified target object/offset.
2026 *
2027 * The object must be locked.
2028 */
2029void
2030vm_page_rename(
39037602
A
2031 vm_page_t mem,
2032 vm_object_t new_object,
2033 vm_object_offset_t new_offset,
2034 boolean_t encrypted_ok)
1c79356b 2035{
39037602
A
2036 boolean_t internal_to_external, external_to_internal;
2037 vm_tag_t tag;
2038 vm_object_t m_object;
39236c6e 2039
39037602 2040 m_object = VM_PAGE_OBJECT(mem);
2d21ac55 2041
39037602
A
2042 assert(m_object != new_object);
2043 assert(m_object);
3e170ce0 2044
91447636
A
2045 /*
2046 * ENCRYPTED SWAP:
2047 * The encryption key is based on the page's memory object
2048 * (aka "pager") and paging offset. Moving the page to
2049 * another VM object changes its "pager" and "paging_offset"
2d21ac55
A
2050 * so it has to be decrypted first, or we would lose the key.
2051 *
2052 * One exception is VM object collapsing, where we transfer pages
2053 * from one backing object to its parent object. This operation also
2054 * transfers the paging information, so the <pager,paging_offset> info
2055 * should remain consistent. The caller (vm_object_do_collapse())
2056 * sets "encrypted_ok" in this case.
91447636 2057 */
2d21ac55 2058 if (!encrypted_ok && mem->encrypted) {
91447636
A
2059 panic("vm_page_rename: page %p is encrypted\n", mem);
2060 }
2d21ac55 2061
b0d623f7
A
2062 XPR(XPR_VM_PAGE,
2063 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
2064 new_object, new_offset,
2065 mem, 0,0);
2066
1c79356b
A
2067 /*
2068 * Changes to mem->object require the page lock because
2069 * the pageout daemon uses that lock to get the object.
2070 */
b0d623f7 2071 vm_page_lockspin_queues();
1c79356b 2072
39236c6e
A
2073 internal_to_external = FALSE;
2074 external_to_internal = FALSE;
2075
39037602 2076 if (mem->vm_page_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q) {
39236c6e
A
2077 /*
2078 * it's much easier to get the vm_page_pageable_xxx accounting correct
2079 * if we first move the page to the active queue... it's going to end
2080 * up there anyway, and we don't do vm_page_rename's frequently enough
2081 * for this to matter.
2082 */
39037602 2083 vm_page_queues_remove(mem, FALSE);
39236c6e
A
2084 vm_page_activate(mem);
2085 }
39037602
A
2086 if (VM_PAGE_PAGEABLE(mem)) {
2087 if (m_object->internal && !new_object->internal) {
39236c6e
A
2088 internal_to_external = TRUE;
2089 }
39037602 2090 if (!m_object->internal && new_object->internal) {
39236c6e
A
2091 external_to_internal = TRUE;
2092 }
2093 }
2094
39037602 2095 tag = m_object->wire_tag;
b0d623f7 2096 vm_page_remove(mem, TRUE);
3e170ce0 2097 vm_page_insert_internal(mem, new_object, new_offset, tag, TRUE, TRUE, FALSE, FALSE, NULL);
1c79356b 2098
39236c6e
A
2099 if (internal_to_external) {
2100 vm_page_pageable_internal_count--;
2101 vm_page_pageable_external_count++;
2102 } else if (external_to_internal) {
2103 vm_page_pageable_external_count--;
2104 vm_page_pageable_internal_count++;
2105 }
2106
1c79356b
A
2107 vm_page_unlock_queues();
2108}
2109
2110/*
2111 * vm_page_init:
2112 *
2113 * Initialize the fields in a new page.
2114 * This takes a structure with random values and initializes it
2115 * so that it can be given to vm_page_release or vm_page_insert.
2116 */
2117void
2118vm_page_init(
2119 vm_page_t mem,
0b4c1975
A
2120 ppnum_t phys_page,
2121 boolean_t lopage)
1c79356b 2122{
91447636 2123 assert(phys_page);
7ddcb079
A
2124
2125#if DEBUG
2126 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
2127 if (!(pmap_valid_page(phys_page))) {
2128 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
2129 }
2130 }
2131#endif
1c79356b 2132 *mem = vm_page_template;
39037602
A
2133
2134 VM_PAGE_SET_PHYS_PAGE(mem, phys_page);
6d2010ae
A
2135#if 0
2136 /*
2137 * we're leaving this turned off for now... currently pages
2138 * come off the free list and are either immediately dirtied/referenced
2139 * due to zero-fill or COW faults, or are used to read or write files...
2140 * in the file I/O case, the UPL mechanism takes care of clearing
2141 * the state of the HW ref/mod bits in a somewhat fragile way.
2142 * Since we may change the way this works in the future (to toughen it up),
2143 * I'm leaving this as a reminder of where these bits could get cleared
2144 */
2145
2146 /*
2147 * make sure both the h/w referenced and modified bits are
2148 * clear at this point... we are especially dependent on
2149 * not finding a 'stale' h/w modified in a number of spots
2150 * once this page goes back into use
2151 */
2152 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
2153#endif
0b4c1975 2154 mem->lopage = lopage;
1c79356b
A
2155}
2156
2157/*
2158 * vm_page_grab_fictitious:
2159 *
2160 * Remove a fictitious page from the free list.
2161 * Returns VM_PAGE_NULL if there are no free pages.
2162 */
2163int c_vm_page_grab_fictitious = 0;
6d2010ae 2164int c_vm_page_grab_fictitious_failed = 0;
1c79356b
A
2165int c_vm_page_release_fictitious = 0;
2166int c_vm_page_more_fictitious = 0;
2167
2168vm_page_t
2d21ac55 2169vm_page_grab_fictitious_common(
b0d623f7 2170 ppnum_t phys_addr)
1c79356b 2171{
6d2010ae
A
2172 vm_page_t m;
2173
2174 if ((m = (vm_page_t)zget(vm_page_zone))) {
1c79356b 2175
0b4c1975 2176 vm_page_init(m, phys_addr, FALSE);
1c79356b 2177 m->fictitious = TRUE;
1c79356b 2178
6d2010ae
A
2179 c_vm_page_grab_fictitious++;
2180 } else
2181 c_vm_page_grab_fictitious_failed++;
2182
1c79356b
A
2183 return m;
2184}
2185
2d21ac55
A
2186vm_page_t
2187vm_page_grab_fictitious(void)
2188{
2189 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
2190}
2191
2192vm_page_t
2193vm_page_grab_guard(void)
2194{
2195 return vm_page_grab_fictitious_common(vm_page_guard_addr);
2196}
2197
6d2010ae 2198
1c79356b
A
2199/*
2200 * vm_page_release_fictitious:
2201 *
6d2010ae 2202 * Release a fictitious page to the zone pool
1c79356b 2203 */
1c79356b
A
2204void
2205vm_page_release_fictitious(
6d2010ae 2206 vm_page_t m)
1c79356b 2207{
39037602 2208 assert((m->vm_page_q_state == VM_PAGE_NOT_ON_Q) || (m->vm_page_q_state == VM_PAGE_IS_WIRED));
1c79356b 2209 assert(m->fictitious);
39037602
A
2210 assert(VM_PAGE_GET_PHYS_PAGE(m) == vm_page_fictitious_addr ||
2211 VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr);
1c79356b
A
2212
2213 c_vm_page_release_fictitious++;
6d2010ae 2214
91447636 2215 zfree(vm_page_zone, m);
1c79356b
A
2216}
2217
2218/*
2219 * vm_page_more_fictitious:
2220 *
6d2010ae 2221 * Add more fictitious pages to the zone.
1c79356b
A
2222 * Allowed to block. This routine is way intimate
2223 * with the zones code, for several reasons:
2224 * 1. we need to carve some page structures out of physical
2225 * memory before zones work, so they _cannot_ come from
2226 * the zone_map.
2227 * 2. the zone needs to be collectable in order to prevent
2228 * growth without bound. These structures are used by
2229 * the device pager (by the hundreds and thousands), as
2230 * private pages for pageout, and as blocking pages for
2231 * pagein. Temporary bursts in demand should not result in
2232 * permanent allocation of a resource.
2233 * 3. To smooth allocation humps, we allocate single pages
2234 * with kernel_memory_allocate(), and cram them into the
6d2010ae 2235 * zone.
1c79356b
A
2236 */
2237
2238void vm_page_more_fictitious(void)
2239{
6d2010ae
A
2240 vm_offset_t addr;
2241 kern_return_t retval;
1c79356b
A
2242
2243 c_vm_page_more_fictitious++;
2244
1c79356b
A
2245 /*
2246 * Allocate a single page from the zone_map. Do not wait if no physical
2247 * pages are immediately available, and do not zero the space. We need
2248 * our own blocking lock here to prevent having multiple,
2249 * simultaneous requests from piling up on the zone_map lock. Exactly
2250 * one (of our) threads should be potentially waiting on the map lock.
2251 * If winner is not vm-privileged, then the page allocation will fail,
2252 * and it will temporarily block here in the vm_page_wait().
2253 */
b0d623f7 2254 lck_mtx_lock(&vm_page_alloc_lock);
1c79356b
A
2255 /*
2256 * If another thread allocated space, just bail out now.
2257 */
2258 if (zone_free_count(vm_page_zone) > 5) {
2259 /*
2260 * The number "5" is a small number that is larger than the
2261 * number of fictitious pages that any single caller will
2262 * attempt to allocate. Otherwise, a thread will attempt to
2263 * acquire a fictitious page (vm_page_grab_fictitious), fail,
2264 * release all of the resources and locks already acquired,
2265 * and then call this routine. This routine finds the pages
2266 * that the caller released, so fails to allocate new space.
2267 * The process repeats infinitely. The largest known number
2268 * of fictitious pages required in this manner is 2. 5 is
2269 * simply a somewhat larger number.
2270 */
b0d623f7 2271 lck_mtx_unlock(&vm_page_alloc_lock);
1c79356b
A
2272 return;
2273 }
2274
91447636
A
2275 retval = kernel_memory_allocate(zone_map,
2276 &addr, PAGE_SIZE, VM_PROT_ALL,
3e170ce0 2277 KMA_KOBJECT|KMA_NOPAGEWAIT, VM_KERN_MEMORY_ZONE);
91447636 2278 if (retval != KERN_SUCCESS) {
1c79356b 2279 /*
6d2010ae 2280 * No page was available. Drop the
1c79356b
A
2281 * lock to give another thread a chance at it, and
2282 * wait for the pageout daemon to make progress.
2283 */
b0d623f7 2284 lck_mtx_unlock(&vm_page_alloc_lock);
1c79356b
A
2285 vm_page_wait(THREAD_UNINT);
2286 return;
2287 }
39236c6e 2288
7ddcb079 2289 zcram(vm_page_zone, addr, PAGE_SIZE);
6d2010ae 2290
b0d623f7 2291 lck_mtx_unlock(&vm_page_alloc_lock);
1c79356b
A
2292}
2293
1c79356b
A
2294
2295/*
2296 * vm_pool_low():
2297 *
2298 * Return true if it is not likely that a non-vm_privileged thread
2299 * can get memory without blocking. Advisory only, since the
2300 * situation may change under us.
2301 */
2302int
2303vm_pool_low(void)
2304{
2305 /* No locking, at worst we will fib. */
b0d623f7 2306 return( vm_page_free_count <= vm_page_free_reserved );
1c79356b
A
2307}
2308
0c530ab8 2309
39037602
A
2310#if CONFIG_BACKGROUND_QUEUE
2311
2312void
2313vm_page_update_background_state(vm_page_t mem)
2314{
2315 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2316 return;
2317
2318 if (mem->vm_page_in_background == FALSE)
2319 return;
2320
2321#if BACKGROUNDQ_BASED_ON_QOS
2322 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS) <= THREAD_QOS_LEGACY)
2323 return;
2324#else
2325 task_t my_task;
2326
2327 my_task = current_task();
2328
2329 if (my_task) {
2330 if (proc_get_effective_task_policy(my_task, TASK_POLICY_DARWIN_BG))
2331 return;
2332 }
2333#endif
2334 vm_page_lockspin_queues();
2335
2336 mem->vm_page_in_background = FALSE;
2337 vm_page_background_promoted_count++;
2338
2339 vm_page_remove_from_backgroundq(mem);
2340
2341 vm_page_unlock_queues();
2342}
2343
2344
2345void
2346vm_page_assign_background_state(vm_page_t mem)
2347{
2348 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2349 return;
2350
2351#if BACKGROUNDQ_BASED_ON_QOS
2352 if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS) <= THREAD_QOS_LEGACY)
2353 mem->vm_page_in_background = TRUE;
2354 else
2355 mem->vm_page_in_background = FALSE;
2356#else
2357 task_t my_task;
2358
2359 my_task = current_task();
2360
2361 if (my_task)
2362 mem->vm_page_in_background = proc_get_effective_task_policy(my_task, TASK_POLICY_DARWIN_BG);
2363#endif
2364}
2365
2366
2367void
2368vm_page_remove_from_backgroundq(
2369 vm_page_t mem)
2370{
2371 vm_object_t m_object;
2372
2373 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2374
2375 if (mem->vm_page_on_backgroundq) {
2376 vm_page_queue_remove(&vm_page_queue_background, mem, vm_page_t, vm_page_backgroundq);
2377
2378 mem->vm_page_backgroundq.next = 0;
2379 mem->vm_page_backgroundq.prev = 0;
2380 mem->vm_page_on_backgroundq = FALSE;
2381
2382 vm_page_background_count--;
2383
2384 m_object = VM_PAGE_OBJECT(mem);
2385
2386 if (m_object->internal)
2387 vm_page_background_internal_count--;
2388 else
2389 vm_page_background_external_count--;
2390 } else {
2391 assert(VM_PAGE_UNPACK_PTR(mem->vm_page_backgroundq.next) == (uintptr_t)NULL &&
2392 VM_PAGE_UNPACK_PTR(mem->vm_page_backgroundq.prev) == (uintptr_t)NULL);
2393 }
2394}
2395
2396
2397void
2398vm_page_add_to_backgroundq(
2399 vm_page_t mem,
2400 boolean_t first)
2401{
2402 vm_object_t m_object;
2403
2404 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2405
2406 if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
2407 return;
2408
2409 if (mem->vm_page_on_backgroundq == FALSE) {
2410
2411 m_object = VM_PAGE_OBJECT(mem);
2412
2413 if (vm_page_background_exclude_external && !m_object->internal)
2414 return;
2415
2416 if (first == TRUE)
2417 vm_page_queue_enter_first(&vm_page_queue_background, mem, vm_page_t, vm_page_backgroundq);
2418 else
2419 vm_page_queue_enter(&vm_page_queue_background, mem, vm_page_t, vm_page_backgroundq);
2420 mem->vm_page_on_backgroundq = TRUE;
2421
2422 vm_page_background_count++;
2423
2424 if (m_object->internal)
2425 vm_page_background_internal_count++;
2426 else
2427 vm_page_background_external_count++;
2428 }
2429}
2430
2431#endif
0c530ab8
A
2432
2433/*
2434 * this is an interface to support bring-up of drivers
2435 * on platforms with physical memory > 4G...
2436 */
fe8ab488 2437int vm_himemory_mode = 2;
0c530ab8
A
2438
2439
2440/*
2441 * this interface exists to support hardware controllers
2442 * incapable of generating DMAs with more than 32 bits
2443 * of address on platforms with physical memory > 4G...
2444 */
0b4c1975
A
2445unsigned int vm_lopages_allocated_q = 0;
2446unsigned int vm_lopages_allocated_cpm_success = 0;
2447unsigned int vm_lopages_allocated_cpm_failed = 0;
39037602 2448vm_page_queue_head_t vm_lopage_queue_free __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
0c530ab8
A
2449
2450vm_page_t
2451vm_page_grablo(void)
2452{
0b4c1975 2453 vm_page_t mem;
0c530ab8 2454
0b4c1975 2455 if (vm_lopage_needed == FALSE)
0c530ab8
A
2456 return (vm_page_grab());
2457
b0d623f7 2458 lck_mtx_lock_spin(&vm_page_queue_free_lock);
0c530ab8 2459
39037602
A
2460 if ( !vm_page_queue_empty(&vm_lopage_queue_free)) {
2461 vm_page_queue_remove_first(&vm_lopage_queue_free,
0b4c1975
A
2462 mem,
2463 vm_page_t,
2464 pageq);
2465 assert(vm_lopage_free_count);
39037602
A
2466 assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
2467 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
0c530ab8 2468
0b4c1975
A
2469 vm_lopage_free_count--;
2470 vm_lopages_allocated_q++;
2471
2472 if (vm_lopage_free_count < vm_lopage_lowater)
2473 vm_lopage_refill = TRUE;
0c530ab8 2474
0b4c1975 2475 lck_mtx_unlock(&vm_page_queue_free_lock);
39037602
A
2476
2477#if CONFIG_BACKGROUND_QUEUE
2478 vm_page_assign_background_state(mem);
2479#endif
2d21ac55 2480 } else {
0b4c1975
A
2481 lck_mtx_unlock(&vm_page_queue_free_lock);
2482
2483 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
2484
2485 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2486 vm_lopages_allocated_cpm_failed++;
2487 lck_mtx_unlock(&vm_page_queue_free_lock);
2488
2489 return (VM_PAGE_NULL);
2490 }
39037602
A
2491 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
2492
0b4c1975
A
2493 mem->busy = TRUE;
2494
2495 vm_page_lockspin_queues();
2496
2497 mem->gobbled = FALSE;
2498 vm_page_gobble_count--;
2499 vm_page_wire_count--;
2500
2501 vm_lopages_allocated_cpm_success++;
2502 vm_page_unlock_queues();
0c530ab8 2503 }
0b4c1975 2504 assert(mem->busy);
0b4c1975
A
2505 assert(!mem->pmapped);
2506 assert(!mem->wpmapped);
39037602 2507 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
0b4c1975 2508
39037602 2509 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
0c530ab8
A
2510
2511 return (mem);
2512}
2513
6d2010ae 2514
1c79356b
A
2515/*
2516 * vm_page_grab:
2517 *
2d21ac55
A
2518 * first try to grab a page from the per-cpu free list...
2519 * this must be done while pre-emption is disabled... if
2520 * a page is available, we're done...
2521 * if no page is available, grab the vm_page_queue_free_lock
2522 * and see if current number of free pages would allow us
2523 * to grab at least 1... if not, return VM_PAGE_NULL as before...
2524 * if there are pages available, disable preemption and
2525 * recheck the state of the per-cpu free list... we could
2526 * have been preempted and moved to a different cpu, or
2527 * some other thread could have re-filled it... if still
2528 * empty, figure out how many pages we can steal from the
2529 * global free queue and move to the per-cpu queue...
2530 * return 1 of these pages when done... only wakeup the
2531 * pageout_scan thread if we moved pages from the global
2532 * list... no need for the wakeup if we've satisfied the
2533 * request from the per-cpu queue.
1c79356b
A
2534 */
2535
39037602
A
2536#if CONFIG_SECLUDED_MEMORY
2537vm_page_t vm_page_grab_secluded(void);
2538#endif /* CONFIG_SECLUDED_MEMORY */
1c79356b
A
2539
2540vm_page_t
39037602 2541vm_page_grab(void)
1c79356b 2542{
39037602
A
2543 return vm_page_grab_options(0);
2544}
2d21ac55 2545
39037602
A
2546vm_page_t
2547vm_page_grab_options(
2548 int grab_options)
2549{
2550 vm_page_t mem;
2d21ac55
A
2551
2552 disable_preemption();
2553
2554 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2555return_page_from_cpu_list:
39037602
A
2556 assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
2557
2d21ac55 2558 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
39037602 2559 PROCESSOR_DATA(current_processor(), free_pages) = mem->snext;
2d21ac55
A
2560
2561 enable_preemption();
39037602
A
2562 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2563 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
2d21ac55 2564
39037602 2565 assert(mem->listq.next == 0 && mem->listq.prev == 0);
2d21ac55 2566 assert(mem->tabled == FALSE);
39037602 2567 assert(mem->vm_page_object == 0);
2d21ac55 2568 assert(!mem->laundry);
39037602 2569 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
2d21ac55
A
2570 assert(mem->busy);
2571 assert(!mem->encrypted);
2572 assert(!mem->pmapped);
4a3eedf9 2573 assert(!mem->wpmapped);
39037602 2574 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2d21ac55 2575
39037602
A
2576#if CONFIG_BACKGROUND_QUEUE
2577 vm_page_assign_background_state(mem);
2578#endif
2d21ac55
A
2579 return mem;
2580 }
2581 enable_preemption();
2582
1c79356b 2583
1c79356b
A
2584 /*
2585 * Optionally produce warnings if the wire or gobble
2586 * counts exceed some threshold.
2587 */
fe8ab488
A
2588#if VM_PAGE_WIRE_COUNT_WARNING
2589 if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) {
1c79356b
A
2590 printf("mk: vm_page_grab(): high wired page count of %d\n",
2591 vm_page_wire_count);
1c79356b 2592 }
fe8ab488
A
2593#endif
2594#if VM_PAGE_GOBBLE_COUNT_WARNING
2595 if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) {
1c79356b
A
2596 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
2597 vm_page_gobble_count);
1c79356b 2598 }
fe8ab488 2599#endif
39037602 2600
b0d623f7
A
2601 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2602
1c79356b
A
2603 /*
2604 * Only let privileged threads (involved in pageout)
2605 * dip into the reserved pool.
2606 */
1c79356b 2607 if ((vm_page_free_count < vm_page_free_reserved) &&
91447636 2608 !(current_thread()->options & TH_OPT_VMPRIV)) {
39037602 2609 /* no page for us in the free queue... */
b0d623f7 2610 lck_mtx_unlock(&vm_page_queue_free_lock);
1c79356b 2611 mem = VM_PAGE_NULL;
39037602
A
2612
2613#if CONFIG_SECLUDED_MEMORY
2614 /* ... but can we try and grab from the secluded queue? */
2615 if (vm_page_secluded_count > 0 &&
2616 ((grab_options & VM_PAGE_GRAB_SECLUDED) ||
2617 task_can_use_secluded_mem(current_task()))) {
2618 mem = vm_page_grab_secluded();
2619 if (grab_options & VM_PAGE_GRAB_SECLUDED) {
2620 vm_page_secluded.grab_for_iokit++;
2621 if (mem) {
2622 vm_page_secluded.grab_for_iokit_success++;
2623 }
2624 }
2625 if (mem) {
2626 VM_CHECK_MEMORYSTATUS;
2627 return mem;
2628 }
2629 }
2630#else /* CONFIG_SECLUDED_MEMORY */
2631 (void) grab_options;
2632#endif /* CONFIG_SECLUDED_MEMORY */
1c79356b 2633 }
2d21ac55
A
2634 else {
2635 vm_page_t head;
2636 vm_page_t tail;
2637 unsigned int pages_to_steal;
2638 unsigned int color;
1c79356b 2639
2d21ac55 2640 while ( vm_page_free_count == 0 ) {
1c79356b 2641
b0d623f7 2642 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
2643 /*
2644 * must be a privileged thread to be
2645 * in this state since a non-privileged
2646 * thread would have bailed if we were
2647 * under the vm_page_free_reserved mark
2648 */
2649 VM_PAGE_WAIT();
b0d623f7 2650 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2d21ac55
A
2651 }
2652
2653 disable_preemption();
2654
2655 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
b0d623f7 2656 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
2657
2658 /*
2659 * we got preempted and moved to another processor
2660 * or we got preempted and someone else ran and filled the cache
2661 */
2662 goto return_page_from_cpu_list;
2663 }
2664 if (vm_page_free_count <= vm_page_free_reserved)
2665 pages_to_steal = 1;
2666 else {
fe8ab488
A
2667 if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved))
2668 pages_to_steal = vm_free_magazine_refill_limit;
2669 else
2d21ac55
A
2670 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
2671 }
2672 color = PROCESSOR_DATA(current_processor(), start_color);
2673 head = tail = NULL;
2674
fe8ab488
A
2675 vm_page_free_count -= pages_to_steal;
2676
2d21ac55 2677 while (pages_to_steal--) {
2d21ac55 2678
39037602 2679 while (vm_page_queue_empty(&vm_page_queue_free[color].qhead))
2d21ac55
A
2680 color = (color + 1) & vm_color_mask;
2681
39037602 2682 vm_page_queue_remove_first(&vm_page_queue_free[color].qhead,
2d21ac55
A
2683 mem,
2684 vm_page_t,
2685 pageq);
39037602 2686 assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_Q);
6d2010ae 2687
39037602
A
2688 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2689
2d21ac55
A
2690 color = (color + 1) & vm_color_mask;
2691
2692 if (head == NULL)
2693 head = mem;
2694 else
39037602 2695 tail->snext = mem;
2d21ac55
A
2696 tail = mem;
2697
39037602 2698 assert(mem->listq.next == 0 && mem->listq.prev == 0);
2d21ac55 2699 assert(mem->tabled == FALSE);
39037602 2700 assert(mem->vm_page_object == 0);
2d21ac55 2701 assert(!mem->laundry);
2d21ac55 2702
39037602
A
2703 mem->vm_page_q_state = VM_PAGE_ON_FREE_LOCAL_Q;
2704
2705 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
2d21ac55 2706 assert(mem->busy);
2d21ac55
A
2707 assert(!mem->encrypted);
2708 assert(!mem->pmapped);
4a3eedf9 2709 assert(!mem->wpmapped);
39037602 2710 assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
2d21ac55 2711 }
fe8ab488
A
2712 lck_mtx_unlock(&vm_page_queue_free_lock);
2713
39037602 2714 PROCESSOR_DATA(current_processor(), free_pages) = head->snext;
2d21ac55
A
2715 PROCESSOR_DATA(current_processor(), start_color) = color;
2716
2717 /*
2718 * satisfy this request
2719 */
2720 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2721 mem = head;
39037602
A
2722 assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
2723
2724 VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
2725 mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
91447636 2726
2d21ac55
A
2727 enable_preemption();
2728 }
1c79356b
A
2729 /*
2730 * Decide if we should poke the pageout daemon.
2731 * We do this if the free count is less than the low
2732 * water mark, or if the free count is less than the high
2733 * water mark (but above the low water mark) and the inactive
2734 * count is less than its target.
2735 *
2736 * We don't have the counts locked ... if they change a little,
2737 * it doesn't really matter.
2738 */
1c79356b 2739 if ((vm_page_free_count < vm_page_free_min) ||
316670eb
A
2740 ((vm_page_free_count < vm_page_free_target) &&
2741 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
2742 thread_wakeup((event_t) &vm_page_free_wanted);
39037602
A
2743#if CONFIG_BACKGROUND_QUEUE
2744 if (vm_page_background_mode == VM_PAGE_BG_LEVEL_3 && (vm_page_background_count > vm_page_background_limit))
2745 thread_wakeup((event_t) &vm_page_free_wanted);
2746#endif
2d21ac55 2747
6d2010ae 2748 VM_CHECK_MEMORYSTATUS;
39037602
A
2749
2750 if (mem) {
2751// dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
2752
2753#if CONFIG_BACKGROUND_QUEUE
2754 vm_page_assign_background_state(mem);
2755#endif
2756 }
2757 return mem;
2758}
2759
#if CONFIG_SECLUDED_MEMORY
/*
 *	vm_page_grab_secluded:
 *
 *	Try to steal one page from the secluded queue for general use.
 *	Returns the page (busy, removed from all queues, disassociated
 *	from any object) or VM_PAGE_NULL if no page could be grabbed.
 *
 *	Called without the VM page queues lock held; takes and drops it
 *	internally.  On failure the candidate page is re-activated rather
 *	than left off its queues.
 */
vm_page_t
vm_page_grab_secluded(void)
{
	vm_page_t	mem;
	vm_object_t	object;
	int		refmod_state;

	/* unlocked pre-check to avoid taking the lock when the queue is empty */
	if (vm_page_secluded_count == 0) {
		/* no secluded pages to grab... */
		return VM_PAGE_NULL;
	}

	/* secluded queue is protected by the VM page queue lock */
	vm_page_lock_queues();

	/* re-check under the lock: the count may have dropped in between */
	if (vm_page_secluded_count == 0) {
		/* no secluded pages to grab... */
		vm_page_unlock_queues();
		return VM_PAGE_NULL;
	}

#if 00
	/* can we grab from the secluded queue? */
	if (vm_page_secluded_count > vm_page_secluded_target ||
	    (vm_page_secluded_count > 0 &&
	     task_can_use_secluded_mem(current_task()))) {
		/* OK */
	} else {
		/* can't grab from secluded queue... */
		vm_page_unlock_queues();
		return VM_PAGE_NULL;
	}
#endif

	/* we can grab a page from secluded queue! */
	assert((vm_page_secluded_count_free +
		vm_page_secluded_count_inuse) ==
	       vm_page_secluded_count);
	if (current_task()->task_can_use_secluded_mem) {
		assert(num_tasks_can_use_secluded_mem > 0);
	}
	assert(!vm_page_queue_empty(&vm_page_queue_secluded));
	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	/* FIFO: take the head of the secluded queue */
	mem = vm_page_queue_first(&vm_page_queue_secluded);
	assert(mem->vm_page_q_state == VM_PAGE_ON_SECLUDED_Q);
	vm_page_queues_remove(mem, TRUE);

	object = VM_PAGE_OBJECT(mem);

	assert(!mem->fictitious);
	assert(!VM_PAGE_WIRED(mem));
	if (object == VM_OBJECT_NULL) {
		/* free for grab! */
		vm_page_unlock_queues();
		vm_page_secluded.grab_success_free++;

		assert(mem->busy);
		assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
		assert(VM_PAGE_OBJECT(mem) == VM_OBJECT_NULL);
		assert(mem->pageq.next == 0);
		assert(mem->pageq.prev == 0);
		assert(mem->listq.next == 0);
		assert(mem->listq.prev == 0);
#if CONFIG_BACKGROUND_QUEUE
		assert(mem->vm_page_on_backgroundq == 0);
		assert(mem->vm_page_backgroundq.next == 0);
		assert(mem->vm_page_backgroundq.prev == 0);
#endif /* CONFIG_BACKGROUND_QUEUE */
		return mem;
	}

	/* page belongs to an object: we must steal it from that object */
	assert(!object->internal);
//	vm_page_pageable_external_count--;

	/* trylock only: we already hold the page queues lock, so we must
	 * not block on the object lock (lock-ordering) */
	if (!vm_object_lock_try(object)) {
//		printf("SECLUDED: page %p: object %p locked\n", mem, object);
		vm_page_secluded.grab_failure_locked++;
	reactivate_secluded_page:
		/* put the page back in circulation before giving up */
		vm_page_activate(mem);
		vm_page_unlock_queues();
		return VM_PAGE_NULL;
	}
	if (mem->busy ||
	    mem->cleaning ||
	    mem->laundry) {
		/* can't steal page in this state... */
		vm_object_unlock(object);
		vm_page_secluded.grab_failure_state++;
		goto reactivate_secluded_page;
	}

	mem->busy = TRUE;
	/* pull all pmap mappings so no one can touch the page any more */
	refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
	if (refmod_state & VM_MEM_REFERENCED) {
		mem->reference = TRUE;
	}
	if (refmod_state & VM_MEM_MODIFIED) {
		SET_PAGE_DIRTY(mem, FALSE);
	}
	if (mem->dirty || mem->precious) {
		/* can't grab a dirty page; re-activate */
//		printf("SECLUDED: dirty page %p\n", mem);
		PAGE_WAKEUP_DONE(mem);
		vm_page_secluded.grab_failure_dirty++;
		vm_object_unlock(object);
		goto reactivate_secluded_page;
	}
	if (mem->reference) {
		/* it's been used but we do need to grab a page... */
	}

	vm_page_unlock_queues();

	/* finish what vm_page_free() would have done... */
	vm_page_free_prepare_object(mem, TRUE);
	vm_object_unlock(object);
	object = VM_OBJECT_NULL;
	if (vm_page_free_verify) {
		assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
	}
	pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
	vm_page_secluded.grab_success_other++;

	assert(mem->busy);
	assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
	assert(VM_PAGE_OBJECT(mem) == VM_OBJECT_NULL);
	assert(mem->pageq.next == 0);
	assert(mem->pageq.prev == 0);
	assert(mem->listq.next == 0);
	assert(mem->listq.prev == 0);
#if CONFIG_BACKGROUND_QUEUE
	assert(mem->vm_page_on_backgroundq == 0);
	assert(mem->vm_page_backgroundq.next == 0);
	assert(mem->vm_page_backgroundq.prev == 0);
#endif /* CONFIG_BACKGROUND_QUEUE */

	return mem;
}
#endif /* CONFIG_SECLUDED_MEMORY */
1c79356b
A
2900
/*
 *	vm_page_release:
 *
 *	Return a page to the free list.
 *
 *	The page must be busy, off all queues (VM_PAGE_NOT_ON_Q) and
 *	disassociated from any object.  Depending on its physical address
 *	and global state it is placed on the lopage queue, the secluded
 *	queue (CONFIG_SECLUDED_MEMORY), or a color-indexed free queue.
 *
 *	page_queues_locked tells us whether the caller already holds
 *	vm_page_queue_lock; the assertion below enforces it either way.
 */

void
vm_page_release(
	vm_page_t	mem,
	boolean_t	page_queues_locked)
{
	unsigned int	color;
	int	need_wakeup = 0;
	int	need_priv_wakeup = 0;
#if CONFIG_SECLUDED_MEMORY
	int	need_secluded_wakeup = 0;
#endif /* CONFIG_SECLUDED_MEMORY */

	if (page_queues_locked) {
		LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	} else {
		LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
	}

	assert(!mem->private && !mem->fictitious);
	if (vm_page_free_verify) {
		assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
	}
//	dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 5);	/* (TEST/DEBUG) */

	pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));

	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	/* sanity: the page must be fully detached before it can be freed */
	assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
	assert(mem->busy);
	assert(!mem->laundry);
	assert(mem->vm_page_object == 0);
	assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
	assert(mem->listq.next == 0 && mem->listq.prev == 0);
#if CONFIG_BACKGROUND_QUEUE
	assert(mem->vm_page_backgroundq.next == 0 &&
	       mem->vm_page_backgroundq.prev == 0 &&
	       mem->vm_page_on_backgroundq == FALSE);
#endif
	if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
	    vm_lopage_free_count < vm_lopage_free_limit &&
	    VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
		/*
		 * this exists to support hardware controllers
		 * incapable of generating DMAs with more than 32 bits
		 * of address on platforms with physical memory > 4G...
		 */
		vm_page_queue_enter_first(&vm_lopage_queue_free,
					  mem,
					  vm_page_t,
					  pageq);
		vm_lopage_free_count++;

		if (vm_lopage_free_count >= vm_lopage_free_limit)
			vm_lopage_refill = FALSE;

		mem->vm_page_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
		mem->lopage = TRUE;
#if CONFIG_SECLUDED_MEMORY
	} else if (vm_page_free_count > vm_page_free_reserved &&
		   vm_page_secluded_count < vm_page_secluded_target &&
		   num_tasks_can_use_secluded_mem == 0) {
		/*
		 * XXX FBDP TODO: also avoid refilling secluded queue
		 * when some IOKit objects are already grabbing from it...
		 */
		if (!page_queues_locked) {
			if (!vm_page_trylock_queues()) {
				/* take locks in right order */
				lck_mtx_unlock(&vm_page_queue_free_lock);
				vm_page_lock_queues();
				lck_mtx_lock_spin(&vm_page_queue_free_lock);
			}
		}
		mem->lopage = FALSE;
		LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
		vm_page_queue_enter_first(&vm_page_queue_secluded,
					  mem,
					  vm_page_t,
					  pageq);
		mem->vm_page_q_state = VM_PAGE_ON_SECLUDED_Q;
		vm_page_secluded_count++;
		vm_page_secluded_count_free++;
		if (!page_queues_locked) {
			/* drop the queues lock we acquired above */
			vm_page_unlock_queues();
		}
		LCK_MTX_ASSERT(&vm_page_queue_free_lock, LCK_MTX_ASSERT_OWNED);
		if (vm_page_free_wanted_secluded > 0) {
			vm_page_free_wanted_secluded--;
			need_secluded_wakeup = 1;
		}
#endif /* CONFIG_SECLUDED_MEMORY */
	} else {
		mem->lopage = FALSE;
		mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;

		/* free queues are striped by physical-page "color" */
		color = VM_PAGE_GET_PHYS_PAGE(mem) & vm_color_mask;
		vm_page_queue_enter_first(&vm_page_queue_free[color].qhead,
					  mem,
					  vm_page_t,
					  pageq);
		vm_page_free_count++;
		/*
		 *	Check if we should wake up someone waiting for page.
		 *	But don't bother waking them unless they can allocate.
		 *
		 *	We wakeup only one thread, to prevent starvation.
		 *	Because the scheduling system handles wait queues FIFO,
		 *	if we wakeup all waiting threads, one greedy thread
		 *	can starve multiple niceguy threads.  When the threads
		 *	all wakeup, the greedy threads runs first, grabs the page,
		 *	and waits for another page.  It will be the first to run
		 *	when the next page is freed.
		 *
		 *	However, there is a slight danger here.
		 *	The thread we wake might not use the free page.
		 *	Then the other threads could wait indefinitely
		 *	while the page goes unused.  To forestall this,
		 *	the pageout daemon will keep making free pages
		 *	as long as vm_page_free_wanted is non-zero.
		 */

		assert(vm_page_free_count > 0);
		/* privileged waiters first; others only above the reserve */
		if (vm_page_free_wanted_privileged > 0) {
			vm_page_free_wanted_privileged--;
			need_priv_wakeup = 1;
#if CONFIG_SECLUDED_MEMORY
		} else if (vm_page_free_wanted_secluded > 0 &&
			   vm_page_free_count > vm_page_free_reserved) {
			vm_page_free_wanted_secluded--;
			need_secluded_wakeup = 1;
#endif /* CONFIG_SECLUDED_MEMORY */
		} else if (vm_page_free_wanted > 0 &&
			   vm_page_free_count > vm_page_free_reserved) {
			vm_page_free_wanted--;
			need_wakeup = 1;
		}
	}
	lck_mtx_unlock(&vm_page_queue_free_lock);

	/* wakeups are issued after the lock is dropped; at most one waiter */
	if (need_priv_wakeup)
		thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
#if CONFIG_SECLUDED_MEMORY
	else if (need_secluded_wakeup)
		thread_wakeup_one((event_t) &vm_page_free_wanted_secluded);
#endif /* CONFIG_SECLUDED_MEMORY */
	else if (need_wakeup)
		thread_wakeup_one((event_t) &vm_page_free_count);

	VM_CHECK_MEMORYSTATUS;
}
3058
fe8ab488
A
3059/*
3060 * This version of vm_page_release() is used only at startup
3061 * when we are single-threaded and pages are being released
3062 * for the first time. Hence, no locking or unnecessary checks are made.
3063 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
3064 */
3065void
3066vm_page_release_startup(
39037602 3067 vm_page_t mem)
fe8ab488 3068{
39037602 3069 vm_page_queue_t queue_free;
fe8ab488
A
3070
3071 if (vm_lopage_free_count < vm_lopage_free_limit &&
39037602 3072 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
fe8ab488 3073 mem->lopage = TRUE;
39037602 3074 mem->vm_page_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
fe8ab488
A
3075 vm_lopage_free_count++;
3076 queue_free = &vm_lopage_queue_free;
39037602
A
3077#if CONFIG_SECLUDED_MEMORY
3078 } else if (vm_page_secluded_count < vm_page_secluded_target) {
3079 mem->lopage = FALSE;
3080 mem->vm_page_q_state = VM_PAGE_ON_SECLUDED_Q;
3081 vm_page_secluded_count++;
3082 vm_page_secluded_count_free++;
3083 queue_free = &vm_page_queue_secluded;
3084#endif /* CONFIG_SECLUDED_MEMORY */
3085 } else {
fe8ab488 3086 mem->lopage = FALSE;
39037602 3087 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
fe8ab488 3088 vm_page_free_count++;
39037602 3089 queue_free = &vm_page_queue_free[VM_PAGE_GET_PHYS_PAGE(mem) & vm_color_mask].qhead;
fe8ab488 3090 }
39037602 3091 vm_page_queue_enter_first(queue_free, mem, vm_page_t, pageq);
fe8ab488
A
3092}
3093
1c79356b
A
/*
 *	vm_page_wait:
 *
 *	Wait for a page to become available.
 *	If there are plenty of free pages, then we don't sleep.
 *
 *	Returns:
 *		TRUE:  There may be another page, try again
 *		FALSE: We were interrupted out of our wait, don't try again
 */

boolean_t
vm_page_wait(
	int	interruptible )
{
	/*
	 *	We can't use vm_page_free_reserved to make this
	 *	determination.  Consider: some thread might
	 *	need to allocate two pages.  The first allocation
	 *	succeeds, the second fails.  After the first page is freed,
	 *	a call to vm_page_wait must really block.
	 */
	kern_return_t	wait_result;
	int	  	need_wakeup = 0;
	int		is_privileged = current_thread()->options & TH_OPT_VMPRIV;

	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	/* VM-privileged threads may dip into the reserve: any free page will do */
	if (is_privileged && vm_page_free_count) {
		lck_mtx_unlock(&vm_page_queue_free_lock);
		return TRUE;
	}

	/* plenty of pages: no need to sleep */
	if (vm_page_free_count >= vm_page_free_target) {
		lck_mtx_unlock(&vm_page_queue_free_lock);
		return TRUE;
	}

	/*
	 * Register as a waiter on the appropriate event while still
	 * holding the free lock, so a concurrent vm_page_release()
	 * cannot miss us.  The first waiter (wanted count 0 -> 1) also
	 * pokes the pageout daemon below.
	 */
	if (is_privileged) {
		if (vm_page_free_wanted_privileged++ == 0)
			need_wakeup = 1;
		wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
#if CONFIG_SECLUDED_MEMORY
	} else if (secluded_for_apps &&
		   task_can_use_secluded_mem(current_task())) {
#if 00
		/* XXX FBDP: need pageq lock for this... */
		/* XXX FBDP: might wait even if pages available, */
		/* XXX FBDP: hopefully not for too long... */
		if (vm_page_secluded_count > 0) {
			lck_mtx_unlock(&vm_page_queue_free_lock);
			return TRUE;
		}
#endif
		if (vm_page_free_wanted_secluded++ == 0) {
			need_wakeup = 1;
		}
		wait_result = assert_wait(
			(event_t)&vm_page_free_wanted_secluded,
			interruptible);
#endif /* CONFIG_SECLUDED_MEMORY */
	} else {
		if (vm_page_free_wanted++ == 0)
			need_wakeup = 1;
		wait_result = assert_wait((event_t)&vm_page_free_count,
					  interruptible);
	}
	lck_mtx_unlock(&vm_page_queue_free_lock);
	counter(c_vm_page_wait_block++);

	/*
	 * NOTE(review): all three waiter classes poke the same event,
	 * &vm_page_free_wanted — presumably the pageout daemon sleeps
	 * on it; confirm against vm_pageout.c.
	 */
	if (need_wakeup)
		thread_wakeup((event_t)&vm_page_free_wanted);

	if (wait_result == THREAD_WAITING) {
		VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
			       vm_page_free_wanted_privileged,
			       vm_page_free_wanted,
#if CONFIG_SECLUDED_MEMORY
			       vm_page_free_wanted_secluded,
#else /* CONFIG_SECLUDED_MEMORY */
			       0,
#endif /* CONFIG_SECLUDED_MEMORY */
			       0);
		wait_result = thread_block(THREAD_CONTINUE_NULL);
		VM_DEBUG_EVENT(vm_page_wait_block,
			       VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
	}

	return (wait_result == THREAD_AWAKENED);
}
3184
3185/*
3186 * vm_page_alloc:
3187 *
3188 * Allocate and return a memory cell associated
3189 * with this VM object/offset pair.
3190 *
3191 * Object must be locked.
3192 */
3193
3194vm_page_t
3195vm_page_alloc(
3196 vm_object_t object,
3197 vm_object_offset_t offset)
3198{
39037602
A
3199 vm_page_t mem;
3200 int grab_options;
1c79356b 3201
2d21ac55 3202 vm_object_lock_assert_exclusive(object);
39037602
A
3203 grab_options = 0;
3204#if CONFIG_SECLUDED_MEMORY
3205 if (object->can_grab_secluded) {
3206 grab_options |= VM_PAGE_GRAB_SECLUDED;
3207 }
3208#endif /* CONFIG_SECLUDED_MEMORY */
3209 mem = vm_page_grab_options(grab_options);
1c79356b
A
3210 if (mem == VM_PAGE_NULL)
3211 return VM_PAGE_NULL;
3212
3213 vm_page_insert(mem, object, offset);
3214
3215 return(mem);
3216}
3217
2d21ac55
A
3218/*
3219 * vm_page_alloc_guard:
3220 *
b0d623f7 3221 * Allocate a fictitious page which will be used
2d21ac55
A
3222 * as a guard page. The page will be inserted into
3223 * the object and returned to the caller.
3224 */
3225
3226vm_page_t
3227vm_page_alloc_guard(
3228 vm_object_t object,
3229 vm_object_offset_t offset)
3230{
39037602 3231 vm_page_t mem;
2d21ac55
A
3232
3233 vm_object_lock_assert_exclusive(object);
3234 mem = vm_page_grab_guard();
3235 if (mem == VM_PAGE_NULL)
3236 return VM_PAGE_NULL;
3237
3238 vm_page_insert(mem, object, offset);
3239
3240 return(mem);
3241}
3242
3243
1c79356b
A
3244counter(unsigned int c_laundry_pages_freed = 0;)
3245
/*
 *	vm_page_free_prepare:
 *
 *	Removes page from any queue it may be on
 *	and disassociates it from its VM object.
 *
 *	Object and page queues must be locked prior to entry.
 */
static void
vm_page_free_prepare(
	vm_page_t	mem)
{
	/* queue-side teardown must happen before object-side teardown */
	vm_page_free_prepare_queues(mem);
	vm_page_free_prepare_object(mem, TRUE);
}
3261
3262
/*
 *	vm_page_free_prepare_queues:
 *
 *	Queue-side half of freeing a page: steal it back from the
 *	laundry if needed, remove it from whatever paging queue it is
 *	on, and unwind any wired/gobbled accounting (including the
 *	owning object's wired count and purgeable ledgers).
 *
 *	The page queues lock and the owning object's lock (if any)
 *	must be held by the caller.
 */
void
vm_page_free_prepare_queues(
	vm_page_t	mem)
{
	vm_object_t	m_object;

	VM_PAGE_CHECK(mem);

	assert(mem->vm_page_q_state != VM_PAGE_ON_FREE_Q);
	assert(!mem->cleaning);
	m_object = VM_PAGE_OBJECT(mem);

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	if (m_object) {
		vm_object_lock_assert_exclusive(m_object);
	}
	if (mem->laundry) {
		/*
		 * We may have to free a page while it's being laundered
		 * if we lost its pager (due to a forced unmount, for example).
		 * We need to call vm_pageout_steal_laundry() before removing
		 * the page from its VM object, so that we can remove it
		 * from its pageout queue and adjust the laundry accounting
		 */
		vm_pageout_steal_laundry(mem, TRUE);
		counter(++c_laundry_pages_freed);
	}

	vm_page_queues_remove(mem, TRUE);

	if (VM_PAGE_WIRED(mem)) {
		assert(mem->wire_count > 0);

		if (m_object) {
			/* the object loses one wired resident page */
			assert(m_object->wired_page_count > 0);
			m_object->wired_page_count--;
			if (!m_object->wired_page_count) {
				VM_OBJECT_UNWIRED(m_object);
			}

			assert(m_object->resident_page_count >=
			       m_object->wired_page_count);

			/* wired pages of volatile objects are not counted
			 * in vm_page_purgeable_count; rebalance the counters */
			if (m_object->purgable == VM_PURGABLE_VOLATILE) {
				OSAddAtomic(+1, &vm_page_purgeable_count);
				assert(vm_page_purgeable_wired_count > 0);
				OSAddAtomic(-1, &vm_page_purgeable_wired_count);
			}
			if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
			     m_object->purgable == VM_PURGABLE_EMPTY) &&
			    m_object->vo_purgeable_owner != TASK_NULL) {
				task_t owner;

				owner = m_object->vo_purgeable_owner;
				/*
				 * While wired, this page was accounted
				 * as "non-volatile" but it should now
				 * be accounted as "volatile".
				 */
				/* one less "non-volatile"... */
				ledger_debit(owner->ledger,
					     task_ledgers.purgeable_nonvolatile,
					     PAGE_SIZE);
				/* ... and "phys_footprint" */
				ledger_debit(owner->ledger,
					     task_ledgers.phys_footprint,
					     PAGE_SIZE);
				/* one more "volatile" */
				ledger_credit(owner->ledger,
					      task_ledgers.purgeable_volatile,
					      PAGE_SIZE);
			}
		}
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count--;

		mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
		mem->wire_count = 0;
		assert(!mem->gobbled);
	} else if (mem->gobbled) {
		/* gobbled pages are counted as wired; undo that too */
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
	}
}
3348
3349
/*
 *	vm_page_free_prepare_object:
 *
 *	Object-side half of freeing a page: detach it from its object
 *	(clearing tabled/object/offset), wake any waiters, demote
 *	"private" pages back to fictitious, and re-initialize real
 *	pages to a pristine state.
 *
 *	remove_from_hash: whether vm_page_remove() should also unhash
 *	the page from the object/offset lookup table.
 */
void
vm_page_free_prepare_object(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	if (mem->tabled)
		vm_page_remove(mem, remove_from_hash);	/* clears tabled, object, offset */

	PAGE_WAKEUP(mem);		/* clears wanted */

	if (mem->private) {
		/* private pages revert to fictitious once their physical
		 * page is handed back */
		mem->private = FALSE;
		mem->fictitious = TRUE;
		VM_PAGE_SET_PHYS_PAGE(mem, vm_page_fictitious_addr);
	}
	if ( !mem->fictitious) {
		assert(mem->pageq.next == 0);
		assert(mem->pageq.prev == 0);
		assert(mem->listq.next == 0);
		assert(mem->listq.prev == 0);
#if CONFIG_BACKGROUND_QUEUE
		assert(mem->vm_page_backgroundq.next == 0);
		assert(mem->vm_page_backgroundq.prev == 0);
#endif /* CONFIG_BACKGROUND_QUEUE */
		assert(mem->next_m == 0);
		/* reset all fields, preserving only the physical page and
		 * the lopage attribute */
		vm_page_init(mem, VM_PAGE_GET_PHYS_PAGE(mem), mem->lopage);
	}
}
3378
b0d623f7 3379
6d2010ae
A
3380/*
3381 * vm_page_free:
3382 *
3383 * Returns the given page to the free list,
3384 * disassociating it with any VM object.
3385 *
3386 * Object and page queues must be locked prior to entry.
3387 */
2d21ac55
A
3388void
3389vm_page_free(
3390 vm_page_t mem)
3391{
b0d623f7 3392 vm_page_free_prepare(mem);
6d2010ae 3393
b0d623f7
A
3394 if (mem->fictitious) {
3395 vm_page_release_fictitious(mem);
3396 } else {
39037602
A
3397 vm_page_release(mem,
3398 TRUE); /* page queues are locked */
b0d623f7
A
3399 }
3400}
3401
3402
3403void
3404vm_page_free_unlocked(
3405 vm_page_t mem,
3406 boolean_t remove_from_hash)
3407{
3408 vm_page_lockspin_queues();
3409 vm_page_free_prepare_queues(mem);
3410 vm_page_unlock_queues();
3411
3412 vm_page_free_prepare_object(mem, remove_from_hash);
3413
2d21ac55
A
3414 if (mem->fictitious) {
3415 vm_page_release_fictitious(mem);
3416 } else {
39037602 3417 vm_page_release(mem, FALSE); /* page queues are not locked */
2d21ac55
A
3418 }
3419}
55e303ae 3420
316670eb 3421
2d21ac55
A
3422/*
3423 * Free a list of pages. The list can be up to several hundred pages,
3424 * as blocked up by vm_pageout_scan().
b0d623f7 3425 * The big win is not having to take the free list lock once
316670eb 3426 * per page.
d190cdc3
A
3427 *
3428 * The VM page queues lock (vm_page_queue_lock) should NOT be held.
3429 * The VM page free queues lock (vm_page_queue_free_lock) should NOT be held.
2d21ac55 3430 */
55e303ae
A
3431void
3432vm_page_free_list(
316670eb 3433 vm_page_t freeq,
b0d623f7 3434 boolean_t prepare_object)
55e303ae 3435{
316670eb 3436 vm_page_t mem;
2d21ac55 3437 vm_page_t nxt;
316670eb
A
3438 vm_page_t local_freeq;
3439 int pg_count;
2d21ac55 3440
d190cdc3
A
3441 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
3442 LCK_MTX_ASSERT(&vm_page_queue_free_lock, LCK_MTX_ASSERT_NOTOWNED);
3443
316670eb 3444 while (freeq) {
55e303ae 3445
316670eb
A
3446 pg_count = 0;
3447 local_freeq = VM_PAGE_NULL;
3448 mem = freeq;
b0d623f7 3449
316670eb
A
3450 /*
3451 * break up the processing into smaller chunks so
3452 * that we can 'pipeline' the pages onto the
3453 * free list w/o introducing too much
3454 * contention on the global free queue lock
3455 */
3456 while (mem && pg_count < 64) {
3457
39037602
A
3458 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
3459#if CONFIG_BACKGROUND_QUEUE
3460 assert(mem->vm_page_backgroundq.next == 0 &&
3461 mem->vm_page_backgroundq.prev == 0 &&
3462 mem->vm_page_on_backgroundq == FALSE);
3463#endif
3464 nxt = mem->snext;
3465 mem->snext = NULL;
3466 assert(mem->pageq.prev == 0);
316670eb 3467
316670eb 3468 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
39037602 3469 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
316670eb
A
3470 }
3471 if (prepare_object == TRUE)
3472 vm_page_free_prepare_object(mem, TRUE);
b0d623f7 3473
316670eb
A
3474 if (!mem->fictitious) {
3475 assert(mem->busy);
55e303ae 3476
316670eb
A
3477 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
3478 vm_lopage_free_count < vm_lopage_free_limit &&
39037602
A
3479 VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
3480 vm_page_release(mem, FALSE); /* page queues are not locked */
3481#if CONFIG_SECLUDED_MEMORY
3482 } else if (vm_page_secluded_count < vm_page_secluded_target &&
3483 num_tasks_can_use_secluded_mem == 0) {
3484 vm_page_release(mem,
3485 FALSE); /* page queues are not locked */
3486#endif /* CONFIG_SECLUDED_MEMORY */
316670eb
A
3487 } else {
3488 /*
3489 * IMPORTANT: we can't set the page "free" here
3490 * because that would make the page eligible for
3491 * a physically-contiguous allocation (see
3492 * vm_page_find_contiguous()) right away (we don't
3493 * hold the vm_page_queue_free lock). That would
3494 * cause trouble because the page is not actually
3495 * in the free queue yet...
3496 */
39037602 3497 mem->snext = local_freeq;
316670eb
A
3498 local_freeq = mem;
3499 pg_count++;
935ed37a 3500
39037602 3501 pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
935ed37a 3502 }
316670eb 3503 } else {
39037602
A
3504 assert(VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_fictitious_addr ||
3505 VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_guard_addr);
316670eb 3506 vm_page_release_fictitious(mem);
2d21ac55 3507 }
316670eb 3508 mem = nxt;
55e303ae 3509 }
316670eb
A
3510 freeq = mem;
3511
3512 if ( (mem = local_freeq) ) {
3513 unsigned int avail_free_count;
3514 unsigned int need_wakeup = 0;
3515 unsigned int need_priv_wakeup = 0;
39037602
A
3516#if CONFIG_SECLUDED_MEMORY
3517 unsigned int need_wakeup_secluded = 0;
3518#endif /* CONFIG_SECLUDED_MEMORY */
2d21ac55 3519
316670eb 3520 lck_mtx_lock_spin(&vm_page_queue_free_lock);
55e303ae 3521
316670eb
A
3522 while (mem) {
3523 int color;
3524
39037602 3525 nxt = mem->snext;
2d21ac55 3526
39037602 3527 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
b0d623f7 3528 assert(mem->busy);
39037602
A
3529 mem->lopage = FALSE;
3530 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
3531
3532 color = VM_PAGE_GET_PHYS_PAGE(mem) & vm_color_mask;
3533 vm_page_queue_enter_first(&vm_page_queue_free[color].qhead,
3534 mem,
3535 vm_page_t,
3536 pageq);
316670eb 3537 mem = nxt;
2d21ac55 3538 }
316670eb
A
3539 vm_page_free_count += pg_count;
3540 avail_free_count = vm_page_free_count;
3541
3542 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
3543
3544 if (avail_free_count < vm_page_free_wanted_privileged) {
3545 need_priv_wakeup = avail_free_count;
3546 vm_page_free_wanted_privileged -= avail_free_count;
3547 avail_free_count = 0;
3548 } else {
3549 need_priv_wakeup = vm_page_free_wanted_privileged;
316670eb 3550 avail_free_count -= vm_page_free_wanted_privileged;
39037602 3551 vm_page_free_wanted_privileged = 0;
316670eb 3552 }
b0d623f7 3553 }
39037602
A
3554#if CONFIG_SECLUDED_MEMORY
3555 if (vm_page_free_wanted_secluded > 0 &&
3556 avail_free_count > vm_page_free_reserved) {
3557 unsigned int available_pages;
3558 available_pages = (avail_free_count -
3559 vm_page_free_reserved);
3560 if (available_pages <
3561 vm_page_free_wanted_secluded) {
3562 need_wakeup_secluded = available_pages;
3563 vm_page_free_wanted_secluded -=
3564 available_pages;
3565 avail_free_count -= available_pages;
3566 } else {
3567 need_wakeup_secluded =
3568 vm_page_free_wanted_secluded;
3569 avail_free_count -=
3570 vm_page_free_wanted_secluded;
3571 vm_page_free_wanted_secluded = 0;
3572 }
3573 }
3574#endif /* CONFIG_SECLUDED_MEMORY */
316670eb
A
3575 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
3576 unsigned int available_pages;
55e303ae 3577
316670eb 3578 available_pages = avail_free_count - vm_page_free_reserved;
55e303ae 3579
316670eb
A
3580 if (available_pages >= vm_page_free_wanted) {
3581 need_wakeup = vm_page_free_wanted;
3582 vm_page_free_wanted = 0;
3583 } else {
3584 need_wakeup = available_pages;
3585 vm_page_free_wanted -= available_pages;
3586 }
3587 }
3588 lck_mtx_unlock(&vm_page_queue_free_lock);
55e303ae 3589
316670eb
A
3590 if (need_priv_wakeup != 0) {
3591 /*
3592 * There shouldn't be that many VM-privileged threads,
3593 * so let's wake them all up, even if we don't quite
3594 * have enough pages to satisfy them all.
3595 */
3596 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
3597 }
39037602
A
3598#if CONFIG_SECLUDED_MEMORY
3599 if (need_wakeup_secluded != 0 &&
3600 vm_page_free_wanted_secluded == 0) {
3601 thread_wakeup((event_t)
3602 &vm_page_free_wanted_secluded);
3603 } else {
3604 for (;
3605 need_wakeup_secluded != 0;
3606 need_wakeup_secluded--) {
3607 thread_wakeup_one(
3608 (event_t)
3609 &vm_page_free_wanted_secluded);
3610 }
3611 }
3612#endif /* CONFIG_SECLUDED_MEMORY */
316670eb
A
3613 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
3614 /*
3615 * We don't expect to have any more waiters
3616 * after this, so let's wake them all up at
3617 * once.
3618 */
3619 thread_wakeup((event_t) &vm_page_free_count);
3620 } else for (; need_wakeup != 0; need_wakeup--) {
3621 /*
3622 * Wake up one waiter per page we just released.
3623 */
3624 thread_wakeup_one((event_t) &vm_page_free_count);
55e303ae 3625 }
2d21ac55 3626
316670eb 3627 VM_CHECK_MEMORYSTATUS;
b0d623f7 3628 }
55e303ae
A
3629 }
3630}
3631
3632
1c79356b
A
/*
 *	vm_page_wire:
 *
 *	Mark this page as wired down by yet
 *	another map, removing it from paging queues
 *	as necessary.
 *
 *	tag: allocation-site tag recorded on the object when its first
 *	page is wired.
 *	check_memorystatus: whether to run VM_CHECK_MEMORYSTATUS after
 *	transitioning the page to the wired state.
 *
 *	The page's object and the page queues must be locked.
 */


void
vm_page_wire(
	vm_page_t	mem,
	vm_tag_t	tag,
	boolean_t	check_memorystatus)
{
	vm_object_t	m_object;

	m_object = VM_PAGE_OBJECT(mem);

//	dbgLog(current_thread(), mem->offset, m_object, 1);	/* (TEST/DEBUG) */

	VM_PAGE_CHECK(mem);
	if (m_object) {
		vm_object_lock_assert_exclusive(m_object);
	} else {
		/*
		 * In theory, the page should be in an object before it
		 * gets wired, since we need to hold the object lock
		 * to update some fields in the page structure.
		 * However, some code (i386 pmap, for example) might want
		 * to wire a page before it gets inserted into an object.
		 * That's somewhat OK, as long as nobody else can get to
		 * that page and update it at the same time.
		 */
	}
	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	if ( !VM_PAGE_WIRED(mem)) {
		/* first wiring: take the page off its paging queues and
		 * move all the accounting to the wired state */

		if (mem->laundry)
			vm_pageout_steal_laundry(mem, TRUE);

		vm_page_queues_remove(mem, TRUE);

		assert(mem->wire_count == 0);
		mem->vm_page_q_state = VM_PAGE_IS_WIRED;

		if (m_object) {

			if (!mem->private && !mem->fictitious)
			{
				/* first wired page of the object: record the
				 * wire tag and mark the object wired */
				if (!m_object->wired_page_count)
				{
					assert(VM_KERN_MEMORY_NONE != tag);
					m_object->wire_tag = tag;
					VM_OBJECT_WIRED(m_object);
				}
			}
			m_object->wired_page_count++;

			assert(m_object->resident_page_count >=
			       m_object->wired_page_count);
			/* wired pages of volatile objects are tracked
			 * separately from vm_page_purgeable_count */
			if (m_object->purgable == VM_PURGABLE_VOLATILE) {
				assert(vm_page_purgeable_count > 0);
				OSAddAtomic(-1, &vm_page_purgeable_count);
				OSAddAtomic(1, &vm_page_purgeable_wired_count);
			}
			if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
			     m_object->purgable == VM_PURGABLE_EMPTY) &&
			    m_object->vo_purgeable_owner != TASK_NULL) {
				task_t owner;

				owner = m_object->vo_purgeable_owner;
				/* less volatile bytes */
				ledger_debit(owner->ledger,
					     task_ledgers.purgeable_volatile,
					     PAGE_SIZE);
				/* more not-quite-volatile bytes */
				ledger_credit(owner->ledger,
					      task_ledgers.purgeable_nonvolatile,
					      PAGE_SIZE);
				/* more footprint */
				ledger_credit(owner->ledger,
					      task_ledgers.phys_footprint,
					      PAGE_SIZE);
			}
			if (m_object->all_reusable) {
				/*
				 * Wired pages are not counted as "re-usable"
				 * in "all_reusable" VM objects, so nothing
				 * to do here.
				 */
			} else if (mem->reusable) {
				/*
				 * This page is not "re-usable" when it's
				 * wired, so adjust its state and the
				 * accounting.
				 */
				vm_object_reuse_pages(m_object,
						      mem->offset,
						      mem->offset+PAGE_SIZE_64,
						      FALSE);
			}
		}
		assert(!mem->reusable);

		if (!mem->private && !mem->fictitious && !mem->gobbled)
			vm_page_wire_count++;
		if (mem->gobbled)
			vm_page_gobble_count--;
		mem->gobbled = FALSE;

		if (check_memorystatus == TRUE) {
			VM_CHECK_MEMORYSTATUS;
		}
		/*
		 * ENCRYPTED SWAP:
		 * The page could be encrypted, but
		 * We don't have to decrypt it here
		 * because we don't guarantee that the
		 * data is actually valid at this point.
		 * The page will get decrypted in
		 * vm_fault_wire() if needed.
		 */
	}
	assert(!mem->gobbled);
	assert(mem->vm_page_q_state == VM_PAGE_IS_WIRED);
	mem->wire_count++;
	/* wire_count wrapped to zero: the refcount overflowed, panic rather
	 * than silently unwire the page */
	if (__improbable(mem->wire_count == 0)) {
		panic("vm_page_wire(%p): wire_count overflow", mem);
	}
	VM_PAGE_CHECK(mem);
}
3767
1c79356b
A
/*
 *	vm_page_unwire:
 *
 *	Release one wiring of this page, potentially
 *	enabling it to be paged again.
 *
 *	When the last wiring is released (wire_count drops to 0),
 *	the page leaves the wired state: the global and per-object
 *	wired counts are decremented, purgeable/ledger accounting is
 *	reversed, and (if "queueit" is TRUE) the page is placed back
 *	on a pageable queue.
 *
 *	The page's object and the page queues must be locked.
 */
void
vm_page_unwire(
	vm_page_t	mem,
	boolean_t	queueit)
{
	vm_object_t	m_object;

	m_object = VM_PAGE_OBJECT(mem);

//	dbgLog(current_thread(), mem->offset, m_object, 0);	/* (TEST/DEBUG) */

	VM_PAGE_CHECK(mem);
	assert(VM_PAGE_WIRED(mem));
	assert(mem->wire_count > 0);
	assert(!mem->gobbled);
	assert(m_object != VM_OBJECT_NULL);
	vm_object_lock_assert_exclusive(m_object);
	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	if (--mem->wire_count == 0) {
		/* last wiring released: page is no longer on the wired "queue" */
		mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;

		if (!mem->private && !mem->fictitious) {
			vm_page_wire_count--;
		}
		assert(m_object->wired_page_count > 0);
		m_object->wired_page_count--;
		if (!m_object->wired_page_count) {
			/* object no longer holds any wired pages */
			VM_OBJECT_UNWIRED(m_object);
		}
		assert(m_object->resident_page_count >=
		       m_object->wired_page_count);
		if (m_object->purgable == VM_PURGABLE_VOLATILE) {
			/* page re-becomes reclaimable volatile memory */
			OSAddAtomic(+1, &vm_page_purgeable_count);
			assert(vm_page_purgeable_wired_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_wired_count);
		}
		if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
		     m_object->purgable == VM_PURGABLE_EMPTY) &&
		    m_object->vo_purgeable_owner != TASK_NULL) {
			task_t		owner;

			owner = m_object->vo_purgeable_owner;
			/* more volatile bytes */
			ledger_credit(owner->ledger,
				      task_ledgers.purgeable_volatile,
				      PAGE_SIZE);
			/* less not-quite-volatile bytes */
			ledger_debit(owner->ledger,
				     task_ledgers.purgeable_nonvolatile,
				     PAGE_SIZE);
			/* less footprint */
			ledger_debit(owner->ledger,
				     task_ledgers.phys_footprint,
				     PAGE_SIZE);
		}
		assert(m_object != kernel_object);
		assert(mem->pageq.next == 0 && mem->pageq.prev == 0);

		if (queueit == TRUE) {
			/*
			 * Re-enter the page on a pageable queue; pages of
			 * an "empty" purgeable object go straight to the
			 * inactive list since their contents are disposable.
			 */
			if (m_object->purgable == VM_PURGABLE_EMPTY) {
				vm_page_deactivate(mem);
			} else {
				vm_page_activate(mem);
			}
		}

		VM_CHECK_MEMORYSTATUS;

	}
	VM_PAGE_CHECK(mem);
}
3847
/*
 *	vm_page_deactivate:
 *
 *	Returns the given page to the inactive list,
 *	indicating that no physical maps have access
 *	to this page.  [Used by the physical mapping system.]
 *
 *	Convenience wrapper around vm_page_deactivate_internal()
 *	that always clears the hardware reference bit.
 *
 *	The page queues must be locked.
 */
void
vm_page_deactivate(
	vm_page_t	m)
{
	vm_page_deactivate_internal(m, TRUE);
}
3863
3864
/*
 *	vm_page_deactivate_internal:
 *
 *	Move "m" toward the inactive queue, optionally clearing the
 *	pmap-level reference bit ("clear_hw_reference").  Pages that
 *	are laundering, private, fictitious, owned by the compressor,
 *	on the pageout queue or wired are left where they are.
 *
 *	The page queues must be locked.
 */
void
vm_page_deactivate_internal(
	vm_page_t	m,
	boolean_t	clear_hw_reference)
{
	vm_object_t	m_object;

	m_object = VM_PAGE_OBJECT(m);

	VM_PAGE_CHECK(m);
	assert(m_object != kernel_object);
	assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);

//	dbgLog(VM_PAGE_GET_PHYS_PAGE(m), vm_page_free_count, vm_page_wire_count, 6);	/* (TEST/DEBUG) */
	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	/*
	 *	This page is no longer very interesting.  If it was
	 *	interesting (active or inactive/referenced), then we
	 *	clear the reference bit and (re)enter it in the
	 *	inactive queue.  Note wired pages should not have
	 *	their reference bit cleared.
	 */
	assert ( !(m->absent && !m->unusual));

	if (m->gobbled) {		/* can this happen? */
		assert( !VM_PAGE_WIRED(m));

		/* undo the wire-count accounting done when the page was gobbled */
		if (!m->private && !m->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
		m->gobbled = FALSE;
	}
	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * vm_page_queues_remove (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->laundry || m->private || m->fictitious ||
	    (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
	    (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
	    VM_PAGE_WIRED(m)) {
		return;
	}
	if (!m->absent && clear_hw_reference == TRUE)
		pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));

	m->reference = FALSE;
	m->no_cache = FALSE;

	if ( !VM_PAGE_INACTIVE(m)) {
		vm_page_queues_remove(m, FALSE);

		if (!VM_DYNAMIC_PAGING_ENABLED() &&
		    m->dirty && m_object->internal &&
		    (m_object->purgable == VM_PURGABLE_DENY ||
		     m_object->purgable == VM_PURGABLE_NONVOLATILE ||
		     m_object->purgable == VM_PURGABLE_VOLATILE)) {
			/*
			 * No dynamic pager to clean dirty internal pages:
			 * park the page on the throttled queue instead of
			 * the inactive queue.
			 */
			vm_page_check_pageable_safe(m);
			vm_page_queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
			m->vm_page_q_state = VM_PAGE_ON_THROTTLED_Q;
			vm_page_throttled_count++;
		} else {
			if (m_object->named && m_object->ref_count == 1) {
				/* sole-referenced named object: age via speculative queue */
				vm_page_speculate(m, FALSE);
#if DEVELOPMENT || DEBUG
				vm_page_speculative_recreated++;
#endif
			} else {
				vm_page_enqueue_inactive(m, FALSE);
			}
		}
	}
}
3941
316670eb
A
/*
 * vm_page_enqueue_cleaned
 *
 * Put the page on the cleaned queue, mark it cleaned, etc.
 * Being on the cleaned queue (and having m->clean_queue set)
 * does ** NOT ** guarantee that the page is clean!
 *
 * Pages that are laundering, private, fictitious, already on the
 * cleaned queue, or on the pageout queue are left untouched.
 *
 * Call with the queues lock held.
 */

void vm_page_enqueue_cleaned(vm_page_t m)
{
	vm_object_t	m_object;

	m_object = VM_PAGE_OBJECT(m);

	assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	assert( !(m->absent && !m->unusual));
	assert( !VM_PAGE_WIRED(m));

	if (m->gobbled) {
		/* undo the wire-count accounting done when the page was gobbled */
		if (!m->private && !m->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
		m->gobbled = FALSE;
	}
	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * vm_page_queues_remove (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->laundry || m->private || m->fictitious ||
	    (m->vm_page_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) ||
	    (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
		return;
	}
	vm_page_queues_remove(m, FALSE);

	vm_page_check_pageable_safe(m);
	vm_page_queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
	m->vm_page_q_state = VM_PAGE_ON_INACTIVE_CLEANED_Q;
	vm_page_cleaned_count++;

	/* cleaned pages count as inactive for the pageout algorithm */
	vm_page_inactive_count++;
	if (m_object->internal) {
		vm_page_pageable_internal_count++;
	} else {
		vm_page_pageable_external_count++;
	}
#if CONFIG_BACKGROUND_QUEUE
	if (m->vm_page_in_background)
		vm_page_add_to_backgroundq(m, TRUE);
#endif
	vm_pageout_enqueued_cleaned++;
}
4001
1c79356b
A
/*
 *	vm_page_activate:
 *
 *	Put the specified page on the active list (if appropriate).
 *	Laundering, private, fictitious, compressor-owned and
 *	pageout-queue pages are left alone; wired pages have their
 *	queue state updated but are not placed on a pageable queue.
 *
 *	The page queues must be locked.
 */

void
vm_page_activate(
	vm_page_t	m)
{
	vm_object_t	m_object;

	m_object = VM_PAGE_OBJECT(m);

	VM_PAGE_CHECK(m);
#ifdef	FIXME_4778297
	assert(m_object != kernel_object);
#endif
	assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	assert( !(m->absent && !m->unusual));

	if (m->gobbled) {
		assert( !VM_PAGE_WIRED(m));
		/* undo the wire-count accounting done when the page was gobbled */
		if (!m->private && !m->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
		m->gobbled = FALSE;
	}
	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * vm_page_queues_remove (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->laundry || m->private || m->fictitious ||
	    (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
	    (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q))
		return;

#if DEBUG
	if (m->vm_page_q_state == VM_PAGE_ON_ACTIVE_Q)
		panic("vm_page_activate: already active");
#endif

	if (m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
		/* speculative hit: record the reclaim for DTrace observers */
		DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
		DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
	}

	vm_page_queues_remove(m, FALSE);

	if ( !VM_PAGE_WIRED(m)) {
		vm_page_check_pageable_safe(m);
		if (!VM_DYNAMIC_PAGING_ENABLED() &&
		    m->dirty && m_object->internal &&
		    (m_object->purgable == VM_PURGABLE_DENY ||
		     m_object->purgable == VM_PURGABLE_NONVOLATILE ||
		     m_object->purgable == VM_PURGABLE_VOLATILE)) {
			/*
			 * No dynamic pager to clean dirty internal pages:
			 * park the page on the throttled queue.
			 */
			vm_page_queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
			m->vm_page_q_state = VM_PAGE_ON_THROTTLED_Q;
			vm_page_throttled_count++;
		} else {
#if CONFIG_SECLUDED_MEMORY
			/*
			 * Divert eligible file-cache pages to the secluded
			 * queue while no task is entitled to use secluded
			 * memory and the secluded target is non-zero.
			 */
			if (secluded_for_filecache &&
			    vm_page_secluded_target != 0 &&
			    num_tasks_can_use_secluded_mem == 0 &&
			    m_object->eligible_for_secluded &&
			    ((secluded_aging_policy == SECLUDED_AGING_FIFO) ||
			     (secluded_aging_policy ==
			      SECLUDED_AGING_ALONG_ACTIVE) ||
			     (secluded_aging_policy ==
			      SECLUDED_AGING_BEFORE_ACTIVE))) {
				vm_page_queue_enter(&vm_page_queue_secluded, m,
						    vm_page_t, pageq);
				m->vm_page_q_state = VM_PAGE_ON_SECLUDED_Q;
				vm_page_secluded_count++;
				vm_page_secluded_count_inuse++;
				assert(!m_object->internal);
//				vm_page_pageable_external_count++;
			} else
#endif /* CONFIG_SECLUDED_MEMORY */
			vm_page_enqueue_active(m, FALSE);
		}
		m->reference = TRUE;
		m->no_cache = FALSE;
	}
	VM_PAGE_CHECK(m);
}
4095
4096
/*
 *	vm_page_speculate:
 *
 *	Put the specified page on the speculative list (if appropriate).
 *	Speculative pages are grouped into aging bins; when the current
 *	bin's timestamp expires a new bin is started (and a full rotation
 *	pushes the old bin onto the "aged" queue via
 *	vm_page_speculate_ageit()).  "new" indicates the page was just
 *	created for this object, which bumps the object's pages_created
 *	statistic.
 *
 *	The page queues must be locked.
 */
void
vm_page_speculate(
	vm_page_t	m,
	boolean_t	new)
{
	struct vm_speculative_age_q	*aq;
	vm_object_t	m_object;

	m_object = VM_PAGE_OBJECT(m);

	VM_PAGE_CHECK(m);
	vm_page_check_pageable_safe(m);

	assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	assert( !(m->absent && !m->unusual));
	assert(m_object->internal == FALSE);

	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * vm_page_queues_remove (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->laundry || m->private || m->fictitious ||
	    (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
	    (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q))
		return;

	vm_page_queues_remove(m, FALSE);

	if ( !VM_PAGE_WIRED(m)) {
		mach_timespec_t		ts;
		clock_sec_t sec;
		clock_nsec_t nsec;

		clock_get_system_nanotime(&sec, &nsec);
		ts.tv_sec = (unsigned int) sec;
		ts.tv_nsec = nsec;

		if (vm_page_speculative_count == 0) {
			/* first speculative page: reset both bin indices */
			speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
			speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;

			aq = &vm_page_queue_speculative[speculative_age_index];

		        /*
			 * set the timer to begin a new group
			 */
			aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
			aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;

			ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
		} else {
			aq = &vm_page_queue_speculative[speculative_age_index];

			if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
				/* current bin has expired: advance to the next one */
				speculative_age_index++;

				if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
				        speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
				if (speculative_age_index == speculative_steal_index) {
					/* lapped the steal index: push it ahead too */
					speculative_steal_index = speculative_age_index + 1;

					if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
						speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
				}
				aq = &vm_page_queue_speculative[speculative_age_index];

				if (!vm_page_queue_empty(&aq->age_q))
					vm_page_speculate_ageit(aq);

				aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
				aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;

				ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
			}
		}
		vm_page_enqueue_tail(&aq->age_q, &m->pageq);
		m->vm_page_q_state = VM_PAGE_ON_SPECULATIVE_Q;
		vm_page_speculative_count++;
		vm_page_pageable_external_count++;

		if (new == TRUE) {
			vm_object_lock_assert_exclusive(m_object);

			m_object->pages_created++;
#if DEVELOPMENT || DEBUG
			vm_page_speculative_created++;
#endif
		}
	}
	VM_PAGE_CHECK(m);
}
4202
4203
/*
 *	move pages from the specified aging bin to
 *	the speculative bin that pageout_scan claims from
 *
 *	Splices the whole "aq" list onto the tail of the AGED bin
 *	("sq") by rewiring the packed queue pointers directly, then
 *	re-initializes "aq" as empty.
 *
 *	The page queues must be locked.
 */
void
vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
{
	struct vm_speculative_age_q	*sq;
	vm_page_t	t;

	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];

	if (vm_page_queue_empty(&sq->age_q)) {
		/* aged bin empty: adopt aq's list wholesale */
		sq->age_q.next = aq->age_q.next;
		sq->age_q.prev = aq->age_q.prev;

		/* repoint the first/last pages' sentinel links at sq */
		t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.next);
		t->pageq.prev = VM_PAGE_PACK_PTR(&sq->age_q);

		t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
		t->pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);
	} else {
		/* append aq's list after sq's current tail */
		t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
		t->pageq.next = aq->age_q.next;

		t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.next);
		t->pageq.prev = sq->age_q.prev;

		t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.prev);
		t->pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);

		sq->age_q.prev = aq->age_q.prev;
	}
	vm_page_queue_init(&aq->age_q);
}
4241
4242
/*
 *	vm_page_lru:
 *
 *	Move the page to the tail of the inactive queue, making it the
 *	least-recently-used candidate for reclaim.  Laundering, private,
 *	compressor-owned, pageout-queue and wired pages are left alone.
 *
 *	The page queues must be locked.
 */
void
vm_page_lru(
	vm_page_t	m)
{
	VM_PAGE_CHECK(m);
	assert(VM_PAGE_OBJECT(m) != kernel_object);
	assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * vm_page_queues_remove (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->laundry || m->private ||
	    (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
	    (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
	    VM_PAGE_WIRED(m))
		return;

	m->no_cache = FALSE;

	vm_page_queues_remove(m, FALSE);

	vm_page_enqueue_inactive(m, FALSE);
}
4272
2d21ac55 4273
b0d623f7
A
/*
 *	vm_page_reactivate_all_throttled:
 *
 *	Move every page from the throttled queue onto the head of the
 *	active queue in one splice, updating per-page queue state and
 *	the global active/pageable counters.  A no-op when dynamic
 *	paging is not enabled or the throttled queue is empty.
 *
 *	Acquires and releases the page queues lock internally.
 */
void
vm_page_reactivate_all_throttled(void)
{
	vm_page_t	first_throttled, last_throttled;
	vm_page_t	first_active;
	vm_page_t	m;
	int		extra_active_count;
	int		extra_internal_count, extra_external_count;
	vm_object_t	m_object;

	if (!VM_DYNAMIC_PAGING_ENABLED())
		return;

	extra_active_count = 0;
	extra_internal_count = 0;
	extra_external_count = 0;
	vm_page_lock_queues();
	if (! vm_page_queue_empty(&vm_page_queue_throttled)) {
		/*
		 * Switch "throttled" pages to "active".
		 */
		vm_page_queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
			VM_PAGE_CHECK(m);
			assert(m->vm_page_q_state == VM_PAGE_ON_THROTTLED_Q);

			m_object = VM_PAGE_OBJECT(m);

			extra_active_count++;
			if (m_object->internal) {
				extra_internal_count++;
			} else {
				extra_external_count++;
			}

			m->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
			VM_PAGE_CHECK(m);
#if CONFIG_BACKGROUND_QUEUE
			if (m->vm_page_in_background)
				vm_page_add_to_backgroundq(m, FALSE);
#endif
		}

		/*
		 * Transfer the entire throttled queue to a regular LRU page queues.
		 * We insert it at the head of the active queue, so that these pages
		 * get re-evaluated by the LRU algorithm first, since they've been
		 * completely out of it until now.
		 */
		first_throttled = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
		last_throttled = (vm_page_t) vm_page_queue_last(&vm_page_queue_throttled);
		first_active = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
		if (vm_page_queue_empty(&vm_page_queue_active)) {
			vm_page_queue_active.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
		} else {
			first_active->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
		}
		vm_page_queue_active.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_throttled);
		first_throttled->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active);
		last_throttled->pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active);

#if DEBUG
		printf("reactivated %d throttled pages\n", vm_page_throttled_count);
#endif
		vm_page_queue_init(&vm_page_queue_throttled);
		/*
		 * Adjust the global page counts.
		 */
		vm_page_active_count += extra_active_count;
		vm_page_pageable_internal_count += extra_internal_count;
		vm_page_pageable_external_count += extra_external_count;
		vm_page_throttled_count = 0;
	}
	assert(vm_page_throttled_count == 0);
	assert(vm_page_queue_empty(&vm_page_queue_throttled));
	vm_page_unlock_queues();
}
4350
4351
/*
 * move pages from the indicated local queue to the global active queue
 * its ok to fail if we're below the hard limit and force == FALSE
 * the nolocks == TRUE case is to allow this function to be run on
 * the hibernate path
 */

void
vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
{
	struct vpl	*lq;
	vm_page_t	first_local, last_local;
	vm_page_t	first_active;
	vm_page_t	m;
	uint32_t	count = 0;

	if (vm_page_local_q == NULL)
		return;

	lq = &vm_page_local_q[lid].vpl_un.vpl;

	if (nolocks == FALSE) {
		if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
			/* below the hard limit: don't spin for the queues lock */
			if ( !vm_page_trylockspin_queues())
				return;
		} else
			vm_page_lockspin_queues();

		VPL_LOCK(&lq->vpl_lock);
	}
	if (lq->vpl_count) {
		/*
		 * Switch "local" pages to "active".
		 */
		assert(!vm_page_queue_empty(&lq->vpl_queue));

		vm_page_queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
			VM_PAGE_CHECK(m);
			vm_page_check_pageable_safe(m);
			assert(m->vm_page_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q);
			assert(!m->fictitious);

			if (m->local_id != lid)
				panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);

			m->local_id = 0;
			m->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
			VM_PAGE_CHECK(m);
#if CONFIG_BACKGROUND_QUEUE
			if (m->vm_page_in_background)
				vm_page_add_to_backgroundq(m, FALSE);
#endif
			count++;
		}
		if (count != lq->vpl_count)
			panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);

		/*
		 * Transfer the entire local queue to a regular LRU page queues.
		 */
		first_local = (vm_page_t) vm_page_queue_first(&lq->vpl_queue);
		last_local = (vm_page_t) vm_page_queue_last(&lq->vpl_queue);
		first_active = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);

		if (vm_page_queue_empty(&vm_page_queue_active)) {
			vm_page_queue_active.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
		} else {
			first_active->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
		}
		vm_page_queue_active.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
		first_local->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active);
		last_local->pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active);

		vm_page_queue_init(&lq->vpl_queue);
		/*
		 * Adjust the global page counts.
		 */
		vm_page_active_count += lq->vpl_count;
		vm_page_pageable_internal_count += lq->vpl_internal_count;
		vm_page_pageable_external_count += lq->vpl_external_count;
		lq->vpl_count = 0;
		lq->vpl_internal_count = 0;
		lq->vpl_external_count = 0;
	}
	assert(vm_page_queue_empty(&lq->vpl_queue));

	if (nolocks == FALSE) {
		VPL_UNLOCK(&lq->vpl_lock);
		vm_page_unlock_queues();
	}
}
4443
1c79356b
A
/*
 *	vm_page_part_zero_fill:
 *
 *	Zero-fill a part of the page: "len" bytes starting at byte
 *	offset "m_pa".  When the pmap layer supports partial-page
 *	zeroing it is used directly; otherwise the page is copied
 *	through a scratch page with the target range zeroed.
 */
#define PMAP_ZERO_PART_PAGE_IMPLEMENTED
void
vm_page_part_zero_fill(
	vm_page_t	m,
	vm_offset_t	m_pa,
	vm_size_t	len)
{

#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(m);
#endif

#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
	pmap_zero_part_page(VM_PAGE_GET_PHYS_PAGE(m), m_pa, len);
#else
	vm_page_t	tmp;
	while (1) {
		/* grab a scratch page, waiting for memory if necessary */
       		tmp = vm_page_grab();
		if (tmp == VM_PAGE_NULL) {
			vm_page_wait(THREAD_UNINT);
			continue;
		}
		break;
	}
	vm_page_zero_fill(tmp);
	/* copy the bytes before and after the range being zeroed */
	if(m_pa != 0) {
		vm_page_part_copy(m, 0, tmp, 0, m_pa);
	}
	if((m_pa + len) < PAGE_SIZE) {
		vm_page_part_copy(m, m_pa + len, tmp,
				m_pa + len, PAGE_SIZE - (m_pa + len));
	}
	vm_page_copy(tmp,m);
	VM_PAGE_FREE(tmp);
#endif

}
4490
/*
 *	vm_page_zero_fill:
 *
 *	Zero-fill the specified page via the pmap layer.
 */
void
vm_page_zero_fill(
	vm_page_t	m)
{
        XPR(XPR_VM_PAGE,
            "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
	    VM_PAGE_OBJECT(m), m->offset, m, 0,0);
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(m);
#endif

//	dbgTrace(0xAEAEAEAE, VM_PAGE_GET_PHYS_PAGE(m), 0);		/* (BRINGUP) */
	pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
}
4514
/*
 *	vm_page_part_copy:
 *
 *	copy part of one page to another:  "len" bytes from offset
 *	"src_pa" of "src_m" to offset "dst_pa" of "dst_m", via the
 *	pmap layer.
 */

void
vm_page_part_copy(
	vm_page_t	src_m,
	vm_offset_t	src_pa,
	vm_page_t	dst_m,
	vm_offset_t	dst_pa,
	vm_size_t	len)
{
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(src_m);
	VM_PAGE_CHECK(dst_m);
#endif
	pmap_copy_part_page(VM_PAGE_GET_PHYS_PAGE(src_m), src_pa,
			    VM_PAGE_GET_PHYS_PAGE(dst_m), dst_pa, len);
}
4540
/*
 *	vm_page_copy:
 *
 *	Copy one page to another.
 *
 *	Before the physical copy: validates code-signed source pages,
 *	slides slideable ones, and propagates the cs_tainted (but not
 *	the cs_validated) bit to the destination.
 *
 *	ENCRYPTED SWAP:
 *	The source page should not be encrypted.  The caller should
 *	make sure the page is decrypted first, if necessary.
 */

/* statistics: how often copies triggered CS validation / saw tainted pages */
int vm_page_copy_cs_validations = 0;
int vm_page_copy_cs_tainted = 0;

void
vm_page_copy(
	vm_page_t	src_m,
	vm_page_t	dest_m)
{
	vm_object_t	src_m_object;

	src_m_object = VM_PAGE_OBJECT(src_m);

        XPR(XPR_VM_PAGE,
        "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
	    src_m_object, src_m->offset,
	    VM_PAGE_OBJECT(dest_m), dest_m->offset,
	    0);
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(src_m);
	VM_PAGE_CHECK(dest_m);
#endif
	vm_object_lock_assert_held(src_m_object);

	/*
	 * ENCRYPTED SWAP:
	 * The source page should not be encrypted at this point.
	 * The destination page will therefore not contain encrypted
	 * data after the copy.
	 */
	if (src_m->encrypted) {
		panic("vm_page_copy: source page %p is encrypted\n", src_m);
	}
	dest_m->encrypted = FALSE;

	if (src_m_object != VM_OBJECT_NULL &&
	    src_m_object->code_signed) {
		/*
		 * We're copying a page from a code-signed object.
		 * Whoever ends up mapping the copy page might care about
		 * the original page's integrity, so let's validate the
		 * source page now.
		 */
		vm_page_copy_cs_validations++;
		vm_page_validate_cs(src_m);
#if DEVELOPMENT || DEBUG
		DTRACE_VM4(codesigned_copy,
			   vm_object_t, src_m_object,
			   vm_object_offset_t, src_m->offset,
			   int, src_m->cs_validated,
			   int, src_m->cs_tainted);
#endif /* DEVELOPMENT || DEBUG */

	}

	if (vm_page_is_slideable(src_m)) {
		/* slide the source in place before copying its contents */
		boolean_t was_busy = src_m->busy;
		src_m->busy = TRUE;
		(void) vm_page_slide(src_m, 0);
		assert(src_m->busy);
		if (!was_busy) {
			PAGE_WAKEUP_DONE(src_m);
		}
	}

	/*
	 * Propagate the cs_tainted bit to the copy page. Do not propagate
	 * the cs_validated bit.
	 */
	dest_m->cs_tainted = src_m->cs_tainted;
	if (dest_m->cs_tainted) {
		vm_page_copy_cs_tainted++;
	}
	dest_m->slid = src_m->slid;
	dest_m->error = src_m->error; /* sliding src_m might have failed... */
	pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(src_m), VM_PAGE_GET_PHYS_PAGE(dest_m));
}
4631
2d21ac55 4632#if MACH_ASSERT
b0d623f7
A
/*
 *	_vm_page_print:
 *
 *	Debug helper (MACH_ASSERT builds): dump the queue linkage,
 *	object/offset, wire count, queue state and every status bit
 *	of a vm_page to the console.
 */
static void
_vm_page_print(
	vm_page_t	p)
{
	printf("vm_page %p: \n", p);
	printf("  pageq: next=%p prev=%p\n",
	       (vm_page_t)VM_PAGE_UNPACK_PTR(p->pageq.next),
	       (vm_page_t)VM_PAGE_UNPACK_PTR(p->pageq.prev));
	printf("  listq: next=%p prev=%p\n",
	       (vm_page_t)(VM_PAGE_UNPACK_PTR(p->listq.next)),
	       (vm_page_t)(VM_PAGE_UNPACK_PTR(p->listq.prev)));
	printf("  next=%p\n", (vm_page_t)(VM_PAGE_UNPACK_PTR(p->next_m)));
	printf("  object=%p offset=0x%llx\n",VM_PAGE_OBJECT(p), p->offset);
	printf("  wire_count=%u\n", p->wire_count);
	printf("  q_state=%u\n", p->vm_page_q_state);

	/* each flag prints with a "!" prefix when clear */
	printf("  %slaundry, %sref, %sgobbled, %sprivate\n",
	       (p->laundry ? "" : "!"),
	       (p->reference ? "" : "!"),
	       (p->gobbled ? "" : "!"),
	       (p->private ? "" : "!"));
	printf("  %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
	       (p->busy ? "" : "!"),
	       (p->wanted ? "" : "!"),
	       (p->tabled ? "" : "!"),
	       (p->fictitious ? "" : "!"),
	       (p->pmapped ? "" : "!"),
	       (p->wpmapped ? "" : "!"));
	printf("  %sfree_when_done, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
	       (p->free_when_done ? "" : "!"),
	       (p->absent ? "" : "!"),
	       (p->error ? "" : "!"),
	       (p->dirty ? "" : "!"),
	       (p->cleaning ? "" : "!"),
	       (p->precious ? "" : "!"),
	       (p->clustered ? "" : "!"));
	printf("  %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
	       (p->overwriting ? "" : "!"),
	       (p->restart ? "" : "!"),
	       (p->unusual ? "" : "!"),
	       (p->encrypted ? "" : "!"),
	       (p->encrypted_cleaning ? "" : "!"));
	printf("  %scs_validated, %scs_tainted, %scs_nx, %sno_cache\n",
	       (p->cs_validated ? "" : "!"),
	       (p->cs_tainted ? "" : "!"),
	       (p->cs_nx ? "" : "!"),
	       (p->no_cache ? "" : "!"));

	printf("phys_page=0x%x\n", VM_PAGE_GET_PHYS_PAGE(p));
}
4683
1c79356b
A
/*
 *	Check that the list of pages is ordered by
 *	ascending physical address and has no holes.
 *
 *	Walks the NEXT_PAGE chain starting at "pages", panicking on a
 *	non-contiguous physical page number or a count mismatch with
 *	"npages".  Returns 1 on success (MACH_ASSERT builds only).
 */
static int
vm_page_verify_contiguous(
	vm_page_t	pages,
	unsigned int	npages)
{
	vm_page_t	m;
	unsigned int	page_count;
	vm_offset_t	prev_addr;

	prev_addr = VM_PAGE_GET_PHYS_PAGE(pages);
	page_count = 1;
	for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
		if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
			printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
			       m, (long)prev_addr, VM_PAGE_GET_PHYS_PAGE(m));
			printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
			panic("vm_page_verify_contiguous:  not contiguous!");
		}
		prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
		++page_count;
	}
	if (page_count != npages) {
		printf("pages %p actual count 0x%x but requested 0x%x\n",
		       pages, page_count, npages);
		panic("vm_page_verify_contiguous:  count error");
	}
	return 1;
}
1c79356b
A
4716
4717
2d21ac55
A
4718/*
4719 * Check the free lists for proper length etc.
4720 */
fe8ab488 4721static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
b0d623f7
A
4722static unsigned int
4723vm_page_verify_free_list(
39037602 4724 vm_page_queue_head_t *vm_page_queue,
b0d623f7
A
4725 unsigned int color,
4726 vm_page_t look_for_page,
4727 boolean_t expect_page)
4728{
4729 unsigned int npages;
4730 vm_page_t m;
4731 vm_page_t prev_m;
4732 boolean_t found_page;
4733
fe8ab488
A
4734 if (! vm_page_verify_this_free_list_enabled)
4735 return 0;
4736
b0d623f7
A
4737 found_page = FALSE;
4738 npages = 0;
39037602
A
4739 prev_m = (vm_page_t)((uintptr_t)vm_page_queue);
4740
4741 vm_page_queue_iterate(vm_page_queue,
4742 m,
4743 vm_page_t,
4744 pageq) {
6d2010ae 4745
b0d623f7
A
4746 if (m == look_for_page) {
4747 found_page = TRUE;
4748 }
39037602 4749 if ((vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.prev) != prev_m)
b0d623f7 4750 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
39037602 4751 color, npages, m, (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.prev), prev_m);
b0d623f7
A
4752 if ( ! m->busy )
4753 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
4754 color, npages, m);
6d2010ae 4755 if (color != (unsigned int) -1) {
39037602 4756 if ((VM_PAGE_GET_PHYS_PAGE(m) & vm_color_mask) != color)
6d2010ae 4757 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
39037602
A
4758 color, npages, m, VM_PAGE_GET_PHYS_PAGE(m) & vm_color_mask, color);
4759 if (m->vm_page_q_state != VM_PAGE_ON_FREE_Q)
4760 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p - expecting q_state == VM_PAGE_ON_FREE_Q, found %d\n",
4761 color, npages, m, m->vm_page_q_state);
4762 } else {
4763 if (m->vm_page_q_state != VM_PAGE_ON_FREE_LOCAL_Q)
4764 panic("vm_page_verify_free_list(npages=%u): local page %p - expecting q_state == VM_PAGE_ON_FREE_LOCAL_Q, found %d\n",
4765 npages, m, m->vm_page_q_state);
6d2010ae 4766 }
b0d623f7
A
4767 ++npages;
4768 prev_m = m;
4769 }
4770 if (look_for_page != VM_PAGE_NULL) {
4771 unsigned int other_color;
4772
4773 if (expect_page && !found_page) {
4774 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
39037602 4775 color, npages, look_for_page, VM_PAGE_GET_PHYS_PAGE(look_for_page));
b0d623f7
A
4776 _vm_page_print(look_for_page);
4777 for (other_color = 0;
4778 other_color < vm_colors;
4779 other_color++) {
4780 if (other_color == color)
4781 continue;
39037602 4782 vm_page_verify_free_list(&vm_page_queue_free[other_color].qhead,
6d2010ae 4783 other_color, look_for_page, FALSE);
b0d623f7 4784 }
6d2010ae 4785 if (color == (unsigned int) -1) {
d1ecb069
A
4786 vm_page_verify_free_list(&vm_lopage_queue_free,
4787 (unsigned int) -1, look_for_page, FALSE);
4788 }
b0d623f7
A
4789 panic("vm_page_verify_free_list(color=%u)\n", color);
4790 }
4791 if (!expect_page && found_page) {
4792 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
39037602 4793 color, npages, look_for_page, VM_PAGE_GET_PHYS_PAGE(look_for_page));
b0d623f7
A
4794 }
4795 }
4796 return npages;
4797}
4798
fe8ab488 4799static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
2d21ac55
A
4800static void
4801vm_page_verify_free_lists( void )
4802{
d1ecb069 4803 unsigned int color, npages, nlopages;
fe8ab488 4804 boolean_t toggle = TRUE;
b0d623f7 4805
fe8ab488 4806 if (! vm_page_verify_all_free_lists_enabled)
b0d623f7
A
4807 return;
4808
2d21ac55 4809 npages = 0;
b0d623f7
A
4810
4811 lck_mtx_lock(&vm_page_queue_free_lock);
fe8ab488
A
4812
4813 if (vm_page_verify_this_free_list_enabled == TRUE) {
4814 /*
4815 * This variable has been set globally for extra checking of
4816 * each free list Q. Since we didn't set it, we don't own it
4817 * and we shouldn't toggle it.
4818 */
4819 toggle = FALSE;
4820 }
4821
4822 if (toggle == TRUE) {
4823 vm_page_verify_this_free_list_enabled = TRUE;
4824 }
2d21ac55
A
4825
4826 for( color = 0; color < vm_colors; color++ ) {
39037602 4827 npages += vm_page_verify_free_list(&vm_page_queue_free[color].qhead,
6d2010ae 4828 color, VM_PAGE_NULL, FALSE);
2d21ac55 4829 }
d1ecb069
A
4830 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
4831 (unsigned int) -1,
4832 VM_PAGE_NULL, FALSE);
4833 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
4834 panic("vm_page_verify_free_lists: "
4835 "npages %u free_count %d nlopages %u lo_free_count %u",
4836 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
6d2010ae 4837
fe8ab488
A
4838 if (toggle == TRUE) {
4839 vm_page_verify_this_free_list_enabled = FALSE;
4840 }
4841
b0d623f7 4842 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55 4843}
2d21ac55 4844
b0d623f7 4845#endif /* MACH_ASSERT */
2d21ac55 4846
91447636 4847
3e170ce0
A
4848
4849
4850
4851extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4852
/*
 *	CONTIGUOUS PAGE ALLOCATION
 *
 *	Find a region large enough to contain at least n pages
 *	of contiguous physical memory.
 *
 *	This is done by traversing the vm_page_t array in a linear fashion.
 *	We assume that the vm_page_t array has the available physical pages in an
 *	ordered, ascending list... this is currently true of all our implementations
 *	and must remain so... there can be 'holes' in the array... we also can
 *	no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed,
 *	which used to happen via 'vm_page_convert'... that function was no longer
 *	being called and was removed...
 *
 *	The basic flow consists of stabilizing some of the interesting state of
 *	a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
 *	sweep at the beginning of the array looking for pages that meet our criteria
 *	for a 'stealable' page... currently we are pretty conservative... if the page
 *	meets these criteria and is physically contiguous to the previous page in the 'run',
 *	we keep developing it.  If we hit a page that doesn't fit, we reset our state
 *	and start to develop a new run... if at this point we've already considered
 *	at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
 *	and mutex_pause (which will yield the processor), to keep the latency low w/r
 *	to other threads trying to acquire free pages (or move pages from q to q),
 *	and then continue from the spot we left off... we only make 1 pass through the
 *	array.  Once we have a 'run' that is long enough, we'll go into the loop
 *	which steals the pages from the queues they're currently on... pages on the free
 *	queue can be stolen directly... pages that are on any of the other queues
 *	must be removed from the object they are tabled on... this requires taking the
 *	object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
 *	or if the state of the page behind the vm_object lock is no longer viable, we'll
 *	dump the pages we've currently stolen back to the free list, and pick up our
 *	scan from the point where we aborted the 'current' run.
 *
 *	Requirements:
 *		- neither vm_page_queue nor vm_free_list lock can be held on entry
 *
 *	Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
 *
 *	Algorithm:
 */
A
4895
4896#define MAX_CONSIDERED_BEFORE_YIELD 1000
4897
4898
4899#define RESET_STATE_OF_RUN() \
4900 MACRO_BEGIN \
4901 prevcontaddr = -2; \
b0d623f7 4902 start_pnum = -1; \
2d21ac55
A
4903 free_considered = 0; \
4904 substitute_needed = 0; \
4905 npages = 0; \
4906 MACRO_END
4907
b0d623f7
A
4908/*
4909 * Can we steal in-use (i.e. not free) pages when searching for
4910 * physically-contiguous pages ?
4911 */
4912#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
4913
4914static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
4915#if DEBUG
4916int vm_page_find_contig_debug = 0;
4917#endif
2d21ac55 4918
1c79356b
A
4919static vm_page_t
4920vm_page_find_contiguous(
2d21ac55
A
4921 unsigned int contig_pages,
4922 ppnum_t max_pnum,
b0d623f7
A
4923 ppnum_t pnum_mask,
4924 boolean_t wire,
4925 int flags)
1c79356b 4926{
2d21ac55 4927 vm_page_t m = NULL;
e5568f75 4928 ppnum_t prevcontaddr;
b0d623f7
A
4929 ppnum_t start_pnum;
4930 unsigned int npages, considered, scanned;
4931 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
4932 unsigned int idx_last_contig_page_found = 0;
2d21ac55
A
4933 int free_considered, free_available;
4934 int substitute_needed;
3e170ce0 4935 boolean_t wrapped, zone_gc_called = FALSE;
593a1d5f 4936#if DEBUG
b0d623f7
A
4937 clock_sec_t tv_start_sec, tv_end_sec;
4938 clock_usec_t tv_start_usec, tv_end_usec;
593a1d5f 4939#endif
3e170ce0 4940
2d21ac55
A
4941 int yielded = 0;
4942 int dumped_run = 0;
4943 int stolen_pages = 0;
39236c6e 4944 int compressed_pages = 0;
3e170ce0 4945
1c79356b 4946
2d21ac55 4947 if (contig_pages == 0)
1c79356b
A
4948 return VM_PAGE_NULL;
4949
3e170ce0
A
4950full_scan_again:
4951
2d21ac55
A
4952#if MACH_ASSERT
4953 vm_page_verify_free_lists();
593a1d5f
A
4954#endif
4955#if DEBUG
2d21ac55
A
4956 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
4957#endif
39236c6e
A
4958 PAGE_REPLACEMENT_ALLOWED(TRUE);
4959
2d21ac55 4960 vm_page_lock_queues();
3e170ce0
A
4961
4962
b0d623f7 4963 lck_mtx_lock(&vm_page_queue_free_lock);
2d21ac55
A
4964
4965 RESET_STATE_OF_RUN();
1c79356b 4966
b0d623f7 4967 scanned = 0;
2d21ac55
A
4968 considered = 0;
4969 free_available = vm_page_free_count - vm_page_free_reserved;
e5568f75 4970
b0d623f7
A
4971 wrapped = FALSE;
4972
4973 if(flags & KMA_LOMEM)
4974 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
4975 else
4976 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
4977
4978 orig_last_idx = idx_last_contig_page_found;
4979 last_idx = orig_last_idx;
4980
4981 for (page_idx = last_idx, start_idx = last_idx;
2d21ac55
A
4982 npages < contig_pages && page_idx < vm_pages_count;
4983 page_idx++) {
b0d623f7
A
4984retry:
4985 if (wrapped &&
4986 npages == 0 &&
4987 page_idx >= orig_last_idx) {
4988 /*
4989 * We're back where we started and we haven't
4990 * found any suitable contiguous range. Let's
4991 * give up.
4992 */
4993 break;
4994 }
4995 scanned++;
2d21ac55 4996 m = &vm_pages[page_idx];
e5568f75 4997
b0d623f7
A
4998 assert(!m->fictitious);
4999 assert(!m->private);
5000
39037602 5001 if (max_pnum && VM_PAGE_GET_PHYS_PAGE(m) > max_pnum) {
2d21ac55
A
5002 /* no more low pages... */
5003 break;
e5568f75 5004 }
39037602 5005 if (!npages & ((VM_PAGE_GET_PHYS_PAGE(m) & pnum_mask) != 0)) {
b0d623f7
A
5006 /*
5007 * not aligned
5008 */
5009 RESET_STATE_OF_RUN();
5010
5011 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
39037602
A
5012 m->encrypted_cleaning || m->laundry || m->wanted ||
5013 m->cleaning || m->overwriting || m->free_when_done) {
2d21ac55
A
5014 /*
5015 * page is in a transient state
5016 * or a state we don't want to deal
5017 * with, so don't consider it which
5018 * means starting a new run
5019 */
5020 RESET_STATE_OF_RUN();
1c79356b 5021
39037602
A
5022 } else if ((m->vm_page_q_state == VM_PAGE_NOT_ON_Q) ||
5023 (m->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q) ||
5024 (m->vm_page_q_state == VM_PAGE_ON_FREE_LOPAGE_Q) ||
5025 (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
2d21ac55 5026 /*
39037602
A
5027 * page needs to be on one of our queues (other then the pageout or special free queues)
5028 * or it needs to belong to the compressor pool (which is now indicated
5029 * by vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR and falls out
5030 * from the check for VM_PAGE_NOT_ON_Q)
2d21ac55
A
5031 * in order for it to be stable behind the
5032 * locks we hold at this point...
5033 * if not, don't consider it which
5034 * means starting a new run
5035 */
5036 RESET_STATE_OF_RUN();
5037
39037602 5038 } else if ((m->vm_page_q_state != VM_PAGE_ON_FREE_Q) && (!m->tabled || m->busy)) {
2d21ac55
A
5039 /*
5040 * pages on the free list are always 'busy'
5041 * so we couldn't test for 'busy' in the check
5042 * for the transient states... pages that are
5043 * 'free' are never 'tabled', so we also couldn't
5044 * test for 'tabled'. So we check here to make
5045 * sure that a non-free page is not busy and is
5046 * tabled on an object...
5047 * if not, don't consider it which
5048 * means starting a new run
5049 */
5050 RESET_STATE_OF_RUN();
5051
5052 } else {
39037602
A
5053 if (VM_PAGE_GET_PHYS_PAGE(m) != prevcontaddr + 1) {
5054 if ((VM_PAGE_GET_PHYS_PAGE(m) & pnum_mask) != 0) {
b0d623f7
A
5055 RESET_STATE_OF_RUN();
5056 goto did_consider;
5057 } else {
5058 npages = 1;
5059 start_idx = page_idx;
39037602 5060 start_pnum = VM_PAGE_GET_PHYS_PAGE(m);
b0d623f7 5061 }
2d21ac55
A
5062 } else {
5063 npages++;
e5568f75 5064 }
39037602 5065 prevcontaddr = VM_PAGE_GET_PHYS_PAGE(m);
b0d623f7
A
5066
5067 VM_PAGE_CHECK(m);
39037602 5068 if (m->vm_page_q_state == VM_PAGE_ON_FREE_Q) {
2d21ac55 5069 free_considered++;
b0d623f7
A
5070 } else {
5071 /*
5072 * This page is not free.
5073 * If we can't steal used pages,
5074 * we have to give up this run
5075 * and keep looking.
5076 * Otherwise, we might need to
5077 * move the contents of this page
5078 * into a substitute page.
5079 */
5080#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
39236c6e 5081 if (m->pmapped || m->dirty || m->precious) {
b0d623f7
A
5082 substitute_needed++;
5083 }
5084#else
5085 RESET_STATE_OF_RUN();
5086#endif
2d21ac55 5087 }
b0d623f7 5088
2d21ac55
A
5089 if ((free_considered + substitute_needed) > free_available) {
5090 /*
5091 * if we let this run continue
5092 * we will end up dropping the vm_page_free_count
5093 * below the reserve limit... we need to abort
5094 * this run, but we can at least re-consider this
5095 * page... thus the jump back to 'retry'
5096 */
5097 RESET_STATE_OF_RUN();
5098
5099 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
5100 considered++;
5101 goto retry;
e5568f75 5102 }
2d21ac55
A
5103 /*
5104 * free_available == 0
5105 * so can't consider any free pages... if
5106 * we went to retry in this case, we'd
5107 * get stuck looking at the same page
5108 * w/o making any forward progress
5109 * we also want to take this path if we've already
5110 * reached our limit that controls the lock latency
5111 */
e5568f75 5112 }
2d21ac55 5113 }
b0d623f7 5114did_consider:
2d21ac55 5115 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
39236c6e
A
5116
5117 PAGE_REPLACEMENT_ALLOWED(FALSE);
5118
b0d623f7 5119 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55 5120 vm_page_unlock_queues();
e5568f75 5121
2d21ac55
A
5122 mutex_pause(0);
5123
39236c6e
A
5124 PAGE_REPLACEMENT_ALLOWED(TRUE);
5125
2d21ac55 5126 vm_page_lock_queues();
b0d623f7 5127 lck_mtx_lock(&vm_page_queue_free_lock);
2d21ac55
A
5128
5129 RESET_STATE_OF_RUN();
1c79356b 5130 /*
2d21ac55
A
5131 * reset our free page limit since we
5132 * dropped the lock protecting the vm_page_free_queue
1c79356b 5133 */
2d21ac55
A
5134 free_available = vm_page_free_count - vm_page_free_reserved;
5135 considered = 0;
3e170ce0 5136
2d21ac55 5137 yielded++;
3e170ce0 5138
2d21ac55
A
5139 goto retry;
5140 }
5141 considered++;
5142 }
5143 m = VM_PAGE_NULL;
5144
b0d623f7
A
5145 if (npages != contig_pages) {
5146 if (!wrapped) {
5147 /*
5148 * We didn't find a contiguous range but we didn't
5149 * start from the very first page.
5150 * Start again from the very first page.
5151 */
5152 RESET_STATE_OF_RUN();
5153 if( flags & KMA_LOMEM)
5154 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
5155 else
5156 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
5157 last_idx = 0;
5158 page_idx = last_idx;
5159 wrapped = TRUE;
5160 goto retry;
5161 }
5162 lck_mtx_unlock(&vm_page_queue_free_lock);
5163 } else {
2d21ac55
A
5164 vm_page_t m1;
5165 vm_page_t m2;
5166 unsigned int cur_idx;
5167 unsigned int tmp_start_idx;
5168 vm_object_t locked_object = VM_OBJECT_NULL;
5169 boolean_t abort_run = FALSE;
5170
b0d623f7
A
5171 assert(page_idx - start_idx == contig_pages);
5172
2d21ac55
A
5173 tmp_start_idx = start_idx;
5174
5175 /*
5176 * first pass through to pull the free pages
5177 * off of the free queue so that in case we
5178 * need substitute pages, we won't grab any
5179 * of the free pages in the run... we'll clear
5180 * the 'free' bit in the 2nd pass, and even in
5181 * an abort_run case, we'll collect all of the
5182 * free pages in this run and return them to the free list
5183 */
5184 while (start_idx < page_idx) {
5185
5186 m1 = &vm_pages[start_idx++];
5187
b0d623f7 5188#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
39037602 5189 assert(m1->vm_page_q_state == VM_PAGE_ON_FREE_Q);
b0d623f7
A
5190#endif
5191
39037602 5192 if (m1->vm_page_q_state == VM_PAGE_ON_FREE_Q) {
0b4c1975 5193 unsigned int color;
2d21ac55 5194
39037602 5195 color = VM_PAGE_GET_PHYS_PAGE(m1) & vm_color_mask;
b0d623f7 5196#if MACH_ASSERT
39037602 5197 vm_page_verify_free_list(&vm_page_queue_free[color].qhead, color, m1, TRUE);
b0d623f7 5198#endif
39037602
A
5199 vm_page_queue_remove(&vm_page_queue_free[color].qhead,
5200 m1,
5201 vm_page_t,
5202 pageq);
5203
5204 VM_PAGE_ZERO_PAGEQ_ENTRY(m1);
0b4c1975 5205#if MACH_ASSERT
39037602 5206 vm_page_verify_free_list(&vm_page_queue_free[color].qhead, color, VM_PAGE_NULL, FALSE);
0b4c1975 5207#endif
b0d623f7
A
5208 /*
5209 * Clear the "free" bit so that this page
5210 * does not get considered for another
5211 * concurrent physically-contiguous allocation.
5212 */
39037602 5213 m1->vm_page_q_state = VM_PAGE_NOT_ON_Q;
b0d623f7 5214 assert(m1->busy);
0b4c1975
A
5215
5216 vm_page_free_count--;
2d21ac55
A
5217 }
5218 }
b0d623f7
A
5219 if( flags & KMA_LOMEM)
5220 vm_page_lomem_find_contiguous_last_idx = page_idx;
5221 else
5222 vm_page_find_contiguous_last_idx = page_idx;
5223
2d21ac55
A
5224 /*
5225 * we can drop the free queue lock at this point since
5226 * we've pulled any 'free' candidates off of the list
5227 * we need it dropped so that we can do a vm_page_grab
5228 * when substituing for pmapped/dirty pages
5229 */
b0d623f7 5230 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
5231
5232 start_idx = tmp_start_idx;
5233 cur_idx = page_idx - 1;
5234
5235 while (start_idx++ < page_idx) {
5236 /*
5237 * must go through the list from back to front
5238 * so that the page list is created in the
5239 * correct order - low -> high phys addresses
5240 */
5241 m1 = &vm_pages[cur_idx--];
5242
39037602 5243 if (m1->vm_page_object == 0) {
2d21ac55 5244 /*
b0d623f7 5245 * page has already been removed from
2d21ac55
A
5246 * the free list in the 1st pass
5247 */
39037602 5248 assert(m1->vm_page_q_state == VM_PAGE_NOT_ON_Q);
b0d623f7 5249 assert(m1->offset == (vm_object_offset_t) -1);
2d21ac55
A
5250 assert(m1->busy);
5251 assert(!m1->wanted);
5252 assert(!m1->laundry);
e5568f75 5253 } else {
2d21ac55 5254 vm_object_t object;
39236c6e
A
5255 int refmod;
5256 boolean_t disconnected, reusable;
2d21ac55
A
5257
5258 if (abort_run == TRUE)
5259 continue;
5260
39037602
A
5261 assert(m1->vm_page_q_state != VM_PAGE_NOT_ON_Q);
5262
5263 object = VM_PAGE_OBJECT(m1);
2d21ac55
A
5264
5265 if (object != locked_object) {
5266 if (locked_object) {
5267 vm_object_unlock(locked_object);
5268 locked_object = VM_OBJECT_NULL;
5269 }
5270 if (vm_object_lock_try(object))
5271 locked_object = object;
5272 }
5273 if (locked_object == VM_OBJECT_NULL ||
b0d623f7 5274 (VM_PAGE_WIRED(m1) || m1->gobbled ||
39037602
A
5275 m1->encrypted_cleaning || m1->laundry || m1->wanted ||
5276 m1->cleaning || m1->overwriting || m1->free_when_done || m1->busy) ||
5277 (m1->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
2d21ac55
A
5278
5279 if (locked_object) {
5280 vm_object_unlock(locked_object);
5281 locked_object = VM_OBJECT_NULL;
5282 }
5283 tmp_start_idx = cur_idx;
5284 abort_run = TRUE;
5285 continue;
5286 }
39236c6e
A
5287
5288 disconnected = FALSE;
5289 reusable = FALSE;
5290
5291 if ((m1->reusable ||
39037602
A
5292 object->all_reusable) &&
5293 (m1->vm_page_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q) &&
39236c6e
A
5294 !m1->dirty &&
5295 !m1->reference) {
5296 /* reusable page... */
39037602 5297 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1));
39236c6e
A
5298 disconnected = TRUE;
5299 if (refmod == 0) {
5300 /*
5301 * ... not reused: can steal
5302 * without relocating contents.
5303 */
5304 reusable = TRUE;
5305 }
5306 }
5307
5308 if ((m1->pmapped &&
5309 ! reusable) ||
5310 m1->dirty ||
5311 m1->precious) {
2d21ac55
A
5312 vm_object_offset_t offset;
5313
5314 m2 = vm_page_grab();
5315
5316 if (m2 == VM_PAGE_NULL) {
5317 if (locked_object) {
5318 vm_object_unlock(locked_object);
5319 locked_object = VM_OBJECT_NULL;
5320 }
5321 tmp_start_idx = cur_idx;
5322 abort_run = TRUE;
5323 continue;
5324 }
39236c6e
A
5325 if (! disconnected) {
5326 if (m1->pmapped)
39037602 5327 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1));
39236c6e
A
5328 else
5329 refmod = 0;
5330 }
5331
5332 /* copy the page's contents */
39037602 5333 pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(m1), VM_PAGE_GET_PHYS_PAGE(m2));
39236c6e
A
5334 /* copy the page's state */
5335 assert(!VM_PAGE_WIRED(m1));
39037602
A
5336 assert(m1->vm_page_q_state != VM_PAGE_ON_FREE_Q);
5337 assert(m1->vm_page_q_state != VM_PAGE_ON_PAGEOUT_Q);
39236c6e
A
5338 assert(!m1->laundry);
5339 m2->reference = m1->reference;
5340 assert(!m1->gobbled);
5341 assert(!m1->private);
5342 m2->no_cache = m1->no_cache;
fe8ab488 5343 m2->xpmapped = 0;
39236c6e
A
5344 assert(!m1->busy);
5345 assert(!m1->wanted);
5346 assert(!m1->fictitious);
5347 m2->pmapped = m1->pmapped; /* should flush cache ? */
5348 m2->wpmapped = m1->wpmapped;
39037602 5349 assert(!m1->free_when_done);
39236c6e
A
5350 m2->absent = m1->absent;
5351 m2->error = m1->error;
5352 m2->dirty = m1->dirty;
5353 assert(!m1->cleaning);
5354 m2->precious = m1->precious;
5355 m2->clustered = m1->clustered;
5356 assert(!m1->overwriting);
5357 m2->restart = m1->restart;
5358 m2->unusual = m1->unusual;
5359 m2->encrypted = m1->encrypted;
5360 assert(!m1->encrypted_cleaning);
5361 m2->cs_validated = m1->cs_validated;
5362 m2->cs_tainted = m1->cs_tainted;
c18c124e 5363 m2->cs_nx = m1->cs_nx;
39236c6e
A
5364
5365 /*
5366 * If m1 had really been reusable,
5367 * we would have just stolen it, so
5368 * let's not propagate it's "reusable"
5369 * bit and assert that m2 is not
5370 * marked as "reusable".
5371 */
5372 // m2->reusable = m1->reusable;
5373 assert(!m2->reusable);
5374
39037602 5375 // assert(!m1->lopage);
39236c6e 5376 m2->slid = m1->slid;
39037602
A
5377
5378 if (m1->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR)
5379 m2->vm_page_q_state = VM_PAGE_USED_BY_COMPRESSOR;
39236c6e 5380
15129b1c
A
5381 /*
5382 * page may need to be flushed if
5383 * it is marshalled into a UPL
5384 * that is going to be used by a device
5385 * that doesn't support coherency
5386 */
5387 m2->written_by_kernel = TRUE;
5388
39236c6e
A
5389 /*
5390 * make sure we clear the ref/mod state
5391 * from the pmap layer... else we risk
5392 * inheriting state from the last time
5393 * this page was used...
5394 */
39037602 5395 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(m2), VM_MEM_MODIFIED | VM_MEM_REFERENCED);
2d21ac55
A
5396
5397 if (refmod & VM_MEM_REFERENCED)
5398 m2->reference = TRUE;
316670eb
A
5399 if (refmod & VM_MEM_MODIFIED) {
5400 SET_PAGE_DIRTY(m2, TRUE);
5401 }
2d21ac55
A
5402 offset = m1->offset;
5403
5404 /*
5405 * completely cleans up the state
5406 * of the page so that it is ready
5407 * to be put onto the free list, or
5408 * for this purpose it looks like it
5409 * just came off of the free list
5410 */
5411 vm_page_free_prepare(m1);
5412
5413 /*
39236c6e
A
5414 * now put the substitute page
5415 * on the object
2d21ac55 5416 */
3e170ce0 5417 vm_page_insert_internal(m2, locked_object, offset, VM_KERN_MEMORY_NONE, TRUE, TRUE, FALSE, FALSE, NULL);
2d21ac55 5418
39037602 5419 if (m2->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
39236c6e
A
5420 m2->pmapped = TRUE;
5421 m2->wpmapped = TRUE;
2d21ac55 5422
39236c6e
A
5423 PMAP_ENTER(kernel_pmap, m2->offset, m2,
5424 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
3e170ce0 5425
39236c6e 5426 compressed_pages++;
3e170ce0 5427
39236c6e
A
5428 } else {
5429 if (m2->reference)
5430 vm_page_activate(m2);
5431 else
5432 vm_page_deactivate(m2);
5433 }
2d21ac55
A
5434 PAGE_WAKEUP_DONE(m2);
5435
5436 } else {
39037602 5437 assert(m1->vm_page_q_state != VM_PAGE_USED_BY_COMPRESSOR);
39236c6e 5438
2d21ac55
A
5439 /*
5440 * completely cleans up the state
5441 * of the page so that it is ready
5442 * to be put onto the free list, or
5443 * for this purpose it looks like it
5444 * just came off of the free list
5445 */
5446 vm_page_free_prepare(m1);
5447 }
3e170ce0 5448
2d21ac55 5449 stolen_pages++;
3e170ce0 5450
1c79356b 5451 }
39037602
A
5452#if CONFIG_BACKGROUND_QUEUE
5453 vm_page_assign_background_state(m1);
5454#endif
5455 VM_PAGE_ZERO_PAGEQ_ENTRY(m1);
5456 m1->snext = m;
2d21ac55 5457 m = m1;
e5568f75 5458 }
2d21ac55
A
5459 if (locked_object) {
5460 vm_object_unlock(locked_object);
5461 locked_object = VM_OBJECT_NULL;
1c79356b
A
5462 }
5463
2d21ac55 5464 if (abort_run == TRUE) {
2d21ac55
A
5465 /*
5466 * want the index of the last
5467 * page in this run that was
5468 * successfully 'stolen', so back
5469 * it up 1 for the auto-decrement on use
5470 * and 1 more to bump back over this page
5471 */
5472 page_idx = tmp_start_idx + 2;
b0d623f7 5473 if (page_idx >= vm_pages_count) {
d190cdc3
A
5474 if (wrapped) {
5475 if (m != VM_PAGE_NULL) {
5476 vm_page_unlock_queues();
5477 vm_page_free_list(m, FALSE);
5478 vm_page_lock_queues();
5479 m = VM_PAGE_NULL;
5480 }
5481 dumped_run++;
b0d623f7 5482 goto done_scanning;
d190cdc3 5483 }
b0d623f7
A
5484 page_idx = last_idx = 0;
5485 wrapped = TRUE;
5486 }
5487 abort_run = FALSE;
5488
2d21ac55 5489 /*
b0d623f7
A
5490 * We didn't find a contiguous range but we didn't
5491 * start from the very first page.
5492 * Start again from the very first page.
2d21ac55 5493 */
b0d623f7
A
5494 RESET_STATE_OF_RUN();
5495
5496 if( flags & KMA_LOMEM)
5497 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
5498 else
5499 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
5500
5501 last_idx = page_idx;
2d21ac55 5502
d190cdc3
A
5503 if (m != VM_PAGE_NULL) {
5504 vm_page_unlock_queues();
5505 vm_page_free_list(m, FALSE);
5506 vm_page_lock_queues();
5507 m = VM_PAGE_NULL;
5508 }
5509 dumped_run++;
5510
b0d623f7
A
5511 lck_mtx_lock(&vm_page_queue_free_lock);
5512 /*
5513 * reset our free page limit since we
5514 * dropped the lock protecting the vm_page_free_queue
5515 */
5516 free_available = vm_page_free_count - vm_page_free_reserved;
2d21ac55
A
5517 goto retry;
5518 }
e5568f75 5519
e5568f75 5520 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
2d21ac55 5521
39037602
A
5522 assert(m1->vm_page_q_state == VM_PAGE_NOT_ON_Q);
5523 assert(m1->wire_count == 0);
5524
5525 if (wire == TRUE) {
2d21ac55 5526 m1->wire_count++;
39037602
A
5527 m1->vm_page_q_state = VM_PAGE_IS_WIRED;
5528 } else
2d21ac55 5529 m1->gobbled = TRUE;
e5568f75 5530 }
2d21ac55
A
5531 if (wire == FALSE)
5532 vm_page_gobble_count += npages;
5533
5534 /*
5535 * gobbled pages are also counted as wired pages
5536 */
e5568f75 5537 vm_page_wire_count += npages;
e5568f75 5538
2d21ac55
A
5539 assert(vm_page_verify_contiguous(m, npages));
5540 }
5541done_scanning:
39236c6e
A
5542 PAGE_REPLACEMENT_ALLOWED(FALSE);
5543
2d21ac55
A
5544 vm_page_unlock_queues();
5545
593a1d5f 5546#if DEBUG
2d21ac55
A
5547 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
5548
5549 tv_end_sec -= tv_start_sec;
5550 if (tv_end_usec < tv_start_usec) {
5551 tv_end_sec--;
5552 tv_end_usec += 1000000;
1c79356b 5553 }
2d21ac55
A
5554 tv_end_usec -= tv_start_usec;
5555 if (tv_end_usec >= 1000000) {
5556 tv_end_sec++;
5557 tv_end_sec -= 1000000;
5558 }
b0d623f7 5559 if (vm_page_find_contig_debug) {
39236c6e
A
5560 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
5561 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
5562 (long)tv_end_sec, tv_end_usec, orig_last_idx,
5563 scanned, yielded, dumped_run, stolen_pages, compressed_pages);
b0d623f7 5564 }
e5568f75 5565
593a1d5f
A
5566#endif
5567#if MACH_ASSERT
2d21ac55
A
5568 vm_page_verify_free_lists();
5569#endif
3e170ce0
A
5570 if (m == NULL && zone_gc_called == FALSE) {
5571 printf("%s(num=%d,low=%d): found %d pages at 0x%llx...scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages... wired count is %d\n",
5572 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
5573 scanned, yielded, dumped_run, stolen_pages, compressed_pages, vm_page_wire_count);
5574
5575 if (consider_buffer_cache_collect != NULL) {
5576 (void)(*consider_buffer_cache_collect)(1);
5577 }
5578
39037602 5579 consider_zone_gc();
3e170ce0
A
5580
5581 zone_gc_called = TRUE;
5582
5583 printf("vm_page_find_contiguous: zone_gc called... wired count is %d\n", vm_page_wire_count);
5584 goto full_scan_again;
5585 }
5586
e5568f75 5587 return m;
1c79356b
A
5588}
5589
5590/*
5591 * Allocate a list of contiguous, wired pages.
5592 */
5593kern_return_t
5594cpm_allocate(
5595 vm_size_t size,
5596 vm_page_t *list,
2d21ac55 5597 ppnum_t max_pnum,
b0d623f7
A
5598 ppnum_t pnum_mask,
5599 boolean_t wire,
5600 int flags)
1c79356b 5601{
91447636
A
5602 vm_page_t pages;
5603 unsigned int npages;
1c79356b 5604
6d2010ae 5605 if (size % PAGE_SIZE != 0)
1c79356b
A
5606 return KERN_INVALID_ARGUMENT;
5607
b0d623f7
A
5608 npages = (unsigned int) (size / PAGE_SIZE);
5609 if (npages != size / PAGE_SIZE) {
5610 /* 32-bit overflow */
5611 return KERN_INVALID_ARGUMENT;
5612 }
1c79356b 5613
1c79356b
A
5614 /*
5615 * Obtain a pointer to a subset of the free
5616 * list large enough to satisfy the request;
5617 * the region will be physically contiguous.
5618 */
b0d623f7 5619 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
e5568f75 5620
2d21ac55 5621 if (pages == VM_PAGE_NULL)
1c79356b 5622 return KERN_NO_SPACE;
1c79356b 5623 /*
2d21ac55 5624 * determine need for wakeups
1c79356b 5625 */
2d21ac55 5626 if ((vm_page_free_count < vm_page_free_min) ||
316670eb
A
5627 ((vm_page_free_count < vm_page_free_target) &&
5628 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
5629 thread_wakeup((event_t) &vm_page_free_wanted);
2d21ac55 5630
6d2010ae
A
5631 VM_CHECK_MEMORYSTATUS;
5632
1c79356b
A
5633 /*
5634 * The CPM pages should now be available and
5635 * ordered by ascending physical address.
5636 */
5637 assert(vm_page_verify_contiguous(pages, npages));
5638
5639 *list = pages;
5640 return KERN_SUCCESS;
5641}
6d2010ae
A
5642
5643
5644unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
5645
5646/*
5647 * when working on a 'run' of pages, it is necessary to hold
5648 * the vm_page_queue_lock (a hot global lock) for certain operations
5649 * on the page... however, the majority of the work can be done
5650 * while merely holding the object lock... in fact there are certain
5651 * collections of pages that don't require any work brokered by the
5652 * vm_page_queue_lock... to mitigate the time spent behind the global
5653 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
5654 * while doing all of the work that doesn't require the vm_page_queue_lock...
5655 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
5656 * necessary work for each page... we will grab the busy bit on the page
5657 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
5658 * if it can't immediately take the vm_page_queue_lock in order to compete
5659 * for the locks in the same order that vm_pageout_scan takes them.
5660 * the operation names are modeled after the names of the routines that
5661 * need to be called in order to make the changes very obvious in the
5662 * original loop
5663 */
5664
/*
 * vm_page_do_delayed_work:
 *
 *	Second pass of the two-pass delayed-work scheme: apply the queued
 *	per-page operations (encoded as DW_* bits in each entry's dw_mask)
 *	for 'dw_count' entries of 'dwp', all of which belong to 'object'.
 *	Called with 'object' locked; acquires the vm_page queues lock in
 *	the same order vm_pageout_scan does (queues lock first, then the
 *	object lock) to avoid an effective lock inversion.
 *	Pages marked DW_vm_page_free are collected on a local list and
 *	freed in one batch after the queues lock is dropped.
 */
void
vm_page_do_delayed_work(
	vm_object_t 	object,
	vm_tag_t        tag,
	struct vm_page_delayed_work *dwp,
	int		dw_count)
{
	int		j;
	vm_page_t	m;
	vm_page_t	local_free_q = VM_PAGE_NULL;

	/*
	 * pageout_scan takes the vm_page_lock_queues first
	 * then tries for the object lock... to avoid what
	 * is effectively a lock inversion, we'll go to the
	 * trouble of taking them in that same order... otherwise
	 * if this object contains the majority of the pages resident
	 * in the UBC (or a small set of large objects actively being
	 * worked on contain the majority of the pages), we could
	 * cause the pageout_scan thread to 'starve' in its attempt
	 * to find pages to move to the free queue, since it has to
	 * successfully acquire the object lock of any candidate page
	 * before it can steal/clean it.
	 */
	if (!vm_page_trylockspin_queues()) {
		/* couldn't get the queues lock opportunistically: drop the
		 * object lock and re-take both in pageout_scan's order */
		vm_object_unlock(object);

		vm_page_lockspin_queues();

		for (j = 0; ; j++) {
			if (!vm_object_lock_avoid(object) &&
			    _vm_object_lock_try(object))
				break;
			/* back off and retry so the object lock owner can run */
			vm_page_unlock_queues();
			mutex_pause(j);
			vm_page_lockspin_queues();
		}
	}
	for (j = 0; j < dw_count; j++, dwp++) {

		m = dwp->dw_m;

		if (dwp->dw_mask & DW_vm_pageout_throttle_up)
			vm_pageout_throttle_up(m);
#if CONFIG_PHANTOM_CACHE
		if (dwp->dw_mask & DW_vm_phantom_cache_update)
			vm_phantom_cache_update(m);
#endif
		if (dwp->dw_mask & DW_vm_page_wire)
			vm_page_wire(m, tag, FALSE);
		else if (dwp->dw_mask & DW_vm_page_unwire) {
			boolean_t	queueit;

			/* don't requeue if the page is about to be freed or deactivated */
			queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;

			vm_page_unwire(m, queueit);
		}
		if (dwp->dw_mask & DW_vm_page_free) {
			vm_page_free_prepare_queues(m);

			assert(m->pageq.next == 0 && m->pageq.prev == 0);
			/*
			 * Add this page to our list of reclaimed pages,
			 * to be freed later.
			 */
			m->snext = local_free_q;
			local_free_q = m;
		} else {
			if (dwp->dw_mask & DW_vm_page_deactivate_internal)
				vm_page_deactivate_internal(m, FALSE);
			else if (dwp->dw_mask & DW_vm_page_activate) {
				if (m->vm_page_q_state != VM_PAGE_ON_ACTIVE_Q) {
					vm_page_activate(m);
				}
			}
			else if (dwp->dw_mask & DW_vm_page_speculate)
				vm_page_speculate(m, TRUE);
			else if (dwp->dw_mask & DW_enqueue_cleaned) {
				/*
				 * if we didn't hold the object lock and did this,
				 * we might disconnect the page, then someone might
				 * soft fault it back in, then we would put it on the
				 * cleaned queue, and so we would have a referenced (maybe even dirty)
				 * page on that queue, which we don't want
				 */
				int refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));

				if ((refmod_state & VM_MEM_REFERENCED)) {
					/*
					 * this page has been touched since it got cleaned; let's activate it
					 * if it hasn't already been
					 */
					vm_pageout_enqueued_cleaned++;
					vm_pageout_cleaned_reactivated++;
					vm_pageout_cleaned_commit_reactivated++;

					if (m->vm_page_q_state != VM_PAGE_ON_ACTIVE_Q)
						vm_page_activate(m);
				} else {
					m->reference = FALSE;
					vm_page_enqueue_cleaned(m);
				}
			}
			else if (dwp->dw_mask & DW_vm_page_lru)
				vm_page_lru(m);
			else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
				if (m->vm_page_q_state != VM_PAGE_ON_PAGEOUT_Q)
					vm_page_queues_remove(m, TRUE);
			}
			if (dwp->dw_mask & DW_set_reference)
				m->reference = TRUE;
			else if (dwp->dw_mask & DW_clear_reference)
				m->reference = FALSE;

			if (dwp->dw_mask & DW_move_page) {
				if (m->vm_page_q_state != VM_PAGE_ON_PAGEOUT_Q) {
					vm_page_queues_remove(m, FALSE);

					assert(VM_PAGE_OBJECT(m) != kernel_object);

					vm_page_enqueue_inactive(m, FALSE);
				}
			}
			if (dwp->dw_mask & DW_clear_busy)
				m->busy = FALSE;

			if (dwp->dw_mask & DW_PAGE_WAKEUP)
				PAGE_WAKEUP(m);
		}
	}
	vm_page_unlock_queues();

	/* free the reclaimed pages in one batch, outside the queues lock */
	if (local_free_q)
		vm_page_free_list(local_free_q, TRUE);

	VM_CHECK_MEMORYSTATUS;

}
5803
0b4c1975
A
5804kern_return_t
5805vm_page_alloc_list(
5806 int page_count,
5807 int flags,
5808 vm_page_t *list)
5809{
5810 vm_page_t lo_page_list = VM_PAGE_NULL;
5811 vm_page_t mem;
5812 int i;
5813
5814 if ( !(flags & KMA_LOMEM))
5815 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
5816
5817 for (i = 0; i < page_count; i++) {
5818
5819 mem = vm_page_grablo();
5820
5821 if (mem == VM_PAGE_NULL) {
5822 if (lo_page_list)
5823 vm_page_free_list(lo_page_list, FALSE);
5824
5825 *list = VM_PAGE_NULL;
5826
5827 return (KERN_RESOURCE_SHORTAGE);
5828 }
39037602 5829 mem->snext = lo_page_list;
0b4c1975
A
5830 lo_page_list = mem;
5831 }
5832 *list = lo_page_list;
5833
5834 return (KERN_SUCCESS);
5835}
5836
/*
 * vm_page_set_offset: set the page's offset within its object.
 * Caller is responsible for any locking/lookup invariants this
 * may affect -- this is a bare field store.
 */
void
vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
{
	page->offset = offset;
}
5842
/*
 * vm_page_get_next: return the page chained after 'page' via the
 * 'snext' singly-linked list field (as used by vm_page_alloc_list).
 */
vm_page_t
vm_page_get_next(vm_page_t page)
{
	return (page->snext);
}
5848
/*
 * vm_page_get_offset: return the page's offset within its object.
 */
vm_object_offset_t
vm_page_get_offset(vm_page_t page)
{
	return (page->offset);
}
5854
/*
 * vm_page_get_phys_page: return the physical page number backing 'page'.
 */
ppnum_t
vm_page_get_phys_page(vm_page_t page)
{
	return (VM_PAGE_GET_PHYS_PAGE(page));
}
5860
5861
b0d623f7
A
5862/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
5863
d1ecb069
A
5864#if HIBERNATION
5865
b0d623f7
A
5866static vm_page_t hibernate_gobble_queue;
5867
0b4c1975 5868static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
39236c6e 5869static int hibernate_flush_dirty_pages(int);
39037602 5870static int hibernate_flush_queue(vm_page_queue_head_t *, int);
0b4c1975
A
5871
5872void hibernate_flush_wait(void);
5873void hibernate_mark_in_progress(void);
5874void hibernate_clear_in_progress(void);
5875
39236c6e
A
5876void hibernate_free_range(int, int);
5877void hibernate_hash_insert_page(vm_page_t);
5878uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
5879void hibernate_rebuild_vm_structs(void);
5880uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
5881ppnum_t hibernate_lookup_paddr(unsigned int);
0b4c1975
A
5882
/*
 * Counters accumulated while flushing dirty pages and building the
 * hibernation image.  The hibernate_* fields are maintained by
 * hibernate_flush_queue()/hibernate_drain_pageout_queue(); the cd_*
 * ("consider discard") fields by hibernate_consider_discard() and the
 * page-list setup path.  Reset via hibernate_reset_stats().
 */
struct hibernate_statistics {
	/* flush/drain phase */
	int hibernate_considered;	/* pages examined by hibernate_flush_queue */
	int hibernate_reentered_on_q;	/* pages skipped and requeued */
	int hibernate_found_dirty;	/* pages handed to the pageout cluster */
	int hibernate_skipped_cleaning;
	int hibernate_skipped_transient;
	int hibernate_skipped_precious;
	int hibernate_skipped_external;
	int hibernate_queue_nolock;	/* object try-lock gave up after retries */
	int hibernate_queue_paused;	/* mutex_pause taken waiting for object lock */
	int hibernate_throttled;
	int hibernate_throttle_timeout;
	int hibernate_drained;
	int hibernate_drain_timeout;
	/* discard-consideration phase */
	int cd_lock_failed;
	int cd_found_precious;
	int cd_found_wired;
	int cd_found_busy;
	int cd_found_unusual;
	int cd_found_cleaning;
	int cd_found_laundry;
	int cd_found_dirty;
	int cd_found_xpmapped;
	int cd_skipped_xpmapped;	/* xpmapped pages past HIBERNATE_XPMAPPED_LIMIT */
	int cd_local_free;
	int cd_total_free;
	int cd_vm_page_wire_count;
	int cd_vm_struct_pages_unneeded;
	int cd_pages;
	int cd_discarded;
	int cd_count_wire;
} hibernate_stats;
5915
5916
8a3053a0
A
5917/*
5918 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
5919 * so that we don't overrun the estimated image size, which would
5920 * result in a hibernation failure.
5921 */
5922#define HIBERNATE_XPMAPPED_LIMIT 40000
5923
0b4c1975
A
5924
/*
 * hibernate_drain_pageout_queue:
 *
 *	Wait for the given pageout queue to empty, sleeping in 5-second
 *	interruptible slices with q->pgo_draining set so the pageout
 *	thread knows to wake us.  Returns 0 on success; returns 1 on a
 *	timeout with work still pending -- except for the external queue,
 *	where a timeout is tolerated and 0 is returned.
 *	Enters and leaves with the page queues lock NOT held.
 */
static int
hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
{
	wait_result_t	wait_result;

	vm_page_lock_queues();

	while ( !vm_page_queue_empty(&q->pgo_pending) ) {

		q->pgo_draining = TRUE;

		assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);

		vm_page_unlock_queues();

		wait_result = thread_block(THREAD_CONTINUE_NULL);

		if (wait_result == THREAD_TIMED_OUT && !vm_page_queue_empty(&q->pgo_pending)) {
			hibernate_stats.hibernate_drain_timeout++;

			/* the external queue is best-effort; don't fail hibernation over it */
			if (q == &vm_pageout_queue_external)
				return (0);

			return (1);
		}
		vm_page_lock_queues();

		hibernate_stats.hibernate_drained++;
	}
	vm_page_unlock_queues();

	return (0);
}
5958
0b4c1975 5959
39236c6e
A
5960boolean_t hibernate_skip_external = FALSE;
5961
/*
 * hibernate_flush_queue:
 *
 *	Walk up to 'qcount' pages from the head of page queue 'q',
 *	pushing each dirty, flushable page to the pageout cluster so it
 *	is cleaned (compressed/paged out) before the hibernation image
 *	is written.  Pages that can't or shouldn't be cleaned are
 *	rotated to the tail of the queue (reenter_pg_on_q).
 *	Honors pageout-queue throttling, abort checks via
 *	hibernate_should_abort(), and the hibernate_skip_external flag.
 *	Returns 0 on success, 1 on abort/throttle failure.
 */
static int
hibernate_flush_queue(vm_page_queue_head_t *q, int qcount)
{
	vm_page_t	m;
	vm_object_t	l_object = NULL;
	vm_object_t	m_object = NULL;
	int		refmod_state = 0;
	int		try_failed_count = 0;
	int		retval = 0;
	int		current_run = 0;
	struct	vm_pageout_queue *iq;
	struct	vm_pageout_queue *eq;
	struct	vm_pageout_queue *tq;


	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);

	iq = &vm_pageout_queue_internal;
	eq = &vm_pageout_queue_external;

	vm_page_lock_queues();

	while (qcount && !vm_page_queue_empty(q)) {

		/* every 1000 pages, check whether hibernation was aborted */
		if (current_run++ == 1000) {
			if (hibernate_should_abort()) {
				retval = 1;
				break;
			}
			current_run = 0;
		}

		m = (vm_page_t) vm_page_queue_first(q);
		m_object = VM_PAGE_OBJECT(m);

		/*
		 * check to see if we currently are working
		 * with the same object... if so, we've
		 * already got the lock
		 */
		if (m_object != l_object) {
			/*
			 * the object associated with candidate page is
			 * different from the one we were just working
			 * with... dump the lock if we still own it
			 */
			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			/*
			 * Try to lock object; since we've alread got the
			 * page queues lock, we can only 'try' for this one.
			 * if the 'try' fails, we need to do a mutex_pause
			 * to allow the owner of the object lock a chance to
			 * run...
			 */
			if ( !vm_object_lock_try_scan(m_object)) {

				if (try_failed_count > 20) {
					hibernate_stats.hibernate_queue_nolock++;

					goto reenter_pg_on_q;
				}

				vm_page_unlock_queues();
				mutex_pause(try_failed_count++);
				vm_page_lock_queues();

				hibernate_stats.hibernate_queue_paused++;
				continue;
			} else {
				l_object = m_object;
			}
		}
		if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
			/*
			 * page is not to be cleaned
			 * put it back on the head of its queue
			 */
			if (m->cleaning)
				hibernate_stats.hibernate_skipped_cleaning++;
			else
				hibernate_stats.hibernate_skipped_transient++;

			goto reenter_pg_on_q;
		}
		if (m_object->copy == VM_OBJECT_NULL) {
			if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
				/*
				 * let the normal hibernate image path
				 * deal with these
				 */
				goto reenter_pg_on_q;
			}
		}
		if ( !m->dirty && m->pmapped) {
			/* pick up hardware-modified state before deciding to skip */
			refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));

			if ((refmod_state & VM_MEM_MODIFIED)) {
				SET_PAGE_DIRTY(m, FALSE);
			}
		} else
			refmod_state = 0;

		if ( !m->dirty) {
			/*
			 * page is not to be cleaned
			 * put it back on the head of its queue
			 */
			if (m->precious)
				hibernate_stats.hibernate_skipped_precious++;

			goto reenter_pg_on_q;
		}

		if (hibernate_skip_external == TRUE && !m_object->internal) {

			hibernate_stats.hibernate_skipped_external++;

			goto reenter_pg_on_q;
		}
		tq = NULL;

		/* pick the throttled queue (if any) this page would land on */
		if (m_object->internal) {
			if (VM_PAGE_Q_THROTTLED(iq))
				tq = iq;
		} else if (VM_PAGE_Q_THROTTLED(eq))
			tq = eq;

		if (tq != NULL) {
			wait_result_t	wait_result;
			int		wait_count = 5;

			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}

			/* wait (up to 5 x 1s) for the pageout queue to unthrottle */
			while (retval == 0) {

				tq->pgo_throttled = TRUE;

				assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);

				vm_page_unlock_queues();

				wait_result = thread_block(THREAD_CONTINUE_NULL);

				vm_page_lock_queues();

				if (wait_result != THREAD_TIMED_OUT)
					break;
				if (!VM_PAGE_Q_THROTTLED(tq))
					break;

				if (hibernate_should_abort())
					retval = 1;

				if (--wait_count == 0) {

					hibernate_stats.hibernate_throttle_timeout++;

					if (tq == eq) {
						/* give up on external pages rather than fail */
						hibernate_skip_external = TRUE;
						break;
					}
					retval = 1;
				}
			}
			if (retval)
				break;

			hibernate_stats.hibernate_throttled++;

			continue;
		}
		/*
		 * we've already factored out pages in the laundry which
		 * means this page can't be on the pageout queue so it's
		 * safe to do the vm_page_queues_remove
		 */
		vm_page_queues_remove(m, TRUE);

		if (m_object->internal == TRUE)
			pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m), PMAP_OPTIONS_COMPRESSOR, NULL);

		(void)vm_pageout_cluster(m, FALSE, FALSE);

		hibernate_stats.hibernate_found_dirty++;

		goto next_pg;

reenter_pg_on_q:
		/* rotate the page to the tail so we make forward progress */
		vm_page_queue_remove(q, m, vm_page_t, pageq);
		vm_page_queue_enter(q, m, vm_page_t, pageq);

		hibernate_stats.hibernate_reentered_on_q++;
next_pg:
		hibernate_stats.hibernate_considered++;

		qcount--;
		try_failed_count = 0;
	}
	if (l_object != NULL) {
		vm_object_unlock(l_object);
		l_object = NULL;
	}

	vm_page_unlock_queues();

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);

	return (retval);
}
6177
6178
/*
 * hibernate_flush_dirty_pages:
 *
 *	Flush every pageable queue in a fixed order (speculative ages,
 *	inactive, anonymous, cleaned, then active) via
 *	hibernate_flush_queue(), draining the internal pageout queue
 *	between phases, so the hibernation image contains as few dirty
 *	pages as possible.  'pass' == 1 brackets the active-queue flush
 *	with compressor warmup recording.  Returns 1 on the first
 *	failure/abort, 0 on success.
 */
static int
hibernate_flush_dirty_pages(int pass)
{
	struct vm_speculative_age_q	*aq;
	uint32_t	i;

	/* fold per-cpu local queues back into the global queues first */
	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++)
			vm_page_reactivate_local(i, TRUE, FALSE);
	}

	for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
		int		qcount;
		vm_page_t	m;

		aq = &vm_page_queue_speculative[i];

		if (vm_page_queue_empty(&aq->age_q))
			continue;
		qcount = 0;

		vm_page_lockspin_queues();

		/* count the pages on this age queue under the lock */
		vm_page_queue_iterate(&aq->age_q,
				      m,
				      vm_page_t,
				      pageq)
		{
			qcount++;
		}
		vm_page_unlock_queues();

		if (qcount) {
			if (hibernate_flush_queue(&aq->age_q, qcount))
				return (1);
		}
	}
	if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
		return (1);
	/* XXX FBDP TODO: flush secluded queue */
	if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
		return (1);
	if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
		return (1);
	if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
		return (1);

	if (pass == 1)
		vm_compressor_record_warmup_start();

	if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
		if (pass == 1)
			vm_compressor_record_warmup_end();
		return (1);
	}
	if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
		if (pass == 1)
			vm_compressor_record_warmup_end();
		return (1);
	}
	if (pass == 1)
		vm_compressor_record_warmup_end();

	if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
		return (1);

	return (0);
}
0b4c1975 6247
0b4c1975 6248
fe8ab488
A
/*
 * hibernate_reset_stats: zero all hibernation counters before a new
 * hibernation attempt.
 */
void
hibernate_reset_stats()
{
	bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
}
6254
6255
0b4c1975
A
/*
 * hibernate_flush_memory:
 *
 *	Top-level pre-hibernation memory flush: push dirty pages through
 *	the compressor (hibernate_flush_dirty_pages pass 1), flush the
 *	compressor itself, then reclaim wired memory via the buffer-cache
 *	collect callback and the zone garbage collector.  Sets
 *	hibernate_cleaning_in_progress for the duration.  Returns the
 *	flush result: 0 on success, non-zero on failure/abort.
 */
int
hibernate_flush_memory()
{
	int	retval;

	assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);

	hibernate_cleaning_in_progress = TRUE;
	hibernate_skip_external = FALSE;

	if ((retval = hibernate_flush_dirty_pages(1)) == 0) {

		KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);

		vm_compressor_flush();

		KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);

		/* optional hook installed by the buffer cache layer */
		if (consider_buffer_cache_collect != NULL) {
			unsigned int orig_wire_count;

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
			orig_wire_count = vm_page_wire_count;

			(void)(*consider_buffer_cache_collect)(1);
			consider_zone_gc();

			HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
		}
	}
	hibernate_cleaning_in_progress = FALSE;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);

	if (retval)
		HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);


	HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
		 hibernate_stats.hibernate_considered,
		 hibernate_stats.hibernate_reentered_on_q,
		 hibernate_stats.hibernate_found_dirty);
	HIBPRINT("   skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
		 hibernate_stats.hibernate_skipped_cleaning,
		 hibernate_stats.hibernate_skipped_transient,
		 hibernate_stats.hibernate_skipped_precious,
		 hibernate_stats.hibernate_skipped_external,
		 hibernate_stats.hibernate_queue_nolock);
	HIBPRINT("   queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
		 hibernate_stats.hibernate_queue_paused,
		 hibernate_stats.hibernate_throttled,
		 hibernate_stats.hibernate_throttle_timeout,
		 hibernate_stats.hibernate_drained,
		 hibernate_stats.hibernate_drain_timeout);

	return (retval);
}
6317
6d2010ae 6318
b0d623f7
A
/*
 * hibernate_page_list_zero:
 *
 *	Clear every bank bitmap in 'list'.  Bits zero mean "page needs
 *	to be saved", so after this call all in-range pages default to
 *	saved; the out-of-range tail bits of each bank's last word are
 *	set so they are never treated as pages to save.
 */
static void
hibernate_page_list_zero(hibernate_page_list_t *list)
{
    uint32_t             bank;
    hibernate_bitmap_t * bitmap;

    bitmap = &list->bank_bitmap[0];
    for (bank = 0; bank < list->bank_count; bank++)
    {
        uint32_t last_bit;

	/* bitmapwords is a word count; << 2 converts to bytes */
	bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
        // set out-of-bound bits at end of bitmap.
        last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
	if (last_bit)
	    bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);

	/* banks are laid out back-to-back; step to the next one */
        bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
    }
}
6339
b0d623f7
A
6340void
6341hibernate_free_gobble_pages(void)
6342{
6343 vm_page_t m, next;
6344 uint32_t count = 0;
6345
6346 m = (vm_page_t) hibernate_gobble_queue;
6347 while(m)
6348 {
39037602 6349 next = m->snext;
b0d623f7
A
6350 vm_page_free(m);
6351 count++;
6352 m = next;
6353 }
6354 hibernate_gobble_queue = VM_PAGE_NULL;
6355
6356 if (count)
6357 HIBLOG("Freed %d pages\n", count);
6358}
6359
/*
 * hibernate_consider_discard:
 *
 *	Decide whether page 'm' can be dropped from the hibernation
 *	image and simply discarded at wakeup: TRUE for clean pages and
 *	pages of volatile/empty purgeable objects, FALSE otherwise
 *	(wired, precious, busy, dirty, mid-cleaning, etc.).  Clean,
 *	referenced, executable-mapped file pages are kept (up to
 *	HIBERNATE_XPMAPPED_LIMIT) to speed wakeup.  When 'preflight' is
 *	FALSE the cd_* statistics are updated.  The do/while(FALSE)
 *	wrapper exists so each disqualifying test can 'break' out.
 */
static boolean_t
hibernate_consider_discard(vm_page_t m, boolean_t preflight)
{
    vm_object_t object = NULL;
    int                  refmod_state;
    boolean_t            discard = FALSE;

    do
    {
        if (m->private)
            panic("hibernate_consider_discard: private");

	object = VM_PAGE_OBJECT(m);

        if (!vm_object_lock_try(object)) {
	    object = NULL;
	    if (!preflight) hibernate_stats.cd_lock_failed++;
            break;
	}
        if (VM_PAGE_WIRED(m)) {
	    if (!preflight) hibernate_stats.cd_found_wired++;
            break;
	}
        if (m->precious) {
	    if (!preflight) hibernate_stats.cd_found_precious++;
            break;
	}
        if (m->busy || !object->alive) {
	   /*
	    *	Somebody is playing with this page.
	    */
	    if (!preflight) hibernate_stats.cd_found_busy++;
            break;
	}
        if (m->absent || m->unusual || m->error) {
	   /*
	    * If it's unusual in anyway, ignore it
	    */
	    if (!preflight) hibernate_stats.cd_found_unusual++;
            break;
	}
        if (m->cleaning) {
	    if (!preflight) hibernate_stats.cd_found_cleaning++;
            break;
	}
	if (m->laundry) {
	    if (!preflight) hibernate_stats.cd_found_laundry++;
            break;
	}
        if (!m->dirty)
        {
	    /* consult the pmap for referenced/modified state */
            refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));

            if (refmod_state & VM_MEM_REFERENCED)
                m->reference = TRUE;
            if (refmod_state & VM_MEM_MODIFIED) {
              	SET_PAGE_DIRTY(m, FALSE);
	    }
        }

        /*
         * If it's clean or purgeable we can discard the page on wakeup.
         */
        discard = (!m->dirty)
		    || (VM_PURGABLE_VOLATILE == object->purgable)
		    || (VM_PURGABLE_EMPTY == object->purgable);


	if (discard == FALSE) {
		if (!preflight)
			hibernate_stats.cd_found_dirty++;
        } else if (m->xpmapped && m->reference && !object->internal) {
		/* keep hot executable-mapped file pages, up to the clamp */
		if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
			if (!preflight)
				hibernate_stats.cd_found_xpmapped++;
			discard = FALSE;
		} else {
			if (!preflight)
				hibernate_stats.cd_skipped_xpmapped++;
		}
	}
    }
    while (FALSE);

    if (object)
        vm_object_unlock(object);

    return (discard);
}
6449
6450
/*
 * hibernate_discard_page:
 *
 *	Actually discard a page that hibernate_consider_discard()
 *	approved: disconnect any pmap mappings and free it.  If the page
 *	belongs to a volatile purgeable object, the whole object is
 *	transitioned to EMPTY (removed from its purgeable queue, token
 *	deleted, vm_page_purgeable_count adjusted).  Panics on pages that
 *	should never reach here (laundry/private/fictitious).
 */
static void
hibernate_discard_page(vm_page_t m)
{
	vm_object_t	m_object;

	if (m->absent || m->unusual || m->error)
	   /*
	    * If it's unusual in anyway, ignore
	    */
	    return;

	m_object = VM_PAGE_OBJECT(m);

#if MACH_ASSERT || DEBUG
	if (!vm_object_lock_try(m_object))
	    panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
#else
	/* No need to lock page queue for token delete, hibernate_vm_unlock()
	   makes sure these locks are uncontended before sleep */
#endif	/* MACH_ASSERT || DEBUG */

	if (m->pmapped == TRUE)
	{
	    /* drop any remaining hardware mappings before freeing */
	    __unused int refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
	}

	if (m->laundry)
	    panic("hibernate_discard_page(%p) laundry", m);
	if (m->private)
	    panic("hibernate_discard_page(%p) private", m);
	if (m->fictitious)
	    panic("hibernate_discard_page(%p) fictitious", m);

	if (VM_PURGABLE_VOLATILE == m_object->purgable)
	{
	    /* object should be on a queue */
	    assert((m_object->objq.next != NULL) && (m_object->objq.prev != NULL));
	    purgeable_q_t old_queue = vm_purgeable_object_remove(m_object);
	    assert(old_queue);
	    if (m_object->purgeable_when_ripe) {
		    vm_purgeable_token_delete_first(old_queue);
	    }
	    vm_object_lock_assert_exclusive(m_object);
	    m_object->purgable = VM_PURGABLE_EMPTY;

	    /*
	     * Purgeable ledgers:  pages of VOLATILE and EMPTY objects are
	     * accounted in the "volatile" ledger, so no change here.
	     * We have to update vm_page_purgeable_count, though, since we're
	     * effectively purging this object.
	     */
	    unsigned int delta;
	    assert(m_object->resident_page_count >= m_object->wired_page_count);
	    delta = (m_object->resident_page_count - m_object->wired_page_count);
	    assert(vm_page_purgeable_count >= delta);
	    assert(delta > 0);
	    OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
	}

	vm_page_free(m);

#if MACH_ASSERT || DEBUG
	vm_object_unlock(m_object);
#endif	/* MACH_ASSERT || DEBUG */
}
6516
db609669
A
6517/*
6518 Grab locks for hibernate_page_list_setall()
6519*/
/*
 * hibernate_vm_lock_queues:
 *
 *	Take every lock hibernate_page_list_setall() needs, in a fixed
 *	order that must match hibernate_vm_unlock_queues()'s reverse:
 *	compressor object lock, then the page queues lock, then the free
 *	list lock, then each per-cpu local page queue lock.
 */
void
hibernate_vm_lock_queues(void)
{
	vm_object_lock(compressor_object);
	vm_page_lock_queues();
	lck_mtx_lock(&vm_page_queue_free_lock);

	if (vm_page_local_q) {
		uint32_t  i;
		for (i = 0; i < vm_page_local_q_count; i++) {
			struct vpl	*lq;
			lq = &vm_page_local_q[i].vpl_un.vpl;
			VPL_LOCK(&lq->vpl_lock);
		}
	}
}
6536
/*
 * hibernate_vm_unlock_queues:
 *
 *	Release the locks taken by hibernate_vm_lock_queues(), in the
 *	opposite order: per-cpu local queue locks first, then the free
 *	list lock, the page queues lock, and finally the compressor
 *	object lock.
 */
void
hibernate_vm_unlock_queues(void)
{
	if (vm_page_local_q) {
		uint32_t  i;
		for (i = 0; i < vm_page_local_q_count; i++) {
			struct vpl	*lq;
			lq = &vm_page_local_q[i].vpl_un.vpl;
			VPL_UNLOCK(&lq->vpl_lock);
		}
	}
	lck_mtx_unlock(&vm_page_queue_free_lock);
	vm_page_unlock_queues();
	vm_object_unlock(compressor_object);
}
6552
b0d623f7
A
6553/*
6554 Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
6555 pages known to VM to not need saving are subtracted.
6556 Wired pages to be saved are present in page_list_wired, pageable in page_list.
6557*/
6558
6559void
6560hibernate_page_list_setall(hibernate_page_list_t * page_list,
6561 hibernate_page_list_t * page_list_wired,
6d2010ae 6562 hibernate_page_list_t * page_list_pal,
39236c6e
A
6563 boolean_t preflight,
6564 boolean_t will_discard,
b0d623f7
A
6565 uint32_t * pagesOut)
6566{
6567 uint64_t start, end, nsec;
6568 vm_page_t m;
39236c6e 6569 vm_page_t next;
b0d623f7 6570 uint32_t pages = page_list->page_count;
39236c6e 6571 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
316670eb 6572 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
b0d623f7
A
6573 uint32_t count_wire = pages;
6574 uint32_t count_discard_active = 0;
6575 uint32_t count_discard_inactive = 0;
316670eb 6576 uint32_t count_discard_cleaned = 0;
b0d623f7
A
6577 uint32_t count_discard_purgeable = 0;
6578 uint32_t count_discard_speculative = 0;
39236c6e 6579 uint32_t count_discard_vm_struct_pages = 0;
b0d623f7
A
6580 uint32_t i;
6581 uint32_t bank;
6582 hibernate_bitmap_t * bitmap;
6583 hibernate_bitmap_t * bitmap_wired;
39236c6e
A
6584 boolean_t discard_all;
6585 boolean_t discard;
b0d623f7 6586
3e170ce0 6587 HIBLOG("hibernate_page_list_setall(preflight %d) start\n", preflight);
b0d623f7 6588
db609669
A
6589 if (preflight) {
6590 page_list = NULL;
6591 page_list_wired = NULL;
6592 page_list_pal = NULL;
39236c6e
A
6593 discard_all = FALSE;
6594 } else {
6595 discard_all = will_discard;
db609669 6596 }
0b4c1975 6597
fe8ab488 6598#if MACH_ASSERT || DEBUG
39236c6e
A
6599 if (!preflight)
6600 {
316670eb
A
6601 vm_page_lock_queues();
6602 if (vm_page_local_q) {
6603 for (i = 0; i < vm_page_local_q_count; i++) {
6604 struct vpl *lq;
6605 lq = &vm_page_local_q[i].vpl_un.vpl;
6606 VPL_LOCK(&lq->vpl_lock);
6607 }
6608 }
39236c6e 6609 }
fe8ab488 6610#endif /* MACH_ASSERT || DEBUG */
316670eb
A
6611
6612
0b4c1975 6613 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
b0d623f7
A
6614
6615 clock_get_uptime(&start);
6616
db609669
A
6617 if (!preflight) {
6618 hibernate_page_list_zero(page_list);
6619 hibernate_page_list_zero(page_list_wired);
6620 hibernate_page_list_zero(page_list_pal);
6621
6622 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
6623 hibernate_stats.cd_pages = pages;
6624 }
0b4c1975 6625
b0d623f7
A
6626 if (vm_page_local_q) {
6627 for (i = 0; i < vm_page_local_q_count; i++)
db609669
A
6628 vm_page_reactivate_local(i, TRUE, !preflight);
6629 }
6630
6631 if (preflight) {
39236c6e 6632 vm_object_lock(compressor_object);
db609669
A
6633 vm_page_lock_queues();
6634 lck_mtx_lock(&vm_page_queue_free_lock);
b0d623f7
A
6635 }
6636
6637 m = (vm_page_t) hibernate_gobble_queue;
39236c6e 6638 while (m)
b0d623f7
A
6639 {
6640 pages--;
6641 count_wire--;
db609669 6642 if (!preflight) {
39037602
A
6643 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6644 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
db609669 6645 }
39037602 6646 m = m->snext;
b0d623f7 6647 }
6d2010ae 6648
db609669 6649 if (!preflight) for( i = 0; i < real_ncpus; i++ )
0b4c1975
A
6650 {
6651 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
6652 {
39037602 6653 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = m->snext)
0b4c1975 6654 {
39037602
A
6655 assert(m->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
6656
0b4c1975
A
6657 pages--;
6658 count_wire--;
39037602
A
6659 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6660 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
0b4c1975
A
6661
6662 hibernate_stats.cd_local_free++;
6663 hibernate_stats.cd_total_free++;
6664 }
6665 }
6666 }
6d2010ae 6667
b0d623f7
A
6668 for( i = 0; i < vm_colors; i++ )
6669 {
39037602
A
6670 vm_page_queue_iterate(&vm_page_queue_free[i].qhead,
6671 m,
6672 vm_page_t,
6673 pageq)
b0d623f7 6674 {
39037602
A
6675 assert(m->vm_page_q_state == VM_PAGE_ON_FREE_Q);
6676
b0d623f7
A
6677 pages--;
6678 count_wire--;
db609669 6679 if (!preflight) {
39037602
A
6680 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6681 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
db609669
A
6682
6683 hibernate_stats.cd_total_free++;
6684 }
b0d623f7
A
6685 }
6686 }
6687
39037602
A
6688 vm_page_queue_iterate(&vm_lopage_queue_free,
6689 m,
6690 vm_page_t,
6691 pageq)
b0d623f7 6692 {
39037602
A
6693 assert(m->vm_page_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
6694
b0d623f7
A
6695 pages--;
6696 count_wire--;
db609669 6697 if (!preflight) {
39037602
A
6698 hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
6699 hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
db609669
A
6700
6701 hibernate_stats.cd_total_free++;
6702 }
b0d623f7
A
6703 }
6704
39037602
A
6705 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
6706 while (m && !vm_page_queue_end(&vm_page_queue_throttled, (vm_page_queue_entry_t)m))
b0d623f7 6707 {
39037602
A
6708 assert(m->vm_page_q_state == VM_PAGE_ON_THROTTLED_Q);
6709
6710 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
39236c6e 6711 discard = FALSE;
b0d623f7 6712 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
db609669 6713 && hibernate_consider_discard(m, preflight))
b0d623f7 6714 {
39037602 6715 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
b0d623f7 6716 count_discard_inactive++;
39236c6e 6717 discard = discard_all;
b0d623f7
A
6718 }
6719 else
6720 count_throttled++;
6721 count_wire--;
39037602 6722 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
39236c6e
A
6723
6724 if (discard) hibernate_discard_page(m);
6725 m = next;
b0d623f7
A
6726 }
6727
39037602
A
6728 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
6729 while (m && !vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t)m))
b0d623f7 6730 {
39037602
A
6731 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
6732
6733 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
39236c6e 6734 discard = FALSE;
b0d623f7 6735 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
db609669 6736 && hibernate_consider_discard(m, preflight))
b0d623f7 6737 {
39037602 6738 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
b0d623f7
A
6739 if (m->dirty)
6740 count_discard_purgeable++;
6741 else
6742 count_discard_inactive++;
39236c6e 6743 discard = discard_all;
b0d623f7
A
6744 }
6745 else
39236c6e 6746 count_anonymous++;
b0d623f7 6747 count_wire--;
39037602 6748 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
39236c6e
A
6749 if (discard) hibernate_discard_page(m);
6750 m = next;
b0d623f7
A
6751 }
6752
39037602
A
6753 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
6754 while (m && !vm_page_queue_end(&vm_page_queue_cleaned, (vm_page_queue_entry_t)m))
b0d623f7 6755 {
39037602
A
6756 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
6757
6758 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
39236c6e 6759 discard = FALSE;
b0d623f7 6760 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
db609669 6761 && hibernate_consider_discard(m, preflight))
b0d623f7 6762 {
39037602 6763 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
b0d623f7
A
6764 if (m->dirty)
6765 count_discard_purgeable++;
6766 else
8a3053a0 6767 count_discard_cleaned++;
39236c6e 6768 discard = discard_all;
b0d623f7
A
6769 }
6770 else
8a3053a0 6771 count_cleaned++;
b0d623f7 6772 count_wire--;
39037602 6773 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
39236c6e
A
6774 if (discard) hibernate_discard_page(m);
6775 m = next;
b0d623f7
A
6776 }
6777
39037602
A
6778 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
6779 while (m && !vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t)m))
8a3053a0 6780 {
39037602
A
6781 assert(m->vm_page_q_state == VM_PAGE_ON_ACTIVE_Q);
6782
6783 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
8a3053a0
A
6784 discard = FALSE;
6785 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
6786 && hibernate_consider_discard(m, preflight))
6787 {
39037602 6788 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
8a3053a0
A
6789 if (m->dirty)
6790 count_discard_purgeable++;
6791 else
6792 count_discard_active++;
6793 discard = discard_all;
6794 }
6795 else
6796 count_active++;
6797 count_wire--;
39037602 6798 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
8a3053a0
A
6799 if (discard) hibernate_discard_page(m);
6800 m = next;
6801 }
6802
39037602
A
6803 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
6804 while (m && !vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t)m))
316670eb 6805 {
39037602
A
6806 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
6807
6808 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
39236c6e 6809 discard = FALSE;
316670eb 6810 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
db609669 6811 && hibernate_consider_discard(m, preflight))
316670eb 6812 {
39037602 6813 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
316670eb
A
6814 if (m->dirty)
6815 count_discard_purgeable++;
6816 else
8a3053a0 6817 count_discard_inactive++;
39236c6e 6818 discard = discard_all;
316670eb
A
6819 }
6820 else
8a3053a0 6821 count_inactive++;
316670eb 6822 count_wire--;
39037602 6823 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
39236c6e
A
6824 if (discard) hibernate_discard_page(m);
6825 m = next;
316670eb 6826 }
39037602 6827 /* XXX FBDP TODO: secluded queue */
316670eb 6828
b0d623f7
A
6829 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
6830 {
39037602
A
6831 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_speculative[i].age_q);
6832 while (m && !vm_page_queue_end(&vm_page_queue_speculative[i].age_q, (vm_page_queue_entry_t)m))
39236c6e 6833 {
39037602
A
6834 assert(m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q);
6835
6836 next = (vm_page_t)VM_PAGE_UNPACK_PTR(m->pageq.next);
39236c6e
A
6837 discard = FALSE;
6838 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
6839 && hibernate_consider_discard(m, preflight))
6840 {
39037602 6841 if (!preflight) hibernate_page_bitset(page_list, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
39236c6e
A
6842 count_discard_speculative++;
6843 discard = discard_all;
6844 }
6845 else
6846 count_speculative++;
6847 count_wire--;
39037602 6848 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
39236c6e
A
6849 if (discard) hibernate_discard_page(m);
6850 m = next;
6851 }
b0d623f7
A
6852 }
6853
39037602 6854 vm_page_queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
39236c6e 6855 {
39037602
A
6856 assert(m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR);
6857
39236c6e
A
6858 count_compressor++;
6859 count_wire--;
39037602 6860 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, VM_PAGE_GET_PHYS_PAGE(m));
39236c6e
A
6861 }
6862
6863 if (preflight == FALSE && discard_all == TRUE) {
6864 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6865
6866 HIBLOG("hibernate_teardown started\n");
6867 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
6868 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
6869
6870 pages -= count_discard_vm_struct_pages;
6871 count_wire -= count_discard_vm_struct_pages;
6872
6873 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
6874
6875 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
b0d623f7
A
6876 }
6877
db609669
A
6878 if (!preflight) {
6879 // pull wired from hibernate_bitmap
6880 bitmap = &page_list->bank_bitmap[0];
6881 bitmap_wired = &page_list_wired->bank_bitmap[0];
6882 for (bank = 0; bank < page_list->bank_count; bank++)
6883 {
6884 for (i = 0; i < bitmap->bitmapwords; i++)
6885 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
6886 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
6887 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
6888 }
b0d623f7
A
6889 }
6890
6891 // machine dependent adjustments
db609669 6892 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
b0d623f7 6893
db609669
A
6894 if (!preflight) {
6895 hibernate_stats.cd_count_wire = count_wire;
39236c6e
A
6896 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
6897 count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
db609669 6898 }
0b4c1975 6899
b0d623f7
A
6900 clock_get_uptime(&end);
6901 absolutetime_to_nanoseconds(end - start, &nsec);
6902 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
6903
39236c6e
A
6904 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
6905 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
6906 discard_all ? "did" : "could",
316670eb 6907 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
b0d623f7 6908
8a3053a0
A
6909 if (hibernate_stats.cd_skipped_xpmapped)
6910 HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);
6911
316670eb
A
6912 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
6913
39236c6e
A
6914 if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
6915
fe8ab488 6916#if MACH_ASSERT || DEBUG
39236c6e
A
6917 if (!preflight)
6918 {
316670eb
A
6919 if (vm_page_local_q) {
6920 for (i = 0; i < vm_page_local_q_count; i++) {
6921 struct vpl *lq;
6922 lq = &vm_page_local_q[i].vpl_un.vpl;
6923 VPL_UNLOCK(&lq->vpl_lock);
6924 }
6925 }
6926 vm_page_unlock_queues();
39236c6e 6927 }
fe8ab488 6928#endif /* MACH_ASSERT || DEBUG */
0b4c1975 6929
db609669
A
6930 if (preflight) {
6931 lck_mtx_unlock(&vm_page_queue_free_lock);
6932 vm_page_unlock_queues();
39236c6e 6933 vm_object_unlock(compressor_object);
db609669
A
6934 }
6935
0b4c1975 6936 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
b0d623f7
A
6937}
6938
/*
 * Second discard pass for hibernation: walk the anonymous, speculative,
 * inactive, active and cleaned page queues and discard every page whose
 * bit is set in "page_list" (set earlier by hibernate_page_list_setall()
 * for pages it decided could be dropped rather than written to the
 * hibernation image).
 *
 * Dirty pages discarded here are tallied as "purgeable"; clean ones are
 * tallied per source queue.  The counts are only logged; there is no
 * return value.
 *
 * NOTE(review): the throttled queue is not rescanned here, unlike in
 * hibernate_page_list_setall() -- presumably intentional; confirm.
 */
void
hibernate_page_list_discard(hibernate_page_list_t * page_list)
{
	uint64_t  start, end, nsec;
	vm_page_t m;
	vm_page_t next;
	uint32_t  i;
	uint32_t  count_discard_active    = 0;
	uint32_t  count_discard_inactive  = 0;
	uint32_t  count_discard_purgeable = 0;
	uint32_t  count_discard_cleaned   = 0;
	uint32_t  count_discard_speculative = 0;


#if MACH_ASSERT || DEBUG
	/*
	 * Debug builds hold the global page-queue lock and every per-CPU
	 * local queue lock across the whole scan.
	 */
	vm_page_lock_queues();
	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++) {
			struct vpl	*lq;
			lq = &vm_page_local_q[i].vpl_un.vpl;
			VPL_LOCK(&lq->vpl_lock);
		}
	}
#endif  /* MACH_ASSERT || DEBUG */

	clock_get_uptime(&start);

	/* anonymous (internal, inactive) pages */
	m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
	while (m && !vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t)m))
	{
		assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);

		/* capture the successor before hibernate_discard_page() unlinks m */
		next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
		if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
			hibernate_discard_page(m);
		}
		m = next;
	}

	/* speculative pages, one age bucket at a time */
	for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
	{
		m = (vm_page_t) vm_page_queue_first(&vm_page_queue_speculative[i].age_q);
		while (m && !vm_page_queue_end(&vm_page_queue_speculative[i].age_q, (vm_page_queue_entry_t)m))
		{
			assert(m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q);

			next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
			if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
			{
				count_discard_speculative++;
				hibernate_discard_page(m);
			}
			m = next;
		}
	}

	/* external (file-backed) inactive pages */
	m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
	while (m && !vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t)m))
	{
		assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);

		next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
		if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
			hibernate_discard_page(m);
		}
		m = next;
	}
	/* XXX FBDP TODO: secluded queue */

	/* active pages */
	m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
	while (m && !vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t)m))
	{
		assert(m->vm_page_q_state == VM_PAGE_ON_ACTIVE_Q);

		next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
		if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_active++;
			hibernate_discard_page(m);
		}
		m = next;
	}

	/* cleaned (laundered but not yet reused) pages */
	m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
	while (m && !vm_page_queue_end(&vm_page_queue_cleaned, (vm_page_queue_entry_t)m))
	{
		assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);

		next = (vm_page_t) VM_PAGE_UNPACK_PTR(m->pageq.next);
		if (hibernate_page_bittst(page_list, VM_PAGE_GET_PHYS_PAGE(m)))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_cleaned++;
			hibernate_discard_page(m);
		}
		m = next;
	}

#if MACH_ASSERT || DEBUG
	/* release the locks in the reverse of the order taken above */
	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++) {
			struct vpl	*lq;
			lq = &vm_page_local_q[i].vpl_un.vpl;
			VPL_UNLOCK(&lq->vpl_lock);
		}
	}
	vm_page_unlock_queues();
#endif  /* MACH_ASSERT || DEBUG */

	clock_get_uptime(&end);
	absolutetime_to_nanoseconds(end - start, &nsec);
	HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
	       nsec / 1000000ULL,
	       count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
}
7069
39236c6e
A
/*
 * State shared between hibernate_teardown_vm_structs() (pre-image) and
 * hibernate_rebuild_vm_structs() (post-resume).
 */
boolean_t		hibernate_paddr_map_inited = FALSE;
boolean_t		hibernate_rebuild_needed = FALSE;
/*
 * Index of the last vm_pages[] slot still holding a valid (non-free)
 * vm_page_t after teardown compaction.
 * NOTE(review): -1 stored into an unsigned int wraps to UINT_MAX; the
 * rebuild loop assigns it to a signed int, relying on the round-trip
 * conversion yielding -1 -- implementation-defined but intentional here.
 */
unsigned int		hibernate_teardown_last_valid_compact_indx = -1;
/* fictitious pages unlinked from the hash table, to be re-inserted on rebuild */
vm_page_t		hibernate_rebuild_hash_list = NULL;

/* teardown/rebuild bookkeeping counters (diagnostic) */
unsigned int		hibernate_teardown_found_tabled_pages = 0;
unsigned int		hibernate_teardown_found_created_pages = 0;
unsigned int		hibernate_teardown_found_free_pages = 0;
/* free count snapshot taken at teardown; rebuild asserts it is restored */
unsigned int		hibernate_teardown_vm_page_free_count;


/*
 * Describes one physically-contiguous run of pages within vm_pages[]:
 * vm_pages[ppnm_sindx .. ppnm_eindx) map to physical pages starting at
 * ppnm_base_paddr.  Used by hibernate_lookup_paddr().
 */
struct ppnum_mapping {
	struct ppnum_mapping	*ppnm_next;
	ppnum_t			ppnm_base_paddr;	/* physical page of first page in this run */
	unsigned int		ppnm_sindx;		/* starting index in vm_pages[] */
	unsigned int		ppnm_eindx;		/* index one past the end of the run */
};

struct ppnum_mapping	*ppnm_head;
struct ppnum_mapping	*ppnm_last_found = NULL;	/* one-entry lookup cache */
7090
7091
7092void
7093hibernate_create_paddr_map()
7094{
7095 unsigned int i;
7096 ppnum_t next_ppnum_in_run = 0;
7097 struct ppnum_mapping *ppnm = NULL;
7098
7099 if (hibernate_paddr_map_inited == FALSE) {
7100
7101 for (i = 0; i < vm_pages_count; i++) {
7102
7103 if (ppnm)
7104 ppnm->ppnm_eindx = i;
7105
39037602 7106 if (ppnm == NULL || VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) != next_ppnum_in_run) {
39236c6e
A
7107
7108 ppnm = kalloc(sizeof(struct ppnum_mapping));
7109
7110 ppnm->ppnm_next = ppnm_head;
7111 ppnm_head = ppnm;
7112
7113 ppnm->ppnm_sindx = i;
39037602 7114 ppnm->ppnm_base_paddr = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]);
39236c6e 7115 }
39037602 7116 next_ppnum_in_run = VM_PAGE_GET_PHYS_PAGE(&vm_pages[i]) + 1;
39236c6e
A
7117 }
7118 ppnm->ppnm_eindx++;
7119
7120 hibernate_paddr_map_inited = TRUE;
7121 }
7122}
7123
7124ppnum_t
7125hibernate_lookup_paddr(unsigned int indx)
7126{
7127 struct ppnum_mapping *ppnm = NULL;
7128
7129 ppnm = ppnm_last_found;
7130
7131 if (ppnm) {
7132 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
7133 goto done;
7134 }
7135 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
7136
7137 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
7138 ppnm_last_found = ppnm;
7139 break;
7140 }
7141 }
7142 if (ppnm == NULL)
7143 panic("hibernate_lookup_paddr of %d failed\n", indx);
7144done:
7145 return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
7146}
7147
7148
7149uint32_t
7150hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
7151{
7152 addr64_t saddr_aligned;
7153 addr64_t eaddr_aligned;
7154 addr64_t addr;
7155 ppnum_t paddr;
7156 unsigned int mark_as_unneeded_pages = 0;
7157
7158 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
7159 eaddr_aligned = eaddr & ~PAGE_MASK_64;
7160
7161 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
7162
7163 paddr = pmap_find_phys(kernel_pmap, addr);
7164
7165 assert(paddr);
7166
7167 hibernate_page_bitset(page_list, TRUE, paddr);
7168 hibernate_page_bitset(page_list_wired, TRUE, paddr);
7169
7170 mark_as_unneeded_pages++;
7171 }
7172 return (mark_as_unneeded_pages);
7173}
7174
7175
/*
 * Re-insert "mem" into the vm_page object/offset hash table during the
 * post-hibernation rebuild: a simple push onto the head of the computed
 * bucket's packed singly-linked list.
 *
 * NOTE(review): no bucket lock is taken -- presumably safe because the
 * rebuild path runs single-threaded; confirm before reusing elsewhere.
 */
void
hibernate_hash_insert_page(vm_page_t mem)
{
	vm_page_bucket_t *bucket;
	int		hash_id;
	vm_object_t	m_object;

	m_object = VM_PAGE_OBJECT(mem);

	/* only pages that were hashed before teardown may be re-inserted */
	assert(mem->hashed);
	assert(m_object);
	assert(mem->offset != (vm_object_offset_t) -1);

	/*
	 * Insert it into the object_object/offset hash table
	 */
	hash_id = vm_page_hash(m_object, mem->offset);
	bucket = &vm_page_buckets[hash_id];

	mem->next_m = bucket->page_list;
	bucket->page_list = VM_PAGE_PACK_PTR(mem);
}
7198
7199
7200void
7201hibernate_free_range(int sindx, int eindx)
7202{
7203 vm_page_t mem;
7204 unsigned int color;
7205
7206 while (sindx < eindx) {
7207 mem = &vm_pages[sindx];
7208
7209 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
7210
7211 mem->lopage = FALSE;
39037602 7212 mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
39236c6e 7213
39037602
A
7214 color = VM_PAGE_GET_PHYS_PAGE(mem) & vm_color_mask;
7215 vm_page_queue_enter_first(&vm_page_queue_free[color].qhead,
7216 mem,
7217 vm_page_t,
7218 pageq);
39236c6e
A
7219 vm_page_free_count++;
7220
7221 sindx++;
7222 }
7223}
7224
7225
7226extern void hibernate_rebuild_pmap_structs(void);
7227
/*
 * Undo hibernate_teardown_vm_structs() after resume: restore each
 * compacted vm_page_t to its original vm_pages[] slot (walking the
 * compacted region backwards), re-insert hashed pages into the
 * object/offset hash, and re-initialize every hole as a range of free
 * pages.  Finally, re-hash the fictitious pages that teardown collected
 * on hibernate_rebuild_hash_list.
 *
 * No-op unless hibernate_rebuild_needed was set by a prior teardown.
 */
void
hibernate_rebuild_vm_structs(void)
{
	int		cindx, sindx, eindx;
	vm_page_t	mem, tmem, mem_next;
	AbsoluteTime	startTime, endTime;
	uint64_t	nsec;

	if (hibernate_rebuild_needed == FALSE)
		return;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START, 0, 0, 0, 0, 0);
	HIBLOG("hibernate_rebuild started\n");

	clock_get_uptime(&startTime);

	hibernate_rebuild_pmap_structs();

	/* wipe the hash table; every surviving page is re-inserted below */
	bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
	eindx = vm_pages_count;

	/* walk the compacted region from last valid slot down to 0 */
	for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {

		mem = &vm_pages[cindx];
		/*
		 * hibernate_teardown_vm_structs leaves the location where
		 * this vm_page_t must be located in "next".
		 */
		tmem = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->next_m));
		mem->next_m = VM_PAGE_PACK_PTR(NULL);

		sindx = (int)(tmem - &vm_pages[0]);

		if (mem != tmem) {
			/*
			 * this vm_page_t was moved by hibernate_teardown_vm_structs,
			 * so move it back to its real location
			 */
			*tmem = *mem;
			mem = tmem;
		}
		if (mem->hashed)
			hibernate_hash_insert_page(mem);
		/*
		 * the 'hole' between this vm_page_t and the previous
		 * vm_page_t we moved needs to be initialized as
		 * a range of free vm_page_t's
		 */
		hibernate_free_range(sindx + 1, eindx);

		eindx = sindx;
	}
	/*
	 * NOTE(review): if the loop above never executes, sindx is read
	 * uninitialized here; in practice hibernate_rebuild_needed implies
	 * teardown ran and the region is non-empty -- confirm.
	 */
	if (sindx)
		hibernate_free_range(0, sindx);

	assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);

	/*
	 * process the list of vm_page_t's that were entered in the hash,
	 * but were not located in the vm_pages array... these are
	 * vm_page_t's that were created on the fly (i.e. fictitious)
	 */
	for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
		mem_next = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->next_m));

		mem->next_m = 0;
		hibernate_hash_insert_page(mem);
	}
	hibernate_rebuild_hash_list = NULL;

	clock_get_uptime(&endTime);
	SUB_ABSOLUTETIME(&endTime, &startTime);
	absolutetime_to_nanoseconds(endTime, &nsec);

	HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);

	hibernate_rebuild_needed = FALSE;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
7308
7309
7310extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);
7311
/*
 * Shrink the VM bookkeeping structures before writing the hibernation
 * image: (1) pull fictitious pages (outside vm_pages[]) off the hash
 * chains onto hibernate_rebuild_hash_list and mark the hash-bucket
 * pages as unneeded; (2) compact vm_pages[] by moving non-free pages
 * down into slots previously occupied by free pages, recording each
 * page's original slot in next_m so hibernate_rebuild_vm_structs() can
 * undo the move; (3) mark the now-unused tail of vm_pages[] and the
 * unneeded pmap structures as not needing preservation.
 *
 * Returns the total number of pages marked unneeded (0 if aborting).
 * Sets hibernate_rebuild_needed so rebuild runs after resume.
 */
uint32_t
hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
{
	unsigned int	i;
	unsigned int	compact_target_indx;
	vm_page_t	mem, mem_next;
	vm_page_bucket_t *bucket;
	unsigned int	mark_as_unneeded_pages = 0;
	unsigned int	unneeded_vm_page_bucket_pages = 0;
	unsigned int	unneeded_vm_pages_pages = 0;
	unsigned int	unneeded_pmap_pages = 0;
	addr64_t	start_of_unneeded = 0;
	addr64_t	end_of_unneeded = 0;


	if (hibernate_should_abort())
		return (0);

	HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
	       vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
	       vm_page_cleaned_count, compressor_object->resident_page_count);

	/*
	 * Collect fictitious pages (those not inside vm_pages[]) from every
	 * hash chain; they can't be compacted, so they are saved on a side
	 * list and re-hashed on rebuild.
	 */
	for (i = 0; i < vm_page_bucket_count; i++) {

		bucket = &vm_page_buckets[i];

		for (mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list)); mem != VM_PAGE_NULL; mem = mem_next) {
			assert(mem->hashed);

			mem_next = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->next_m));

			if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
				mem->next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
				hibernate_rebuild_hash_list = mem;
			}
		}
	}
	unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
	mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;

	/* snapshot: rebuild asserts the free count is exactly restored */
	hibernate_teardown_vm_page_free_count = vm_page_free_count;

	compact_target_indx = 0;

	/*
	 * Compact: free pages are removed from their free queues; non-free
	 * pages slide down into the lowest free slot (compact_target_indx).
	 */
	for (i = 0; i < vm_pages_count; i++) {

		mem = &vm_pages[i];

		if (mem->vm_page_q_state == VM_PAGE_ON_FREE_Q) {
			unsigned int color;

			assert(mem->busy);
			assert(!mem->lopage);

			color = VM_PAGE_GET_PHYS_PAGE(mem) & vm_color_mask;

			vm_page_queue_remove(&vm_page_queue_free[color].qhead,
					     mem,
					     vm_page_t,
					     pageq);

			VM_PAGE_ZERO_PAGEQ_ENTRY(mem);

			vm_page_free_count--;

			hibernate_teardown_found_free_pages++;

			/* remember the first free slot available as a move target */
			if (vm_pages[compact_target_indx].vm_page_q_state != VM_PAGE_ON_FREE_Q)
				compact_target_indx = i;
		} else {
			/*
			 * record this vm_page_t's original location
			 * we need this even if it doesn't get moved
			 * as an indicator to the rebuild function that
			 * we don't have to move it
			 */
			mem->next_m = VM_PAGE_PACK_PTR(mem);

			if (vm_pages[compact_target_indx].vm_page_q_state == VM_PAGE_ON_FREE_Q) {
				/*
				 * we've got a hole to fill, so
				 * move this vm_page_t to its new home
				 */
				vm_pages[compact_target_indx] = *mem;
				mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;

				hibernate_teardown_last_valid_compact_indx = compact_target_indx;
				compact_target_indx++;
			} else
				hibernate_teardown_last_valid_compact_indx = i;
		}
	}
	/* everything above the compacted region no longer needs saving */
	unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
							     (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
	mark_as_unneeded_pages += unneeded_vm_pages_pages;

	hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);

	if (start_of_unneeded) {
		unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
		mark_as_unneeded_pages += unneeded_pmap_pages;
	}
	HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);

	hibernate_rebuild_needed = TRUE;

	return (mark_as_unneeded_pages);
}
7420
7421
d1ecb069
A
7422#endif /* HIBERNATION */
7423
b0d623f7 7424/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
1c79356b
A
7425
7426#include <mach_vm_debug.h>
7427#if MACH_VM_DEBUG
7428
7429#include <mach_debug/hash_info.h>
7430#include <vm/vm_debug.h>
7431
7432/*
7433 * Routine: vm_page_info
7434 * Purpose:
7435 * Return information about the global VP table.
7436 * Fills the buffer with as much information as possible
7437 * and returns the desired size of the buffer.
7438 * Conditions:
7439 * Nothing locked. The caller should provide
7440 * possibly-pageable memory.
7441 */
7442
7443unsigned int
7444vm_page_info(
7445 hash_info_bucket_t *info,
7446 unsigned int count)
7447{
91447636 7448 unsigned int i;
b0d623f7 7449 lck_spin_t *bucket_lock;
1c79356b
A
7450
7451 if (vm_page_bucket_count < count)
7452 count = vm_page_bucket_count;
7453
7454 for (i = 0; i < count; i++) {
7455 vm_page_bucket_t *bucket = &vm_page_buckets[i];
7456 unsigned int bucket_count = 0;
7457 vm_page_t m;
7458
b0d623f7
A
7459 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
7460 lck_spin_lock(bucket_lock);
7461
39037602
A
7462 for (m = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
7463 m != VM_PAGE_NULL;
7464 m = (vm_page_t)(VM_PAGE_UNPACK_PTR(m->next_m)))
1c79356b 7465 bucket_count++;
b0d623f7
A
7466
7467 lck_spin_unlock(bucket_lock);
1c79356b
A
7468
7469 /* don't touch pageable memory while holding locks */
7470 info[i].hib_count = bucket_count;
7471 }
7472
7473 return vm_page_bucket_count;
7474}
7475#endif /* MACH_VM_DEBUG */
15129b1c
A
7476
7477#if VM_PAGE_BUCKETS_CHECK
7478void
7479vm_page_buckets_check(void)
7480{
7481 unsigned int i;
7482 vm_page_t p;
7483 unsigned int p_hash;
7484 vm_page_bucket_t *bucket;
7485 lck_spin_t *bucket_lock;
7486
7487 if (!vm_page_buckets_check_ready) {
7488 return;
7489 }
7490
7491#if HIBERNATION
7492 if (hibernate_rebuild_needed ||
7493 hibernate_rebuild_hash_list) {
7494 panic("BUCKET_CHECK: hibernation in progress: "
7495 "rebuild_needed=%d rebuild_hash_list=%p\n",
7496 hibernate_rebuild_needed,
7497 hibernate_rebuild_hash_list);
7498 }
7499#endif /* HIBERNATION */
7500
7501#if VM_PAGE_FAKE_BUCKETS
7502 char *cp;
7503 for (cp = (char *) vm_page_fake_buckets_start;
7504 cp < (char *) vm_page_fake_buckets_end;
7505 cp++) {
7506 if (*cp != 0x5a) {
7507 panic("BUCKET_CHECK: corruption at %p in fake buckets "
7508 "[0x%llx:0x%llx]\n",
7509 cp,
fe8ab488
A
7510 (uint64_t) vm_page_fake_buckets_start,
7511 (uint64_t) vm_page_fake_buckets_end);
15129b1c
A
7512 }
7513 }
7514#endif /* VM_PAGE_FAKE_BUCKETS */
7515
7516 for (i = 0; i < vm_page_bucket_count; i++) {
39037602
A
7517 vm_object_t p_object;
7518
15129b1c 7519 bucket = &vm_page_buckets[i];
fe8ab488 7520 if (!bucket->page_list) {
15129b1c
A
7521 continue;
7522 }
7523
7524 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
7525 lck_spin_lock(bucket_lock);
39037602
A
7526 p = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
7527
15129b1c 7528 while (p != VM_PAGE_NULL) {
39037602
A
7529 p_object = VM_PAGE_OBJECT(p);
7530
15129b1c
A
7531 if (!p->hashed) {
7532 panic("BUCKET_CHECK: page %p (%p,0x%llx) "
7533 "hash %d in bucket %d at %p "
7534 "is not hashed\n",
39037602 7535 p, p_object, p->offset,
15129b1c
A
7536 p_hash, i, bucket);
7537 }
39037602 7538 p_hash = vm_page_hash(p_object, p->offset);
15129b1c
A
7539 if (p_hash != i) {
7540 panic("BUCKET_CHECK: corruption in bucket %d "
7541 "at %p: page %p object %p offset 0x%llx "
7542 "hash %d\n",
39037602 7543 i, bucket, p, p_object, p->offset,
15129b1c
A
7544 p_hash);
7545 }
39037602 7546 p = (vm_page_t)(VM_PAGE_UNPACK_PTR(p->next_m));
15129b1c
A
7547 }
7548 lck_spin_unlock(bucket_lock);
7549 }
7550
7551// printf("BUCKET_CHECK: checked buckets\n");
7552}
7553#endif /* VM_PAGE_BUCKETS_CHECK */
3e170ce0
A
7554
/*
 * 'vm_fault_enter' will place newly created pages (zero-fill and COW) onto the
 * local queues if they exist... it is the only spot in the system where we add pages
 * to those queues... once on those queues, those pages can only move to one of the
 * global page queues or the free queues... they NEVER move from local q to local q.
 * the 'local' state is stable when vm_page_queues_remove is called since we're behind
 * the global vm_page_queue_lock at this point... we still need to take the local lock
 * in case this operation is being run on a different CPU than the local queue's identity,
 * but we don't have to worry about the page moving to a global queue or becoming wired
 * while we're grabbing the local lock since those operations would require the global
 * vm_page_queue_lock to be held, and we already own it.
 *
 * this is why it is safe to utilize the wire_count field in the vm_page_t as the local_id...
 * 'wired' and local are ALWAYS mutually exclusive conditions.
 */
/*
 * Remove 'mem' from whichever paging queue it currently sits on, based on
 * its vm_page_q_state, and fix up the matching global/local counters.
 * Caller must hold the global vm_page_queue_lock (asserted below).
 * Pages that are not on a queue, owned by the compressor, or wired are
 * handled as early-out cases with sanity asserts only.
 */
#if CONFIG_BACKGROUND_QUEUE
void
vm_page_queues_remove(vm_page_t mem, boolean_t remove_from_backgroundq)
#else
void
vm_page_queues_remove(vm_page_t mem, boolean_t __unused remove_from_backgroundq)
#endif
{
	boolean_t	was_pageable = TRUE;
	vm_object_t	m_object;

	m_object = VM_PAGE_OBJECT(mem);

	/* the global page-queues lock must already be owned by the caller */
	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	if (mem->vm_page_q_state == VM_PAGE_NOT_ON_Q)
	{
		/* not on any paging queue: only the background queue may need attention */
		assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
#if CONFIG_BACKGROUND_QUEUE
		if (remove_from_backgroundq == TRUE) {
			vm_page_remove_from_backgroundq(mem);
		}
		if (mem->vm_page_on_backgroundq) {
			assert(mem->vm_page_backgroundq.next != 0);
			assert(mem->vm_page_backgroundq.prev != 0);
		} else {
			assert(mem->vm_page_backgroundq.next == 0);
			assert(mem->vm_page_backgroundq.prev == 0);
		}
#endif /* CONFIG_BACKGROUND_QUEUE */
		return;
	}

	if (mem->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR)
	{
		/* compressor-owned pages are never on the pageable queues */
		assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
#if CONFIG_BACKGROUND_QUEUE
		assert(mem->vm_page_backgroundq.next == 0 &&
		       mem->vm_page_backgroundq.prev == 0 &&
		       mem->vm_page_on_backgroundq == FALSE);
#endif
		return;
	}
	if (mem->vm_page_q_state == VM_PAGE_IS_WIRED) {
		/*
		 * might put these guys on a list for debugging purposes
		 * if we do, we'll need to remove this assert
		 */
		assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
#if CONFIG_BACKGROUND_QUEUE
		assert(mem->vm_page_backgroundq.next == 0 &&
		       mem->vm_page_backgroundq.prev == 0 &&
		       mem->vm_page_on_backgroundq == FALSE);
#endif
		return;
	}

	/* only ordinary pageable pages should reach the switch below */
	assert(m_object != compressor_object);
	assert(m_object != kernel_object);
	assert(m_object != vm_submap_object);
	assert(!mem->fictitious);

	switch(mem->vm_page_q_state) {

	case VM_PAGE_ON_ACTIVE_LOCAL_Q:
	{
		struct vpl	*lq;

		/*
		 * per-CPU local queue: must take the local lock even though
		 * we hold the global lock (see block comment above this function)
		 */
		lq = &vm_page_local_q[mem->local_id].vpl_un.vpl;
		VPL_LOCK(&lq->vpl_lock);
		vm_page_queue_remove(&lq->vpl_queue,
				     mem, vm_page_t, pageq);
		mem->local_id = 0;
		lq->vpl_count--;
		if (m_object->internal) {
			lq->vpl_internal_count--;
		} else {
			lq->vpl_external_count--;
		}
		VPL_UNLOCK(&lq->vpl_lock);
		was_pageable = FALSE;
		break;
	}
	case VM_PAGE_ON_ACTIVE_Q:
	{
		vm_page_queue_remove(&vm_page_queue_active,
				     mem, vm_page_t, pageq);
		vm_page_active_count--;
		break;
	}

	case VM_PAGE_ON_INACTIVE_INTERNAL_Q:
	{
		assert(m_object->internal == TRUE);

		vm_page_inactive_count--;
		vm_page_queue_remove(&vm_page_queue_anonymous,
				     mem, vm_page_t, pageq);
		vm_page_anonymous_count--;
		vm_purgeable_q_advance_all();
		break;
	}

	case VM_PAGE_ON_INACTIVE_EXTERNAL_Q:
	{
		assert(m_object->internal == FALSE);

		vm_page_inactive_count--;
		vm_page_queue_remove(&vm_page_queue_inactive,
				     mem, vm_page_t, pageq);
		vm_purgeable_q_advance_all();
		break;
	}

	case VM_PAGE_ON_INACTIVE_CLEANED_Q:
	{
		assert(m_object->internal == FALSE);

		vm_page_inactive_count--;
		vm_page_queue_remove(&vm_page_queue_cleaned,
				     mem, vm_page_t, pageq);
		vm_page_cleaned_count--;
		break;
	}

	case VM_PAGE_ON_THROTTLED_Q:
	{
		assert(m_object->internal == TRUE);

		vm_page_queue_remove(&vm_page_queue_throttled,
				     mem, vm_page_t, pageq);
		vm_page_throttled_count--;
		was_pageable = FALSE;
		break;
	}

	case VM_PAGE_ON_SPECULATIVE_Q:
	{
		assert(m_object->internal == FALSE);

		vm_page_remque(&mem->pageq);
		vm_page_speculative_count--;
		break;
	}

#if CONFIG_SECLUDED_MEMORY
	case VM_PAGE_ON_SECLUDED_Q:
	{
		vm_page_queue_remove(&vm_page_queue_secluded,
				     mem, vm_page_t, pageq);
		vm_page_secluded_count--;
		if (m_object == VM_OBJECT_NULL) {
			/* free secluded page (no owning object) */
			vm_page_secluded_count_free--;
			was_pageable = FALSE;
		} else {
			assert(!m_object->internal);
			vm_page_secluded_count_inuse--;
			was_pageable = FALSE;
//			was_pageable = TRUE;
		}
		break;
	}
#endif /* CONFIG_SECLUDED_MEMORY */

	default:
	{
		/*
		 * if (mem->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)
		 * NOTE: vm_page_queues_remove does not deal with removing pages from the pageout queue...
		 * the caller is responsible for determining if the page is on that queue, and if so, must
		 * either first remove it (it needs both the page queues lock and the object lock to do
		 * this via vm_pageout_steal_laundry), or avoid the call to vm_page_queues_remove
		 *
		 * we also don't expect to encounter VM_PAGE_ON_FREE_Q, VM_PAGE_ON_FREE_LOCAL_Q, VM_PAGE_ON_FREE_LOPAGE_Q
		 * or any of the undefined states
		 */
		panic("vm_page_queues_remove - bad page q_state (%p, %d)\n", mem, mem->vm_page_q_state);
		break;
	}

	}
	/* mark the page as off-queue and clear its linkage */
	VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
	mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;

#if CONFIG_BACKGROUND_QUEUE
	if (remove_from_backgroundq == TRUE)
		vm_page_remove_from_backgroundq(mem);
#endif
	if (was_pageable) {
		if (m_object->internal) {
			vm_page_pageable_internal_count--;
		} else {
			vm_page_pageable_external_count--;
		}
	}
}
7767
/*
 * Unlink 'page' from its object's memq resident-page list.
 * If the page is currently the object's memq lookup hint, move the
 * hint to a neighboring page (next preferred, then prev, else NULL)
 * before removal so the hint never dangles.
 */
void
vm_page_remove_internal(vm_page_t page)
{
	vm_object_t __object = VM_PAGE_OBJECT(page);
	if (page == __object->memq_hint) {
		vm_page_t	__new_hint;
		vm_page_queue_entry_t	__qe;
		/* prefer the next resident page as the new hint */
		__qe = (vm_page_queue_entry_t)vm_page_queue_next(&page->listq);
		if (vm_page_queue_end(&__object->memq, __qe)) {
			/* no next page; fall back to the previous one */
			__qe = (vm_page_queue_entry_t)vm_page_queue_prev(&page->listq);
			if (vm_page_queue_end(&__object->memq, __qe)) {
				/* 'page' was the only resident page */
				__qe = NULL;
			}
		}
		__new_hint = (vm_page_t)((uintptr_t) __qe);
		__object->memq_hint = __new_hint;
	}
	vm_page_queue_remove(&__object->memq, page, vm_page_t, listq);
#if CONFIG_SECLUDED_MEMORY
	if (__object->eligible_for_secluded) {
		vm_page_secluded.eligible_for_secluded--;
	}
#endif /* CONFIG_SECLUDED_MEMORY */
}
7792
/*
 * Place 'mem' on an inactive queue: the anonymous queue for internal
 * (anonymous) objects, otherwise the file-backed inactive queue.
 * 'first' == TRUE enqueues at the head instead of the tail.
 * Caller must hold the global vm_page_queue_lock (asserted below) and
 * the page must not currently be on any queue.
 */
void
vm_page_enqueue_inactive(vm_page_t mem, boolean_t first)
{
	vm_object_t	m_object;

	m_object = VM_PAGE_OBJECT(mem);

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	assert(!mem->fictitious);
	assert(!mem->laundry);
	assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
	vm_page_check_pageable_safe(mem);

#if CONFIG_SECLUDED_MEMORY
	/*
	 * eligible file-cache pages may be diverted to the secluded queue
	 * instead, when secluded memory is configured for the file cache
	 * and no task currently consumes secluded memory
	 */
	if (secluded_for_filecache &&
	    vm_page_secluded_target != 0 &&
	    num_tasks_can_use_secluded_mem == 0 &&
	    m_object->eligible_for_secluded &&
	    secluded_aging_policy == SECLUDED_AGING_FIFO) {
		mem->vm_page_q_state = VM_PAGE_ON_SECLUDED_Q;
		vm_page_queue_enter(&vm_page_queue_secluded, mem,
				    vm_page_t, pageq);
		vm_page_secluded_count++;
		vm_page_secluded_count_inuse++;
		assert(!m_object->internal);
//		vm_page_pageable_external_count++;
		return;
	}
#endif /* CONFIG_SECLUDED_MEMORY */

	if (m_object->internal) {
		mem->vm_page_q_state = VM_PAGE_ON_INACTIVE_INTERNAL_Q;

		if (first == TRUE)
			vm_page_queue_enter_first(&vm_page_queue_anonymous, mem, vm_page_t, pageq);
		else
			vm_page_queue_enter(&vm_page_queue_anonymous, mem, vm_page_t, pageq);

		vm_page_anonymous_count++;
		vm_page_pageable_internal_count++;
	} else {
		mem->vm_page_q_state = VM_PAGE_ON_INACTIVE_EXTERNAL_Q;

		if (first == TRUE)
			vm_page_queue_enter_first(&vm_page_queue_inactive, mem, vm_page_t, pageq);
		else
			vm_page_queue_enter(&vm_page_queue_inactive, mem, vm_page_t, pageq);

		vm_page_pageable_external_count++;
	}
	vm_page_inactive_count++;
	/* feed the purgeable-token accounting (see vm_purgeable.c) */
	token_new_pagecount++;

#if CONFIG_BACKGROUND_QUEUE
	if (mem->vm_page_in_background)
		vm_page_add_to_backgroundq(mem, FALSE);
#endif
}
7851
7852void
7853vm_page_enqueue_active(vm_page_t mem, boolean_t first)
7854{
7855 vm_object_t m_object;
7856
7857 m_object = VM_PAGE_OBJECT(mem);
7858
7859 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
7860 assert(!mem->fictitious);
7861 assert(!mem->laundry);
7862 assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
7863 vm_page_check_pageable_safe(mem);
7864
7865 mem->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
7866 if (first == TRUE)
7867 vm_page_queue_enter_first(&vm_page_queue_active, mem, vm_page_t, pageq);
7868 else
7869 vm_page_queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq);
7870 vm_page_active_count++;
7871
7872 if (m_object->internal) {
7873 vm_page_pageable_internal_count++;
7874 } else {
7875 vm_page_pageable_external_count++;
7876 }
7877
7878#if CONFIG_BACKGROUND_QUEUE
7879 if (mem->vm_page_in_background)
7880 vm_page_add_to_backgroundq(mem, FALSE);
7881#endif
3e170ce0
A
7882}
7883
7884/*
7885 * Pages from special kernel objects shouldn't
7886 * be placed on pageable queues.
7887 */
7888void
7889vm_page_check_pageable_safe(vm_page_t page)
7890{
39037602
A
7891 vm_object_t page_object;
7892
7893 page_object = VM_PAGE_OBJECT(page);
7894
7895 if (page_object == kernel_object) {
3e170ce0
A
7896 panic("vm_page_check_pageable_safe: trying to add page" \
7897 "from kernel object (%p) to pageable queue", kernel_object);
7898 }
7899
39037602 7900 if (page_object == compressor_object) {
3e170ce0
A
7901 panic("vm_page_check_pageable_safe: trying to add page" \
7902 "from compressor object (%p) to pageable queue", compressor_object);
7903 }
7904
39037602 7905 if (page_object == vm_submap_object) {
3e170ce0
A
7906 panic("vm_page_check_pageable_safe: trying to add page" \
7907 "from submap object (%p) to pageable queue", vm_submap_object);
7908 }
7909}
7910
7911/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
7912 * wired page diagnose
7913 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
7914
7915#include <libkern/OSKextLibPrivate.h>
7916
7917vm_allocation_site_t *
7918vm_allocation_sites[VM_KERN_MEMORY_COUNT];
7919
/*
 * Walk the current thread's kernel stack frames looking for the first
 * return address that lies outside the kernel's own text range; ask
 * OSKext for the allocation site registered for that caller address.
 * Returns the site's tag, or VM_KERN_MEMORY_NONE if no site is found
 * (VM_KERN_MEMORY_OSFMK when there is no current thread yet).
 */
vm_tag_t
vm_tag_bt(void)
{
	uintptr_t* frameptr;
	uintptr_t* frameptr_next;
	uintptr_t retaddr;
	uintptr_t kstackb, kstackt;
	const vm_allocation_site_t * site;
	thread_t cthread;

	cthread = current_thread();
	if (__improbable(cthread == NULL)) return VM_KERN_MEMORY_OSFMK;

	/* bounds of this thread's kernel stack, for sanity-checking frame pointers */
	kstackb = cthread->kernel_stack;
	kstackt = kstackb + kernel_stack_size;

	/* Load stack frame pointer (EBP on x86) into frameptr */
	frameptr = __builtin_frame_address(0);
	site = NULL;
	while (frameptr != NULL)
	{
		/* Verify thread stack bounds; +2 covers the saved fp and return address */
		if (((uintptr_t)(frameptr + 2) > kstackt) || ((uintptr_t)frameptr < kstackb)) break;

		/* Next frame pointer is pointed to by the previous one */
		frameptr_next = (uintptr_t*) *frameptr;

		/* Pull return address from one spot above the frame pointer */
		retaddr = *(frameptr + 1);

		/* first return address outside kernel text identifies the caller (e.g. a kext) */
		if ((retaddr < vm_kernel_stext) || (retaddr > vm_kernel_top))
		{
			site = OSKextGetAllocationSiteForCaller(retaddr);
			break;
		}

		frameptr = frameptr_next;
	}
	return (site ? site->tag : VM_KERN_MEMORY_NONE);
}
7960
7961static uint64_t free_tag_bits[256/64];
7962
/*
 * Assign a free dynamic tag to 'site' (no-op if it already has one).
 * Free tags are tracked in free_tag_bits as an MSB-first bitmap:
 * bit (63 - k) of word w represents tag (w * 64 + k), so the lowest
 * available tag is found with __builtin_clzll.  When the bitmap is
 * exhausted the site falls back to the shared VM_KERN_MEMORY_ANY tag.
 * Caller must hold vm_allocation_sites_lock.
 */
void
vm_tag_alloc_locked(vm_allocation_site_t * site)
{
	vm_tag_t tag;
	uint64_t avail;
	uint64_t idx;

	if (site->tag) return;

	idx = 0;
	while (TRUE)
	{
		avail = free_tag_bits[idx];
		if (avail)
		{
			/* index of the highest set bit == lowest free tag in this word */
			tag = __builtin_clzll(avail);
			avail &= ~(1ULL << (63 - tag));
			free_tag_bits[idx] = avail;
			tag += (idx << 6);
			break;
		}
		idx++;
		if (idx >= (sizeof(free_tag_bits) / sizeof(free_tag_bits[0])))
		{
			/* no dynamic tags left; share the catch-all tag */
			tag = VM_KERN_MEMORY_ANY;
			break;
		}
	}
	site->tag = tag;
	if (VM_KERN_MEMORY_ANY != tag)
	{
		assert(!vm_allocation_sites[tag]);
		vm_allocation_sites[tag] = site;
	}
}
7998
7999static void
8000vm_tag_free_locked(vm_tag_t tag)
8001{
8002 uint64_t avail;
8003 uint32_t idx;
8004 uint64_t bit;
8005
8006 if (VM_KERN_MEMORY_ANY == tag) return;
8007
8008 idx = (tag >> 6);
8009 avail = free_tag_bits[idx];
8010 tag &= 63;
8011 bit = (1ULL << (63 - tag));
8012 assert(!(avail & bit));
8013 free_tag_bits[idx] = (avail | bit);
8014}
8015
8016static void
8017vm_tag_init(void)
8018{
8019 vm_tag_t tag;
8020 for (tag = VM_KERN_MEMORY_FIRST_DYNAMIC; tag < VM_KERN_MEMORY_ANY; tag++)
8021 {
8022 vm_tag_free_locked(tag);
8023 }
8024}
8025
8026vm_tag_t
8027vm_tag_alloc(vm_allocation_site_t * site)
8028{
8029 vm_tag_t tag;
8030
8031 if (VM_TAG_BT & site->flags)
8032 {
8033 tag = vm_tag_bt();
8034 if (VM_KERN_MEMORY_NONE != tag) return (tag);
8035 }
8036
8037 if (!site->tag)
8038 {
8039 lck_spin_lock(&vm_allocation_sites_lock);
8040 vm_tag_alloc_locked(site);
8041 lck_spin_unlock(&vm_allocation_sites_lock);
8042 }
8043
8044 return (site->tag);
8045}
8046
8047static void
8048vm_page_count_object(mach_memory_info_t * sites, unsigned int __unused num_sites, vm_object_t object)
8049{
8050 if (!object->wired_page_count) return;
8051 if (object != kernel_object)
8052 {
8053 assert(object->wire_tag < num_sites);
8054 sites[object->wire_tag].size += ptoa_64(object->wired_page_count);
8055 }
8056}
8057
8058typedef void (*vm_page_iterate_proc)(mach_memory_info_t * sites,
8059 unsigned int num_sites, vm_object_t object);
8060
8061static void
8062vm_page_iterate_purgeable_objects(mach_memory_info_t * sites, unsigned int num_sites,
8063 vm_page_iterate_proc proc, purgeable_q_t queue,
8064 int group)
8065{
8066 vm_object_t object;
8067
8068 for (object = (vm_object_t) queue_first(&queue->objq[group]);
8069 !queue_end(&queue->objq[group], (queue_entry_t) object);
8070 object = (vm_object_t) queue_next(&object->objq))
8071 {
8072 proc(sites, num_sites, object);
8073 }
8074}
8075
/*
 * Invoke 'proc' on every accountable VM object: the wired-objects list,
 * the purgeable nonvolatile queue, and all volatile purgeable queue
 * groups (obsolete, FIFO, LIFO).  Each list is walked under its own
 * lock (vm_objects_wired_lock, then vm_purgeable_queue_lock).
 */
static void
vm_page_iterate_objects(mach_memory_info_t * sites, unsigned int num_sites,
			vm_page_iterate_proc proc)
{
	purgeable_q_t   volatile_q;
	queue_head_t  * nonvolatile_q;
	vm_object_t     object;
	int             group;

	lck_spin_lock(&vm_objects_wired_lock);
	queue_iterate(&vm_objects_wired,
		      object,
		      vm_object_t,
		      objq)
	{
		proc(sites, num_sites, object);
	}
	lck_spin_unlock(&vm_objects_wired_lock);

	lck_mtx_lock(&vm_purgeable_queue_lock);
	nonvolatile_q = &purgeable_nonvolatile_queue;
	for (object = (vm_object_t) queue_first(nonvolatile_q);
	     !queue_end(nonvolatile_q, (queue_entry_t) object);
	     object = (vm_object_t) queue_next(&object->objq))
	{
		proc(sites, num_sites, object);
	}

	/* the obsolete queue has a single group */
	volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE];
	vm_page_iterate_purgeable_objects(sites, num_sites, proc, volatile_q, 0);

	volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
	for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
	{
		vm_page_iterate_purgeable_objects(sites, num_sites, proc, volatile_q, group);
	}

	volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
	for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
	{
		vm_page_iterate_purgeable_objects(sites, num_sites, proc, volatile_q, group);
	}
	lck_mtx_unlock(&vm_purgeable_queue_lock);
}
8120
/*
 * Post-process the per-tag accounting array: label each slot with its
 * site identity and flags.  Static (pre-dynamic) tags get fixed labels;
 * dynamic tags are resolved through vm_allocation_sites under the spin
 * lock to either a kmod id or an unslid kernel site address.
 * Returns the total bytes accounted across all tag slots.
 */
static uint64_t
process_account(mach_memory_info_t * sites, unsigned int __unused num_sites, uint64_t zones_collectable_bytes)
{
	uint64_t found;
	unsigned int idx;
	vm_allocation_site_t * site;

	assert(num_sites >= VM_KERN_MEMORY_COUNT);
	found = 0;
	for (idx = 0; idx < VM_KERN_MEMORY_COUNT; idx++)
	{
		found += sites[idx].size;
		if (idx < VM_KERN_MEMORY_FIRST_DYNAMIC)
		{
			/* fixed, well-known tag: label directly, no lock needed */
			sites[idx].site = idx;
			sites[idx].flags |= VM_KERN_SITE_TAG;
			if (VM_KERN_MEMORY_ZONE == idx)
			{
				/* zone memory is reported via zone info; hide it here */
				sites[idx].flags |= VM_KERN_SITE_HIDE;
				sites[idx].collectable_bytes = zones_collectable_bytes;
			} else sites[idx].flags |= VM_KERN_SITE_WIRED;
			continue;
		}
		lck_spin_lock(&vm_allocation_sites_lock);
		if ((site = vm_allocation_sites[idx]))
		{
			if (sites[idx].size)
			{
				sites[idx].flags |= VM_KERN_SITE_WIRED;
				if (VM_TAG_KMOD == (VM_KERN_SITE_TYPE & site->flags))
				{
					sites[idx].site = OSKextGetKmodIDForSite(site, NULL, 0);
					sites[idx].flags |= VM_KERN_SITE_KMOD;
				}
				else
				{
					sites[idx].site = VM_KERNEL_UNSLIDE(site);
					sites[idx].flags |= VM_KERN_SITE_KERNEL;
				}
				/* non-NULL 'site' after unlock means "free it"; keep it */
				site = NULL;
			}
			else
			{
#if 1
				site = NULL;
#else
				/* this code would free a site with no allocations but can race a new
				 * allocation being made */
				vm_tag_free_locked(site->tag);
				site->tag = VM_KERN_MEMORY_NONE;
				vm_allocation_sites[idx] = NULL;
				if (!(VM_TAG_UNLOAD & site->flags)) site = NULL;
#endif
			}
		}
		lck_spin_unlock(&vm_allocation_sites_lock);
		/* must be called outside the spin lock */
		if (site) OSKextFreeSite(site);
	}

	return (found);
}
8182
/*
 * Fill 'sites' with a per-tag breakdown of wired kernel memory:
 *  - per-object wired counts via vm_page_iterate_objects,
 *  - global counters (managed/wired/reserved/stolen/lopage),
 *  - kernel/zone/kalloc map sizes,
 *  - per-map-entry wired pages in kernel_object, attributed by
 *    the entry's alias tag, walking kernel_map and its submaps
 *    (to depth kMaxKernelDepth) with a small explicit stack.
 * Finishes by labeling all slots through process_account().
 * Returns KERN_ABORTED if called before VM bootstrap completes.
 */
kern_return_t
vm_page_diagnose(mach_memory_info_t * sites, unsigned int num_sites, uint64_t zones_collectable_bytes)
{
	enum { kMaxKernelDepth = 1 };
	vm_map_t           maps   [kMaxKernelDepth];
	vm_map_entry_t     entries[kMaxKernelDepth];
	vm_map_t           map;
	vm_map_entry_t     entry;
	vm_object_offset_t offset;
	vm_page_t          page;
	int                stackIdx, count;
	uint64_t           wired_size;
	uint64_t           wired_managed_size;
	uint64_t           wired_reserved_size;
	mach_memory_info_t * counts;

	bzero(sites, num_sites * sizeof(mach_memory_info_t));

	/* too early in boot to produce meaningful numbers */
	if (!vm_page_wire_count_initial) return (KERN_ABORTED);

	vm_page_iterate_objects(sites, num_sites, &vm_page_count_object);

	wired_size          = ptoa_64(vm_page_wire_count + vm_lopage_free_count + vm_page_throttled_count);
	wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count + vm_page_throttled_count);
	wired_managed_size  = ptoa_64(vm_page_wire_count - vm_page_wire_count_initial);

	assert(num_sites >= (VM_KERN_MEMORY_COUNT + VM_KERN_COUNTER_COUNT));
	counts = &sites[VM_KERN_MEMORY_COUNT];

#define SET_COUNT(xcount, xsize, xflags)			\
	counts[xcount].site  = (xcount);			\
	counts[xcount].size  = (xsize);				\
	counts[xcount].flags = VM_KERN_SITE_COUNTER | xflags;

	SET_COUNT(VM_KERN_COUNT_MANAGED,       ptoa_64(vm_page_pages),        0);
	SET_COUNT(VM_KERN_COUNT_WIRED,         wired_size,                    0);
	SET_COUNT(VM_KERN_COUNT_WIRED_MANAGED, wired_managed_size,            0);
	SET_COUNT(VM_KERN_COUNT_RESERVED,      wired_reserved_size,           VM_KERN_SITE_WIRED);
	SET_COUNT(VM_KERN_COUNT_STOLEN,        ptoa_64(vm_page_stolen_count), VM_KERN_SITE_WIRED);
	SET_COUNT(VM_KERN_COUNT_LOPAGE,        ptoa_64(vm_lopage_free_count), VM_KERN_SITE_WIRED);

#define SET_MAP(xcount, xsize, xfree, xlargest)	\
	counts[xcount].site    = (xcount);	\
	counts[xcount].size    = (xsize);	\
	counts[xcount].free    = (xfree);	\
	counts[xcount].largest = (xlargest);	\
	counts[xcount].flags   = VM_KERN_SITE_COUNTER;

	vm_map_size_t map_size, map_free, map_largest;

	vm_map_sizes(kernel_map, &map_size, &map_free, &map_largest);
	SET_MAP(VM_KERN_COUNT_MAP_KERNEL, map_size, map_free, map_largest);

	vm_map_sizes(zone_map, &map_size, &map_free, &map_largest);
	SET_MAP(VM_KERN_COUNT_MAP_ZONE, map_size, map_free, map_largest);

	vm_map_sizes(kalloc_map, &map_size, &map_free, &map_largest);
	SET_MAP(VM_KERN_COUNT_MAP_KALLOC, map_size, map_free, map_largest);

	map = kernel_map;
	stackIdx = 0;
	while (map)
	{
		vm_map_lock(map);
		/*
		 * NOTE: loop condition is 'map', not an entry test -- the inner
		 * while() below unlocks/advances and clears 'map' at the end of
		 * the walk; entering a submap breaks out with entry == NULL.
		 */
		for (entry = map->hdr.links.next; map; entry = entry->links.next)
		{
			if (entry->is_sub_map)
			{
				/* push the current map/entry and descend into the submap */
				assert(stackIdx < kMaxKernelDepth);
				maps[stackIdx] = map;
				entries[stackIdx] = entry;
				stackIdx++;
				map = VME_SUBMAP(entry);
				entry = NULL;
				break;
			}
			if (VME_OBJECT(entry) == kernel_object)
			{
				/* count wired pages resident in this entry's range */
				count = 0;
				vm_object_lock(VME_OBJECT(entry));
				for (offset = entry->links.start; offset < entry->links.end; offset += page_size)
				{
					page = vm_page_lookup(VME_OBJECT(entry), offset);
					if (page && VM_PAGE_WIRED(page)) count++;
				}
				vm_object_unlock(VME_OBJECT(entry));

				if (count)
				{
					assert(VME_ALIAS(entry) < num_sites);
					sites[VME_ALIAS(entry)].size += ptoa_64(count);
				}
			}
			/* at the last entry: unlock and pop back to the parent map (or finish) */
			while (map && (entry == vm_map_last_entry(map)))
			{
				vm_map_unlock(map);
				if (!stackIdx) map = NULL;
				else
				{
					--stackIdx;
					map = maps[stackIdx];
					entry = entries[stackIdx];
				}
			}
		}
	}

	process_account(sites, num_sites, zones_collectable_bytes);

	return (KERN_SUCCESS);
}
39037602
A
8294
8295uint32_t
8296vm_tag_get_kext(vm_tag_t tag, char * name, vm_size_t namelen)
8297{
8298 vm_allocation_site_t * site;
8299 uint32_t kmodId;
8300
8301 kmodId = 0;
8302 lck_spin_lock(&vm_allocation_sites_lock);
8303 if ((site = vm_allocation_sites[tag]))
8304 {
8305 if (VM_TAG_KMOD == (VM_KERN_SITE_TYPE & site->flags))
8306 {
8307 kmodId = OSKextGetKmodIDForSite(site, name, namelen);
8308 }
8309 }
8310 lck_spin_unlock(&vm_allocation_sites_lock);
8311
8312 return (kmodId);
8313}
8314
8315#if DEBUG || DEVELOPMENT
8316
8317#define vm_tag_set_lock(set) lck_spin_lock(&set->lock)
8318#define vm_tag_set_unlock(set) lck_spin_unlock(&set->lock)
8319
/*
 * Initialize a vm_tag_set: set up its spin lock and zero 'count'
 * entries.  NOTE(review): assumes set->entries has room for at least
 * 'count' elements -- the caller must size the backing storage.
 */
void
vm_tag_set_init(vm_tag_set_t set, uint32_t count)
{
	lck_spin_init(&set->lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
	bzero(&set->entries, count * sizeof(struct vm_tag_set_entry));
}
8326
8327kern_return_t
8328vm_tag_set_enter(vm_tag_set_t set, uint32_t count, vm_tag_t tag)
8329{
8330 kern_return_t kr;
8331 uint32_t idx, free;
8332
8333 vm_tag_set_lock(set);
8334
8335 assert(tag != VM_KERN_MEMORY_NONE);
8336
8337 kr = KERN_NO_SPACE;
8338 free = -1U;
8339 for (idx = 0; idx < count; idx++)
8340 {
8341 if (tag == set->entries[idx].tag)
8342 {
8343 set->entries[idx].count++;
8344 kr = KERN_SUCCESS;
8345 break;
8346 }
8347 if ((free == -1U) && !set->entries[idx].count) free = idx;
8348 }
8349
8350 if ((KERN_SUCCESS != kr) && (free != -1U))
8351 {
8352 set->entries[free].tag = tag;
8353 set->entries[free].count = 1;
8354 kr = KERN_SUCCESS;
8355 }
8356
8357 vm_tag_set_unlock(set);
8358
8359 return (kr);
8360}
8361
/*
 * Drop one reference for 'tag' in the set.  Returns KERN_NOT_IN_SET if
 * the tag has no live entry.  When 'new_tagp' is non-NULL, *new_tagp is
 * set to a tag that still has references after the removal: 'tag'
 * itself if its count remains non-zero, otherwise some other live tag
 * in the set (VM_KERN_MEMORY_NONE if none remain).
 */
kern_return_t
vm_tag_set_remove(vm_tag_set_t set, uint32_t count, vm_tag_t tag, vm_tag_t * new_tagp)
{
	kern_return_t kr;
	uint32_t idx;
	vm_tag_t new_tag;

	assert(tag != VM_KERN_MEMORY_NONE);
	new_tag = VM_KERN_MEMORY_NONE;
	vm_tag_set_lock(set);

	kr = KERN_NOT_IN_SET;
	for (idx = 0; idx < count; idx++)
	{
		if ((tag != VM_KERN_MEMORY_NONE)
			&& (tag == set->entries[idx].tag)
			&& set->entries[idx].count)
		{
			set->entries[idx].count--;
			kr = KERN_SUCCESS;
			if (set->entries[idx].count)
			{
				/* still referenced: it remains the replacement tag */
				new_tag = tag;
				break;
			}
			/* entry went to zero; keep scanning for a surviving tag only if requested */
			if (!new_tagp) break;
			tag = VM_KERN_MEMORY_NONE;
		}

		/* candidate replacement: any live entry seen before/after the removal */
		if (set->entries[idx].count && (VM_KERN_MEMORY_NONE == new_tag))
		{
			new_tag = set->entries[idx].tag;
			if (VM_KERN_MEMORY_NONE == tag) break;
		}
	}

	vm_tag_set_unlock(set);
	if (new_tagp) *new_tagp = new_tag;

	return (kr);
}
8403
8404#endif /* DEBUG || DEVELOPMENT */