]> git.saurik.com Git - apple/xnu.git/blame - osfmk/vm/vm_resident.c
xnu-1699.22.81.tar.gz
[apple/xnu.git] / osfmk / vm / vm_resident.c
CommitLineData
1c79356b 1/*
b0d623f7 2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
91447636 65#include <debug.h>
2d21ac55 66#include <libkern/OSAtomic.h>
91447636 67
9bccf70c 68#include <mach/clock_types.h>
1c79356b
A
69#include <mach/vm_prot.h>
70#include <mach/vm_statistics.h>
2d21ac55 71#include <mach/sdt.h>
1c79356b
A
72#include <kern/counters.h>
73#include <kern/sched_prim.h>
74#include <kern/task.h>
75#include <kern/thread.h>
b0d623f7 76#include <kern/kalloc.h>
1c79356b
A
77#include <kern/zalloc.h>
78#include <kern/xpr.h>
79#include <vm/pmap.h>
80#include <vm/vm_init.h>
81#include <vm/vm_map.h>
82#include <vm/vm_page.h>
83#include <vm/vm_pageout.h>
84#include <vm/vm_kern.h> /* kernel_memory_allocate() */
85#include <kern/misc_protos.h>
86#include <zone_debug.h>
87#include <vm/cpm.h>
6d2010ae 88#include <pexpert/pexpert.h>
55e303ae 89
91447636 90#include <vm/vm_protos.h>
2d21ac55
A
91#include <vm/memory_object.h>
92#include <vm/vm_purgeable_internal.h>
93
b0d623f7
A
94#include <IOKit/IOHibernatePrivate.h>
95
96
2d21ac55 97#include <sys/kern_memorystatus.h>
2d21ac55 98
b0d623f7
A
99#include <sys/kdebug.h>
100
101boolean_t vm_page_free_verify = TRUE;
102
6d2010ae
A
103uint32_t vm_lopage_free_count = 0;
104uint32_t vm_lopage_free_limit = 0;
105uint32_t vm_lopage_lowater = 0;
0b4c1975
A
106boolean_t vm_lopage_refill = FALSE;
107boolean_t vm_lopage_needed = FALSE;
108
b0d623f7
A
109lck_mtx_ext_t vm_page_queue_lock_ext;
110lck_mtx_ext_t vm_page_queue_free_lock_ext;
111lck_mtx_ext_t vm_purgeable_queue_lock_ext;
2d21ac55 112
0b4c1975
A
113int speculative_age_index = 0;
114int speculative_steal_index = 0;
2d21ac55
A
115struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
116
0b4e3aa0 117
b0d623f7
A
118__private_extern__ void vm_page_init_lck_grp(void);
119
6d2010ae
A
120static void vm_page_free_prepare(vm_page_t page);
121static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
122
b0d623f7
A
123
124
125
1c79356b
A
/*
 *	Associated with each page of user-allocatable memory is a
 *	page structure.
 */
130
131/*
132 * These variables record the values returned by vm_page_bootstrap,
133 * for debugging purposes. The implementation of pmap_steal_memory
134 * and pmap_startup here also uses them internally.
135 */
136
137vm_offset_t virtual_space_start;
138vm_offset_t virtual_space_end;
139int vm_page_pages;
140
141/*
142 * The vm_page_lookup() routine, which provides for fast
143 * (virtual memory object, offset) to page lookup, employs
144 * the following hash table. The vm_page_{insert,remove}
145 * routines install and remove associations in the table.
146 * [This table is often called the virtual-to-physical,
147 * or VP, table.]
148 */
149typedef struct {
150 vm_page_t pages;
151#if MACH_PAGE_HASH_STATS
152 int cur_count; /* current count */
153 int hi_count; /* high water mark */
154#endif /* MACH_PAGE_HASH_STATS */
155} vm_page_bucket_t;
156
b0d623f7
A
157
158#define BUCKETS_PER_LOCK 16
159
1c79356b
A
160vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
161unsigned int vm_page_bucket_count = 0; /* How big is array? */
162unsigned int vm_page_hash_mask; /* Mask for hash function */
163unsigned int vm_page_hash_shift; /* Shift for hash function */
2d21ac55 164uint32_t vm_page_bucket_hash; /* Basic bucket hash */
b0d623f7
A
165unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
166
167lck_spin_t *vm_page_bucket_locks;
1c79356b 168
91447636 169
1c79356b
A
170#if MACH_PAGE_HASH_STATS
171/* This routine is only for debug. It is intended to be called by
172 * hand by a developer using a kernel debugger. This routine prints
173 * out vm_page_hash table statistics to the kernel debug console.
174 */
175void
176hash_debug(void)
177{
178 int i;
179 int numbuckets = 0;
180 int highsum = 0;
181 int maxdepth = 0;
182
183 for (i = 0; i < vm_page_bucket_count; i++) {
184 if (vm_page_buckets[i].hi_count) {
185 numbuckets++;
186 highsum += vm_page_buckets[i].hi_count;
187 if (vm_page_buckets[i].hi_count > maxdepth)
188 maxdepth = vm_page_buckets[i].hi_count;
189 }
190 }
191 printf("Total number of buckets: %d\n", vm_page_bucket_count);
192 printf("Number used buckets: %d = %d%%\n",
193 numbuckets, 100*numbuckets/vm_page_bucket_count);
194 printf("Number unused buckets: %d = %d%%\n",
195 vm_page_bucket_count - numbuckets,
196 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
197 printf("Sum of bucket max depth: %d\n", highsum);
198 printf("Average bucket depth: %d.%2d\n",
199 highsum/vm_page_bucket_count,
200 highsum%vm_page_bucket_count);
201 printf("Maximum bucket depth: %d\n", maxdepth);
202}
203#endif /* MACH_PAGE_HASH_STATS */
204
205/*
206 * The virtual page size is currently implemented as a runtime
207 * variable, but is constant once initialized using vm_set_page_size.
208 * This initialization must be done in the machine-dependent
209 * bootstrap sequence, before calling other machine-independent
210 * initializations.
211 *
212 * All references to the virtual page size outside this
213 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
214 * constants.
215 */
55e303ae
A
216vm_size_t page_size = PAGE_SIZE;
217vm_size_t page_mask = PAGE_MASK;
2d21ac55 218int page_shift = PAGE_SHIFT;
1c79356b
A
219
/*
 *	Resident page structures are initialized from
 *	a template (see vm_page_alloc).
 *
 *	When adding a new field to the resident page
 *	structure, be sure to add initialization
 *	(see vm_page_bootstrap).
 */
228struct vm_page vm_page_template;
229
2d21ac55
A
230vm_page_t vm_pages = VM_PAGE_NULL;
231unsigned int vm_pages_count = 0;
0b4c1975 232ppnum_t vm_page_lowest = 0;
2d21ac55 233
1c79356b
A
234/*
235 * Resident pages that represent real memory
2d21ac55
A
236 * are allocated from a set of free lists,
237 * one per color.
1c79356b 238 */
2d21ac55
A
239unsigned int vm_colors;
240unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
241unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
242queue_head_t vm_page_queue_free[MAX_COLORS];
1c79356b 243unsigned int vm_page_free_wanted;
2d21ac55 244unsigned int vm_page_free_wanted_privileged;
91447636
A
245unsigned int vm_page_free_count;
246unsigned int vm_page_fictitious_count;
1c79356b
A
247
248unsigned int vm_page_free_count_minimum; /* debugging */
249
250/*
251 * Occasionally, the virtual memory system uses
252 * resident page structures that do not refer to
253 * real pages, for example to leave a page with
254 * important state information in the VP table.
255 *
256 * These page structures are allocated the way
257 * most other kernel structures are.
258 */
259zone_t vm_page_zone;
b0d623f7
A
260vm_locks_array_t vm_page_locks;
261decl_lck_mtx_data(,vm_page_alloc_lock)
9bccf70c 262unsigned int io_throttle_zero_fill;
1c79356b 263
b0d623f7
A
264unsigned int vm_page_local_q_count = 0;
265unsigned int vm_page_local_q_soft_limit = 250;
266unsigned int vm_page_local_q_hard_limit = 500;
267struct vplq *vm_page_local_q = NULL;
268
1c79356b
A
269/*
270 * Fictitious pages don't have a physical address,
55e303ae 271 * but we must initialize phys_page to something.
1c79356b
A
272 * For debugging, this should be a strange value
273 * that the pmap module can recognize in assertions.
274 */
b0d623f7 275ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
1c79356b 276
2d21ac55
A
277/*
278 * Guard pages are not accessible so they don't
279 * need a physical address, but we need to enter
280 * one in the pmap.
281 * Let's make it recognizable and make sure that
282 * we don't use a real physical page with that
283 * physical address.
284 */
b0d623f7 285ppnum_t vm_page_guard_addr = (ppnum_t) -2;
2d21ac55 286
1c79356b
A
/*
 *	Resident page structures are also chained on
 *	queues that are used by the page replacement
 *	system (pageout daemon).  These queues are
 *	defined here, but are shared by the pageout
 *	module.  The inactive queue is broken into
 *	inactive and zf for convenience, as the
 *	pageout daemon often assigns a higher
 *	affinity to zf pages.
 */
297queue_head_t vm_page_queue_active;
298queue_head_t vm_page_queue_inactive;
2d21ac55 299queue_head_t vm_page_queue_zf; /* inactive memory queue for zero fill */
b0d623f7 300queue_head_t vm_page_queue_throttled;
2d21ac55 301
91447636
A
302unsigned int vm_page_active_count;
303unsigned int vm_page_inactive_count;
2d21ac55
A
304unsigned int vm_page_throttled_count;
305unsigned int vm_page_speculative_count;
91447636 306unsigned int vm_page_wire_count;
0b4c1975 307unsigned int vm_page_wire_count_initial;
91447636
A
308unsigned int vm_page_gobble_count = 0;
309unsigned int vm_page_wire_count_warning = 0;
310unsigned int vm_page_gobble_count_warning = 0;
311
312unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
b0d623f7 313unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
91447636 314uint64_t vm_page_purged_count = 0; /* total count of purged pages */
1c79356b 315
b0d623f7 316#if DEVELOPMENT || DEBUG
2d21ac55
A
317unsigned int vm_page_speculative_recreated = 0;
318unsigned int vm_page_speculative_created = 0;
319unsigned int vm_page_speculative_used = 0;
b0d623f7 320#endif
2d21ac55 321
0c530ab8 322uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
0b4c1975 323ppnum_t max_valid_low_ppnum = 0xffffffff;
0c530ab8
A
324
325
1c79356b
A
326/*
327 * Several page replacement parameters are also
328 * shared with this module, so that page allocation
329 * (done here in vm_page_alloc) can trigger the
330 * pageout daemon.
331 */
91447636
A
332unsigned int vm_page_free_target = 0;
333unsigned int vm_page_free_min = 0;
b0d623f7
A
334unsigned int vm_page_throttle_limit = 0;
335uint32_t vm_page_creation_throttle = 0;
91447636 336unsigned int vm_page_inactive_target = 0;
2d21ac55 337unsigned int vm_page_inactive_min = 0;
91447636 338unsigned int vm_page_free_reserved = 0;
b0d623f7 339unsigned int vm_page_throttle_count = 0;
1c79356b
A
340
341/*
342 * The VM system has a couple of heuristics for deciding
343 * that pages are "uninteresting" and should be placed
344 * on the inactive queue as likely candidates for replacement.
345 * These variables let the heuristics be controlled at run-time
346 * to make experimentation easier.
347 */
348
349boolean_t vm_page_deactivate_hint = TRUE;
350
b0d623f7
A
351struct vm_page_stats_reusable vm_page_stats_reusable;
352
1c79356b
A
353/*
354 * vm_set_page_size:
355 *
356 * Sets the page size, perhaps based upon the memory
357 * size. Must be called before any use of page-size
358 * dependent functions.
359 *
360 * Sets page_shift and page_mask from page_size.
361 */
362void
363vm_set_page_size(void)
364{
1c79356b
A
365 page_mask = page_size - 1;
366
367 if ((page_mask & page_size) != 0)
368 panic("vm_set_page_size: page size not a power of two");
369
370 for (page_shift = 0; ; page_shift++)
91447636 371 if ((1U << page_shift) == page_size)
1c79356b 372 break;
1c79356b
A
373}
374
2d21ac55
A
375
/* Called once during startup, once the cache geometry is known.
 */
378static void
379vm_page_set_colors( void )
380{
381 unsigned int n, override;
382
593a1d5f 383 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
2d21ac55
A
384 n = override;
385 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
386 n = vm_cache_geometry_colors;
387 else n = DEFAULT_COLORS; /* use default if all else fails */
388
389 if ( n == 0 )
390 n = 1;
391 if ( n > MAX_COLORS )
392 n = MAX_COLORS;
393
394 /* the count must be a power of 2 */
b0d623f7 395 if ( ( n & (n - 1)) != 0 )
2d21ac55
A
396 panic("vm_page_set_colors");
397
398 vm_colors = n;
399 vm_color_mask = n - 1;
400}
401
402
b0d623f7
A
403lck_grp_t vm_page_lck_grp_free;
404lck_grp_t vm_page_lck_grp_queue;
405lck_grp_t vm_page_lck_grp_local;
406lck_grp_t vm_page_lck_grp_purge;
407lck_grp_t vm_page_lck_grp_alloc;
408lck_grp_t vm_page_lck_grp_bucket;
409lck_grp_attr_t vm_page_lck_grp_attr;
410lck_attr_t vm_page_lck_attr;
411
412
413__private_extern__ void
414vm_page_init_lck_grp(void)
415{
416 /*
417 * initialze the vm_page lock world
418 */
419 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
420 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
421 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
422 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
423 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
424 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
425 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
426 lck_attr_setdefault(&vm_page_lck_attr);
427}
428
429void
430vm_page_init_local_q()
431{
432 unsigned int num_cpus;
433 unsigned int i;
434 struct vplq *t_local_q;
435
436 num_cpus = ml_get_max_cpus();
437
438 /*
439 * no point in this for a uni-processor system
440 */
441 if (num_cpus >= 2) {
442 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
443
444 for (i = 0; i < num_cpus; i++) {
445 struct vpl *lq;
446
447 lq = &t_local_q[i].vpl_un.vpl;
448 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
449 queue_init(&lq->vpl_queue);
450 lq->vpl_count = 0;
451 }
452 vm_page_local_q_count = num_cpus;
453
454 vm_page_local_q = (struct vplq *)t_local_q;
455 }
456}
457
458
1c79356b
A
459/*
460 * vm_page_bootstrap:
461 *
462 * Initializes the resident memory module.
463 *
464 * Allocates memory for the page cells, and
465 * for the object/offset-to-page hash table headers.
466 * Each page cell is initialized and placed on the free list.
467 * Returns the range of available kernel virtual memory.
468 */
469
470void
471vm_page_bootstrap(
472 vm_offset_t *startp,
473 vm_offset_t *endp)
474{
475 register vm_page_t m;
91447636 476 unsigned int i;
1c79356b
A
477 unsigned int log1;
478 unsigned int log2;
479 unsigned int size;
480
481 /*
482 * Initialize the vm_page template.
483 */
484
485 m = &vm_page_template;
b0d623f7 486 bzero(m, sizeof (*m));
1c79356b 487
91447636
A
488 m->pageq.next = NULL;
489 m->pageq.prev = NULL;
490 m->listq.next = NULL;
491 m->listq.prev = NULL;
b0d623f7 492 m->next = VM_PAGE_NULL;
91447636 493
b0d623f7
A
494 m->object = VM_OBJECT_NULL; /* reset later */
495 m->offset = (vm_object_offset_t) -1; /* reset later */
496
497 m->wire_count = 0;
498 m->local = FALSE;
1c79356b
A
499 m->inactive = FALSE;
500 m->active = FALSE;
b0d623f7
A
501 m->pageout_queue = FALSE;
502 m->speculative = FALSE;
1c79356b
A
503 m->laundry = FALSE;
504 m->free = FALSE;
505 m->reference = FALSE;
b0d623f7
A
506 m->gobbled = FALSE;
507 m->private = FALSE;
508 m->throttled = FALSE;
509 m->__unused_pageq_bits = 0;
510
511 m->phys_page = 0; /* reset later */
1c79356b
A
512
513 m->busy = TRUE;
514 m->wanted = FALSE;
515 m->tabled = FALSE;
516 m->fictitious = FALSE;
b0d623f7
A
517 m->pmapped = FALSE;
518 m->wpmapped = FALSE;
519 m->pageout = FALSE;
1c79356b
A
520 m->absent = FALSE;
521 m->error = FALSE;
522 m->dirty = FALSE;
523 m->cleaning = FALSE;
524 m->precious = FALSE;
525 m->clustered = FALSE;
b0d623f7 526 m->overwriting = FALSE;
1c79356b 527 m->restart = FALSE;
b0d623f7 528 m->unusual = FALSE;
91447636 529 m->encrypted = FALSE;
2d21ac55 530 m->encrypted_cleaning = FALSE;
b0d623f7
A
531 m->list_req_pending = FALSE;
532 m->dump_cleaning = FALSE;
533 m->cs_validated = FALSE;
534 m->cs_tainted = FALSE;
535 m->no_cache = FALSE;
536 m->zero_fill = FALSE;
537 m->reusable = FALSE;
6d2010ae 538 m->slid = FALSE;
b0d623f7 539 m->__unused_object_bits = 0;
1c79356b 540
1c79356b 541
1c79356b
A
542 /*
543 * Initialize the page queues.
544 */
b0d623f7
A
545 vm_page_init_lck_grp();
546
547 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
548 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
549 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
2d21ac55
A
550
551 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
552 int group;
553
554 purgeable_queues[i].token_q_head = 0;
555 purgeable_queues[i].token_q_tail = 0;
556 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
557 queue_init(&purgeable_queues[i].objq[group]);
558
559 purgeable_queues[i].type = i;
560 purgeable_queues[i].new_pages = 0;
561#if MACH_ASSERT
562 purgeable_queues[i].debug_count_tokens = 0;
563 purgeable_queues[i].debug_count_objects = 0;
564#endif
565 };
566
567 for (i = 0; i < MAX_COLORS; i++ )
568 queue_init(&vm_page_queue_free[i]);
6d2010ae 569
2d21ac55 570 queue_init(&vm_lopage_queue_free);
1c79356b
A
571 queue_init(&vm_page_queue_active);
572 queue_init(&vm_page_queue_inactive);
2d21ac55 573 queue_init(&vm_page_queue_throttled);
9bccf70c 574 queue_init(&vm_page_queue_zf);
1c79356b 575
2d21ac55
A
576 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
577 queue_init(&vm_page_queue_speculative[i].age_q);
578
579 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
580 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
581 }
1c79356b 582 vm_page_free_wanted = 0;
2d21ac55
A
583 vm_page_free_wanted_privileged = 0;
584
585 vm_page_set_colors();
586
1c79356b
A
587
588 /*
589 * Steal memory for the map and zone subsystems.
590 */
591
592 vm_map_steal_memory();
593 zone_steal_memory();
594
595 /*
596 * Allocate (and initialize) the virtual-to-physical
597 * table hash buckets.
598 *
599 * The number of buckets should be a power of two to
600 * get a good hash function. The following computation
601 * chooses the first power of two that is greater
602 * than the number of physical pages in the system.
603 */
604
1c79356b
A
605 if (vm_page_bucket_count == 0) {
606 unsigned int npages = pmap_free_pages();
607
608 vm_page_bucket_count = 1;
609 while (vm_page_bucket_count < npages)
610 vm_page_bucket_count <<= 1;
611 }
b0d623f7 612 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
1c79356b
A
613
614 vm_page_hash_mask = vm_page_bucket_count - 1;
615
616 /*
617 * Calculate object shift value for hashing algorithm:
618 * O = log2(sizeof(struct vm_object))
619 * B = log2(vm_page_bucket_count)
620 * hash shifts the object left by
621 * B/2 - O
622 */
623 size = vm_page_bucket_count;
624 for (log1 = 0; size > 1; log1++)
625 size /= 2;
626 size = sizeof(struct vm_object);
627 for (log2 = 0; size > 1; log2++)
628 size /= 2;
629 vm_page_hash_shift = log1/2 - log2 + 1;
55e303ae
A
630
631 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
632 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
633 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to insure unique series */
1c79356b
A
634
635 if (vm_page_hash_mask & vm_page_bucket_count)
636 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
637
638 vm_page_buckets = (vm_page_bucket_t *)
639 pmap_steal_memory(vm_page_bucket_count *
640 sizeof(vm_page_bucket_t));
641
b0d623f7
A
642 vm_page_bucket_locks = (lck_spin_t *)
643 pmap_steal_memory(vm_page_bucket_lock_count *
644 sizeof(lck_spin_t));
645
1c79356b
A
646 for (i = 0; i < vm_page_bucket_count; i++) {
647 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
648
649 bucket->pages = VM_PAGE_NULL;
650#if MACH_PAGE_HASH_STATS
651 bucket->cur_count = 0;
652 bucket->hi_count = 0;
653#endif /* MACH_PAGE_HASH_STATS */
654 }
655
b0d623f7
A
656 for (i = 0; i < vm_page_bucket_lock_count; i++)
657 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
658
1c79356b
A
659 /*
660 * Machine-dependent code allocates the resident page table.
661 * It uses vm_page_init to initialize the page frames.
662 * The code also returns to us the virtual space available
663 * to the kernel. We don't trust the pmap module
664 * to get the alignment right.
665 */
666
667 pmap_startup(&virtual_space_start, &virtual_space_end);
91447636
A
668 virtual_space_start = round_page(virtual_space_start);
669 virtual_space_end = trunc_page(virtual_space_end);
1c79356b
A
670
671 *startp = virtual_space_start;
672 *endp = virtual_space_end;
673
674 /*
675 * Compute the initial "wire" count.
676 * Up until now, the pages which have been set aside are not under
677 * the VM system's control, so although they aren't explicitly
678 * wired, they nonetheless can't be moved. At this moment,
679 * all VM managed pages are "free", courtesy of pmap_startup.
680 */
b0d623f7 681 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
0b4c1975
A
682 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
683 vm_page_wire_count_initial = vm_page_wire_count;
1c79356b 684 vm_page_free_count_minimum = vm_page_free_count;
91447636 685
2d21ac55
A
686 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
687 vm_page_free_count, vm_page_wire_count);
688
91447636 689 simple_lock_init(&vm_paging_lock, 0);
1c79356b
A
690}
691
692#ifndef MACHINE_PAGES
693/*
694 * We implement pmap_steal_memory and pmap_startup with the help
695 * of two simpler functions, pmap_virtual_space and pmap_next_page.
696 */
697
91447636 698void *
1c79356b
A
699pmap_steal_memory(
700 vm_size_t size)
701{
55e303ae
A
702 vm_offset_t addr, vaddr;
703 ppnum_t phys_page;
1c79356b
A
704
705 /*
706 * We round the size to a round multiple.
707 */
708
709 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
710
711 /*
712 * If this is the first call to pmap_steal_memory,
713 * we have to initialize ourself.
714 */
715
716 if (virtual_space_start == virtual_space_end) {
717 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
718
719 /*
720 * The initial values must be aligned properly, and
721 * we don't trust the pmap module to do it right.
722 */
723
91447636
A
724 virtual_space_start = round_page(virtual_space_start);
725 virtual_space_end = trunc_page(virtual_space_end);
1c79356b
A
726 }
727
728 /*
729 * Allocate virtual memory for this request.
730 */
731
732 addr = virtual_space_start;
733 virtual_space_start += size;
734
6d2010ae 735 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
1c79356b
A
736
737 /*
738 * Allocate and map physical pages to back new virtual pages.
739 */
740
91447636 741 for (vaddr = round_page(addr);
1c79356b
A
742 vaddr < addr + size;
743 vaddr += PAGE_SIZE) {
b0d623f7 744
0b4c1975 745 if (!pmap_next_page_hi(&phys_page))
1c79356b
A
746 panic("pmap_steal_memory");
747
748 /*
749 * XXX Logically, these mappings should be wired,
750 * but some pmap modules barf if they are.
751 */
b0d623f7
A
752#if defined(__LP64__)
753 pmap_pre_expand(kernel_pmap, vaddr);
754#endif
1c79356b 755
55e303ae 756 pmap_enter(kernel_pmap, vaddr, phys_page,
9bccf70c
A
757 VM_PROT_READ|VM_PROT_WRITE,
758 VM_WIMG_USE_DEFAULT, FALSE);
1c79356b
A
759 /*
760 * Account for newly stolen memory
761 */
762 vm_page_wire_count++;
763
764 }
765
91447636 766 return (void *) addr;
1c79356b
A
767}
768
769void
770pmap_startup(
771 vm_offset_t *startp,
772 vm_offset_t *endp)
773{
55e303ae 774 unsigned int i, npages, pages_initialized, fill, fillval;
55e303ae
A
775 ppnum_t phys_page;
776 addr64_t tmpaddr;
1c79356b
A
777
778 /*
779 * We calculate how many page frames we will have
780 * and then allocate the page structures in one chunk.
781 */
782
55e303ae 783 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
b0d623f7 784 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
2d21ac55 785 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */
1c79356b 786
2d21ac55 787 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
1c79356b
A
788
789 /*
790 * Initialize the page frames.
791 */
1c79356b 792 for (i = 0, pages_initialized = 0; i < npages; i++) {
55e303ae 793 if (!pmap_next_page(&phys_page))
1c79356b 794 break;
0b4c1975
A
795 if (pages_initialized == 0 || phys_page < vm_page_lowest)
796 vm_page_lowest = phys_page;
1c79356b 797
0b4c1975 798 vm_page_init(&vm_pages[i], phys_page, FALSE);
1c79356b
A
799 vm_page_pages++;
800 pages_initialized++;
801 }
2d21ac55 802 vm_pages_count = pages_initialized;
1c79356b 803
0c530ab8
A
804 /*
805 * Check if we want to initialize pages to a known value
806 */
807 fill = 0; /* Assume no fill */
593a1d5f 808 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
2d21ac55 809
0c530ab8
A
810 // -debug code remove
811 if (2 == vm_himemory_mode) {
812 // free low -> high so high is preferred
0b4c1975 813 for (i = 1; i <= pages_initialized; i++) {
2d21ac55
A
814 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a know value if requested at boot */
815 vm_page_release(&vm_pages[i - 1]);
0c530ab8
A
816 }
817 }
818 else
819 // debug code remove-
820
1c79356b
A
821 /*
822 * Release pages in reverse order so that physical pages
823 * initially get allocated in ascending addresses. This keeps
824 * the devices (which must address physical memory) happy if
825 * they require several consecutive pages.
826 */
0b4c1975 827 for (i = pages_initialized; i > 0; i--) {
2d21ac55
A
828 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a know value if requested at boot */
829 vm_page_release(&vm_pages[i - 1]);
1c79356b
A
830 }
831
55e303ae
A
832#if 0
833 {
834 vm_page_t xx, xxo, xxl;
2d21ac55 835 int i, j, k, l;
55e303ae
A
836
837 j = 0; /* (BRINGUP) */
838 xxl = 0;
839
2d21ac55
A
840 for( i = 0; i < vm_colors; i++ ) {
841 queue_iterate(&vm_page_queue_free[i],
842 xx,
843 vm_page_t,
844 pageq) { /* BRINGUP */
845 j++; /* (BRINGUP) */
846 if(j > vm_page_free_count) { /* (BRINGUP) */
847 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
55e303ae 848 }
2d21ac55
A
849
850 l = vm_page_free_count - j; /* (BRINGUP) */
851 k = 0; /* (BRINGUP) */
852
853 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
854
855 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
856 k++;
857 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
858 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
859 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
860 }
861 }
862
863 xxl = xx;
55e303ae
A
864 }
865 }
866
867 if(j != vm_page_free_count) { /* (BRINGUP) */
868 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
869 }
870 }
871#endif
872
873
1c79356b
A
874 /*
875 * We have to re-align virtual_space_start,
876 * because pmap_steal_memory has been using it.
877 */
878
b0d623f7 879 virtual_space_start = round_page(virtual_space_start);
1c79356b
A
880
881 *startp = virtual_space_start;
882 *endp = virtual_space_end;
883}
884#endif /* MACHINE_PAGES */
885
886/*
887 * Routine: vm_page_module_init
888 * Purpose:
889 * Second initialization pass, to be done after
890 * the basic VM system is ready.
891 */
892void
893vm_page_module_init(void)
894{
895 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
896 0, PAGE_SIZE, "vm pages");
897
898#if ZONE_DEBUG
899 zone_debug_disable(vm_page_zone);
900#endif /* ZONE_DEBUG */
901
6d2010ae 902 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
1c79356b
A
903 zone_change(vm_page_zone, Z_EXPAND, FALSE);
904 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
905 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
906
907 /*
908 * Adjust zone statistics to account for the real pages allocated
909 * in vm_page_create(). [Q: is this really what we want?]
910 */
911 vm_page_zone->count += vm_page_pages;
6d2010ae 912 vm_page_zone->sum_count += vm_page_pages;
1c79356b
A
913 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
914
b0d623f7 915 lck_mtx_init(&vm_page_alloc_lock, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
1c79356b
A
916}
917
918/*
919 * Routine: vm_page_create
920 * Purpose:
921 * After the VM system is up, machine-dependent code
922 * may stumble across more physical memory. For example,
923 * memory that it was reserving for a frame buffer.
924 * vm_page_create turns this memory into available pages.
925 */
926
927void
928vm_page_create(
55e303ae
A
929 ppnum_t start,
930 ppnum_t end)
1c79356b 931{
55e303ae
A
932 ppnum_t phys_page;
933 vm_page_t m;
1c79356b 934
55e303ae
A
935 for (phys_page = start;
936 phys_page < end;
937 phys_page++) {
6d2010ae 938 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
1c79356b
A
939 == VM_PAGE_NULL)
940 vm_page_more_fictitious();
941
6d2010ae 942 m->fictitious = FALSE;
0b4c1975 943 pmap_clear_noencrypt(phys_page);
6d2010ae 944
1c79356b
A
945 vm_page_pages++;
946 vm_page_release(m);
947 }
948}
949
/*
 *	vm_page_hash:
 *
 *	Distributes the object/offset key pair among hash buckets.
 *	The object pointer is multiplied by vm_page_bucket_hash, the
 *	page number of the offset is XORed with the same value, and
 *	the sum is reduced with vm_page_hash_mask.
 *
 *	NOTE:	The bucket count must be a power of 2
 *
 *	The "object" argument is parenthesized so that an expression
 *	argument (e.g. "base + n") is cast as a whole rather than
 *	having the cast bind to its first operand only.
 */
#define vm_page_hash(object, offset) (\
	( (natural_t)((uintptr_t)(object) * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
	 & vm_page_hash_mask)
960
2d21ac55 961
1c79356b
A
962/*
963 * vm_page_insert: [ internal use only ]
964 *
965 * Inserts the given mem entry into the object/object-page
966 * table and object list.
967 *
968 * The object must be locked.
969 */
1c79356b
A
970void
971vm_page_insert(
2d21ac55
A
972 vm_page_t mem,
973 vm_object_t object,
974 vm_object_offset_t offset)
975{
b0d623f7 976 vm_page_insert_internal(mem, object, offset, FALSE, TRUE);
2d21ac55
A
977}
978
4a3eedf9 979void
2d21ac55
A
980vm_page_insert_internal(
981 vm_page_t mem,
982 vm_object_t object,
983 vm_object_offset_t offset,
b0d623f7
A
984 boolean_t queues_lock_held,
985 boolean_t insert_in_hash)
1c79356b 986{
b0d623f7
A
987 vm_page_bucket_t *bucket;
988 lck_spin_t *bucket_lock;
989 int hash_id;
1c79356b
A
990
991 XPR(XPR_VM_PAGE,
992 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
b0d623f7 993 object, offset, mem, 0,0);
1c79356b
A
994
995 VM_PAGE_CHECK(mem);
996
2d21ac55
A
997 if (object == vm_submap_object) {
998 /* the vm_submap_object is only a placeholder for submaps */
999 panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
1000 }
1001
1002 vm_object_lock_assert_exclusive(object);
1003#if DEBUG
b0d623f7
A
1004 lck_mtx_assert(&vm_page_queue_lock,
1005 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1006 : LCK_MTX_ASSERT_NOTOWNED);
1007#endif /* DEBUG */
1008
1009 if (insert_in_hash == TRUE) {
1010#if DEBUG
1011 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1012 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1013 "already in (obj=%p,off=0x%llx)",
1014 mem, object, offset, mem->object, mem->offset);
91447636 1015#endif
6d2010ae 1016 assert(!object->internal || offset < object->vo_size);
1c79356b 1017
b0d623f7
A
1018 /* only insert "pageout" pages into "pageout" objects,
1019 * and normal pages into normal objects */
1020 assert(object->pageout == mem->pageout);
91447636 1021
b0d623f7
A
1022 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1023
1024 /*
1025 * Record the object/offset pair in this page
1026 */
1c79356b 1027
b0d623f7
A
1028 mem->object = object;
1029 mem->offset = offset;
1c79356b 1030
b0d623f7
A
1031 /*
1032 * Insert it into the object_object/offset hash table
1033 */
1034 hash_id = vm_page_hash(object, offset);
1035 bucket = &vm_page_buckets[hash_id];
1036 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1037
1038 lck_spin_lock(bucket_lock);
1c79356b 1039
b0d623f7
A
1040 mem->next = bucket->pages;
1041 bucket->pages = mem;
1c79356b 1042#if MACH_PAGE_HASH_STATS
b0d623f7
A
1043 if (++bucket->cur_count > bucket->hi_count)
1044 bucket->hi_count = bucket->cur_count;
1c79356b 1045#endif /* MACH_PAGE_HASH_STATS */
1c79356b 1046
b0d623f7
A
1047 lck_spin_unlock(bucket_lock);
1048 }
6d2010ae
A
1049
1050 { unsigned int cache_attr;
1051
1052 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1053
1054 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1055 pmap_set_cache_attributes(mem->phys_page, cache_attr);
1056 object->set_cache_attr = TRUE;
1057 }
1058 }
1c79356b
A
1059 /*
1060 * Now link into the object's list of backed pages.
1061 */
1062
91447636 1063 VM_PAGE_INSERT(mem, object);
1c79356b
A
1064 mem->tabled = TRUE;
1065
1066 /*
1067 * Show that the object has one more resident page.
1068 */
1069
1070 object->resident_page_count++;
b0d623f7
A
1071 if (VM_PAGE_WIRED(mem)) {
1072 object->wired_page_count++;
1073 }
1074 assert(object->resident_page_count >= object->wired_page_count);
91447636 1075
b0d623f7 1076 assert(!mem->reusable);
2d21ac55 1077
b0d623f7
A
1078 if (object->purgable == VM_PURGABLE_VOLATILE) {
1079 if (VM_PAGE_WIRED(mem)) {
1080 OSAddAtomic(1, &vm_page_purgeable_wired_count);
1081 } else {
1082 OSAddAtomic(1, &vm_page_purgeable_count);
1083 }
593a1d5f
A
1084 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1085 mem->throttled) {
b0d623f7
A
1086 /*
1087 * This page belongs to a purged VM object but hasn't
1088 * been purged (because it was "busy").
1089 * It's in the "throttled" queue and hence not
1090 * visible to vm_pageout_scan(). Move it to a pageable
1091 * queue, so that it can eventually be reclaimed, instead
1092 * of lingering in the "empty" object.
1093 */
593a1d5f 1094 if (queues_lock_held == FALSE)
b0d623f7 1095 vm_page_lockspin_queues();
593a1d5f 1096 vm_page_deactivate(mem);
2d21ac55
A
1097 if (queues_lock_held == FALSE)
1098 vm_page_unlock_queues();
91447636 1099 }
1c79356b
A
1100}
1101
1102/*
1103 * vm_page_replace:
1104 *
1105 * Exactly like vm_page_insert, except that we first
1106 * remove any existing page at the given offset in object.
1107 *
b0d623f7 1108 * The object must be locked.
1c79356b 1109 */
1c79356b
A
1110void
1111vm_page_replace(
1112 register vm_page_t mem,
1113 register vm_object_t object,
1114 register vm_object_offset_t offset)
1115{
0c530ab8
A
1116 vm_page_bucket_t *bucket;
1117 vm_page_t found_m = VM_PAGE_NULL;
b0d623f7
A
1118 lck_spin_t *bucket_lock;
1119 int hash_id;
1c79356b
A
1120
1121 VM_PAGE_CHECK(mem);
2d21ac55 1122 vm_object_lock_assert_exclusive(object);
91447636 1123#if DEBUG
91447636
A
1124 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1125 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1126 "already in (obj=%p,off=0x%llx)",
1127 mem, object, offset, mem->object, mem->offset);
b0d623f7 1128 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
91447636 1129#endif
1c79356b
A
1130 /*
1131 * Record the object/offset pair in this page
1132 */
1133
1134 mem->object = object;
1135 mem->offset = offset;
1136
1137 /*
1138 * Insert it into the object_object/offset hash table,
1139 * replacing any page that might have been there.
1140 */
1141
b0d623f7
A
1142 hash_id = vm_page_hash(object, offset);
1143 bucket = &vm_page_buckets[hash_id];
1144 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1145
1146 lck_spin_lock(bucket_lock);
0c530ab8 1147
1c79356b
A
1148 if (bucket->pages) {
1149 vm_page_t *mp = &bucket->pages;
b0d623f7 1150 vm_page_t m = *mp;
0c530ab8 1151
1c79356b
A
1152 do {
1153 if (m->object == object && m->offset == offset) {
1154 /*
0c530ab8 1155 * Remove old page from hash list
1c79356b
A
1156 */
1157 *mp = m->next;
1c79356b 1158
0c530ab8 1159 found_m = m;
1c79356b
A
1160 break;
1161 }
1162 mp = &m->next;
91447636 1163 } while ((m = *mp));
0c530ab8 1164
1c79356b
A
1165 mem->next = bucket->pages;
1166 } else {
1167 mem->next = VM_PAGE_NULL;
1168 }
0c530ab8
A
1169 /*
1170 * insert new page at head of hash list
1171 */
1c79356b 1172 bucket->pages = mem;
0c530ab8 1173
b0d623f7 1174 lck_spin_unlock(bucket_lock);
1c79356b 1175
0c530ab8
A
1176 if (found_m) {
1177 /*
1178 * there was already a page at the specified
1179 * offset for this object... remove it from
1180 * the object and free it back to the free list
1181 */
b0d623f7 1182 vm_page_free_unlocked(found_m, FALSE);
91447636 1183 }
b0d623f7 1184 vm_page_insert_internal(mem, object, offset, FALSE, FALSE);
1c79356b
A
1185}
1186
1187/*
1188 * vm_page_remove: [ internal use only ]
1189 *
1190 * Removes the given mem entry from the object/offset-page
1191 * table and the object page list.
1192 *
b0d623f7 1193 * The object must be locked.
1c79356b
A
1194 */
1195
1196void
1197vm_page_remove(
b0d623f7
A
1198 vm_page_t mem,
1199 boolean_t remove_from_hash)
1c79356b 1200{
b0d623f7
A
1201 vm_page_bucket_t *bucket;
1202 vm_page_t this;
1203 lck_spin_t *bucket_lock;
1204 int hash_id;
1c79356b
A
1205
1206 XPR(XPR_VM_PAGE,
1207 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
b0d623f7
A
1208 mem->object, mem->offset,
1209 mem, 0,0);
1210
2d21ac55 1211 vm_object_lock_assert_exclusive(mem->object);
1c79356b
A
1212 assert(mem->tabled);
1213 assert(!mem->cleaning);
1214 VM_PAGE_CHECK(mem);
1215
b0d623f7
A
1216 if (remove_from_hash == TRUE) {
1217 /*
1218 * Remove from the object_object/offset hash table
1219 */
1220 hash_id = vm_page_hash(mem->object, mem->offset);
1221 bucket = &vm_page_buckets[hash_id];
1222 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
91447636 1223
b0d623f7 1224 lck_spin_lock(bucket_lock);
1c79356b 1225
b0d623f7
A
1226 if ((this = bucket->pages) == mem) {
1227 /* optimize for common case */
1c79356b 1228
b0d623f7
A
1229 bucket->pages = mem->next;
1230 } else {
1231 vm_page_t *prev;
1c79356b 1232
b0d623f7
A
1233 for (prev = &this->next;
1234 (this = *prev) != mem;
1235 prev = &this->next)
1236 continue;
1237 *prev = this->next;
1238 }
1c79356b 1239#if MACH_PAGE_HASH_STATS
b0d623f7 1240 bucket->cur_count--;
1c79356b 1241#endif /* MACH_PAGE_HASH_STATS */
1c79356b 1242
b0d623f7
A
1243 lck_spin_unlock(bucket_lock);
1244 }
1c79356b
A
1245 /*
1246 * Now remove from the object's list of backed pages.
1247 */
1248
91447636 1249 VM_PAGE_REMOVE(mem);
1c79356b
A
1250
1251 /*
1252 * And show that the object has one fewer resident
1253 * page.
1254 */
1255
b0d623f7 1256 assert(mem->object->resident_page_count > 0);
1c79356b 1257 mem->object->resident_page_count--;
6d2010ae
A
1258
1259 if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
1260 if (mem->object->resident_page_count == 0)
1261 vm_object_cache_remove(mem->object);
1262 }
1263
b0d623f7
A
1264 if (VM_PAGE_WIRED(mem)) {
1265 assert(mem->object->wired_page_count > 0);
1266 mem->object->wired_page_count--;
1267 }
1268 assert(mem->object->resident_page_count >=
1269 mem->object->wired_page_count);
1270 if (mem->reusable) {
1271 assert(mem->object->reusable_page_count > 0);
1272 mem->object->reusable_page_count--;
1273 assert(mem->object->reusable_page_count <=
1274 mem->object->resident_page_count);
1275 mem->reusable = FALSE;
1276 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1277 vm_page_stats_reusable.reused_remove++;
1278 } else if (mem->object->all_reusable) {
1279 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1280 vm_page_stats_reusable.reused_remove++;
1281 }
1c79356b 1282
593a1d5f 1283 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
b0d623f7
A
1284 if (VM_PAGE_WIRED(mem)) {
1285 assert(vm_page_purgeable_wired_count > 0);
1286 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1287 } else {
1288 assert(vm_page_purgeable_count > 0);
1289 OSAddAtomic(-1, &vm_page_purgeable_count);
1290 }
91447636 1291 }
6d2010ae
A
1292 if (mem->object->set_cache_attr == TRUE)
1293 pmap_set_cache_attributes(mem->phys_page, 0);
1294
1c79356b
A
1295 mem->tabled = FALSE;
1296 mem->object = VM_OBJECT_NULL;
91447636 1297 mem->offset = (vm_object_offset_t) -1;
1c79356b
A
1298}
1299
b0d623f7 1300
1c79356b
A
1301/*
1302 * vm_page_lookup:
1303 *
1304 * Returns the page associated with the object/offset
1305 * pair specified; if none is found, VM_PAGE_NULL is returned.
1306 *
1307 * The object must be locked. No side effects.
1308 */
1309
91447636
A
1310unsigned long vm_page_lookup_hint = 0;
1311unsigned long vm_page_lookup_hint_next = 0;
1312unsigned long vm_page_lookup_hint_prev = 0;
1313unsigned long vm_page_lookup_hint_miss = 0;
2d21ac55
A
1314unsigned long vm_page_lookup_bucket_NULL = 0;
1315unsigned long vm_page_lookup_miss = 0;
1316
91447636 1317
1c79356b
A
1318vm_page_t
1319vm_page_lookup(
b0d623f7
A
1320 vm_object_t object,
1321 vm_object_offset_t offset)
1c79356b 1322{
b0d623f7
A
1323 vm_page_t mem;
1324 vm_page_bucket_t *bucket;
1325 queue_entry_t qe;
1326 lck_spin_t *bucket_lock;
1327 int hash_id;
91447636 1328
2d21ac55 1329 vm_object_lock_assert_held(object);
91447636 1330 mem = object->memq_hint;
2d21ac55 1331
91447636
A
1332 if (mem != VM_PAGE_NULL) {
1333 assert(mem->object == object);
2d21ac55 1334
91447636
A
1335 if (mem->offset == offset) {
1336 vm_page_lookup_hint++;
1337 return mem;
1338 }
1339 qe = queue_next(&mem->listq);
2d21ac55 1340
91447636
A
1341 if (! queue_end(&object->memq, qe)) {
1342 vm_page_t next_page;
1343
1344 next_page = (vm_page_t) qe;
1345 assert(next_page->object == object);
2d21ac55 1346
91447636
A
1347 if (next_page->offset == offset) {
1348 vm_page_lookup_hint_next++;
1349 object->memq_hint = next_page; /* new hint */
1350 return next_page;
1351 }
1352 }
1353 qe = queue_prev(&mem->listq);
2d21ac55 1354
91447636
A
1355 if (! queue_end(&object->memq, qe)) {
1356 vm_page_t prev_page;
1357
1358 prev_page = (vm_page_t) qe;
1359 assert(prev_page->object == object);
2d21ac55 1360
91447636
A
1361 if (prev_page->offset == offset) {
1362 vm_page_lookup_hint_prev++;
1363 object->memq_hint = prev_page; /* new hint */
1364 return prev_page;
1365 }
1366 }
1367 }
1c79356b 1368 /*
2d21ac55 1369 * Search the hash table for this object/offset pair
1c79356b 1370 */
b0d623f7
A
1371 hash_id = vm_page_hash(object, offset);
1372 bucket = &vm_page_buckets[hash_id];
1c79356b 1373
2d21ac55
A
1374 /*
1375 * since we hold the object lock, we are guaranteed that no
1376 * new pages can be inserted into this object... this in turn
1377 * guarantess that the page we're looking for can't exist
1378 * if the bucket it hashes to is currently NULL even when looked
1379 * at outside the scope of the hash bucket lock... this is a
1380 * really cheap optimiztion to avoid taking the lock
1381 */
1382 if (bucket->pages == VM_PAGE_NULL) {
1383 vm_page_lookup_bucket_NULL++;
1384
1385 return (VM_PAGE_NULL);
1386 }
b0d623f7
A
1387 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1388
1389 lck_spin_lock(bucket_lock);
0c530ab8 1390
1c79356b
A
1391 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1392 VM_PAGE_CHECK(mem);
1393 if ((mem->object == object) && (mem->offset == offset))
1394 break;
1395 }
b0d623f7 1396 lck_spin_unlock(bucket_lock);
55e303ae 1397
91447636
A
1398 if (mem != VM_PAGE_NULL) {
1399 if (object->memq_hint != VM_PAGE_NULL) {
1400 vm_page_lookup_hint_miss++;
1401 }
1402 assert(mem->object == object);
1403 object->memq_hint = mem;
2d21ac55
A
1404 } else
1405 vm_page_lookup_miss++;
91447636
A
1406
1407 return(mem);
1408}
1409
1410
1c79356b
A
1411/*
1412 * vm_page_rename:
1413 *
1414 * Move the given memory entry from its
1415 * current object to the specified target object/offset.
1416 *
1417 * The object must be locked.
1418 */
1419void
1420vm_page_rename(
1421 register vm_page_t mem,
1422 register vm_object_t new_object,
2d21ac55
A
1423 vm_object_offset_t new_offset,
1424 boolean_t encrypted_ok)
1c79356b
A
1425{
1426 assert(mem->object != new_object);
2d21ac55 1427
91447636
A
1428 /*
1429 * ENCRYPTED SWAP:
1430 * The encryption key is based on the page's memory object
1431 * (aka "pager") and paging offset. Moving the page to
1432 * another VM object changes its "pager" and "paging_offset"
2d21ac55
A
1433 * so it has to be decrypted first, or we would lose the key.
1434 *
1435 * One exception is VM object collapsing, where we transfer pages
1436 * from one backing object to its parent object. This operation also
1437 * transfers the paging information, so the <pager,paging_offset> info
1438 * should remain consistent. The caller (vm_object_do_collapse())
1439 * sets "encrypted_ok" in this case.
91447636 1440 */
2d21ac55 1441 if (!encrypted_ok && mem->encrypted) {
91447636
A
1442 panic("vm_page_rename: page %p is encrypted\n", mem);
1443 }
2d21ac55 1444
b0d623f7
A
1445 XPR(XPR_VM_PAGE,
1446 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1447 new_object, new_offset,
1448 mem, 0,0);
1449
1c79356b
A
1450 /*
1451 * Changes to mem->object require the page lock because
1452 * the pageout daemon uses that lock to get the object.
1453 */
b0d623f7 1454 vm_page_lockspin_queues();
1c79356b 1455
b0d623f7
A
1456 vm_page_remove(mem, TRUE);
1457 vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE);
1c79356b 1458
1c79356b
A
1459 vm_page_unlock_queues();
1460}
1461
1462/*
1463 * vm_page_init:
1464 *
1465 * Initialize the fields in a new page.
1466 * This takes a structure with random values and initializes it
1467 * so that it can be given to vm_page_release or vm_page_insert.
1468 */
1469void
1470vm_page_init(
1471 vm_page_t mem,
0b4c1975
A
1472 ppnum_t phys_page,
1473 boolean_t lopage)
1c79356b 1474{
91447636 1475 assert(phys_page);
1c79356b 1476 *mem = vm_page_template;
55e303ae 1477 mem->phys_page = phys_page;
6d2010ae
A
1478#if 0
1479 /*
1480 * we're leaving this turned off for now... currently pages
1481 * come off the free list and are either immediately dirtied/referenced
1482 * due to zero-fill or COW faults, or are used to read or write files...
1483 * in the file I/O case, the UPL mechanism takes care of clearing
1484 * the state of the HW ref/mod bits in a somewhat fragile way.
1485 * Since we may change the way this works in the future (to toughen it up),
1486 * I'm leaving this as a reminder of where these bits could get cleared
1487 */
1488
1489 /*
1490 * make sure both the h/w referenced and modified bits are
1491 * clear at this point... we are especially dependent on
1492 * not finding a 'stale' h/w modified in a number of spots
1493 * once this page goes back into use
1494 */
1495 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
1496#endif
0b4c1975 1497 mem->lopage = lopage;
1c79356b
A
1498}
1499
1500/*
1501 * vm_page_grab_fictitious:
1502 *
1503 * Remove a fictitious page from the free list.
1504 * Returns VM_PAGE_NULL if there are no free pages.
1505 */
1506int c_vm_page_grab_fictitious = 0;
6d2010ae 1507int c_vm_page_grab_fictitious_failed = 0;
1c79356b
A
1508int c_vm_page_release_fictitious = 0;
1509int c_vm_page_more_fictitious = 0;
1510
1511vm_page_t
2d21ac55 1512vm_page_grab_fictitious_common(
b0d623f7 1513 ppnum_t phys_addr)
1c79356b 1514{
6d2010ae
A
1515 vm_page_t m;
1516
1517 if ((m = (vm_page_t)zget(vm_page_zone))) {
1c79356b 1518
0b4c1975 1519 vm_page_init(m, phys_addr, FALSE);
1c79356b 1520 m->fictitious = TRUE;
1c79356b 1521
6d2010ae
A
1522 c_vm_page_grab_fictitious++;
1523 } else
1524 c_vm_page_grab_fictitious_failed++;
1525
1c79356b
A
1526 return m;
1527}
1528
2d21ac55
A
1529vm_page_t
1530vm_page_grab_fictitious(void)
1531{
1532 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1533}
1534
1535vm_page_t
1536vm_page_grab_guard(void)
1537{
1538 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1539}
1540
6d2010ae 1541
1c79356b
A
1542/*
1543 * vm_page_release_fictitious:
1544 *
6d2010ae 1545 * Release a fictitious page to the zone pool
1c79356b 1546 */
1c79356b
A
1547void
1548vm_page_release_fictitious(
6d2010ae 1549 vm_page_t m)
1c79356b
A
1550{
1551 assert(!m->free);
1c79356b 1552 assert(m->fictitious);
2d21ac55
A
1553 assert(m->phys_page == vm_page_fictitious_addr ||
1554 m->phys_page == vm_page_guard_addr);
1c79356b
A
1555
1556 c_vm_page_release_fictitious++;
6d2010ae 1557
91447636 1558 zfree(vm_page_zone, m);
1c79356b
A
1559}
1560
1561/*
1562 * vm_page_more_fictitious:
1563 *
6d2010ae 1564 * Add more fictitious pages to the zone.
1c79356b
A
1565 * Allowed to block. This routine is way intimate
1566 * with the zones code, for several reasons:
1567 * 1. we need to carve some page structures out of physical
1568 * memory before zones work, so they _cannot_ come from
1569 * the zone_map.
1570 * 2. the zone needs to be collectable in order to prevent
1571 * growth without bound. These structures are used by
1572 * the device pager (by the hundreds and thousands), as
1573 * private pages for pageout, and as blocking pages for
1574 * pagein. Temporary bursts in demand should not result in
1575 * permanent allocation of a resource.
1576 * 3. To smooth allocation humps, we allocate single pages
1577 * with kernel_memory_allocate(), and cram them into the
6d2010ae 1578 * zone.
1c79356b
A
1579 */
1580
1581void vm_page_more_fictitious(void)
1582{
6d2010ae
A
1583 vm_offset_t addr;
1584 kern_return_t retval;
1c79356b
A
1585
1586 c_vm_page_more_fictitious++;
1587
1c79356b
A
1588 /*
1589 * Allocate a single page from the zone_map. Do not wait if no physical
1590 * pages are immediately available, and do not zero the space. We need
1591 * our own blocking lock here to prevent having multiple,
1592 * simultaneous requests from piling up on the zone_map lock. Exactly
1593 * one (of our) threads should be potentially waiting on the map lock.
1594 * If winner is not vm-privileged, then the page allocation will fail,
1595 * and it will temporarily block here in the vm_page_wait().
1596 */
b0d623f7 1597 lck_mtx_lock(&vm_page_alloc_lock);
1c79356b
A
1598 /*
1599 * If another thread allocated space, just bail out now.
1600 */
1601 if (zone_free_count(vm_page_zone) > 5) {
1602 /*
1603 * The number "5" is a small number that is larger than the
1604 * number of fictitious pages that any single caller will
1605 * attempt to allocate. Otherwise, a thread will attempt to
1606 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1607 * release all of the resources and locks already acquired,
1608 * and then call this routine. This routine finds the pages
1609 * that the caller released, so fails to allocate new space.
1610 * The process repeats infinitely. The largest known number
1611 * of fictitious pages required in this manner is 2. 5 is
1612 * simply a somewhat larger number.
1613 */
b0d623f7 1614 lck_mtx_unlock(&vm_page_alloc_lock);
1c79356b
A
1615 return;
1616 }
1617
91447636
A
1618 retval = kernel_memory_allocate(zone_map,
1619 &addr, PAGE_SIZE, VM_PROT_ALL,
1620 KMA_KOBJECT|KMA_NOPAGEWAIT);
1621 if (retval != KERN_SUCCESS) {
1c79356b 1622 /*
6d2010ae 1623 * No page was available. Drop the
1c79356b
A
1624 * lock to give another thread a chance at it, and
1625 * wait for the pageout daemon to make progress.
1626 */
b0d623f7 1627 lck_mtx_unlock(&vm_page_alloc_lock);
1c79356b
A
1628 vm_page_wait(THREAD_UNINT);
1629 return;
1630 }
91447636 1631 zcram(vm_page_zone, (void *) addr, PAGE_SIZE);
6d2010ae 1632
b0d623f7 1633 lck_mtx_unlock(&vm_page_alloc_lock);
1c79356b
A
1634}
1635
1c79356b
A
1636
1637/*
1638 * vm_pool_low():
1639 *
1640 * Return true if it is not likely that a non-vm_privileged thread
1641 * can get memory without blocking. Advisory only, since the
1642 * situation may change under us.
1643 */
1644int
1645vm_pool_low(void)
1646{
1647 /* No locking, at worst we will fib. */
b0d623f7 1648 return( vm_page_free_count <= vm_page_free_reserved );
1c79356b
A
1649}
1650
0c530ab8
A
1651
1652
1653/*
1654 * this is an interface to support bring-up of drivers
1655 * on platforms with physical memory > 4G...
1656 */
1657int vm_himemory_mode = 0;
1658
1659
1660/*
1661 * this interface exists to support hardware controllers
1662 * incapable of generating DMAs with more than 32 bits
1663 * of address on platforms with physical memory > 4G...
1664 */
0b4c1975
A
1665unsigned int vm_lopages_allocated_q = 0;
1666unsigned int vm_lopages_allocated_cpm_success = 0;
1667unsigned int vm_lopages_allocated_cpm_failed = 0;
2d21ac55 1668queue_head_t vm_lopage_queue_free;
0c530ab8
A
1669
1670vm_page_t
1671vm_page_grablo(void)
1672{
0b4c1975 1673 vm_page_t mem;
0c530ab8 1674
0b4c1975 1675 if (vm_lopage_needed == FALSE)
0c530ab8
A
1676 return (vm_page_grab());
1677
b0d623f7 1678 lck_mtx_lock_spin(&vm_page_queue_free_lock);
0c530ab8 1679
0b4c1975
A
1680 if ( !queue_empty(&vm_lopage_queue_free)) {
1681 queue_remove_first(&vm_lopage_queue_free,
1682 mem,
1683 vm_page_t,
1684 pageq);
1685 assert(vm_lopage_free_count);
0c530ab8 1686
0b4c1975
A
1687 vm_lopage_free_count--;
1688 vm_lopages_allocated_q++;
1689
1690 if (vm_lopage_free_count < vm_lopage_lowater)
1691 vm_lopage_refill = TRUE;
0c530ab8 1692
0b4c1975 1693 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55 1694 } else {
0b4c1975
A
1695 lck_mtx_unlock(&vm_page_queue_free_lock);
1696
1697 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
1698
1699 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1700 vm_lopages_allocated_cpm_failed++;
1701 lck_mtx_unlock(&vm_page_queue_free_lock);
1702
1703 return (VM_PAGE_NULL);
1704 }
1705 mem->busy = TRUE;
1706
1707 vm_page_lockspin_queues();
1708
1709 mem->gobbled = FALSE;
1710 vm_page_gobble_count--;
1711 vm_page_wire_count--;
1712
1713 vm_lopages_allocated_cpm_success++;
1714 vm_page_unlock_queues();
0c530ab8 1715 }
0b4c1975
A
1716 assert(mem->busy);
1717 assert(!mem->free);
1718 assert(!mem->pmapped);
1719 assert(!mem->wpmapped);
1720
1721 mem->pageq.next = NULL;
1722 mem->pageq.prev = NULL;
0c530ab8
A
1723
1724 return (mem);
1725}
1726
6d2010ae 1727
1c79356b
A
1728/*
1729 * vm_page_grab:
1730 *
2d21ac55
A
1731 * first try to grab a page from the per-cpu free list...
1732 * this must be done while pre-emption is disabled... if
1733 * a page is available, we're done...
1734 * if no page is available, grab the vm_page_queue_free_lock
1735 * and see if current number of free pages would allow us
1736 * to grab at least 1... if not, return VM_PAGE_NULL as before...
1737 * if there are pages available, disable preemption and
1738 * recheck the state of the per-cpu free list... we could
1739 * have been preempted and moved to a different cpu, or
1740 * some other thread could have re-filled it... if still
1741 * empty, figure out how many pages we can steal from the
1742 * global free queue and move to the per-cpu queue...
1743 * return 1 of these pages when done... only wakeup the
1744 * pageout_scan thread if we moved pages from the global
1745 * list... no need for the wakeup if we've satisfied the
1746 * request from the per-cpu queue.
1c79356b
A
1747 */
1748
2d21ac55
A
1749#define COLOR_GROUPS_TO_STEAL 4
1750
1c79356b
A
1751
1752vm_page_t
2d21ac55 1753vm_page_grab( void )
1c79356b 1754{
2d21ac55
A
1755 vm_page_t mem;
1756
1757
1758 disable_preemption();
1759
1760 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1761return_page_from_cpu_list:
1762 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1763 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1764 mem->pageq.next = NULL;
1765
1766 enable_preemption();
1767
1768 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1769 assert(mem->tabled == FALSE);
1770 assert(mem->object == VM_OBJECT_NULL);
1771 assert(!mem->laundry);
1772 assert(!mem->free);
1773 assert(pmap_verify_free(mem->phys_page));
1774 assert(mem->busy);
1775 assert(!mem->encrypted);
1776 assert(!mem->pmapped);
4a3eedf9 1777 assert(!mem->wpmapped);
6d2010ae
A
1778 assert(!mem->active);
1779 assert(!mem->inactive);
1780 assert(!mem->throttled);
1781 assert(!mem->speculative);
2d21ac55
A
1782
1783 return mem;
1784 }
1785 enable_preemption();
1786
1c79356b 1787
1c79356b
A
1788 /*
1789 * Optionally produce warnings if the wire or gobble
1790 * counts exceed some threshold.
1791 */
1792 if (vm_page_wire_count_warning > 0
1793 && vm_page_wire_count >= vm_page_wire_count_warning) {
1794 printf("mk: vm_page_grab(): high wired page count of %d\n",
1795 vm_page_wire_count);
1796 assert(vm_page_wire_count < vm_page_wire_count_warning);
1797 }
1798 if (vm_page_gobble_count_warning > 0
1799 && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1800 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1801 vm_page_gobble_count);
1802 assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1803 }
1804
b0d623f7
A
1805 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1806
1c79356b
A
1807 /*
1808 * Only let privileged threads (involved in pageout)
1809 * dip into the reserved pool.
1810 */
1c79356b 1811 if ((vm_page_free_count < vm_page_free_reserved) &&
91447636 1812 !(current_thread()->options & TH_OPT_VMPRIV)) {
b0d623f7 1813 lck_mtx_unlock(&vm_page_queue_free_lock);
1c79356b 1814 mem = VM_PAGE_NULL;
1c79356b 1815 }
2d21ac55
A
1816 else {
1817 vm_page_t head;
1818 vm_page_t tail;
1819 unsigned int pages_to_steal;
1820 unsigned int color;
1c79356b 1821
2d21ac55 1822 while ( vm_page_free_count == 0 ) {
1c79356b 1823
b0d623f7 1824 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
1825 /*
1826 * must be a privileged thread to be
1827 * in this state since a non-privileged
1828 * thread would have bailed if we were
1829 * under the vm_page_free_reserved mark
1830 */
1831 VM_PAGE_WAIT();
b0d623f7 1832 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2d21ac55
A
1833 }
1834
1835 disable_preemption();
1836
1837 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
b0d623f7 1838 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
1839
1840 /*
1841 * we got preempted and moved to another processor
1842 * or we got preempted and someone else ran and filled the cache
1843 */
1844 goto return_page_from_cpu_list;
1845 }
1846 if (vm_page_free_count <= vm_page_free_reserved)
1847 pages_to_steal = 1;
1848 else {
1849 pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
1850
1851 if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
1852 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
1853 }
1854 color = PROCESSOR_DATA(current_processor(), start_color);
1855 head = tail = NULL;
1856
1857 while (pages_to_steal--) {
1858 if (--vm_page_free_count < vm_page_free_count_minimum)
1859 vm_page_free_count_minimum = vm_page_free_count;
1860
1861 while (queue_empty(&vm_page_queue_free[color]))
1862 color = (color + 1) & vm_color_mask;
1863
1864 queue_remove_first(&vm_page_queue_free[color],
1865 mem,
1866 vm_page_t,
1867 pageq);
1868 mem->pageq.next = NULL;
1869 mem->pageq.prev = NULL;
1870
6d2010ae
A
1871 assert(!mem->active);
1872 assert(!mem->inactive);
1873 assert(!mem->throttled);
1874 assert(!mem->speculative);
1875
2d21ac55
A
1876 color = (color + 1) & vm_color_mask;
1877
1878 if (head == NULL)
1879 head = mem;
1880 else
1881 tail->pageq.next = (queue_t)mem;
1882 tail = mem;
1883
1884 mem->pageq.prev = NULL;
1885 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1886 assert(mem->tabled == FALSE);
1887 assert(mem->object == VM_OBJECT_NULL);
1888 assert(!mem->laundry);
1889 assert(mem->free);
1890 mem->free = FALSE;
1891
1892 assert(pmap_verify_free(mem->phys_page));
1893 assert(mem->busy);
1894 assert(!mem->free);
1895 assert(!mem->encrypted);
1896 assert(!mem->pmapped);
4a3eedf9 1897 assert(!mem->wpmapped);
2d21ac55
A
1898 }
1899 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
1900 PROCESSOR_DATA(current_processor(), start_color) = color;
1901
1902 /*
1903 * satisfy this request
1904 */
1905 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1906 mem = head;
1907 mem->pageq.next = NULL;
91447636 1908
b0d623f7 1909 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
1910
1911 enable_preemption();
1912 }
1c79356b
A
1913 /*
1914 * Decide if we should poke the pageout daemon.
1915 * We do this if the free count is less than the low
1916 * water mark, or if the free count is less than the high
1917 * water mark (but above the low water mark) and the inactive
1918 * count is less than its target.
1919 *
1920 * We don't have the counts locked ... if they change a little,
1921 * it doesn't really matter.
1922 */
1c79356b
A
1923 if ((vm_page_free_count < vm_page_free_min) ||
1924 ((vm_page_free_count < vm_page_free_target) &&
2d21ac55
A
1925 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
1926 thread_wakeup((event_t) &vm_page_free_wanted);
1927
6d2010ae
A
1928 VM_CHECK_MEMORYSTATUS;
1929
55e303ae 1930// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
1c79356b
A
1931
1932 return mem;
1933}
1934
1935/*
1936 * vm_page_release:
1937 *
1938 * Return a page to the free list.
1939 */
1940
1941void
1942vm_page_release(
1943 register vm_page_t mem)
1944{
2d21ac55 1945 unsigned int color;
b0d623f7
A
1946 int need_wakeup = 0;
1947 int need_priv_wakeup = 0;
55e303ae 1948
6d2010ae 1949
1c79356b 1950 assert(!mem->private && !mem->fictitious);
b0d623f7
A
1951 if (vm_page_free_verify) {
1952 assert(pmap_verify_free(mem->phys_page));
1953 }
55e303ae 1954// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
1c79356b 1955
b0d623f7
A
1956
1957 lck_mtx_lock_spin(&vm_page_queue_free_lock);
91447636 1958#if DEBUG
1c79356b
A
1959 if (mem->free)
1960 panic("vm_page_release");
91447636 1961#endif
6d2010ae 1962
2d21ac55 1963 assert(mem->busy);
91447636
A
1964 assert(!mem->laundry);
1965 assert(mem->object == VM_OBJECT_NULL);
1966 assert(mem->pageq.next == NULL &&
1967 mem->pageq.prev == NULL);
2d21ac55
A
1968 assert(mem->listq.next == NULL &&
1969 mem->listq.prev == NULL);
1970
6d2010ae 1971 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
0b4c1975
A
1972 vm_lopage_free_count < vm_lopage_free_limit &&
1973 mem->phys_page < max_valid_low_ppnum) {
0c530ab8
A
1974 /*
1975 * this exists to support hardware controllers
1976 * incapable of generating DMAs with more than 32 bits
1977 * of address on platforms with physical memory > 4G...
1978 */
2d21ac55
A
1979 queue_enter_first(&vm_lopage_queue_free,
1980 mem,
1981 vm_page_t,
1982 pageq);
0c530ab8 1983 vm_lopage_free_count++;
0b4c1975
A
1984
1985 if (vm_lopage_free_count >= vm_lopage_free_limit)
1986 vm_lopage_refill = FALSE;
1987
1988 mem->lopage = TRUE;
0c530ab8 1989 } else {
6d2010ae 1990 mem->lopage = FALSE;
0b4c1975
A
1991 mem->free = TRUE;
1992
2d21ac55
A
1993 color = mem->phys_page & vm_color_mask;
1994 queue_enter_first(&vm_page_queue_free[color],
1995 mem,
1996 vm_page_t,
1997 pageq);
0c530ab8
A
1998 vm_page_free_count++;
1999 /*
2000 * Check if we should wake up someone waiting for page.
2001 * But don't bother waking them unless they can allocate.
2002 *
2003 * We wakeup only one thread, to prevent starvation.
2004 * Because the scheduling system handles wait queues FIFO,
2005 * if we wakeup all waiting threads, one greedy thread
2006 * can starve multiple niceguy threads. When the threads
2007 * all wakeup, the greedy threads runs first, grabs the page,
2008 * and waits for another page. It will be the first to run
2009 * when the next page is freed.
2010 *
2011 * However, there is a slight danger here.
2012 * The thread we wake might not use the free page.
2013 * Then the other threads could wait indefinitely
2014 * while the page goes unused. To forestall this,
2015 * the pageout daemon will keep making free pages
2016 * as long as vm_page_free_wanted is non-zero.
2017 */
1c79356b 2018
b0d623f7
A
2019 assert(vm_page_free_count > 0);
2020 if (vm_page_free_wanted_privileged > 0) {
2d21ac55 2021 vm_page_free_wanted_privileged--;
b0d623f7
A
2022 need_priv_wakeup = 1;
2023 } else if (vm_page_free_wanted > 0 &&
2024 vm_page_free_count > vm_page_free_reserved) {
0c530ab8 2025 vm_page_free_wanted--;
b0d623f7 2026 need_wakeup = 1;
0c530ab8 2027 }
1c79356b 2028 }
b0d623f7
A
2029 lck_mtx_unlock(&vm_page_queue_free_lock);
2030
2031 if (need_priv_wakeup)
2032 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2033 else if (need_wakeup)
2034 thread_wakeup_one((event_t) &vm_page_free_count);
2d21ac55 2035
6d2010ae 2036 VM_CHECK_MEMORYSTATUS;
1c79356b
A
2037}
2038
1c79356b
A
2039/*
2040 * vm_page_wait:
2041 *
2042 * Wait for a page to become available.
2043 * If there are plenty of free pages, then we don't sleep.
2044 *
2045 * Returns:
2046 * TRUE: There may be another page, try again
2047 * FALSE: We were interrupted out of our wait, don't try again
2048 */
2049
2050boolean_t
2051vm_page_wait(
2052 int interruptible )
2053{
2054 /*
2055 * We can't use vm_page_free_reserved to make this
2056 * determination. Consider: some thread might
2057 * need to allocate two pages. The first allocation
2058 * succeeds, the second fails. After the first page is freed,
2059 * a call to vm_page_wait must really block.
2060 */
9bccf70c 2061 kern_return_t wait_result;
9bccf70c 2062 int need_wakeup = 0;
2d21ac55 2063 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
1c79356b 2064
b0d623f7 2065 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2d21ac55
A
2066
2067 if (is_privileged && vm_page_free_count) {
b0d623f7 2068 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
2069 return TRUE;
2070 }
1c79356b 2071 if (vm_page_free_count < vm_page_free_target) {
2d21ac55
A
2072
2073 if (is_privileged) {
2074 if (vm_page_free_wanted_privileged++ == 0)
2075 need_wakeup = 1;
2076 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2077 } else {
2078 if (vm_page_free_wanted++ == 0)
2079 need_wakeup = 1;
2080 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2081 }
b0d623f7 2082 lck_mtx_unlock(&vm_page_queue_free_lock);
1c79356b 2083 counter(c_vm_page_wait_block++);
0b4e3aa0
A
2084
2085 if (need_wakeup)
2086 thread_wakeup((event_t)&vm_page_free_wanted);
9bccf70c 2087
91447636 2088 if (wait_result == THREAD_WAITING)
9bccf70c
A
2089 wait_result = thread_block(THREAD_CONTINUE_NULL);
2090
1c79356b
A
2091 return(wait_result == THREAD_AWAKENED);
2092 } else {
b0d623f7 2093 lck_mtx_unlock(&vm_page_queue_free_lock);
1c79356b
A
2094 return TRUE;
2095 }
2096}
2097
2098/*
2099 * vm_page_alloc:
2100 *
2101 * Allocate and return a memory cell associated
2102 * with this VM object/offset pair.
2103 *
2104 * Object must be locked.
2105 */
2106
2107vm_page_t
2108vm_page_alloc(
2109 vm_object_t object,
2110 vm_object_offset_t offset)
2111{
2112 register vm_page_t mem;
2113
2d21ac55 2114 vm_object_lock_assert_exclusive(object);
1c79356b
A
2115 mem = vm_page_grab();
2116 if (mem == VM_PAGE_NULL)
2117 return VM_PAGE_NULL;
2118
2119 vm_page_insert(mem, object, offset);
2120
2121 return(mem);
2122}
2123
0c530ab8
A
2124vm_page_t
2125vm_page_alloclo(
2126 vm_object_t object,
2127 vm_object_offset_t offset)
2128{
2129 register vm_page_t mem;
2130
2d21ac55 2131 vm_object_lock_assert_exclusive(object);
0c530ab8
A
2132 mem = vm_page_grablo();
2133 if (mem == VM_PAGE_NULL)
2134 return VM_PAGE_NULL;
2135
2136 vm_page_insert(mem, object, offset);
2137
2138 return(mem);
2139}
2140
2141
2d21ac55
A
2142/*
2143 * vm_page_alloc_guard:
2144 *
b0d623f7 2145 * Allocate a fictitious page which will be used
2d21ac55
A
2146 * as a guard page. The page will be inserted into
2147 * the object and returned to the caller.
2148 */
2149
2150vm_page_t
2151vm_page_alloc_guard(
2152 vm_object_t object,
2153 vm_object_offset_t offset)
2154{
2155 register vm_page_t mem;
2156
2157 vm_object_lock_assert_exclusive(object);
2158 mem = vm_page_grab_guard();
2159 if (mem == VM_PAGE_NULL)
2160 return VM_PAGE_NULL;
2161
2162 vm_page_insert(mem, object, offset);
2163
2164 return(mem);
2165}
2166
2167
1c79356b
A
/*
 * Number of pages that were freed while still marked "laundry";
 * incremented in vm_page_free_prepare_queues().
 * NOTE(review): counter() is defined elsewhere; presumably it drops
 * its argument when statistics counters are compiled out -- confirm.
 */
2168counter(unsigned int c_laundry_pages_freed = 0;)
2169
1c79356b 2170/*
6d2010ae 2171 * vm_page_free_prepare:
1c79356b 2172 *
6d2010ae
A
2173 * Removes page from any queue it may be on
2174 * and disassociates it from its VM object.
1c79356b
A
2175 *
2176 * Object and page queues must be locked prior to entry.
2177 */
b0d623f7 2178static void
2d21ac55 2179vm_page_free_prepare(
6d2010ae 2180 vm_page_t mem)
b0d623f7
A
2181{
2182 vm_page_free_prepare_queues(mem);
2183 vm_page_free_prepare_object(mem, TRUE);
2184}
2185
2186
2187void
2188vm_page_free_prepare_queues(
2189 vm_page_t mem)
1c79356b 2190{
2d21ac55 2191 VM_PAGE_CHECK(mem);
1c79356b
A
2192 assert(!mem->free);
2193 assert(!mem->cleaning);
2194 assert(!mem->pageout);
2d21ac55 2195#if DEBUG
b0d623f7 2196 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 2197 if (mem->free)
b0d623f7 2198 panic("vm_page_free: freeing page on free list\n");
91447636 2199#endif
b0d623f7
A
2200 if (mem->object) {
2201 vm_object_lock_assert_exclusive(mem->object);
2202 }
2d21ac55
A
2203
2204 if (mem->laundry) {
2205 /*
2206 * We may have to free a page while it's being laundered
2207 * if we lost its pager (due to a forced unmount, for example).
2208 * We need to call vm_pageout_throttle_up() before removing
2209 * the page from its VM object, so that we can find out on
b0d623f7 2210 * which pageout queue the page is on.
2d21ac55
A
2211 */
2212 vm_pageout_throttle_up(mem);
2213 counter(++c_laundry_pages_freed);
2214 }
b0d623f7
A
2215 VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */
2216
2217 if (VM_PAGE_WIRED(mem)) {
2218 if (mem->object) {
2219 assert(mem->object->wired_page_count > 0);
2220 mem->object->wired_page_count--;
2221 assert(mem->object->resident_page_count >=
2222 mem->object->wired_page_count);
6d2010ae
A
2223
2224 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2225 OSAddAtomic(+1, &vm_page_purgeable_count);
2226 assert(vm_page_purgeable_wired_count > 0);
2227 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2228 }
b0d623f7 2229 }
1c79356b
A
2230 if (!mem->private && !mem->fictitious)
2231 vm_page_wire_count--;
2232 mem->wire_count = 0;
2233 assert(!mem->gobbled);
2234 } else if (mem->gobbled) {
2235 if (!mem->private && !mem->fictitious)
2236 vm_page_wire_count--;
2237 vm_page_gobble_count--;
2238 }
b0d623f7
A
2239}
2240
2241
2242void
2243vm_page_free_prepare_object(
2244 vm_page_t mem,
2245 boolean_t remove_from_hash)
2246{
b0d623f7
A
2247 if (mem->tabled)
2248 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
1c79356b 2249
b0d623f7 2250 PAGE_WAKEUP(mem); /* clears wanted */
1c79356b
A
2251
2252 if (mem->private) {
2253 mem->private = FALSE;
2254 mem->fictitious = TRUE;
55e303ae 2255 mem->phys_page = vm_page_fictitious_addr;
1c79356b 2256 }
6d2010ae 2257 if ( !mem->fictitious) {
b0d623f7
A
2258 if (mem->zero_fill == TRUE)
2259 VM_ZF_COUNT_DECR();
0b4c1975 2260 vm_page_init(mem, mem->phys_page, mem->lopage);
1c79356b
A
2261 }
2262}
2263
b0d623f7 2264
6d2010ae
A
2265/*
2266 * vm_page_free:
2267 *
2268 * Returns the given page to the free list,
2269 * disassociating it with any VM object.
2270 *
2271 * Object and page queues must be locked prior to entry.
2272 */
2d21ac55
A
2273void
2274vm_page_free(
2275 vm_page_t mem)
2276{
b0d623f7 2277 vm_page_free_prepare(mem);
6d2010ae 2278
b0d623f7
A
2279 if (mem->fictitious) {
2280 vm_page_release_fictitious(mem);
2281 } else {
2282 vm_page_release(mem);
2283 }
2284}
2285
2286
2287void
2288vm_page_free_unlocked(
2289 vm_page_t mem,
2290 boolean_t remove_from_hash)
2291{
2292 vm_page_lockspin_queues();
2293 vm_page_free_prepare_queues(mem);
2294 vm_page_unlock_queues();
2295
2296 vm_page_free_prepare_object(mem, remove_from_hash);
2297
2d21ac55
A
2298 if (mem->fictitious) {
2299 vm_page_release_fictitious(mem);
2300 } else {
2301 vm_page_release(mem);
2302 }
2303}
55e303ae 2304
2d21ac55
A
2305/*
2306 * Free a list of pages. The list can be up to several hundred pages,
2307 * as blocked up by vm_pageout_scan().
b0d623f7 2308 * The big win is not having to take the free list lock once
2d21ac55
A
2309 * per page. We sort the incoming pages into n lists, one for
2310 * each color.
2d21ac55 2311 */
55e303ae
A
2312void
2313vm_page_free_list(
b0d623f7
A
2314 vm_page_t mem,
2315 boolean_t prepare_object)
55e303ae 2316{
2d21ac55
A
2317 vm_page_t nxt;
2318 int pg_count = 0;
2319 int color;
2320 int inuse_list_head = -1;
2321
2322 queue_head_t free_list[MAX_COLORS];
2323 int inuse[MAX_COLORS];
55e303ae 2324
2d21ac55
A
2325 for (color = 0; color < (signed) vm_colors; color++) {
2326 queue_init(&free_list[color]);
2327 }
2328
55e303ae 2329 while (mem) {
b0d623f7
A
2330 assert(!mem->inactive);
2331 assert(!mem->active);
2332 assert(!mem->throttled);
2333 assert(!mem->free);
2334 assert(!mem->speculative);
0b4c1975 2335 assert(!VM_PAGE_WIRED(mem));
b0d623f7
A
2336 assert(mem->pageq.prev == NULL);
2337
2338 nxt = (vm_page_t)(mem->pageq.next);
2339
2340 if (prepare_object == TRUE)
2341 vm_page_free_prepare_object(mem, TRUE);
2342
2d21ac55
A
2343 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2344 assert(pmap_verify_free(mem->phys_page));
2345 }
55e303ae 2346
55e303ae 2347 if (!mem->fictitious) {
6d2010ae 2348 assert(mem->busy);
0b4c1975
A
2349 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2350 vm_lopage_free_count < vm_lopage_free_limit &&
2351 mem->phys_page < max_valid_low_ppnum) {
935ed37a
A
2352 mem->pageq.next = NULL;
2353 vm_page_release(mem);
2354 } else {
935ed37a 2355
b0d623f7
A
2356 /*
2357 * IMPORTANT: we can't set the page "free" here
2358 * because that would make the page eligible for
2359 * a physically-contiguous allocation (see
2360 * vm_page_find_contiguous()) right away (we don't
2361 * hold the vm_page_queue_free lock). That would
2362 * cause trouble because the page is not actually
2363 * in the free queue yet...
2364 */
935ed37a
A
2365 color = mem->phys_page & vm_color_mask;
2366 if (queue_empty(&free_list[color])) {
2367 inuse[color] = inuse_list_head;
2368 inuse_list_head = color;
2369 }
2370 queue_enter_first(&free_list[color],
2371 mem,
2372 vm_page_t,
2373 pageq);
2374 pg_count++;
2d21ac55 2375 }
55e303ae 2376 } else {
2d21ac55
A
2377 assert(mem->phys_page == vm_page_fictitious_addr ||
2378 mem->phys_page == vm_page_guard_addr);
55e303ae
A
2379 vm_page_release_fictitious(mem);
2380 }
2381 mem = nxt;
2382 }
2d21ac55
A
2383 if (pg_count) {
2384 unsigned int avail_free_count;
b0d623f7
A
2385 unsigned int need_wakeup = 0;
2386 unsigned int need_priv_wakeup = 0;
2d21ac55 2387
b0d623f7 2388 lck_mtx_lock_spin(&vm_page_queue_free_lock);
55e303ae 2389
2d21ac55
A
2390 color = inuse_list_head;
2391
2392 while( color != -1 ) {
2393 vm_page_t first, last;
2394 vm_page_t first_free;
2395
b0d623f7
A
2396 /*
2397 * Now that we hold the vm_page_queue_free lock,
2398 * it's safe to mark all pages in our local queue
2399 * as "free"...
2400 */
2401 queue_iterate(&free_list[color],
2402 mem,
2403 vm_page_t,
2404 pageq) {
2405 assert(!mem->free);
2406 assert(mem->busy);
2407 mem->free = TRUE;
2408 }
2409
2410 /*
2411 * ... and insert our local queue at the head of
2412 * the global free queue.
2413 */
2d21ac55
A
2414 first = (vm_page_t) queue_first(&free_list[color]);
2415 last = (vm_page_t) queue_last(&free_list[color]);
2416 first_free = (vm_page_t) queue_first(&vm_page_queue_free[color]);
2d21ac55
A
2417 if (queue_empty(&vm_page_queue_free[color])) {
2418 queue_last(&vm_page_queue_free[color]) =
2419 (queue_entry_t) last;
2420 } else {
2421 queue_prev(&first_free->pageq) =
2422 (queue_entry_t) last;
2423 }
2424 queue_first(&vm_page_queue_free[color]) =
2425 (queue_entry_t) first;
2426 queue_prev(&first->pageq) =
2427 (queue_entry_t) &vm_page_queue_free[color];
2428 queue_next(&last->pageq) =
2429 (queue_entry_t) first_free;
b0d623f7
A
2430
2431 /* next color */
2d21ac55
A
2432 color = inuse[color];
2433 }
2434
55e303ae 2435 vm_page_free_count += pg_count;
2d21ac55
A
2436 avail_free_count = vm_page_free_count;
2437
b0d623f7
A
2438 if (vm_page_free_wanted_privileged > 0 &&
2439 avail_free_count > 0) {
2440 if (avail_free_count < vm_page_free_wanted_privileged) {
2441 need_priv_wakeup = avail_free_count;
2442 vm_page_free_wanted_privileged -=
2443 avail_free_count;
2444 avail_free_count = 0;
2445 } else {
2446 need_priv_wakeup = vm_page_free_wanted_privileged;
2447 vm_page_free_wanted_privileged = 0;
2448 avail_free_count -=
2449 vm_page_free_wanted_privileged;
2450 }
2d21ac55 2451 }
55e303ae 2452
b0d623f7
A
2453 if (vm_page_free_wanted > 0 &&
2454 avail_free_count > vm_page_free_reserved) {
91447636 2455 unsigned int available_pages;
55e303ae 2456
b0d623f7
A
2457 available_pages = (avail_free_count -
2458 vm_page_free_reserved);
55e303ae
A
2459
2460 if (available_pages >= vm_page_free_wanted) {
b0d623f7 2461 need_wakeup = vm_page_free_wanted;
55e303ae 2462 vm_page_free_wanted = 0;
55e303ae 2463 } else {
b0d623f7
A
2464 need_wakeup = available_pages;
2465 vm_page_free_wanted -= available_pages;
55e303ae
A
2466 }
2467 }
b0d623f7 2468 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55 2469
b0d623f7
A
2470 if (need_priv_wakeup != 0) {
2471 /*
2472 * There shouldn't be that many VM-privileged threads,
2473 * so let's wake them all up, even if we don't quite
2474 * have enough pages to satisfy them all.
2475 */
2476 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2477 }
2478 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2479 /*
2480 * We don't expect to have any more waiters
2481 * after this, so let's wake them all up at
2482 * once.
2483 */
2484 thread_wakeup((event_t) &vm_page_free_count);
2485 } else for (; need_wakeup != 0; need_wakeup--) {
2486 /*
2487 * Wake up one waiter per page we just released.
2488 */
2489 thread_wakeup_one((event_t) &vm_page_free_count);
2490 }
2d21ac55 2491
6d2010ae 2492 VM_CHECK_MEMORYSTATUS;
55e303ae
A
2493 }
2494}
2495
2496
1c79356b
A
2497/*
2498 * vm_page_wire:
2499 *
2500 * Mark this page as wired down by yet
2501 * another map, removing it from paging queues
2502 * as necessary.
2503 *
2504 * The page's object and the page queues must be locked.
2505 */
2506void
2507vm_page_wire(
2508 register vm_page_t mem)
2509{
2510
91447636 2511// dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
1c79356b
A
2512
2513 VM_PAGE_CHECK(mem);
b0d623f7
A
2514 if (mem->object) {
2515 vm_object_lock_assert_exclusive(mem->object);
2516 } else {
2517 /*
2518 * In theory, the page should be in an object before it
2519 * gets wired, since we need to hold the object lock
2520 * to update some fields in the page structure.
2521 * However, some code (i386 pmap, for example) might want
2522 * to wire a page before it gets inserted into an object.
2523 * That's somewhat OK, as long as nobody else can get to
2524 * that page and update it at the same time.
2525 */
2526 }
91447636 2527#if DEBUG
b0d623f7 2528 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 2529#endif
b0d623f7 2530 if ( !VM_PAGE_WIRED(mem)) {
1c79356b 2531 VM_PAGE_QUEUES_REMOVE(mem);
b0d623f7
A
2532
2533 if (mem->object) {
2534 mem->object->wired_page_count++;
2535 assert(mem->object->resident_page_count >=
2536 mem->object->wired_page_count);
2537 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2538 assert(vm_page_purgeable_count > 0);
2539 OSAddAtomic(-1, &vm_page_purgeable_count);
2540 OSAddAtomic(1, &vm_page_purgeable_wired_count);
2541 }
2542 if (mem->object->all_reusable) {
2543 /*
2544 * Wired pages are not counted as "re-usable"
2545 * in "all_reusable" VM objects, so nothing
2546 * to do here.
2547 */
2548 } else if (mem->reusable) {
2549 /*
2550 * This page is not "re-usable" when it's
2551 * wired, so adjust its state and the
2552 * accounting.
2553 */
2554 vm_object_reuse_pages(mem->object,
2555 mem->offset,
2556 mem->offset+PAGE_SIZE_64,
2557 FALSE);
2558 }
2559 }
2560 assert(!mem->reusable);
2561
1c79356b
A
2562 if (!mem->private && !mem->fictitious && !mem->gobbled)
2563 vm_page_wire_count++;
2564 if (mem->gobbled)
2565 vm_page_gobble_count--;
2566 mem->gobbled = FALSE;
2d21ac55 2567 if (mem->zero_fill == TRUE) {
9bccf70c 2568 mem->zero_fill = FALSE;
b0d623f7 2569 VM_ZF_COUNT_DECR();
9bccf70c 2570 }
593a1d5f 2571
6d2010ae
A
2572 VM_CHECK_MEMORYSTATUS;
2573
91447636
A
2574 /*
2575 * ENCRYPTED SWAP:
2576 * The page could be encrypted, but
2577 * We don't have to decrypt it here
2578 * because we don't guarantee that the
2579 * data is actually valid at this point.
2580 * The page will get decrypted in
2581 * vm_fault_wire() if needed.
2582 */
1c79356b
A
2583 }
2584 assert(!mem->gobbled);
2585 mem->wire_count++;
b0d623f7 2586 VM_PAGE_CHECK(mem);
1c79356b
A
2587}
2588
2589/*
2590 * vm_page_gobble:
2591 *
2592 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2593 *
2594 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2595 */
2596void
2597vm_page_gobble(
2598 register vm_page_t mem)
2599{
2d21ac55 2600 vm_page_lockspin_queues();
1c79356b
A
2601 VM_PAGE_CHECK(mem);
2602
2603 assert(!mem->gobbled);
b0d623f7 2604 assert( !VM_PAGE_WIRED(mem));
1c79356b 2605
b0d623f7 2606 if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
1c79356b
A
2607 if (!mem->private && !mem->fictitious)
2608 vm_page_wire_count++;
2609 }
2610 vm_page_gobble_count++;
2611 mem->gobbled = TRUE;
2612 vm_page_unlock_queues();
2613}
2614
2615/*
2616 * vm_page_unwire:
2617 *
2618 * Release one wiring of this page, potentially
2619 * enabling it to be paged again.
2620 *
2621 * The page's object and the page queues must be locked.
2622 */
2623void
2624vm_page_unwire(
0b4c1975
A
2625 vm_page_t mem,
2626 boolean_t queueit)
1c79356b
A
2627{
2628
91447636 2629// dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
1c79356b
A
2630
2631 VM_PAGE_CHECK(mem);
b0d623f7
A
2632 assert(VM_PAGE_WIRED(mem));
2633 assert(mem->object != VM_OBJECT_NULL);
91447636 2634#if DEBUG
b0d623f7
A
2635 vm_object_lock_assert_exclusive(mem->object);
2636 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 2637#endif
1c79356b
A
2638 if (--mem->wire_count == 0) {
2639 assert(!mem->private && !mem->fictitious);
2640 vm_page_wire_count--;
b0d623f7
A
2641 assert(mem->object->wired_page_count > 0);
2642 mem->object->wired_page_count--;
2643 assert(mem->object->resident_page_count >=
2644 mem->object->wired_page_count);
2645 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2646 OSAddAtomic(+1, &vm_page_purgeable_count);
2647 assert(vm_page_purgeable_wired_count > 0);
2648 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2649 }
91447636
A
2650 assert(!mem->laundry);
2651 assert(mem->object != kernel_object);
2652 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
0b4c1975
A
2653
2654 if (queueit == TRUE) {
2655 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
2656 vm_page_deactivate(mem);
2657 } else {
2658 vm_page_activate(mem);
2659 }
2d21ac55 2660 }
593a1d5f 2661
6d2010ae
A
2662 VM_CHECK_MEMORYSTATUS;
2663
1c79356b 2664 }
b0d623f7 2665 VM_PAGE_CHECK(mem);
1c79356b
A
2666}
2667
2668/*
2669 * vm_page_deactivate:
2670 *
2671 * Returns the given page to the inactive list,
2672 * indicating that no physical maps have access
2673 * to this page. [Used by the physical mapping system.]
2674 *
2675 * The page queues must be locked.
2676 */
2677void
2678vm_page_deactivate(
b0d623f7
A
2679 vm_page_t m)
2680{
2681 vm_page_deactivate_internal(m, TRUE);
2682}
2683
2684
2685void
2686vm_page_deactivate_internal(
2687 vm_page_t m,
2688 boolean_t clear_hw_reference)
1c79356b 2689{
2d21ac55 2690
1c79356b 2691 VM_PAGE_CHECK(m);
91447636 2692 assert(m->object != kernel_object);
2d21ac55 2693 assert(m->phys_page != vm_page_guard_addr);
1c79356b 2694
55e303ae 2695// dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
91447636 2696#if DEBUG
b0d623f7 2697 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 2698#endif
1c79356b
A
2699 /*
2700 * This page is no longer very interesting. If it was
2701 * interesting (active or inactive/referenced), then we
2702 * clear the reference bit and (re)enter it in the
2703 * inactive queue. Note wired pages should not have
2704 * their reference bit cleared.
2705 */
6d2010ae 2706 assert ( !(m->absent && !m->unusual));
0b4c1975 2707
1c79356b 2708 if (m->gobbled) { /* can this happen? */
b0d623f7 2709 assert( !VM_PAGE_WIRED(m));
2d21ac55 2710
1c79356b
A
2711 if (!m->private && !m->fictitious)
2712 vm_page_wire_count--;
2713 vm_page_gobble_count--;
2714 m->gobbled = FALSE;
2715 }
6d2010ae 2716 if (m->private || m->fictitious || (VM_PAGE_WIRED(m)))
1c79356b 2717 return;
2d21ac55 2718
6d2010ae 2719 if (!m->absent && clear_hw_reference == TRUE)
2d21ac55
A
2720 pmap_clear_reference(m->phys_page);
2721
2722 m->reference = FALSE;
2d21ac55
A
2723 m->no_cache = FALSE;
2724
2725 if (!m->inactive) {
2726 VM_PAGE_QUEUES_REMOVE(m);
0b4e3aa0 2727
91447636
A
2728 assert(!m->laundry);
2729 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2d21ac55 2730
6d2010ae 2731 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
d1ecb069
A
2732 m->dirty && m->object->internal &&
2733 (m->object->purgable == VM_PURGABLE_DENY ||
2734 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2735 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2d21ac55
A
2736 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2737 m->throttled = TRUE;
2738 vm_page_throttled_count++;
9bccf70c 2739 } else {
6d2010ae 2740 if (m->object->named && m->object->ref_count == 1) {
2d21ac55 2741 vm_page_speculate(m, FALSE);
b0d623f7 2742#if DEVELOPMENT || DEBUG
2d21ac55 2743 vm_page_speculative_recreated++;
b0d623f7 2744#endif
2d21ac55 2745 } else {
6d2010ae 2746 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
2d21ac55 2747 }
9bccf70c 2748 }
1c79356b
A
2749 }
2750}
2751
2752/*
2753 * vm_page_activate:
2754 *
2755 * Put the specified page on the active list (if appropriate).
2756 *
2757 * The page queues must be locked.
2758 */
2759
2760void
2761vm_page_activate(
2762 register vm_page_t m)
2763{
2764 VM_PAGE_CHECK(m);
2d21ac55 2765#ifdef FIXME_4778297
91447636 2766 assert(m->object != kernel_object);
2d21ac55
A
2767#endif
2768 assert(m->phys_page != vm_page_guard_addr);
91447636 2769#if DEBUG
b0d623f7 2770 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 2771#endif
6d2010ae 2772 assert( !(m->absent && !m->unusual));
0b4c1975 2773
1c79356b 2774 if (m->gobbled) {
b0d623f7 2775 assert( !VM_PAGE_WIRED(m));
1c79356b
A
2776 if (!m->private && !m->fictitious)
2777 vm_page_wire_count--;
2778 vm_page_gobble_count--;
2779 m->gobbled = FALSE;
2780 }
6d2010ae 2781 if (m->private || m->fictitious)
1c79356b
A
2782 return;
2783
2d21ac55
A
2784#if DEBUG
2785 if (m->active)
2786 panic("vm_page_activate: already active");
2787#endif
2788
2789 if (m->speculative) {
2790 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
2791 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
2792 }
2793
2794 VM_PAGE_QUEUES_REMOVE(m);
2795
b0d623f7 2796 if ( !VM_PAGE_WIRED(m)) {
91447636 2797 assert(!m->laundry);
2d21ac55 2798 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
6d2010ae
A
2799 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2800 m->dirty && m->object->internal &&
d1ecb069
A
2801 (m->object->purgable == VM_PURGABLE_DENY ||
2802 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2803 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2d21ac55
A
2804 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2805 m->throttled = TRUE;
2806 vm_page_throttled_count++;
9bccf70c 2807 } else {
2d21ac55
A
2808 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2809 m->active = TRUE;
6d2010ae 2810 vm_page_active_count++;
9bccf70c 2811 }
2d21ac55
A
2812 m->reference = TRUE;
2813 m->no_cache = FALSE;
1c79356b 2814 }
b0d623f7 2815 VM_PAGE_CHECK(m);
2d21ac55
A
2816}
2817
2818
2819/*
2820 * vm_page_speculate:
2821 *
2822 * Put the specified page on the speculative list (if appropriate).
2823 *
2824 * The page queues must be locked.
2825 */
2826void
2827vm_page_speculate(
2828 vm_page_t m,
2829 boolean_t new)
2830{
2831 struct vm_speculative_age_q *aq;
2832
2833 VM_PAGE_CHECK(m);
2834 assert(m->object != kernel_object);
2d21ac55 2835 assert(m->phys_page != vm_page_guard_addr);
91447636 2836#if DEBUG
b0d623f7 2837 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 2838#endif
6d2010ae 2839 assert( !(m->absent && !m->unusual));
b0d623f7 2840
6d2010ae
A
2841 if (m->private || m->fictitious)
2842 return;
0b4c1975 2843
b0d623f7
A
2844 VM_PAGE_QUEUES_REMOVE(m);
2845
2846 if ( !VM_PAGE_WIRED(m)) {
2d21ac55 2847 mach_timespec_t ts;
b0d623f7
A
2848 clock_sec_t sec;
2849 clock_nsec_t nsec;
2d21ac55 2850
b0d623f7
A
2851 clock_get_system_nanotime(&sec, &nsec);
2852 ts.tv_sec = (unsigned int) sec;
2853 ts.tv_nsec = nsec;
2d21ac55
A
2854
2855 if (vm_page_speculative_count == 0) {
2856
2857 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2858 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2859
2860 aq = &vm_page_queue_speculative[speculative_age_index];
2861
2862 /*
2863 * set the timer to begin a new group
2864 */
6d2010ae
A
2865 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
2866 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
2d21ac55
A
2867
2868 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2869 } else {
2870 aq = &vm_page_queue_speculative[speculative_age_index];
2871
2872 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
2873
2874 speculative_age_index++;
2875
2876 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2877 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2878 if (speculative_age_index == speculative_steal_index) {
2879 speculative_steal_index = speculative_age_index + 1;
2880
2881 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2882 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2883 }
2884 aq = &vm_page_queue_speculative[speculative_age_index];
2885
2886 if (!queue_empty(&aq->age_q))
2887 vm_page_speculate_ageit(aq);
2888
6d2010ae
A
2889 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
2890 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
2d21ac55
A
2891
2892 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2893 }
2894 }
2895 enqueue_tail(&aq->age_q, &m->pageq);
2896 m->speculative = TRUE;
2897 vm_page_speculative_count++;
2898
2899 if (new == TRUE) {
6d2010ae
A
2900 vm_object_lock_assert_exclusive(m->object);
2901
2d21ac55 2902 m->object->pages_created++;
b0d623f7 2903#if DEVELOPMENT || DEBUG
2d21ac55 2904 vm_page_speculative_created++;
b0d623f7 2905#endif
2d21ac55
A
2906 }
2907 }
b0d623f7 2908 VM_PAGE_CHECK(m);
2d21ac55
A
2909}
2910
2911
2912/*
2913 * move pages from the specified aging bin to
2914 * the speculative bin that pageout_scan claims from
2915 *
2916 * The page queues must be locked.
2917 */
2918void
2919vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
2920{
2921 struct vm_speculative_age_q *sq;
2922 vm_page_t t;
2923
2924 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
2925
2926 if (queue_empty(&sq->age_q)) {
2927 sq->age_q.next = aq->age_q.next;
2928 sq->age_q.prev = aq->age_q.prev;
2929
2930 t = (vm_page_t)sq->age_q.next;
2931 t->pageq.prev = &sq->age_q;
2932
2933 t = (vm_page_t)sq->age_q.prev;
2934 t->pageq.next = &sq->age_q;
2935 } else {
2936 t = (vm_page_t)sq->age_q.prev;
2937 t->pageq.next = aq->age_q.next;
2938
2939 t = (vm_page_t)aq->age_q.next;
2940 t->pageq.prev = sq->age_q.prev;
2941
2942 t = (vm_page_t)aq->age_q.prev;
2943 t->pageq.next = &sq->age_q;
2944
2945 sq->age_q.prev = aq->age_q.prev;
1c79356b 2946 }
2d21ac55
A
2947 queue_init(&aq->age_q);
2948}
2949
2950
2951void
2952vm_page_lru(
2953 vm_page_t m)
2954{
2955 VM_PAGE_CHECK(m);
2956 assert(m->object != kernel_object);
2957 assert(m->phys_page != vm_page_guard_addr);
2958
2959#if DEBUG
b0d623f7 2960 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
2961#endif
2962 if (m->active || m->reference)
2963 return;
2964
b0d623f7 2965 if (m->private || (VM_PAGE_WIRED(m)))
2d21ac55
A
2966 return;
2967
2968 m->no_cache = FALSE;
2969
2970 VM_PAGE_QUEUES_REMOVE(m);
2971
2972 assert(!m->laundry);
2973 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2974
6d2010ae 2975 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
1c79356b
A
2976}
2977
2d21ac55 2978
b0d623f7
A
2979void
2980vm_page_reactivate_all_throttled(void)
2981{
2982 vm_page_t first_throttled, last_throttled;
2983 vm_page_t first_active;
2984 vm_page_t m;
2985 int extra_active_count;
2986
6d2010ae
A
2987 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
2988 return;
2989
b0d623f7
A
2990 extra_active_count = 0;
2991 vm_page_lock_queues();
2992 if (! queue_empty(&vm_page_queue_throttled)) {
2993 /*
2994 * Switch "throttled" pages to "active".
2995 */
2996 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
2997 VM_PAGE_CHECK(m);
2998 assert(m->throttled);
2999 assert(!m->active);
3000 assert(!m->inactive);
3001 assert(!m->speculative);
3002 assert(!VM_PAGE_WIRED(m));
6d2010ae
A
3003
3004 extra_active_count++;
3005
b0d623f7
A
3006 m->throttled = FALSE;
3007 m->active = TRUE;
3008 VM_PAGE_CHECK(m);
3009 }
3010
3011 /*
3012 * Transfer the entire throttled queue to a regular LRU page queues.
3013 * We insert it at the head of the active queue, so that these pages
3014 * get re-evaluated by the LRU algorithm first, since they've been
3015 * completely out of it until now.
3016 */
3017 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3018 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3019 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3020 if (queue_empty(&vm_page_queue_active)) {
3021 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3022 } else {
3023 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3024 }
3025 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3026 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3027 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3028
3029#if DEBUG
3030 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3031#endif
3032 queue_init(&vm_page_queue_throttled);
3033 /*
3034 * Adjust the global page counts.
3035 */
3036 vm_page_active_count += extra_active_count;
3037 vm_page_throttled_count = 0;
3038 }
3039 assert(vm_page_throttled_count == 0);
3040 assert(queue_empty(&vm_page_queue_throttled));
3041 vm_page_unlock_queues();
3042}
3043
3044
3045/*
3046 * move pages from the indicated local queue to the global active queue
3047 * its ok to fail if we're below the hard limit and force == FALSE
3048 * the nolocks == TRUE case is to allow this function to be run on
3049 * the hibernate path
3050 */
3051
3052void
3053vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3054{
3055 struct vpl *lq;
3056 vm_page_t first_local, last_local;
3057 vm_page_t first_active;
3058 vm_page_t m;
3059 uint32_t count = 0;
3060
3061 if (vm_page_local_q == NULL)
3062 return;
3063
3064 lq = &vm_page_local_q[lid].vpl_un.vpl;
3065
3066 if (nolocks == FALSE) {
3067 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3068 if ( !vm_page_trylockspin_queues())
3069 return;
3070 } else
3071 vm_page_lockspin_queues();
3072
3073 VPL_LOCK(&lq->vpl_lock);
3074 }
3075 if (lq->vpl_count) {
3076 /*
3077 * Switch "local" pages to "active".
3078 */
3079 assert(!queue_empty(&lq->vpl_queue));
3080
3081 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3082 VM_PAGE_CHECK(m);
3083 assert(m->local);
3084 assert(!m->active);
3085 assert(!m->inactive);
3086 assert(!m->speculative);
3087 assert(!VM_PAGE_WIRED(m));
3088 assert(!m->throttled);
3089 assert(!m->fictitious);
3090
3091 if (m->local_id != lid)
3092 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3093
3094 m->local_id = 0;
3095 m->local = FALSE;
3096 m->active = TRUE;
3097 VM_PAGE_CHECK(m);
3098
3099 count++;
3100 }
3101 if (count != lq->vpl_count)
3102 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3103
3104 /*
3105 * Transfer the entire local queue to a regular LRU page queues.
3106 */
3107 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3108 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3109 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3110
3111 if (queue_empty(&vm_page_queue_active)) {
3112 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3113 } else {
3114 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3115 }
3116 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3117 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3118 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3119
3120 queue_init(&lq->vpl_queue);
3121 /*
3122 * Adjust the global page counts.
3123 */
3124 vm_page_active_count += lq->vpl_count;
3125 lq->vpl_count = 0;
3126 }
3127 assert(queue_empty(&lq->vpl_queue));
3128
3129 if (nolocks == FALSE) {
3130 VPL_UNLOCK(&lq->vpl_lock);
3131 vm_page_unlock_queues();
3132 }
3133}
3134
1c79356b
A
3135/*
3136 * vm_page_part_zero_fill:
3137 *
3138 * Zero-fill a part of the page.
3139 */
3140void
3141vm_page_part_zero_fill(
3142 vm_page_t m,
3143 vm_offset_t m_pa,
3144 vm_size_t len)
3145{
3146 vm_page_t tmp;
3147
3148 VM_PAGE_CHECK(m);
3149#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
55e303ae 3150 pmap_zero_part_page(m->phys_page, m_pa, len);
1c79356b
A
3151#else
3152 while (1) {
3153 tmp = vm_page_grab();
3154 if (tmp == VM_PAGE_NULL) {
3155 vm_page_wait(THREAD_UNINT);
3156 continue;
3157 }
3158 break;
3159 }
3160 vm_page_zero_fill(tmp);
3161 if(m_pa != 0) {
3162 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3163 }
3164 if((m_pa + len) < PAGE_SIZE) {
3165 vm_page_part_copy(m, m_pa + len, tmp,
3166 m_pa + len, PAGE_SIZE - (m_pa + len));
3167 }
3168 vm_page_copy(tmp,m);
b0d623f7 3169 VM_PAGE_FREE(tmp);
1c79356b
A
3170#endif
3171
3172}
3173
3174/*
3175 * vm_page_zero_fill:
3176 *
3177 * Zero-fill the specified page.
3178 */
3179void
3180vm_page_zero_fill(
3181 vm_page_t m)
3182{
3183 XPR(XPR_VM_PAGE,
3184 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
b0d623f7 3185 m->object, m->offset, m, 0,0);
1c79356b
A
3186
3187 VM_PAGE_CHECK(m);
3188
55e303ae
A
3189// dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3190 pmap_zero_page(m->phys_page);
1c79356b
A
3191}
3192
3193/*
3194 * vm_page_part_copy:
3195 *
3196 * copy part of one page to another
3197 */
3198
3199void
3200vm_page_part_copy(
3201 vm_page_t src_m,
3202 vm_offset_t src_pa,
3203 vm_page_t dst_m,
3204 vm_offset_t dst_pa,
3205 vm_size_t len)
3206{
3207 VM_PAGE_CHECK(src_m);
3208 VM_PAGE_CHECK(dst_m);
3209
55e303ae
A
3210 pmap_copy_part_page(src_m->phys_page, src_pa,
3211 dst_m->phys_page, dst_pa, len);
1c79356b
A
3212}
3213
3214/*
3215 * vm_page_copy:
3216 *
3217 * Copy one page to another
91447636
A
3218 *
3219 * ENCRYPTED SWAP:
3220 * The source page should not be encrypted. The caller should
3221 * make sure the page is decrypted first, if necessary.
1c79356b
A
3222 */
3223
2d21ac55
A
3224int vm_page_copy_cs_validations = 0;
3225int vm_page_copy_cs_tainted = 0;
3226
1c79356b
A
3227void
3228vm_page_copy(
3229 vm_page_t src_m,
3230 vm_page_t dest_m)
3231{
3232 XPR(XPR_VM_PAGE,
3233 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
b0d623f7
A
3234 src_m->object, src_m->offset,
3235 dest_m->object, dest_m->offset,
1c79356b
A
3236 0);
3237
3238 VM_PAGE_CHECK(src_m);
3239 VM_PAGE_CHECK(dest_m);
3240
91447636
A
3241 /*
3242 * ENCRYPTED SWAP:
3243 * The source page should not be encrypted at this point.
3244 * The destination page will therefore not contain encrypted
3245 * data after the copy.
3246 */
3247 if (src_m->encrypted) {
3248 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3249 }
3250 dest_m->encrypted = FALSE;
3251
2d21ac55 3252 if (src_m->object != VM_OBJECT_NULL &&
4a3eedf9 3253 src_m->object->code_signed) {
2d21ac55 3254 /*
4a3eedf9 3255 * We're copying a page from a code-signed object.
2d21ac55
A
3256 * Whoever ends up mapping the copy page might care about
3257 * the original page's integrity, so let's validate the
3258 * source page now.
3259 */
3260 vm_page_copy_cs_validations++;
3261 vm_page_validate_cs(src_m);
3262 }
6d2010ae
A
3263
3264 if (vm_page_is_slideable(src_m)) {
3265 boolean_t was_busy = src_m->busy;
3266 src_m->busy = TRUE;
3267 (void) vm_page_slide(src_m, 0);
3268 assert(src_m->busy);
3269 if(!was_busy) {
3270 PAGE_WAKEUP_DONE(src_m);
3271 }
3272 }
3273
2d21ac55 3274 /*
b0d623f7
A
3275 * Propagate the cs_tainted bit to the copy page. Do not propagate
3276 * the cs_validated bit.
2d21ac55 3277 */
2d21ac55
A
3278 dest_m->cs_tainted = src_m->cs_tainted;
3279 if (dest_m->cs_tainted) {
2d21ac55
A
3280 vm_page_copy_cs_tainted++;
3281 }
6d2010ae
A
3282 dest_m->slid = src_m->slid;
3283 dest_m->error = src_m->error; /* sliding src_m might have failed... */
55e303ae 3284 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
1c79356b
A
3285}
3286
2d21ac55 3287#if MACH_ASSERT
b0d623f7
A
3288static void
3289_vm_page_print(
3290 vm_page_t p)
3291{
3292 printf("vm_page %p: \n", p);
3293 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3294 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3295 printf(" next=%p\n", p->next);
3296 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3297 printf(" wire_count=%u\n", p->wire_count);
3298
3299 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3300 (p->local ? "" : "!"),
3301 (p->inactive ? "" : "!"),
3302 (p->active ? "" : "!"),
3303 (p->pageout_queue ? "" : "!"),
3304 (p->speculative ? "" : "!"),
3305 (p->laundry ? "" : "!"));
3306 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3307 (p->free ? "" : "!"),
3308 (p->reference ? "" : "!"),
3309 (p->gobbled ? "" : "!"),
3310 (p->private ? "" : "!"),
3311 (p->throttled ? "" : "!"));
3312 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3313 (p->busy ? "" : "!"),
3314 (p->wanted ? "" : "!"),
3315 (p->tabled ? "" : "!"),
3316 (p->fictitious ? "" : "!"),
3317 (p->pmapped ? "" : "!"),
3318 (p->wpmapped ? "" : "!"));
3319 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3320 (p->pageout ? "" : "!"),
3321 (p->absent ? "" : "!"),
3322 (p->error ? "" : "!"),
3323 (p->dirty ? "" : "!"),
3324 (p->cleaning ? "" : "!"),
3325 (p->precious ? "" : "!"),
3326 (p->clustered ? "" : "!"));
3327 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3328 (p->overwriting ? "" : "!"),
3329 (p->restart ? "" : "!"),
3330 (p->unusual ? "" : "!"),
3331 (p->encrypted ? "" : "!"),
3332 (p->encrypted_cleaning ? "" : "!"));
3333 printf(" %slist_req_pending, %sdump_cleaning, %scs_validated, %scs_tainted, %sno_cache\n",
3334 (p->list_req_pending ? "" : "!"),
3335 (p->dump_cleaning ? "" : "!"),
3336 (p->cs_validated ? "" : "!"),
3337 (p->cs_tainted ? "" : "!"),
3338 (p->no_cache ? "" : "!"));
3339 printf(" %szero_fill\n",
3340 (p->zero_fill ? "" : "!"));
3341
3342 printf("phys_page=0x%x\n", p->phys_page);
3343}
3344
1c79356b
A
3345/*
3346 * Check that the list of pages is ordered by
3347 * ascending physical address and has no holes.
3348 */
2d21ac55 3349static int
1c79356b
A
3350vm_page_verify_contiguous(
3351 vm_page_t pages,
3352 unsigned int npages)
3353{
3354 register vm_page_t m;
3355 unsigned int page_count;
91447636 3356 vm_offset_t prev_addr;
1c79356b 3357
55e303ae 3358 prev_addr = pages->phys_page;
1c79356b
A
3359 page_count = 1;
3360 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
55e303ae 3361 if (m->phys_page != prev_addr + 1) {
b0d623f7
A
3362 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3363 m, (long)prev_addr, m->phys_page);
6d2010ae 3364 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
1c79356b
A
3365 panic("vm_page_verify_contiguous: not contiguous!");
3366 }
55e303ae 3367 prev_addr = m->phys_page;
1c79356b
A
3368 ++page_count;
3369 }
3370 if (page_count != npages) {
2d21ac55 3371 printf("pages %p actual count 0x%x but requested 0x%x\n",
1c79356b
A
3372 pages, page_count, npages);
3373 panic("vm_page_verify_contiguous: count error");
3374 }
3375 return 1;
3376}
1c79356b
A
3377
3378
2d21ac55
A
3379/*
3380 * Check the free lists for proper length etc.
3381 */
b0d623f7
A
3382static unsigned int
3383vm_page_verify_free_list(
d1ecb069 3384 queue_head_t *vm_page_queue,
b0d623f7
A
3385 unsigned int color,
3386 vm_page_t look_for_page,
3387 boolean_t expect_page)
3388{
3389 unsigned int npages;
3390 vm_page_t m;
3391 vm_page_t prev_m;
3392 boolean_t found_page;
3393
3394 found_page = FALSE;
3395 npages = 0;
d1ecb069
A
3396 prev_m = (vm_page_t) vm_page_queue;
3397 queue_iterate(vm_page_queue,
b0d623f7
A
3398 m,
3399 vm_page_t,
3400 pageq) {
6d2010ae 3401
b0d623f7
A
3402 if (m == look_for_page) {
3403 found_page = TRUE;
3404 }
3405 if ((vm_page_t) m->pageq.prev != prev_m)
3406 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3407 color, npages, m, m->pageq.prev, prev_m);
b0d623f7
A
3408 if ( ! m->busy )
3409 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3410 color, npages, m);
6d2010ae
A
3411 if (color != (unsigned int) -1) {
3412 if ((m->phys_page & vm_color_mask) != color)
3413 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3414 color, npages, m, m->phys_page & vm_color_mask, color);
3415 if ( ! m->free )
3416 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3417 color, npages, m);
3418 }
b0d623f7
A
3419 ++npages;
3420 prev_m = m;
3421 }
3422 if (look_for_page != VM_PAGE_NULL) {
3423 unsigned int other_color;
3424
3425 if (expect_page && !found_page) {
3426 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3427 color, npages, look_for_page, look_for_page->phys_page);
3428 _vm_page_print(look_for_page);
3429 for (other_color = 0;
3430 other_color < vm_colors;
3431 other_color++) {
3432 if (other_color == color)
3433 continue;
d1ecb069 3434 vm_page_verify_free_list(&vm_page_queue_free[other_color],
6d2010ae 3435 other_color, look_for_page, FALSE);
b0d623f7 3436 }
6d2010ae 3437 if (color == (unsigned int) -1) {
d1ecb069
A
3438 vm_page_verify_free_list(&vm_lopage_queue_free,
3439 (unsigned int) -1, look_for_page, FALSE);
3440 }
b0d623f7
A
3441 panic("vm_page_verify_free_list(color=%u)\n", color);
3442 }
3443 if (!expect_page && found_page) {
3444 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3445 color, npages, look_for_page, look_for_page->phys_page);
3446 }
3447 }
3448 return npages;
3449}
3450
3451static boolean_t vm_page_verify_free_lists_enabled = FALSE;
2d21ac55
A
3452static void
3453vm_page_verify_free_lists( void )
3454{
d1ecb069 3455 unsigned int color, npages, nlopages;
b0d623f7
A
3456
3457 if (! vm_page_verify_free_lists_enabled)
3458 return;
3459
2d21ac55 3460 npages = 0;
b0d623f7
A
3461
3462 lck_mtx_lock(&vm_page_queue_free_lock);
2d21ac55
A
3463
3464 for( color = 0; color < vm_colors; color++ ) {
d1ecb069 3465 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
6d2010ae 3466 color, VM_PAGE_NULL, FALSE);
2d21ac55 3467 }
d1ecb069
A
3468 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
3469 (unsigned int) -1,
3470 VM_PAGE_NULL, FALSE);
3471 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
3472 panic("vm_page_verify_free_lists: "
3473 "npages %u free_count %d nlopages %u lo_free_count %u",
3474 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
6d2010ae 3475
b0d623f7 3476 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55 3477}
2d21ac55 3478
b0d623f7
A
3479void
3480vm_page_queues_assert(
3481 vm_page_t mem,
3482 int val)
3483{
3484 if (mem->free + mem->active + mem->inactive + mem->speculative +
3485 mem->throttled + mem->pageout_queue > (val)) {
3486 _vm_page_print(mem);
3487 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3488 }
3489 if (VM_PAGE_WIRED(mem)) {
3490 assert(!mem->active);
3491 assert(!mem->inactive);
3492 assert(!mem->speculative);
3493 assert(!mem->throttled);
3494 }
3495}
3496#endif /* MACH_ASSERT */
2d21ac55 3497
91447636 3498
/*
 *	CONTIGUOUS PAGE ALLOCATION
 *
 *	Find a region large enough to contain at least n pages
 *	of contiguous physical memory.
 *
 *	This is done by traversing the vm_page_t array in a linear fashion
 *	we assume that the vm_page_t array has the available physical pages in an
 *	ordered, ascending list... this is currently true of all our implementations
 *	and must remain so... there can be 'holes' in the array...  we also can
 *	no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
 *	which used to happen via 'vm_page_convert'... that function was no longer
 *	being called and was removed...
 *
 *	The basic flow consists of stabilizing some of the interesting state of
 *	a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
 *	sweep at the beginning of the array looking for pages that meet our criteria
 *	for a 'stealable' page... currently we are pretty conservative... if the page
 *	meets these criteria and is physically contiguous to the previous page in the 'run'
 *	we keep developing it.  If we hit a page that doesn't fit, we reset our state
 *	and start to develop a new run... if at this point we've already considered
 *	at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
 *	and mutex_pause (which will yield the processor), to keep the latency low w/r
 *	to other threads trying to acquire free pages (or move pages from q to q),
 *	and then continue from the spot we left off... we only make 1 pass through the
 *	array.  Once we have a 'run' that is long enough, we'll go into the loop
 *	which steals the pages from the queues they're currently on... pages on the free
 *	queue can be stolen directly... pages that are on any of the other queues
 *	must be removed from the object they are tabled on... this requires taking the
 *	object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
 *	or if the state of the page behind the vm_object lock is no longer viable, we'll
 *	dump the pages we've currently stolen back to the free list, and pick up our
 *	scan from the point where we aborted the 'current' run.
 *
 *
 *	Requirements:
 *		- neither vm_page_queue nor vm_free_list lock can be held on entry
 *
 *	Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
 *
 * Algorithm:
 */
2d21ac55
A
3541
/*
 * Number of pages vm_page_find_contiguous() considers before it drops
 * its locks and yields.
 */
#define	MAX_CONSIDERED_BEFORE_YIELD	1000


/*
 * Forget the run currently being developed.  Operates on the locals of
 * the enclosing scan (npages, free_considered, substitute_needed,
 * start_pnum, prevcontaddr).  The -1 / -2 values act as "no page yet"
 * markers -- presumably chosen so that "prevcontaddr + 1" can never equal
 * a real physical page number; confirm against ppnum_t's range.
 */
#define RESET_STATE_OF_RUN()	\
	MACRO_BEGIN		\
	npages = 0;		\
	free_considered = 0;	\
	substitute_needed = 0;	\
	start_pnum = -1;	\
	prevcontaddr = -2;	\
	MACRO_END

/*
 * Whether the contiguous-page search may steal in-use (i.e. not free)
 * pages, rather than being limited to pages that are already free.
 */
#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1

/*
 * Indices into vm_pages[] at which the next contiguous search starts;
 * one cursor for ordinary requests and one for KMA_LOMEM requests.
 */
static unsigned int vm_page_find_contiguous_last_idx = 0;
static unsigned int vm_page_lomem_find_contiguous_last_idx = 0;
#if DEBUG
/* non-zero makes vm_page_find_contiguous() print a summary of each search */
int vm_page_find_contig_debug = 0;
#endif
2d21ac55 3564
1c79356b
A
3565static vm_page_t
3566vm_page_find_contiguous(
2d21ac55
A
3567 unsigned int contig_pages,
3568 ppnum_t max_pnum,
b0d623f7
A
3569 ppnum_t pnum_mask,
3570 boolean_t wire,
3571 int flags)
1c79356b 3572{
2d21ac55 3573 vm_page_t m = NULL;
e5568f75 3574 ppnum_t prevcontaddr;
b0d623f7
A
3575 ppnum_t start_pnum;
3576 unsigned int npages, considered, scanned;
3577 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
3578 unsigned int idx_last_contig_page_found = 0;
2d21ac55
A
3579 int free_considered, free_available;
3580 int substitute_needed;
b0d623f7 3581 boolean_t wrapped;
593a1d5f 3582#if DEBUG
b0d623f7
A
3583 clock_sec_t tv_start_sec, tv_end_sec;
3584 clock_usec_t tv_start_usec, tv_end_usec;
593a1d5f
A
3585#endif
3586#if MACH_ASSERT
2d21ac55
A
3587 int yielded = 0;
3588 int dumped_run = 0;
3589 int stolen_pages = 0;
91447636 3590#endif
1c79356b 3591
2d21ac55 3592 if (contig_pages == 0)
1c79356b
A
3593 return VM_PAGE_NULL;
3594
2d21ac55
A
3595#if MACH_ASSERT
3596 vm_page_verify_free_lists();
593a1d5f
A
3597#endif
3598#if DEBUG
2d21ac55
A
3599 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3600#endif
3601 vm_page_lock_queues();
b0d623f7 3602 lck_mtx_lock(&vm_page_queue_free_lock);
2d21ac55
A
3603
3604 RESET_STATE_OF_RUN();
1c79356b 3605
b0d623f7 3606 scanned = 0;
2d21ac55
A
3607 considered = 0;
3608 free_available = vm_page_free_count - vm_page_free_reserved;
e5568f75 3609
b0d623f7
A
3610 wrapped = FALSE;
3611
3612 if(flags & KMA_LOMEM)
3613 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
3614 else
3615 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
3616
3617 orig_last_idx = idx_last_contig_page_found;
3618 last_idx = orig_last_idx;
3619
3620 for (page_idx = last_idx, start_idx = last_idx;
2d21ac55
A
3621 npages < contig_pages && page_idx < vm_pages_count;
3622 page_idx++) {
b0d623f7
A
3623retry:
3624 if (wrapped &&
3625 npages == 0 &&
3626 page_idx >= orig_last_idx) {
3627 /*
3628 * We're back where we started and we haven't
3629 * found any suitable contiguous range. Let's
3630 * give up.
3631 */
3632 break;
3633 }
3634 scanned++;
2d21ac55 3635 m = &vm_pages[page_idx];
e5568f75 3636
b0d623f7
A
3637 assert(!m->fictitious);
3638 assert(!m->private);
3639
2d21ac55
A
3640 if (max_pnum && m->phys_page > max_pnum) {
3641 /* no more low pages... */
3642 break;
e5568f75 3643 }
6d2010ae 3644 if (!npages & ((m->phys_page & pnum_mask) != 0)) {
b0d623f7
A
3645 /*
3646 * not aligned
3647 */
3648 RESET_STATE_OF_RUN();
3649
3650 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
2d21ac55
A
3651 m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
3652 m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
b0d623f7
A
3653 m->cleaning || m->overwriting || m->restart || m->unusual || m->list_req_pending ||
3654 m->pageout) {
2d21ac55
A
3655 /*
3656 * page is in a transient state
3657 * or a state we don't want to deal
3658 * with, so don't consider it which
3659 * means starting a new run
3660 */
3661 RESET_STATE_OF_RUN();
1c79356b 3662
2d21ac55
A
3663 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
3664 /*
3665 * page needs to be on one of our queues
3666 * in order for it to be stable behind the
3667 * locks we hold at this point...
3668 * if not, don't consider it which
3669 * means starting a new run
3670 */
3671 RESET_STATE_OF_RUN();
3672
3673 } else if (!m->free && (!m->tabled || m->busy)) {
3674 /*
3675 * pages on the free list are always 'busy'
3676 * so we couldn't test for 'busy' in the check
3677 * for the transient states... pages that are
3678 * 'free' are never 'tabled', so we also couldn't
3679 * test for 'tabled'. So we check here to make
3680 * sure that a non-free page is not busy and is
3681 * tabled on an object...
3682 * if not, don't consider it which
3683 * means starting a new run
3684 */
3685 RESET_STATE_OF_RUN();
3686
3687 } else {
3688 if (m->phys_page != prevcontaddr + 1) {
b0d623f7
A
3689 if ((m->phys_page & pnum_mask) != 0) {
3690 RESET_STATE_OF_RUN();
3691 goto did_consider;
3692 } else {
3693 npages = 1;
3694 start_idx = page_idx;
3695 start_pnum = m->phys_page;
3696 }
2d21ac55
A
3697 } else {
3698 npages++;
e5568f75 3699 }
2d21ac55 3700 prevcontaddr = m->phys_page;
b0d623f7
A
3701
3702 VM_PAGE_CHECK(m);
2d21ac55
A
3703 if (m->free) {
3704 free_considered++;
b0d623f7
A
3705 } else {
3706 /*
3707 * This page is not free.
3708 * If we can't steal used pages,
3709 * we have to give up this run
3710 * and keep looking.
3711 * Otherwise, we might need to
3712 * move the contents of this page
3713 * into a substitute page.
3714 */
3715#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3716 if (m->pmapped || m->dirty) {
3717 substitute_needed++;
3718 }
3719#else
3720 RESET_STATE_OF_RUN();
3721#endif
2d21ac55 3722 }
b0d623f7 3723
2d21ac55
A
3724 if ((free_considered + substitute_needed) > free_available) {
3725 /*
3726 * if we let this run continue
3727 * we will end up dropping the vm_page_free_count
3728 * below the reserve limit... we need to abort
3729 * this run, but we can at least re-consider this
3730 * page... thus the jump back to 'retry'
3731 */
3732 RESET_STATE_OF_RUN();
3733
3734 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
3735 considered++;
3736 goto retry;
e5568f75 3737 }
2d21ac55
A
3738 /*
3739 * free_available == 0
3740 * so can't consider any free pages... if
3741 * we went to retry in this case, we'd
3742 * get stuck looking at the same page
3743 * w/o making any forward progress
3744 * we also want to take this path if we've already
3745 * reached our limit that controls the lock latency
3746 */
e5568f75 3747 }
2d21ac55 3748 }
b0d623f7 3749did_consider:
2d21ac55
A
3750 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
3751
b0d623f7 3752 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55 3753 vm_page_unlock_queues();
e5568f75 3754
2d21ac55
A
3755 mutex_pause(0);
3756
3757 vm_page_lock_queues();
b0d623f7 3758 lck_mtx_lock(&vm_page_queue_free_lock);
2d21ac55
A
3759
3760 RESET_STATE_OF_RUN();
1c79356b 3761 /*
2d21ac55
A
3762 * reset our free page limit since we
3763 * dropped the lock protecting the vm_page_free_queue
1c79356b 3764 */
2d21ac55
A
3765 free_available = vm_page_free_count - vm_page_free_reserved;
3766 considered = 0;
3767#if MACH_ASSERT
3768 yielded++;
3769#endif
3770 goto retry;
3771 }
3772 considered++;
3773 }
3774 m = VM_PAGE_NULL;
3775
b0d623f7
A
3776 if (npages != contig_pages) {
3777 if (!wrapped) {
3778 /*
3779 * We didn't find a contiguous range but we didn't
3780 * start from the very first page.
3781 * Start again from the very first page.
3782 */
3783 RESET_STATE_OF_RUN();
3784 if( flags & KMA_LOMEM)
3785 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
3786 else
3787 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
3788 last_idx = 0;
3789 page_idx = last_idx;
3790 wrapped = TRUE;
3791 goto retry;
3792 }
3793 lck_mtx_unlock(&vm_page_queue_free_lock);
3794 } else {
2d21ac55
A
3795 vm_page_t m1;
3796 vm_page_t m2;
3797 unsigned int cur_idx;
3798 unsigned int tmp_start_idx;
3799 vm_object_t locked_object = VM_OBJECT_NULL;
3800 boolean_t abort_run = FALSE;
3801
b0d623f7
A
3802 assert(page_idx - start_idx == contig_pages);
3803
2d21ac55
A
3804 tmp_start_idx = start_idx;
3805
3806 /*
3807 * first pass through to pull the free pages
3808 * off of the free queue so that in case we
3809 * need substitute pages, we won't grab any
3810 * of the free pages in the run... we'll clear
3811 * the 'free' bit in the 2nd pass, and even in
3812 * an abort_run case, we'll collect all of the
3813 * free pages in this run and return them to the free list
3814 */
3815 while (start_idx < page_idx) {
3816
3817 m1 = &vm_pages[start_idx++];
3818
b0d623f7
A
3819#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3820 assert(m1->free);
3821#endif
3822
2d21ac55 3823 if (m1->free) {
0b4c1975 3824 unsigned int color;
2d21ac55 3825
0b4c1975 3826 color = m1->phys_page & vm_color_mask;
b0d623f7 3827#if MACH_ASSERT
6d2010ae 3828 vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
b0d623f7 3829#endif
0b4c1975
A
3830 queue_remove(&vm_page_queue_free[color],
3831 m1,
3832 vm_page_t,
3833 pageq);
d1ecb069
A
3834 m1->pageq.next = NULL;
3835 m1->pageq.prev = NULL;
0b4c1975 3836#if MACH_ASSERT
6d2010ae 3837 vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
0b4c1975 3838#endif
b0d623f7
A
3839 /*
3840 * Clear the "free" bit so that this page
3841 * does not get considered for another
3842 * concurrent physically-contiguous allocation.
3843 */
3844 m1->free = FALSE;
3845 assert(m1->busy);
0b4c1975
A
3846
3847 vm_page_free_count--;
2d21ac55
A
3848 }
3849 }
3850 /*
3851 * adjust global freelist counts
3852 */
3853 if (vm_page_free_count < vm_page_free_count_minimum)
3854 vm_page_free_count_minimum = vm_page_free_count;
3855
b0d623f7
A
3856 if( flags & KMA_LOMEM)
3857 vm_page_lomem_find_contiguous_last_idx = page_idx;
3858 else
3859 vm_page_find_contiguous_last_idx = page_idx;
3860
2d21ac55
A
3861 /*
3862 * we can drop the free queue lock at this point since
3863 * we've pulled any 'free' candidates off of the list
3864 * we need it dropped so that we can do a vm_page_grab
3865 * when substituing for pmapped/dirty pages
3866 */
b0d623f7 3867 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
3868
3869 start_idx = tmp_start_idx;
3870 cur_idx = page_idx - 1;
3871
3872 while (start_idx++ < page_idx) {
3873 /*
3874 * must go through the list from back to front
3875 * so that the page list is created in the
3876 * correct order - low -> high phys addresses
3877 */
3878 m1 = &vm_pages[cur_idx--];
3879
b0d623f7
A
3880 assert(!m1->free);
3881 if (m1->object == VM_OBJECT_NULL) {
2d21ac55 3882 /*
b0d623f7 3883 * page has already been removed from
2d21ac55
A
3884 * the free list in the 1st pass
3885 */
b0d623f7 3886 assert(m1->offset == (vm_object_offset_t) -1);
2d21ac55
A
3887 assert(m1->busy);
3888 assert(!m1->wanted);
3889 assert(!m1->laundry);
e5568f75 3890 } else {
2d21ac55
A
3891 vm_object_t object;
3892
3893 if (abort_run == TRUE)
3894 continue;
3895
3896 object = m1->object;
3897
3898 if (object != locked_object) {
3899 if (locked_object) {
3900 vm_object_unlock(locked_object);
3901 locked_object = VM_OBJECT_NULL;
3902 }
3903 if (vm_object_lock_try(object))
3904 locked_object = object;
3905 }
3906 if (locked_object == VM_OBJECT_NULL ||
b0d623f7 3907 (VM_PAGE_WIRED(m1) || m1->gobbled ||
2d21ac55
A
3908 m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
3909 m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
3910 m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->list_req_pending || m1->busy)) {
3911
3912 if (locked_object) {
3913 vm_object_unlock(locked_object);
3914 locked_object = VM_OBJECT_NULL;
3915 }
3916 tmp_start_idx = cur_idx;
3917 abort_run = TRUE;
3918 continue;
3919 }
3920 if (m1->pmapped || m1->dirty) {
3921 int refmod;
3922 vm_object_offset_t offset;
3923
3924 m2 = vm_page_grab();
3925
3926 if (m2 == VM_PAGE_NULL) {
3927 if (locked_object) {
3928 vm_object_unlock(locked_object);
3929 locked_object = VM_OBJECT_NULL;
3930 }
3931 tmp_start_idx = cur_idx;
3932 abort_run = TRUE;
3933 continue;
3934 }
3935 if (m1->pmapped)
3936 refmod = pmap_disconnect(m1->phys_page);
3937 else
3938 refmod = 0;
3939 vm_page_copy(m1, m2);
3940
3941 m2->reference = m1->reference;
3942 m2->dirty = m1->dirty;
3943
3944 if (refmod & VM_MEM_REFERENCED)
3945 m2->reference = TRUE;
3946 if (refmod & VM_MEM_MODIFIED)
3947 m2->dirty = TRUE;
3948 offset = m1->offset;
3949
3950 /*
3951 * completely cleans up the state
3952 * of the page so that it is ready
3953 * to be put onto the free list, or
3954 * for this purpose it looks like it
3955 * just came off of the free list
3956 */
3957 vm_page_free_prepare(m1);
3958
3959 /*
3960 * make sure we clear the ref/mod state
3961 * from the pmap layer... else we risk
3962 * inheriting state from the last time
3963 * this page was used...
3964 */
3965 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
3966 /*
3967 * now put the substitute page on the object
3968 */
b0d623f7 3969 vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE);
2d21ac55
A
3970
3971 if (m2->reference)
3972 vm_page_activate(m2);
3973 else
3974 vm_page_deactivate(m2);
3975
3976 PAGE_WAKEUP_DONE(m2);
3977
3978 } else {
3979 /*
3980 * completely cleans up the state
3981 * of the page so that it is ready
3982 * to be put onto the free list, or
3983 * for this purpose it looks like it
3984 * just came off of the free list
3985 */
3986 vm_page_free_prepare(m1);
3987 }
3988#if MACH_ASSERT
3989 stolen_pages++;
3990#endif
1c79356b 3991 }
2d21ac55
A
3992 m1->pageq.next = (queue_entry_t) m;
3993 m1->pageq.prev = NULL;
3994 m = m1;
e5568f75 3995 }
2d21ac55
A
3996 if (locked_object) {
3997 vm_object_unlock(locked_object);
3998 locked_object = VM_OBJECT_NULL;
1c79356b
A
3999 }
4000
2d21ac55
A
4001 if (abort_run == TRUE) {
4002 if (m != VM_PAGE_NULL) {
b0d623f7 4003 vm_page_free_list(m, FALSE);
2d21ac55
A
4004 }
4005#if MACH_ASSERT
4006 dumped_run++;
4007#endif
4008 /*
4009 * want the index of the last
4010 * page in this run that was
4011 * successfully 'stolen', so back
4012 * it up 1 for the auto-decrement on use
4013 * and 1 more to bump back over this page
4014 */
4015 page_idx = tmp_start_idx + 2;
b0d623f7
A
4016 if (page_idx >= vm_pages_count) {
4017 if (wrapped)
4018 goto done_scanning;
4019 page_idx = last_idx = 0;
4020 wrapped = TRUE;
4021 }
4022 abort_run = FALSE;
4023
2d21ac55 4024 /*
b0d623f7
A
4025 * We didn't find a contiguous range but we didn't
4026 * start from the very first page.
4027 * Start again from the very first page.
2d21ac55 4028 */
b0d623f7
A
4029 RESET_STATE_OF_RUN();
4030
4031 if( flags & KMA_LOMEM)
4032 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4033 else
4034 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4035
4036 last_idx = page_idx;
2d21ac55 4037
b0d623f7
A
4038 lck_mtx_lock(&vm_page_queue_free_lock);
4039 /*
4040 * reset our free page limit since we
4041 * dropped the lock protecting the vm_page_free_queue
4042 */
4043 free_available = vm_page_free_count - vm_page_free_reserved;
2d21ac55
A
4044 goto retry;
4045 }
e5568f75 4046
e5568f75 4047 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
2d21ac55
A
4048
4049 if (wire == TRUE)
4050 m1->wire_count++;
4051 else
4052 m1->gobbled = TRUE;
e5568f75 4053 }
2d21ac55
A
4054 if (wire == FALSE)
4055 vm_page_gobble_count += npages;
4056
4057 /*
4058 * gobbled pages are also counted as wired pages
4059 */
e5568f75 4060 vm_page_wire_count += npages;
e5568f75 4061
2d21ac55
A
4062 assert(vm_page_verify_contiguous(m, npages));
4063 }
4064done_scanning:
4065 vm_page_unlock_queues();
4066
593a1d5f 4067#if DEBUG
2d21ac55
A
4068 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4069
4070 tv_end_sec -= tv_start_sec;
4071 if (tv_end_usec < tv_start_usec) {
4072 tv_end_sec--;
4073 tv_end_usec += 1000000;
1c79356b 4074 }
2d21ac55
A
4075 tv_end_usec -= tv_start_usec;
4076 if (tv_end_usec >= 1000000) {
4077 tv_end_sec++;
4078 tv_end_sec -= 1000000;
4079 }
b0d623f7
A
4080 if (vm_page_find_contig_debug) {
4081 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n",
4082 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4083 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4084 scanned, yielded, dumped_run, stolen_pages);
4085 }
e5568f75 4086
593a1d5f
A
4087#endif
4088#if MACH_ASSERT
2d21ac55
A
4089 vm_page_verify_free_lists();
4090#endif
e5568f75 4091 return m;
1c79356b
A
4092}
4093
4094/*
4095 * Allocate a list of contiguous, wired pages.
4096 */
4097kern_return_t
4098cpm_allocate(
4099 vm_size_t size,
4100 vm_page_t *list,
2d21ac55 4101 ppnum_t max_pnum,
b0d623f7
A
4102 ppnum_t pnum_mask,
4103 boolean_t wire,
4104 int flags)
1c79356b 4105{
91447636
A
4106 vm_page_t pages;
4107 unsigned int npages;
1c79356b 4108
6d2010ae 4109 if (size % PAGE_SIZE != 0)
1c79356b
A
4110 return KERN_INVALID_ARGUMENT;
4111
b0d623f7
A
4112 npages = (unsigned int) (size / PAGE_SIZE);
4113 if (npages != size / PAGE_SIZE) {
4114 /* 32-bit overflow */
4115 return KERN_INVALID_ARGUMENT;
4116 }
1c79356b 4117
1c79356b
A
4118 /*
4119 * Obtain a pointer to a subset of the free
4120 * list large enough to satisfy the request;
4121 * the region will be physically contiguous.
4122 */
b0d623f7 4123 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
e5568f75 4124
2d21ac55 4125 if (pages == VM_PAGE_NULL)
1c79356b 4126 return KERN_NO_SPACE;
1c79356b 4127 /*
2d21ac55 4128 * determine need for wakeups
1c79356b 4129 */
2d21ac55
A
4130 if ((vm_page_free_count < vm_page_free_min) ||
4131 ((vm_page_free_count < vm_page_free_target) &&
4132 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
e5568f75 4133 thread_wakeup((event_t) &vm_page_free_wanted);
2d21ac55 4134
6d2010ae
A
4135 VM_CHECK_MEMORYSTATUS;
4136
1c79356b
A
4137 /*
4138 * The CPM pages should now be available and
4139 * ordered by ascending physical address.
4140 */
4141 assert(vm_page_verify_contiguous(pages, npages));
4142
4143 *list = pages;
4144 return KERN_SUCCESS;
4145}
6d2010ae
A
4146
4147
4148unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
4149
4150/*
4151 * when working on a 'run' of pages, it is necessary to hold
4152 * the vm_page_queue_lock (a hot global lock) for certain operations
4153 * on the page... however, the majority of the work can be done
4154 * while merely holding the object lock... in fact there are certain
4155 * collections of pages that don't require any work brokered by the
4156 * vm_page_queue_lock... to mitigate the time spent behind the global
4157 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
4158 * while doing all of the work that doesn't require the vm_page_queue_lock...
4159 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
4160 * necessary work for each page... we will grab the busy bit on the page
4161 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
4162 * if it can't immediately take the vm_page_queue_lock in order to compete
4163 * for the locks in the same order that vm_pageout_scan takes them.
4164 * the operation names are modeled after the names of the routines that
4165 * need to be called in order to make the changes very obvious in the
4166 * original loop
4167 */
4168
4169void
4170vm_page_do_delayed_work(
4171 vm_object_t object,
4172 struct vm_page_delayed_work *dwp,
4173 int dw_count)
4174{
4175 int j;
4176 vm_page_t m;
4177 vm_page_t local_free_q = VM_PAGE_NULL;
4178 boolean_t dropped_obj_lock = FALSE;
4179
4180 /*
4181 * pageout_scan takes the vm_page_lock_queues first
4182 * then tries for the object lock... to avoid what
4183 * is effectively a lock inversion, we'll go to the
4184 * trouble of taking them in that same order... otherwise
4185 * if this object contains the majority of the pages resident
4186 * in the UBC (or a small set of large objects actively being
4187 * worked on contain the majority of the pages), we could
4188 * cause the pageout_scan thread to 'starve' in its attempt
4189 * to find pages to move to the free queue, since it has to
4190 * successfully acquire the object lock of any candidate page
4191 * before it can steal/clean it.
4192 */
4193 if (!vm_page_trylockspin_queues()) {
4194 vm_object_unlock(object);
4195
4196 vm_page_lockspin_queues();
4197
4198 for (j = 0; ; j++) {
4199 if (!vm_object_lock_avoid(object) &&
4200 _vm_object_lock_try(object))
4201 break;
4202 vm_page_unlock_queues();
4203 mutex_pause(j);
4204 vm_page_lockspin_queues();
4205 }
4206 dropped_obj_lock = TRUE;
4207 }
4208 for (j = 0; j < dw_count; j++, dwp++) {
4209
4210 m = dwp->dw_m;
4211
4212 if (dwp->dw_mask & DW_set_list_req_pending) {
4213 m->list_req_pending = TRUE;
4214
4215 if (dropped_obj_lock == TRUE) {
4216 /*
4217 * need to make sure anyone that might have
4218 * blocked on busy == TRUE when we dropped
4219 * the object lock gets a chance to re-evaluate
4220 * its state since we have several places
4221 * where we avoid potential deadlocks with
4222 * the fileysystem by stealing pages with
4223 * list_req_pending == TRUE and busy == TRUE
4224 */
4225 dwp->dw_mask |= DW_PAGE_WAKEUP;
4226 }
4227 }
4228 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4229 vm_pageout_throttle_up(m);
4230
4231 if (dwp->dw_mask & DW_vm_page_wire)
4232 vm_page_wire(m);
4233 else if (dwp->dw_mask & DW_vm_page_unwire) {
4234 boolean_t queueit;
4235
4236 queueit = (dwp->dw_mask & DW_vm_page_free) ? FALSE : TRUE;
4237
4238 vm_page_unwire(m, queueit);
4239 }
4240 if (dwp->dw_mask & DW_vm_page_free) {
4241 vm_page_free_prepare_queues(m);
4242
4243 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
4244 /*
4245 * Add this page to our list of reclaimed pages,
4246 * to be freed later.
4247 */
4248 m->pageq.next = (queue_entry_t) local_free_q;
4249 local_free_q = m;
4250 } else {
4251 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
4252 vm_page_deactivate_internal(m, FALSE);
4253 else if (dwp->dw_mask & DW_vm_page_activate) {
4254 if (m->active == FALSE) {
4255 vm_page_activate(m);
4256 }
4257 }
4258 else if (dwp->dw_mask & DW_vm_page_speculate)
4259 vm_page_speculate(m, TRUE);
4260 else if (dwp->dw_mask & DW_vm_page_lru)
4261 vm_page_lru(m);
4262 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE)
4263 VM_PAGE_QUEUES_REMOVE(m);
4264
4265 if (dwp->dw_mask & DW_set_reference)
4266 m->reference = TRUE;
4267 else if (dwp->dw_mask & DW_clear_reference)
4268 m->reference = FALSE;
4269
4270 if (dwp->dw_mask & DW_move_page) {
4271 VM_PAGE_QUEUES_REMOVE(m);
4272
4273 assert(!m->laundry);
4274 assert(m->object != kernel_object);
4275 assert(m->pageq.next == NULL &&
4276 m->pageq.prev == NULL);
4277
4278 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
4279 }
4280 if (dwp->dw_mask & DW_clear_busy)
4281 m->busy = FALSE;
4282
4283 if (dwp->dw_mask & DW_PAGE_WAKEUP)
4284 PAGE_WAKEUP(m);
4285 }
4286 }
4287 vm_page_unlock_queues();
4288
4289 if (local_free_q)
4290 vm_page_free_list(local_free_q, TRUE);
4291
4292 VM_CHECK_MEMORYSTATUS;
4293
4294}
4295
4296
2d21ac55 4297
0b4c1975 4298
6d2010ae
A
/*
 *	vm_check_memorystatus:
 *
 *	CONFIG_EMBEDDED only (the routine is empty otherwise).
 *
 *	Recomputes the number of available pages and, when it has either
 *	dropped below kern_memorystatus_delta for the first time or moved
 *	by at least kern_memorystatus_delta since the last report,
 *	publishes a new kern_memorystatus_level (percentage of max_mem)
 *	and wakes up whoever waits on kern_memorystatus_wakeup.
 *
 *	Does nothing while kern_memorystatus_delta is zero.
 */
void vm_check_memorystatus(void)
{
#if CONFIG_EMBEDDED
	static boolean_t in_critical = FALSE;
	static unsigned int last_memorystatus = 0;
	unsigned int pages_avail;
	boolean_t below_delta;

	if (!kern_memorystatus_delta) {
		return;
	}

	/* purgeable pages only count when dynamic paging is not enabled */
	pages_avail = (vm_page_active_count +
		       vm_page_inactive_count +
		       vm_page_speculative_count +
		       vm_page_free_count +
		       (VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) ? 0 : vm_page_purgeable_count));

	below_delta = (pages_avail < kern_memorystatus_delta) ? TRUE : FALSE;

	if ( (!in_critical && below_delta) ||
	     (pages_avail >= (last_memorystatus + kern_memorystatus_delta)) ||
	     (last_memorystatus >= (pages_avail + kern_memorystatus_delta)) ) {
		/*
		 * widen before scaling: "pages_avail * 100" would otherwise be
		 * computed in 32 bits and could wrap before the 64-bit divide
		 */
		kern_memorystatus_level = ((uint64_t) pages_avail * 100) / atop_64(max_mem);
		last_memorystatus = pages_avail;

		thread_wakeup((event_t)&kern_memorystatus_wakeup);

		in_critical = below_delta;
	}
#endif
}
4327
0b4c1975
A
4328kern_return_t
4329vm_page_alloc_list(
4330 int page_count,
4331 int flags,
4332 vm_page_t *list)
4333{
4334 vm_page_t lo_page_list = VM_PAGE_NULL;
4335 vm_page_t mem;
4336 int i;
4337
4338 if ( !(flags & KMA_LOMEM))
4339 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
4340
4341 for (i = 0; i < page_count; i++) {
4342
4343 mem = vm_page_grablo();
4344
4345 if (mem == VM_PAGE_NULL) {
4346 if (lo_page_list)
4347 vm_page_free_list(lo_page_list, FALSE);
4348
4349 *list = VM_PAGE_NULL;
4350
4351 return (KERN_RESOURCE_SHORTAGE);
4352 }
4353 mem->pageq.next = (queue_entry_t) lo_page_list;
4354 lo_page_list = mem;
4355 }
4356 *list = lo_page_list;
4357
4358 return (KERN_SUCCESS);
4359}
4360
4361void
4362vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
4363{
4364 page->offset = offset;
4365}
4366
4367vm_page_t
4368vm_page_get_next(vm_page_t page)
4369{
4370 return ((vm_page_t) page->pageq.next);
4371}
4372
4373vm_object_offset_t
4374vm_page_get_offset(vm_page_t page)
4375{
4376 return (page->offset);
4377}
4378
4379ppnum_t
4380vm_page_get_phys_page(vm_page_t page)
4381{
4382 return (page->phys_page);
4383}
4384
4385
b0d623f7
A
4386/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4387
d1ecb069
A
4388#if HIBERNATION
4389
b0d623f7
A
4390static vm_page_t hibernate_gobble_queue;
4391
0b4c1975
A
4392extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4393
4394static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
4395static int hibernate_flush_dirty_pages(void);
4396static int hibernate_flush_queue(queue_head_t *, int);
4397static void hibernate_dirty_page(vm_page_t);
4398
4399void hibernate_flush_wait(void);
4400void hibernate_mark_in_progress(void);
4401void hibernate_clear_in_progress(void);
4402
4403
/*
 * Counters kept while preparing for hibernation.
 * The hibernate_* fields are maintained by the flush path
 * (hibernate_flush_queue / hibernate_drain_pageout_queue), the cd_*
 * fields by hibernate_consider_discard and hibernate_page_list_setall.
 */
struct hibernate_statistics {
	/* hibernate_flush_queue */
	int hibernate_considered;	/* pages that went through the scan loop */
	int hibernate_reentered_on_q;	/* pages re-queued instead of cleaned */
	int hibernate_found_dirty;	/* pages handed to hibernate_dirty_page */
	int hibernate_skipped_cleaning;	/* skipped: page had cleaning set */
	int hibernate_skipped_transient; /* skipped: other transient state */
	int hibernate_skipped_precious;	/* skipped: clean and precious */
	int hibernate_queue_nolock;	/* gave up on an object lock */
	int hibernate_queue_paused;	/* paused after a failed object lock try */
	int hibernate_throttled;	/* waits on a throttled pageout queue */
	int hibernate_throttle_timeout;	/* throttle waits that ran out */
	/* hibernate_drain_pageout_queue */
	int hibernate_drained;		/* drain waits that did not time out */
	int hibernate_drain_timeout;	/* drain waits that timed out */
	/* hibernate_consider_discard */
	int cd_lock_failed;		/* object lock try failed */
	int cd_found_precious;
	int cd_found_wired;
	int cd_found_busy;		/* busy page or object not alive */
	int cd_found_unusual;		/* absent, unusual or error */
	int cd_found_cleaning;
	int cd_found_laundry;		/* laundry or list_req_pending */
	int cd_found_dirty;		/* examined but not discardable */
	/* hibernate_page_list_setall */
	int cd_local_free;		/* pages on per-processor free lists */
	int cd_total_free;		/* all free pages encountered */
	int cd_vm_page_wire_count;	/* vm_page_wire_count at start */
	int cd_pages;			/* page_list->page_count at start */
	int cd_discarded;		/* sum of the discard counts */
	int cd_count_wire;		/* final count_wire value */
} hibernate_stats;
4432
4433
4434
4435static int
4436hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
4437{
4438 wait_result_t wait_result;
4439
4440 vm_page_lock_queues();
4441
4442 while (q->pgo_laundry) {
4443
4444 q->pgo_draining = TRUE;
4445
4446 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
4447
4448 vm_page_unlock_queues();
4449
4450 wait_result = thread_block(THREAD_CONTINUE_NULL);
4451
4452 if (wait_result == THREAD_TIMED_OUT) {
4453 hibernate_stats.hibernate_drain_timeout++;
4454 return (1);
4455 }
4456 vm_page_lock_queues();
4457
4458 hibernate_stats.hibernate_drained++;
4459 }
4460 vm_page_unlock_queues();
4461
4462 return (0);
4463}
4464
4465static void
4466hibernate_dirty_page(vm_page_t m)
4467{
4468 vm_object_t object = m->object;
4469 struct vm_pageout_queue *q;
4470
4471#if DEBUG
4472 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4473#endif
4474 vm_object_lock_assert_exclusive(object);
4475
4476 /*
4477 * protect the object from collapse -
4478 * locking in the object's paging_offset.
4479 */
4480 vm_object_paging_begin(object);
4481
4482 m->list_req_pending = TRUE;
4483 m->cleaning = TRUE;
4484 m->busy = TRUE;
4485
4486 if (object->internal == TRUE)
4487 q = &vm_pageout_queue_internal;
4488 else
4489 q = &vm_pageout_queue_external;
4490
4491 /*
4492 * pgo_laundry count is tied to the laundry bit
4493 */
4494 m->laundry = TRUE;
4495 q->pgo_laundry++;
4496
4497 m->pageout_queue = TRUE;
4498 queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
4499
4500 if (q->pgo_idle == TRUE) {
4501 q->pgo_idle = FALSE;
4502 thread_wakeup((event_t) &q->pgo_pending);
4503 }
4504}
4505
4506static int
4507hibernate_flush_queue(queue_head_t *q, int qcount)
4508{
4509 vm_page_t m;
4510 vm_object_t l_object = NULL;
4511 vm_object_t m_object = NULL;
4512 int refmod_state = 0;
4513 int try_failed_count = 0;
4514 int retval = 0;
4515 int current_run = 0;
4516 struct vm_pageout_queue *iq;
4517 struct vm_pageout_queue *eq;
4518 struct vm_pageout_queue *tq;
4519
4520
4521 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
4522
4523 iq = &vm_pageout_queue_internal;
4524 eq = &vm_pageout_queue_external;
4525
4526 vm_page_lock_queues();
4527
4528 while (qcount && !queue_empty(q)) {
4529
4530 if (current_run++ == 1000) {
4531 if (hibernate_should_abort()) {
4532 retval = 1;
4533 break;
4534 }
4535 current_run = 0;
4536 }
4537
4538 m = (vm_page_t) queue_first(q);
4539 m_object = m->object;
4540
4541 /*
4542 * check to see if we currently are working
4543 * with the same object... if so, we've
4544 * already got the lock
4545 */
4546 if (m_object != l_object) {
4547 /*
4548 * the object associated with candidate page is
4549 * different from the one we were just working
4550 * with... dump the lock if we still own it
4551 */
4552 if (l_object != NULL) {
4553 vm_object_unlock(l_object);
4554 l_object = NULL;
4555 }
4556 /*
4557 * Try to lock object; since we've alread got the
4558 * page queues lock, we can only 'try' for this one.
4559 * if the 'try' fails, we need to do a mutex_pause
4560 * to allow the owner of the object lock a chance to
4561 * run...
4562 */
4563 if ( !vm_object_lock_try_scan(m_object)) {
4564
4565 if (try_failed_count > 20) {
4566 hibernate_stats.hibernate_queue_nolock++;
4567
4568 goto reenter_pg_on_q;
4569 }
4570 vm_pageout_scan_wants_object = m_object;
4571
4572 vm_page_unlock_queues();
4573 mutex_pause(try_failed_count++);
4574 vm_page_lock_queues();
4575
4576 hibernate_stats.hibernate_queue_paused++;
4577 continue;
4578 } else {
4579 l_object = m_object;
4580 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4581 }
4582 }
4583 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->busy || m->absent || m->error) {
4584 /*
4585 * page is not to be cleaned
4586 * put it back on the head of its queue
4587 */
4588 if (m->cleaning)
4589 hibernate_stats.hibernate_skipped_cleaning++;
4590 else
4591 hibernate_stats.hibernate_skipped_transient++;
4592
4593 goto reenter_pg_on_q;
4594 }
4595 if ( !m_object->pager_initialized && m_object->pager_created)
4596 goto reenter_pg_on_q;
4597
4598 if (m_object->copy == VM_OBJECT_NULL) {
4599 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
4600 /*
4601 * let the normal hibernate image path
4602 * deal with these
4603 */
4604 goto reenter_pg_on_q;
4605 }
4606 }
4607 if ( !m->dirty && m->pmapped) {
4608 refmod_state = pmap_get_refmod(m->phys_page);
4609
4610 if ((refmod_state & VM_MEM_MODIFIED))
4611 m->dirty = TRUE;
4612 } else
4613 refmod_state = 0;
4614
4615 if ( !m->dirty) {
4616 /*
4617 * page is not to be cleaned
4618 * put it back on the head of its queue
4619 */
4620 if (m->precious)
4621 hibernate_stats.hibernate_skipped_precious++;
4622
4623 goto reenter_pg_on_q;
4624 }
4625 tq = NULL;
4626
4627 if (m_object->internal) {
4628 if (VM_PAGE_Q_THROTTLED(iq))
4629 tq = iq;
4630 } else if (VM_PAGE_Q_THROTTLED(eq))
4631 tq = eq;
4632
4633 if (tq != NULL) {
4634 wait_result_t wait_result;
4635 int wait_count = 5;
4636
4637 if (l_object != NULL) {
4638 vm_object_unlock(l_object);
4639 l_object = NULL;
4640 }
4641 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4642
4643 tq->pgo_throttled = TRUE;
4644
4645 while (retval == 0) {
4646
4647 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
4648
4649 vm_page_unlock_queues();
4650
4651 wait_result = thread_block(THREAD_CONTINUE_NULL);
4652
4653 vm_page_lock_queues();
4654
4655 if (hibernate_should_abort())
4656 retval = 1;
4657
4658 if (wait_result != THREAD_TIMED_OUT)
4659 break;
4660
4661 if (--wait_count == 0) {
4662 hibernate_stats.hibernate_throttle_timeout++;
4663 retval = 1;
4664 }
4665 }
4666 if (retval)
4667 break;
4668
4669 hibernate_stats.hibernate_throttled++;
4670
4671 continue;
4672 }
4673 VM_PAGE_QUEUES_REMOVE(m);
4674
4675 hibernate_dirty_page(m);
4676
4677 hibernate_stats.hibernate_found_dirty++;
4678
4679 goto next_pg;
4680
4681reenter_pg_on_q:
4682 queue_remove(q, m, vm_page_t, pageq);
4683 queue_enter(q, m, vm_page_t, pageq);
4684
4685 hibernate_stats.hibernate_reentered_on_q++;
4686next_pg:
4687 hibernate_stats.hibernate_considered++;
4688
4689 qcount--;
4690 try_failed_count = 0;
4691 }
4692 if (l_object != NULL) {
4693 vm_object_unlock(l_object);
4694 l_object = NULL;
4695 }
4696 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4697
4698 vm_page_unlock_queues();
4699
4700 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
4701
4702 return (retval);
4703}
4704
4705
4706static int
4707hibernate_flush_dirty_pages()
4708{
4709 struct vm_speculative_age_q *aq;
4710 uint32_t i;
4711
4712 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
4713
4714 if (vm_page_local_q) {
4715 for (i = 0; i < vm_page_local_q_count; i++)
4716 vm_page_reactivate_local(i, TRUE, FALSE);
4717 }
4718
4719 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
4720 int qcount;
4721 vm_page_t m;
4722
4723 aq = &vm_page_queue_speculative[i];
4724
4725 if (queue_empty(&aq->age_q))
4726 continue;
4727 qcount = 0;
4728
4729 vm_page_lockspin_queues();
4730
4731 queue_iterate(&aq->age_q,
4732 m,
4733 vm_page_t,
4734 pageq)
4735 {
4736 qcount++;
4737 }
4738 vm_page_unlock_queues();
4739
4740 if (qcount) {
4741 if (hibernate_flush_queue(&aq->age_q, qcount))
4742 return (1);
4743 }
4744 }
4745 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count))
4746 return (1);
4747 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_zf_queue_count))
4748 return (1);
4749 if (hibernate_flush_queue(&vm_page_queue_zf, vm_zf_queue_count))
4750 return (1);
4751
4752 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
4753 return (1);
4754 return (hibernate_drain_pageout_queue(&vm_pageout_queue_external));
4755}
4756
4757
4758extern void IOSleep(unsigned int);
4759extern int sync_internal(void);
4760
4761int
4762hibernate_flush_memory()
4763{
4764 int retval;
4765
4766 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
4767
4768 IOSleep(2 * 1000);
4769
4770 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_NONE, vm_page_free_count, 0, 0, 0, 0);
4771
4772 if ((retval = hibernate_flush_dirty_pages()) == 0) {
4773 if (consider_buffer_cache_collect != NULL) {
4774
4775 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, vm_page_wire_count, 0, 0, 0, 0);
4776
4777 sync_internal();
4778 (void)(*consider_buffer_cache_collect)(1);
4779 consider_zone_gc(1);
4780
4781 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, vm_page_wire_count, 0, 0, 0, 0);
4782 }
4783 }
4784 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
4785
4786 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
4787 hibernate_stats.hibernate_considered,
4788 hibernate_stats.hibernate_reentered_on_q,
4789 hibernate_stats.hibernate_found_dirty);
4790 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) queue_nolock(%d)\n",
4791 hibernate_stats.hibernate_skipped_cleaning,
4792 hibernate_stats.hibernate_skipped_transient,
4793 hibernate_stats.hibernate_skipped_precious,
4794 hibernate_stats.hibernate_queue_nolock);
4795 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
4796 hibernate_stats.hibernate_queue_paused,
4797 hibernate_stats.hibernate_throttled,
4798 hibernate_stats.hibernate_throttle_timeout,
4799 hibernate_stats.hibernate_drained,
4800 hibernate_stats.hibernate_drain_timeout);
4801
4802 return (retval);
4803}
4804
6d2010ae 4805
b0d623f7
A
4806static void
4807hibernate_page_list_zero(hibernate_page_list_t *list)
4808{
4809 uint32_t bank;
4810 hibernate_bitmap_t * bitmap;
4811
4812 bitmap = &list->bank_bitmap[0];
4813 for (bank = 0; bank < list->bank_count; bank++)
4814 {
4815 uint32_t last_bit;
4816
4817 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
4818 // set out-of-bound bits at end of bitmap.
4819 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
4820 if (last_bit)
4821 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
4822
4823 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
4824 }
4825}
4826
4827void
4828hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
4829{
4830 uint32_t i;
4831 vm_page_t m;
4832 uint64_t start, end, timeout, nsec;
4833 clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
4834 clock_get_uptime(&start);
4835
4836 for (i = 0; i < gobble_count; i++)
4837 {
4838 while (VM_PAGE_NULL == (m = vm_page_grab()))
4839 {
4840 clock_get_uptime(&end);
4841 if (end >= timeout)
4842 break;
4843 VM_PAGE_WAIT();
4844 }
4845 if (!m)
4846 break;
4847 m->busy = FALSE;
4848 vm_page_gobble(m);
4849
4850 m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
4851 hibernate_gobble_queue = m;
4852 }
4853
4854 clock_get_uptime(&end);
4855 absolutetime_to_nanoseconds(end - start, &nsec);
4856 HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
4857}
4858
4859void
4860hibernate_free_gobble_pages(void)
4861{
4862 vm_page_t m, next;
4863 uint32_t count = 0;
4864
4865 m = (vm_page_t) hibernate_gobble_queue;
4866 while(m)
4867 {
4868 next = (vm_page_t) m->pageq.next;
4869 vm_page_free(m);
4870 count++;
4871 m = next;
4872 }
4873 hibernate_gobble_queue = VM_PAGE_NULL;
4874
4875 if (count)
4876 HIBLOG("Freed %d pages\n", count);
4877}
4878
4879static boolean_t
4880hibernate_consider_discard(vm_page_t m)
4881{
4882 vm_object_t object = NULL;
4883 int refmod_state;
4884 boolean_t discard = FALSE;
4885
4886 do
4887 {
0b4c1975 4888 if (m->private)
b0d623f7
A
4889 panic("hibernate_consider_discard: private");
4890
0b4c1975
A
4891 if (!vm_object_lock_try(m->object)) {
4892 hibernate_stats.cd_lock_failed++;
b0d623f7 4893 break;
0b4c1975 4894 }
b0d623f7
A
4895 object = m->object;
4896
0b4c1975
A
4897 if (VM_PAGE_WIRED(m)) {
4898 hibernate_stats.cd_found_wired++;
b0d623f7 4899 break;
0b4c1975
A
4900 }
4901 if (m->precious) {
4902 hibernate_stats.cd_found_precious++;
b0d623f7 4903 break;
0b4c1975
A
4904 }
4905 if (m->busy || !object->alive) {
b0d623f7
A
4906 /*
4907 * Somebody is playing with this page.
4908 */
6d2010ae
A
4909 hibernate_stats.cd_found_busy++;
4910 break;
0b4c1975
A
4911 }
4912 if (m->absent || m->unusual || m->error) {
b0d623f7
A
4913 /*
4914 * If it's unusual in anyway, ignore it
4915 */
0b4c1975 4916 hibernate_stats.cd_found_unusual++;
b0d623f7 4917 break;
0b4c1975
A
4918 }
4919 if (m->cleaning) {
4920 hibernate_stats.cd_found_cleaning++;
b0d623f7 4921 break;
0b4c1975
A
4922 }
4923 if (m->laundry || m->list_req_pending) {
4924 hibernate_stats.cd_found_laundry++;
b0d623f7 4925 break;
0b4c1975 4926 }
b0d623f7
A
4927 if (!m->dirty)
4928 {
4929 refmod_state = pmap_get_refmod(m->phys_page);
4930
4931 if (refmod_state & VM_MEM_REFERENCED)
4932 m->reference = TRUE;
4933 if (refmod_state & VM_MEM_MODIFIED)
4934 m->dirty = TRUE;
4935 }
4936
4937 /*
4938 * If it's clean or purgeable we can discard the page on wakeup.
4939 */
4940 discard = (!m->dirty)
4941 || (VM_PURGABLE_VOLATILE == object->purgable)
0b4c1975
A
4942 || (VM_PURGABLE_EMPTY == object->purgable);
4943
4944 if (discard == FALSE)
4945 hibernate_stats.cd_found_dirty++;
b0d623f7
A
4946 }
4947 while (FALSE);
4948
4949 if (object)
4950 vm_object_unlock(object);
4951
4952 return (discard);
4953}
4954
4955
4956static void
4957hibernate_discard_page(vm_page_t m)
4958{
4959 if (m->absent || m->unusual || m->error)
4960 /*
4961 * If it's unusual in anyway, ignore
4962 */
4963 return;
4964
4965 if (m->pmapped == TRUE)
4966 {
4967 __unused int refmod_state = pmap_disconnect(m->phys_page);
4968 }
4969
4970 if (m->laundry)
4971 panic("hibernate_discard_page(%p) laundry", m);
4972 if (m->private)
4973 panic("hibernate_discard_page(%p) private", m);
4974 if (m->fictitious)
4975 panic("hibernate_discard_page(%p) fictitious", m);
4976
4977 if (VM_PURGABLE_VOLATILE == m->object->purgable)
4978 {
4979 /* object should be on a queue */
4980 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
4981 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
4982 assert(old_queue);
4983 /* No need to lock page queue for token delete, hibernate_vm_unlock()
4984 makes sure these locks are uncontended before sleep */
4985 vm_purgeable_token_delete_first(old_queue);
4986 m->object->purgable = VM_PURGABLE_EMPTY;
4987 }
4988
4989 vm_page_free(m);
4990}
4991
4992/*
4993 Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
4994 pages known to VM to not need saving are subtracted.
4995 Wired pages to be saved are present in page_list_wired, pageable in page_list.
4996*/
4997
4998void
4999hibernate_page_list_setall(hibernate_page_list_t * page_list,
5000 hibernate_page_list_t * page_list_wired,
6d2010ae 5001 hibernate_page_list_t * page_list_pal,
b0d623f7
A
5002 uint32_t * pagesOut)
5003{
5004 uint64_t start, end, nsec;
5005 vm_page_t m;
5006 uint32_t pages = page_list->page_count;
5007 uint32_t count_zf = 0, count_throttled = 0;
5008 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0;
5009 uint32_t count_wire = pages;
5010 uint32_t count_discard_active = 0;
5011 uint32_t count_discard_inactive = 0;
5012 uint32_t count_discard_purgeable = 0;
5013 uint32_t count_discard_speculative = 0;
5014 uint32_t i;
5015 uint32_t bank;
5016 hibernate_bitmap_t * bitmap;
5017 hibernate_bitmap_t * bitmap_wired;
5018
5019
0b4c1975
A
5020 HIBLOG("hibernate_page_list_setall start %p, %p\n", page_list, page_list_wired);
5021
5022 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
b0d623f7
A
5023
5024 clock_get_uptime(&start);
5025
5026 hibernate_page_list_zero(page_list);
5027 hibernate_page_list_zero(page_list_wired);
6d2010ae 5028 hibernate_page_list_zero(page_list_pal);
b0d623f7 5029
0b4c1975
A
5030 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
5031 hibernate_stats.cd_pages = pages;
5032
b0d623f7
A
5033 if (vm_page_local_q) {
5034 for (i = 0; i < vm_page_local_q_count; i++)
5035 vm_page_reactivate_local(i, TRUE, TRUE);
5036 }
5037
5038 m = (vm_page_t) hibernate_gobble_queue;
5039 while(m)
5040 {
5041 pages--;
5042 count_wire--;
5043 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5044 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5045 m = (vm_page_t) m->pageq.next;
5046 }
6d2010ae 5047
0b4c1975
A
5048 for( i = 0; i < real_ncpus; i++ )
5049 {
5050 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
5051 {
5052 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
5053 {
5054 pages--;
5055 count_wire--;
5056 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5057 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5058
5059 hibernate_stats.cd_local_free++;
5060 hibernate_stats.cd_total_free++;
5061 }
5062 }
5063 }
6d2010ae 5064
b0d623f7
A
5065 for( i = 0; i < vm_colors; i++ )
5066 {
5067 queue_iterate(&vm_page_queue_free[i],
5068 m,
5069 vm_page_t,
5070 pageq)
5071 {
5072 pages--;
5073 count_wire--;
5074 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5075 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
0b4c1975
A
5076
5077 hibernate_stats.cd_total_free++;
b0d623f7
A
5078 }
5079 }
5080
5081 queue_iterate(&vm_lopage_queue_free,
5082 m,
5083 vm_page_t,
5084 pageq)
5085 {
5086 pages--;
5087 count_wire--;
5088 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5089 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
0b4c1975
A
5090
5091 hibernate_stats.cd_total_free++;
b0d623f7
A
5092 }
5093
5094 queue_iterate( &vm_page_queue_throttled,
5095 m,
5096 vm_page_t,
5097 pageq )
5098 {
5099 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5100 && hibernate_consider_discard(m))
5101 {
5102 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5103 count_discard_inactive++;
5104 }
5105 else
5106 count_throttled++;
5107 count_wire--;
5108 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5109 }
5110
5111 queue_iterate( &vm_page_queue_zf,
5112 m,
5113 vm_page_t,
5114 pageq )
5115 {
5116 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5117 && hibernate_consider_discard(m))
5118 {
5119 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5120 if (m->dirty)
5121 count_discard_purgeable++;
5122 else
5123 count_discard_inactive++;
5124 }
5125 else
5126 count_zf++;
5127 count_wire--;
5128 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5129 }
5130
5131 queue_iterate( &vm_page_queue_inactive,
5132 m,
5133 vm_page_t,
5134 pageq )
5135 {
5136 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5137 && hibernate_consider_discard(m))
5138 {
5139 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5140 if (m->dirty)
5141 count_discard_purgeable++;
5142 else
5143 count_discard_inactive++;
5144 }
5145 else
5146 count_inactive++;
5147 count_wire--;
5148 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5149 }
5150
5151 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5152 {
5153 queue_iterate(&vm_page_queue_speculative[i].age_q,
5154 m,
5155 vm_page_t,
5156 pageq)
5157 {
5158 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5159 && hibernate_consider_discard(m))
5160 {
5161 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5162 count_discard_speculative++;
5163 }
5164 else
5165 count_speculative++;
5166 count_wire--;
5167 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5168 }
5169 }
5170
5171 queue_iterate( &vm_page_queue_active,
5172 m,
5173 vm_page_t,
5174 pageq )
5175 {
5176 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
5177 && hibernate_consider_discard(m))
5178 {
5179 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5180 if (m->dirty)
5181 count_discard_purgeable++;
5182 else
5183 count_discard_active++;
5184 }
5185 else
5186 count_active++;
5187 count_wire--;
5188 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5189 }
5190
5191 // pull wired from hibernate_bitmap
5192
5193 bitmap = &page_list->bank_bitmap[0];
5194 bitmap_wired = &page_list_wired->bank_bitmap[0];
5195 for (bank = 0; bank < page_list->bank_count; bank++)
5196 {
5197 for (i = 0; i < bitmap->bitmapwords; i++)
5198 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
5199 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
5200 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
5201 }
5202
5203 // machine dependent adjustments
5204 hibernate_page_list_setall_machine(page_list, page_list_wired, &pages);
5205
0b4c1975
A
5206 hibernate_stats.cd_count_wire = count_wire;
5207 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable + count_discard_speculative;
5208
b0d623f7
A
5209 clock_get_uptime(&end);
5210 absolutetime_to_nanoseconds(end - start, &nsec);
5211 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
5212
5213 HIBLOG("pages %d, wire %d, act %d, inact %d, spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d\n",
5214 pages, count_wire, count_active, count_inactive, count_speculative, count_zf, count_throttled,
5215 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
5216
5217 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative;
0b4c1975
A
5218
5219 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
b0d623f7
A
5220}
5221
5222void
5223hibernate_page_list_discard(hibernate_page_list_t * page_list)
5224{
5225 uint64_t start, end, nsec;
5226 vm_page_t m;
5227 vm_page_t next;
5228 uint32_t i;
5229 uint32_t count_discard_active = 0;
5230 uint32_t count_discard_inactive = 0;
5231 uint32_t count_discard_purgeable = 0;
5232 uint32_t count_discard_speculative = 0;
5233
5234 clock_get_uptime(&start);
5235
5236 m = (vm_page_t) queue_first(&vm_page_queue_zf);
5237 while (m && !queue_end(&vm_page_queue_zf, (queue_entry_t)m))
5238 {
5239 next = (vm_page_t) m->pageq.next;
5240 if (hibernate_page_bittst(page_list, m->phys_page))
5241 {
5242 if (m->dirty)
5243 count_discard_purgeable++;
5244 else
5245 count_discard_inactive++;
5246 hibernate_discard_page(m);
5247 }
5248 m = next;
5249 }
5250
5251 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5252 {
5253 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5254 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5255 {
5256 next = (vm_page_t) m->pageq.next;
5257 if (hibernate_page_bittst(page_list, m->phys_page))
5258 {
5259 count_discard_speculative++;
5260 hibernate_discard_page(m);
5261 }
5262 m = next;
5263 }
5264 }
5265
5266 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5267 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5268 {
5269 next = (vm_page_t) m->pageq.next;
5270 if (hibernate_page_bittst(page_list, m->phys_page))
5271 {
5272 if (m->dirty)
5273 count_discard_purgeable++;
5274 else
5275 count_discard_inactive++;
5276 hibernate_discard_page(m);
5277 }
5278 m = next;
5279 }
5280
5281 m = (vm_page_t) queue_first(&vm_page_queue_active);
5282 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5283 {
5284 next = (vm_page_t) m->pageq.next;
5285 if (hibernate_page_bittst(page_list, m->phys_page))
5286 {
5287 if (m->dirty)
5288 count_discard_purgeable++;
5289 else
5290 count_discard_active++;
5291 hibernate_discard_page(m);
5292 }
5293 m = next;
5294 }
5295
5296 clock_get_uptime(&end);
5297 absolutetime_to_nanoseconds(end - start, &nsec);
5298 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d\n",
5299 nsec / 1000000ULL,
5300 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
5301}
5302
d1ecb069
A
5303#endif /* HIBERNATION */
5304
b0d623f7 5305/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
1c79356b
A
5306
5307#include <mach_vm_debug.h>
5308#if MACH_VM_DEBUG
5309
5310#include <mach_debug/hash_info.h>
5311#include <vm/vm_debug.h>
5312
5313/*
5314 * Routine: vm_page_info
5315 * Purpose:
5316 * Return information about the global VP table.
5317 * Fills the buffer with as much information as possible
5318 * and returns the desired size of the buffer.
5319 * Conditions:
5320 * Nothing locked. The caller should provide
5321 * possibly-pageable memory.
5322 */
5323
5324unsigned int
5325vm_page_info(
5326 hash_info_bucket_t *info,
5327 unsigned int count)
5328{
91447636 5329 unsigned int i;
b0d623f7 5330 lck_spin_t *bucket_lock;
1c79356b
A
5331
5332 if (vm_page_bucket_count < count)
5333 count = vm_page_bucket_count;
5334
5335 for (i = 0; i < count; i++) {
5336 vm_page_bucket_t *bucket = &vm_page_buckets[i];
5337 unsigned int bucket_count = 0;
5338 vm_page_t m;
5339
b0d623f7
A
5340 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
5341 lck_spin_lock(bucket_lock);
5342
1c79356b
A
5343 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
5344 bucket_count++;
b0d623f7
A
5345
5346 lck_spin_unlock(bucket_lock);
1c79356b
A
5347
5348 /* don't touch pageable memory while holding locks */
5349 info[i].hib_count = bucket_count;
5350 }
5351
5352 return vm_page_bucket_count;
5353}
5354#endif /* MACH_VM_DEBUG */
5355
5356#include <mach_kdb.h>
5357#if MACH_KDB
5358
5359#include <ddb/db_output.h>
5360#include <vm/vm_print.h>
5361#define printf kdbprintf
5362
5363/*
5364 * Routine: vm_page_print [exported]
5365 */
5366void
5367vm_page_print(
91447636 5368 db_addr_t db_addr)
1c79356b 5369{
91447636
A
5370 vm_page_t p;
5371
5372 p = (vm_page_t) (long) db_addr;
1c79356b
A
5373
5374 iprintf("page 0x%x\n", p);
5375
5376 db_indent += 2;
5377
5378 iprintf("object=0x%x", p->object);
5379 printf(", offset=0x%x", p->offset);
5380 printf(", wire_count=%d", p->wire_count);
1c79356b 5381
b0d623f7
A
5382 iprintf("%slocal, %sinactive, %sactive, %sthrottled, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n",
5383 (p->local ? "" : "!"),
1c79356b
A
5384 (p->inactive ? "" : "!"),
5385 (p->active ? "" : "!"),
2d21ac55 5386 (p->throttled ? "" : "!"),
1c79356b
A
5387 (p->gobbled ? "" : "!"),
5388 (p->laundry ? "" : "!"),
5389 (p->free ? "" : "!"),
5390 (p->reference ? "" : "!"),
91447636 5391 (p->encrypted ? "" : "!"));
1c79356b
A
5392 iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
5393 (p->busy ? "" : "!"),
5394 (p->wanted ? "" : "!"),
5395 (p->tabled ? "" : "!"),
5396 (p->fictitious ? "" : "!"),
5397 (p->private ? "" : "!"),
5398 (p->precious ? "" : "!"));
5399 iprintf("%sabsent, %serror, %sdirty, %scleaning, %spageout, %sclustered\n",
5400 (p->absent ? "" : "!"),
5401 (p->error ? "" : "!"),
5402 (p->dirty ? "" : "!"),
5403 (p->cleaning ? "" : "!"),
5404 (p->pageout ? "" : "!"),
5405 (p->clustered ? "" : "!"));
2d21ac55 5406 iprintf("%soverwriting, %srestart, %sunusual\n",
1c79356b
A
5407 (p->overwriting ? "" : "!"),
5408 (p->restart ? "" : "!"),
0b4e3aa0 5409 (p->unusual ? "" : "!"));
1c79356b 5410
55e303ae 5411 iprintf("phys_page=0x%x", p->phys_page);
1c79356b
A
5412
5413 db_indent -= 2;
5414}
5415#endif /* MACH_KDB */