apple/xnu (xnu-3789.21.4) - osfmk/vm/vm_pageout.c
1 /*
2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_pageout.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * The proverbial page-out daemon.
64 */
65
66 #include <stdint.h>
67
68 #include <debug.h>
69 #include <mach_pagemap.h>
70 #include <mach_cluster_stats.h>
71
72 #include <mach/mach_types.h>
73 #include <mach/memory_object.h>
74 #include <mach/memory_object_default.h>
75 #include <mach/memory_object_control_server.h>
76 #include <mach/mach_host_server.h>
77 #include <mach/upl.h>
78 #include <mach/vm_map.h>
79 #include <mach/vm_param.h>
80 #include <mach/vm_statistics.h>
81 #include <mach/sdt.h>
82
83 #include <kern/kern_types.h>
84 #include <kern/counters.h>
85 #include <kern/host_statistics.h>
86 #include <kern/machine.h>
87 #include <kern/misc_protos.h>
88 #include <kern/sched.h>
89 #include <kern/thread.h>
90 #include <kern/xpr.h>
91 #include <kern/kalloc.h>
92 #include <kern/policy_internal.h>
93
94 #include <machine/vm_tuning.h>
95 #include <machine/commpage.h>
96
97 #include <vm/pmap.h>
98 #include <vm/vm_compressor_pager.h>
99 #include <vm/vm_fault.h>
100 #include <vm/vm_map.h>
101 #include <vm/vm_object.h>
102 #include <vm/vm_page.h>
103 #include <vm/vm_pageout.h>
104 #include <vm/vm_protos.h> /* must be last */
105 #include <vm/memory_object.h>
106 #include <vm/vm_purgeable_internal.h>
107 #include <vm/vm_shared_region.h>
108 #include <vm/vm_compressor.h>
109
110 #if CONFIG_PHANTOM_CACHE
111 #include <vm/vm_phantom_cache.h>
112 #endif
113 /*
114 * ENCRYPTED SWAP:
115 */
116 #include <libkern/crypto/aes.h>
117 extern u_int32_t random(void); /* from <libkern/libkern.h> */
118
119 extern int cs_debug;
120
121 #if UPL_DEBUG
122 #include <libkern/OSDebug.h>
123 #endif
124
125 extern void m_drain(void);
126
127 #if VM_PRESSURE_EVENTS
128 extern unsigned int memorystatus_available_pages;
129 extern unsigned int memorystatus_available_pages_pressure;
130 extern unsigned int memorystatus_available_pages_critical;
131 extern unsigned int memorystatus_frozen_count;
132 extern unsigned int memorystatus_suspended_count;
133
134 extern vm_pressure_level_t memorystatus_vm_pressure_level;
135 int memorystatus_purge_on_warning = 2;
136 int memorystatus_purge_on_urgent = 5;
137 int memorystatus_purge_on_critical = 8;
138
139 void vm_pressure_response(void);
140 boolean_t vm_pressure_thread_running = FALSE;
141 extern void consider_vm_pressure_events(void);
142
143 #define MEMORYSTATUS_SUSPENDED_THRESHOLD 4
144 #endif /* VM_PRESSURE_EVENTS */
145
146 boolean_t vm_pressure_changed = FALSE;
147
148 #ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE /* maximum iterations of the active queue to move pages to inactive */
149 #define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 100
150 #endif
151
152 #ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
153 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096
154 #endif
155
156 #ifndef VM_PAGEOUT_DEADLOCK_RELIEF
157 #define VM_PAGEOUT_DEADLOCK_RELIEF 100 /* number of pages to move to break deadlock */
158 #endif
159
160 #ifndef VM_PAGEOUT_INACTIVE_RELIEF
161 #define VM_PAGEOUT_INACTIVE_RELIEF 50 /* minimum number of pages to move to the inactive q */
162 #endif
163
164 #ifndef VM_PAGE_LAUNDRY_MAX
165 #define VM_PAGE_LAUNDRY_MAX 128UL /* maximum pageouts on a given pageout queue */
166 #endif /* VM_PAGE_LAUNDRY_MAX */
167
168 #ifndef VM_PAGEOUT_BURST_WAIT
169 #define VM_PAGEOUT_BURST_WAIT 10 /* milliseconds */
170 #endif /* VM_PAGEOUT_BURST_WAIT */
171
172 #ifndef VM_PAGEOUT_EMPTY_WAIT
173 #define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */
174 #endif /* VM_PAGEOUT_EMPTY_WAIT */
175
176 #ifndef VM_PAGEOUT_DEADLOCK_WAIT
177 #define VM_PAGEOUT_DEADLOCK_WAIT 300 /* milliseconds */
178 #endif /* VM_PAGEOUT_DEADLOCK_WAIT */
179
180 #ifndef VM_PAGEOUT_IDLE_WAIT
181 #define VM_PAGEOUT_IDLE_WAIT 10 /* milliseconds */
182 #endif /* VM_PAGEOUT_IDLE_WAIT */
183
184 #ifndef VM_PAGEOUT_SWAP_WAIT
185 #define VM_PAGEOUT_SWAP_WAIT 50 /* milliseconds */
186 #endif /* VM_PAGEOUT_SWAP_WAIT */
187
188 #ifndef VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED
189 #define VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED 1000 /* maximum pages considered before we issue a pressure event */
190 #endif /* VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED */
191
192 #ifndef VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS
193 #define VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS 5 /* seconds */
194 #endif /* VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS */
195
196 unsigned int vm_page_speculative_q_age_ms = VM_PAGE_SPECULATIVE_Q_AGE_MS;
197 unsigned int vm_page_speculative_percentage = 5;
198
199 #ifndef VM_PAGE_SPECULATIVE_TARGET
200 #define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / (100 / vm_page_speculative_percentage))
201 #endif /* VM_PAGE_SPECULATIVE_TARGET */
202
203
204 #ifndef VM_PAGE_INACTIVE_HEALTHY_LIMIT
205 #define VM_PAGE_INACTIVE_HEALTHY_LIMIT(total) ((total) * 1 / 200)
206 #endif /* VM_PAGE_INACTIVE_HEALTHY_LIMIT */
207
208
209 /*
210 * To obtain a reasonable LRU approximation, the inactive queue
211 * needs to be large enough to give pages on it a chance to be
212 * referenced a second time. This macro defines the fraction
213 * of active+inactive pages that should be inactive.
214 * The pageout daemon uses it to update vm_page_inactive_target.
215 *
216 * If vm_page_free_count falls below vm_page_free_target and
217 * vm_page_inactive_count is below vm_page_inactive_target,
218 * then the pageout daemon starts running.
219 */
220
221 #ifndef VM_PAGE_INACTIVE_TARGET
222 #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 2)
223 #endif /* VM_PAGE_INACTIVE_TARGET */
224
225 /*
226 * Once the pageout daemon starts running, it keeps going
227 * until vm_page_free_count meets or exceeds vm_page_free_target.
228 */
229
230 #ifndef VM_PAGE_FREE_TARGET
231 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
232 #endif /* VM_PAGE_FREE_TARGET */
233
234
235 /*
236 * The pageout daemon always starts running once vm_page_free_count
237 * falls below vm_page_free_min.
238 */
239
240 #ifndef VM_PAGE_FREE_MIN
241 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
242 #endif /* VM_PAGE_FREE_MIN */
243
244 #define VM_PAGE_FREE_RESERVED_LIMIT 1700
245 #define VM_PAGE_FREE_MIN_LIMIT 3500
246 #define VM_PAGE_FREE_TARGET_LIMIT 4000
247
248 /*
249 * When vm_page_free_count falls below vm_page_free_reserved,
250 * only vm-privileged threads can allocate pages. vm-privilege
251 * allows the pageout daemon and default pager (and any other
252 * associated threads needed for default pageout) to continue
253 * operation by dipping into the reserved pool of pages.
254 */
255
256 #ifndef VM_PAGE_FREE_RESERVED
257 #define VM_PAGE_FREE_RESERVED(n) \
258 ((unsigned) (6 * VM_PAGE_LAUNDRY_MAX) + (n))
259 #endif /* VM_PAGE_FREE_RESERVED */
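/*
 * Illustrative sketch (not part of xnu): the tuning macros above are plain
 * integer arithmetic on page counts.  The standalone user-space program
 * below mirrors that arithmetic for a hypothetical count of 262144 pages
 * (1 GB of 4 KB pages) and the default vm_page_speculative_percentage of 5;
 * the printed numbers are examples only, not values the kernel guarantees.
 */
#if 0 /* example only; never built as part of the kernel */
#include <stdio.h>

#define EX_LAUNDRY_MAX            128u  /* mirrors VM_PAGE_LAUNDRY_MAX */
#define EX_SPECULATIVE_PERCENTAGE 5u    /* mirrors vm_page_speculative_percentage */

int
main(void)
{
	unsigned int pages = 262144;    /* hypothetical page count fed to each macro */

	/* VM_PAGE_INACTIVE_TARGET(avail): half of active+inactive should be inactive */
	unsigned int inactive_target = pages / 2;

	/* VM_PAGE_SPECULATIVE_TARGET(total): integer division turns 5% into total/20 */
	unsigned int speculative_target = pages * 1 / (100 / EX_SPECULATIVE_PERCENTAGE);

	/* VM_PAGE_FREE_TARGET(free) and VM_PAGE_FREE_MIN(free) */
	unsigned int free_target = 15 + pages / 80;
	unsigned int free_min    = 10 + pages / 100;

	/* VM_PAGE_FREE_RESERVED(n): 6 * VM_PAGE_LAUNDRY_MAX + n */
	unsigned int free_reserved = 6 * EX_LAUNDRY_MAX + 0;

	printf("inactive_target    = %u\n", inactive_target);    /* 131072 */
	printf("speculative_target = %u\n", speculative_target); /* 13107 */
	printf("free_target        = %u\n", free_target);        /* 3291 */
	printf("free_min           = %u\n", free_min);           /* 2631 */
	printf("free_reserved(0)   = %u\n", free_reserved);      /* 768 */
	return 0;
}
#endif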
260
261 /*
262 * When we dequeue pages from the inactive list, they are
263 * reactivated (i.e., put back on the active queue) if referenced.
264 * However, it is possible to starve the free list if other
265 * processors are referencing pages faster than we can turn off
266 * the referenced bit. So we limit the number of reactivations
267 * we will make per call of vm_pageout_scan().
268 */
269 #define VM_PAGE_REACTIVATE_LIMIT_MAX 20000
270 #ifndef VM_PAGE_REACTIVATE_LIMIT
271 #define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
272 #endif /* VM_PAGE_REACTIVATE_LIMIT */
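/*
 * Illustrative note (not part of xnu): VM_PAGE_REACTIVATE_LIMIT() evaluates
 * to the larger of avail/20 and VM_PAGE_REACTIVATE_LIMIT_MAX, so the per-call
 * reactivation budget computed here is never smaller than 20000 pages.  A
 * minimal sketch of that arithmetic, with hypothetical page counts shown in
 * the comments:
 */
#if 0 /* example only; never built as part of the kernel */
#define EX_MAX(a, b) ((a) > (b) ? (a) : (b))

/* avail = 100000  -> EX_MAX(5000, 20000)  = 20000 (the 20000 floor wins) */
/* avail = 1000000 -> EX_MAX(50000, 20000) = 50000 (avail/20 wins)        */
static unsigned int
ex_reactivate_limit(unsigned int avail)
{
	return EX_MAX(avail * 1 / 20, 20000u);
}
#endif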
273 #define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM 1000
274
275
276 extern boolean_t hibernate_cleaning_in_progress;
277
278 /*
279 * Exported variable used to broadcast the activation of the pageout scan.
280 * Working Set uses this to throttle its use of pmap removes. In this
281 * way, code which runs within memory in an uncontested context does
282 * not keep encountering soft faults.
283 */
284
285 unsigned int vm_pageout_scan_event_counter = 0;
286
287 /*
288 * Forward declarations for internal routines.
289 */
290 struct cq {
291 struct vm_pageout_queue *q;
292 void *current_chead;
293 char *scratch_buf;
294 int id;
295 };
296 #define MAX_COMPRESSOR_THREAD_COUNT 8
297
298 struct cq ciq[MAX_COMPRESSOR_THREAD_COUNT];
299
300 void *vm_pageout_immediate_chead;
301 char *vm_pageout_immediate_scratch_buf;
302
303
304 #if VM_PRESSURE_EVENTS
305 void vm_pressure_thread(void);
306
307 boolean_t VM_PRESSURE_NORMAL_TO_WARNING(void);
308 boolean_t VM_PRESSURE_WARNING_TO_CRITICAL(void);
309
310 boolean_t VM_PRESSURE_WARNING_TO_NORMAL(void);
311 boolean_t VM_PRESSURE_CRITICAL_TO_WARNING(void);
312 #endif
313 static void vm_pageout_garbage_collect(int);
314 static void vm_pageout_iothread_external(void);
315 static void vm_pageout_iothread_internal(struct cq *cq);
316 static void vm_pageout_adjust_io_throttles(struct vm_pageout_queue *, struct vm_pageout_queue *, boolean_t);
317
318 extern void vm_pageout_continue(void);
319 extern void vm_pageout_scan(void);
320
321 static void vm_pageout_immediate(vm_page_t, boolean_t);
322 boolean_t vm_compressor_immediate_preferred = FALSE;
323 boolean_t vm_compressor_immediate_preferred_override = FALSE;
324 boolean_t vm_restricted_to_single_processor = FALSE;
325 static boolean_t vm_pageout_waiter = FALSE;
326 static boolean_t vm_pageout_running = FALSE;
327
328
329 static thread_t vm_pageout_external_iothread = THREAD_NULL;
330 static thread_t vm_pageout_internal_iothread = THREAD_NULL;
331
332 unsigned int vm_pageout_reserved_internal = 0;
333 unsigned int vm_pageout_reserved_really = 0;
334
335 unsigned int vm_pageout_swap_wait = 0;
336 unsigned int vm_pageout_idle_wait = 0; /* milliseconds */
337 unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
338 unsigned int vm_pageout_burst_wait = 0; /* milliseconds */
339 unsigned int vm_pageout_deadlock_wait = 0; /* milliseconds */
340 unsigned int vm_pageout_deadlock_relief = 0;
341 unsigned int vm_pageout_inactive_relief = 0;
342 unsigned int vm_pageout_burst_active_throttle = 0;
343 unsigned int vm_pageout_burst_inactive_throttle = 0;
344
345 int vm_upl_wait_for_pages = 0;
346
347
348 /*
349 * These variables record the pageout daemon's actions:
350 * how many pages it looks at and what happens to those pages.
351 * No locking needed because only one thread modifies the variables.
352 */
353
354 unsigned int vm_pageout_active = 0; /* debugging */
355 unsigned int vm_pageout_inactive = 0; /* debugging */
356 unsigned int vm_pageout_inactive_throttled = 0; /* debugging */
357 unsigned int vm_pageout_inactive_forced = 0; /* debugging */
358 unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
359 unsigned int vm_pageout_inactive_avoid = 0; /* debugging */
360 unsigned int vm_pageout_inactive_busy = 0; /* debugging */
361 unsigned int vm_pageout_inactive_error = 0; /* debugging */
362 unsigned int vm_pageout_inactive_absent = 0; /* debugging */
363 unsigned int vm_pageout_inactive_notalive = 0; /* debugging */
364 unsigned int vm_pageout_inactive_used = 0; /* debugging */
365 unsigned int vm_pageout_cache_evicted = 0; /* debugging */
366 unsigned int vm_pageout_inactive_clean = 0; /* debugging */
367 unsigned int vm_pageout_speculative_clean = 0; /* debugging */
368
369 unsigned int vm_pageout_freed_from_cleaned = 0;
370 unsigned int vm_pageout_freed_from_speculative = 0;
371 unsigned int vm_pageout_freed_from_inactive_clean = 0;
372
373 unsigned int vm_pageout_enqueued_cleaned_from_inactive_clean = 0;
374 unsigned int vm_pageout_enqueued_cleaned_from_inactive_dirty = 0;
375
376 unsigned int vm_pageout_cleaned_reclaimed = 0; /* debugging; how many cleaned pages are reclaimed by the pageout scan */
377 unsigned int vm_pageout_cleaned_reactivated = 0; /* debugging; how many cleaned pages are found to be referenced on pageout (and are therefore reactivated) */
378 unsigned int vm_pageout_cleaned_reference_reactivated = 0;
379 unsigned int vm_pageout_cleaned_volatile_reactivated = 0;
380 unsigned int vm_pageout_cleaned_fault_reactivated = 0;
381 unsigned int vm_pageout_cleaned_commit_reactivated = 0; /* debugging; how many cleaned pages are found to be referenced on commit (and are therefore reactivated) */
382 unsigned int vm_pageout_cleaned_busy = 0;
383 unsigned int vm_pageout_cleaned_nolock = 0;
384
385 unsigned int vm_pageout_inactive_dirty_internal = 0; /* debugging */
386 unsigned int vm_pageout_inactive_dirty_external = 0; /* debugging */
387 unsigned int vm_pageout_inactive_deactivated = 0; /* debugging */
388 unsigned int vm_pageout_inactive_anonymous = 0; /* debugging */
389 unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */
390 unsigned int vm_pageout_purged_objects = 0; /* used for sysctl vm stats */
391 unsigned int vm_stat_discard = 0; /* debugging */
392 unsigned int vm_stat_discard_sent = 0; /* debugging */
393 unsigned int vm_stat_discard_failure = 0; /* debugging */
394 unsigned int vm_stat_discard_throttle = 0; /* debugging */
395 unsigned int vm_pageout_reactivation_limit_exceeded = 0; /* debugging */
396 unsigned int vm_pageout_catch_ups = 0; /* debugging */
397 unsigned int vm_pageout_inactive_force_reclaim = 0; /* debugging */
398
399 unsigned int vm_pageout_scan_reclaimed_throttled = 0;
400 unsigned int vm_pageout_scan_active_throttled = 0;
401 unsigned int vm_pageout_scan_inactive_throttled_internal = 0;
402 unsigned int vm_pageout_scan_inactive_throttled_external = 0;
403 unsigned int vm_pageout_scan_throttle = 0; /* debugging */
404 unsigned int vm_pageout_scan_burst_throttle = 0; /* debugging */
405 unsigned int vm_pageout_scan_empty_throttle = 0; /* debugging */
406 unsigned int vm_pageout_scan_swap_throttle = 0; /* debugging */
407 unsigned int vm_pageout_scan_deadlock_detected = 0; /* debugging */
408 unsigned int vm_pageout_scan_active_throttle_success = 0; /* debugging */
409 unsigned int vm_pageout_scan_inactive_throttle_success = 0; /* debugging */
410 unsigned int vm_pageout_inactive_external_forced_jetsam_count = 0; /* debugging */
411 unsigned int vm_pageout_scan_throttle_deferred = 0; /* debugging */
412 unsigned int vm_pageout_scan_yield_unthrottled = 0; /* debugging */
413 unsigned int vm_page_speculative_count_drifts = 0;
414 unsigned int vm_page_speculative_count_drift_max = 0;
415
416
417 /*
418 * Backing store throttle when BS is exhausted
419 */
420 unsigned int vm_backing_store_low = 0;
421
422 unsigned int vm_pageout_out_of_line = 0;
423 unsigned int vm_pageout_in_place = 0;
424
425 unsigned int vm_page_steal_pageout_page = 0;
426
427 struct vm_config vm_config;
428
429 /*
430 * ENCRYPTED SWAP:
431 * counters and statistics...
432 */
433 unsigned long vm_page_decrypt_counter = 0;
434 unsigned long vm_page_decrypt_for_upl_counter = 0;
435 unsigned long vm_page_encrypt_counter = 0;
436 unsigned long vm_page_encrypt_abort_counter = 0;
437 unsigned long vm_page_encrypt_already_encrypted_counter = 0;
438 boolean_t vm_pages_encrypted = FALSE; /* are there encrypted pages ? */
439
440 struct vm_pageout_queue vm_pageout_queue_internal __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
441 struct vm_pageout_queue vm_pageout_queue_external __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
442
443 unsigned int vm_page_speculative_target = 0;
444
445 vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL;
446
447 boolean_t (* volatile consider_buffer_cache_collect)(int) = NULL;
448
449 #if DEVELOPMENT || DEBUG
450 unsigned long vm_cs_validated_resets = 0;
451 #endif
452
453 int vm_debug_events = 0;
454
455 #if CONFIG_MEMORYSTATUS
456 #if !CONFIG_JETSAM
457 extern boolean_t memorystatus_idle_exit_from_VM(void);
458 #endif
459 extern boolean_t memorystatus_kill_on_VM_page_shortage(boolean_t async);
460 extern void memorystatus_on_pageout_scan_end(void);
461
462 uint32_t vm_pageout_memorystatus_fb_factor_nr = 5;
463 uint32_t vm_pageout_memorystatus_fb_factor_dr = 2;
464 #if DEVELOPMENT || DEBUG
465 uint32_t vm_grab_anon_overrides = 0;
466 uint32_t vm_grab_anon_nops = 0;
467 #endif
468
469 #endif
470
471 /*
472 * Routine: vm_backing_store_disable
473 * Purpose:
474 * Suspend non-privileged threads wishing to extend
475 * backing store when we are low on backing store
476 * (Synchronized by caller)
477 */
478 void
479 vm_backing_store_disable(
480 boolean_t disable)
481 {
482 if(disable) {
483 vm_backing_store_low = 1;
484 } else {
485 if(vm_backing_store_low) {
486 vm_backing_store_low = 0;
487 thread_wakeup((event_t) &vm_backing_store_low);
488 }
489 }
490 }
491
492
493 #if MACH_CLUSTER_STATS
494 unsigned long vm_pageout_cluster_dirtied = 0;
495 unsigned long vm_pageout_cluster_cleaned = 0;
496 unsigned long vm_pageout_cluster_collisions = 0;
497 unsigned long vm_pageout_cluster_clusters = 0;
498 unsigned long vm_pageout_cluster_conversions = 0;
499 unsigned long vm_pageout_target_collisions = 0;
500 unsigned long vm_pageout_target_page_dirtied = 0;
501 unsigned long vm_pageout_target_page_freed = 0;
502 #define CLUSTER_STAT(clause) clause
503 #else /* MACH_CLUSTER_STATS */
504 #define CLUSTER_STAT(clause)
505 #endif /* MACH_CLUSTER_STATS */
506
507 /*
508 * Routine: vm_pageout_object_terminate
509 * Purpose:
510 * Destroy the pageout_object, and perform all of the
511 * required cleanup actions.
512 *
513 * In/Out conditions:
514 * The object must be locked, and will be returned locked.
515 */
516 void
517 vm_pageout_object_terminate(
518 vm_object_t object)
519 {
520 vm_object_t shadow_object;
521
522 /*
523 * Deal with the deallocation (last reference) of a pageout object
524 * (used for cleaning-in-place) by dropping the paging references/
525 * freeing pages in the original object.
526 */
527
528 assert(object->pageout);
529 shadow_object = object->shadow;
530 vm_object_lock(shadow_object);
531
532 while (!vm_page_queue_empty(&object->memq)) {
533 vm_page_t p, m;
534 vm_object_offset_t offset;
535
536 p = (vm_page_t) vm_page_queue_first(&object->memq);
537
538 assert(p->private);
539 assert(p->free_when_done);
540 p->free_when_done = FALSE;
541 assert(!p->cleaning);
542 assert(!p->laundry);
543
544 offset = p->offset;
545 VM_PAGE_FREE(p);
546 p = VM_PAGE_NULL;
547
548 m = vm_page_lookup(shadow_object,
549 offset + object->vo_shadow_offset);
550
551 if(m == VM_PAGE_NULL)
552 continue;
553
554 assert((m->dirty) || (m->precious) ||
555 (m->busy && m->cleaning));
556
557 /*
558 * Handle the trusted pager throttle.
559 * Also decrement the burst throttle (if external).
560 */
561 vm_page_lock_queues();
562 if (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)
563 vm_pageout_throttle_up(m);
564
565 /*
566 * Handle the "target" page(s). These pages are to be freed if
567 * successfully cleaned. Target pages are always busy, and are
568 * wired exactly once. The initial target pages are not mapped,
569 * (so cannot be referenced or modified) but converted target
570 * pages may have been modified between the selection as an
571 * adjacent page and conversion to a target.
572 */
573 if (m->free_when_done) {
574 assert(m->busy);
575 assert(m->vm_page_q_state == VM_PAGE_IS_WIRED);
576 assert(m->wire_count == 1);
577 m->cleaning = FALSE;
578 m->encrypted_cleaning = FALSE;
579 m->free_when_done = FALSE;
580 #if MACH_CLUSTER_STATS
581 if (m->wanted) vm_pageout_target_collisions++;
582 #endif
583 /*
584 * Revoke all access to the page. Since the object is
585 * locked, and the page is busy, this prevents the page
586 * from being dirtied after the pmap_disconnect() call
587 * returns.
588 *
589 * Since the page is left "dirty" but "not modified", we
590 * can detect whether the page was redirtied during
591 * pageout by checking the modify state.
592 */
593 if (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED) {
594 SET_PAGE_DIRTY(m, FALSE);
595 } else {
596 m->dirty = FALSE;
597 }
598
599 if (m->dirty) {
600 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
601 vm_page_unwire(m, TRUE); /* reactivates */
602 VM_STAT_INCR(reactivations);
603 PAGE_WAKEUP_DONE(m);
604 } else {
605 CLUSTER_STAT(vm_pageout_target_page_freed++;)
606 vm_page_free(m);/* clears busy, etc. */
607 }
608 vm_page_unlock_queues();
609 continue;
610 }
611 /*
612 * Handle the "adjacent" pages. These pages were cleaned in
613 * place, and should be left alone.
614 * If prep_pin_count is nonzero, then someone is using the
615 * page, so make it active.
616 */
617 if ((m->vm_page_q_state == VM_PAGE_NOT_ON_Q) && !m->private) {
618 if (m->reference)
619 vm_page_activate(m);
620 else
621 vm_page_deactivate(m);
622 }
623 if (m->overwriting) {
624 /*
625 * the (COPY_OUT_FROM == FALSE) request_page_list case
626 */
627 if (m->busy) {
628 /*
629 * We do not re-set m->dirty !
630 * The page was busy so no extraneous activity
631 * could have occurred. COPY_INTO is a read into the
632 * new pages. CLEAN_IN_PLACE does actually write
633 * out the pages but handling outside of this code
634 * will take care of resetting dirty. We clear the
635 * modify however for the Programmed I/O case.
636 */
637 pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
638
639 m->busy = FALSE;
640 m->absent = FALSE;
641 } else {
642 /*
643 * alternate (COPY_OUT_FROM == FALSE) request_page_list case
644 * Occurs when the original page was wired
645 * at the time of the list request
646 */
647 assert(VM_PAGE_WIRED(m));
648 vm_page_unwire(m, TRUE); /* reactivates */
649 }
650 m->overwriting = FALSE;
651 } else {
652 /*
653 * Set the dirty state according to whether or not the page was
654 * modified during the pageout. Note that we purposefully do
655 * NOT call pmap_clear_modify since the page is still mapped.
656 * If the page were to be dirtied between the 2 calls, this
657 * fact would be lost. This code is only necessary to
658 * maintain statistics, since the pmap module is always
659 * consulted if m->dirty is false.
660 */
661 #if MACH_CLUSTER_STATS
662 m->dirty = pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m));
663
664 if (m->dirty) vm_pageout_cluster_dirtied++;
665 else vm_pageout_cluster_cleaned++;
666 if (m->wanted) vm_pageout_cluster_collisions++;
667 #else
668 m->dirty = FALSE;
669 #endif
670 }
671 if (m->encrypted_cleaning == TRUE) {
672 m->encrypted_cleaning = FALSE;
673 m->busy = FALSE;
674 }
675 m->cleaning = FALSE;
676
677 /*
678 * Wakeup any thread waiting for the page to be un-cleaning.
679 */
680 PAGE_WAKEUP(m);
681 vm_page_unlock_queues();
682 }
683 /*
684 * Account for the paging reference taken in vm_paging_object_allocate.
685 */
686 vm_object_activity_end(shadow_object);
687 vm_object_unlock(shadow_object);
688
689 assert(object->ref_count == 0);
690 assert(object->paging_in_progress == 0);
691 assert(object->activity_in_progress == 0);
692 assert(object->resident_page_count == 0);
693 return;
694 }
695
696 /*
697 * Routine: vm_pageclean_setup
698 *
699 * Purpose: setup a page to be cleaned (made non-dirty), but not
700 * necessarily flushed from the VM page cache.
701 * This is accomplished by cleaning in place.
702 *
703 * The page must not be busy, and new_object
704 * must be locked.
705 *
706 */
707 static void
708 vm_pageclean_setup(
709 vm_page_t m,
710 vm_page_t new_m,
711 vm_object_t new_object,
712 vm_object_offset_t new_offset)
713 {
714 assert(!m->busy);
715 #if 0
716 assert(!m->cleaning);
717 #endif
718
719 XPR(XPR_VM_PAGEOUT,
720 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
721 VM_PAGE_OBJECT(m), m->offset, m,
722 new_m, new_offset);
723
724 pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
725
726 /*
727 * Mark original page as cleaning in place.
728 */
729 m->cleaning = TRUE;
730 SET_PAGE_DIRTY(m, FALSE);
731 m->precious = FALSE;
732
733 /*
734 * Convert the fictitious page to a private shadow of
735 * the real page.
736 */
737 assert(new_m->fictitious);
738 assert(VM_PAGE_GET_PHYS_PAGE(new_m) == vm_page_fictitious_addr);
739 new_m->fictitious = FALSE;
740 new_m->private = TRUE;
741 new_m->free_when_done = TRUE;
742 VM_PAGE_SET_PHYS_PAGE(new_m, VM_PAGE_GET_PHYS_PAGE(m));
743
744 vm_page_lockspin_queues();
745 vm_page_wire(new_m, VM_KERN_MEMORY_NONE, TRUE);
746 vm_page_unlock_queues();
747
748 vm_page_insert_wired(new_m, new_object, new_offset, VM_KERN_MEMORY_NONE);
749 assert(!new_m->wanted);
750 new_m->busy = FALSE;
751 }
752
753 /*
754 * Routine: vm_pageout_initialize_page
755 * Purpose:
756 * Causes the specified page to be initialized in
757 * the appropriate memory object. This routine is used to push
758 * pages into a copy-object when they are modified in the
759 * permanent object.
760 *
761 * The page is moved to a temporary object and paged out.
762 *
763 * In/out conditions:
764 * The page in question must not be on any pageout queues.
765 * The object to which it belongs must be locked.
766 * The page must be busy, but not hold a paging reference.
767 *
768 * Implementation:
769 * Move this page to a completely new object.
770 */
771 void
772 vm_pageout_initialize_page(
773 vm_page_t m)
774 {
775 vm_object_t object;
776 vm_object_offset_t paging_offset;
777 memory_object_t pager;
778
779 XPR(XPR_VM_PAGEOUT,
780 "vm_pageout_initialize_page, page 0x%X\n",
781 m, 0, 0, 0, 0);
782
783 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
784
785 object = VM_PAGE_OBJECT(m);
786
787 assert(m->busy);
788 assert(object->internal);
789
790 /*
791 * Verify that we really want to clean this page
792 */
793 assert(!m->absent);
794 assert(!m->error);
795 assert(m->dirty);
796
797 /*
798 * Create a paging reference to let us play with the object.
799 */
800 paging_offset = m->offset + object->paging_offset;
801
802 if (m->absent || m->error || m->restart || (!m->dirty && !m->precious)) {
803 panic("reservation without pageout?"); /* alan */
804
805 VM_PAGE_FREE(m);
806 vm_object_unlock(object);
807
808 return;
809 }
810
811 /*
812 * If there's no pager, then we can't clean the page. This should
813 * never happen since this should be a copy object and therefore not
814 * an external object, so the pager should always be there.
815 */
816
817 pager = object->pager;
818
819 if (pager == MEMORY_OBJECT_NULL) {
820 panic("missing pager for copy object");
821
822 VM_PAGE_FREE(m);
823 return;
824 }
825
826 /*
827 * set the page for future call to vm_fault_list_request
828 */
829 pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
830 SET_PAGE_DIRTY(m, FALSE);
831
832 /*
833 * keep the object from collapsing or terminating
834 */
835 vm_object_paging_begin(object);
836 vm_object_unlock(object);
837
838 /*
839 * Write the data to its pager.
840 * Note that the data is passed by naming the new object,
841 * not a virtual address; the pager interface has been
842 * manipulated to use the "internal memory" data type.
843 * [The object reference from its allocation is donated
844 * to the eventual recipient.]
845 */
846 memory_object_data_initialize(pager, paging_offset, PAGE_SIZE);
847
848 vm_object_lock(object);
849 vm_object_paging_end(object);
850 }
851
852 #if MACH_CLUSTER_STATS
853 #define MAXCLUSTERPAGES 16
854 struct {
855 unsigned long pages_in_cluster;
856 unsigned long pages_at_higher_offsets;
857 unsigned long pages_at_lower_offsets;
858 } cluster_stats[MAXCLUSTERPAGES];
859 #endif /* MACH_CLUSTER_STATS */
860
861
862 /*
863 * vm_pageout_cluster:
864 *
865 * Given a page, queue it to the appropriate I/O thread,
866 * which will page it out and attempt to clean adjacent pages
867 * in the same operation.
868 *
869 * The object and queues must be locked. We will take a
870 * paging reference to prevent deallocation or collapse when we
871 * release the object lock back at the call site. The I/O thread
872 * is responsible for consuming this reference.
873 *
874 * The page must not be on any pageout queue.
875 */
876
877 int
878 vm_pageout_cluster(vm_page_t m, boolean_t immediate_ok, boolean_t keep_object_locked)
879 {
880 vm_object_t object = VM_PAGE_OBJECT(m);
881 struct vm_pageout_queue *q;
882
883
884 XPR(XPR_VM_PAGEOUT,
885 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
886 object, m->offset, m, 0, 0);
887
888 VM_PAGE_CHECK(m);
889 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
890 vm_object_lock_assert_exclusive(object);
891
892 /*
893 * Only a certain kind of page is appreciated here.
894 */
895 assert((m->dirty || m->precious) && (!VM_PAGE_WIRED(m)));
896 assert(!m->cleaning && !m->laundry);
897 assert(m->vm_page_q_state == VM_PAGE_NOT_ON_Q);
898
899 /*
900 * protect the object from collapse or termination
901 */
902 vm_object_activity_begin(object);
903
904 if (object->internal == TRUE) {
905 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
906
907 m->busy = TRUE;
908
909 if (vm_compressor_immediate_preferred == TRUE && immediate_ok == TRUE) {
910 panic("immediate compressor mode no longer supported\n");
911
912 if (keep_object_locked == FALSE)
913 vm_object_unlock(object);
914 vm_page_unlock_queues();
915
916 vm_pageout_immediate(m, keep_object_locked);
917
918 return (1);
919 }
920 q = &vm_pageout_queue_internal;
921 } else
922 q = &vm_pageout_queue_external;
923
924 /*
925 * pgo_laundry count is tied to the laundry bit
926 */
927 m->laundry = TRUE;
928 q->pgo_laundry++;
929
930 m->vm_page_q_state = VM_PAGE_ON_PAGEOUT_Q;
931 vm_page_queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
932
933 if (q->pgo_idle == TRUE) {
934 q->pgo_idle = FALSE;
935 thread_wakeup((event_t) &q->pgo_pending);
936 }
937 VM_PAGE_CHECK(m);
938
939 return (0);
940 }
941
942
943 unsigned long vm_pageout_throttle_up_count = 0;
944
945 /*
946 * A page is back from laundry or we are stealing it back from
947 * the laundering state. See if there are some pages waiting to
948 * go to laundry and if we can let some of them go now.
949 *
950 * Object and page queues must be locked.
951 */
952 void
953 vm_pageout_throttle_up(
954 vm_page_t m)
955 {
956 struct vm_pageout_queue *q;
957 vm_object_t m_object;
958
959 m_object = VM_PAGE_OBJECT(m);
960
961 assert(m_object != VM_OBJECT_NULL);
962 assert(m_object != kernel_object);
963
964 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
965 vm_object_lock_assert_exclusive(m_object);
966
967 vm_pageout_throttle_up_count++;
968
969 if (m_object->internal == TRUE)
970 q = &vm_pageout_queue_internal;
971 else
972 q = &vm_pageout_queue_external;
973
974 if (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) {
975
976 vm_page_queue_remove(&q->pgo_pending, m, vm_page_t, pageq);
977 m->vm_page_q_state = VM_PAGE_NOT_ON_Q;
978
979 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
980
981 vm_object_activity_end(m_object);
982 }
983 if (m->laundry == TRUE) {
984
985 m->laundry = FALSE;
986 q->pgo_laundry--;
987
988 if (q->pgo_throttled == TRUE) {
989 q->pgo_throttled = FALSE;
990 thread_wakeup((event_t) &q->pgo_laundry);
991 }
992 if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
993 q->pgo_draining = FALSE;
994 thread_wakeup((event_t) (&q->pgo_laundry+1));
995 }
996 }
997 }
998
999
1000 static void
1001 vm_pageout_throttle_up_batch(
1002 struct vm_pageout_queue *q,
1003 int batch_cnt)
1004 {
1005 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1006
1007 vm_pageout_throttle_up_count += batch_cnt;
1008
1009 q->pgo_laundry -= batch_cnt;
1010
1011 if (q->pgo_throttled == TRUE) {
1012 q->pgo_throttled = FALSE;
1013 thread_wakeup((event_t) &q->pgo_laundry);
1014 }
1015 if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
1016 q->pgo_draining = FALSE;
1017 thread_wakeup((event_t) (&q->pgo_laundry+1));
1018 }
1019 }
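/*
 * Illustrative sketch (not part of xnu): a deliberately simplified,
 * single-threaded model of the pgo_laundry bookkeeping shared by
 * vm_pageout_cluster() and vm_pageout_throttle_up() above.  Real wakeups go
 * through assert_wait()/thread_wakeup(); here they are reduced to booleans
 * so the counter handshake is easy to follow.  All ex_ names are
 * hypothetical.
 */
#if 0 /* example only; never built as part of the kernel */
#include <assert.h>
#include <stdbool.h>

struct ex_pageout_queue {
	unsigned int laundry;     /* pages handed to the I/O thread, not yet done */
	bool         throttled;   /* scan is waiting for the laundry to drain a bit */
	bool         draining;    /* someone is waiting for the laundry to hit zero */
};

/* models the tail of vm_pageout_cluster(): queue a page for pageout */
static void
ex_cluster(struct ex_pageout_queue *q)
{
	q->laundry++;                      /* pgo_laundry is tied to m->laundry */
}

/* models vm_pageout_throttle_up(): a page came back from the laundry */
static void
ex_throttle_up(struct ex_pageout_queue *q)
{
	assert(q->laundry > 0);
	q->laundry--;
	if (q->throttled) {
		q->throttled = false;      /* would thread_wakeup(&q->pgo_laundry) */
	}
	if (q->draining && q->laundry == 0) {
		q->draining = false;       /* would thread_wakeup(&q->pgo_laundry + 1) */
	}
}
#endif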
1020
1021
1022
1023 /*
1024 * VM memory pressure monitoring.
1025 *
1026 * vm_pageout_scan() keeps track of the number of pages it considers and
1027 * reclaims, in the currently active vm_pageout_stat[vm_pageout_stat_now].
1028 *
1029 * compute_memory_pressure() is called every second from compute_averages()
1030 * and moves "vm_pageout_stat_now" forward, to start accumulating the number
1031 * of reclaimed pages in a new vm_pageout_stat[] bucket.
1032 *
1033 * mach_vm_pressure_monitor() collects past statistics about memory pressure.
1034 * The caller provides the number of seconds ("nsecs") worth of statistics
1035 * it wants, up to 30 seconds.
1036 * It computes the number of pages reclaimed in the past "nsecs" seconds and
1037 * also returns the number of pages the system still needs to reclaim at this
1038 * moment in time.
1039 */
1040 #define VM_PAGEOUT_STAT_SIZE 31
1041 struct vm_pageout_stat {
1042 unsigned int considered;
1043 unsigned int reclaimed;
1044 } vm_pageout_stats[VM_PAGEOUT_STAT_SIZE] = {{0,0}, };
1045 unsigned int vm_pageout_stat_now = 0;
1046 unsigned int vm_memory_pressure = 0;
1047
1048 #define VM_PAGEOUT_STAT_BEFORE(i) \
1049 (((i) == 0) ? VM_PAGEOUT_STAT_SIZE - 1 : (i) - 1)
1050 #define VM_PAGEOUT_STAT_AFTER(i) \
1051 (((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 0 : (i) + 1)
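/*
 * Illustrative sketch (not part of xnu): the two macros above implement the
 * index arithmetic for a 31-slot ring buffer.  The standalone helper below
 * walks backwards from the current slot and sums "reclaimed" over the last
 * nsecs one-second buckets, which is the same traversal that
 * mach_vm_pressure_monitor() performs further down in this file.  Names
 * prefixed ex_ are hypothetical.
 */
#if 0 /* example only; never built as part of the kernel */
#define EX_STAT_SIZE 31
#define EX_STAT_BEFORE(i) (((i) == 0) ? EX_STAT_SIZE - 1 : (i) - 1)

struct ex_stat {
	unsigned int considered;
	unsigned int reclaimed;
};

static unsigned int
ex_reclaimed_in_last(const struct ex_stat stats[EX_STAT_SIZE],
    unsigned int now, unsigned int nsecs)
{
	unsigned int total = 0;
	unsigned int then;

	for (then = EX_STAT_BEFORE(now);
	    then != now && nsecs-- != 0;
	    then = EX_STAT_BEFORE(then)) {
		total += stats[then].reclaimed;
	}
	return total;
}
#endif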
1052
1053 #if VM_PAGE_BUCKETS_CHECK
1054 int vm_page_buckets_check_interval = 10; /* in seconds */
1055 #endif /* VM_PAGE_BUCKETS_CHECK */
1056
1057 /*
1058 * Called from compute_averages().
1059 */
1060 void
1061 compute_memory_pressure(
1062 __unused void *arg)
1063 {
1064 unsigned int vm_pageout_next;
1065
1066 #if VM_PAGE_BUCKETS_CHECK
1067 /* check the consistency of VM page buckets at regular interval */
1068 static int counter = 0;
1069 if ((++counter % vm_page_buckets_check_interval) == 0) {
1070 vm_page_buckets_check();
1071 }
1072 #endif /* VM_PAGE_BUCKETS_CHECK */
1073
1074 vm_memory_pressure =
1075 vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].reclaimed;
1076
1077 commpage_set_memory_pressure( vm_memory_pressure );
1078
1079 /* move "now" forward */
1080 vm_pageout_next = VM_PAGEOUT_STAT_AFTER(vm_pageout_stat_now);
1081 vm_pageout_stats[vm_pageout_next].considered = 0;
1082 vm_pageout_stats[vm_pageout_next].reclaimed = 0;
1083 vm_pageout_stat_now = vm_pageout_next;
1084 }
1085
1086
1087 /*
1088 * IMPORTANT
1089 * mach_vm_ctl_page_free_wanted() is called indirectly, via
1090 * mach_vm_pressure_monitor(), when taking a stackshot. Therefore,
1091 * it must be safe in the restricted stackshot context. Locks and/or
1092 * blocking are not allowable.
1093 */
1094 unsigned int
1095 mach_vm_ctl_page_free_wanted(void)
1096 {
1097 unsigned int page_free_target, page_free_count, page_free_wanted;
1098
1099 page_free_target = vm_page_free_target;
1100 page_free_count = vm_page_free_count;
1101 if (page_free_target > page_free_count) {
1102 page_free_wanted = page_free_target - page_free_count;
1103 } else {
1104 page_free_wanted = 0;
1105 }
1106
1107 return page_free_wanted;
1108 }
1109
1110
1111 /*
1112 * IMPORTANT:
1113 * mach_vm_pressure_monitor() is called when taking a stackshot, with
1114 * wait_for_pressure FALSE, so that code path must remain safe in the
1115 * restricted stackshot context. No blocking or locks are allowable
1116 * on that code path.
1117 */
1118
1119 kern_return_t
1120 mach_vm_pressure_monitor(
1121 boolean_t wait_for_pressure,
1122 unsigned int nsecs_monitored,
1123 unsigned int *pages_reclaimed_p,
1124 unsigned int *pages_wanted_p)
1125 {
1126 wait_result_t wr;
1127 unsigned int vm_pageout_then, vm_pageout_now;
1128 unsigned int pages_reclaimed;
1129
1130 /*
1131 * We don't take the vm_page_queue_lock here because we don't want
1132 * vm_pressure_monitor() to get in the way of the vm_pageout_scan()
1133 * thread when it's trying to reclaim memory. We don't need fully
1134 * accurate monitoring anyway...
1135 */
1136
1137 if (wait_for_pressure) {
1138 /* wait until there's memory pressure */
1139 while (vm_page_free_count >= vm_page_free_target) {
1140 wr = assert_wait((event_t) &vm_page_free_wanted,
1141 THREAD_INTERRUPTIBLE);
1142 if (wr == THREAD_WAITING) {
1143 wr = thread_block(THREAD_CONTINUE_NULL);
1144 }
1145 if (wr == THREAD_INTERRUPTED) {
1146 return KERN_ABORTED;
1147 }
1148 if (wr == THREAD_AWAKENED) {
1149 /*
1150 * The memory pressure might have already
1151 * been relieved but let's not block again
1152 * and let's report that there was memory
1153 * pressure at some point.
1154 */
1155 break;
1156 }
1157 }
1158 }
1159
1160 /* provide the number of pages the system wants to reclaim */
1161 if (pages_wanted_p != NULL) {
1162 *pages_wanted_p = mach_vm_ctl_page_free_wanted();
1163 }
1164
1165 if (pages_reclaimed_p == NULL) {
1166 return KERN_SUCCESS;
1167 }
1168
1169 /* provide number of pages reclaimed in the last "nsecs_monitored" */
1170 vm_pageout_now = vm_pageout_stat_now;
1171 pages_reclaimed = 0;
1172 for (vm_pageout_then =
1173 VM_PAGEOUT_STAT_BEFORE(vm_pageout_now);
1174 vm_pageout_then != vm_pageout_now &&
1175 nsecs_monitored-- != 0;
1176 vm_pageout_then =
1177 VM_PAGEOUT_STAT_BEFORE(vm_pageout_then)) {
1178 pages_reclaimed += vm_pageout_stats[vm_pageout_then].reclaimed;
1179 }
1180 *pages_reclaimed_p = pages_reclaimed;
1181
1182 return KERN_SUCCESS;
1183 }
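/*
 * Illustrative usage sketch (hypothetical caller, not part of xnu): ask for
 * the pages reclaimed over the last 10 one-second buckets without blocking,
 * plus the current shortfall.  The signature matches the definition above.
 */
#if 0 /* example only; never built as part of the kernel */
static void
ex_report_pressure(void)
{
	unsigned int reclaimed = 0;
	unsigned int wanted = 0;
	kern_return_t kr;

	kr = mach_vm_pressure_monitor(FALSE,    /* don't block waiting for pressure */
	    10,                                 /* last 10 one-second buckets */
	    &reclaimed, &wanted);
	if (kr == KERN_SUCCESS) {
		/*
		 * reclaimed: pages freed recently;
		 * wanted: vm_page_free_target minus vm_page_free_count (0 if not short)
		 */
	}
}
#endif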
1184
1185
1186
1187 #if DEVELOPMENT || DEBUG
1188
1189 static void
1190 vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t *, int);
1191
1192 /*
1193 * condition variable used to make sure there is
1194 * only a single sweep going on at a time
1195 */
1196 boolean_t vm_pageout_disconnect_all_pages_active = FALSE;
1197
1198
1199 void
1200 vm_pageout_disconnect_all_pages()
1201 {
1202 vm_page_lock_queues();
1203
1204 if (vm_pageout_disconnect_all_pages_active == TRUE) {
1205 vm_page_unlock_queues();
1206 return;
1207 }
1208 vm_pageout_disconnect_all_pages_active = TRUE;
1209 vm_page_unlock_queues();
1210
1211 vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_throttled, vm_page_throttled_count);
1212 vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_anonymous, vm_page_anonymous_count);
1213 vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_active, vm_page_active_count);
1214
1215 vm_pageout_disconnect_all_pages_active = FALSE;
1216 }
1217
1218
1219 void
1220 vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t *q, int qcount)
1221 {
1222 vm_page_t m;
1223 vm_object_t t_object = NULL;
1224 vm_object_t l_object = NULL;
1225 vm_object_t m_object = NULL;
1226 int delayed_unlock = 0;
1227 int try_failed_count = 0;
1228 int disconnected_count = 0;
1229 int paused_count = 0;
1230 int object_locked_count = 0;
1231
1232 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_ALL_PAGE_MAPPINGS)) | DBG_FUNC_START,
1233 q, qcount, 0, 0, 0);
1234
1235 vm_page_lock_queues();
1236
1237 while (qcount && !vm_page_queue_empty(q)) {
1238
1239 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1240
1241 m = (vm_page_t) vm_page_queue_first(q);
1242 m_object = VM_PAGE_OBJECT(m);
1243
1244 /*
1245 * check to see if we currently are working
1246 * with the same object... if so, we've
1247 * already got the lock
1248 */
1249 if (m_object != l_object) {
1250 /*
1251 * the object associated with candidate page is
1252 * different from the one we were just working
1253 * with... dump the lock if we still own it
1254 */
1255 if (l_object != NULL) {
1256 vm_object_unlock(l_object);
1257 l_object = NULL;
1258 }
1259 if (m_object != t_object)
1260 try_failed_count = 0;
1261
1262 /*
1263 * Try to lock object; since we've already got the
1264 * page queues lock, we can only 'try' for this one.
1265 * if the 'try' fails, we need to do a mutex_pause
1266 * to allow the owner of the object lock a chance to
1267 * run...
1268 */
1269 if ( !vm_object_lock_try_scan(m_object)) {
1270
1271 if (try_failed_count > 20) {
1272 goto reenter_pg_on_q;
1273 }
1274 vm_page_unlock_queues();
1275 mutex_pause(try_failed_count++);
1276 vm_page_lock_queues();
1277 delayed_unlock = 0;
1278
1279 paused_count++;
1280
1281 t_object = m_object;
1282 continue;
1283 }
1284 object_locked_count++;
1285
1286 l_object = m_object;
1287 }
1288 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error || m->free_when_done) {
1289 /*
1290 * put it back on the head of its queue
1291 */
1292 goto reenter_pg_on_q;
1293 }
1294 if (m->pmapped == TRUE) {
1295
1296 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
1297
1298 disconnected_count++;
1299 }
1300 reenter_pg_on_q:
1301 vm_page_queue_remove(q, m, vm_page_t, pageq);
1302 vm_page_queue_enter(q, m, vm_page_t, pageq);
1303
1304 qcount--;
1305 try_failed_count = 0;
1306
1307 if (delayed_unlock++ > 128) {
1308
1309 if (l_object != NULL) {
1310 vm_object_unlock(l_object);
1311 l_object = NULL;
1312 }
1313 lck_mtx_yield(&vm_page_queue_lock);
1314 delayed_unlock = 0;
1315 }
1316 }
1317 if (l_object != NULL) {
1318 vm_object_unlock(l_object);
1319 l_object = NULL;
1320 }
1321 vm_page_unlock_queues();
1322
1323 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_ALL_PAGE_MAPPINGS)) | DBG_FUNC_END,
1324 q, disconnected_count, object_locked_count, paused_count, 0);
1325 }
1326
1327 #endif
1328
1329
1330 static void
1331 vm_pageout_page_queue(vm_page_queue_head_t *, int);
1332
1333 /*
1334 * condition variable used to make sure there is
1335 * only a single sweep going on at a time
1336 */
1337 boolean_t vm_pageout_anonymous_pages_active = FALSE;
1338
1339
1340 void
1341 vm_pageout_anonymous_pages()
1342 {
1343 if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
1344
1345 vm_page_lock_queues();
1346
1347 if (vm_pageout_anonymous_pages_active == TRUE) {
1348 vm_page_unlock_queues();
1349 return;
1350 }
1351 vm_pageout_anonymous_pages_active = TRUE;
1352 vm_page_unlock_queues();
1353
1354 vm_pageout_page_queue(&vm_page_queue_throttled, vm_page_throttled_count);
1355 vm_pageout_page_queue(&vm_page_queue_anonymous, vm_page_anonymous_count);
1356 vm_pageout_page_queue(&vm_page_queue_active, vm_page_active_count);
1357
1358 if (VM_CONFIG_SWAP_IS_PRESENT)
1359 vm_consider_swapping();
1360
1361 vm_page_lock_queues();
1362 vm_pageout_anonymous_pages_active = FALSE;
1363 vm_page_unlock_queues();
1364 }
1365 }
1366
1367
1368 void
1369 vm_pageout_page_queue(vm_page_queue_head_t *q, int qcount)
1370 {
1371 vm_page_t m;
1372 vm_object_t t_object = NULL;
1373 vm_object_t l_object = NULL;
1374 vm_object_t m_object = NULL;
1375 int delayed_unlock = 0;
1376 int try_failed_count = 0;
1377 int refmod_state;
1378 int pmap_options;
1379 struct vm_pageout_queue *iq;
1380 ppnum_t phys_page;
1381
1382
1383 iq = &vm_pageout_queue_internal;
1384
1385 vm_page_lock_queues();
1386
1387 while (qcount && !vm_page_queue_empty(q)) {
1388
1389 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1390
1391 if (VM_PAGE_Q_THROTTLED(iq)) {
1392
1393 if (l_object != NULL) {
1394 vm_object_unlock(l_object);
1395 l_object = NULL;
1396 }
1397 iq->pgo_draining = TRUE;
1398
1399 assert_wait((event_t) (&iq->pgo_laundry + 1), THREAD_INTERRUPTIBLE);
1400 vm_page_unlock_queues();
1401
1402 thread_block(THREAD_CONTINUE_NULL);
1403
1404 vm_page_lock_queues();
1405 delayed_unlock = 0;
1406 continue;
1407 }
1408 m = (vm_page_t) vm_page_queue_first(q);
1409 m_object = VM_PAGE_OBJECT(m);
1410
1411 /*
1412 * check to see if we currently are working
1413 * with the same object... if so, we've
1414 * already got the lock
1415 */
1416 if (m_object != l_object) {
1417 if ( !m_object->internal)
1418 goto reenter_pg_on_q;
1419
1420 /*
1421 * the object associated with candidate page is
1422 * different from the one we were just working
1423 * with... dump the lock if we still own it
1424 */
1425 if (l_object != NULL) {
1426 vm_object_unlock(l_object);
1427 l_object = NULL;
1428 }
1429 if (m_object != t_object)
1430 try_failed_count = 0;
1431
1432 /*
1433 * Try to lock object; since we've already got the
1434 * page queues lock, we can only 'try' for this one.
1435 * if the 'try' fails, we need to do a mutex_pause
1436 * to allow the owner of the object lock a chance to
1437 * run...
1438 */
1439 if ( !vm_object_lock_try_scan(m_object)) {
1440
1441 if (try_failed_count > 20) {
1442 goto reenter_pg_on_q;
1443 }
1444 vm_page_unlock_queues();
1445 mutex_pause(try_failed_count++);
1446 vm_page_lock_queues();
1447 delayed_unlock = 0;
1448
1449 t_object = m_object;
1450 continue;
1451 }
1452 l_object = m_object;
1453 }
1454 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error || m->free_when_done) {
1455 /*
1456 * page is not to be cleaned
1457 * put it back on the head of its queue
1458 */
1459 goto reenter_pg_on_q;
1460 }
1461 phys_page = VM_PAGE_GET_PHYS_PAGE(m);
1462
1463 if (m->reference == FALSE && m->pmapped == TRUE) {
1464 refmod_state = pmap_get_refmod(phys_page);
1465
1466 if (refmod_state & VM_MEM_REFERENCED)
1467 m->reference = TRUE;
1468 if (refmod_state & VM_MEM_MODIFIED) {
1469 SET_PAGE_DIRTY(m, FALSE);
1470 }
1471 }
1472 if (m->reference == TRUE) {
1473 m->reference = FALSE;
1474 pmap_clear_refmod_options(phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
1475 goto reenter_pg_on_q;
1476 }
1477 if (m->pmapped == TRUE) {
1478 if (m->dirty || m->precious) {
1479 pmap_options = PMAP_OPTIONS_COMPRESSOR;
1480 } else {
1481 pmap_options = PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
1482 }
1483 refmod_state = pmap_disconnect_options(phys_page, pmap_options, NULL);
1484 if (refmod_state & VM_MEM_MODIFIED) {
1485 SET_PAGE_DIRTY(m, FALSE);
1486 }
1487 }
1488 if ( !m->dirty && !m->precious) {
1489 vm_page_unlock_queues();
1490 VM_PAGE_FREE(m);
1491 vm_page_lock_queues();
1492 delayed_unlock = 0;
1493
1494 goto next_pg;
1495 }
1496 if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL) {
1497
1498 if (!m_object->pager_initialized) {
1499
1500 vm_page_unlock_queues();
1501
1502 vm_object_collapse(m_object, (vm_object_offset_t) 0, TRUE);
1503
1504 if (!m_object->pager_initialized)
1505 vm_object_compressor_pager_create(m_object);
1506
1507 vm_page_lock_queues();
1508 delayed_unlock = 0;
1509 }
1510 if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL)
1511 goto reenter_pg_on_q;
1512 /*
1513 * vm_object_compressor_pager_create will drop the object lock
1514 * which means 'm' may no longer be valid to use
1515 */
1516 continue;
1517 }
1518 /*
1519 * we've already factored out pages in the laundry which
1520 * means this page can't be on the pageout queue so it's
1521 * safe to do the vm_page_queues_remove
1522 */
1523 vm_page_queues_remove(m, TRUE);
1524
1525 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1526
1527 vm_pageout_cluster(m, FALSE, FALSE);
1528
1529 goto next_pg;
1530
1531 reenter_pg_on_q:
1532 vm_page_queue_remove(q, m, vm_page_t, pageq);
1533 vm_page_queue_enter(q, m, vm_page_t, pageq);
1534 next_pg:
1535 qcount--;
1536 try_failed_count = 0;
1537
1538 if (delayed_unlock++ > 128) {
1539
1540 if (l_object != NULL) {
1541 vm_object_unlock(l_object);
1542 l_object = NULL;
1543 }
1544 lck_mtx_yield(&vm_page_queue_lock);
1545 delayed_unlock = 0;
1546 }
1547 }
1548 if (l_object != NULL) {
1549 vm_object_unlock(l_object);
1550 l_object = NULL;
1551 }
1552 vm_page_unlock_queues();
1553 }
1554
1555
1556
1557 /*
1558 * function in BSD to apply I/O throttle to the pageout thread
1559 */
1560 extern void vm_pageout_io_throttle(void);
1561
1562 #define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, obj) \
1563 MACRO_BEGIN \
1564 /* \
1565 * If a "reusable" page somehow made it back into \
1566 * the active queue, it's been re-used and is not \
1567 * quite re-usable. \
1568 * If the VM object was "all_reusable", consider it \
1569 * as "all re-used" instead of converting it to \
1570 * "partially re-used", which could be expensive. \
1571 */ \
1572 assert(VM_PAGE_OBJECT((m)) == (obj)); \
1573 if ((m)->reusable || \
1574 (obj)->all_reusable) { \
1575 vm_object_reuse_pages((obj), \
1576 (m)->offset, \
1577 (m)->offset + PAGE_SIZE_64, \
1578 FALSE); \
1579 } \
1580 MACRO_END
1581
1582
1583 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT 64
1584 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX 1024
1585
1586 #define FCS_IDLE 0
1587 #define FCS_DELAYED 1
1588 #define FCS_DEADLOCK_DETECTED 2
1589
1590 struct flow_control {
1591 int state;
1592 mach_timespec_t ts;
1593 };
1594
1595 #if CONFIG_BACKGROUND_QUEUE
1596 uint64_t vm_pageout_considered_bq_internal = 0;
1597 uint64_t vm_pageout_considered_bq_external = 0;
1598 uint64_t vm_pageout_rejected_bq_internal = 0;
1599 uint64_t vm_pageout_rejected_bq_external = 0;
1600 #endif
1601 uint32_t vm_pageout_considered_page = 0;
1602 uint32_t vm_page_filecache_min = 0;
1603
1604 #define ANONS_GRABBED_LIMIT 2
1605
1606 #if CONFIG_SECLUDED_MEMORY
1607 extern vm_page_t vm_page_grab_secluded(void);
1608 uint64_t vm_pageout_freed_from_secluded = 0;
1609 uint64_t vm_pageout_secluded_reactivated = 0; /* debugging; how many secluded pages are found to be referenced on pageout (and are therefore reactivated) */
1610 uint64_t vm_pageout_secluded_burst_count = 0;
1611 #endif /* CONFIG_SECLUDED_MEMORY */
1612
1613 /*
1614 * vm_pageout_scan does the dirty work for the pageout daemon.
1615 * It returns with both vm_page_queue_free_lock and vm_page_queue_lock
1616 * held and vm_page_free_wanted == 0.
1617 */
1618 void
1619 vm_pageout_scan(void)
1620 {
1621 unsigned int loop_count = 0;
1622 unsigned int inactive_burst_count = 0;
1623 unsigned int active_burst_count = 0;
1624 unsigned int reactivated_this_call;
1625 unsigned int reactivate_limit;
1626 vm_page_t local_freeq = NULL;
1627 int local_freed = 0;
1628 int delayed_unlock;
1629 int delayed_unlock_limit = 0;
1630 int refmod_state = 0;
1631 int vm_pageout_deadlock_target = 0;
1632 struct vm_pageout_queue *iq;
1633 struct vm_pageout_queue *eq;
1634 struct vm_speculative_age_q *sq;
1635 struct flow_control flow_control = { 0, { 0, 0 } };
1636 boolean_t inactive_throttled = FALSE;
1637 boolean_t try_failed;
1638 mach_timespec_t ts;
1639 unsigned int msecs = 0;
1640 vm_object_t object;
1641 vm_object_t last_object_tried;
1642 uint32_t catch_up_count = 0;
1643 uint32_t inactive_reclaim_run;
1644 boolean_t exceeded_burst_throttle;
1645 boolean_t grab_anonymous = FALSE;
1646 boolean_t force_anonymous = FALSE;
1647 int anons_grabbed = 0;
1648 int page_prev_q_state = 0;
1649 boolean_t requeue_insert_first = FALSE;
1650 #if CONFIG_BACKGROUND_QUEUE
1651 boolean_t ignore_reference = FALSE;
1652 #endif
1653 #if CONFIG_SECLUDED_MEMORY
1654 boolean_t ignore_reference_secluded;
1655 #endif /* CONFIG_SECLUDED_MEMORY */
1656 int cache_evict_throttle = 0;
1657 uint32_t vm_pageout_inactive_external_forced_reactivate_limit = 0;
1658 int force_purge = 0;
1659 #define DELAY_SPECULATIVE_AGE 1000
1660 int delay_speculative_age = 0;
1661 vm_object_t m_object = VM_OBJECT_NULL;
1662
1663 #if VM_PRESSURE_EVENTS
1664 vm_pressure_level_t pressure_level;
1665 #endif /* VM_PRESSURE_EVENTS */
1666
1667 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START,
1668 vm_pageout_speculative_clean, vm_pageout_inactive_clean,
1669 vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
1670
1671 flow_control.state = FCS_IDLE;
1672 iq = &vm_pageout_queue_internal;
1673 eq = &vm_pageout_queue_external;
1674 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
1675
1676
1677 XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
1678
1679
1680 vm_page_lock_queues();
1681 delayed_unlock = 1; /* must be nonzero if Qs are locked, 0 if unlocked */
1682
1683 /*
1684 * Calculate the max number of referenced pages on the inactive
1685 * queue that we will reactivate.
1686 */
1687 reactivated_this_call = 0;
1688 reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count +
1689 vm_page_inactive_count);
1690 inactive_reclaim_run = 0;
1691
1692 vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
1693
1694 /*
1695 * We want to gradually dribble pages from the active queue
1696 * to the inactive queue. If we let the inactive queue get
1697 * very small, and then suddenly dump many pages into it,
1698 * those pages won't get a sufficient chance to be referenced
1699 * before we start taking them from the inactive queue.
1700 *
1701 * We must limit the rate at which we send pages to the pagers
1702 * so that we don't tie up too many pages in the I/O queues.
1703 * We implement a throttling mechanism using the laundry count
1704 * to limit the number of pages outstanding to the default
1705 * and external pagers. We can bypass the throttles and look
1706 * for clean pages if the pageout queues don't drain in a timely
1707 * fashion since this may indicate that the pageout paths are
1708 * stalled waiting for memory, which only we can provide.
1709 */
1710
1711
1712 Restart:
1713
1714
1715 assert(delayed_unlock!=0);
1716
1717 /*
1718 * Recalculate vm_page_inactivate_target.
1719 */
1720 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1721 vm_page_inactive_count +
1722 vm_page_speculative_count);
1723
1724 vm_page_anonymous_min = vm_page_inactive_target / 20;
1725
1726
1727 /*
1728 * don't want to wake the pageout_scan thread up every time we fall below
1729 * the targets... set a low water mark at 0.25% below the target
1730 */
1731 vm_page_inactive_min = vm_page_inactive_target - (vm_page_inactive_target / 400);
1732
1733 if (vm_page_speculative_percentage > 50)
1734 vm_page_speculative_percentage = 50;
1735 else if (vm_page_speculative_percentage <= 0)
1736 vm_page_speculative_percentage = 1;
1737
1738 vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
1739 vm_page_inactive_count);
1740
1741 object = NULL;
1742 last_object_tried = NULL;
1743 try_failed = FALSE;
1744
1745 if ((vm_page_inactive_count + vm_page_speculative_count) < VM_PAGE_INACTIVE_HEALTHY_LIMIT(vm_page_active_count))
1746 catch_up_count = vm_page_inactive_count + vm_page_speculative_count;
1747 else
1748 catch_up_count = 0;
1749
1750 for (;;) {
1751 vm_page_t m;
1752
1753 DTRACE_VM2(rev, int, 1, (uint64_t *), NULL);
1754
1755 #if CONFIG_SECLUDED_MEMORY
1756 if (vm_page_secluded_count > vm_page_secluded_target &&
1757 object != NULL) {
1758 vm_object_unlock(object);
1759 object = NULL;
1760 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1761 }
1762
1763 /*
1764 * Deal with secluded_q overflow.
1765 */
1766 if (vm_page_secluded_count > vm_page_secluded_target &&
1767 secluded_aging_policy == SECLUDED_AGING_FIFO) {
1768 unsigned int secluded_overflow;
1769 vm_page_t secluded_page;
1770
1771 /*
1772 * SECLUDED_AGING_FIFO:
1773 * No aging, just reclaim the excess pages
1774 * at the tail of the secluded queue.
1775 * We're reclaiming pages and we're not hogging
1776 * any global lock, so no need for throttling.
1777 */
1778
1779 secluded_overflow = (vm_page_secluded_count -
1780 vm_page_secluded_target);
1781 /* transfer to free queue */
1782 vm_page_unlock_queues();
1783 while (secluded_overflow--) {
1784 secluded_page = vm_page_grab_secluded();
1785 if (secluded_page == VM_PAGE_NULL) {
1786 break;
1787 }
1788 assert(secluded_page->busy);
1789 assert(secluded_page->pageq.next == 0 &&
1790 secluded_page->pageq.prev == 0);
1791
1792 secluded_page->snext = local_freeq;
1793 local_freeq = secluded_page;
1794 local_freed++;
1795 secluded_page = VM_PAGE_NULL;
1796 }
1797 } else if (vm_page_secluded_count > vm_page_secluded_target &&
1798 secluded_aging_policy == SECLUDED_AGING_ALONG_ACTIVE) {
1799 unsigned int secluded_overflow;
1800 vm_page_t secluded_page;
1801
1802 /*
1803 * SECLUDED_AGING_ALONG_ACTIVE:
1804 * There might be free pages at the tail of the
1805 * secluded queue:
1806 * just move them to the free queue (in batches).
1807 * There can also be an excessive number of "inuse"
1808 * pages:
1809 * we age them by resetting their "referenced" bit and
1810 * moving them to the inactive queue. Their trip
1811 * through the secluded queue was equivalent to a trip
1812 * through the active queue.
1813 *
1814 * We're holding the page queue lock, so we need
1815 * to throttle and give someone else a chance to
1816 * grab that lock if needed.
1817 *
1818 * We're also limiting the number of secluded "inuse"
1819 * pages that get moved to the inactive queue, using
1820 * the same "active_burst_count" method we use when
1821 * balancing the active and inactive queues, because
1822 * there can be a large number
1823 * of extra "inuse" pages and handling them gets in the
1824 * way of actually reclaiming memory.
1825 */
1826
1827 active_burst_count = MIN(vm_pageout_burst_active_throttle,
1828 vm_page_secluded_count_inuse);
1829 delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT;
1830 delayed_unlock = 1;
1831 secluded_overflow = (vm_page_secluded_count -
1832 vm_page_secluded_target);
1833 while (secluded_overflow-- > 0 &&
1834 vm_page_secluded_count > vm_page_secluded_target) {
1835 assert((vm_page_secluded_count_free +
1836 vm_page_secluded_count_inuse) ==
1837 vm_page_secluded_count);
1838 vm_page_queue_remove_first(&vm_page_queue_secluded,
1839 secluded_page,
1840 vm_page_t,
1841 pageq);
1842 assert(secluded_page->vm_page_q_state ==
1843 VM_PAGE_ON_SECLUDED_Q);
1844 VM_PAGE_ZERO_PAGEQ_ENTRY(secluded_page);
1845 secluded_page->vm_page_q_state = VM_PAGE_NOT_ON_Q;
1846 vm_page_secluded_count--;
1847 assert(!secluded_page->fictitious);
1848 assert(!VM_PAGE_WIRED(secluded_page));
1849 if (secluded_page->vm_page_object == 0) {
1850 /* transfer to free queue */
1851 assert(secluded_page->busy);
1852 vm_page_secluded_count_free--;
1853 secluded_page->snext = local_freeq;
1854 local_freeq = secluded_page;
1855 local_freed++;
1856 } else {
1857 vm_page_secluded_count_inuse--;
1858 /* transfer to head of inactive queue */
1859 pmap_clear_refmod_options(
1860 VM_PAGE_GET_PHYS_PAGE(secluded_page),
1861 VM_MEM_REFERENCED,
1862 PMAP_OPTIONS_NOFLUSH,
1863 (void *)NULL);
1864 vm_page_enqueue_inactive(secluded_page,
1865 FALSE);
1866 if (active_burst_count-- == 0) {
1867 vm_pageout_secluded_burst_count++;
1868 break;
1869 }
1870 }
1871 secluded_page = VM_PAGE_NULL;
1872 if (delayed_unlock++ > delayed_unlock_limit) {
1873 if (local_freeq) {
1874 vm_page_unlock_queues();
1875 VM_DEBUG_EVENT(
1876 vm_pageout_freelist,
1877 VM_PAGEOUT_FREELIST,
1878 DBG_FUNC_START,
1879 vm_page_free_count,
1880 local_freed,
1881 delayed_unlock_limit,
1882 1);
1883 vm_page_free_list(local_freeq,
1884 TRUE);
1885 VM_DEBUG_EVENT(
1886 vm_pageout_freelist,
1887 VM_PAGEOUT_FREELIST,
1888 DBG_FUNC_END,
1889 vm_page_free_count,
1890 0, 0, 1);
1891 local_freeq = NULL;
1892 local_freed = 0;
1893 vm_page_lock_queues();
1894 } else {
1895 lck_mtx_yield(&vm_page_queue_lock);
1896 }
1897 delayed_unlock = 1;
1898 }
1899 }
1900 delayed_unlock = 1;
1901 } else if (vm_page_secluded_count > vm_page_secluded_target &&
1902 secluded_aging_policy == SECLUDED_AGING_AFTER_INACTIVE) {
1903 /*
1904 * SECLUDED_AGING_AFTER_INACTIVE:
1905 * No balancing needed at this point: when we get to
1906 * the "choose a victim" part below, we'll consider the
1907 * extra secluded pages before any inactive page.
1908 */
1909 } else if (vm_page_secluded_count > vm_page_secluded_target &&
1910 secluded_aging_policy == SECLUDED_AGING_BEFORE_ACTIVE) {
1911 unsigned int secluded_overflow;
1912 vm_page_t secluded_page;
1913
1914 /*
1915 * SECLUDED_AGING_BEFORE_ACTIVE:
1916 * Excess secluded pages go to the active queue and
1917 * will later go to the inactive queue.
1918 */
1919 active_burst_count = MIN(vm_pageout_burst_active_throttle,
1920 vm_page_secluded_count_inuse);
1921 delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT;
1922 delayed_unlock = 1;
1923 secluded_overflow = (vm_page_secluded_count -
1924 vm_page_secluded_target);
1925 while (secluded_overflow-- > 0 &&
1926 vm_page_secluded_count > vm_page_secluded_target) {
1927 assert((vm_page_secluded_count_free +
1928 vm_page_secluded_count_inuse) ==
1929 vm_page_secluded_count);
1930 vm_page_queue_remove_first(&vm_page_queue_secluded,
1931 secluded_page,
1932 vm_page_t,
1933 pageq);
1934 assert(secluded_page->vm_page_q_state ==
1935 VM_PAGE_ON_SECLUDED_Q);
1936 VM_PAGE_ZERO_PAGEQ_ENTRY(secluded_page);
1937 secluded_page->vm_page_q_state = VM_PAGE_NOT_ON_Q;
1938 vm_page_secluded_count--;
1939 assert(!secluded_page->fictitious);
1940 assert(!VM_PAGE_WIRED(secluded_page));
1941 if (secluded_page->vm_page_object == 0) {
1942 /* transfer to free queue */
1943 assert(secluded_page->busy);
1944 vm_page_secluded_count_free--;
1945 secluded_page->snext = local_freeq;
1946 local_freeq = secluded_page;
1947 local_freed++;
1948 } else {
1949 vm_page_secluded_count_inuse--;
1950 /* transfer to head of active queue */
1951 vm_page_enqueue_active(secluded_page,
1952 FALSE);
1953 if (active_burst_count-- == 0) {
1954 vm_pageout_secluded_burst_count++;
1955 break;
1956 }
1957 }
1958 secluded_page = VM_PAGE_NULL;
1959 if (delayed_unlock++ > delayed_unlock_limit) {
1960 if (local_freeq) {
1961 vm_page_unlock_queues();
1962 VM_DEBUG_EVENT(
1963 vm_pageout_freelist,
1964 VM_PAGEOUT_FREELIST,
1965 DBG_FUNC_START,
1966 vm_page_free_count,
1967 local_freed,
1968 delayed_unlock_limit,
1969 1);
1970 vm_page_free_list(local_freeq,
1971 TRUE);
1972 VM_DEBUG_EVENT(
1973 vm_pageout_freelist,
1974 VM_PAGEOUT_FREELIST,
1975 DBG_FUNC_END,
1976 vm_page_free_count,
1977 0, 0, 1);
1978 local_freeq = NULL;
1979 local_freed = 0;
1980 vm_page_lock_queues();
1981 } else {
1982 lck_mtx_yield(&vm_page_queue_lock);
1983 }
1984 delayed_unlock = 1;
1985 }
1986 }
1987 delayed_unlock = 1;
1988 } else if (vm_page_secluded_count > vm_page_secluded_target) {
1989 panic("unsupported secluded_aging_policy %d\n",
1990 secluded_aging_policy);
1991 }
1992 if (local_freeq) {
1993 vm_page_unlock_queues();
1994 VM_DEBUG_EVENT(vm_pageout_freelist,
1995 VM_PAGEOUT_FREELIST,
1996 DBG_FUNC_START,
1997 vm_page_free_count,
1998 local_freed,
1999 0,
2000 0);
2001 vm_page_free_list(local_freeq, TRUE);
2002 VM_DEBUG_EVENT(vm_pageout_freelist,
2003 VM_PAGEOUT_FREELIST,
2004 DBG_FUNC_END,
2005 vm_page_free_count, 0, 0, 0);
2006 local_freeq = NULL;
2007 local_freed = 0;
2008 vm_page_lock_queues();
2009 }
2010 #endif /* CONFIG_SECLUDED_MEMORY */
2011
2012 assert(delayed_unlock);
2013
2014 if (vm_upl_wait_for_pages < 0)
2015 vm_upl_wait_for_pages = 0;
2016
2017 delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT + vm_upl_wait_for_pages;
2018
2019 if (delayed_unlock_limit > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX)
2020 delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX;
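/*
 * Condensed sketch of the batching pattern this limit drives (debug
 * events omitted): pages bound for the free list are chained on
 * local_freeq and only flushed once delayed_unlock exceeds the limit,
 * so the page-queues lock is dropped periodically instead of per page.
 *
 *	if (delayed_unlock++ > delayed_unlock_limit) {
 *		if (local_freeq) {
 *			vm_page_unlock_queues();
 *			vm_page_free_list(local_freeq, TRUE);
 *			local_freeq = NULL;
 *			local_freed = 0;
 *			vm_page_lock_queues();
 *		} else {
 *			lck_mtx_yield(&vm_page_queue_lock);
 *		}
 *		delayed_unlock = 1;
 *	}
 */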
2021
2022 /*
2023 * Move pages from active to inactive if we're below the target
2024 */
2025 /* if we are trying to make clean, we need to make sure we actually have inactive - mj */
2026 if ((vm_page_inactive_count + vm_page_speculative_count) >= vm_page_inactive_target)
2027 goto done_moving_active_pages;
2028
2029 if (object != NULL) {
2030 vm_object_unlock(object);
2031 object = NULL;
2032 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2033 }
2034 /*
2035 * Don't sweep through active queue more than the throttle
2036 * which should be kept relatively low
2037 */
2038 active_burst_count = MIN(vm_pageout_burst_active_throttle, vm_page_active_count);
2039
2040 VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_START,
2041 vm_pageout_inactive, vm_pageout_inactive_used, vm_page_free_count, local_freed);
2042
2043 VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_NONE,
2044 vm_pageout_speculative_clean, vm_pageout_inactive_clean,
2045 vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
2046 memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_START);
2047
2048
2049 while (!vm_page_queue_empty(&vm_page_queue_active) && active_burst_count--) {
2050
2051 vm_pageout_active++;
2052
2053 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
2054
2055 assert(m->vm_page_q_state == VM_PAGE_ON_ACTIVE_Q);
2056 assert(!m->laundry);
2057 assert(VM_PAGE_OBJECT(m) != kernel_object);
2058 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
2059
2060 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
2061
2062 /*
2063 * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise...
2064 *
2065 * a TLB flush isn't really needed here since at worst we'll miss the reference bit being
2066 * updated in the PTE if a remote processor still has this mapping cached in its TLB when the
2067 * new reference happens. If no further references happen on the page after that remote TLB flushes,
2068 * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue
2069 * by pageout_scan, which is just fine since the last reference would have happened quite far
2070 * in the past (TLB caches don't hang around for very long), and of course could just as easily
2071 * have happened before we moved the page
2072 */
2073 pmap_clear_refmod_options(VM_PAGE_GET_PHYS_PAGE(m), VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
2074
2075 /*
2076 * The page might be absent or busy,
2077 * but vm_page_deactivate can handle that.
2078 * FALSE indicates that we don't want a H/W clear reference
2079 */
2080 vm_page_deactivate_internal(m, FALSE);
2081
2082 if (delayed_unlock++ > delayed_unlock_limit) {
2083
2084 if (local_freeq) {
2085 vm_page_unlock_queues();
2086
2087 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
2088 vm_page_free_count, local_freed, delayed_unlock_limit, 1);
2089
2090 vm_page_free_list(local_freeq, TRUE);
2091
2092 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
2093 vm_page_free_count, 0, 0, 1);
2094
2095 local_freeq = NULL;
2096 local_freed = 0;
2097 vm_page_lock_queues();
2098 } else {
2099 lck_mtx_yield(&vm_page_queue_lock);
2100 }
2101
2102 delayed_unlock = 1;
2103
2104 /*
2105 * continue the while loop processing
2106 * the active queue... need to hold
2107 * the page queues lock
2108 */
2109 }
2110 }
2111
2112 VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_END,
2113 vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count, vm_page_inactive_target);
2114 memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_END);
2115
2116 /**********************************************************************
2117 * above this point we're playing with the active queue
2118 * below this point we're playing with the throttling mechanisms
2119 * and the inactive queue
2120 **********************************************************************/
2121
2122 done_moving_active_pages:
2123
2124 #if CONFIG_BACKGROUND_QUEUE
2125 if ((vm_page_free_count + local_freed >= vm_page_free_target) &&
2126 ((vm_page_background_mode < VM_PAGE_BG_LEVEL_2) || (vm_page_background_count <= vm_page_background_target)))
2127 #else
2128 if (vm_page_free_count + local_freed >= vm_page_free_target)
2129 #endif
2130 {
2131 if (object != NULL) {
2132 vm_object_unlock(object);
2133 object = NULL;
2134 }
2135 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2136
2137 vm_page_unlock_queues();
2138
2139 if (local_freeq) {
2140
2141 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
2142 vm_page_free_count, local_freed, delayed_unlock_limit, 2);
2143
2144 vm_page_free_list(local_freeq, TRUE);
2145
2146 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
2147 vm_page_free_count, local_freed, 0, 2);
2148
2149 local_freeq = NULL;
2150 local_freed = 0;
2151 }
2152 vm_consider_waking_compactor_swapper();
2153
2154 vm_page_lock_queues();
2155
2156 /*
2157 * make sure the pageout I/O threads are running
2158 * throttled in case there are still requests
2159 * in the laundry... since we have met our targets
2160 * we don't need the laundry to be cleaned in a timely
2161 * fashion... so let's avoid interfering with foreground
2162 * activity
2163 */
2164 vm_pageout_adjust_io_throttles(iq, eq, TRUE);
2165
2166 /*
2167 * recalculate vm_page_inactivate_target
2168 */
2169 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
2170 vm_page_inactive_count +
2171 vm_page_speculative_count);
2172 if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) &&
2173 !vm_page_queue_empty(&vm_page_queue_active)) {
2174 /*
2175 * inactive target still not met... keep going
2176 * until we get the queues balanced...
2177 */
2178 continue;
2179 }
2180 lck_mtx_lock(&vm_page_queue_free_lock);
2181
2182 if ((vm_page_free_count >= vm_page_free_target) &&
2183 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
2184 /*
2185 * done - we have met our target *and*
2186 * there is no one waiting for a page.
2187 */
2188 return_from_scan:
2189 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
2190
2191 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE,
2192 vm_pageout_inactive, vm_pageout_inactive_used, 0, 0);
2193 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END,
2194 vm_pageout_speculative_clean, vm_pageout_inactive_clean,
2195 vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
2196
2197 return;
2198 }
2199 lck_mtx_unlock(&vm_page_queue_free_lock);
2200 }
2201
2202 /*
2203 * Before anything, we check if we have any ripe volatile
2204 * objects around. If so, try to purge the first object.
2205 * If the purge fails, fall through to reclaim a page instead.
2206 * If the purge succeeds, go back to the top and reevaluate
2207 * the new memory situation.
2208 */
2209
2210 assert(available_for_purge >= 0);
2211 force_purge = 0; /* no force-purging */
2212
2213 #if VM_PRESSURE_EVENTS
2214 pressure_level = memorystatus_vm_pressure_level;
2215
2216 if (pressure_level > kVMPressureNormal) {
2217
2218 if (pressure_level >= kVMPressureCritical) {
2219 force_purge = memorystatus_purge_on_critical;
2220 } else if (pressure_level >= kVMPressureUrgent) {
2221 force_purge = memorystatus_purge_on_urgent;
2222 } else if (pressure_level >= kVMPressureWarning) {
2223 force_purge = memorystatus_purge_on_warning;
2224 }
2225 }
2226 #endif /* VM_PRESSURE_EVENTS */
2227
2228 if (available_for_purge || force_purge) {
2229
2230 if (object != NULL) {
2231 vm_object_unlock(object);
2232 object = NULL;
2233 }
2234
2235 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_START);
2236
2237 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0);
2238 if (vm_purgeable_object_purge_one(force_purge, C_DONT_BLOCK)) {
2239 vm_pageout_purged_objects++;
2240 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0);
2241 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
2242 continue;
2243 }
2244 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1);
2245 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
2246 }
2247
2248 if (vm_page_queue_empty(&sq->age_q) && vm_page_speculative_count) {
2249 /*
2250 * try to pull pages from the aging bins...
2251 * see vm_page.h for an explanation of how
2252 * this mechanism works
2253 */
2254 struct vm_speculative_age_q *aq;
2255 boolean_t can_steal = FALSE;
2256 int num_scanned_queues;
2257
2258 aq = &vm_page_queue_speculative[speculative_steal_index];
2259
2260 num_scanned_queues = 0;
2261 while (vm_page_queue_empty(&aq->age_q) &&
2262 num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
2263
2264 speculative_steal_index++;
2265
2266 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2267 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2268
2269 aq = &vm_page_queue_speculative[speculative_steal_index];
2270 }
2271
2272 if (num_scanned_queues == VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) {
2273 /*
2274 * XXX We've scanned all the speculative
2275 * queues but still haven't found one
2276 * that is not empty, even though
2277 * vm_page_speculative_count is not 0.
2278 *
2279 * report the anomaly...
2280 */
2281 printf("vm_pageout_scan: "
2282 "all speculative queues empty "
2283 "but count=%d. Re-adjusting.\n",
2284 vm_page_speculative_count);
2285 if (vm_page_speculative_count > vm_page_speculative_count_drift_max)
2286 vm_page_speculative_count_drift_max = vm_page_speculative_count;
2287 vm_page_speculative_count_drifts++;
2288 #if DEVELOPMENT || DEBUG
2289 panic("vm_pageout_scan: vm_page_speculative_count=%d but queues are empty", vm_page_speculative_count);
2290 #endif /* DEVELOPMENT || DEBUG */
2291 /* readjust... */
2292 vm_page_speculative_count = 0;
2293 /* ... and continue */
2294 continue;
2295 }
2296
2297 if (vm_page_speculative_count > vm_page_speculative_target)
2298 can_steal = TRUE;
2299 else {
2300 if (!delay_speculative_age) {
2301 mach_timespec_t ts_fully_aged;
2302
2303 ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) / 1000;
2304 ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) % 1000)
2305 * 1000 * NSEC_PER_USEC;
2306
2307 ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);
2308
2309 clock_sec_t sec;
2310 clock_nsec_t nsec;
2311 clock_get_system_nanotime(&sec, &nsec);
2312 ts.tv_sec = (unsigned int) sec;
2313 ts.tv_nsec = nsec;
2314
2315 if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0)
2316 can_steal = TRUE;
2317 else
2318 delay_speculative_age++;
2319 } else {
2320 delay_speculative_age++;
2321 if (delay_speculative_age == DELAY_SPECULATIVE_AGE)
2322 delay_speculative_age = 0;
2323 }
2324 }
2325 if (can_steal == TRUE)
2326 vm_page_speculate_ageit(aq);
2327 }
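/*
 * Worked example for the ts_fully_aged math above, using purely
 * illustrative values (VM_PAGE_MAX_SPECULATIVE_AGE_Q == 10,
 * vm_page_speculative_q_age_ms == 333):
 *
 *	total_ms = 10 * 333 = 3330
 *	tv_sec   = 3330 / 1000 = 3
 *	tv_nsec  = (3330 % 1000) * 1000 * NSEC_PER_USEC = 330000000
 *
 * so a speculative bin becomes stealable roughly 3.33 seconds after
 * its age_ts timestamp.
 */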
2328 #if CONFIG_BACKGROUND_QUEUE
2329 if (vm_page_queue_empty(&sq->age_q) && cache_evict_throttle == 0 &&
2330 ((vm_page_background_mode == VM_PAGE_BG_DISABLED) || (vm_page_background_count <= vm_page_background_target)))
2331 #else
2332 if (vm_page_queue_empty(&sq->age_q) && cache_evict_throttle == 0)
2333 #endif
2334 {
2335 int pages_evicted;
2336
2337 if (object != NULL) {
2338 vm_object_unlock(object);
2339 object = NULL;
2340 }
2341 pages_evicted = vm_object_cache_evict(100, 10);
2342
2343 if (pages_evicted) {
2344
2345 vm_pageout_cache_evicted += pages_evicted;
2346
2347 VM_DEBUG_EVENT(vm_pageout_cache_evict, VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE,
2348 vm_page_free_count, pages_evicted, vm_pageout_cache_evicted, 0);
2349 memoryshot(VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE);
2350
2351 /*
2352 * we just freed up to 100 pages,
2353 * so go back to the top of the main loop
2354 * and re-evaluate the memory situation
2355 */
2356 continue;
2357 } else
2358 cache_evict_throttle = 100;
2359 }
2360 if (cache_evict_throttle)
2361 cache_evict_throttle--;
2362
2363 #if CONFIG_JETSAM
2364 /*
2365 * don't let the filecache_min fall below 15% of available memory
2366 * on systems with an active compressor that isn't nearing its
2367 * limits w/r to accepting new data
2368 *
2369 * on systems w/o the compressor/swapper, the filecache is always
2370 * a very large percentage of the AVAILABLE_NON_COMPRESSED_MEMORY
2371 * since most (if not all) of the anonymous pages are in the
2372 * throttled queue (which isn't counted as available) which
2373 * effectively disables this filter
2374 */
2375 if (vm_compressor_low_on_space())
2376 vm_page_filecache_min = 0;
2377 else
2378 vm_page_filecache_min = (AVAILABLE_NON_COMPRESSED_MEMORY / 7);
2379 #else
2380 /*
2381 * don't let the filecache_min fall below 33% of available memory...
2382 */
2383 vm_page_filecache_min = (AVAILABLE_NON_COMPRESSED_MEMORY / 3);
2384 #endif
2385 if (vm_page_free_count < (vm_page_free_reserved / 4))
2386 vm_page_filecache_min = 0;
2387
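/*
 * Illustrative arithmetic (hypothetical page counts): with 700000
 * pages of AVAILABLE_NON_COMPRESSED_MEMORY the jetsam build keeps
 * vm_page_filecache_min at 700000 / 7 = 100000 pages (roughly the 15%
 * cited above), while the non-jetsam build keeps 700000 / 3 = 233333
 * pages (~33%), unless free memory has dropped below a quarter of the
 * reserve, in which case the floor is removed entirely.
 */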
2388 exceeded_burst_throttle = FALSE;
2389 /*
2390 * Sometimes we have to pause:
2391 * 1) No inactive pages - nothing to do.
2392 * 2) Loop control - no acceptable pages found on the inactive queue
2393 * within the last vm_pageout_burst_inactive_throttle iterations
2394 * 3) Flow control - default pageout queue is full
2395 */
2396 if (vm_page_queue_empty(&vm_page_queue_inactive) &&
2397 vm_page_queue_empty(&vm_page_queue_anonymous) &&
2398 vm_page_queue_empty(&sq->age_q)) {
2399 vm_pageout_scan_empty_throttle++;
2400 msecs = vm_pageout_empty_wait;
2401 goto vm_pageout_scan_delay;
2402
2403 } else if (inactive_burst_count >=
2404 MIN(vm_pageout_burst_inactive_throttle,
2405 (vm_page_inactive_count +
2406 vm_page_speculative_count))) {
2407 vm_pageout_scan_burst_throttle++;
2408 msecs = vm_pageout_burst_wait;
2409
2410 exceeded_burst_throttle = TRUE;
2411 goto vm_pageout_scan_delay;
2412
2413 } else if (vm_page_free_count > (vm_page_free_reserved / 4) &&
2414 VM_PAGEOUT_SCAN_NEEDS_TO_THROTTLE()) {
2415 vm_pageout_scan_swap_throttle++;
2416 msecs = vm_pageout_swap_wait;
2417 goto vm_pageout_scan_delay;
2418
2419 } else if (VM_PAGE_Q_THROTTLED(iq) &&
2420 VM_DYNAMIC_PAGING_ENABLED()) {
2421 clock_sec_t sec;
2422 clock_nsec_t nsec;
2423
2424 switch (flow_control.state) {
2425
2426 case FCS_IDLE:
2427 if ((vm_page_free_count + local_freed) < vm_page_free_target) {
2428
2429 if (object != NULL) {
2430 vm_object_unlock(object);
2431 object = NULL;
2432 }
2433 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2434
2435 vm_page_unlock_queues();
2436
2437 if (local_freeq) {
2438
2439 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
2440 vm_page_free_count, local_freed, delayed_unlock_limit, 3);
2441
2442 vm_page_free_list(local_freeq, TRUE);
2443
2444 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
2445 vm_page_free_count, local_freed, 0, 3);
2446
2447 local_freeq = NULL;
2448 local_freed = 0;
2449 }
2450 thread_yield_internal(1);
2451
2452 vm_page_lock_queues();
2453
2454 if (!VM_PAGE_Q_THROTTLED(iq)) {
2455 vm_pageout_scan_yield_unthrottled++;
2456 continue;
2457 }
2458 if (vm_page_pageable_external_count > vm_page_filecache_min &&
2459 !vm_page_queue_empty(&vm_page_queue_inactive)) {
2460 anons_grabbed = ANONS_GRABBED_LIMIT;
2461 vm_pageout_scan_throttle_deferred++;
2462 goto consider_inactive;
2463 }
2464 if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) && vm_page_active_count)
2465 continue;
2466 }
2467 reset_deadlock_timer:
2468 ts.tv_sec = vm_pageout_deadlock_wait / 1000;
2469 ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
2470 clock_get_system_nanotime(&sec, &nsec);
2471 flow_control.ts.tv_sec = (unsigned int) sec;
2472 flow_control.ts.tv_nsec = nsec;
2473 ADD_MACH_TIMESPEC(&flow_control.ts, &ts);
2474
2475 flow_control.state = FCS_DELAYED;
2476 msecs = vm_pageout_deadlock_wait;
2477
2478 break;
2479
2480 case FCS_DELAYED:
2481 clock_get_system_nanotime(&sec, &nsec);
2482 ts.tv_sec = (unsigned int) sec;
2483 ts.tv_nsec = nsec;
2484
2485 if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
2486 /*
2487 * the pageout thread for the default pager is potentially
2488 * deadlocked since the
2489 * default pager queue has been throttled for more than the
2490 * allowable time... we need to move some clean pages or dirty
2491 * pages belonging to the external pagers if they aren't throttled
2492 * vm_page_free_wanted represents the number of threads currently
2493 * blocked waiting for pages... we'll move one page for each of
2494 * these plus a fixed amount to break the logjam... once we're done
2495 * moving this number of pages, we'll re-enter the FCS_DELAYED state
2496 * with a new timeout target since we have no way of knowing
2497 * whether we've broken the deadlock except through observation
2498 * of the queue associated with the default pager... we need to
2499 * stop moving pages and allow the system to run to see what
2500 * state it settles into.
2501 */
2502 vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted + vm_page_free_wanted_privileged;
2503 vm_pageout_scan_deadlock_detected++;
2504 flow_control.state = FCS_DEADLOCK_DETECTED;
2505 thread_wakeup((event_t) &vm_pageout_garbage_collect);
2506 goto consider_inactive;
2507 }
2508 /*
2509 * just resniff instead of trying
2510 * to compute a new delay time... we're going to be
2511 * awakened immediately upon a laundry completion,
2512 * so we won't wait any longer than necessary
2513 */
2514 msecs = vm_pageout_idle_wait;
2515 break;
2516
2517 case FCS_DEADLOCK_DETECTED:
2518 if (vm_pageout_deadlock_target)
2519 goto consider_inactive;
2520 goto reset_deadlock_timer;
2521
2522 }
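/*
 * Summary of the flow-control state machine driven by the switch
 * above (states are the real FCS_* values, the notes are editorial):
 *
 *	FCS_IDLE              queue just became throttled: arm a deadlock
 *	                      timer of vm_pageout_deadlock_wait ms and
 *	                      move to FCS_DELAYED
 *	FCS_DELAYED           timer expired with the queue still
 *	                      throttled: assume a deadlock, set a relief
 *	                      target and move to FCS_DEADLOCK_DETECTED;
 *	                      otherwise just resniff after
 *	                      vm_pageout_idle_wait ms
 *	FCS_DEADLOCK_DETECTED keep stealing pages until the relief target
 *	                      is met, then re-arm the timer
 */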
2523 vm_pageout_scan_delay:
2524 if (object != NULL) {
2525 vm_object_unlock(object);
2526 object = NULL;
2527 }
2528 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2529
2530 vm_page_unlock_queues();
2531
2532 if (local_freeq) {
2533
2534 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
2535 vm_page_free_count, local_freed, delayed_unlock_limit, 3);
2536
2537 vm_page_free_list(local_freeq, TRUE);
2538
2539 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
2540 vm_page_free_count, local_freed, 0, 3);
2541
2542 local_freeq = NULL;
2543 local_freed = 0;
2544 }
2545 vm_consider_waking_compactor_swapper();
2546
2547 vm_page_lock_queues();
2548
2549 if (flow_control.state == FCS_DELAYED &&
2550 !VM_PAGE_Q_THROTTLED(iq)) {
2551 flow_control.state = FCS_IDLE;
2552 goto consider_inactive;
2553 }
2554
2555 if (vm_page_free_count >= vm_page_free_target) {
2556 /*
2557 * we're here because
2558 * 1) someone else freed up some pages while we had
2559 * the queues unlocked above
2560 * and we've hit one of the 3 conditions that
2561 * cause us to pause the pageout scan thread
2562 *
2563 * since we already have enough free pages,
2564 * let's avoid stalling and return normally
2565 *
2566 * before we return, make sure the pageout I/O threads
2567 * are running throttled in case there are still requests
2568 * in the laundry... since we have enough free pages
2569 * we don't need the laundry to be cleaned in a timely
2570 * fashion... so let's avoid interfering with foreground
2571 * activity
2572 *
2573 * we don't want to hold vm_page_queue_free_lock when
2574 * calling vm_pageout_adjust_io_throttles (since it
2575 * may cause other locks to be taken), we do the initial
2576 * check outside of the lock. Once we take the lock,
2577 * we recheck the condition since it may have changed.
2578 * if it has, no problem, we will make the threads
2579 * non-throttled before actually blocking
2580 */
2581 vm_pageout_adjust_io_throttles(iq, eq, TRUE);
2582 }
2583 lck_mtx_lock(&vm_page_queue_free_lock);
2584
2585 if (vm_page_free_count >= vm_page_free_target &&
2586 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
2587 goto return_from_scan;
2588 }
2589 lck_mtx_unlock(&vm_page_queue_free_lock);
2590
2591 if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) {
2592 /*
2593 * we're most likely about to block due to one of
2594 * the 3 conditions that cause vm_pageout_scan to
2595 * not be able to make forward progress w/r
2596 * to providing new pages to the free queue,
2597 * so unthrottle the I/O threads in case we
2598 * have laundry to be cleaned... it needs
2599 * to be completed ASAP.
2600 *
2601 * even if we don't block, we want the io threads
2602 * running unthrottled since the sum of free +
2603 * clean pages is still under our free target
2604 */
2605 vm_pageout_adjust_io_throttles(iq, eq, FALSE);
2606 }
2607 if (vm_page_cleaned_count > 0 && exceeded_burst_throttle == FALSE) {
2608 /*
2609 * if we get here we're below our free target and
2610 * we're stalling due to a full laundry queue or
2611 * we don't have any inactive pages other than
2612 * those in the clean queue...
2613 * however, we have pages on the clean queue that
2614 * can be moved to the free queue, so let's not
2615 * stall the pageout scan
2616 */
2617 flow_control.state = FCS_IDLE;
2618 goto consider_inactive;
2619 }
2620 VM_CHECK_MEMORYSTATUS;
2621
2622 if (flow_control.state != FCS_IDLE)
2623 vm_pageout_scan_throttle++;
2624 iq->pgo_throttled = TRUE;
2625
2626 assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);
2627 counter(c_vm_pageout_scan_block++);
2628
2629 vm_page_unlock_queues();
2630
2631 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
2632
2633 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START,
2634 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
2635 memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START);
2636
2637 thread_block(THREAD_CONTINUE_NULL);
2638
2639 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END,
2640 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
2641 memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END);
2642
2643 vm_page_lock_queues();
2644 delayed_unlock = 1;
2645
2646 iq->pgo_throttled = FALSE;
2647
2648 if (loop_count >= vm_page_inactive_count)
2649 loop_count = 0;
2650 inactive_burst_count = 0;
2651
2652 goto Restart;
2653 /*NOTREACHED*/
2654 }
2655
2656
2657 flow_control.state = FCS_IDLE;
2658 consider_inactive:
2659 vm_pageout_inactive_external_forced_reactivate_limit = MIN((vm_page_active_count + vm_page_inactive_count),
2660 vm_pageout_inactive_external_forced_reactivate_limit);
2661 loop_count++;
2662 inactive_burst_count++;
2663 vm_pageout_inactive++;
2664
2665
2666 /*
2667 * Choose a victim.
2668 */
2669 while (1) {
2670 uint32_t inactive_external_count;
2671
2672 #if CONFIG_BACKGROUND_QUEUE
2673 ignore_reference = FALSE;
2674 #endif /* CONFIG_BACKGROUND_QUEUE */
2675
2676 m = NULL;
2677 m_object = VM_OBJECT_NULL;
2678
2679 if (VM_DYNAMIC_PAGING_ENABLED()) {
2680 assert(vm_page_throttled_count == 0);
2681 assert(vm_page_queue_empty(&vm_page_queue_throttled));
2682 }
2683
2684
2685 #if CONFIG_SECLUDED_MEMORY
2686 if ((secluded_aging_policy ==
2687 SECLUDED_AGING_AFTER_INACTIVE) &&
2688 vm_page_secluded_count > vm_page_secluded_target) {
2689 /*
2690 * SECLUDED_AGING_AFTER_INACTIVE:
2691 * Secluded pages have already been aged
2692 * through the active and inactive queues, and
2693 * we now have too many of them, so let's
2694 * balance that queue by considering reclaiming
2695 * the oldest page in the secluded queue.
2696 */
2697 assert(!vm_page_queue_empty(&vm_page_queue_secluded));
2698 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_secluded);
2699 if (m->vm_page_object == 0) {
2700 /*
2701 * It's already a free page:
2702 * just move it to a free queue.
2703 */
2704 vm_page_queues_remove(m, TRUE);
2705 assert(m->busy);
2706 assert(m->pageq.next == 0);
2707 assert(m->pageq.prev == 0);
2708 m->snext = local_freeq;
2709 local_freeq = m;
2710 local_freed++;
2711 goto done_with_inactivepage;
2712 }
2713 /*
2714 * Not a free page: we've found our next
2715 * "victim".
2716 */
2717 break;
2718 }
2719 #endif /* CONFIG_SECLUDED_MEMORY */
2720
2721 #if CONFIG_BACKGROUND_QUEUE
2722 if (vm_page_background_mode != VM_PAGE_BG_DISABLED && (vm_page_background_count > vm_page_background_target)) {
2723 vm_object_t bg_m_object = NULL;
2724
2725 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_background);
2726
2727 bg_m_object = VM_PAGE_OBJECT(m);
2728
2729 if (!VM_PAGE_PAGEABLE(m)) {
2730 /*
2731 * This page is on the background queue
2732 * but not on a pageable queue. This is
2733 * likely a transient state and whoever
2734 * took it out of its pageable queue
2735 * will likely put it back on a pageable
2736 * queue soon but we can't deal with it
2737 * at this point, so let's ignore this
2738 * page.
2739 */
2740 } else if (force_anonymous == FALSE || bg_m_object->internal) {
2741 ignore_reference = TRUE;
2742
2743 if (bg_m_object->internal)
2744 vm_pageout_considered_bq_internal++;
2745 else
2746 vm_pageout_considered_bq_external++;
2747
2748 assert(VM_PAGE_PAGEABLE(m));
2749 break;
2750 }
2751 }
2752 #endif
2753
2754 /*
2755 * The most eligible pages are ones we paged in speculatively,
2756 * but which have not yet been touched.
2757 */
2758 if (!vm_page_queue_empty(&sq->age_q) && force_anonymous == FALSE) {
2759 m = (vm_page_t) vm_page_queue_first(&sq->age_q);
2760
2761 assert(m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q);
2762
2763 break;
2764 }
2765 /*
2766 * Try a clean-queue inactive page.
2767 */
2768 if (!vm_page_queue_empty(&vm_page_queue_cleaned)) {
2769 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
2770
2771 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
2772
2773 break;
2774 }
2775
2776 grab_anonymous = (vm_page_anonymous_count > vm_page_anonymous_min);
2777 inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
2778
2779 if ((vm_page_pageable_external_count < vm_page_filecache_min || force_anonymous == TRUE) ||
2780 ((inactive_external_count < vm_page_anonymous_count) && (inactive_external_count < (vm_page_pageable_external_count / 3)))) {
2781 grab_anonymous = TRUE;
2782 anons_grabbed = 0;
2783 }
2784 #if CONFIG_JETSAM
2785 /* If the file-backed pool has accumulated
2786 * significantly more pages than the jetsam
2787 * threshold, prefer to reclaim those
2788 * inline to minimise compute overhead of reclaiming
2789 * anonymous pages.
2790 * This calculation does not account for the CPU local
2791 * external page queues, as those are expected to be
2792 * much smaller relative to the global pools.
2793 */
2794 if (grab_anonymous) {
2795 if (vm_page_pageable_external_count >
2796 vm_page_filecache_min) {
2797 if ((vm_page_pageable_external_count *
2798 vm_pageout_memorystatus_fb_factor_dr) >
2799 (memorystatus_available_pages_critical *
2800 vm_pageout_memorystatus_fb_factor_nr)) {
2801 grab_anonymous = FALSE;
2802 #if DEVELOPMENT || DEBUG
2803 vm_grab_anon_overrides++;
2804 #endif
2805 }
2806 }
2807 #if DEVELOPMENT || DEBUG
2808 if (grab_anonymous) {
2809 vm_grab_anon_nops++;
2810
2811 }
2812 #endif
2813 }
2814 #endif /* CONFIG_JETSAM */
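/*
 * Worked example of the override above, with purely hypothetical
 * numbers and assuming the file cache is already above
 * vm_page_filecache_min: if vm_page_pageable_external_count == 120000,
 * memorystatus_available_pages_critical == 5000 and both fb_factors
 * are 1, then 120000 * 1 > 5000 * 1, so grab_anonymous is forced back
 * to FALSE and the file cache is trimmed before any anonymous page is
 * compressed.
 */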
2815
2816 if (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT || vm_page_queue_empty(&vm_page_queue_anonymous)) {
2817
2818 if ( !vm_page_queue_empty(&vm_page_queue_inactive) ) {
2819 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
2820
2821 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
2822 anons_grabbed = 0;
2823
2824 if (vm_page_pageable_external_count < vm_page_filecache_min) {
2825 if ((++reactivated_this_call % 100))
2826 goto must_activate_page;
2827 /*
2828 * steal 1% of the file backed pages even if
2829 * we are under the limit that has been set
2830 * for a healthy filecache
2831 */
2832 }
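/*
 * Note on the modulo test above: (++reactivated_this_call % 100) is
 * non-zero for 99 of every 100 candidates, which are sent to
 * must_activate_page; only the 100th falls through to the break below
 * and is stolen, hence the "1%" figure in the comment.
 */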
2833 break;
2834 }
2835 }
2836 if ( !vm_page_queue_empty(&vm_page_queue_anonymous) ) {
2837 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
2838
2839 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
2840 anons_grabbed++;
2841
2842 break;
2843 }
2844
2845 /*
2846 * if we've gotten here, we have no victim page.
2847 * if making clean, free the local freed list and return.
2848 * if making free, check to see if we've finished balancing the queues
2849 * yet; if we haven't, just continue, else panic
2850 */
2851 vm_page_unlock_queues();
2852
2853 if (object != NULL) {
2854 vm_object_unlock(object);
2855 object = NULL;
2856 }
2857 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2858
2859 if (local_freeq) {
2860 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
2861 vm_page_free_count, local_freed, delayed_unlock_limit, 5);
2862
2863 vm_page_free_list(local_freeq, TRUE);
2864
2865 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
2866 vm_page_free_count, local_freed, 0, 5);
2867
2868 local_freeq = NULL;
2869 local_freed = 0;
2870 }
2871 vm_page_lock_queues();
2872 delayed_unlock = 1;
2873
2874 force_anonymous = FALSE;
2875
2876 if ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target)
2877 goto Restart;
2878
2879 if (!vm_page_queue_empty(&sq->age_q))
2880 goto Restart;
2881
2882 panic("vm_pageout: no victim");
2883
2884 /* NOTREACHED */
2885 }
2886 m_object = VM_PAGE_OBJECT(m);
2887 force_anonymous = FALSE;
2888
2889 page_prev_q_state = m->vm_page_q_state;
2890 requeue_insert_first = FALSE;
2891 /*
2892 * we just found this page on one of our queues...
2893 * it can't also be on the pageout queue, so safe
2894 * to call vm_page_queues_remove
2895 */
2896 vm_page_queues_remove(m, TRUE);
2897
2898 assert(!m->laundry);
2899 assert(!m->private);
2900 assert(!m->fictitious);
2901 assert(m_object != kernel_object);
2902 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
2903
2904
2905 if (page_prev_q_state != VM_PAGE_ON_SPECULATIVE_Q &&
2906 page_prev_q_state != VM_PAGE_ON_SECLUDED_Q)
2907 vm_pageout_stats[vm_pageout_stat_now].considered++;
2908
2909 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
2910
2911 /*
2912 * check to see if we currently are working
2913 * with the same object... if so, we've
2914 * already got the lock
2915 */
2916 if (m_object != object) {
2917 /*
2918 * the object associated with candidate page is
2919 * different from the one we were just working
2920 * with... dump the lock if we still own it
2921 */
2922 if (object != NULL) {
2923 vm_object_unlock(object);
2924 object = NULL;
2925 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2926 }
2927 /*
2928 * Try to lock object; since we've already got the
2929 * page queues lock, we can only 'try' for this one.
2930 * if the 'try' fails, we need to do a mutex_pause
2931 * to allow the owner of the object lock a chance to
2932 * run... otherwise, we're likely to trip over this
2933 * object in the same state as we work our way through
2934 * the queue... clumps of pages associated with the same
2935 * object are fairly typical on the inactive and active queues
2936 */
2937 if (!vm_object_lock_try_scan(m_object)) {
2938 vm_page_t m_want = NULL;
2939
2940 vm_pageout_inactive_nolock++;
2941
2942 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2943 vm_pageout_cleaned_nolock++;
2944
2945 if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
2946 requeue_insert_first = TRUE;
2947
2948 pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));
2949 m->reference = FALSE;
2950
2951 /*
2952 * m->object must be stable since we hold the page queues lock...
2953 * we can update the scan_collisions field sans the object lock
2954 * since it is a separate field and this is the only spot that does
2955 * a read-modify-write operation and it is never executed concurrently...
2956 * we can asynchronously set this field to 0 when creating a UPL, so it
2957 * is possible for the value to be a bit non-deterministic, but that's ok
2958 * since it's only used as a hint
2959 */
2960 m_object->scan_collisions = 1;
2961
2962 if ( !vm_page_queue_empty(&sq->age_q) )
2963 m_want = (vm_page_t) vm_page_queue_first(&sq->age_q);
2964 else if ( !vm_page_queue_empty(&vm_page_queue_cleaned))
2965 m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
2966 else if ( !vm_page_queue_empty(&vm_page_queue_inactive) &&
2967 (anons_grabbed >= ANONS_GRABBED_LIMIT || vm_page_queue_empty(&vm_page_queue_anonymous)))
2968 m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
2969 else if ( !vm_page_queue_empty(&vm_page_queue_anonymous))
2970 m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
2971
2972 /*
2973 * this is the next object we're going to be interested in
2974 * try to make sure its available after the mutex_yield
2975 * returns control
2976 */
2977 if (m_want)
2978 vm_pageout_scan_wants_object = VM_PAGE_OBJECT(m_want);
2979
2980 /*
2981 * force us to dump any collected free pages
2982 * and to pause before moving on
2983 */
2984 try_failed = TRUE;
2985
2986 goto requeue_page;
2987 }
2988 object = m_object;
2989 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2990
2991 try_failed = FALSE;
2992 }
2993 assert(m_object == object);
2994 assert(VM_PAGE_OBJECT(m) == m_object);
2995
2996 if (catch_up_count)
2997 catch_up_count--;
2998
2999 if (m->busy) {
3000 if (m->encrypted_cleaning) {
3001 /*
3002 * ENCRYPTED SWAP:
3003 * if this page has already been picked up as
3004 * part of a page-out cluster, it will be busy
3005 * because it is being encrypted (see
3006 * vm_object_upl_request()). But we still
3007 * want to demote it from "clean-in-place"
3008 * (aka "adjacent") to "clean-and-free" (aka
3009 * "target"), so let's ignore its "busy" bit
3010 * here and proceed to check for "cleaning" a
3011 * little bit below...
3012 *
3013 * CAUTION CAUTION:
3014 * A "busy" page should still be left alone for
3015 * most purposes, so we have to be very careful
3016 * not to process that page too much.
3017 */
3018 assert(m->cleaning);
3019 goto consider_inactive_page;
3020 }
3021
3022 /*
3023 * Somebody is already playing with this page.
3024 * Put it back on the appropriate queue
3025 *
3026 */
3027 vm_pageout_inactive_busy++;
3028
3029 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
3030 vm_pageout_cleaned_busy++;
3031
3032 requeue_page:
3033 if (requeue_insert_first)
3034 vm_page_enqueue_inactive(m, TRUE);
3035 else
3036 vm_page_enqueue_inactive(m, FALSE);
3037 #if CONFIG_BACKGROUND_QUEUE
3038 if (ignore_reference == TRUE) {
3039 if (m_object->internal)
3040 vm_pageout_rejected_bq_internal++;
3041 else
3042 vm_pageout_rejected_bq_external++;
3043 }
3044 #endif
3045 goto done_with_inactivepage;
3046 }
3047
3048
3049 /*
3050 * If it's absent, in error or the object is no longer alive,
3051 * we can reclaim the page... in the no longer alive case,
3052 * there are 2 states the page can be in that preclude us
3053 * from reclaiming it - busy or cleaning - that we've already
3054 * dealt with
3055 */
3056 if (m->absent || m->error || !object->alive) {
3057
3058 if (m->absent)
3059 vm_pageout_inactive_absent++;
3060 else if (!object->alive)
3061 vm_pageout_inactive_notalive++;
3062 else
3063 vm_pageout_inactive_error++;
3064 reclaim_page:
3065 if (vm_pageout_deadlock_target) {
3066 vm_pageout_scan_inactive_throttle_success++;
3067 vm_pageout_deadlock_target--;
3068 }
3069
3070 DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL);
3071
3072 if (object->internal) {
3073 DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL);
3074 } else {
3075 DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL);
3076 }
3077 assert(!m->cleaning);
3078 assert(!m->laundry);
3079
3080 m->busy = TRUE;
3081
3082 /*
3083 * remove page from object here since we're already
3084 * behind the object lock... defer the rest of the work
3085 * we'd normally do in vm_page_free_prepare_object
3086 * until 'vm_page_free_list' is called
3087 */
3088 if (m->tabled)
3089 vm_page_remove(m, TRUE);
3090
3091 assert(m->pageq.next == 0 && m->pageq.prev == 0);
3092 m->snext = local_freeq;
3093 local_freeq = m;
3094 local_freed++;
3095
3096 #if CONFIG_SECLUDED_MEMORY
3097 if (page_prev_q_state == VM_PAGE_ON_SECLUDED_Q)
3098 vm_pageout_freed_from_secluded++;
3099 #endif /* CONFIG_SECLUDED_MEMORY */
3100 if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
3101 vm_pageout_freed_from_speculative++;
3102 else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
3103 vm_pageout_freed_from_cleaned++;
3104 else
3105 vm_pageout_freed_from_inactive_clean++;
3106
3107 if (page_prev_q_state != VM_PAGE_ON_SPECULATIVE_Q &&
3108 page_prev_q_state != VM_PAGE_ON_SECLUDED_Q)
3109 vm_pageout_stats[vm_pageout_stat_now].reclaimed++;
3110
3111 inactive_burst_count = 0;
3112 goto done_with_inactivepage;
3113 }
3114 /*
3115 * If the object is empty, the page must be reclaimed even
3116 * if dirty or used.
3117 * If the page belongs to a volatile object, we stick it back
3118 * on.
3119 */
3120 if (object->copy == VM_OBJECT_NULL) {
3121 if (object->purgable == VM_PURGABLE_EMPTY) {
3122 if (m->pmapped == TRUE) {
3123 /* unmap the page */
3124 refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
3125 if (refmod_state & VM_MEM_MODIFIED) {
3126 SET_PAGE_DIRTY(m, FALSE);
3127 }
3128 }
3129 if (m->dirty || m->precious) {
3130 /* we saved the cost of cleaning this page ! */
3131 vm_page_purged_count++;
3132 }
3133 goto reclaim_page;
3134 }
3135
3136 if (VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
3137 /*
3138 * With the VM compressor, the cost of
3139 * reclaiming a page is much lower (no I/O),
3140 * so if we find a "volatile" page, it's better
3141 * to let it get compressed rather than letting
3142 * it occupy a full page until it gets purged.
3143 * So no need to check for "volatile" here.
3144 */
3145 } else if (object->purgable == VM_PURGABLE_VOLATILE) {
3146 /*
3147 * Avoid cleaning a "volatile" page which might
3148 * be purged soon.
3149 */
3150
3151 /* if it's wired, we can't put it on our queue */
3152 assert(!VM_PAGE_WIRED(m));
3153
3154 /* just stick it back on! */
3155 reactivated_this_call++;
3156
3157 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
3158 vm_pageout_cleaned_volatile_reactivated++;
3159
3160 goto reactivate_page;
3161 }
3162 }
3163
3164 consider_inactive_page:
3165 if (m->busy) {
3166 /*
3167 * CAUTION CAUTION:
3168 * A "busy" page should always be left alone, except...
3169 */
3170 if (m->cleaning && m->encrypted_cleaning) {
3171 /*
3172 * ENCRYPTED_SWAP:
3173 * We could get here with a "busy" page
3174 * if it's being encrypted during a
3175 * "clean-in-place" operation. We'll deal
3176 * with it right away by testing if it has been
3177 * referenced and either reactivating it or
3178 * promoting it from "clean-in-place" to
3179 * "clean-and-free".
3180 */
3181 } else {
3182 panic("\"busy\" page considered for pageout\n");
3183 }
3184 }
3185
3186 /*
3187 * If it's being used, reactivate.
3188 * (Fictitious pages are either busy or absent.)
3189 * First, update the reference and dirty bits
3190 * to make sure the page is unreferenced.
3191 */
3192 refmod_state = -1;
3193
3194 if (m->reference == FALSE && m->pmapped == TRUE) {
3195 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
3196
3197 if (refmod_state & VM_MEM_REFERENCED)
3198 m->reference = TRUE;
3199 if (refmod_state & VM_MEM_MODIFIED) {
3200 SET_PAGE_DIRTY(m, FALSE);
3201 }
3202 }
3203
3204 /*
3205 * if (m->cleaning && !m->free_when_done)
3206 * If already cleaning this page in place and it hasn't
3207 * been recently referenced, just pull off the queue.
3208 * We can leave the page mapped, and upl_commit_range
3209 * will put it on the clean queue.
3210 *
3211 * note: if m->encrypted_cleaning == TRUE, then
3212 * m->cleaning == TRUE
3213 * and we'll handle it here
3214 *
3215 * if (m->free_when_done && !m->cleaning)
3216 * an msync INVALIDATE is in progress...
3217 * this page has been marked for destruction
3218 * after it has been cleaned,
3219 * but not yet gathered into a UPL
3220 * where 'cleaning' will be set...
3221 * just leave it off the paging queues
3222 *
3223 * if (m->free_when_done && m->cleaning)
3224 * an msync INVALIDATE is in progress
3225 * and the UPL has already gathered this page...
3226 * just leave it off the paging queues
3227 */
3228
3229 /*
3230 * page with m->free_when_done and still on the queues means that an
3231 * MS_INVALIDATE is in progress on this page... leave it alone
3232 */
3233 if (m->free_when_done) {
3234 goto done_with_inactivepage;
3235 }
3236
3237 /* if cleaning, reactivate if referenced. otherwise, just pull off queue */
3238 if (m->cleaning) {
3239 if (m->reference == TRUE) {
3240 reactivated_this_call++;
3241 goto reactivate_page;
3242 } else {
3243 goto done_with_inactivepage;
3244 }
3245 }
3246
3247 if (m->reference || m->dirty) {
3248 /* deal with a rogue "reusable" page */
3249 VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, m_object);
3250 }
3251
3252 #if CONFIG_SECLUDED_MEMORY
3253 if (secluded_for_filecache &&
3254 vm_page_secluded_target > 0 &&
3255 m_object->eligible_for_secluded &&
3256 secluded_aging_policy == SECLUDED_AGING_FIFO) {
3257 /*
3258 * SECLUDED_AGING_FIFO:
3259 * This victim page is eligible for the secluded pool
3260 * and we're not aging secluded pages, so let's not
3261 * reactivate it if it's been re-referenced.
3262 * Later on, we'll move it to the secluded queue
3263 * instead of freeing it.
3264 */
3265 ignore_reference_secluded = TRUE;
3266 } else {
3267 ignore_reference_secluded = FALSE;
3268 }
3269 #endif /* CONFIG_SECLUDED_MEMORY */
3270
3271 if (!m->no_cache &&
3272 #if CONFIG_BACKGROUND_QUEUE
3273 ignore_reference == FALSE &&
3274 #endif
3275 #if CONFIG_SECLUDED_MEMORY
3276 ignore_reference_secluded == FALSE &&
3277 #endif /* CONFIG_SECLUDED_MEMORY */
3278 (m->reference ||
3279 (m->xpmapped && !object->internal && (vm_page_xpmapped_external_count < (vm_page_external_count / 4))))) {
3280 /*
3281 * The page we pulled off the inactive list has
3282 * been referenced. It is possible for other
3283 * processors to be touching pages faster than we
3284 * can clear the referenced bit and traverse the
3285 * inactive queue, so we limit the number of
3286 * reactivations.
3287 */
3288 if (++reactivated_this_call >= reactivate_limit) {
3289 vm_pageout_reactivation_limit_exceeded++;
3290 } else if (catch_up_count) {
3291 vm_pageout_catch_ups++;
3292 } else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) {
3293 vm_pageout_inactive_force_reclaim++;
3294 } else {
3295 uint32_t isinuse;
3296
3297 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
3298 vm_pageout_cleaned_reference_reactivated++;
3299
3300 reactivate_page:
3301 if ( !object->internal && object->pager != MEMORY_OBJECT_NULL &&
3302 vnode_pager_get_isinuse(object->pager, &isinuse) == KERN_SUCCESS && !isinuse) {
3303 /*
3304 * no explicit mappings of this object exist
3305 * and it's not open via the filesystem
3306 */
3307 vm_page_deactivate(m);
3308 vm_pageout_inactive_deactivated++;
3309 } else {
3310 must_activate_page:
3311 /*
3312 * The page was/is being used, so put back on active list.
3313 */
3314 vm_page_activate(m);
3315 VM_STAT_INCR(reactivations);
3316 inactive_burst_count = 0;
3317 }
3318 #if CONFIG_BACKGROUND_QUEUE
3319 if (ignore_reference == TRUE) {
3320 if (m_object->internal)
3321 vm_pageout_rejected_bq_internal++;
3322 else
3323 vm_pageout_rejected_bq_external++;
3324 }
3325 #endif
3326 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
3327 vm_pageout_cleaned_reactivated++;
3328 #if CONFIG_SECLUDED_MEMORY
3329 if (page_prev_q_state == VM_PAGE_ON_SECLUDED_Q)
3330 vm_pageout_secluded_reactivated++;
3331 #endif /* CONFIG_SECLUDED_MEMORY */
3332
3333 vm_pageout_inactive_used++;
3334
3335 goto done_with_inactivepage;
3336 }
3337 /*
3338 * Make sure we call pmap_get_refmod() if it
3339 * wasn't already called just above, to update
3340 * the dirty bit.
3341 */
3342 if ((refmod_state == -1) && !m->dirty && m->pmapped) {
3343 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
3344 if (refmod_state & VM_MEM_MODIFIED) {
3345 SET_PAGE_DIRTY(m, FALSE);
3346 }
3347 }
3348 }
3349
3350 XPR(XPR_VM_PAGEOUT,
3351 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
3352 object, m->offset, m, 0,0);
3353
3354 /*
3355 * we've got a candidate page to steal...
3356 *
3357 * m->dirty is up to date courtesy of the
3358 * preceding check for m->reference... if
3359 * we get here, then m->reference had to be
3360 * FALSE (or possibly "reactivate_limit" was
3361 * exceeded), but in either case we called
3362 * pmap_get_refmod() and updated both
3363 * m->reference and m->dirty
3364 *
3365 * if it's dirty or precious we need to
3366 * see if the target queue is throttled
3367 * if it is, we need to skip over it by moving it back
3368 * to the end of the inactive queue
3369 */
3370
3371 inactive_throttled = FALSE;
3372
3373 if (m->dirty || m->precious) {
3374 if (object->internal) {
3375 if (VM_PAGE_Q_THROTTLED(iq))
3376 inactive_throttled = TRUE;
3377 } else if (VM_PAGE_Q_THROTTLED(eq)) {
3378 inactive_throttled = TRUE;
3379 }
3380 }
3381 throttle_inactive:
3382 if (!VM_DYNAMIC_PAGING_ENABLED() &&
3383 object->internal && m->dirty &&
3384 (object->purgable == VM_PURGABLE_DENY ||
3385 object->purgable == VM_PURGABLE_NONVOLATILE ||
3386 object->purgable == VM_PURGABLE_VOLATILE)) {
3387 vm_page_check_pageable_safe(m);
3388 assert(m->vm_page_q_state == VM_PAGE_NOT_ON_Q);
3389 vm_page_queue_enter(&vm_page_queue_throttled, m,
3390 vm_page_t, pageq);
3391 m->vm_page_q_state = VM_PAGE_ON_THROTTLED_Q;
3392 vm_page_throttled_count++;
3393
3394 vm_pageout_scan_reclaimed_throttled++;
3395
3396 inactive_burst_count = 0;
3397 goto done_with_inactivepage;
3398 }
3399 if (inactive_throttled == TRUE) {
3400
3401 if (object->internal == FALSE) {
3402 /*
3403 * we need to break up the following potential deadlock case...
3404 * a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written.
3405 * b) The thread doing the writing is waiting for pages while holding the truncate lock
3406 * c) Most of the pages in the inactive queue belong to this file.
3407 *
3408 * we are potentially in this deadlock because...
3409 * a) the external pageout queue is throttled
3410 * b) we're done with the active queue and moved on to the inactive queue
3411 * c) we've got a dirty external page
3412 *
3413 * since we don't know the reason for the external pageout queue being throttled we
3414 * must suspect that we are deadlocked, so move the current page onto the active queue
3415 * in an effort to cause a page from the active queue to 'age' to the inactive queue
3416 *
3417 * if we don't have jetsam configured (i.e. we have a dynamic pager), set
3418 * 'force_anonymous' to TRUE to cause us to grab a page from the cleaned/anonymous
3419 * pool the next time we select a victim page... if we can make enough new free pages,
3420 * the deadlock will break, the external pageout queue will empty and it will no longer
3421 * be throttled
3422 *
3423 * if we have jetsam configured, keep a count of the pages reactivated this way so
3424 * that we can try to find clean pages in the active/inactive queues before
3425 * deciding to jetsam a process
3426 */
3427 vm_pageout_scan_inactive_throttled_external++;
3428
3429 vm_page_check_pageable_safe(m);
3430 assert(m->vm_page_q_state == VM_PAGE_NOT_ON_Q);
3431 vm_page_queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
3432 m->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
3433 vm_page_active_count++;
3434 vm_page_pageable_external_count++;
3435
3436 vm_pageout_adjust_io_throttles(iq, eq, FALSE);
3437
3438 #if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
3439 vm_pageout_inactive_external_forced_reactivate_limit--;
3440
3441 if (vm_pageout_inactive_external_forced_reactivate_limit <= 0) {
3442 vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
3443 /*
3444 * Possible deadlock scenario so request jetsam action
3445 */
3446 assert(object);
3447 vm_object_unlock(object);
3448 object = VM_OBJECT_NULL;
3449 vm_page_unlock_queues();
3450
3451 VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START,
3452 vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
3453
3454 /* Kill first suitable process */
3455 if (memorystatus_kill_on_VM_page_shortage(FALSE) == FALSE) {
3456 panic("vm_pageout_scan: Jetsam request failed\n");
3457 }
3458
3459 VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END, 0, 0, 0, 0);
3460
3461 vm_pageout_inactive_external_forced_jetsam_count++;
3462 vm_page_lock_queues();
3463 delayed_unlock = 1;
3464 }
3465 #else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
3466 force_anonymous = TRUE;
3467 #endif
3468 inactive_burst_count = 0;
3469 goto done_with_inactivepage;
3470 } else {
3471 vm_pageout_scan_inactive_throttled_internal++;
3472
3473 goto must_activate_page;
3474 }
3475 }
3476
3477 /*
3478 * we've got a page that we can steal...
3479 * eliminate all mappings and make sure
3480 * we have the up-to-date modified state
3481 *
3482 * if we need to do a pmap_disconnect then we
3483 * need to re-evaluate m->dirty since the pmap_disconnect
3484 * provides the true state atomically... the
3485 * page was still mapped up to the pmap_disconnect
3486 * and may have been dirtied at the last microsecond
3487 *
3488 * Note that if 'pmapped' is FALSE then the page is not
3489 * and has not been in any map, so there is no point calling
3490 * pmap_disconnect(). m->dirty could have been set in anticipation
3491 * of likely usage of the page.
3492 */
3493 if (m->pmapped == TRUE) {
3494 int pmap_options;
3495
3496 /*
3497 * Don't count this page as going into the compressor
3498 * if any of these are true:
3499 * 1) compressed pager isn't enabled
3500 * 2) Freezer enabled device with compressed pager
3501 * backend (exclusive use) i.e. most of the VM system
3502 * (including vm_pageout_scan) has no knowledge of
3503 * the compressor
3504 * 3) This page belongs to a file and hence will not be
3505 * sent into the compressor
3506 */
3507 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE ||
3508 object->internal == FALSE) {
3509 pmap_options = 0;
3510 } else if (m->dirty || m->precious) {
3511 /*
3512 * VM knows that this page is dirty (or
3513 * precious) and needs to be compressed
3514 * rather than freed.
3515 * Tell the pmap layer to count this page
3516 * as "compressed".
3517 */
3518 pmap_options = PMAP_OPTIONS_COMPRESSOR;
3519 } else {
3520 /*
3521 * VM does not know if the page needs to
3522 * be preserved but the pmap layer might tell
3523 * us if any mapping has "modified" it.
3524 * Let the pmap layer count this page
3525 * as compressed if and only if it has been
3526 * modified.
3527 */
3528 pmap_options =
3529 PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
3530 }
3531 refmod_state = pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m),
3532 pmap_options,
3533 NULL);
3534 if (refmod_state & VM_MEM_MODIFIED) {
3535 SET_PAGE_DIRTY(m, FALSE);
3536 }
3537 }
3538 /*
3539 * reset our count of pages that have been reclaimed
3540 * since the last page was 'stolen'
3541 */
3542 inactive_reclaim_run = 0;
3543
3544 /*
3545 * If it's clean and not precious, we can free the page.
3546 */
3547 if (!m->dirty && !m->precious) {
3548
3549 if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
3550 vm_pageout_speculative_clean++;
3551 else {
3552 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q)
3553 vm_pageout_inactive_anonymous++;
3554 else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
3555 vm_pageout_cleaned_reclaimed++;
3556
3557 vm_pageout_inactive_clean++;
3558 }
3559
3560 #if CONFIG_SECLUDED_MEMORY
3561 if (secluded_for_filecache &&
3562 vm_page_secluded_target > 0 &&
3563 !m->fictitious &&
3564 m_object->eligible_for_secluded &&
3565 num_tasks_can_use_secluded_mem == 0 &&
3566 (secluded_aging_policy == SECLUDED_AGING_FIFO ||
3567 ((secluded_aging_policy ==
3568 SECLUDED_AGING_AFTER_INACTIVE) &&
3569 (page_prev_q_state != VM_PAGE_ON_SECLUDED_Q)))) {
3570 assert(page_prev_q_state != VM_PAGE_ON_SECLUDED_Q);
3571 assert(m->vm_page_q_state == VM_PAGE_NOT_ON_Q);
3572 LCK_MTX_ASSERT(&vm_page_queue_lock,
3573 LCK_MTX_ASSERT_OWNED);
3574 vm_page_queue_enter(&vm_page_queue_secluded,
3575 m,
3576 vm_page_t,
3577 pageq);
3578 m->vm_page_q_state = VM_PAGE_ON_SECLUDED_Q;
3579 vm_object_unlock(m_object);
3580 object = VM_OBJECT_NULL;
3581 vm_page_secluded_count++;
3582 vm_page_secluded_count_inuse++;
3583 assert(!m_object->internal);
3584 // vm_page_pageable_external_count++;
3585 m = VM_PAGE_NULL;
3586 goto done_with_inactivepage;
3587 }
3588 #endif /* CONFIG_SECLUDED_MEMORY */
3589
3590 /*
3591 * OK, at this point we have found a page we are going to free.
3592 */
3593 #if CONFIG_PHANTOM_CACHE
3594 if (!object->internal)
3595 vm_phantom_cache_add_ghost(m);
3596 #endif
3597 goto reclaim_page;
3598 }
3599
3600 /*
3601 * The page may have been dirtied since the last check
3602 * for a throttled target queue (which may have been skipped
3603 * if the page was clean then). With the dirty page
3604 * disconnected here, we can make one final check.
3605 */
3606 if (object->internal) {
3607 if (VM_PAGE_Q_THROTTLED(iq))
3608 inactive_throttled = TRUE;
3609 } else if (VM_PAGE_Q_THROTTLED(eq)) {
3610 inactive_throttled = TRUE;
3611 }
3612
3613 if (inactive_throttled == TRUE)
3614 goto throttle_inactive;
3615
3616 #if VM_PRESSURE_EVENTS
3617 #if CONFIG_JETSAM
3618
3619 /*
3620 * If Jetsam is enabled, then the sending
3621 * of memory pressure notifications is handled
3622 * from the same thread that takes care of high-water
3623 * and other jetsams i.e. the memorystatus_thread.
3624 */
3625
3626 #else /* CONFIG_JETSAM */
3627
3628 vm_pressure_response();
3629
3630 #endif /* CONFIG_JETSAM */
3631 #endif /* VM_PRESSURE_EVENTS */
3632
3633 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q)
3634 vm_pageout_inactive_anonymous++;
3635 if (object->internal)
3636 vm_pageout_inactive_dirty_internal++;
3637 else
3638 vm_pageout_inactive_dirty_external++;
3639
3640 /*
3641 * do NOT set the pageout bit!
3642 * sure, we might need free pages, but this page is going to take time to become free
3643 * anyway, so we may as well put it on the clean queue first and take it from there later
3644 * if necessary. that way, we'll ensure we don't free up too much. -mj
3645 */
3646 vm_pageout_cluster(m, FALSE, FALSE);
3647
3648 done_with_inactivepage:
3649
3650 if (delayed_unlock++ > delayed_unlock_limit || try_failed == TRUE) {
3651 boolean_t need_delay = TRUE;
3652
3653 if (object != NULL) {
3654 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
3655 vm_object_unlock(object);
3656 object = NULL;
3657 }
3658 vm_page_unlock_queues();
3659
3660 if (local_freeq) {
3661
3662 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
3663 vm_page_free_count, local_freed, delayed_unlock_limit, 4);
3664
3665 vm_page_free_list(local_freeq, TRUE);
3666
3667 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
3668 vm_page_free_count, local_freed, 0, 4);
3669
3670 local_freeq = NULL;
3671 local_freed = 0;
3672 need_delay = FALSE;
3673 }
3674 vm_consider_waking_compactor_swapper();
3675
3676 vm_page_lock_queues();
3677
3678 if (need_delay == TRUE)
3679 lck_mtx_yield(&vm_page_queue_lock);
3680
3681 delayed_unlock = 1;
3682 }
3683 vm_pageout_considered_page++;
3684
3685 /*
3686 * back to top of pageout scan loop
3687 */
3688 }
3689 }
3690
3691
3692 int vm_page_free_count_init;
3693
3694 void
3695 vm_page_free_reserve(
3696 int pages)
3697 {
3698 int free_after_reserve;
3699
3700 if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
3701
3702 if ((vm_page_free_reserved + pages + COMPRESSOR_FREE_RESERVED_LIMIT) >= (VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT))
3703 vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT;
3704 else
3705 vm_page_free_reserved += (pages + COMPRESSOR_FREE_RESERVED_LIMIT);
3706
3707 } else {
3708 if ((vm_page_free_reserved + pages) >= VM_PAGE_FREE_RESERVED_LIMIT)
3709 vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT;
3710 else
3711 vm_page_free_reserved += pages;
3712 }
3713 free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;
3714
3715 vm_page_free_min = vm_page_free_reserved +
3716 VM_PAGE_FREE_MIN(free_after_reserve);
3717
3718 if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT)
3719 vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT;
3720
3721 vm_page_free_target = vm_page_free_reserved +
3722 VM_PAGE_FREE_TARGET(free_after_reserve);
3723
3724 if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT)
3725 vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT;
3726
3727 if (vm_page_free_target < vm_page_free_min + 5)
3728 vm_page_free_target = vm_page_free_min + 5;
3729
3730 vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 2);
3731 }
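/*
 * Illustrative numbers only (the real values come from the
 * VM_PAGE_FREE_MIN / VM_PAGE_FREE_TARGET macros and the boot-time
 * vm_page_free_count_init): if the computations above produced
 * vm_page_free_min = 1500 and vm_page_free_target = 2000, the code
 * guarantees free_target >= free_min + 5 and sets
 * vm_page_throttle_limit = 2000 - (2000 / 2) = 1000, i.e. about
 * half of the free target.
 */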
3732
3733 /*
3734 * vm_pageout is the high level pageout daemon.
3735 */
3736
3737 void
3738 vm_pageout_continue(void)
3739 {
3740 DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL);
3741 vm_pageout_scan_event_counter++;
3742
3743 lck_mtx_lock(&vm_page_queue_free_lock);
3744 vm_pageout_running = TRUE;
3745 lck_mtx_unlock(&vm_page_queue_free_lock);
3746
3747 vm_pageout_scan();
3748 /*
3749 * we hold both the vm_page_queue_free_lock
3750 * and the vm_page_queues_lock at this point
3751 */
3752 assert(vm_page_free_wanted == 0);
3753 assert(vm_page_free_wanted_privileged == 0);
3754 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
3755
3756 vm_pageout_running = FALSE;
3757 if (vm_pageout_waiter) {
3758 vm_pageout_waiter = FALSE;
3759 thread_wakeup((event_t)&vm_pageout_waiter);
3760 }
3761
3762 lck_mtx_unlock(&vm_page_queue_free_lock);
3763 vm_page_unlock_queues();
3764
3765 counter(c_vm_pageout_block++);
3766 thread_block((thread_continue_t)vm_pageout_continue);
3767 /*NOTREACHED*/
3768 }
3769
3770 kern_return_t
3771 vm_pageout_wait(uint64_t deadline)
3772 {
3773 kern_return_t kr;
3774
3775 lck_mtx_lock(&vm_page_queue_free_lock);
3776 for (kr = KERN_SUCCESS; vm_pageout_running && (KERN_SUCCESS == kr); ) {
3777 vm_pageout_waiter = TRUE;
3778 if (THREAD_AWAKENED != lck_mtx_sleep_deadline(
3779 &vm_page_queue_free_lock, LCK_SLEEP_DEFAULT,
3780 (event_t) &vm_pageout_waiter, THREAD_UNINT, deadline)) {
3781 kr = KERN_OPERATION_TIMED_OUT;
3782 }
3783 }
3784 lck_mtx_unlock(&vm_page_queue_free_lock);
3785
3786 return (kr);
3787 }
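#if 0
/*
 * Illustrative sketch only (not part of this file): a hypothetical
 * caller that waits up to 100ms for the pageout daemon to go idle.
 * clock_interval_to_deadline() is used here on the assumption that the
 * caller starts from a relative timeout; vm_pageout_wait() itself takes
 * an absolute deadline in mach absolute time.
 */
static kern_return_t
example_wait_for_pageout_idle(void)
{
	uint64_t deadline;

	/* convert "100 milliseconds from now" into an absolute deadline */
	clock_interval_to_deadline(100, NSEC_PER_MSEC, &deadline);

	/* returns KERN_OPERATION_TIMED_OUT if pageout is still running */
	return vm_pageout_wait(deadline);
}
#endif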
3788
3789
3790 static void
3791 vm_pageout_iothread_external_continue(struct vm_pageout_queue *q)
3792 {
3793 vm_page_t m = NULL;
3794 vm_object_t object;
3795 vm_object_offset_t offset;
3796 memory_object_t pager;
3797
3798
3799 if (vm_pageout_internal_iothread != THREAD_NULL)
3800 current_thread()->options &= ~TH_OPT_VMPRIV;
3801
3802 vm_page_lockspin_queues();
3803
3804 while ( !vm_page_queue_empty(&q->pgo_pending) ) {
3805
3806 q->pgo_busy = TRUE;
3807 vm_page_queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
3808
3809 assert(m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q);
3810 VM_PAGE_CHECK(m);
3811 /*
3812 * grab a snapshot of the object and offset this
3813 * page is tabled in so that we can relookup this
3814 * page after we've taken the object lock - these
3815 * fields are stable while we hold the page queues lock
3816 * but as soon as we drop it, there is nothing to keep
3817 * this page in this object... we hold an activity_in_progress
3818 * on this object which will keep it from terminating
3819 */
3820 object = VM_PAGE_OBJECT(m);
3821 offset = m->offset;
3822
3823 if (object->object_slid) {
3824 panic("slid page %p not allowed on this path\n", m);
3825 }
3826 m->vm_page_q_state = VM_PAGE_NOT_ON_Q;
3827 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
3828
3829 vm_page_unlock_queues();
3830
3831 vm_object_lock(object);
3832
3833 m = vm_page_lookup(object, offset);
3834
3835 if (m == NULL ||
3836 m->busy || m->cleaning || !m->laundry || (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
3837 /*
3838 * it's either the same page that someone else has
3839 * started cleaning (or it's finished cleaning or
3840 * been put back on the pageout queue), or
3841 * the page has been freed or we have found a
3842 * new page at this offset... in all of these cases
3843 * we merely need to release the activity_in_progress
3844 * we took when we put the page on the pageout queue
3845 */
3846 vm_object_activity_end(object);
3847 vm_object_unlock(object);
3848
3849 vm_page_lockspin_queues();
3850 continue;
3851 }
3852 pager = object->pager;
3853
3854 if (pager == MEMORY_OBJECT_NULL) {
3855 /*
3856 * This pager has been destroyed by either
3857 * memory_object_destroy or vm_object_destroy, and
3858 * so there is nowhere for the page to go.
3859 */
3860 if (m->free_when_done) {
3861 /*
3862 * Just free the page... VM_PAGE_FREE takes
3863 * care of cleaning up all the state...
3864 * including doing the vm_pageout_throttle_up
3865 */
3866 VM_PAGE_FREE(m);
3867 } else {
3868 vm_page_lockspin_queues();
3869
3870 vm_pageout_throttle_up(m);
3871 vm_page_activate(m);
3872
3873 vm_page_unlock_queues();
3874
3875 /*
3876 * And we are done with it.
3877 */
3878 }
3879 vm_object_activity_end(object);
3880 vm_object_unlock(object);
3881
3882 vm_page_lockspin_queues();
3883 continue;
3884 }
3885 #if 0
3886 /*
3887 * we don't hold the page queue lock
3888 * so this check isn't safe to make
3889 */
3890 VM_PAGE_CHECK(m);
3891 #endif
3892 /*
3893 * give back the activity_in_progress reference we
3894 * took when we queued up this page and replace it
3895 * with a paging_in_progress reference that will
3896 * also keep the paging offset from changing and
3897 * prevent the object from terminating
3898 */
3899 vm_object_activity_end(object);
3900 vm_object_paging_begin(object);
3901 vm_object_unlock(object);
3902
3903 /*
3904 * Send the data to the pager.
3905 * any pageout clustering happens there
3906 */
3907 memory_object_data_return(pager,
3908 m->offset + object->paging_offset,
3909 PAGE_SIZE,
3910 NULL,
3911 NULL,
3912 FALSE,
3913 FALSE,
3914 0);
3915
3916 vm_object_lock(object);
3917 vm_object_paging_end(object);
3918 vm_object_unlock(object);
3919
3920 vm_pageout_io_throttle();
3921
3922 vm_page_lockspin_queues();
3923 }
3924 q->pgo_busy = FALSE;
3925 q->pgo_idle = TRUE;
3926
3927 assert_wait((event_t) &q->pgo_pending, THREAD_UNINT);
3928 vm_page_unlock_queues();
3929
3930 thread_block_parameter((thread_continue_t)vm_pageout_iothread_external_continue, (void *) q);
3931 /*NOTREACHED*/
3932 }
3933
3934
3935 uint32_t vm_compressor_failed;
3936
3937 #define MAX_FREE_BATCH 32
3938 uint32_t vm_compressor_time_thread; /* Set via sysctl to record time accrued by
3939 * this thread.
3940 */
3941 uint64_t vm_compressor_thread_runtime;
3942
3943 static void
3944 vm_pageout_iothread_internal_continue(struct cq *cq)
3945 {
3946 struct vm_pageout_queue *q;
3947 vm_page_t m = NULL;
3948 boolean_t pgo_draining;
3949 vm_page_t local_q;
3950 int local_cnt;
3951 vm_page_t local_freeq = NULL;
3952 int local_freed = 0;
3953 int local_batch_size;
3954
3955
3956 KERNEL_DEBUG(0xe040000c | DBG_FUNC_END, 0, 0, 0, 0, 0);
3957
3958 q = cq->q;
3959 local_batch_size = q->pgo_maxlaundry / (vm_compressor_thread_count * 2);
3960
3961 #if RECORD_THE_COMPRESSED_DATA
3962 if (q->pgo_laundry)
3963 c_compressed_record_init();
3964 #endif
3965 while (TRUE) {
3966 int pages_left_on_q = 0;
3967
3968 local_cnt = 0;
3969 local_q = NULL;
3970
3971 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_START, 0, 0, 0, 0, 0);
3972
3973 vm_page_lock_queues();
3974
3975 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_END, 0, 0, 0, 0, 0);
3976
3977 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_START, q->pgo_laundry, 0, 0, 0, 0);
3978
3979 while ( !vm_page_queue_empty(&q->pgo_pending) && local_cnt < local_batch_size) {
3980
3981 vm_page_queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
3982 assert(m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q);
3983 VM_PAGE_CHECK(m);
3984
3985 m->vm_page_q_state = VM_PAGE_NOT_ON_Q;
3986 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
3987 m->laundry = FALSE;
3988
3989 m->snext = local_q;
3990 local_q = m;
3991 local_cnt++;
3992 }
3993 if (local_q == NULL)
3994 break;
3995
3996 q->pgo_busy = TRUE;
3997
3998 if ((pgo_draining = q->pgo_draining) == FALSE) {
3999 vm_pageout_throttle_up_batch(q, local_cnt);
4000 pages_left_on_q = q->pgo_laundry;
4001 } else
4002 pages_left_on_q = q->pgo_laundry - local_cnt;
4003
4004 vm_page_unlock_queues();
4005
4006 #if !RECORD_THE_COMPRESSED_DATA
4007 if (pages_left_on_q >= local_batch_size && cq->id < (vm_compressor_thread_count - 1))
4008 thread_wakeup((event_t) ((uintptr_t)&q->pgo_pending + cq->id + 1));
4009 #endif
4010 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, q->pgo_laundry, 0, 0, 0, 0);
4011
4012 while (local_q) {
4013
4014 KERNEL_DEBUG(0xe0400024 | DBG_FUNC_START, local_cnt, 0, 0, 0, 0);
4015
4016 m = local_q;
4017 local_q = m->snext;
4018 m->snext = NULL;
4019
4020 if (vm_pageout_compress_page(&cq->current_chead, cq->scratch_buf, m, FALSE) == KERN_SUCCESS) {
4021
4022 m->snext = local_freeq;
4023 local_freeq = m;
4024 local_freed++;
4025
4026 if (local_freed >= MAX_FREE_BATCH) {
4027
4028 vm_page_free_list(local_freeq, TRUE);
4029 local_freeq = NULL;
4030 local_freed = 0;
4031 }
4032 }
4033 #if !CONFIG_JETSAM
4034 while (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
4035 kern_return_t wait_result;
4036 int need_wakeup = 0;
4037
4038 if (local_freeq) {
4039 vm_page_free_list(local_freeq, TRUE);
4040
4041 local_freeq = NULL;
4042 local_freed = 0;
4043
4044 continue;
4045 }
4046 lck_mtx_lock_spin(&vm_page_queue_free_lock);
4047
4048 if (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
4049
4050 if (vm_page_free_wanted_privileged++ == 0)
4051 need_wakeup = 1;
4052 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, THREAD_UNINT);
4053
4054 lck_mtx_unlock(&vm_page_queue_free_lock);
4055
4056 if (need_wakeup)
4057 thread_wakeup((event_t)&vm_page_free_wanted);
4058
4059 if (wait_result == THREAD_WAITING)
4060
4061 thread_block(THREAD_CONTINUE_NULL);
4062 } else
4063 lck_mtx_unlock(&vm_page_queue_free_lock);
4064 }
4065 #endif
4066 }
4067 if (local_freeq) {
4068 vm_page_free_list(local_freeq, TRUE);
4069
4070 local_freeq = NULL;
4071 local_freed = 0;
4072 }
4073 if (pgo_draining == TRUE) {
4074 vm_page_lockspin_queues();
4075 vm_pageout_throttle_up_batch(q, local_cnt);
4076 vm_page_unlock_queues();
4077 }
4078 }
4079 KERNEL_DEBUG(0xe040000c | DBG_FUNC_START, 0, 0, 0, 0, 0);
4080
4081 /*
4082 * queue lock is held and our q is empty
4083 */
4084 q->pgo_busy = FALSE;
4085 q->pgo_idle = TRUE;
4086
4087 assert_wait((event_t) ((uintptr_t)&q->pgo_pending + cq->id), THREAD_UNINT);
4088 vm_page_unlock_queues();
4089
4090 if (__improbable(vm_compressor_time_thread)) {
4091 vm_compressor_thread_runtime = thread_get_runtime_self();
4092 }
4093
4094 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, 0, 0, 0, 0, 0);
4095
4096 thread_block_parameter((thread_continue_t)vm_pageout_iothread_internal_continue, (void *) cq);
4097 /*NOTREACHED*/
4098 }
4099
4100
4101
4102 static void
4103 vm_pageout_immediate(vm_page_t m, boolean_t object_locked_by_caller)
4104 {
4105 assert(vm_pageout_immediate_scratch_buf);
4106
4107 if (vm_pageout_compress_page(&vm_pageout_immediate_chead, vm_pageout_immediate_scratch_buf, m, object_locked_by_caller) == KERN_SUCCESS) {
4108
4109 vm_page_free_prepare_object(m, TRUE);
4110 vm_page_release(m, TRUE);
4111 }
4112 }
4113
4114
4115 kern_return_t
4116 vm_pageout_compress_page(void **current_chead, char *scratch_buf, vm_page_t m, boolean_t object_locked_by_caller)
4117 {
4118 vm_object_t object;
4119 memory_object_t pager;
4120 int compressed_count_delta;
4121 kern_return_t retval;
4122
4123 object = VM_PAGE_OBJECT(m);
4124
4125 if (object->object_slid) {
4126 panic("slid page %p not allowed on this path\n", m);
4127 }
4128 assert(!m->free_when_done);
4129 assert(!m->laundry);
4130
4131 pager = object->pager;
4132
4133 if (object_locked_by_caller == FALSE && (!object->pager_initialized || pager == MEMORY_OBJECT_NULL)) {
4134
4135 KERNEL_DEBUG(0xe0400010 | DBG_FUNC_START, object, pager, 0, 0, 0);
4136
4137 vm_object_lock(object);
4138
4139 /*
4140 * If there is no memory object for the page, create
4141 * one and hand it to the compression pager.
4142 */
4143
4144 if (!object->pager_initialized)
4145 vm_object_collapse(object, (vm_object_offset_t) 0, TRUE);
4146 if (!object->pager_initialized)
4147 vm_object_compressor_pager_create(object);
4148
4149 pager = object->pager;
4150
4151 if (!object->pager_initialized || pager == MEMORY_OBJECT_NULL) {
4152 /*
4153 * Still no pager for the object,
4154 * or the pager has been destroyed.
4155 * Reactivate the page.
4156 *
4157 * Should only happen if there is no
4158 * compression pager
4159 */
4160 PAGE_WAKEUP_DONE(m);
4161
4162 vm_page_lockspin_queues();
4163 vm_page_activate(m);
4164 vm_pageout_dirty_no_pager++;
4165 vm_page_unlock_queues();
4166
4167 /*
4168 * And we are done with it.
4169 */
4170 vm_object_activity_end(object);
4171 vm_object_unlock(object);
4172
4173 return KERN_FAILURE;
4174 }
4175 vm_object_unlock(object);
4176
4177 KERNEL_DEBUG(0xe0400010 | DBG_FUNC_END, object, pager, 0, 0, 0);
4178 }
4179 assert(object->pager_initialized && pager != MEMORY_OBJECT_NULL);
4180
4181 if (object_locked_by_caller == FALSE)
4182 assert(object->activity_in_progress > 0);
4183
4184 retval = vm_compressor_pager_put(
4185 pager,
4186 m->offset + object->paging_offset,
4187 VM_PAGE_GET_PHYS_PAGE(m),
4188 current_chead,
4189 scratch_buf,
4190 &compressed_count_delta);
4191
4192 if (object_locked_by_caller == FALSE) {
4193 vm_object_lock(object);
4194
4195 assert(object->activity_in_progress > 0);
4196 assert(VM_PAGE_OBJECT(m) == object);
4197 }
4198
4199 vm_compressor_pager_count(pager,
4200 compressed_count_delta,
4201 FALSE, /* shared_lock */
4202 object);
4203
4204 assert( !VM_PAGE_WIRED(m));
4205
4206 if (retval == KERN_SUCCESS) {
4207 /*
4208 * If the object is purgeable, its owner's
4209 * purgeable ledgers will be updated in
4210 * vm_page_remove() but the page still
4211 * contributes to the owner's memory footprint,
4212 * so account for it as such.
4213 */
4214 if (object->purgable != VM_PURGABLE_DENY &&
4215 object->vo_purgeable_owner != NULL) {
4216 /* one more compressed purgeable page */
4217 vm_purgeable_compressed_update(object,
4218 +1);
4219 }
4220 VM_STAT_INCR(compressions);
4221
4222 if (m->tabled)
4223 vm_page_remove(m, TRUE);
4224
4225 } else {
4226 PAGE_WAKEUP_DONE(m);
4227
4228 vm_page_lockspin_queues();
4229
4230 vm_page_activate(m);
4231 vm_compressor_failed++;
4232
4233 vm_page_unlock_queues();
4234 }
4235 if (object_locked_by_caller == FALSE) {
4236 vm_object_activity_end(object);
4237 vm_object_unlock(object);
4238 }
4239 return retval;
4240 }
4241
4242
4243 static void
4244 vm_pageout_adjust_io_throttles(struct vm_pageout_queue *iq, struct vm_pageout_queue *eq, boolean_t req_lowpriority)
4245 {
4246 uint32_t policy;
4247 boolean_t set_iq = FALSE;
4248 boolean_t set_eq = FALSE;
4249
4250 if (hibernate_cleaning_in_progress == TRUE)
4251 req_lowpriority = FALSE;
4252
4253 if (eq->pgo_inited == TRUE && eq->pgo_lowpriority != req_lowpriority)
4254 set_eq = TRUE;
4255
4256 if (set_iq == TRUE || set_eq == TRUE) {
4257
4258 vm_page_unlock_queues();
4259
4260 if (req_lowpriority == TRUE) {
4261 policy = THROTTLE_LEVEL_PAGEOUT_THROTTLED;
4262 DTRACE_VM(laundrythrottle);
4263 } else {
4264 policy = THROTTLE_LEVEL_PAGEOUT_UNTHROTTLED;
4265 DTRACE_VM(laundryunthrottle);
4266 }
4267 if (set_iq == TRUE) {
4268 proc_set_thread_policy_with_tid(kernel_task, iq->pgo_tid,
4269 TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy);
4270
4271 iq->pgo_lowpriority = req_lowpriority;
4272 }
4273 if (set_eq == TRUE) {
4274 proc_set_thread_policy_with_tid(kernel_task, eq->pgo_tid,
4275 TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy);
4276
4277 eq->pgo_lowpriority = req_lowpriority;
4278 }
4279 vm_page_lock_queues();
4280 }
4281 }
4282
4283
4284 static void
4285 vm_pageout_iothread_external(void)
4286 {
4287 thread_t self = current_thread();
4288
4289 self->options |= TH_OPT_VMPRIV;
4290
4291 DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL);
4292
4293 proc_set_thread_policy(self, TASK_POLICY_EXTERNAL,
4294 TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED);
4295
4296 vm_page_lock_queues();
4297
4298 vm_pageout_queue_external.pgo_tid = self->thread_id;
4299 vm_pageout_queue_external.pgo_lowpriority = TRUE;
4300 vm_pageout_queue_external.pgo_inited = TRUE;
4301
4302 vm_page_unlock_queues();
4303
4304 vm_pageout_iothread_external_continue(&vm_pageout_queue_external);
4305
4306 /*NOTREACHED*/
4307 }
4308
4309
4310 static void
4311 vm_pageout_iothread_internal(struct cq *cq)
4312 {
4313 thread_t self = current_thread();
4314
4315 self->options |= TH_OPT_VMPRIV;
4316
4317 vm_page_lock_queues();
4318
4319 vm_pageout_queue_internal.pgo_tid = self->thread_id;
4320 vm_pageout_queue_internal.pgo_lowpriority = TRUE;
4321 vm_pageout_queue_internal.pgo_inited = TRUE;
4322
4323 vm_page_unlock_queues();
4324
4325 if (vm_restricted_to_single_processor == TRUE)
4326 thread_vm_bind_group_add();
4327
4328 vm_pageout_iothread_internal_continue(cq);
4329
4330 /*NOTREACHED*/
4331 }
4332
4333 kern_return_t
4334 vm_set_buffer_cleanup_callout(boolean_t (*func)(int))
4335 {
4336 if (OSCompareAndSwapPtr(NULL, func, (void * volatile *) &consider_buffer_cache_collect)) {
4337 return KERN_SUCCESS;
4338 } else {
4339 return KERN_FAILURE; /* Already set */
4340 }
4341 }
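#if 0
/*
 * Illustrative sketch only: how a client (e.g. the BSD buffer cache)
 * might register its reclaim callback once at boot.  The callback name
 * 'example_buffer_cache_gc' is hypothetical; the compare-and-swap above
 * means only the first registration succeeds and later attempts return
 * KERN_FAILURE.
 */
extern boolean_t example_buffer_cache_gc(int all);	/* assumed callback */

static void
example_register_cleanup_callout(void)
{
	if (vm_set_buffer_cleanup_callout(example_buffer_cache_gc) != KERN_SUCCESS)
		printf("buffer cleanup callout was already registered\n");
}
#endif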
4342
4343 extern boolean_t memorystatus_manual_testing_on;
4344 extern unsigned int memorystatus_level;
4345
4346
4347 #if VM_PRESSURE_EVENTS
4348
4349 boolean_t vm_pressure_events_enabled = FALSE;
4350
4351 void
4352 vm_pressure_response(void)
4353 {
4354
4355 vm_pressure_level_t old_level = kVMPressureNormal;
4356 int new_level = -1;
4357 unsigned int total_pages;
4358 uint64_t available_memory = 0;
4359
4360 if (vm_pressure_events_enabled == FALSE)
4361 return;
4362
4363
4364 available_memory = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY;
4365
4366
4367 total_pages = (unsigned int) atop_64(max_mem);
4368 #if CONFIG_SECLUDED_MEMORY
4369 total_pages -= vm_page_secluded_count;
4370 #endif /* CONFIG_SECLUDED_MEMORY */
4371 memorystatus_level = (unsigned int) ((available_memory * 100) / total_pages);
4372
4373 if (memorystatus_manual_testing_on) {
4374 return;
4375 }
4376
4377 old_level = memorystatus_vm_pressure_level;
4378
4379 switch (memorystatus_vm_pressure_level) {
4380
4381 case kVMPressureNormal:
4382 {
4383 if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
4384 new_level = kVMPressureCritical;
4385 } else if (VM_PRESSURE_NORMAL_TO_WARNING()) {
4386 new_level = kVMPressureWarning;
4387 }
4388 break;
4389 }
4390
4391 case kVMPressureWarning:
4392 case kVMPressureUrgent:
4393 {
4394 if (VM_PRESSURE_WARNING_TO_NORMAL()) {
4395 new_level = kVMPressureNormal;
4396 } else if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
4397 new_level = kVMPressureCritical;
4398 }
4399 break;
4400 }
4401
4402 case kVMPressureCritical:
4403 {
4404 if (VM_PRESSURE_WARNING_TO_NORMAL()) {
4405 new_level = kVMPressureNormal;
4406 } else if (VM_PRESSURE_CRITICAL_TO_WARNING()) {
4407 new_level = kVMPressureWarning;
4408 }
4409 break;
4410 }
4411
4412 default:
4413 return;
4414 }
4415
4416 if (new_level != -1) {
4417 memorystatus_vm_pressure_level = (vm_pressure_level_t) new_level;
4418
4419 if ((memorystatus_vm_pressure_level != kVMPressureNormal) || (old_level != new_level)) {
4420 if (vm_pressure_thread_running == FALSE) {
4421 thread_wakeup(&vm_pressure_thread);
4422 }
4423
4424 if (old_level != new_level) {
4425 thread_wakeup(&vm_pressure_changed);
4426 }
4427 }
4428 }
4429
4430 }
4431 #endif /* VM_PRESSURE_EVENTS */
4432
4433 kern_return_t
4434 mach_vm_pressure_level_monitor(__unused boolean_t wait_for_pressure, __unused unsigned int *pressure_level) {
4435
4436 #if !VM_PRESSURE_EVENTS
4437
4438 return KERN_FAILURE;
4439
4440 #else /* VM_PRESSURE_EVENTS */
4441
4442 kern_return_t kr = KERN_SUCCESS;
4443
4444 if (pressure_level != NULL) {
4445
4446 vm_pressure_level_t old_level = memorystatus_vm_pressure_level;
4447
4448 if (wait_for_pressure == TRUE) {
4449 wait_result_t wr = 0;
4450
4451 while (old_level == *pressure_level) {
4452 wr = assert_wait((event_t) &vm_pressure_changed,
4453 THREAD_INTERRUPTIBLE);
4454 if (wr == THREAD_WAITING) {
4455 wr = thread_block(THREAD_CONTINUE_NULL);
4456 }
4457 if (wr == THREAD_INTERRUPTED) {
4458 return KERN_ABORTED;
4459 }
4460 if (wr == THREAD_AWAKENED) {
4461
4462 old_level = memorystatus_vm_pressure_level;
4463
4464 if (old_level != *pressure_level) {
4465 break;
4466 }
4467 }
4468 }
4469 }
4470
4471 *pressure_level = old_level;
4472 kr = KERN_SUCCESS;
4473 } else {
4474 kr = KERN_INVALID_ARGUMENT;
4475 }
4476
4477 return kr;
4478 #endif /* VM_PRESSURE_EVENTS */
4479 }
4480
4481 #if VM_PRESSURE_EVENTS
4482 void
4483 vm_pressure_thread(void) {
4484 static boolean_t thread_initialized = FALSE;
4485
4486 if (thread_initialized == TRUE) {
4487 vm_pressure_thread_running = TRUE;
4488 consider_vm_pressure_events();
4489 vm_pressure_thread_running = FALSE;
4490 }
4491
4492 thread_initialized = TRUE;
4493 assert_wait((event_t) &vm_pressure_thread, THREAD_UNINT);
4494 thread_block((thread_continue_t)vm_pressure_thread);
4495 }
4496 #endif /* VM_PRESSURE_EVENTS */
4497
4498
4499 uint32_t vm_pageout_considered_page_last = 0;
4500
4501 /*
4502 * called once per-second via "compute_averages"
4503 */
4504 void
4505 compute_pageout_gc_throttle(__unused void *arg)
4506 {
4507 if (vm_pageout_considered_page != vm_pageout_considered_page_last) {
4508
4509 vm_pageout_considered_page_last = vm_pageout_considered_page;
4510
4511 thread_wakeup((event_t) &vm_pageout_garbage_collect);
4512 }
4513 }
4514
4515
4516 static void
4517 vm_pageout_garbage_collect(int collect)
4518 {
4519
4520 if (collect) {
4521 boolean_t buf_large_zfree = FALSE;
4522 boolean_t first_try = TRUE;
4523
4524 stack_collect();
4525
4526 consider_machine_collect();
4527 m_drain();
4528
4529 do {
4530 if (consider_buffer_cache_collect != NULL) {
4531 buf_large_zfree = (*consider_buffer_cache_collect)(0);
4532 }
4533 if (first_try == TRUE || buf_large_zfree == TRUE) {
4534 /*
4535 * consider_zone_gc should be last, because the other operations
4536 * might return memory to zones.
4537 */
4538 consider_zone_gc();
4539 }
4540 first_try = FALSE;
4541
4542 } while (buf_large_zfree == TRUE && vm_page_free_count < vm_page_free_target);
4543
4544 consider_machine_adjust();
4545 }
4546 assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);
4547
4548 thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
4549 /*NOTREACHED*/
4550 }
4551
4552
4553 #if VM_PAGE_BUCKETS_CHECK
4554 #if VM_PAGE_FAKE_BUCKETS
4555 extern vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
4556 #endif /* VM_PAGE_FAKE_BUCKETS */
4557 #endif /* VM_PAGE_BUCKETS_CHECK */
4558
4559
4560 #define FBDP_TEST_COLLAPSE_COMPRESSOR 0
4561 #define FBDP_TEST_WIRE_AND_EXTRACT 0
4562 #define FBDP_TEST_PAGE_WIRE_OVERFLOW 0
4563
4564 #if FBDP_TEST_COLLAPSE_COMPRESSOR
4565 extern boolean_t vm_object_collapse_compressor_allowed;
4566 #include <IOKit/IOLib.h>
4567 #endif /* FBDP_TEST_COLLAPSE_COMPRESSOR */
4568
4569 #if FBDP_TEST_WIRE_AND_EXTRACT
4570 extern ledger_template_t task_ledger_template;
4571 #include <mach/mach_vm.h>
4572 extern ppnum_t vm_map_get_phys_page(vm_map_t map,
4573 vm_offset_t offset);
4574 #endif /* FBDP_TEST_WIRE_AND_EXTRACT */
4575
4576
4577 void
4578 vm_set_restrictions()
4579 {
4580 host_basic_info_data_t hinfo;
4581 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
4582
4583 #define BSD_HOST 1
4584 host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
4585
4586 assert(hinfo.max_cpus > 0);
4587
4588 if (hinfo.max_cpus <= 3) {
4589 /*
4590 * on systems with a limited number of CPUs, bind the
4591 * 4 major threads that can free memory and that tend to use
4592 * a fair bit of CPU under pressured conditions to a single processor.
4593 * This ensures that these threads don't hog all of the available CPUs
4594 * (important for camera launch), while allowing them to run independently
4595 * w/r to locks... the 4 threads are
4596 * vm_pageout_scan, vm_pageout_iothread_internal (compressor),
4597 * vm_compressor_swap_trigger_thread (minor and major compactions),
4598 * memorystatus_thread (jetsams).
4599 *
4600 * the first time the thread is run, it is responsible for checking the
4601 * state of vm_restricted_to_single_processor, and if TRUE it calls
4602 * thread_bind_master... someday this should be replaced with a group
4603 * scheduling mechanism and KPI.
4604 */
4605 vm_restricted_to_single_processor = TRUE;
4606 }
4607 }
4608
4609
4610 void
4611 vm_pageout(void)
4612 {
4613 thread_t self = current_thread();
4614 thread_t thread;
4615 kern_return_t result;
4616 spl_t s;
4617
4618 /*
4619 * Set thread privileges.
4620 */
4621 s = splsched();
4622
4623 thread_lock(self);
4624 self->options |= TH_OPT_VMPRIV;
4625 sched_set_thread_base_priority(self, BASEPRI_PREEMPT - 1);
4626 thread_unlock(self);
4627
4628 if (!self->reserved_stack)
4629 self->reserved_stack = self->kernel_stack;
4630
4631 if (vm_restricted_to_single_processor == TRUE)
4632 thread_vm_bind_group_add();
4633
4634 splx(s);
4635
4636 /*
4637 * Initialize some paging parameters.
4638 */
4639
4640 if (vm_pageout_swap_wait == 0)
4641 vm_pageout_swap_wait = VM_PAGEOUT_SWAP_WAIT;
4642
4643 if (vm_pageout_idle_wait == 0)
4644 vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;
4645
4646 if (vm_pageout_burst_wait == 0)
4647 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
4648
4649 if (vm_pageout_empty_wait == 0)
4650 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
4651
4652 if (vm_pageout_deadlock_wait == 0)
4653 vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;
4654
4655 if (vm_pageout_deadlock_relief == 0)
4656 vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;
4657
4658 if (vm_pageout_inactive_relief == 0)
4659 vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF;
4660
4661 if (vm_pageout_burst_active_throttle == 0)
4662 vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE;
4663
4664 if (vm_pageout_burst_inactive_throttle == 0)
4665 vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;
4666
4667 /*
4668 * Set kernel task to low backing store privileged
4669 * status
4670 */
4671 task_lock(kernel_task);
4672 kernel_task->priv_flags |= VM_BACKING_STORE_PRIV;
4673 task_unlock(kernel_task);
4674
4675 vm_page_free_count_init = vm_page_free_count;
4676
4677 /*
4678 * even if we've already called vm_page_free_reserve
4679 * call it again here to ensure that the targets are
4680 * accurately calculated (it uses vm_page_free_count_init)
4681 * calling it with an arg of 0 will not change the reserve
4682 * but will re-calculate free_min and free_target
4683 */
4684 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
4685 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
4686 } else
4687 vm_page_free_reserve(0);
4688
4689
4690 vm_page_queue_init(&vm_pageout_queue_external.pgo_pending);
4691 vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
4692 vm_pageout_queue_external.pgo_laundry = 0;
4693 vm_pageout_queue_external.pgo_idle = FALSE;
4694 vm_pageout_queue_external.pgo_busy = FALSE;
4695 vm_pageout_queue_external.pgo_throttled = FALSE;
4696 vm_pageout_queue_external.pgo_draining = FALSE;
4697 vm_pageout_queue_external.pgo_lowpriority = FALSE;
4698 vm_pageout_queue_external.pgo_tid = -1;
4699 vm_pageout_queue_external.pgo_inited = FALSE;
4700
4701 vm_page_queue_init(&vm_pageout_queue_internal.pgo_pending);
4702 vm_pageout_queue_internal.pgo_maxlaundry = 0;
4703 vm_pageout_queue_internal.pgo_laundry = 0;
4704 vm_pageout_queue_internal.pgo_idle = FALSE;
4705 vm_pageout_queue_internal.pgo_busy = FALSE;
4706 vm_pageout_queue_internal.pgo_throttled = FALSE;
4707 vm_pageout_queue_internal.pgo_draining = FALSE;
4708 vm_pageout_queue_internal.pgo_lowpriority = FALSE;
4709 vm_pageout_queue_internal.pgo_tid = -1;
4710 vm_pageout_queue_internal.pgo_inited = FALSE;
4711
4712 /* internal pageout thread started when default pager registered first time */
4713 /* external pageout and garbage collection threads started here */
4714
4715 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL,
4716 BASEPRI_PREEMPT - 1,
4717 &vm_pageout_external_iothread);
4718 if (result != KERN_SUCCESS)
4719 panic("vm_pageout_iothread_external: create failed");
4720
4721 thread_deallocate(vm_pageout_external_iothread);
4722
4723 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL,
4724 BASEPRI_DEFAULT,
4725 &thread);
4726 if (result != KERN_SUCCESS)
4727 panic("vm_pageout_garbage_collect: create failed");
4728
4729 thread_deallocate(thread);
4730
4731 #if VM_PRESSURE_EVENTS
4732 result = kernel_thread_start_priority((thread_continue_t)vm_pressure_thread, NULL,
4733 BASEPRI_DEFAULT,
4734 &thread);
4735
4736 if (result != KERN_SUCCESS)
4737 panic("vm_pressure_thread: create failed");
4738
4739 thread_deallocate(thread);
4740 #endif
4741
4742 vm_object_reaper_init();
4743
4744
4745 bzero(&vm_config, sizeof(vm_config));
4746
4747 switch(vm_compressor_mode) {
4748
4749 case VM_PAGER_DEFAULT:
4750 printf("mapping deprecated VM_PAGER_DEFAULT to VM_PAGER_COMPRESSOR_WITH_SWAP\n");
4751
4752 case VM_PAGER_COMPRESSOR_WITH_SWAP:
4753 vm_config.compressor_is_present = TRUE;
4754 vm_config.swap_is_present = TRUE;
4755 vm_config.compressor_is_active = TRUE;
4756 vm_config.swap_is_active = TRUE;
4757 break;
4758
4759 case VM_PAGER_COMPRESSOR_NO_SWAP:
4760 vm_config.compressor_is_present = TRUE;
4761 vm_config.swap_is_present = TRUE;
4762 vm_config.compressor_is_active = TRUE;
4763 break;
4764
4765 case VM_PAGER_FREEZER_DEFAULT:
4766 printf("mapping deprecated VM_PAGER_FREEZER_DEFAULT to VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP\n");
4767
4768 case VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP:
4769 vm_config.compressor_is_present = TRUE;
4770 vm_config.swap_is_present = TRUE;
4771 break;
4772
4773 case VM_PAGER_COMPRESSOR_NO_SWAP_PLUS_FREEZER_COMPRESSOR_WITH_SWAP:
4774 vm_config.compressor_is_present = TRUE;
4775 vm_config.swap_is_present = TRUE;
4776 vm_config.compressor_is_active = TRUE;
4777 vm_config.freezer_swap_is_active = TRUE;
4778 break;
4779
4780 case VM_PAGER_NOT_CONFIGURED:
4781 break;
4782
4783 default:
4784 printf("unknown compressor mode - %x\n", vm_compressor_mode);
4785 break;
4786 }
4787 if (VM_CONFIG_COMPRESSOR_IS_PRESENT)
4788 vm_compressor_pager_init();
4789
4790 #if VM_PRESSURE_EVENTS
4791 vm_pressure_events_enabled = TRUE;
4792 #endif /* VM_PRESSURE_EVENTS */
4793
4794 #if CONFIG_PHANTOM_CACHE
4795 vm_phantom_cache_init();
4796 #endif
4797 #if VM_PAGE_BUCKETS_CHECK
4798 #if VM_PAGE_FAKE_BUCKETS
4799 printf("**** DEBUG: protecting fake buckets [0x%llx:0x%llx]\n",
4800 (uint64_t) vm_page_fake_buckets_start,
4801 (uint64_t) vm_page_fake_buckets_end);
4802 pmap_protect(kernel_pmap,
4803 vm_page_fake_buckets_start,
4804 vm_page_fake_buckets_end,
4805 VM_PROT_READ);
4806 // *(char *) vm_page_fake_buckets_start = 'x'; /* panic! */
4807 #endif /* VM_PAGE_FAKE_BUCKETS */
4808 #endif /* VM_PAGE_BUCKETS_CHECK */
4809
4810 #if VM_OBJECT_TRACKING
4811 vm_object_tracking_init();
4812 #endif /* VM_OBJECT_TRACKING */
4813
4814
4815 #if FBDP_TEST_COLLAPSE_COMPRESSOR
4816 vm_object_size_t backing_size, top_size;
4817 vm_object_t backing_object, top_object;
4818 vm_map_offset_t backing_offset, top_offset;
4819 unsigned char *backing_address, *top_address;
4820 kern_return_t kr;
4821
4822 printf("FBDP_TEST_COLLAPSE_COMPRESSOR:\n");
4823
4824 /* create backing object */
4825 backing_size = 15 * PAGE_SIZE;
4826 backing_object = vm_object_allocate(backing_size);
4827 assert(backing_object != VM_OBJECT_NULL);
4828 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: created backing object %p\n",
4829 backing_object);
4830 /* map backing object */
4831 backing_offset = 0;
4832 kr = vm_map_enter(kernel_map, &backing_offset, backing_size, 0,
4833 VM_FLAGS_ANYWHERE, backing_object, 0, FALSE,
4834 VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
4835 assert(kr == KERN_SUCCESS);
4836 backing_address = (unsigned char *) backing_offset;
4837 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4838 "mapped backing object %p at 0x%llx\n",
4839 backing_object, (uint64_t) backing_offset);
4840 /* populate with pages to be compressed in backing object */
4841 backing_address[0x1*PAGE_SIZE] = 0xB1;
4842 backing_address[0x4*PAGE_SIZE] = 0xB4;
4843 backing_address[0x7*PAGE_SIZE] = 0xB7;
4844 backing_address[0xa*PAGE_SIZE] = 0xBA;
4845 backing_address[0xd*PAGE_SIZE] = 0xBD;
4846 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4847 "populated pages to be compressed in "
4848 "backing_object %p\n", backing_object);
4849 /* compress backing object */
4850 vm_object_pageout(backing_object);
4851 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: compressing backing_object %p\n",
4852 backing_object);
4853 /* wait for all the pages to be gone */
4854 while (*(volatile int *)&backing_object->resident_page_count != 0)
4855 IODelay(10);
4856 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: backing_object %p compressed\n",
4857 backing_object);
4858 /* populate with pages to be resident in backing object */
4859 backing_address[0x0*PAGE_SIZE] = 0xB0;
4860 backing_address[0x3*PAGE_SIZE] = 0xB3;
4861 backing_address[0x6*PAGE_SIZE] = 0xB6;
4862 backing_address[0x9*PAGE_SIZE] = 0xB9;
4863 backing_address[0xc*PAGE_SIZE] = 0xBC;
4864 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4865 "populated pages to be resident in "
4866 "backing_object %p\n", backing_object);
4867 /* leave the other pages absent */
4868 /* mess with the paging_offset of the backing_object */
4869 assert(backing_object->paging_offset == 0);
4870 backing_object->paging_offset = 0x3000;
4871
4872 /* create top object */
4873 top_size = 9 * PAGE_SIZE;
4874 top_object = vm_object_allocate(top_size);
4875 assert(top_object != VM_OBJECT_NULL);
4876 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: created top object %p\n",
4877 top_object);
4878 /* map top object */
4879 top_offset = 0;
4880 kr = vm_map_enter(kernel_map, &top_offset, top_size, 0,
4881 VM_FLAGS_ANYWHERE, top_object, 0, FALSE,
4882 VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
4883 assert(kr == KERN_SUCCESS);
4884 top_address = (unsigned char *) top_offset;
4885 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4886 "mapped top object %p at 0x%llx\n",
4887 top_object, (uint64_t) top_offset);
4888 /* populate with pages to be compressed in top object */
4889 top_address[0x3*PAGE_SIZE] = 0xA3;
4890 top_address[0x4*PAGE_SIZE] = 0xA4;
4891 top_address[0x5*PAGE_SIZE] = 0xA5;
4892 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4893 "populated pages to be compressed in "
4894 "top_object %p\n", top_object);
4895 /* compress top object */
4896 vm_object_pageout(top_object);
4897 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: compressing top_object %p\n",
4898 top_object);
4899 /* wait for all the pages to be gone */
4900 while (top_object->resident_page_count != 0);
4901 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: top_object %p compressed\n",
4902 top_object);
4903 /* populate with pages to be resident in top object */
4904 top_address[0x0*PAGE_SIZE] = 0xA0;
4905 top_address[0x1*PAGE_SIZE] = 0xA1;
4906 top_address[0x2*PAGE_SIZE] = 0xA2;
4907 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4908 "populated pages to be resident in "
4909 "top_object %p\n", top_object);
4910 /* leave the other pages absent */
4911
4912 /* link the 2 objects */
4913 vm_object_reference(backing_object);
4914 top_object->shadow = backing_object;
4915 top_object->vo_shadow_offset = 0x3000;
4916 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: linked %p and %p\n",
4917 top_object, backing_object);
4918
4919 /* unmap backing object */
4920 vm_map_remove(kernel_map,
4921 backing_offset,
4922 backing_offset + backing_size,
4923 0);
4924 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4925 "unmapped backing_object %p [0x%llx:0x%llx]\n",
4926 backing_object,
4927 (uint64_t) backing_offset,
4928 (uint64_t) (backing_offset + backing_size));
4929
4930 /* collapse */
4931 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: collapsing %p\n", top_object);
4932 vm_object_lock(top_object);
4933 vm_object_collapse(top_object, 0, FALSE);
4934 vm_object_unlock(top_object);
4935 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: collapsed %p\n", top_object);
4936
4937 /* did it work? */
4938 if (top_object->shadow != VM_OBJECT_NULL) {
4939 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: not collapsed\n");
4940 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
4941 if (vm_object_collapse_compressor_allowed) {
4942 panic("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
4943 }
4944 } else {
4945 /* check the contents of the mapping */
4946 unsigned char expect[9] =
4947 { 0xA0, 0xA1, 0xA2, /* resident in top */
4948 0xA3, 0xA4, 0xA5, /* compressed in top */
4949 0xB9, /* resident in backing + shadow_offset */
4950 0xBD, /* compressed in backing + shadow_offset + paging_offset */
4951 0x00 }; /* absent in both */
4952 unsigned char actual[9];
4953 unsigned int i, errors;
4954
4955 errors = 0;
4956 for (i = 0; i < sizeof (actual); i++) {
4957 actual[i] = (unsigned char) top_address[i*PAGE_SIZE];
4958 if (actual[i] != expect[i]) {
4959 errors++;
4960 }
4961 }
4962 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4963 "actual [%x %x %x %x %x %x %x %x %x] "
4964 "expect [%x %x %x %x %x %x %x %x %x] "
4965 "%d errors\n",
4966 actual[0], actual[1], actual[2], actual[3],
4967 actual[4], actual[5], actual[6], actual[7],
4968 actual[8],
4969 expect[0], expect[1], expect[2], expect[3],
4970 expect[4], expect[5], expect[6], expect[7],
4971 expect[8],
4972 errors);
4973 if (errors) {
4974 panic("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
4975 } else {
4976 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: PASS\n");
4977 }
4978 }
4979 #endif /* FBDP_TEST_COLLAPSE_COMPRESSOR */
4980
4981 #if FBDP_TEST_WIRE_AND_EXTRACT
4982 ledger_t ledger;
4983 vm_map_t user_map, wire_map;
4984 mach_vm_address_t user_addr, wire_addr;
4985 mach_vm_size_t user_size, wire_size;
4986 mach_vm_offset_t cur_offset;
4987 vm_prot_t cur_prot, max_prot;
4988 ppnum_t user_ppnum, wire_ppnum;
4989 kern_return_t kr;
4990
4991 ledger = ledger_instantiate(task_ledger_template,
4992 LEDGER_CREATE_ACTIVE_ENTRIES);
4993 user_map = vm_map_create(pmap_create(ledger, 0, PMAP_CREATE_64BIT),
4994 0x100000000ULL,
4995 0x200000000ULL,
4996 TRUE);
4997 wire_map = vm_map_create(NULL,
4998 0x100000000ULL,
4999 0x200000000ULL,
5000 TRUE);
5001 user_addr = 0;
5002 user_size = 0x10000;
5003 kr = mach_vm_allocate(user_map,
5004 &user_addr,
5005 user_size,
5006 VM_FLAGS_ANYWHERE);
5007 assert(kr == KERN_SUCCESS);
5008 wire_addr = 0;
5009 wire_size = user_size;
5010 kr = mach_vm_remap(wire_map,
5011 &wire_addr,
5012 wire_size,
5013 0,
5014 VM_FLAGS_ANYWHERE,
5015 user_map,
5016 user_addr,
5017 FALSE,
5018 &cur_prot,
5019 &max_prot,
5020 VM_INHERIT_NONE);
5021 assert(kr == KERN_SUCCESS);
5022 for (cur_offset = 0;
5023 cur_offset < wire_size;
5024 cur_offset += PAGE_SIZE) {
5025 kr = vm_map_wire_and_extract(wire_map,
5026 wire_addr + cur_offset,
5027 VM_PROT_DEFAULT | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK),
5028 TRUE,
5029 &wire_ppnum);
5030 assert(kr == KERN_SUCCESS);
5031 user_ppnum = vm_map_get_phys_page(user_map,
5032 user_addr + cur_offset);
5033 printf("FBDP_TEST_WIRE_AND_EXTRACT: kr=0x%x "
5034 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
5035 kr,
5036 user_map, user_addr + cur_offset, user_ppnum,
5037 wire_map, wire_addr + cur_offset, wire_ppnum);
5038 if (kr != KERN_SUCCESS ||
5039 wire_ppnum == 0 ||
5040 wire_ppnum != user_ppnum) {
5041 panic("FBDP_TEST_WIRE_AND_EXTRACT: FAIL\n");
5042 }
5043 }
5044 cur_offset -= PAGE_SIZE;
5045 kr = vm_map_wire_and_extract(wire_map,
5046 wire_addr + cur_offset,
5047 VM_PROT_DEFAULT,
5048 TRUE,
5049 &wire_ppnum);
5050 assert(kr == KERN_SUCCESS);
5051 printf("FBDP_TEST_WIRE_AND_EXTRACT: re-wire kr=0x%x "
5052 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
5053 kr,
5054 user_map, user_addr + cur_offset, user_ppnum,
5055 wire_map, wire_addr + cur_offset, wire_ppnum);
5056 if (kr != KERN_SUCCESS ||
5057 wire_ppnum == 0 ||
5058 wire_ppnum != user_ppnum) {
5059 panic("FBDP_TEST_WIRE_AND_EXTRACT: FAIL\n");
5060 }
5061
5062 printf("FBDP_TEST_WIRE_AND_EXTRACT: PASS\n");
5063 #endif /* FBDP_TEST_WIRE_AND_EXTRACT */
5064
5065 #if FBDP_TEST_PAGE_WIRE_OVERFLOW
5066 vm_object_t fbdp_object;
5067 vm_page_t fbdp_page;
5068
5069 printf("FBDP_TEST_PAGE_WIRE_OVERFLOW: starting...\n");
5070
5071 fbdp_object = vm_object_allocate(PAGE_SIZE);
5072 vm_object_lock(fbdp_object);
5073 fbdp_page = vm_page_alloc(fbdp_object, 0x0);
5074 vm_page_lock_queues();
5075 do {
5076 vm_page_wire(fbdp_page, 1, FALSE);
5077 } while (fbdp_page->wire_count != 0);
5078 vm_page_unlock_queues();
5079 vm_object_unlock(fbdp_object);
5080 panic("FBDP(%p,%p): wire_count overflow not detected\n",
5081 fbdp_object, fbdp_page);
5082 #endif /* FBDP_TEST_PAGE_WIRE_OVERFLOW */
5083
5084 vm_pageout_continue();
5085
5086 /*
5087 * Unreached code!
5088 *
5089 * The vm_pageout_continue() call above never returns, so the code below is never
5090 * executed. We take advantage of this to declare several DTrace VM related probe
5091 * points that our kernel doesn't have an analog for. These are probe points that
5092 * exist in Solaris and are in the DTrace documentation, so people may have written
5093 * scripts that use them. Declaring the probe points here means their scripts will
5094 * compile and execute which we want for portability of the scripts, but since this
5095 * section of code is never reached, the probe points will simply never fire. Yes,
5096 * this is basically a hack. The problem is the DTrace probe points were chosen with
5097 * Solaris specific VM events in mind, not portability to different VM implementations.
5098 */
5099
5100 DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL);
5101 DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL);
5102 DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL);
5103 DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL);
5104 DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL);
5105 DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL);
5106 DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL);
5107 /*NOTREACHED*/
5108 }
5109
5110
5111
5112 int vm_compressor_thread_count = 2;
5113
5114 kern_return_t
5115 vm_pageout_internal_start(void)
5116 {
5117 kern_return_t result;
5118 int i;
5119 host_basic_info_data_t hinfo;
5120
5121 assert (VM_CONFIG_COMPRESSOR_IS_PRESENT);
5122
5123 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
5124 #define BSD_HOST 1
5125 host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
5126
5127 assert(hinfo.max_cpus > 0);
5128
5129 if (vm_compressor_thread_count >= hinfo.max_cpus)
5130 vm_compressor_thread_count = hinfo.max_cpus - 1;
5131 if (vm_compressor_thread_count <= 0)
5132 vm_compressor_thread_count = 1;
5133 else if (vm_compressor_thread_count > MAX_COMPRESSOR_THREAD_COUNT)
5134 vm_compressor_thread_count = MAX_COMPRESSOR_THREAD_COUNT;
5135
5136 if (vm_compressor_immediate_preferred == TRUE) {
5137 vm_pageout_immediate_chead = NULL;
5138 vm_pageout_immediate_scratch_buf = kalloc(vm_compressor_get_encode_scratch_size());
5139
5140 vm_compressor_thread_count = 1;
5141 }
5142
5143 vm_pageout_queue_internal.pgo_maxlaundry = (vm_compressor_thread_count * 4) * VM_PAGE_LAUNDRY_MAX;
5144
5145 for (i = 0; i < vm_compressor_thread_count; i++) {
5146 ciq[i].id = i;
5147 ciq[i].q = &vm_pageout_queue_internal;
5148 ciq[i].current_chead = NULL;
5149 ciq[i].scratch_buf = kalloc(COMPRESSOR_SCRATCH_BUF_SIZE);
5150
5151 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, (void *)&ciq[i], BASEPRI_PREEMPT - 1, &vm_pageout_internal_iothread);
5152
5153 if (result == KERN_SUCCESS)
5154 thread_deallocate(vm_pageout_internal_iothread);
5155 else
5156 break;
5157 }
5158 return result;
5159 }
5160
5161 #if CONFIG_IOSCHED
5162 /*
5163 * To support I/O Expedite for compressed files we mark the upls with special flags.
5164 * The way decmpfs works is that we create a big upl which marks all the pages needed to
5165 * represent the compressed file as busy. We tag this upl with the flag UPL_DECMP_REQ. Decmpfs
5166 * then issues smaller I/Os for compressed I/Os, deflates them and puts the data into the pages
5167 * being held in the big original UPL. We mark each of these smaller UPLs with the flag
5168 * UPL_DECMP_REAL_IO. Any outstanding real I/O UPL is tracked by the big req upl using the
5169 * decmp_io_upl field (in the upl structure). This link is protected in the forward direction
5170 * by the req upl lock (the reverse link doesn't need synchronization since we never inspect this link
5171 * unless the real I/O upl is being destroyed).
5172 */
5173
5174
5175 static void
5176 upl_set_decmp_info(upl_t upl, upl_t src_upl)
5177 {
5178 assert((src_upl->flags & UPL_DECMP_REQ) != 0);
5179
5180 upl_lock(src_upl);
5181 if (src_upl->decmp_io_upl) {
5182 /*
5183 * If there is already an alive real I/O UPL, ignore this new UPL.
5184 * This case should rarely happen and even if it does, it just means
5185 * that we might issue a spurious expedite which the driver is expected
5186 * to handle.
5187 */
5188 upl_unlock(src_upl);
5189 return;
5190 }
5191 src_upl->decmp_io_upl = (void *)upl;
5192 src_upl->ref_count++;
5193
5194 upl->flags |= UPL_DECMP_REAL_IO;
5195 upl->decmp_io_upl = (void *)src_upl;
5196 upl_unlock(src_upl);
5197 }
5198 #endif /* CONFIG_IOSCHED */
5199
5200 #if UPL_DEBUG
5201 int upl_debug_enabled = 1;
5202 #else
5203 int upl_debug_enabled = 0;
5204 #endif
5205
5206 static upl_t
5207 upl_create(int type, int flags, upl_size_t size)
5208 {
5209 upl_t upl;
5210 vm_size_t page_field_size = 0;
5211 int upl_flags = 0;
5212 vm_size_t upl_size = sizeof(struct upl);
5213
5214 size = round_page_32(size);
5215
5216 if (type & UPL_CREATE_LITE) {
5217 page_field_size = (atop(size) + 7) >> 3;
5218 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
5219
5220 upl_flags |= UPL_LITE;
5221 }
5222 if (type & UPL_CREATE_INTERNAL) {
5223 upl_size += sizeof(struct upl_page_info) * atop(size);
5224
5225 upl_flags |= UPL_INTERNAL;
5226 }
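	/*
	 * Worked example (illustrative, assuming 4KB pages): for a 64KB
	 * request with UPL_CREATE_LITE | UPL_CREATE_INTERNAL, atop(size) is 16,
	 * so page_field_size = (16 + 7) >> 3 = 2, rounded up to 4 bytes, and
	 * upl_size grows by 16 * sizeof(struct upl_page_info).  The single
	 * kalloc() below therefore covers the upl header, the page info array
	 * and the lite bitmap in one allocation.
	 */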
5227 upl = (upl_t)kalloc(upl_size + page_field_size);
5228
5229 if (page_field_size)
5230 bzero((char *)upl + upl_size, page_field_size);
5231
5232 upl->flags = upl_flags | flags;
5233 upl->kaddr = (vm_offset_t)0;
5234 upl->size = 0;
5235 upl->map_object = NULL;
5236 upl->ref_count = 1;
5237 upl->ext_ref_count = 0;
5238 upl->highest_page = 0;
5239 upl_lock_init(upl);
5240 upl->vector_upl = NULL;
5241 upl->associated_upl = NULL;
5242 #if CONFIG_IOSCHED
5243 if (type & UPL_CREATE_IO_TRACKING) {
5244 upl->upl_priority = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO);
5245 }
5246
5247 upl->upl_reprio_info = 0;
5248 upl->decmp_io_upl = 0;
5249 if ((type & UPL_CREATE_INTERNAL) && (type & UPL_CREATE_EXPEDITE_SUP)) {
5250 /* Only support expedite on internal UPLs */
5251 thread_t curthread = current_thread();
5252 upl->upl_reprio_info = (uint64_t *)kalloc(sizeof(uint64_t) * atop(size));
5253 bzero(upl->upl_reprio_info, (sizeof(uint64_t) * atop(size)));
5254 upl->flags |= UPL_EXPEDITE_SUPPORTED;
5255 if (curthread->decmp_upl != NULL)
5256 upl_set_decmp_info(upl, curthread->decmp_upl);
5257 }
5258 #endif
5259 #if CONFIG_IOSCHED || UPL_DEBUG
5260 if ((type & UPL_CREATE_IO_TRACKING) || upl_debug_enabled) {
5261 upl->upl_creator = current_thread();
5262 upl->uplq.next = 0;
5263 upl->uplq.prev = 0;
5264 upl->flags |= UPL_TRACKED_BY_OBJECT;
5265 }
5266 #endif
5267
5268 #if UPL_DEBUG
5269 upl->ubc_alias1 = 0;
5270 upl->ubc_alias2 = 0;
5271
5272 upl->upl_state = 0;
5273 upl->upl_commit_index = 0;
5274 bzero(&upl->upl_commit_records[0], sizeof(upl->upl_commit_records));
5275
5276 (void) OSBacktrace(&upl->upl_create_retaddr[0], UPL_DEBUG_STACK_FRAMES);
5277 #endif /* UPL_DEBUG */
5278
5279 return(upl);
5280 }
5281
5282 static void
5283 upl_destroy(upl_t upl)
5284 {
5285 int page_field_size; /* bit field in word size buf */
5286 int size;
5287
5288 if (upl->ext_ref_count) {
5289 panic("upl(%p) ext_ref_count", upl);
5290 }
5291
5292 #if CONFIG_IOSCHED
5293 if ((upl->flags & UPL_DECMP_REAL_IO) && upl->decmp_io_upl) {
5294 upl_t src_upl;
5295 src_upl = upl->decmp_io_upl;
5296 assert((src_upl->flags & UPL_DECMP_REQ) != 0);
5297 upl_lock(src_upl);
5298 src_upl->decmp_io_upl = NULL;
5299 upl_unlock(src_upl);
5300 upl_deallocate(src_upl);
5301 }
5302 #endif /* CONFIG_IOSCHED */
5303
5304 #if CONFIG_IOSCHED || UPL_DEBUG
5305 if ((upl->flags & UPL_TRACKED_BY_OBJECT) && !(upl->flags & UPL_VECTOR)) {
5306 vm_object_t object;
5307
5308 if (upl->flags & UPL_SHADOWED) {
5309 object = upl->map_object->shadow;
5310 } else {
5311 object = upl->map_object;
5312 }
5313
5314 vm_object_lock(object);
5315 queue_remove(&object->uplq, upl, upl_t, uplq);
5316 vm_object_activity_end(object);
5317 vm_object_collapse(object, 0, TRUE);
5318 vm_object_unlock(object);
5319 }
5320 #endif
5321 /*
5322 * drop a reference on the map_object whether or
5323 * not a pageout object is inserted
5324 */
5325 if (upl->flags & UPL_SHADOWED)
5326 vm_object_deallocate(upl->map_object);
5327
5328 if (upl->flags & UPL_DEVICE_MEMORY)
5329 size = PAGE_SIZE;
5330 else
5331 size = upl->size;
5332 page_field_size = 0;
5333
5334 if (upl->flags & UPL_LITE) {
5335 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
5336 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
5337 }
5338 upl_lock_destroy(upl);
5339 upl->vector_upl = (vector_upl_t) 0xfeedbeef;
5340
5341 #if CONFIG_IOSCHED
5342 if (upl->flags & UPL_EXPEDITE_SUPPORTED)
5343 kfree(upl->upl_reprio_info, sizeof(uint64_t) * (size/PAGE_SIZE));
5344 #endif
5345
5346 if (upl->flags & UPL_INTERNAL) {
5347 kfree(upl,
5348 sizeof(struct upl) +
5349 (sizeof(struct upl_page_info) * (size/PAGE_SIZE))
5350 + page_field_size);
5351 } else {
5352 kfree(upl, sizeof(struct upl) + page_field_size);
5353 }
5354 }
5355
5356 void
5357 upl_deallocate(upl_t upl)
5358 {
5359 upl_lock(upl);
5360 if (--upl->ref_count == 0) {
5361 if(vector_upl_is_valid(upl))
5362 vector_upl_deallocate(upl);
5363 upl_unlock(upl);
5364 upl_destroy(upl);
5365 }
5366 else
5367 upl_unlock(upl);
5368 }
5369
5370 #if CONFIG_IOSCHED
5371 void
5372 upl_mark_decmp(upl_t upl)
5373 {
5374 if (upl->flags & UPL_TRACKED_BY_OBJECT) {
5375 upl->flags |= UPL_DECMP_REQ;
5376 upl->upl_creator->decmp_upl = (void *)upl;
5377 }
5378 }
5379
5380 void
5381 upl_unmark_decmp(upl_t upl)
5382 {
5383 if(upl && (upl->flags & UPL_DECMP_REQ)) {
5384 upl->upl_creator->decmp_upl = NULL;
5385 }
5386 }
5387
5388 #endif /* CONFIG_IOSCHED */
5389
5390 #define VM_PAGE_Q_BACKING_UP(q) \
5391 ((q)->pgo_laundry >= (((q)->pgo_maxlaundry * 8) / 10))
5392
5393 boolean_t must_throttle_writes(void);
5394
5395 boolean_t
5396 must_throttle_writes()
5397 {
5398 if (VM_PAGE_Q_BACKING_UP(&vm_pageout_queue_external) &&
5399 vm_page_pageable_external_count > (AVAILABLE_NON_COMPRESSED_MEMORY * 6) / 10)
5400 return (TRUE);
5401
5402 return (FALSE);
5403 }
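/*
 * Worked example of the thresholds above (the laundry limit value is
 * illustrative): with pgo_maxlaundry = 128, VM_PAGE_Q_BACKING_UP() is true
 * once pgo_laundry >= (128 * 8) / 10 = 102, i.e. the external pageout queue
 * is roughly 80% full.  must_throttle_writes() additionally requires that
 * pageable external pages exceed 60% of AVAILABLE_NON_COMPRESSED_MEMORY
 * before asking writers to back off.
 */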
5404
5405
5406 #if DEVELOPMENT || DEBUG
5407 /*
5408 * Statistics about UPL enforcement of copy-on-write obligations.
5409 */
5410 unsigned long upl_cow = 0;
5411 unsigned long upl_cow_again = 0;
5412 unsigned long upl_cow_pages = 0;
5413 unsigned long upl_cow_again_pages = 0;
5414
5415 unsigned long iopl_cow = 0;
5416 unsigned long iopl_cow_pages = 0;
5417 #endif
5418
5419 /*
5420 * Routine: vm_object_upl_request
5421 * Purpose:
5422 * Cause the population of a portion of a vm_object.
5423 * Depending on the nature of the request, the pages
5424 * returned may contain valid data or be uninitialized.
5425 * A page list structure, listing the physical pages,
5426 * will be returned upon request.
5427 * This function is called by the file system or any other
5428 * supplier of backing store to a pager.
5429 * IMPORTANT NOTE: The caller must still respect the relationship
5430 * between the vm_object and its backing memory object. The
5431 * caller MUST NOT substitute changes in the backing file
5432 * without first doing a memory_object_lock_request on the
5433 * target range unless it is known that the pages are not
5434 * shared with another entity at the pager level.
5435 * Copy_in_to:
5436 * if a page list structure is present
5437 * return the mapped physical pages; where a
5438 * page is not present, return an uninitialized
5439 * one. If the no_sync bit is turned on, don't
5440 * call the pager unlock to synchronize with other
5441 * possible copies of the page. Leave pages busy
5442 * in the original object, if a page list structure
5443 * was specified. When a commit of the page list
5444 * pages is done, the dirty bit will be set for each one.
5445 * Copy_out_from:
5446 * If a page list structure is present, return
5447 * all mapped pages. Where a page does not exist
5448 * map a zero filled one. Leave pages busy in
5449 * the original object. If a page list structure
5450 * is not specified, this call is a no-op.
5451 *
5452 * Note: access of default pager objects has a rather interesting
5453 * twist. The caller of this routine, presumably the file system
5454 * page cache handling code, will never actually make a request
5455 * against a default pager backed object. Only the default
5456 * pager will make requests on backing store related vm_objects.
5457 * In this way the default pager can maintain the relationship
5458 * between backing store files (abstract memory objects) and
5459 * the vm_objects (cache objects) they support.
5460 *
5461 */
5462
5463 __private_extern__ kern_return_t
5464 vm_object_upl_request(
5465 vm_object_t object,
5466 vm_object_offset_t offset,
5467 upl_size_t size,
5468 upl_t *upl_ptr,
5469 upl_page_info_array_t user_page_list,
5470 unsigned int *page_list_count,
5471 upl_control_flags_t cntrl_flags)
5472 {
5473 vm_page_t dst_page = VM_PAGE_NULL;
5474 vm_object_offset_t dst_offset;
5475 upl_size_t xfer_size;
5476 unsigned int size_in_pages;
5477 boolean_t dirty;
5478 boolean_t hw_dirty;
5479 upl_t upl = NULL;
5480 unsigned int entry;
5481 #if MACH_CLUSTER_STATS
5482 boolean_t encountered_lrp = FALSE;
5483 #endif
5484 vm_page_t alias_page = NULL;
5485 int refmod_state = 0;
5486 wpl_array_t lite_list = NULL;
5487 vm_object_t last_copy_object;
5488 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
5489 struct vm_page_delayed_work *dwp;
5490 int dw_count;
5491 int dw_limit;
5492 int io_tracking_flag = 0;
5493 int grab_options;
5494 ppnum_t phys_page;
5495
5496 if (cntrl_flags & ~UPL_VALID_FLAGS) {
5497 /*
5498 * For forward compatibility's sake,
5499 * reject any unknown flag.
5500 */
5501 return KERN_INVALID_VALUE;
5502 }
5503 if ( (!object->internal) && (object->paging_offset != 0) )
5504 panic("vm_object_upl_request: external object with non-zero paging offset\n");
5505 if (object->phys_contiguous)
5506 panic("vm_object_upl_request: contiguous object specified\n");
5507
5508
5509 if (size > MAX_UPL_SIZE_BYTES)
5510 size = MAX_UPL_SIZE_BYTES;
5511
5512 if ( (cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL)
5513 *page_list_count = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT;
5514
5515 #if CONFIG_IOSCHED || UPL_DEBUG
5516 if (object->io_tracking || upl_debug_enabled)
5517 io_tracking_flag |= UPL_CREATE_IO_TRACKING;
5518 #endif
5519 #if CONFIG_IOSCHED
5520 if (object->io_tracking)
5521 io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
5522 #endif
5523
5524 if (cntrl_flags & UPL_SET_INTERNAL) {
5525 if (cntrl_flags & UPL_SET_LITE) {
5526
5527 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
5528
5529 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
5530 lite_list = (wpl_array_t)
5531 (((uintptr_t)user_page_list) +
5532 ((size/PAGE_SIZE) * sizeof(upl_page_info_t)));
5533 if (size == 0) {
5534 user_page_list = NULL;
5535 lite_list = NULL;
5536 }
5537 } else {
5538 upl = upl_create(UPL_CREATE_INTERNAL | io_tracking_flag, 0, size);
5539
5540 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
5541 if (size == 0) {
5542 user_page_list = NULL;
5543 }
5544 }
5545 } else {
5546 if (cntrl_flags & UPL_SET_LITE) {
5547
5548 upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
5549
5550 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
5551 if (size == 0) {
5552 lite_list = NULL;
5553 }
5554 } else {
5555 upl = upl_create(UPL_CREATE_EXTERNAL | io_tracking_flag, 0, size);
5556 }
5557 }
5558 *upl_ptr = upl;
5559
5560 if (user_page_list)
5561 user_page_list[0].device = FALSE;
5562
5563 if (cntrl_flags & UPL_SET_LITE) {
5564 upl->map_object = object;
5565 } else {
5566 upl->map_object = vm_object_allocate(size);
5567 /*
5568 * No need to lock the new object: nobody else knows
5569 * about it yet, so it's all ours so far.
5570 */
5571 upl->map_object->shadow = object;
5572 upl->map_object->pageout = TRUE;
5573 upl->map_object->can_persist = FALSE;
5574 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
5575 upl->map_object->vo_shadow_offset = offset;
5576 upl->map_object->wimg_bits = object->wimg_bits;
5577
5578 VM_PAGE_GRAB_FICTITIOUS(alias_page);
5579
5580 upl->flags |= UPL_SHADOWED;
5581 }
5582 /*
5583 * ENCRYPTED SWAP:
5584 * Just mark the UPL as "encrypted" here.
5585 * We'll actually encrypt the pages later,
5586 * in upl_encrypt(), when the caller has
5587 * selected which pages need to go to swap.
5588 */
5589 if (cntrl_flags & UPL_ENCRYPT)
5590 upl->flags |= UPL_ENCRYPTED;
5591
5592 if (cntrl_flags & UPL_FOR_PAGEOUT)
5593 upl->flags |= UPL_PAGEOUT;
5594
5595 vm_object_lock(object);
5596 vm_object_activity_begin(object);
5597
5598 grab_options = 0;
5599 #if CONFIG_SECLUDED_MEMORY
5600 if (object->can_grab_secluded) {
5601 grab_options |= VM_PAGE_GRAB_SECLUDED;
5602 }
5603 #endif /* CONFIG_SECLUDED_MEMORY */
5604
5605 /*
5606 * we can lock in the paging_offset once paging_in_progress is set
5607 */
5608 upl->size = size;
5609 upl->offset = offset + object->paging_offset;
5610
5611 #if CONFIG_IOSCHED || UPL_DEBUG
5612 if (object->io_tracking || upl_debug_enabled) {
5613 vm_object_activity_begin(object);
5614 queue_enter(&object->uplq, upl, upl_t, uplq);
5615 }
5616 #endif
5617 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != VM_OBJECT_NULL) {
5618 /*
5619 * Honor copy-on-write obligations
5620 *
5621 * The caller is gathering these pages and
5622 * might modify their contents. We need to
5623 * make sure that the copy object has its own
5624 * private copies of these pages before we let
5625 * the caller modify them.
5626 */
5627 vm_object_update(object,
5628 offset,
5629 size,
5630 NULL,
5631 NULL,
5632 FALSE, /* should_return */
5633 MEMORY_OBJECT_COPY_SYNC,
5634 VM_PROT_NO_CHANGE);
5635 #if DEVELOPMENT || DEBUG
5636 upl_cow++;
5637 upl_cow_pages += size >> PAGE_SHIFT;
5638 #endif
5639 }
5640 /*
5641 * remember which copy object we synchronized with
5642 */
5643 last_copy_object = object->copy;
5644 entry = 0;
5645
5646 xfer_size = size;
5647 dst_offset = offset;
5648 size_in_pages = size / PAGE_SIZE;
5649
5650 dwp = &dw_array[0];
5651 dw_count = 0;
5652 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
5653
5654 if (vm_page_free_count > (vm_page_free_target + size_in_pages) ||
5655 object->resident_page_count < ((MAX_UPL_SIZE_BYTES * 2) >> PAGE_SHIFT))
5656 object->scan_collisions = 0;
5657
5658 if ((cntrl_flags & UPL_WILL_MODIFY) && must_throttle_writes() == TRUE) {
5659 boolean_t isSSD = FALSE;
5660
5661 vnode_pager_get_isSSD(object->pager, &isSSD);
5662 vm_object_unlock(object);
5663
5664 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
5665
5666 if (isSSD == TRUE)
5667 delay(1000 * size_in_pages);
5668 else
5669 delay(5000 * size_in_pages);
5670 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
5671
5672 vm_object_lock(object);
5673 }
5674
5675 while (xfer_size) {
5676
5677 dwp->dw_mask = 0;
5678
5679 if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
5680 vm_object_unlock(object);
5681 VM_PAGE_GRAB_FICTITIOUS(alias_page);
5682 vm_object_lock(object);
5683 }
5684 if (cntrl_flags & UPL_COPYOUT_FROM) {
5685 upl->flags |= UPL_PAGE_SYNC_DONE;
5686
5687 if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
5688 dst_page->fictitious ||
5689 dst_page->absent ||
5690 dst_page->error ||
5691 dst_page->cleaning ||
5692 (VM_PAGE_WIRED(dst_page))) {
5693
5694 if (user_page_list)
5695 user_page_list[entry].phys_addr = 0;
5696
5697 goto try_next_page;
5698 }
5699 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
5700
5701 /*
5702 * grab this up front...
5703 * a high percentage of the time we're going to
5704 * need the hardware modification state a bit later
5705 * anyway... so we can eliminate an extra call into
5706 * the pmap layer by grabbing it here and recording it
5707 */
5708 if (dst_page->pmapped)
5709 refmod_state = pmap_get_refmod(phys_page);
5710 else
5711 refmod_state = 0;
5712
5713 if ( (refmod_state & VM_MEM_REFERENCED) && VM_PAGE_INACTIVE(dst_page)) {
5714 /*
5715 * page is on inactive list and referenced...
5716 * reactivate it now... this gets it out of the
5717 * way of vm_pageout_scan which would have to
5718 * reactivate it upon tripping over it
5719 */
5720 dwp->dw_mask |= DW_vm_page_activate;
5721 }
5722 if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
5723 /*
5724 * we're only asking for DIRTY pages to be returned
5725 */
5726 if (dst_page->laundry || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
5727 /*
5728 * if we were the page stolen by vm_pageout_scan to be
5729 * cleaned (as opposed to a buddy being clustered in),
5730 * or this request is not being driven by a PAGEOUT cluster,
5731 * then we only need to check for the page being dirty or
5732 * precious to decide whether to return it
5733 */
5734 if (dst_page->dirty || dst_page->precious || (refmod_state & VM_MEM_MODIFIED))
5735 goto check_busy;
5736 goto dont_return;
5737 }
5738 /*
5739 * this is a request for a PAGEOUT cluster and this page
5740 * is merely along for the ride as a 'buddy'... not only
5741 * does it have to be dirty to be returned, but it also
5742 * can't have been referenced recently...
5743 */
5744 if ( (hibernate_cleaning_in_progress == TRUE ||
5745 (!((refmod_state & VM_MEM_REFERENCED) || dst_page->reference) ||
5746 (dst_page->vm_page_q_state == VM_PAGE_ON_THROTTLED_Q))) &&
5747 ((refmod_state & VM_MEM_MODIFIED) || dst_page->dirty || dst_page->precious) ) {
5748 goto check_busy;
5749 }
5750 dont_return:
5751 /*
5752 * if we reach here, we're not to return
5753 * the page... go on to the next one
5754 */
5755 if (dst_page->laundry == TRUE) {
5756 /*
5757 * if we get here, the page is not 'cleaning' (filtered out above).
5758 * since it has been referenced, remove it from the laundry
5759 * so we don't pay the cost of an I/O to clean a page
5760 * we're just going to take back
5761 */
5762 vm_page_lockspin_queues();
5763
5764 vm_pageout_steal_laundry(dst_page, TRUE);
5765 vm_page_activate(dst_page);
5766
5767 vm_page_unlock_queues();
5768 }
5769 if (user_page_list)
5770 user_page_list[entry].phys_addr = 0;
5771
5772 goto try_next_page;
5773 }
5774 check_busy:
5775 if (dst_page->busy) {
5776 if (cntrl_flags & UPL_NOBLOCK) {
5777 if (user_page_list)
5778 user_page_list[entry].phys_addr = 0;
5779 dwp->dw_mask = 0;
5780
5781 goto try_next_page;
5782 }
5783 /*
5784 * someone else is playing with the
5785 * page. We will have to wait.
5786 */
5787 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
5788
5789 continue;
5790 }
5791 /*
5792 * ENCRYPTED SWAP:
5793 * The caller is gathering this page and might
5794 * access its contents later on. Decrypt the
5795 * page before adding it to the UPL, so that
5796 * the caller never sees encrypted data.
5797 */
5798 if (! (cntrl_flags & UPL_ENCRYPT) && dst_page->encrypted) {
5799 int was_busy;
5800
5801 /*
5802 * save the current state of busy;
5803 * mark page as busy while decrypt
5804 * is in progress since it will drop
5805 * the object lock...
5806 */
5807 was_busy = dst_page->busy;
5808 dst_page->busy = TRUE;
5809
5810 vm_page_decrypt(dst_page, 0);
5811 vm_page_decrypt_for_upl_counter++;
5812 /*
5813 * restore to original busy state
5814 */
5815 dst_page->busy = was_busy;
5816 }
5817 if (dst_page->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) {
5818
5819 vm_page_lockspin_queues();
5820
5821 if (dst_page->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) {
5822 /*
5823 * we've buddied up a page for a clustered pageout
5824 * that has already been moved to the pageout
5825 * queue by pageout_scan... we need to remove
5826 * it from the queue and drop the laundry count
5827 * on that queue
5828 */
5829 vm_pageout_throttle_up(dst_page);
5830 }
5831 vm_page_unlock_queues();
5832 }
5833 #if MACH_CLUSTER_STATS
5834 /*
5835 * pageout statistics gathering. count
5836 * all the pages we will page out that
5837 * were not counted in the initial
5838 * vm_pageout_scan work
5839 */
5840 if (dst_page->pageout)
5841 encountered_lrp = TRUE;
5842 if ((dst_page->dirty || (object->internal && dst_page->precious))) {
5843 if (encountered_lrp)
5844 CLUSTER_STAT(pages_at_higher_offsets++;)
5845 else
5846 CLUSTER_STAT(pages_at_lower_offsets++;)
5847 }
5848 #endif
5849 hw_dirty = refmod_state & VM_MEM_MODIFIED;
5850 dirty = hw_dirty ? TRUE : dst_page->dirty;
5851
5852 if (phys_page > upl->highest_page)
5853 upl->highest_page = phys_page;
5854
5855 assert (!pmap_is_noencrypt(phys_page));
5856
5857 if (cntrl_flags & UPL_SET_LITE) {
5858 unsigned int pg_num;
5859
5860 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
5861 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
5862 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
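/*
 * Worked example of the lite-list indexing above (illustrative):
 * for pg_num = 70, pg_num >> 5 = 2 selects the third 32-bit word of
 * lite_list and (pg_num & 31) = 6 selects bit 6 within that word, so
 * each word of the bitmap covers 32 pages of the UPL.
 */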
5863
5864 if (hw_dirty)
5865 pmap_clear_modify(phys_page);
5866
5867 /*
5868 * Mark original page as cleaning
5869 * in place.
5870 */
5871 dst_page->cleaning = TRUE;
5872 dst_page->precious = FALSE;
5873 } else {
5874 /*
5875 * use pageclean setup, it is more
5876 * convenient even for the pageout
5877 * cases here
5878 */
5879 vm_object_lock(upl->map_object);
5880 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
5881 vm_object_unlock(upl->map_object);
5882
5883 alias_page->absent = FALSE;
5884 alias_page = NULL;
5885 }
5886 if (dirty) {
5887 SET_PAGE_DIRTY(dst_page, FALSE);
5888 } else {
5889 dst_page->dirty = FALSE;
5890 }
5891
5892 if (!dirty)
5893 dst_page->precious = TRUE;
5894
5895 if ( (cntrl_flags & UPL_ENCRYPT) ) {
5896 /*
5897 * ENCRYPTED SWAP:
5898 * We want to deny access to the target page
5899 * because its contents are about to be
5900 * encrypted and the user would be very
5901 * confused to see encrypted data instead
5902 * of their data.
5903 * We also set "encrypted_cleaning" to allow
5904 * vm_pageout_scan() to demote that page
5905 * from "adjacent/clean-in-place" to
5906 * "target/clean-and-free" if it bumps into
5907 * this page during its scanning while we're
5908 * still processing this cluster.
5909 */
5910 dst_page->busy = TRUE;
5911 dst_page->encrypted_cleaning = TRUE;
5912 }
5913 if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) {
5914 if ( !VM_PAGE_WIRED(dst_page))
5915 dst_page->free_when_done = TRUE;
5916 }
5917 } else {
5918 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != last_copy_object) {
5919 /*
5920 * Honor copy-on-write obligations
5921 *
5922 * The copy object has changed since we
5923 * last synchronized for copy-on-write.
5924 * Another copy object might have been
5925 * inserted while we released the object's
5926 * lock. Since someone could have seen the
5927 * original contents of the remaining pages
5928 * through that new object, we have to
5929 * synchronize with it again for the remaining
5930 * pages only. The previous pages are "busy"
5931 * so they can not be seen through the new
5932 * mapping. The new mapping will see our
5933 * upcoming changes for those previous pages,
5934 * but that's OK since they couldn't see what
5935 * was there before. It's just a race anyway
5936 * and there's no guarantee of consistency or
5937 * atomicity. We just don't want new mappings
5938 * to see both the *before* and *after* pages.
5939 */
5940 if (object->copy != VM_OBJECT_NULL) {
5941 vm_object_update(
5942 object,
5943 dst_offset,/* current offset */
5944 xfer_size, /* remaining size */
5945 NULL,
5946 NULL,
5947 FALSE, /* should_return */
5948 MEMORY_OBJECT_COPY_SYNC,
5949 VM_PROT_NO_CHANGE);
5950
5951 #if DEVELOPMENT || DEBUG
5952 upl_cow_again++;
5953 upl_cow_again_pages += xfer_size >> PAGE_SHIFT;
5954 #endif
5955 }
5956 /*
5957 * remember the copy object we synced with
5958 */
5959 last_copy_object = object->copy;
5960 }
5961 dst_page = vm_page_lookup(object, dst_offset);
5962
5963 if (dst_page != VM_PAGE_NULL) {
5964
5965 if ((cntrl_flags & UPL_RET_ONLY_ABSENT)) {
5966 /*
5967 * skip over pages already present in the cache
5968 */
5969 if (user_page_list)
5970 user_page_list[entry].phys_addr = 0;
5971
5972 goto try_next_page;
5973 }
5974 if (dst_page->fictitious) {
5975 panic("need corner case for fictitious page");
5976 }
5977
5978 if (dst_page->busy || dst_page->cleaning) {
5979 /*
5980 * someone else is playing with the
5981 * page. We will have to wait.
5982 */
5983 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
5984
5985 continue;
5986 }
5987 if (dst_page->laundry)
5988 vm_pageout_steal_laundry(dst_page, FALSE);
5989 } else {
5990 if (object->private) {
5991 /*
5992 * This is a nasty wrinkle for users
5993 * of upl who encounter device or
5994 * private memory; however, it is
5995 * unavoidable: only a fault can
5996 * resolve the actual backing
5997 * physical page by asking the
5998 * backing device.
5999 */
6000 if (user_page_list)
6001 user_page_list[entry].phys_addr = 0;
6002
6003 goto try_next_page;
6004 }
6005 if (object->scan_collisions) {
6006 /*
6007 * the pageout_scan thread is trying to steal
6008 * pages from this object, but has run into our
6009 * lock... grab 2 pages from the head of the object...
6010 * the first is freed on behalf of pageout_scan, the
6011 * 2nd is for our own use... we use vm_object_page_grab
6012 * in both cases to avoid taking pages from the free
6013 * list since we are under memory pressure and our
6014 * lock on this object is getting in the way of
6015 * relieving it
6016 */
6017 dst_page = vm_object_page_grab(object);
6018
6019 if (dst_page != VM_PAGE_NULL)
6020 vm_page_release(dst_page,
6021 FALSE);
6022
6023 dst_page = vm_object_page_grab(object);
6024 }
6025 if (dst_page == VM_PAGE_NULL) {
6026 /*
6027 * need to allocate a page
6028 */
6029 dst_page = vm_page_grab_options(grab_options);
6030 }
6031 if (dst_page == VM_PAGE_NULL) {
6032 if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) {
6033 /*
6034 * we don't want to stall waiting for pages to come onto the free list
6035 * while we're already holding absent pages in this UPL
6036 * the caller will deal with the empty slots
6037 */
6038 if (user_page_list)
6039 user_page_list[entry].phys_addr = 0;
6040
6041 goto try_next_page;
6042 }
6043 /*
6044 * no pages available... wait
6045 * then try again for the same
6046 * offset...
6047 */
6048 vm_object_unlock(object);
6049
6050 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
6051
6052 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
6053
6054 VM_PAGE_WAIT();
6055 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
6056
6057 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
6058
6059 vm_object_lock(object);
6060
6061 continue;
6062 }
6063 vm_page_insert(dst_page, object, dst_offset);
6064
6065 dst_page->absent = TRUE;
6066 dst_page->busy = FALSE;
6067
6068 if (cntrl_flags & UPL_RET_ONLY_ABSENT) {
6069 /*
6070 * if UPL_RET_ONLY_ABSENT was specified,
6071 * then we're definitely setting up a
6072 * UPL for a clustered read/pagein
6073 * operation... mark the pages as clustered
6074 * so upl_commit_range can put them on the
6075 * speculative list
6076 */
6077 dst_page->clustered = TRUE;
6078
6079 if ( !(cntrl_flags & UPL_FILE_IO))
6080 VM_STAT_INCR(pageins);
6081 }
6082 }
6083 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
6084
6085 /*
6086 * ENCRYPTED SWAP:
6087 */
6088 if (cntrl_flags & UPL_ENCRYPT) {
6089 /*
6090 * The page is going to be encrypted when we
6091 * get it from the pager, so mark it so.
6092 */
6093 dst_page->encrypted = TRUE;
6094 } else {
6095 /*
6096 * Otherwise, the page will not contain
6097 * encrypted data.
6098 */
6099 dst_page->encrypted = FALSE;
6100 }
6101 dst_page->overwriting = TRUE;
6102
6103 if (dst_page->pmapped) {
6104 if ( !(cntrl_flags & UPL_FILE_IO))
6105 /*
6106 * eliminate all mappings from the
6107 * original object and its progeny
6108 */
6109 refmod_state = pmap_disconnect(phys_page);
6110 else
6111 refmod_state = pmap_get_refmod(phys_page);
6112 } else
6113 refmod_state = 0;
6114
6115 hw_dirty = refmod_state & VM_MEM_MODIFIED;
6116 dirty = hw_dirty ? TRUE : dst_page->dirty;
6117
6118 if (cntrl_flags & UPL_SET_LITE) {
6119 unsigned int pg_num;
6120
6121 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
6122 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
6123 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
6124
6125 if (hw_dirty)
6126 pmap_clear_modify(phys_page);
6127
6128 /*
6129 * Mark original page as cleaning
6130 * in place.
6131 */
6132 dst_page->cleaning = TRUE;
6133 dst_page->precious = FALSE;
6134 } else {
6135 /*
6136 * use pageclean setup, it is more
6137 * convenient even for the pageout
6138 * cases here
6139 */
6140 vm_object_lock(upl->map_object);
6141 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
6142 vm_object_unlock(upl->map_object);
6143
6144 alias_page->absent = FALSE;
6145 alias_page = NULL;
6146 }
6147
6148 if (cntrl_flags & UPL_REQUEST_SET_DIRTY) {
6149 upl->flags &= ~UPL_CLEAR_DIRTY;
6150 upl->flags |= UPL_SET_DIRTY;
6151 dirty = TRUE;
6153 } else if (cntrl_flags & UPL_CLEAN_IN_PLACE) {
6154 /*
6155 * clean in place for read implies
6156 * that a write will be done on all
6157 * the pages that are dirty before
6158 * a upl commit is done. The caller
6159 * is obligated to preserve the
6160 * contents of all pages marked dirty
6161 */
6162 upl->flags |= UPL_CLEAR_DIRTY;
6163 }
6164 dst_page->dirty = dirty;
6165
6166 if (!dirty)
6167 dst_page->precious = TRUE;
6168
6169 if ( !VM_PAGE_WIRED(dst_page)) {
6170 /*
6171 * deny access to the target page while
6172 * it is being worked on
6173 */
6174 dst_page->busy = TRUE;
6175 } else
6176 dwp->dw_mask |= DW_vm_page_wire;
6177
6178 /*
6179 * We might be about to satisfy a fault which has been
6180 * requested. So no need for the "restart" bit.
6181 */
6182 dst_page->restart = FALSE;
6183 if (!dst_page->absent && !(cntrl_flags & UPL_WILL_MODIFY)) {
6184 /*
6185 * expect the page to be used
6186 */
6187 dwp->dw_mask |= DW_set_reference;
6188 }
6189 if (cntrl_flags & UPL_PRECIOUS) {
6190 if (object->internal) {
6191 SET_PAGE_DIRTY(dst_page, FALSE);
6192 dst_page->precious = FALSE;
6193 } else {
6194 dst_page->precious = TRUE;
6195 }
6196 } else {
6197 dst_page->precious = FALSE;
6198 }
6199 }
6200 if (dst_page->busy)
6201 upl->flags |= UPL_HAS_BUSY;
6202
6203 if (phys_page > upl->highest_page)
6204 upl->highest_page = phys_page;
6205 assert (!pmap_is_noencrypt(phys_page));
6206 if (user_page_list) {
6207 user_page_list[entry].phys_addr = phys_page;
6208 user_page_list[entry].free_when_done = dst_page->free_when_done;
6209 user_page_list[entry].absent = dst_page->absent;
6210 user_page_list[entry].dirty = dst_page->dirty;
6211 user_page_list[entry].precious = dst_page->precious;
6212 user_page_list[entry].device = FALSE;
6213 user_page_list[entry].needed = FALSE;
6214 if (dst_page->clustered == TRUE)
6215 user_page_list[entry].speculative = (dst_page->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE;
6216 else
6217 user_page_list[entry].speculative = FALSE;
6218 user_page_list[entry].cs_validated = dst_page->cs_validated;
6219 user_page_list[entry].cs_tainted = dst_page->cs_tainted;
6220 user_page_list[entry].cs_nx = dst_page->cs_nx;
6221 user_page_list[entry].mark = FALSE;
6222 }
6223 /*
6224 * if UPL_RET_ONLY_ABSENT is set, then
6225 * we are working with a fresh page and we've
6226 * just set the clustered flag on it to
6227 * indicate that it was dragged in as part of a
6228 * speculative cluster... so leave it alone
6229 */
6230 if ( !(cntrl_flags & UPL_RET_ONLY_ABSENT)) {
6231 /*
6232 * someone is explicitly grabbing this page...
6233 * update clustered and speculative state
6234 *
6235 */
6236 if (dst_page->clustered)
6237 VM_PAGE_CONSUME_CLUSTERED(dst_page);
6238 }
6239 try_next_page:
6240 if (dwp->dw_mask) {
6241 if (dwp->dw_mask & DW_vm_page_activate)
6242 VM_STAT_INCR(reactivations);
6243
6244 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
6245
6246 if (dw_count >= dw_limit) {
6247 vm_page_do_delayed_work(object, UPL_MEMORY_TAG(cntrl_flags), &dw_array[0], dw_count);
6248
6249 dwp = &dw_array[0];
6250 dw_count = 0;
6251 }
6252 }
6253 entry++;
6254 dst_offset += PAGE_SIZE_64;
6255 xfer_size -= PAGE_SIZE;
6256 }
6257 if (dw_count)
6258 vm_page_do_delayed_work(object, UPL_MEMORY_TAG(cntrl_flags), &dw_array[0], dw_count);
6259
6260 if (alias_page != NULL) {
6261 VM_PAGE_FREE(alias_page);
6262 }
6263
6264 if (page_list_count != NULL) {
6265 if (upl->flags & UPL_INTERNAL)
6266 *page_list_count = 0;
6267 else if (*page_list_count > entry)
6268 *page_list_count = entry;
6269 }
6270 #if UPL_DEBUG
6271 upl->upl_state = 1;
6272 #endif
6273 vm_object_unlock(object);
6274
6275 return KERN_SUCCESS;
6276 }
6277
6278 /*
6279 * Routine: vm_object_super_upl_request
6280 * Purpose:
6281 * Cause the population of a portion of a vm_object
6282 * in much the same way as memory_object_upl_request.
6283 * Depending on the nature of the request, the pages
6284 * returned may contain valid data or be uninitialized.
6285 * However, the region may be expanded up to the super
6286 * cluster size provided.
6287 */
6288
6289 __private_extern__ kern_return_t
6290 vm_object_super_upl_request(
6291 vm_object_t object,
6292 vm_object_offset_t offset,
6293 upl_size_t size,
6294 upl_size_t super_cluster,
6295 upl_t *upl,
6296 upl_page_info_t *user_page_list,
6297 unsigned int *page_list_count,
6298 upl_control_flags_t cntrl_flags)
6299 {
6300 if (object->paging_offset > offset || ((cntrl_flags & UPL_VECTOR)==UPL_VECTOR))
6301 return KERN_FAILURE;
6302
6303 assert(object->paging_in_progress);
6304 offset = offset - object->paging_offset;
6305
6306 if (super_cluster > size) {
6307
6308 vm_object_offset_t base_offset;
6309 upl_size_t super_size;
6310 vm_object_size_t super_size_64;
6311
6312 base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1));
6313 super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster;
6314 super_size_64 = ((base_offset + super_size) > object->vo_size) ? (object->vo_size - base_offset) : super_size;
6315 super_size = (upl_size_t) super_size_64;
6316 assert(super_size == super_size_64);
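/*
 * Worked example of the clipping above (illustrative values, assuming a
 * power-of-two super_cluster): with super_cluster = 0x100000, offset =
 * 0x126000 and size = 0x4000, base_offset = 0x126000 & ~0xFFFFF = 0x100000;
 * since offset + size stays below base_offset + super_cluster, super_size
 * remains one full cluster (0x100000).  A request straddling the cluster
 * boundary would double super_size, and in either case the result is
 * clipped so it does not extend past the end of the object.
 */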
6317
6318 if (offset > (base_offset + super_size)) {
6319 panic("vm_object_super_upl_request: Missed target pageout"
6320 " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
6321 offset, base_offset, super_size, super_cluster,
6322 size, object->paging_offset);
6323 }
6324 /*
6325 * apparently there is a case where the vm requests a
6326 * page to be written out whose offset is beyond the
6327 * object size
6328 */
6329 if ((offset + size) > (base_offset + super_size)) {
6330 super_size_64 = (offset + size) - base_offset;
6331 super_size = (upl_size_t) super_size_64;
6332 assert(super_size == super_size_64);
6333 }
6334
6335 offset = base_offset;
6336 size = super_size;
6337 }
6338 return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags);
6339 }
6340
6341
6342 kern_return_t
6343 vm_map_create_upl(
6344 vm_map_t map,
6345 vm_map_address_t offset,
6346 upl_size_t *upl_size,
6347 upl_t *upl,
6348 upl_page_info_array_t page_list,
6349 unsigned int *count,
6350 upl_control_flags_t *flags)
6351 {
6352 vm_map_entry_t entry;
6353 upl_control_flags_t caller_flags;
6354 int force_data_sync;
6355 int sync_cow_data;
6356 vm_object_t local_object;
6357 vm_map_offset_t local_offset;
6358 vm_map_offset_t local_start;
6359 kern_return_t ret;
6360
6361 assert(page_aligned(offset));
6362
6363 caller_flags = *flags;
6364
6365 if (caller_flags & ~UPL_VALID_FLAGS) {
6366 /*
6367 * For forward compatibility's sake,
6368 * reject any unknown flag.
6369 */
6370 return KERN_INVALID_VALUE;
6371 }
6372 force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
6373 sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);
6374
6375 if (upl == NULL)
6376 return KERN_INVALID_ARGUMENT;
6377
6378 REDISCOVER_ENTRY:
6379 vm_map_lock_read(map);
6380
6381 if (!vm_map_lookup_entry(map, offset, &entry)) {
6382 vm_map_unlock_read(map);
6383 return KERN_FAILURE;
6384 }
6385
6386 if ((entry->vme_end - offset) < *upl_size) {
6387 *upl_size = (upl_size_t) (entry->vme_end - offset);
6388 assert(*upl_size == entry->vme_end - offset);
6389 }
6390
6391 if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
6392 *flags = 0;
6393
6394 if (!entry->is_sub_map &&
6395 VME_OBJECT(entry) != VM_OBJECT_NULL) {
6396 if (VME_OBJECT(entry)->private)
6397 *flags = UPL_DEV_MEMORY;
6398
6399 if (VME_OBJECT(entry)->phys_contiguous)
6400 *flags |= UPL_PHYS_CONTIG;
6401 }
6402 vm_map_unlock_read(map);
6403 return KERN_SUCCESS;
6404 }
6405
6406 if (VME_OBJECT(entry) == VM_OBJECT_NULL ||
6407 !VME_OBJECT(entry)->phys_contiguous) {
6408 if (*upl_size > MAX_UPL_SIZE_BYTES)
6409 *upl_size = MAX_UPL_SIZE_BYTES;
6410 }
6411
6412 /*
6413 * Create an object if necessary.
6414 */
6415 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
6416
6417 if (vm_map_lock_read_to_write(map))
6418 goto REDISCOVER_ENTRY;
6419
6420 VME_OBJECT_SET(entry,
6421 vm_object_allocate((vm_size_t)
6422 (entry->vme_end -
6423 entry->vme_start)));
6424 VME_OFFSET_SET(entry, 0);
6425
6426 vm_map_lock_write_to_read(map);
6427 }
6428
6429 if (!(caller_flags & UPL_COPYOUT_FROM) &&
6430 !(entry->protection & VM_PROT_WRITE)) {
6431 vm_map_unlock_read(map);
6432 return KERN_PROTECTION_FAILURE;
6433 }
6434
6435
6436 local_object = VME_OBJECT(entry);
6437 assert(local_object != VM_OBJECT_NULL);
6438
6439 if (!entry->is_sub_map &&
6440 !entry->needs_copy &&
6441 *upl_size != 0 &&
6442 local_object->vo_size > *upl_size && /* partial UPL */
6443 entry->wired_count == 0 && /* No COW for entries that are wired */
6444 (map->pmap != kernel_pmap) && /* alias checks */
6445 (vm_map_entry_should_cow_for_true_share(entry) /* case 1 */
6446 ||
6447 (/* case 2 */
6448 local_object->internal &&
6449 (local_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) &&
6450 local_object->ref_count > 1))) {
6451 vm_prot_t prot;
6452
6453 /*
6454 * Case 1:
6455 * Set up the targeted range for copy-on-write to avoid
6456 * applying true_share/copy_delay to the entire object.
6457 *
6458 * Case 2:
6459 * This map entry covers only part of an internal
6460 * object. There could be other map entries covering
6461 * other areas of this object and some of these map
6462 * entries could be marked as "needs_copy", which
6463 * assumes that the object is COPY_SYMMETRIC.
6464 * To avoid marking this object as COPY_DELAY and
6465 * "true_share", let's shadow it and mark the new
6466 * (smaller) object as "true_share" and COPY_DELAY.
6467 */
6468
6469 if (vm_map_lock_read_to_write(map)) {
6470 goto REDISCOVER_ENTRY;
6471 }
6472 vm_map_lock_assert_exclusive(map);
6473 assert(VME_OBJECT(entry) == local_object);
6474
6475 vm_map_clip_start(map,
6476 entry,
6477 vm_map_trunc_page(offset,
6478 VM_MAP_PAGE_MASK(map)));
6479 vm_map_clip_end(map,
6480 entry,
6481 vm_map_round_page(offset + *upl_size,
6482 VM_MAP_PAGE_MASK(map)));
6483 if ((entry->vme_end - offset) < *upl_size) {
6484 *upl_size = (upl_size_t) (entry->vme_end - offset);
6485 assert(*upl_size == entry->vme_end - offset);
6486 }
6487
6488 prot = entry->protection & ~VM_PROT_WRITE;
6489 if (override_nx(map, VME_ALIAS(entry)) && prot)
6490 prot |= VM_PROT_EXECUTE;
6491 vm_object_pmap_protect(local_object,
6492 VME_OFFSET(entry),
6493 entry->vme_end - entry->vme_start,
6494 ((entry->is_shared ||
6495 map->mapped_in_other_pmaps)
6496 ? PMAP_NULL
6497 : map->pmap),
6498 entry->vme_start,
6499 prot);
6500
6501 assert(entry->wired_count == 0);
6502
6503 /*
6504 * Lock the VM object and re-check its status: if it's mapped
6505 * in another address space, we could still be racing with
6506 * another thread holding that other VM map exclusively.
6507 */
6508 vm_object_lock(local_object);
6509 if (local_object->true_share) {
6510 /* object is already in proper state: no COW needed */
6511 assert(local_object->copy_strategy !=
6512 MEMORY_OBJECT_COPY_SYMMETRIC);
6513 } else {
6514 /* not true_share: ask for copy-on-write below */
6515 assert(local_object->copy_strategy ==
6516 MEMORY_OBJECT_COPY_SYMMETRIC);
6517 entry->needs_copy = TRUE;
6518 }
6519 vm_object_unlock(local_object);
6520
6521 vm_map_lock_write_to_read(map);
6522 }
6523
6524 if (entry->needs_copy) {
6525 /*
6526 * Honor copy-on-write for COPY_SYMMETRIC
6527 * strategy.
6528 */
6529 vm_map_t local_map;
6530 vm_object_t object;
6531 vm_object_offset_t new_offset;
6532 vm_prot_t prot;
6533 boolean_t wired;
6534 vm_map_version_t version;
6535 vm_map_t real_map;
6536 vm_prot_t fault_type;
6537
6538 local_map = map;
6539
6540 if (caller_flags & UPL_COPYOUT_FROM) {
6541 fault_type = VM_PROT_READ | VM_PROT_COPY;
6542 vm_counters.create_upl_extra_cow++;
6543 vm_counters.create_upl_extra_cow_pages +=
6544 (entry->vme_end - entry->vme_start) / PAGE_SIZE;
6545 } else {
6546 fault_type = VM_PROT_WRITE;
6547 }
6548 if (vm_map_lookup_locked(&local_map,
6549 offset, fault_type,
6550 OBJECT_LOCK_EXCLUSIVE,
6551 &version, &object,
6552 &new_offset, &prot, &wired,
6553 NULL,
6554 &real_map) != KERN_SUCCESS) {
6555 if (fault_type == VM_PROT_WRITE) {
6556 vm_counters.create_upl_lookup_failure_write++;
6557 } else {
6558 vm_counters.create_upl_lookup_failure_copy++;
6559 }
6560 vm_map_unlock_read(local_map);
6561 return KERN_FAILURE;
6562 }
6563 if (real_map != map)
6564 vm_map_unlock(real_map);
6565 vm_map_unlock_read(local_map);
6566
6567 vm_object_unlock(object);
6568
6569 goto REDISCOVER_ENTRY;
6570 }
6571
6572 if (entry->is_sub_map) {
6573 vm_map_t submap;
6574
6575 submap = VME_SUBMAP(entry);
6576 local_start = entry->vme_start;
6577 local_offset = VME_OFFSET(entry);
6578
6579 vm_map_reference(submap);
6580 vm_map_unlock_read(map);
6581
6582 ret = vm_map_create_upl(submap,
6583 local_offset + (offset - local_start),
6584 upl_size, upl, page_list, count, flags);
6585 vm_map_deallocate(submap);
6586
6587 return ret;
6588 }
6589
6590 if (sync_cow_data &&
6591 (VME_OBJECT(entry)->shadow ||
6592 VME_OBJECT(entry)->copy)) {
6593 local_object = VME_OBJECT(entry);
6594 local_start = entry->vme_start;
6595 local_offset = VME_OFFSET(entry);
6596
6597 vm_object_reference(local_object);
6598 vm_map_unlock_read(map);
6599
6600 if (local_object->shadow && local_object->copy) {
6601 vm_object_lock_request(local_object->shadow,
6602 ((vm_object_offset_t)
6603 ((offset - local_start) +
6604 local_offset) +
6605 local_object->vo_shadow_offset),
6606 *upl_size, FALSE,
6607 MEMORY_OBJECT_DATA_SYNC,
6608 VM_PROT_NO_CHANGE);
6609 }
6610 sync_cow_data = FALSE;
6611 vm_object_deallocate(local_object);
6612
6613 goto REDISCOVER_ENTRY;
6614 }
6615 if (force_data_sync) {
6616 local_object = VME_OBJECT(entry);
6617 local_start = entry->vme_start;
6618 local_offset = VME_OFFSET(entry);
6619
6620 vm_object_reference(local_object);
6621 vm_map_unlock_read(map);
6622
6623 vm_object_lock_request(local_object,
6624 ((vm_object_offset_t)
6625 ((offset - local_start) +
6626 local_offset)),
6627 (vm_object_size_t)*upl_size,
6628 FALSE,
6629 MEMORY_OBJECT_DATA_SYNC,
6630 VM_PROT_NO_CHANGE);
6631
6632 force_data_sync = FALSE;
6633 vm_object_deallocate(local_object);
6634
6635 goto REDISCOVER_ENTRY;
6636 }
6637 if (VME_OBJECT(entry)->private)
6638 *flags = UPL_DEV_MEMORY;
6639 else
6640 *flags = 0;
6641
6642 if (VME_OBJECT(entry)->phys_contiguous)
6643 *flags |= UPL_PHYS_CONTIG;
6644
6645 local_object = VME_OBJECT(entry);
6646 local_offset = VME_OFFSET(entry);
6647 local_start = entry->vme_start;
6648
6649
6650 vm_object_lock(local_object);
6651
6652 /*
6653 * Ensure that this object is "true_share" and "copy_delay" now,
6654 * while we're still holding the VM map lock. After we unlock the map,
6655 * anything could happen to that mapping, including some copy-on-write
6656 * activity. We need to make sure that the IOPL will point at the
6657 * same memory as the mapping.
6658 */
6659 if (local_object->true_share) {
6660 assert(local_object->copy_strategy !=
6661 MEMORY_OBJECT_COPY_SYMMETRIC);
6662 } else if (local_object != kernel_object &&
6663 local_object != compressor_object &&
6664 !local_object->phys_contiguous) {
6665 #if VM_OBJECT_TRACKING_OP_TRUESHARE
6666 if (!local_object->true_share &&
6667 vm_object_tracking_inited) {
6668 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
6669 int num = 0;
6670 num = OSBacktrace(bt,
6671 VM_OBJECT_TRACKING_BTDEPTH);
6672 btlog_add_entry(vm_object_tracking_btlog,
6673 local_object,
6674 VM_OBJECT_TRACKING_OP_TRUESHARE,
6675 bt,
6676 num);
6677 }
6678 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
6679 local_object->true_share = TRUE;
6680 if (local_object->copy_strategy ==
6681 MEMORY_OBJECT_COPY_SYMMETRIC) {
6682 local_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
6683 }
6684 }
6685
6686 vm_object_reference_locked(local_object);
6687 vm_object_unlock(local_object);
6688
6689 vm_map_unlock_read(map);
6690
6691 ret = vm_object_iopl_request(local_object,
6692 ((vm_object_offset_t)
6693 ((offset - local_start) + local_offset)),
6694 *upl_size,
6695 upl,
6696 page_list,
6697 count,
6698 caller_flags);
6699 vm_object_deallocate(local_object);
6700
6701 return ret;
6702 }
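/*
 * Minimal sketch of a typical caller sequence for the routines in this file
 * (illustrative only, never compiled; real callers pass additional flags,
 * supply a real page list and check every return value; "map" and "offset"
 * stand in for the caller's map and start address):
 */
#if 0
	upl_t			upl;
	upl_size_t		upl_size = PAGE_SIZE;
	unsigned int		count = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT;
	upl_control_flags_t	flags = UPL_SET_INTERNAL | UPL_SET_LITE;
	vm_map_offset_t		kaddr;
	boolean_t		empty;

	if (vm_map_create_upl(map, offset, &upl_size, &upl, NULL, &count, &flags) == KERN_SUCCESS) {
		vm_map_enter_upl(kernel_map, upl, &kaddr);	/* adds a mapping reference */
		/* ... access the pages through kaddr ... */
		vm_map_remove_upl(kernel_map, upl);		/* drops the mapping reference */
		upl_commit_range(upl, 0, upl_size, 0, NULL, 0, &empty);
		upl_deallocate(upl);				/* drops the creation reference */
	}
#endif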
6703
6704 /*
6705 * Internal routine to enter a UPL into a VM map.
6706 *
6707 * JMM - This should just be doable through the standard
6708 * vm_map_enter() API.
6709 */
6710 kern_return_t
6711 vm_map_enter_upl(
6712 vm_map_t map,
6713 upl_t upl,
6714 vm_map_offset_t *dst_addr)
6715 {
6716 vm_map_size_t size;
6717 vm_object_offset_t offset;
6718 vm_map_offset_t addr;
6719 vm_page_t m;
6720 kern_return_t kr;
6721 int isVectorUPL = 0, curr_upl=0;
6722 upl_t vector_upl = NULL;
6723 vm_offset_t vector_upl_dst_addr = 0;
6724 vm_map_t vector_upl_submap = NULL;
6725 upl_offset_t subupl_offset = 0;
6726 upl_size_t subupl_size = 0;
6727
6728 if (upl == UPL_NULL)
6729 return KERN_INVALID_ARGUMENT;
6730
6731 if((isVectorUPL = vector_upl_is_valid(upl))) {
6732 int mapped=0,valid_upls=0;
6733 vector_upl = upl;
6734
6735 upl_lock(vector_upl);
6736 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
6737 upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
6738 if(upl == NULL)
6739 continue;
6740 valid_upls++;
6741 if (UPL_PAGE_LIST_MAPPED & upl->flags)
6742 mapped++;
6743 }
6744
6745 if(mapped) {
6746 if(mapped != valid_upls)
6747 panic("Only %d of the %d sub-upls within the Vector UPL are alread mapped\n", mapped, valid_upls);
6748 else {
6749 upl_unlock(vector_upl);
6750 return KERN_FAILURE;
6751 }
6752 }
6753
6754 kr = kmem_suballoc(map, &vector_upl_dst_addr, vector_upl->size, FALSE, VM_FLAGS_ANYWHERE, &vector_upl_submap);
6755 if( kr != KERN_SUCCESS )
6756 panic("Vector UPL submap allocation failed\n");
6757 map = vector_upl_submap;
6758 vector_upl_set_submap(vector_upl, vector_upl_submap, vector_upl_dst_addr);
6759 curr_upl=0;
6760 }
6761 else
6762 upl_lock(upl);
6763
6764 process_upl_to_enter:
6765 if(isVectorUPL){
6766 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
6767 *dst_addr = vector_upl_dst_addr;
6768 upl_unlock(vector_upl);
6769 return KERN_SUCCESS;
6770 }
6771 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
6772 if(upl == NULL)
6773 goto process_upl_to_enter;
6774
6775 vector_upl_get_iostate(vector_upl, upl, &subupl_offset, &subupl_size);
6776 *dst_addr = (vm_map_offset_t)(vector_upl_dst_addr + (vm_map_offset_t)subupl_offset);
6777 } else {
6778 /*
6779 * check to see if already mapped
6780 */
6781 if (UPL_PAGE_LIST_MAPPED & upl->flags) {
6782 upl_unlock(upl);
6783 return KERN_FAILURE;
6784 }
6785 }
6786 if ((!(upl->flags & UPL_SHADOWED)) &&
6787 ((upl->flags & UPL_HAS_BUSY) ||
6788 !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || (upl->map_object->phys_contiguous)))) {
6789
6790 vm_object_t object;
6791 vm_page_t alias_page;
6792 vm_object_offset_t new_offset;
6793 unsigned int pg_num;
6794 wpl_array_t lite_list;
6795
6796 if (upl->flags & UPL_INTERNAL) {
6797 lite_list = (wpl_array_t)
6798 ((((uintptr_t)upl) + sizeof(struct upl))
6799 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
6800 } else {
6801 lite_list = (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl));
6802 }
6803 object = upl->map_object;
6804 upl->map_object = vm_object_allocate(upl->size);
6805
6806 vm_object_lock(upl->map_object);
6807
6808 upl->map_object->shadow = object;
6809 upl->map_object->pageout = TRUE;
6810 upl->map_object->can_persist = FALSE;
6811 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
6812 upl->map_object->vo_shadow_offset = upl->offset - object->paging_offset;
6813 upl->map_object->wimg_bits = object->wimg_bits;
6814 offset = upl->map_object->vo_shadow_offset;
6815 new_offset = 0;
6816 size = upl->size;
6817
6818 upl->flags |= UPL_SHADOWED;
6819
6820 while (size) {
6821 pg_num = (unsigned int) (new_offset / PAGE_SIZE);
6822 assert(pg_num == new_offset / PAGE_SIZE);
6823
6824 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
6825
6826 VM_PAGE_GRAB_FICTITIOUS(alias_page);
6827
6828 vm_object_lock(object);
6829
6830 m = vm_page_lookup(object, offset);
6831 if (m == VM_PAGE_NULL) {
6832 panic("vm_upl_map: page missing\n");
6833 }
6834
6835 /*
6836 * Convert the fictitious page to a private
6837 * shadow of the real page.
6838 */
6839 assert(alias_page->fictitious);
6840 alias_page->fictitious = FALSE;
6841 alias_page->private = TRUE;
6842 alias_page->free_when_done = TRUE;
6843 /*
6844 * since m is a page in the upl it must
6845 * already be wired or BUSY, so it's
6846 * safe to assign the underlying physical
6847 * page to the alias
6848 */
6849 VM_PAGE_SET_PHYS_PAGE(alias_page, VM_PAGE_GET_PHYS_PAGE(m));
6850
6851 vm_object_unlock(object);
6852
6853 vm_page_lockspin_queues();
6854 vm_page_wire(alias_page, VM_KERN_MEMORY_NONE, TRUE);
6855 vm_page_unlock_queues();
6856
6857 /*
6858 * ENCRYPTED SWAP:
6859 * The virtual page ("m") has to be wired in some way
6860 * here or its backing physical page could
6861 * be recycled at any time.
6862 * Assuming this is enforced by the caller, we can't
6863 * get an encrypted page here. Since the encryption
6864 * key depends on the VM page's "pager" object and
6865 * the "paging_offset", we couldn't handle 2 pageable
6866 * VM pages (with different pagers and paging_offsets)
6867 * sharing the same physical page: we could end up
6868 * encrypting with one key (via one VM page) and
6869 * decrypting with another key (via the alias VM page).
6870 */
6871 ASSERT_PAGE_DECRYPTED(m);
6872
6873 vm_page_insert_wired(alias_page, upl->map_object, new_offset, VM_KERN_MEMORY_NONE);
6874
6875 assert(!alias_page->wanted);
6876 alias_page->busy = FALSE;
6877 alias_page->absent = FALSE;
6878 }
6879 size -= PAGE_SIZE;
6880 offset += PAGE_SIZE_64;
6881 new_offset += PAGE_SIZE_64;
6882 }
6883 vm_object_unlock(upl->map_object);
6884 }
6885 if (upl->flags & UPL_SHADOWED)
6886 offset = 0;
6887 else
6888 offset = upl->offset - upl->map_object->paging_offset;
6889
6890 size = upl->size;
6891
6892 vm_object_reference(upl->map_object);
6893
6894 if(!isVectorUPL) {
6895 *dst_addr = 0;
6896 /*
6897 * NEED A UPL_MAP ALIAS
6898 */
6899 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
6900 VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_OSFMK),
6901 upl->map_object, offset, FALSE,
6902 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
6903
6904 if (kr != KERN_SUCCESS) {
6905 vm_object_deallocate(upl->map_object);
6906 upl_unlock(upl);
6907 return(kr);
6908 }
6909 }
6910 else {
6911 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
6912 VM_FLAGS_FIXED | VM_MAKE_TAG(VM_KERN_MEMORY_OSFMK),
6913 upl->map_object, offset, FALSE,
6914 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
6915 if(kr)
6916 panic("vm_map_enter failed for a Vector UPL\n");
6917 }
6918 vm_object_lock(upl->map_object);
6919
6920 for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) {
6921 m = vm_page_lookup(upl->map_object, offset);
6922
6923 if (m) {
6924 m->pmapped = TRUE;
6925
6926 /* CODE SIGNING ENFORCEMENT: page has been wpmapped,
6927 * but only in kernel space. If this was on a user map,
6928 * we'd have to set the wpmapped bit. */
6929 /* m->wpmapped = TRUE; */
6930 assert(map->pmap == kernel_pmap);
6931
6932 PMAP_ENTER(map->pmap, addr, m, VM_PROT_DEFAULT, VM_PROT_NONE, 0, TRUE);
6933 }
6934 offset += PAGE_SIZE_64;
6935 }
6936 vm_object_unlock(upl->map_object);
6937
6938 /*
6939 * hold a reference for the mapping
6940 */
6941 upl->ref_count++;
6942 upl->flags |= UPL_PAGE_LIST_MAPPED;
6943 upl->kaddr = (vm_offset_t) *dst_addr;
6944 assert(upl->kaddr == *dst_addr);
6945
6946 if(isVectorUPL)
6947 goto process_upl_to_enter;
6948
6949 upl_unlock(upl);
6950
6951 return KERN_SUCCESS;
6952 }
6953
6954 /*
6955 * Internal routine to remove a UPL mapping from a VM map.
6956 *
6957 * XXX - This should just be doable through a standard
6958 * vm_map_remove() operation. Otherwise, implicit clean-up
6959 * of the target map won't be able to correctly remove
6960 * these (and release the reference on the UPL). Having
6961 * to do this means we can't map these into user-space
6962 * maps yet.
6963 */
6964 kern_return_t
6965 vm_map_remove_upl(
6966 vm_map_t map,
6967 upl_t upl)
6968 {
6969 vm_address_t addr;
6970 upl_size_t size;
6971 int isVectorUPL = 0, curr_upl = 0;
6972 upl_t vector_upl = NULL;
6973
6974 if (upl == UPL_NULL)
6975 return KERN_INVALID_ARGUMENT;
6976
6977 if((isVectorUPL = vector_upl_is_valid(upl))) {
6978 int unmapped=0, valid_upls=0;
6979 vector_upl = upl;
6980 upl_lock(vector_upl);
6981 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
6982 upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
6983 if(upl == NULL)
6984 continue;
6985 valid_upls++;
6986 if (!(UPL_PAGE_LIST_MAPPED & upl->flags))
6987 unmapped++;
6988 }
6989
6990 if(unmapped) {
6991 if(unmapped != valid_upls)
6992 panic("%d of the %d sub-upls within the Vector UPL is/are not mapped\n", unmapped, valid_upls);
6993 else {
6994 upl_unlock(vector_upl);
6995 return KERN_FAILURE;
6996 }
6997 }
6998 curr_upl=0;
6999 }
7000 else
7001 upl_lock(upl);
7002
7003 process_upl_to_remove:
7004 if(isVectorUPL) {
7005 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
7006 vm_map_t v_upl_submap;
7007 vm_offset_t v_upl_submap_dst_addr;
7008 vector_upl_get_submap(vector_upl, &v_upl_submap, &v_upl_submap_dst_addr);
7009
7010 vm_map_remove(map, v_upl_submap_dst_addr, v_upl_submap_dst_addr + vector_upl->size, VM_MAP_NO_FLAGS);
7011 vm_map_deallocate(v_upl_submap);
7012 upl_unlock(vector_upl);
7013 return KERN_SUCCESS;
7014 }
7015
7016 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
7017 if(upl == NULL)
7018 goto process_upl_to_remove;
7019 }
7020
7021 if (upl->flags & UPL_PAGE_LIST_MAPPED) {
7022 addr = upl->kaddr;
7023 size = upl->size;
7024
7025 assert(upl->ref_count > 1);
7026 upl->ref_count--; /* removing mapping ref */
7027
7028 upl->flags &= ~UPL_PAGE_LIST_MAPPED;
7029 upl->kaddr = (vm_offset_t) 0;
7030
7031 if(!isVectorUPL) {
7032 upl_unlock(upl);
7033
7034 vm_map_remove(
7035 map,
7036 vm_map_trunc_page(addr,
7037 VM_MAP_PAGE_MASK(map)),
7038 vm_map_round_page(addr + size,
7039 VM_MAP_PAGE_MASK(map)),
7040 VM_MAP_NO_FLAGS);
7041
7042 return KERN_SUCCESS;
7043 }
7044 else {
7045 /*
7046 * If it's a Vectored UPL, we'll be removing the entire
7047 * submap anyway, so no need to remove individual UPL
7048 * element mappings from within the submap
7049 */
7050 goto process_upl_to_remove;
7051 }
7052 }
7053 upl_unlock(upl);
7054
7055 return KERN_FAILURE;
7056 }
7057
7058
7059 kern_return_t
7060 upl_commit_range(
7061 upl_t upl,
7062 upl_offset_t offset,
7063 upl_size_t size,
7064 int flags,
7065 upl_page_info_t *page_list,
7066 mach_msg_type_number_t count,
7067 boolean_t *empty)
7068 {
7069 upl_size_t xfer_size, subupl_size = size;
7070 vm_object_t shadow_object;
7071 vm_object_t object;
7072 vm_object_t m_object;
7073 vm_object_offset_t target_offset;
7074 upl_offset_t subupl_offset = offset;
7075 int entry;
7076 wpl_array_t lite_list;
7077 int occupied;
7078 int clear_refmod = 0;
7079 int pgpgout_count = 0;
7080 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
7081 struct vm_page_delayed_work *dwp;
7082 int dw_count;
7083 int dw_limit;
7084 int isVectorUPL = 0;
7085 upl_t vector_upl = NULL;
7086 boolean_t should_be_throttled = FALSE;
7087
7088 vm_page_t nxt_page = VM_PAGE_NULL;
7089 int fast_path_possible = 0;
7090 int fast_path_full_commit = 0;
7091 int throttle_page = 0;
7092 int unwired_count = 0;
7093 int local_queue_count = 0;
7094 vm_page_t first_local, last_local;
7095
7096 *empty = FALSE;
7097
7098 if (upl == UPL_NULL)
7099 return KERN_INVALID_ARGUMENT;
7100
7101 if (count == 0)
7102 page_list = NULL;
7103
7104 if((isVectorUPL = vector_upl_is_valid(upl))) {
7105 vector_upl = upl;
7106 upl_lock(vector_upl);
7107 }
7108 else
7109 upl_lock(upl);
7110
7111 process_upl_to_commit:
7112
7113 if(isVectorUPL) {
7114 size = subupl_size;
7115 offset = subupl_offset;
7116 if(size == 0) {
7117 upl_unlock(vector_upl);
7118 return KERN_SUCCESS;
7119 }
7120 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
7121 if(upl == NULL) {
7122 upl_unlock(vector_upl);
7123 return KERN_FAILURE;
7124 }
7125 page_list = UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(upl);
7126 subupl_size -= size;
7127 subupl_offset += size;
7128 }
7129
7130 #if UPL_DEBUG
7131 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
7132 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
7133
7134 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
7135 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
7136
7137 upl->upl_commit_index++;
7138 }
7139 #endif
7140 if (upl->flags & UPL_DEVICE_MEMORY)
7141 xfer_size = 0;
7142 else if ((offset + size) <= upl->size)
7143 xfer_size = size;
7144 else {
7145 if(!isVectorUPL)
7146 upl_unlock(upl);
7147 else {
7148 upl_unlock(vector_upl);
7149 }
7150 return KERN_FAILURE;
7151 }
7152 if (upl->flags & UPL_SET_DIRTY)
7153 flags |= UPL_COMMIT_SET_DIRTY;
7154 if (upl->flags & UPL_CLEAR_DIRTY)
7155 flags |= UPL_COMMIT_CLEAR_DIRTY;
7156
7157 if (upl->flags & UPL_INTERNAL)
7158 lite_list = (wpl_array_t) ((((uintptr_t)upl) + sizeof(struct upl))
7159 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
7160 else
7161 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
7162
7163 object = upl->map_object;
7164
7165 if (upl->flags & UPL_SHADOWED) {
7166 vm_object_lock(object);
7167 shadow_object = object->shadow;
7168 } else {
7169 shadow_object = object;
7170 }
7171 entry = offset/PAGE_SIZE;
7172 target_offset = (vm_object_offset_t)offset;
7173
7174 assert(!(target_offset & PAGE_MASK));
7175 assert(!(xfer_size & PAGE_MASK));
7176
7177 if (upl->flags & UPL_KERNEL_OBJECT)
7178 vm_object_lock_shared(shadow_object);
7179 else
7180 vm_object_lock(shadow_object);
7181
7182 if (upl->flags & UPL_ACCESS_BLOCKED) {
7183 assert(shadow_object->blocked_access);
7184 shadow_object->blocked_access = FALSE;
7185 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
7186 }
7187
7188 if (shadow_object->code_signed) {
7189 /*
7190 * CODE SIGNING:
7191 * If the object is code-signed, do not let this UPL tell
7192 * us if the pages are valid or not. Let the pages be
7193 * validated by VM the normal way (when they get mapped or
7194 * copied).
7195 */
7196 flags &= ~UPL_COMMIT_CS_VALIDATED;
7197 }
7198 if (! page_list) {
7199 /*
7200 * No page list to get the code-signing info from !?
7201 */
7202 flags &= ~UPL_COMMIT_CS_VALIDATED;
7203 }
7204 if (!VM_DYNAMIC_PAGING_ENABLED() && shadow_object->internal)
7205 should_be_throttled = TRUE;
7206
7207 dwp = &dw_array[0];
7208 dw_count = 0;
7209 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
7210
7211 if ((upl->flags & UPL_IO_WIRE) &&
7212 !(flags & UPL_COMMIT_FREE_ABSENT) &&
7213 !isVectorUPL &&
7214 shadow_object->purgable != VM_PURGABLE_VOLATILE &&
7215 shadow_object->purgable != VM_PURGABLE_EMPTY) {
7216
7217 if (!vm_page_queue_empty(&shadow_object->memq)) {
7218
7219 if (size == shadow_object->vo_size) {
7220 nxt_page = (vm_page_t)vm_page_queue_first(&shadow_object->memq);
7221 fast_path_full_commit = 1;
7222 }
7223 fast_path_possible = 1;
7224
7225 if (!VM_DYNAMIC_PAGING_ENABLED() && shadow_object->internal &&
7226 (shadow_object->purgable == VM_PURGABLE_DENY ||
7227 shadow_object->purgable == VM_PURGABLE_NONVOLATILE ||
7228 shadow_object->purgable == VM_PURGABLE_VOLATILE)) {
7229 throttle_page = 1;
7230 }
7231 }
7232 }
7233 first_local = VM_PAGE_NULL;
7234 last_local = VM_PAGE_NULL;
7235
7236 while (xfer_size) {
7237 vm_page_t t, m;
7238
7239 dwp->dw_mask = 0;
7240 clear_refmod = 0;
7241
7242 m = VM_PAGE_NULL;
7243
7244 if (upl->flags & UPL_LITE) {
7245 unsigned int pg_num;
7246
7247 if (nxt_page != VM_PAGE_NULL) {
7248 m = nxt_page;
7249 nxt_page = (vm_page_t)vm_page_queue_next(&nxt_page->listq);
7250 target_offset = m->offset;
7251 }
7252 pg_num = (unsigned int) (target_offset/PAGE_SIZE);
7253 assert(pg_num == target_offset/PAGE_SIZE);
7254
7255 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
7256 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
7257
7258 if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL)
7259 m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset));
7260 } else
7261 m = NULL;
7262 }
7263 if (upl->flags & UPL_SHADOWED) {
7264 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
7265
7266 t->free_when_done = FALSE;
7267
7268 VM_PAGE_FREE(t);
7269
7270 if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL)
7271 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
7272 }
7273 }
7274 if (m == VM_PAGE_NULL)
7275 goto commit_next_page;
7276
7277 m_object = VM_PAGE_OBJECT(m);
7278
7279 if (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
7280 assert(m->busy);
7281
7282 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
7283 goto commit_next_page;
7284 }
7285
7286 if (flags & UPL_COMMIT_CS_VALIDATED) {
7287 /*
7288 * CODE SIGNING:
7289 * Set the code signing bits according to
7290 * what the UPL says they should be.
7291 */
7292 m->cs_validated = page_list[entry].cs_validated;
7293 m->cs_tainted = page_list[entry].cs_tainted;
7294 m->cs_nx = page_list[entry].cs_nx;
7295 }
7296 if (flags & UPL_COMMIT_WRITTEN_BY_KERNEL)
7297 m->written_by_kernel = TRUE;
7298
7299 if (upl->flags & UPL_IO_WIRE) {
7300
7301 if (page_list)
7302 page_list[entry].phys_addr = 0;
7303
7304 if (flags & UPL_COMMIT_SET_DIRTY) {
7305 SET_PAGE_DIRTY(m, FALSE);
7306 } else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
7307 m->dirty = FALSE;
7308
7309 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
7310 m->cs_validated && !m->cs_tainted) {
7311 /*
7312 * CODE SIGNING:
7313 * This page is no longer dirty
7314 * but could have been modified,
7315 * so it will need to be
7316 * re-validated.
7317 */
7318 if (m->slid) {
7319 panic("upl_commit_range(%p): page %p was slid\n",
7320 upl, m);
7321 }
7322 assert(!m->slid);
7323 m->cs_validated = FALSE;
7324 #if DEVELOPMENT || DEBUG
7325 vm_cs_validated_resets++;
7326 #endif
7327 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
7328 }
7329 clear_refmod |= VM_MEM_MODIFIED;
7330 }
7331 if (upl->flags & UPL_ACCESS_BLOCKED) {
7332 /*
7333 * We blocked access to the pages in this UPL.
7334 * Clear the "busy" bit and wake up any waiter
7335 * for this page.
7336 */
7337 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
7338 }
7339 if (fast_path_possible) {
7340 assert(m_object->purgable != VM_PURGABLE_EMPTY);
7341 assert(m_object->purgable != VM_PURGABLE_VOLATILE);
7342 if (m->absent) {
7343 assert(m->vm_page_q_state == VM_PAGE_NOT_ON_Q);
7344 assert(m->wire_count == 0);
7345 assert(m->busy);
7346
7347 m->absent = FALSE;
7348 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
7349 } else {
7350 if (m->wire_count == 0)
7351 panic("wire_count == 0, m = %p, obj = %p\n", m, shadow_object);
7352 assert(m->vm_page_q_state == VM_PAGE_IS_WIRED);
7353
7354 /*
7355 * XXX FBDP need to update some other
7356 * counters here (purgeable_wired_count)
7357 * (ledgers), ...
7358 */
7359 assert(m->wire_count > 0);
7360 m->wire_count--;
7361
7362 if (m->wire_count == 0) {
7363 m->vm_page_q_state = VM_PAGE_NOT_ON_Q;
7364 unwired_count++;
7365 }
7366 }
7367 if (m->wire_count == 0) {
7368 assert(m->pageq.next == 0 && m->pageq.prev == 0);
7369
7370 if (last_local == VM_PAGE_NULL) {
7371 assert(first_local == VM_PAGE_NULL);
7372
7373 last_local = m;
7374 first_local = m;
7375 } else {
7376 assert(first_local != VM_PAGE_NULL);
7377
7378 m->pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
7379 first_local->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(m);
7380 first_local = m;
7381 }
7382 local_queue_count++;
7383
7384 if (throttle_page) {
7385 m->vm_page_q_state = VM_PAGE_ON_THROTTLED_Q;
7386 } else {
7387 if (flags & UPL_COMMIT_INACTIVATE) {
7388 if (shadow_object->internal)
7389 m->vm_page_q_state = VM_PAGE_ON_INACTIVE_INTERNAL_Q;
7390 else
7391 m->vm_page_q_state = VM_PAGE_ON_INACTIVE_EXTERNAL_Q;
7392 } else
7393 m->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
7394 }
7395 }
7396 } else {
7397 if (flags & UPL_COMMIT_INACTIVATE) {
7398 dwp->dw_mask |= DW_vm_page_deactivate_internal;
7399 clear_refmod |= VM_MEM_REFERENCED;
7400 }
7401 if (m->absent) {
7402 if (flags & UPL_COMMIT_FREE_ABSENT)
7403 dwp->dw_mask |= DW_vm_page_free;
7404 else {
7405 m->absent = FALSE;
7406 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
7407
7408 if ( !(dwp->dw_mask & DW_vm_page_deactivate_internal))
7409 dwp->dw_mask |= DW_vm_page_activate;
7410 }
7411 } else
7412 dwp->dw_mask |= DW_vm_page_unwire;
7413 }
7414 goto commit_next_page;
7415 }
7416 assert(m->vm_page_q_state != VM_PAGE_USED_BY_COMPRESSOR);
7417
7418 if (page_list)
7419 page_list[entry].phys_addr = 0;
7420
7421 /*
7422 * make sure to clear the hardware
7423 * modify or reference bits before
7424 * releasing the BUSY bit on this page
7425 * otherwise we risk losing a legitimate
7426 * change of state
7427 */
7428 if (flags & UPL_COMMIT_CLEAR_DIRTY) {
7429 m->dirty = FALSE;
7430
7431 clear_refmod |= VM_MEM_MODIFIED;
7432 }
7433 if (m->laundry)
7434 dwp->dw_mask |= DW_vm_pageout_throttle_up;
7435
7436 if (VM_PAGE_WIRED(m))
7437 m->free_when_done = FALSE;
7438
7439 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
7440 m->cs_validated && !m->cs_tainted) {
7441 /*
7442 * CODE SIGNING:
7443 * This page is no longer dirty
7444 * but could have been modified,
7445 * so it will need to be
7446 * re-validated.
7447 */
7448 if (m->slid) {
7449 panic("upl_commit_range(%p): page %p was slid\n",
7450 upl, m);
7451 }
7452 assert(!m->slid);
7453 m->cs_validated = FALSE;
7454 #if DEVELOPMENT || DEBUG
7455 vm_cs_validated_resets++;
7456 #endif
7457 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
7458 }
7459 if (m->overwriting) {
7460 /*
7461 * the (COPY_OUT_FROM == FALSE) request_page_list case
7462 */
7463 if (m->busy) {
7464 #if CONFIG_PHANTOM_CACHE
7465 if (m->absent && !m_object->internal)
7466 dwp->dw_mask |= DW_vm_phantom_cache_update;
7467 #endif
7468 m->absent = FALSE;
7469
7470 dwp->dw_mask |= DW_clear_busy;
7471 } else {
7472 /*
7473 * alternate (COPY_OUT_FROM == FALSE) page_list case
7474 * Occurs when the original page was wired
7475 * at the time of the list request
7476 */
7477 assert(VM_PAGE_WIRED(m));
7478
7479 dwp->dw_mask |= DW_vm_page_unwire; /* reactivates */
7480 }
7481 m->overwriting = FALSE;
7482 }
7483 if (m->encrypted_cleaning == TRUE) {
7484 m->encrypted_cleaning = FALSE;
7485
7486 dwp->dw_mask |= DW_clear_busy | DW_PAGE_WAKEUP;
7487 }
7488 m->cleaning = FALSE;
7489
7490 if (m->free_when_done) {
7491 /*
7492 * With the clean queue enabled, UPL_PAGEOUT should
7493 * no longer set the pageout bit. Its pages now go
7494 * to the clean queue.
7495 */
7496 assert(!(flags & UPL_PAGEOUT));
7497 assert(!m_object->internal);
7498
7499 m->free_when_done = FALSE;
7500 #if MACH_CLUSTER_STATS
7501 if (m->wanted) vm_pageout_target_collisions++;
7502 #endif
7503 if ((flags & UPL_COMMIT_SET_DIRTY) ||
7504 (m->pmapped && (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED))) {
7505 /*
7506 * page was re-dirtied after we started
7507 * the pageout... reactivate it since
7508 * we don't know whether the on-disk
7509 * copy matches what is now in memory
7510 */
7511 SET_PAGE_DIRTY(m, FALSE);
7512
7513 dwp->dw_mask |= DW_vm_page_activate | DW_PAGE_WAKEUP;
7514
7515 if (upl->flags & UPL_PAGEOUT) {
7516 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
7517 VM_STAT_INCR(reactivations);
7518 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
7519 }
7520 } else {
7521 /*
7522 * page has been successfully cleaned
7523 * go ahead and free it for other use
7524 */
7525 if (m_object->internal) {
7526 DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL);
7527 } else {
7528 DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL);
7529 }
7530 m->dirty = FALSE;
7531 m->busy = TRUE;
7532
7533 dwp->dw_mask |= DW_vm_page_free;
7534 }
7535 goto commit_next_page;
7536 }
7537 #if MACH_CLUSTER_STATS
7538 if (m->wpmapped)
7539 m->dirty = pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m));
7540
7541 if (m->dirty) vm_pageout_cluster_dirtied++;
7542 else vm_pageout_cluster_cleaned++;
7543 if (m->wanted) vm_pageout_cluster_collisions++;
7544 #endif
7545 /*
7546 * It is a part of the semantics of COPYOUT_FROM
7547 * UPLs that a commit implies cache sync
7548 * between the vm page and the backing store;
7549 * this can be used to strip the precious bit
7550 * as well as clean
7551 */
7552 if ((upl->flags & UPL_PAGE_SYNC_DONE) || (flags & UPL_COMMIT_CLEAR_PRECIOUS))
7553 m->precious = FALSE;
7554
7555 if (flags & UPL_COMMIT_SET_DIRTY) {
7556 SET_PAGE_DIRTY(m, FALSE);
7557 } else {
7558 m->dirty = FALSE;
7559 }
7560
7561 /* with the clean queue on, move *all* cleaned pages to the clean queue */
7562 if (hibernate_cleaning_in_progress == FALSE && !m->dirty && (upl->flags & UPL_PAGEOUT)) {
7563 pgpgout_count++;
7564
7565 VM_STAT_INCR(pageouts);
7566 DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL);
7567
7568 dwp->dw_mask |= DW_enqueue_cleaned;
7569 vm_pageout_enqueued_cleaned_from_inactive_dirty++;
7570 } else if (should_be_throttled == TRUE && (m->vm_page_q_state == VM_PAGE_NOT_ON_Q)) {
7571 /*
7572 * page coming back in from being 'frozen'...
7573 * it was dirty before it was frozen, so keep it dirty so
7574 * that vm_page_activate will notice that it really belongs
7575 * on the throttle queue and put it there
7576 */
7577 SET_PAGE_DIRTY(m, FALSE);
7578 dwp->dw_mask |= DW_vm_page_activate;
7579
7580 } else {
7581 if ((flags & UPL_COMMIT_INACTIVATE) && !m->clustered && (m->vm_page_q_state != VM_PAGE_ON_SPECULATIVE_Q)) {
7582 dwp->dw_mask |= DW_vm_page_deactivate_internal;
7583 clear_refmod |= VM_MEM_REFERENCED;
7584 } else if ( !VM_PAGE_PAGEABLE(m)) {
7585
7586 if (m->clustered || (flags & UPL_COMMIT_SPECULATE))
7587 dwp->dw_mask |= DW_vm_page_speculate;
7588 else if (m->reference)
7589 dwp->dw_mask |= DW_vm_page_activate;
7590 else {
7591 dwp->dw_mask |= DW_vm_page_deactivate_internal;
7592 clear_refmod |= VM_MEM_REFERENCED;
7593 }
7594 }
7595 }
7596 if (upl->flags & UPL_ACCESS_BLOCKED) {
7597 /*
7598 * We blocked access to the pages in this UPL.
7599 * Clear the "busy" bit on this page before we
7600 * wake up any waiter.
7601 */
7602 dwp->dw_mask |= DW_clear_busy;
7603 }
7604 /*
7605 * Wakeup any thread waiting for the page to be un-cleaning.
7606 */
7607 dwp->dw_mask |= DW_PAGE_WAKEUP;
7608
7609 commit_next_page:
7610 if (clear_refmod)
7611 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(m), clear_refmod);
7612
7613 target_offset += PAGE_SIZE_64;
7614 xfer_size -= PAGE_SIZE;
7615 entry++;
7616
7617 if (dwp->dw_mask) {
7618 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
7619 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
7620
7621 if (dw_count >= dw_limit) {
7622 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7623
7624 dwp = &dw_array[0];
7625 dw_count = 0;
7626 }
7627 } else {
7628 if (dwp->dw_mask & DW_clear_busy)
7629 m->busy = FALSE;
7630
7631 if (dwp->dw_mask & DW_PAGE_WAKEUP)
7632 PAGE_WAKEUP(m);
7633 }
7634 }
7635 }
7636 if (dw_count)
7637 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7638
7639 if (fast_path_possible) {
7640
7641 assert(shadow_object->purgable != VM_PURGABLE_VOLATILE);
7642 assert(shadow_object->purgable != VM_PURGABLE_EMPTY);
7643
7644 if (local_queue_count || unwired_count) {
7645
7646 if (local_queue_count) {
7647 vm_page_t first_target;
7648 vm_page_queue_head_t *target_queue;
7649
7650 if (throttle_page)
7651 target_queue = &vm_page_queue_throttled;
7652 else {
7653 if (flags & UPL_COMMIT_INACTIVATE) {
7654 if (shadow_object->internal)
7655 target_queue = &vm_page_queue_anonymous;
7656 else
7657 target_queue = &vm_page_queue_inactive;
7658 } else
7659 target_queue = &vm_page_queue_active;
7660 }
7661 /*
7662 * Transfer the entire local queue to a regular LRU page queue.
7663 */
7664 vm_page_lockspin_queues();
7665
7666 first_target = (vm_page_t) vm_page_queue_first(target_queue);
7667
7668 if (vm_page_queue_empty(target_queue))
7669 target_queue->prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
7670 else
7671 first_target->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
7672
7673 target_queue->next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
7674 first_local->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(target_queue);
7675 last_local->pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_target);
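/*
 * Pictorially, the assignments above splice the locally built
 * chain onto the head of the target queue:
 *
 *	before:	[target_queue] <-> first_target <-> ...
 *	after:	[target_queue] <-> first_local <-> ... <-> last_local <-> first_target <-> ...
 *
 * (when the target queue started out empty, target_queue->prev was
 * pointed at last_local above instead of first_target->pageq.prev)
 */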
7676
7677 /*
7678 * Adjust the global page counts.
7679 */
7680 if (throttle_page) {
7681 vm_page_throttled_count += local_queue_count;
7682 } else {
7683 if (flags & UPL_COMMIT_INACTIVATE) {
7684 if (shadow_object->internal)
7685 vm_page_anonymous_count += local_queue_count;
7686 vm_page_inactive_count += local_queue_count;
7687
7688 token_new_pagecount += local_queue_count;
7689 } else
7690 vm_page_active_count += local_queue_count;
7691
7692 if (shadow_object->internal)
7693 vm_page_pageable_internal_count += local_queue_count;
7694 else
7695 vm_page_pageable_external_count += local_queue_count;
7696 }
7697 } else {
7698 vm_page_lockspin_queues();
7699 }
7700 if (unwired_count) {
7701 vm_page_wire_count -= unwired_count;
7702 VM_CHECK_MEMORYSTATUS;
7703 }
7704 vm_page_unlock_queues();
7705
7706 shadow_object->wired_page_count -= unwired_count;
7707
7708 if (!shadow_object->wired_page_count) {
7709 VM_OBJECT_UNWIRED(shadow_object);
7710 }
7711 }
7712 }
7713 occupied = 1;
7714
7715 if (upl->flags & UPL_DEVICE_MEMORY) {
7716 occupied = 0;
7717 } else if (upl->flags & UPL_LITE) {
7718 int pg_num;
7719 int i;
7720
7721 occupied = 0;
7722
7723 if (!fast_path_full_commit) {
7724 pg_num = upl->size/PAGE_SIZE;
7725 pg_num = (pg_num + 31) >> 5;
7726
7727 for (i = 0; i < pg_num; i++) {
7728 if (lite_list[i] != 0) {
7729 occupied = 1;
7730 break;
7731 }
7732 }
7733 }
7734 } else {
7735 if (vm_page_queue_empty(&upl->map_object->memq))
7736 occupied = 0;
7737 }
7738 if (occupied == 0) {
7739 /*
7740 * If this UPL element belongs to a Vector UPL and is
7741 * empty, then this is the right function to deallocate
7742 * it. So go ahead and set the *empty variable. The flag
7743 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view
7744 * should be considered relevant for the Vector UPL and not
7745 * the internal UPLs.
7746 */
7747 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
7748 *empty = TRUE;
7749
7750 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
7751 /*
7752 * this is not a paging object
7753 * so we need to drop the paging reference
7754 * that was taken when we created the UPL
7755 * against this object
7756 */
7757 vm_object_activity_end(shadow_object);
7758 vm_object_collapse(shadow_object, 0, TRUE);
7759 } else {
7760 /*
7761 * we donated the paging reference to
7762 * the map object... vm_pageout_object_terminate
7763 * will drop this reference
7764 */
7765 }
7766 }
7767 vm_object_unlock(shadow_object);
7768 if (object != shadow_object)
7769 vm_object_unlock(object);
7770
7771 if(!isVectorUPL)
7772 upl_unlock(upl);
7773 else {
7774 /*
7775 * If we completed our operations on an UPL that is
7776 * part of a Vectored UPL and if empty is TRUE, then
7777 * we should go ahead and deallocate this UPL element.
7778 * Then we check if this was the last of the UPL elements
7779 * within that Vectored UPL. If so, set empty to TRUE
7780 * so that in ubc_upl_commit_range or ubc_upl_commit, we
7781 * can go ahead and deallocate the Vector UPL too.
7782 */
7783 if(*empty==TRUE) {
7784 *empty = vector_upl_set_subupl(vector_upl, upl, 0);
7785 upl_deallocate(upl);
7786 }
7787 goto process_upl_to_commit;
7788 }
7789 if (pgpgout_count) {
7790 DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL);
7791 }
7792
7793 return KERN_SUCCESS;
7794 }
7795
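/*
 * Usage sketch (illustrative only; the variable names and flag choices
 * below are placeholders rather than a transcript of a real caller):
 * a client that has finished its I/O typically commits the whole UPL
 * and deallocates it once it drains, along the lines of
 *
 *	boolean_t	empty;
 *	kern_return_t	kr;
 *
 *	kr = upl_commit_range(upl, 0, upl->size,
 *			      UPL_COMMIT_FREE_ABSENT | UPL_COMMIT_CLEAR_DIRTY,
 *			      page_list, page_list_count, &empty);
 *	if (kr == KERN_SUCCESS && empty)
 *		upl_deallocate(upl);
 *
 * Other callers pass UPL_COMMIT_SET_DIRTY or UPL_COMMIT_INACTIVATE
 * instead, depending on how the pages were used; see the flag handling
 * above.
 */
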
7796 kern_return_t
7797 upl_abort_range(
7798 upl_t upl,
7799 upl_offset_t offset,
7800 upl_size_t size,
7801 int error,
7802 boolean_t *empty)
7803 {
7804 upl_page_info_t *user_page_list = NULL;
7805 upl_size_t xfer_size, subupl_size = size;
7806 vm_object_t shadow_object;
7807 vm_object_t object;
7808 vm_object_offset_t target_offset;
7809 upl_offset_t subupl_offset = offset;
7810 int entry;
7811 wpl_array_t lite_list;
7812 int occupied;
7813 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
7814 struct vm_page_delayed_work *dwp;
7815 int dw_count;
7816 int dw_limit;
7817 int isVectorUPL = 0;
7818 upl_t vector_upl = NULL;
7819
7820 *empty = FALSE;
7821
7822 if (upl == UPL_NULL)
7823 return KERN_INVALID_ARGUMENT;
7824
7825 if ( (upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES) )
7826 return upl_commit_range(upl, offset, size, UPL_COMMIT_FREE_ABSENT, NULL, 0, empty);
7827
7828 if((isVectorUPL = vector_upl_is_valid(upl))) {
7829 vector_upl = upl;
7830 upl_lock(vector_upl);
7831 }
7832 else
7833 upl_lock(upl);
7834
7835 process_upl_to_abort:
7836 if(isVectorUPL) {
7837 size = subupl_size;
7838 offset = subupl_offset;
7839 if(size == 0) {
7840 upl_unlock(vector_upl);
7841 return KERN_SUCCESS;
7842 }
7843 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
7844 if(upl == NULL) {
7845 upl_unlock(vector_upl);
7846 return KERN_FAILURE;
7847 }
7848 subupl_size -= size;
7849 subupl_offset += size;
7850 }
7851
7852 *empty = FALSE;
7853
7854 #if UPL_DEBUG
7855 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
7856 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
7857
7858 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
7859 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
7860 upl->upl_commit_records[upl->upl_commit_index].c_aborted = 1;
7861
7862 upl->upl_commit_index++;
7863 }
7864 #endif
7865 if (upl->flags & UPL_DEVICE_MEMORY)
7866 xfer_size = 0;
7867 else if ((offset + size) <= upl->size)
7868 xfer_size = size;
7869 else {
7870 if(!isVectorUPL)
7871 upl_unlock(upl);
7872 else {
7873 upl_unlock(vector_upl);
7874 }
7875
7876 return KERN_FAILURE;
7877 }
7878 if (upl->flags & UPL_INTERNAL) {
7879 lite_list = (wpl_array_t)
7880 ((((uintptr_t)upl) + sizeof(struct upl))
7881 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
7882
7883 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
7884 } else {
7885 lite_list = (wpl_array_t)
7886 (((uintptr_t)upl) + sizeof(struct upl));
7887 }
7888 object = upl->map_object;
7889
7890 if (upl->flags & UPL_SHADOWED) {
7891 vm_object_lock(object);
7892 shadow_object = object->shadow;
7893 } else
7894 shadow_object = object;
7895
7896 entry = offset/PAGE_SIZE;
7897 target_offset = (vm_object_offset_t)offset;
7898
7899 assert(!(target_offset & PAGE_MASK));
7900 assert(!(xfer_size & PAGE_MASK));
7901
7902 if (upl->flags & UPL_KERNEL_OBJECT)
7903 vm_object_lock_shared(shadow_object);
7904 else
7905 vm_object_lock(shadow_object);
7906
7907 if (upl->flags & UPL_ACCESS_BLOCKED) {
7908 assert(shadow_object->blocked_access);
7909 shadow_object->blocked_access = FALSE;
7910 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
7911 }
7912
7913 dwp = &dw_array[0];
7914 dw_count = 0;
7915 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
7916
7917 if ((error & UPL_ABORT_DUMP_PAGES) && (upl->flags & UPL_KERNEL_OBJECT))
7918 panic("upl_abort_range: kernel_object being DUMPED");
7919
7920 while (xfer_size) {
7921 vm_page_t t, m;
7922 unsigned int pg_num;
7923 boolean_t needed;
7924
7925 pg_num = (unsigned int) (target_offset/PAGE_SIZE);
7926 assert(pg_num == target_offset/PAGE_SIZE);
7927
7928 needed = FALSE;
7929
7930 if (user_page_list)
7931 needed = user_page_list[pg_num].needed;
7932
7933 dwp->dw_mask = 0;
7934 m = VM_PAGE_NULL;
7935
7936 if (upl->flags & UPL_LITE) {
7937
7938 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
7939 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
7940
7941 if ( !(upl->flags & UPL_KERNEL_OBJECT))
7942 m = vm_page_lookup(shadow_object, target_offset +
7943 (upl->offset - shadow_object->paging_offset));
7944 }
7945 }
7946 if (upl->flags & UPL_SHADOWED) {
7947 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
7948 t->free_when_done = FALSE;
7949
7950 VM_PAGE_FREE(t);
7951
7952 if (m == VM_PAGE_NULL)
7953 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
7954 }
7955 }
7956 if ((upl->flags & UPL_KERNEL_OBJECT))
7957 goto abort_next_page;
7958
7959 if (m != VM_PAGE_NULL) {
7960
7961 assert(m->vm_page_q_state != VM_PAGE_USED_BY_COMPRESSOR);
7962
7963 if (m->absent) {
7964 boolean_t must_free = TRUE;
7965
7966 /*
7967 * COPYOUT = FALSE case
7968 * check for error conditions which must
7969 * be passed back to the page's customer
7970 */
7971 if (error & UPL_ABORT_RESTART) {
7972 m->restart = TRUE;
7973 m->absent = FALSE;
7974 m->unusual = TRUE;
7975 must_free = FALSE;
7976 } else if (error & UPL_ABORT_UNAVAILABLE) {
7977 m->restart = FALSE;
7978 m->unusual = TRUE;
7979 must_free = FALSE;
7980 } else if (error & UPL_ABORT_ERROR) {
7981 m->restart = FALSE;
7982 m->absent = FALSE;
7983 m->error = TRUE;
7984 m->unusual = TRUE;
7985 must_free = FALSE;
7986 }
7987 if (m->clustered && needed == FALSE) {
7988 /*
7989 * This page was a part of a speculative
7990 * read-ahead initiated by the kernel
7991 * itself. No one is expecting this
7992 * page and no one will clean up its
7993 * error state if it ever becomes valid
7994 * in the future.
7995 * We have to free it here.
7996 */
7997 must_free = TRUE;
7998 }
7999
8000 /*
8001 * ENCRYPTED SWAP:
8002 * If the page was already encrypted,
8003 * we don't really need to decrypt it
8004 * now. It will get decrypted later,
8005 * on demand, as soon as someone needs
8006 * to access its contents.
8007 */
8008
8009 m->cleaning = FALSE;
8010 m->encrypted_cleaning = FALSE;
8011
8012 if (m->overwriting && !m->busy) {
8013 /*
8014 * this shouldn't happen since
8015 * this is an 'absent' page, but
8016 * it doesn't hurt to check for
8017 * the 'alternate' method of
8018 * stabilizing the page...
8019 * we will mark 'busy' to be cleared
8020 * in the following code which will
8021 * take care of the primary stabilization
8022 * method (i.e. setting 'busy' to TRUE)
8023 */
8024 dwp->dw_mask |= DW_vm_page_unwire;
8025 }
8026 m->overwriting = FALSE;
8027
8028 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
8029
8030 if (must_free == TRUE)
8031 dwp->dw_mask |= DW_vm_page_free;
8032 else
8033 dwp->dw_mask |= DW_vm_page_activate;
8034 } else {
8035 /*
8036 * Handle the trusted pager throttle.
8037 */
8038 if (m->laundry)
8039 dwp->dw_mask |= DW_vm_pageout_throttle_up;
8040
8041 if (upl->flags & UPL_ACCESS_BLOCKED) {
8042 /*
8043 * We blocked access to the pages in this UPL.
8044 * Clear the "busy" bit and wake up any waiter
8045 * for this page.
8046 */
8047 dwp->dw_mask |= DW_clear_busy;
8048 }
8049 if (m->overwriting) {
8050 if (m->busy)
8051 dwp->dw_mask |= DW_clear_busy;
8052 else {
8053 /*
8054 * deal with the 'alternate' method
8055 * of stabilizing the page...
8056 * we will either free the page
8057 * or mark 'busy' to be cleared
8058 * in the following code which will
8059 * take care of the primary stabilization
8060 * method (i.e. setting 'busy' to TRUE)
8061 */
8062 dwp->dw_mask |= DW_vm_page_unwire;
8063 }
8064 m->overwriting = FALSE;
8065 }
8066 if (m->encrypted_cleaning == TRUE) {
8067 m->encrypted_cleaning = FALSE;
8068
8069 dwp->dw_mask |= DW_clear_busy;
8070 }
8071 m->free_when_done = FALSE;
8072 m->cleaning = FALSE;
8073
8074 if (error & UPL_ABORT_DUMP_PAGES) {
8075 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
8076
8077 dwp->dw_mask |= DW_vm_page_free;
8078 } else {
8079 if (!(dwp->dw_mask & DW_vm_page_unwire)) {
8080 if (error & UPL_ABORT_REFERENCE) {
8081 /*
8082 * we've been told to explicitly
8083 * reference this page... for
8084 * file I/O, this is done by
8085 * implementing an LRU on the inactive q
8086 */
8087 dwp->dw_mask |= DW_vm_page_lru;
8088
8089 } else if ( !VM_PAGE_PAGEABLE(m))
8090 dwp->dw_mask |= DW_vm_page_deactivate_internal;
8091 }
8092 dwp->dw_mask |= DW_PAGE_WAKEUP;
8093 }
8094 }
8095 }
8096 abort_next_page:
8097 target_offset += PAGE_SIZE_64;
8098 xfer_size -= PAGE_SIZE;
8099 entry++;
8100
8101 if (dwp->dw_mask) {
8102 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
8103 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
8104
8105 if (dw_count >= dw_limit) {
8106 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
8107
8108 dwp = &dw_array[0];
8109 dw_count = 0;
8110 }
8111 } else {
8112 if (dwp->dw_mask & DW_clear_busy)
8113 m->busy = FALSE;
8114
8115 if (dwp->dw_mask & DW_PAGE_WAKEUP)
8116 PAGE_WAKEUP(m);
8117 }
8118 }
8119 }
8120 if (dw_count)
8121 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
8122
8123 occupied = 1;
8124
8125 if (upl->flags & UPL_DEVICE_MEMORY) {
8126 occupied = 0;
8127 } else if (upl->flags & UPL_LITE) {
8128 int pg_num;
8129 int i;
8130
8131 pg_num = upl->size/PAGE_SIZE;
8132 pg_num = (pg_num + 31) >> 5;
8133 occupied = 0;
8134
8135 for (i = 0; i < pg_num; i++) {
8136 if (lite_list[i] != 0) {
8137 occupied = 1;
8138 break;
8139 }
8140 }
8141 } else {
8142 if (vm_page_queue_empty(&upl->map_object->memq))
8143 occupied = 0;
8144 }
8145 if (occupied == 0) {
8146 /*
8147 * If this UPL element belongs to a Vector UPL and is
8148 * empty, then this is the right function to deallocate
8149 * it. So go ahead and set the *empty variable. The flag
8150 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view
8151 * should be considered relevant for the Vector UPL and
8152 * not the internal UPLs.
8153 */
8154 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
8155 *empty = TRUE;
8156
8157 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
8158 /*
8159 * this is not a paging object
8160 * so we need to drop the paging reference
8161 * that was taken when we created the UPL
8162 * against this object
8163 */
8164 vm_object_activity_end(shadow_object);
8165 vm_object_collapse(shadow_object, 0, TRUE);
8166 } else {
8167 /*
8168 * we donated the paging reference to
8169 * the map object... vm_pageout_object_terminate
8170 * will drop this reference
8171 */
8172 }
8173 }
8174 vm_object_unlock(shadow_object);
8175 if (object != shadow_object)
8176 vm_object_unlock(object);
8177
8178 if(!isVectorUPL)
8179 upl_unlock(upl);
8180 else {
8181 /*
8182 * If we completed our operations on an UPL that is
8183 * part of a Vectored UPL and if empty is TRUE, then
8184 * we should go ahead and deallocate this UPL element.
8185 * Then we check if this was the last of the UPL elements
8186 * within that Vectored UPL. If so, set empty to TRUE
8187 * so that in ubc_upl_abort_range or ubc_upl_abort, we
8188 * can go ahead and deallocate the Vector UPL too.
8189 */
8190 if(*empty == TRUE) {
8191 *empty = vector_upl_set_subupl(vector_upl, upl,0);
8192 upl_deallocate(upl);
8193 }
8194 goto process_upl_to_abort;
8195 }
8196
8197 return KERN_SUCCESS;
8198 }
8199
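/*
 * Usage sketch (illustrative; names and error flags are placeholders):
 * a caller whose I/O failed typically aborts the whole range so the
 * pages are marked in error (or dumped outright with
 * UPL_ABORT_DUMP_PAGES), e.g.
 *
 *	boolean_t	empty;
 *
 *	(void) upl_abort_range(upl, 0, upl->size, UPL_ABORT_ERROR, &empty);
 *	if (empty)
 *		upl_deallocate(upl);
 */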
8200
8201 kern_return_t
8202 upl_abort(
8203 upl_t upl,
8204 int error)
8205 {
8206 boolean_t empty;
8207
8208 if (upl == UPL_NULL)
8209 return KERN_INVALID_ARGUMENT;
8210
8211 return upl_abort_range(upl, 0, upl->size, error, &empty);
8212 }
8213
8214
8215 /* an option on commit should be wire */
8216 kern_return_t
8217 upl_commit(
8218 upl_t upl,
8219 upl_page_info_t *page_list,
8220 mach_msg_type_number_t count)
8221 {
8222 boolean_t empty;
8223
8224 if (upl == UPL_NULL)
8225 return KERN_INVALID_ARGUMENT;
8226
8227 return upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty);
8228 }
8229
8230
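/*
 * iopl_valid_data() walks the pages backing an IO-wired UPL and
 * converts any busy+absent placeholder pages into valid, dirty, wired
 * pages, fixing up the object's wired_page_count and the global
 * vm_page_wire_count.  (Presumably invoked once the I/O that was going
 * to fill those absent pages -- e.g. one issued with UPL_NOZEROFILLIO --
 * has completed.)
 */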
8231 void
8232 iopl_valid_data(
8233 upl_t upl)
8234 {
8235 vm_object_t object;
8236 vm_offset_t offset;
8237 vm_page_t m, nxt_page = VM_PAGE_NULL;
8238 upl_size_t size;
8239 int wired_count = 0;
8240
8241 if (upl == NULL)
8242 panic("iopl_valid_data: NULL upl");
8243 if (vector_upl_is_valid(upl))
8244 panic("iopl_valid_data: vector upl");
8245 if ((upl->flags & (UPL_DEVICE_MEMORY|UPL_SHADOWED|UPL_ACCESS_BLOCKED|UPL_IO_WIRE|UPL_INTERNAL)) != UPL_IO_WIRE)
8246 panic("iopl_valid_data: unsupported upl, flags = %x", upl->flags);
8247
8248 object = upl->map_object;
8249
8250 if (object == kernel_object || object == compressor_object)
8251 panic("iopl_valid_data: object == kernel or compressor");
8252
8253 if (object->purgable == VM_PURGABLE_VOLATILE ||
8254 object->purgable == VM_PURGABLE_EMPTY)
8255 panic("iopl_valid_data: object %p purgable %d",
8256 object, object->purgable);
8257
8258 size = upl->size;
8259
8260 vm_object_lock(object);
8261
8262 if (object->vo_size == size && object->resident_page_count == (size / PAGE_SIZE))
8263 nxt_page = (vm_page_t)vm_page_queue_first(&object->memq);
8264 else
8265 offset = 0 + upl->offset - object->paging_offset;
8266
8267 while (size) {
8268
8269 if (nxt_page != VM_PAGE_NULL) {
8270 m = nxt_page;
8271 nxt_page = (vm_page_t)vm_page_queue_next(&nxt_page->listq);
8272 } else {
8273 m = vm_page_lookup(object, offset);
8274 offset += PAGE_SIZE;
8275
8276 if (m == VM_PAGE_NULL)
8277 panic("iopl_valid_data: missing expected page at offset %lx", (long)offset);
8278 }
8279 if (m->busy) {
8280 if (!m->absent)
8281 panic("iopl_valid_data: busy page w/o absent");
8282
8283 if (m->pageq.next || m->pageq.prev)
8284 panic("iopl_valid_data: busy+absent page on page queue");
8285 if (m->reusable) {
8286 panic("iopl_valid_data: %p is reusable", m);
8287 }
8288
8289 m->absent = FALSE;
8290 m->dirty = TRUE;
8291 assert(m->vm_page_q_state == VM_PAGE_NOT_ON_Q);
8292 assert(m->wire_count == 0);
8293 m->wire_count++;
8294 assert(m->wire_count);
8295 if (m->wire_count == 1) {
8296 m->vm_page_q_state = VM_PAGE_IS_WIRED;
8297 wired_count++;
8298 } else {
8299 panic("iopl_valid_data: %p already wired\n", m);
8300 }
8301
8302 PAGE_WAKEUP_DONE(m);
8303 }
8304 size -= PAGE_SIZE;
8305 }
8306 if (wired_count) {
8307
8308 if (!object->wired_page_count) {
8309 VM_OBJECT_WIRED(object);
8310 }
8311 object->wired_page_count += wired_count;
8312 assert(object->resident_page_count >= object->wired_page_count);
8313
8314 /* no need to adjust purgeable accounting for this object: */
8315 assert(object->purgable != VM_PURGABLE_VOLATILE);
8316 assert(object->purgable != VM_PURGABLE_EMPTY);
8317
8318 vm_page_lockspin_queues();
8319 vm_page_wire_count += wired_count;
8320 vm_page_unlock_queues();
8321 }
8322 vm_object_unlock(object);
8323 }
8324
8325 vm_tag_t
8326 iopl_set_tag(
8327 upl_t upl,
8328 vm_tag_t tag)
8329 {
8330 vm_object_t object;
8331 vm_tag_t prior_tag;
8332
8333 if (upl == NULL)
8334 panic("%s: NULL upl", __FUNCTION__);
8335 if (vector_upl_is_valid(upl))
8336 panic("%s: vector upl", __FUNCTION__);
8337 if (kernel_object == upl->map_object)
8338 return (tag);
8339 if ((upl->flags & (UPL_DEVICE_MEMORY|UPL_SHADOWED|UPL_ACCESS_BLOCKED|UPL_IO_WIRE|UPL_INTERNAL)) != UPL_IO_WIRE)
8340 return (tag);
8341
8342 object = upl->map_object;
8343 vm_object_lock(object);
8344
8345 prior_tag = object->wire_tag;
8346 object->wire_tag = tag;
8347 if (VM_KERN_MEMORY_NONE == prior_tag) prior_tag = tag;
8348 vm_object_unlock(object);
8349
8350 return (prior_tag);
8351 }
8352
8353
8354 void
8355 vm_object_set_pmap_cache_attr(
8356 vm_object_t object,
8357 upl_page_info_array_t user_page_list,
8358 unsigned int num_pages,
8359 boolean_t batch_pmap_op)
8360 {
8361 unsigned int cache_attr = 0;
8362
8363 cache_attr = object->wimg_bits & VM_WIMG_MASK;
8364 assert(user_page_list);
8365 if (cache_attr != VM_WIMG_USE_DEFAULT) {
8366 PMAP_BATCH_SET_CACHE_ATTR(object, user_page_list, cache_attr, num_pages, batch_pmap_op);
8367 }
8368 }
8369
8370
8371 boolean_t vm_object_iopl_wire_full(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t);
8372 kern_return_t vm_object_iopl_wire_empty(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t, vm_object_offset_t *, int);
8373
8374
8375
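/*
 * vm_object_iopl_wire_full(): fast path used when every page of the
 * object is already resident -- walk the object's memq, wire each page
 * and record it in the UPL's lite_list / page list.  Returns FALSE as
 * soon as it meets a page it cannot handle (busy, absent, in error,
 * being cleaned, ...); the caller then falls back to the slow per-page
 * path, which skips the entries already recorded here.
 */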
8376 boolean_t
8377 vm_object_iopl_wire_full(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list,
8378 wpl_array_t lite_list, upl_control_flags_t cntrl_flags)
8379 {
8380 vm_page_t dst_page;
8381 vm_tag_t tag;
8382 unsigned int entry;
8383 int page_count;
8384 int delayed_unlock = 0;
8385 boolean_t retval = TRUE;
8386 ppnum_t phys_page;
8387
8388 vm_object_lock_assert_exclusive(object);
8389 assert(object->purgable != VM_PURGABLE_VOLATILE);
8390 assert(object->purgable != VM_PURGABLE_EMPTY);
8391 assert(object->pager == NULL);
8392 assert(object->copy == NULL);
8393 assert(object->shadow == NULL);
8394
8395 tag = UPL_MEMORY_TAG(cntrl_flags);
8396 page_count = object->resident_page_count;
8397 dst_page = (vm_page_t)vm_page_queue_first(&object->memq);
8398
8399 vm_page_lock_queues();
8400
8401 while (page_count--) {
8402
8403 if (dst_page->busy ||
8404 dst_page->fictitious ||
8405 dst_page->absent ||
8406 dst_page->error ||
8407 dst_page->cleaning ||
8408 dst_page->restart ||
8409 dst_page->encrypted ||
8410 dst_page->laundry) {
8411 retval = FALSE;
8412 goto done;
8413 }
8414 if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->written_by_kernel == TRUE) {
8415 retval = FALSE;
8416 goto done;
8417 }
8418 dst_page->reference = TRUE;
8419
8420 vm_page_wire(dst_page, tag, FALSE);
8421
8422 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
8423 SET_PAGE_DIRTY(dst_page, FALSE);
8424 }
8425 entry = (unsigned int)(dst_page->offset / PAGE_SIZE);
8426 assert(entry >= 0 && entry < object->resident_page_count);
8427 lite_list[entry>>5] |= 1 << (entry & 31);
8428
8429 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8430
8431 if (phys_page > upl->highest_page)
8432 upl->highest_page = phys_page;
8433
8434 if (user_page_list) {
8435 user_page_list[entry].phys_addr = phys_page;
8436 user_page_list[entry].absent = dst_page->absent;
8437 user_page_list[entry].dirty = dst_page->dirty;
8438 user_page_list[entry].free_when_done = dst_page->free_when_done;
8439 user_page_list[entry].precious = dst_page->precious;
8440 user_page_list[entry].device = FALSE;
8441 user_page_list[entry].speculative = FALSE;
8442 user_page_list[entry].cs_validated = FALSE;
8443 user_page_list[entry].cs_tainted = FALSE;
8444 user_page_list[entry].cs_nx = FALSE;
8445 user_page_list[entry].needed = FALSE;
8446 user_page_list[entry].mark = FALSE;
8447 }
8448 if (delayed_unlock++ > 256) {
8449 delayed_unlock = 0;
8450 lck_mtx_yield(&vm_page_queue_lock);
8451
8452 VM_CHECK_MEMORYSTATUS;
8453 }
8454 dst_page = (vm_page_t)vm_page_queue_next(&dst_page->listq);
8455 }
8456 done:
8457 vm_page_unlock_queues();
8458
8459 VM_CHECK_MEMORYSTATUS;
8460
8461 return (retval);
8462 }
8463
8464
8465 kern_return_t
8466 vm_object_iopl_wire_empty(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list,
8467 wpl_array_t lite_list, upl_control_flags_t cntrl_flags, vm_object_offset_t *dst_offset, int page_count)
8468 {
8469 vm_page_t dst_page;
8470 vm_tag_t tag;
8471 boolean_t no_zero_fill = FALSE;
8472 int interruptible;
8473 int pages_wired = 0;
8474 int pages_inserted = 0;
8475 int entry = 0;
8476 uint64_t delayed_ledger_update = 0;
8477 kern_return_t ret = KERN_SUCCESS;
8478 int grab_options;
8479 ppnum_t phys_page;
8480
8481 vm_object_lock_assert_exclusive(object);
8482 assert(object->purgable != VM_PURGABLE_VOLATILE);
8483 assert(object->purgable != VM_PURGABLE_EMPTY);
8484 assert(object->pager == NULL);
8485 assert(object->copy == NULL);
8486 assert(object->shadow == NULL);
8487
8488 if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
8489 interruptible = THREAD_ABORTSAFE;
8490 else
8491 interruptible = THREAD_UNINT;
8492
8493 if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO))
8494 no_zero_fill = TRUE;
8495
8496 tag = UPL_MEMORY_TAG(cntrl_flags);
8497
8498 grab_options = 0;
8499 #if CONFIG_SECLUDED_MEMORY
8500 if (object->can_grab_secluded) {
8501 grab_options |= VM_PAGE_GRAB_SECLUDED;
8502 }
8503 #endif /* CONFIG_SECLUDED_MEMORY */
8504
8505 while (page_count--) {
8506
8507 while ((dst_page = vm_page_grab_options(grab_options))
8508 == VM_PAGE_NULL) {
8509
8510 OSAddAtomic(page_count, &vm_upl_wait_for_pages);
8511
8512 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
8513
8514 if (vm_page_wait(interruptible) == FALSE) {
8515 /*
8516 * interrupted case
8517 */
8518 OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
8519
8520 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
8521
8522 ret = MACH_SEND_INTERRUPTED;
8523 goto done;
8524 }
8525 OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
8526
8527 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
8528 }
8529 if (no_zero_fill == FALSE)
8530 vm_page_zero_fill(dst_page);
8531 else
8532 dst_page->absent = TRUE;
8533
8534 dst_page->reference = TRUE;
8535
8536 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
8537 SET_PAGE_DIRTY(dst_page, FALSE);
8538 }
8539 if (dst_page->absent == FALSE) {
8540 assert(dst_page->vm_page_q_state == VM_PAGE_NOT_ON_Q);
8541 assert(dst_page->wire_count == 0);
8542 dst_page->wire_count++;
8543 dst_page->vm_page_q_state = VM_PAGE_IS_WIRED;
8544 assert(dst_page->wire_count);
8545 pages_wired++;
8546 PAGE_WAKEUP_DONE(dst_page);
8547 }
8548 pages_inserted++;
8549
8550 vm_page_insert_internal(dst_page, object, *dst_offset, tag, FALSE, TRUE, TRUE, TRUE, &delayed_ledger_update);
8551
8552 lite_list[entry>>5] |= 1 << (entry & 31);
8553
8554 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8555
8556 if (phys_page > upl->highest_page)
8557 upl->highest_page = phys_page;
8558
8559 if (user_page_list) {
8560 user_page_list[entry].phys_addr = phys_page;
8561 user_page_list[entry].absent = dst_page->absent;
8562 user_page_list[entry].dirty = dst_page->dirty;
8563 user_page_list[entry].free_when_done = FALSE;
8564 user_page_list[entry].precious = FALSE;
8565 user_page_list[entry].device = FALSE;
8566 user_page_list[entry].speculative = FALSE;
8567 user_page_list[entry].cs_validated = FALSE;
8568 user_page_list[entry].cs_tainted = FALSE;
8569 user_page_list[entry].cs_nx = FALSE;
8570 user_page_list[entry].needed = FALSE;
8571 user_page_list[entry].mark = FALSE;
8572 }
8573 entry++;
8574 *dst_offset += PAGE_SIZE_64;
8575 }
8576 done:
8577 if (pages_wired) {
8578 vm_page_lockspin_queues();
8579 vm_page_wire_count += pages_wired;
8580 vm_page_unlock_queues();
8581 }
8582 if (pages_inserted) {
8583 if (object->internal) {
8584 OSAddAtomic(pages_inserted, &vm_page_internal_count);
8585 } else {
8586 OSAddAtomic(pages_inserted, &vm_page_external_count);
8587 }
8588 }
8589 if (delayed_ledger_update) {
8590 task_t owner;
8591
8592 owner = object->vo_purgeable_owner;
8593 assert(owner);
8594
8595 /* more non-volatile bytes */
8596 ledger_credit(owner->ledger,
8597 task_ledgers.purgeable_nonvolatile,
8598 delayed_ledger_update);
8599 /* more footprint */
8600 ledger_credit(owner->ledger,
8601 task_ledgers.phys_footprint,
8602 delayed_ledger_update);
8603 }
8604 return (ret);
8605 }
8606
8607
8608 unsigned int vm_object_iopl_request_sleep_for_cleaning = 0;
8609
8610
8611 kern_return_t
8612 vm_object_iopl_request(
8613 vm_object_t object,
8614 vm_object_offset_t offset,
8615 upl_size_t size,
8616 upl_t *upl_ptr,
8617 upl_page_info_array_t user_page_list,
8618 unsigned int *page_list_count,
8619 upl_control_flags_t cntrl_flags)
8620 {
8621 vm_page_t dst_page;
8622 vm_object_offset_t dst_offset;
8623 upl_size_t xfer_size;
8624 upl_t upl = NULL;
8625 unsigned int entry;
8626 wpl_array_t lite_list = NULL;
8627 int no_zero_fill = FALSE;
8628 unsigned int size_in_pages;
8629 u_int32_t psize;
8630 kern_return_t ret;
8631 vm_prot_t prot;
8632 struct vm_object_fault_info fault_info;
8633 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
8634 struct vm_page_delayed_work *dwp;
8635 int dw_count;
8636 int dw_limit;
8637 int dw_index;
8638 boolean_t caller_lookup;
8639 int io_tracking_flag = 0;
8640 int interruptible;
8641 ppnum_t phys_page;
8642
8643 boolean_t set_cache_attr_needed = FALSE;
8644 boolean_t free_wired_pages = FALSE;
8645 boolean_t fast_path_empty_req = FALSE;
8646 boolean_t fast_path_full_req = FALSE;
8647
8648 if (cntrl_flags & ~UPL_VALID_FLAGS) {
8649 /*
8650 * For forward compatibility's sake,
8651 * reject any unknown flag.
8652 */
8653 return KERN_INVALID_VALUE;
8654 }
8655 if (vm_lopage_needed == FALSE)
8656 cntrl_flags &= ~UPL_NEED_32BIT_ADDR;
8657
8658 if (cntrl_flags & UPL_NEED_32BIT_ADDR) {
8659 if ( (cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE))
8660 return KERN_INVALID_VALUE;
8661
8662 if (object->phys_contiguous) {
8663 if ((offset + object->vo_shadow_offset) >= (vm_object_offset_t)max_valid_dma_address)
8664 return KERN_INVALID_ADDRESS;
8665
8666 if (((offset + object->vo_shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address)
8667 return KERN_INVALID_ADDRESS;
8668 }
8669 }
8670
8671 if (cntrl_flags & UPL_ENCRYPT) {
8672 /*
8673 * ENCRYPTED SWAP:
8674 * The paging path doesn't use this interface,
8675 * so we don't support the UPL_ENCRYPT flag
8676 * here. We won't encrypt the pages.
8677 */
8678 assert(! (cntrl_flags & UPL_ENCRYPT));
8679 }
8680 if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO))
8681 no_zero_fill = TRUE;
8682
8683 if (cntrl_flags & UPL_COPYOUT_FROM)
8684 prot = VM_PROT_READ;
8685 else
8686 prot = VM_PROT_READ | VM_PROT_WRITE;
8687
8688 if ((!object->internal) && (object->paging_offset != 0))
8689 panic("vm_object_iopl_request: external object with non-zero paging offset\n");
8690
8691 #if CONFIG_IOSCHED || UPL_DEBUG
8692 if ((object->io_tracking && object != kernel_object) || upl_debug_enabled)
8693 io_tracking_flag |= UPL_CREATE_IO_TRACKING;
8694 #endif
8695
8696 #if CONFIG_IOSCHED
8697 if (object->io_tracking) {
8698 /* Check if we're dealing with the kernel object. We do not support expedite on kernel object UPLs */
8699 if (object != kernel_object)
8700 io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
8701 }
8702 #endif
8703
8704 if (object->phys_contiguous)
8705 psize = PAGE_SIZE;
8706 else
8707 psize = size;
8708
8709 if (cntrl_flags & UPL_SET_INTERNAL) {
8710 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
8711
8712 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
8713 lite_list = (wpl_array_t) (((uintptr_t)user_page_list) +
8714 ((psize / PAGE_SIZE) * sizeof(upl_page_info_t)));
8715 if (size == 0) {
8716 user_page_list = NULL;
8717 lite_list = NULL;
8718 }
8719 } else {
8720 upl = upl_create(UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
8721
8722 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
8723 if (size == 0) {
8724 lite_list = NULL;
8725 }
8726 }
8727 if (user_page_list)
8728 user_page_list[0].device = FALSE;
8729 *upl_ptr = upl;
8730
8731 upl->map_object = object;
8732 upl->size = size;
8733
8734 size_in_pages = size / PAGE_SIZE;
8735
8736 if (object == kernel_object &&
8737 !(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS))) {
8738 upl->flags |= UPL_KERNEL_OBJECT;
8739 #if UPL_DEBUG
8740 vm_object_lock(object);
8741 #else
8742 vm_object_lock_shared(object);
8743 #endif
8744 } else {
8745 vm_object_lock(object);
8746 vm_object_activity_begin(object);
8747 }
8748 /*
8749 * paging in progress also protects the paging_offset
8750 */
8751 upl->offset = offset + object->paging_offset;
8752
8753 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8754 /*
8755 * The user requested that access to the pages in this UPL
8756 * be blocked until the UPL is committed or aborted.
8757 */
8758 upl->flags |= UPL_ACCESS_BLOCKED;
8759 }
8760
8761 #if CONFIG_IOSCHED || UPL_DEBUG
8762 if (upl->flags & UPL_TRACKED_BY_OBJECT) {
8763 vm_object_activity_begin(object);
8764 queue_enter(&object->uplq, upl, upl_t, uplq);
8765 }
8766 #endif
8767
8768 if (object->phys_contiguous) {
8769
8770 if (upl->flags & UPL_ACCESS_BLOCKED) {
8771 assert(!object->blocked_access);
8772 object->blocked_access = TRUE;
8773 }
8774
8775 vm_object_unlock(object);
8776
8777 /*
8778 * don't need any shadow mappings for this one
8779 * since it is already I/O memory
8780 */
8781 upl->flags |= UPL_DEVICE_MEMORY;
8782
8783 upl->highest_page = (ppnum_t) ((offset + object->vo_shadow_offset + size - 1)>>PAGE_SHIFT);
8784
8785 if (user_page_list) {
8786 user_page_list[0].phys_addr = (ppnum_t) ((offset + object->vo_shadow_offset)>>PAGE_SHIFT);
8787 user_page_list[0].device = TRUE;
8788 }
8789 if (page_list_count != NULL) {
8790 if (upl->flags & UPL_INTERNAL)
8791 *page_list_count = 0;
8792 else
8793 *page_list_count = 1;
8794 }
8795 return KERN_SUCCESS;
8796 }
8797 if (object != kernel_object && object != compressor_object) {
8798 /*
8799 * Protect user space from future COW operations
8800 */
8801 #if VM_OBJECT_TRACKING_OP_TRUESHARE
8802 if (!object->true_share &&
8803 vm_object_tracking_inited) {
8804 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
8805 int num = 0;
8806
8807 num = OSBacktrace(bt,
8808 VM_OBJECT_TRACKING_BTDEPTH);
8809 btlog_add_entry(vm_object_tracking_btlog,
8810 object,
8811 VM_OBJECT_TRACKING_OP_TRUESHARE,
8812 bt,
8813 num);
8814 }
8815 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
8816
8817 vm_object_lock_assert_exclusive(object);
8818 object->true_share = TRUE;
8819
8820 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
8821 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
8822 }
8823
8824 if (!(cntrl_flags & UPL_COPYOUT_FROM) &&
8825 object->copy != VM_OBJECT_NULL) {
8826 /*
8827 * Honor copy-on-write obligations
8828 *
8829 * The caller is gathering these pages and
8830 * might modify their contents. We need to
8831 * make sure that the copy object has its own
8832 * private copies of these pages before we let
8833 * the caller modify them.
8834 *
8835 * NOTE: someone else could map the original object
8836 * after we've done this copy-on-write here, and they
8837 * could then see an inconsistent picture of the memory
8838 * while it's being modified via the UPL. To prevent this,
8839 * we would have to block access to these pages until the
8840 * UPL is released. We could use the UPL_BLOCK_ACCESS
8841 * code path for that...
8842 */
8843 vm_object_update(object,
8844 offset,
8845 size,
8846 NULL,
8847 NULL,
8848 FALSE, /* should_return */
8849 MEMORY_OBJECT_COPY_SYNC,
8850 VM_PROT_NO_CHANGE);
8851 #if DEVELOPMENT || DEBUG
8852 iopl_cow++;
8853 iopl_cow_pages += size >> PAGE_SHIFT;
8854 #endif
8855 }
8856 if (!(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS)) &&
8857 object->purgable != VM_PURGABLE_VOLATILE &&
8858 object->purgable != VM_PURGABLE_EMPTY &&
8859 object->copy == NULL &&
8860 size == object->vo_size &&
8861 offset == 0 &&
8862 object->shadow == NULL &&
8863 object->pager == NULL)
8864 {
8865 if (object->resident_page_count == size_in_pages)
8866 {
8867 assert(object != compressor_object);
8868 assert(object != kernel_object);
8869 fast_path_full_req = TRUE;
8870 }
8871 else if (object->resident_page_count == 0)
8872 {
8873 assert(object != compressor_object);
8874 assert(object != kernel_object);
8875 fast_path_empty_req = TRUE;
8876 set_cache_attr_needed = TRUE;
8877 }
8878 }
8879
8880 if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
8881 interruptible = THREAD_ABORTSAFE;
8882 else
8883 interruptible = THREAD_UNINT;
8884
8885 entry = 0;
8886
8887 xfer_size = size;
8888 dst_offset = offset;
8889 dw_count = 0;
8890
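/*
 * Two fast paths may apply here: a fully resident object goes
 * through vm_object_iopl_wire_full(), which tries to wire every
 * resident page in one sweep and falls through to the slow
 * per-page loop below if it hits a page it can't handle; an
 * object with no resident pages at all goes through
 * vm_object_iopl_wire_empty(), whose failure unwinds via
 * return_err instead.
 */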
8891 if (fast_path_full_req) {
8892
8893 if (vm_object_iopl_wire_full(object, upl, user_page_list, lite_list, cntrl_flags) == TRUE)
8894 goto finish;
8895 /*
8896 * we couldn't complete the processing of this request on the fast path
8897 * so fall through to the slow path and finish up
8898 */
8899
8900 } else if (fast_path_empty_req) {
8901
8902 if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8903 ret = KERN_MEMORY_ERROR;
8904 goto return_err;
8905 }
8906 ret = vm_object_iopl_wire_empty(object, upl, user_page_list, lite_list, cntrl_flags, &dst_offset, size_in_pages);
8907
8908 if (ret) {
8909 free_wired_pages = TRUE;
8910 goto return_err;
8911 }
8912 goto finish;
8913 }
8914
8915 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
8916 fault_info.user_tag = 0;
8917 fault_info.lo_offset = offset;
8918 fault_info.hi_offset = offset + xfer_size;
8919 fault_info.no_cache = FALSE;
8920 fault_info.stealth = FALSE;
8921 fault_info.io_sync = FALSE;
8922 fault_info.cs_bypass = FALSE;
8923 fault_info.mark_zf_absent = TRUE;
8924 fault_info.interruptible = interruptible;
8925 fault_info.batch_pmap_op = TRUE;
8926
8927 dwp = &dw_array[0];
8928 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
8929
8930 while (xfer_size) {
8931 vm_fault_return_t result;
8932
8933 dwp->dw_mask = 0;
8934
8935 if (fast_path_full_req) {
8936 /*
8937 * if we get here, it means that we ran into a page
8938 * state we couldn't handle in the fast path and
8939 * bailed out to the slow path... since the order
8940 * we look at pages is different between the 2 paths,
8941 * the following check is needed to determine whether
8942 * this page was already processed in the fast path
8943 */
8944 if (lite_list[entry>>5] & (1 << (entry & 31)))
8945 goto skip_page;
8946 }
8947 dst_page = vm_page_lookup(object, dst_offset);
8948
8949 /*
8950 * ENCRYPTED SWAP:
8951 * If the page is encrypted, we need to decrypt it,
8952 * so force a soft page fault.
8953 */
8954 if (dst_page == VM_PAGE_NULL ||
8955 dst_page->busy ||
8956 dst_page->encrypted ||
8957 dst_page->error ||
8958 dst_page->restart ||
8959 dst_page->absent ||
8960 dst_page->fictitious) {
8961
8962 if (object == kernel_object)
8963 panic("vm_object_iopl_request: missing/bad page in kernel object\n");
8964 if (object == compressor_object)
8965 panic("vm_object_iopl_request: missing/bad page in compressor object\n");
8966
8967 if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8968 ret = KERN_MEMORY_ERROR;
8969 goto return_err;
8970 }
8971 set_cache_attr_needed = TRUE;
8972
8973 /*
8974 * We just looked up the page and the result remains valid
8975 * until the object lock is released, so send it to
8976 * vm_fault_page() (as "dst_page"), to avoid having to
8977 * look it up again there.
8978 */
8979 caller_lookup = TRUE;
8980
8981 do {
8982 vm_page_t top_page;
8983 kern_return_t error_code;
8984
8985 fault_info.cluster_size = xfer_size;
8986
8987 vm_object_paging_begin(object);
8988
8989 result = vm_fault_page(object, dst_offset,
8990 prot | VM_PROT_WRITE, FALSE,
8991 caller_lookup,
8992 &prot, &dst_page, &top_page,
8993 (int *)0,
8994 &error_code, no_zero_fill,
8995 FALSE, &fault_info);
8996
8997 /* our lookup is no longer valid at this point */
8998 caller_lookup = FALSE;
8999
9000 switch (result) {
9001
9002 case VM_FAULT_SUCCESS:
9003
9004 if ( !dst_page->absent) {
9005 PAGE_WAKEUP_DONE(dst_page);
9006 } else {
9007 /*
9008 * we only get back an absent page if we
9009 * requested that it not be zero-filled
9010 * because we are about to fill it via I/O
9011 *
9012 * absent pages should be left BUSY
9013 * to prevent them from being faulted
9014 * into an address space before we've
9015 * had a chance to complete the I/O on
9016 * them since they may contain info that
9017 * shouldn't be seen by the faulting task
9018 */
9019 }
9020 /*
9021 * Release paging references and
9022 * top-level placeholder page, if any.
9023 */
9024 if (top_page != VM_PAGE_NULL) {
9025 vm_object_t local_object;
9026
9027 local_object = VM_PAGE_OBJECT(top_page);
9028
9029 /*
9030 * comparing 2 packed pointers
9031 */
9032 if (top_page->vm_page_object != dst_page->vm_page_object) {
9033 vm_object_lock(local_object);
9034 VM_PAGE_FREE(top_page);
9035 vm_object_paging_end(local_object);
9036 vm_object_unlock(local_object);
9037 } else {
9038 VM_PAGE_FREE(top_page);
9039 vm_object_paging_end(local_object);
9040 }
9041 }
9042 vm_object_paging_end(object);
9043 break;
9044
9045 case VM_FAULT_RETRY:
9046 vm_object_lock(object);
9047 break;
9048
9049 case VM_FAULT_MEMORY_SHORTAGE:
9050 OSAddAtomic((size_in_pages - entry), &vm_upl_wait_for_pages);
9051
9052 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
9053
9054 if (vm_page_wait(interruptible)) {
9055 OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
9056
9057 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
9058 vm_object_lock(object);
9059
9060 break;
9061 }
9062 OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
9063
9064 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
9065
9066 /* fall thru */
9067
9068 case VM_FAULT_INTERRUPTED:
9069 error_code = MACH_SEND_INTERRUPTED;
9070 case VM_FAULT_MEMORY_ERROR:
9071 memory_error:
9072 ret = (error_code ? error_code: KERN_MEMORY_ERROR);
9073
9074 vm_object_lock(object);
9075 goto return_err;
9076
9077 case VM_FAULT_SUCCESS_NO_VM_PAGE:
9078 /* success but no page: fail */
9079 vm_object_paging_end(object);
9080 vm_object_unlock(object);
9081 goto memory_error;
9082
9083 default:
9084 panic("vm_object_iopl_request: unexpected error"
9085 " 0x%x from vm_fault_page()\n", result);
9086 }
9087 } while (result != VM_FAULT_SUCCESS);
9088
9089 }
9090 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
9091
9092 if (upl->flags & UPL_KERNEL_OBJECT)
9093 goto record_phys_addr;
9094
9095 if (dst_page->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
9096 dst_page->busy = TRUE;
9097 goto record_phys_addr;
9098 }
9099
9100 if (dst_page->cleaning) {
9101 /*
9102 * Someone else is cleaning this page in place.
9103 * In theory, we should be able to proceed and use this
9104 * page but they'll probably end up clearing the "busy"
9105 * bit on it in upl_commit_range() but they didn't set
9106 * it, so they would clear our "busy" bit and open
9107 * us to race conditions.
9108 * We'd better wait for the cleaning to complete and
9109 * then try again.
9110 */
9111 vm_object_iopl_request_sleep_for_cleaning++;
9112 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
9113 continue;
9114 }
9115 if (dst_page->laundry)
9116 vm_pageout_steal_laundry(dst_page, FALSE);
9117
9118 if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) &&
9119 phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) {
9120 vm_page_t low_page;
9121 int refmod;
9122
9123 /*
9124 * support devices that can't DMA above 32 bits
9125 * by substituting pages from a pool of low address
9126 * memory for any pages we find above the 4G mark...
9127 * we can't substitute if the page is already wired because
9128 * we don't know whether that physical address has been
9129 * handed out to some other 64 bit capable DMA device to use
9130 */
9131 if (VM_PAGE_WIRED(dst_page)) {
9132 ret = KERN_PROTECTION_FAILURE;
9133 goto return_err;
9134 }
9135 low_page = vm_page_grablo();
9136
9137 if (low_page == VM_PAGE_NULL) {
9138 ret = KERN_RESOURCE_SHORTAGE;
9139 goto return_err;
9140 }
9141 /*
9142 * from here until the vm_page_replace completes
9143 * we mustn't drop the object lock... we don't
9144 * want anyone refaulting this page in and using
9145 * it after we disconnect it... we want the fault
9146 * to find the new page being substituted.
9147 */
9148 if (dst_page->pmapped)
9149 refmod = pmap_disconnect(phys_page);
9150 else
9151 refmod = 0;
9152
9153 if (!dst_page->absent)
9154 vm_page_copy(dst_page, low_page);
9155
9156 low_page->reference = dst_page->reference;
9157 low_page->dirty = dst_page->dirty;
9158 low_page->absent = dst_page->absent;
9159
9160 if (refmod & VM_MEM_REFERENCED)
9161 low_page->reference = TRUE;
9162 if (refmod & VM_MEM_MODIFIED) {
9163 SET_PAGE_DIRTY(low_page, FALSE);
9164 }
9165
9166 vm_page_replace(low_page, object, dst_offset);
9167
9168 dst_page = low_page;
9169 /*
9170 * vm_page_grablo returned the page marked
9171 * BUSY... we don't need a PAGE_WAKEUP_DONE
9172 * here, because we've never dropped the object lock
9173 */
9174 if ( !dst_page->absent)
9175 dst_page->busy = FALSE;
9176
9177 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
9178 }
9179 if ( !dst_page->busy)
9180 dwp->dw_mask |= DW_vm_page_wire;
9181
9182 if (cntrl_flags & UPL_BLOCK_ACCESS) {
9183 /*
9184 * Mark the page "busy" to block any future page fault
9185 * on this page in addition to wiring it.
9186 * We'll also remove the mapping
9187 * of all these pages before leaving this routine.
9188 */
9189 assert(!dst_page->fictitious);
9190 dst_page->busy = TRUE;
9191 }
9192 /*
9193 * expect the page to be used
9194 * page queues lock must be held to set 'reference'
9195 */
9196 dwp->dw_mask |= DW_set_reference;
9197
9198 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
9199 SET_PAGE_DIRTY(dst_page, TRUE);
9200 }
9201 if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->written_by_kernel == TRUE) {
9202 pmap_sync_page_attributes_phys(phys_page);
9203 dst_page->written_by_kernel = FALSE;
9204 }
9205
9206 record_phys_addr:
9207 if (dst_page->busy)
9208 upl->flags |= UPL_HAS_BUSY;
9209
9210 lite_list[entry>>5] |= 1 << (entry & 31);
9211
9212 if (phys_page > upl->highest_page)
9213 upl->highest_page = phys_page;
9214
9215 if (user_page_list) {
9216 user_page_list[entry].phys_addr = phys_page;
9217 user_page_list[entry].free_when_done = dst_page->free_when_done;
9218 user_page_list[entry].absent = dst_page->absent;
9219 user_page_list[entry].dirty = dst_page->dirty;
9220 user_page_list[entry].precious = dst_page->precious;
9221 user_page_list[entry].device = FALSE;
9222 user_page_list[entry].needed = FALSE;
9223 if (dst_page->clustered == TRUE)
9224 user_page_list[entry].speculative = (dst_page->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE;
9225 else
9226 user_page_list[entry].speculative = FALSE;
9227 user_page_list[entry].cs_validated = dst_page->cs_validated;
9228 user_page_list[entry].cs_tainted = dst_page->cs_tainted;
9229 user_page_list[entry].cs_nx = dst_page->cs_nx;
9230 user_page_list[entry].mark = FALSE;
9231 }
9232 if (object != kernel_object && object != compressor_object) {
9233 /*
9234 * someone is explicitly grabbing this page...
9235 * update clustered and speculative state
9236 *
9237 */
9238 if (dst_page->clustered)
9239 VM_PAGE_CONSUME_CLUSTERED(dst_page);
9240 }
9241 skip_page:
9242 entry++;
9243 dst_offset += PAGE_SIZE_64;
9244 xfer_size -= PAGE_SIZE;
9245
9246 if (dwp->dw_mask) {
9247 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
9248
9249 if (dw_count >= dw_limit) {
9250 vm_page_do_delayed_work(object, UPL_MEMORY_TAG(cntrl_flags), &dw_array[0], dw_count);
9251
9252 dwp = &dw_array[0];
9253 dw_count = 0;
9254 }
9255 }
9256 }
9257 assert(entry == size_in_pages);
9258
9259 if (dw_count)
9260 vm_page_do_delayed_work(object, UPL_MEMORY_TAG(cntrl_flags), &dw_array[0], dw_count);
9261 finish:
9262 if (user_page_list && set_cache_attr_needed == TRUE)
9263 vm_object_set_pmap_cache_attr(object, user_page_list, size_in_pages, TRUE);
9264
9265 if (page_list_count != NULL) {
9266 if (upl->flags & UPL_INTERNAL)
9267 *page_list_count = 0;
9268 else if (*page_list_count > size_in_pages)
9269 *page_list_count = size_in_pages;
9270 }
9271 vm_object_unlock(object);
9272
9273 if (cntrl_flags & UPL_BLOCK_ACCESS) {
9274 /*
9275 * We've marked all the pages "busy" so that future
9276 * page faults will block.
9277 * Now remove the mapping for these pages, so that they
9278 * can't be accessed without causing a page fault.
9279 */
9280 vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
9281 PMAP_NULL, 0, VM_PROT_NONE);
9282 assert(!object->blocked_access);
9283 object->blocked_access = TRUE;
9284 }
9285
9286 return KERN_SUCCESS;
9287
9288 return_err:
9289 dw_index = 0;
9290
9291 for (; offset < dst_offset; offset += PAGE_SIZE) {
9292 boolean_t need_unwire;
9293
9294 dst_page = vm_page_lookup(object, offset);
9295
9296 if (dst_page == VM_PAGE_NULL)
9297 panic("vm_object_iopl_request: Wired page missing.\n");
9298
9299 /*
9300 * if we've already processed this page in an earlier
9301 * dw_do_work, we need to undo the wiring... we will
9302 * leave the dirty and reference bits on if they
9303 * were set, since we don't have a good way of knowing
9304 * what the previous state was and we won't get here
9305 * under any normal circumstances... we will always
9306 * clear BUSY and wakeup any waiters via vm_page_free
9307 * or PAGE_WAKEUP_DONE
9308 */
9309 need_unwire = TRUE;
9310
9311 if (dw_count) {
9312 if (dw_array[dw_index].dw_m == dst_page) {
9313 /*
9314 * still in the deferred work list
9315 * which means we haven't yet called
9316 * vm_page_wire on this page
9317 */
9318 need_unwire = FALSE;
9319
9320 dw_index++;
9321 dw_count--;
9322 }
9323 }
9324 vm_page_lock_queues();
9325
9326 if (dst_page->absent || free_wired_pages == TRUE) {
9327 vm_page_free(dst_page);
9328
9329 need_unwire = FALSE;
9330 } else {
9331 if (need_unwire == TRUE)
9332 vm_page_unwire(dst_page, TRUE);
9333
9334 PAGE_WAKEUP_DONE(dst_page);
9335 }
9336 vm_page_unlock_queues();
9337
9338 if (need_unwire == TRUE)
9339 VM_STAT_INCR(reactivations);
9340 }
9341 #if UPL_DEBUG
9342 upl->upl_state = 2;
9343 #endif
9344 if (! (upl->flags & UPL_KERNEL_OBJECT)) {
9345 vm_object_activity_end(object);
9346 vm_object_collapse(object, 0, TRUE);
9347 }
9348 vm_object_unlock(object);
9349 upl_destroy(upl);
9350
9351 return ret;
9352 }
9353
9354 kern_return_t
9355 upl_transpose(
9356 upl_t upl1,
9357 upl_t upl2)
9358 {
9359 kern_return_t retval;
9360 boolean_t upls_locked;
9361 vm_object_t object1, object2;
9362
9363 if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2 || ((upl1->flags & UPL_VECTOR)==UPL_VECTOR) || ((upl2->flags & UPL_VECTOR)==UPL_VECTOR)) {
9364 return KERN_INVALID_ARGUMENT;
9365 }
9366
9367 upls_locked = FALSE;
9368
9369 /*
9370 * Since we need to lock both UPLs at the same time,
9371 * avoid deadlocks by always taking locks in the same order.
9372 */
9373 if (upl1 < upl2) {
9374 upl_lock(upl1);
9375 upl_lock(upl2);
9376 } else {
9377 upl_lock(upl2);
9378 upl_lock(upl1);
9379 }
9380 upls_locked = TRUE; /* the UPLs will need to be unlocked */
9381
9382 object1 = upl1->map_object;
9383 object2 = upl2->map_object;
9384
9385 if (upl1->offset != 0 || upl2->offset != 0 ||
9386 upl1->size != upl2->size) {
9387 /*
9388 * We deal only with full objects, not subsets.
9389 * That's because we exchange the entire backing store info
9390 * for the objects: pager, resident pages, etc... We can't do
9391 * only part of it.
9392 */
9393 retval = KERN_INVALID_VALUE;
9394 goto done;
9395 }
9396
9397 /*
9398 * Transpose the VM objects' backing store.
9399 */
9400 retval = vm_object_transpose(object1, object2,
9401 (vm_object_size_t) upl1->size);
9402
9403 if (retval == KERN_SUCCESS) {
9404 /*
9405 * Make each UPL point to the correct VM object, i.e. the
9406 * object holding the pages that the UPL refers to...
9407 */
9408 #if CONFIG_IOSCHED || UPL_DEBUG
9409 if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
9410 vm_object_lock(object1);
9411 vm_object_lock(object2);
9412 }
9413 if (upl1->flags & UPL_TRACKED_BY_OBJECT)
9414 queue_remove(&object1->uplq, upl1, upl_t, uplq);
9415 if (upl2->flags & UPL_TRACKED_BY_OBJECT)
9416 queue_remove(&object2->uplq, upl2, upl_t, uplq);
9417 #endif
9418 upl1->map_object = object2;
9419 upl2->map_object = object1;
9420
9421 #if CONFIG_IOSCHED || UPL_DEBUG
9422 if (upl1->flags & UPL_TRACKED_BY_OBJECT)
9423 queue_enter(&object2->uplq, upl1, upl_t, uplq);
9424 if (upl2->flags & UPL_TRACKED_BY_OBJECT)
9425 queue_enter(&object1->uplq, upl2, upl_t, uplq);
9426 if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
9427 vm_object_unlock(object2);
9428 vm_object_unlock(object1);
9429 }
9430 #endif
9431 }
9432
9433 done:
9434 /*
9435 * Cleanup.
9436 */
9437 if (upls_locked) {
9438 upl_unlock(upl1);
9439 upl_unlock(upl2);
9440 upls_locked = FALSE;
9441 }
9442
9443 return retval;
9444 }
9445
9446 void
9447 upl_range_needed(
9448 upl_t upl,
9449 int index,
9450 int count)
9451 {
9452 upl_page_info_t *user_page_list;
9453 int size_in_pages;
9454
9455 if ( !(upl->flags & UPL_INTERNAL) || count <= 0)
9456 return;
9457
9458 size_in_pages = upl->size / PAGE_SIZE;
9459
9460 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
9461
9462 while (count-- && index < size_in_pages)
9463 user_page_list[index++].needed = TRUE;
9464 }
9465
9466
9467 /*
9468 * ENCRYPTED SWAP:
9469 *
9470 * Rationale: the user might have some encrypted data on disk (via
9471 * FileVault or any other mechanism). That data is then decrypted in
9472 * memory, which is safe as long as the machine is secure. But that
9473 * decrypted data in memory could be paged out to disk by the default
9474 * pager. The data would then be stored on disk in clear (not encrypted)
9475 * and it could be accessed by anyone who gets physical access to the
9476 * disk (if the laptop or the disk gets stolen for example). This weakens
9477 * the security offered by FileVault.
9478 *
9479 * Solution: the default pager will optionally request that all the
9480 * pages it gathers for pageout be encrypted, via the UPL interfaces,
9481 * before it sends this UPL to disk via the vnode_pageout() path.
9482 *
9483 * Notes:
9484 *
9485 * To avoid disrupting the VM LRU algorithms, we want to keep the
9486 * clean-in-place mechanisms, which allow us to send some extra pages to
9487 * swap (clustering) without actually removing them from the user's
9488 * address space. We don't want the user to unknowingly access encrypted
9489 * data, so we have to actually remove the encrypted pages from the page
9490 * table. When the user accesses the data, the hardware will fail to
9491 * locate the virtual page in its page table and will trigger a page
9492 * fault. We can then decrypt the page and enter it in the page table
9493 * again. Whenever we allow the user to access the contents of a page,
9494 * we have to make sure it's not encrypted.
9495 *
9496 *
9497 */
9498 /*
9499 * ENCRYPTED SWAP:
9500 * Reserve of virtual addresses in the kernel address space.
9501 * We need to map the physical pages in the kernel, so that we
9502 * can call the encryption/decryption routines with a kernel
9503 * virtual address. We keep this pool of pre-allocated kernel
9504 * virtual addresses so that we don't have to scan the kernel's
9505 * virtual address space each time we need to encrypt or decrypt
9506 * a physical page.
9507 * It would be nice to be able to encrypt and decrypt in physical
9508 * mode but that might not always be more efficient...
9509 */
9510 decl_simple_lock_data(,vm_paging_lock)
9511 #define VM_PAGING_NUM_PAGES 64
9512 vm_map_offset_t vm_paging_base_address = 0;
9513 boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
9514 int vm_paging_max_index = 0;
9515 int vm_paging_page_waiter = 0;
9516 int vm_paging_page_waiter_total = 0;
9517 unsigned long vm_paging_no_kernel_page = 0;
9518 unsigned long vm_paging_objects_mapped = 0;
9519 unsigned long vm_paging_pages_mapped = 0;
9520 unsigned long vm_paging_objects_mapped_slow = 0;
9521 unsigned long vm_paging_pages_mapped_slow = 0;
9522
9523 void
9524 vm_paging_map_init(void)
9525 {
9526 kern_return_t kr;
9527 vm_map_offset_t page_map_offset;
9528 vm_map_entry_t map_entry;
9529
9530 assert(vm_paging_base_address == 0);
9531
9532 /*
9533 * Initialize our pool of pre-allocated kernel
9534 * virtual addresses.
9535 */
9536 page_map_offset = 0;
9537 kr = vm_map_find_space(kernel_map,
9538 &page_map_offset,
9539 VM_PAGING_NUM_PAGES * PAGE_SIZE,
9540 0,
9541 0,
9542 &map_entry);
9543 if (kr != KERN_SUCCESS) {
9544 panic("vm_paging_map_init: kernel_map full\n");
9545 }
9546 VME_OBJECT_SET(map_entry, kernel_object);
9547 VME_OFFSET_SET(map_entry, page_map_offset);
9548 map_entry->protection = VM_PROT_NONE;
9549 map_entry->max_protection = VM_PROT_NONE;
9550 map_entry->permanent = TRUE;
9551 vm_object_reference(kernel_object);
9552 vm_map_unlock(kernel_map);
9553
9554 assert(vm_paging_base_address == 0);
9555 vm_paging_base_address = page_map_offset;
9556 }
9557
9558 /*
9559 * ENCRYPTED SWAP:
9560 * vm_paging_map_object:
9561 * Maps part of a VM object's pages in the kernel
9562 * virtual address space, using the pre-allocated
9563 * kernel virtual addresses, if possible.
9564 * Context:
9565 * The VM object is locked. This lock will get
9566 * dropped and re-acquired though, so the caller
9567 * must make sure the VM object is kept alive
9568 * (by holding a VM map that has a reference
9569 * on it, for example, or taking an extra reference).
9570 * The page should also be kept busy to prevent
9571 * it from being reclaimed.
9572 */
9573 kern_return_t
9574 vm_paging_map_object(
9575 vm_page_t page,
9576 vm_object_t object,
9577 vm_object_offset_t offset,
9578 vm_prot_t protection,
9579 boolean_t can_unlock_object,
9580 vm_map_size_t *size, /* IN/OUT */
9581 vm_map_offset_t *address, /* OUT */
9582 boolean_t *need_unmap) /* OUT */
9583 {
9584 kern_return_t kr;
9585 vm_map_offset_t page_map_offset;
9586 vm_map_size_t map_size;
9587 vm_object_offset_t object_offset;
9588 int i;
9589
9590 if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
9591 /* use permanent 1-to-1 kernel mapping of physical memory ? */
9592 #if __x86_64__
9593 *address = (vm_map_offset_t)
9594 PHYSMAP_PTOV((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(page) <<
9595 PAGE_SHIFT);
9596 *need_unmap = FALSE;
9597 return KERN_SUCCESS;
9598 #else
9599 #warning "vm_paging_map_object: no 1-to-1 kernel mapping of physical memory..."
9600 #endif
9601
9602 assert(page->busy);
9603 /*
9604 * Use one of the pre-allocated kernel virtual addresses
9605 * and just enter the VM page in the kernel address space
9606 * at that virtual address.
9607 */
9608 simple_lock(&vm_paging_lock);
9609
9610 /*
9611 * Try and find an available kernel virtual address
9612 * from our pre-allocated pool.
9613 */
9614 page_map_offset = 0;
9615 for (;;) {
9616 for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
9617 if (vm_paging_page_inuse[i] == FALSE) {
9618 page_map_offset =
9619 vm_paging_base_address +
9620 (i * PAGE_SIZE);
9621 break;
9622 }
9623 }
9624 if (page_map_offset != 0) {
9625 /* found a space to map our page ! */
9626 break;
9627 }
9628
9629 if (can_unlock_object) {
9630 /*
9631 * If we can afford to unlock the VM object,
9632 * let's take the slow path now...
9633 */
9634 break;
9635 }
9636 /*
9637 * We can't afford to unlock the VM object, so
9638 * let's wait for a space to become available...
9639 */
9640 vm_paging_page_waiter_total++;
9641 vm_paging_page_waiter++;
9642 kr = assert_wait((event_t)&vm_paging_page_waiter, THREAD_UNINT);
9643 if (kr == THREAD_WAITING) {
9644 simple_unlock(&vm_paging_lock);
9645 kr = thread_block(THREAD_CONTINUE_NULL);
9646 simple_lock(&vm_paging_lock);
9647 }
9648 vm_paging_page_waiter--;
9649 /* ... and try again */
9650 }
9651
9652 if (page_map_offset != 0) {
9653 /*
9654 * We found a kernel virtual address;
9655 * map the physical page to that virtual address.
9656 */
9657 if (i > vm_paging_max_index) {
9658 vm_paging_max_index = i;
9659 }
9660 vm_paging_page_inuse[i] = TRUE;
9661 simple_unlock(&vm_paging_lock);
9662
9663 page->pmapped = TRUE;
9664
9665 /*
9666 * Keep the VM object locked over the PMAP_ENTER
9667 * and the actual use of the page by the kernel,
9668 * or this pmap mapping might get undone by a
9669 * vm_object_pmap_protect() call...
9670 */
9671 PMAP_ENTER(kernel_pmap,
9672 page_map_offset,
9673 page,
9674 protection,
9675 VM_PROT_NONE,
9676 0,
9677 TRUE);
9678 vm_paging_objects_mapped++;
9679 vm_paging_pages_mapped++;
9680 *address = page_map_offset;
9681 *need_unmap = TRUE;
9682
9683 /* all done and mapped, ready to use ! */
9684 return KERN_SUCCESS;
9685 }
9686
9687 /*
9688 * We ran out of pre-allocated kernel virtual
9689 * addresses. Just map the page in the kernel
9690 * the slow and regular way.
9691 */
9692 vm_paging_no_kernel_page++;
9693 simple_unlock(&vm_paging_lock);
9694 }
9695
9696 if (! can_unlock_object) {
9697 *address = 0;
9698 *size = 0;
9699 *need_unmap = FALSE;
9700 return KERN_NOT_SUPPORTED;
9701 }
9702
9703 object_offset = vm_object_trunc_page(offset);
9704 map_size = vm_map_round_page(*size,
9705 VM_MAP_PAGE_MASK(kernel_map));
9706
9707 /*
9708 * Try and map the required range of the object
9709 * in the kernel_map
9710 */
9711
9712 vm_object_reference_locked(object); /* for the map entry */
9713 vm_object_unlock(object);
9714
9715 kr = vm_map_enter(kernel_map,
9716 address,
9717 map_size,
9718 0,
9719 VM_FLAGS_ANYWHERE,
9720 object,
9721 object_offset,
9722 FALSE,
9723 protection,
9724 VM_PROT_ALL,
9725 VM_INHERIT_NONE);
9726 if (kr != KERN_SUCCESS) {
9727 *address = 0;
9728 *size = 0;
9729 *need_unmap = FALSE;
9730 vm_object_deallocate(object); /* for the map entry */
9731 vm_object_lock(object);
9732 return kr;
9733 }
9734
9735 *size = map_size;
9736
9737 /*
9738 * Enter the mapped pages in the page table now.
9739 */
9740 vm_object_lock(object);
9741 /*
9742 * VM object must be kept locked from before PMAP_ENTER()
9743 * until after the kernel is done accessing the page(s).
9744 * Otherwise, the pmap mappings in the kernel could be
9745 * undone by a call to vm_object_pmap_protect().
9746 */
9747
9748 for (page_map_offset = 0;
9749 map_size != 0;
9750 map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
9751
9752 page = vm_page_lookup(object, offset + page_map_offset);
9753 if (page == VM_PAGE_NULL) {
9754 printf("vm_paging_map_object: no page !?\n");
9755 vm_object_unlock(object);
9756 kr = vm_map_remove(kernel_map, *address, *size,
9757 VM_MAP_NO_FLAGS);
9758 assert(kr == KERN_SUCCESS);
9759 *address = 0;
9760 *size = 0;
9761 *need_unmap = FALSE;
9762 vm_object_lock(object);
9763 return KERN_MEMORY_ERROR;
9764 }
9765 page->pmapped = TRUE;
9766
9767 //assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(page)));
9768 PMAP_ENTER(kernel_pmap,
9769 *address + page_map_offset,
9770 page,
9771 protection,
9772 VM_PROT_NONE,
9773 0,
9774 TRUE);
9775 }
9776
9777 vm_paging_objects_mapped_slow++;
9778 vm_paging_pages_mapped_slow += (unsigned long) (map_size / PAGE_SIZE_64);
9779
9780 *need_unmap = TRUE;
9781
9782 return KERN_SUCCESS;
9783 }
9784
9785 /*
9786 * ENCRYPTED SWAP:
9787 * vm_paging_unmap_object:
9788 * Unmaps part of a VM object's pages from the kernel
9789 * virtual address space.
9790 * Context:
9791 * The VM object is locked. This lock will get
9792 * dropped and re-acquired though.
9793 */
9794 void
9795 vm_paging_unmap_object(
9796 vm_object_t object,
9797 vm_map_offset_t start,
9798 vm_map_offset_t end)
9799 {
9800 kern_return_t kr;
9801 int i;
9802
9803 if ((vm_paging_base_address == 0) ||
9804 (start < vm_paging_base_address) ||
9805 (end > (vm_paging_base_address
9806 + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) {
9807 /*
9808 * We didn't use our pre-allocated pool of
9809 * kernel virtual address. Deallocate the
9810 * virtual memory.
9811 */
9812 if (object != VM_OBJECT_NULL) {
9813 vm_object_unlock(object);
9814 }
9815 kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS);
9816 if (object != VM_OBJECT_NULL) {
9817 vm_object_lock(object);
9818 }
9819 assert(kr == KERN_SUCCESS);
9820 } else {
9821 /*
9822 * We used a kernel virtual address from our
9823 * pre-allocated pool. Put it back in the pool
9824 * for next time.
9825 */
9826 assert(end - start == PAGE_SIZE);
9827 i = (int) ((start - vm_paging_base_address) >> PAGE_SHIFT);
9828 assert(i >= 0 && i < VM_PAGING_NUM_PAGES);
9829
9830 /* undo the pmap mapping */
9831 pmap_remove(kernel_pmap, start, end);
9832
9833 simple_lock(&vm_paging_lock);
9834 vm_paging_page_inuse[i] = FALSE;
9835 if (vm_paging_page_waiter) {
9836 thread_wakeup(&vm_paging_page_waiter);
9837 }
9838 simple_unlock(&vm_paging_lock);
9839 }
9840 }
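/*
 * Illustrative sketch (not part of the original source): how a hypothetical
 * caller might use vm_paging_map_object()/vm_paging_unmap_object() per the
 * contract documented above -- the page is busy and its object is locked on
 * entry, *size is an IN/OUT parameter, and the mapping is torn down only when
 * need_unmap comes back TRUE. The helper name and the byte written through
 * the mapping are made up for illustration.
 */
#if 0	/* example only */
static void
example_touch_page(vm_object_t object, vm_page_t page)
{
	kern_return_t	kr;
	vm_map_size_t	size = PAGE_SIZE;	/* IN: one page */
	vm_map_offset_t	kva = 0;
	boolean_t	need_unmap = FALSE;

	/* object locked, page busy (see the context notes above) */
	kr = vm_paging_map_object(page, object, page->offset,
				  VM_PROT_READ | VM_PROT_WRITE,
				  FALSE,		/* can_unlock_object */
				  &size, &kva, &need_unmap);
	if (kr != KERN_SUCCESS)
		return;

	*(char *)(uintptr_t)kva = 0;		/* use the kernel mapping */

	if (need_unmap)
		vm_paging_unmap_object(object, kva, kva + size);
}
#endif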
9841
9842 #if ENCRYPTED_SWAP
9843 /*
9844 * Encryption data.
9845 * "iv" is the "initial vector". Ideally, we want to
9846 * have a different one for each page we encrypt, so that
9847 * crackers can't find encryption patterns too easily.
9848 */
9849 #define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */
9850 boolean_t swap_crypt_ctx_initialized = FALSE;
9851 uint32_t swap_crypt_key[8]; /* big enough for a 256-bit key */
9852 aes_ctx swap_crypt_ctx;
9853 const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE] = {0xa, };
9854
9855 #if DEBUG
9856 boolean_t swap_crypt_ctx_tested = FALSE;
9857 unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
9858 unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
9859 unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
9860 #endif /* DEBUG */
9861
9862 /*
9863 * Initialize the encryption context: key and key size.
9864 */
9865 void swap_crypt_ctx_initialize(void); /* forward */
9866 void
9867 swap_crypt_ctx_initialize(void)
9868 {
9869 unsigned int i;
9870
9871 /*
9872 * No need for locking to protect swap_crypt_ctx_initialized
9873 * because the first use of encryption will come from the
9874 * pageout thread (we won't pagein before there's been a pageout)
9875 * and there's only one pageout thread.
9876 */
9877 if (swap_crypt_ctx_initialized == FALSE) {
9878 for (i = 0;
9879 i < (sizeof (swap_crypt_key) /
9880 sizeof (swap_crypt_key[0]));
9881 i++) {
9882 swap_crypt_key[i] = random();
9883 }
9884 aes_encrypt_key((const unsigned char *) swap_crypt_key,
9885 SWAP_CRYPT_AES_KEY_SIZE,
9886 &swap_crypt_ctx.encrypt);
9887 aes_decrypt_key((const unsigned char *) swap_crypt_key,
9888 SWAP_CRYPT_AES_KEY_SIZE,
9889 &swap_crypt_ctx.decrypt);
9890 swap_crypt_ctx_initialized = TRUE;
9891 }
9892
9893 #if DEBUG
9894 /*
9895 * Validate the encryption algorithms.
9896 */
9897 if (swap_crypt_ctx_tested == FALSE) {
9898 /* initialize */
9899 for (i = 0; i < 4096; i++) {
9900 swap_crypt_test_page_ref[i] = (char) i;
9901 }
9902 /* encrypt */
9903 aes_encrypt_cbc(swap_crypt_test_page_ref,
9904 swap_crypt_null_iv,
9905 PAGE_SIZE / AES_BLOCK_SIZE,
9906 swap_crypt_test_page_encrypt,
9907 &swap_crypt_ctx.encrypt);
9908 /* decrypt */
9909 aes_decrypt_cbc(swap_crypt_test_page_encrypt,
9910 swap_crypt_null_iv,
9911 PAGE_SIZE / AES_BLOCK_SIZE,
9912 swap_crypt_test_page_decrypt,
9913 &swap_crypt_ctx.decrypt);
9914 /* compare result with original */
9915 for (i = 0; i < 4096; i ++) {
9916 if (swap_crypt_test_page_decrypt[i] !=
9917 swap_crypt_test_page_ref[i]) {
9918 panic("encryption test failed");
9919 }
9920 }
9921
9922 /* encrypt again */
9923 aes_encrypt_cbc(swap_crypt_test_page_decrypt,
9924 swap_crypt_null_iv,
9925 PAGE_SIZE / AES_BLOCK_SIZE,
9926 swap_crypt_test_page_decrypt,
9927 &swap_crypt_ctx.encrypt);
9928 /* decrypt in place */
9929 aes_decrypt_cbc(swap_crypt_test_page_decrypt,
9930 swap_crypt_null_iv,
9931 PAGE_SIZE / AES_BLOCK_SIZE,
9932 swap_crypt_test_page_decrypt,
9933 &swap_crypt_ctx.decrypt);
9934 for (i = 0; i < 4096; i ++) {
9935 if (swap_crypt_test_page_decrypt[i] !=
9936 swap_crypt_test_page_ref[i]) {
9937 panic("in place encryption test failed");
9938 }
9939 }
9940
9941 swap_crypt_ctx_tested = TRUE;
9942 }
9943 #endif /* DEBUG */
9944 }
9945
9946 /*
9947 * ENCRYPTED SWAP:
9948 * vm_page_encrypt:
9949 * Encrypt the given page, for secure paging.
9950 * The page might already be mapped at kernel virtual
9951 * address "kernel_mapping_offset". Otherwise, we need
9952 * to map it.
9953 *
9954 * Context:
9955 * The page's object is locked, but this lock will be released
9956 * and re-acquired.
9957 * The page is busy and not accessible by users (not entered in any pmap).
9958 */
9959 void
9960 vm_page_encrypt(
9961 vm_page_t page,
9962 vm_map_offset_t kernel_mapping_offset)
9963 {
9964 kern_return_t kr;
9965 vm_map_size_t kernel_mapping_size;
9966 boolean_t kernel_mapping_needs_unmap;
9967 vm_offset_t kernel_vaddr;
9968 vm_object_t page_object;
9969 union {
9970 unsigned char aes_iv[AES_BLOCK_SIZE];
9971 struct {
9972 memory_object_t pager_object;
9973 vm_object_offset_t paging_offset;
9974 } vm;
9975 } encrypt_iv;
9976
9977 if (! vm_pages_encrypted) {
9978 vm_pages_encrypted = TRUE;
9979 }
9980
9981 assert(page->busy);
9982
9983 if (page->encrypted) {
9984 /*
9985 * Already encrypted: no need to do it again.
9986 */
9987 vm_page_encrypt_already_encrypted_counter++;
9988 return;
9989 }
9990 assert(page->dirty || page->precious);
9991
9992 ASSERT_PAGE_DECRYPTED(page);
9993
9994 page_object = VM_PAGE_OBJECT(page);
9995
9996 /*
9997 * Take a paging-in-progress reference to keep the object
9998 * alive even if we have to unlock it (in vm_paging_map_object()
9999 * for example)...
10000 */
10001 vm_object_paging_begin(page_object);
10002
10003 if (kernel_mapping_offset == 0) {
10004 /*
10005 * The page hasn't already been mapped in kernel space
10006 * by the caller. Map it now, so that we can access
10007 * its contents and encrypt them.
10008 */
10009 kernel_mapping_size = PAGE_SIZE;
10010 kernel_mapping_needs_unmap = FALSE;
10011 kr = vm_paging_map_object(page,
10012 page_object,
10013 page->offset,
10014 VM_PROT_READ | VM_PROT_WRITE,
10015 FALSE,
10016 &kernel_mapping_size,
10017 &kernel_mapping_offset,
10018 &kernel_mapping_needs_unmap);
10019 if (kr != KERN_SUCCESS) {
10020 panic("vm_page_encrypt: "
10021 "could not map page in kernel: 0x%x\n",
10022 kr);
10023 }
10024 } else {
10025 kernel_mapping_size = 0;
10026 kernel_mapping_needs_unmap = FALSE;
10027 }
10028 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
10029
10030 if (swap_crypt_ctx_initialized == FALSE) {
10031 swap_crypt_ctx_initialize();
10032 }
10033 assert(swap_crypt_ctx_initialized);
10034
10035 /*
10036 * Prepare an "initial vector" for the encryption.
10037 * We use the "pager" and the "paging_offset" for that
10038 * page to obfuscate the encrypted data a bit more and
10039 * prevent crackers from finding patterns that they could
10040 * use to break the key.
10041 */
10042 bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));
10043 encrypt_iv.vm.pager_object = page_object->pager;
10044 encrypt_iv.vm.paging_offset =
10045 page_object->paging_offset + page->offset;
10046
10047 /* encrypt the "initial vector" */
10048 aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0],
10049 swap_crypt_null_iv,
10050 1,
10051 &encrypt_iv.aes_iv[0],
10052 &swap_crypt_ctx.encrypt);
10053
10054 /*
10055 * Encrypt the page.
10056 */
10057 aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
10058 &encrypt_iv.aes_iv[0],
10059 PAGE_SIZE / AES_BLOCK_SIZE,
10060 (unsigned char *) kernel_vaddr,
10061 &swap_crypt_ctx.encrypt);
10062
10063 vm_page_encrypt_counter++;
10064
10065 /*
10066 * Unmap the page from the kernel's address space,
10067 * if we had to map it ourselves. Otherwise, let
10068 * the caller undo the mapping if needed.
10069 */
10070 if (kernel_mapping_needs_unmap) {
10071 vm_paging_unmap_object(page_object,
10072 kernel_mapping_offset,
10073 kernel_mapping_offset + kernel_mapping_size);
10074 }
10075
10076 /*
10077 * Clear the "reference" and "modified" bits.
10078 * This should clean up any impact the encryption had
10079 * on them.
10080 * The page was kept busy and disconnected from all pmaps,
10081 * so it can't have been referenced or modified from user
10082 * space.
10083 * The software bits will be reset later after the I/O
10084 * has completed (in upl_commit_range()).
10085 */
10086 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(page), VM_MEM_REFERENCED | VM_MEM_MODIFIED);
10087
10088 page->encrypted = TRUE;
10089
10090 vm_object_paging_end(page_object);
10091 }
10092
10093 /*
10094 * ENCRYPTED SWAP:
10095 * vm_page_decrypt:
10096 * Decrypt the given page.
10097 * The page might already be mapped at kernel virtual
10098 * address "kernel_mapping_offset". Otherwise, we need
10099 * to map it.
10100 *
10101 * Context:
10102 * The page's VM object is locked but will be unlocked and relocked.
10103 * The page is busy and not accessible by users (not entered in any pmap).
10104 */
10105 void
10106 vm_page_decrypt(
10107 vm_page_t page,
10108 vm_map_offset_t kernel_mapping_offset)
10109 {
10110 kern_return_t kr;
10111 vm_map_size_t kernel_mapping_size;
10112 vm_offset_t kernel_vaddr;
10113 boolean_t kernel_mapping_needs_unmap;
10114 vm_object_t page_object;
10115 union {
10116 unsigned char aes_iv[AES_BLOCK_SIZE];
10117 struct {
10118 memory_object_t pager_object;
10119 vm_object_offset_t paging_offset;
10120 } vm;
10121 } decrypt_iv;
10122 boolean_t was_dirty;
10123
10124 assert(page->busy);
10125 assert(page->encrypted);
10126
10127 page_object = VM_PAGE_OBJECT(page);
10128 was_dirty = page->dirty;
10129
10130 /*
10131 * Take a paging-in-progress reference to keep the object
10132 * alive even if we have to unlock it (in vm_paging_map_object()
10133 * for example)...
10134 */
10135 vm_object_paging_begin(page_object);
10136
10137 if (kernel_mapping_offset == 0) {
10138 /*
10139 * The page hasn't already been mapped in kernel space
10140 * by the caller. Map it now, so that we can access
10141 * its contents and decrypt them.
10142 */
10143 kernel_mapping_size = PAGE_SIZE;
10144 kernel_mapping_needs_unmap = FALSE;
10145 kr = vm_paging_map_object(page,
10146 page_object,
10147 page->offset,
10148 VM_PROT_READ | VM_PROT_WRITE,
10149 FALSE,
10150 &kernel_mapping_size,
10151 &kernel_mapping_offset,
10152 &kernel_mapping_needs_unmap);
10153 if (kr != KERN_SUCCESS) {
10154 panic("vm_page_decrypt: "
10155 "could not map page in kernel: 0x%x\n",
10156 kr);
10157 }
10158 } else {
10159 kernel_mapping_size = 0;
10160 kernel_mapping_needs_unmap = FALSE;
10161 }
10162 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
10163
10164 assert(swap_crypt_ctx_initialized);
10165
10166 /*
10167 * Prepare an "initial vector" for the decryption.
10168 * It has to be the same as the "initial vector" we
10169 * used to encrypt that page.
10170 */
10171 bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv));
10172 decrypt_iv.vm.pager_object = page_object->pager;
10173 decrypt_iv.vm.paging_offset =
10174 page_object->paging_offset + page->offset;
10175
10176 /* encrypt the "initial vector" */
10177 aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0],
10178 swap_crypt_null_iv,
10179 1,
10180 &decrypt_iv.aes_iv[0],
10181 &swap_crypt_ctx.encrypt);
10182
10183 /*
10184 * Decrypt the page.
10185 */
10186 aes_decrypt_cbc((const unsigned char *) kernel_vaddr,
10187 &decrypt_iv.aes_iv[0],
10188 PAGE_SIZE / AES_BLOCK_SIZE,
10189 (unsigned char *) kernel_vaddr,
10190 &swap_crypt_ctx.decrypt);
10191 vm_page_decrypt_counter++;
10192
10193 /*
10194 * Unmap the page from the kernel's address space,
10195 * if we had to map it ourselves. Otherwise, let
10196 * the caller undo the mapping if needed.
10197 */
10198 if (kernel_mapping_needs_unmap) {
10199 vm_paging_unmap_object(page_object,
10200 kernel_vaddr,
10201 kernel_vaddr + PAGE_SIZE);
10202 }
10203
10204 if (was_dirty) {
10205 /*
10206 * The pager did not specify that the page would be
10207 * clean when it got paged in, so let's not clean it here
10208 * either.
10209 */
10210 } else {
10211 /*
10212 * After decryption, the page is actually still clean.
10213 * It was encrypted as part of paging, which "cleans"
10214 * the "dirty" pages.
10215 * No one could access it after it was encrypted
10216 * and the decryption doesn't count.
10217 */
10218 page->dirty = FALSE;
10219 assert (page->cs_validated == FALSE);
10220 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(page), VM_MEM_MODIFIED | VM_MEM_REFERENCED);
10221 }
10222 page->encrypted = FALSE;
10223
10224 /*
10225 * We've just modified the page's contents via the data cache and part
10226 * of the new contents might still be in the cache and not yet in RAM.
10227 * Since the page is now available and might get gathered in a UPL to
10228 * be part of a DMA transfer from a driver that expects the memory to
10229 * be coherent at this point, we have to flush the data cache.
10230 */
10231 pmap_sync_page_attributes_phys(VM_PAGE_GET_PHYS_PAGE(page));
10232 /*
10233 * Since the page is not mapped yet, some code might assume that it
10234 * doesn't need to invalidate the instruction cache when writing to
10235 * that page. That code relies on "pmapped" being FALSE, so that the
10236 * caches get synchronized when the page is first mapped.
10237 */
10238 assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(page)));
10239 page->pmapped = FALSE;
10240 page->wpmapped = FALSE;
10241
10242 vm_object_paging_end(page_object);
10243 }
10244
10245 #if DEVELOPMENT || DEBUG
10246 unsigned long upl_encrypt_upls = 0;
10247 unsigned long upl_encrypt_pages = 0;
10248 #endif
10249
10250 /*
10251 * ENCRYPTED SWAP:
10252 *
10253 * upl_encrypt:
10254 * Encrypts all the pages in the UPL, within the specified range.
10255 *
10256 */
10257 void
10258 upl_encrypt(
10259 upl_t upl,
10260 upl_offset_t crypt_offset,
10261 upl_size_t crypt_size)
10262 {
10263 upl_size_t upl_size, subupl_size=crypt_size;
10264 upl_offset_t offset_in_upl, subupl_offset=crypt_offset;
10265 vm_object_t upl_object;
10266 vm_object_offset_t upl_offset;
10267 vm_page_t page;
10268 vm_object_t shadow_object;
10269 vm_object_offset_t shadow_offset;
10270 vm_object_offset_t paging_offset;
10271 vm_object_offset_t base_offset;
10272 int isVectorUPL = 0;
10273 upl_t vector_upl = NULL;
10274
10275 if((isVectorUPL = vector_upl_is_valid(upl)))
10276 vector_upl = upl;
10277
10278 process_upl_to_encrypt:
10279 if(isVectorUPL) {
10280 crypt_size = subupl_size;
10281 crypt_offset = subupl_offset;
10282 upl = vector_upl_subupl_byoffset(vector_upl, &crypt_offset, &crypt_size);
10283 if(upl == NULL)
10284 panic("upl_encrypt: Accessing a sub-upl that doesn't exist\n");
10285 subupl_size -= crypt_size;
10286 subupl_offset += crypt_size;
10287 }
10288
10289 #if DEVELOPMENT || DEBUG
10290 upl_encrypt_upls++;
10291 upl_encrypt_pages += crypt_size / PAGE_SIZE;
10292 #endif
10293 upl_object = upl->map_object;
10294 upl_offset = upl->offset;
10295 upl_size = upl->size;
10296
10297 vm_object_lock(upl_object);
10298
10299 /*
10300 * Find the VM object that contains the actual pages.
10301 */
10302 if (upl_object->pageout) {
10303 shadow_object = upl_object->shadow;
10304 /*
10305 * The offset in the shadow object is actually also
10306 * accounted for in upl->offset. It possibly shouldn't be
10307 * this way, but for now don't account for it twice.
10308 */
10309 shadow_offset = 0;
10310 assert(upl_object->paging_offset == 0); /* XXX ? */
10311 vm_object_lock(shadow_object);
10312 } else {
10313 shadow_object = upl_object;
10314 shadow_offset = 0;
10315 }
10316
10317 paging_offset = shadow_object->paging_offset;
10318 vm_object_paging_begin(shadow_object);
10319
10320 if (shadow_object != upl_object)
10321 vm_object_unlock(upl_object);
10322
10323
10324 base_offset = shadow_offset;
10325 base_offset += upl_offset;
10326 base_offset += crypt_offset;
10327 base_offset -= paging_offset;
10328
10329 assert(crypt_offset + crypt_size <= upl_size);
10330
10331 for (offset_in_upl = 0;
10332 offset_in_upl < crypt_size;
10333 offset_in_upl += PAGE_SIZE) {
10334 page = vm_page_lookup(shadow_object,
10335 base_offset + offset_in_upl);
10336 if (page == VM_PAGE_NULL) {
10337 panic("upl_encrypt: "
10338 "no page for (obj=%p,off=0x%llx+0x%x)!\n",
10339 shadow_object,
10340 base_offset,
10341 offset_in_upl);
10342 }
10343 /*
10344 * Disconnect the page from all pmaps, so that nobody can
10345 * access it while it's encrypted. After that point, all
10346 * accesses to this page will cause a page fault and block
10347 * while the page is busy being encrypted. After the
10348 * encryption completes, any access will cause a
10349 * page fault and the page gets decrypted at that time.
10350 */
10351 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(page));
10352 vm_page_encrypt(page, 0);
10353
10354 if (vm_object_lock_avoid(shadow_object)) {
10355 /*
10356 * Give vm_pageout_scan() a chance to convert more
10357 * pages from "clean-in-place" to "clean-and-free",
10358 * if it's interested in the same pages we selected
10359 * in this cluster.
10360 */
10361 vm_object_unlock(shadow_object);
10362 mutex_pause(2);
10363 vm_object_lock(shadow_object);
10364 }
10365 }
10366
10367 vm_object_paging_end(shadow_object);
10368 vm_object_unlock(shadow_object);
10369
10370 if(isVectorUPL && subupl_size)
10371 goto process_upl_to_encrypt;
10372 }
10373
10374 #else /* ENCRYPTED_SWAP */
10375 void
10376 upl_encrypt(
10377 __unused upl_t upl,
10378 __unused upl_offset_t crypt_offset,
10379 __unused upl_size_t crypt_size)
10380 {
10381 }
10382
10383 void
10384 vm_page_encrypt(
10385 __unused vm_page_t page,
10386 __unused vm_map_offset_t kernel_mapping_offset)
10387 {
10388 }
10389
10390 void
10391 vm_page_decrypt(
10392 __unused vm_page_t page,
10393 __unused vm_map_offset_t kernel_mapping_offset)
10394 {
10395 }
10396
10397 #endif /* ENCRYPTED_SWAP */
10398
10399 /*
10400 * page->object must be locked
10401 */
10402 void
10403 vm_pageout_steal_laundry(vm_page_t page, boolean_t queues_locked)
10404 {
10405 if (!queues_locked) {
10406 vm_page_lockspin_queues();
10407 }
10408
10409 page->free_when_done = FALSE;
10410 /*
10411 * need to drop the laundry count...
10412 * we may also need to remove it
10413 * from the I/O paging queue...
10414 * vm_pageout_throttle_up handles both cases
10415 *
10416 * the laundry and pageout_queue flags are cleared...
10417 */
10418 vm_pageout_throttle_up(page);
10419
10420 vm_page_steal_pageout_page++;
10421
10422 if (!queues_locked) {
10423 vm_page_unlock_queues();
10424 }
10425 }
10426
10427 upl_t
10428 vector_upl_create(vm_offset_t upl_offset)
10429 {
10430 int vector_upl_size = sizeof(struct _vector_upl);
10431 int i=0;
10432 upl_t upl;
10433 vector_upl_t vector_upl = (vector_upl_t)kalloc(vector_upl_size);
10434
10435 upl = upl_create(0,UPL_VECTOR,0);
10436 upl->vector_upl = vector_upl;
10437 upl->offset = upl_offset;
10438 vector_upl->size = 0;
10439 vector_upl->offset = upl_offset;
10440 vector_upl->invalid_upls=0;
10441 vector_upl->num_upls=0;
10442 vector_upl->pagelist = NULL;
10443
10444 for(i=0; i < MAX_VECTOR_UPL_ELEMENTS ; i++) {
10445 vector_upl->upl_iostates[i].size = 0;
10446 vector_upl->upl_iostates[i].offset = 0;
10447
10448 }
10449 return upl;
10450 }
10451
10452 void
10453 vector_upl_deallocate(upl_t upl)
10454 {
10455 if(upl) {
10456 vector_upl_t vector_upl = upl->vector_upl;
10457 if(vector_upl) {
10458 if(vector_upl->invalid_upls != vector_upl->num_upls)
10459 panic("Deallocating non-empty Vectored UPL\n");
10460 kfree(vector_upl->pagelist,(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)));
10461 vector_upl->invalid_upls=0;
10462 vector_upl->num_upls = 0;
10463 vector_upl->pagelist = NULL;
10464 vector_upl->size = 0;
10465 vector_upl->offset = 0;
10466 kfree(vector_upl, sizeof(struct _vector_upl));
10467 vector_upl = (vector_upl_t)0xfeedfeed;
10468 }
10469 else
10470 panic("vector_upl_deallocate was passed a non-vectored upl\n");
10471 }
10472 else
10473 panic("vector_upl_deallocate was passed a NULL upl\n");
10474 }
10475
10476 boolean_t
10477 vector_upl_is_valid(upl_t upl)
10478 {
10479 if(upl && ((upl->flags & UPL_VECTOR)==UPL_VECTOR)) {
10480 vector_upl_t vector_upl = upl->vector_upl;
10481 if(vector_upl == NULL || vector_upl == (vector_upl_t)0xfeedfeed || vector_upl == (vector_upl_t)0xfeedbeef)
10482 return FALSE;
10483 else
10484 return TRUE;
10485 }
10486 return FALSE;
10487 }
10488
10489 boolean_t
10490 vector_upl_set_subupl(upl_t upl,upl_t subupl, uint32_t io_size)
10491 {
10492 if(vector_upl_is_valid(upl)) {
10493 vector_upl_t vector_upl = upl->vector_upl;
10494
10495 if(vector_upl) {
10496 if(subupl) {
10497 if(io_size) {
10498 if(io_size < PAGE_SIZE)
10499 io_size = PAGE_SIZE;
10500 subupl->vector_upl = (void*)vector_upl;
10501 vector_upl->upl_elems[vector_upl->num_upls++] = subupl;
10502 vector_upl->size += io_size;
10503 upl->size += io_size;
10504 }
10505 else {
10506 uint32_t i=0,invalid_upls=0;
10507 for(i = 0; i < vector_upl->num_upls; i++) {
10508 if(vector_upl->upl_elems[i] == subupl)
10509 break;
10510 }
10511 if(i == vector_upl->num_upls)
10512 panic("Trying to remove sub-upl when none exists");
10513
10514 vector_upl->upl_elems[i] = NULL;
10515 invalid_upls = hw_atomic_add(&(vector_upl)->invalid_upls, 1);
10516 if(invalid_upls == vector_upl->num_upls)
10517 return TRUE;
10518 else
10519 return FALSE;
10520 }
10521 }
10522 else
10523 panic("vector_upl_set_subupl was passed a NULL upl element\n");
10524 }
10525 else
10526 panic("vector_upl_set_subupl was passed a non-vectored upl\n");
10527 }
10528 else
10529 panic("vector_upl_set_subupl was passed a NULL upl\n");
10530
10531 return FALSE;
10532 }
10533
10534 void
10535 vector_upl_set_pagelist(upl_t upl)
10536 {
10537 if(vector_upl_is_valid(upl)) {
10538 uint32_t i=0;
10539 vector_upl_t vector_upl = upl->vector_upl;
10540
10541 if(vector_upl) {
10542 vm_offset_t pagelist_size=0, cur_upl_pagelist_size=0;
10543
10544 vector_upl->pagelist = (upl_page_info_array_t)kalloc(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE));
10545
10546 for(i=0; i < vector_upl->num_upls; i++) {
10547 cur_upl_pagelist_size = sizeof(struct upl_page_info) * vector_upl->upl_elems[i]->size/PAGE_SIZE;
10548 bcopy(UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(vector_upl->upl_elems[i]), (char*)vector_upl->pagelist + pagelist_size, cur_upl_pagelist_size);
10549 pagelist_size += cur_upl_pagelist_size;
10550 if(vector_upl->upl_elems[i]->highest_page > upl->highest_page)
10551 upl->highest_page = vector_upl->upl_elems[i]->highest_page;
10552 }
10553 assert( pagelist_size == (sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)) );
10554 }
10555 else
10556 panic("vector_upl_set_pagelist was passed a non-vectored upl\n");
10557 }
10558 else
10559 panic("vector_upl_set_pagelist was passed a NULL upl\n");
10560
10561 }
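/*
 * Illustrative sketch (not part of the original source): how a hypothetical
 * caller might assemble a vectored UPL from two already-created sub-UPLs.
 * "subupl_a"/"subupl_b" and their sizes are assumed for illustration; the
 * iostate setter used below is defined later in this file.
 */
#if 0	/* example only */
static upl_t
example_build_vector_upl(upl_t subupl_a, upl_t subupl_b)
{
	upl_t	vupl;

	vupl = vector_upl_create(0);			/* vector offset 0 */

	/* attach the sub-UPLs; the vector grows by each io_size */
	vector_upl_set_subupl(vupl, subupl_a, subupl_a->size);
	vector_upl_set_subupl(vupl, subupl_b, subupl_b->size);

	/* record where each sub-UPL sits within the vector */
	vector_upl_set_iostate(vupl, subupl_a, 0, subupl_a->size);
	vector_upl_set_iostate(vupl, subupl_b, subupl_a->size, subupl_b->size);

	/* build the merged page list once all sub-UPLs are attached */
	vector_upl_set_pagelist(vupl);

	return vupl;
}
#endif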
10562
10563 upl_t
10564 vector_upl_subupl_byindex(upl_t upl, uint32_t index)
10565 {
10566 if(vector_upl_is_valid(upl)) {
10567 vector_upl_t vector_upl = upl->vector_upl;
10568 if(vector_upl) {
10569 if(index < vector_upl->num_upls)
10570 return vector_upl->upl_elems[index];
10571 }
10572 else
10573 panic("vector_upl_subupl_byindex was passed a non-vectored upl\n");
10574 }
10575 return NULL;
10576 }
10577
10578 upl_t
10579 vector_upl_subupl_byoffset(upl_t upl, upl_offset_t *upl_offset, upl_size_t *upl_size)
10580 {
10581 if(vector_upl_is_valid(upl)) {
10582 uint32_t i=0;
10583 vector_upl_t vector_upl = upl->vector_upl;
10584
10585 if(vector_upl) {
10586 upl_t subupl = NULL;
10587 vector_upl_iostates_t subupl_state;
10588
10589 for(i=0; i < vector_upl->num_upls; i++) {
10590 subupl = vector_upl->upl_elems[i];
10591 subupl_state = vector_upl->upl_iostates[i];
10592 if( *upl_offset <= (subupl_state.offset + subupl_state.size - 1)) {
10593 /* We could have been passed an offset/size pair that belongs
10594 * to an UPL element that has already been committed/aborted.
10595 * If so, return NULL.
10596 */
10597 if(subupl == NULL)
10598 return NULL;
10599 if((subupl_state.offset + subupl_state.size) < (*upl_offset + *upl_size)) {
10600 *upl_size = (subupl_state.offset + subupl_state.size) - *upl_offset;
10601 if(*upl_size > subupl_state.size)
10602 *upl_size = subupl_state.size;
10603 }
10604 if(*upl_offset >= subupl_state.offset)
10605 *upl_offset -= subupl_state.offset;
10606 else if(i)
10607 panic("Vector UPL offset miscalculation\n");
10608 return subupl;
10609 }
10610 }
10611 }
10612 else
10613 panic("vector_upl_subupl_byoffset was passed a non-vectored UPL\n");
10614 }
10615 return NULL;
10616 }
10617
10618 void
10619 vector_upl_get_submap(upl_t upl, vm_map_t *v_upl_submap, vm_offset_t *submap_dst_addr)
10620 {
10621 *v_upl_submap = NULL;
10622
10623 if(vector_upl_is_valid(upl)) {
10624 vector_upl_t vector_upl = upl->vector_upl;
10625 if(vector_upl) {
10626 *v_upl_submap = vector_upl->submap;
10627 *submap_dst_addr = vector_upl->submap_dst_addr;
10628 }
10629 else
10630 panic("vector_upl_get_submap was passed a non-vectored UPL\n");
10631 }
10632 else
10633 panic("vector_upl_get_submap was passed a null UPL\n");
10634 }
10635
10636 void
10637 vector_upl_set_submap(upl_t upl, vm_map_t submap, vm_offset_t submap_dst_addr)
10638 {
10639 if(vector_upl_is_valid(upl)) {
10640 vector_upl_t vector_upl = upl->vector_upl;
10641 if(vector_upl) {
10642 vector_upl->submap = submap;
10643 vector_upl->submap_dst_addr = submap_dst_addr;
10644 }
10645 else
10646 panic("vector_upl_set_submap was passed a non-vectored UPL\n");
10647 }
10648 else
10649 panic("vector_upl_set_submap was passed a NULL UPL\n");
10650 }
10651
10652 void
10653 vector_upl_set_iostate(upl_t upl, upl_t subupl, upl_offset_t offset, upl_size_t size)
10654 {
10655 if(vector_upl_is_valid(upl)) {
10656 uint32_t i = 0;
10657 vector_upl_t vector_upl = upl->vector_upl;
10658
10659 if(vector_upl) {
10660 for(i = 0; i < vector_upl->num_upls; i++) {
10661 if(vector_upl->upl_elems[i] == subupl)
10662 break;
10663 }
10664
10665 if(i == vector_upl->num_upls)
10666 panic("setting sub-upl iostate when none exists");
10667
10668 vector_upl->upl_iostates[i].offset = offset;
10669 if(size < PAGE_SIZE)
10670 size = PAGE_SIZE;
10671 vector_upl->upl_iostates[i].size = size;
10672 }
10673 else
10674 panic("vector_upl_set_iostate was passed a non-vectored UPL\n");
10675 }
10676 else
10677 panic("vector_upl_set_iostate was passed a NULL UPL\n");
10678 }
10679
10680 void
10681 vector_upl_get_iostate(upl_t upl, upl_t subupl, upl_offset_t *offset, upl_size_t *size)
10682 {
10683 if(vector_upl_is_valid(upl)) {
10684 uint32_t i = 0;
10685 vector_upl_t vector_upl = upl->vector_upl;
10686
10687 if(vector_upl) {
10688 for(i = 0; i < vector_upl->num_upls; i++) {
10689 if(vector_upl->upl_elems[i] == subupl)
10690 break;
10691 }
10692
10693 if(i == vector_upl->num_upls)
10694 panic("getting sub-upl iostate when none exists");
10695
10696 *offset = vector_upl->upl_iostates[i].offset;
10697 *size = vector_upl->upl_iostates[i].size;
10698 }
10699 else
10700 panic("vector_upl_get_iostate was passed a non-vectored UPL\n");
10701 }
10702 else
10703 panic("vector_upl_get_iostate was passed a NULL UPL\n");
10704 }
10705
10706 void
10707 vector_upl_get_iostate_byindex(upl_t upl, uint32_t index, upl_offset_t *offset, upl_size_t *size)
10708 {
10709 if(vector_upl_is_valid(upl)) {
10710 vector_upl_t vector_upl = upl->vector_upl;
10711 if(vector_upl) {
10712 if(index < vector_upl->num_upls) {
10713 *offset = vector_upl->upl_iostates[index].offset;
10714 *size = vector_upl->upl_iostates[index].size;
10715 }
10716 else
10717 *offset = *size = 0;
10718 }
10719 else
10720 panic("vector_upl_get_iostate_byindex was passed a non-vectored UPL\n");
10721 }
10722 else
10723 panic("vector_upl_get_iostate_byindex was passed a NULL UPL\n");
10724 }
10725
10726 upl_page_info_t *
10727 upl_get_internal_vectorupl_pagelist(upl_t upl)
10728 {
10729 return ((vector_upl_t)(upl->vector_upl))->pagelist;
10730 }
10731
10732 void *
10733 upl_get_internal_vectorupl(upl_t upl)
10734 {
10735 return upl->vector_upl;
10736 }
10737
10738 vm_size_t
10739 upl_get_internal_pagelist_offset(void)
10740 {
10741 return sizeof(struct upl);
10742 }
10743
10744 void
10745 upl_clear_dirty(
10746 upl_t upl,
10747 boolean_t value)
10748 {
10749 if (value) {
10750 upl->flags |= UPL_CLEAR_DIRTY;
10751 } else {
10752 upl->flags &= ~UPL_CLEAR_DIRTY;
10753 }
10754 }
10755
10756 void
10757 upl_set_referenced(
10758 upl_t upl,
10759 boolean_t value)
10760 {
10761 upl_lock(upl);
10762 if (value) {
10763 upl->ext_ref_count++;
10764 } else {
10765 if (!upl->ext_ref_count) {
10766 panic("upl_set_referenced not %p\n", upl);
10767 }
10768 upl->ext_ref_count--;
10769 }
10770 upl_unlock(upl);
10771 }
10772
10773 #if CONFIG_IOSCHED
10774 void
10775 upl_set_blkno(
10776 upl_t upl,
10777 vm_offset_t upl_offset,
10778 int io_size,
10779 int64_t blkno)
10780 {
10781 int i,j;
10782 if ((upl->flags & UPL_EXPEDITE_SUPPORTED) == 0)
10783 return;
10784
10785 assert(upl->upl_reprio_info != 0);
10786 for(i = (int)(upl_offset / PAGE_SIZE), j = 0; j < io_size; i++, j += PAGE_SIZE) {
10787 UPL_SET_REPRIO_INFO(upl, i, blkno, io_size);
10788 }
10789 }
10790 #endif
10791
10792 boolean_t
10793 vm_page_is_slideable(vm_page_t m)
10794 {
10795 boolean_t result = FALSE;
10796 vm_shared_region_slide_info_t si;
10797 vm_object_t m_object;
10798
10799 m_object = VM_PAGE_OBJECT(m);
10800
10801 vm_object_lock_assert_held(m_object);
10802
10803 /* make sure our page belongs to the one object allowed to do this */
10804 if (!m_object->object_slid) {
10805 goto done;
10806 }
10807
10808 si = m_object->vo_slide_info;
10809 if (si == NULL) {
10810 goto done;
10811 }
10812
10813 if(!m->slid && (si->start <= m->offset && si->end > m->offset)) {
10814 result = TRUE;
10815 }
10816
10817 done:
10818 return result;
10819 }
10820
10821 int vm_page_slide_counter = 0;
10822 int vm_page_slide_errors = 0;
10823 kern_return_t
10824 vm_page_slide(
10825 vm_page_t page,
10826 vm_map_offset_t kernel_mapping_offset)
10827 {
10828 kern_return_t kr;
10829 vm_map_size_t kernel_mapping_size;
10830 boolean_t kernel_mapping_needs_unmap;
10831 vm_offset_t kernel_vaddr;
10832 uint32_t pageIndex;
10833 uint32_t slide_chunk;
10834 vm_object_t page_object;
10835
10836 page_object = VM_PAGE_OBJECT(page);
10837
10838 assert(!page->slid);
10839 assert(page_object->object_slid);
10840 vm_object_lock_assert_exclusive(page_object);
10841
10842 if (page->error)
10843 return KERN_FAILURE;
10844
10845 /*
10846 * Take a paging-in-progress reference to keep the object
10847 * alive even if we have to unlock it (in vm_paging_map_object()
10848 * for example)...
10849 */
10850 vm_object_paging_begin(page_object);
10851
10852 if (kernel_mapping_offset == 0) {
10853 /*
10854 * The page hasn't already been mapped in kernel space
10855 * by the caller. Map it now, so that we can access
10856 * its contents and slide them.
10857 */
10858 kernel_mapping_size = PAGE_SIZE;
10859 kernel_mapping_needs_unmap = FALSE;
10860 kr = vm_paging_map_object(page,
10861 page_object,
10862 page->offset,
10863 VM_PROT_READ | VM_PROT_WRITE,
10864 FALSE,
10865 &kernel_mapping_size,
10866 &kernel_mapping_offset,
10867 &kernel_mapping_needs_unmap);
10868 if (kr != KERN_SUCCESS) {
10869 panic("vm_page_slide: "
10870 "could not map page in kernel: 0x%x\n",
10871 kr);
10872 }
10873 } else {
10874 kernel_mapping_size = 0;
10875 kernel_mapping_needs_unmap = FALSE;
10876 }
10877 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
10878
10879 /*
10880 * Slide the pointers on the page.
10881 */
10882
10883 /*assert that slide_file_info.start/end are page-aligned?*/
10884
10885 assert(!page->slid);
10886 assert(page_object->object_slid);
10887
10888 pageIndex = (uint32_t)((page->offset -
10889 page_object->vo_slide_info->start) /
10890 PAGE_SIZE_FOR_SR_SLIDE);
10891 for (slide_chunk = 0;
10892 slide_chunk < PAGE_SIZE / PAGE_SIZE_FOR_SR_SLIDE;
10893 slide_chunk++) {
10894 kr = vm_shared_region_slide_page(page_object->vo_slide_info,
10895 (kernel_vaddr +
10896 (slide_chunk *
10897 PAGE_SIZE_FOR_SR_SLIDE)),
10898 (pageIndex + slide_chunk));
10899 if (kr != KERN_SUCCESS) {
10900 break;
10901 }
10902 }
10903
10904 vm_page_slide_counter++;
10905
10906 /*
10907 * Unmap the page from the kernel's address space,
10908 */
10909 if (kernel_mapping_needs_unmap) {
10910 vm_paging_unmap_object(page_object,
10911 kernel_vaddr,
10912 kernel_vaddr + PAGE_SIZE);
10913 }
10914
10915 page->dirty = FALSE;
10916 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(page), VM_MEM_MODIFIED | VM_MEM_REFERENCED);
10917
10918 if (kr != KERN_SUCCESS || cs_debug > 1) {
10919 printf("vm_page_slide(%p): "
10920 "obj %p off 0x%llx mobj %p moff 0x%llx\n",
10921 page,
10922 page_object, page->offset,
10923 page_object->pager,
10924 page->offset + page_object->paging_offset);
10925 }
10926
10927 if (kr == KERN_SUCCESS) {
10928 page->slid = TRUE;
10929 } else {
10930 page->error = TRUE;
10931 vm_page_slide_errors++;
10932 }
10933
10934 vm_object_paging_end(page_object);
10935
10936 return kr;
10937 }
10938
10939 void inline memoryshot(unsigned int event, unsigned int control)
10940 {
10941 if (vm_debug_events) {
10942 KERNEL_DEBUG_CONSTANT1((MACHDBG_CODE(DBG_MACH_VM_PRESSURE, event)) | control,
10943 vm_page_active_count, vm_page_inactive_count,
10944 vm_page_free_count, vm_page_speculative_count,
10945 vm_page_throttled_count);
10946 } else {
10947 (void) event;
10948 (void) control;
10949 }
10950
10951 }
10952
10953 #ifdef MACH_BSD
10954
10955 boolean_t upl_device_page(upl_page_info_t *upl)
10956 {
10957 return(UPL_DEVICE_PAGE(upl));
10958 }
10959 boolean_t upl_page_present(upl_page_info_t *upl, int index)
10960 {
10961 return(UPL_PAGE_PRESENT(upl, index));
10962 }
10963 boolean_t upl_speculative_page(upl_page_info_t *upl, int index)
10964 {
10965 return(UPL_SPECULATIVE_PAGE(upl, index));
10966 }
10967 boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
10968 {
10969 return(UPL_DIRTY_PAGE(upl, index));
10970 }
10971 boolean_t upl_valid_page(upl_page_info_t *upl, int index)
10972 {
10973 return(UPL_VALID_PAGE(upl, index));
10974 }
10975 ppnum_t upl_phys_page(upl_page_info_t *upl, int index)
10976 {
10977 return(UPL_PHYS_PAGE(upl, index));
10978 }
10979
10980 void upl_page_set_mark(upl_page_info_t *upl, int index, boolean_t v)
10981 {
10982 upl[index].mark = v;
10983 }
10984
10985 boolean_t upl_page_get_mark(upl_page_info_t *upl, int index)
10986 {
10987 return upl[index].mark;
10988 }
10989
10990 void
10991 vm_countdirtypages(void)
10992 {
10993 vm_page_t m;
10994 int dpages;
10995 int pgopages;
10996 int precpages;
10997
10998
10999 dpages=0;
11000 pgopages=0;
11001 precpages=0;
11002
11003 vm_page_lock_queues();
11004 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
11005 do {
11006 if (m ==(vm_page_t )0) break;
11007
11008 if(m->dirty) dpages++;
11009 if(m->free_when_done) pgopages++;
11010 if(m->precious) precpages++;
11011
11012 assert(VM_PAGE_OBJECT(m) != kernel_object);
11013 m = (vm_page_t) vm_page_queue_next(&m->pageq);
11014 if (m ==(vm_page_t )0) break;
11015
11016 } while (!vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t) m));
11017 vm_page_unlock_queues();
11018
11019 vm_page_lock_queues();
11020 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
11021 do {
11022 if (m ==(vm_page_t )0) break;
11023
11024 dpages++;
11025 assert(m->dirty);
11026 assert(!m->free_when_done);
11027 assert(VM_PAGE_OBJECT(m) != kernel_object);
11028 m = (vm_page_t) vm_page_queue_next(&m->pageq);
11029 if (m ==(vm_page_t )0) break;
11030
11031 } while (!vm_page_queue_end(&vm_page_queue_throttled, (vm_page_queue_entry_t) m));
11032 vm_page_unlock_queues();
11033
11034 vm_page_lock_queues();
11035 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
11036 do {
11037 if (m ==(vm_page_t )0) break;
11038
11039 if(m->dirty) dpages++;
11040 if(m->free_when_done) pgopages++;
11041 if(m->precious) precpages++;
11042
11043 assert(VM_PAGE_OBJECT(m) != kernel_object);
11044 m = (vm_page_t) vm_page_queue_next(&m->pageq);
11045 if (m ==(vm_page_t )0) break;
11046
11047 } while (!vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t) m));
11048 vm_page_unlock_queues();
11049
11050 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
11051
11052 dpages=0;
11053 pgopages=0;
11054 precpages=0;
11055
11056 vm_page_lock_queues();
11057 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
11058
11059 do {
11060 if(m == (vm_page_t )0) break;
11061 if(m->dirty) dpages++;
11062 if(m->free_when_done) pgopages++;
11063 if(m->precious) precpages++;
11064
11065 assert(VM_PAGE_OBJECT(m) != kernel_object);
11066 m = (vm_page_t) vm_page_queue_next(&m->pageq);
11067 if(m == (vm_page_t )0) break;
11068
11069 } while (!vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t) m));
11070 vm_page_unlock_queues();
11071
11072 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
11073
11074 }
11075 #endif /* MACH_BSD */
11076
11077 ppnum_t upl_get_highest_page(
11078 upl_t upl)
11079 {
11080 return upl->highest_page;
11081 }
11082
11083 upl_size_t upl_get_size(
11084 upl_t upl)
11085 {
11086 return upl->size;
11087 }
11088
11089 upl_t upl_associated_upl(upl_t upl)
11090 {
11091 return upl->associated_upl;
11092 }
11093
11094 void upl_set_associated_upl(upl_t upl, upl_t associated_upl)
11095 {
11096 upl->associated_upl = associated_upl;
11097 }
11098
11099 struct vnode * upl_lookup_vnode(upl_t upl)
11100 {
11101 if (!upl->map_object->internal)
11102 return vnode_pager_lookup_vnode(upl->map_object->pager);
11103 else
11104 return NULL;
11105 }
11106
11107 #if UPL_DEBUG
11108 kern_return_t upl_ubc_alias_set(upl_t upl, uintptr_t alias1, uintptr_t alias2)
11109 {
11110 upl->ubc_alias1 = alias1;
11111 upl->ubc_alias2 = alias2;
11112 return KERN_SUCCESS;
11113 }
11114 int upl_ubc_alias_get(upl_t upl, uintptr_t * al, uintptr_t * al2)
11115 {
11116 if(al)
11117 *al = upl->ubc_alias1;
11118 if(al2)
11119 *al2 = upl->ubc_alias2;
11120 return KERN_SUCCESS;
11121 }
11122 #endif /* UPL_DEBUG */
11123
11124 #if VM_PRESSURE_EVENTS
11125 /*
11126 * Upward trajectory.
11127 */
11128 extern boolean_t vm_compressor_low_on_space(void);
11129
11130 boolean_t
11131 VM_PRESSURE_NORMAL_TO_WARNING(void) {
11132
11133 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
11134
11135 /* Available pages below our threshold */
11136 if (memorystatus_available_pages < memorystatus_available_pages_pressure) {
11137 /* No frozen processes to kill */
11138 if (memorystatus_frozen_count == 0) {
11139 /* Not enough suspended processes available. */
11140 if (memorystatus_suspended_count < MEMORYSTATUS_SUSPENDED_THRESHOLD) {
11141 return TRUE;
11142 }
11143 }
11144 }
11145 return FALSE;
11146
11147 } else {
11148 return ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 1 : 0);
11149 }
11150 }
11151
11152 boolean_t
11153 VM_PRESSURE_WARNING_TO_CRITICAL(void) {
11154
11155 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
11156
11157 /* Available pages below our threshold */
11158 if (memorystatus_available_pages < memorystatus_available_pages_critical) {
11159 return TRUE;
11160 }
11161 return FALSE;
11162 } else {
11163 return (vm_compressor_low_on_space() || (AVAILABLE_NON_COMPRESSED_MEMORY < ((12 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
11164 }
11165 }
11166
11167 /*
11168 * Downward trajectory.
11169 */
11170 boolean_t
11171 VM_PRESSURE_WARNING_TO_NORMAL(void) {
11172
11173 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
11174
11175 /* Available pages above our threshold */
11176 unsigned int target_threshold = memorystatus_available_pages_pressure + ((15 * memorystatus_available_pages_pressure) / 100);
11177 if (memorystatus_available_pages > target_threshold) {
11178 return TRUE;
11179 }
11180 return FALSE;
11181 } else {
11182 return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((12 * VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) / 10)) ? 1 : 0);
11183 }
11184 }
11185
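/*
 * Worked example (illustrative figure, not from the source): with
 * memorystatus_available_pages_pressure at, say, 4000 pages, the downward
 * transition above requires
 *     target_threshold = 4000 + (15 * 4000) / 100 = 4600 pages
 * to be available before reporting NORMAL again, i.e. the system must
 * recover 15% past the pressure threshold -- a hysteresis band that keeps
 * the pressure state from flapping right at the boundary.
 * VM_PRESSURE_CRITICAL_TO_WARNING below applies the same 15% band to
 * memorystatus_available_pages_critical.
 */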
11186 boolean_t
11187 VM_PRESSURE_CRITICAL_TO_WARNING(void) {
11188
11189 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
11190
11191 /* Available pages above our threshold */
11192 unsigned int target_threshold = memorystatus_available_pages_critical + ((15 * memorystatus_available_pages_critical) / 100);
11193 if (memorystatus_available_pages > target_threshold) {
11194 return TRUE;
11195 }
11196 return FALSE;
11197 } else {
11198 return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((14 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
11199 }
11200 }
11201 #endif /* VM_PRESSURE_EVENTS */
11202