/*
 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	vm/vm_pageout.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	The proverbial page-out daemon.
 */
#include <mach_pagemap.h>
#include <mach_cluster_stats.h>

#include <mach/mach_types.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/mach_host_server.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_statistics.h>

#include <kern/kern_types.h>
#include <kern/counters.h>
#include <kern/host_statistics.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/sched.h>
#include <kern/thread.h>
#include <kern/kalloc.h>

#include <machine/vm_tuning.h>
#include <machine/commpage.h>

#include <vm/vm_compressor_pager.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h> /* must be last */
#include <vm/memory_object.h>
#include <vm/vm_purgeable_internal.h>
#include <vm/vm_shared_region.h>
#include <vm/vm_compressor.h>

#if CONFIG_PHANTOM_CACHE
#include <vm/vm_phantom_cache.h>
#endif

#include <libkern/crypto/aes.h>
extern u_int32_t random(void);	/* from <libkern/libkern.h> */

#include <libkern/OSDebug.h>
extern void m_drain(void);

#if VM_PRESSURE_EVENTS
extern unsigned int memorystatus_available_pages;
extern unsigned int memorystatus_available_pages_pressure;
extern unsigned int memorystatus_available_pages_critical;
extern unsigned int memorystatus_frozen_count;
extern unsigned int memorystatus_suspended_count;

extern vm_pressure_level_t memorystatus_vm_pressure_level;
int memorystatus_purge_on_warning = 2;
int memorystatus_purge_on_urgent = 5;
int memorystatus_purge_on_critical = 8;

void vm_pressure_response(void);
boolean_t vm_pressure_thread_running = FALSE;
extern void consider_vm_pressure_events(void);

#define MEMORYSTATUS_SUSPENDED_THRESHOLD  4
#endif /* VM_PRESSURE_EVENTS */

boolean_t	vm_pressure_changed = FALSE;
#ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE   /* maximum iterations of the active queue to move pages to inactive */
#define VM_PAGEOUT_BURST_ACTIVE_THROTTLE  100
#endif

#ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE  /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096
#endif

#ifndef VM_PAGEOUT_DEADLOCK_RELIEF
#define VM_PAGEOUT_DEADLOCK_RELIEF 100	/* number of pages to move to break deadlock */
#endif

#ifndef VM_PAGEOUT_INACTIVE_RELIEF
#define VM_PAGEOUT_INACTIVE_RELIEF 50	/* minimum number of pages to move to the inactive q */
#endif

#ifndef	VM_PAGE_LAUNDRY_MAX
#define	VM_PAGE_LAUNDRY_MAX	128UL	/* maximum pageouts on a given pageout queue */
#endif	/* VM_PAGE_LAUNDRY_MAX */

#ifndef	VM_PAGEOUT_BURST_WAIT
#define	VM_PAGEOUT_BURST_WAIT	10	/* milliseconds */
#endif	/* VM_PAGEOUT_BURST_WAIT */

#ifndef	VM_PAGEOUT_EMPTY_WAIT
#define VM_PAGEOUT_EMPTY_WAIT	200	/* milliseconds */
#endif	/* VM_PAGEOUT_EMPTY_WAIT */

#ifndef	VM_PAGEOUT_DEADLOCK_WAIT
#define VM_PAGEOUT_DEADLOCK_WAIT	300	/* milliseconds */
#endif	/* VM_PAGEOUT_DEADLOCK_WAIT */

#ifndef	VM_PAGEOUT_IDLE_WAIT
#define VM_PAGEOUT_IDLE_WAIT	10	/* milliseconds */
#endif	/* VM_PAGEOUT_IDLE_WAIT */

#ifndef	VM_PAGEOUT_SWAP_WAIT
#define VM_PAGEOUT_SWAP_WAIT	50	/* milliseconds */
#endif	/* VM_PAGEOUT_SWAP_WAIT */

#ifndef VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED
#define VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED	1000	/* maximum pages considered before we issue a pressure event */
#endif /* VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED */

#ifndef VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS
#define VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS	5	/* seconds */
#endif /* VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS */

unsigned int	vm_page_speculative_q_age_ms = VM_PAGE_SPECULATIVE_Q_AGE_MS;
unsigned int	vm_page_speculative_percentage = 5;

#ifndef VM_PAGE_SPECULATIVE_TARGET
#define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / (100 / vm_page_speculative_percentage))
#endif /* VM_PAGE_SPECULATIVE_TARGET */

#ifndef VM_PAGE_INACTIVE_HEALTHY_LIMIT
#define VM_PAGE_INACTIVE_HEALTHY_LIMIT(total) ((total) * 1 / 200)
#endif /* VM_PAGE_INACTIVE_HEALTHY_LIMIT */
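
/*
 * Illustrative arithmetic (not from the original source): with the default
 * vm_page_speculative_percentage of 5, VM_PAGE_SPECULATIVE_TARGET(total)
 * evaluates to total * 1 / (100 / 5) = total / 20, i.e. roughly 5% of the
 * page count passed in.  Because of the integer division, the percentage is
 * only honored in steps that divide 100 evenly; a setting of 3, for example,
 * yields total / 33, which is about 3.03%.
 * VM_PAGE_INACTIVE_HEALTHY_LIMIT(total) is simply total / 200, i.e. 0.5%.
 */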
/*
 *	To obtain a reasonable LRU approximation, the inactive queue
 *	needs to be large enough to give pages on it a chance to be
 *	referenced a second time.  This macro defines the fraction
 *	of active+inactive pages that should be inactive.
 *	The pageout daemon uses it to update vm_page_inactive_target.
 *
 *	If vm_page_free_count falls below vm_page_free_target and
 *	vm_page_inactive_count is below vm_page_inactive_target,
 *	then the pageout daemon starts running.
 */

#ifndef	VM_PAGE_INACTIVE_TARGET
#define	VM_PAGE_INACTIVE_TARGET(avail)	((avail) * 1 / 2)
#endif	/* VM_PAGE_INACTIVE_TARGET */
/*
 *	Once the pageout daemon starts running, it keeps going
 *	until vm_page_free_count meets or exceeds vm_page_free_target.
 */

#ifndef	VM_PAGE_FREE_TARGET
#define	VM_PAGE_FREE_TARGET(free)	(15 + (free) / 80)
#endif	/* VM_PAGE_FREE_TARGET */

/*
 *	The pageout daemon always starts running once vm_page_free_count
 *	falls below vm_page_free_min.
 */

#ifndef	VM_PAGE_FREE_MIN
#define	VM_PAGE_FREE_MIN(free)	(10 + (free) / 100)
#endif	/* VM_PAGE_FREE_MIN */

#define VM_PAGE_FREE_RESERVED_LIMIT	1700
#define VM_PAGE_FREE_MIN_LIMIT		3500
#define VM_PAGE_FREE_TARGET_LIMIT	4000
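
/*
 * Worked example (illustrative only, not part of the original source): with
 * 1,000,000 free-able pages, VM_PAGE_FREE_TARGET(free) gives
 * 15 + 1000000/80 = 12,515 pages and VM_PAGE_FREE_MIN(free) gives
 * 10 + 1000000/100 = 10,010 pages, so the daemon wakes once the free list
 * drops to roughly 10K pages and keeps scanning until it climbs back above
 * roughly 12.5K.  The *_LIMIT constants above are fixed bounds applied
 * elsewhere in this file when these formulas are evaluated.
 */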
/*
 *	When vm_page_free_count falls below vm_page_free_reserved,
 *	only vm-privileged threads can allocate pages.  vm-privilege
 *	allows the pageout daemon and default pager (and any other
 *	associated threads needed for default pageout) to continue
 *	operation by dipping into the reserved pool of pages.
 */

#ifndef	VM_PAGE_FREE_RESERVED
#define	VM_PAGE_FREE_RESERVED(n)	\
	((unsigned) (6 * VM_PAGE_LAUNDRY_MAX) + (n))
#endif	/* VM_PAGE_FREE_RESERVED */
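
/*
 * Illustrative arithmetic (not from the original source): with
 * VM_PAGE_LAUNDRY_MAX at its default of 128, VM_PAGE_FREE_RESERVED(n)
 * evaluates to 6 * 128 + n = 768 + n pages, i.e. a fixed cushion of 768
 * pages plus whatever per-configuration slack "n" the caller adds.
 */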
/*
 *	When we dequeue pages from the inactive list, they are
 *	reactivated (ie, put back on the active queue) if referenced.
 *	However, it is possible to starve the free list if other
 *	processors are referencing pages faster than we can turn off
 *	the referenced bit.  So we limit the number of reactivations
 *	we will make per call of vm_pageout_scan().
 */
#define VM_PAGE_REACTIVATE_LIMIT_MAX 20000
#ifndef	VM_PAGE_REACTIVATE_LIMIT
#define	VM_PAGE_REACTIVATE_LIMIT(avail)	(MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
#endif	/* VM_PAGE_REACTIVATE_LIMIT */
#define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM	1000
extern boolean_t hibernate_cleaning_in_progress;

/*
 * Exported variable used to broadcast the activation of the pageout scan
 * Working Set uses this to throttle its use of pmap removes.  In this
 * way, code which runs within memory in an uncontested context does
 * not keep encountering soft faults.
 */

unsigned int	vm_pageout_scan_event_counter = 0;
/*
 * Forward declarations for internal routines.
 */
struct cq {
	struct vm_pageout_queue *q;
};

#define MAX_COMPRESSOR_THREAD_COUNT	8

struct cq ciq[MAX_COMPRESSOR_THREAD_COUNT];

void	*vm_pageout_immediate_chead;
char	*vm_pageout_immediate_scratch_buf;
#if VM_PRESSURE_EVENTS
void vm_pressure_thread(void);

boolean_t VM_PRESSURE_NORMAL_TO_WARNING(void);
boolean_t VM_PRESSURE_WARNING_TO_CRITICAL(void);

boolean_t VM_PRESSURE_WARNING_TO_NORMAL(void);
boolean_t VM_PRESSURE_CRITICAL_TO_WARNING(void);
#endif
static void vm_pageout_garbage_collect(int);
static void vm_pageout_iothread_continue(struct vm_pageout_queue *);
static void vm_pageout_iothread_external(void);
static void vm_pageout_iothread_internal(struct cq *cq);
static void vm_pageout_adjust_io_throttles(struct vm_pageout_queue *, struct vm_pageout_queue *, boolean_t);

extern void vm_pageout_continue(void);
extern void vm_pageout_scan(void);

static void vm_pageout_immediate(vm_page_t, boolean_t);
boolean_t vm_compressor_immediate_preferred = FALSE;
boolean_t vm_compressor_immediate_preferred_override = FALSE;
boolean_t vm_restricted_to_single_processor = FALSE;
static boolean_t vm_pageout_waiter  = FALSE;
static boolean_t vm_pageout_running = FALSE;

static thread_t vm_pageout_external_iothread = THREAD_NULL;
static thread_t vm_pageout_internal_iothread = THREAD_NULL;
unsigned int vm_pageout_reserved_internal = 0;
unsigned int vm_pageout_reserved_really = 0;

unsigned int vm_pageout_swap_wait = 0;
unsigned int vm_pageout_idle_wait = 0;		/* milliseconds */
unsigned int vm_pageout_empty_wait = 0;		/* milliseconds */
unsigned int vm_pageout_burst_wait = 0;		/* milliseconds */
unsigned int vm_pageout_deadlock_wait = 0;	/* milliseconds */
unsigned int vm_pageout_deadlock_relief = 0;
unsigned int vm_pageout_inactive_relief = 0;
unsigned int vm_pageout_burst_active_throttle = 0;
unsigned int vm_pageout_burst_inactive_throttle = 0;

int vm_upl_wait_for_pages = 0;
/*
 *	These variables record the pageout daemon's actions:
 *	how many pages it looks at and what happens to those pages.
 *	No locking needed because only one thread modifies the variables.
 */

unsigned int vm_pageout_active = 0;		/* debugging */
unsigned int vm_pageout_inactive = 0;		/* debugging */
unsigned int vm_pageout_inactive_throttled = 0;	/* debugging */
unsigned int vm_pageout_inactive_forced = 0;	/* debugging */
unsigned int vm_pageout_inactive_nolock = 0;	/* debugging */
unsigned int vm_pageout_inactive_avoid = 0;	/* debugging */
unsigned int vm_pageout_inactive_busy = 0;	/* debugging */
unsigned int vm_pageout_inactive_error = 0;	/* debugging */
unsigned int vm_pageout_inactive_absent = 0;	/* debugging */
unsigned int vm_pageout_inactive_notalive = 0;	/* debugging */
unsigned int vm_pageout_inactive_used = 0;	/* debugging */
unsigned int vm_pageout_cache_evicted = 0;	/* debugging */
unsigned int vm_pageout_inactive_clean = 0;	/* debugging */
unsigned int vm_pageout_speculative_clean = 0;	/* debugging */

unsigned int vm_pageout_freed_from_cleaned = 0;
unsigned int vm_pageout_freed_from_speculative = 0;
unsigned int vm_pageout_freed_from_inactive_clean = 0;

unsigned int vm_pageout_enqueued_cleaned_from_inactive_clean = 0;
unsigned int vm_pageout_enqueued_cleaned_from_inactive_dirty = 0;

unsigned int vm_pageout_cleaned_reclaimed = 0;		/* debugging; how many cleaned pages are reclaimed by the pageout scan */
unsigned int vm_pageout_cleaned_reactivated = 0;	/* debugging; how many cleaned pages are found to be referenced on pageout (and are therefore reactivated) */
unsigned int vm_pageout_cleaned_reference_reactivated = 0;
unsigned int vm_pageout_cleaned_volatile_reactivated = 0;
unsigned int vm_pageout_cleaned_fault_reactivated = 0;
unsigned int vm_pageout_cleaned_commit_reactivated = 0;	/* debugging; how many cleaned pages are found to be referenced on commit (and are therefore reactivated) */
unsigned int vm_pageout_cleaned_busy = 0;
unsigned int vm_pageout_cleaned_nolock = 0;

unsigned int vm_pageout_inactive_dirty_internal = 0;	/* debugging */
unsigned int vm_pageout_inactive_dirty_external = 0;	/* debugging */
unsigned int vm_pageout_inactive_deactivated = 0;	/* debugging */
unsigned int vm_pageout_inactive_anonymous = 0;		/* debugging */
unsigned int vm_pageout_dirty_no_pager = 0;		/* debugging */
unsigned int vm_pageout_purged_objects = 0;		/* used for sysctl vm stats */
unsigned int vm_stat_discard = 0;			/* debugging */
unsigned int vm_stat_discard_sent = 0;			/* debugging */
unsigned int vm_stat_discard_failure = 0;		/* debugging */
unsigned int vm_stat_discard_throttle = 0;		/* debugging */
unsigned int vm_pageout_reactivation_limit_exceeded = 0;	/* debugging */
unsigned int vm_pageout_catch_ups = 0;				/* debugging */
unsigned int vm_pageout_inactive_force_reclaim = 0;	/* debugging */

unsigned int vm_pageout_scan_reclaimed_throttled = 0;
unsigned int vm_pageout_scan_active_throttled = 0;
unsigned int vm_pageout_scan_inactive_throttled_internal = 0;
unsigned int vm_pageout_scan_inactive_throttled_external = 0;
unsigned int vm_pageout_scan_throttle = 0;			/* debugging */
unsigned int vm_pageout_scan_burst_throttle = 0;		/* debugging */
unsigned int vm_pageout_scan_empty_throttle = 0;		/* debugging */
unsigned int vm_pageout_scan_swap_throttle = 0;			/* debugging */
unsigned int vm_pageout_scan_deadlock_detected = 0;		/* debugging */
unsigned int vm_pageout_scan_active_throttle_success = 0;	/* debugging */
unsigned int vm_pageout_scan_inactive_throttle_success = 0;	/* debugging */
unsigned int vm_pageout_inactive_external_forced_jetsam_count = 0;	/* debugging */
unsigned int vm_pageout_scan_throttle_deferred = 0;		/* debugging */
unsigned int vm_pageout_scan_yield_unthrottled = 0;		/* debugging */
unsigned int vm_page_speculative_count_drifts = 0;
unsigned int vm_page_speculative_count_drift_max = 0;
/*
 * Backing store throttle when BS is exhausted
 */
unsigned int	vm_backing_store_low = 0;

unsigned int vm_pageout_out_of_line  = 0;
unsigned int vm_pageout_in_place  = 0;

unsigned int vm_page_steal_pageout_page = 0;

/*
 * counters and statistics...
 */
unsigned long vm_page_decrypt_counter = 0;
unsigned long vm_page_decrypt_for_upl_counter = 0;
unsigned long vm_page_encrypt_counter = 0;
unsigned long vm_page_encrypt_abort_counter = 0;
unsigned long vm_page_encrypt_already_encrypted_counter = 0;
boolean_t vm_pages_encrypted = FALSE; /* are there encrypted pages ? */

struct	vm_pageout_queue vm_pageout_queue_internal;
struct	vm_pageout_queue vm_pageout_queue_external;

unsigned int vm_page_speculative_target = 0;

vm_object_t	vm_pageout_scan_wants_object = VM_OBJECT_NULL;

boolean_t (* volatile consider_buffer_cache_collect)(int) = NULL;

#if DEVELOPMENT || DEBUG
unsigned long vm_cs_validated_resets = 0;
#endif

int	vm_debug_events	= 0;

#if CONFIG_MEMORYSTATUS
#if !CONFIG_JETSAM
extern boolean_t memorystatus_idle_exit_from_VM(void);
#endif
extern boolean_t memorystatus_kill_on_VM_page_shortage(boolean_t async);
extern void memorystatus_on_pageout_scan_end(void);
#endif
/*
 *	Routine:	vm_backing_store_disable
 *	Purpose:
 *		Suspend non-privileged threads wishing to extend
 *		backing store when we are low on backing store
 *		(Synchronized by caller)
 */
void
vm_backing_store_disable(
	boolean_t	disable)
{
	if (disable) {
		vm_backing_store_low = 1;
	} else {
		if (vm_backing_store_low) {
			vm_backing_store_low = 0;
			thread_wakeup((event_t) &vm_backing_store_low);
		}
	}
}
#if MACH_CLUSTER_STATS
unsigned long vm_pageout_cluster_dirtied = 0;
unsigned long vm_pageout_cluster_cleaned = 0;
unsigned long vm_pageout_cluster_collisions = 0;
unsigned long vm_pageout_cluster_clusters = 0;
unsigned long vm_pageout_cluster_conversions = 0;
unsigned long vm_pageout_target_collisions = 0;
unsigned long vm_pageout_target_page_dirtied = 0;
unsigned long vm_pageout_target_page_freed = 0;
#define CLUSTER_STAT(clause)	clause
#else	/* MACH_CLUSTER_STATS */
#define CLUSTER_STAT(clause)
#endif	/* MACH_CLUSTER_STATS */
/*
 *	Routine:	vm_pageout_object_terminate
 *	Purpose:
 *		Destroy the pageout_object, and perform all of the
 *		required cleanup actions.
 *
 *	In/Out conditions:
 *		The object must be locked, and will be returned locked.
 */
void
vm_pageout_object_terminate(
	vm_object_t	object)
{
	vm_object_t	shadow_object;

	/*
	 * Deal with the deallocation (last reference) of a pageout object
	 * (used for cleaning-in-place) by dropping the paging references/
	 * freeing pages in the original object.
	 */

	assert(object->pageout);
	shadow_object = object->shadow;
	vm_object_lock(shadow_object);

	while (!queue_empty(&object->memq)) {
		vm_page_t		p, m;
		vm_object_offset_t	offset;

		p = (vm_page_t) queue_first(&object->memq);

		assert(!p->cleaning);

		offset = p->offset;
		VM_PAGE_FREE(p);
		p = VM_PAGE_NULL;

		m = vm_page_lookup(shadow_object,
			offset + object->vo_shadow_offset);

		if (m == VM_PAGE_NULL)
			continue;

		assert((m->dirty) || (m->precious) ||
		       (m->busy && m->cleaning));

		/*
		 * Handle the trusted pager throttle.
		 * Also decrement the burst throttle (if external).
		 */
		vm_page_lock_queues();
		if (m->pageout_queue)
			vm_pageout_throttle_up(m);

		/*
		 * Handle the "target" page(s). These pages are to be freed if
		 * successfully cleaned. Target pages are always busy, and are
		 * wired exactly once. The initial target pages are not mapped,
		 * (so cannot be referenced or modified) but converted target
		 * pages may have been modified between the selection as an
		 * adjacent page and conversion to a target.
		 */
		if (m->pageout) {
			assert(m->busy);
			assert(m->wire_count == 1);
			m->cleaning = FALSE;
			m->encrypted_cleaning = FALSE;
			m->pageout = FALSE;
#if MACH_CLUSTER_STATS
			if (m->wanted) vm_pageout_target_collisions++;
#endif
			/*
			 * Revoke all access to the page. Since the object is
			 * locked, and the page is busy, this prevents the page
			 * from being dirtied after the pmap_disconnect() call
			 * returns.
			 *
			 * Since the page is left "dirty" but "not modified", we
			 * can detect whether the page was redirtied during
			 * pageout by checking the modify state.
			 */
			if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED) {
				SET_PAGE_DIRTY(m, FALSE);
			} else {
				m->dirty = FALSE;
			}

			if (m->dirty) {
				CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
				vm_page_unwire(m, TRUE);	/* reactivates */
				VM_STAT_INCR(reactivations);
				PAGE_WAKEUP_DONE(m);
			} else {
				CLUSTER_STAT(vm_pageout_target_page_freed++;)
				vm_page_free(m);	/* clears busy, etc. */
			}
			vm_page_unlock_queues();
			continue;
		}
		/*
		 * Handle the "adjacent" pages. These pages were cleaned in
		 * place, and should be left alone.
		 * If prep_pin_count is nonzero, then someone is using the
		 * page, so make it active.
		 */
		if (!m->active && !m->inactive && !m->throttled && !m->private) {
			if (m->reference)
				vm_page_activate(m);
			else
				vm_page_deactivate(m);
		}
		if (m->overwriting) {
			/*
			 * the (COPY_OUT_FROM == FALSE) request_page_list case
			 */
			if (m->busy) {
				/*
				 * We do not re-set m->dirty !
				 * The page was busy so no extraneous activity
				 * could have occurred. COPY_INTO is a read into the
				 * new pages. CLEAN_IN_PLACE does actually write
				 * out the pages but handling outside of this code
				 * will take care of resetting dirty. We clear the
				 * modify however for the Programmed I/O case.
				 */
				pmap_clear_modify(m->phys_page);

				m->busy = FALSE;
				m->absent = FALSE;
			} else {
				/*
				 * alternate (COPY_OUT_FROM == FALSE) request_page_list case
				 * Occurs when the original page was wired
				 * at the time of the list request
				 */
				assert(VM_PAGE_WIRED(m));
				vm_page_unwire(m, TRUE);	/* reactivates */
			}
			m->overwriting = FALSE;
		} else {
			/*
			 * Set the dirty state according to whether or not the page was
			 * modified during the pageout. Note that we purposefully do
			 * NOT call pmap_clear_modify since the page is still mapped.
			 * If the page were to be dirtied between the 2 calls, this
			 * fact would be lost. This code is only necessary to
			 * maintain statistics, since the pmap module is always
			 * consulted if m->dirty is false.
			 */
#if MACH_CLUSTER_STATS
			m->dirty = pmap_is_modified(m->phys_page);

			if (m->dirty)	vm_pageout_cluster_dirtied++;
			else		vm_pageout_cluster_cleaned++;
			if (m->wanted)	vm_pageout_cluster_collisions++;
#else
			m->dirty = FALSE;
#endif
		}
		if (m->encrypted_cleaning == TRUE) {
			m->encrypted_cleaning = FALSE;
			m->busy = FALSE;
		}
		m->cleaning = FALSE;

		/*
		 * Wakeup any thread waiting for the page to be un-cleaning.
		 */
		PAGE_WAKEUP(m);
		vm_page_unlock_queues();
	}
	/*
	 * Account for the paging reference taken in vm_paging_object_allocate.
	 */
	vm_object_activity_end(shadow_object);
	vm_object_unlock(shadow_object);

	assert(object->ref_count == 0);
	assert(object->paging_in_progress == 0);
	assert(object->activity_in_progress == 0);
	assert(object->resident_page_count == 0);
	return;
}
/*
 *	Routine:	vm_pageclean_setup
 *
 *	Purpose:	setup a page to be cleaned (made non-dirty), but not
 *			necessarily flushed from the VM page cache.
 *			This is accomplished by cleaning in place.
 *
 *			The page must not be busy, and new_object
 *			must be locked.
 */
static void
vm_pageclean_setup(
	vm_page_t		m,
	vm_page_t		new_m,
	vm_object_t		new_object,
	vm_object_offset_t	new_offset)
{
	assert(!m->busy);
	assert(!m->cleaning);

	XPR(XPR_VM_PAGEOUT,
	    "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
	    m->object, m->offset, m,
	    new_m, new_offset);

	pmap_clear_modify(m->phys_page);

	/*
	 * Mark original page as cleaning in place.
	 */
	m->cleaning = TRUE;
	SET_PAGE_DIRTY(m, FALSE);

	/*
	 * Convert the fictitious page to a private shadow of
	 * the real page.
	 */
	assert(new_m->fictitious);
	assert(new_m->phys_page == vm_page_fictitious_addr);
	new_m->fictitious = FALSE;
	new_m->private = TRUE;
	new_m->pageout = TRUE;
	new_m->phys_page = m->phys_page;

	vm_page_lockspin_queues();
	vm_page_wire(new_m, VM_KERN_MEMORY_NONE, TRUE);
	vm_page_unlock_queues();

	vm_page_insert_wired(new_m, new_object, new_offset, VM_KERN_MEMORY_NONE);
	assert(!new_m->wanted);
	new_m->busy = FALSE;
}
/*
 *	Routine:	vm_pageout_initialize_page
 *	Purpose:
 *		Causes the specified page to be initialized in
 *		the appropriate memory object. This routine is used to push
 *		pages into a copy-object when they are modified in the
 *		permanent object.
 *
 *		The page is moved to a temporary object and paged out.
 *
 *	In/out conditions:
 *		The page in question must not be on any pageout queues.
 *		The object to which it belongs must be locked.
 *		The page must be busy, but not hold a paging reference.
 *
 *	Implementation:
 *		Move this page to a completely new object.
 */
void
vm_pageout_initialize_page(
	vm_page_t	m)
{
	vm_object_t		object;
	vm_object_offset_t	paging_offset;
	memory_object_t		pager;

	XPR(XPR_VM_PAGEOUT,
	    "vm_pageout_initialize_page, page 0x%X\n",
	    m, 0, 0, 0, 0);
	assert(m->busy);

	/*
	 *	Verify that we really want to clean this page
	 */
	assert(!m->absent);
	assert(!m->error);
	assert(m->dirty);

	/*
	 *	Create a paging reference to let us play with the object.
	 */
	object = m->object;
	paging_offset = m->offset + object->paging_offset;

	if (m->absent || m->error || m->restart || (!m->dirty && !m->precious)) {
		VM_PAGE_FREE(m);
		panic("reservation without pageout?"); /* alan */
		vm_object_unlock(object);

		return;
	}

	/*
	 * If there's no pager, then we can't clean the page.  This should
	 * never happen since this should be a copy object and therefore not
	 * an external object, so the pager should always be there.
	 */

	pager = object->pager;

	if (pager == MEMORY_OBJECT_NULL) {
		VM_PAGE_FREE(m);
		panic("missing pager for copy object");
		return;
	}

	/*
	 * set the page for future call to vm_fault_list_request
	 */
	pmap_clear_modify(m->phys_page);
	SET_PAGE_DIRTY(m, FALSE);

	/*
	 * keep the object from collapsing or terminating
	 */
	vm_object_paging_begin(object);
	vm_object_unlock(object);

	/*
	 *	Write the data to its pager.
	 *	Note that the data is passed by naming the new object,
	 *	not a virtual address; the pager interface has been
	 *	manipulated to use the "internal memory" data type.
	 *	[The object reference from its allocation is donated
	 *	to the eventual recipient.]
	 */
	memory_object_data_initialize(pager, paging_offset, PAGE_SIZE);

	vm_object_lock(object);
	vm_object_paging_end(object);
}
#if	MACH_CLUSTER_STATS
#define MAXCLUSTERPAGES	16
struct {
	unsigned long pages_in_cluster;
	unsigned long pages_at_higher_offsets;
	unsigned long pages_at_lower_offsets;
} cluster_stats[MAXCLUSTERPAGES];
#endif	/* MACH_CLUSTER_STATS */
/*
 * vm_pageout_cluster:
 *
 * Given a page, queue it to the appropriate I/O thread,
 * which will page it out and attempt to clean adjacent pages
 * in the same operation.
 *
 * The object and queues must be locked. We will take a
 * paging reference to prevent deallocation or collapse when we
 * release the object lock back at the call site.  The I/O thread
 * is responsible for consuming this reference
 *
 * The page must not be on any pageout queue.
 */

int
vm_pageout_cluster(vm_page_t m, boolean_t pageout, boolean_t immediate_ok, boolean_t keep_object_locked)
{
	vm_object_t	object = m->object;
	struct		vm_pageout_queue *q;

	XPR(XPR_VM_PAGEOUT,
	    "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
	    object, m->offset, m, 0, 0);

	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	vm_object_lock_assert_exclusive(object);

	/*
	 * Only a certain kind of page is appreciated here.
	 */
	assert((m->dirty || m->precious) && (!VM_PAGE_WIRED(m)));
	assert(!m->cleaning && !m->pageout && !m->laundry);
#ifndef CONFIG_FREEZE
	assert(!m->inactive && !m->active);
	assert(!m->throttled);
#endif

	/*
	 * protect the object from collapse or termination
	 */
	vm_object_activity_begin(object);

	m->pageout = pageout;

	if (object->internal == TRUE) {
		if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
			m->busy = TRUE;

			if (vm_compressor_immediate_preferred == TRUE && immediate_ok == TRUE) {
				if (keep_object_locked == FALSE)
					vm_object_unlock(object);
				vm_page_unlock_queues();

				vm_pageout_immediate(m, keep_object_locked);

				return (1);
			}
		}
		q = &vm_pageout_queue_internal;
	} else
		q = &vm_pageout_queue_external;

	/*
	 * pgo_laundry count is tied to the laundry bit
	 */
	m->laundry = TRUE;
	q->pgo_laundry++;

	m->pageout_queue = TRUE;
	queue_enter(&q->pgo_pending, m, vm_page_t, pageq);

	if (q->pgo_idle == TRUE) {
		q->pgo_idle = FALSE;
		thread_wakeup((event_t) &q->pgo_pending);
	}
	return (0);
}
unsigned long vm_pageout_throttle_up_count = 0;

/*
 * A page is back from laundry or we are stealing it back from
 * the laundering state.  See if there are some pages waiting to
 * go to laundry and if we can let some of them go now.
 *
 * Object and page queues must be locked.
 */
void
vm_pageout_throttle_up(
	vm_page_t	m)
{
	struct vm_pageout_queue *q;

	assert(m->object != VM_OBJECT_NULL);
	assert(m->object != kernel_object);

	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	vm_object_lock_assert_exclusive(m->object);

	vm_pageout_throttle_up_count++;

	if (m->object->internal == TRUE)
		q = &vm_pageout_queue_internal;
	else
		q = &vm_pageout_queue_external;

	if (m->pageout_queue == TRUE) {

		queue_remove(&q->pgo_pending, m, vm_page_t, pageq);
		m->pageout_queue = FALSE;

		m->pageq.next = NULL;
		m->pageq.prev = NULL;

		vm_object_activity_end(m->object);
	}
	if (m->laundry == TRUE) {

		m->laundry = FALSE;
		q->pgo_laundry--;

		if (q->pgo_throttled == TRUE) {
			q->pgo_throttled = FALSE;
			thread_wakeup((event_t) &q->pgo_laundry);
		}
		if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
			q->pgo_draining = FALSE;
			thread_wakeup((event_t) (&q->pgo_laundry+1));
		}
	}
}
static void
vm_pageout_throttle_up_batch(
	struct vm_pageout_queue *q,
	int		batch_cnt)
{
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	vm_pageout_throttle_up_count += batch_cnt;

	q->pgo_laundry -= batch_cnt;

	if (q->pgo_throttled == TRUE) {
		q->pgo_throttled = FALSE;
		thread_wakeup((event_t) &q->pgo_laundry);
	}
	if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
		q->pgo_draining = FALSE;
		thread_wakeup((event_t) (&q->pgo_laundry+1));
	}
}
/*
 * VM memory pressure monitoring.
 *
 * vm_pageout_scan() keeps track of the number of pages it considers and
 * reclaims, in the currently active vm_pageout_stat[vm_pageout_stat_now].
 *
 * compute_memory_pressure() is called every second from compute_averages()
 * and moves "vm_pageout_stat_now" forward, to start accumulating the number
 * of reclaimed pages in a new vm_pageout_stat[] bucket.
 *
 * mach_vm_pressure_monitor() collects past statistics about memory pressure.
 * The caller provides the number of seconds ("nsecs") worth of statistics
 * it wants, up to 30 seconds.
 * It computes the number of pages reclaimed in the past "nsecs" seconds and
 * also returns the number of pages the system still needs to reclaim at this
 * moment in time.
 */
#define VM_PAGEOUT_STAT_SIZE	31
struct vm_pageout_stat {
	unsigned int considered;
	unsigned int reclaimed;
} vm_pageout_stats[VM_PAGEOUT_STAT_SIZE] = {{0,0}, };
unsigned int vm_pageout_stat_now = 0;
unsigned int vm_memory_pressure = 0;

#define VM_PAGEOUT_STAT_BEFORE(i) \
	(((i) == 0) ? VM_PAGEOUT_STAT_SIZE - 1 : (i) - 1)
#define VM_PAGEOUT_STAT_AFTER(i) \
	(((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 0 : (i) + 1)

#if VM_PAGE_BUCKETS_CHECK
int vm_page_buckets_check_interval = 10; /* in seconds */
#endif /* VM_PAGE_BUCKETS_CHECK */
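
/*
 * Illustrative only (not part of the original source): the two macros above
 * implement a 31-slot ring.  For example, with vm_pageout_stat_now at 0,
 * VM_PAGEOUT_STAT_BEFORE(0) yields 30 and VM_PAGEOUT_STAT_AFTER(30) wraps
 * back to 0, so compute_memory_pressure() can advance "now" once a second
 * while mach_vm_pressure_monitor() walks backwards through at most 30
 * seconds of completed history.
 */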
/*
 * Called from compute_averages().
 */
void
compute_memory_pressure(
	__unused void *arg)
{
	unsigned int vm_pageout_next;

#if VM_PAGE_BUCKETS_CHECK
	/* check the consistency of VM page buckets at regular interval */
	static int counter = 0;
	if ((++counter % vm_page_buckets_check_interval) == 0) {
		vm_page_buckets_check();
	}
#endif /* VM_PAGE_BUCKETS_CHECK */

	vm_memory_pressure =
		vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].reclaimed;

	commpage_set_memory_pressure( vm_memory_pressure );

	/* move "now" forward */
	vm_pageout_next = VM_PAGEOUT_STAT_AFTER(vm_pageout_stat_now);
	vm_pageout_stats[vm_pageout_next].considered = 0;
	vm_pageout_stats[vm_pageout_next].reclaimed = 0;
	vm_pageout_stat_now = vm_pageout_next;
}
/*
 * mach_vm_ctl_page_free_wanted() is called indirectly, via
 * mach_vm_pressure_monitor(), when taking a stackshot.  Therefore,
 * it must be safe in the restricted stackshot context.  Locks and/or
 * blocking are not allowable.
 */
unsigned int
mach_vm_ctl_page_free_wanted(void)
{
	unsigned int page_free_target, page_free_count, page_free_wanted;

	page_free_target = vm_page_free_target;
	page_free_count = vm_page_free_count;
	if (page_free_target > page_free_count) {
		page_free_wanted = page_free_target - page_free_count;
	} else {
		page_free_wanted = 0;
	}

	return page_free_wanted;
}
/*
 * mach_vm_pressure_monitor() is called when taking a stackshot, with
 * wait_for_pressure FALSE, so that code path must remain safe in the
 * restricted stackshot context.  No blocking or locks are allowable
 * on that code path.
 */
kern_return_t
mach_vm_pressure_monitor(
	boolean_t	wait_for_pressure,
	unsigned int	nsecs_monitored,
	unsigned int	*pages_reclaimed_p,
	unsigned int	*pages_wanted_p)
{
	wait_result_t	wr;
	unsigned int	vm_pageout_then, vm_pageout_now;
	unsigned int	pages_reclaimed;

	/*
	 * We don't take the vm_page_queue_lock here because we don't want
	 * vm_pressure_monitor() to get in the way of the vm_pageout_scan()
	 * thread when it's trying to reclaim memory.  We don't need fully
	 * accurate monitoring anyway...
	 */

	if (wait_for_pressure) {
		/* wait until there's memory pressure */
		while (vm_page_free_count >= vm_page_free_target) {
			wr = assert_wait((event_t) &vm_page_free_wanted,
					 THREAD_INTERRUPTIBLE);
			if (wr == THREAD_WAITING) {
				wr = thread_block(THREAD_CONTINUE_NULL);
			}
			if (wr == THREAD_INTERRUPTED) {
				return KERN_ABORTED;
			}
			if (wr == THREAD_AWAKENED) {
				/*
				 * The memory pressure might have already
				 * been relieved but let's not block again
				 * and let's report that there was memory
				 * pressure at some point.
				 */
				break;
			}
		}
	}

	/* provide the number of pages the system wants to reclaim */
	if (pages_wanted_p != NULL) {
		*pages_wanted_p = mach_vm_ctl_page_free_wanted();
	}

	if (pages_reclaimed_p == NULL) {
		return KERN_SUCCESS;
	}

	/* provide number of pages reclaimed in the last "nsecs_monitored" */
	do {
		vm_pageout_now = vm_pageout_stat_now;
		pages_reclaimed = 0;
		for (vm_pageout_then =
			     VM_PAGEOUT_STAT_BEFORE(vm_pageout_now);
		     vm_pageout_then != vm_pageout_now &&
			     nsecs_monitored-- != 0;
		     vm_pageout_then =
			     VM_PAGEOUT_STAT_BEFORE(vm_pageout_then)) {
			pages_reclaimed += vm_pageout_stats[vm_pageout_then].reclaimed;
		}
	} while (vm_pageout_now != vm_pageout_stat_now);
	*pages_reclaimed_p = pages_reclaimed;

	return KERN_SUCCESS;
}
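
/*
 * Hypothetical usage sketch (not part of the original source, kept under
 * "#if 0" so it is never compiled): a kernel-resident caller that wants the
 * reclaim rate over the last 10 seconds plus the current shortfall could
 * invoke the routine above roughly like this.  The function name below is
 * an illustration only.
 */
#if 0
static void
vm_pressure_monitor_example(void)
{
	unsigned int	pages_reclaimed = 0;
	unsigned int	pages_wanted = 0;
	kern_return_t	kr;

	/* don't block: wait_for_pressure == FALSE, sample 10 seconds of history */
	kr = mach_vm_pressure_monitor(FALSE, 10, &pages_reclaimed, &pages_wanted);

	if (kr == KERN_SUCCESS) {
		printf("vm_pressure: reclaimed %u pages in ~10s, still want %u\n",
		       pages_reclaimed, pages_wanted);
	}
}
#endif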
void vm_pageout_page_queue(queue_head_t *, int);

/*
 * condition variable used to make sure there is
 * only a single sweep going on at a time
 */
boolean_t	vm_pageout_anonymous_pages_active = FALSE;
void
vm_pageout_anonymous_pages()
{
	if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {

		vm_page_lock_queues();

		if (vm_pageout_anonymous_pages_active == TRUE) {
			vm_page_unlock_queues();
			return;
		}
		vm_pageout_anonymous_pages_active = TRUE;
		vm_page_unlock_queues();

		vm_pageout_page_queue(&vm_page_queue_throttled, vm_page_throttled_count);
		vm_pageout_page_queue(&vm_page_queue_anonymous, vm_page_anonymous_count);
		vm_pageout_page_queue(&vm_page_queue_active, vm_page_active_count);

		vm_consider_swapping();

		vm_page_lock_queues();
		vm_pageout_anonymous_pages_active = FALSE;
		vm_page_unlock_queues();
	}
}
void
vm_pageout_page_queue(queue_head_t *q, int qcount)
{
	vm_page_t	m;
	vm_object_t	t_object = NULL;
	vm_object_t	l_object = NULL;
	vm_object_t	m_object = NULL;
	int		delayed_unlock = 0;
	int		try_failed_count = 0;
	int		refmod_state;
	int		pmap_options;
	struct		vm_pageout_queue *iq;

	iq = &vm_pageout_queue_internal;

	vm_page_lock_queues();

	while (qcount && !queue_empty(q)) {

		lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

		if (VM_PAGE_Q_THROTTLED(iq)) {

			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			iq->pgo_draining = TRUE;

			assert_wait((event_t) (&iq->pgo_laundry + 1), THREAD_INTERRUPTIBLE);
			vm_page_unlock_queues();

			thread_block(THREAD_CONTINUE_NULL);

			vm_page_lock_queues();
			delayed_unlock = 0;
			continue;
		}
		m = (vm_page_t) queue_first(q);
		m_object = m->object;

		/*
		 * check to see if we currently are working
		 * with the same object... if so, we've
		 * already got the lock
		 */
		if (m_object != l_object) {
			if ( !m_object->internal)
				goto reenter_pg_on_q;

			/*
			 * the object associated with candidate page is
			 * different from the one we were just working
			 * with... dump the lock if we still own it
			 */
			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			if (m_object != t_object)
				try_failed_count = 0;

			/*
			 * Try to lock object; since we've alread got the
			 * page queues lock, we can only 'try' for this one.
			 * if the 'try' fails, we need to do a mutex_pause
			 * to allow the owner of the object lock a chance to
			 * run...
			 */
			if ( !vm_object_lock_try_scan(m_object)) {

				if (try_failed_count > 20) {
					goto reenter_pg_on_q;
				}
				vm_page_unlock_queues();
				mutex_pause(try_failed_count++);
				vm_page_lock_queues();
				delayed_unlock = 0;

				t_object = m_object;
				continue;
			}
			l_object = m_object;
		}
		if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error || m->pageout) {
			/*
			 * page is not to be cleaned
			 * put it back on the head of its queue
			 */
			goto reenter_pg_on_q;
		}
		if (m->reference == FALSE && m->pmapped == TRUE) {
			refmod_state = pmap_get_refmod(m->phys_page);

			if (refmod_state & VM_MEM_REFERENCED)
				m->reference = TRUE;
			if (refmod_state & VM_MEM_MODIFIED) {
				SET_PAGE_DIRTY(m, FALSE);
			}
		}
		if (m->reference == TRUE) {
			m->reference = FALSE;
			pmap_clear_refmod_options(m->phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
			goto reenter_pg_on_q;
		}
		if (m->pmapped == TRUE) {
			if (m->dirty || m->precious) {
				pmap_options = PMAP_OPTIONS_COMPRESSOR;
			} else {
				pmap_options = PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
			}
			refmod_state = pmap_disconnect_options(m->phys_page, pmap_options, NULL);
			if (refmod_state & VM_MEM_MODIFIED) {
				SET_PAGE_DIRTY(m, FALSE);
			}
		}
		if ( !m->dirty && !m->precious) {
			vm_page_unlock_queues();
			VM_PAGE_FREE(m);
			vm_page_lock_queues();
			delayed_unlock = 0;

			goto next_pg;
		}
		if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL) {

			if (!m_object->pager_initialized) {

				vm_page_unlock_queues();

				vm_object_collapse(m_object, (vm_object_offset_t) 0, TRUE);

				if (!m_object->pager_initialized)
					vm_object_compressor_pager_create(m_object);

				vm_page_lock_queues();
				delayed_unlock = 0;
			}
			if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL)
				goto reenter_pg_on_q;
			/*
			 * vm_object_compressor_pager_create will drop the object lock
			 * which means 'm' may no longer be valid to use
			 */
			continue;
		}
		/*
		 * we've already factored out pages in the laundry which
		 * means this page can't be on the pageout queue so it's
		 * safe to do the vm_page_queues_remove
		 */
		assert(!m->pageout_queue);

		vm_page_queues_remove(m);

		lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

		vm_pageout_cluster(m, TRUE, FALSE, FALSE);

		goto next_pg;

reenter_pg_on_q:
		queue_remove(q, m, vm_page_t, pageq);
		queue_enter(q, m, vm_page_t, pageq);
next_pg:
		qcount--;
		try_failed_count = 0;

		if (delayed_unlock++ > 128) {

			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			lck_mtx_yield(&vm_page_queue_lock);
			delayed_unlock = 0;
		}
	}
	if (l_object != NULL) {
		vm_object_unlock(l_object);
		l_object = NULL;
	}
	vm_page_unlock_queues();
}
/*
 * function in BSD to apply I/O throttle to the pageout thread
 */
extern void vm_pageout_io_throttle(void);

/*
 * Page States: Used below to maintain the page state
 * before it's removed from it's Q. This saved state
 * helps us do the right accounting in certain cases
 */
#define PAGE_STATE_SPECULATIVE		1
#define PAGE_STATE_ANONYMOUS		2
#define PAGE_STATE_INACTIVE		3
#define PAGE_STATE_INACTIVE_FIRST	4
#define PAGE_STATE_CLEAN		5

#define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m)				\
	MACRO_BEGIN							\
	/*								\
	 * If a "reusable" page somehow made it back into		\
	 * the active queue, it's been re-used and is not		\
	 * quite re-usable.						\
	 * If the VM object was "all_reusable", consider it		\
	 * as "all re-used" instead of converting it to			\
	 * "partially re-used", which could be expensive.		\
	 */								\
	if ((m)->reusable ||						\
	    (m)->object->all_reusable) {				\
		vm_object_reuse_pages((m)->object,			\
				      (m)->offset,			\
				      (m)->offset + PAGE_SIZE_64,	\
				      FALSE);				\
	}								\
	MACRO_END

#define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT		64
#define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX	1024

#define	FCS_IDLE		0
#define FCS_DELAYED		1
#define FCS_DEADLOCK_DETECTED	2

struct flow_control {
	int		state;
	mach_timespec_t	ts;
};

uint32_t vm_pageout_considered_page = 0;
uint32_t vm_page_filecache_min = 0;

#define ANONS_GRABBED_LIMIT	2
/*
 *	vm_pageout_scan does the dirty work for the pageout daemon.
 *	It returns with both vm_page_queue_free_lock and vm_page_queue_lock
 *	held and vm_page_free_wanted == 0.
 */
void
vm_pageout_scan(void)
{
	unsigned int loop_count = 0;
	unsigned int inactive_burst_count = 0;
	unsigned int active_burst_count = 0;
	unsigned int reactivated_this_call;
	unsigned int reactivate_limit;
	vm_page_t   local_freeq = NULL;
	int         local_freed = 0;
	int         delayed_unlock = 0;
	int         delayed_unlock_limit = 0;
	int         refmod_state = 0;
	int	vm_pageout_deadlock_target = 0;
	struct	vm_pageout_queue *iq;
	struct	vm_pageout_queue *eq;
	struct	vm_speculative_age_q *sq;
	struct  flow_control	flow_control = { 0, { 0, 0 } };
	boolean_t inactive_throttled = FALSE;
	boolean_t try_failed;
	unsigned	int msecs = 0;
	vm_object_t	object;
	vm_object_t	last_object_tried;
	uint32_t	catch_up_count = 0;
	uint32_t	inactive_reclaim_run;
	boolean_t	forced_reclaim;
	boolean_t	exceeded_burst_throttle;
	boolean_t	grab_anonymous = FALSE;
	boolean_t	force_anonymous = FALSE;
	int		anons_grabbed = 0;
	int		page_prev_state = 0;
	int		cache_evict_throttle = 0;
	uint32_t	vm_pageout_inactive_external_forced_reactivate_limit = 0;
	int		force_purge = 0;
#define DELAY_SPECULATIVE_AGE	1000
	int		delay_speculative_age = 0;

#if VM_PRESSURE_EVENTS
	vm_pressure_level_t pressure_level;
#endif /* VM_PRESSURE_EVENTS */
	VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START,
		       vm_pageout_speculative_clean, vm_pageout_inactive_clean,
		       vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);

	flow_control.state = FCS_IDLE;
	iq = &vm_pageout_queue_internal;
	eq = &vm_pageout_queue_external;
	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];


	XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);

	vm_page_lock_queues();
	delayed_unlock = 1;	/* must be nonzero if Qs are locked, 0 if unlocked */

	/*
	 *	Calculate the max number of referenced pages on the inactive
	 *	queue that we will reactivate.
	 */
	reactivated_this_call = 0;
	reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count +
						    vm_page_inactive_count);
	inactive_reclaim_run = 0;

	vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;

	/*
	 *	We want to gradually dribble pages from the active queue
	 *	to the inactive queue.  If we let the inactive queue get
	 *	very small, and then suddenly dump many pages into it,
	 *	those pages won't get a sufficient chance to be referenced
	 *	before we start taking them from the inactive queue.
	 *
	 *	We must limit the rate at which we send pages to the pagers
	 *	so that we don't tie up too many pages in the I/O queues.
	 *	We implement a throttling mechanism using the laundry count
	 *	to limit the number of pages outstanding to the default
	 *	and external pagers.  We can bypass the throttles and look
	 *	for clean pages if the pageout queues don't drain in a timely
	 *	fashion since this may indicate that the pageout paths are
	 *	stalled waiting for memory, which only we can provide.
	 */

Restart:

	assert(delayed_unlock != 0);

	/*
	 *	Recalculate vm_page_inactivate_target.
	 */
	vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
							  vm_page_inactive_count +
							  vm_page_speculative_count);

	vm_page_anonymous_min = vm_page_inactive_target / 20;

	/*
	 * don't want to wake the pageout_scan thread up every time we fall below
	 * the targets... set a low water mark at 0.25% below the target
	 */
	vm_page_inactive_min = vm_page_inactive_target - (vm_page_inactive_target / 400);

	if (vm_page_speculative_percentage > 50)
		vm_page_speculative_percentage = 50;
	else if (vm_page_speculative_percentage <= 0)
		vm_page_speculative_percentage = 1;

	vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
								vm_page_inactive_count);

	object = NULL;
	last_object_tried = NULL;
	try_failed = FALSE;

	if ((vm_page_inactive_count + vm_page_speculative_count) < VM_PAGE_INACTIVE_HEALTHY_LIMIT(vm_page_active_count))
		catch_up_count = vm_page_inactive_count + vm_page_speculative_count;
	else
		catch_up_count = 0;
	for (;;) {
		vm_page_t m;

		DTRACE_VM2(rev, int, 1, (uint64_t *), NULL);

		assert(delayed_unlock);

		if (vm_upl_wait_for_pages < 0)
			vm_upl_wait_for_pages = 0;

		delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT + vm_upl_wait_for_pages;

		if (delayed_unlock_limit > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX)
			delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX;

		/*
		 * Move pages from active to inactive if we're below the target
		 */
		/* if we are trying to make clean, we need to make sure we actually have inactive - mj */
		if ((vm_page_inactive_count + vm_page_speculative_count) >= vm_page_inactive_target)
			goto done_moving_active_pages;

		if (object != NULL) {
			vm_object_unlock(object);
			object = NULL;
			vm_pageout_scan_wants_object = VM_OBJECT_NULL;
		}
		/*
		 * Don't sweep through active queue more than the throttle
		 * which should be kept relatively low
		 */
		active_burst_count = MIN(vm_pageout_burst_active_throttle, vm_page_active_count);

		VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_START,
			       vm_pageout_inactive, vm_pageout_inactive_used, vm_page_free_count, local_freed);

		VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_NONE,
			       vm_pageout_speculative_clean, vm_pageout_inactive_clean,
			       vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
		memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_START);
		while (!queue_empty(&vm_page_queue_active) && active_burst_count--) {

			vm_pageout_active++;

			m = (vm_page_t) queue_first(&vm_page_queue_active);

			assert(m->active && !m->inactive);
			assert(!m->laundry);
			assert(m->object != kernel_object);
			assert(m->phys_page != vm_page_guard_addr);

			DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);

			/*
			 * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise...
			 *
			 * a TLB flush isn't really needed here since at worst we'll miss the reference bit being
			 * updated in the PTE if a remote processor still has this mapping cached in its TLB when the
			 * new reference happens. If no further references happen on the page after that remote TLB flushes
			 * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue
			 * by pageout_scan, which is just fine since the last reference would have happened quite far
			 * in the past (TLB caches don't hang around for very long), and of course could just as easily
			 * have happened before we moved the page
			 */
			pmap_clear_refmod_options(m->phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);

			/*
			 * The page might be absent or busy,
			 * but vm_page_deactivate can handle that.
			 * FALSE indicates that we don't want a H/W clear reference
			 */
			vm_page_deactivate_internal(m, FALSE);

			if (delayed_unlock++ > delayed_unlock_limit) {

				if (local_freeq) {
					vm_page_unlock_queues();

					VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
						       vm_page_free_count, local_freed, delayed_unlock_limit, 1);

					vm_page_free_list(local_freeq, TRUE);

					VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
						       vm_page_free_count, 0, 0, 1);

					local_freeq = NULL;
					local_freed = 0;
					vm_page_lock_queues();
				} else {
					lck_mtx_yield(&vm_page_queue_lock);
				}
				delayed_unlock = 1;

				/*
				 * continue the while loop processing
				 * the active queue... need to hold
				 * the page queues lock
				 */
			}
		}

		VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_END,
			       vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count, vm_page_inactive_target);
		memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_END);
		/**********************************************************************
		 * above this point we're playing with the active queue
		 * below this point we're playing with the throttling mechanisms
		 * and the inactive queue
		 **********************************************************************/

done_moving_active_pages:

		if (vm_page_free_count + local_freed >= vm_page_free_target) {
			if (object != NULL) {
				vm_object_unlock(object);
				object = NULL;
			}
			vm_pageout_scan_wants_object = VM_OBJECT_NULL;

			vm_page_unlock_queues();

			if (local_freeq) {

				VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
					       vm_page_free_count, local_freed, delayed_unlock_limit, 2);

				vm_page_free_list(local_freeq, TRUE);

				VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
					       vm_page_free_count, local_freed, 0, 2);

				local_freeq = NULL;
				local_freed = 0;
			}
			vm_consider_waking_compactor_swapper();

			vm_page_lock_queues();

			/*
			 * make sure the pageout I/O threads are running
			 * throttled in case there are still requests
			 * in the laundry... since we have met our targets
			 * we don't need the laundry to be cleaned in a timely
			 * fashion... so let's avoid interfering with foreground
			 * activity
			 */
			vm_pageout_adjust_io_throttles(iq, eq, TRUE);

			/*
			 * recalculate vm_page_inactivate_target
			 */
			vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
									  vm_page_inactive_count +
									  vm_page_speculative_count);
			if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) &&
			    !queue_empty(&vm_page_queue_active)) {
				/*
				 * inactive target still not met... keep going
				 * until we get the queues balanced...
				 */
				continue;
			}
			lck_mtx_lock(&vm_page_queue_free_lock);

			if ((vm_page_free_count >= vm_page_free_target) &&
			    (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
				/*
				 * done - we have met our target *and*
				 * there is no one waiting for a page.
				 */
				assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);

				VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE,
					       vm_pageout_inactive, vm_pageout_inactive_used, 0, 0);
				VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END,
					       vm_pageout_speculative_clean, vm_pageout_inactive_clean,
					       vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);

				return;
			}
			lck_mtx_unlock(&vm_page_queue_free_lock);
		}
		/*
		 * Before anything, we check if we have any ripe volatile
		 * objects around. If so, try to purge the first object.
		 * If the purge fails, fall through to reclaim a page instead.
		 * If the purge succeeds, go back to the top and reevalute
		 * the new memory situation.
		 */

		assert (available_for_purge >= 0);
		force_purge = 0; /* no force-purging */

#if VM_PRESSURE_EVENTS
		pressure_level = memorystatus_vm_pressure_level;

		if (pressure_level > kVMPressureNormal) {

			if (pressure_level >= kVMPressureCritical) {
				force_purge = memorystatus_purge_on_critical;
			} else if (pressure_level >= kVMPressureUrgent) {
				force_purge = memorystatus_purge_on_urgent;
			} else if (pressure_level >= kVMPressureWarning) {
				force_purge = memorystatus_purge_on_warning;
			}
		}
#endif /* VM_PRESSURE_EVENTS */

		if (available_for_purge || force_purge) {

			if (object != NULL) {
				vm_object_unlock(object);
				object = NULL;
			}

			memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_START);

			VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0);
			if (vm_purgeable_object_purge_one(force_purge, C_DONT_BLOCK)) {
				vm_pageout_purged_objects++;
				VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0);
				memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
				continue;
			}
			VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1);
			memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
		}
		if (queue_empty(&sq->age_q) && vm_page_speculative_count) {
			/*
			 * try to pull pages from the aging bins...
			 * see vm_page.h for an explanation of how
			 * this mechanism works
			 */
			struct vm_speculative_age_q	*aq;
			boolean_t	can_steal = FALSE;
			int		num_scanned_queues;

			aq = &vm_page_queue_speculative[speculative_steal_index];

			num_scanned_queues = 0;
			while (queue_empty(&aq->age_q) &&
			       num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) {

				speculative_steal_index++;

				if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
					speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;

				aq = &vm_page_queue_speculative[speculative_steal_index];
			}

			if (num_scanned_queues == VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) {
				/*
				 * XXX We've scanned all the speculative
				 * queues but still haven't found one
				 * that is not empty, even though
				 * vm_page_speculative_count is not 0.
				 *
				 * report the anomaly...
				 */
				printf("vm_pageout_scan: "
				       "all speculative queues empty "
				       "but count=%d.  Re-adjusting.\n",
				       vm_page_speculative_count);
				if (vm_page_speculative_count > vm_page_speculative_count_drift_max)
					vm_page_speculative_count_drift_max = vm_page_speculative_count;
				vm_page_speculative_count_drifts++;
#if DEVELOPMENT || DEBUG
				Debugger("vm_pageout_scan: no speculative pages");
#endif
				vm_page_speculative_count = 0;
				/* ... and continue */
			}

			if (vm_page_speculative_count > vm_page_speculative_target)
				can_steal = TRUE;
			else {
				if (!delay_speculative_age) {
					mach_timespec_t	ts_fully_aged;

					ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) / 1000;
					ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) % 1000)
								* 1000 * NSEC_PER_USEC;

					ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);

					{
						clock_sec_t	sec;
						clock_nsec_t	nsec;
						mach_timespec_t	ts;

						clock_get_system_nanotime(&sec, &nsec);
						ts.tv_sec = (unsigned int) sec;
						ts.tv_nsec = nsec;

						if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0)
							can_steal = TRUE;
						else
							delay_speculative_age++;
					}
				} else {
					delay_speculative_age++;
					if (delay_speculative_age == DELAY_SPECULATIVE_AGE)
						delay_speculative_age = 0;
				}
			}
			if (can_steal == TRUE)
				vm_page_speculate_ageit(aq);
		}
		if (queue_empty(&sq->age_q) && cache_evict_throttle == 0) {
			int	pages_evicted;

			if (object != NULL) {
				vm_object_unlock(object);
				object = NULL;
			}
			pages_evicted = vm_object_cache_evict(100, 10);

			if (pages_evicted) {

				vm_pageout_cache_evicted += pages_evicted;

				VM_DEBUG_EVENT(vm_pageout_cache_evict, VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE,
					       vm_page_free_count, pages_evicted, vm_pageout_cache_evicted, 0);
				memoryshot(VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE);

				/*
				 * we just freed up to 100 pages,
				 * so go back to the top of the main loop
				 * and re-evaulate the memory situation
				 */
				continue;
			} else
				cache_evict_throttle = 100;
		}
		if (cache_evict_throttle)
			cache_evict_throttle--;
#if CONFIG_JETSAM
		/*
		 * don't let the filecache_min fall below 15% of available memory
		 * on systems with an active compressor that isn't nearing its
		 * limits w/r to accepting new data
		 *
		 * on systems w/o the compressor/swapper, the filecache is always
		 * a very large percentage of the AVAILABLE_NON_COMPRESSED_MEMORY
		 * since most (if not all) of the anonymous pages are in the
		 * throttled queue (which isn't counted as available) which
		 * effectively disables this filter
		 */
		if (vm_compressor_low_on_space())
			vm_page_filecache_min = 0;
		else
			vm_page_filecache_min = (AVAILABLE_NON_COMPRESSED_MEMORY / 7);
#else
		/*
		 * don't let the filecache_min fall below 33% of available memory...
		 */
		vm_page_filecache_min = (AVAILABLE_NON_COMPRESSED_MEMORY / 3);
#endif
		exceeded_burst_throttle = FALSE;
		/*
		 * Sometimes we have to pause:
		 *	1) No inactive pages - nothing to do.
		 *	2) Loop control - no acceptable pages found on the inactive queue
		 *         within the last vm_pageout_burst_inactive_throttle iterations
		 *	3) Flow control - default pageout queue is full
		 */
		if (queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_anonymous) && queue_empty(&sq->age_q)) {
			vm_pageout_scan_empty_throttle++;
			msecs = vm_pageout_empty_wait;
			goto vm_pageout_scan_delay;

		} else if (inactive_burst_count >=
			   MIN(vm_pageout_burst_inactive_throttle,
			       (vm_page_inactive_count +
				vm_page_speculative_count))) {
			vm_pageout_scan_burst_throttle++;
			msecs = vm_pageout_burst_wait;

			exceeded_burst_throttle = TRUE;
			goto vm_pageout_scan_delay;

		} else if (vm_page_free_count > (vm_page_free_reserved / 4) &&
			   VM_PAGEOUT_SCAN_NEEDS_TO_THROTTLE()) {
			vm_pageout_scan_swap_throttle++;
			msecs = vm_pageout_swap_wait;
			goto vm_pageout_scan_delay;

		} else if (VM_PAGE_Q_THROTTLED(iq) &&
			   VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) {
			clock_sec_t	sec;
			clock_nsec_t	nsec;
			mach_timespec_t	ts;
			switch (flow_control.state) {

			case FCS_IDLE:
				if ((vm_page_free_count + local_freed) < vm_page_free_target) {

					if (object != NULL) {
						vm_object_unlock(object);
						object = NULL;
					}
					vm_pageout_scan_wants_object = VM_OBJECT_NULL;

					vm_page_unlock_queues();

					if (local_freeq) {

						VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
							       vm_page_free_count, local_freed, delayed_unlock_limit, 3);

						vm_page_free_list(local_freeq, TRUE);

						VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
							       vm_page_free_count, local_freed, 0, 3);

						local_freeq = NULL;
						local_freed = 0;
					}
					thread_yield_internal(1);

					vm_page_lock_queues();

					if (!VM_PAGE_Q_THROTTLED(iq)) {
						vm_pageout_scan_yield_unthrottled++;
						continue;
					}
					if (vm_page_pageable_external_count > vm_page_filecache_min && !queue_empty(&vm_page_queue_inactive)) {
						anons_grabbed = ANONS_GRABBED_LIMIT;
						vm_pageout_scan_throttle_deferred++;
						goto consider_inactive;
					}
					if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) && vm_page_active_count)
						continue;
				}
reset_deadlock_timer:
				ts.tv_sec = vm_pageout_deadlock_wait / 1000;
				ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
				clock_get_system_nanotime(&sec, &nsec);
				flow_control.ts.tv_sec = (unsigned int) sec;
				flow_control.ts.tv_nsec = nsec;
				ADD_MACH_TIMESPEC(&flow_control.ts, &ts);

				flow_control.state = FCS_DELAYED;
				msecs = vm_pageout_deadlock_wait;

				break;

			case FCS_DELAYED:
				clock_get_system_nanotime(&sec, &nsec);
				ts.tv_sec = (unsigned int) sec;
				ts.tv_nsec = nsec;

				if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
					/*
					 * the pageout thread for the default pager is potentially
					 * deadlocked since the
					 * default pager queue has been throttled for more than the
					 * allowable time... we need to move some clean pages or dirty
					 * pages belonging to the external pagers if they aren't throttled
					 * vm_page_free_wanted represents the number of threads currently
					 * blocked waiting for pages... we'll move one page for each of
					 * these plus a fixed amount to break the logjam... once we're done
					 * moving this number of pages, we'll re-enter the FCS_DELAYED state
					 * with a new timeout target since we have no way of knowing
					 * whether we've broken the deadlock except through observation
					 * of the queue associated with the default pager... we need to
					 * stop moving pages and allow the system to run to see what
					 * state it settles into.
					 */
					vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted + vm_page_free_wanted_privileged;
					vm_pageout_scan_deadlock_detected++;
					flow_control.state = FCS_DEADLOCK_DETECTED;
					thread_wakeup((event_t) &vm_pageout_garbage_collect);
					goto consider_inactive;
				}
				/*
				 * just resniff instead of trying
				 * to compute a new delay time... we're going to be
				 * awakened immediately upon a laundry completion,
				 * so we won't wait any longer than necessary
				 */
				msecs = vm_pageout_idle_wait;
				break;

			case FCS_DEADLOCK_DETECTED:
				if (vm_pageout_deadlock_target)
					goto consider_inactive;
				goto reset_deadlock_timer;
			}

vm_pageout_scan_delay:
			if (object != NULL) {
				vm_object_unlock(object);
				object = NULL;
			}
			vm_pageout_scan_wants_object = VM_OBJECT_NULL;

			vm_page_unlock_queues();

			if (local_freeq) {

				VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
					       vm_page_free_count, local_freed, delayed_unlock_limit, 3);

				vm_page_free_list(local_freeq, TRUE);

				VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
					       vm_page_free_count, local_freed, 0, 3);

				local_freeq = NULL;
				local_freed = 0;
			}
			vm_consider_waking_compactor_swapper();

			vm_page_lock_queues();

			if (flow_control.state == FCS_DELAYED &&
			    !VM_PAGE_Q_THROTTLED(iq)) {
				flow_control.state = FCS_IDLE;
				goto consider_inactive;
			}
			if (vm_page_free_count >= vm_page_free_target) {
				/*
				 * we're here because
				 *  1) someone else freed up some pages while we had
				 *     the queues unlocked above
				 * and we've hit one of the 3 conditions that
				 * cause us to pause the pageout scan thread
				 *
				 * since we already have enough free pages,
				 * let's avoid stalling and return normally
				 *
				 * before we return, make sure the pageout I/O threads
				 * are running throttled in case there are still requests
				 * in the laundry... since we have enough free pages
				 * we don't need the laundry to be cleaned in a timely
				 * fashion... so let's avoid interfering with foreground
				 * activity
				 *
				 * we don't want to hold vm_page_queue_free_lock when
				 * calling vm_pageout_adjust_io_throttles (since it
				 * may cause other locks to be taken), we do the initial
				 * check outside of the lock.  Once we take the lock,
				 * we recheck the condition since it may have changed.
				 * if it has, no problem, we will make the threads
				 * non-throttled before actually blocking
				 */
				vm_pageout_adjust_io_throttles(iq, eq, TRUE);

				lck_mtx_lock(&vm_page_queue_free_lock);

				if (vm_page_free_count >= vm_page_free_target &&
				    (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
					goto return_from_scan;
				}
				lck_mtx_unlock(&vm_page_queue_free_lock);
			}
			if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) {
				/*
				 * we're most likely about to block due to one of
				 * the 3 conditions that cause vm_pageout_scan to
				 * not be able to make forward progress w/r
				 * to providing new pages to the free queue,
				 * so unthrottle the I/O threads in case we
				 * have laundry to be cleaned... it needs
				 * to be completed ASAP.
				 *
				 * even if we don't block, we want the io threads
				 * running unthrottled since the sum of free +
				 * clean pages is still under our free target
				 */
				vm_pageout_adjust_io_throttles(iq, eq, FALSE);
			}
			if (vm_page_cleaned_count > 0 && exceeded_burst_throttle == FALSE) {
				/*
				 * if we get here we're below our free target and
				 * we're stalling due to a full laundry queue or
				 * we don't have any inactive pages other than
				 * those in the clean queue...
				 * however, we have pages on the clean queue that
				 * can be moved to the free queue, so let's not
				 * stall the pageout scan
				 */
				flow_control.state = FCS_IDLE;
				goto consider_inactive;
			}
			VM_CHECK_MEMORYSTATUS;

			if (flow_control.state != FCS_IDLE)
				vm_pageout_scan_throttle++;
			iq->pgo_throttled = TRUE;

			assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);
			counter(c_vm_pageout_scan_block++);

			vm_page_unlock_queues();

			assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);

			VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START,
				       iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
			memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START);

			thread_block(THREAD_CONTINUE_NULL);

			VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END,
				       iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
			memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END);

			vm_page_lock_queues();

			iq->pgo_throttled = FALSE;

			if (loop_count >= vm_page_inactive_count)
				loop_count = 0;
			inactive_burst_count = 0;

			goto Restart;
			/*NOTREACHED*/
		}

		flow_control.state = FCS_IDLE;
consider_inactive:
		vm_pageout_inactive_external_forced_reactivate_limit = MIN((vm_page_active_count + vm_page_inactive_count),
									    vm_pageout_inactive_external_forced_reactivate_limit);
		loop_count++;
		inactive_burst_count++;
		vm_pageout_inactive++;
			uint32_t	inactive_external_count;

			if (VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) {
				assert(vm_page_throttled_count == 0);
				assert(queue_empty(&vm_page_queue_throttled));
			}
			/*
			 * The most eligible pages are ones we paged in speculatively,
			 * but which have not yet been touched.
			 */
			if (!queue_empty(&sq->age_q) && force_anonymous == FALSE) {
				m = (vm_page_t) queue_first(&sq->age_q);

				page_prev_state = PAGE_STATE_SPECULATIVE;

				break;
			}
			/*
			 * Try a clean-queue inactive page.
			 */
			if (!queue_empty(&vm_page_queue_cleaned)) {
				m = (vm_page_t) queue_first(&vm_page_queue_cleaned);

				page_prev_state = PAGE_STATE_CLEAN;

				break;
			}

			grab_anonymous = (vm_page_anonymous_count > vm_page_anonymous_min);
			inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;

			if ((vm_page_pageable_external_count < vm_page_filecache_min || force_anonymous == TRUE) ||
			    ((inactive_external_count < vm_page_anonymous_count) && (inactive_external_count < (vm_page_pageable_external_count / 3)))) {
				grab_anonymous = TRUE;
			}

			if (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT || queue_empty(&vm_page_queue_anonymous)) {

				if ( !queue_empty(&vm_page_queue_inactive) ) {
					m = (vm_page_t) queue_first(&vm_page_queue_inactive);

					page_prev_state = PAGE_STATE_INACTIVE;
					anons_grabbed = 0;

					if (vm_page_pageable_external_count < vm_page_filecache_min) {
						if ((++reactivated_this_call % 100))
							goto must_activate_page;
						/*
						 * steal 1% of the file backed pages even if
						 * we are under the limit that has been set
						 * for a healthy filecache
						 */
					}
					break;
				}
			}
			if ( !queue_empty(&vm_page_queue_anonymous) ) {
				m = (vm_page_t) queue_first(&vm_page_queue_anonymous);

				page_prev_state = PAGE_STATE_ANONYMOUS;
				anons_grabbed++;

				break;
			}

			/*
			 * if we've gotten here, we have no victim page.
			 * if making clean, free the local freed list and return.
			 * if making free, check to see if we've finished balancing the queues
			 * yet, if we haven't just continue, else panic
			 */
			vm_page_unlock_queues();

			if (object != NULL) {
				vm_object_unlock(object);
				object = NULL;
			}
			vm_pageout_scan_wants_object = VM_OBJECT_NULL;

			if (local_freeq) {
				VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
					       vm_page_free_count, local_freed, delayed_unlock_limit, 5);

				vm_page_free_list(local_freeq, TRUE);

				VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
					       vm_page_free_count, local_freed, 0, 5);

				local_freeq = NULL;
				local_freed = 0;
			}
			vm_page_lock_queues();

			force_anonymous = FALSE;

			if ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target)
				goto Restart;

			if (!queue_empty(&sq->age_q))
				goto Restart;

			panic("vm_pageout: no victim");

			/* NOTREACHED */
		}
		force_anonymous = FALSE;
		/*
		 * we just found this page on one of our queues...
		 * it can't also be on the pageout queue, so safe
		 * to call vm_page_queues_remove
		 */
		assert(!m->pageout_queue);

		vm_page_queues_remove(m);

		assert(!m->laundry);
		assert(!m->private);
		assert(!m->fictitious);
		assert(m->object != kernel_object);
		assert(m->phys_page != vm_page_guard_addr);

		if (page_prev_state != PAGE_STATE_SPECULATIVE)
			vm_pageout_stats[vm_pageout_stat_now].considered++;

		DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
		/*
		 * check to see if we currently are working
		 * with the same object... if so, we've
		 * already got the lock
		 */
		if (m->object != object) {
			/*
			 * the object associated with candidate page is
			 * different from the one we were just working
			 * with... dump the lock if we still own it
			 */
			if (object != NULL) {
				vm_object_unlock(object);
				object = NULL;
				vm_pageout_scan_wants_object = VM_OBJECT_NULL;
			}
			/*
			 * Try to lock object; since we've already got the
			 * page queues lock, we can only 'try' for this one.
			 * if the 'try' fails, we need to do a mutex_pause
			 * to allow the owner of the object lock a chance to
			 * run... otherwise, we're likely to trip over this
			 * object in the same state as we work our way through
			 * the queue... clumps of pages associated with the same
			 * object are fairly typical on the inactive and active queues
			 */
			if (!vm_object_lock_try_scan(m->object)) {
				vm_page_t m_want = NULL;

				vm_pageout_inactive_nolock++;

				if (page_prev_state == PAGE_STATE_CLEAN)
					vm_pageout_cleaned_nolock++;

				if (page_prev_state == PAGE_STATE_SPECULATIVE)
					page_prev_state = PAGE_STATE_INACTIVE_FIRST;

				pmap_clear_reference(m->phys_page);
				m->reference = FALSE;

				/*
				 * m->object must be stable since we hold the page queues lock...
				 * we can update the scan_collisions field sans the object lock
				 * since it is a separate field and this is the only spot that does
				 * a read-modify-write operation and it is never executed concurrently...
				 * we can asynchronously set this field to 0 when creating a UPL, so it
				 * is possible for the value to be a bit non-deterministic, but that's ok
				 * since it's only used as a hint
				 */
				m->object->scan_collisions = 1;

				if ( !queue_empty(&sq->age_q) )
					m_want = (vm_page_t) queue_first(&sq->age_q);
				else if ( !queue_empty(&vm_page_queue_cleaned))
					m_want = (vm_page_t) queue_first(&vm_page_queue_cleaned);
				else if (anons_grabbed >= ANONS_GRABBED_LIMIT || queue_empty(&vm_page_queue_anonymous))
					m_want = (vm_page_t) queue_first(&vm_page_queue_inactive);
				else if ( !queue_empty(&vm_page_queue_anonymous))
					m_want = (vm_page_t) queue_first(&vm_page_queue_anonymous);

				/*
				 * this is the next object we're going to be interested in
				 * try to make sure it's available after the mutex_yield
				 * returns control
				 */
				if (m_want)
					vm_pageout_scan_wants_object = m_want->object;

				/*
				 * force us to dump any collected free pages
				 * and to pause before moving on
				 */
				try_failed = TRUE;

				goto requeue_page;
			}
			object = m->object;
			vm_pageout_scan_wants_object = VM_OBJECT_NULL;

			try_failed = FALSE;
		}
		if (m->busy) {
			if (m->encrypted_cleaning) {
				/*
				 * if this page has already been picked up as
				 * part of a page-out cluster, it will be busy
				 * because it is being encrypted (see
				 * vm_object_upl_request()).  But we still
				 * want to demote it from "clean-in-place"
				 * (aka "adjacent") to "clean-and-free" (aka
				 * "target"), so let's ignore its "busy" bit
				 * here and proceed to check for "cleaning" a
				 * little bit below...
				 *
				 * A "busy" page should still be left alone for
				 * most purposes, so we have to be very careful
				 * not to process that page too much.
				 */
				assert(m->cleaning);
				goto consider_inactive_page;
			}

			/*
			 * Somebody is already playing with this page.
			 * Put it back on the appropriate queue
			 */
			vm_pageout_inactive_busy++;

			if (page_prev_state == PAGE_STATE_CLEAN)
				vm_pageout_cleaned_busy++;

requeue_page:
			switch (page_prev_state) {

			case PAGE_STATE_SPECULATIVE:
			case PAGE_STATE_ANONYMOUS:
			case PAGE_STATE_CLEAN:
			case PAGE_STATE_INACTIVE:
				vm_page_enqueue_inactive(m, FALSE);
				break;

			case PAGE_STATE_INACTIVE_FIRST:
				vm_page_enqueue_inactive(m, TRUE);
				break;
			}
			goto done_with_inactivepage;
		}
		/*
		 * If it's absent, in error or the object is no longer alive,
		 * we can reclaim the page... in the no longer alive case,
		 * there are 2 states the page can be in that preclude us
		 * from reclaiming it - busy or cleaning - that we've already
		 * dealt with
		 */
		if (m->absent || m->error || !object->alive) {

			if (m->absent)
				vm_pageout_inactive_absent++;
			else if (!object->alive)
				vm_pageout_inactive_notalive++;
			else
				vm_pageout_inactive_error++;
reclaim_page:
			if (vm_pageout_deadlock_target) {
				vm_pageout_scan_inactive_throttle_success++;
				vm_pageout_deadlock_target--;
			}

			DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL);

			if (object->internal) {
				DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL);
			} else {
				DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL);
			}
			assert(!m->cleaning);
			assert(!m->laundry);

			/*
			 * remove page from object here since we're already
			 * behind the object lock... defer the rest of the work
			 * we'd normally do in vm_page_free_prepare_object
			 * until 'vm_page_free_list' is called
			 */
			vm_page_remove(m, TRUE);

			assert(m->pageq.next == NULL &&
			       m->pageq.prev == NULL);
			m->pageq.next = (queue_entry_t)local_freeq;
			local_freeq = m;
			local_freed++;

			if (page_prev_state == PAGE_STATE_SPECULATIVE)
				vm_pageout_freed_from_speculative++;
			else if (page_prev_state == PAGE_STATE_CLEAN)
				vm_pageout_freed_from_cleaned++;
			else
				vm_pageout_freed_from_inactive_clean++;

			if (page_prev_state != PAGE_STATE_SPECULATIVE)
				vm_pageout_stats[vm_pageout_stat_now].reclaimed++;

			inactive_burst_count = 0;
			goto done_with_inactivepage;
		}
		/*
		 * If the object is empty, the page must be reclaimed even
		 * if dirty or used.
		 * If the page belongs to a volatile object, we stick it back
		 * on.
		 */
		if (object->copy == VM_OBJECT_NULL) {
			if (object->purgable == VM_PURGABLE_EMPTY) {
				if (m->pmapped == TRUE) {
					/* unmap the page */
					refmod_state = pmap_disconnect(m->phys_page);
					if (refmod_state & VM_MEM_MODIFIED) {
						SET_PAGE_DIRTY(m, FALSE);
					}
				}
				if (m->dirty || m->precious) {
					/* we saved the cost of cleaning this page ! */
					vm_page_purged_count++;
				}
				goto reclaim_page;
			}

			if (COMPRESSED_PAGER_IS_ACTIVE) {
				/*
				 * With the VM compressor, the cost of
				 * reclaiming a page is much lower (no I/O),
				 * so if we find a "volatile" page, it's better
				 * to let it get compressed rather than letting
				 * it occupy a full page until it gets purged.
				 * So no need to check for "volatile" here.
				 */
			} else if (object->purgable == VM_PURGABLE_VOLATILE) {
				/*
				 * Avoid cleaning a "volatile" page which might
				 * be purged soon.
				 */

				/* if it's wired, we can't put it on our queue */
				assert(!VM_PAGE_WIRED(m));

				/* just stick it back on! */
				reactivated_this_call++;

				if (page_prev_state == PAGE_STATE_CLEAN)
					vm_pageout_cleaned_volatile_reactivated++;

				goto reactivate_page;
			}
		}
consider_inactive_page:
		if (m->busy) {
			/*
			 * A "busy" page should always be left alone, except...
			 */
			if (m->cleaning && m->encrypted_cleaning) {
				/*
				 * We could get here with a "busy" page
				 * if it's being encrypted during a
				 * "clean-in-place" operation.  We'll deal
				 * with it right away by testing if it has been
				 * referenced and either reactivating it or
				 * promoting it from "clean-in-place" to
				 * "clean-and-free".
				 */
			} else {
				panic("\"busy\" page considered for pageout\n");
			}
		}

		/*
		 * If it's being used, reactivate.
		 * (Fictitious pages are either busy or absent.)
		 * First, update the reference and dirty bits
		 * to make sure the page is unreferenced.
		 */
		refmod_state = -1;

		if (m->reference == FALSE && m->pmapped == TRUE) {
			refmod_state = pmap_get_refmod(m->phys_page);

			if (refmod_state & VM_MEM_REFERENCED)
				m->reference = TRUE;
			if (refmod_state & VM_MEM_MODIFIED) {
				SET_PAGE_DIRTY(m, FALSE);
			}
		}

		/*
		 *   if (m->cleaning && !m->pageout)
		 *	If already cleaning this page in place and it hasn't
		 *	been recently referenced, just pull off the queue.
		 *	We can leave the page mapped, and upl_commit_range
		 *	will put it on the clean queue.
		 *
		 *	note: if m->encrypted_cleaning == TRUE, then
		 *		m->cleaning == TRUE
		 *	and we'll handle it here
		 *
		 *   if (m->pageout && !m->cleaning)
		 *	an msync INVALIDATE is in progress...
		 *	this page has been marked for destruction
		 *	after it has been cleaned,
		 *	but not yet gathered into a UPL
		 *	where 'cleaning' will be set...
		 *	just leave it off the paging queues
		 *
		 *   if (m->pageout && m->cleaning)
		 *	an msync INVALIDATE is in progress
		 *	and the UPL has already gathered this page...
		 *	just leave it off the paging queues
		 */

		/*
		 * page with m->pageout and still on the queues means that an
		 * MS_INVALIDATE is in progress on this page... leave it alone
		 */
		if (m->pageout) {
			goto done_with_inactivepage;
		}

		/* if cleaning, reactivate if referenced.  otherwise, just pull off queue */
		if (m->cleaning) {
			if (m->reference == TRUE) {
				reactivated_this_call++;
				goto reactivate_page;
			} else {
				goto done_with_inactivepage;
			}
		}
		if (m->reference || m->dirty) {
			/* deal with a rogue "reusable" page */
			VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m);
		}

		if (!m->no_cache &&
		    (m->reference ||
		     (m->xpmapped && !object->internal && (vm_page_xpmapped_external_count < (vm_page_external_count / 4))))) {
			/*
			 * The page we pulled off the inactive list has
			 * been referenced.  It is possible for other
			 * processors to be touching pages faster than we
			 * can clear the referenced bit and traverse the
			 * inactive queue, so we limit the number of
			 * reactivations.
			 */
			if (++reactivated_this_call >= reactivate_limit) {
				vm_pageout_reactivation_limit_exceeded++;
			} else if (catch_up_count) {
				vm_pageout_catch_ups++;
			} else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) {
				vm_pageout_inactive_force_reclaim++;
			} else {
				uint32_t isinuse;

				if (page_prev_state == PAGE_STATE_CLEAN)
					vm_pageout_cleaned_reference_reactivated++;

reactivate_page:
				if ( !object->internal && object->pager != MEMORY_OBJECT_NULL &&
				     vnode_pager_get_isinuse(object->pager, &isinuse) == KERN_SUCCESS && !isinuse) {
					/*
					 * no explicit mappings of this object exist
					 * and it's not open via the filesystem
					 */
					vm_page_deactivate(m);
					vm_pageout_inactive_deactivated++;
				} else {
must_activate_page:
					/*
					 * The page was/is being used, so put back on active list.
					 */
					vm_page_activate(m);
					VM_STAT_INCR(reactivations);
					inactive_burst_count = 0;
				}

				if (page_prev_state == PAGE_STATE_CLEAN)
					vm_pageout_cleaned_reactivated++;

				vm_pageout_inactive_used++;

				goto done_with_inactivepage;
			}
			/*
			 * Make sure we call pmap_get_refmod() if it
			 * wasn't already called just above, to update
			 * the dirty bit.
			 */
			if ((refmod_state == -1) && !m->dirty && m->pmapped) {
				refmod_state = pmap_get_refmod(m->phys_page);
				if (refmod_state & VM_MEM_MODIFIED) {
					SET_PAGE_DIRTY(m, FALSE);
				}
			}
			forced_reclaim = TRUE;
		} else {
			forced_reclaim = FALSE;
		}

		XPR(XPR_VM_PAGEOUT,
		    "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
		    object, m->offset, m, 0, 0);

		/*
		 * we've got a candidate page to steal...
		 *
		 * m->dirty is up to date courtesy of the
		 * preceding check for m->reference... if
		 * we get here, then m->reference had to be
		 * FALSE (or possibly "reactivate_limit" was
		 * exceeded), but in either case we called
		 * pmap_get_refmod() and updated both
		 * m->reference and m->dirty
		 *
		 * if it's dirty or precious we need to
		 * see if the target queue is throttled
		 * if it is, we need to skip over it by moving it back
		 * to the end of the inactive queue
		 */
		inactive_throttled = FALSE;
		if (m->dirty || m->precious) {
			if (object->internal) {
				if (VM_PAGE_Q_THROTTLED(iq))
					inactive_throttled = TRUE;
			} else if (VM_PAGE_Q_THROTTLED(eq)) {
				inactive_throttled = TRUE;
			}
		}
throttle_inactive:
		if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
		    object->internal && m->dirty &&
		    (object->purgable == VM_PURGABLE_DENY ||
		     object->purgable == VM_PURGABLE_NONVOLATILE ||
		     object->purgable == VM_PURGABLE_VOLATILE)) {
			vm_page_check_pageable_safe(m);
			queue_enter(&vm_page_queue_throttled, m,
				    vm_page_t, pageq);
			m->throttled = TRUE;
			vm_page_throttled_count++;

			vm_pageout_scan_reclaimed_throttled++;

			inactive_burst_count = 0;
			goto done_with_inactivepage;
		}
		if (inactive_throttled == TRUE) {

			if (object->internal == FALSE) {
				/*
				 * we need to break up the following potential deadlock case...
				 *  a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written.
				 *  b) The thread doing the writing is waiting for pages while holding the truncate lock
				 *  c) Most of the pages in the inactive queue belong to this file.
				 *
				 * we are potentially in this deadlock because...
				 *  a) the external pageout queue is throttled
				 *  b) we're done with the active queue and moved on to the inactive queue
				 *  c) we've got a dirty external page
				 *
				 * since we don't know the reason for the external pageout queue being throttled we
				 * must suspect that we are deadlocked, so move the current page onto the active queue
				 * in an effort to cause a page from the active queue to 'age' to the inactive queue
				 *
				 * if we don't have jetsam configured (i.e. we have a dynamic pager), set
				 * 'force_anonymous' to TRUE to cause us to grab a page from the cleaned/anonymous
				 * pool the next time we select a victim page... if we can make enough new free pages,
				 * the deadlock will break, the external pageout queue will empty and it will no longer
				 * be throttled
				 *
				 * if we have jetsam configured, keep a count of the pages reactivated this way so
				 * that we can try to find clean pages in the active/inactive queues before
				 * deciding to jetsam a process
				 */
				vm_pageout_scan_inactive_throttled_external++;

				vm_page_check_pageable_safe(m);
				queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
				m->active = TRUE;
				vm_page_active_count++;
				vm_page_pageable_external_count++;

				vm_pageout_adjust_io_throttles(iq, eq, FALSE);

#if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
				vm_pageout_inactive_external_forced_reactivate_limit--;

				if (vm_pageout_inactive_external_forced_reactivate_limit <= 0) {
					vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
					/*
					 * Possible deadlock scenario so request jetsam action
					 */
					vm_object_unlock(object);
					object = VM_OBJECT_NULL;
					vm_page_unlock_queues();

					VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START,
								vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);

					/* Kill first suitable process */
					if (memorystatus_kill_on_VM_page_shortage(FALSE) == FALSE) {
						panic("vm_pageout_scan: Jetsam request failed\n");
					}

					VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END, 0, 0, 0, 0);

					vm_pageout_inactive_external_forced_jetsam_count++;
					vm_page_lock_queues();
				}
#else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
				force_anonymous = TRUE;
#endif
				inactive_burst_count = 0;
				goto done_with_inactivepage;
			} else {
				if (page_prev_state == PAGE_STATE_SPECULATIVE)
					page_prev_state = PAGE_STATE_INACTIVE;

				vm_pageout_scan_inactive_throttled_internal++;

				goto must_activate_page;
			}
		}
		/*
		 * we've got a page that we can steal...
		 * eliminate all mappings and make sure
		 * we have the up-to-date modified state
		 *
		 * if we need to do a pmap_disconnect then we
		 * need to re-evaluate m->dirty since the pmap_disconnect
		 * provides the true state atomically... the
		 * page was still mapped up to the pmap_disconnect
		 * and may have been dirtied at the last microsecond
		 *
		 * Note that if 'pmapped' is FALSE then the page is not
		 * and has not been in any map, so there is no point calling
		 * pmap_disconnect().  m->dirty could have been set in anticipation
		 * of likely usage of the page.
		 */
		if (m->pmapped == TRUE) {
			int pmap_options;

			/*
			 * Don't count this page as going into the compressor
			 * if any of these are true:
			 * 1) We have the dynamic pager i.e. no compressed pager
			 * 2) Freezer enabled device with a freezer file to
			 *    hold the app data i.e. no compressed pager
			 * 3) Freezer enabled device with compressed pager
			 *    backend (exclusive use) i.e. most of the VM system
			 *    (including vm_pageout_scan) has no knowledge of
			 *    the compressor
			 * 4) This page belongs to a file and hence will not be
			 *    sent into the compressor
			 */
			if (DEFAULT_PAGER_IS_ACTIVE ||
			    DEFAULT_FREEZER_IS_ACTIVE ||
			    DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS ||
			    object->internal == FALSE) {
				pmap_options = 0;
			} else if (m->dirty || m->precious) {
				/*
				 * VM knows that this page is dirty (or
				 * precious) and needs to be compressed
				 * rather than freed.
				 * Tell the pmap layer to count this page
				 * as "compressed".
				 */
				pmap_options = PMAP_OPTIONS_COMPRESSOR;
			} else {
				/*
				 * VM does not know if the page needs to
				 * be preserved but the pmap layer might tell
				 * us if any mapping has "modified" it.
				 * Let the pmap layer count this page
				 * as compressed if and only if it has been
				 * modified.
				 */
				pmap_options = PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
			}
			refmod_state = pmap_disconnect_options(m->phys_page,
							       pmap_options,
							       NULL);
			if (refmod_state & VM_MEM_MODIFIED) {
				SET_PAGE_DIRTY(m, FALSE);
			}
		}
		/*
		 * reset our count of pages that have been reclaimed
		 * since the last page was 'stolen'
		 */
		inactive_reclaim_run = 0;
		/*
		 * If it's clean and not precious, we can free the page.
		 */
		if (!m->dirty && !m->precious) {

			if (page_prev_state == PAGE_STATE_SPECULATIVE)
				vm_pageout_speculative_clean++;
			else {
				if (page_prev_state == PAGE_STATE_ANONYMOUS)
					vm_pageout_inactive_anonymous++;
				else if (page_prev_state == PAGE_STATE_CLEAN)
					vm_pageout_cleaned_reclaimed++;

				vm_pageout_inactive_clean++;
			}

			/*
			 * OK, at this point we have found a page we are going to free.
			 */
#if CONFIG_PHANTOM_CACHE
			if (!object->internal)
				vm_phantom_cache_add_ghost(m);
#endif
			goto reclaim_page;
		}
		/*
		 * The page may have been dirtied since the last check
		 * for a throttled target queue (which may have been skipped
		 * if the page was clean then).  With the dirty page
		 * disconnected here, we can make one final check.
		 */
		if (object->internal) {
			if (VM_PAGE_Q_THROTTLED(iq))
				inactive_throttled = TRUE;
		} else if (VM_PAGE_Q_THROTTLED(eq)) {
			inactive_throttled = TRUE;
		}

		if (inactive_throttled == TRUE)
			goto throttle_inactive;

#if VM_PRESSURE_EVENTS
#if CONFIG_JETSAM
		/*
		 * If Jetsam is enabled, then the sending
		 * of memory pressure notifications is handled
		 * from the same thread that takes care of high-water
		 * and other jetsams i.e. the memorystatus_thread.
		 */
#else /* CONFIG_JETSAM */
		vm_pressure_response();
#endif /* CONFIG_JETSAM */
#endif /* VM_PRESSURE_EVENTS */

		if (page_prev_state == PAGE_STATE_ANONYMOUS)
			vm_pageout_inactive_anonymous++;
		if (object->internal)
			vm_pageout_inactive_dirty_internal++;
		else
			vm_pageout_inactive_dirty_external++;

		/*
		 * do NOT set the pageout bit!
		 * sure, we might need free pages, but this page is going to take time to become free
		 * anyway, so we may as well put it on the clean queue first and take it from there later
		 * if necessary.  that way, we'll ensure we don't free up too much. -mj
		 */
		vm_pageout_cluster(m, FALSE, FALSE, FALSE);
done_with_inactivepage:

		if (delayed_unlock++ > delayed_unlock_limit || try_failed == TRUE) {
			boolean_t need_delay = TRUE;

			if (object != NULL) {
				vm_pageout_scan_wants_object = VM_OBJECT_NULL;
				vm_object_unlock(object);
				object = NULL;
			}
			vm_page_unlock_queues();

			if (local_freeq) {

				VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
					       vm_page_free_count, local_freed, delayed_unlock_limit, 4);

				vm_page_free_list(local_freeq, TRUE);

				VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
					       vm_page_free_count, local_freed, 0, 4);

				local_freeq = NULL;
				local_freed = 0;
				need_delay = FALSE;
			}
			vm_consider_waking_compactor_swapper();

			vm_page_lock_queues();

			if (need_delay == TRUE)
				lck_mtx_yield(&vm_page_queue_lock);

			delayed_unlock = 1;
		}
		vm_pageout_considered_page++;

		/*
		 * back to top of pageout scan loop
		 */
	}
}
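/*
 * Illustrative sketch (not part of this file's build): a userspace analogue of
 * the local_freeq batching used in the loop above, where reclaimed pages are
 * chained onto a thread-local list and handed back in one call once
 * delayed_unlock_limit is exceeded, so the (simulated) queue lock is dropped
 * far less often.  Every name and the batch size below are invented for the
 * example.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>

#define DELAYED_UNLOCK_LIMIT	4	/* assumed batch size for the sketch */

struct fake_page {
	struct fake_page	*next;
	int			id;
};

/* free an entire chain in one call, mirroring what vm_page_free_list() does */
static int
flush_local_freeq(struct fake_page *local_freeq)
{
	int freed = 0;

	while (local_freeq != NULL) {
		struct fake_page *next = local_freeq->next;

		free(local_freeq);
		local_freeq = next;
		freed++;
	}
	return freed;
}

int
main(void)
{
	struct fake_page	*local_freeq = NULL;
	int			local_freed = 0;
	int			i;

	for (i = 0; i < 10; i++) {
		struct fake_page *p = malloc(sizeof(*p));

		if (p == NULL)
			return 1;
		p->id = i;
		/* chain the "reclaimed" page locally instead of freeing it now */
		p->next = local_freeq;
		local_freeq = p;

		if (++local_freed > DELAYED_UNLOCK_LIMIT) {
			/* the kernel would drop vm_page_queue_lock around this */
			printf("flushed %d pages in one batch\n",
			    flush_local_freeq(local_freeq));
			local_freeq = NULL;
			local_freed = 0;
		}
	}
	if (local_freeq != NULL)
		printf("flushed %d leftover pages\n", flush_local_freeq(local_freeq));
	return 0;
}
#endif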
int vm_page_free_count_init;

void
vm_page_free_reserve(
	int pages)
{
	int	free_after_reserve;

	if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {

		if ((vm_page_free_reserved + pages + COMPRESSOR_FREE_RESERVED_LIMIT) >= (VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT))
			vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT;
		else
			vm_page_free_reserved += (pages + COMPRESSOR_FREE_RESERVED_LIMIT);

	} else {
		if ((vm_page_free_reserved + pages) >= VM_PAGE_FREE_RESERVED_LIMIT)
			vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT;
		else
			vm_page_free_reserved += pages;
	}
	free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;

	vm_page_free_min = vm_page_free_reserved +
		VM_PAGE_FREE_MIN(free_after_reserve);

	if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT)
		vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT;

	vm_page_free_target = vm_page_free_reserved +
		VM_PAGE_FREE_TARGET(free_after_reserve);

	if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT)
		vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT;

	if (vm_page_free_target < vm_page_free_min + 5)
		vm_page_free_target = vm_page_free_min + 5;

	vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 2);
}
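/*
 * Worked example (illustrative only): how the reserve feeds free_min,
 * free_target and the throttle limit computed above.  The real
 * VM_PAGE_FREE_MIN()/VM_PAGE_FREE_TARGET() macros and their limits live in
 * vm_pageout.h; the ratios and page counts below are assumptions made up for
 * the sketch, not the shipped tunables.
 */
#if 0
#include <stdio.h>

int
main(void)
{
	unsigned int free_count_init = 500000;	/* assumed pages at boot */
	unsigned int free_reserved   = 800;	/* assumed reserve */
	unsigned int free_after_reserve = free_count_init - free_reserved;

	/* stand-ins for VM_PAGE_FREE_MIN() and VM_PAGE_FREE_TARGET() */
	unsigned int free_min    = free_reserved + (free_after_reserve * 15) / 1000;
	unsigned int free_target = free_reserved + (free_after_reserve * 4) / 100;

	if (free_target < free_min + 5)
		free_target = free_min + 5;

	printf("min=%u target=%u throttle_limit=%u\n",
	    free_min, free_target, free_target - (free_target / 2));
	return 0;
}
#endif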
/*
 *	vm_pageout is the high level pageout daemon.
 */

void
vm_pageout_continue(void)
{
	DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL);
	vm_pageout_scan_event_counter++;

	lck_mtx_lock(&vm_page_queue_free_lock);
	vm_pageout_running = TRUE;
	lck_mtx_unlock(&vm_page_queue_free_lock);

	vm_pageout_scan();
	/*
	 * we hold both the vm_page_queue_free_lock
	 * and the vm_page_queues_lock at this point
	 */
	assert(vm_page_free_wanted == 0);
	assert(vm_page_free_wanted_privileged == 0);
	assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);

	vm_pageout_running = FALSE;
	if (vm_pageout_waiter) {
		vm_pageout_waiter = FALSE;
		thread_wakeup((event_t)&vm_pageout_waiter);
	}

	lck_mtx_unlock(&vm_page_queue_free_lock);
	vm_page_unlock_queues();

	counter(c_vm_pageout_block++);
	thread_block((thread_continue_t)vm_pageout_continue);
	/*NOTREACHED*/
}
kern_return_t
vm_pageout_wait(uint64_t deadline)
{
	kern_return_t kr;

	lck_mtx_lock(&vm_page_queue_free_lock);
	for (kr = KERN_SUCCESS; vm_pageout_running && (KERN_SUCCESS == kr); ) {
		vm_pageout_waiter = TRUE;
		if (THREAD_AWAKENED != lck_mtx_sleep_deadline(
				&vm_page_queue_free_lock, LCK_SLEEP_DEFAULT,
				(event_t) &vm_pageout_waiter, THREAD_UNINT, deadline)) {
			kr = KERN_OPERATION_TIMED_OUT;
		}
	}
	lck_mtx_unlock(&vm_page_queue_free_lock);

	return (kr);
}
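/*
 * Userspace sketch (pthreads standing in for lck_mtx_sleep_deadline and
 * thread_wakeup) of the vm_pageout_running / vm_pageout_waiter handshake
 * implemented above: the waiter sleeps with a deadline until the daemon
 * clears "running" and wakes it.  All names and the deadline value are
 * invented for the illustration; this is not kernel code.
 */
#if 0
#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t	lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	cv   = PTHREAD_COND_INITIALIZER;
static bool		running = true;		/* analogue of vm_pageout_running */

static void *
daemon_pass(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	running = false;			/* the pageout pass has finished */
	pthread_cond_broadcast(&cv);		/* analogue of thread_wakeup() */
	pthread_mutex_unlock(&lock);
	return NULL;
}

/* analogue of vm_pageout_wait(): 0 on wakeup, ETIMEDOUT if the deadline passes */
static int
pageout_wait(const struct timespec *deadline)
{
	int err = 0;

	pthread_mutex_lock(&lock);
	while (running && err == 0)
		err = pthread_cond_timedwait(&cv, &lock, deadline);
	pthread_mutex_unlock(&lock);
	return err;
}

int
main(void)
{
	pthread_t	t;
	struct timespec	deadline;

	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += 1;			/* 1 second deadline */

	pthread_create(&t, NULL, daemon_pass, NULL);
	printf("pageout_wait -> %s\n",
	    pageout_wait(&deadline) == ETIMEDOUT ? "timed out" : "woken");
	pthread_join(t, NULL);
	return 0;
}
#endif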
#ifdef FAKE_DEADLOCK

#define FAKE_COUNT	5000

int internal_count = 0;
int fake_deadlock = 0;

#endif
static void
vm_pageout_iothread_continue(struct vm_pageout_queue *q)
{
	vm_page_t	m = NULL;
	vm_object_t	object;
	vm_object_offset_t offset;
	memory_object_t	pager;
	thread_t	self = current_thread();

	if ((vm_pageout_internal_iothread != THREAD_NULL)
	    && (self == vm_pageout_external_iothread)
	    && (self->options & TH_OPT_VMPRIV))
		self->options &= ~TH_OPT_VMPRIV;

	vm_page_lockspin_queues();

	while ( !queue_empty(&q->pgo_pending) ) {

		q->pgo_busy = TRUE;
		queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
		if (m->object->object_slid) {
			panic("slid page %p not allowed on this path\n", m);
		}
		m->pageout_queue = FALSE;
		m->pageq.next = NULL;
		m->pageq.prev = NULL;

		/*
		 * grab a snapshot of the object and offset this
		 * page is tabled in so that we can relookup this
		 * page after we've taken the object lock - these
		 * fields are stable while we hold the page queues lock
		 * but as soon as we drop it, there is nothing to keep
		 * this page in this object... we hold an activity_in_progress
		 * on this object which will keep it from terminating
		 */
		object = m->object;
		offset = m->offset;

		vm_page_unlock_queues();

#ifdef FAKE_DEADLOCK
		if (q == &vm_pageout_queue_internal) {
			vm_offset_t addr;
			int	    pg_count;

			internal_count++;

			if ((internal_count == FAKE_COUNT)) {

				pg_count = vm_page_free_count + vm_page_free_reserved;

				if (kmem_alloc(kernel_map, &addr, PAGE_SIZE * pg_count) == KERN_SUCCESS) {
					kmem_free(kernel_map, addr, PAGE_SIZE * pg_count);
				}
				internal_count = 0;
				fake_deadlock++;
			}
		}
#endif
		vm_object_lock(object);

		m = vm_page_lookup(object, offset);

		if (m == NULL ||
		    m->busy || m->cleaning || m->pageout_queue || !m->laundry) {
			/*
			 * it's either the same page that someone else has
			 * started cleaning (or it's finished cleaning or
			 * been put back on the pageout queue), or
			 * the page has been freed or we have found a
			 * new page at this offset... in all of these cases
			 * we merely need to release the activity_in_progress
			 * we took when we put the page on the pageout queue
			 */
			vm_object_activity_end(object);
			vm_object_unlock(object);

			vm_page_lockspin_queues();
			continue;
		}
		if (!object->pager_initialized) {

			/*
			 *	If there is no memory object for the page, create
			 *	one and hand it to the default pager.
			 */

			if (!object->pager_initialized)
				vm_object_collapse(object,
						   (vm_object_offset_t) 0,
						   TRUE);
			if (!object->pager_initialized)
				vm_object_pager_create(object);
			if (!object->pager_initialized) {
				/*
				 *	Still no pager for the object.
				 *	Reactivate the page.
				 *
				 *	Should only happen if there is no
				 *	default pager.
				 */
				vm_page_lockspin_queues();

				vm_pageout_throttle_up(m);
				vm_page_activate(m);
				vm_pageout_dirty_no_pager++;

				vm_page_unlock_queues();

				/*
				 *	And we are done with it.
				 */
				vm_object_activity_end(object);
				vm_object_unlock(object);

				vm_page_lockspin_queues();
				continue;
			}
		}
		pager = object->pager;

		if (pager == MEMORY_OBJECT_NULL) {
			/*
			 * This pager has been destroyed by either
			 * memory_object_destroy or vm_object_destroy, and
			 * so there is nowhere for the page to go.
			 */
			if (m->pageout) {
				/*
				 * Just free the page... VM_PAGE_FREE takes
				 * care of cleaning up all the state...
				 * including doing the vm_pageout_throttle_up
				 */
				VM_PAGE_FREE(m);
			} else {
				vm_page_lockspin_queues();

				vm_pageout_throttle_up(m);
				vm_page_activate(m);

				vm_page_unlock_queues();

				/*
				 *	And we are done with it.
				 */
			}
			vm_object_activity_end(object);
			vm_object_unlock(object);

			vm_page_lockspin_queues();
			continue;
		}
		/*
		 * we don't hold the page queue lock
		 * so this check isn't safe to make
		 */

		/*
		 * give back the activity_in_progress reference we
		 * took when we queued up this page and replace it
		 * with a paging_in_progress reference that will
		 * also hold the paging offset from changing and
		 * prevent the object from terminating
		 */
		vm_object_activity_end(object);
		vm_object_paging_begin(object);
		vm_object_unlock(object);

		/*
		 * Send the data to the pager.
		 * any pageout clustering happens there
		 */
		memory_object_data_return(pager,
					  m->offset + object->paging_offset,
					  PAGE_SIZE,
					  NULL,
					  NULL,
					  FALSE,
					  FALSE,
					  0);

		vm_object_lock(object);
		vm_object_paging_end(object);
		vm_object_unlock(object);

		vm_pageout_io_throttle();

		vm_page_lockspin_queues();
	}
	q->pgo_busy = FALSE;
	q->pgo_idle = TRUE;

	assert_wait((event_t) &q->pgo_pending, THREAD_UNINT);
	vm_page_unlock_queues();

	thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) q);
	/*NOTREACHED*/
}
static void
vm_pageout_iothread_external_continue(struct vm_pageout_queue *q)
{
	vm_page_t	m = NULL;
	vm_object_t	object;
	vm_object_offset_t offset;
	memory_object_t	pager;

	if (vm_pageout_internal_iothread != THREAD_NULL)
		current_thread()->options &= ~TH_OPT_VMPRIV;

	vm_page_lockspin_queues();

	while ( !queue_empty(&q->pgo_pending) ) {

		q->pgo_busy = TRUE;
		queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
		if (m->object->object_slid) {
			panic("slid page %p not allowed on this path\n", m);
		}
		m->pageout_queue = FALSE;
		m->pageq.next = NULL;
		m->pageq.prev = NULL;

		/*
		 * grab a snapshot of the object and offset this
		 * page is tabled in so that we can relookup this
		 * page after we've taken the object lock - these
		 * fields are stable while we hold the page queues lock
		 * but as soon as we drop it, there is nothing to keep
		 * this page in this object... we hold an activity_in_progress
		 * on this object which will keep it from terminating
		 */
		object = m->object;
		offset = m->offset;

		vm_page_unlock_queues();

		vm_object_lock(object);

		m = vm_page_lookup(object, offset);

		if (m == NULL ||
		    m->busy || m->cleaning || m->pageout_queue || !m->laundry) {
			/*
			 * it's either the same page that someone else has
			 * started cleaning (or it's finished cleaning or
			 * been put back on the pageout queue), or
			 * the page has been freed or we have found a
			 * new page at this offset... in all of these cases
			 * we merely need to release the activity_in_progress
			 * we took when we put the page on the pageout queue
			 */
			vm_object_activity_end(object);
			vm_object_unlock(object);

			vm_page_lockspin_queues();
			continue;
		}
		pager = object->pager;

		if (pager == MEMORY_OBJECT_NULL) {
			/*
			 * This pager has been destroyed by either
			 * memory_object_destroy or vm_object_destroy, and
			 * so there is nowhere for the page to go.
			 */
			if (m->pageout) {
				/*
				 * Just free the page... VM_PAGE_FREE takes
				 * care of cleaning up all the state...
				 * including doing the vm_pageout_throttle_up
				 */
				VM_PAGE_FREE(m);
			} else {
				vm_page_lockspin_queues();

				vm_pageout_throttle_up(m);
				vm_page_activate(m);

				vm_page_unlock_queues();

				/*
				 *	And we are done with it.
				 */
			}
			vm_object_activity_end(object);
			vm_object_unlock(object);

			vm_page_lockspin_queues();
			continue;
		}
		/*
		 * we don't hold the page queue lock
		 * so this check isn't safe to make
		 */

		/*
		 * give back the activity_in_progress reference we
		 * took when we queued up this page and replace it
		 * with a paging_in_progress reference that will
		 * also hold the paging offset from changing and
		 * prevent the object from terminating
		 */
		vm_object_activity_end(object);
		vm_object_paging_begin(object);
		vm_object_unlock(object);

		/*
		 * Send the data to the pager.
		 * any pageout clustering happens there
		 */
		memory_object_data_return(pager,
					  m->offset + object->paging_offset,
					  PAGE_SIZE,
					  NULL,
					  NULL,
					  FALSE,
					  FALSE,
					  0);

		vm_object_lock(object);
		vm_object_paging_end(object);
		vm_object_unlock(object);

		vm_pageout_io_throttle();

		vm_page_lockspin_queues();
	}
	q->pgo_busy = FALSE;
	q->pgo_idle = TRUE;

	assert_wait((event_t) &q->pgo_pending, THREAD_UNINT);
	vm_page_unlock_queues();

	thread_block_parameter((thread_continue_t)vm_pageout_iothread_external_continue, (void *) q);
	/*NOTREACHED*/
}
uint32_t	vm_compressor_failed;

#define		MAX_FREE_BATCH		32

static void
vm_pageout_iothread_internal_continue(struct cq *cq)
{
	struct vm_pageout_queue *q;
	vm_page_t	m = NULL;
	boolean_t	pgo_draining;
	vm_page_t	local_q;
	int		local_cnt;
	vm_page_t	local_freeq = NULL;
	int		local_freed = 0;
	int		local_batch_size;

	KERNEL_DEBUG(0xe040000c | DBG_FUNC_END, 0, 0, 0, 0, 0);

	q = cq->q;
	local_batch_size = q->pgo_maxlaundry / (vm_compressor_thread_count * 2);

#if RECORD_THE_COMPRESSED_DATA
	c_compressed_record_init();
#endif
	while (TRUE) {
		int	pages_left_on_q = 0;

		local_cnt = 0;
		local_q = NULL;

		KERNEL_DEBUG(0xe0400014 | DBG_FUNC_START, 0, 0, 0, 0, 0);

		vm_page_lock_queues();

		KERNEL_DEBUG(0xe0400014 | DBG_FUNC_END, 0, 0, 0, 0, 0);

		KERNEL_DEBUG(0xe0400018 | DBG_FUNC_START, q->pgo_laundry, 0, 0, 0, 0);

		while ( !queue_empty(&q->pgo_pending) && local_cnt < local_batch_size) {

			queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);

			m->pageout_queue = FALSE;
			m->pageq.prev = NULL;

			m->pageq.next = (queue_entry_t)local_q;
			local_q = m;
			local_cnt++;
		}
		if (local_q == NULL)
			break;

		q->pgo_busy = TRUE;

		if ((pgo_draining = q->pgo_draining) == FALSE) {
			vm_pageout_throttle_up_batch(q, local_cnt);
			pages_left_on_q = q->pgo_laundry;
		} else
			pages_left_on_q = q->pgo_laundry - local_cnt;

		vm_page_unlock_queues();

#if !RECORD_THE_COMPRESSED_DATA
		if (pages_left_on_q >= local_batch_size && cq->id < (vm_compressor_thread_count - 1))
			thread_wakeup((event_t) ((uintptr_t)&q->pgo_pending + cq->id + 1));
#endif
		KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, q->pgo_laundry, 0, 0, 0, 0);

		while (local_q) {

			KERNEL_DEBUG(0xe0400024 | DBG_FUNC_START, local_cnt, 0, 0, 0, 0);

			m = local_q;
			local_q = (vm_page_t)m->pageq.next;
			m->pageq.next = NULL;

			if (vm_pageout_compress_page(&cq->current_chead, cq->scratch_buf, m, FALSE) == KERN_SUCCESS) {

				m->pageq.next = (queue_entry_t)local_freeq;
				local_freeq = m;
				local_freed++;

				if (local_freed >= MAX_FREE_BATCH) {

					vm_page_free_list(local_freeq, TRUE);

					local_freeq = NULL;
					local_freed = 0;
				}
			}
			while (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
				kern_return_t	wait_result;
				int		need_wakeup = 0;

				if (local_freeq) {
					vm_page_free_list(local_freeq, TRUE);

					local_freeq = NULL;
					local_freed = 0;

					continue;
				}
				lck_mtx_lock_spin(&vm_page_queue_free_lock);

				if (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {

					if (vm_page_free_wanted_privileged++ == 0)
						need_wakeup = 1;
					wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, THREAD_UNINT);

					lck_mtx_unlock(&vm_page_queue_free_lock);

					if (need_wakeup)
						thread_wakeup((event_t)&vm_page_free_wanted);

					if (wait_result == THREAD_WAITING)
						thread_block(THREAD_CONTINUE_NULL);
				} else
					lck_mtx_unlock(&vm_page_queue_free_lock);
			}
		}
		if (local_freeq) {
			vm_page_free_list(local_freeq, TRUE);

			local_freeq = NULL;
			local_freed = 0;
		}
		if (pgo_draining == TRUE) {
			vm_page_lockspin_queues();
			vm_pageout_throttle_up_batch(q, local_cnt);
			vm_page_unlock_queues();
		}
	}
	KERNEL_DEBUG(0xe040000c | DBG_FUNC_START, 0, 0, 0, 0, 0);

	/*
	 * queue lock is held and our q is empty
	 */
	q->pgo_busy = FALSE;
	q->pgo_idle = TRUE;

	assert_wait((event_t) ((uintptr_t)&q->pgo_pending + cq->id), THREAD_UNINT);
	vm_page_unlock_queues();

	KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, 0, 0, 0, 0, 0);

	thread_block_parameter((thread_continue_t)vm_pageout_iothread_internal_continue, (void *) cq);
	/*NOTREACHED*/
}
void
vm_pageout_immediate(vm_page_t m, boolean_t object_locked_by_caller)
{
	assert(vm_pageout_immediate_scratch_buf);

	if (vm_pageout_compress_page(&vm_pageout_immediate_chead, vm_pageout_immediate_scratch_buf, m, object_locked_by_caller) == KERN_SUCCESS) {

		vm_page_free_prepare_object(m, TRUE);
		vm_page_release(m);
	}
}
kern_return_t
vm_pageout_compress_page(void **current_chead, char *scratch_buf, vm_page_t m, boolean_t object_locked_by_caller)
{
	vm_object_t	object;
	memory_object_t	pager;
	int		compressed_count_delta;
	kern_return_t	retval;

	if (m->object->object_slid) {
		panic("slid page %p not allowed on this path\n", m);
	}
	object = m->object;
	pager = object->pager;

	if (object_locked_by_caller == FALSE && (!object->pager_initialized || pager == MEMORY_OBJECT_NULL)) {

		KERNEL_DEBUG(0xe0400010 | DBG_FUNC_START, object, pager, 0, 0, 0);

		vm_object_lock(object);

		/*
		 * If there is no memory object for the page, create
		 * one and hand it to the compression pager.
		 */

		if (!object->pager_initialized)
			vm_object_collapse(object, (vm_object_offset_t) 0, TRUE);
		if (!object->pager_initialized)
			vm_object_compressor_pager_create(object);

		if (!object->pager_initialized) {
			/*
			 * Still no pager for the object.
			 * Reactivate the page.
			 *
			 * Should only happen if there is no
			 * default pager.
			 */
			PAGE_WAKEUP_DONE(m);

			vm_page_lockspin_queues();
			vm_page_activate(m);
			vm_pageout_dirty_no_pager++;
			vm_page_unlock_queues();

			/*
			 *	And we are done with it.
			 */
			vm_object_activity_end(object);
			vm_object_unlock(object);

			return KERN_FAILURE;
		}
		pager = object->pager;

		if (pager == MEMORY_OBJECT_NULL) {
			/*
			 * This pager has been destroyed by either
			 * memory_object_destroy or vm_object_destroy, and
			 * so there is nowhere for the page to go.
			 */
			if (m->pageout) {
				/*
				 * Just free the page... VM_PAGE_FREE takes
				 * care of cleaning up all the state...
				 * including doing the vm_pageout_throttle_up
				 */
				VM_PAGE_FREE(m);
			} else {
				PAGE_WAKEUP_DONE(m);

				vm_page_lockspin_queues();
				vm_page_activate(m);
				vm_page_unlock_queues();

				/*
				 *	And we are done with it.
				 */
			}
			vm_object_activity_end(object);
			vm_object_unlock(object);

			return KERN_FAILURE;
		}
		vm_object_unlock(object);

		KERNEL_DEBUG(0xe0400010 | DBG_FUNC_END, object, pager, 0, 0, 0);
	}
	assert(object->pager_initialized && pager != MEMORY_OBJECT_NULL);

	if (object_locked_by_caller == FALSE)
		assert(object->activity_in_progress > 0);

	retval = vm_compressor_pager_put(
		pager,
		m->offset + object->paging_offset,
		m->phys_page,
		current_chead,
		scratch_buf,
		&compressed_count_delta);

	if (object_locked_by_caller == FALSE) {
		vm_object_lock(object);

		assert(object->activity_in_progress > 0);
		assert(m->object == object);
	}

	vm_compressor_pager_count(pager,
				  compressed_count_delta,
				  FALSE, /* shared_lock */
				  object);

	if (retval == KERN_SUCCESS) {
		/*
		 * If the object is purgeable, its owner's
		 * purgeable ledgers will be updated in
		 * vm_page_remove() but the page still
		 * contributes to the owner's memory footprint,
		 * so account for it as such.
		 */
		if (object->purgable != VM_PURGABLE_DENY &&
		    object->vo_purgeable_owner != NULL) {
			/* one more compressed purgeable page */
			vm_purgeable_compressed_update(object,
						       +1);
		}
		VM_STAT_INCR(compressions);

		vm_page_remove(m, TRUE);

	} else {
		PAGE_WAKEUP_DONE(m);

		vm_page_lockspin_queues();

		vm_page_activate(m);
		vm_compressor_failed++;

		vm_page_unlock_queues();
	}
	if (object_locked_by_caller == FALSE) {
		vm_object_activity_end(object);
		vm_object_unlock(object);
	}
	return retval;
}
static void
vm_pageout_adjust_io_throttles(struct vm_pageout_queue *iq, struct vm_pageout_queue *eq, boolean_t req_lowpriority)
{
	uint32_t	policy;
	boolean_t	set_iq = FALSE;
	boolean_t	set_eq = FALSE;

	if (hibernate_cleaning_in_progress == TRUE)
		req_lowpriority = FALSE;

	if ((DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) && iq->pgo_inited == TRUE && iq->pgo_lowpriority != req_lowpriority)
		set_iq = TRUE;

	if (eq->pgo_inited == TRUE && eq->pgo_lowpriority != req_lowpriority)
		set_eq = TRUE;

	if (set_iq == TRUE || set_eq == TRUE) {

		vm_page_unlock_queues();

		if (req_lowpriority == TRUE) {
			policy = THROTTLE_LEVEL_PAGEOUT_THROTTLED;
			DTRACE_VM(laundrythrottle);
		} else {
			policy = THROTTLE_LEVEL_PAGEOUT_UNTHROTTLED;
			DTRACE_VM(laundryunthrottle);
		}
		if (set_iq == TRUE) {
			proc_set_task_policy_thread(kernel_task, iq->pgo_tid, TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy);

			iq->pgo_lowpriority = req_lowpriority;
		}
		if (set_eq == TRUE) {
			proc_set_task_policy_thread(kernel_task, eq->pgo_tid, TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy);

			eq->pgo_lowpriority = req_lowpriority;
		}
		vm_page_lock_queues();
	}
}
static void
vm_pageout_iothread_external(void)
{
	thread_t	self = current_thread();

	self->options |= TH_OPT_VMPRIV;

	DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL);

	proc_set_task_policy_thread(kernel_task, self->thread_id, TASK_POLICY_EXTERNAL,
	                            TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED);

	vm_page_lock_queues();

	vm_pageout_queue_external.pgo_tid = self->thread_id;
	vm_pageout_queue_external.pgo_lowpriority = TRUE;
	vm_pageout_queue_external.pgo_inited = TRUE;

	vm_page_unlock_queues();

	if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE)
		vm_pageout_iothread_external_continue(&vm_pageout_queue_external);
	else
		vm_pageout_iothread_continue(&vm_pageout_queue_external);

	/*NOTREACHED*/
}
static void
vm_pageout_iothread_internal(struct cq *cq)
{
	thread_t	self = current_thread();

	self->options |= TH_OPT_VMPRIV;

	if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) {
		DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL);

		proc_set_task_policy_thread(kernel_task, self->thread_id, TASK_POLICY_EXTERNAL,
		                            TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED);
	}
	vm_page_lock_queues();

	vm_pageout_queue_internal.pgo_tid = self->thread_id;
	vm_pageout_queue_internal.pgo_lowpriority = TRUE;
	vm_pageout_queue_internal.pgo_inited = TRUE;

	vm_page_unlock_queues();

	if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {

		if (vm_restricted_to_single_processor == TRUE)
			thread_vm_bind_group_add();

		vm_pageout_iothread_internal_continue(cq);
	} else
		vm_pageout_iothread_continue(&vm_pageout_queue_internal);

	/*NOTREACHED*/
}
kern_return_t
vm_set_buffer_cleanup_callout(boolean_t (*func)(int))
{
	if (OSCompareAndSwapPtr(NULL, func, (void * volatile *) &consider_buffer_cache_collect)) {
		return KERN_SUCCESS;
	} else {
		return KERN_FAILURE; /* Already set */
	}
}

extern boolean_t	memorystatus_manual_testing_on;
extern unsigned int	memorystatus_level;
#if VM_PRESSURE_EVENTS

boolean_t vm_pressure_events_enabled = FALSE;

void
vm_pressure_response(void)
{
	vm_pressure_level_t	old_level = kVMPressureNormal;
	int			new_level = -1;

	uint64_t		available_memory = 0;

	if (vm_pressure_events_enabled == FALSE)
		return;

	available_memory = (((uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY) * 100);

	memorystatus_level = (unsigned int) (available_memory / atop_64(max_mem));

	if (memorystatus_manual_testing_on) {
		return;
	}

	old_level = memorystatus_vm_pressure_level;

	switch (memorystatus_vm_pressure_level) {

	case kVMPressureNormal:
	{
		if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
			new_level = kVMPressureCritical;
		} else if (VM_PRESSURE_NORMAL_TO_WARNING()) {
			new_level = kVMPressureWarning;
		}
		break;
	}

	case kVMPressureWarning:
	case kVMPressureUrgent:
	{
		if (VM_PRESSURE_WARNING_TO_NORMAL()) {
			new_level = kVMPressureNormal;
		} else if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
			new_level = kVMPressureCritical;
		}
		break;
	}

	case kVMPressureCritical:
	{
		if (VM_PRESSURE_WARNING_TO_NORMAL()) {
			new_level = kVMPressureNormal;
		} else if (VM_PRESSURE_CRITICAL_TO_WARNING()) {
			new_level = kVMPressureWarning;
		}
		break;
	}
	}

	if (new_level != -1) {
		memorystatus_vm_pressure_level = (vm_pressure_level_t) new_level;

		if ((memorystatus_vm_pressure_level != kVMPressureNormal) || (old_level != new_level)) {
			if (vm_pressure_thread_running == FALSE) {
				thread_wakeup(&vm_pressure_thread);
			}

			if (old_level != new_level) {
				thread_wakeup(&vm_pressure_changed);
			}
		}
	}
}
#endif /* VM_PRESSURE_EVENTS */
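/*
 * The level transitions in vm_pressure_response() amount to a small state
 * machine.  Below is an illustrative sketch of that machine as a pure
 * function (not part of this file's build); the predicates are passed in as
 * flags, and everything except the normal/warning/critical naming is invented
 * for the example.
 */
#if 0
#include <stdio.h>

typedef enum { NORMAL, WARNING, CRITICAL } level_t;

static level_t
next_level(level_t cur, int to_warning, int to_critical, int back_to_normal,
    int critical_to_warning)
{
	switch (cur) {
	case NORMAL:
		if (to_critical)		return CRITICAL;
		if (to_warning)			return WARNING;
		return NORMAL;
	case WARNING:
		if (back_to_normal)		return NORMAL;
		if (to_critical)		return CRITICAL;
		return WARNING;
	case CRITICAL:
		if (back_to_normal)		return NORMAL;
		if (critical_to_warning)	return WARNING;
		return CRITICAL;
	}
	return cur;
}

int
main(void)
{
	/* e.g. at warning level and pressure has eased: drop back to normal */
	printf("%d\n", next_level(WARNING, 0, 0, 1, 0));
	return 0;
}
#endif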
kern_return_t
mach_vm_pressure_level_monitor(__unused boolean_t wait_for_pressure, __unused unsigned int *pressure_level) {

#if !VM_PRESSURE_EVENTS

	return KERN_FAILURE;

#else /* VM_PRESSURE_EVENTS */

	kern_return_t	kr = KERN_SUCCESS;

	if (pressure_level != NULL) {

		vm_pressure_level_t	old_level = memorystatus_vm_pressure_level;

		if (wait_for_pressure == TRUE) {
			wait_result_t		wr = 0;

			while (old_level == *pressure_level) {
				wr = assert_wait((event_t) &vm_pressure_changed,
						 THREAD_INTERRUPTIBLE);
				if (wr == THREAD_WAITING) {
					wr = thread_block(THREAD_CONTINUE_NULL);
				}
				if (wr == THREAD_INTERRUPTED) {
					return KERN_ABORTED;
				}
				if (wr == THREAD_AWAKENED) {

					old_level = memorystatus_vm_pressure_level;

					if (old_level != *pressure_level) {
						break;
					}
				}
			}
		}

		*pressure_level = old_level;
	} else {
		kr = KERN_INVALID_ARGUMENT;
	}

	return kr;
#endif /* VM_PRESSURE_EVENTS */
}
#if VM_PRESSURE_EVENTS
void
vm_pressure_thread(void) {
	static boolean_t thread_initialized = FALSE;

	if (thread_initialized == TRUE) {
		vm_pressure_thread_running = TRUE;
		consider_vm_pressure_events();
		vm_pressure_thread_running = FALSE;
	}

	thread_initialized = TRUE;
	assert_wait((event_t) &vm_pressure_thread, THREAD_UNINT);
	thread_block((thread_continue_t)vm_pressure_thread);
}
#endif /* VM_PRESSURE_EVENTS */
uint32_t vm_pageout_considered_page_last = 0;

/*
 * called once per-second via "compute_averages"
 */
void
compute_pageout_gc_throttle()
{
	if (vm_pageout_considered_page != vm_pageout_considered_page_last) {

		vm_pageout_considered_page_last = vm_pageout_considered_page;

		thread_wakeup((event_t) &vm_pageout_garbage_collect);
	}
}
static void
vm_pageout_garbage_collect(int collect)
{
	if (collect) {
		boolean_t buf_large_zfree = FALSE;
		boolean_t first_try = TRUE;

		consider_machine_collect();

		do {
			if (consider_buffer_cache_collect != NULL) {
				buf_large_zfree = (*consider_buffer_cache_collect)(0);
			}
			if (first_try == TRUE || buf_large_zfree == TRUE) {
				/*
				 * consider_zone_gc should be last, because the other operations
				 * might return memory to zones.
				 */
				consider_zone_gc(buf_large_zfree);
			}
			first_try = FALSE;

		} while (buf_large_zfree == TRUE && vm_page_free_count < vm_page_free_target);

		consider_machine_adjust();
	}
	assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);

	thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
	/*NOTREACHED*/
}
void	vm_pageout_reinit_tuneables(void);

void
vm_pageout_reinit_tuneables(void)
{
	vm_compressor_minorcompact_threshold_divisor = 18;
	vm_compressor_majorcompact_threshold_divisor = 22;
	vm_compressor_unthrottle_threshold_divisor = 32;
}
#if VM_PAGE_BUCKETS_CHECK
#if VM_PAGE_FAKE_BUCKETS
extern vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
#endif /* VM_PAGE_FAKE_BUCKETS */
#endif /* VM_PAGE_BUCKETS_CHECK */

#define FBDP_TEST_COLLAPSE_COMPRESSOR 0
#if FBDP_TEST_COLLAPSE_COMPRESSOR
extern boolean_t vm_object_collapse_compressor_allowed;
#include <IOKit/IOLib.h>
#endif /* FBDP_TEST_COLLAPSE_COMPRESSOR */

#define FBDP_TEST_WIRE_AND_EXTRACT 0
#if FBDP_TEST_WIRE_AND_EXTRACT
extern ledger_template_t	task_ledger_template;
#include <mach/mach_vm.h>
extern ppnum_t vm_map_get_phys_page(vm_map_t map,
				    vm_offset_t offset);
#endif /* FBDP_TEST_WIRE_AND_EXTRACT */
4219 vm_set_restrictions()
4221 host_basic_info_data_t hinfo
;
4222 mach_msg_type_number_t count
= HOST_BASIC_INFO_COUNT
;
4225 host_info((host_t
)BSD_HOST
, HOST_BASIC_INFO
, (host_info_t
)&hinfo
, &count
);
4227 assert(hinfo
.max_cpus
> 0);
4229 if (hinfo
.max_cpus
<= 3) {
4231 * on systems with a limited number of CPUS, bind the
4232 * 4 major threads that can free memory and that tend to use
4233 * a fair bit of CPU under pressured conditions to a single processor.
4234 * This insures that these threads don't hog all of the available CPUs
4235 * (important for camera launch), while allowing them to run independently
4236 * w/r to locks... the 4 threads are
4237 * vm_pageout_scan, vm_pageout_iothread_internal (compressor),
4238 * vm_compressor_swap_trigger_thread (minor and major compactions),
4239 * memorystatus_thread (jetsams).
4241 * the first time the thread is run, it is responsible for checking the
4242 * state of vm_restricted_to_single_processor, and if TRUE it calls
4243 * thread_bind_master... someday this should be replaced with a group
4244 * scheduling mechanism and KPI.
4246 vm_restricted_to_single_processor
= TRUE
;
4254 thread_t self
= current_thread();
4256 kern_return_t result
;
4260 * Set thread privileges.
4265 self
->options
|= TH_OPT_VMPRIV
;
4266 sched_set_thread_base_priority(self
, BASEPRI_PREEMPT
- 1);
4267 thread_unlock(self
);
4269 if (!self
->reserved_stack
)
4270 self
->reserved_stack
= self
->kernel_stack
;
4272 if (vm_restricted_to_single_processor
== TRUE
)
4273 thread_vm_bind_group_add();
4278 * Initialize some paging parameters.
4281 if (vm_pageout_swap_wait
== 0)
4282 vm_pageout_swap_wait
= VM_PAGEOUT_SWAP_WAIT
;
4284 if (vm_pageout_idle_wait
== 0)
4285 vm_pageout_idle_wait
= VM_PAGEOUT_IDLE_WAIT
;
4287 if (vm_pageout_burst_wait
== 0)
4288 vm_pageout_burst_wait
= VM_PAGEOUT_BURST_WAIT
;
4290 if (vm_pageout_empty_wait
== 0)
4291 vm_pageout_empty_wait
= VM_PAGEOUT_EMPTY_WAIT
;
4293 if (vm_pageout_deadlock_wait
== 0)
4294 vm_pageout_deadlock_wait
= VM_PAGEOUT_DEADLOCK_WAIT
;
4296 if (vm_pageout_deadlock_relief
== 0)
4297 vm_pageout_deadlock_relief
= VM_PAGEOUT_DEADLOCK_RELIEF
;
4299 if (vm_pageout_inactive_relief
== 0)
4300 vm_pageout_inactive_relief
= VM_PAGEOUT_INACTIVE_RELIEF
;
4302 if (vm_pageout_burst_active_throttle
== 0)
4303 vm_pageout_burst_active_throttle
= VM_PAGEOUT_BURST_ACTIVE_THROTTLE
;
4305 if (vm_pageout_burst_inactive_throttle
== 0)
4306 vm_pageout_burst_inactive_throttle
= VM_PAGEOUT_BURST_INACTIVE_THROTTLE
;
4309 * Set kernel task to low backing store privileged
4312 task_lock(kernel_task
);
4313 kernel_task
->priv_flags
|= VM_BACKING_STORE_PRIV
;
4314 task_unlock(kernel_task
);
4316 vm_page_free_count_init
= vm_page_free_count
;
4319 * even if we've already called vm_page_free_reserve
4320 * call it again here to insure that the targets are
4321 * accurately calculated (it uses vm_page_free_count_init)
4322 * calling it with an arg of 0 will not change the reserve
4323 * but will re-calculate free_min and free_target
4325 if (vm_page_free_reserved
< VM_PAGE_FREE_RESERVED(processor_count
)) {
4326 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count
)) - vm_page_free_reserved
);
4328 vm_page_free_reserve(0);
4331 queue_init(&vm_pageout_queue_external
.pgo_pending
);
4332 vm_pageout_queue_external
.pgo_maxlaundry
= VM_PAGE_LAUNDRY_MAX
;
4333 vm_pageout_queue_external
.pgo_laundry
= 0;
4334 vm_pageout_queue_external
.pgo_idle
= FALSE
;
4335 vm_pageout_queue_external
.pgo_busy
= FALSE
;
4336 vm_pageout_queue_external
.pgo_throttled
= FALSE
;
4337 vm_pageout_queue_external
.pgo_draining
= FALSE
;
4338 vm_pageout_queue_external
.pgo_lowpriority
= FALSE
;
4339 vm_pageout_queue_external
.pgo_tid
= -1;
4340 vm_pageout_queue_external
.pgo_inited
= FALSE
;
4342 queue_init(&vm_pageout_queue_internal
.pgo_pending
);
4343 vm_pageout_queue_internal
.pgo_maxlaundry
= 0;
4344 vm_pageout_queue_internal
.pgo_laundry
= 0;
4345 vm_pageout_queue_internal
.pgo_idle
= FALSE
;
4346 vm_pageout_queue_internal
.pgo_busy
= FALSE
;
4347 vm_pageout_queue_internal
.pgo_throttled
= FALSE
;
4348 vm_pageout_queue_internal
.pgo_draining
= FALSE
;
4349 vm_pageout_queue_internal
.pgo_lowpriority
= FALSE
;
4350 vm_pageout_queue_internal
.pgo_tid
= -1;
4351 vm_pageout_queue_internal
.pgo_inited
= FALSE
;
4353 /* internal pageout thread started when default pager registered first time */
4354 /* external pageout and garbage collection threads started here */
4356 result
= kernel_thread_start_priority((thread_continue_t
)vm_pageout_iothread_external
, NULL
,
4357 BASEPRI_PREEMPT
- 1,
4358 &vm_pageout_external_iothread
);
4359 if (result
!= KERN_SUCCESS
)
4360 panic("vm_pageout_iothread_external: create failed");
4362 thread_deallocate(vm_pageout_external_iothread
);
4364 result
= kernel_thread_start_priority((thread_continue_t
)vm_pageout_garbage_collect
, NULL
,
4367 if (result
!= KERN_SUCCESS
)
4368 panic("vm_pageout_garbage_collect: create failed");
4370 thread_deallocate(thread
);
4372 #if VM_PRESSURE_EVENTS
4373 result
= kernel_thread_start_priority((thread_continue_t
)vm_pressure_thread
, NULL
,
4377 if (result
!= KERN_SUCCESS
)
4378 panic("vm_pressure_thread: create failed");
4380 thread_deallocate(thread
);
4383 vm_object_reaper_init();
4385 if (COMPRESSED_PAGER_IS_ACTIVE
|| DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE
)
4386 vm_compressor_pager_init();
4388 #if VM_PRESSURE_EVENTS
4389 vm_pressure_events_enabled
= TRUE
;
4390 #endif /* VM_PRESSURE_EVENTS */
4392 #if CONFIG_PHANTOM_CACHE
4393 vm_phantom_cache_init();
4395 #if VM_PAGE_BUCKETS_CHECK
4396 #if VM_PAGE_FAKE_BUCKETS
4397 printf("**** DEBUG: protecting fake buckets [0x%llx:0x%llx]\n",
4398 (uint64_t) vm_page_fake_buckets_start
,
4399 (uint64_t) vm_page_fake_buckets_end
);
4400 pmap_protect(kernel_pmap
,
4401 vm_page_fake_buckets_start
,
4402 vm_page_fake_buckets_end
,
4404 // *(char *) vm_page_fake_buckets_start = 'x'; /* panic! */
4405 #endif /* VM_PAGE_FAKE_BUCKETS */
4406 #endif /* VM_PAGE_BUCKETS_CHECK */
4408 #if VM_OBJECT_TRACKING
4409 vm_object_tracking_init();
4410 #endif /* VM_OBJECT_TRACKING */
4413 #if FBDP_TEST_COLLAPSE_COMPRESSOR
4414 vm_object_size_t backing_size
, top_size
;
4415 vm_object_t backing_object
, top_object
;
4416 vm_map_offset_t backing_offset
, top_offset
;
4417 unsigned char *backing_address
, *top_address
;
4420 printf("FBDP_TEST_COLLAPSE_COMPRESSOR:\n");
4422 /* create backing object */
4423 backing_size
= 15 * PAGE_SIZE
;
4424 backing_object
= vm_object_allocate(backing_size
);
4425 assert(backing_object
!= VM_OBJECT_NULL
);
4426 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: created backing object %p\n",
4428 /* map backing object */
4430 kr
= vm_map_enter(kernel_map
, &backing_offset
, backing_size
, 0,
4431 VM_FLAGS_ANYWHERE
, backing_object
, 0, FALSE
,
4432 VM_PROT_DEFAULT
, VM_PROT_DEFAULT
, VM_INHERIT_DEFAULT
);
4433 assert(kr
== KERN_SUCCESS
);
4434 backing_address
= (unsigned char *) backing_offset
;
4435 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4436 "mapped backing object %p at 0x%llx\n",
4437 backing_object
, (uint64_t) backing_offset
);
4438 /* populate with pages to be compressed in backing object */
4439 backing_address
[0x1*PAGE_SIZE
] = 0xB1;
4440 backing_address
[0x4*PAGE_SIZE
] = 0xB4;
4441 backing_address
[0x7*PAGE_SIZE
] = 0xB7;
4442 backing_address
[0xa*PAGE_SIZE
] = 0xBA;
4443 backing_address
[0xd*PAGE_SIZE
] = 0xBD;
4444 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4445 "populated pages to be compressed in "
4446 "backing_object %p\n", backing_object
);
4447 /* compress backing object */
4448 vm_object_pageout(backing_object
);
4449 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: compressing backing_object %p\n",
4451 /* wait for all the pages to be gone */
4452 while (*(volatile int *)&backing_object
->resident_page_count
!= 0)
4454 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: backing_object %p compressed\n",
4456 /* populate with pages to be resident in backing object */
4457 backing_address
[0x0*PAGE_SIZE
] = 0xB0;
4458 backing_address
[0x3*PAGE_SIZE
] = 0xB3;
4459 backing_address
[0x6*PAGE_SIZE
] = 0xB6;
4460 backing_address
[0x9*PAGE_SIZE
] = 0xB9;
4461 backing_address
[0xc*PAGE_SIZE
] = 0xBC;
4462 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4463 "populated pages to be resident in "
4464 "backing_object %p\n", backing_object
);
4465 /* leave the other pages absent */
4466 /* mess with the paging_offset of the backing_object */
4467 assert(backing_object
->paging_offset
== 0);
4468 backing_object
->paging_offset
= 0x3000;
4470 /* create top object */
4471 top_size
= 9 * PAGE_SIZE
;
4472 top_object
= vm_object_allocate(top_size
);
4473 assert(top_object
!= VM_OBJECT_NULL
);
4474 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: created top object %p\n",
4476 /* map top object */
4478 kr
= vm_map_enter(kernel_map
, &top_offset
, top_size
, 0,
4479 VM_FLAGS_ANYWHERE
, top_object
, 0, FALSE
,
4480 VM_PROT_DEFAULT
, VM_PROT_DEFAULT
, VM_INHERIT_DEFAULT
);
4481 assert(kr
== KERN_SUCCESS
);
4482 top_address
= (unsigned char *) top_offset
;
4483 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4484 "mapped top object %p at 0x%llx\n",
4485 top_object
, (uint64_t) top_offset
);
4486 /* populate with pages to be compressed in top object */
4487 top_address
[0x3*PAGE_SIZE
] = 0xA3;
4488 top_address
[0x4*PAGE_SIZE
] = 0xA4;
4489 top_address
[0x5*PAGE_SIZE
] = 0xA5;
4490 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4491 "populated pages to be compressed in "
4492 "top_object %p\n", top_object
);
4493 /* compress top object */
4494 vm_object_pageout(top_object
);
4495 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: compressing top_object %p\n",
4497 /* wait for all the pages to be gone */
4498 while (top_object
->resident_page_count
!= 0);
4499 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: top_object %p compressed\n",
4501 /* populate with pages to be resident in top object */
4502 top_address
[0x0*PAGE_SIZE
] = 0xA0;
4503 top_address
[0x1*PAGE_SIZE
] = 0xA1;
4504 top_address
[0x2*PAGE_SIZE
] = 0xA2;
4505 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4506 "populated pages to be resident in "
4507 "top_object %p\n", top_object
);
4508 /* leave the other pages absent */
4510 /* link the 2 objects */
4511 vm_object_reference(backing_object
);
4512 top_object
->shadow
= backing_object
;
4513 top_object
->vo_shadow_offset
= 0x3000;
4514 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: linked %p and %p\n",
4515 top_object
, backing_object
);
4517 /* unmap backing object */
4518 vm_map_remove(kernel_map
,
4520 backing_offset
+ backing_size
,
4522 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4523 "unmapped backing_object %p [0x%llx:0x%llx]\n",
4525 (uint64_t) backing_offset
,
4526 (uint64_t) (backing_offset
+ backing_size
));
4529 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: collapsing %p\n", top_object
);
4530 vm_object_lock(top_object
);
4531 vm_object_collapse(top_object
, 0, FALSE
);
4532 vm_object_unlock(top_object
);
4533 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: collapsed %p\n", top_object
);
4536 if (top_object
->shadow
!= VM_OBJECT_NULL
) {
4537 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: not collapsed\n");
4538 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
4539 if (vm_object_collapse_compressor_allowed
) {
4540 panic("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
4543 /* check the contents of the mapping */
4544 unsigned char expect
[9] =
4545 { 0xA0, 0xA1, 0xA2, /* resident in top */
4546 0xA3, 0xA4, 0xA5, /* compressed in top */
4547 0xB9, /* resident in backing + shadow_offset */
4548 0xBD, /* compressed in backing + shadow_offset + paging_offset */
4549 0x00 }; /* absent in both */
4550 unsigned char actual
[9];
4551 unsigned int i
, errors
;
4554 for (i
= 0; i
< sizeof (actual
); i
++) {
4555 actual
[i
] = (unsigned char) top_address
[i
*PAGE_SIZE
];
4556 if (actual
[i
] != expect
[i
]) {
4560 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: "
4561 "actual [%x %x %x %x %x %x %x %x %x] "
4562 "expect [%x %x %x %x %x %x %x %x %x] "
4564 actual
[0], actual
[1], actual
[2], actual
[3],
4565 actual
[4], actual
[5], actual
[6], actual
[7],
4567 expect
[0], expect
[1], expect
[2], expect
[3],
4568 expect
[4], expect
[5], expect
[6], expect
[7],
4572 panic("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
4574 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: PASS\n");
4577 #endif /* FBDP_TEST_COLLAPSE_COMPRESSOR */
4579 #if FBDP_TEST_WIRE_AND_EXTRACT
4581 vm_map_t user_map
, wire_map
;
4582 mach_vm_address_t user_addr
, wire_addr
;
4583 mach_vm_size_t user_size
, wire_size
;
4584 mach_vm_offset_t cur_offset
;
4585 vm_prot_t cur_prot
, max_prot
;
4586 ppnum_t user_ppnum
, wire_ppnum
;
4589 ledger
= ledger_instantiate(task_ledger_template
,
4590 LEDGER_CREATE_ACTIVE_ENTRIES
);
4591 user_map
= vm_map_create(pmap_create(ledger
, 0, PMAP_CREATE_64BIT
),
4595 wire_map
= vm_map_create(NULL
,
4600 user_size
= 0x10000;
4601 kr
= mach_vm_allocate(user_map
,
4605 assert(kr
== KERN_SUCCESS
);
4607 wire_size
= user_size
;
4608 kr
= mach_vm_remap(wire_map
,
4619 assert(kr
== KERN_SUCCESS
);
4620 for (cur_offset
= 0;
4621 cur_offset
< wire_size
;
4622 cur_offset
+= PAGE_SIZE
) {
4623 kr
= vm_map_wire_and_extract(wire_map
,
4624 wire_addr
+ cur_offset
,
4625 VM_PROT_DEFAULT
| VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK
)),
4628 assert(kr
== KERN_SUCCESS
);
4629 user_ppnum
= vm_map_get_phys_page(user_map
,
4630 user_addr
+ cur_offset
);
4631 printf("FBDP_TEST_WIRE_AND_EXTRACT: kr=0x%x "
4632 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
4634 user_map
, user_addr
+ cur_offset
, user_ppnum
,
4635 wire_map
, wire_addr
+ cur_offset
, wire_ppnum
);
4636 if (kr
!= KERN_SUCCESS
||
4638 wire_ppnum
!= user_ppnum
) {
4639 panic("FBDP_TEST_WIRE_AND_EXTRACT: FAIL\n");
4642 cur_offset
-= PAGE_SIZE
;
4643 kr
= vm_map_wire_and_extract(wire_map
,
4644 wire_addr
+ cur_offset
,
4648 assert(kr
== KERN_SUCCESS
);
4649 printf("FBDP_TEST_WIRE_AND_EXTRACT: re-wire kr=0x%x "
4650 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
4652 user_map
, user_addr
+ cur_offset
, user_ppnum
,
4653 wire_map
, wire_addr
+ cur_offset
, wire_ppnum
);
4654 if (kr
!= KERN_SUCCESS
||
4656 wire_ppnum
!= user_ppnum
) {
4657 panic("FBDP_TEST_WIRE_AND_EXTRACT: FAIL\n");
4660 printf("FBDP_TEST_WIRE_AND_EXTRACT: PASS\n");
4661 #endif /* FBDP_TEST_WIRE_AND_EXTRACT */
4663 vm_pageout_continue();
4668 * The vm_pageout_continue() call above never returns, so the code below is never
4669 * executed. We take advantage of this to declare several DTrace VM related probe
4670 * points that our kernel doesn't have an analog for. These are probe points that
4671 * exist in Solaris and are in the DTrace documentation, so people may have written
4672 * scripts that use them. Declaring the probe points here means their scripts will
4673 * compile and execute which we want for portability of the scripts, but since this
4674 * section of code is never reached, the probe points will simply never fire. Yes,
4675 * this is basically a hack. The problem is the DTrace probe points were chosen with
4676 * Solaris specific VM events in mind, not portability to different VM implementations.
4679 DTRACE_VM2(execfree
, int, 1, (uint64_t *), NULL
);
4680 DTRACE_VM2(execpgin
, int, 1, (uint64_t *), NULL
);
4681 DTRACE_VM2(execpgout
, int, 1, (uint64_t *), NULL
);
4682 DTRACE_VM2(pgswapin
, int, 1, (uint64_t *), NULL
);
4683 DTRACE_VM2(pgswapout
, int, 1, (uint64_t *), NULL
);
4684 DTRACE_VM2(swapin
, int, 1, (uint64_t *), NULL
);
4685 DTRACE_VM2(swapout
, int, 1, (uint64_t *), NULL
);
4691 int vm_compressor_thread_count
= 2;
4694 vm_pageout_internal_start(void)
4696 kern_return_t result
;
4698 host_basic_info_data_t hinfo
;
4702 if (COMPRESSED_PAGER_IS_ACTIVE
|| DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE
) {
4703 mach_msg_type_number_t count
= HOST_BASIC_INFO_COUNT
;
4705 host_info((host_t
)BSD_HOST
, HOST_BASIC_INFO
, (host_info_t
)&hinfo
, &count
);
4707 assert(hinfo
.max_cpus
> 0);
4709 if (vm_compressor_thread_count
>= hinfo
.max_cpus
)
4710 vm_compressor_thread_count
= hinfo
.max_cpus
- 1;
4711 if (vm_compressor_thread_count
<= 0)
4712 vm_compressor_thread_count
= 1;
4713 else if (vm_compressor_thread_count
> MAX_COMPRESSOR_THREAD_COUNT
)
4714 vm_compressor_thread_count
= MAX_COMPRESSOR_THREAD_COUNT
;
4716 if (vm_compressor_immediate_preferred
== TRUE
) {
4717 vm_pageout_immediate_chead
= NULL
;
4718 vm_pageout_immediate_scratch_buf
= kalloc(COMPRESSOR_SCRATCH_BUF_SIZE
);
4720 vm_compressor_thread_count
= 1;
4722 thread_count
= vm_compressor_thread_count
;
4724 vm_pageout_queue_internal
.pgo_maxlaundry
= (vm_compressor_thread_count
* 4) * VM_PAGE_LAUNDRY_MAX
;
4726 vm_compressor_thread_count
= 0;
4728 vm_pageout_queue_internal
.pgo_maxlaundry
= VM_PAGE_LAUNDRY_MAX
;
4731 for (i
= 0; i
< vm_compressor_thread_count
; i
++) {
4733 ciq
[i
].q
= &vm_pageout_queue_internal
;
4734 ciq
[i
].current_chead
= NULL
;
4735 ciq
[i
].scratch_buf
= kalloc(COMPRESSOR_SCRATCH_BUF_SIZE
);
4737 for (i
= 0; i
< thread_count
; i
++) {
4738 result
= kernel_thread_start_priority((thread_continue_t
)vm_pageout_iothread_internal
, (void *)&ciq
[i
], BASEPRI_PREEMPT
- 1, &vm_pageout_internal_iothread
);
4740 if (result
== KERN_SUCCESS
)
4741 thread_deallocate(vm_pageout_internal_iothread
);
4750 * To support I/O Expedite for compressed files we mark the upls with special flags.
4751 * The way decmpfs works is that we create a big upl which marks all the pages needed to
4752 * represent the compressed file as busy. We tag this upl with the flag UPL_DECMP_REQ. Decmpfs
4753 * then issues smaller I/Os for compressed I/Os, deflates them and puts the data into the pages
4754 * being held in the big original UPL. We mark each of these smaller UPLs with the flag
4755 * UPL_DECMP_REAL_IO. Any outstanding real I/O UPL is tracked by the big req upl using the
4756 * decmp_io_upl field (in the upl structure). This link is protected in the forward direction
4757 * by the req upl lock (the reverse link doesnt need synch. since we never inspect this link
4758 * unless the real I/O upl is being destroyed).
4763 upl_set_decmp_info(upl_t upl
, upl_t src_upl
)
4765 assert((src_upl
->flags
& UPL_DECMP_REQ
) != 0);
4768 if (src_upl
->decmp_io_upl
) {
4770 * If there is already an alive real I/O UPL, ignore this new UPL.
4771 * This case should rarely happen and even if it does, it just means
4772 * that we might issue a spurious expedite which the driver is expected
4775 upl_unlock(src_upl
);
4778 src_upl
->decmp_io_upl
= (void *)upl
;
4779 src_upl
->ref_count
++;
4781 upl
->flags
|= UPL_DECMP_REAL_IO
;
4782 upl
->decmp_io_upl
= (void *)src_upl
;
4783 upl_unlock(src_upl
);
4785 #endif /* CONFIG_IOSCHED */
4788 int upl_debug_enabled
= 1;
4790 int upl_debug_enabled
= 0;
4794 upl_create(int type
, int flags
, upl_size_t size
)
4797 vm_size_t page_field_size
= 0;
4799 vm_size_t upl_size
= sizeof(struct upl
);
4801 size
= round_page_32(size
);
4803 if (type
& UPL_CREATE_LITE
) {
4804 page_field_size
= (atop(size
) + 7) >> 3;
4805 page_field_size
= (page_field_size
+ 3) & 0xFFFFFFFC;
4807 upl_flags
|= UPL_LITE
;
4809 if (type
& UPL_CREATE_INTERNAL
) {
4810 upl_size
+= sizeof(struct upl_page_info
) * atop(size
);
4812 upl_flags
|= UPL_INTERNAL
;
4814 upl
= (upl_t
)kalloc(upl_size
+ page_field_size
);
4816 if (page_field_size
)
4817 bzero((char *)upl
+ upl_size
, page_field_size
);
4819 upl
->flags
= upl_flags
| flags
;
4820 upl
->src_object
= NULL
;
4821 upl
->kaddr
= (vm_offset_t
)0;
4823 upl
->map_object
= NULL
;
4825 upl
->ext_ref_count
= 0;
4826 upl
->highest_page
= 0;
4828 upl
->vector_upl
= NULL
;
4829 upl
->associated_upl
= NULL
;
4831 if (type
& UPL_CREATE_IO_TRACKING
) {
4832 upl
->upl_priority
= proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO
);
4835 upl
->upl_reprio_info
= 0;
4836 upl
->decmp_io_upl
= 0;
4837 if ((type
& UPL_CREATE_INTERNAL
) && (type
& UPL_CREATE_EXPEDITE_SUP
)) {
4838 /* Only support expedite on internal UPLs */
4839 thread_t curthread
= current_thread();
4840 upl
->upl_reprio_info
= (uint64_t *)kalloc(sizeof(uint64_t) * atop(size
));
4841 bzero(upl
->upl_reprio_info
, (sizeof(uint64_t) * atop(size
)));
4842 upl
->flags
|= UPL_EXPEDITE_SUPPORTED
;
4843 if (curthread
->decmp_upl
!= NULL
)
4844 upl_set_decmp_info(upl
, curthread
->decmp_upl
);
4847 #if CONFIG_IOSCHED || UPL_DEBUG
4848 if ((type
& UPL_CREATE_IO_TRACKING
) || upl_debug_enabled
) {
4849 upl
->upl_creator
= current_thread();
4852 upl
->flags
|= UPL_TRACKED_BY_OBJECT
;
4857 upl
->ubc_alias1
= 0;
4858 upl
->ubc_alias2
= 0;
4861 upl
->upl_commit_index
= 0;
4862 bzero(&upl
->upl_commit_records
[0], sizeof(upl
->upl_commit_records
));
4864 (void) OSBacktrace(&upl
->upl_create_retaddr
[0], UPL_DEBUG_STACK_FRAMES
);
4865 #endif /* UPL_DEBUG */
4871 upl_destroy(upl_t upl
)
4873 int page_field_size
; /* bit field in word size buf */
4876 if (upl
->ext_ref_count
) {
4877 panic("upl(%p) ext_ref_count", upl
);
4881 if ((upl
->flags
& UPL_DECMP_REAL_IO
) && upl
->decmp_io_upl
) {
4883 src_upl
= upl
->decmp_io_upl
;
4884 assert((src_upl
->flags
& UPL_DECMP_REQ
) != 0);
4886 src_upl
->decmp_io_upl
= NULL
;
4887 upl_unlock(src_upl
);
4888 upl_deallocate(src_upl
);
4890 #endif /* CONFIG_IOSCHED */
4892 #if CONFIG_IOSCHED || UPL_DEBUG
4893 if ((upl
->flags
& UPL_TRACKED_BY_OBJECT
) && !(upl
->flags
& UPL_VECTOR
)) {
4896 if (upl
->flags
& UPL_SHADOWED
) {
4897 object
= upl
->map_object
->shadow
;
4899 object
= upl
->map_object
;
4902 vm_object_lock(object
);
4903 queue_remove(&object
->uplq
, upl
, upl_t
, uplq
);
4904 vm_object_activity_end(object
);
4905 vm_object_collapse(object
, 0, TRUE
);
4906 vm_object_unlock(object
);
4910 * drop a reference on the map_object whether or
4911 * not a pageout object is inserted
4913 if (upl
->flags
& UPL_SHADOWED
)
4914 vm_object_deallocate(upl
->map_object
);
4916 if (upl
->flags
& UPL_DEVICE_MEMORY
)
4920 page_field_size
= 0;
4922 if (upl
->flags
& UPL_LITE
) {
4923 page_field_size
= ((size
/PAGE_SIZE
) + 7) >> 3;
4924 page_field_size
= (page_field_size
+ 3) & 0xFFFFFFFC;
4926 upl_lock_destroy(upl
);
4927 upl
->vector_upl
= (vector_upl_t
) 0xfeedbeef;
4930 if (upl
->flags
& UPL_EXPEDITE_SUPPORTED
)
4931 kfree(upl
->upl_reprio_info
, sizeof(uint64_t) * (size
/PAGE_SIZE
));
4934 if (upl
->flags
& UPL_INTERNAL
) {
4936 sizeof(struct upl
) +
4937 (sizeof(struct upl_page_info
) * (size
/PAGE_SIZE
))
4940 kfree(upl
, sizeof(struct upl
) + page_field_size
);
4945 upl_deallocate(upl_t upl
)
4948 if (--upl
->ref_count
== 0) {
4949 if(vector_upl_is_valid(upl
))
4950 vector_upl_deallocate(upl
);
4960 upl_mark_decmp(upl_t upl
)
4962 if (upl
->flags
& UPL_TRACKED_BY_OBJECT
) {
4963 upl
->flags
|= UPL_DECMP_REQ
;
4964 upl
->upl_creator
->decmp_upl
= (void *)upl
;
4969 upl_unmark_decmp(upl_t upl
)
4971 if(upl
&& (upl
->flags
& UPL_DECMP_REQ
)) {
4972 upl
->upl_creator
->decmp_upl
= NULL
;
4976 #endif /* CONFIG_IOSCHED */
4978 #define VM_PAGE_Q_BACKING_UP(q) \
4979 ((q)->pgo_laundry >= (((q)->pgo_maxlaundry * 8) / 10))
4981 boolean_t
must_throttle_writes(void);
4984 must_throttle_writes()
4986 if (VM_PAGE_Q_BACKING_UP(&vm_pageout_queue_external
) &&
4987 vm_page_pageable_external_count
> (AVAILABLE_NON_COMPRESSED_MEMORY
* 6) / 10)
4994 #if DEVELOPMENT || DEBUG
4996 * Statistics about UPL enforcement of copy-on-write obligations.
4998 unsigned long upl_cow
= 0;
4999 unsigned long upl_cow_again
= 0;
5000 unsigned long upl_cow_pages
= 0;
5001 unsigned long upl_cow_again_pages
= 0;
5003 unsigned long iopl_cow
= 0;
5004 unsigned long iopl_cow_pages
= 0;
5008 * Routine: vm_object_upl_request
5010 * Cause the population of a portion of a vm_object.
5011 * Depending on the nature of the request, the pages
5012 * returned may be contain valid data or be uninitialized.
5013 * A page list structure, listing the physical pages
5014 * will be returned upon request.
5015 * This function is called by the file system or any other
5016 * supplier of backing store to a pager.
5017 * IMPORTANT NOTE: The caller must still respect the relationship
5018 * between the vm_object and its backing memory object. The
5019 * caller MUST NOT substitute changes in the backing file
5020 * without first doing a memory_object_lock_request on the
5021 * target range unless it is know that the pages are not
5022 * shared with another entity at the pager level.
5024 * if a page list structure is present
5025 * return the mapped physical pages, where a
5026 * page is not present, return a non-initialized
5027 * one. If the no_sync bit is turned on, don't
5028 * call the pager unlock to synchronize with other
5029 * possible copies of the page. Leave pages busy
5030 * in the original object, if a page list structure
5031 * was specified. When a commit of the page list
5032 * pages is done, the dirty bit will be set for each one.
5034 * If a page list structure is present, return
5035 * all mapped pages. Where a page does not exist
5036 * map a zero filled one. Leave pages busy in
5037 * the original object. If a page list structure
5038 * is not specified, this call is a no-op.
5040 * Note: access of default pager objects has a rather interesting
5041 * twist. The caller of this routine, presumably the file system
5042 * page cache handling code, will never actually make a request
5043 * against a default pager backed object. Only the default
5044 * pager will make requests on backing store related vm_objects
5045 * In this way the default pager can maintain the relationship
5046 * between backing store files (abstract memory objects) and
5047 * the vm_objects (cache objects), they support.
5051 __private_extern__ kern_return_t
5052 vm_object_upl_request(
5054 vm_object_offset_t offset
,
5057 upl_page_info_array_t user_page_list
,
5058 unsigned int *page_list_count
,
5059 upl_control_flags_t cntrl_flags
)
5061 vm_page_t dst_page
= VM_PAGE_NULL
;
5062 vm_object_offset_t dst_offset
;
5063 upl_size_t xfer_size
;
5064 unsigned int size_in_pages
;
5069 #if MACH_CLUSTER_STATS
5070 boolean_t encountered_lrp
= FALSE
;
5072 vm_page_t alias_page
= NULL
;
5073 int refmod_state
= 0;
5074 wpl_array_t lite_list
= NULL
;
5075 vm_object_t last_copy_object
;
5076 struct vm_page_delayed_work dw_array
[DEFAULT_DELAYED_WORK_LIMIT
];
5077 struct vm_page_delayed_work
*dwp
;
5080 int io_tracking_flag
= 0;
5082 if (cntrl_flags
& ~UPL_VALID_FLAGS
) {
5084 * For forward compatibility's sake,
5085 * reject any unknown flag.
5087 return KERN_INVALID_VALUE
;
5089 if ( (!object
->internal
) && (object
->paging_offset
!= 0) )
5090 panic("vm_object_upl_request: external object with non-zero paging offset\n");
5091 if (object
->phys_contiguous
)
5092 panic("vm_object_upl_request: contiguous object specified\n");
5095 if (size
> MAX_UPL_SIZE_BYTES
)
5096 size
= MAX_UPL_SIZE_BYTES
;
5098 if ( (cntrl_flags
& UPL_SET_INTERNAL
) && page_list_count
!= NULL
)
5099 *page_list_count
= MAX_UPL_SIZE_BYTES
>> PAGE_SHIFT
;
5101 #if CONFIG_IOSCHED || UPL_DEBUG
5102 if (object
->io_tracking
|| upl_debug_enabled
)
5103 io_tracking_flag
|= UPL_CREATE_IO_TRACKING
;
5106 if (object
->io_tracking
)
5107 io_tracking_flag
|= UPL_CREATE_EXPEDITE_SUP
;
5110 if (cntrl_flags
& UPL_SET_INTERNAL
) {
5111 if (cntrl_flags
& UPL_SET_LITE
) {
5113 upl
= upl_create(UPL_CREATE_INTERNAL
| UPL_CREATE_LITE
| io_tracking_flag
, 0, size
);
5115 user_page_list
= (upl_page_info_t
*) (((uintptr_t)upl
) + sizeof(struct upl
));
5116 lite_list
= (wpl_array_t
)
5117 (((uintptr_t)user_page_list
) +
5118 ((size
/PAGE_SIZE
) * sizeof(upl_page_info_t
)));
5120 user_page_list
= NULL
;
5124 upl
= upl_create(UPL_CREATE_INTERNAL
| io_tracking_flag
, 0, size
);
5126 user_page_list
= (upl_page_info_t
*) (((uintptr_t)upl
) + sizeof(struct upl
));
5128 user_page_list
= NULL
;
5132 if (cntrl_flags
& UPL_SET_LITE
) {
5134 upl
= upl_create(UPL_CREATE_EXTERNAL
| UPL_CREATE_LITE
| io_tracking_flag
, 0, size
);
5136 lite_list
= (wpl_array_t
) (((uintptr_t)upl
) + sizeof(struct upl
));
5141 upl
= upl_create(UPL_CREATE_EXTERNAL
| io_tracking_flag
, 0, size
);
5147 user_page_list
[0].device
= FALSE
;
5149 if (cntrl_flags
& UPL_SET_LITE
) {
5150 upl
->map_object
= object
;
5152 upl
->map_object
= vm_object_allocate(size
);
5154 * No neeed to lock the new object: nobody else knows
5155 * about it yet, so it's all ours so far.
5157 upl
->map_object
->shadow
= object
;
5158 upl
->map_object
->pageout
= TRUE
;
5159 upl
->map_object
->can_persist
= FALSE
;
5160 upl
->map_object
->copy_strategy
= MEMORY_OBJECT_COPY_NONE
;
5161 upl
->map_object
->vo_shadow_offset
= offset
;
5162 upl
->map_object
->wimg_bits
= object
->wimg_bits
;
5164 VM_PAGE_GRAB_FICTITIOUS(alias_page
);
5166 upl
->flags
|= UPL_SHADOWED
;
5170 * Just mark the UPL as "encrypted" here.
5171 * We'll actually encrypt the pages later,
5172 * in upl_encrypt(), when the caller has
5173 * selected which pages need to go to swap.
5175 if (cntrl_flags
& UPL_ENCRYPT
)
5176 upl
->flags
|= UPL_ENCRYPTED
;
5178 if (cntrl_flags
& UPL_FOR_PAGEOUT
)
5179 upl
->flags
|= UPL_PAGEOUT
;
5181 vm_object_lock(object
);
5182 vm_object_activity_begin(object
);
5185 * we can lock in the paging_offset once paging_in_progress is set
5188 upl
->offset
= offset
+ object
->paging_offset
;
5190 #if CONFIG_IOSCHED || UPL_DEBUG
5191 if (object
->io_tracking
|| upl_debug_enabled
) {
5192 vm_object_activity_begin(object
);
5193 queue_enter(&object
->uplq
, upl
, upl_t
, uplq
);
5196 if ((cntrl_flags
& UPL_WILL_MODIFY
) && object
->copy
!= VM_OBJECT_NULL
) {
5198 * Honor copy-on-write obligations
5200 * The caller is gathering these pages and
5201 * might modify their contents. We need to
5202 * make sure that the copy object has its own
5203 * private copies of these pages before we let
5204 * the caller modify them.
5206 vm_object_update(object
,
5211 FALSE
, /* should_return */
5212 MEMORY_OBJECT_COPY_SYNC
,
5214 #if DEVELOPMENT || DEBUG
5216 upl_cow_pages
+= size
>> PAGE_SHIFT
;
5220 * remember which copy object we synchronized with
5222 last_copy_object
= object
->copy
;
5226 dst_offset
= offset
;
5227 size_in_pages
= size
/ PAGE_SIZE
;
5231 dw_limit
= DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT
);
5233 if (vm_page_free_count
> (vm_page_free_target
+ size_in_pages
) ||
5234 object
->resident_page_count
< ((MAX_UPL_SIZE_BYTES
* 2) >> PAGE_SHIFT
))
5235 object
->scan_collisions
= 0;
5237 if ((cntrl_flags
& UPL_WILL_MODIFY
) && must_throttle_writes() == TRUE
) {
5238 boolean_t isSSD
= FALSE
;
5240 vnode_pager_get_isSSD(object
->pager
, &isSSD
);
5241 vm_object_unlock(object
);
5243 OSAddAtomic(size_in_pages
, &vm_upl_wait_for_pages
);
5246 delay(1000 * size_in_pages
);
5248 delay(5000 * size_in_pages
);
5249 OSAddAtomic(-size_in_pages
, &vm_upl_wait_for_pages
);
5251 vm_object_lock(object
);
5258 if ((alias_page
== NULL
) && !(cntrl_flags
& UPL_SET_LITE
)) {
5259 vm_object_unlock(object
);
5260 VM_PAGE_GRAB_FICTITIOUS(alias_page
);
5261 vm_object_lock(object
);
5263 if (cntrl_flags
& UPL_COPYOUT_FROM
) {
5264 upl
->flags
|= UPL_PAGE_SYNC_DONE
;
5266 if ( ((dst_page
= vm_page_lookup(object
, dst_offset
)) == VM_PAGE_NULL
) ||
5267 dst_page
->fictitious
||
5270 dst_page
->cleaning
||
5271 (VM_PAGE_WIRED(dst_page
))) {
5274 user_page_list
[entry
].phys_addr
= 0;
5279 * grab this up front...
5280 * a high percentange of the time we're going to
5281 * need the hardware modification state a bit later
5282 * anyway... so we can eliminate an extra call into
5283 * the pmap layer by grabbing it here and recording it
5285 if (dst_page
->pmapped
)
5286 refmod_state
= pmap_get_refmod(dst_page
->phys_page
);
5290 if ( (refmod_state
& VM_MEM_REFERENCED
) && dst_page
->inactive
) {
5292 * page is on inactive list and referenced...
5293 * reactivate it now... this gets it out of the
5294 * way of vm_pageout_scan which would have to
5295 * reactivate it upon tripping over it
5297 dwp
->dw_mask
|= DW_vm_page_activate
;
5299 if (cntrl_flags
& UPL_RET_ONLY_DIRTY
) {
5301 * we're only asking for DIRTY pages to be returned
5303 if (dst_page
->laundry
|| !(cntrl_flags
& UPL_FOR_PAGEOUT
)) {
5305 * if we were the page stolen by vm_pageout_scan to be
5306 * cleaned (as opposed to a buddy being clustered in
5307 * or this request is not being driven by a PAGEOUT cluster
5308 * then we only need to check for the page being dirty or
5309 * precious to decide whether to return it
5311 if (dst_page
->dirty
|| dst_page
->precious
|| (refmod_state
& VM_MEM_MODIFIED
))
5316 * this is a request for a PAGEOUT cluster and this page
5317 * is merely along for the ride as a 'buddy'... not only
5318 * does it have to be dirty to be returned, but it also
5319 * can't have been referenced recently...
5321 if ( (hibernate_cleaning_in_progress
== TRUE
||
5322 (!((refmod_state
& VM_MEM_REFERENCED
) || dst_page
->reference
) || dst_page
->throttled
)) &&
5323 ((refmod_state
& VM_MEM_MODIFIED
) || dst_page
->dirty
|| dst_page
->precious
) ) {
5328 * if we reach here, we're not to return
5329 * the page... go on to the next one
5331 if (dst_page
->laundry
== TRUE
) {
5333 * if we get here, the page is not 'cleaning' (filtered out above).
5334 * since it has been referenced, remove it from the laundry
5335 * so we don't pay the cost of an I/O to clean a page
5336 * we're just going to take back
5338 vm_page_lockspin_queues();
5340 vm_pageout_steal_laundry(dst_page
, TRUE
);
5341 vm_page_activate(dst_page
);
5343 vm_page_unlock_queues();
5346 user_page_list
[entry
].phys_addr
= 0;
5351 if (dst_page
->busy
) {
5352 if (cntrl_flags
& UPL_NOBLOCK
) {
5354 user_page_list
[entry
].phys_addr
= 0;
5359 * someone else is playing with the
5360 * page. We will have to wait.
5362 PAGE_SLEEP(object
, dst_page
, THREAD_UNINT
);
5368 * The caller is gathering this page and might
5369 * access its contents later on. Decrypt the
5370 * page before adding it to the UPL, so that
5371 * the caller never sees encrypted data.
5373 if (! (cntrl_flags
& UPL_ENCRYPT
) && dst_page
->encrypted
) {
5377 * save the current state of busy
5378 * mark page as busy while decrypt
5379 * is in progress since it will drop
5380 * the object lock...
5382 was_busy
= dst_page
->busy
;
5383 dst_page
->busy
= TRUE
;
5385 vm_page_decrypt(dst_page
, 0);
5386 vm_page_decrypt_for_upl_counter
++;
5388 * restore to original busy state
5390 dst_page
->busy
= was_busy
;
5392 if (dst_page
->pageout_queue
== TRUE
) {
5394 vm_page_lockspin_queues();
5396 if (dst_page
->pageout_queue
== TRUE
) {
5398 * we've buddied up a page for a clustered pageout
5399 * that has already been moved to the pageout
5400 * queue by pageout_scan... we need to remove
5401 * it from the queue and drop the laundry count
5404 vm_pageout_throttle_up(dst_page
);
5406 vm_page_unlock_queues();
5408 #if MACH_CLUSTER_STATS
5410 * pageout statistics gathering. count
5411 * all the pages we will page out that
5412 * were not counted in the initial
5413 * vm_pageout_scan work
5415 if (dst_page
->pageout
)
5416 encountered_lrp
= TRUE
;
5417 if ((dst_page
->dirty
|| (dst_page
->object
->internal
&& dst_page
->precious
))) {
5418 if (encountered_lrp
)
5419 CLUSTER_STAT(pages_at_higher_offsets
++;)
5421 CLUSTER_STAT(pages_at_lower_offsets
++;)
5424 hw_dirty
= refmod_state
& VM_MEM_MODIFIED
;
5425 dirty
= hw_dirty
? TRUE
: dst_page
->dirty
;
5427 if (dst_page
->phys_page
> upl
->highest_page
)
5428 upl
->highest_page
= dst_page
->phys_page
;
5430 assert (!pmap_is_noencrypt(dst_page
->phys_page
));
5432 if (cntrl_flags
& UPL_SET_LITE
) {
5433 unsigned int pg_num
;
5435 pg_num
= (unsigned int) ((dst_offset
-offset
)/PAGE_SIZE
);
5436 assert(pg_num
== (dst_offset
-offset
)/PAGE_SIZE
);
5437 lite_list
[pg_num
>>5] |= 1 << (pg_num
& 31);
5440 pmap_clear_modify(dst_page
->phys_page
);
5443 * Mark original page as cleaning
5446 dst_page
->cleaning
= TRUE
;
5447 dst_page
->precious
= FALSE
;
5450 * use pageclean setup, it is more
5451 * convenient even for the pageout
5454 vm_object_lock(upl
->map_object
);
5455 vm_pageclean_setup(dst_page
, alias_page
, upl
->map_object
, size
- xfer_size
);
5456 vm_object_unlock(upl
->map_object
);
5458 alias_page
->absent
= FALSE
;
5463 * Record that this page has been
5466 vm_external_state_set(object
->existence_map
, dst_page
->offset
);
5467 #endif /*MACH_PAGEMAP*/
5469 SET_PAGE_DIRTY(dst_page
, FALSE
);
5471 dst_page
->dirty
= FALSE
;
5475 dst_page
->precious
= TRUE
;
5477 if ( (cntrl_flags
& UPL_ENCRYPT
) ) {
5480 * We want to deny access to the target page
5481 * because its contents are about to be
5482 * encrypted and the user would be very
5483 * confused to see encrypted data instead
5485 * We also set "encrypted_cleaning" to allow
5486 * vm_pageout_scan() to demote that page
5487 * from "adjacent/clean-in-place" to
5488 * "target/clean-and-free" if it bumps into
5489 * this page during its scanning while we're
5490 * still processing this cluster.
5492 dst_page
->busy
= TRUE
;
5493 dst_page
->encrypted_cleaning
= TRUE
;
5495 if ( !(cntrl_flags
& UPL_CLEAN_IN_PLACE
) ) {
5496 if ( !VM_PAGE_WIRED(dst_page
))
5497 dst_page
->pageout
= TRUE
;
5500 if ((cntrl_flags
& UPL_WILL_MODIFY
) && object
->copy
!= last_copy_object
) {
5502 * Honor copy-on-write obligations
5504 * The copy object has changed since we
5505 * last synchronized for copy-on-write.
5506 * Another copy object might have been
5507 * inserted while we released the object's
5508 * lock. Since someone could have seen the
5509 * original contents of the remaining pages
5510 * through that new object, we have to
5511 * synchronize with it again for the remaining
5512 * pages only. The previous pages are "busy"
5513 * so they can not be seen through the new
5514 * mapping. The new mapping will see our
5515 * upcoming changes for those previous pages,
5516 * but that's OK since they couldn't see what
5517 * was there before. It's just a race anyway
5518 * and there's no guarantee of consistency or
5519 * atomicity. We just don't want new mappings
5520 * to see both the *before* and *after* pages.
5522 if (object
->copy
!= VM_OBJECT_NULL
) {
5525 dst_offset
,/* current offset */
5526 xfer_size
, /* remaining size */
5529 FALSE
, /* should_return */
5530 MEMORY_OBJECT_COPY_SYNC
,
5533 #if DEVELOPMENT || DEBUG
5535 upl_cow_again_pages
+= xfer_size
>> PAGE_SHIFT
;
5539 * remember the copy object we synced with
5541 last_copy_object
= object
->copy
;
5543 dst_page
= vm_page_lookup(object
, dst_offset
);
5545 if (dst_page
!= VM_PAGE_NULL
) {
5547 if ((cntrl_flags
& UPL_RET_ONLY_ABSENT
)) {
5549 * skip over pages already present in the cache
5552 user_page_list
[entry
].phys_addr
= 0;
5556 if (dst_page
->fictitious
) {
5557 panic("need corner case for fictitious page");
5560 if (dst_page
->busy
|| dst_page
->cleaning
) {
5562 * someone else is playing with the
5563 * page. We will have to wait.
5565 PAGE_SLEEP(object
, dst_page
, THREAD_UNINT
);
5569 if (dst_page
->laundry
) {
5570 dst_page
->pageout
= FALSE
;
5572 vm_pageout_steal_laundry(dst_page
, FALSE
);
5575 if (object
->private) {
5577 * This is a nasty wrinkle for users
5578 * of upl who encounter device or
5579 * private memory however, it is
5580 * unavoidable, only a fault can
5581 * resolve the actual backing
5582 * physical page by asking the
5586 user_page_list
[entry
].phys_addr
= 0;
5590 if (object
->scan_collisions
) {
5592 * the pageout_scan thread is trying to steal
5593 * pages from this object, but has run into our
5594 * lock... grab 2 pages from the head of the object...
5595 * the first is freed on behalf of pageout_scan, the
5596 * 2nd is for our own use... we use vm_object_page_grab
5597 * in both cases to avoid taking pages from the free
5598 * list since we are under memory pressure and our
5599 * lock on this object is getting in the way of
5602 dst_page
= vm_object_page_grab(object
);
5604 if (dst_page
!= VM_PAGE_NULL
)
5605 vm_page_release(dst_page
);
5607 dst_page
= vm_object_page_grab(object
);
5609 if (dst_page
== VM_PAGE_NULL
) {
5611 * need to allocate a page
5613 dst_page
= vm_page_grab();
5615 if (dst_page
== VM_PAGE_NULL
) {
5616 if ( (cntrl_flags
& (UPL_RET_ONLY_ABSENT
| UPL_NOBLOCK
)) == (UPL_RET_ONLY_ABSENT
| UPL_NOBLOCK
)) {
5618 * we don't want to stall waiting for pages to come onto the free list
5619 * while we're already holding absent pages in this UPL
5620 * the caller will deal with the empty slots
5623 user_page_list
[entry
].phys_addr
= 0;
5628 * no pages available... wait
5629 * then try again for the same
5632 vm_object_unlock(object
);
5634 OSAddAtomic(size_in_pages
, &vm_upl_wait_for_pages
);
5636 VM_DEBUG_EVENT(vm_upl_page_wait
, VM_UPL_PAGE_WAIT
, DBG_FUNC_START
, vm_upl_wait_for_pages
, 0, 0, 0);
5639 OSAddAtomic(-size_in_pages
, &vm_upl_wait_for_pages
);
5641 VM_DEBUG_EVENT(vm_upl_page_wait
, VM_UPL_PAGE_WAIT
, DBG_FUNC_END
, vm_upl_wait_for_pages
, 0, 0, 0);
5643 vm_object_lock(object
);
5647 vm_page_insert(dst_page
, object
, dst_offset
);
5649 dst_page
->absent
= TRUE
;
5650 dst_page
->busy
= FALSE
;
5652 if (cntrl_flags
& UPL_RET_ONLY_ABSENT
) {
5654 * if UPL_RET_ONLY_ABSENT was specified,
5655 * than we're definitely setting up a
5656 * upl for a clustered read/pagein
5657 * operation... mark the pages as clustered
5658 * so upl_commit_range can put them on the
5661 dst_page
->clustered
= TRUE
;
5663 if ( !(cntrl_flags
& UPL_FILE_IO
))
5664 VM_STAT_INCR(pageins
);
5670 if (cntrl_flags
& UPL_ENCRYPT
) {
5672 * The page is going to be encrypted when we
5673 * get it from the pager, so mark it so.
5675 dst_page
->encrypted
= TRUE
;
5678 * Otherwise, the page will not contain
5681 dst_page
->encrypted
= FALSE
;
5683 dst_page
->overwriting
= TRUE
;
5685 if (dst_page
->pmapped
) {
5686 if ( !(cntrl_flags
& UPL_FILE_IO
))
5688 * eliminate all mappings from the
5689 * original object and its prodigy
5691 refmod_state
= pmap_disconnect(dst_page
->phys_page
);
5693 refmod_state
= pmap_get_refmod(dst_page
->phys_page
);
5697 hw_dirty
= refmod_state
& VM_MEM_MODIFIED
;
5698 dirty
= hw_dirty
? TRUE
: dst_page
->dirty
;
5700 if (cntrl_flags
& UPL_SET_LITE
) {
5701 unsigned int pg_num
;
5703 pg_num
= (unsigned int) ((dst_offset
-offset
)/PAGE_SIZE
);
5704 assert(pg_num
== (dst_offset
-offset
)/PAGE_SIZE
);
5705 lite_list
[pg_num
>>5] |= 1 << (pg_num
& 31);
5708 pmap_clear_modify(dst_page
->phys_page
);
5711 * Mark original page as cleaning
5714 dst_page
->cleaning
= TRUE
;
5715 dst_page
->precious
= FALSE
;
5718 * use pageclean setup, it is more
5719 * convenient even for the pageout
5722 vm_object_lock(upl
->map_object
);
5723 vm_pageclean_setup(dst_page
, alias_page
, upl
->map_object
, size
- xfer_size
);
5724 vm_object_unlock(upl
->map_object
);
5726 alias_page
->absent
= FALSE
;
5730 if (cntrl_flags
& UPL_REQUEST_SET_DIRTY
) {
5731 upl
->flags
&= ~UPL_CLEAR_DIRTY
;
5732 upl
->flags
|= UPL_SET_DIRTY
;
5734 upl
->flags
|= UPL_SET_DIRTY
;
5735 } else if (cntrl_flags
& UPL_CLEAN_IN_PLACE
) {
5737 * clean in place for read implies
5738 * that a write will be done on all
5739 * the pages that are dirty before
5740 * a upl commit is done. The caller
5741 * is obligated to preserve the
5742 * contents of all pages marked dirty
5744 upl
->flags
|= UPL_CLEAR_DIRTY
;
5746 dst_page
->dirty
= dirty
;
5749 dst_page
->precious
= TRUE
;
5751 if ( !VM_PAGE_WIRED(dst_page
)) {
5753 * deny access to the target page while
5754 * it is being worked on
5756 dst_page
->busy
= TRUE
;
5758 dwp
->dw_mask
|= DW_vm_page_wire
;
5761 * We might be about to satisfy a fault which has been
5762 * requested. So no need for the "restart" bit.
5764 dst_page
->restart
= FALSE
;
5765 if (!dst_page
->absent
&& !(cntrl_flags
& UPL_WILL_MODIFY
)) {
5767 * expect the page to be used
5769 dwp
->dw_mask
|= DW_set_reference
;
5771 if (cntrl_flags
& UPL_PRECIOUS
) {
5772 if (dst_page
->object
->internal
) {
5773 SET_PAGE_DIRTY(dst_page
, FALSE
);
5774 dst_page
->precious
= FALSE
;
5776 dst_page
->precious
= TRUE
;
5779 dst_page
->precious
= FALSE
;
5783 upl
->flags
|= UPL_HAS_BUSY
;
5785 if (dst_page
->phys_page
> upl
->highest_page
)
5786 upl
->highest_page
= dst_page
->phys_page
;
5787 assert (!pmap_is_noencrypt(dst_page
->phys_page
));
5788 if (user_page_list
) {
5789 user_page_list
[entry
].phys_addr
= dst_page
->phys_page
;
5790 user_page_list
[entry
].pageout
= dst_page
->pageout
;
5791 user_page_list
[entry
].absent
= dst_page
->absent
;
5792 user_page_list
[entry
].dirty
= dst_page
->dirty
;
5793 user_page_list
[entry
].precious
= dst_page
->precious
;
5794 user_page_list
[entry
].device
= FALSE
;
5795 user_page_list
[entry
].needed
= FALSE
;
5796 if (dst_page
->clustered
== TRUE
)
5797 user_page_list
[entry
].speculative
= dst_page
->speculative
;
5799 user_page_list
[entry
].speculative
= FALSE
;
5800 user_page_list
[entry
].cs_validated
= dst_page
->cs_validated
;
5801 user_page_list
[entry
].cs_tainted
= dst_page
->cs_tainted
;
5802 user_page_list
[entry
].cs_nx
= dst_page
->cs_nx
;
5803 user_page_list
[entry
].mark
= FALSE
;
5806 * if UPL_RET_ONLY_ABSENT is set, then
5807 * we are working with a fresh page and we've
5808 * just set the clustered flag on it to
5809 * indicate that it was drug in as part of a
5810 * speculative cluster... so leave it alone
5812 if ( !(cntrl_flags
& UPL_RET_ONLY_ABSENT
)) {
5814 * someone is explicitly grabbing this page...
5815 * update clustered and speculative state
5818 if (dst_page
->clustered
)
5819 VM_PAGE_CONSUME_CLUSTERED(dst_page
);
5823 if (dwp
->dw_mask
& DW_vm_page_activate
)
5824 VM_STAT_INCR(reactivations
);
5826 VM_PAGE_ADD_DELAYED_WORK(dwp
, dst_page
, dw_count
);
5828 if (dw_count
>= dw_limit
) {
5829 vm_page_do_delayed_work(object
, UPL_MEMORY_TAG(cntrl_flags
), &dw_array
[0], dw_count
);
5836 dst_offset
+= PAGE_SIZE_64
;
5837 xfer_size
-= PAGE_SIZE
;
5840 vm_page_do_delayed_work(object
, UPL_MEMORY_TAG(cntrl_flags
), &dw_array
[0], dw_count
);
5842 if (alias_page
!= NULL
) {
5843 VM_PAGE_FREE(alias_page
);
5846 if (page_list_count
!= NULL
) {
5847 if (upl
->flags
& UPL_INTERNAL
)
5848 *page_list_count
= 0;
5849 else if (*page_list_count
> entry
)
5850 *page_list_count
= entry
;
5855 vm_object_unlock(object
);
5857 return KERN_SUCCESS
;
5861 * Routine: vm_object_super_upl_request
5863 * Cause the population of a portion of a vm_object
5864 * in much the same way as memory_object_upl_request.
5865 * Depending on the nature of the request, the pages
5866 * returned may be contain valid data or be uninitialized.
5867 * However, the region may be expanded up to the super
5868 * cluster size provided.
5871 __private_extern__ kern_return_t
5872 vm_object_super_upl_request(
5874 vm_object_offset_t offset
,
5876 upl_size_t super_cluster
,
5878 upl_page_info_t
*user_page_list
,
5879 unsigned int *page_list_count
,
5880 upl_control_flags_t cntrl_flags
)
5882 if (object
->paging_offset
> offset
|| ((cntrl_flags
& UPL_VECTOR
)==UPL_VECTOR
))
5883 return KERN_FAILURE
;
5885 assert(object
->paging_in_progress
);
5886 offset
= offset
- object
->paging_offset
;
5888 if (super_cluster
> size
) {
5890 vm_object_offset_t base_offset
;
5891 upl_size_t super_size
;
5892 vm_object_size_t super_size_64
;
5894 base_offset
= (offset
& ~((vm_object_offset_t
) super_cluster
- 1));
5895 super_size
= (offset
+ size
) > (base_offset
+ super_cluster
) ? super_cluster
<<1 : super_cluster
;
5896 super_size_64
= ((base_offset
+ super_size
) > object
->vo_size
) ? (object
->vo_size
- base_offset
) : super_size
;
5897 super_size
= (upl_size_t
) super_size_64
;
5898 assert(super_size
== super_size_64
);
5900 if (offset
> (base_offset
+ super_size
)) {
5901 panic("vm_object_super_upl_request: Missed target pageout"
5902 " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
5903 offset
, base_offset
, super_size
, super_cluster
,
5904 size
, object
->paging_offset
);
5907 * apparently there is a case where the vm requests a
5908 * page to be written out who's offset is beyond the
5911 if ((offset
+ size
) > (base_offset
+ super_size
)) {
5912 super_size_64
= (offset
+ size
) - base_offset
;
5913 super_size
= (upl_size_t
) super_size_64
;
5914 assert(super_size
== super_size_64
);
5917 offset
= base_offset
;
5920 return vm_object_upl_request(object
, offset
, size
, upl
, user_page_list
, page_list_count
, cntrl_flags
);
5927 vm_map_address_t offset
,
5928 upl_size_t
*upl_size
,
5930 upl_page_info_array_t page_list
,
5931 unsigned int *count
,
5932 upl_control_flags_t
*flags
)
5934 vm_map_entry_t entry
;
5935 upl_control_flags_t caller_flags
;
5936 int force_data_sync
;
5938 vm_object_t local_object
;
5939 vm_map_offset_t local_offset
;
5940 vm_map_offset_t local_start
;
5943 caller_flags
= *flags
;
5945 if (caller_flags
& ~UPL_VALID_FLAGS
) {
5947 * For forward compatibility's sake,
5948 * reject any unknown flag.
5950 return KERN_INVALID_VALUE
;
5952 force_data_sync
= (caller_flags
& UPL_FORCE_DATA_SYNC
);
5953 sync_cow_data
= !(caller_flags
& UPL_COPYOUT_FROM
);
5956 return KERN_INVALID_ARGUMENT
;
5959 vm_map_lock_read(map
);
5961 if (!vm_map_lookup_entry(map
, offset
, &entry
)) {
5962 vm_map_unlock_read(map
);
5963 return KERN_FAILURE
;
5966 if ((entry
->vme_end
- offset
) < *upl_size
) {
5967 *upl_size
= (upl_size_t
) (entry
->vme_end
- offset
);
5968 assert(*upl_size
== entry
->vme_end
- offset
);
5971 if (caller_flags
& UPL_QUERY_OBJECT_TYPE
) {
5974 if (!entry
->is_sub_map
&&
5975 VME_OBJECT(entry
) != VM_OBJECT_NULL
) {
5976 if (VME_OBJECT(entry
)->private)
5977 *flags
= UPL_DEV_MEMORY
;
5979 if (VME_OBJECT(entry
)->phys_contiguous
)
5980 *flags
|= UPL_PHYS_CONTIG
;
5982 vm_map_unlock_read(map
);
5983 return KERN_SUCCESS
;
5986 if (entry
->is_sub_map
) {
5989 submap
= VME_SUBMAP(entry
);
5990 local_start
= entry
->vme_start
;
5991 local_offset
= VME_OFFSET(entry
);
5993 vm_map_reference(submap
);
5994 vm_map_unlock_read(map
);
5996 ret
= vm_map_create_upl(submap
,
5997 local_offset
+ (offset
- local_start
),
5998 upl_size
, upl
, page_list
, count
, flags
);
5999 vm_map_deallocate(submap
);
6004 if (VME_OBJECT(entry
) == VM_OBJECT_NULL
||
6005 !VME_OBJECT(entry
)->phys_contiguous
) {
6006 if (*upl_size
> MAX_UPL_SIZE_BYTES
)
6007 *upl_size
= MAX_UPL_SIZE_BYTES
;
6011 * Create an object if necessary.
6013 if (VME_OBJECT(entry
) == VM_OBJECT_NULL
) {
6015 if (vm_map_lock_read_to_write(map
))
6016 goto REDISCOVER_ENTRY
;
6018 VME_OBJECT_SET(entry
,
6019 vm_object_allocate((vm_size_t
)
6021 entry
->vme_start
)));
6022 VME_OFFSET_SET(entry
, 0);
6024 vm_map_lock_write_to_read(map
);
6027 if (!(caller_flags
& UPL_COPYOUT_FROM
) &&
6028 !(entry
->protection
& VM_PROT_WRITE
)) {
6029 vm_map_unlock_read(map
);
6030 return KERN_PROTECTION_FAILURE
;
6033 local_object
= VME_OBJECT(entry
);
6034 assert(local_object
!= VM_OBJECT_NULL
);
6036 if (*upl_size
!= 0 &&
6037 local_object
->vo_size
> *upl_size
&& /* partial UPL */
6038 entry
->wired_count
== 0 && /* No COW for entries that are wired */
6039 (map
->pmap
!= kernel_pmap
) && /* alias checks */
6040 (vm_map_entry_should_cow_for_true_share(entry
) /* case 1 */
6042 (!entry
->needs_copy
&& /* case 2 */
6043 local_object
->internal
&&
6044 (local_object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
) &&
6045 local_object
->ref_count
> 1))) {
6050 * Set up the targeted range for copy-on-write to avoid
6051 * applying true_share/copy_delay to the entire object.
6054 * This map entry covers only part of an internal
6055 * object. There could be other map entries covering
6056 * other areas of this object and some of these map
6057 * entries could be marked as "needs_copy", which
6058 * assumes that the object is COPY_SYMMETRIC.
6059 * To avoid marking this object as COPY_DELAY and
6060 * "true_share", let's shadow it and mark the new
6061 * (smaller) object as "true_share" and COPY_DELAY.
6064 if (vm_map_lock_read_to_write(map
)) {
6065 goto REDISCOVER_ENTRY
;
6067 vm_map_lock_assert_exclusive(map
);
6068 assert(VME_OBJECT(entry
) == local_object
);
6070 vm_map_clip_start(map
,
6072 vm_map_trunc_page(offset
,
6073 VM_MAP_PAGE_MASK(map
)));
6074 vm_map_clip_end(map
,
6076 vm_map_round_page(offset
+ *upl_size
,
6077 VM_MAP_PAGE_MASK(map
)));
6078 if ((entry
->vme_end
- offset
) < *upl_size
) {
6079 *upl_size
= (upl_size_t
) (entry
->vme_end
- offset
);
6080 assert(*upl_size
== entry
->vme_end
- offset
);
6083 prot
= entry
->protection
& ~VM_PROT_WRITE
;
6084 if (override_nx(map
, VME_ALIAS(entry
)) && prot
)
6085 prot
|= VM_PROT_EXECUTE
;
6086 vm_object_pmap_protect(local_object
,
6088 entry
->vme_end
- entry
->vme_start
,
6089 ((entry
->is_shared
||
6090 map
->mapped_in_other_pmaps
)
6096 assert(entry
->wired_count
== 0);
6099 * Lock the VM object and re-check its status: if it's mapped
6100 * in another address space, we could still be racing with
6101 * another thread holding that other VM map exclusively.
6103 vm_object_lock(local_object
);
6104 if (local_object
->true_share
) {
6105 /* object is already in proper state: no COW needed */
6106 assert(local_object
->copy_strategy
!=
6107 MEMORY_OBJECT_COPY_SYMMETRIC
);
6109 /* not true_share: ask for copy-on-write below */
6110 assert(local_object
->copy_strategy
==
6111 MEMORY_OBJECT_COPY_SYMMETRIC
);
6112 entry
->needs_copy
= TRUE
;
6114 vm_object_unlock(local_object
);
6116 vm_map_lock_write_to_read(map
);
6119 if (entry
->needs_copy
) {
6121 * Honor copy-on-write for COPY_SYMMETRIC
6126 vm_object_offset_t new_offset
;
6129 vm_map_version_t version
;
6131 vm_prot_t fault_type
;
6135 if (caller_flags
& UPL_COPYOUT_FROM
) {
6136 fault_type
= VM_PROT_READ
| VM_PROT_COPY
;
6137 vm_counters
.create_upl_extra_cow
++;
6138 vm_counters
.create_upl_extra_cow_pages
+=
6139 (entry
->vme_end
- entry
->vme_start
) / PAGE_SIZE
;
6141 fault_type
= VM_PROT_WRITE
;
6143 if (vm_map_lookup_locked(&local_map
,
6145 OBJECT_LOCK_EXCLUSIVE
,
6147 &new_offset
, &prot
, &wired
,
6149 &real_map
) != KERN_SUCCESS
) {
6150 if (fault_type
== VM_PROT_WRITE
) {
6151 vm_counters
.create_upl_lookup_failure_write
++;
6153 vm_counters
.create_upl_lookup_failure_copy
++;
6155 vm_map_unlock_read(local_map
);
6156 return KERN_FAILURE
;
6158 if (real_map
!= map
)
6159 vm_map_unlock(real_map
);
6160 vm_map_unlock_read(local_map
);
6162 vm_object_unlock(object
);
6164 goto REDISCOVER_ENTRY
;
6167 if (sync_cow_data
&&
6168 (VME_OBJECT(entry
)->shadow
||
6169 VME_OBJECT(entry
)->copy
)) {
6170 local_object
= VME_OBJECT(entry
);
6171 local_start
= entry
->vme_start
;
6172 local_offset
= VME_OFFSET(entry
);
6174 vm_object_reference(local_object
);
6175 vm_map_unlock_read(map
);
6177 if (local_object
->shadow
&& local_object
->copy
) {
6178 vm_object_lock_request(local_object
->shadow
,
6179 ((vm_object_offset_t
)
6180 ((offset
- local_start
) +
6182 local_object
->vo_shadow_offset
),
6184 MEMORY_OBJECT_DATA_SYNC
,
6187 sync_cow_data
= FALSE
;
6188 vm_object_deallocate(local_object
);
6190 goto REDISCOVER_ENTRY
;
6192 if (force_data_sync
) {
6193 local_object
= VME_OBJECT(entry
);
6194 local_start
= entry
->vme_start
;
6195 local_offset
= VME_OFFSET(entry
);
6197 vm_object_reference(local_object
);
6198 vm_map_unlock_read(map
);
6200 vm_object_lock_request(local_object
,
6201 ((vm_object_offset_t
)
6202 ((offset
- local_start
) +
6204 (vm_object_size_t
)*upl_size
,
6206 MEMORY_OBJECT_DATA_SYNC
,
6209 force_data_sync
= FALSE
;
6210 vm_object_deallocate(local_object
);
6212 goto REDISCOVER_ENTRY
;
6214 if (VME_OBJECT(entry
)->private)
6215 *flags
= UPL_DEV_MEMORY
;
6219 if (VME_OBJECT(entry
)->phys_contiguous
)
6220 *flags
|= UPL_PHYS_CONTIG
;
6222 local_object
= VME_OBJECT(entry
);
6223 local_offset
= VME_OFFSET(entry
);
6224 local_start
= entry
->vme_start
;
6226 vm_object_lock(local_object
);
6229 * Ensure that this object is "true_share" and "copy_delay" now,
6230 * while we're still holding the VM map lock. After we unlock the map,
6231 * anything could happen to that mapping, including some copy-on-write
6232 * activity. We need to make sure that the IOPL will point at the
6233 * same memory as the mapping.
6235 if (local_object
->true_share
) {
6236 assert(local_object
->copy_strategy
!=
6237 MEMORY_OBJECT_COPY_SYMMETRIC
);
6238 } else if (local_object
!= kernel_object
&&
6239 local_object
!= compressor_object
&&
6240 !local_object
->phys_contiguous
) {
6241 #if VM_OBJECT_TRACKING_OP_TRUESHARE
6242 if (!local_object
->true_share
&&
6243 vm_object_tracking_inited
) {
6244 void *bt
[VM_OBJECT_TRACKING_BTDEPTH
];
6246 num
= OSBacktrace(bt
,
6247 VM_OBJECT_TRACKING_BTDEPTH
);
6248 btlog_add_entry(vm_object_tracking_btlog
,
6250 VM_OBJECT_TRACKING_OP_TRUESHARE
,
6254 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
6255 local_object
->true_share
= TRUE
;
6256 if (local_object
->copy_strategy
==
6257 MEMORY_OBJECT_COPY_SYMMETRIC
) {
6258 local_object
->copy_strategy
= MEMORY_OBJECT_COPY_DELAY
;
6262 vm_object_reference_locked(local_object
);
6263 vm_object_unlock(local_object
);
6265 vm_map_unlock_read(map
);
6267 ret
= vm_object_iopl_request(local_object
,
6268 ((vm_object_offset_t
)
6269 ((offset
- local_start
) + local_offset
)),
6275 vm_object_deallocate(local_object
);
/*
 * Internal routine to enter a UPL into a VM map.
 *
 * JMM - This should just be doable through the standard
 * vm_map_enter() API.
 */
kern_return_t
vm_upl_map(
	vm_map_t		map,
	upl_t			upl,
	vm_map_offset_t		*dst_addr)
{
	vm_map_size_t		size;
	vm_object_offset_t	offset;
	vm_map_offset_t		addr;
	vm_page_t		m;
	kern_return_t		kr;
	int			isVectorUPL = 0, curr_upl = 0;
	upl_t			vector_upl = NULL;
	vm_offset_t		vector_upl_dst_addr = 0;
	vm_map_t		vector_upl_submap = NULL;
	upl_offset_t		subupl_offset = 0;
	upl_size_t		subupl_size = 0;

	if (upl == UPL_NULL)
		return KERN_INVALID_ARGUMENT;

	if ((isVectorUPL = vector_upl_is_valid(upl))) {
		int mapped = 0, valid_upls = 0;
		vector_upl = upl;

		upl_lock(vector_upl);
		for (curr_upl = 0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
			upl = vector_upl_subupl_byindex(vector_upl, curr_upl);
			if (upl == NULL)
				continue;
			valid_upls++;
			if (UPL_PAGE_LIST_MAPPED & upl->flags)
				mapped++;
		}

		if (mapped) {
			if (mapped != valid_upls)
				panic("Only %d of the %d sub-upls within the Vector UPL are already mapped\n", mapped, valid_upls);
			else {
				upl_unlock(vector_upl);
				return KERN_FAILURE;
			}
		}

		kr = kmem_suballoc(map, &vector_upl_dst_addr, vector_upl->size, FALSE, VM_FLAGS_ANYWHERE, &vector_upl_submap);
		if (kr != KERN_SUCCESS)
			panic("Vector UPL submap allocation failed\n");
		map = vector_upl_submap;
		vector_upl_set_submap(vector_upl, vector_upl_submap, vector_upl_dst_addr);
		curr_upl = 0;
	} else {
		upl_lock(upl);
	}
process_upl_to_enter:
	if (isVectorUPL) {
		if (curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
			*dst_addr = vector_upl_dst_addr;
			upl_unlock(vector_upl);
			return KERN_SUCCESS;
		}
		upl = vector_upl_subupl_byindex(vector_upl, curr_upl++);
		if (upl == NULL)
			goto process_upl_to_enter;

		vector_upl_get_iostate(vector_upl, upl, &subupl_offset, &subupl_size);
		*dst_addr = (vm_map_offset_t)(vector_upl_dst_addr + (vm_map_offset_t)subupl_offset);
	} else {
		/*
		 * check to see if already mapped
		 */
		if (UPL_PAGE_LIST_MAPPED & upl->flags) {
			upl_unlock(upl);
			return KERN_FAILURE;
		}
	}
	if ((!(upl->flags & UPL_SHADOWED)) &&
	    ((upl->flags & UPL_HAS_BUSY) ||
	     !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || (upl->map_object->phys_contiguous)))) {

		vm_object_t		object;
		vm_page_t		alias_page;
		vm_object_offset_t	new_offset;
		unsigned int		pg_num;
		wpl_array_t		lite_list;

		if (upl->flags & UPL_INTERNAL) {
			lite_list = (wpl_array_t)
				((((uintptr_t)upl) + sizeof(struct upl))
				 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
		} else {
			lite_list = (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl));
		}
		object = upl->map_object;
		upl->map_object = vm_object_allocate(upl->size);

		vm_object_lock(upl->map_object);

		upl->map_object->shadow = object;
		upl->map_object->pageout = TRUE;
		upl->map_object->can_persist = FALSE;
		upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
		upl->map_object->vo_shadow_offset = upl->offset - object->paging_offset;
		upl->map_object->wimg_bits = object->wimg_bits;
		offset = upl->map_object->vo_shadow_offset;
		new_offset = 0;
		size = upl->size;

		upl->flags |= UPL_SHADOWED;
		while (size) {
			pg_num = (unsigned int) (new_offset / PAGE_SIZE);
			assert(pg_num == new_offset / PAGE_SIZE);

			if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {

				VM_PAGE_GRAB_FICTITIOUS(alias_page);

				vm_object_lock(object);

				m = vm_page_lookup(object, offset);
				if (m == VM_PAGE_NULL) {
					panic("vm_upl_map: page missing\n");
				}

				/*
				 * Convert the fictitious page to a private
				 * shadow of the real page.
				 */
				assert(alias_page->fictitious);
				alias_page->fictitious = FALSE;
				alias_page->private = TRUE;
				alias_page->pageout = TRUE;
				/*
				 * since m is a page in the upl it must
				 * already be wired or BUSY, so it's
				 * safe to assign the underlying physical
				 * page to the alias
				 */
				alias_page->phys_page = m->phys_page;

				vm_object_unlock(object);

				vm_page_lockspin_queues();
				vm_page_wire(alias_page, VM_KERN_MEMORY_NONE, TRUE);
				vm_page_unlock_queues();

				/*
				 * The virtual page ("m") has to be wired in some way
				 * here or its physical page ("m->phys_page") could
				 * be recycled at any time.
				 * Assuming this is enforced by the caller, we can't
				 * get an encrypted page here.  Since the encryption
				 * key depends on the VM page's "pager" object and
				 * the "paging_offset", we couldn't handle 2 pageable
				 * VM pages (with different pagers and paging_offsets)
				 * sharing the same physical page:  we could end up
				 * encrypting with one key (via one VM page) and
				 * decrypting with another key (via the alias VM page).
				 */
				ASSERT_PAGE_DECRYPTED(m);

				vm_page_insert_wired(alias_page, upl->map_object, new_offset, VM_KERN_MEMORY_NONE);

				assert(!alias_page->wanted);
				alias_page->busy = FALSE;
				alias_page->absent = FALSE;
			}
			size -= PAGE_SIZE;
			offset += PAGE_SIZE_64;
			new_offset += PAGE_SIZE_64;
		}
		vm_object_unlock(upl->map_object);
	}
	if (upl->flags & UPL_SHADOWED)
		offset = 0;
	else
		offset = upl->offset - upl->map_object->paging_offset;

	size = upl->size;

	vm_object_reference(upl->map_object);

	if (!isVectorUPL) {
		/*
		 * NEED A UPL_MAP ALIAS
		 */
		kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
				  VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_OSFMK),
				  upl->map_object, offset, FALSE,
				  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);

		if (kr != KERN_SUCCESS) {
			/* ... */
			return kr;
		}
	} else {
		kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
				  VM_FLAGS_FIXED | VM_MAKE_TAG(VM_KERN_MEMORY_OSFMK),
				  upl->map_object, offset, FALSE,
				  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
		if (kr)
			panic("vm_map_enter failed for a Vector UPL\n");
	}
	vm_object_lock(upl->map_object);

	for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) {
		m = vm_page_lookup(upl->map_object, offset);

		if (m) {
			m->pmapped = TRUE;

			/* CODE SIGNING ENFORCEMENT: page has been wpmapped,
			 * but only in kernel space. If this was on a user map,
			 * we'd have to set the wpmapped bit. */
			/* m->wpmapped = TRUE; */
			assert(map->pmap == kernel_pmap);

			PMAP_ENTER(map->pmap, addr, m, VM_PROT_DEFAULT, VM_PROT_NONE, 0, TRUE);
		}
		offset += PAGE_SIZE_64;
	}
	vm_object_unlock(upl->map_object);

	/*
	 * hold a reference for the mapping
	 */
	upl->ref_count++;
	upl->flags |= UPL_PAGE_LIST_MAPPED;
	upl->kaddr = (vm_offset_t) *dst_addr;
	assert(upl->kaddr == *dst_addr);

	if (isVectorUPL)
		goto process_upl_to_enter;

	upl_unlock(upl);

	return KERN_SUCCESS;
}
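
/*
 * The usual pairing of the routines above, sketched from the calls visible
 * elsewhere in this file.  A UPL obtained from vm_map_create_upl() or
 * vm_object_iopl_request() is assumed; the argument values are illustrative
 * only, not a working recipe:
 *
 *	vm_map_offset_t	kaddr;
 *	boolean_t	empty;
 *
 *	vm_upl_map(kernel_map, upl, &kaddr);	// enter the UPL's pages
 *	// ... access the pages through kaddr ...
 *	vm_upl_unmap(kernel_map, upl);		// tear the mapping down
 *	upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty);
 */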
/*
 * Internal routine to remove a UPL mapping from a VM map.
 *
 * XXX - This should just be doable through a standard
 *	vm_map_remove() operation.  Otherwise, implicit clean-up
 *	of the target map won't be able to correctly remove
 *	these (and release the reference on the UPL).  Having
 *	to do this means we can't map these into user-space
 *	maps yet.
 */
kern_return_t
vm_upl_unmap(
	vm_map_t	map,
	upl_t		upl)
{
	vm_address_t	addr;
	upl_size_t	size;
	int		isVectorUPL = 0, curr_upl = 0;
	upl_t		vector_upl = NULL;

	if (upl == UPL_NULL)
		return KERN_INVALID_ARGUMENT;

	if ((isVectorUPL = vector_upl_is_valid(upl))) {
		int unmapped = 0, valid_upls = 0;
		vector_upl = upl;
		upl_lock(vector_upl);
		for (curr_upl = 0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
			upl = vector_upl_subupl_byindex(vector_upl, curr_upl);
			if (upl == NULL)
				continue;
			valid_upls++;
			if (!(UPL_PAGE_LIST_MAPPED & upl->flags))
				unmapped++;
		}

		if (unmapped) {
			if (unmapped != valid_upls)
				panic("%d of the %d sub-upls within the Vector UPL is/are not mapped\n", unmapped, valid_upls);
			else {
				upl_unlock(vector_upl);
				return KERN_FAILURE;
			}
		}
		curr_upl = 0;
	} else {
		upl_lock(upl);
	}
process_upl_to_remove:
	if (isVectorUPL) {
		if (curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
			vm_map_t v_upl_submap;
			vm_offset_t v_upl_submap_dst_addr;
			vector_upl_get_submap(vector_upl, &v_upl_submap, &v_upl_submap_dst_addr);

			vm_map_remove(map, v_upl_submap_dst_addr, v_upl_submap_dst_addr + vector_upl->size, VM_MAP_NO_FLAGS);
			vm_map_deallocate(v_upl_submap);
			upl_unlock(vector_upl);
			return KERN_SUCCESS;
		}

		upl = vector_upl_subupl_byindex(vector_upl, curr_upl++);
		if (upl == NULL)
			goto process_upl_to_remove;
	}

	if (upl->flags & UPL_PAGE_LIST_MAPPED) {
		addr = upl->kaddr;
		size = upl->size;

		assert(upl->ref_count > 1);
		upl->ref_count--;		/* removing mapping ref */

		upl->flags &= ~UPL_PAGE_LIST_MAPPED;
		upl->kaddr = (vm_offset_t) 0;

		if (!isVectorUPL) {
			upl_unlock(upl);

			vm_map_remove(
				map,
				vm_map_trunc_page(addr,
						  VM_MAP_PAGE_MASK(map)),
				vm_map_round_page(addr + size,
						  VM_MAP_PAGE_MASK(map)),
				VM_MAP_NO_FLAGS);

			return KERN_SUCCESS;
		} else {
			/*
			 * If it's a Vectored UPL, we'll be removing the entire
			 * submap anyways, so no need to remove individual UPL
			 * element mappings from within the submap
			 */
			goto process_upl_to_remove;
		}
	}
	upl_unlock(upl);

	return KERN_FAILURE;
}
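
/*
 * upl_commit_range:
 *
 * Commit the given range of this UPL back to its underlying object,
 * clearing the "busy"/wired state set up at UPL creation and applying
 * the commit flags (set/clear dirty, inactivate, free-absent, ...)
 * through the delayed-work machinery.  Vector UPLs are walked one
 * sub-UPL at a time via the process_upl_to_commit loop below.
 */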
6636 upl_offset_t offset
,
6639 upl_page_info_t
*page_list
,
6640 mach_msg_type_number_t count
,
6643 upl_size_t xfer_size
, subupl_size
= size
;
6644 vm_object_t shadow_object
;
6646 vm_object_offset_t target_offset
;
6647 upl_offset_t subupl_offset
= offset
;
6649 wpl_array_t lite_list
;
6651 int clear_refmod
= 0;
6652 int pgpgout_count
= 0;
6653 struct vm_page_delayed_work dw_array
[DEFAULT_DELAYED_WORK_LIMIT
];
6654 struct vm_page_delayed_work
*dwp
;
6657 int isVectorUPL
= 0;
6658 upl_t vector_upl
= NULL
;
6659 boolean_t should_be_throttled
= FALSE
;
6661 vm_page_t nxt_page
= VM_PAGE_NULL
;
6662 int fast_path_possible
= 0;
6663 int fast_path_full_commit
= 0;
6664 int throttle_page
= 0;
6665 int unwired_count
= 0;
6666 int local_queue_count
= 0;
6667 queue_head_t local_queue
;
6671 if (upl
== UPL_NULL
)
6672 return KERN_INVALID_ARGUMENT
;
6677 if((isVectorUPL
= vector_upl_is_valid(upl
))) {
6679 upl_lock(vector_upl
);
6684 process_upl_to_commit
:
6688 offset
= subupl_offset
;
6690 upl_unlock(vector_upl
);
6691 return KERN_SUCCESS
;
6693 upl
= vector_upl_subupl_byoffset(vector_upl
, &offset
, &size
);
6695 upl_unlock(vector_upl
);
6696 return KERN_FAILURE
;
6698 page_list
= UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(upl
);
6699 subupl_size
-= size
;
6700 subupl_offset
+= size
;
6704 if (upl
->upl_commit_index
< UPL_DEBUG_COMMIT_RECORDS
) {
6705 (void) OSBacktrace(&upl
->upl_commit_records
[upl
->upl_commit_index
].c_retaddr
[0], UPL_DEBUG_STACK_FRAMES
);
6707 upl
->upl_commit_records
[upl
->upl_commit_index
].c_beg
= offset
;
6708 upl
->upl_commit_records
[upl
->upl_commit_index
].c_end
= (offset
+ size
);
6710 upl
->upl_commit_index
++;
6713 if (upl
->flags
& UPL_DEVICE_MEMORY
)
6715 else if ((offset
+ size
) <= upl
->size
)
6721 upl_unlock(vector_upl
);
6723 return KERN_FAILURE
;
6725 if (upl
->flags
& UPL_SET_DIRTY
)
6726 flags
|= UPL_COMMIT_SET_DIRTY
;
6727 if (upl
->flags
& UPL_CLEAR_DIRTY
)
6728 flags
|= UPL_COMMIT_CLEAR_DIRTY
;
6730 if (upl
->flags
& UPL_INTERNAL
)
6731 lite_list
= (wpl_array_t
) ((((uintptr_t)upl
) + sizeof(struct upl
))
6732 + ((upl
->size
/PAGE_SIZE
) * sizeof(upl_page_info_t
)));
6734 lite_list
= (wpl_array_t
) (((uintptr_t)upl
) + sizeof(struct upl
));
6736 object
= upl
->map_object
;
6738 if (upl
->flags
& UPL_SHADOWED
) {
6739 vm_object_lock(object
);
6740 shadow_object
= object
->shadow
;
6742 shadow_object
= object
;
6744 entry
= offset
/PAGE_SIZE
;
6745 target_offset
= (vm_object_offset_t
)offset
;
6747 assert(!(target_offset
& PAGE_MASK
));
6748 assert(!(xfer_size
& PAGE_MASK
));
6750 if (upl
->flags
& UPL_KERNEL_OBJECT
)
6751 vm_object_lock_shared(shadow_object
);
6753 vm_object_lock(shadow_object
);
6755 if (upl
->flags
& UPL_ACCESS_BLOCKED
) {
6756 assert(shadow_object
->blocked_access
);
6757 shadow_object
->blocked_access
= FALSE
;
6758 vm_object_wakeup(object
, VM_OBJECT_EVENT_UNBLOCKED
);
6761 if (shadow_object
->code_signed
) {
6764 * If the object is code-signed, do not let this UPL tell
6765 * us if the pages are valid or not. Let the pages be
6766 * validated by VM the normal way (when they get mapped or
6769 flags
&= ~UPL_COMMIT_CS_VALIDATED
;
6773 * No page list to get the code-signing info from !?
6775 flags
&= ~UPL_COMMIT_CS_VALIDATED
;
6777 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default
) && shadow_object
->internal
)
6778 should_be_throttled
= TRUE
;
6782 dw_limit
= DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT
);
6784 if ((upl
->flags
& UPL_IO_WIRE
) &&
6785 !(flags
& UPL_COMMIT_FREE_ABSENT
) &&
6787 shadow_object
->purgable
!= VM_PURGABLE_VOLATILE
&&
6788 shadow_object
->purgable
!= VM_PURGABLE_EMPTY
) {
6790 if (!queue_empty(&shadow_object
->memq
)) {
6791 queue_init(&local_queue
);
6792 if (size
== shadow_object
->vo_size
) {
6793 nxt_page
= (vm_page_t
)queue_first(&shadow_object
->memq
);
6794 fast_path_full_commit
= 1;
6796 fast_path_possible
= 1;
6798 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default
) && shadow_object
->internal
&&
6799 (shadow_object
->purgable
== VM_PURGABLE_DENY
||
6800 shadow_object
->purgable
== VM_PURGABLE_NONVOLATILE
||
6801 shadow_object
->purgable
== VM_PURGABLE_VOLATILE
)) {
6815 if (upl
->flags
& UPL_LITE
) {
6816 unsigned int pg_num
;
6818 if (nxt_page
!= VM_PAGE_NULL
) {
6820 nxt_page
= (vm_page_t
)queue_next(&nxt_page
->listq
);
6821 target_offset
= m
->offset
;
6823 pg_num
= (unsigned int) (target_offset
/PAGE_SIZE
);
6824 assert(pg_num
== target_offset
/PAGE_SIZE
);
6826 if (lite_list
[pg_num
>>5] & (1 << (pg_num
& 31))) {
6827 lite_list
[pg_num
>>5] &= ~(1 << (pg_num
& 31));
6829 if (!(upl
->flags
& UPL_KERNEL_OBJECT
) && m
== VM_PAGE_NULL
)
6830 m
= vm_page_lookup(shadow_object
, target_offset
+ (upl
->offset
- shadow_object
->paging_offset
));
6834 if (upl
->flags
& UPL_SHADOWED
) {
6835 if ((t
= vm_page_lookup(object
, target_offset
)) != VM_PAGE_NULL
) {
6841 if (!(upl
->flags
& UPL_KERNEL_OBJECT
) && m
== VM_PAGE_NULL
)
6842 m
= vm_page_lookup(shadow_object
, target_offset
+ object
->vo_shadow_offset
);
6845 if (m
== VM_PAGE_NULL
)
6846 goto commit_next_page
;
6848 if (m
->compressor
) {
6851 dwp
->dw_mask
|= (DW_clear_busy
| DW_PAGE_WAKEUP
);
6852 goto commit_next_page
;
6855 if (flags
& UPL_COMMIT_CS_VALIDATED
) {
6858 * Set the code signing bits according to
6859 * what the UPL says they should be.
6861 m
->cs_validated
= page_list
[entry
].cs_validated
;
6862 m
->cs_tainted
= page_list
[entry
].cs_tainted
;
6863 m
->cs_nx
= page_list
[entry
].cs_nx
;
6865 if (flags
& UPL_COMMIT_WRITTEN_BY_KERNEL
)
6866 m
->written_by_kernel
= TRUE
;
6868 if (upl
->flags
& UPL_IO_WIRE
) {
6871 page_list
[entry
].phys_addr
= 0;
6873 if (flags
& UPL_COMMIT_SET_DIRTY
) {
6874 SET_PAGE_DIRTY(m
, FALSE
);
6875 } else if (flags
& UPL_COMMIT_CLEAR_DIRTY
) {
6878 if (! (flags
& UPL_COMMIT_CS_VALIDATED
) &&
6879 m
->cs_validated
&& !m
->cs_tainted
) {
6882 * This page is no longer dirty
6883 * but could have been modified,
6884 * so it will need to be
6888 panic("upl_commit_range(%p): page %p was slid\n",
6892 m
->cs_validated
= FALSE
;
6893 #if DEVELOPMENT || DEBUG
6894 vm_cs_validated_resets
++;
6896 pmap_disconnect(m
->phys_page
);
6898 clear_refmod
|= VM_MEM_MODIFIED
;
6900 if (upl
->flags
& UPL_ACCESS_BLOCKED
) {
6902 * We blocked access to the pages in this UPL.
6903 * Clear the "busy" bit and wake up any waiter
6906 dwp
->dw_mask
|= (DW_clear_busy
| DW_PAGE_WAKEUP
);
6908 if (fast_path_possible
) {
6909 assert(m
->object
->purgable
!= VM_PURGABLE_EMPTY
);
6910 assert(m
->object
->purgable
!= VM_PURGABLE_VOLATILE
);
6912 assert(m
->wire_count
== 0);
6916 dwp
->dw_mask
|= (DW_clear_busy
| DW_PAGE_WAKEUP
);
6918 if (m
->wire_count
== 0)
6919 panic("wire_count == 0, m = %p, obj = %p\n", m
, shadow_object
);
6922 * XXX FBDP need to update some other
6923 * counters here (purgeable_wired_count)
6926 assert(m
->wire_count
);
6929 if (m
->wire_count
== 0)
6932 if (m
->wire_count
== 0) {
6933 queue_enter(&local_queue
, m
, vm_page_t
, pageq
);
6934 local_queue_count
++;
6936 if (throttle_page
) {
6937 m
->throttled
= TRUE
;
6939 if (flags
& UPL_COMMIT_INACTIVATE
)
6946 if (flags
& UPL_COMMIT_INACTIVATE
) {
6947 dwp
->dw_mask
|= DW_vm_page_deactivate_internal
;
6948 clear_refmod
|= VM_MEM_REFERENCED
;
6951 if (flags
& UPL_COMMIT_FREE_ABSENT
)
6952 dwp
->dw_mask
|= DW_vm_page_free
;
6955 dwp
->dw_mask
|= (DW_clear_busy
| DW_PAGE_WAKEUP
);
6957 if ( !(dwp
->dw_mask
& DW_vm_page_deactivate_internal
))
6958 dwp
->dw_mask
|= DW_vm_page_activate
;
6961 dwp
->dw_mask
|= DW_vm_page_unwire
;
6963 goto commit_next_page
;
6965 assert(!m
->compressor
);
6968 page_list
[entry
].phys_addr
= 0;
6971 * make sure to clear the hardware
6972 * modify or reference bits before
6973 * releasing the BUSY bit on this page
6974 * otherwise we risk losing a legitimate
6977 if (flags
& UPL_COMMIT_CLEAR_DIRTY
) {
6980 clear_refmod
|= VM_MEM_MODIFIED
;
6983 dwp
->dw_mask
|= DW_vm_pageout_throttle_up
;
6985 if (VM_PAGE_WIRED(m
))
6988 if (! (flags
& UPL_COMMIT_CS_VALIDATED
) &&
6989 m
->cs_validated
&& !m
->cs_tainted
) {
6992 * This page is no longer dirty
6993 * but could have been modified,
6994 * so it will need to be
6998 panic("upl_commit_range(%p): page %p was slid\n",
7002 m
->cs_validated
= FALSE
;
7003 #if DEVELOPMENT || DEBUG
7004 vm_cs_validated_resets
++;
7006 pmap_disconnect(m
->phys_page
);
7008 if (m
->overwriting
) {
7010 * the (COPY_OUT_FROM == FALSE) request_page_list case
7013 #if CONFIG_PHANTOM_CACHE
7014 if (m
->absent
&& !m
->object
->internal
)
7015 dwp
->dw_mask
|= DW_vm_phantom_cache_update
;
7019 dwp
->dw_mask
|= DW_clear_busy
;
7022 * alternate (COPY_OUT_FROM == FALSE) page_list case
7023 * Occurs when the original page was wired
7024 * at the time of the list request
7026 assert(VM_PAGE_WIRED(m
));
7028 dwp
->dw_mask
|= DW_vm_page_unwire
; /* reactivates */
7030 m
->overwriting
= FALSE
;
7032 if (m
->encrypted_cleaning
== TRUE
) {
7033 m
->encrypted_cleaning
= FALSE
;
7035 dwp
->dw_mask
|= DW_clear_busy
| DW_PAGE_WAKEUP
;
7037 m
->cleaning
= FALSE
;
			 * With the clean queue enabled, UPL_PAGEOUT should
			 * no longer set the pageout bit. Its pages now go
			 * to the clean queue.
7045 assert(!(flags
& UPL_PAGEOUT
));
7048 #if MACH_CLUSTER_STATS
7049 if (m
->wanted
) vm_pageout_target_collisions
++;
7051 if ((flags
& UPL_COMMIT_SET_DIRTY
) ||
7052 (m
->pmapped
&& (pmap_disconnect(m
->phys_page
) & VM_MEM_MODIFIED
))) {
7054 * page was re-dirtied after we started
7055 * the pageout... reactivate it since
7056 * we don't know whether the on-disk
7057 * copy matches what is now in memory
7059 SET_PAGE_DIRTY(m
, FALSE
);
7061 dwp
->dw_mask
|= DW_vm_page_activate
| DW_PAGE_WAKEUP
;
7063 if (upl
->flags
& UPL_PAGEOUT
) {
7064 CLUSTER_STAT(vm_pageout_target_page_dirtied
++;)
7065 VM_STAT_INCR(reactivations
);
7066 DTRACE_VM2(pgrec
, int, 1, (uint64_t *), NULL
);
7070 * page has been successfully cleaned
7071 * go ahead and free it for other use
7073 if (m
->object
->internal
) {
7074 DTRACE_VM2(anonpgout
, int, 1, (uint64_t *), NULL
);
7076 DTRACE_VM2(fspgout
, int, 1, (uint64_t *), NULL
);
7081 dwp
->dw_mask
|= DW_vm_page_free
;
7083 goto commit_next_page
;
7085 #if MACH_CLUSTER_STATS
7087 m
->dirty
= pmap_is_modified(m
->phys_page
);
7089 if (m
->dirty
) vm_pageout_cluster_dirtied
++;
7090 else vm_pageout_cluster_cleaned
++;
7091 if (m
->wanted
) vm_pageout_cluster_collisions
++;
7094 * It is a part of the semantic of COPYOUT_FROM
7095 * UPLs that a commit implies cache sync
7096 * between the vm page and the backing store
7097 * this can be used to strip the precious bit
7100 if ((upl
->flags
& UPL_PAGE_SYNC_DONE
) || (flags
& UPL_COMMIT_CLEAR_PRECIOUS
))
7101 m
->precious
= FALSE
;
7103 if (flags
& UPL_COMMIT_SET_DIRTY
) {
7104 SET_PAGE_DIRTY(m
, FALSE
);
7109 /* with the clean queue on, move *all* cleaned pages to the clean queue */
7110 if (hibernate_cleaning_in_progress
== FALSE
&& !m
->dirty
&& (upl
->flags
& UPL_PAGEOUT
)) {
7113 VM_STAT_INCR(pageouts
);
7114 DTRACE_VM2(pgout
, int, 1, (uint64_t *), NULL
);
7116 dwp
->dw_mask
|= DW_enqueue_cleaned
;
7117 vm_pageout_enqueued_cleaned_from_inactive_dirty
++;
7118 } else if (should_be_throttled
== TRUE
&& !m
->active
&& !m
->inactive
&& !m
->speculative
&& !m
->throttled
) {
7120 * page coming back in from being 'frozen'...
7121 * it was dirty before it was frozen, so keep it so
7122 * the vm_page_activate will notice that it really belongs
7123 * on the throttle queue and put it there
7125 SET_PAGE_DIRTY(m
, FALSE
);
7126 dwp
->dw_mask
|= DW_vm_page_activate
;
7129 if ((flags
& UPL_COMMIT_INACTIVATE
) && !m
->clustered
&& !m
->speculative
) {
7130 dwp
->dw_mask
|= DW_vm_page_deactivate_internal
;
7131 clear_refmod
|= VM_MEM_REFERENCED
;
7132 } else if (!m
->active
&& !m
->inactive
&& !m
->speculative
) {
7134 if (m
->clustered
|| (flags
& UPL_COMMIT_SPECULATE
))
7135 dwp
->dw_mask
|= DW_vm_page_speculate
;
7136 else if (m
->reference
)
7137 dwp
->dw_mask
|= DW_vm_page_activate
;
7139 dwp
->dw_mask
|= DW_vm_page_deactivate_internal
;
7140 clear_refmod
|= VM_MEM_REFERENCED
;
7144 if (upl
->flags
& UPL_ACCESS_BLOCKED
) {
			 * We blocked access to the pages in this UPL.
			 * Clear the "busy" bit on this page before we
			 * wake up any waiter.
7150 dwp
->dw_mask
|= DW_clear_busy
;
7153 * Wakeup any thread waiting for the page to be un-cleaning.
7155 dwp
->dw_mask
|= DW_PAGE_WAKEUP
;
7159 pmap_clear_refmod(m
->phys_page
, clear_refmod
);
7161 target_offset
+= PAGE_SIZE_64
;
7162 xfer_size
-= PAGE_SIZE
;
7166 if (dwp
->dw_mask
& ~(DW_clear_busy
| DW_PAGE_WAKEUP
)) {
7167 VM_PAGE_ADD_DELAYED_WORK(dwp
, m
, dw_count
);
7169 if (dw_count
>= dw_limit
) {
7170 vm_page_do_delayed_work(shadow_object
, VM_KERN_MEMORY_NONE
, &dw_array
[0], dw_count
);
7176 if (dwp
->dw_mask
& DW_clear_busy
)
7179 if (dwp
->dw_mask
& DW_PAGE_WAKEUP
)
7185 vm_page_do_delayed_work(shadow_object
, VM_KERN_MEMORY_NONE
, &dw_array
[0], dw_count
);
7187 if (fast_path_possible
) {
7189 assert(shadow_object
->purgable
!= VM_PURGABLE_VOLATILE
);
7190 assert(shadow_object
->purgable
!= VM_PURGABLE_EMPTY
);
7192 if (local_queue_count
|| unwired_count
) {
7194 if (local_queue_count
) {
7195 vm_page_t first_local
, last_local
;
7196 vm_page_t first_target
;
7197 queue_head_t
*target_queue
;
7200 target_queue
= &vm_page_queue_throttled
;
7202 if (flags
& UPL_COMMIT_INACTIVATE
) {
7203 if (shadow_object
->internal
)
7204 target_queue
= &vm_page_queue_anonymous
;
7206 target_queue
= &vm_page_queue_inactive
;
7208 target_queue
= &vm_page_queue_active
;
7211 * Transfer the entire local queue to a regular LRU page queues.
7213 first_local
= (vm_page_t
) queue_first(&local_queue
);
7214 last_local
= (vm_page_t
) queue_last(&local_queue
);
7216 vm_page_lockspin_queues();
7218 first_target
= (vm_page_t
) queue_first(target_queue
);
7220 if (queue_empty(target_queue
))
7221 queue_last(target_queue
) = (queue_entry_t
) last_local
;
7223 queue_prev(&first_target
->pageq
) = (queue_entry_t
) last_local
;
7225 queue_first(target_queue
) = (queue_entry_t
) first_local
;
7226 queue_prev(&first_local
->pageq
) = (queue_entry_t
) target_queue
;
7227 queue_next(&last_local
->pageq
) = (queue_entry_t
) first_target
;
7230 * Adjust the global page counts.
7232 if (throttle_page
) {
7233 vm_page_throttled_count
+= local_queue_count
;
7235 if (flags
& UPL_COMMIT_INACTIVATE
) {
7236 if (shadow_object
->internal
)
7237 vm_page_anonymous_count
+= local_queue_count
;
7238 vm_page_inactive_count
+= local_queue_count
;
7240 token_new_pagecount
+= local_queue_count
;
7242 vm_page_active_count
+= local_queue_count
;
7244 if (shadow_object
->internal
)
7245 vm_page_pageable_internal_count
+= local_queue_count
;
7247 vm_page_pageable_external_count
+= local_queue_count
;
7250 vm_page_lockspin_queues();
7252 if (unwired_count
) {
7253 vm_page_wire_count
-= unwired_count
;
7254 VM_CHECK_MEMORYSTATUS
;
7256 vm_page_unlock_queues();
7258 shadow_object
->wired_page_count
-= unwired_count
;
7260 if (!shadow_object
->wired_page_count
) {
7261 VM_OBJECT_UNWIRED(shadow_object
);
7267 if (upl
->flags
& UPL_DEVICE_MEMORY
) {
7269 } else if (upl
->flags
& UPL_LITE
) {
7275 if (!fast_path_full_commit
) {
7276 pg_num
= upl
->size
/PAGE_SIZE
;
7277 pg_num
= (pg_num
+ 31) >> 5;
7279 for (i
= 0; i
< pg_num
; i
++) {
7280 if (lite_list
[i
] != 0) {
7287 if (queue_empty(&upl
->map_object
->memq
))
7290 if (occupied
== 0) {
7292 * If this UPL element belongs to a Vector UPL and is
7293 * empty, then this is the right function to deallocate
7294 * it. So go ahead set the *empty variable. The flag
7295 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view
7296 * should be considered relevant for the Vector UPL and not
7297 * the internal UPLs.
7299 if ((upl
->flags
& UPL_COMMIT_NOTIFY_EMPTY
) || isVectorUPL
)
7302 if (object
== shadow_object
&& !(upl
->flags
& UPL_KERNEL_OBJECT
)) {
7304 * this is not a paging object
7305 * so we need to drop the paging reference
7306 * that was taken when we created the UPL
7307 * against this object
7309 vm_object_activity_end(shadow_object
);
7310 vm_object_collapse(shadow_object
, 0, TRUE
);
		 * we donated the paging reference to
		 * the map object... vm_pageout_object_terminate
		 * will drop this reference
7319 vm_object_unlock(shadow_object
);
7320 if (object
!= shadow_object
)
7321 vm_object_unlock(object
);
7327 * If we completed our operations on an UPL that is
7328 * part of a Vectored UPL and if empty is TRUE, then
7329 * we should go ahead and deallocate this UPL element.
7330 * Then we check if this was the last of the UPL elements
7331 * within that Vectored UPL. If so, set empty to TRUE
7332 * so that in ubc_upl_commit_range or ubc_upl_commit, we
7333 * can go ahead and deallocate the Vector UPL too.
7336 *empty
= vector_upl_set_subupl(vector_upl
, upl
, 0);
7337 upl_deallocate(upl
);
7339 goto process_upl_to_commit
;
7342 if (pgpgout_count
) {
7343 DTRACE_VM2(pgpgout
, int, pgpgout_count
, (uint64_t *), NULL
);
7346 return KERN_SUCCESS
;
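
/*
 * upl_abort_range:
 *
 * Back out the given range of this UPL.  For UPL_IO_WIRE UPLs (without
 * UPL_ABORT_DUMP_PAGES) this degenerates into a commit; otherwise the
 * pages are unbusied and, depending on the abort flags, freed, dumped,
 * restarted or left carrying their error state.  Vector UPLs are walked
 * one sub-UPL at a time via the process_upl_to_abort loop below.
 */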
7352 upl_offset_t offset
,
7357 upl_page_info_t
*user_page_list
= NULL
;
7358 upl_size_t xfer_size
, subupl_size
= size
;
7359 vm_object_t shadow_object
;
7361 vm_object_offset_t target_offset
;
7362 upl_offset_t subupl_offset
= offset
;
7364 wpl_array_t lite_list
;
7366 struct vm_page_delayed_work dw_array
[DEFAULT_DELAYED_WORK_LIMIT
];
7367 struct vm_page_delayed_work
*dwp
;
7370 int isVectorUPL
= 0;
7371 upl_t vector_upl
= NULL
;
7375 if (upl
== UPL_NULL
)
7376 return KERN_INVALID_ARGUMENT
;
7378 if ( (upl
->flags
& UPL_IO_WIRE
) && !(error
& UPL_ABORT_DUMP_PAGES
) )
7379 return upl_commit_range(upl
, offset
, size
, UPL_COMMIT_FREE_ABSENT
, NULL
, 0, empty
);
7381 if((isVectorUPL
= vector_upl_is_valid(upl
))) {
7383 upl_lock(vector_upl
);
7388 process_upl_to_abort
:
7391 offset
= subupl_offset
;
7393 upl_unlock(vector_upl
);
7394 return KERN_SUCCESS
;
7396 upl
= vector_upl_subupl_byoffset(vector_upl
, &offset
, &size
);
7398 upl_unlock(vector_upl
);
7399 return KERN_FAILURE
;
7401 subupl_size
-= size
;
7402 subupl_offset
+= size
;
7408 if (upl
->upl_commit_index
< UPL_DEBUG_COMMIT_RECORDS
) {
7409 (void) OSBacktrace(&upl
->upl_commit_records
[upl
->upl_commit_index
].c_retaddr
[0], UPL_DEBUG_STACK_FRAMES
);
7411 upl
->upl_commit_records
[upl
->upl_commit_index
].c_beg
= offset
;
7412 upl
->upl_commit_records
[upl
->upl_commit_index
].c_end
= (offset
+ size
);
7413 upl
->upl_commit_records
[upl
->upl_commit_index
].c_aborted
= 1;
7415 upl
->upl_commit_index
++;
7418 if (upl
->flags
& UPL_DEVICE_MEMORY
)
7420 else if ((offset
+ size
) <= upl
->size
)
7426 upl_unlock(vector_upl
);
7429 return KERN_FAILURE
;
7431 if (upl
->flags
& UPL_INTERNAL
) {
7432 lite_list
= (wpl_array_t
)
7433 ((((uintptr_t)upl
) + sizeof(struct upl
))
7434 + ((upl
->size
/PAGE_SIZE
) * sizeof(upl_page_info_t
)));
7436 user_page_list
= (upl_page_info_t
*) (((uintptr_t)upl
) + sizeof(struct upl
));
7438 lite_list
= (wpl_array_t
)
7439 (((uintptr_t)upl
) + sizeof(struct upl
));
7441 object
= upl
->map_object
;
7443 if (upl
->flags
& UPL_SHADOWED
) {
7444 vm_object_lock(object
);
7445 shadow_object
= object
->shadow
;
7447 shadow_object
= object
;
7449 entry
= offset
/PAGE_SIZE
;
7450 target_offset
= (vm_object_offset_t
)offset
;
7452 assert(!(target_offset
& PAGE_MASK
));
7453 assert(!(xfer_size
& PAGE_MASK
));
7455 if (upl
->flags
& UPL_KERNEL_OBJECT
)
7456 vm_object_lock_shared(shadow_object
);
7458 vm_object_lock(shadow_object
);
7460 if (upl
->flags
& UPL_ACCESS_BLOCKED
) {
7461 assert(shadow_object
->blocked_access
);
7462 shadow_object
->blocked_access
= FALSE
;
7463 vm_object_wakeup(object
, VM_OBJECT_EVENT_UNBLOCKED
);
7468 dw_limit
= DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT
);
7470 if ((error
& UPL_ABORT_DUMP_PAGES
) && (upl
->flags
& UPL_KERNEL_OBJECT
))
7471 panic("upl_abort_range: kernel_object being DUMPED");
7475 unsigned int pg_num
;
7478 pg_num
= (unsigned int) (target_offset
/PAGE_SIZE
);
7479 assert(pg_num
== target_offset
/PAGE_SIZE
);
7484 needed
= user_page_list
[pg_num
].needed
;
7489 if (upl
->flags
& UPL_LITE
) {
7491 if (lite_list
[pg_num
>>5] & (1 << (pg_num
& 31))) {
7492 lite_list
[pg_num
>>5] &= ~(1 << (pg_num
& 31));
7494 if ( !(upl
->flags
& UPL_KERNEL_OBJECT
))
7495 m
= vm_page_lookup(shadow_object
, target_offset
+
7496 (upl
->offset
- shadow_object
->paging_offset
));
7499 if (upl
->flags
& UPL_SHADOWED
) {
7500 if ((t
= vm_page_lookup(object
, target_offset
)) != VM_PAGE_NULL
) {
7505 if (m
== VM_PAGE_NULL
)
7506 m
= vm_page_lookup(shadow_object
, target_offset
+ object
->vo_shadow_offset
);
7509 if ((upl
->flags
& UPL_KERNEL_OBJECT
))
7510 goto abort_next_page
;
7512 if (m
!= VM_PAGE_NULL
) {
7514 assert(!m
->compressor
);
7517 boolean_t must_free
= TRUE
;
7520 * COPYOUT = FALSE case
7521 * check for error conditions which must
7522 * be passed back to the pages customer
7524 if (error
& UPL_ABORT_RESTART
) {
7529 } else if (error
& UPL_ABORT_UNAVAILABLE
) {
7533 } else if (error
& UPL_ABORT_ERROR
) {
7540 if (m
->clustered
&& needed
== FALSE
) {
7542 * This page was a part of a speculative
7543 * read-ahead initiated by the kernel
7544 * itself. No one is expecting this
7545 * page and no one will clean up its
7546 * error state if it ever becomes valid
7548 * We have to free it here.
7555 * If the page was already encrypted,
7556 * we don't really need to decrypt it
7557 * now. It will get decrypted later,
7558 * on demand, as soon as someone needs
7559 * to access its contents.
7562 m
->cleaning
= FALSE
;
7563 m
->encrypted_cleaning
= FALSE
;
7565 if (m
->overwriting
&& !m
->busy
) {
				 * this shouldn't happen since
				 * this is an 'absent' page, but
				 * it doesn't hurt to check for
				 * the 'alternate' method of
				 * stabilizing the page...
				 * we will mark 'busy' to be cleared
				 * in the following code which will
				 * take care of the primary stabilization
				 * method (i.e. setting 'busy' to TRUE)
7577 dwp
->dw_mask
|= DW_vm_page_unwire
;
7579 m
->overwriting
= FALSE
;
7581 dwp
->dw_mask
|= (DW_clear_busy
| DW_PAGE_WAKEUP
);
7583 if (must_free
== TRUE
)
7584 dwp
->dw_mask
|= DW_vm_page_free
;
7586 dwp
->dw_mask
|= DW_vm_page_activate
;
7589 * Handle the trusted pager throttle.
7592 dwp
->dw_mask
|= DW_vm_pageout_throttle_up
;
7594 if (upl
->flags
& UPL_ACCESS_BLOCKED
) {
7596 * We blocked access to the pages in this UPL.
7597 * Clear the "busy" bit and wake up any waiter
7600 dwp
->dw_mask
|= DW_clear_busy
;
7602 if (m
->overwriting
) {
7604 dwp
->dw_mask
|= DW_clear_busy
;
				 * deal with the 'alternate' method
				 * of stabilizing the page...
				 * we will either free the page
				 * or mark 'busy' to be cleared
				 * in the following code which will
				 * take care of the primary stabilization
				 * method (i.e. setting 'busy' to TRUE)
7615 dwp
->dw_mask
|= DW_vm_page_unwire
;
7617 m
->overwriting
= FALSE
;
7619 if (m
->encrypted_cleaning
== TRUE
) {
7620 m
->encrypted_cleaning
= FALSE
;
7622 dwp
->dw_mask
|= DW_clear_busy
;
7625 m
->cleaning
= FALSE
;
7627 vm_external_state_clr(m
->object
->existence_map
, m
->offset
);
7628 #endif /* MACH_PAGEMAP */
7629 if (error
& UPL_ABORT_DUMP_PAGES
) {
7630 pmap_disconnect(m
->phys_page
);
7632 dwp
->dw_mask
|= DW_vm_page_free
;
7634 if (!(dwp
->dw_mask
& DW_vm_page_unwire
)) {
7635 if (error
& UPL_ABORT_REFERENCE
) {
				 * we've been told to explicitly
				 * reference this page... for
				 * file I/O, this is done by
				 * implementing an LRU on the inactive q
7642 dwp
->dw_mask
|= DW_vm_page_lru
;
7644 } else if (!m
->active
&& !m
->inactive
&& !m
->speculative
)
7645 dwp
->dw_mask
|= DW_vm_page_deactivate_internal
;
7647 dwp
->dw_mask
|= DW_PAGE_WAKEUP
;
7652 target_offset
+= PAGE_SIZE_64
;
7653 xfer_size
-= PAGE_SIZE
;
7657 if (dwp
->dw_mask
& ~(DW_clear_busy
| DW_PAGE_WAKEUP
)) {
7658 VM_PAGE_ADD_DELAYED_WORK(dwp
, m
, dw_count
);
7660 if (dw_count
>= dw_limit
) {
7661 vm_page_do_delayed_work(shadow_object
, VM_KERN_MEMORY_NONE
, &dw_array
[0], dw_count
);
7667 if (dwp
->dw_mask
& DW_clear_busy
)
7670 if (dwp
->dw_mask
& DW_PAGE_WAKEUP
)
7676 vm_page_do_delayed_work(shadow_object
, VM_KERN_MEMORY_NONE
, &dw_array
[0], dw_count
);
7680 if (upl
->flags
& UPL_DEVICE_MEMORY
) {
7682 } else if (upl
->flags
& UPL_LITE
) {
7686 pg_num
= upl
->size
/PAGE_SIZE
;
7687 pg_num
= (pg_num
+ 31) >> 5;
7690 for (i
= 0; i
< pg_num
; i
++) {
7691 if (lite_list
[i
] != 0) {
7697 if (queue_empty(&upl
->map_object
->memq
))
7700 if (occupied
== 0) {
7702 * If this UPL element belongs to a Vector UPL and is
7703 * empty, then this is the right function to deallocate
7704 * it. So go ahead set the *empty variable. The flag
7705 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view
7706 * should be considered relevant for the Vector UPL and
7707 * not the internal UPLs.
7709 if ((upl
->flags
& UPL_COMMIT_NOTIFY_EMPTY
) || isVectorUPL
)
7712 if (object
== shadow_object
&& !(upl
->flags
& UPL_KERNEL_OBJECT
)) {
7714 * this is not a paging object
7715 * so we need to drop the paging reference
7716 * that was taken when we created the UPL
7717 * against this object
7719 vm_object_activity_end(shadow_object
);
7720 vm_object_collapse(shadow_object
, 0, TRUE
);
		 * we donated the paging reference to
		 * the map object... vm_pageout_object_terminate
		 * will drop this reference
7729 vm_object_unlock(shadow_object
);
7730 if (object
!= shadow_object
)
7731 vm_object_unlock(object
);
7737 * If we completed our operations on an UPL that is
7738 * part of a Vectored UPL and if empty is TRUE, then
7739 * we should go ahead and deallocate this UPL element.
7740 * Then we check if this was the last of the UPL elements
7741 * within that Vectored UPL. If so, set empty to TRUE
7742 * so that in ubc_upl_abort_range or ubc_upl_abort, we
7743 * can go ahead and deallocate the Vector UPL too.
7745 if(*empty
== TRUE
) {
7746 *empty
= vector_upl_set_subupl(vector_upl
, upl
,0);
7747 upl_deallocate(upl
);
7749 goto process_upl_to_abort
;
7752 return KERN_SUCCESS
;
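
/*
 * upl_abort() and upl_commit() below are convenience wrappers that apply
 * the corresponding *_range operation to the entire UPL (offset 0 through
 * upl->size).
 */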
7763 return upl_abort_range(upl
, 0, upl
->size
, error
, &empty
);
7767 /* an option on commit should be wire */
7771 upl_page_info_t
*page_list
,
7772 mach_msg_type_number_t count
)
7776 return upl_commit_range(upl
, 0, upl
->size
, 0, page_list
, count
, &empty
);
7786 vm_page_t m
, nxt_page
= VM_PAGE_NULL
;
7788 int wired_count
= 0;
7791 panic("iopl_valid_data: NULL upl");
7792 if (vector_upl_is_valid(upl
))
7793 panic("iopl_valid_data: vector upl");
7794 if ((upl
->flags
& (UPL_DEVICE_MEMORY
|UPL_SHADOWED
|UPL_ACCESS_BLOCKED
|UPL_IO_WIRE
|UPL_INTERNAL
)) != UPL_IO_WIRE
)
7795 panic("iopl_valid_data: unsupported upl, flags = %x", upl
->flags
);
7797 object
= upl
->map_object
;
7799 if (object
== kernel_object
|| object
== compressor_object
)
7800 panic("iopl_valid_data: object == kernel or compressor");
7802 if (object
->purgable
== VM_PURGABLE_VOLATILE
)
7803 panic("iopl_valid_data: object == VM_PURGABLE_VOLATILE");
7807 vm_object_lock(object
);
7809 if (object
->vo_size
== size
&& object
->resident_page_count
== (size
/ PAGE_SIZE
))
7810 nxt_page
= (vm_page_t
)queue_first(&object
->memq
);
7812 offset
= 0 + upl
->offset
- object
->paging_offset
;
7816 if (nxt_page
!= VM_PAGE_NULL
) {
7818 nxt_page
= (vm_page_t
)queue_next(&nxt_page
->listq
);
7820 m
= vm_page_lookup(object
, offset
);
7821 offset
+= PAGE_SIZE
;
7823 if (m
== VM_PAGE_NULL
)
7824 panic("iopl_valid_data: missing expected page at offset %lx", (long)offset
);
7828 panic("iopl_valid_data: busy page w/o absent");
7830 if (m
->pageq
.next
|| m
->pageq
.prev
)
7831 panic("iopl_valid_data: busy+absent page on page queue");
7838 PAGE_WAKEUP_DONE(m
);
7844 if (!object
->wired_page_count
) {
7845 VM_OBJECT_WIRED(object
);
7847 object
->wired_page_count
+= wired_count
;
7849 vm_page_lockspin_queues();
7850 vm_page_wire_count
+= wired_count
;
7851 vm_page_unlock_queues();
7853 vm_object_unlock(object
);
7857 vm_object_set_pmap_cache_attr(
7859 upl_page_info_array_t user_page_list
,
7860 unsigned int num_pages
,
7861 boolean_t batch_pmap_op
)
7863 unsigned int cache_attr
= 0;
7865 cache_attr
= object
->wimg_bits
& VM_WIMG_MASK
;
7866 assert(user_page_list
);
7867 if (cache_attr
!= VM_WIMG_USE_DEFAULT
) {
7868 PMAP_BATCH_SET_CACHE_ATTR(object
, user_page_list
, cache_attr
, num_pages
, batch_pmap_op
);
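
/*
 * Fast-path helpers for vm_object_iopl_request():
 *
 * vm_object_iopl_wire_full() wires every resident page of an object that
 * is already fully populated, filling in the lite list and the caller's
 * page list as it goes.  vm_object_iopl_wire_empty() handles the opposite
 * case: the object has no resident pages, so fresh pages are grabbed
 * (zero-filled unless UPL_NOZEROFILL/UPL_NOZEROFILLIO is set), inserted
 * and wired, with any purgeable ledger updates applied in one batch at
 * the end.
 */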
7873 boolean_t
vm_object_iopl_wire_full(vm_object_t
, upl_t
, upl_page_info_array_t
, wpl_array_t
, upl_control_flags_t
);
7874 kern_return_t
vm_object_iopl_wire_empty(vm_object_t
, upl_t
, upl_page_info_array_t
, wpl_array_t
, upl_control_flags_t
, vm_object_offset_t
*, int);
7879 vm_object_iopl_wire_full(vm_object_t object
, upl_t upl
, upl_page_info_array_t user_page_list
,
7880 wpl_array_t lite_list
, upl_control_flags_t cntrl_flags
)
7886 int delayed_unlock
= 0;
7887 boolean_t retval
= TRUE
;
7889 vm_object_lock_assert_exclusive(object
);
7890 assert(object
->purgable
!= VM_PURGABLE_VOLATILE
);
7891 assert(object
->purgable
!= VM_PURGABLE_EMPTY
);
7892 assert(object
->pager
== NULL
);
7893 assert(object
->copy
== NULL
);
7894 assert(object
->shadow
== NULL
);
7896 tag
= UPL_MEMORY_TAG(cntrl_flags
);
7897 page_count
= object
->resident_page_count
;
7898 dst_page
= (vm_page_t
)queue_first(&object
->memq
);
7900 vm_page_lock_queues();
7902 while (page_count
--) {
7904 if (dst_page
->busy
||
7905 dst_page
->fictitious
||
7908 dst_page
->cleaning
||
7909 dst_page
->restart
||
7910 dst_page
->encrypted
||
7911 dst_page
->laundry
) {
7915 if ((cntrl_flags
& UPL_REQUEST_FORCE_COHERENCY
) && dst_page
->written_by_kernel
== TRUE
) {
7919 dst_page
->reference
= TRUE
;
7921 vm_page_wire(dst_page
, tag
, FALSE
);
7923 if (!(cntrl_flags
& UPL_COPYOUT_FROM
)) {
7924 SET_PAGE_DIRTY(dst_page
, FALSE
);
7926 entry
= (unsigned int)(dst_page
->offset
/ PAGE_SIZE
);
7927 assert(entry
>= 0 && entry
< object
->resident_page_count
);
7928 lite_list
[entry
>>5] |= 1 << (entry
& 31);
7930 if (dst_page
->phys_page
> upl
->highest_page
)
7931 upl
->highest_page
= dst_page
->phys_page
;
7933 if (user_page_list
) {
7934 user_page_list
[entry
].phys_addr
= dst_page
->phys_page
;
7935 user_page_list
[entry
].absent
= dst_page
->absent
;
7936 user_page_list
[entry
].dirty
= dst_page
->dirty
;
7937 user_page_list
[entry
].pageout
= dst_page
->pageout
;;
7938 user_page_list
[entry
].precious
= dst_page
->precious
;
7939 user_page_list
[entry
].device
= FALSE
;
7940 user_page_list
[entry
].speculative
= FALSE
;
7941 user_page_list
[entry
].cs_validated
= FALSE
;
7942 user_page_list
[entry
].cs_tainted
= FALSE
;
7943 user_page_list
[entry
].cs_nx
= FALSE
;
7944 user_page_list
[entry
].needed
= FALSE
;
7945 user_page_list
[entry
].mark
= FALSE
;
7947 if (delayed_unlock
++ > 256) {
7949 lck_mtx_yield(&vm_page_queue_lock
);
7951 VM_CHECK_MEMORYSTATUS
;
7953 dst_page
= (vm_page_t
)queue_next(&dst_page
->listq
);
7956 vm_page_unlock_queues();
7958 VM_CHECK_MEMORYSTATUS
;
7965 vm_object_iopl_wire_empty(vm_object_t object
, upl_t upl
, upl_page_info_array_t user_page_list
,
7966 wpl_array_t lite_list
, upl_control_flags_t cntrl_flags
, vm_object_offset_t
*dst_offset
, int page_count
)
7970 boolean_t no_zero_fill
= FALSE
;
7972 int pages_wired
= 0;
7973 int pages_inserted
= 0;
7975 uint64_t delayed_ledger_update
= 0;
7976 kern_return_t ret
= KERN_SUCCESS
;
7978 vm_object_lock_assert_exclusive(object
);
7979 assert(object
->purgable
!= VM_PURGABLE_VOLATILE
);
7980 assert(object
->purgable
!= VM_PURGABLE_EMPTY
);
7981 assert(object
->pager
== NULL
);
7982 assert(object
->copy
== NULL
);
7983 assert(object
->shadow
== NULL
);
7985 if (cntrl_flags
& UPL_SET_INTERRUPTIBLE
)
7986 interruptible
= THREAD_ABORTSAFE
;
7988 interruptible
= THREAD_UNINT
;
7990 if (cntrl_flags
& (UPL_NOZEROFILL
| UPL_NOZEROFILLIO
))
7991 no_zero_fill
= TRUE
;
7993 tag
= UPL_MEMORY_TAG(cntrl_flags
);
7995 while (page_count
--) {
7997 while ( (dst_page
= vm_page_grab()) == VM_PAGE_NULL
) {
7999 OSAddAtomic(page_count
, &vm_upl_wait_for_pages
);
8001 VM_DEBUG_EVENT(vm_iopl_page_wait
, VM_IOPL_PAGE_WAIT
, DBG_FUNC_START
, vm_upl_wait_for_pages
, 0, 0, 0);
8003 if (vm_page_wait(interruptible
) == FALSE
) {
8007 OSAddAtomic(-page_count
, &vm_upl_wait_for_pages
);
8009 VM_DEBUG_EVENT(vm_iopl_page_wait
, VM_IOPL_PAGE_WAIT
, DBG_FUNC_END
, vm_upl_wait_for_pages
, 0, 0, -1);
8011 ret
= MACH_SEND_INTERRUPTED
;
8014 OSAddAtomic(-page_count
, &vm_upl_wait_for_pages
);
8016 VM_DEBUG_EVENT(vm_iopl_page_wait
, VM_IOPL_PAGE_WAIT
, DBG_FUNC_END
, vm_upl_wait_for_pages
, 0, 0, 0);
8018 if (no_zero_fill
== FALSE
)
8019 vm_page_zero_fill(dst_page
);
8021 dst_page
->absent
= TRUE
;
8023 dst_page
->reference
= TRUE
;
8025 if (!(cntrl_flags
& UPL_COPYOUT_FROM
)) {
8026 SET_PAGE_DIRTY(dst_page
, FALSE
);
8028 if (dst_page
->absent
== FALSE
) {
8029 dst_page
->wire_count
++;
8031 PAGE_WAKEUP_DONE(dst_page
);
8035 vm_page_insert_internal(dst_page
, object
, *dst_offset
, tag
, FALSE
, TRUE
, TRUE
, TRUE
, &delayed_ledger_update
);
8037 lite_list
[entry
>>5] |= 1 << (entry
& 31);
8039 if (dst_page
->phys_page
> upl
->highest_page
)
8040 upl
->highest_page
= dst_page
->phys_page
;
8042 if (user_page_list
) {
8043 user_page_list
[entry
].phys_addr
= dst_page
->phys_page
;
8044 user_page_list
[entry
].absent
= dst_page
->absent
;
8045 user_page_list
[entry
].dirty
= dst_page
->dirty
;
8046 user_page_list
[entry
].pageout
= FALSE
;
8047 user_page_list
[entry
].precious
= FALSE
;
8048 user_page_list
[entry
].device
= FALSE
;
8049 user_page_list
[entry
].speculative
= FALSE
;
8050 user_page_list
[entry
].cs_validated
= FALSE
;
8051 user_page_list
[entry
].cs_tainted
= FALSE
;
8052 user_page_list
[entry
].cs_nx
= FALSE
;
8053 user_page_list
[entry
].needed
= FALSE
;
8054 user_page_list
[entry
].mark
= FALSE
;
8057 *dst_offset
+= PAGE_SIZE_64
;
8061 vm_page_lockspin_queues();
8062 vm_page_wire_count
+= pages_wired
;
8063 vm_page_unlock_queues();
8065 if (pages_inserted
) {
8066 if (object
->internal
) {
8067 OSAddAtomic(pages_inserted
, &vm_page_internal_count
);
8069 OSAddAtomic(pages_inserted
, &vm_page_external_count
);
8072 if (delayed_ledger_update
) {
8075 owner
= object
->vo_purgeable_owner
;
8078 /* more non-volatile bytes */
8079 ledger_credit(owner
->ledger
,
8080 task_ledgers
.purgeable_nonvolatile
,
8081 delayed_ledger_update
);
8082 /* more footprint */
8083 ledger_credit(owner
->ledger
,
8084 task_ledgers
.phys_footprint
,
8085 delayed_ledger_update
);
8091 unsigned int vm_object_iopl_request_sleep_for_cleaning
= 0;
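
/*
 * vm_object_iopl_request:
 *
 * Create an I/O UPL directly against a VM object: the requested range is
 * wired (faulting pages in as needed, or via the fast paths above when
 * the object is fully resident or completely empty), physical addresses
 * are recorded in the caller's page list, and constraints such as
 * UPL_BLOCK_ACCESS and UPL_NEED_32BIT_ADDR are honored.
 */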
8095 vm_object_iopl_request(
8097 vm_object_offset_t offset
,
8100 upl_page_info_array_t user_page_list
,
8101 unsigned int *page_list_count
,
8102 upl_control_flags_t cntrl_flags
)
8105 vm_object_offset_t dst_offset
;
8106 upl_size_t xfer_size
;
8109 wpl_array_t lite_list
= NULL
;
8110 int no_zero_fill
= FALSE
;
8111 unsigned int size_in_pages
;
8115 struct vm_object_fault_info fault_info
;
8116 struct vm_page_delayed_work dw_array
[DEFAULT_DELAYED_WORK_LIMIT
];
8117 struct vm_page_delayed_work
*dwp
;
8121 boolean_t caller_lookup
;
8122 int io_tracking_flag
= 0;
8125 boolean_t set_cache_attr_needed
= FALSE
;
8126 boolean_t free_wired_pages
= FALSE
;
8127 boolean_t fast_path_empty_req
= FALSE
;
8128 boolean_t fast_path_full_req
= FALSE
;
8130 if (cntrl_flags
& ~UPL_VALID_FLAGS
) {
8132 * For forward compatibility's sake,
8133 * reject any unknown flag.
8135 return KERN_INVALID_VALUE
;
8137 if (vm_lopage_needed
== FALSE
)
8138 cntrl_flags
&= ~UPL_NEED_32BIT_ADDR
;
8140 if (cntrl_flags
& UPL_NEED_32BIT_ADDR
) {
8141 if ( (cntrl_flags
& (UPL_SET_IO_WIRE
| UPL_SET_LITE
)) != (UPL_SET_IO_WIRE
| UPL_SET_LITE
))
8142 return KERN_INVALID_VALUE
;
8144 if (object
->phys_contiguous
) {
8145 if ((offset
+ object
->vo_shadow_offset
) >= (vm_object_offset_t
)max_valid_dma_address
)
8146 return KERN_INVALID_ADDRESS
;
8148 if (((offset
+ object
->vo_shadow_offset
) + size
) >= (vm_object_offset_t
)max_valid_dma_address
)
8149 return KERN_INVALID_ADDRESS
;
8153 if (cntrl_flags
& UPL_ENCRYPT
) {
8156 * The paging path doesn't use this interface,
8157 * so we don't support the UPL_ENCRYPT flag
8158 * here. We won't encrypt the pages.
8160 assert(! (cntrl_flags
& UPL_ENCRYPT
));
8162 if (cntrl_flags
& (UPL_NOZEROFILL
| UPL_NOZEROFILLIO
))
8163 no_zero_fill
= TRUE
;
8165 if (cntrl_flags
& UPL_COPYOUT_FROM
)
8166 prot
= VM_PROT_READ
;
8168 prot
= VM_PROT_READ
| VM_PROT_WRITE
;
8170 if ((!object
->internal
) && (object
->paging_offset
!= 0))
8171 panic("vm_object_iopl_request: external object with non-zero paging offset\n");
8173 #if CONFIG_IOSCHED || UPL_DEBUG
8174 if ((object
->io_tracking
&& object
!= kernel_object
) || upl_debug_enabled
)
8175 io_tracking_flag
|= UPL_CREATE_IO_TRACKING
;
8179 if (object
->io_tracking
) {
8180 /* Check if we're dealing with the kernel object. We do not support expedite on kernel object UPLs */
8181 if (object
!= kernel_object
)
8182 io_tracking_flag
|= UPL_CREATE_EXPEDITE_SUP
;
8186 if (object
->phys_contiguous
)
8191 if (cntrl_flags
& UPL_SET_INTERNAL
) {
8192 upl
= upl_create(UPL_CREATE_INTERNAL
| UPL_CREATE_LITE
| io_tracking_flag
, UPL_IO_WIRE
, psize
);
8194 user_page_list
= (upl_page_info_t
*) (((uintptr_t)upl
) + sizeof(struct upl
));
8195 lite_list
= (wpl_array_t
) (((uintptr_t)user_page_list
) +
8196 ((psize
/ PAGE_SIZE
) * sizeof(upl_page_info_t
)));
8198 user_page_list
= NULL
;
8202 upl
= upl_create(UPL_CREATE_LITE
| io_tracking_flag
, UPL_IO_WIRE
, psize
);
8204 lite_list
= (wpl_array_t
) (((uintptr_t)upl
) + sizeof(struct upl
));
8210 user_page_list
[0].device
= FALSE
;
8213 upl
->map_object
= object
;
8216 size_in_pages
= size
/ PAGE_SIZE
;
8218 if (object
== kernel_object
&&
8219 !(cntrl_flags
& (UPL_NEED_32BIT_ADDR
| UPL_BLOCK_ACCESS
))) {
8220 upl
->flags
|= UPL_KERNEL_OBJECT
;
8222 vm_object_lock(object
);
8224 vm_object_lock_shared(object
);
8227 vm_object_lock(object
);
8228 vm_object_activity_begin(object
);
8231 * paging in progress also protects the paging_offset
8233 upl
->offset
= offset
+ object
->paging_offset
;
8235 if (cntrl_flags
& UPL_BLOCK_ACCESS
) {
		 * The user requested that access to the pages in this UPL
		 * be blocked until the UPL is committed or aborted.
8240 upl
->flags
|= UPL_ACCESS_BLOCKED
;
8243 #if CONFIG_IOSCHED || UPL_DEBUG
8244 if (upl
->flags
& UPL_TRACKED_BY_OBJECT
) {
8245 vm_object_activity_begin(object
);
8246 queue_enter(&object
->uplq
, upl
, upl_t
, uplq
);
8250 if (object
->phys_contiguous
) {
8252 if (upl
->flags
& UPL_ACCESS_BLOCKED
) {
8253 assert(!object
->blocked_access
);
8254 object
->blocked_access
= TRUE
;
8257 vm_object_unlock(object
);
8260 * don't need any shadow mappings for this one
8261 * since it is already I/O memory
8263 upl
->flags
|= UPL_DEVICE_MEMORY
;
8265 upl
->highest_page
= (ppnum_t
) ((offset
+ object
->vo_shadow_offset
+ size
- 1)>>PAGE_SHIFT
);
8267 if (user_page_list
) {
8268 user_page_list
[0].phys_addr
= (ppnum_t
) ((offset
+ object
->vo_shadow_offset
)>>PAGE_SHIFT
);
8269 user_page_list
[0].device
= TRUE
;
8271 if (page_list_count
!= NULL
) {
8272 if (upl
->flags
& UPL_INTERNAL
)
8273 *page_list_count
= 0;
8275 *page_list_count
= 1;
8277 return KERN_SUCCESS
;
8279 if (object
!= kernel_object
&& object
!= compressor_object
) {
8281 * Protect user space from future COW operations
8283 #if VM_OBJECT_TRACKING_OP_TRUESHARE
8284 if (!object
->true_share
&&
8285 vm_object_tracking_inited
) {
8286 void *bt
[VM_OBJECT_TRACKING_BTDEPTH
];
8289 num
= OSBacktrace(bt
,
8290 VM_OBJECT_TRACKING_BTDEPTH
);
8291 btlog_add_entry(vm_object_tracking_btlog
,
8293 VM_OBJECT_TRACKING_OP_TRUESHARE
,
8297 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
8299 object
->true_share
= TRUE
;
8301 if (object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
)
8302 object
->copy_strategy
= MEMORY_OBJECT_COPY_DELAY
;
8305 if (!(cntrl_flags
& UPL_COPYOUT_FROM
) &&
8306 object
->copy
!= VM_OBJECT_NULL
) {
8308 * Honor copy-on-write obligations
8310 * The caller is gathering these pages and
8311 * might modify their contents. We need to
8312 * make sure that the copy object has its own
8313 * private copies of these pages before we let
8314 * the caller modify them.
8316 * NOTE: someone else could map the original object
8317 * after we've done this copy-on-write here, and they
8318 * could then see an inconsistent picture of the memory
8319 * while it's being modified via the UPL. To prevent this,
8320 * we would have to block access to these pages until the
8321 * UPL is released. We could use the UPL_BLOCK_ACCESS
8322 * code path for that...
8324 vm_object_update(object
,
8329 FALSE
, /* should_return */
8330 MEMORY_OBJECT_COPY_SYNC
,
8332 #if DEVELOPMENT || DEBUG
8334 iopl_cow_pages
+= size
>> PAGE_SHIFT
;
8337 if (!(cntrl_flags
& (UPL_NEED_32BIT_ADDR
| UPL_BLOCK_ACCESS
)) &&
8338 object
->purgable
!= VM_PURGABLE_VOLATILE
&&
8339 object
->purgable
!= VM_PURGABLE_EMPTY
&&
8340 object
->copy
== NULL
&&
8341 size
== object
->vo_size
&&
8343 object
->shadow
== NULL
&&
8344 object
->pager
== NULL
)
8346 if (object
->resident_page_count
== size_in_pages
)
8348 assert(object
!= compressor_object
);
8349 assert(object
!= kernel_object
);
8350 fast_path_full_req
= TRUE
;
8352 else if (object
->resident_page_count
== 0)
8354 assert(object
!= compressor_object
);
8355 assert(object
!= kernel_object
);
8356 fast_path_empty_req
= TRUE
;
8357 set_cache_attr_needed
= TRUE
;
8361 if (cntrl_flags
& UPL_SET_INTERRUPTIBLE
)
8362 interruptible
= THREAD_ABORTSAFE
;
8364 interruptible
= THREAD_UNINT
;
8369 dst_offset
= offset
;
8372 if (fast_path_full_req
) {
8374 if (vm_object_iopl_wire_full(object
, upl
, user_page_list
, lite_list
, cntrl_flags
) == TRUE
)
8377 * we couldn't complete the processing of this request on the fast path
8378 * so fall through to the slow path and finish up
8381 } else if (fast_path_empty_req
) {
8383 if (cntrl_flags
& UPL_REQUEST_NO_FAULT
) {
8384 ret
= KERN_MEMORY_ERROR
;
8387 ret
= vm_object_iopl_wire_empty(object
, upl
, user_page_list
, lite_list
, cntrl_flags
, &dst_offset
, size_in_pages
);
8390 free_wired_pages
= TRUE
;
8396 fault_info
.behavior
= VM_BEHAVIOR_SEQUENTIAL
;
8397 fault_info
.user_tag
= 0;
8398 fault_info
.lo_offset
= offset
;
8399 fault_info
.hi_offset
= offset
+ xfer_size
;
8400 fault_info
.no_cache
= FALSE
;
8401 fault_info
.stealth
= FALSE
;
8402 fault_info
.io_sync
= FALSE
;
8403 fault_info
.cs_bypass
= FALSE
;
8404 fault_info
.mark_zf_absent
= TRUE
;
8405 fault_info
.interruptible
= interruptible
;
8406 fault_info
.batch_pmap_op
= TRUE
;
8409 dw_limit
= DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT
);
8412 vm_fault_return_t result
;
8416 if (fast_path_full_req
) {
8418 * if we get here, it means that we ran into a page
8419 * state we couldn't handle in the fast path and
8420 * bailed out to the slow path... since the order
8421 * we look at pages is different between the 2 paths,
8422 * the following check is needed to determine whether
8423 * this page was already processed in the fast path
8425 if (lite_list
[entry
>>5] & (1 << (entry
& 31)))
8428 dst_page
= vm_page_lookup(object
, dst_offset
);
8432 * If the page is encrypted, we need to decrypt it,
8433 * so force a soft page fault.
8435 if (dst_page
== VM_PAGE_NULL
||
8437 dst_page
->encrypted
||
8439 dst_page
->restart
||
8441 dst_page
->fictitious
) {
8443 if (object
== kernel_object
)
8444 panic("vm_object_iopl_request: missing/bad page in kernel object\n");
8445 if (object
== compressor_object
)
8446 panic("vm_object_iopl_request: missing/bad page in compressor object\n");
8448 if (cntrl_flags
& UPL_REQUEST_NO_FAULT
) {
8449 ret
= KERN_MEMORY_ERROR
;
8452 set_cache_attr_needed
= TRUE
;
8455 * We just looked up the page and the result remains valid
8456 * until the object lock is release, so send it to
8457 * vm_fault_page() (as "dst_page"), to avoid having to
8458 * look it up again there.
8460 caller_lookup
= TRUE
;
8464 kern_return_t error_code
;
8466 fault_info
.cluster_size
= xfer_size
;
8468 vm_object_paging_begin(object
);
8470 result
= vm_fault_page(object
, dst_offset
,
8471 prot
| VM_PROT_WRITE
, FALSE
,
8473 &prot
, &dst_page
, &top_page
,
8475 &error_code
, no_zero_fill
,
8476 FALSE
, &fault_info
);
8478 /* our lookup is no longer valid at this point */
8479 caller_lookup
= FALSE
;
8483 case VM_FAULT_SUCCESS
:
8485 if ( !dst_page
->absent
) {
8486 PAGE_WAKEUP_DONE(dst_page
);
8489 * we only get back an absent page if we
8490 * requested that it not be zero-filled
8491 * because we are about to fill it via I/O
8493 * absent pages should be left BUSY
8494 * to prevent them from being faulted
8495 * into an address space before we've
8496 * had a chance to complete the I/O on
8497 * them since they may contain info that
8498 * shouldn't be seen by the faulting task
8502 * Release paging references and
8503 * top-level placeholder page, if any.
8505 if (top_page
!= VM_PAGE_NULL
) {
8506 vm_object_t local_object
;
8508 local_object
= top_page
->object
;
8510 if (top_page
->object
!= dst_page
->object
) {
8511 vm_object_lock(local_object
);
8512 VM_PAGE_FREE(top_page
);
8513 vm_object_paging_end(local_object
);
8514 vm_object_unlock(local_object
);
8516 VM_PAGE_FREE(top_page
);
8517 vm_object_paging_end(local_object
);
8520 vm_object_paging_end(object
);
8523 case VM_FAULT_RETRY
:
8524 vm_object_lock(object
);
8527 case VM_FAULT_MEMORY_SHORTAGE
:
8528 OSAddAtomic((size_in_pages
- entry
), &vm_upl_wait_for_pages
);
8530 VM_DEBUG_EVENT(vm_iopl_page_wait
, VM_IOPL_PAGE_WAIT
, DBG_FUNC_START
, vm_upl_wait_for_pages
, 0, 0, 0);
8532 if (vm_page_wait(interruptible
)) {
8533 OSAddAtomic(-(size_in_pages
- entry
), &vm_upl_wait_for_pages
);
8535 VM_DEBUG_EVENT(vm_iopl_page_wait
, VM_IOPL_PAGE_WAIT
, DBG_FUNC_END
, vm_upl_wait_for_pages
, 0, 0, 0);
8536 vm_object_lock(object
);
8540 OSAddAtomic(-(size_in_pages
- entry
), &vm_upl_wait_for_pages
);
8542 VM_DEBUG_EVENT(vm_iopl_page_wait
, VM_IOPL_PAGE_WAIT
, DBG_FUNC_END
, vm_upl_wait_for_pages
, 0, 0, -1);
8546 case VM_FAULT_INTERRUPTED
:
8547 error_code
= MACH_SEND_INTERRUPTED
;
8548 case VM_FAULT_MEMORY_ERROR
:
8550 ret
= (error_code
? error_code
: KERN_MEMORY_ERROR
);
8552 vm_object_lock(object
);
8555 case VM_FAULT_SUCCESS_NO_VM_PAGE
:
8556 /* success but no page: fail */
8557 vm_object_paging_end(object
);
8558 vm_object_unlock(object
);
8562 panic("vm_object_iopl_request: unexpected error"
8563 " 0x%x from vm_fault_page()\n", result
);
8565 } while (result
!= VM_FAULT_SUCCESS
);
8568 if (upl
->flags
& UPL_KERNEL_OBJECT
)
8569 goto record_phys_addr
;
8571 if (dst_page
->compressor
) {
8572 dst_page
->busy
= TRUE
;
8573 goto record_phys_addr
;
8576 if (dst_page
->cleaning
) {
8578 * Someone else is cleaning this page in place.
8579 * In theory, we should be able to proceed and use this
8580 * page but they'll probably end up clearing the "busy"
8581 * bit on it in upl_commit_range() but they didn't set
8582 * it, so they would clear our "busy" bit and open
8583 * us to race conditions.
8584 * We'd better wait for the cleaning to complete and
8587 vm_object_iopl_request_sleep_for_cleaning
++;
8588 PAGE_SLEEP(object
, dst_page
, THREAD_UNINT
);
8591 if (dst_page
->laundry
) {
8592 dst_page
->pageout
= FALSE
;
8594 vm_pageout_steal_laundry(dst_page
, FALSE
);
8596 if ( (cntrl_flags
& UPL_NEED_32BIT_ADDR
) &&
8597 dst_page
->phys_page
>= (max_valid_dma_address
>> PAGE_SHIFT
) ) {
8602 * support devices that can't DMA above 32 bits
8603 * by substituting pages from a pool of low address
8604 * memory for any pages we find above the 4G mark
8605 * can't substitute if the page is already wired because
8606 * we don't know whether that physical address has been
8607 * handed out to some other 64 bit capable DMA device to use
8609 if (VM_PAGE_WIRED(dst_page
)) {
8610 ret
= KERN_PROTECTION_FAILURE
;
8613 low_page
= vm_page_grablo();
8615 if (low_page
== VM_PAGE_NULL
) {
8616 ret
= KERN_RESOURCE_SHORTAGE
;
			 * from here until the vm_page_replace completes
			 * we mustn't drop the object lock... we don't
			 * want anyone refaulting this page in and using
			 * it after we disconnect it... we want the fault
			 * to find the new page being substituted.
8626 if (dst_page
->pmapped
)
8627 refmod
= pmap_disconnect(dst_page
->phys_page
);
8631 if (!dst_page
->absent
)
8632 vm_page_copy(dst_page
, low_page
);
8634 low_page
->reference
= dst_page
->reference
;
8635 low_page
->dirty
= dst_page
->dirty
;
8636 low_page
->absent
= dst_page
->absent
;
8638 if (refmod
& VM_MEM_REFERENCED
)
8639 low_page
->reference
= TRUE
;
8640 if (refmod
& VM_MEM_MODIFIED
) {
8641 SET_PAGE_DIRTY(low_page
, FALSE
);
8644 vm_page_replace(low_page
, object
, dst_offset
);
8646 dst_page
= low_page
;
8648 * vm_page_grablo returned the page marked
8649 * BUSY... we don't need a PAGE_WAKEUP_DONE
8650 * here, because we've never dropped the object lock
8652 if ( !dst_page
->absent
)
8653 dst_page
->busy
= FALSE
;
8655 if ( !dst_page
->busy
)
8656 dwp
->dw_mask
|= DW_vm_page_wire
;
8658 if (cntrl_flags
& UPL_BLOCK_ACCESS
) {
8660 * Mark the page "busy" to block any future page fault
8661 * on this page in addition to wiring it.
8662 * We'll also remove the mapping
8663 * of all these pages before leaving this routine.
8665 assert(!dst_page
->fictitious
);
8666 dst_page
->busy
= TRUE
;
8669 * expect the page to be used
8670 * page queues lock must be held to set 'reference'
8672 dwp
->dw_mask
|= DW_set_reference
;
8674 if (!(cntrl_flags
& UPL_COPYOUT_FROM
)) {
8675 SET_PAGE_DIRTY(dst_page
, TRUE
);
8677 if ((cntrl_flags
& UPL_REQUEST_FORCE_COHERENCY
) && dst_page
->written_by_kernel
== TRUE
) {
8678 pmap_sync_page_attributes_phys(dst_page
->phys_page
);
8679 dst_page
->written_by_kernel
= FALSE
;
8684 upl
->flags
|= UPL_HAS_BUSY
;
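		/*
		 * Mark this UPL entry in the "lite" bitmap below: each 32-bit
		 * word of lite_list covers 32 entries, so entry "entry" lives
		 * in word (entry >> 5) at bit (entry & 31).
		 */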
		lite_list[entry >> 5] |= 1 << (entry & 31);
8688 if (dst_page
->phys_page
> upl
->highest_page
)
8689 upl
->highest_page
= dst_page
->phys_page
;
8691 if (user_page_list
) {
8692 user_page_list
[entry
].phys_addr
= dst_page
->phys_page
;
8693 user_page_list
[entry
].pageout
= dst_page
->pageout
;
8694 user_page_list
[entry
].absent
= dst_page
->absent
;
8695 user_page_list
[entry
].dirty
= dst_page
->dirty
;
8696 user_page_list
[entry
].precious
= dst_page
->precious
;
8697 user_page_list
[entry
].device
= FALSE
;
8698 user_page_list
[entry
].needed
= FALSE
;
8699 if (dst_page
->clustered
== TRUE
)
8700 user_page_list
[entry
].speculative
= dst_page
->speculative
;
8702 user_page_list
[entry
].speculative
= FALSE
;
8703 user_page_list
[entry
].cs_validated
= dst_page
->cs_validated
;
8704 user_page_list
[entry
].cs_tainted
= dst_page
->cs_tainted
;
8705 user_page_list
[entry
].cs_nx
= dst_page
->cs_nx
;
8706 user_page_list
[entry
].mark
= FALSE
;
8708 if (object
!= kernel_object
&& object
!= compressor_object
) {
8710 * someone is explicitly grabbing this page...
8711 * update clustered and speculative state
8714 if (dst_page
->clustered
)
8715 VM_PAGE_CONSUME_CLUSTERED(dst_page
);
8719 dst_offset
+= PAGE_SIZE_64
;
8720 xfer_size
-= PAGE_SIZE
;
8723 VM_PAGE_ADD_DELAYED_WORK(dwp
, dst_page
, dw_count
);
8725 if (dw_count
>= dw_limit
) {
8726 vm_page_do_delayed_work(object
, UPL_MEMORY_TAG(cntrl_flags
), &dw_array
[0], dw_count
);
8733 assert(entry
== size_in_pages
);
8736 vm_page_do_delayed_work(object
, UPL_MEMORY_TAG(cntrl_flags
), &dw_array
[0], dw_count
);
8738 if (user_page_list
&& set_cache_attr_needed
== TRUE
)
8739 vm_object_set_pmap_cache_attr(object
, user_page_list
, size_in_pages
, TRUE
);
8741 if (page_list_count
!= NULL
) {
8742 if (upl
->flags
& UPL_INTERNAL
)
8743 *page_list_count
= 0;
8744 else if (*page_list_count
> size_in_pages
)
8745 *page_list_count
= size_in_pages
;
8747 vm_object_unlock(object
);
8749 if (cntrl_flags
& UPL_BLOCK_ACCESS
) {
8751 * We've marked all the pages "busy" so that future
8752 * page faults will block.
8753 * Now remove the mapping for these pages, so that they
8754 * can't be accessed without causing a page fault.
8756 vm_object_pmap_protect(object
, offset
, (vm_object_size_t
)size
,
8757 PMAP_NULL
, 0, VM_PROT_NONE
);
8758 assert(!object
->blocked_access
);
8759 object
->blocked_access
= TRUE
;
8762 return KERN_SUCCESS
;
8767 for (; offset
< dst_offset
; offset
+= PAGE_SIZE
) {
8768 boolean_t need_unwire
;
8770 dst_page
= vm_page_lookup(object
, offset
);
8772 if (dst_page
== VM_PAGE_NULL
)
8773 panic("vm_object_iopl_request: Wired page missing. \n");
8776 * if we've already processed this page in an earlier
8777 * dw_do_work, we need to undo the wiring... we will
8778 * leave the dirty and reference bits on if they
8779 * were set, since we don't have a good way of knowing
8780 * what the previous state was and we won't get here
8781 * under any normal circumstances... we will always
8782 * clear BUSY and wakeup any waiters via vm_page_free
8783 * or PAGE_WAKEUP_DONE
8788 if (dw_array
[dw_index
].dw_m
== dst_page
) {
8790 * still in the deferred work list
8791 * which means we haven't yet called
8792 * vm_page_wire on this page
8794 need_unwire
= FALSE
;
8800 vm_page_lock_queues();
8802 if (dst_page
->absent
|| free_wired_pages
== TRUE
) {
8803 vm_page_free(dst_page
);
8805 need_unwire
= FALSE
;
8807 if (need_unwire
== TRUE
)
8808 vm_page_unwire(dst_page
, TRUE
);
8810 PAGE_WAKEUP_DONE(dst_page
);
8812 vm_page_unlock_queues();
8814 if (need_unwire
== TRUE
)
8815 VM_STAT_INCR(reactivations
);
8820 if (! (upl
->flags
& UPL_KERNEL_OBJECT
)) {
8821 vm_object_activity_end(object
);
8822 vm_object_collapse(object
, 0, TRUE
);
8824 vm_object_unlock(object
);
8835 kern_return_t retval
;
8836 boolean_t upls_locked
;
8837 vm_object_t object1
, object2
;
8839 if (upl1
== UPL_NULL
|| upl2
== UPL_NULL
|| upl1
== upl2
|| ((upl1
->flags
& UPL_VECTOR
)==UPL_VECTOR
) || ((upl2
->flags
& UPL_VECTOR
)==UPL_VECTOR
)) {
8840 return KERN_INVALID_ARGUMENT
;
8843 upls_locked
= FALSE
;
8846 * Since we need to lock both UPLs at the same time,
8847 * avoid deadlocks by always taking locks in the same order.
8856 upls_locked
= TRUE
; /* the UPLs will need to be unlocked */
8858 object1
= upl1
->map_object
;
8859 object2
= upl2
->map_object
;
8861 if (upl1
->offset
!= 0 || upl2
->offset
!= 0 ||
8862 upl1
->size
!= upl2
->size
) {
		/*
		 * We deal only with full objects, not subsets.
		 * That's because we exchange the entire backing store info
		 * for the objects: pager, resident pages, etc...  We can't do
		 * it partially.
		 */
8869 retval
= KERN_INVALID_VALUE
;
	 * Transpose the VM objects' backing store.
8876 retval
= vm_object_transpose(object1
, object2
,
8877 (vm_object_size_t
) upl1
->size
);
8879 if (retval
== KERN_SUCCESS
) {
8881 * Make each UPL point to the correct VM object, i.e. the
8882 * object holding the pages that the UPL refers to...
8884 #if CONFIG_IOSCHED || UPL_DEBUG
8885 if ((upl1
->flags
& UPL_TRACKED_BY_OBJECT
) || (upl2
->flags
& UPL_TRACKED_BY_OBJECT
)) {
8886 vm_object_lock(object1
);
8887 vm_object_lock(object2
);
8889 if (upl1
->flags
& UPL_TRACKED_BY_OBJECT
)
8890 queue_remove(&object1
->uplq
, upl1
, upl_t
, uplq
);
8891 if (upl2
->flags
& UPL_TRACKED_BY_OBJECT
)
8892 queue_remove(&object2
->uplq
, upl2
, upl_t
, uplq
);
8894 upl1
->map_object
= object2
;
8895 upl2
->map_object
= object1
;
8897 #if CONFIG_IOSCHED || UPL_DEBUG
8898 if (upl1
->flags
& UPL_TRACKED_BY_OBJECT
)
8899 queue_enter(&object2
->uplq
, upl1
, upl_t
, uplq
);
8900 if (upl2
->flags
& UPL_TRACKED_BY_OBJECT
)
8901 queue_enter(&object1
->uplq
, upl2
, upl_t
, uplq
);
8902 if ((upl1
->flags
& UPL_TRACKED_BY_OBJECT
) || (upl2
->flags
& UPL_TRACKED_BY_OBJECT
)) {
8903 vm_object_unlock(object2
);
8904 vm_object_unlock(object1
);
8916 upls_locked
= FALSE
;
8928 upl_page_info_t
*user_page_list
;
8931 if ( !(upl
->flags
& UPL_INTERNAL
) || count
<= 0)
8934 size_in_pages
= upl
->size
/ PAGE_SIZE
;
8936 user_page_list
= (upl_page_info_t
*) (((uintptr_t)upl
) + sizeof(struct upl
));
8938 while (count
-- && index
< size_in_pages
)
8939 user_page_list
[index
++].needed
= TRUE
;
/*
 * Rationale:  the user might have some encrypted data on disk (via
 * FileVault or any other mechanism).  That data is then decrypted in
 * memory, which is safe as long as the machine is secure.  But that
 * decrypted data in memory could be paged out to disk by the default
 * pager.  The data would then be stored on disk in clear (not encrypted)
 * and it could be accessed by anyone who gets physical access to the
 * disk (if the laptop or the disk gets stolen for example).  This weakens
 * the security offered by FileVault.
 *
 * Solution:  the default pager will optionally request that all the
 * pages it gathers for pageout be encrypted, via the UPL interfaces,
 * before it sends this UPL to disk via the vnode_pageout() path.
 *
 * To avoid disrupting the VM LRU algorithms, we want to keep the
 * clean-in-place mechanisms, which allow us to send some extra pages to
 * swap (clustering) without actually removing them from the user's
 * address space.  We don't want the user to unknowingly access encrypted
 * data, so we have to actually remove the encrypted pages from the page
 * table.  When the user accesses the data, the hardware will fail to
 * locate the virtual page in its page table and will trigger a page
 * fault.  We can then decrypt the page and enter it in the page table
 * again.  Whenever we allow the user to access the contents of a page,
 * we have to make sure it's not encrypted.
 */
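
/*
 * Minimal illustrative sketch of the pageout side of that scheme (not part
 * of the build): before a UPL full of dirty pages goes to disk, each page is
 * disconnected from all pmaps and encrypted in place, mirroring the loop in
 * upl_encrypt() further down.  "obj", "base_off" and "io_size" are
 * placeholder names for the shadow object, base offset and byte count of the
 * region being paged out; they are not real variables in this file.
 */
#if 0
	vm_object_offset_t	off;
	vm_page_t		m;

	for (off = 0; off < io_size; off += PAGE_SIZE) {
		m = vm_page_lookup(obj, base_off + off);
		if (m == VM_PAGE_NULL)
			continue;
		/* block all user access until the page gets decrypted again */
		pmap_disconnect(m->phys_page);
		/* encrypt in place before the I/O is issued */
		vm_page_encrypt(m, 0);
	}
#endif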
/*
 * Reserve of virtual addresses in the kernel address space.
 * We need to map the physical pages in the kernel, so that we
 * can call the encryption/decryption routines with a kernel
 * virtual address.  We keep this pool of pre-allocated kernel
 * virtual addresses so that we don't have to scan the kernel's
 * virtual address space each time we need to encrypt or decrypt
 * a page.
 * It would be nice to be able to encrypt and decrypt in physical
 * mode but that might not always be more efficient...
 */
decl_simple_lock_data(,vm_paging_lock)
#define VM_PAGING_NUM_PAGES	64
vm_map_offset_t	vm_paging_base_address = 0;
boolean_t	vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
int		vm_paging_max_index = 0;
int		vm_paging_page_waiter = 0;
int		vm_paging_page_waiter_total = 0;
unsigned long	vm_paging_no_kernel_page = 0;
unsigned long	vm_paging_objects_mapped = 0;
unsigned long	vm_paging_pages_mapped = 0;
unsigned long	vm_paging_objects_mapped_slow = 0;
unsigned long	vm_paging_pages_mapped_slow = 0;
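
/*
 * Illustrative sketch (not part of the build): how a free slot index in the
 * pool above maps to a kernel virtual address.  Slot "i" owns one page worth
 * of kernel VA starting at vm_paging_base_address, and vm_paging_page_inuse[i]
 * records whether that slot is currently handed out.  "example_slot_to_vaddr"
 * is a hypothetical helper used only for illustration.
 */
#if 0
static vm_map_offset_t
example_slot_to_vaddr(int i)
{
	assert(i >= 0 && i < VM_PAGING_NUM_PAGES);
	return vm_paging_base_address + ((vm_map_offset_t)i * PAGE_SIZE);
}
#endif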
9000 vm_paging_map_init(void)
9003 vm_map_offset_t page_map_offset
;
9004 vm_map_entry_t map_entry
;
9006 assert(vm_paging_base_address
== 0);
9009 * Initialize our pool of pre-allocated kernel
9010 * virtual addresses.
9012 page_map_offset
= 0;
9013 kr
= vm_map_find_space(kernel_map
,
9015 VM_PAGING_NUM_PAGES
* PAGE_SIZE
,
9019 if (kr
!= KERN_SUCCESS
) {
9020 panic("vm_paging_map_init: kernel_map full\n");
9022 VME_OBJECT_SET(map_entry
, kernel_object
);
9023 VME_OFFSET_SET(map_entry
, page_map_offset
);
9024 map_entry
->protection
= VM_PROT_NONE
;
9025 map_entry
->max_protection
= VM_PROT_NONE
;
9026 map_entry
->permanent
= TRUE
;
9027 vm_object_reference(kernel_object
);
9028 vm_map_unlock(kernel_map
);
9030 assert(vm_paging_base_address
== 0);
9031 vm_paging_base_address
= page_map_offset
;
/*
 * vm_paging_map_object:
 *	Maps part of a VM object's pages in the kernel
 *	virtual address space, using the pre-allocated
 *	kernel virtual addresses, if possible.
 *
 *	The VM object is locked.  This lock will get
 *	dropped and re-acquired though, so the caller
 *	must make sure the VM object is kept alive
 *	(by holding a VM map that has a reference
 *	on it, for example, or taking an extra reference).
 *	The page should also be kept busy to prevent
 *	it from being reclaimed.
 */
9050 vm_paging_map_object(
9053 vm_object_offset_t offset
,
9054 vm_prot_t protection
,
9055 boolean_t can_unlock_object
,
9056 vm_map_size_t
*size
, /* IN/OUT */
9057 vm_map_offset_t
*address
, /* OUT */
9058 boolean_t
*need_unmap
) /* OUT */
9061 vm_map_offset_t page_map_offset
;
9062 vm_map_size_t map_size
;
9063 vm_object_offset_t object_offset
;
9066 if (page
!= VM_PAGE_NULL
&& *size
== PAGE_SIZE
) {
9067 /* use permanent 1-to-1 kernel mapping of physical memory ? */
9069 *address
= (vm_map_offset_t
)
9070 PHYSMAP_PTOV((pmap_paddr_t
)page
->phys_page
<<
9072 *need_unmap
= FALSE
;
9073 return KERN_SUCCESS
;
9075 #warn "vm_paging_map_object: no 1-to-1 kernel mapping of physical memory..."
9080 * Use one of the pre-allocated kernel virtual addresses
9081 * and just enter the VM page in the kernel address space
9082 * at that virtual address.
9084 simple_lock(&vm_paging_lock
);
9087 * Try and find an available kernel virtual address
9088 * from our pre-allocated pool.
9090 page_map_offset
= 0;
9092 for (i
= 0; i
< VM_PAGING_NUM_PAGES
; i
++) {
9093 if (vm_paging_page_inuse
[i
] == FALSE
) {
9095 vm_paging_base_address
+
9100 if (page_map_offset
!= 0) {
9101 /* found a space to map our page ! */
9105 if (can_unlock_object
) {
9107 * If we can afford to unlock the VM object,
9108 * let's take the slow path now...
9113 * We can't afford to unlock the VM object, so
9114 * let's wait for a space to become available...
9116 vm_paging_page_waiter_total
++;
9117 vm_paging_page_waiter
++;
9118 kr
= assert_wait((event_t
)&vm_paging_page_waiter
, THREAD_UNINT
);
9119 if (kr
== THREAD_WAITING
) {
9120 simple_unlock(&vm_paging_lock
);
9121 kr
= thread_block(THREAD_CONTINUE_NULL
);
9122 simple_lock(&vm_paging_lock
);
9124 vm_paging_page_waiter
--;
9125 /* ... and try again */
9128 if (page_map_offset
!= 0) {
9130 * We found a kernel virtual address;
9131 * map the physical page to that virtual address.
9133 if (i
> vm_paging_max_index
) {
9134 vm_paging_max_index
= i
;
9136 vm_paging_page_inuse
[i
] = TRUE
;
9137 simple_unlock(&vm_paging_lock
);
9139 page
->pmapped
= TRUE
;
9142 * Keep the VM object locked over the PMAP_ENTER
9143 * and the actual use of the page by the kernel,
9144 * or this pmap mapping might get undone by a
9145 * vm_object_pmap_protect() call...
9147 PMAP_ENTER(kernel_pmap
,
9154 vm_paging_objects_mapped
++;
9155 vm_paging_pages_mapped
++;
9156 *address
= page_map_offset
;
9159 /* all done and mapped, ready to use ! */
9160 return KERN_SUCCESS
;
9164 * We ran out of pre-allocated kernel virtual
9165 * addresses. Just map the page in the kernel
9166 * the slow and regular way.
9168 vm_paging_no_kernel_page
++;
9169 simple_unlock(&vm_paging_lock
);
9172 if (! can_unlock_object
) {
9175 *need_unmap
= FALSE
;
9176 return KERN_NOT_SUPPORTED
;
9179 object_offset
= vm_object_trunc_page(offset
);
9180 map_size
= vm_map_round_page(*size
,
9181 VM_MAP_PAGE_MASK(kernel_map
));
9184 * Try and map the required range of the object
9188 vm_object_reference_locked(object
); /* for the map entry */
9189 vm_object_unlock(object
);
9191 kr
= vm_map_enter(kernel_map
,
9202 if (kr
!= KERN_SUCCESS
) {
9205 *need_unmap
= FALSE
;
9206 vm_object_deallocate(object
); /* for the map entry */
9207 vm_object_lock(object
);
9214 * Enter the mapped pages in the page table now.
9216 vm_object_lock(object
);
9218 * VM object must be kept locked from before PMAP_ENTER()
9219 * until after the kernel is done accessing the page(s).
9220 * Otherwise, the pmap mappings in the kernel could be
9221 * undone by a call to vm_object_pmap_protect().
9224 for (page_map_offset
= 0;
9226 map_size
-= PAGE_SIZE_64
, page_map_offset
+= PAGE_SIZE_64
) {
9228 page
= vm_page_lookup(object
, offset
+ page_map_offset
);
9229 if (page
== VM_PAGE_NULL
) {
9230 printf("vm_paging_map_object: no page !?");
9231 vm_object_unlock(object
);
9232 kr
= vm_map_remove(kernel_map
, *address
, *size
,
9234 assert(kr
== KERN_SUCCESS
);
9237 *need_unmap
= FALSE
;
9238 vm_object_lock(object
);
9239 return KERN_MEMORY_ERROR
;
9241 page
->pmapped
= TRUE
;
9243 //assert(pmap_verify_free(page->phys_page));
9244 PMAP_ENTER(kernel_pmap
,
9245 *address
+ page_map_offset
,
9253 vm_paging_objects_mapped_slow
++;
9254 vm_paging_pages_mapped_slow
+= (unsigned long) (map_size
/ PAGE_SIZE_64
);
9258 return KERN_SUCCESS
;
9263 * vm_paging_unmap_object:
9264 * Unmaps part of a VM object's pages from the kernel
9265 * virtual address space.
9267 * The VM object is locked. This lock will get
9268 * dropped and re-acquired though.
9271 vm_paging_unmap_object(
9273 vm_map_offset_t start
,
9274 vm_map_offset_t end
)
9279 if ((vm_paging_base_address
== 0) ||
9280 (start
< vm_paging_base_address
) ||
9281 (end
> (vm_paging_base_address
9282 + (VM_PAGING_NUM_PAGES
* PAGE_SIZE
)))) {
9284 * We didn't use our pre-allocated pool of
9285 * kernel virtual address. Deallocate the
9288 if (object
!= VM_OBJECT_NULL
) {
9289 vm_object_unlock(object
);
9291 kr
= vm_map_remove(kernel_map
, start
, end
, VM_MAP_NO_FLAGS
);
9292 if (object
!= VM_OBJECT_NULL
) {
9293 vm_object_lock(object
);
9295 assert(kr
== KERN_SUCCESS
);
9298 * We used a kernel virtual address from our
9299 * pre-allocated pool. Put it back in the pool
9302 assert(end
- start
== PAGE_SIZE
);
9303 i
= (int) ((start
- vm_paging_base_address
) >> PAGE_SHIFT
);
9304 assert(i
>= 0 && i
< VM_PAGING_NUM_PAGES
);
9306 /* undo the pmap mapping */
9307 pmap_remove(kernel_pmap
, start
, end
);
9309 simple_lock(&vm_paging_lock
);
9310 vm_paging_page_inuse
[i
] = FALSE
;
9311 if (vm_paging_page_waiter
) {
9312 thread_wakeup(&vm_paging_page_waiter
);
9314 simple_unlock(&vm_paging_lock
);
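
/*
 * Minimal usage sketch of the mapping pair above (not part of the build),
 * following the same pattern vm_page_encrypt()/vm_page_decrypt() use below:
 * the object stays referenced and the page busy across the call, and the
 * temporary mapping is torn down only if the call reports it created one.
 * "object", "offset" and "page" are placeholders, and the exact argument
 * list is assumed from the calls made elsewhere in this file.
 */
#if 0
	vm_map_size_t	map_size = PAGE_SIZE;
	vm_map_offset_t	kaddr = 0;
	boolean_t	needs_unmap = FALSE;

	if (vm_paging_map_object(page, object, offset,
				 VM_PROT_READ | VM_PROT_WRITE,
				 FALSE,			/* can_unlock_object */
				 &map_size,
				 &kaddr,
				 &needs_unmap) == KERN_SUCCESS) {
		/* ... access the page contents through "kaddr" ... */
		if (needs_unmap)
			vm_paging_unmap_object(object, kaddr, kaddr + map_size);
	}
#endif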
9321 * "iv" is the "initial vector". Ideally, we want to
9322 * have a different one for each page we encrypt, so that
9323 * crackers can't find encryption patterns too easily.
9325 #define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */
9326 boolean_t swap_crypt_ctx_initialized
= FALSE
;
9327 uint32_t swap_crypt_key
[8]; /* big enough for a 256 key */
9328 aes_ctx swap_crypt_ctx
;
9329 const unsigned char swap_crypt_null_iv
[AES_BLOCK_SIZE
] = {0xa, };
9332 boolean_t swap_crypt_ctx_tested
= FALSE
;
9333 unsigned char swap_crypt_test_page_ref
[4096] __attribute__((aligned(4096)));
9334 unsigned char swap_crypt_test_page_encrypt
[4096] __attribute__((aligned(4096)));
9335 unsigned char swap_crypt_test_page_decrypt
[4096] __attribute__((aligned(4096)));
9339 * Initialize the encryption context: key and key size.
9341 void swap_crypt_ctx_initialize(void); /* forward */
9343 swap_crypt_ctx_initialize(void)
9348 * No need for locking to protect swap_crypt_ctx_initialized
9349 * because the first use of encryption will come from the
9350 * pageout thread (we won't pagein before there's been a pageout)
9351 * and there's only one pageout thread.
9353 if (swap_crypt_ctx_initialized
== FALSE
) {
9355 i
< (sizeof (swap_crypt_key
) /
9356 sizeof (swap_crypt_key
[0]));
9358 swap_crypt_key
[i
] = random();
9360 aes_encrypt_key((const unsigned char *) swap_crypt_key
,
9361 SWAP_CRYPT_AES_KEY_SIZE
,
9362 &swap_crypt_ctx
.encrypt
);
9363 aes_decrypt_key((const unsigned char *) swap_crypt_key
,
9364 SWAP_CRYPT_AES_KEY_SIZE
,
9365 &swap_crypt_ctx
.decrypt
);
9366 swap_crypt_ctx_initialized
= TRUE
;
9371 * Validate the encryption algorithms.
9373 if (swap_crypt_ctx_tested
== FALSE
) {
9375 for (i
= 0; i
< 4096; i
++) {
9376 swap_crypt_test_page_ref
[i
] = (char) i
;
9379 aes_encrypt_cbc(swap_crypt_test_page_ref
,
9381 PAGE_SIZE
/ AES_BLOCK_SIZE
,
9382 swap_crypt_test_page_encrypt
,
9383 &swap_crypt_ctx
.encrypt
);
9385 aes_decrypt_cbc(swap_crypt_test_page_encrypt
,
9387 PAGE_SIZE
/ AES_BLOCK_SIZE
,
9388 swap_crypt_test_page_decrypt
,
9389 &swap_crypt_ctx
.decrypt
);
9390 /* compare result with original */
9391 for (i
= 0; i
< 4096; i
++) {
9392 if (swap_crypt_test_page_decrypt
[i
] !=
9393 swap_crypt_test_page_ref
[i
]) {
9394 panic("encryption test failed");
9399 aes_encrypt_cbc(swap_crypt_test_page_decrypt
,
9401 PAGE_SIZE
/ AES_BLOCK_SIZE
,
9402 swap_crypt_test_page_decrypt
,
9403 &swap_crypt_ctx
.encrypt
);
9404 /* decrypt in place */
9405 aes_decrypt_cbc(swap_crypt_test_page_decrypt
,
9407 PAGE_SIZE
/ AES_BLOCK_SIZE
,
9408 swap_crypt_test_page_decrypt
,
9409 &swap_crypt_ctx
.decrypt
);
9410 for (i
= 0; i
< 4096; i
++) {
9411 if (swap_crypt_test_page_decrypt
[i
] !=
9412 swap_crypt_test_page_ref
[i
]) {
9413 panic("in place encryption test failed");
9417 swap_crypt_ctx_tested
= TRUE
;
9425 * Encrypt the given page, for secure paging.
9426 * The page might already be mapped at kernel virtual
9427 * address "kernel_mapping_offset". Otherwise, we need
9431 * The page's object is locked, but this lock will be released
9433 * The page is busy and not accessible by users (not entered in any pmap).
9438 vm_map_offset_t kernel_mapping_offset
)
9441 vm_map_size_t kernel_mapping_size
;
9442 boolean_t kernel_mapping_needs_unmap
;
9443 vm_offset_t kernel_vaddr
;
9445 unsigned char aes_iv
[AES_BLOCK_SIZE
];
9447 memory_object_t pager_object
;
9448 vm_object_offset_t paging_offset
;
9452 if (! vm_pages_encrypted
) {
9453 vm_pages_encrypted
= TRUE
;
9458 if (page
->encrypted
) {
9460 * Already encrypted: no need to do it again.
9462 vm_page_encrypt_already_encrypted_counter
++;
9465 assert(page
->dirty
|| page
->precious
);
9467 ASSERT_PAGE_DECRYPTED(page
);
9470 * Take a paging-in-progress reference to keep the object
9471 * alive even if we have to unlock it (in vm_paging_map_object()
9474 vm_object_paging_begin(page
->object
);
9476 if (kernel_mapping_offset
== 0) {
9478 * The page hasn't already been mapped in kernel space
9479 * by the caller. Map it now, so that we can access
9480 * its contents and encrypt them.
9482 kernel_mapping_size
= PAGE_SIZE
;
9483 kernel_mapping_needs_unmap
= FALSE
;
9484 kr
= vm_paging_map_object(page
,
9487 VM_PROT_READ
| VM_PROT_WRITE
,
9489 &kernel_mapping_size
,
9490 &kernel_mapping_offset
,
9491 &kernel_mapping_needs_unmap
);
9492 if (kr
!= KERN_SUCCESS
) {
9493 panic("vm_page_encrypt: "
9494 "could not map page in kernel: 0x%x\n",
9498 kernel_mapping_size
= 0;
9499 kernel_mapping_needs_unmap
= FALSE
;
9501 kernel_vaddr
= CAST_DOWN(vm_offset_t
, kernel_mapping_offset
);
9503 if (swap_crypt_ctx_initialized
== FALSE
) {
9504 swap_crypt_ctx_initialize();
9506 assert(swap_crypt_ctx_initialized
);
9509 * Prepare an "initial vector" for the encryption.
9510 * We use the "pager" and the "paging_offset" for that
9511 * page to obfuscate the encrypted data a bit more and
9512 * prevent crackers from finding patterns that they could
9513 * use to break the key.
9515 bzero(&encrypt_iv
.aes_iv
[0], sizeof (encrypt_iv
.aes_iv
));
9516 encrypt_iv
.vm
.pager_object
= page
->object
->pager
;
9517 encrypt_iv
.vm
.paging_offset
=
9518 page
->object
->paging_offset
+ page
->offset
;
9520 /* encrypt the "initial vector" */
9521 aes_encrypt_cbc((const unsigned char *) &encrypt_iv
.aes_iv
[0],
9524 &encrypt_iv
.aes_iv
[0],
9525 &swap_crypt_ctx
.encrypt
);
9530 aes_encrypt_cbc((const unsigned char *) kernel_vaddr
,
9531 &encrypt_iv
.aes_iv
[0],
9532 PAGE_SIZE
/ AES_BLOCK_SIZE
,
9533 (unsigned char *) kernel_vaddr
,
9534 &swap_crypt_ctx
.encrypt
);
9536 vm_page_encrypt_counter
++;
9539 * Unmap the page from the kernel's address space,
9540 * if we had to map it ourselves. Otherwise, let
9541 * the caller undo the mapping if needed.
9543 if (kernel_mapping_needs_unmap
) {
9544 vm_paging_unmap_object(page
->object
,
9545 kernel_mapping_offset
,
9546 kernel_mapping_offset
+ kernel_mapping_size
);
9550 * Clear the "reference" and "modified" bits.
9551 * This should clean up any impact the encryption had
9553 * The page was kept busy and disconnected from all pmaps,
9554 * so it can't have been referenced or modified from user
9556 * The software bits will be reset later after the I/O
9557 * has completed (in upl_commit_range()).
9559 pmap_clear_refmod(page
->phys_page
, VM_MEM_REFERENCED
| VM_MEM_MODIFIED
);
9561 page
->encrypted
= TRUE
;
9563 vm_object_paging_end(page
->object
);
9569 * Decrypt the given page.
9570 * The page might already be mapped at kernel virtual
9571 * address "kernel_mapping_offset". Otherwise, we need
9575 * The page's VM object is locked but will be unlocked and relocked.
9576 * The page is busy and not accessible by users (not entered in any pmap).
9581 vm_map_offset_t kernel_mapping_offset
)
9584 vm_map_size_t kernel_mapping_size
;
9585 vm_offset_t kernel_vaddr
;
9586 boolean_t kernel_mapping_needs_unmap
;
9588 unsigned char aes_iv
[AES_BLOCK_SIZE
];
9590 memory_object_t pager_object
;
9591 vm_object_offset_t paging_offset
;
9594 boolean_t was_dirty
;
9597 assert(page
->encrypted
);
9599 was_dirty
= page
->dirty
;
9602 * Take a paging-in-progress reference to keep the object
9603 * alive even if we have to unlock it (in vm_paging_map_object()
9606 vm_object_paging_begin(page
->object
);
9608 if (kernel_mapping_offset
== 0) {
9610 * The page hasn't already been mapped in kernel space
9611 * by the caller. Map it now, so that we can access
9612 * its contents and decrypt them.
9614 kernel_mapping_size
= PAGE_SIZE
;
9615 kernel_mapping_needs_unmap
= FALSE
;
9616 kr
= vm_paging_map_object(page
,
9619 VM_PROT_READ
| VM_PROT_WRITE
,
9621 &kernel_mapping_size
,
9622 &kernel_mapping_offset
,
9623 &kernel_mapping_needs_unmap
);
9624 if (kr
!= KERN_SUCCESS
) {
9625 panic("vm_page_decrypt: "
9626 "could not map page in kernel: 0x%x\n",
9630 kernel_mapping_size
= 0;
9631 kernel_mapping_needs_unmap
= FALSE
;
9633 kernel_vaddr
= CAST_DOWN(vm_offset_t
, kernel_mapping_offset
);
9635 assert(swap_crypt_ctx_initialized
);
9638 * Prepare an "initial vector" for the decryption.
9639 * It has to be the same as the "initial vector" we
9640 * used to encrypt that page.
9642 bzero(&decrypt_iv
.aes_iv
[0], sizeof (decrypt_iv
.aes_iv
));
9643 decrypt_iv
.vm
.pager_object
= page
->object
->pager
;
9644 decrypt_iv
.vm
.paging_offset
=
9645 page
->object
->paging_offset
+ page
->offset
;
9647 /* encrypt the "initial vector" */
9648 aes_encrypt_cbc((const unsigned char *) &decrypt_iv
.aes_iv
[0],
9651 &decrypt_iv
.aes_iv
[0],
9652 &swap_crypt_ctx
.encrypt
);
9657 aes_decrypt_cbc((const unsigned char *) kernel_vaddr
,
9658 &decrypt_iv
.aes_iv
[0],
9659 PAGE_SIZE
/ AES_BLOCK_SIZE
,
9660 (unsigned char *) kernel_vaddr
,
9661 &swap_crypt_ctx
.decrypt
);
9662 vm_page_decrypt_counter
++;
9665 * Unmap the page from the kernel's address space,
9666 * if we had to map it ourselves. Otherwise, let
9667 * the caller undo the mapping if needed.
9669 if (kernel_mapping_needs_unmap
) {
9670 vm_paging_unmap_object(page
->object
,
9672 kernel_vaddr
+ PAGE_SIZE
);
9677 * The pager did not specify that the page would be
9678 * clean when it got paged in, so let's not clean it here
	 * After decryption, the page is actually still clean.
	 * It was encrypted as part of paging, which "cleans"
	 * the "dirty" pages.
	 * No one could access it after it was encrypted
	 * and the decryption doesn't count.
9689 page
->dirty
= FALSE
;
9690 assert (page
->cs_validated
== FALSE
);
9691 pmap_clear_refmod(page
->phys_page
, VM_MEM_MODIFIED
| VM_MEM_REFERENCED
);
9693 page
->encrypted
= FALSE
;
9696 * We've just modified the page's contents via the data cache and part
9697 * of the new contents might still be in the cache and not yet in RAM.
9698 * Since the page is now available and might get gathered in a UPL to
9699 * be part of a DMA transfer from a driver that expects the memory to
9700 * be coherent at this point, we have to flush the data cache.
9702 pmap_sync_page_attributes_phys(page
->phys_page
);
9704 * Since the page is not mapped yet, some code might assume that it
9705 * doesn't need to invalidate the instruction cache when writing to
9706 * that page. That code relies on "pmapped" being FALSE, so that the
9707 * caches get synchronized when the page is first mapped.
9709 assert(pmap_verify_free(page
->phys_page
));
9710 page
->pmapped
= FALSE
;
9711 page
->wpmapped
= FALSE
;
9713 vm_object_paging_end(page
->object
);
9716 #if DEVELOPMENT || DEBUG
9717 unsigned long upl_encrypt_upls
= 0;
9718 unsigned long upl_encrypt_pages
= 0;
9725 * Encrypts all the pages in the UPL, within the specified range.
9731 upl_offset_t crypt_offset
,
9732 upl_size_t crypt_size
)
9734 upl_size_t upl_size
, subupl_size
=crypt_size
;
9735 upl_offset_t offset_in_upl
, subupl_offset
=crypt_offset
;
9736 vm_object_t upl_object
;
9737 vm_object_offset_t upl_offset
;
9739 vm_object_t shadow_object
;
9740 vm_object_offset_t shadow_offset
;
9741 vm_object_offset_t paging_offset
;
9742 vm_object_offset_t base_offset
;
9743 int isVectorUPL
= 0;
9744 upl_t vector_upl
= NULL
;
9746 if((isVectorUPL
= vector_upl_is_valid(upl
)))
9749 process_upl_to_encrypt
:
9751 crypt_size
= subupl_size
;
9752 crypt_offset
= subupl_offset
;
9753 upl
= vector_upl_subupl_byoffset(vector_upl
, &crypt_offset
, &crypt_size
);
9755 panic("upl_encrypt: Accessing a sub-upl that doesn't exist\n");
9756 subupl_size
-= crypt_size
;
9757 subupl_offset
+= crypt_size
;
9760 #if DEVELOPMENT || DEBUG
9762 upl_encrypt_pages
+= crypt_size
/ PAGE_SIZE
;
9764 upl_object
= upl
->map_object
;
9765 upl_offset
= upl
->offset
;
9766 upl_size
= upl
->size
;
9768 vm_object_lock(upl_object
);
9771 * Find the VM object that contains the actual pages.
9773 if (upl_object
->pageout
) {
9774 shadow_object
= upl_object
->shadow
;
9776 * The offset in the shadow object is actually also
9777 * accounted for in upl->offset. It possibly shouldn't be
9778 * this way, but for now don't account for it twice.
9781 assert(upl_object
->paging_offset
== 0); /* XXX ? */
9782 vm_object_lock(shadow_object
);
9784 shadow_object
= upl_object
;
9788 paging_offset
= shadow_object
->paging_offset
;
9789 vm_object_paging_begin(shadow_object
);
9791 if (shadow_object
!= upl_object
)
9792 vm_object_unlock(upl_object
);
9795 base_offset
= shadow_offset
;
9796 base_offset
+= upl_offset
;
9797 base_offset
+= crypt_offset
;
9798 base_offset
-= paging_offset
;
9800 assert(crypt_offset
+ crypt_size
<= upl_size
);
9802 for (offset_in_upl
= 0;
9803 offset_in_upl
< crypt_size
;
9804 offset_in_upl
+= PAGE_SIZE
) {
9805 page
= vm_page_lookup(shadow_object
,
9806 base_offset
+ offset_in_upl
);
9807 if (page
== VM_PAGE_NULL
) {
9808 panic("upl_encrypt: "
9809 "no page for (obj=%p,off=0x%llx+0x%x)!\n",
9815 * Disconnect the page from all pmaps, so that nobody can
9816 * access it while it's encrypted. After that point, all
9817 * accesses to this page will cause a page fault and block
9818 * while the page is busy being encrypted. After the
9819 * encryption completes, any access will cause a
9820 * page fault and the page gets decrypted at that time.
9822 pmap_disconnect(page
->phys_page
);
9823 vm_page_encrypt(page
, 0);
9825 if (vm_object_lock_avoid(shadow_object
)) {
9827 * Give vm_pageout_scan() a chance to convert more
9828 * pages from "clean-in-place" to "clean-and-free",
9829 * if it's interested in the same pages we selected
9832 vm_object_unlock(shadow_object
);
9834 vm_object_lock(shadow_object
);
9838 vm_object_paging_end(shadow_object
);
9839 vm_object_unlock(shadow_object
);
9841 if(isVectorUPL
&& subupl_size
)
9842 goto process_upl_to_encrypt
;
9845 #else /* ENCRYPTED_SWAP */
9849 __unused upl_offset_t crypt_offset
,
9850 __unused upl_size_t crypt_size
)
9856 __unused vm_page_t page
,
9857 __unused vm_map_offset_t kernel_mapping_offset
)
9863 __unused vm_page_t page
,
9864 __unused vm_map_offset_t kernel_mapping_offset
)
9868 #endif /* ENCRYPTED_SWAP */
9871 * page->object must be locked
9874 vm_pageout_steal_laundry(vm_page_t page
, boolean_t queues_locked
)
9876 if (!queues_locked
) {
9877 vm_page_lockspin_queues();
9881 * need to drop the laundry count...
9882 * we may also need to remove it
9883 * from the I/O paging queue...
9884 * vm_pageout_throttle_up handles both cases
9886 * the laundry and pageout_queue flags are cleared...
9888 vm_pageout_throttle_up(page
);
9890 vm_page_steal_pageout_page
++;
9892 if (!queues_locked
) {
9893 vm_page_unlock_queues();
9898 vector_upl_create(vm_offset_t upl_offset
)
9900 int vector_upl_size
= sizeof(struct _vector_upl
);
9903 vector_upl_t vector_upl
= (vector_upl_t
)kalloc(vector_upl_size
);
9905 upl
= upl_create(0,UPL_VECTOR
,0);
9906 upl
->vector_upl
= vector_upl
;
9907 upl
->offset
= upl_offset
;
9908 vector_upl
->size
= 0;
9909 vector_upl
->offset
= upl_offset
;
9910 vector_upl
->invalid_upls
=0;
9911 vector_upl
->num_upls
=0;
9912 vector_upl
->pagelist
= NULL
;
9914 for(i
=0; i
< MAX_VECTOR_UPL_ELEMENTS
; i
++) {
9915 vector_upl
->upl_iostates
[i
].size
= 0;
9916 vector_upl
->upl_iostates
[i
].offset
= 0;
9923 vector_upl_deallocate(upl_t upl
)
9926 vector_upl_t vector_upl
= upl
->vector_upl
;
9928 if(vector_upl
->invalid_upls
!= vector_upl
->num_upls
)
9929 panic("Deallocating non-empty Vectored UPL\n");
9930 kfree(vector_upl
->pagelist
,(sizeof(struct upl_page_info
)*(vector_upl
->size
/PAGE_SIZE
)));
9931 vector_upl
->invalid_upls
=0;
9932 vector_upl
->num_upls
= 0;
9933 vector_upl
->pagelist
= NULL
;
9934 vector_upl
->size
= 0;
9935 vector_upl
->offset
= 0;
9936 kfree(vector_upl
, sizeof(struct _vector_upl
));
9937 vector_upl
= (vector_upl_t
)0xfeedfeed;
9940 panic("vector_upl_deallocate was passed a non-vectored upl\n");
9943 panic("vector_upl_deallocate was passed a NULL upl\n");
9947 vector_upl_is_valid(upl_t upl
)
9949 if(upl
&& ((upl
->flags
& UPL_VECTOR
)==UPL_VECTOR
)) {
9950 vector_upl_t vector_upl
= upl
->vector_upl
;
9951 if(vector_upl
== NULL
|| vector_upl
== (vector_upl_t
)0xfeedfeed || vector_upl
== (vector_upl_t
)0xfeedbeef)
9960 vector_upl_set_subupl(upl_t upl
,upl_t subupl
, uint32_t io_size
)
9962 if(vector_upl_is_valid(upl
)) {
9963 vector_upl_t vector_upl
= upl
->vector_upl
;
9968 if(io_size
< PAGE_SIZE
)
9969 io_size
= PAGE_SIZE
;
9970 subupl
->vector_upl
= (void*)vector_upl
;
9971 vector_upl
->upl_elems
[vector_upl
->num_upls
++] = subupl
;
9972 vector_upl
->size
+= io_size
;
9973 upl
->size
+= io_size
;
9976 uint32_t i
=0,invalid_upls
=0;
9977 for(i
= 0; i
< vector_upl
->num_upls
; i
++) {
9978 if(vector_upl
->upl_elems
[i
] == subupl
)
9981 if(i
== vector_upl
->num_upls
)
9982 panic("Trying to remove sub-upl when none exists");
9984 vector_upl
->upl_elems
[i
] = NULL
;
9985 invalid_upls
= hw_atomic_add(&(vector_upl
)->invalid_upls
, 1);
9986 if(invalid_upls
== vector_upl
->num_upls
)
9993 panic("vector_upl_set_subupl was passed a NULL upl element\n");
9996 panic("vector_upl_set_subupl was passed a non-vectored upl\n");
9999 panic("vector_upl_set_subupl was passed a NULL upl\n");
10005 vector_upl_set_pagelist(upl_t upl
)
10007 if(vector_upl_is_valid(upl
)) {
10009 vector_upl_t vector_upl
= upl
->vector_upl
;
10012 vm_offset_t pagelist_size
=0, cur_upl_pagelist_size
=0;
10014 vector_upl
->pagelist
= (upl_page_info_array_t
)kalloc(sizeof(struct upl_page_info
)*(vector_upl
->size
/PAGE_SIZE
));
10016 for(i
=0; i
< vector_upl
->num_upls
; i
++) {
10017 cur_upl_pagelist_size
= sizeof(struct upl_page_info
) * vector_upl
->upl_elems
[i
]->size
/PAGE_SIZE
;
10018 bcopy(UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(vector_upl
->upl_elems
[i
]), (char*)vector_upl
->pagelist
+ pagelist_size
, cur_upl_pagelist_size
);
10019 pagelist_size
+= cur_upl_pagelist_size
;
10020 if(vector_upl
->upl_elems
[i
]->highest_page
> upl
->highest_page
)
10021 upl
->highest_page
= vector_upl
->upl_elems
[i
]->highest_page
;
10023 assert( pagelist_size
== (sizeof(struct upl_page_info
)*(vector_upl
->size
/PAGE_SIZE
)) );
10026 panic("vector_upl_set_pagelist was passed a non-vectored upl\n");
10029 panic("vector_upl_set_pagelist was passed a NULL upl\n");
10034 vector_upl_subupl_byindex(upl_t upl
, uint32_t index
)
10036 if(vector_upl_is_valid(upl
)) {
10037 vector_upl_t vector_upl
= upl
->vector_upl
;
10039 if(index
< vector_upl
->num_upls
)
10040 return vector_upl
->upl_elems
[index
];
10043 panic("vector_upl_subupl_byindex was passed a non-vectored upl\n");
10049 vector_upl_subupl_byoffset(upl_t upl
, upl_offset_t
*upl_offset
, upl_size_t
*upl_size
)
10051 if(vector_upl_is_valid(upl
)) {
10053 vector_upl_t vector_upl
= upl
->vector_upl
;
10056 upl_t subupl
= NULL
;
10057 vector_upl_iostates_t subupl_state
;
10059 for(i
=0; i
< vector_upl
->num_upls
; i
++) {
10060 subupl
= vector_upl
->upl_elems
[i
];
10061 subupl_state
= vector_upl
->upl_iostates
[i
];
10062 if( *upl_offset
<= (subupl_state
.offset
+ subupl_state
.size
- 1)) {
10063 /* We could have been passed an offset/size pair that belongs
10064 * to an UPL element that has already been committed/aborted.
10065 * If so, return NULL.
10069 if((subupl_state
.offset
+ subupl_state
.size
) < (*upl_offset
+ *upl_size
)) {
10070 *upl_size
= (subupl_state
.offset
+ subupl_state
.size
) - *upl_offset
;
10071 if(*upl_size
> subupl_state
.size
)
10072 *upl_size
= subupl_state
.size
;
10074 if(*upl_offset
>= subupl_state
.offset
)
10075 *upl_offset
-= subupl_state
.offset
;
10077 panic("Vector UPL offset miscalculation\n");
10083 panic("vector_upl_subupl_byoffset was passed a non-vectored UPL\n");
10089 vector_upl_get_submap(upl_t upl
, vm_map_t
*v_upl_submap
, vm_offset_t
*submap_dst_addr
)
10091 *v_upl_submap
= NULL
;
10093 if(vector_upl_is_valid(upl
)) {
10094 vector_upl_t vector_upl
= upl
->vector_upl
;
10096 *v_upl_submap
= vector_upl
->submap
;
10097 *submap_dst_addr
= vector_upl
->submap_dst_addr
;
10100 panic("vector_upl_get_submap was passed a non-vectored UPL\n");
10103 panic("vector_upl_get_submap was passed a null UPL\n");
10107 vector_upl_set_submap(upl_t upl
, vm_map_t submap
, vm_offset_t submap_dst_addr
)
10109 if(vector_upl_is_valid(upl
)) {
10110 vector_upl_t vector_upl
= upl
->vector_upl
;
10112 vector_upl
->submap
= submap
;
10113 vector_upl
->submap_dst_addr
= submap_dst_addr
;
10116 panic("vector_upl_get_submap was passed a non-vectored UPL\n");
10119 panic("vector_upl_get_submap was passed a NULL UPL\n");
10123 vector_upl_set_iostate(upl_t upl
, upl_t subupl
, upl_offset_t offset
, upl_size_t size
)
10125 if(vector_upl_is_valid(upl
)) {
10127 vector_upl_t vector_upl
= upl
->vector_upl
;
10130 for(i
= 0; i
< vector_upl
->num_upls
; i
++) {
10131 if(vector_upl
->upl_elems
[i
] == subupl
)
10135 if(i
== vector_upl
->num_upls
)
10136 panic("setting sub-upl iostate when none exists");
10138 vector_upl
->upl_iostates
[i
].offset
= offset
;
10139 if(size
< PAGE_SIZE
)
10141 vector_upl
->upl_iostates
[i
].size
= size
;
10144 panic("vector_upl_set_iostate was passed a non-vectored UPL\n");
10147 panic("vector_upl_set_iostate was passed a NULL UPL\n");
10151 vector_upl_get_iostate(upl_t upl
, upl_t subupl
, upl_offset_t
*offset
, upl_size_t
*size
)
10153 if(vector_upl_is_valid(upl
)) {
10155 vector_upl_t vector_upl
= upl
->vector_upl
;
10158 for(i
= 0; i
< vector_upl
->num_upls
; i
++) {
10159 if(vector_upl
->upl_elems
[i
] == subupl
)
10163 if(i
== vector_upl
->num_upls
)
10164 panic("getting sub-upl iostate when none exists");
10166 *offset
= vector_upl
->upl_iostates
[i
].offset
;
10167 *size
= vector_upl
->upl_iostates
[i
].size
;
10170 panic("vector_upl_get_iostate was passed a non-vectored UPL\n");
10173 panic("vector_upl_get_iostate was passed a NULL UPL\n");
10177 vector_upl_get_iostate_byindex(upl_t upl
, uint32_t index
, upl_offset_t
*offset
, upl_size_t
*size
)
10179 if(vector_upl_is_valid(upl
)) {
10180 vector_upl_t vector_upl
= upl
->vector_upl
;
10182 if(index
< vector_upl
->num_upls
) {
10183 *offset
= vector_upl
->upl_iostates
[index
].offset
;
10184 *size
= vector_upl
->upl_iostates
[index
].size
;
10187 *offset
= *size
= 0;
10190 panic("vector_upl_get_iostate_byindex was passed a non-vectored UPL\n");
10193 panic("vector_upl_get_iostate_byindex was passed a NULL UPL\n");
10197 upl_get_internal_vectorupl_pagelist(upl_t upl
)
10199 return ((vector_upl_t
)(upl
->vector_upl
))->pagelist
;
10203 upl_get_internal_vectorupl(upl_t upl
)
10205 return upl
->vector_upl
;
10209 upl_get_internal_pagelist_offset(void)
10211 return sizeof(struct upl
);
10220 upl
->flags
|= UPL_CLEAR_DIRTY
;
10222 upl
->flags
&= ~UPL_CLEAR_DIRTY
;
10227 upl_set_referenced(
10233 upl
->ext_ref_count
++;
10235 if (!upl
->ext_ref_count
) {
10236 panic("upl_set_referenced not %p\n", upl
);
10238 upl
->ext_ref_count
--;
10247 vm_offset_t upl_offset
,
10252 if ((upl
->flags
& UPL_EXPEDITE_SUPPORTED
) == 0)
10255 assert(upl
->upl_reprio_info
!= 0);
10256 for(i
= (int)(upl_offset
/ PAGE_SIZE
), j
= 0; j
< io_size
; i
++, j
+= PAGE_SIZE
) {
10257 UPL_SET_REPRIO_INFO(upl
, i
, blkno
, io_size
);
10263 vm_page_is_slideable(vm_page_t m
)
10265 boolean_t result
= FALSE
;
10266 vm_shared_region_slide_info_t si
;
10268 vm_object_lock_assert_held(m
->object
);
10270 /* make sure our page belongs to the one object allowed to do this */
10271 if (!m
->object
->object_slid
) {
10275 si
= m
->object
->vo_slide_info
;
10280 if(!m
->slid
&& (si
->start
<= m
->offset
&& si
->end
> m
->offset
)) {
10288 int vm_page_slide_counter
= 0;
10289 int vm_page_slide_errors
= 0;
10293 vm_map_offset_t kernel_mapping_offset
)
10296 vm_map_size_t kernel_mapping_size
;
10297 boolean_t kernel_mapping_needs_unmap
;
10298 vm_offset_t kernel_vaddr
;
10299 uint32_t pageIndex
;
10300 uint32_t slide_chunk
;
10302 assert(!page
->slid
);
10303 assert(page
->object
->object_slid
);
10304 vm_object_lock_assert_exclusive(page
->object
);
10307 return KERN_FAILURE
;
10310 * Take a paging-in-progress reference to keep the object
10311 * alive even if we have to unlock it (in vm_paging_map_object()
10314 vm_object_paging_begin(page
->object
);
10316 if (kernel_mapping_offset
== 0) {
10318 * The page hasn't already been mapped in kernel space
10319 * by the caller. Map it now, so that we can access
10320 * its contents and decrypt them.
10322 kernel_mapping_size
= PAGE_SIZE
;
10323 kernel_mapping_needs_unmap
= FALSE
;
10324 kr
= vm_paging_map_object(page
,
10327 VM_PROT_READ
| VM_PROT_WRITE
,
10329 &kernel_mapping_size
,
10330 &kernel_mapping_offset
,
10331 &kernel_mapping_needs_unmap
);
10332 if (kr
!= KERN_SUCCESS
) {
10333 panic("vm_page_slide: "
10334 "could not map page in kernel: 0x%x\n",
10338 kernel_mapping_size
= 0;
10339 kernel_mapping_needs_unmap
= FALSE
;
10341 kernel_vaddr
= CAST_DOWN(vm_offset_t
, kernel_mapping_offset
);
10344 * Slide the pointers on the page.
10347 /*assert that slide_file_info.start/end are page-aligned?*/
10349 assert(!page
->slid
);
10350 assert(page
->object
->object_slid
);
10352 #define PAGE_SIZE_FOR_SR_SLIDE 4096
10353 pageIndex
= (uint32_t)((page
->offset
-
10354 page
->object
->vo_slide_info
->start
) /
10355 PAGE_SIZE_FOR_SR_SLIDE
);
10356 for (slide_chunk
= 0;
10357 slide_chunk
< PAGE_SIZE
/ PAGE_SIZE_FOR_SR_SLIDE
;
10359 kr
= vm_shared_region_slide_page(page
->object
->vo_slide_info
,
10362 PAGE_SIZE_FOR_SR_SLIDE
)),
10363 (pageIndex
+ slide_chunk
));
10364 if (kr
!= KERN_SUCCESS
) {
10369 vm_page_slide_counter
++;
10372 * Unmap the page from the kernel's address space,
10374 if (kernel_mapping_needs_unmap
) {
10375 vm_paging_unmap_object(page
->object
,
10377 kernel_vaddr
+ PAGE_SIZE
);
10380 page
->dirty
= FALSE
;
10381 pmap_clear_refmod(page
->phys_page
, VM_MEM_MODIFIED
| VM_MEM_REFERENCED
);
10383 if (kr
!= KERN_SUCCESS
|| cs_debug
> 1) {
10384 printf("vm_page_slide(%p): "
10385 "obj %p off 0x%llx mobj %p moff 0x%llx\n",
10387 page
->object
, page
->offset
,
10388 page
->object
->pager
,
10389 page
->offset
+ page
->object
->paging_offset
);
10392 if (kr
== KERN_SUCCESS
) {
10395 page
->error
= TRUE
;
10396 vm_page_slide_errors
++;
10399 vm_object_paging_end(page
->object
);
10404 void inline memoryshot(unsigned int event
, unsigned int control
)
10406 if (vm_debug_events
) {
10407 KERNEL_DEBUG_CONSTANT1((MACHDBG_CODE(DBG_MACH_VM_PRESSURE
, event
)) | control
,
10408 vm_page_active_count
, vm_page_inactive_count
,
10409 vm_page_free_count
, vm_page_speculative_count
,
10410 vm_page_throttled_count
);
boolean_t upl_device_page(upl_page_info_t *upl)
{
	return(UPL_DEVICE_PAGE(upl));
}

boolean_t upl_page_present(upl_page_info_t *upl, int index)
{
	return(UPL_PAGE_PRESENT(upl, index));
}

boolean_t upl_speculative_page(upl_page_info_t *upl, int index)
{
	return(UPL_SPECULATIVE_PAGE(upl, index));
}

boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
{
	return(UPL_DIRTY_PAGE(upl, index));
}

boolean_t upl_valid_page(upl_page_info_t *upl, int index)
{
	return(UPL_VALID_PAGE(upl, index));
}

ppnum_t upl_phys_page(upl_page_info_t *upl, int index)
{
	return(UPL_PHYS_PAGE(upl, index));
}

void upl_page_set_mark(upl_page_info_t *upl, int index, boolean_t v)
{
	upl[index].mark = v;
}

boolean_t upl_page_get_mark(upl_page_info_t *upl, int index)
{
	return upl[index].mark;
}
10456 vm_countdirtypages(void)
10468 vm_page_lock_queues();
10469 m
= (vm_page_t
) queue_first(&vm_page_queue_inactive
);
10471 if (m
==(vm_page_t
)0) break;
10473 if(m
->dirty
) dpages
++;
10474 if(m
->pageout
) pgopages
++;
10475 if(m
->precious
) precpages
++;
10477 assert(m
->object
!= kernel_object
);
10478 m
= (vm_page_t
) queue_next(&m
->pageq
);
10479 if (m
==(vm_page_t
)0) break;
10481 } while (!queue_end(&vm_page_queue_inactive
,(queue_entry_t
) m
));
10482 vm_page_unlock_queues();
10484 vm_page_lock_queues();
10485 m
= (vm_page_t
) queue_first(&vm_page_queue_throttled
);
10487 if (m
==(vm_page_t
)0) break;
10491 assert(!m
->pageout
);
10492 assert(m
->object
!= kernel_object
);
10493 m
= (vm_page_t
) queue_next(&m
->pageq
);
10494 if (m
==(vm_page_t
)0) break;
10496 } while (!queue_end(&vm_page_queue_throttled
,(queue_entry_t
) m
));
10497 vm_page_unlock_queues();
10499 vm_page_lock_queues();
10500 m
= (vm_page_t
) queue_first(&vm_page_queue_anonymous
);
10502 if (m
==(vm_page_t
)0) break;
10504 if(m
->dirty
) dpages
++;
10505 if(m
->pageout
) pgopages
++;
10506 if(m
->precious
) precpages
++;
10508 assert(m
->object
!= kernel_object
);
10509 m
= (vm_page_t
) queue_next(&m
->pageq
);
10510 if (m
==(vm_page_t
)0) break;
10512 } while (!queue_end(&vm_page_queue_anonymous
,(queue_entry_t
) m
));
10513 vm_page_unlock_queues();
10515 printf("IN Q: %d : %d : %d\n", dpages
, pgopages
, precpages
);
10521 vm_page_lock_queues();
10522 m
= (vm_page_t
) queue_first(&vm_page_queue_active
);
10525 if(m
== (vm_page_t
)0) break;
10526 if(m
->dirty
) dpages
++;
10527 if(m
->pageout
) pgopages
++;
10528 if(m
->precious
) precpages
++;
10530 assert(m
->object
!= kernel_object
);
10531 m
= (vm_page_t
) queue_next(&m
->pageq
);
10532 if(m
== (vm_page_t
)0) break;
10534 } while (!queue_end(&vm_page_queue_active
,(queue_entry_t
) m
));
10535 vm_page_unlock_queues();
10537 printf("AC Q: %d : %d : %d\n", dpages
, pgopages
, precpages
);
10540 #endif /* MACH_BSD */
10542 ppnum_t
upl_get_highest_page(
10545 return upl
->highest_page
;
10548 upl_size_t
upl_get_size(
10554 upl_t
upl_associated_upl(upl_t upl
)
10556 return upl
->associated_upl
;
10559 void upl_set_associated_upl(upl_t upl
, upl_t associated_upl
)
10561 upl
->associated_upl
= associated_upl
;
10565 kern_return_t
upl_ubc_alias_set(upl_t upl
, uintptr_t alias1
, uintptr_t alias2
)
10567 upl
->ubc_alias1
= alias1
;
10568 upl
->ubc_alias2
= alias2
;
10569 return KERN_SUCCESS
;
10571 int upl_ubc_alias_get(upl_t upl
, uintptr_t * al
, uintptr_t * al2
)
10574 *al
= upl
->ubc_alias1
;
10576 *al2
= upl
->ubc_alias2
;
10577 return KERN_SUCCESS
;
10579 #endif /* UPL_DEBUG */
#if VM_PRESSURE_EVENTS
/*
 * Upward trajectory.
 */
extern boolean_t vm_compressor_low_on_space(void);

boolean_t
VM_PRESSURE_NORMAL_TO_WARNING(void) {

	if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) {

		/* Available pages below our threshold */
		if (memorystatus_available_pages < memorystatus_available_pages_pressure) {
			/* No frozen processes to kill */
			if (memorystatus_frozen_count == 0) {
				/* Not enough suspended processes available. */
				if (memorystatus_suspended_count < MEMORYSTATUS_SUSPENDED_THRESHOLD) {
					return TRUE;
				}
			}
		}
		return FALSE;

	} else {
		return ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 1 : 0);
	}
}

boolean_t
VM_PRESSURE_WARNING_TO_CRITICAL(void) {

	if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) {
		/* Available pages below our threshold */
		if (memorystatus_available_pages < memorystatus_available_pages_critical) {
			return TRUE;
		}
		return FALSE;
	} else {
		return (vm_compressor_low_on_space() || (AVAILABLE_NON_COMPRESSED_MEMORY < ((12 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
	}
}

/*
 * Downward trajectory.
 */
boolean_t
VM_PRESSURE_WARNING_TO_NORMAL(void) {

	if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) {
		/* Available pages above our threshold */
		unsigned int target_threshold = memorystatus_available_pages_pressure + ((15 * memorystatus_available_pages_pressure) / 100);
		if (memorystatus_available_pages > target_threshold) {
			return TRUE;
		}
		return FALSE;
	} else {
		return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((12 * VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) / 10)) ? 1 : 0);
	}
}

boolean_t
VM_PRESSURE_CRITICAL_TO_WARNING(void) {

	if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) {
		/* Available pages above our threshold */
		unsigned int target_threshold = memorystatus_available_pages_critical + ((15 * memorystatus_available_pages_critical) / 100);
		if (memorystatus_available_pages > target_threshold) {
			return TRUE;
		}
		return FALSE;
	} else {
		return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((14 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
	}
}
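
/*
 * Worked example of the hysteresis in the transitions above, with
 * illustrative numbers: if memorystatus_available_pages_pressure is 4000
 * pages, WARNING is entered once available pages drop below 4000 but is
 * only left again once they exceed 4000 + (15 * 4000) / 100 = 4600.  The
 * compressor-based checks pair 1.0x with 1.2x of the compact threshold
 * (warning) and 1.2x with 1.4x of the swap-unthrottle threshold (critical)
 * for the same reason: the up and down transitions use different cut-offs
 * so the pressure level does not oscillate around a single value.
 */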
#endif /* VM_PRESSURE_EVENTS */