/*
 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	vm/vm_pageout.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	The proverbial page-out daemon.
 */
#include <mach_pagemap.h>
#include <mach_cluster_stats.h>

#include <mach/mach_types.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/mach_host_server.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_statistics.h>

#include <kern/kern_types.h>
#include <kern/counters.h>
#include <kern/host_statistics.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/sched.h>
#include <kern/thread.h>
#include <kern/kalloc.h>
#include <kern/policy_internal.h>
#include <kern/thread_group.h>

#include <machine/vm_tuning.h>
#include <machine/commpage.h>

#include <vm/vm_compressor_pager.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h> /* must be last */
#include <vm/memory_object.h>
#include <vm/vm_purgeable_internal.h>
#include <vm/vm_shared_region.h>
#include <vm/vm_compressor.h>

#include <san/kasan.h>

#if CONFIG_PHANTOM_CACHE
#include <vm/vm_phantom_cache.h>
#endif /* CONFIG_PHANTOM_CACHE */

#include <libkern/OSDebug.h>
extern void m_drain(void);

#if VM_PRESSURE_EVENTS

#if CONFIG_JETSAM
extern unsigned int memorystatus_available_pages;
extern unsigned int memorystatus_available_pages_pressure;
extern unsigned int memorystatus_available_pages_critical;
#else /* CONFIG_JETSAM */
extern uint64_t memorystatus_available_pages;
extern uint64_t memorystatus_available_pages_pressure;
extern uint64_t memorystatus_available_pages_critical;
#endif /* CONFIG_JETSAM */

extern unsigned int memorystatus_frozen_count;
extern unsigned int memorystatus_suspended_count;
extern vm_pressure_level_t memorystatus_vm_pressure_level;

int memorystatus_purge_on_warning = 2;
int memorystatus_purge_on_urgent = 5;
int memorystatus_purge_on_critical = 8;

void vm_pressure_response(void);
boolean_t vm_pressure_thread_running = FALSE;
extern void consider_vm_pressure_events(void);

#define MEMORYSTATUS_SUSPENDED_THRESHOLD  4
#endif /* VM_PRESSURE_EVENTS */

boolean_t vm_pressure_changed = FALSE;
#ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE   /* maximum iterations of the active queue to move pages to inactive */
#define VM_PAGEOUT_BURST_ACTIVE_THROTTLE  100
#endif /* VM_PAGEOUT_BURST_ACTIVE_THROTTLE */

#ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE  /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
#ifdef CONFIG_EMBEDDED
#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 1024
#else
#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096
#endif /* CONFIG_EMBEDDED */
#endif /* VM_PAGEOUT_BURST_INACTIVE_THROTTLE */

#ifndef VM_PAGEOUT_DEADLOCK_RELIEF
#define VM_PAGEOUT_DEADLOCK_RELIEF 100		/* number of pages to move to break deadlock */
#endif /* VM_PAGEOUT_DEADLOCK_RELIEF */

#ifndef VM_PAGEOUT_INACTIVE_RELIEF
#define VM_PAGEOUT_INACTIVE_RELIEF 50		/* minimum number of pages to move to the inactive q */
#endif /* VM_PAGEOUT_INACTIVE_RELIEF */

#ifndef VM_PAGE_LAUNDRY_MAX
#define VM_PAGE_LAUNDRY_MAX	128UL		/* maximum pageouts on a given pageout queue */
#endif /* VM_PAGE_LAUNDRY_MAX */

#ifndef VM_PAGEOUT_BURST_WAIT
#define VM_PAGEOUT_BURST_WAIT	10		/* milliseconds */
#endif /* VM_PAGEOUT_BURST_WAIT */

#ifndef VM_PAGEOUT_EMPTY_WAIT
#define VM_PAGEOUT_EMPTY_WAIT	200		/* milliseconds */
#endif /* VM_PAGEOUT_EMPTY_WAIT */

#ifndef VM_PAGEOUT_DEADLOCK_WAIT
#define VM_PAGEOUT_DEADLOCK_WAIT 300		/* milliseconds */
#endif /* VM_PAGEOUT_DEADLOCK_WAIT */

#ifndef VM_PAGEOUT_IDLE_WAIT
#define VM_PAGEOUT_IDLE_WAIT	10		/* milliseconds */
#endif /* VM_PAGEOUT_IDLE_WAIT */

#ifndef VM_PAGEOUT_SWAP_WAIT
#define VM_PAGEOUT_SWAP_WAIT	50		/* milliseconds */
#endif /* VM_PAGEOUT_SWAP_WAIT */

#ifndef VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED
#define VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED	1000	/* maximum pages considered before we issue a pressure event */
#endif /* VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED */

#ifndef VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS
#define VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS	5	/* seconds */
#endif /* VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS */

unsigned int	vm_page_speculative_q_age_ms = VM_PAGE_SPECULATIVE_Q_AGE_MS;
unsigned int	vm_page_speculative_percentage = 5;

#ifndef VM_PAGE_SPECULATIVE_TARGET
#define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / (100 / vm_page_speculative_percentage))
#endif /* VM_PAGE_SPECULATIVE_TARGET */
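/*
 * Worked example of the macro above: with the default
 * vm_page_speculative_percentage of 5, VM_PAGE_SPECULATIVE_TARGET(total)
 * evaluates to (total) * 1 / (100 / 5) == total / 20, i.e. roughly 5% of
 * the pages passed in.  Note that the inner division is integer division,
 * so a percentage that does not divide 100 evenly is effectively rounded
 * up (e.g. 33 gives 100 / 33 == 3, which yields about a third of total).
 */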
/*
 * To obtain a reasonable LRU approximation, the inactive queue
 * needs to be large enough to give pages on it a chance to be
 * referenced a second time.  This macro defines the fraction
 * of active+inactive pages that should be inactive.
 * The pageout daemon uses it to update vm_page_inactive_target.
 *
 * If vm_page_free_count falls below vm_page_free_target and
 * vm_page_inactive_count is below vm_page_inactive_target,
 * then the pageout daemon starts running.
 */

#ifndef	VM_PAGE_INACTIVE_TARGET
#ifdef	CONFIG_EMBEDDED
#define	VM_PAGE_INACTIVE_TARGET(avail)	((avail) * 1 / 3)
#else
#define	VM_PAGE_INACTIVE_TARGET(avail)	((avail) * 1 / 2)
#endif /* CONFIG_EMBEDDED */
#endif	/* VM_PAGE_INACTIVE_TARGET */

/*
 * Once the pageout daemon starts running, it keeps going
 * until vm_page_free_count meets or exceeds vm_page_free_target.
 */

#ifndef	VM_PAGE_FREE_TARGET
#ifdef	CONFIG_EMBEDDED
#define	VM_PAGE_FREE_TARGET(free)	(15 + (free) / 100)
#else
#define	VM_PAGE_FREE_TARGET(free)	(15 + (free) / 80)
#endif /* CONFIG_EMBEDDED */
#endif	/* VM_PAGE_FREE_TARGET */

/*
 * The pageout daemon always starts running once vm_page_free_count
 * falls below vm_page_free_min.
 */

#ifndef	VM_PAGE_FREE_MIN
#ifdef	CONFIG_EMBEDDED
#define	VM_PAGE_FREE_MIN(free)		(10 + (free) / 200)
#else
#define	VM_PAGE_FREE_MIN(free)		(10 + (free) / 100)
#endif /* CONFIG_EMBEDDED */
#endif	/* VM_PAGE_FREE_MIN */
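/*
 * Worked example for the thresholds above (non-embedded variants): given
 * 16000 pages, VM_PAGE_FREE_TARGET(16000) is 15 + 16000 / 80 == 215 pages
 * and VM_PAGE_FREE_MIN(16000) is 10 + 16000 / 100 == 170 pages, while
 * VM_PAGE_INACTIVE_TARGET(16000) is 8000 pages (half of what is passed
 * in).  The *_LIMIT constants defined just below bound the values that
 * are actually used.
 */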
#ifdef	CONFIG_EMBEDDED
#define VM_PAGE_FREE_RESERVED_LIMIT	100
#define VM_PAGE_FREE_MIN_LIMIT		1500
#define VM_PAGE_FREE_TARGET_LIMIT	2000
#else
#define VM_PAGE_FREE_RESERVED_LIMIT	1700
#define VM_PAGE_FREE_MIN_LIMIT		3500
#define VM_PAGE_FREE_TARGET_LIMIT	4000
#endif /* CONFIG_EMBEDDED */

/*
 * When vm_page_free_count falls below vm_page_free_reserved,
 * only vm-privileged threads can allocate pages.  vm-privilege
 * allows the pageout daemon and default pager (and any other
 * associated threads needed for default pageout) to continue
 * operation by dipping into the reserved pool of pages.
 */

#ifndef	VM_PAGE_FREE_RESERVED
#define	VM_PAGE_FREE_RESERVED(n)	\
	((unsigned) (6 * VM_PAGE_LAUNDRY_MAX) + (n))
#endif	/* VM_PAGE_FREE_RESERVED */
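/*
 * Worked example of the macro above: with VM_PAGE_LAUNDRY_MAX at its
 * default of 128, VM_PAGE_FREE_RESERVED(n) evaluates to
 * 6 * 128 + n == 768 + n pages, i.e. reserved headroom equal to six full
 * laundry queues plus whatever extra amount the caller passes in as 'n'.
 */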
/*
 * When we dequeue pages from the inactive list, they are
 * reactivated (ie, put back on the active queue) if referenced.
 * However, it is possible to starve the free list if other
 * processors are referencing pages faster than we can turn off
 * the referenced bit.  So we limit the number of reactivations
 * we will make per call of vm_pageout_scan().
 */
#define VM_PAGE_REACTIVATE_LIMIT_MAX 20000

#ifndef	VM_PAGE_REACTIVATE_LIMIT
#ifdef	CONFIG_EMBEDDED
#define	VM_PAGE_REACTIVATE_LIMIT(avail)	(VM_PAGE_INACTIVE_TARGET(avail) / 2)
#else
#define	VM_PAGE_REACTIVATE_LIMIT(avail)	(MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
#endif /* CONFIG_EMBEDDED */
#endif	/* VM_PAGE_REACTIVATE_LIMIT */
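/*
 * Worked example of the non-embedded limit above: because the expression
 * takes the MAX of (avail / 20) and VM_PAGE_REACTIVATE_LIMIT_MAX, the
 * reactivation limit is never smaller than 20000.  With avail == 1000000
 * pages it becomes MAX(50000, 20000) == 50000 reactivations per
 * vm_pageout_scan() call; with avail == 100000 it stays at 20000.
 */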
#define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM	1000

extern boolean_t hibernate_cleaning_in_progress;

/*
 * Exported variable used to broadcast the activation of the pageout scan
 * Working Set uses this to throttle its use of pmap removes.  In this
 * way, code which runs within memory in an uncontested context does
 * not keep encountering soft faults.
 */
unsigned int	vm_pageout_scan_event_counter = 0;
/*
 * Forward declarations for internal routines.
 */
	struct vm_pageout_queue *q;

struct cq ciq[MAX_COMPRESSOR_THREAD_COUNT];
#if VM_PRESSURE_EVENTS
void vm_pressure_thread(void);

boolean_t VM_PRESSURE_NORMAL_TO_WARNING(void);
boolean_t VM_PRESSURE_WARNING_TO_CRITICAL(void);

boolean_t VM_PRESSURE_WARNING_TO_NORMAL(void);
boolean_t VM_PRESSURE_CRITICAL_TO_WARNING(void);
#endif /* VM_PRESSURE_EVENTS */

void vm_pageout_garbage_collect(int);
static void vm_pageout_iothread_external(void);
static void vm_pageout_iothread_internal(struct cq *cq);
static void vm_pageout_adjust_eq_iothrottle(struct vm_pageout_queue *, boolean_t);

extern void vm_pageout_continue(void);
extern void vm_pageout_scan(void);
void vm_tests(void); /* forward */

boolean_t	vm_restricted_to_single_processor = FALSE;

#if !CONFIG_EMBEDDED
static boolean_t vm_pageout_waiter  = FALSE;
static boolean_t vm_pageout_running = FALSE;
#endif /* !CONFIG_EMBEDDED */

static thread_t	vm_pageout_external_iothread = THREAD_NULL;
static thread_t	vm_pageout_internal_iothread = THREAD_NULL;
unsigned int vm_pageout_reserved_internal = 0;
unsigned int vm_pageout_reserved_really = 0;

unsigned int vm_pageout_swap_wait = 0;
unsigned int vm_pageout_idle_wait = 0;		/* milliseconds */
unsigned int vm_pageout_empty_wait = 0;		/* milliseconds */
unsigned int vm_pageout_burst_wait = 0;		/* milliseconds */
unsigned int vm_pageout_deadlock_wait = 0;	/* milliseconds */
unsigned int vm_pageout_deadlock_relief = 0;
unsigned int vm_pageout_inactive_relief = 0;
unsigned int vm_pageout_burst_active_throttle = 0;
unsigned int vm_pageout_burst_inactive_throttle = 0;

int	vm_upl_wait_for_pages = 0;
/*
 * These variables record the pageout daemon's actions:
 * how many pages it looks at and what happens to those pages.
 * No locking needed because only one thread modifies the variables.
 */

unsigned int vm_pageout_active = 0;		/* debugging */
unsigned int vm_pageout_inactive = 0;		/* debugging */
unsigned int vm_pageout_inactive_throttled = 0;	/* debugging */
unsigned int vm_pageout_inactive_forced = 0;	/* debugging */
unsigned int vm_pageout_inactive_nolock = 0;	/* debugging */
unsigned int vm_pageout_inactive_avoid = 0;	/* debugging */
unsigned int vm_pageout_inactive_busy = 0;	/* debugging */
unsigned int vm_pageout_inactive_error = 0;	/* debugging */
unsigned int vm_pageout_inactive_absent = 0;	/* debugging */
unsigned int vm_pageout_inactive_notalive = 0;	/* debugging */
unsigned int vm_pageout_inactive_used = 0;	/* debugging */
unsigned int vm_pageout_cache_evicted = 0;	/* debugging */
unsigned int vm_pageout_inactive_clean = 0;	/* debugging */
unsigned int vm_pageout_speculative_clean = 0;	/* debugging */
unsigned int vm_pageout_speculative_dirty = 0;	/* debugging */

unsigned int vm_pageout_freed_from_cleaned = 0;
unsigned int vm_pageout_freed_from_speculative = 0;
unsigned int vm_pageout_freed_from_inactive_clean = 0;
unsigned int vm_pageout_freed_after_compression = 0;

extern uint32_t vm_compressor_pages_grabbed;
extern uint32_t c_segment_pages_compressed;

unsigned int vm_pageout_enqueued_cleaned_from_inactive_dirty = 0;

unsigned int vm_pageout_cleaned_reclaimed = 0;		/* debugging; how many cleaned pages are reclaimed by the pageout scan */
unsigned int vm_pageout_cleaned_reactivated = 0;	/* debugging; how many cleaned pages are found to be referenced on pageout (and are therefore reactivated) */
unsigned int vm_pageout_cleaned_reference_reactivated = 0;
unsigned int vm_pageout_cleaned_volatile_reactivated = 0;
unsigned int vm_pageout_cleaned_fault_reactivated = 0;
unsigned int vm_pageout_cleaned_commit_reactivated = 0;	/* debugging; how many cleaned pages are found to be referenced on commit (and are therefore reactivated) */
unsigned int vm_pageout_cleaned_busy = 0;
unsigned int vm_pageout_cleaned_nolock = 0;

unsigned int vm_pageout_inactive_dirty_internal = 0;	/* debugging */
unsigned int vm_pageout_inactive_dirty_external = 0;	/* debugging */
unsigned int vm_pageout_inactive_deactivated = 0;	/* debugging */
unsigned int vm_pageout_inactive_anonymous = 0;		/* debugging */
unsigned int vm_pageout_dirty_no_pager = 0;		/* debugging */
unsigned int vm_pageout_purged_objects = 0;		/* used for sysctl vm stats */
unsigned int vm_stat_discard = 0;		/* debugging */
unsigned int vm_stat_discard_sent = 0;		/* debugging */
unsigned int vm_stat_discard_failure = 0;	/* debugging */
unsigned int vm_stat_discard_throttle = 0;	/* debugging */
unsigned int vm_pageout_reactivation_limit_exceeded = 0;	/* debugging */
unsigned int vm_pageout_inactive_force_reclaim = 0;	/* debugging */
unsigned int vm_pageout_skipped_external = 0;		/* debugging */

unsigned int vm_pageout_scan_reclaimed_throttled = 0;
unsigned int vm_pageout_scan_active_throttled = 0;
unsigned int vm_pageout_scan_inactive_throttled_internal = 0;
unsigned int vm_pageout_scan_inactive_throttled_external = 0;
unsigned int vm_pageout_scan_throttle = 0;		/* debugging */
unsigned int vm_pageout_scan_burst_throttle = 0;	/* debugging */
unsigned int vm_pageout_scan_empty_throttle = 0;	/* debugging */
unsigned int vm_pageout_scan_swap_throttle = 0;		/* debugging */
unsigned int vm_pageout_scan_deadlock_detected = 0;	/* debugging */
unsigned int vm_pageout_scan_active_throttle_success = 0;	/* debugging */
unsigned int vm_pageout_scan_inactive_throttle_success = 0;	/* debugging */
unsigned int vm_pageout_inactive_external_forced_jetsam_count = 0;	/* debugging */
unsigned int vm_pageout_scan_throttle_deferred = 0;	/* debugging */
unsigned int vm_pageout_scan_yield_unthrottled = 0;	/* debugging */
unsigned int vm_page_speculative_count_drifts = 0;
unsigned int vm_page_speculative_count_drift_max = 0;

uint32_t vm_compressor_failed;
/*
 * Backing store throttle when BS is exhausted
 */
unsigned int	vm_backing_store_low = 0;

unsigned int vm_pageout_out_of_line = 0;
unsigned int vm_pageout_in_place = 0;

unsigned int vm_page_steal_pageout_page = 0;

struct vm_config	vm_config;

struct vm_pageout_queue vm_pageout_queue_internal __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
struct vm_pageout_queue vm_pageout_queue_external __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));

unsigned int vm_page_speculative_target = 0;

vm_object_t	vm_pageout_scan_wants_object = VM_OBJECT_NULL;

boolean_t (* volatile consider_buffer_cache_collect)(int) = NULL;
#if DEVELOPMENT || DEBUG
unsigned long vm_cs_validated_resets = 0;
#endif /* DEVELOPMENT || DEBUG */

int	vm_debug_events	= 0;

#if CONFIG_MEMORYSTATUS
extern boolean_t memorystatus_idle_exit_from_VM(void);

extern boolean_t memorystatus_kill_on_VM_page_shortage(boolean_t async);
extern void memorystatus_on_pageout_scan_end(void);

uint32_t vm_pageout_memorystatus_fb_factor_nr = 5;
uint32_t vm_pageout_memorystatus_fb_factor_dr = 2;
#if DEVELOPMENT || DEBUG
uint32_t vm_grab_anon_overrides = 0;
uint32_t vm_grab_anon_nops = 0;
#endif /* DEVELOPMENT || DEBUG */
#endif /* CONFIG_MEMORYSTATUS */

#if MACH_CLUSTER_STATS
unsigned long vm_pageout_cluster_dirtied = 0;
unsigned long vm_pageout_cluster_cleaned = 0;
unsigned long vm_pageout_cluster_collisions = 0;
unsigned long vm_pageout_cluster_clusters = 0;
unsigned long vm_pageout_cluster_conversions = 0;
unsigned long vm_pageout_target_collisions = 0;
unsigned long vm_pageout_target_page_dirtied = 0;
unsigned long vm_pageout_target_page_freed = 0;
#define CLUSTER_STAT(clause)	clause
#else	/* MACH_CLUSTER_STATS */
#define CLUSTER_STAT(clause)
#endif	/* MACH_CLUSTER_STATS */
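/*
 * Note on usage (see vm_pageout_object_terminate() below): wrapping an
 * update such as CLUSTER_STAT(vm_pageout_target_page_dirtied++;) in the
 * macro above lets the statistic be maintained when MACH_CLUSTER_STATS is
 * configured and compile away to nothing when it is not.
 */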
#if DEVELOPMENT || DEBUG
vmct_stats_t vmct_stats;
#endif /* DEVELOPMENT || DEBUG */
507 * Routine: vm_pageout_object_terminate
509 * Destroy the pageout_object, and perform all of the
510 * required cleanup actions.
513 * The object must be locked, and will be returned locked.
516 vm_pageout_object_terminate(
519 vm_object_t shadow_object
;
522 * Deal with the deallocation (last reference) of a pageout object
523 * (used for cleaning-in-place) by dropping the paging references/
524 * freeing pages in the original object.
527 assert(object
->pageout
);
528 shadow_object
= object
->shadow
;
529 vm_object_lock(shadow_object
);
531 while (!vm_page_queue_empty(&object
->memq
)) {
533 vm_object_offset_t offset
;
535 p
= (vm_page_t
) vm_page_queue_first(&object
->memq
);
538 assert(p
->free_when_done
);
539 p
->free_when_done
= FALSE
;
540 assert(!p
->cleaning
);
547 m
= vm_page_lookup(shadow_object
,
548 offset
+ object
->vo_shadow_offset
);
550 if(m
== VM_PAGE_NULL
)
553 assert((m
->dirty
) || (m
->precious
) ||
554 (m
->busy
&& m
->cleaning
));
557 * Handle the trusted pager throttle.
558 * Also decrement the burst throttle (if external).
560 vm_page_lock_queues();
561 if (m
->vm_page_q_state
== VM_PAGE_ON_PAGEOUT_Q
)
562 vm_pageout_throttle_up(m
);
565 * Handle the "target" page(s). These pages are to be freed if
566 * successfully cleaned. Target pages are always busy, and are
567 * wired exactly once. The initial target pages are not mapped,
568 * (so cannot be referenced or modified) but converted target
569 * pages may have been modified between the selection as an
570 * adjacent page and conversion to a target.
572 if (m
->free_when_done
) {
574 assert(m
->vm_page_q_state
== VM_PAGE_IS_WIRED
);
575 assert(m
->wire_count
== 1);
577 m
->free_when_done
= FALSE
;
578 #if MACH_CLUSTER_STATS
579 if (m
->wanted
) vm_pageout_target_collisions
++;
582 * Revoke all access to the page. Since the object is
583 * locked, and the page is busy, this prevents the page
584 * from being dirtied after the pmap_disconnect() call
587 * Since the page is left "dirty" but "not modifed", we
588 * can detect whether the page was redirtied during
589 * pageout by checking the modify state.
591 if (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m
)) & VM_MEM_MODIFIED
) {
592 SET_PAGE_DIRTY(m
, FALSE
);
598 CLUSTER_STAT(vm_pageout_target_page_dirtied
++;)
599 vm_page_unwire(m
, TRUE
); /* reactivates */
600 VM_STAT_INCR(reactivations
);
603 CLUSTER_STAT(vm_pageout_target_page_freed
++;)
604 vm_page_free(m
);/* clears busy, etc. */
606 vm_page_unlock_queues();
610 * Handle the "adjacent" pages. These pages were cleaned in
611 * place, and should be left alone.
612 * If prep_pin_count is nonzero, then someone is using the
613 * page, so make it active.
615 if ((m
->vm_page_q_state
== VM_PAGE_NOT_ON_Q
) && !m
->private) {
619 vm_page_deactivate(m
);
621 if (m
->overwriting
) {
623 * the (COPY_OUT_FROM == FALSE) request_page_list case
627 * We do not re-set m->dirty !
628 * The page was busy so no extraneous activity
629 * could have occurred. COPY_INTO is a read into the
630 * new pages. CLEAN_IN_PLACE does actually write
631 * out the pages but handling outside of this code
632 * will take care of resetting dirty. We clear the
633 * modify however for the Programmed I/O case.
635 pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m
));
641 * alternate (COPY_OUT_FROM == FALSE) request_page_list case
642 * Occurs when the original page was wired
643 * at the time of the list request
645 assert(VM_PAGE_WIRED(m
));
646 vm_page_unwire(m
, TRUE
); /* reactivates */
648 m
->overwriting
= FALSE
;
651 * Set the dirty state according to whether or not the page was
652 * modified during the pageout. Note that we purposefully do
653 * NOT call pmap_clear_modify since the page is still mapped.
654 * If the page were to be dirtied between the 2 calls, this
655 * this fact would be lost. This code is only necessary to
656 * maintain statistics, since the pmap module is always
657 * consulted if m->dirty is false.
659 #if MACH_CLUSTER_STATS
660 m
->dirty
= pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m
));
662 if (m
->dirty
) vm_pageout_cluster_dirtied
++;
663 else vm_pageout_cluster_cleaned
++;
664 if (m
->wanted
) vm_pageout_cluster_collisions
++;
672 * Wakeup any thread waiting for the page to be un-cleaning.
675 vm_page_unlock_queues();
678 * Account for the paging reference taken in vm_paging_object_allocate.
680 vm_object_activity_end(shadow_object
);
681 vm_object_unlock(shadow_object
);
683 assert(object
->ref_count
== 0);
684 assert(object
->paging_in_progress
== 0);
685 assert(object
->activity_in_progress
== 0);
686 assert(object
->resident_page_count
== 0);
691 * Routine: vm_pageclean_setup
693 * Purpose: setup a page to be cleaned (made non-dirty), but not
694 * necessarily flushed from the VM page cache.
695 * This is accomplished by cleaning in place.
697 * The page must not be busy, and new_object
705 vm_object_t new_object
,
706 vm_object_offset_t new_offset
)
710 assert(!m
->cleaning
);
714 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
715 VM_PAGE_OBJECT(m
), m
->offset
, m
,
718 pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m
));
721 * Mark original page as cleaning in place.
724 SET_PAGE_DIRTY(m
, FALSE
);
728 * Convert the fictitious page to a private shadow of
731 assert(new_m
->fictitious
);
732 assert(VM_PAGE_GET_PHYS_PAGE(new_m
) == vm_page_fictitious_addr
);
733 new_m
->fictitious
= FALSE
;
734 new_m
->private = TRUE
;
735 new_m
->free_when_done
= TRUE
;
736 VM_PAGE_SET_PHYS_PAGE(new_m
, VM_PAGE_GET_PHYS_PAGE(m
));
738 vm_page_lockspin_queues();
739 vm_page_wire(new_m
, VM_KERN_MEMORY_NONE
, TRUE
);
740 vm_page_unlock_queues();
742 vm_page_insert_wired(new_m
, new_object
, new_offset
, VM_KERN_MEMORY_NONE
);
743 assert(!new_m
->wanted
);
748 * Routine: vm_pageout_initialize_page
750 * Causes the specified page to be initialized in
751 * the appropriate memory object. This routine is used to push
752 * pages into a copy-object when they are modified in the
755 * The page is moved to a temporary object and paged out.
758 * The page in question must not be on any pageout queues.
759 * The object to which it belongs must be locked.
760 * The page must be busy, but not hold a paging reference.
763 * Move this page to a completely new object.
766 vm_pageout_initialize_page(
770 vm_object_offset_t paging_offset
;
771 memory_object_t pager
;
774 "vm_pageout_initialize_page, page 0x%X\n",
777 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT
);
779 object
= VM_PAGE_OBJECT(m
);
782 assert(object
->internal
);
785 * Verify that we really want to clean this page
792 * Create a paging reference to let us play with the object.
794 paging_offset
= m
->offset
+ object
->paging_offset
;
796 if (m
->absent
|| m
->error
|| m
->restart
|| (!m
->dirty
&& !m
->precious
)) {
797 panic("reservation without pageout?"); /* alan */
800 vm_object_unlock(object
);
806 * If there's no pager, then we can't clean the page. This should
807 * never happen since this should be a copy object and therefore not
808 * an external object, so the pager should always be there.
811 pager
= object
->pager
;
813 if (pager
== MEMORY_OBJECT_NULL
) {
814 panic("missing pager for copy object");
821 * set the page for future call to vm_fault_list_request
823 pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m
));
824 SET_PAGE_DIRTY(m
, FALSE
);
827 * keep the object from collapsing or terminating
829 vm_object_paging_begin(object
);
830 vm_object_unlock(object
);
833 * Write the data to its pager.
834 * Note that the data is passed by naming the new object,
835 * not a virtual address; the pager interface has been
836 * manipulated to use the "internal memory" data type.
837 * [The object reference from its allocation is donated
838 * to the eventual recipient.]
840 memory_object_data_initialize(pager
, paging_offset
, PAGE_SIZE
);
842 vm_object_lock(object
);
843 vm_object_paging_end(object
);
846 #if MACH_CLUSTER_STATS
847 #define MAXCLUSTERPAGES 16
849 unsigned long pages_in_cluster
;
850 unsigned long pages_at_higher_offsets
;
851 unsigned long pages_at_lower_offsets
;
852 } cluster_stats
[MAXCLUSTERPAGES
];
853 #endif /* MACH_CLUSTER_STATS */
857 * vm_pageout_cluster:
859 * Given a page, queue it to the appropriate I/O thread,
860 * which will page it out and attempt to clean adjacent pages
861 * in the same operation.
863 * The object and queues must be locked. We will take a
864 * paging reference to prevent deallocation or collapse when we
865 * release the object lock back at the call site. The I/O thread
866 * is responsible for consuming this reference
868 * The page must not be on any pageout queue.
870 int32_t vmct_active
= 0;
871 typedef enum vmct_state_t
{
876 vmct_state_t vmct_state
[MAX_COMPRESSOR_THREAD_COUNT
];
879 vm_pageout_cluster(vm_page_t m
)
881 vm_object_t object
= VM_PAGE_OBJECT(m
);
882 struct vm_pageout_queue
*q
;
886 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
887 object
, m
->offset
, m
, 0, 0);
890 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
891 vm_object_lock_assert_exclusive(object
);
894 * Only a certain kind of page is appreciated here.
896 assert((m
->dirty
|| m
->precious
) && (!VM_PAGE_WIRED(m
)));
897 assert(!m
->cleaning
&& !m
->laundry
);
898 assert(m
->vm_page_q_state
== VM_PAGE_NOT_ON_Q
);
901 * protect the object from collapse or termination
903 vm_object_activity_begin(object
);
905 if (object
->internal
== TRUE
) {
906 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT
);
910 q
= &vm_pageout_queue_internal
;
912 q
= &vm_pageout_queue_external
;
915 * pgo_laundry count is tied to the laundry bit
920 m
->vm_page_q_state
= VM_PAGE_ON_PAGEOUT_Q
;
921 vm_page_queue_enter(&q
->pgo_pending
, m
, vm_page_t
, pageq
);
923 if (q
->pgo_idle
== TRUE
) {
925 thread_wakeup((event_t
) &q
->pgo_pending
);
931 unsigned long vm_pageout_throttle_up_count
= 0;
934 * A page is back from laundry or we are stealing it back from
935 * the laundering state. See if there are some pages waiting to
936 * go to laundry and if we can let some of them go now.
938 * Object and page queues must be locked.
941 vm_pageout_throttle_up(
944 struct vm_pageout_queue
*q
;
945 vm_object_t m_object
;
947 m_object
= VM_PAGE_OBJECT(m
);
949 assert(m_object
!= VM_OBJECT_NULL
);
950 assert(m_object
!= kernel_object
);
952 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
953 vm_object_lock_assert_exclusive(m_object
);
955 vm_pageout_throttle_up_count
++;
957 if (m_object
->internal
== TRUE
)
958 q
= &vm_pageout_queue_internal
;
960 q
= &vm_pageout_queue_external
;
962 if (m
->vm_page_q_state
== VM_PAGE_ON_PAGEOUT_Q
) {
964 vm_page_queue_remove(&q
->pgo_pending
, m
, vm_page_t
, pageq
);
965 m
->vm_page_q_state
= VM_PAGE_NOT_ON_Q
;
967 VM_PAGE_ZERO_PAGEQ_ENTRY(m
);
969 vm_object_activity_end(m_object
);
971 if (m
->laundry
== TRUE
) {
976 if (q
->pgo_throttled
== TRUE
) {
977 q
->pgo_throttled
= FALSE
;
978 thread_wakeup((event_t
) &q
->pgo_laundry
);
980 if (q
->pgo_draining
== TRUE
&& q
->pgo_laundry
== 0) {
981 q
->pgo_draining
= FALSE
;
982 thread_wakeup((event_t
) (&q
->pgo_laundry
+1));
989 vm_pageout_throttle_up_batch(
990 struct vm_pageout_queue
*q
,
993 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
995 vm_pageout_throttle_up_count
+= batch_cnt
;
997 q
->pgo_laundry
-= batch_cnt
;
999 if (q
->pgo_throttled
== TRUE
) {
1000 q
->pgo_throttled
= FALSE
;
1001 thread_wakeup((event_t
) &q
->pgo_laundry
);
1003 if (q
->pgo_draining
== TRUE
&& q
->pgo_laundry
== 0) {
1004 q
->pgo_draining
= FALSE
;
1005 thread_wakeup((event_t
) (&q
->pgo_laundry
+1));
/*
 * VM memory pressure monitoring.
 *
 * vm_pageout_scan() keeps track of the number of pages it considers and
 * reclaims, in the currently active vm_pageout_stat[vm_pageout_stat_now].
 *
 * compute_memory_pressure() is called every second from compute_averages()
 * and moves "vm_pageout_stat_now" forward, to start accumulating the number
 * of reclaimed pages in a new vm_pageout_stat[] bucket.
 *
 * mach_vm_pressure_monitor() collects past statistics about memory pressure.
 * The caller provides the number of seconds ("nsecs") worth of statistics
 * it wants, up to 30 seconds.
 * It computes the number of pages reclaimed in the past "nsecs" seconds and
 * also returns the number of pages the system still needs to reclaim at this
 * moment in time.
 */
#define VM_PAGEOUT_STAT_SIZE	31
struct vm_pageout_stat {
	unsigned int considered;
	unsigned int reclaimed_clean;
	unsigned int pages_compressed;
	unsigned int pages_grabbed_by_compressor;
	unsigned int cleaned_dirty_external;
	unsigned int throttled_internal_q;
	unsigned int throttled_external_q;
	unsigned int failed_compressions;
} vm_pageout_stats[VM_PAGEOUT_STAT_SIZE] = {{0,0,0,0,0,0,0,0}, };

unsigned int vm_pageout_stat_now = 0;
unsigned int vm_memory_pressure = 0;

#define VM_PAGEOUT_STAT_BEFORE(i) \
	(((i) == 0) ? VM_PAGEOUT_STAT_SIZE - 1 : (i) - 1)
#define VM_PAGEOUT_STAT_AFTER(i) \
	(((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 0 : (i) + 1)

#if VM_PAGE_BUCKETS_CHECK
int vm_page_buckets_check_interval = 10; /* in seconds */
#endif /* VM_PAGE_BUCKETS_CHECK */
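/*
 * Example of how the ring-buffer macros above wrap around: with
 * VM_PAGEOUT_STAT_SIZE == 31, VM_PAGEOUT_STAT_AFTER(30) == 0 and
 * VM_PAGEOUT_STAT_BEFORE(0) == 30, so compute_memory_pressure() can keep
 * advancing vm_pageout_stat_now once per second while the buckets cover a
 * sliding window of the last 30 seconds of pageout activity.
 */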
1053 * Called from compute_averages().
1056 compute_memory_pressure(
1059 unsigned int vm_pageout_next
;
1061 #if VM_PAGE_BUCKETS_CHECK
1062 /* check the consistency of VM page buckets at regular interval */
1063 static int counter
= 0;
1064 if ((++counter
% vm_page_buckets_check_interval
) == 0) {
1065 vm_page_buckets_check();
1067 #endif /* VM_PAGE_BUCKETS_CHECK */
1069 vm_memory_pressure
=
1070 vm_pageout_stats
[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now
)].reclaimed_clean
;
1072 commpage_set_memory_pressure( vm_memory_pressure
);
1074 /* move "now" forward */
1075 vm_pageout_next
= VM_PAGEOUT_STAT_AFTER(vm_pageout_stat_now
);
1076 vm_pageout_stats
[vm_pageout_next
].considered
= 0;
1077 vm_pageout_stats
[vm_pageout_next
].reclaimed_clean
= 0;
1078 vm_pageout_stats
[vm_pageout_next
].throttled_internal_q
= 0;
1079 vm_pageout_stats
[vm_pageout_next
].throttled_external_q
= 0;
1080 vm_pageout_stats
[vm_pageout_next
].cleaned_dirty_external
= 0;
1081 vm_pageout_stats
[vm_pageout_next
].pages_compressed
= 0;
1082 vm_pageout_stats
[vm_pageout_next
].pages_grabbed_by_compressor
= 0;
1083 vm_pageout_stats
[vm_pageout_next
].failed_compressions
= 0;
1085 vm_pageout_stat_now
= vm_pageout_next
;
1091 * mach_vm_ctl_page_free_wanted() is called indirectly, via
1092 * mach_vm_pressure_monitor(), when taking a stackshot. Therefore,
1093 * it must be safe in the restricted stackshot context. Locks and/or
1094 * blocking are not allowable.
1097 mach_vm_ctl_page_free_wanted(void)
1099 unsigned int page_free_target
, page_free_count
, page_free_wanted
;
1101 page_free_target
= vm_page_free_target
;
1102 page_free_count
= vm_page_free_count
;
1103 if (page_free_target
> page_free_count
) {
1104 page_free_wanted
= page_free_target
- page_free_count
;
1106 page_free_wanted
= 0;
1109 return page_free_wanted
;
1115 * mach_vm_pressure_monitor() is called when taking a stackshot, with
1116 * wait_for_pressure FALSE, so that code path must remain safe in the
1117 * restricted stackshot context. No blocking or locks are allowable.
1118 * on that code path.
1122 mach_vm_pressure_monitor(
1123 boolean_t wait_for_pressure
,
1124 unsigned int nsecs_monitored
,
1125 unsigned int *pages_reclaimed_p
,
1126 unsigned int *pages_wanted_p
)
1129 unsigned int vm_pageout_then
, vm_pageout_now
;
1130 unsigned int pages_reclaimed
;
1133 * We don't take the vm_page_queue_lock here because we don't want
1134 * vm_pressure_monitor() to get in the way of the vm_pageout_scan()
1135 * thread when it's trying to reclaim memory. We don't need fully
1136 * accurate monitoring anyway...
1139 if (wait_for_pressure
) {
1140 /* wait until there's memory pressure */
1141 while (vm_page_free_count
>= vm_page_free_target
) {
1142 wr
= assert_wait((event_t
) &vm_page_free_wanted
,
1143 THREAD_INTERRUPTIBLE
);
1144 if (wr
== THREAD_WAITING
) {
1145 wr
= thread_block(THREAD_CONTINUE_NULL
);
1147 if (wr
== THREAD_INTERRUPTED
) {
1148 return KERN_ABORTED
;
1150 if (wr
== THREAD_AWAKENED
) {
1152 * The memory pressure might have already
1153 * been relieved but let's not block again
1154 * and let's report that there was memory
1155 * pressure at some point.
1162 /* provide the number of pages the system wants to reclaim */
1163 if (pages_wanted_p
!= NULL
) {
1164 *pages_wanted_p
= mach_vm_ctl_page_free_wanted();
1167 if (pages_reclaimed_p
== NULL
) {
1168 return KERN_SUCCESS
;
1171 /* provide number of pages reclaimed in the last "nsecs_monitored" */
1172 vm_pageout_now
= vm_pageout_stat_now
;
1173 pages_reclaimed
= 0;
1174 for (vm_pageout_then
=
1175 VM_PAGEOUT_STAT_BEFORE(vm_pageout_now
);
1176 vm_pageout_then
!= vm_pageout_now
&&
1177 nsecs_monitored
-- != 0;
1179 VM_PAGEOUT_STAT_BEFORE(vm_pageout_then
)) {
1180 pages_reclaimed
+= vm_pageout_stats
[vm_pageout_then
].reclaimed_clean
;
1182 *pages_reclaimed_p
= pages_reclaimed
;
1184 return KERN_SUCCESS
;
1189 #if DEVELOPMENT || DEBUG
1192 vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t
*, int);
1195 * condition variable used to make sure there is
1196 * only a single sweep going on at a time
1198 boolean_t vm_pageout_disconnect_all_pages_active
= FALSE
;
1202 vm_pageout_disconnect_all_pages()
1204 vm_page_lock_queues();
1206 if (vm_pageout_disconnect_all_pages_active
== TRUE
) {
1207 vm_page_unlock_queues();
1210 vm_pageout_disconnect_all_pages_active
= TRUE
;
1211 vm_page_unlock_queues();
1213 vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_throttled
, vm_page_throttled_count
);
1214 vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_anonymous
, vm_page_anonymous_count
);
1215 vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_active
, vm_page_active_count
);
1217 vm_pageout_disconnect_all_pages_active
= FALSE
;
1222 vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t
*q
, int qcount
)
1225 vm_object_t t_object
= NULL
;
1226 vm_object_t l_object
= NULL
;
1227 vm_object_t m_object
= NULL
;
1228 int delayed_unlock
= 0;
1229 int try_failed_count
= 0;
1230 int disconnected_count
= 0;
1231 int paused_count
= 0;
1232 int object_locked_count
= 0;
1234 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
, (MACHDBG_CODE(DBG_MACH_WORKINGSET
, VM_DISCONNECT_ALL_PAGE_MAPPINGS
)) | DBG_FUNC_START
,
1235 q
, qcount
, 0, 0, 0);
1237 vm_page_lock_queues();
1239 while (qcount
&& !vm_page_queue_empty(q
)) {
1241 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
1243 m
= (vm_page_t
) vm_page_queue_first(q
);
1244 m_object
= VM_PAGE_OBJECT(m
);
1247 * check to see if we currently are working
1248 * with the same object... if so, we've
1249 * already got the lock
1251 if (m_object
!= l_object
) {
1253 * the object associated with candidate page is
1254 * different from the one we were just working
1255 * with... dump the lock if we still own it
1257 if (l_object
!= NULL
) {
1258 vm_object_unlock(l_object
);
1261 if (m_object
!= t_object
)
1262 try_failed_count
= 0;
1265 * Try to lock object; since we've alread got the
1266 * page queues lock, we can only 'try' for this one.
1267 * if the 'try' fails, we need to do a mutex_pause
1268 * to allow the owner of the object lock a chance to
1271 if ( !vm_object_lock_try_scan(m_object
)) {
1273 if (try_failed_count
> 20) {
1274 goto reenter_pg_on_q
;
1276 vm_page_unlock_queues();
1277 mutex_pause(try_failed_count
++);
1278 vm_page_lock_queues();
1283 t_object
= m_object
;
1286 object_locked_count
++;
1288 l_object
= m_object
;
1290 if ( !m_object
->alive
|| m
->cleaning
|| m
->laundry
|| m
->busy
|| m
->absent
|| m
->error
|| m
->free_when_done
) {
1292 * put it back on the head of its queue
1294 goto reenter_pg_on_q
;
1296 if (m
->pmapped
== TRUE
) {
1298 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m
));
1300 disconnected_count
++;
1303 vm_page_queue_remove(q
, m
, vm_page_t
, pageq
);
1304 vm_page_queue_enter(q
, m
, vm_page_t
, pageq
);
1307 try_failed_count
= 0;
1309 if (delayed_unlock
++ > 128) {
1311 if (l_object
!= NULL
) {
1312 vm_object_unlock(l_object
);
1315 lck_mtx_yield(&vm_page_queue_lock
);
1319 if (l_object
!= NULL
) {
1320 vm_object_unlock(l_object
);
1323 vm_page_unlock_queues();
1325 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
, (MACHDBG_CODE(DBG_MACH_WORKINGSET
, VM_DISCONNECT_ALL_PAGE_MAPPINGS
)) | DBG_FUNC_END
,
1326 q
, disconnected_count
, object_locked_count
, paused_count
, 0);
1333 vm_pageout_page_queue(vm_page_queue_head_t
*, int);
1336 * condition variable used to make sure there is
1337 * only a single sweep going on at a time
1339 boolean_t vm_pageout_anonymous_pages_active
= FALSE
;
1343 vm_pageout_anonymous_pages()
1345 if (VM_CONFIG_COMPRESSOR_IS_PRESENT
) {
1347 vm_page_lock_queues();
1349 if (vm_pageout_anonymous_pages_active
== TRUE
) {
1350 vm_page_unlock_queues();
1353 vm_pageout_anonymous_pages_active
= TRUE
;
1354 vm_page_unlock_queues();
1356 vm_pageout_page_queue(&vm_page_queue_throttled
, vm_page_throttled_count
);
1357 vm_pageout_page_queue(&vm_page_queue_anonymous
, vm_page_anonymous_count
);
1358 vm_pageout_page_queue(&vm_page_queue_active
, vm_page_active_count
);
1360 if (VM_CONFIG_SWAP_IS_PRESENT
)
1361 vm_consider_swapping();
1363 vm_page_lock_queues();
1364 vm_pageout_anonymous_pages_active
= FALSE
;
1365 vm_page_unlock_queues();
1371 vm_pageout_page_queue(vm_page_queue_head_t
*q
, int qcount
)
1374 vm_object_t t_object
= NULL
;
1375 vm_object_t l_object
= NULL
;
1376 vm_object_t m_object
= NULL
;
1377 int delayed_unlock
= 0;
1378 int try_failed_count
= 0;
1381 struct vm_pageout_queue
*iq
;
1385 iq
= &vm_pageout_queue_internal
;
1387 vm_page_lock_queues();
1389 while (qcount
&& !vm_page_queue_empty(q
)) {
1391 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
1393 if (VM_PAGE_Q_THROTTLED(iq
)) {
1395 if (l_object
!= NULL
) {
1396 vm_object_unlock(l_object
);
1399 iq
->pgo_draining
= TRUE
;
1401 assert_wait((event_t
) (&iq
->pgo_laundry
+ 1), THREAD_INTERRUPTIBLE
);
1402 vm_page_unlock_queues();
1404 thread_block(THREAD_CONTINUE_NULL
);
1406 vm_page_lock_queues();
1410 m
= (vm_page_t
) vm_page_queue_first(q
);
1411 m_object
= VM_PAGE_OBJECT(m
);
1414 * check to see if we currently are working
1415 * with the same object... if so, we've
1416 * already got the lock
1418 if (m_object
!= l_object
) {
1419 if ( !m_object
->internal
)
1420 goto reenter_pg_on_q
;
1423 * the object associated with candidate page is
1424 * different from the one we were just working
1425 * with... dump the lock if we still own it
1427 if (l_object
!= NULL
) {
1428 vm_object_unlock(l_object
);
1431 if (m_object
!= t_object
)
1432 try_failed_count
= 0;
1435 * Try to lock object; since we've alread got the
1436 * page queues lock, we can only 'try' for this one.
1437 * if the 'try' fails, we need to do a mutex_pause
1438 * to allow the owner of the object lock a chance to
1441 if ( !vm_object_lock_try_scan(m_object
)) {
1443 if (try_failed_count
> 20) {
1444 goto reenter_pg_on_q
;
1446 vm_page_unlock_queues();
1447 mutex_pause(try_failed_count
++);
1448 vm_page_lock_queues();
1451 t_object
= m_object
;
1454 l_object
= m_object
;
1456 if ( !m_object
->alive
|| m
->cleaning
|| m
->laundry
|| m
->busy
|| m
->absent
|| m
->error
|| m
->free_when_done
) {
1458 * page is not to be cleaned
1459 * put it back on the head of its queue
1461 goto reenter_pg_on_q
;
1463 phys_page
= VM_PAGE_GET_PHYS_PAGE(m
);
1465 if (m
->reference
== FALSE
&& m
->pmapped
== TRUE
) {
1466 refmod_state
= pmap_get_refmod(phys_page
);
1468 if (refmod_state
& VM_MEM_REFERENCED
)
1469 m
->reference
= TRUE
;
1470 if (refmod_state
& VM_MEM_MODIFIED
) {
1471 SET_PAGE_DIRTY(m
, FALSE
);
1474 if (m
->reference
== TRUE
) {
1475 m
->reference
= FALSE
;
1476 pmap_clear_refmod_options(phys_page
, VM_MEM_REFERENCED
, PMAP_OPTIONS_NOFLUSH
, (void *)NULL
);
1477 goto reenter_pg_on_q
;
1479 if (m
->pmapped
== TRUE
) {
1480 if (m
->dirty
|| m
->precious
) {
1481 pmap_options
= PMAP_OPTIONS_COMPRESSOR
;
1483 pmap_options
= PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED
;
1485 refmod_state
= pmap_disconnect_options(phys_page
, pmap_options
, NULL
);
1486 if (refmod_state
& VM_MEM_MODIFIED
) {
1487 SET_PAGE_DIRTY(m
, FALSE
);
1490 if ( !m
->dirty
&& !m
->precious
) {
1491 vm_page_unlock_queues();
1493 vm_page_lock_queues();
1498 if (!m_object
->pager_initialized
|| m_object
->pager
== MEMORY_OBJECT_NULL
) {
1500 if (!m_object
->pager_initialized
) {
1502 vm_page_unlock_queues();
1504 vm_object_collapse(m_object
, (vm_object_offset_t
) 0, TRUE
);
1506 if (!m_object
->pager_initialized
)
1507 vm_object_compressor_pager_create(m_object
);
1509 vm_page_lock_queues();
1512 if (!m_object
->pager_initialized
|| m_object
->pager
== MEMORY_OBJECT_NULL
)
1513 goto reenter_pg_on_q
;
1515 * vm_object_compressor_pager_create will drop the object lock
1516 * which means 'm' may no longer be valid to use
1521 * we've already factored out pages in the laundry which
1522 * means this page can't be on the pageout queue so it's
1523 * safe to do the vm_page_queues_remove
1525 vm_page_queues_remove(m
, TRUE
);
1527 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
1529 vm_pageout_cluster(m
);
1534 vm_page_queue_remove(q
, m
, vm_page_t
, pageq
);
1535 vm_page_queue_enter(q
, m
, vm_page_t
, pageq
);
1538 try_failed_count
= 0;
1540 if (delayed_unlock
++ > 128) {
1542 if (l_object
!= NULL
) {
1543 vm_object_unlock(l_object
);
1546 lck_mtx_yield(&vm_page_queue_lock
);
1550 if (l_object
!= NULL
) {
1551 vm_object_unlock(l_object
);
1554 vm_page_unlock_queues();
1560 * function in BSD to apply I/O throttle to the pageout thread
1562 extern void vm_pageout_io_throttle(void);
1564 #define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, obj) \
1567 * If a "reusable" page somehow made it back into \
1568 * the active queue, it's been re-used and is not \
1569 * quite re-usable. \
1570 * If the VM object was "all_reusable", consider it \
1571 * as "all re-used" instead of converting it to \
1572 * "partially re-used", which could be expensive. \
1574 assert(VM_PAGE_OBJECT((m)) == (obj)); \
1575 if ((m)->reusable || \
1576 (obj)->all_reusable) { \
1577 vm_object_reuse_pages((obj), \
1579 (m)->offset + PAGE_SIZE_64, \
1585 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT 64
1586 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX 1024
1589 #define FCS_DELAYED 1
1590 #define FCS_DEADLOCK_DETECTED 2
1592 struct flow_control
{
1597 #if CONFIG_BACKGROUND_QUEUE
1598 uint64_t vm_pageout_skipped_bq_internal
= 0;
1599 uint64_t vm_pageout_considered_bq_internal
= 0;
1600 uint64_t vm_pageout_considered_bq_external
= 0;
1601 uint64_t vm_pageout_rejected_bq_internal
= 0;
1602 uint64_t vm_pageout_rejected_bq_external
= 0;
1605 uint32_t vm_pageout_no_victim
= 0;
1606 uint32_t vm_pageout_considered_page
= 0;
1607 uint32_t vm_page_filecache_min
= 0;
1609 #define ANONS_GRABBED_LIMIT 2
1611 #if CONFIG_SECLUDED_MEMORY
1612 extern vm_page_t
vm_page_grab_secluded(void);
1613 uint64_t vm_pageout_secluded_burst_count
= 0;
1614 #endif /* CONFIG_SECLUDED_MEMORY */
1617 static void vm_pageout_delayed_unlock(int *, int *, vm_page_t
*);
1618 static void vm_pageout_prepare_to_block(vm_object_t
*, int *, vm_page_t
*, int *, int);
1620 #define VM_PAGEOUT_PB_NO_ACTION 0
1621 #define VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER 1
1622 #define VM_PAGEOUT_PB_THREAD_YIELD 2
1626 vm_pageout_delayed_unlock(int *delayed_unlock
, int *local_freed
, vm_page_t
*local_freeq
)
1629 vm_page_unlock_queues();
1632 vm_pageout_freelist
, VM_PAGEOUT_FREELIST
, DBG_FUNC_START
,
1633 vm_page_free_count
, *local_freed
, 0, 1);
1635 vm_page_free_list(*local_freeq
, TRUE
);
1637 VM_DEBUG_EVENT(vm_pageout_freelist
,VM_PAGEOUT_FREELIST
, DBG_FUNC_END
,
1638 vm_page_free_count
, 0, 0, 1);
1640 *local_freeq
= NULL
;
1643 vm_page_lock_queues();
1645 lck_mtx_yield(&vm_page_queue_lock
);
1647 *delayed_unlock
= 1;
1652 vm_pageout_prepare_to_block(vm_object_t
*object
, int *delayed_unlock
,
1653 vm_page_t
*local_freeq
, int *local_freed
, int action
)
1655 vm_page_unlock_queues();
1657 if (*object
!= NULL
) {
1658 vm_object_unlock(*object
);
1661 vm_pageout_scan_wants_object
= VM_OBJECT_NULL
;
1665 VM_DEBUG_EVENT(vm_pageout_freelist
, VM_PAGEOUT_FREELIST
, DBG_FUNC_START
,
1666 vm_page_free_count
, *local_freed
, 0, 2);
1668 vm_page_free_list(*local_freeq
, TRUE
);
1670 VM_DEBUG_EVENT(vm_pageout_freelist
, VM_PAGEOUT_FREELIST
, DBG_FUNC_END
,
1671 vm_page_free_count
, 0, 0, 2);
1673 *local_freeq
= NULL
;
1676 *delayed_unlock
= 1;
1680 case VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER
:
1681 vm_consider_waking_compactor_swapper();
1683 case VM_PAGEOUT_PB_THREAD_YIELD
:
1684 thread_yield_internal(1);
1686 case VM_PAGEOUT_PB_NO_ACTION
:
1690 vm_page_lock_queues();
1694 int last_vm_pageout_freed_from_inactive_clean
= 0;
1695 int last_vm_pageout_freed_from_cleaned
= 0;
1696 int last_vm_pageout_freed_from_speculative
= 0;
1697 int last_vm_pageout_freed_after_compression
= 0;
1698 int last_vm_pageout_enqueued_cleaned_from_inactive_dirty
= 0;
1699 int last_vm_pageout_inactive_force_reclaim
= 0;
1700 int last_vm_pageout_scan_inactive_throttled_external
= 0;
1701 int last_vm_pageout_scan_inactive_throttled_internal
= 0;
1702 int last_vm_pageout_reactivation_limit_exceeded
= 0;
1703 int last_vm_pageout_considered_page
= 0;
1704 int last_vm_compressor_pages_grabbed
= 0;
1705 int last_vm_compressor_failed
= 0;
1706 int last_vm_pageout_skipped_external
= 0;
1709 void update_vm_info(void)
1711 int tmp1
, tmp2
, tmp3
, tmp4
;
1716 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM
, VM_INFO1
)) | DBG_FUNC_NONE
,
1717 vm_page_active_count
,
1718 vm_page_speculative_count
,
1719 vm_page_inactive_count
,
1720 vm_page_anonymous_count
,
1723 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM
, VM_INFO2
)) | DBG_FUNC_NONE
,
1726 VM_PAGE_COMPRESSOR_COUNT
,
1729 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM
, VM_INFO3
)) | DBG_FUNC_NONE
,
1730 c_segment_pages_compressed
,
1731 vm_page_internal_count
,
1732 vm_page_external_count
,
1733 vm_page_xpmapped_external_count
,
1737 if ((vm_pageout_considered_page
- last_vm_pageout_considered_page
) == 0 &&
1738 (vm_pageout_enqueued_cleaned_from_inactive_dirty
- last_vm_pageout_enqueued_cleaned_from_inactive_dirty
== 0) &&
1739 (vm_pageout_freed_after_compression
- last_vm_pageout_freed_after_compression
== 0))
1743 tmp1
= vm_pageout_considered_page
;
1744 tmp2
= vm_pageout_freed_from_speculative
;
1745 tmp3
= vm_pageout_freed_from_inactive_clean
;
1747 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM
, VM_INFO4
)) | DBG_FUNC_NONE
,
1748 tmp1
- last_vm_pageout_considered_page
,
1749 tmp2
- last_vm_pageout_freed_from_speculative
,
1750 tmp3
- last_vm_pageout_freed_from_inactive_clean
,
1753 last_vm_pageout_considered_page
= tmp1
;
1754 last_vm_pageout_freed_from_speculative
= tmp2
;
1755 last_vm_pageout_freed_from_inactive_clean
= tmp3
;
1758 tmp1
= vm_pageout_scan_inactive_throttled_external
;
1759 tmp2
= vm_pageout_enqueued_cleaned_from_inactive_dirty
;
1760 tmp3
= vm_pageout_freed_from_cleaned
;
1762 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM
, VM_INFO5
)) | DBG_FUNC_NONE
,
1763 tmp1
- last_vm_pageout_scan_inactive_throttled_external
,
1764 tmp2
- last_vm_pageout_enqueued_cleaned_from_inactive_dirty
,
1765 tmp3
- last_vm_pageout_freed_from_cleaned
,
1768 vm_pageout_stats
[vm_pageout_stat_now
].throttled_external_q
+= (tmp1
- last_vm_pageout_scan_inactive_throttled_external
);
1769 vm_pageout_stats
[vm_pageout_stat_now
].cleaned_dirty_external
+= (tmp2
- last_vm_pageout_enqueued_cleaned_from_inactive_dirty
);
1771 last_vm_pageout_scan_inactive_throttled_external
= tmp1
;
1772 last_vm_pageout_enqueued_cleaned_from_inactive_dirty
= tmp2
;
1773 last_vm_pageout_freed_from_cleaned
= tmp3
;
1776 tmp1
= vm_pageout_scan_inactive_throttled_internal
;
1777 tmp2
= vm_pageout_freed_after_compression
;
1778 tmp3
= vm_compressor_pages_grabbed
;
1779 tmp4
= vm_pageout_skipped_external
;
1781 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM
, VM_INFO6
)) | DBG_FUNC_NONE
,
1782 tmp1
- last_vm_pageout_scan_inactive_throttled_internal
,
1783 tmp2
- last_vm_pageout_freed_after_compression
,
1784 tmp3
- last_vm_compressor_pages_grabbed
,
1785 tmp4
- last_vm_pageout_skipped_external
,
1788 vm_pageout_stats
[vm_pageout_stat_now
].throttled_internal_q
+= (tmp1
- last_vm_pageout_scan_inactive_throttled_internal
);
1789 vm_pageout_stats
[vm_pageout_stat_now
].pages_compressed
+= (tmp2
- last_vm_pageout_freed_after_compression
);
1790 vm_pageout_stats
[vm_pageout_stat_now
].pages_grabbed_by_compressor
+= (tmp3
- last_vm_compressor_pages_grabbed
);
1792 last_vm_pageout_scan_inactive_throttled_internal
= tmp1
;
1793 last_vm_pageout_freed_after_compression
= tmp2
;
1794 last_vm_compressor_pages_grabbed
= tmp3
;
1795 last_vm_pageout_skipped_external
= tmp4
;
1798 if ((vm_pageout_reactivation_limit_exceeded
- last_vm_pageout_reactivation_limit_exceeded
) == 0 &&
1799 (vm_pageout_inactive_force_reclaim
- last_vm_pageout_inactive_force_reclaim
) == 0 &&
1800 (vm_compressor_failed
- last_vm_compressor_failed
) == 0)
1803 tmp1
= vm_pageout_reactivation_limit_exceeded
;
1804 tmp2
= vm_pageout_inactive_force_reclaim
;
1805 tmp3
= vm_compressor_failed
;
1807 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM
, VM_INFO7
)) | DBG_FUNC_NONE
,
1808 tmp1
- last_vm_pageout_reactivation_limit_exceeded
,
1809 tmp2
- last_vm_pageout_inactive_force_reclaim
,
1810 tmp3
- last_vm_compressor_failed
,
1813 vm_pageout_stats
[vm_pageout_stat_now
].failed_compressions
+= (tmp3
- last_vm_compressor_failed
);
1815 last_vm_pageout_reactivation_limit_exceeded
= tmp1
;
1816 last_vm_pageout_inactive_force_reclaim
= tmp2
;
1817 last_vm_compressor_failed
= tmp3
;
1822 * vm_pageout_scan does the dirty work for the pageout daemon.
1823 * It returns with both vm_page_queue_free_lock and vm_page_queue_lock
1824 * held and vm_page_free_wanted == 0.
1827 vm_pageout_scan(void)
1829 unsigned int loop_count
= 0;
1830 unsigned int inactive_burst_count
= 0;
1831 unsigned int active_burst_count
= 0;
1832 unsigned int reactivated_this_call
;
1833 unsigned int reactivate_limit
;
1834 vm_page_t local_freeq
= NULL
;
1835 int local_freed
= 0;
1837 int delayed_unlock_limit
= 0;
1838 int refmod_state
= 0;
1839 int vm_pageout_deadlock_target
= 0;
1840 struct vm_pageout_queue
*iq
;
1841 struct vm_pageout_queue
*eq
;
1842 struct vm_speculative_age_q
*sq
;
1843 struct flow_control flow_control
= { 0, { 0, 0 } };
1844 boolean_t inactive_throttled
= FALSE
;
1845 boolean_t try_failed
;
1847 unsigned int msecs
= 0;
1848 vm_object_t object
= NULL
;
1849 uint32_t inactive_reclaim_run
;
1850 boolean_t exceeded_burst_throttle
;
1851 boolean_t grab_anonymous
= FALSE
;
1852 boolean_t force_anonymous
= FALSE
;
1853 boolean_t force_speculative_aging
= FALSE
;
1854 int anons_grabbed
= 0;
1855 int page_prev_q_state
= 0;
1856 #if CONFIG_BACKGROUND_QUEUE
1857 boolean_t page_from_bg_q
= FALSE
;
1859 int cache_evict_throttle
= 0;
1860 uint32_t vm_pageout_inactive_external_forced_reactivate_limit
= 0;
1861 int force_purge
= 0;
1862 #define DELAY_SPECULATIVE_AGE 1000
1863 int delay_speculative_age
= 0;
1864 vm_object_t m_object
= VM_OBJECT_NULL
;
1866 #if VM_PRESSURE_EVENTS
1867 vm_pressure_level_t pressure_level
;
1868 #endif /* VM_PRESSURE_EVENTS */
1870 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan
, VM_PAGEOUT_SCAN
, DBG_FUNC_START
,
1871 vm_pageout_speculative_clean
, vm_pageout_inactive_clean
,
1872 vm_pageout_inactive_dirty_internal
, vm_pageout_inactive_dirty_external
);
1874 flow_control
.state
= FCS_IDLE
;
1875 iq
= &vm_pageout_queue_internal
;
1876 eq
= &vm_pageout_queue_external
;
1877 sq
= &vm_page_queue_speculative
[VM_PAGE_SPECULATIVE_AGED_Q
];
1880 XPR(XPR_VM_PAGEOUT
, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
1882 /* Ask the pmap layer to return any pages it no longer needs. */
1883 pmap_release_pages_fast();
1885 vm_page_lock_queues();
1889 * Calculate the max number of referenced pages on the inactive
1890 * queue that we will reactivate.
1892 reactivated_this_call
= 0;
1893 reactivate_limit
= VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count
+
1894 vm_page_inactive_count
);
1895 inactive_reclaim_run
= 0;
1897 vm_pageout_inactive_external_forced_reactivate_limit
= vm_page_active_count
+ vm_page_inactive_count
;
1900 * We want to gradually dribble pages from the active queue
1901 * to the inactive queue. If we let the inactive queue get
1902 * very small, and then suddenly dump many pages into it,
1903 * those pages won't get a sufficient chance to be referenced
1904 * before we start taking them from the inactive queue.
1906 * We must limit the rate at which we send pages to the pagers
1907 * so that we don't tie up too many pages in the I/O queues.
1908 * We implement a throttling mechanism using the laundry count
1909 * to limit the number of pages outstanding to the default
1910 * and external pagers. We can bypass the throttles and look
1911 * for clean pages if the pageout queues don't drain in a timely
1912 * fashion since this may indicate that the pageout paths are
1913 * stalled waiting for memory, which only we can provide.
1919 assert(object
== NULL
);
1920 assert(delayed_unlock
!= 0);
1923 * Recalculate vm_page_inactivate_target.
1925 vm_page_inactive_target
= VM_PAGE_INACTIVE_TARGET(vm_page_active_count
+
1926 vm_page_inactive_count
+
1927 vm_page_speculative_count
);
1929 vm_page_anonymous_min
= vm_page_inactive_target
/ 20;
1933 * don't want to wake the pageout_scan thread up everytime we fall below
1934 * the targets... set a low water mark at 0.25% below the target
1936 vm_page_inactive_min
= vm_page_inactive_target
- (vm_page_inactive_target
/ 400);
1938 if (vm_page_speculative_percentage
> 50)
1939 vm_page_speculative_percentage
= 50;
1940 else if (vm_page_speculative_percentage
<= 0)
1941 vm_page_speculative_percentage
= 1;
1943 vm_page_speculative_target
= VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count
+
1944 vm_page_inactive_count
);
1951 DTRACE_VM2(rev
, int, 1, (uint64_t *), NULL
);
1953 if (vm_upl_wait_for_pages
< 0)
1954 vm_upl_wait_for_pages
= 0;
1956 delayed_unlock_limit
= VM_PAGEOUT_DELAYED_UNLOCK_LIMIT
+ vm_upl_wait_for_pages
;
1958 if (delayed_unlock_limit
> VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX
)
1959 delayed_unlock_limit
= VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX
;
#if CONFIG_SECLUDED_MEMORY
	/*
	 * Deal with secluded_q overflow.
	 */
	if (vm_page_secluded_count > vm_page_secluded_target) {
		unsigned int	secluded_overflow;
		vm_page_t	secluded_page;

		if (object != NULL) {
			vm_object_unlock(object);
			object = NULL;
			vm_pageout_scan_wants_object = VM_OBJECT_NULL;
		}
		/*
		 * SECLUDED_AGING_BEFORE_ACTIVE:
		 * Excess secluded pages go to the active queue and
		 * will later go to the inactive queue.
		 */
		active_burst_count = MIN(vm_pageout_burst_active_throttle,
					 vm_page_secluded_count_inuse);
		secluded_overflow = (vm_page_secluded_count -
				     vm_page_secluded_target);
		while (secluded_overflow-- > 0 &&
		       vm_page_secluded_count > vm_page_secluded_target) {
			assert((vm_page_secluded_count_free +
				vm_page_secluded_count_inuse) ==
			       vm_page_secluded_count);
			secluded_page = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
			assert(secluded_page->vm_page_q_state ==
			       VM_PAGE_ON_SECLUDED_Q);
			vm_page_queues_remove(secluded_page, FALSE);
			assert(!secluded_page->fictitious);
			assert(!VM_PAGE_WIRED(secluded_page));
			if (secluded_page->vm_page_object == 0) {
				/* transfer to free queue */
				assert(secluded_page->busy);
				secluded_page->snext = local_freeq;
				local_freeq = secluded_page;
				local_freed++;
			} else {
				/* transfer to head of active queue */
				vm_page_enqueue_active(secluded_page, FALSE);
				if (active_burst_count-- == 0) {
					vm_pageout_secluded_burst_count++;
					break;
				}
			}
			secluded_page = VM_PAGE_NULL;

			if (delayed_unlock++ > delayed_unlock_limit) {
				vm_pageout_delayed_unlock(&delayed_unlock, &local_freed, &local_freeq);
			}
		}
	}
#endif /* CONFIG_SECLUDED_MEMORY */
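	/*
	 * Illustrative sketch (not part of the original file): the routing
	 * decision made in the secluded-overflow loop above -- a secluded page
	 * with no object goes straight to the local free list, otherwise it is
	 * aged back through the active queue.  The enum and helper are
	 * hypothetical, for exposition only.
	 */
#if 0
enum secluded_route { ROUTE_FREE, ROUTE_ACTIVE };

static enum secluded_route
route_secluded_page(int has_object)
{
	return has_object ? ROUTE_ACTIVE : ROUTE_FREE;
}
#endif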
	assert(delayed_unlock);

	/*
	 * Move pages from active to inactive if we're below the target.
	 */
	if ((vm_page_inactive_count + vm_page_speculative_count) >= vm_page_inactive_target)
		goto done_moving_active_pages;

	if (object != NULL) {
		vm_object_unlock(object);
		object = NULL;
		vm_pageout_scan_wants_object = VM_OBJECT_NULL;
	}
	/*
	 * Don't sweep through the active queue by more than the throttle,
	 * which should be kept relatively low.
	 */
	active_burst_count = MIN(vm_pageout_burst_active_throttle, vm_page_active_count);

	VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_START,
		       vm_pageout_inactive, vm_pageout_inactive_used, vm_page_free_count, local_freed);

	VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_NONE,
		       vm_pageout_speculative_clean, vm_pageout_inactive_clean,
		       vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
	memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_START);

	while (!vm_page_queue_empty(&vm_page_queue_active) && active_burst_count--) {

		vm_pageout_active++;

		m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);

		assert(m->vm_page_q_state == VM_PAGE_ON_ACTIVE_Q);
		assert(!m->laundry);
		assert(VM_PAGE_OBJECT(m) != kernel_object);
		assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);

		DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);

		/*
		 * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise...
		 *
		 * a TLB flush isn't really needed here since at worst we'll miss the reference bit being
		 * updated in the PTE if a remote processor still has this mapping cached in its TLB when the
		 * new reference happens.  If no further references happen on the page after that remote TLB flush,
		 * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue
		 * by pageout_scan, which is just fine since the last reference would have happened quite far
		 * in the past (TLB caches don't hang around for very long), and of course could just as easily
		 * have happened before we moved the page.
		 */
		pmap_clear_refmod_options(VM_PAGE_GET_PHYS_PAGE(m), VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);

		/*
		 * The page might be absent or busy,
		 * but vm_page_deactivate can handle that.
		 * FALSE indicates that we don't want a H/W clear reference.
		 */
		vm_page_deactivate_internal(m, FALSE);

		if (delayed_unlock++ > delayed_unlock_limit) {
			vm_pageout_delayed_unlock(&delayed_unlock, &local_freed, &local_freeq);
		}
	}

	VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_END,
		       vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count, vm_page_inactive_target);
	memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_END);

	/**********************************************************************
	 * above this point we're playing with the active and secluded queues
	 * below this point we're playing with the throttling mechanisms
	 * and the inactive queue
	 **********************************************************************/
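	/*
	 * Illustrative sketch (not part of the original file): the burst limit
	 * used by the balancing loop above.  Each pass deactivates at most
	 * MIN(vm_pageout_burst_active_throttle, vm_page_active_count) pages so a
	 * single scan never sweeps the whole active queue.  Hypothetical helper.
	 */
#if 0
static unsigned int
active_burst_limit(unsigned int burst_throttle, unsigned int active_count)
{
	/* never sweep more than the throttle, never more than what's queued */
	return (burst_throttle < active_count) ? burst_throttle : active_count;
}
#endif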
done_moving_active_pages:

	if (vm_page_free_count + local_freed >= vm_page_free_target) {

		vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
					    VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
		/*
		 * make sure the pageout I/O threads are running
		 * throttled in case there are still requests
		 * in the laundry... since we have met our targets
		 * we don't need the laundry to be cleaned in a timely
		 * fashion... so let's avoid interfering with foreground
		 * activity
		 */
		vm_pageout_adjust_eq_iothrottle(eq, TRUE);

		/*
		 * recalculate vm_page_inactivate_target
		 */
		vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
								  vm_page_inactive_count +
								  vm_page_speculative_count);
#ifndef	CONFIG_EMBEDDED
		if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) &&
		    !vm_page_queue_empty(&vm_page_queue_active)) {
			/*
			 * inactive target still not met... keep going
			 * until we get the queues balanced...
			 */
			continue;
		}
#endif
		lck_mtx_lock(&vm_page_queue_free_lock);

		if ((vm_page_free_count >= vm_page_free_target) &&
		    (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
			/*
			 * done - we have met our target *and*
			 * there is no one waiting for a page.
			 */
return_from_scan:
			assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);

			VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE,
						vm_pageout_inactive, vm_pageout_inactive_used, 0, 0);
			VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END,
						vm_pageout_speculative_clean, vm_pageout_inactive_clean,
						vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);

			return;
		}
		lck_mtx_unlock(&vm_page_queue_free_lock);
	}
	/*
	 * Before anything, we check if we have any ripe volatile
	 * objects around.  If so, try to purge the first object.
	 * If the purge fails, fall through to reclaim a page instead.
	 * If the purge succeeds, go back to the top and reevaluate
	 * the new memory situation.
	 */
	assert(available_for_purge >= 0);
	force_purge = 0; /* no force-purging */

#if VM_PRESSURE_EVENTS
	pressure_level = memorystatus_vm_pressure_level;

	if (pressure_level > kVMPressureNormal) {

		if (pressure_level >= kVMPressureCritical) {
			force_purge = memorystatus_purge_on_critical;
		} else if (pressure_level >= kVMPressureUrgent) {
			force_purge = memorystatus_purge_on_urgent;
		} else if (pressure_level >= kVMPressureWarning) {
			force_purge = memorystatus_purge_on_warning;
		}
	}
#endif /* VM_PRESSURE_EVENTS */
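	/*
	 * Illustrative sketch (not part of the original file): the pressure-level
	 * to force-purge mapping coded just above, expressed as a standalone
	 * helper.  The numeric level constants are placeholders (the real values
	 * come from the kVMPressure* enum) and the token counts are passed in
	 * where the kernel reads the memorystatus_purge_on_* tunables.
	 */
#if 0
static unsigned int
purge_tokens_for_pressure(int level, unsigned int on_warning,
			  unsigned int on_urgent, unsigned int on_critical)
{
	/* placeholder ordering: normal < warning < urgent < critical */
	const int kNormal = 1, kWarning = 2, kUrgent = 3, kCritical = 4;

	if (level <= kNormal)
		return 0;		/* no force-purging */
	if (level >= kCritical)
		return on_critical;
	if (level >= kUrgent)
		return on_urgent;
	if (level >= kWarning)
		return on_warning;
	return 0;
}
#endif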
	if (available_for_purge || force_purge) {

		if (object != NULL) {
			vm_object_unlock(object);
			object = NULL;
		}

		memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_START);

		VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0);
		if (vm_purgeable_object_purge_one(force_purge, C_DONT_BLOCK)) {
			vm_pageout_purged_objects++;
			VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0);
			memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
			continue;
		}
		VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1);
		memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
	}
	if (vm_page_queue_empty(&sq->age_q) && vm_page_speculative_count) {
		/*
		 * try to pull pages from the aging bins...
		 * see vm_page.h for an explanation of how
		 * this mechanism works
		 */
		struct vm_speculative_age_q	*aq;
		boolean_t	can_steal = FALSE;
		int		num_scanned_queues;

		aq = &vm_page_queue_speculative[speculative_steal_index];

		num_scanned_queues = 0;
		while (vm_page_queue_empty(&aq->age_q) &&
		       num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) {

			speculative_steal_index++;

			if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
				speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;

			aq = &vm_page_queue_speculative[speculative_steal_index];
		}

		if (num_scanned_queues == VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) {
			/*
			 * XXX We've scanned all the speculative
			 * queues but still haven't found one
			 * that is not empty, even though
			 * vm_page_speculative_count is not 0.
			 *
			 * report the anomaly...
			 */
			printf("vm_pageout_scan: "
			       "all speculative queues empty "
			       "but count=%d.  Re-adjusting.\n",
			       vm_page_speculative_count);
			if (vm_page_speculative_count > vm_page_speculative_count_drift_max)
				vm_page_speculative_count_drift_max = vm_page_speculative_count;
			vm_page_speculative_count_drifts++;
#if DEVELOPMENT || DEBUG
			panic("vm_pageout_scan: vm_page_speculative_count=%d but queues are empty", vm_page_speculative_count);
#endif /* DEVELOPMENT || DEBUG */
			vm_page_speculative_count = 0;
			/* ... and continue */
		}

		if (vm_page_speculative_count > vm_page_speculative_target || force_speculative_aging == TRUE)
			can_steal = TRUE;
		else {
			if (!delay_speculative_age) {
				mach_timespec_t	ts_fully_aged;

				ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) / 1000;
				ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) % 1000)
							* 1000 * NSEC_PER_USEC;

				ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);

				clock_get_system_nanotime(&sec, &nsec);
				ts.tv_sec = (unsigned int) sec;
				ts.tv_nsec = nsec;

				if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0)
					can_steal = TRUE;
				else
					delay_speculative_age++;
			} else {
				delay_speculative_age++;
				if (delay_speculative_age == DELAY_SPECULATIVE_AGE)
					delay_speculative_age = 0;
			}
		}
		if (can_steal == TRUE)
			vm_page_speculate_ageit(aq);
	}
	force_speculative_aging = FALSE;
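	/*
	 * Illustrative sketch (not part of the original file): the "fully aged"
	 * interval computed above.  A speculative bin may be stolen once
	 * VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms
	 * milliseconds have elapsed past the bin's timestamp; the hypothetical
	 * helper below just converts that product into the seconds/nanoseconds
	 * split the code above performs.
	 */
#if 0
static void
speculative_fully_aged_interval(unsigned int max_age_q, unsigned int q_age_ms,
				unsigned long *out_sec, unsigned long *out_nsec)
{
	unsigned long total_ms = (unsigned long)max_age_q * q_age_ms;

	*out_sec  = total_ms / 1000;			/* whole seconds */
	*out_nsec = (total_ms % 1000) * 1000 * 1000;	/* remainder, ms -> ns */
}
#endif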
#if CONFIG_BACKGROUND_QUEUE
	if (vm_page_queue_empty(&sq->age_q) && cache_evict_throttle == 0 &&
	    ((vm_page_background_mode == VM_PAGE_BG_DISABLED) || (vm_page_background_count <= vm_page_background_target)))
#else
	if (vm_page_queue_empty(&sq->age_q) && cache_evict_throttle == 0)
#endif
	{
		int	pages_evicted;

		if (object != NULL) {
			vm_object_unlock(object);
			object = NULL;
		}
		pages_evicted = vm_object_cache_evict(100, 10);

		if (pages_evicted) {

			vm_pageout_cache_evicted += pages_evicted;

			VM_DEBUG_EVENT(vm_pageout_cache_evict, VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE,
				       vm_page_free_count, pages_evicted, vm_pageout_cache_evicted, 0);
			memoryshot(VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE);

			/*
			 * we just freed up to 100 pages,
			 * so go back to the top of the main loop
			 * and re-evaluate the memory situation
			 */
			continue;
		} else
			cache_evict_throttle = 1000;
	}
	if (cache_evict_throttle)
		cache_evict_throttle--;

	/*
	 * don't let the filecache_min fall below 15% of available memory
	 * on systems with an active compressor that isn't nearing its
	 * limits w/r to accepting new data
	 *
	 * on systems w/o the compressor/swapper, the filecache is always
	 * a very large percentage of the AVAILABLE_NON_COMPRESSED_MEMORY
	 * since most (if not all) of the anonymous pages are in the
	 * throttled queue (which isn't counted as available) which
	 * effectively disables this filter
	 */
	if (VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
		if (vm_compressor_low_on_space())
			vm_page_filecache_min = 0;
		else
			vm_page_filecache_min = (AVAILABLE_NON_COMPRESSED_MEMORY / 7);

		if (vm_compressor_out_of_space())
			vm_page_filecache_min = 0;
	} else {
		/*
		 * don't let the filecache_min fall below 33% of available memory...
		 */
		vm_page_filecache_min = (AVAILABLE_NON_COMPRESSED_MEMORY / 3);
	}
	if (vm_page_free_count < (vm_page_free_reserved / 4))
		vm_page_filecache_min = 0;
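	/*
	 * Illustrative sketch (not part of the original file): the file-cache
	 * floor chosen above.  With an active compressor that still has room,
	 * the floor is roughly 1/7 (~15%) of AVAILABLE_NON_COMPRESSED_MEMORY;
	 * without a compressor/swapper it is 1/3; and it collapses to 0 when the
	 * compressor is low on or out of space or the free list has dropped
	 * close to the reserve.  Hypothetical helper, boolean flags passed in.
	 */
#if 0
static unsigned int
filecache_floor(unsigned int avail_noncompressed, int compressor_active,
		int compressor_low_on_space, int compressor_out_of_space,
		int free_below_quarter_reserve)
{
	unsigned int floor;

	if (compressor_active) {
		floor = compressor_low_on_space ? 0 : (avail_noncompressed / 7);
		if (compressor_out_of_space)
			floor = 0;
	} else {
		floor = avail_noncompressed / 3;
	}
	if (free_below_quarter_reserve)
		floor = 0;
	return floor;
}
#endif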
	exceeded_burst_throttle = FALSE;
	/*
	 * Sometimes we have to pause:
	 *	1) No inactive pages - nothing to do.
	 *	2) Loop control - no acceptable pages found on the inactive queue
	 *	   within the last vm_pageout_burst_inactive_throttle iterations
	 *	3) Flow control - default pageout queue is full
	 */
	if (vm_page_queue_empty(&vm_page_queue_inactive) &&
	    vm_page_queue_empty(&vm_page_queue_anonymous) &&
	    vm_page_queue_empty(&sq->age_q)) {
		vm_pageout_scan_empty_throttle++;
		msecs = vm_pageout_empty_wait;
		goto vm_pageout_scan_delay;

	} else if (inactive_burst_count >=
		   MIN(vm_pageout_burst_inactive_throttle,
		       (vm_page_inactive_count +
			vm_page_speculative_count))) {
		vm_pageout_scan_burst_throttle++;
		msecs = vm_pageout_burst_wait;

		exceeded_burst_throttle = TRUE;
		goto vm_pageout_scan_delay;

	} else if (vm_page_free_count > (vm_page_free_reserved / 4) &&
		   VM_PAGEOUT_SCAN_NEEDS_TO_THROTTLE()) {
		vm_pageout_scan_swap_throttle++;
		msecs = vm_pageout_swap_wait;
		goto vm_pageout_scan_delay;

	} else if (VM_PAGE_Q_THROTTLED(iq) &&
		   VM_DYNAMIC_PAGING_ENABLED()) {
		switch (flow_control.state) {

		case FCS_IDLE:
			if ((vm_page_free_count + local_freed) < vm_page_free_target) {

				vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
							    VM_PAGEOUT_PB_THREAD_YIELD);
				if (!VM_PAGE_Q_THROTTLED(iq)) {
					vm_pageout_scan_yield_unthrottled++;
					continue;
				}
				if (vm_page_pageable_external_count > vm_page_filecache_min &&
				    !vm_page_queue_empty(&vm_page_queue_inactive)) {
					anons_grabbed = ANONS_GRABBED_LIMIT;
					vm_pageout_scan_throttle_deferred++;
					goto consider_inactive;
				}
				if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) && vm_page_active_count)
					continue;
			}
reset_deadlock_timer:
			ts.tv_sec = vm_pageout_deadlock_wait / 1000;
			ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
			clock_get_system_nanotime(&sec, &nsec);
			flow_control.ts.tv_sec = (unsigned int) sec;
			flow_control.ts.tv_nsec = nsec;
			ADD_MACH_TIMESPEC(&flow_control.ts, &ts);

			flow_control.state = FCS_DELAYED;
			msecs = vm_pageout_deadlock_wait;
			break;

		case FCS_DELAYED:
			clock_get_system_nanotime(&sec, &nsec);
			ts.tv_sec = (unsigned int) sec;
			ts.tv_nsec = nsec;

			if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
				/*
				 * the pageout thread for the default pager is potentially
				 * deadlocked since the default pager queue has been throttled
				 * for more than the allowable time... we need to move some clean
				 * pages or dirty pages belonging to the external pagers if they
				 * aren't throttled.  vm_page_free_wanted represents the number of
				 * threads currently blocked waiting for pages... we'll move one
				 * page for each of these plus a fixed amount to break the logjam...
				 * once we're done moving this number of pages, we'll re-enter the
				 * FCS_DELAYED state with a new timeout target since we have no way
				 * of knowing whether we've broken the deadlock except through
				 * observation of the queue associated with the default pager...
				 * we need to stop moving pages and allow the system to run to see
				 * what state it settles into.
				 */
				vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted + vm_page_free_wanted_privileged;
				vm_pageout_scan_deadlock_detected++;
				flow_control.state = FCS_DEADLOCK_DETECTED;
				thread_wakeup((event_t) &vm_pageout_garbage_collect);
				goto consider_inactive;
			}
			/*
			 * just resniff instead of trying
			 * to compute a new delay time... we're going to be
			 * awakened immediately upon a laundry completion,
			 * so we won't wait any longer than necessary
			 */
			msecs = vm_pageout_idle_wait;
			break;

		case FCS_DEADLOCK_DETECTED:
			if (vm_pageout_deadlock_target)
				goto consider_inactive;
			goto reset_deadlock_timer;
		}
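		/*
		 * Illustrative sketch (not part of the original file): the three
		 * flow-control states the switch above cycles through.  FCS_IDLE
		 * arms a deadlock timer and moves to FCS_DELAYED; if the internal
		 * queue is still throttled when the timer expires,
		 * FCS_DEADLOCK_DETECTED is entered and a relief quota of pages is
		 * pushed before the timer is re-armed.  The transition helper is
		 * hypothetical and mirrors only the control flow, not the real
		 * timekeeping or queue checks.
		 */
#if 0
enum fcs_sketch { SK_IDLE, SK_DELAYED, SK_DEADLOCK_DETECTED };	/* mirrors FCS_* */

static enum fcs_sketch
fcs_next(enum fcs_sketch cur, int timer_expired, int relief_quota_left)
{
	switch (cur) {
	case SK_IDLE:
		return SK_DELAYED;		/* arm the deadlock timer */
	case SK_DELAYED:
		return timer_expired ? SK_DEADLOCK_DETECTED : SK_DELAYED;
	case SK_DEADLOCK_DETECTED:
		/* push relief pages until the quota is spent, then re-arm */
		return relief_quota_left ? SK_DEADLOCK_DETECTED : SK_DELAYED;
	}
	return cur;
}
#endif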
vm_pageout_scan_delay:
		vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
					    VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);

		if (flow_control.state == FCS_DELAYED &&
		    !VM_PAGE_Q_THROTTLED(iq)) {
			flow_control.state = FCS_IDLE;
			goto consider_inactive;
		}

		if (vm_page_free_count >= vm_page_free_target) {
			/*
			 * we're here because
			 *  1) someone else freed up some pages while we had
			 *     the queues unlocked above
			 * and we've hit one of the 3 conditions that
			 * cause us to pause the pageout scan thread
			 *
			 * since we already have enough free pages,
			 * let's avoid stalling and return normally
			 *
			 * before we return, make sure the pageout I/O threads
			 * are running throttled in case there are still requests
			 * in the laundry... since we have enough free pages
			 * we don't need the laundry to be cleaned in a timely
			 * fashion... so let's avoid interfering with foreground
			 * activity
			 *
			 * we don't want to hold vm_page_queue_free_lock when
			 * calling vm_pageout_adjust_eq_iothrottle (since it
			 * may cause other locks to be taken), so we do the initial
			 * check outside of the lock.  Once we take the lock,
			 * we recheck the condition since it may have changed.
			 * if it has, no problem, we will make the threads
			 * non-throttled before actually blocking
			 */
			vm_pageout_adjust_eq_iothrottle(eq, TRUE);

			lck_mtx_lock(&vm_page_queue_free_lock);

			if (vm_page_free_count >= vm_page_free_target &&
			    (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
				goto return_from_scan;
			}
			lck_mtx_unlock(&vm_page_queue_free_lock);
		}
		if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) {
			/*
			 * we're most likely about to block due to one of
			 * the 3 conditions that cause vm_pageout_scan to
			 * not be able to make forward progress w/r
			 * to providing new pages to the free queue,
			 * so unthrottle the I/O threads in case we
			 * have laundry to be cleaned... it needs
			 * to be completed ASAP.
			 *
			 * even if we don't block, we want the io threads
			 * running unthrottled since the sum of free +
			 * clean pages is still under our free target
			 */
			vm_pageout_adjust_eq_iothrottle(eq, FALSE);
		}
		if (vm_page_cleaned_count > 0 && exceeded_burst_throttle == FALSE) {
			/*
			 * if we get here we're below our free target and
			 * we're stalling due to a full laundry queue or
			 * we don't have any inactive pages other than
			 * those in the clean queue...
			 * however, we have pages on the clean queue that
			 * can be moved to the free queue, so let's not
			 * stall the pageout scan
			 */
			flow_control.state = FCS_IDLE;
			goto consider_inactive;
		}
		VM_CHECK_MEMORYSTATUS;
2528 if (flow_control
.state
!= FCS_IDLE
)
2529 vm_pageout_scan_throttle
++;
2530 iq
->pgo_throttled
= TRUE
;
2532 assert_wait_timeout((event_t
) &iq
->pgo_laundry
, THREAD_INTERRUPTIBLE
, msecs
, 1000*NSEC_PER_USEC
);
2533 counter(c_vm_pageout_scan_block
++);
2535 vm_page_unlock_queues();
2537 assert(vm_pageout_scan_wants_object
== VM_OBJECT_NULL
);
2539 VM_DEBUG_EVENT(vm_pageout_thread_block
, VM_PAGEOUT_THREAD_BLOCK
, DBG_FUNC_START
,
2540 iq
->pgo_laundry
, iq
->pgo_maxlaundry
, msecs
, 0);
2541 memoryshot(VM_PAGEOUT_THREAD_BLOCK
, DBG_FUNC_START
);
2543 thread_block(THREAD_CONTINUE_NULL
);
2545 VM_DEBUG_EVENT(vm_pageout_thread_block
, VM_PAGEOUT_THREAD_BLOCK
, DBG_FUNC_END
,
2546 iq
->pgo_laundry
, iq
->pgo_maxlaundry
, msecs
, 0);
2547 memoryshot(VM_PAGEOUT_THREAD_BLOCK
, DBG_FUNC_END
);
2549 vm_page_lock_queues();
2551 iq
->pgo_throttled
= FALSE
;
2553 if (loop_count
>= vm_page_inactive_count
)
2555 inactive_burst_count
= 0;
2562 flow_control
.state
= FCS_IDLE
;
2564 vm_pageout_inactive_external_forced_reactivate_limit
= MIN((vm_page_active_count
+ vm_page_inactive_count
),
2565 vm_pageout_inactive_external_forced_reactivate_limit
);
2567 inactive_burst_count
++;
2568 vm_pageout_inactive
++;
2575 uint32_t inactive_external_count
;
2577 #if CONFIG_BACKGROUND_QUEUE
2578 page_from_bg_q
= FALSE
;
2579 #endif /* CONFIG_BACKGROUND_QUEUE */
2582 m_object
= VM_OBJECT_NULL
;
2584 if (VM_DYNAMIC_PAGING_ENABLED()) {
2585 assert(vm_page_throttled_count
== 0);
2586 assert(vm_page_queue_empty(&vm_page_queue_throttled
));
2590 * Try for a clean-queue inactive page.
2591 * These are pages that vm_pageout_scan tried to steal earlier, but
2592 * were dirty and had to be cleaned. Pick them up now that they are clean.
2594 if (!vm_page_queue_empty(&vm_page_queue_cleaned
)) {
2595 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_cleaned
);
2597 assert(m
->vm_page_q_state
== VM_PAGE_ON_INACTIVE_CLEANED_Q
);
2603 * The next most eligible pages are ones we paged in speculatively,
2604 * but which have not yet been touched and have been aged out.
2606 if (!vm_page_queue_empty(&sq
->age_q
)) {
2607 m
= (vm_page_t
) vm_page_queue_first(&sq
->age_q
);
2609 assert(m
->vm_page_q_state
== VM_PAGE_ON_SPECULATIVE_Q
);
2611 if (!m
->dirty
|| force_anonymous
== FALSE
)
2617 #if CONFIG_BACKGROUND_QUEUE
2618 if (vm_page_background_mode
!= VM_PAGE_BG_DISABLED
&& (vm_page_background_count
> vm_page_background_target
)) {
2619 vm_object_t bg_m_object
= NULL
;
2621 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_background
);
2623 bg_m_object
= VM_PAGE_OBJECT(m
);
2625 if (!VM_PAGE_PAGEABLE(m
)) {
2627 * This page is on the background queue
2628 * but not on a pageable queue. This is
2629 * likely a transient state and whoever
2630 * took it out of its pageable queue
2631 * will likely put it back on a pageable
2632 * queue soon but we can't deal with it
2633 * at this point, so let's ignore this
2636 } else if (force_anonymous
== FALSE
|| bg_m_object
->internal
) {
2638 if (bg_m_object
->internal
&&
2639 ((vm_compressor_out_of_space() == TRUE
) ||
2640 (vm_page_free_count
< (vm_page_free_reserved
/ 4)))) {
2642 vm_pageout_skipped_bq_internal
++;
2644 page_from_bg_q
= TRUE
;
2646 if (bg_m_object
->internal
)
2647 vm_pageout_considered_bq_internal
++;
2649 vm_pageout_considered_bq_external
++;
2657 grab_anonymous
= (vm_page_anonymous_count
> vm_page_anonymous_min
);
2658 inactive_external_count
= vm_page_inactive_count
- vm_page_anonymous_count
;
2660 if ((vm_page_pageable_external_count
< vm_page_filecache_min
|| force_anonymous
== TRUE
) ||
2661 ((inactive_external_count
< vm_page_anonymous_count
) && (inactive_external_count
< (vm_page_pageable_external_count
/ 3)))) {
2662 grab_anonymous
= TRUE
;
2665 vm_pageout_skipped_external
++;
2666 goto want_anonymous
;
2669 /* If the file-backed pool has accumulated
2670 * significantly more pages than the jetsam
2671 * threshold, prefer to reclaim those
2672 * inline to minimise compute overhead of reclaiming
2674 * This calculation does not account for the CPU local
2675 * external page queues, as those are expected to be
2676 * much smaller relative to the global pools.
2678 if (grab_anonymous
== TRUE
&& !VM_PAGE_Q_THROTTLED(eq
)) {
2679 if (vm_page_pageable_external_count
>
2680 vm_page_filecache_min
) {
2681 if ((vm_page_pageable_external_count
*
2682 vm_pageout_memorystatus_fb_factor_dr
) >
2683 (memorystatus_available_pages_critical
*
2684 vm_pageout_memorystatus_fb_factor_nr
)) {
2685 grab_anonymous
= FALSE
;
2686 #if DEVELOPMENT || DEBUG
2687 vm_grab_anon_overrides
++;
2691 #if DEVELOPMENT || DEBUG
2692 if (grab_anonymous
) {
2693 vm_grab_anon_nops
++;
2697 #endif /* CONFIG_JETSAM */
2700 if (grab_anonymous
== FALSE
|| anons_grabbed
>= ANONS_GRABBED_LIMIT
|| vm_page_queue_empty(&vm_page_queue_anonymous
)) {
2702 if ( !vm_page_queue_empty(&vm_page_queue_inactive
) ) {
2703 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_inactive
);
2705 assert(m
->vm_page_q_state
== VM_PAGE_ON_INACTIVE_EXTERNAL_Q
);
2708 if (vm_page_pageable_external_count
< vm_page_filecache_min
) {
2709 if ((++reactivated_this_call
% 100))
2710 goto must_activate_page
;
2712 * steal 1% of the file backed pages even if
2713 * we are under the limit that has been set
2714 * for a healthy filecache
2720 if ( !vm_page_queue_empty(&vm_page_queue_anonymous
) ) {
2721 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_anonymous
);
2723 assert(m
->vm_page_q_state
== VM_PAGE_ON_INACTIVE_INTERNAL_Q
);
2730 * if we've gotten here, we have no victim page.
2731 * check to see if we've not finished balancing the queues
2732 * or we have a page on the aged speculative queue that we
2733 * skipped due to force_anonymous == TRUE.. or we have
2734 * speculative pages that we can prematurely age... if
2735 * one of these cases we'll keep going, else panic
2737 force_anonymous
= FALSE
;
2738 vm_pageout_no_victim
++;
2740 if ((vm_page_inactive_count
+ vm_page_speculative_count
) < vm_page_inactive_target
)
2741 goto done_with_inactivepage
;
2743 if (!vm_page_queue_empty(&sq
->age_q
))
2744 goto done_with_inactivepage
;
2746 if (vm_page_speculative_count
) {
2747 force_speculative_aging
= TRUE
;
2748 goto done_with_inactivepage
;
2750 panic("vm_pageout: no victim");
2754 assert(VM_PAGE_PAGEABLE(m
));
2755 m_object
= VM_PAGE_OBJECT(m
);
2756 force_anonymous
= FALSE
;
2758 page_prev_q_state
= m
->vm_page_q_state
;
2760 * we just found this page on one of our queues...
2761 * it can't also be on the pageout queue, so safe
2762 * to call vm_page_queues_remove
2764 vm_page_queues_remove(m
, TRUE
);
2766 assert(!m
->laundry
);
2767 assert(!m
->private);
2768 assert(!m
->fictitious
);
2769 assert(m_object
!= kernel_object
);
2770 assert(VM_PAGE_GET_PHYS_PAGE(m
) != vm_page_guard_addr
);
2772 vm_pageout_stats
[vm_pageout_stat_now
].considered
++;
2773 vm_pageout_considered_page
++;
2775 DTRACE_VM2(scan
, int, 1, (uint64_t *), NULL
);
2778 * check to see if we currently are working
2779 * with the same object... if so, we've
2780 * already got the lock
2782 if (m_object
!= object
) {
2784 * the object associated with candidate page is
2785 * different from the one we were just working
2786 * with... dump the lock if we still own it
2788 if (object
!= NULL
) {
2789 vm_object_unlock(object
);
2791 vm_pageout_scan_wants_object
= VM_OBJECT_NULL
;
2794 * Try to lock object; since we've alread got the
2795 * page queues lock, we can only 'try' for this one.
2796 * if the 'try' fails, we need to do a mutex_pause
2797 * to allow the owner of the object lock a chance to
2798 * run... otherwise, we're likely to trip over this
2799 * object in the same state as we work our way through
2800 * the queue... clumps of pages associated with the same
2801 * object are fairly typical on the inactive and active queues
2803 if (!vm_object_lock_try_scan(m_object
)) {
2804 vm_page_t m_want
= NULL
;
2806 vm_pageout_inactive_nolock
++;
2808 if (page_prev_q_state
== VM_PAGE_ON_INACTIVE_CLEANED_Q
)
2809 vm_pageout_cleaned_nolock
++;
2811 pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m
));
2812 m
->reference
= FALSE
;
2814 #if !CONFIG_EMBEDDED
2816 * m->object must be stable since we hold the page queues lock...
2817 * we can update the scan_collisions field sans the object lock
2818 * since it is a separate field and this is the only spot that does
2819 * a read-modify-write operation and it is never executed concurrently...
2820 * we can asynchronously set this field to 0 when creating a UPL, so it
2821 * is possible for the value to be a bit non-determistic, but that's ok
2822 * since it's only used as a hint
2826 * This is not used on EMBEDDED because having this variable set *could* lead
2827 * us to self-cannibalize pages from m_object to fill a UPL for a pagein.
2828 * And, there's a high probability that the object that vm_pageout_scan
2829 * wants and collides on is a very popular object e.g. the shared cache on EMBEDDED.
2830 * The older pages that we cannibalize from the shared cache could be really
2831 * important text pages e.g. the system call stubs.
2833 m_object
->scan_collisions
= 1;
2834 #endif /* !CONFIG_EMBEDDED */
2836 if ( !vm_page_queue_empty(&sq
->age_q
) )
2837 m_want
= (vm_page_t
) vm_page_queue_first(&sq
->age_q
);
2838 else if ( !vm_page_queue_empty(&vm_page_queue_cleaned
))
2839 m_want
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_cleaned
);
2840 else if ( !vm_page_queue_empty(&vm_page_queue_inactive
) &&
2841 (anons_grabbed
>= ANONS_GRABBED_LIMIT
|| vm_page_queue_empty(&vm_page_queue_anonymous
)))
2842 m_want
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_inactive
);
2843 else if ( !vm_page_queue_empty(&vm_page_queue_anonymous
))
2844 m_want
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_anonymous
);
2847 * this is the next object we're going to be interested in
2848 * try to make sure its available after the mutex_yield
2852 vm_pageout_scan_wants_object
= VM_PAGE_OBJECT(m_want
);
2855 * force us to dump any collected free pages
2856 * and to pause before moving on
2863 vm_pageout_scan_wants_object
= VM_OBJECT_NULL
;
2867 assert(m_object
== object
);
2868 assert(VM_PAGE_OBJECT(m
) == m_object
);
2872 * Somebody is already playing with this page.
2873 * Put it back on the appropriate queue
2876 vm_pageout_inactive_busy
++;
2878 if (page_prev_q_state
== VM_PAGE_ON_INACTIVE_CLEANED_Q
)
2879 vm_pageout_cleaned_busy
++;
2881 if (page_prev_q_state
== VM_PAGE_ON_SPECULATIVE_Q
)
2882 vm_page_enqueue_inactive(m
, FALSE
);
2884 vm_page_activate(m
);
2885 #if CONFIG_BACKGROUND_QUEUE
2886 if (page_from_bg_q
== TRUE
) {
2887 if (m_object
->internal
)
2888 vm_pageout_rejected_bq_internal
++;
2890 vm_pageout_rejected_bq_external
++;
2893 goto done_with_inactivepage
;
2898 * If it's absent, in error or the object is no longer alive,
2899 * we can reclaim the page... in the no longer alive case,
2900 * there are 2 states the page can be in that preclude us
2901 * from reclaiming it - busy or cleaning - that we've already
2904 if (m
->absent
|| m
->error
|| !object
->alive
) {
2907 vm_pageout_inactive_absent
++;
2908 else if (!object
->alive
)
2909 vm_pageout_inactive_notalive
++;
2911 vm_pageout_inactive_error
++;
2913 if (vm_pageout_deadlock_target
) {
2914 vm_pageout_scan_inactive_throttle_success
++;
2915 vm_pageout_deadlock_target
--;
2918 DTRACE_VM2(dfree
, int, 1, (uint64_t *), NULL
);
2920 if (object
->internal
) {
2921 DTRACE_VM2(anonfree
, int, 1, (uint64_t *), NULL
);
2923 DTRACE_VM2(fsfree
, int, 1, (uint64_t *), NULL
);
2925 assert(!m
->cleaning
);
2926 assert(!m
->laundry
);
2931 * remove page from object here since we're already
2932 * behind the object lock... defer the rest of the work
2933 * we'd normally do in vm_page_free_prepare_object
2934 * until 'vm_page_free_list' is called
2937 vm_page_remove(m
, TRUE
);
2939 assert(m
->pageq
.next
== 0 && m
->pageq
.prev
== 0);
2940 m
->snext
= local_freeq
;
2944 if (page_prev_q_state
== VM_PAGE_ON_SPECULATIVE_Q
)
2945 vm_pageout_freed_from_speculative
++;
2946 else if (page_prev_q_state
== VM_PAGE_ON_INACTIVE_CLEANED_Q
)
2947 vm_pageout_freed_from_cleaned
++;
2949 vm_pageout_freed_from_inactive_clean
++;
2951 vm_pageout_stats
[vm_pageout_stat_now
].reclaimed_clean
++;
2953 inactive_burst_count
= 0;
2954 goto done_with_inactivepage
;
2957 * If the object is empty, the page must be reclaimed even
2959 * If the page belongs to a volatile object, we stick it back
2962 if (object
->copy
== VM_OBJECT_NULL
) {
2963 if (object
->purgable
== VM_PURGABLE_EMPTY
) {
2964 if (m
->pmapped
== TRUE
) {
2965 /* unmap the page */
2966 refmod_state
= pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m
));
2967 if (refmod_state
& VM_MEM_MODIFIED
) {
2968 SET_PAGE_DIRTY(m
, FALSE
);
2971 if (m
->dirty
|| m
->precious
) {
2972 /* we saved the cost of cleaning this page ! */
2973 vm_page_purged_count
++;
2978 if (VM_CONFIG_COMPRESSOR_IS_ACTIVE
) {
2980 * With the VM compressor, the cost of
2981 * reclaiming a page is much lower (no I/O),
2982 * so if we find a "volatile" page, it's better
2983 * to let it get compressed rather than letting
2984 * it occupy a full page until it gets purged.
2985 * So no need to check for "volatile" here.
2987 } else if (object
->purgable
== VM_PURGABLE_VOLATILE
) {
2989 * Avoid cleaning a "volatile" page which might
2993 /* if it's wired, we can't put it on our queue */
2994 assert(!VM_PAGE_WIRED(m
));
2996 /* just stick it back on! */
2997 reactivated_this_call
++;
2999 if (page_prev_q_state
== VM_PAGE_ON_INACTIVE_CLEANED_Q
)
3000 vm_pageout_cleaned_volatile_reactivated
++;
3002 goto reactivate_page
;
3006 * If it's being used, reactivate.
3007 * (Fictitious pages are either busy or absent.)
3008 * First, update the reference and dirty bits
3009 * to make sure the page is unreferenced.
3013 if (m
->reference
== FALSE
&& m
->pmapped
== TRUE
) {
3014 refmod_state
= pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m
));
3016 if (refmod_state
& VM_MEM_REFERENCED
)
3017 m
->reference
= TRUE
;
3018 if (refmod_state
& VM_MEM_MODIFIED
) {
3019 SET_PAGE_DIRTY(m
, FALSE
);
3024 * if (m->cleaning && !m->free_when_done)
3025 * If already cleaning this page in place and it hasn't
3026 * been recently referenced, just pull off the queue.
3027 * We can leave the page mapped, and upl_commit_range
3028 * will put it on the clean queue.
3030 * if (m->free_when_done && !m->cleaning)
3031 * an msync INVALIDATE is in progress...
3032 * this page has been marked for destruction
3033 * after it has been cleaned,
3034 * but not yet gathered into a UPL
3035 * where 'cleaning' will be set...
3036 * just leave it off the paging queues
3038 * if (m->free_when_done && m->clenaing)
3039 * an msync INVALIDATE is in progress
3040 * and the UPL has already gathered this page...
3041 * just leave it off the paging queues
3045 * page with m->free_when_done and still on the queues means that an
3046 * MS_INVALIDATE is in progress on this page... leave it alone
3048 if (m
->free_when_done
) {
3049 goto done_with_inactivepage
;
3052 /* if cleaning, reactivate if referenced. otherwise, just pull off queue */
3054 if (m
->reference
== TRUE
) {
3055 reactivated_this_call
++;
3056 goto reactivate_page
;
3058 goto done_with_inactivepage
;
3062 if (m
->reference
|| m
->dirty
) {
3063 /* deal with a rogue "reusable" page */
3064 VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m
, m_object
);
3068 #if CONFIG_BACKGROUND_QUEUE
3069 page_from_bg_q
== FALSE
&&
3072 (m
->xpmapped
&& !object
->internal
&& (vm_page_xpmapped_external_count
< (vm_page_external_count
/ 4))))) {
3074 * The page we pulled off the inactive list has
3075 * been referenced. It is possible for other
3076 * processors to be touching pages faster than we
3077 * can clear the referenced bit and traverse the
3078 * inactive queue, so we limit the number of
3081 if (++reactivated_this_call
>= reactivate_limit
) {
3082 vm_pageout_reactivation_limit_exceeded
++;
3083 } else if (++inactive_reclaim_run
>= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM
) {
3084 vm_pageout_inactive_force_reclaim
++;
3088 if (page_prev_q_state
== VM_PAGE_ON_INACTIVE_CLEANED_Q
)
3089 vm_pageout_cleaned_reference_reactivated
++;
3091 if ( !object
->internal
&& object
->pager
!= MEMORY_OBJECT_NULL
&&
3092 vnode_pager_get_isinuse(object
->pager
, &isinuse
) == KERN_SUCCESS
&& !isinuse
) {
3094 * no explict mappings of this object exist
3095 * and it's not open via the filesystem
3097 vm_page_deactivate(m
);
3098 vm_pageout_inactive_deactivated
++;
3102 * The page was/is being used, so put back on active list.
3104 vm_page_activate(m
);
3105 VM_STAT_INCR(reactivations
);
3106 inactive_burst_count
= 0;
3108 #if CONFIG_BACKGROUND_QUEUE
3109 if (page_from_bg_q
== TRUE
) {
3110 if (m_object
->internal
)
3111 vm_pageout_rejected_bq_internal
++;
3113 vm_pageout_rejected_bq_external
++;
3116 if (page_prev_q_state
== VM_PAGE_ON_INACTIVE_CLEANED_Q
)
3117 vm_pageout_cleaned_reactivated
++;
3118 vm_pageout_inactive_used
++;
3120 goto done_with_inactivepage
;
3123 * Make sure we call pmap_get_refmod() if it
3124 * wasn't already called just above, to update
3127 if ((refmod_state
== -1) && !m
->dirty
&& m
->pmapped
) {
3128 refmod_state
= pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m
));
3129 if (refmod_state
& VM_MEM_MODIFIED
) {
3130 SET_PAGE_DIRTY(m
, FALSE
);
3136 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
3137 object
, m
->offset
, m
, 0,0);
3140 * we've got a candidate page to steal...
3142 * m->dirty is up to date courtesy of the
3143 * preceding check for m->reference... if
3144 * we get here, then m->reference had to be
3145 * FALSE (or possibly "reactivate_limit" was
3146 * exceeded), but in either case we called
3147 * pmap_get_refmod() and updated both
3148 * m->reference and m->dirty
3150 * if it's dirty or precious we need to
3151 * see if the target queue is throtttled
3152 * it if is, we need to skip over it by moving it back
3153 * to the end of the inactive queue
3156 inactive_throttled
= FALSE
;
3158 if (m
->dirty
|| m
->precious
) {
3159 if (object
->internal
) {
3160 if (VM_PAGE_Q_THROTTLED(iq
))
3161 inactive_throttled
= TRUE
;
3162 } else if (VM_PAGE_Q_THROTTLED(eq
)) {
3163 inactive_throttled
= TRUE
;
3167 if (!VM_DYNAMIC_PAGING_ENABLED() &&
3168 object
->internal
&& m
->dirty
&&
3169 (object
->purgable
== VM_PURGABLE_DENY
||
3170 object
->purgable
== VM_PURGABLE_NONVOLATILE
||
3171 object
->purgable
== VM_PURGABLE_VOLATILE
)) {
3172 vm_page_check_pageable_safe(m
);
3173 assert(m
->vm_page_q_state
== VM_PAGE_NOT_ON_Q
);
3174 vm_page_queue_enter(&vm_page_queue_throttled
, m
,
3176 m
->vm_page_q_state
= VM_PAGE_ON_THROTTLED_Q
;
3177 vm_page_throttled_count
++;
3179 vm_pageout_scan_reclaimed_throttled
++;
3181 inactive_burst_count
= 0;
3182 goto done_with_inactivepage
;
3184 if (inactive_throttled
== TRUE
) {
3186 if (object
->internal
== FALSE
) {
3188 * we need to break up the following potential deadlock case...
3189 * a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written.
3190 * b) The thread doing the writing is waiting for pages while holding the truncate lock
3191 * c) Most of the pages in the inactive queue belong to this file.
3193 * we are potentially in this deadlock because...
3194 * a) the external pageout queue is throttled
3195 * b) we're done with the active queue and moved on to the inactive queue
3196 * c) we've got a dirty external page
3198 * since we don't know the reason for the external pageout queue being throttled we
3199 * must suspect that we are deadlocked, so move the current page onto the active queue
3200 * in an effort to cause a page from the active queue to 'age' to the inactive queue
3202 * if we don't have jetsam configured (i.e. we have a dynamic pager), set
3203 * 'force_anonymous' to TRUE to cause us to grab a page from the cleaned/anonymous
3204 * pool the next time we select a victim page... if we can make enough new free pages,
3205 * the deadlock will break, the external pageout queue will empty and it will no longer
3208 * if we have jetsam configured, keep a count of the pages reactivated this way so
3209 * that we can try to find clean pages in the active/inactive queues before
3210 * deciding to jetsam a process
3212 vm_pageout_scan_inactive_throttled_external
++;
3214 vm_page_check_pageable_safe(m
);
3215 assert(m
->vm_page_q_state
== VM_PAGE_NOT_ON_Q
);
3216 vm_page_queue_enter(&vm_page_queue_active
, m
, vm_page_t
, pageq
);
3217 m
->vm_page_q_state
= VM_PAGE_ON_ACTIVE_Q
;
3218 vm_page_active_count
++;
3219 vm_page_pageable_external_count
++;
3221 vm_pageout_adjust_eq_iothrottle(eq
, FALSE
);
3223 #if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
3224 vm_pageout_inactive_external_forced_reactivate_limit
--;
3226 if (vm_pageout_inactive_external_forced_reactivate_limit
<= 0) {
3227 vm_pageout_inactive_external_forced_reactivate_limit
= vm_page_active_count
+ vm_page_inactive_count
;
3229 * Possible deadlock scenario so request jetsam action
3232 vm_object_unlock(object
);
3233 object
= VM_OBJECT_NULL
;
3234 vm_page_unlock_queues();
3236 VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam
, VM_PAGEOUT_JETSAM
, DBG_FUNC_START
,
3237 vm_page_active_count
, vm_page_inactive_count
, vm_page_free_count
, vm_page_free_count
);
3239 /* Kill first suitable process. If this call returned FALSE, we might have simply purged a process instead. */
3240 if (memorystatus_kill_on_VM_page_shortage(FALSE
) == TRUE
) {
3241 vm_pageout_inactive_external_forced_jetsam_count
++;
3244 VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam
, VM_PAGEOUT_JETSAM
, DBG_FUNC_END
,
3245 vm_page_active_count
, vm_page_inactive_count
, vm_page_free_count
, vm_page_free_count
);
3247 vm_page_lock_queues();
3250 #else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
3251 force_anonymous
= TRUE
;
3253 inactive_burst_count
= 0;
3254 goto done_with_inactivepage
;
3256 vm_pageout_scan_inactive_throttled_internal
++;
3257 goto must_activate_page
;
3262 * we've got a page that we can steal...
3263 * eliminate all mappings and make sure
3264 * we have the up-to-date modified state
3266 * if we need to do a pmap_disconnect then we
3267 * need to re-evaluate m->dirty since the pmap_disconnect
3268 * provides the true state atomically... the
3269 * page was still mapped up to the pmap_disconnect
3270 * and may have been dirtied at the last microsecond
3272 * Note that if 'pmapped' is FALSE then the page is not
3273 * and has not been in any map, so there is no point calling
3274 * pmap_disconnect(). m->dirty could have been set in anticipation
3275 * of likely usage of the page.
3277 if (m
->pmapped
== TRUE
) {
3281 * Don't count this page as going into the compressor
3282 * if any of these are true:
3283 * 1) compressed pager isn't enabled
3284 * 2) Freezer enabled device with compressed pager
3285 * backend (exclusive use) i.e. most of the VM system
3286 * (including vm_pageout_scan) has no knowledge of
3288 * 3) This page belongs to a file and hence will not be
3289 * sent into the compressor
3291 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE
||
3292 object
->internal
== FALSE
) {
3294 } else if (m
->dirty
|| m
->precious
) {
3296 * VM knows that this page is dirty (or
3297 * precious) and needs to be compressed
3298 * rather than freed.
3299 * Tell the pmap layer to count this page
3302 pmap_options
= PMAP_OPTIONS_COMPRESSOR
;
3305 * VM does not know if the page needs to
3306 * be preserved but the pmap layer might tell
3307 * us if any mapping has "modified" it.
3308 * Let's the pmap layer to count this page
3309 * as compressed if and only if it has been
3313 PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED
;
3315 refmod_state
= pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m
),
3318 if (refmod_state
& VM_MEM_MODIFIED
) {
3319 SET_PAGE_DIRTY(m
, FALSE
);
3323 * reset our count of pages that have been reclaimed
3324 * since the last page was 'stolen'
3326 inactive_reclaim_run
= 0;
3329 * If it's clean and not precious, we can free the page.
3331 if (!m
->dirty
&& !m
->precious
) {
3333 if (page_prev_q_state
== VM_PAGE_ON_SPECULATIVE_Q
)
3334 vm_pageout_speculative_clean
++;
3336 if (page_prev_q_state
== VM_PAGE_ON_INACTIVE_INTERNAL_Q
)
3337 vm_pageout_inactive_anonymous
++;
3338 else if (page_prev_q_state
== VM_PAGE_ON_INACTIVE_CLEANED_Q
)
3339 vm_pageout_cleaned_reclaimed
++;
3341 vm_pageout_inactive_clean
++;
3344 * OK, at this point we have found a page we are going to free.
3346 #if CONFIG_PHANTOM_CACHE
3347 if (!object
->internal
)
3348 vm_phantom_cache_add_ghost(m
);
3354 * The page may have been dirtied since the last check
3355 * for a throttled target queue (which may have been skipped
3356 * if the page was clean then). With the dirty page
3357 * disconnected here, we can make one final check.
3359 if (object
->internal
) {
3360 if (VM_PAGE_Q_THROTTLED(iq
))
3361 inactive_throttled
= TRUE
;
3362 } else if (VM_PAGE_Q_THROTTLED(eq
)) {
3363 inactive_throttled
= TRUE
;
3366 if (inactive_throttled
== TRUE
)
3367 goto throttle_inactive
;
3369 #if VM_PRESSURE_EVENTS
3373 * If Jetsam is enabled, then the sending
3374 * of memory pressure notifications is handled
3375 * from the same thread that takes care of high-water
3376 * and other jetsams i.e. the memorystatus_thread.
3379 #else /* CONFIG_JETSAM */
3381 vm_pressure_response();
3383 #endif /* CONFIG_JETSAM */
3384 #endif /* VM_PRESSURE_EVENTS */
3386 if (page_prev_q_state
== VM_PAGE_ON_SPECULATIVE_Q
)
3387 vm_pageout_speculative_dirty
++;
3388 else if (page_prev_q_state
== VM_PAGE_ON_INACTIVE_INTERNAL_Q
)
3389 vm_pageout_inactive_anonymous
++;
3391 if (object
->internal
)
3392 vm_pageout_inactive_dirty_internal
++;
3394 vm_pageout_inactive_dirty_external
++;
3397 * do NOT set the pageout bit!
3398 * sure, we might need free pages, but this page is going to take time to become free
3399 * anyway, so we may as well put it on the clean queue first and take it from there later
3400 * if necessary. that way, we'll ensure we don't free up too much. -mj
3402 vm_pageout_cluster(m
);
3404 done_with_inactivepage
:
3406 if (delayed_unlock
++ > delayed_unlock_limit
|| try_failed
== TRUE
) {
3408 vm_pageout_prepare_to_block(&object
, &delayed_unlock
, &local_freeq
, &local_freed
,
3409 VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER
);
3410 if (try_failed
== TRUE
)
3411 lck_mtx_yield(&vm_page_queue_lock
);
3415 * back to top of pageout scan loop
int	vm_page_free_count_init;

void
vm_page_free_reserve(
	int pages)
{
	int	free_after_reserve;

	if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {

		if ((vm_page_free_reserved + pages + COMPRESSOR_FREE_RESERVED_LIMIT) >= (VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT))
			vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT;
		else
			vm_page_free_reserved += (pages + COMPRESSOR_FREE_RESERVED_LIMIT);

	} else {
		if ((vm_page_free_reserved + pages) >= VM_PAGE_FREE_RESERVED_LIMIT)
			vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT;
		else
			vm_page_free_reserved += pages;
	}
	free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;

	vm_page_free_min = vm_page_free_reserved +
		VM_PAGE_FREE_MIN(free_after_reserve);

	if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT)
		vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT;

	vm_page_free_target = vm_page_free_reserved +
		VM_PAGE_FREE_TARGET(free_after_reserve);

	if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT)
		vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT;

	if (vm_page_free_target < vm_page_free_min + 5)
		vm_page_free_target = vm_page_free_min + 5;

	vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 2);
}
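/*
 * Illustrative sketch (not part of the original file): the watermark chain
 * derived in vm_page_free_reserve() above -- the reserve feeds into
 * free_min and free_target, each capped by its own limit, with free_target
 * kept at least a few pages above free_min and the throttle limit set to
 * half the target.  The VM_PAGE_FREE_MIN/TARGET macros are stood in for by
 * simple placeholder fractions; the struct and helper are hypothetical.
 */
#if 0
struct free_watermarks { unsigned int min, target, throttle; };

static struct free_watermarks
derive_watermarks(unsigned int reserved, unsigned int free_count_init,
		  unsigned int min_limit, unsigned int target_limit)
{
	struct free_watermarks w;
	unsigned int after_reserve = free_count_init - reserved;

	w.min = reserved + after_reserve / 100;		/* placeholder for VM_PAGE_FREE_MIN() */
	if (w.min > min_limit)
		w.min = min_limit;
	w.target = reserved + after_reserve / 50;	/* placeholder for VM_PAGE_FREE_TARGET() */
	if (w.target > target_limit)
		w.target = target_limit;
	if (w.target < w.min + 5)
		w.target = w.min + 5;
	w.throttle = w.target - w.target / 2;
	return w;
}
#endif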
3463 * vm_pageout is the high level pageout daemon.
3467 vm_pageout_continue(void)
3469 DTRACE_VM2(pgrrun
, int, 1, (uint64_t *), NULL
);
3470 vm_pageout_scan_event_counter
++;
3472 #if !CONFIG_EMBEDDED
3473 lck_mtx_lock(&vm_page_queue_free_lock
);
3474 vm_pageout_running
= TRUE
;
3475 lck_mtx_unlock(&vm_page_queue_free_lock
);
3476 #endif /* CONFIG_EMBEDDED */
3480 * we hold both the vm_page_queue_free_lock
3481 * and the vm_page_queues_lock at this point
3483 assert(vm_page_free_wanted
== 0);
3484 assert(vm_page_free_wanted_privileged
== 0);
3485 assert_wait((event_t
) &vm_page_free_wanted
, THREAD_UNINT
);
3487 #if !CONFIG_EMBEDDED
3488 vm_pageout_running
= FALSE
;
3489 if (vm_pageout_waiter
) {
3490 vm_pageout_waiter
= FALSE
;
3491 thread_wakeup((event_t
)&vm_pageout_waiter
);
3493 #endif /* !CONFIG_EMBEDDED */
3495 lck_mtx_unlock(&vm_page_queue_free_lock
);
3496 vm_page_unlock_queues();
3498 counter(c_vm_pageout_block
++);
3499 thread_block((thread_continue_t
)vm_pageout_continue
);
3503 #if !CONFIG_EMBEDDED
3505 vm_pageout_wait(uint64_t deadline
)
3509 lck_mtx_lock(&vm_page_queue_free_lock
);
3510 for (kr
= KERN_SUCCESS
; vm_pageout_running
&& (KERN_SUCCESS
== kr
); ) {
3511 vm_pageout_waiter
= TRUE
;
3512 if (THREAD_AWAKENED
!= lck_mtx_sleep_deadline(
3513 &vm_page_queue_free_lock
, LCK_SLEEP_DEFAULT
,
3514 (event_t
) &vm_pageout_waiter
, THREAD_UNINT
, deadline
)) {
3515 kr
= KERN_OPERATION_TIMED_OUT
;
3518 lck_mtx_unlock(&vm_page_queue_free_lock
);
3522 #endif /* !CONFIG_EMBEDDED */
3526 vm_pageout_iothread_external_continue(struct vm_pageout_queue
*q
)
3530 vm_object_offset_t offset
;
3531 memory_object_t pager
;
3533 /* On systems without a compressor, the external IO thread clears its
3534 * VM privileged bit to accommodate large allocations (e.g. bulk UPL
3537 if (vm_pageout_internal_iothread
!= THREAD_NULL
)
3538 current_thread()->options
&= ~TH_OPT_VMPRIV
;
3540 vm_page_lockspin_queues();
3542 while ( !vm_page_queue_empty(&q
->pgo_pending
) ) {
3545 vm_page_queue_remove_first(&q
->pgo_pending
, m
, vm_page_t
, pageq
);
3547 assert(m
->vm_page_q_state
== VM_PAGE_ON_PAGEOUT_Q
);
3550 * grab a snapshot of the object and offset this
3551 * page is tabled in so that we can relookup this
3552 * page after we've taken the object lock - these
3553 * fields are stable while we hold the page queues lock
3554 * but as soon as we drop it, there is nothing to keep
3555 * this page in this object... we hold an activity_in_progress
3556 * on this object which will keep it from terminating
3558 object
= VM_PAGE_OBJECT(m
);
3561 if (object
->object_slid
) {
3562 panic("slid page %p not allowed on this path\n", m
);
3564 m
->vm_page_q_state
= VM_PAGE_NOT_ON_Q
;
3565 VM_PAGE_ZERO_PAGEQ_ENTRY(m
);
3567 vm_page_unlock_queues();
3569 vm_object_lock(object
);
3571 m
= vm_page_lookup(object
, offset
);
3574 m
->busy
|| m
->cleaning
|| !m
->laundry
|| (m
->vm_page_q_state
== VM_PAGE_ON_PAGEOUT_Q
)) {
3576 * it's either the same page that someone else has
3577 * started cleaning (or it's finished cleaning or
3578 * been put back on the pageout queue), or
3579 * the page has been freed or we have found a
3580 * new page at this offset... in all of these cases
3581 * we merely need to release the activity_in_progress
3582 * we took when we put the page on the pageout queue
3584 vm_object_activity_end(object
);
3585 vm_object_unlock(object
);
3587 vm_page_lockspin_queues();
3590 pager
= object
->pager
;
3592 if (pager
== MEMORY_OBJECT_NULL
) {
3594 * This pager has been destroyed by either
3595 * memory_object_destroy or vm_object_destroy, and
3596 * so there is nowhere for the page to go.
3598 if (m
->free_when_done
) {
3600 * Just free the page... VM_PAGE_FREE takes
3601 * care of cleaning up all the state...
3602 * including doing the vm_pageout_throttle_up
3606 vm_page_lockspin_queues();
3608 vm_pageout_throttle_up(m
);
3609 vm_page_activate(m
);
3611 vm_page_unlock_queues();
3614 * And we are done with it.
3617 vm_object_activity_end(object
);
3618 vm_object_unlock(object
);
3620 vm_page_lockspin_queues();
3625 * we don't hold the page queue lock
3626 * so this check isn't safe to make
3631 * give back the activity_in_progress reference we
3632 * took when we queued up this page and replace it
3633 * it with a paging_in_progress reference that will
3634 * also hold the paging offset from changing and
3635 * prevent the object from terminating
3637 vm_object_activity_end(object
);
3638 vm_object_paging_begin(object
);
3639 vm_object_unlock(object
);
3642 * Send the data to the pager.
3643 * any pageout clustering happens there
3645 memory_object_data_return(pager
,
3646 m
->offset
+ object
->paging_offset
,
3654 vm_object_lock(object
);
3655 vm_object_paging_end(object
);
3656 vm_object_unlock(object
);
3658 vm_pageout_io_throttle();
3660 vm_page_lockspin_queues();
3662 q
->pgo_busy
= FALSE
;
3665 assert_wait((event_t
) &q
->pgo_pending
, THREAD_UNINT
);
3666 vm_page_unlock_queues();
3668 thread_block_parameter((thread_continue_t
)vm_pageout_iothread_external_continue
, (void *) q
);
3673 #define MAX_FREE_BATCH 32
3674 uint32_t vm_compressor_time_thread
; /* Set via sysctl to record time accrued by
3679 #if DEVELOPMENT || DEBUG
3680 uint64_t compressor_epoch_start
, compressor_epoch_stop
, compressor_threads_runtime
;
3684 vm_pageout_iothread_internal_continue(struct cq
*);
3686 vm_pageout_iothread_internal_continue(struct cq
*cq
)
3688 struct vm_pageout_queue
*q
;
3690 boolean_t pgo_draining
;
3693 vm_page_t local_freeq
= NULL
;
3694 int local_freed
= 0;
3695 int local_batch_size
;
3697 #if DEVELOPMENT || DEBUG
3698 boolean_t marked_active
= FALSE
;
3700 KERNEL_DEBUG(0xe040000c | DBG_FUNC_END
, 0, 0, 0, 0, 0);
3703 local_batch_size
= q
->pgo_maxlaundry
/ (vm_compressor_thread_count
* 2);
3705 #if RECORD_THE_COMPRESSED_DATA
3707 c_compressed_record_init();
3710 int pages_left_on_q
= 0;
3715 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_START
, 0, 0, 0, 0, 0);
3717 vm_page_lock_queues();
3718 #if DEVELOPMENT || DEBUG
3719 if (marked_active
== FALSE
) {
3721 vmct_state
[cq
->id
] = VMCT_ACTIVE
;
3722 marked_active
= TRUE
;
3723 if (vmct_active
== 1) {
3724 compressor_epoch_start
= mach_absolute_time();
3728 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_END
, 0, 0, 0, 0, 0);
3730 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_START
, q
->pgo_laundry
, 0, 0, 0, 0);
3732 while ( !vm_page_queue_empty(&q
->pgo_pending
) && local_cnt
< local_batch_size
) {
3734 vm_page_queue_remove_first(&q
->pgo_pending
, m
, vm_page_t
, pageq
);
3735 assert(m
->vm_page_q_state
== VM_PAGE_ON_PAGEOUT_Q
);
3738 m
->vm_page_q_state
= VM_PAGE_NOT_ON_Q
;
3739 VM_PAGE_ZERO_PAGEQ_ENTRY(m
);
3746 if (local_q
== NULL
)
3751 if ((pgo_draining
= q
->pgo_draining
) == FALSE
) {
3752 vm_pageout_throttle_up_batch(q
, local_cnt
);
3753 pages_left_on_q
= q
->pgo_laundry
;
3755 pages_left_on_q
= q
->pgo_laundry
- local_cnt
;
3757 vm_page_unlock_queues();
3759 #if !RECORD_THE_COMPRESSED_DATA
3760 if (pages_left_on_q
>= local_batch_size
&& cq
->id
< (vm_compressor_thread_count
- 1)) {
3761 thread_wakeup((event_t
) ((uintptr_t)&q
->pgo_pending
+ cq
->id
+ 1));
3764 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END
, q
->pgo_laundry
, 0, 0, 0, 0);
3768 KERNEL_DEBUG(0xe0400024 | DBG_FUNC_START
, local_cnt
, 0, 0, 0, 0);
3774 if (vm_pageout_compress_page(&cq
->current_chead
, cq
->scratch_buf
, m
, FALSE
) == KERN_SUCCESS
) {
3776 m
->snext
= local_freeq
;
3780 if (local_freed
>= MAX_FREE_BATCH
) {
3781 vm_pageout_freed_after_compression
+= local_freed
;
3783 vm_page_free_list(local_freeq
, TRUE
);
3789 while (vm_page_free_count
< COMPRESSOR_FREE_RESERVED_LIMIT
) {
3790 kern_return_t wait_result
;
3791 int need_wakeup
= 0;
3794 vm_pageout_freed_after_compression
+= local_freed
;
3796 vm_page_free_list(local_freeq
, TRUE
);
3802 lck_mtx_lock_spin(&vm_page_queue_free_lock
);
3804 if (vm_page_free_count
< COMPRESSOR_FREE_RESERVED_LIMIT
) {
3806 if (vm_page_free_wanted_privileged
++ == 0)
3808 wait_result
= assert_wait((event_t
)&vm_page_free_wanted_privileged
, THREAD_UNINT
);
3810 lck_mtx_unlock(&vm_page_queue_free_lock
);
3813 thread_wakeup((event_t
)&vm_page_free_wanted
);
3815 if (wait_result
== THREAD_WAITING
)
3817 thread_block(THREAD_CONTINUE_NULL
);
3819 lck_mtx_unlock(&vm_page_queue_free_lock
);
3824 vm_pageout_freed_after_compression
+= local_freed
;
3826 vm_page_free_list(local_freeq
, TRUE
);
3830 if (pgo_draining
== TRUE
) {
3831 vm_page_lockspin_queues();
3832 vm_pageout_throttle_up_batch(q
, local_cnt
);
3833 vm_page_unlock_queues();
3836 KERNEL_DEBUG(0xe040000c | DBG_FUNC_START
, 0, 0, 0, 0, 0);
3839 * queue lock is held and our q is empty
3841 q
->pgo_busy
= FALSE
;
3844 assert_wait((event_t
) ((uintptr_t)&q
->pgo_pending
+ cq
->id
), THREAD_UNINT
);
3845 #if DEVELOPMENT || DEBUG
3846 if (marked_active
== TRUE
) {
3848 vmct_state
[cq
->id
] = VMCT_IDLE
;
3850 if (vmct_active
== 0) {
3851 compressor_epoch_stop
= mach_absolute_time();
3852 assert(compressor_epoch_stop
> compressor_epoch_start
);
3853 /* This interval includes intervals where one or more
3854 * compressor threads were pre-empted
3856 vmct_stats
.vmct_cthreads_total
+= compressor_epoch_stop
- compressor_epoch_start
;
3861 vm_page_unlock_queues();
3862 #if DEVELOPMENT || DEBUG
3863 if (__improbable(vm_compressor_time_thread
)) {
3864 vmct_stats
.vmct_runtimes
[cq
->id
] = thread_get_runtime_self();
3865 vmct_stats
.vmct_pages
[cq
->id
] += ncomps
;
3866 vmct_stats
.vmct_iterations
[cq
->id
]++;
3867 if (ncomps
> vmct_stats
.vmct_maxpages
[cq
->id
]) {
3868 vmct_stats
.vmct_maxpages
[cq
->id
] = ncomps
;
3870 if (ncomps
< vmct_stats
.vmct_minpages
[cq
->id
]) {
3871 vmct_stats
.vmct_minpages
[cq
->id
] = ncomps
;
3876 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END
, 0, 0, 0, 0, 0);
3878 thread_block_parameter((thread_continue_t
)vm_pageout_iothread_internal_continue
, (void *) cq
);
kern_return_t
vm_pageout_compress_page(void **current_chead, char *scratch_buf, vm_page_t m, boolean_t object_locked_by_caller)
{
    vm_object_t     object;
    memory_object_t pager;
    int             compressed_count_delta;
    kern_return_t   retval;

    object = VM_PAGE_OBJECT(m);

    if (object->object_slid) {
        panic("slid page %p not allowed on this path\n", m);
    }
    assert(!m->free_when_done);
    assert(!m->laundry);

    pager = object->pager;

    if (object_locked_by_caller == FALSE && (!object->pager_initialized || pager == MEMORY_OBJECT_NULL)) {

        KERNEL_DEBUG(0xe0400010 | DBG_FUNC_START, object, pager, 0, 0, 0);

        vm_object_lock(object);

        /*
         * If there is no memory object for the page, create
         * one and hand it to the compression pager.
         */
        if (!object->pager_initialized)
            vm_object_collapse(object, (vm_object_offset_t) 0, TRUE);
        if (!object->pager_initialized)
            vm_object_compressor_pager_create(object);

        pager = object->pager;

        if (!object->pager_initialized || pager == MEMORY_OBJECT_NULL) {
            /*
             * Still no pager for the object,
             * or the pager has been destroyed.
             * Reactivate the page.
             *
             * Should only happen if there is no
             * compression pager
             */
            PAGE_WAKEUP_DONE(m);

            vm_page_lockspin_queues();
            vm_page_activate(m);
            vm_pageout_dirty_no_pager++;
            vm_page_unlock_queues();

            /*
             * And we are done with it.
             */
            vm_object_activity_end(object);
            vm_object_unlock(object);

            return KERN_FAILURE;
        }
        vm_object_unlock(object);

        KERNEL_DEBUG(0xe0400010 | DBG_FUNC_END, object, pager, 0, 0, 0);
    }
    assert(object->pager_initialized && pager != MEMORY_OBJECT_NULL);

    if (object_locked_by_caller == FALSE)
        assert(object->activity_in_progress > 0);

    retval = vm_compressor_pager_put(
        pager,
        m->offset + object->paging_offset,
        VM_PAGE_GET_PHYS_PAGE(m),
        current_chead,
        scratch_buf,
        &compressed_count_delta);

    if (object_locked_by_caller == FALSE) {
        vm_object_lock(object);

        assert(object->activity_in_progress > 0);
        assert(VM_PAGE_OBJECT(m) == object);
    }

    vm_compressor_pager_count(pager,
                              compressed_count_delta,
                              FALSE, /* shared_lock */
                              object);

    assert( !VM_PAGE_WIRED(m));

    if (retval == KERN_SUCCESS) {
        /*
         * If the object is purgeable, its owner's
         * purgeable ledgers will be updated in
         * vm_page_remove() but the page still
         * contributes to the owner's memory footprint,
         * so account for it as such.
         */
        if (object->purgable != VM_PURGABLE_DENY &&
            object->vo_purgeable_owner != NULL) {
            /* one more compressed purgeable page */
            vm_purgeable_compressed_update(object, +1);
        }
        VM_STAT_INCR(compressions);

        vm_page_remove(m, TRUE);

    } else {
        PAGE_WAKEUP_DONE(m);

        vm_page_lockspin_queues();

        vm_page_activate(m);
        vm_compressor_failed++;

        vm_page_unlock_queues();
    }
    if (object_locked_by_caller == FALSE) {
        vm_object_activity_end(object);
        vm_object_unlock(object);
    }
    return retval;
}
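
/*
 * Illustrative caller sketch (matches the use in
 * vm_pageout_iothread_internal_continue() above): each compressor thread
 * passes its per-thread compression head and scratch buffer, and frees the
 * page only on a successful put.
 *
 *      if (vm_pageout_compress_page(&cq->current_chead,
 *                                   cq->scratch_buf, m, FALSE) == KERN_SUCCESS) {
 *              ... add m to the local free list ...
 *      }
 */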
static void
vm_pageout_adjust_eq_iothrottle(struct vm_pageout_queue *eq, boolean_t req_lowpriority)
{
    uint32_t policy;

    if (hibernate_cleaning_in_progress == TRUE)
        req_lowpriority = FALSE;

    if (eq->pgo_inited == TRUE && eq->pgo_lowpriority != req_lowpriority) {

        vm_page_unlock_queues();

        if (req_lowpriority == TRUE) {
            policy = THROTTLE_LEVEL_PAGEOUT_THROTTLED;
            DTRACE_VM(laundrythrottle);
        } else {
            policy = THROTTLE_LEVEL_PAGEOUT_UNTHROTTLED;
            DTRACE_VM(laundryunthrottle);
        }
        proc_set_thread_policy_with_tid(kernel_task, eq->pgo_tid,
                                        TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy);

        eq->pgo_lowpriority = req_lowpriority;

        vm_page_lock_queues();
    }
}
static void
vm_pageout_iothread_external(void)
{
    thread_t self = current_thread();

    self->options |= TH_OPT_VMPRIV;

    DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL);

    proc_set_thread_policy(self, TASK_POLICY_EXTERNAL,
                           TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED);

    vm_page_lock_queues();

    vm_pageout_queue_external.pgo_tid = self->thread_id;
    vm_pageout_queue_external.pgo_lowpriority = TRUE;
    vm_pageout_queue_external.pgo_inited = TRUE;

    vm_page_unlock_queues();

    vm_pageout_iothread_external_continue(&vm_pageout_queue_external);

    /*NOTREACHED*/
}
static void
vm_pageout_iothread_internal(struct cq *cq)
{
    thread_t self = current_thread();

    self->options |= TH_OPT_VMPRIV;

    vm_page_lock_queues();

    vm_pageout_queue_internal.pgo_tid = self->thread_id;
    vm_pageout_queue_internal.pgo_lowpriority = TRUE;
    vm_pageout_queue_internal.pgo_inited = TRUE;

    vm_page_unlock_queues();

    if (vm_restricted_to_single_processor == TRUE)
        thread_vm_bind_group_add();

    thread_set_thread_name(current_thread(), "VM_compressor");
#if DEVELOPMENT || DEBUG
    vmct_stats.vmct_minpages[cq->id] = INT32_MAX;
#endif
    vm_pageout_iothread_internal_continue(cq);

    /*NOTREACHED*/
}
kern_return_t
vm_set_buffer_cleanup_callout(boolean_t (*func)(int))
{
    if (OSCompareAndSwapPtr(NULL, func, (void * volatile *) &consider_buffer_cache_collect)) {
        return KERN_SUCCESS;
    } else {
        return KERN_FAILURE; /* Already set */
    }
}
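
/*
 * Usage sketch (illustrative): the buffer cache registers its collector
 * exactly once during bootstrap; a second registration fails because the
 * OSCompareAndSwapPtr() above only succeeds while the callout pointer is
 * still NULL.  The callback name below is hypothetical.
 *
 *      extern boolean_t bufattr_collect(int);  // hypothetical callback
 *
 *      if (vm_set_buffer_cleanup_callout(bufattr_collect) != KERN_SUCCESS)
 *              printf("buffer cleanup callout already registered\n");
 */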
extern boolean_t memorystatus_manual_testing_on;
extern unsigned int memorystatus_level;

#if VM_PRESSURE_EVENTS

boolean_t vm_pressure_events_enabled = FALSE;

void
vm_pressure_response(void)
{
    vm_pressure_level_t old_level = kVMPressureNormal;
    int                 new_level = -1;
    unsigned int        total_pages;
    uint64_t            available_memory = 0;

    if (vm_pressure_events_enabled == FALSE)
        return;

#if CONFIG_EMBEDDED

    available_memory = (uint64_t) memorystatus_available_pages;

#else /* CONFIG_EMBEDDED */

    available_memory = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY;
    memorystatus_available_pages = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY;

#endif /* CONFIG_EMBEDDED */

    total_pages = (unsigned int) atop_64(max_mem);
#if CONFIG_SECLUDED_MEMORY
    total_pages -= vm_page_secluded_count;
#endif /* CONFIG_SECLUDED_MEMORY */
    memorystatus_level = (unsigned int) ((available_memory * 100) / total_pages);

    if (memorystatus_manual_testing_on) {
        return;
    }

    old_level = memorystatus_vm_pressure_level;

    switch (memorystatus_vm_pressure_level) {

    case kVMPressureNormal:
        if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
            new_level = kVMPressureCritical;
        } else if (VM_PRESSURE_NORMAL_TO_WARNING()) {
            new_level = kVMPressureWarning;
        }
        break;

    case kVMPressureWarning:
    case kVMPressureUrgent:
        if (VM_PRESSURE_WARNING_TO_NORMAL()) {
            new_level = kVMPressureNormal;
        } else if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
            new_level = kVMPressureCritical;
        }
        break;

    case kVMPressureCritical:
        if (VM_PRESSURE_WARNING_TO_NORMAL()) {
            new_level = kVMPressureNormal;
        } else if (VM_PRESSURE_CRITICAL_TO_WARNING()) {
            new_level = kVMPressureWarning;
        }
        break;

    default:
        return;
    }

    if (new_level != -1) {
        memorystatus_vm_pressure_level = (vm_pressure_level_t) new_level;

        if ((memorystatus_vm_pressure_level != kVMPressureNormal) || (old_level != new_level)) {
            if (vm_pressure_thread_running == FALSE) {
                thread_wakeup(&vm_pressure_thread);
            }

            if (old_level != new_level) {
                thread_wakeup(&vm_pressure_changed);
            }
        }
    }
}
#endif /* VM_PRESSURE_EVENTS */
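
/*
 * Worked example for the memorystatus_level computation above (illustrative
 * numbers): with max_mem = 4GB (1048576 4K pages) and 262144 available
 * non-compressed pages, memorystatus_level = (262144 * 100) / 1048576 = 25,
 * i.e. roughly 25% of memory is considered available.
 */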
kern_return_t
mach_vm_pressure_level_monitor(__unused boolean_t wait_for_pressure, __unused unsigned int *pressure_level) {

#if CONFIG_EMBEDDED

    return KERN_FAILURE;

#elif !VM_PRESSURE_EVENTS

    return KERN_FAILURE;

#else /* VM_PRESSURE_EVENTS */

    kern_return_t kr = KERN_SUCCESS;

    if (pressure_level != NULL) {

        vm_pressure_level_t old_level = memorystatus_vm_pressure_level;

        if (wait_for_pressure == TRUE) {
            wait_result_t wr = 0;

            while (old_level == *pressure_level) {
                wr = assert_wait((event_t) &vm_pressure_changed,
                                 THREAD_INTERRUPTIBLE);
                if (wr == THREAD_WAITING) {
                    wr = thread_block(THREAD_CONTINUE_NULL);
                }
                if (wr == THREAD_INTERRUPTED) {
                    return KERN_ABORTED;
                }
                if (wr == THREAD_AWAKENED) {

                    old_level = memorystatus_vm_pressure_level;

                    if (old_level != *pressure_level) {
                        break;
                    }
                }
            }
        }

        *pressure_level = old_level;

    } else {
        kr = KERN_INVALID_ARGUMENT;
    }

    return kr;
#endif /* VM_PRESSURE_EVENTS */
}
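
/*
 * Note on the wait path above: with wait_for_pressure == TRUE the caller
 * parks on vm_pressure_changed until memorystatus_vm_pressure_level differs
 * from the level it passed in; an interrupted wait surfaces as KERN_ABORTED,
 * and a NULL pressure_level pointer is rejected with KERN_INVALID_ARGUMENT.
 */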
#if VM_PRESSURE_EVENTS
void
vm_pressure_thread(void) {
    static boolean_t thread_initialized = FALSE;

    if (thread_initialized == TRUE) {
        vm_pressure_thread_running = TRUE;
        consider_vm_pressure_events();
        vm_pressure_thread_running = FALSE;
    }

    thread_initialized = TRUE;
    assert_wait((event_t) &vm_pressure_thread, THREAD_UNINT);
    thread_block((thread_continue_t)vm_pressure_thread);
}
#endif /* VM_PRESSURE_EVENTS */
uint32_t vm_pageout_considered_page_last = 0;

/*
 * called once per-second via "compute_averages"
 */
void
compute_pageout_gc_throttle(__unused void *arg)
{
    if (vm_pageout_considered_page != vm_pageout_considered_page_last) {

        vm_pageout_considered_page_last = vm_pageout_considered_page;

        thread_wakeup((event_t) &vm_pageout_garbage_collect);
    }
}
/*
 * vm_pageout_garbage_collect can also be called when the zone allocator needs
 * to call zone_gc on a different thread in order to trigger zone-map-exhaustion
 * jetsams. We need to check if the zone map size is above its jetsam limit to
 * decide if this was indeed the case.
 *
 * We need to do this on a different thread because of the following reasons:
 *
 * 1. In the case of synchronous jetsams, the leaking process can try to jetsam
 * itself causing the system to hang. We perform synchronous jetsams if we're
 * leaking in the VM map entries zone, so the leaking process could be doing a
 * zalloc for a VM map entry while holding its vm_map lock, when it decides to
 * jetsam itself. We also need the vm_map lock on the process termination path,
 * which would now lead the dying process to deadlock against itself.
 *
 * 2. The jetsam path might need to allocate zone memory itself. We could try
 * using the non-blocking variant of zalloc for this path, but we can still
 * end up trying to do a kernel_memory_allocate when the zone_map is almost
 * full.
 */

extern boolean_t is_zone_map_nearing_exhaustion(void);

void
vm_pageout_garbage_collect(int collect)
{
    if (collect) {
        if (is_zone_map_nearing_exhaustion()) {
            /*
             * Woken up by the zone allocator for zone-map-exhaustion jetsams.
             *
             * Bail out after calling zone_gc (which triggers the
             * zone-map-exhaustion jetsams). If we fall through, the subsequent
             * operations that clear out a bunch of caches might allocate zone
             * memory themselves (for eg. vm_map operations would need VM map
             * entries). Since the zone map is almost full at this point, we
             * could end up with a panic. We just need to quickly jetsam a
             * process and exit here.
             *
             * It could so happen that we were woken up to relieve memory
             * pressure and the zone map also happened to be near its limit at
             * the time, in which case we'll skip out early. But that should be
             * ok; if memory pressure persists, the thread will simply be woken
             * up again.
             */
            consider_zone_gc(TRUE);

        } else {
            /* Woken up by vm_pageout_scan or compute_pageout_gc_throttle. */
            boolean_t buf_large_zfree = FALSE;
            boolean_t first_try = TRUE;

            consider_machine_collect();

            do {
                if (consider_buffer_cache_collect != NULL) {
                    buf_large_zfree = (*consider_buffer_cache_collect)(0);
                }
                if (first_try == TRUE || buf_large_zfree == TRUE) {
                    /*
                     * consider_zone_gc should be last, because the other operations
                     * might return memory to zones.
                     */
                    consider_zone_gc(FALSE);
                }
                first_try = FALSE;

            } while (buf_large_zfree == TRUE && vm_page_free_count < vm_page_free_target);

            consider_machine_adjust();
        }
    }

    assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);

    thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
    /*NOTREACHED*/
}
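
/*
 * Note on the do/while loop above: the buffer cache collector is retried only
 * while it keeps reporting large zone frees (buf_large_zfree == TRUE) and the
 * free page count remains below vm_page_free_target, so a single unproductive
 * pass ends the collection cycle.
 */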
#if VM_PAGE_BUCKETS_CHECK
#if VM_PAGE_FAKE_BUCKETS
extern vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
#endif /* VM_PAGE_FAKE_BUCKETS */
#endif /* VM_PAGE_BUCKETS_CHECK */



void
vm_set_restrictions()
{
    host_basic_info_data_t hinfo;
    mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;

    host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);

    assert(hinfo.max_cpus > 0);

    if (hinfo.max_cpus <= 3) {
        /*
         * on systems with a limited number of CPUS, bind the
         * 4 major threads that can free memory and that tend to use
         * a fair bit of CPU under pressured conditions to a single processor.
         * This ensures that these threads don't hog all of the available CPUs
         * (important for camera launch), while allowing them to run independently
         * w/r to locks... the 4 threads are
         * vm_pageout_scan, vm_pageout_iothread_internal (compressor),
         * vm_compressor_swap_trigger_thread (minor and major compactions),
         * memorystatus_thread (jetsams).
         *
         * the first time the thread is run, it is responsible for checking the
         * state of vm_restricted_to_single_processor, and if TRUE it calls
         * thread_bind_master... someday this should be replaced with a group
         * scheduling mechanism and KPI.
         */
        vm_restricted_to_single_processor = TRUE;
    }
}
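
/*
 * Example (illustrative): on a device reporting hinfo.max_cpus == 2, the
 * check above sets vm_restricted_to_single_processor, and each of the four
 * threads listed in the comment calls thread_vm_bind_group_add() the first
 * time it runs.
 */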
void
vm_pageout(void)
{
    thread_t        self = current_thread();
    thread_t        thread;
    kern_return_t   result;

    /*
     * Set thread privileges.
     */
    thread_lock(self);
    self->options |= TH_OPT_VMPRIV;
    sched_set_thread_base_priority(self, BASEPRI_VM);
    thread_unlock(self);

    if (!self->reserved_stack)
        self->reserved_stack = self->kernel_stack;

    if (vm_restricted_to_single_processor == TRUE)
        thread_vm_bind_group_add();

    thread_set_thread_name(current_thread(), "VM_pageout_scan");

    /*
     * Initialize some paging parameters.
     */

    if (vm_pageout_swap_wait == 0)
        vm_pageout_swap_wait = VM_PAGEOUT_SWAP_WAIT;

    if (vm_pageout_idle_wait == 0)
        vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;

    if (vm_pageout_burst_wait == 0)
        vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;

    if (vm_pageout_empty_wait == 0)
        vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;

    if (vm_pageout_deadlock_wait == 0)
        vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;

    if (vm_pageout_deadlock_relief == 0)
        vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;

    if (vm_pageout_inactive_relief == 0)
        vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF;

    if (vm_pageout_burst_active_throttle == 0)
        vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE;

    if (vm_pageout_burst_inactive_throttle == 0)
        vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;

    /*
     * Set kernel task to low backing store privileged
     */
    task_lock(kernel_task);
    kernel_task->priv_flags |= VM_BACKING_STORE_PRIV;
    task_unlock(kernel_task);

    vm_page_free_count_init = vm_page_free_count;

    /*
     * even if we've already called vm_page_free_reserve
     * call it again here to ensure that the targets are
     * accurately calculated (it uses vm_page_free_count_init)
     * calling it with an arg of 0 will not change the reserve
     * but will re-calculate free_min and free_target
     */
    if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
        vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
    } else
        vm_page_free_reserve(0);


    vm_page_queue_init(&vm_pageout_queue_external.pgo_pending);
    vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
    vm_pageout_queue_external.pgo_laundry = 0;
    vm_pageout_queue_external.pgo_idle = FALSE;
    vm_pageout_queue_external.pgo_busy = FALSE;
    vm_pageout_queue_external.pgo_throttled = FALSE;
    vm_pageout_queue_external.pgo_draining = FALSE;
    vm_pageout_queue_external.pgo_lowpriority = FALSE;
    vm_pageout_queue_external.pgo_tid = -1;
    vm_pageout_queue_external.pgo_inited = FALSE;

    vm_page_queue_init(&vm_pageout_queue_internal.pgo_pending);
    vm_pageout_queue_internal.pgo_maxlaundry = 0;
    vm_pageout_queue_internal.pgo_laundry = 0;
    vm_pageout_queue_internal.pgo_idle = FALSE;
    vm_pageout_queue_internal.pgo_busy = FALSE;
    vm_pageout_queue_internal.pgo_throttled = FALSE;
    vm_pageout_queue_internal.pgo_draining = FALSE;
    vm_pageout_queue_internal.pgo_lowpriority = FALSE;
    vm_pageout_queue_internal.pgo_tid = -1;
    vm_pageout_queue_internal.pgo_inited = FALSE;

    /* internal pageout thread started when default pager registered first time */
    /* external pageout and garbage collection threads started here */

    result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL,
                                          BASEPRI_VM,
                                          &vm_pageout_external_iothread);
    if (result != KERN_SUCCESS)
        panic("vm_pageout_iothread_external: create failed");

    thread_deallocate(vm_pageout_external_iothread);

    result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL,
                                          BASEPRI_DEFAULT,
                                          &thread);
    if (result != KERN_SUCCESS)
        panic("vm_pageout_garbage_collect: create failed");

    thread_deallocate(thread);

#if VM_PRESSURE_EVENTS
    result = kernel_thread_start_priority((thread_continue_t)vm_pressure_thread, NULL,
                                          BASEPRI_DEFAULT,
                                          &thread);

    if (result != KERN_SUCCESS)
        panic("vm_pressure_thread: create failed");

    thread_deallocate(thread);
#endif

    vm_object_reaper_init();


    bzero(&vm_config, sizeof(vm_config));

    switch(vm_compressor_mode) {

    case VM_PAGER_DEFAULT:
        printf("mapping deprecated VM_PAGER_DEFAULT to VM_PAGER_COMPRESSOR_WITH_SWAP\n");

    case VM_PAGER_COMPRESSOR_WITH_SWAP:
        vm_config.compressor_is_present = TRUE;
        vm_config.swap_is_present = TRUE;
        vm_config.compressor_is_active = TRUE;
        vm_config.swap_is_active = TRUE;
        break;

    case VM_PAGER_COMPRESSOR_NO_SWAP:
        vm_config.compressor_is_present = TRUE;
        vm_config.swap_is_present = TRUE;
        vm_config.compressor_is_active = TRUE;
        break;

    case VM_PAGER_FREEZER_DEFAULT:
        printf("mapping deprecated VM_PAGER_FREEZER_DEFAULT to VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP\n");

    case VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP:
        vm_config.compressor_is_present = TRUE;
        vm_config.swap_is_present = TRUE;
        break;

    case VM_PAGER_COMPRESSOR_NO_SWAP_PLUS_FREEZER_COMPRESSOR_WITH_SWAP:
        vm_config.compressor_is_present = TRUE;
        vm_config.swap_is_present = TRUE;
        vm_config.compressor_is_active = TRUE;
        vm_config.freezer_swap_is_active = TRUE;
        break;

    case VM_PAGER_NOT_CONFIGURED:
        break;

    default:
        printf("unknown compressor mode - %x\n", vm_compressor_mode);
        break;
    }
    if (VM_CONFIG_COMPRESSOR_IS_PRESENT)
        vm_compressor_pager_init();

#if VM_PRESSURE_EVENTS
    vm_pressure_events_enabled = TRUE;
#endif /* VM_PRESSURE_EVENTS */

#if CONFIG_PHANTOM_CACHE
    vm_phantom_cache_init();
#endif
#if VM_PAGE_BUCKETS_CHECK
#if VM_PAGE_FAKE_BUCKETS
    printf("**** DEBUG: protecting fake buckets [0x%llx:0x%llx]\n",
           (uint64_t) vm_page_fake_buckets_start,
           (uint64_t) vm_page_fake_buckets_end);
    pmap_protect(kernel_pmap,
                 vm_page_fake_buckets_start,
                 vm_page_fake_buckets_end,
                 VM_PROT_READ);
//  *(char *) vm_page_fake_buckets_start = 'x'; /* panic! */
#endif /* VM_PAGE_FAKE_BUCKETS */
#endif /* VM_PAGE_BUCKETS_CHECK */

#if VM_OBJECT_TRACKING
    vm_object_tracking_init();
#endif /* VM_OBJECT_TRACKING */

    vm_pageout_continue();

    /*
     * The vm_pageout_continue() call above never returns, so the code below is never
     * executed. We take advantage of this to declare several DTrace VM related probe
     * points that our kernel doesn't have an analog for. These are probe points that
     * exist in Solaris and are in the DTrace documentation, so people may have written
     * scripts that use them. Declaring the probe points here means their scripts will
     * compile and execute which we want for portability of the scripts, but since this
     * section of code is never reached, the probe points will simply never fire. Yes,
     * this is basically a hack. The problem is the DTrace probe points were chosen with
     * Solaris specific VM events in mind, not portability to different VM implementations.
     */
    DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL);
    DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL);
    DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL);
    DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL);
    DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL);
    DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL);
    DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL);
    /*NOTREACHED*/
}
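
/*
 * Summary of the vm_compressor_mode switch above (what each mode enables in
 * vm_config):
 *
 *      VM_PAGER_COMPRESSOR_WITH_SWAP        compressor + swap present and active
 *      VM_PAGER_COMPRESSOR_NO_SWAP          compressor active, swap present but idle
 *      VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP  compressor + swap present only
 *      VM_PAGER_COMPRESSOR_NO_SWAP_PLUS_FREEZER_COMPRESSOR_WITH_SWAP
 *                                           compressor active, freezer swap active
 *      VM_PAGER_NOT_CONFIGURED              nothing enabled
 *
 * The deprecated VM_PAGER_DEFAULT and VM_PAGER_FREEZER_DEFAULT values fall
 * through to VM_PAGER_COMPRESSOR_WITH_SWAP and
 * VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP respectively.
 */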
#if CONFIG_EMBEDDED
int vm_compressor_thread_count = 1;
#else
int vm_compressor_thread_count = 2;
#endif

kern_return_t
vm_pageout_internal_start(void)
{
    kern_return_t result;
    int i;
    host_basic_info_data_t hinfo;

    assert (VM_CONFIG_COMPRESSOR_IS_PRESENT);

    mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
    host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);

    assert(hinfo.max_cpus > 0);

    PE_parse_boot_argn("vmcomp_threads", &vm_compressor_thread_count, sizeof(vm_compressor_thread_count));
    if (vm_compressor_thread_count >= hinfo.max_cpus)
        vm_compressor_thread_count = hinfo.max_cpus - 1;
    if (vm_compressor_thread_count <= 0)
        vm_compressor_thread_count = 1;
    else if (vm_compressor_thread_count > MAX_COMPRESSOR_THREAD_COUNT)
        vm_compressor_thread_count = MAX_COMPRESSOR_THREAD_COUNT;

    vm_pageout_queue_internal.pgo_maxlaundry = (vm_compressor_thread_count * 4) * VM_PAGE_LAUNDRY_MAX;

    PE_parse_boot_argn("vmpgoi_maxlaundry", &vm_pageout_queue_internal.pgo_maxlaundry, sizeof(vm_pageout_queue_internal.pgo_maxlaundry));

    for (i = 0; i < vm_compressor_thread_count; i++) {
        ciq[i].q = &vm_pageout_queue_internal;
        ciq[i].current_chead = NULL;
        ciq[i].scratch_buf = kalloc(COMPRESSOR_SCRATCH_BUF_SIZE);

        result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, (void *)&ciq[i], BASEPRI_VM, &vm_pageout_internal_iothread);

        if (result == KERN_SUCCESS)
            thread_deallocate(vm_pageout_internal_iothread);
        else
            break;
    }
    return result;
}
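
/*
 * Worked example for the sizing above (illustrative): on a 6-CPU machine with
 * no "vmcomp_threads" boot-arg, vm_compressor_thread_count keeps its default,
 * and the internal queue allows (vm_compressor_thread_count * 4) *
 * VM_PAGE_LAUNDRY_MAX laundry pages; a boot-arg of 8 would first be clamped
 * to max_cpus - 1 = 5 and then checked against MAX_COMPRESSOR_THREAD_COUNT.
 */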
#if CONFIG_IOSCHED
/*
 * To support I/O Expedite for compressed files we mark the upls with special flags.
 * The way decmpfs works is that we create a big upl which marks all the pages needed to
 * represent the compressed file as busy. We tag this upl with the flag UPL_DECMP_REQ. Decmpfs
 * then issues smaller I/Os for compressed I/Os, deflates them and puts the data into the pages
 * being held in the big original UPL. We mark each of these smaller UPLs with the flag
 * UPL_DECMP_REAL_IO. Any outstanding real I/O UPL is tracked by the big req upl using the
 * decmp_io_upl field (in the upl structure). This link is protected in the forward direction
 * by the req upl lock (the reverse link doesn't need synch. since we never inspect this link
 * unless the real I/O upl is being destroyed).
 */
static void
upl_set_decmp_info(upl_t upl, upl_t src_upl)
{
    assert((src_upl->flags & UPL_DECMP_REQ) != 0);

    upl_lock(src_upl);
    if (src_upl->decmp_io_upl) {
        /*
         * If there is already an alive real I/O UPL, ignore this new UPL.
         * This case should rarely happen and even if it does, it just means
         * that we might issue a spurious expedite which the driver is expected
         * to handle.
         */
        upl_unlock(src_upl);
        return;
    }
    src_upl->decmp_io_upl = (void *)upl;
    src_upl->ref_count++;

    upl->flags |= UPL_DECMP_REAL_IO;
    upl->decmp_io_upl = (void *)src_upl;
    upl_unlock(src_upl);
}
#endif /* CONFIG_IOSCHED */
#if UPL_DEBUG
int upl_debug_enabled = 1;
#else
int upl_debug_enabled = 0;
#endif

static upl_t
upl_create(int type, int flags, upl_size_t size)
{
    upl_t       upl;
    vm_size_t   page_field_size = 0;
    int         upl_flags = 0;
    vm_size_t   upl_size = sizeof(struct upl);

    size = round_page_32(size);

    if (type & UPL_CREATE_LITE) {
        page_field_size = (atop(size) + 7) >> 3;
        page_field_size = (page_field_size + 3) & 0xFFFFFFFC;

        upl_flags |= UPL_LITE;
    }
    if (type & UPL_CREATE_INTERNAL) {
        upl_size += sizeof(struct upl_page_info) * atop(size);

        upl_flags |= UPL_INTERNAL;
    }
    upl = (upl_t)kalloc(upl_size + page_field_size);

    if (page_field_size)
        bzero((char *)upl + upl_size, page_field_size);

    upl->flags = upl_flags | flags;
    upl->kaddr = (vm_offset_t)0;
    upl->map_object = NULL;
    upl->ext_ref_count = 0;
    upl->highest_page = 0;
    upl->vector_upl = NULL;
    upl->associated_upl = NULL;
#if CONFIG_IOSCHED
    if (type & UPL_CREATE_IO_TRACKING) {
        upl->upl_priority = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO);
    }

    upl->upl_reprio_info = 0;
    upl->decmp_io_upl = 0;
    if ((type & UPL_CREATE_INTERNAL) && (type & UPL_CREATE_EXPEDITE_SUP)) {
        /* Only support expedite on internal UPLs */
        thread_t curthread = current_thread();
        upl->upl_reprio_info = (uint64_t *)kalloc(sizeof(uint64_t) * atop(size));
        bzero(upl->upl_reprio_info, (sizeof(uint64_t) * atop(size)));
        upl->flags |= UPL_EXPEDITE_SUPPORTED;
        if (curthread->decmp_upl != NULL)
            upl_set_decmp_info(upl, curthread->decmp_upl);
    }
#endif
#if CONFIG_IOSCHED || UPL_DEBUG
    if ((type & UPL_CREATE_IO_TRACKING) || upl_debug_enabled) {
        upl->upl_creator = current_thread();
        upl->flags |= UPL_TRACKED_BY_OBJECT;
    }
#endif

#if UPL_DEBUG
    upl->ubc_alias1 = 0;
    upl->ubc_alias2 = 0;

    upl->upl_commit_index = 0;
    bzero(&upl->upl_commit_records[0], sizeof(upl->upl_commit_records));

    (void) OSBacktrace(&upl->upl_create_retaddr[0], UPL_DEBUG_STACK_FRAMES);
#endif /* UPL_DEBUG */

    return upl;
}
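
/*
 * Allocation-size example for upl_create() (illustrative): for a 1MB
 * (256-page) UPL_CREATE_INTERNAL | UPL_CREATE_LITE request, upl_size grows by
 * 256 * sizeof(struct upl_page_info) for the embedded page list, and
 * page_field_size is (256 + 7) >> 3 = 32 bytes, rounded to a 4-byte multiple,
 * for the lite-list bitmap appended after the upl structure.
 */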
static void
upl_destroy(upl_t upl)
{
    int page_field_size;  /* bit field in word size buf */
    int size;

    if (upl->ext_ref_count) {
        panic("upl(%p) ext_ref_count", upl);
    }

#if CONFIG_IOSCHED
    if ((upl->flags & UPL_DECMP_REAL_IO) && upl->decmp_io_upl) {
        upl_t src_upl;
        src_upl = upl->decmp_io_upl;
        assert((src_upl->flags & UPL_DECMP_REQ) != 0);
        upl_lock(src_upl);
        src_upl->decmp_io_upl = NULL;
        upl_unlock(src_upl);
        upl_deallocate(src_upl);
    }
#endif /* CONFIG_IOSCHED */

#if CONFIG_IOSCHED || UPL_DEBUG
    if ((upl->flags & UPL_TRACKED_BY_OBJECT) && !(upl->flags & UPL_VECTOR)) {
        vm_object_t object;

        if (upl->flags & UPL_SHADOWED) {
            object = upl->map_object->shadow;
        } else {
            object = upl->map_object;
        }

        vm_object_lock(object);
        queue_remove(&object->uplq, upl, upl_t, uplq);
        vm_object_activity_end(object);
        vm_object_collapse(object, 0, TRUE);
        vm_object_unlock(object);
    }
#endif
    /*
     * drop a reference on the map_object whether or
     * not a pageout object is inserted
     */
    if (upl->flags & UPL_SHADOWED)
        vm_object_deallocate(upl->map_object);

    if (upl->flags & UPL_DEVICE_MEMORY)
        size = PAGE_SIZE;
    else
        size = upl->size;

    page_field_size = 0;

    if (upl->flags & UPL_LITE) {
        page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
        page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
    }
    upl_lock_destroy(upl);
    upl->vector_upl = (vector_upl_t) 0xfeedbeef;

#if CONFIG_IOSCHED
    if (upl->flags & UPL_EXPEDITE_SUPPORTED)
        kfree(upl->upl_reprio_info, sizeof(uint64_t) * (size/PAGE_SIZE));
#endif

    if (upl->flags & UPL_INTERNAL) {
        kfree(upl,
              sizeof(struct upl) +
              (sizeof(struct upl_page_info) * (size/PAGE_SIZE))
              + page_field_size);
    } else {
        kfree(upl, sizeof(struct upl) + page_field_size);
    }
}
void
upl_deallocate(upl_t upl)
{
    upl_lock(upl);
    if (--upl->ref_count == 0) {
        if(vector_upl_is_valid(upl))
            vector_upl_deallocate(upl);
        upl_unlock(upl);
        upl_destroy(upl);
    } else
        upl_unlock(upl);
}

#if CONFIG_IOSCHED
void
upl_mark_decmp(upl_t upl)
{
    if (upl->flags & UPL_TRACKED_BY_OBJECT) {
        upl->flags |= UPL_DECMP_REQ;
        upl->upl_creator->decmp_upl = (void *)upl;
    }
}

void
upl_unmark_decmp(upl_t upl)
{
    if(upl && (upl->flags & UPL_DECMP_REQ)) {
        upl->upl_creator->decmp_upl = NULL;
    }
}

#endif /* CONFIG_IOSCHED */
#define VM_PAGE_Q_BACKING_UP(q) \
        ((q)->pgo_laundry >= (((q)->pgo_maxlaundry * 8) / 10))

boolean_t must_throttle_writes(void);

boolean_t
must_throttle_writes()
{
    if (VM_PAGE_Q_BACKING_UP(&vm_pageout_queue_external) &&
        vm_page_pageable_external_count > (AVAILABLE_NON_COMPRESSED_MEMORY * 6) / 10)
        return (TRUE);

    return (FALSE);
}


#if DEVELOPMENT || DEBUG
/*
 * Statistics about UPL enforcement of copy-on-write obligations.
 */
unsigned long upl_cow = 0;
unsigned long upl_cow_again = 0;
unsigned long upl_cow_pages = 0;
unsigned long upl_cow_again_pages = 0;

unsigned long iopl_cow = 0;
unsigned long iopl_cow_pages = 0;
#endif
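
/*
 * Example for VM_PAGE_Q_BACKING_UP (illustrative): with pgo_maxlaundry == 120,
 * the external queue is considered "backing up" once pgo_laundry reaches
 * (120 * 8) / 10 == 96 pages, i.e. 80% of its limit; must_throttle_writes()
 * additionally requires pageable external pages to exceed 60% of
 * AVAILABLE_NON_COMPRESSED_MEMORY.
 */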
/*
 * Routine:     vm_object_upl_request
 * Purpose:
 *      Cause the population of a portion of a vm_object.
 *      Depending on the nature of the request, the pages
 *      returned may contain valid data or be uninitialized.
 *      A page list structure, listing the physical pages
 *      will be returned upon request.
 *      This function is called by the file system or any other
 *      supplier of backing store to a pager.
 *      IMPORTANT NOTE: The caller must still respect the relationship
 *      between the vm_object and its backing memory object. The
 *      caller MUST NOT substitute changes in the backing file
 *      without first doing a memory_object_lock_request on the
 *      target range unless it is known that the pages are not
 *      shared with another entity at the pager level.
 *
 *      if a page list structure is present
 *      return the mapped physical pages, where a
 *      page is not present, return a non-initialized
 *      one. If the no_sync bit is turned on, don't
 *      call the pager unlock to synchronize with other
 *      possible copies of the page. Leave pages busy
 *      in the original object, if a page list structure
 *      was specified. When a commit of the page list
 *      pages is done, the dirty bit will be set for each one.
 *
 *      If a page list structure is present, return
 *      all mapped pages. Where a page does not exist
 *      map a zero filled one. Leave pages busy in
 *      the original object. If a page list structure
 *      is not specified, this call is a no-op.
 *
 *      Note: access of default pager objects has a rather interesting
 *      twist. The caller of this routine, presumably the file system
 *      page cache handling code, will never actually make a request
 *      against a default pager backed object. Only the default
 *      pager will make requests on backing store related vm_objects
 *      In this way the default pager can maintain the relationship
 *      between backing store files (abstract memory objects) and
 *      the vm_objects (cache objects), they support.
 */

__private_extern__ kern_return_t
vm_object_upl_request(
    vm_object_t             object,
    vm_object_offset_t      offset,
    upl_size_t              size,
    upl_t                   *upl_ptr,
    upl_page_info_array_t   user_page_list,
    unsigned int            *page_list_count,
    upl_control_flags_t     cntrl_flags,
    vm_tag_t                tag)
{
    vm_page_t               dst_page = VM_PAGE_NULL;
    vm_object_offset_t      dst_offset;
    upl_size_t              xfer_size;
    unsigned int            size_in_pages;
    boolean_t               dirty;
    boolean_t               hw_dirty;
    upl_t                   upl = NULL;
    unsigned int            entry;
#if MACH_CLUSTER_STATS
    boolean_t               encountered_lrp = FALSE;
#endif
    vm_page_t               alias_page = NULL;
    int                     refmod_state = 0;
    wpl_array_t             lite_list = NULL;
    vm_object_t             last_copy_object;
    struct vm_page_delayed_work     dw_array[DEFAULT_DELAYED_WORK_LIMIT];
    struct vm_page_delayed_work     *dwp;
    int                     dw_count;
    int                     dw_limit;
    int                     io_tracking_flag = 0;
    int                     grab_options;
    ppnum_t                 phys_page;

    if (cntrl_flags & ~UPL_VALID_FLAGS) {
        /*
         * For forward compatibility's sake,
         * reject any unknown flag.
         */
        return KERN_INVALID_VALUE;
    }
    if ( (!object->internal) && (object->paging_offset != 0) )
        panic("vm_object_upl_request: external object with non-zero paging offset\n");
    if (object->phys_contiguous)
        panic("vm_object_upl_request: contiguous object specified\n");

    if (size > MAX_UPL_SIZE_BYTES)
        size = MAX_UPL_SIZE_BYTES;

    if ( (cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL)
        *page_list_count = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT;

#if CONFIG_IOSCHED || UPL_DEBUG
    if (object->io_tracking || upl_debug_enabled)
        io_tracking_flag |= UPL_CREATE_IO_TRACKING;
#endif
#if CONFIG_IOSCHED
    if (object->io_tracking)
        io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
#endif

    if (cntrl_flags & UPL_SET_INTERNAL) {
        if (cntrl_flags & UPL_SET_LITE) {

            upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);

            user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
            lite_list = (wpl_array_t)
                (((uintptr_t)user_page_list) +
                 ((size/PAGE_SIZE) * sizeof(upl_page_info_t)));
            if (size == 0) {
                user_page_list = NULL;
                lite_list = NULL;
            }
        } else {
            upl = upl_create(UPL_CREATE_INTERNAL | io_tracking_flag, 0, size);

            user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
            if (size == 0) {
                user_page_list = NULL;
            }
        }
    } else {
        if (cntrl_flags & UPL_SET_LITE) {

            upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);

            lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
            if (size == 0) {
                lite_list = NULL;
            }
        } else {
            upl = upl_create(UPL_CREATE_EXTERNAL | io_tracking_flag, 0, size);
        }
    }
    *upl_ptr = upl;

    if (user_page_list)
        user_page_list[0].device = FALSE;

    if (cntrl_flags & UPL_SET_LITE) {
        upl->map_object = object;
    } else {
        upl->map_object = vm_object_allocate(size);
        /*
         * No need to lock the new object: nobody else knows
         * about it yet, so it's all ours so far.
         */
        upl->map_object->shadow = object;
        upl->map_object->pageout = TRUE;
        upl->map_object->can_persist = FALSE;
        upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
        upl->map_object->vo_shadow_offset = offset;
        upl->map_object->wimg_bits = object->wimg_bits;

        VM_PAGE_GRAB_FICTITIOUS(alias_page);

        upl->flags |= UPL_SHADOWED;
    }
    if (cntrl_flags & UPL_FOR_PAGEOUT)
        upl->flags |= UPL_PAGEOUT;

    vm_object_lock(object);
    vm_object_activity_begin(object);

    grab_options = 0;
#if CONFIG_SECLUDED_MEMORY
    if (object->can_grab_secluded) {
        grab_options |= VM_PAGE_GRAB_SECLUDED;
    }
#endif /* CONFIG_SECLUDED_MEMORY */

    /*
     * we can lock in the paging_offset once paging_in_progress is set
     */
    upl->offset = offset + object->paging_offset;

#if CONFIG_IOSCHED || UPL_DEBUG
    if (object->io_tracking || upl_debug_enabled) {
        vm_object_activity_begin(object);
        queue_enter(&object->uplq, upl, upl_t, uplq);
    }
#endif
    if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != VM_OBJECT_NULL) {
        /*
         * Honor copy-on-write obligations
         *
         * The caller is gathering these pages and
         * might modify their contents. We need to
         * make sure that the copy object has its own
         * private copies of these pages before we let
         * the caller modify them.
         */
        vm_object_update(object,
                         offset,
                         size,
                         NULL,
                         NULL,
                         FALSE,    /* should_return */
                         MEMORY_OBJECT_COPY_SYNC,
                         VM_PROT_NO_CHANGE);
#if DEVELOPMENT || DEBUG
        upl_cow++;
        upl_cow_pages += size >> PAGE_SHIFT;
#endif
    }
    /*
     * remember which copy object we synchronized with
     */
    last_copy_object = object->copy;
    entry = 0;

    xfer_size = size;
    dst_offset = offset;
    size_in_pages = size / PAGE_SIZE;

    dwp = &dw_array[0];
    dw_count = 0;
    dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);

    if (vm_page_free_count > (vm_page_free_target + size_in_pages) ||
        object->resident_page_count < ((MAX_UPL_SIZE_BYTES * 2) >> PAGE_SHIFT))
        object->scan_collisions = 0;

    if ((cntrl_flags & UPL_WILL_MODIFY) && must_throttle_writes() == TRUE) {
        boolean_t isSSD = FALSE;

        vnode_pager_get_isSSD(object->pager, &isSSD);

        vm_object_unlock(object);

        OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);

        if (isSSD == TRUE)
            delay(1000 * size_in_pages);
        else
            delay(5000 * size_in_pages);
        OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);

        vm_object_lock(object);
    }
    while (xfer_size) {

        dwp->dw_mask = 0;

        if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
            vm_object_unlock(object);
            VM_PAGE_GRAB_FICTITIOUS(alias_page);
            vm_object_lock(object);
        }
        if (cntrl_flags & UPL_COPYOUT_FROM) {
            upl->flags |= UPL_PAGE_SYNC_DONE;

            if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
                 dst_page->fictitious ||
                 dst_page->absent ||
                 dst_page->error ||
                 dst_page->cleaning ||
                 (VM_PAGE_WIRED(dst_page))) {

                if (user_page_list)
                    user_page_list[entry].phys_addr = 0;

                goto try_next_page;
            }
            phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);

            /*
             * grab this up front...
             * a high percentage of the time we're going to
             * need the hardware modification state a bit later
             * anyway... so we can eliminate an extra call into
             * the pmap layer by grabbing it here and recording it
             */
            if (dst_page->pmapped)
                refmod_state = pmap_get_refmod(phys_page);
            else
                refmod_state = 0;

            if ( (refmod_state & VM_MEM_REFERENCED) && VM_PAGE_INACTIVE(dst_page)) {
                /*
                 * page is on inactive list and referenced...
                 * reactivate it now... this gets it out of the
                 * way of vm_pageout_scan which would have to
                 * reactivate it upon tripping over it
                 */
                dwp->dw_mask |= DW_vm_page_activate;
            }
            if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
                /*
                 * we're only asking for DIRTY pages to be returned
                 */
                if (dst_page->laundry || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
                    /*
                     * if we were the page stolen by vm_pageout_scan to be
                     * cleaned (as opposed to a buddy being clustered in
                     * or this request is not being driven by a PAGEOUT cluster
                     * then we only need to check for the page being dirty or
                     * precious to decide whether to return it
                     */
                    if (dst_page->dirty || dst_page->precious || (refmod_state & VM_MEM_MODIFIED))
                        goto check_busy;
                    goto dont_return;
                }
                /*
                 * this is a request for a PAGEOUT cluster and this page
                 * is merely along for the ride as a 'buddy'... not only
                 * does it have to be dirty to be returned, but it also
                 * can't have been referenced recently...
                 */
                if ( (hibernate_cleaning_in_progress == TRUE ||
                      (!((refmod_state & VM_MEM_REFERENCED) || dst_page->reference) ||
                       (dst_page->vm_page_q_state == VM_PAGE_ON_THROTTLED_Q))) &&
                     ((refmod_state & VM_MEM_MODIFIED) || dst_page->dirty || dst_page->precious) ) {
                    goto check_busy;
                }
dont_return:
                /*
                 * if we reach here, we're not to return
                 * the page... go on to the next one
                 */
                if (dst_page->laundry == TRUE) {
                    /*
                     * if we get here, the page is not 'cleaning' (filtered out above).
                     * since it has been referenced, remove it from the laundry
                     * so we don't pay the cost of an I/O to clean a page
                     * we're just going to take back
                     */
                    vm_page_lockspin_queues();

                    vm_pageout_steal_laundry(dst_page, TRUE);
                    vm_page_activate(dst_page);

                    vm_page_unlock_queues();
                }
                if (user_page_list)
                    user_page_list[entry].phys_addr = 0;

                goto try_next_page;
            }
check_busy:
            if (dst_page->busy) {
                if (cntrl_flags & UPL_NOBLOCK) {
                    if (user_page_list)
                        user_page_list[entry].phys_addr = 0;
                    dwp->dw_mask = 0;

                    goto try_next_page;
                }
                /*
                 * someone else is playing with the
                 * page. We will have to wait.
                 */
                PAGE_SLEEP(object, dst_page, THREAD_UNINT);

                continue;
            }
            if (dst_page->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) {

                vm_page_lockspin_queues();

                if (dst_page->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) {
                    /*
                     * we've buddied up a page for a clustered pageout
                     * that has already been moved to the pageout
                     * queue by pageout_scan... we need to remove
                     * it from the queue and drop the laundry count
                     * on that queue
                     */
                    vm_pageout_throttle_up(dst_page);
                }
                vm_page_unlock_queues();
            }
#if MACH_CLUSTER_STATS
            /*
             * pageout statistics gathering. count
             * all the pages we will page out that
             * were not counted in the initial
             * vm_pageout_scan work
             */
            if (dst_page->pageout)
                encountered_lrp = TRUE;
            if ((dst_page->dirty || (object->internal && dst_page->precious))) {
                if (encountered_lrp)
                    CLUSTER_STAT(pages_at_higher_offsets++;)
                else
                    CLUSTER_STAT(pages_at_lower_offsets++;)
            }
#endif
            hw_dirty = refmod_state & VM_MEM_MODIFIED;
            dirty = hw_dirty ? TRUE : dst_page->dirty;

            if (phys_page > upl->highest_page)
                upl->highest_page = phys_page;

            assert (!pmap_is_noencrypt(phys_page));

            if (cntrl_flags & UPL_SET_LITE) {
                unsigned int pg_num;

                pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
                assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
                lite_list[pg_num>>5] |= 1 << (pg_num & 31);

                if (hw_dirty)
                    pmap_clear_modify(phys_page);

                /*
                 * Mark original page as cleaning
                 * in place.
                 */
                dst_page->cleaning = TRUE;
                dst_page->precious = FALSE;
            } else {
                /*
                 * use pageclean setup, it is more
                 * convenient even for the pageout
                 * cases here
                 */
                vm_object_lock(upl->map_object);
                vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
                vm_object_unlock(upl->map_object);

                alias_page->absent = FALSE;
                alias_page = NULL;
            }
            if (dirty) {
                SET_PAGE_DIRTY(dst_page, FALSE);
            } else {
                dst_page->dirty = FALSE;
            }

            if (!dirty)
                dst_page->precious = TRUE;

            if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) {
                if ( !VM_PAGE_WIRED(dst_page))
                    dst_page->free_when_done = TRUE;
            }
        } else {
            if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != last_copy_object) {
                /*
                 * Honor copy-on-write obligations
                 *
                 * The copy object has changed since we
                 * last synchronized for copy-on-write.
                 * Another copy object might have been
                 * inserted while we released the object's
                 * lock. Since someone could have seen the
                 * original contents of the remaining pages
                 * through that new object, we have to
                 * synchronize with it again for the remaining
                 * pages only. The previous pages are "busy"
                 * so they can not be seen through the new
                 * mapping. The new mapping will see our
                 * upcoming changes for those previous pages,
                 * but that's OK since they couldn't see what
                 * was there before. It's just a race anyway
                 * and there's no guarantee of consistency or
                 * atomicity. We just don't want new mappings
                 * to see both the *before* and *after* pages.
                 */
                if (object->copy != VM_OBJECT_NULL) {
                    vm_object_update(
                        object,
                        dst_offset, /* current offset */
                        xfer_size,  /* remaining size */
                        NULL,
                        NULL,
                        FALSE,      /* should_return */
                        MEMORY_OBJECT_COPY_SYNC,
                        VM_PROT_NO_CHANGE);

#if DEVELOPMENT || DEBUG
                    upl_cow_again++;
                    upl_cow_again_pages += xfer_size >> PAGE_SHIFT;
#endif
                }
                /*
                 * remember the copy object we synced with
                 */
                last_copy_object = object->copy;
            }
            dst_page = vm_page_lookup(object, dst_offset);

            if (dst_page != VM_PAGE_NULL) {

                if ((cntrl_flags & UPL_RET_ONLY_ABSENT)) {
                    /*
                     * skip over pages already present in the cache
                     */
                    if (user_page_list)
                        user_page_list[entry].phys_addr = 0;

                    goto try_next_page;
                }
                if (dst_page->fictitious) {
                    panic("need corner case for fictitious page");
                }

                if (dst_page->busy || dst_page->cleaning) {
                    /*
                     * someone else is playing with the
                     * page. We will have to wait.
                     */
                    PAGE_SLEEP(object, dst_page, THREAD_UNINT);

                    continue;
                }
                if (dst_page->laundry)
                    vm_pageout_steal_laundry(dst_page, FALSE);
            } else {
                if (object->private) {
                    /*
                     * This is a nasty wrinkle for users
                     * of upl who encounter device or
                     * private memory however, it is
                     * unavoidable, only a fault can
                     * resolve the actual backing
                     * physical page by asking the
                     * backing device.
                     */
                    if (user_page_list)
                        user_page_list[entry].phys_addr = 0;

                    goto try_next_page;
                }
                if (object->scan_collisions) {
                    /*
                     * the pageout_scan thread is trying to steal
                     * pages from this object, but has run into our
                     * lock... grab 2 pages from the head of the object...
                     * the first is freed on behalf of pageout_scan, the
                     * 2nd is for our own use... we use vm_object_page_grab
                     * in both cases to avoid taking pages from the free
                     * list since we are under memory pressure and our
                     * lock on this object is getting in the way of
                     * relieving it
                     */
                    dst_page = vm_object_page_grab(object);

                    if (dst_page != VM_PAGE_NULL)
                        vm_page_release(dst_page,
                                        FALSE);

                    dst_page = vm_object_page_grab(object);
                }
                if (dst_page == VM_PAGE_NULL) {
                    /*
                     * need to allocate a page
                     */
                    dst_page = vm_page_grab_options(grab_options);
                }
                if (dst_page == VM_PAGE_NULL) {
                    if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) {
                        /*
                         * we don't want to stall waiting for pages to come onto the free list
                         * while we're already holding absent pages in this UPL
                         * the caller will deal with the empty slots
                         */
                        if (user_page_list)
                            user_page_list[entry].phys_addr = 0;

                        goto try_next_page;
                    }
                    /*
                     * no pages available... wait
                     * then try again for the same
                     * offset...
                     */
                    vm_object_unlock(object);

                    OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);

                    VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);

                    VM_PAGE_WAIT();
                    OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);

                    VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);

                    vm_object_lock(object);

                    continue;
                }
                vm_page_insert(dst_page, object, dst_offset);

                dst_page->absent = TRUE;
                dst_page->busy = FALSE;

                if (cntrl_flags & UPL_RET_ONLY_ABSENT) {
                    /*
                     * if UPL_RET_ONLY_ABSENT was specified,
                     * then we're definitely setting up a
                     * upl for a clustered read/pagein
                     * operation... mark the pages as clustered
                     * so upl_commit_range can put them on the
                     * speculative list
                     */
                    dst_page->clustered = TRUE;

                    if ( !(cntrl_flags & UPL_FILE_IO))
                        VM_STAT_INCR(pageins);
                }
            }
            phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);

            dst_page->overwriting = TRUE;

            if (dst_page->pmapped) {
                if ( !(cntrl_flags & UPL_FILE_IO))
                    /*
                     * eliminate all mappings from the
                     * original object and its progeny
                     */
                    refmod_state = pmap_disconnect(phys_page);
                else
                    refmod_state = pmap_get_refmod(phys_page);
            } else
                refmod_state = 0;

            hw_dirty = refmod_state & VM_MEM_MODIFIED;
            dirty = hw_dirty ? TRUE : dst_page->dirty;

            if (cntrl_flags & UPL_SET_LITE) {
                unsigned int pg_num;

                pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
                assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
                lite_list[pg_num>>5] |= 1 << (pg_num & 31);

                if (hw_dirty)
                    pmap_clear_modify(phys_page);

                /*
                 * Mark original page as cleaning
                 * in place.
                 */
                dst_page->cleaning = TRUE;
                dst_page->precious = FALSE;
            } else {
                /*
                 * use pageclean setup, it is more
                 * convenient even for the pageout
                 * cases here
                 */
                vm_object_lock(upl->map_object);
                vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
                vm_object_unlock(upl->map_object);

                alias_page->absent = FALSE;
                alias_page = NULL;
            }
            if (cntrl_flags & UPL_REQUEST_SET_DIRTY) {
                upl->flags &= ~UPL_CLEAR_DIRTY;
                upl->flags |= UPL_SET_DIRTY;
                dirty = TRUE;
                upl->flags |= UPL_SET_DIRTY;
            } else if (cntrl_flags & UPL_CLEAN_IN_PLACE) {
                /*
                 * clean in place for read implies
                 * that a write will be done on all
                 * the pages that are dirty before
                 * a upl commit is done. The caller
                 * is obligated to preserve the
                 * contents of all pages marked dirty
                 */
                upl->flags |= UPL_CLEAR_DIRTY;
            }
            dst_page->dirty = dirty;

            if (!dirty)
                dst_page->precious = TRUE;

            if ( !VM_PAGE_WIRED(dst_page)) {
                /*
                 * deny access to the target page while
                 * it is being worked on
                 */
                dst_page->busy = TRUE;
            } else
                dwp->dw_mask |= DW_vm_page_wire;

            /*
             * We might be about to satisfy a fault which has been
             * requested. So no need for the "restart" bit.
             */
            dst_page->restart = FALSE;
            if (!dst_page->absent && !(cntrl_flags & UPL_WILL_MODIFY)) {
                /*
                 * expect the page to be used
                 */
                dwp->dw_mask |= DW_set_reference;
            }
            if (cntrl_flags & UPL_PRECIOUS) {
                if (object->internal) {
                    SET_PAGE_DIRTY(dst_page, FALSE);
                    dst_page->precious = FALSE;
                } else {
                    dst_page->precious = TRUE;
                }
            } else {
                dst_page->precious = FALSE;
            }
        }
        if (dst_page->busy)
            upl->flags |= UPL_HAS_BUSY;

        if (phys_page > upl->highest_page)
            upl->highest_page = phys_page;
        assert (!pmap_is_noencrypt(phys_page));
        if (user_page_list) {
            user_page_list[entry].phys_addr = phys_page;
            user_page_list[entry].free_when_done = dst_page->free_when_done;
            user_page_list[entry].absent = dst_page->absent;
            user_page_list[entry].dirty = dst_page->dirty;
            user_page_list[entry].precious = dst_page->precious;
            user_page_list[entry].device = FALSE;
            user_page_list[entry].needed = FALSE;
            if (dst_page->clustered == TRUE)
                user_page_list[entry].speculative = (dst_page->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE;
            else
                user_page_list[entry].speculative = FALSE;
            user_page_list[entry].cs_validated = dst_page->cs_validated;
            user_page_list[entry].cs_tainted = dst_page->cs_tainted;
            user_page_list[entry].cs_nx = dst_page->cs_nx;
            user_page_list[entry].mark = FALSE;
        }
        /*
         * if UPL_RET_ONLY_ABSENT is set, then
         * we are working with a fresh page and we've
         * just set the clustered flag on it to
         * indicate that it was drug in as part of a
         * speculative cluster... so leave it alone
         */
        if ( !(cntrl_flags & UPL_RET_ONLY_ABSENT)) {
            /*
             * someone is explicitly grabbing this page...
             * update clustered and speculative state
             */
            if (dst_page->clustered)
                VM_PAGE_CONSUME_CLUSTERED(dst_page);
        }
try_next_page:
        if (dwp->dw_mask) {
            if (dwp->dw_mask & DW_vm_page_activate)
                VM_STAT_INCR(reactivations);

            VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);

            if (dw_count >= dw_limit) {
                vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);

                dwp = &dw_array[0];
                dw_count = 0;
            }
        }
        entry++;
        dst_offset += PAGE_SIZE_64;
        xfer_size -= PAGE_SIZE;
    }
    if (dw_count)
        vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);

    if (alias_page != NULL) {
        VM_PAGE_FREE(alias_page);
    }

    if (page_list_count != NULL) {
        if (upl->flags & UPL_INTERNAL)
            *page_list_count = 0;
        else if (*page_list_count > entry)
            *page_list_count = entry;
    }

    vm_object_unlock(object);

    return KERN_SUCCESS;
}
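
/*
 * Lite-list layout note for vm_object_upl_request() (illustrative): the
 * bitmap set via lite_list[pg_num>>5] |= 1 << (pg_num & 31) keeps one bit per
 * page in 32-bit words, so page 70 of a UPL lands in word 2 (70 >> 5), bit 6
 * (70 & 31).
 */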
/*
 * Routine:     vm_object_super_upl_request
 * Purpose:
 *      Cause the population of a portion of a vm_object
 *      in much the same way as memory_object_upl_request.
 *      Depending on the nature of the request, the pages
 *      returned may contain valid data or be uninitialized.
 *      However, the region may be expanded up to the super
 *      cluster size provided.
 */

__private_extern__ kern_return_t
vm_object_super_upl_request(
    vm_object_t             object,
    vm_object_offset_t      offset,
    upl_size_t              size,
    upl_size_t              super_cluster,
    upl_t                   *upl,
    upl_page_info_t         *user_page_list,
    unsigned int            *page_list_count,
    upl_control_flags_t     cntrl_flags,
    vm_tag_t                tag)
{
    if (object->paging_offset > offset || ((cntrl_flags & UPL_VECTOR)==UPL_VECTOR))
        return KERN_FAILURE;

    assert(object->paging_in_progress);
    offset = offset - object->paging_offset;

    if (super_cluster > size) {

        vm_object_offset_t  base_offset;
        upl_size_t          super_size;
        vm_object_size_t    super_size_64;

        base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1));
        super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster;
        super_size_64 = ((base_offset + super_size) > object->vo_size) ? (object->vo_size - base_offset) : super_size;
        super_size = (upl_size_t) super_size_64;
        assert(super_size == super_size_64);

        if (offset > (base_offset + super_size)) {
            panic("vm_object_super_upl_request: Missed target pageout"
                  " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
                  offset, base_offset, super_size, super_cluster,
                  size, object->paging_offset);
        }
        /*
         * apparently there is a case where the vm requests a
         * page to be written out whose offset is beyond the
         * object size
         */
        if ((offset + size) > (base_offset + super_size)) {
            super_size_64 = (offset + size) - base_offset;
            super_size = (upl_size_t) super_size_64;
            assert(super_size == super_size_64);
        }

        offset = base_offset;
        size = super_size;
    }
    return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags, tag);
}
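
/*
 * Super-cluster example for vm_object_super_upl_request() (illustrative):
 * with super_cluster == 0x20000 (128KB), a request for 0x1000 bytes at offset
 * 0x33000 is aligned down to base_offset 0x20000; since 0x34000 fits within
 * 0x20000 + 0x20000, super_size stays 0x20000 and the UPL covers the whole
 * 128KB window (subject to the object-size clipping above).
 */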
5812 int cs_executable_create_upl
= 0;
5813 extern int proc_selfpid(void);
5814 extern char *proc_name_address(void *p
);
5815 #endif /* CONFIG_EMBEDDED */
5820 vm_map_address_t offset
,
5821 upl_size_t
*upl_size
,
5823 upl_page_info_array_t page_list
,
5824 unsigned int *count
,
5825 upl_control_flags_t
*flags
,
5828 vm_map_entry_t entry
;
5829 upl_control_flags_t caller_flags
;
5830 int force_data_sync
;
5832 vm_object_t local_object
;
5833 vm_map_offset_t local_offset
;
5834 vm_map_offset_t local_start
;
5837 assert(page_aligned(offset
));
5839 caller_flags
= *flags
;
5841 if (caller_flags
& ~UPL_VALID_FLAGS
) {
5843 * For forward compatibility's sake,
5844 * reject any unknown flag.
5846 return KERN_INVALID_VALUE
;
5848 force_data_sync
= (caller_flags
& UPL_FORCE_DATA_SYNC
);
5849 sync_cow_data
= !(caller_flags
& UPL_COPYOUT_FROM
);
5852 return KERN_INVALID_ARGUMENT
;
5855 vm_map_lock_read(map
);
5857 if (!vm_map_lookup_entry(map
, offset
, &entry
)) {
5858 vm_map_unlock_read(map
);
5859 return KERN_FAILURE
;
5862 if ((entry
->vme_end
- offset
) < *upl_size
) {
5863 *upl_size
= (upl_size_t
) (entry
->vme_end
- offset
);
5864 assert(*upl_size
== entry
->vme_end
- offset
);
5867 if (caller_flags
& UPL_QUERY_OBJECT_TYPE
) {
5870 if (!entry
->is_sub_map
&&
5871 VME_OBJECT(entry
) != VM_OBJECT_NULL
) {
5872 if (VME_OBJECT(entry
)->private)
5873 *flags
= UPL_DEV_MEMORY
;
5875 if (VME_OBJECT(entry
)->phys_contiguous
)
5876 *flags
|= UPL_PHYS_CONTIG
;
5878 vm_map_unlock_read(map
);
5879 return KERN_SUCCESS
;
5882 if (VME_OBJECT(entry
) == VM_OBJECT_NULL
||
5883 !VME_OBJECT(entry
)->phys_contiguous
) {
5884 if (*upl_size
> MAX_UPL_SIZE_BYTES
)
5885 *upl_size
= MAX_UPL_SIZE_BYTES
;
5889 * Create an object if necessary.
5891 if (VME_OBJECT(entry
) == VM_OBJECT_NULL
) {
5893 if (vm_map_lock_read_to_write(map
))
5894 goto REDISCOVER_ENTRY
;
5896 VME_OBJECT_SET(entry
,
5897 vm_object_allocate((vm_size_t
)
5899 entry
->vme_start
)));
5900 VME_OFFSET_SET(entry
, 0);
5901 assert(entry
->use_pmap
);
5903 vm_map_lock_write_to_read(map
);
5906 if (!(caller_flags
& UPL_COPYOUT_FROM
) &&
5907 !(entry
->protection
& VM_PROT_WRITE
)) {
5908 vm_map_unlock_read(map
);
5909 return KERN_PROTECTION_FAILURE
;
5913 if (map
->pmap
!= kernel_pmap
&&
5914 (caller_flags
& UPL_COPYOUT_FROM
) &&
5915 (entry
->protection
& VM_PROT_EXECUTE
) &&
5916 !(entry
->protection
& VM_PROT_WRITE
)) {
5921 * We're about to create a read-only UPL backed by
5922 * memory from an executable mapping.
5923 * Wiring the pages would result in the pages being copied
5924 * (due to the "MAP_PRIVATE" mapping) and no longer
5925 * code-signed, so no longer eligible for execution.
5926 * Instead, let's copy the data into a kernel buffer and
5927 * create the UPL from this kernel buffer.
5928 * The kernel buffer is then freed, leaving the UPL holding
5929 * the last reference on the VM object, so the memory will
5930 * be released when the UPL is committed.
5933 vm_map_unlock_read(map
);
5934 /* allocate kernel buffer */
5935 ksize
= round_page(*upl_size
);
5937 ret
= kmem_alloc_pageable(kernel_map
,
5941 if (ret
== KERN_SUCCESS
) {
5942 /* copyin the user data */
5943 assert(page_aligned(offset
));
5944 ret
= copyinmap(map
, offset
, (void *)kaddr
, *upl_size
);
5946 if (ret
== KERN_SUCCESS
) {
5947 if (ksize
> *upl_size
) {
5948 /* zero out the extra space in kernel buffer */
5949 memset((void *)(kaddr
+ *upl_size
),
5953 /* create the UPL from the kernel buffer */
5954 ret
= vm_map_create_upl(kernel_map
, kaddr
, upl_size
,
5955 upl
, page_list
, count
, flags
, tag
);
5958 /* free the kernel buffer */
5959 kmem_free(kernel_map
, kaddr
, ksize
);
5963 #if DEVELOPMENT || DEBUG
5964 DTRACE_VM4(create_upl_from_executable
,
5966 vm_map_address_t
, offset
,
5967 upl_size_t
, *upl_size
,
5968 kern_return_t
, ret
);
5969 #endif /* DEVELOPMENT || DEBUG */
5972 #endif /* CONFIG_EMBEDDED */
5974 local_object
= VME_OBJECT(entry
);
5975 assert(local_object
!= VM_OBJECT_NULL
);
5977 if (!entry
->is_sub_map
&&
5978 !entry
->needs_copy
&&
5980 local_object
->vo_size
> *upl_size
&& /* partial UPL */
5981 entry
->wired_count
== 0 && /* No COW for entries that are wired */
5982 (map
->pmap
!= kernel_pmap
) && /* alias checks */
5983 (vm_map_entry_should_cow_for_true_share(entry
) /* case 1 */
5986 local_object
->internal
&&
5987 (local_object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
) &&
5988 local_object
->ref_count
> 1))) {
5993 * Set up the targeted range for copy-on-write to avoid
5994 * applying true_share/copy_delay to the entire object.
5997 * This map entry covers only part of an internal
5998 * object. There could be other map entries covering
5999 * other areas of this object and some of these map
6000 * entries could be marked as "needs_copy", which
6001 * assumes that the object is COPY_SYMMETRIC.
6002 * To avoid marking this object as COPY_DELAY and
6003 * "true_share", let's shadow it and mark the new
6004 * (smaller) object as "true_share" and COPY_DELAY.
6007 if (vm_map_lock_read_to_write(map
)) {
6008 goto REDISCOVER_ENTRY
;
6010 vm_map_lock_assert_exclusive(map
);
6011 assert(VME_OBJECT(entry
) == local_object
);
6013 vm_map_clip_start(map
,
6015 vm_map_trunc_page(offset
,
6016 VM_MAP_PAGE_MASK(map
)));
6017 vm_map_clip_end(map
,
6019 vm_map_round_page(offset
+ *upl_size
,
6020 VM_MAP_PAGE_MASK(map
)));
6021 if ((entry
->vme_end
- offset
) < *upl_size
) {
6022 *upl_size
= (upl_size_t
) (entry
->vme_end
- offset
);
6023 assert(*upl_size
== entry
->vme_end
- offset
);
6026 prot
= entry
->protection
& ~VM_PROT_WRITE
;
6027 if (override_nx(map
, VME_ALIAS(entry
)) && prot
)
6028 prot
|= VM_PROT_EXECUTE
;
6029 vm_object_pmap_protect(local_object
,
6031 entry
->vme_end
- entry
->vme_start
,
6032 ((entry
->is_shared
||
6033 map
->mapped_in_other_pmaps
)
6039 assert(entry
->wired_count
== 0);
		/*
		 * Lock the VM object and re-check its status: if it's mapped
		 * in another address space, we could still be racing with
		 * another thread holding that other VM map exclusively.
		 */
		vm_object_lock(local_object);
		if (local_object->true_share) {
			/* object is already in proper state: no COW needed */
			assert(local_object->copy_strategy !=
			       MEMORY_OBJECT_COPY_SYMMETRIC);
		} else {
			/* not true_share: ask for copy-on-write below */
			assert(local_object->copy_strategy ==
			       MEMORY_OBJECT_COPY_SYMMETRIC);
			entry->needs_copy = TRUE;
		}
		vm_object_unlock(local_object);

		vm_map_lock_write_to_read(map);
	}
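	/*
	 * Note the lock choreography in the block above: the map lock is
	 * upgraded from read to write (bouncing back to REDISCOVER_ENTRY if
	 * the upgrade fails), the entry is clipped and the object re-checked
	 * under its own lock, and only then is the map lock downgraded back
	 * to read for the remainder of the UPL setup.
	 */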
	if (entry->needs_copy) {
		/*
		 * Honor copy-on-write for COPY_SYMMETRIC
		 * strategy.
		 */
		vm_map_t		local_map;
		vm_object_t		object;
		vm_object_offset_t	new_offset;
		vm_prot_t		prot;
		boolean_t		wired;
		vm_map_version_t	version;
		vm_map_t		real_map;
		vm_prot_t		fault_type;

		local_map = map;

		if (caller_flags & UPL_COPYOUT_FROM) {
			fault_type = VM_PROT_READ | VM_PROT_COPY;
			vm_counters.create_upl_extra_cow++;
			vm_counters.create_upl_extra_cow_pages +=
				(entry->vme_end - entry->vme_start) / PAGE_SIZE;
		} else {
			fault_type = VM_PROT_WRITE;
		}
		if (vm_map_lookup_locked(&local_map,
					 offset, fault_type,
					 OBJECT_LOCK_EXCLUSIVE,
					 &version, &object,
					 &new_offset, &prot, &wired,
					 NULL,
					 &real_map) != KERN_SUCCESS) {
			if (fault_type == VM_PROT_WRITE) {
				vm_counters.create_upl_lookup_failure_write++;
			} else {
				vm_counters.create_upl_lookup_failure_copy++;
			}
			vm_map_unlock_read(local_map);
			return KERN_FAILURE;
		}
		if (real_map != map)
			vm_map_unlock(real_map);
		vm_map_unlock_read(local_map);

		vm_object_unlock(object);

		goto REDISCOVER_ENTRY;
	}
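	/*
	 * The vm_map_lookup_locked() call above is made purely for its side
	 * effect: faulting with VM_PROT_COPY (or VM_PROT_WRITE) resolves the
	 * pending copy-on-write for this entry.  Everything it returns is
	 * released immediately and the entry is re-examined from scratch via
	 * REDISCOVER_ENTRY.
	 */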
	if (entry->is_sub_map) {
		vm_map_t	submap;

		submap = VME_SUBMAP(entry);
		local_start = entry->vme_start;
		local_offset = VME_OFFSET(entry);

		vm_map_reference(submap);
		vm_map_unlock_read(map);

		ret = vm_map_create_upl(submap,
					local_offset + (offset - local_start),
					upl_size, upl, page_list, count, flags, tag);
		vm_map_deallocate(submap);

		return ret;
	}
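	/*
	 * For submap entries the work is simply delegated: the request is
	 * re-issued against the submap with the offset rebased from this
	 * map's address space into the submap's own, and the submap's result
	 * is handed straight back to the caller.
	 */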
	if (sync_cow_data &&
	    (VME_OBJECT(entry)->shadow ||
	     VME_OBJECT(entry)->copy)) {
		local_object = VME_OBJECT(entry);
		local_start = entry->vme_start;
		local_offset = VME_OFFSET(entry);

		vm_object_reference(local_object);
		vm_map_unlock_read(map);

		if (local_object->shadow && local_object->copy) {
			vm_object_lock_request(local_object->shadow,
					       ((vm_object_offset_t)
						((offset - local_start) +
						 local_offset) +
						local_object->vo_shadow_offset),
					       (vm_object_size_t) *upl_size,
					       FALSE,
					       MEMORY_OBJECT_DATA_SYNC,
					       VM_PROT_NO_CHANGE);
		}
		sync_cow_data = FALSE;
		vm_object_deallocate(local_object);

		goto REDISCOVER_ENTRY;
	}
	if (force_data_sync) {
		local_object = VME_OBJECT(entry);
		local_start = entry->vme_start;
		local_offset = VME_OFFSET(entry);

		vm_object_reference(local_object);
		vm_map_unlock_read(map);

		vm_object_lock_request(local_object,
				       ((vm_object_offset_t)
					((offset - local_start) +
					 local_offset)),
				       (vm_object_size_t) *upl_size,
				       FALSE,
				       MEMORY_OBJECT_DATA_SYNC,
				       VM_PROT_NO_CHANGE);

		force_data_sync = FALSE;
		vm_object_deallocate(local_object);

		goto REDISCOVER_ENTRY;
	}
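	/*
	 * Both data-sync passes above use MEMORY_OBJECT_DATA_SYNC only to
	 * push modified data back toward the pager; no protection change is
	 * requested (VM_PROT_NO_CHANGE), and since the map was unlocked for
	 * the duration, the entry is looked up again via REDISCOVER_ENTRY.
	 */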
	if (VME_OBJECT(entry)->private)
		*flags = UPL_DEV_MEMORY;
	else
		*flags = 0;

	if (VME_OBJECT(entry)->phys_contiguous)
		*flags |= UPL_PHYS_CONTIG;

	local_object = VME_OBJECT(entry);
	local_offset = VME_OFFSET(entry);
	local_start = entry->vme_start;
#if CONFIG_EMBEDDED
	/*
	 * Wiring will copy the pages to the shadow object.
	 * The shadow object will not be code-signed so
	 * attempting to execute code from these copied pages
	 * would trigger a code-signing violation.
	 */
	if (entry->protection & VM_PROT_EXECUTE) {
#if MACH_ASSERT
		printf("pid %d[%s] create_upl out of executable range from "
		       "0x%llx to 0x%llx: side effects may include "
		       "code-signing violations later on\n",
		       proc_selfpid(),
		       (current_task()->bsd_info
			? proc_name_address(current_task()->bsd_info)
			: "?"),
		       (uint64_t) entry->vme_start,
		       (uint64_t) entry->vme_end);
#endif /* MACH_ASSERT */
		DTRACE_VM2(cs_executable_create_upl,
			   uint64_t, (uint64_t)entry->vme_start,
			   uint64_t, (uint64_t)entry->vme_end);
		cs_executable_create_upl++;
	}
#endif /* CONFIG_EMBEDDED */
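	/*
	 * The printf/DTrace probe above only records the event; the UPL is
	 * still created.  Any code-signing violation would surface later,
	 * when the unsigned copies in the shadow object are executed.
	 */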
	vm_object_lock(local_object);

	/*
	 * Ensure that this object is "true_share" and "copy_delay" now,
	 * while we're still holding the VM map lock.  After we unlock the map,
	 * anything could happen to that mapping, including some copy-on-write
	 * activity.  We need to make sure that the IOPL will point at the
	 * same memory as the mapping.
	 */
	if (local_object->true_share) {
		assert(local_object->copy_strategy !=
		       MEMORY_OBJECT_COPY_SYMMETRIC);
	} else if (local_object != kernel_object &&
		   local_object != compressor_object &&
		   !local_object->phys_contiguous) {
#if VM_OBJECT_TRACKING_OP_TRUESHARE
		if (!local_object->true_share &&
		    vm_object_tracking_inited) {
			void	*bt[VM_OBJECT_TRACKING_BTDEPTH];
			int	num = 0;

			num = OSBacktrace(bt,
					  VM_OBJECT_TRACKING_BTDEPTH);
			btlog_add_entry(vm_object_tracking_btlog,
					local_object,
					VM_OBJECT_TRACKING_OP_TRUESHARE,
					bt,
					num);
		}
#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
		local_object->true_share = TRUE;
		if (local_object->copy_strategy ==
		    MEMORY_OBJECT_COPY_SYMMETRIC) {
			local_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
		}
	}

	vm_object_reference_locked(local_object);
	vm_object_unlock(local_object);

	vm_map_unlock_read(map);

	ret = vm_object_iopl_request(local_object,
				     ((vm_object_offset_t)
				      ((offset - local_start) + local_offset)),
				     *upl_size,
				     upl,
				     page_list,
				     count,
				     caller_flags,
				     tag);
	vm_object_deallocate(local_object);

	return ret;
}
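/*
 * By the time vm_object_iopl_request() is called above, the map-level work
 * is done: the entry has been pinned to a single VM object with a stable
 * (true_share/copy_delay) copy strategy, so gathering and wiring the actual
 * pages is entirely an object-level operation.
 */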
/*
 * Internal routine to enter a UPL into a VM map.
 *
 * JMM - This should just be doable through the standard
 * vm_map_enter() API.
 */
6278 vm_map_offset_t
*dst_addr
)
6281 vm_object_offset_t offset
;
6282 vm_map_offset_t addr
;
6285 int isVectorUPL
= 0, curr_upl
=0;
6286 upl_t vector_upl
= NULL
;
6287 vm_offset_t vector_upl_dst_addr
= 0;
6288 vm_map_t vector_upl_submap
= NULL
;
6289 upl_offset_t subupl_offset
= 0;
6290 upl_size_t subupl_size
= 0;
6292 if (upl
== UPL_NULL
)
6293 return KERN_INVALID_ARGUMENT
;
6295 if((isVectorUPL
= vector_upl_is_valid(upl
))) {
6296 int mapped
=0,valid_upls
=0;
6299 upl_lock(vector_upl
);
6300 for(curr_upl
=0; curr_upl
< MAX_VECTOR_UPL_ELEMENTS
; curr_upl
++) {
6301 upl
= vector_upl_subupl_byindex(vector_upl
, curr_upl
);
6305 if (UPL_PAGE_LIST_MAPPED
& upl
->flags
)
			if (mapped != valid_upls)
				panic("Only %d of the %d sub-upls within the Vector UPL are already mapped\n", mapped, valid_upls);
			upl_unlock(vector_upl);
			return KERN_FAILURE;
6318 kr
= kmem_suballoc(map
, &vector_upl_dst_addr
, vector_upl
->size
, FALSE
,
6319 VM_FLAGS_ANYWHERE
, VM_MAP_KERNEL_FLAGS_NONE
, VM_KERN_MEMORY_NONE
,
6320 &vector_upl_submap
);
6321 if( kr
!= KERN_SUCCESS
)
6322 panic("Vector UPL submap allocation failed\n");
6323 map
= vector_upl_submap
;
6324 vector_upl_set_submap(vector_upl
, vector_upl_submap
, vector_upl_dst_addr
);
6330 process_upl_to_enter
:
6332 if(curr_upl
== MAX_VECTOR_UPL_ELEMENTS
) {
6333 *dst_addr
= vector_upl_dst_addr
;
6334 upl_unlock(vector_upl
);
6335 return KERN_SUCCESS
;
6337 upl
= vector_upl_subupl_byindex(vector_upl
, curr_upl
++ );
6339 goto process_upl_to_enter
;
6341 vector_upl_get_iostate(vector_upl
, upl
, &subupl_offset
, &subupl_size
);
6342 *dst_addr
= (vm_map_offset_t
)(vector_upl_dst_addr
+ (vm_map_offset_t
)subupl_offset
);
6345 * check to see if already mapped
6347 if (UPL_PAGE_LIST_MAPPED
& upl
->flags
) {
6349 return KERN_FAILURE
;
6352 if ((!(upl
->flags
& UPL_SHADOWED
)) &&
6353 ((upl
->flags
& UPL_HAS_BUSY
) ||
6354 !((upl
->flags
& (UPL_DEVICE_MEMORY
| UPL_IO_WIRE
)) || (upl
->map_object
->phys_contiguous
)))) {
6357 vm_page_t alias_page
;
6358 vm_object_offset_t new_offset
;
6359 unsigned int pg_num
;
6360 wpl_array_t lite_list
;
6362 if (upl
->flags
& UPL_INTERNAL
) {
6363 lite_list
= (wpl_array_t
)
6364 ((((uintptr_t)upl
) + sizeof(struct upl
))
6365 + ((upl
->size
/PAGE_SIZE
) * sizeof(upl_page_info_t
)));
6367 lite_list
= (wpl_array_t
)(((uintptr_t)upl
) + sizeof(struct upl
));
6369 object
= upl
->map_object
;
6370 upl
->map_object
= vm_object_allocate(upl
->size
);
6372 vm_object_lock(upl
->map_object
);
6374 upl
->map_object
->shadow
= object
;
6375 upl
->map_object
->pageout
= TRUE
;
6376 upl
->map_object
->can_persist
= FALSE
;
6377 upl
->map_object
->copy_strategy
= MEMORY_OBJECT_COPY_NONE
;
6378 upl
->map_object
->vo_shadow_offset
= upl
->offset
- object
->paging_offset
;
6379 upl
->map_object
->wimg_bits
= object
->wimg_bits
;
6380 offset
= upl
->map_object
->vo_shadow_offset
;
6384 upl
->flags
|= UPL_SHADOWED
;
6387 pg_num
= (unsigned int) (new_offset
/ PAGE_SIZE
);
6388 assert(pg_num
== new_offset
/ PAGE_SIZE
);
6390 if (lite_list
[pg_num
>>5] & (1 << (pg_num
& 31))) {
6392 VM_PAGE_GRAB_FICTITIOUS(alias_page
);
6394 vm_object_lock(object
);
6396 m
= vm_page_lookup(object
, offset
);
6397 if (m
== VM_PAGE_NULL
) {
6398 panic("vm_upl_map: page missing\n");
6402 * Convert the fictitious page to a private
6403 * shadow of the real page.
6405 assert(alias_page
->fictitious
);
6406 alias_page
->fictitious
= FALSE
;
6407 alias_page
->private = TRUE
;
6408 alias_page
->free_when_done
= TRUE
;
6410 * since m is a page in the upl it must
6411 * already be wired or BUSY, so it's
6412 * safe to assign the underlying physical
6415 VM_PAGE_SET_PHYS_PAGE(alias_page
, VM_PAGE_GET_PHYS_PAGE(m
));
6417 vm_object_unlock(object
);
6419 vm_page_lockspin_queues();
6420 vm_page_wire(alias_page
, VM_KERN_MEMORY_NONE
, TRUE
);
6421 vm_page_unlock_queues();
6423 vm_page_insert_wired(alias_page
, upl
->map_object
, new_offset
, VM_KERN_MEMORY_NONE
);
6425 assert(!alias_page
->wanted
);
6426 alias_page
->busy
= FALSE
;
6427 alias_page
->absent
= FALSE
;
6430 offset
+= PAGE_SIZE_64
;
6431 new_offset
+= PAGE_SIZE_64
;
6433 vm_object_unlock(upl
->map_object
);
6435 if (upl
->flags
& UPL_SHADOWED
)
6438 offset
= upl
->offset
- upl
->map_object
->paging_offset
;
6442 vm_object_reference(upl
->map_object
);
6447 * NEED A UPL_MAP ALIAS
6449 kr
= vm_map_enter(map
, dst_addr
, (vm_map_size_t
)size
, (vm_map_offset_t
) 0,
6450 VM_FLAGS_ANYWHERE
, VM_MAP_KERNEL_FLAGS_NONE
, VM_KERN_MEMORY_OSFMK
,
6451 upl
->map_object
, offset
, FALSE
,
6452 VM_PROT_DEFAULT
, VM_PROT_ALL
, VM_INHERIT_DEFAULT
);
6454 if (kr
!= KERN_SUCCESS
) {
6455 vm_object_deallocate(upl
->map_object
);
6461 kr
= vm_map_enter(map
, dst_addr
, (vm_map_size_t
)size
, (vm_map_offset_t
) 0,
6462 VM_FLAGS_FIXED
, VM_MAP_KERNEL_FLAGS_NONE
, VM_KERN_MEMORY_OSFMK
,
6463 upl
->map_object
, offset
, FALSE
,
6464 VM_PROT_DEFAULT
, VM_PROT_ALL
, VM_INHERIT_DEFAULT
);
6466 panic("vm_map_enter failed for a Vector UPL\n");
6468 vm_object_lock(upl
->map_object
);
6470 for (addr
= *dst_addr
; size
> 0; size
-= PAGE_SIZE
, addr
+= PAGE_SIZE
) {
6471 m
= vm_page_lookup(upl
->map_object
, offset
);
6476 /* CODE SIGNING ENFORCEMENT: page has been wpmapped,
6477 * but only in kernel space. If this was on a user map,
6478 * we'd have to set the wpmapped bit. */
6479 /* m->wpmapped = TRUE; */
6480 assert(map
->pmap
== kernel_pmap
);
6482 PMAP_ENTER(map
->pmap
, addr
, m
, VM_PROT_DEFAULT
, VM_PROT_NONE
, 0, TRUE
, kr
);
6484 assert(kr
== KERN_SUCCESS
);
6486 kasan_notify_address(addr
, PAGE_SIZE_64
);
6489 offset
+= PAGE_SIZE_64
;
6491 vm_object_unlock(upl
->map_object
);
6494 * hold a reference for the mapping
6497 upl
->flags
|= UPL_PAGE_LIST_MAPPED
;
6498 upl
->kaddr
= (vm_offset_t
) *dst_addr
;
6499 assert(upl
->kaddr
== *dst_addr
);
6502 goto process_upl_to_enter
;
6506 return KERN_SUCCESS
;
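/*
 * A sketch of how the enter routine above is typically used by a kernel
 * caller once a UPL exists (assuming the usual vm_map_enter_upl() /
 * vm_map_remove_upl() entry points; the call site itself is illustrative):
 *
 *	vm_map_offset_t	dst_addr;
 *
 *	kr = vm_map_enter_upl(kernel_map, upl, &dst_addr);
 *	if (kr == KERN_SUCCESS) {
 *		... access the wired pages through dst_addr ...
 *		vm_map_remove_upl(kernel_map, upl);
 *	}
 *
 * The mapping takes its own reference on the UPL and marks it
 * UPL_PAGE_LIST_MAPPED; the remove path below drops both again.
 */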
/*
 * Internal routine to remove a UPL mapping from a VM map.
 *
 * XXX - This should just be doable through a standard
 * vm_map_remove() operation.  Otherwise, implicit clean-up
 * of the target map won't be able to correctly remove
 * these (and release the reference on the UPL).  Having
 * to do this means we can't map these into user-space
 * processes (yet).
 */
6526 int isVectorUPL
= 0, curr_upl
= 0;
6527 upl_t vector_upl
= NULL
;
6529 if (upl
== UPL_NULL
)
6530 return KERN_INVALID_ARGUMENT
;
6532 if((isVectorUPL
= vector_upl_is_valid(upl
))) {
6533 int unmapped
=0, valid_upls
=0;
6535 upl_lock(vector_upl
);
6536 for(curr_upl
=0; curr_upl
< MAX_VECTOR_UPL_ELEMENTS
; curr_upl
++) {
6537 upl
= vector_upl_subupl_byindex(vector_upl
, curr_upl
);
6541 if (!(UPL_PAGE_LIST_MAPPED
& upl
->flags
))
			if (unmapped != valid_upls)
				panic("%d of the %d sub-upls within the Vector UPL are not mapped\n", unmapped, valid_upls);
			upl_unlock(vector_upl);
			return KERN_FAILURE;
6558 process_upl_to_remove
:
6560 if(curr_upl
== MAX_VECTOR_UPL_ELEMENTS
) {
6561 vm_map_t v_upl_submap
;
6562 vm_offset_t v_upl_submap_dst_addr
;
6563 vector_upl_get_submap(vector_upl
, &v_upl_submap
, &v_upl_submap_dst_addr
);
6565 vm_map_remove(map
, v_upl_submap_dst_addr
, v_upl_submap_dst_addr
+ vector_upl
->size
, VM_MAP_NO_FLAGS
);
6566 vm_map_deallocate(v_upl_submap
);
6567 upl_unlock(vector_upl
);
6568 return KERN_SUCCESS
;
6571 upl
= vector_upl_subupl_byindex(vector_upl
, curr_upl
++ );
6573 goto process_upl_to_remove
;
6576 if (upl
->flags
& UPL_PAGE_LIST_MAPPED
) {
6580 assert(upl
->ref_count
> 1);
6581 upl
->ref_count
--; /* removing mapping ref */
6583 upl
->flags
&= ~UPL_PAGE_LIST_MAPPED
;
6584 upl
->kaddr
= (vm_offset_t
) 0;
6591 vm_map_trunc_page(addr
,
6592 VM_MAP_PAGE_MASK(map
)),
6593 vm_map_round_page(addr
+ size
,
6594 VM_MAP_PAGE_MASK(map
)),
6597 return KERN_SUCCESS
;
6601 * If it's a Vectored UPL, we'll be removing the entire
6602 * submap anyways, so no need to remove individual UPL
6603 * element mappings from within the submap
6605 goto process_upl_to_remove
;
6610 return KERN_FAILURE
;
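/*
 * Teardown note for the routine above: a vectored UPL keeps its per-element
 * mappings inside a dedicated submap, so the whole thing is undone with a
 * single vm_map_remove() of the submap range, while a plain UPL removes its
 * own range and drops the mapping reference taken in the enter path.
 */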
6617 upl_offset_t offset
,
6620 upl_page_info_t
*page_list
,
6621 mach_msg_type_number_t count
,
6624 upl_size_t xfer_size
, subupl_size
= size
;
6625 vm_object_t shadow_object
;
6627 vm_object_t m_object
;
6628 vm_object_offset_t target_offset
;
6629 upl_offset_t subupl_offset
= offset
;
6631 wpl_array_t lite_list
;
6633 int clear_refmod
= 0;
6634 int pgpgout_count
= 0;
6635 struct vm_page_delayed_work dw_array
[DEFAULT_DELAYED_WORK_LIMIT
];
6636 struct vm_page_delayed_work
*dwp
;
6639 int isVectorUPL
= 0;
6640 upl_t vector_upl
= NULL
;
6641 boolean_t should_be_throttled
= FALSE
;
6643 vm_page_t nxt_page
= VM_PAGE_NULL
;
6644 int fast_path_possible
= 0;
6645 int fast_path_full_commit
= 0;
6646 int throttle_page
= 0;
6647 int unwired_count
= 0;
6648 int local_queue_count
= 0;
6649 vm_page_t first_local
, last_local
;
6653 if (upl
== UPL_NULL
)
6654 return KERN_INVALID_ARGUMENT
;
6659 if((isVectorUPL
= vector_upl_is_valid(upl
))) {
6661 upl_lock(vector_upl
);
6666 process_upl_to_commit
:
6670 offset
= subupl_offset
;
6672 upl_unlock(vector_upl
);
6673 return KERN_SUCCESS
;
6675 upl
= vector_upl_subupl_byoffset(vector_upl
, &offset
, &size
);
6677 upl_unlock(vector_upl
);
6678 return KERN_FAILURE
;
6680 page_list
= UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(upl
);
6681 subupl_size
-= size
;
6682 subupl_offset
+= size
;
6686 if (upl
->upl_commit_index
< UPL_DEBUG_COMMIT_RECORDS
) {
6687 (void) OSBacktrace(&upl
->upl_commit_records
[upl
->upl_commit_index
].c_retaddr
[0], UPL_DEBUG_STACK_FRAMES
);
6689 upl
->upl_commit_records
[upl
->upl_commit_index
].c_beg
= offset
;
6690 upl
->upl_commit_records
[upl
->upl_commit_index
].c_end
= (offset
+ size
);
6692 upl
->upl_commit_index
++;
6695 if (upl
->flags
& UPL_DEVICE_MEMORY
)
6697 else if ((offset
+ size
) <= upl
->size
)
6703 upl_unlock(vector_upl
);
6705 return KERN_FAILURE
;
6707 if (upl
->flags
& UPL_SET_DIRTY
)
6708 flags
|= UPL_COMMIT_SET_DIRTY
;
6709 if (upl
->flags
& UPL_CLEAR_DIRTY
)
6710 flags
|= UPL_COMMIT_CLEAR_DIRTY
;
6712 if (upl
->flags
& UPL_INTERNAL
)
6713 lite_list
= (wpl_array_t
) ((((uintptr_t)upl
) + sizeof(struct upl
))
6714 + ((upl
->size
/PAGE_SIZE
) * sizeof(upl_page_info_t
)));
6716 lite_list
= (wpl_array_t
) (((uintptr_t)upl
) + sizeof(struct upl
));
6718 object
= upl
->map_object
;
6720 if (upl
->flags
& UPL_SHADOWED
) {
6721 vm_object_lock(object
);
6722 shadow_object
= object
->shadow
;
6724 shadow_object
= object
;
6726 entry
= offset
/PAGE_SIZE
;
6727 target_offset
= (vm_object_offset_t
)offset
;
6729 assert(!(target_offset
& PAGE_MASK
));
6730 assert(!(xfer_size
& PAGE_MASK
));
6732 if (upl
->flags
& UPL_KERNEL_OBJECT
)
6733 vm_object_lock_shared(shadow_object
);
6735 vm_object_lock(shadow_object
);
6737 VM_OBJECT_WIRED_PAGE_UPDATE_START(shadow_object
);
6739 if (upl
->flags
& UPL_ACCESS_BLOCKED
) {
6740 assert(shadow_object
->blocked_access
);
6741 shadow_object
->blocked_access
= FALSE
;
6742 vm_object_wakeup(object
, VM_OBJECT_EVENT_UNBLOCKED
);
	if (shadow_object->code_signed) {
		/*
		 * If the object is code-signed, do not let this UPL tell
		 * us if the pages are valid or not.  Let the pages be
		 * validated by VM the normal way (when they get mapped or
		 * copied in or executed).
		 */
		flags &= ~UPL_COMMIT_CS_VALIDATED;
	}
	if (!page_list) {
		/*
		 * No page list to get the code-signing info from !?
		 */
		flags &= ~UPL_COMMIT_CS_VALIDATED;
	}
6761 if (!VM_DYNAMIC_PAGING_ENABLED() && shadow_object
->internal
)
6762 should_be_throttled
= TRUE
;
6766 dw_limit
= DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT
);
6768 if ((upl
->flags
& UPL_IO_WIRE
) &&
6769 !(flags
& UPL_COMMIT_FREE_ABSENT
) &&
6771 shadow_object
->purgable
!= VM_PURGABLE_VOLATILE
&&
6772 shadow_object
->purgable
!= VM_PURGABLE_EMPTY
) {
6774 if (!vm_page_queue_empty(&shadow_object
->memq
)) {
6776 if (size
== shadow_object
->vo_size
) {
6777 nxt_page
= (vm_page_t
)vm_page_queue_first(&shadow_object
->memq
);
6778 fast_path_full_commit
= 1;
6780 fast_path_possible
= 1;
6782 if (!VM_DYNAMIC_PAGING_ENABLED() && shadow_object
->internal
&&
6783 (shadow_object
->purgable
== VM_PURGABLE_DENY
||
6784 shadow_object
->purgable
== VM_PURGABLE_NONVOLATILE
||
6785 shadow_object
->purgable
== VM_PURGABLE_VOLATILE
)) {
6790 first_local
= VM_PAGE_NULL
;
6791 last_local
= VM_PAGE_NULL
;
6801 if (upl
->flags
& UPL_LITE
) {
6802 unsigned int pg_num
;
6804 if (nxt_page
!= VM_PAGE_NULL
) {
6806 nxt_page
= (vm_page_t
)vm_page_queue_next(&nxt_page
->listq
);
6807 target_offset
= m
->offset
;
6809 pg_num
= (unsigned int) (target_offset
/PAGE_SIZE
);
6810 assert(pg_num
== target_offset
/PAGE_SIZE
);
6812 if (lite_list
[pg_num
>>5] & (1 << (pg_num
& 31))) {
6813 lite_list
[pg_num
>>5] &= ~(1 << (pg_num
& 31));
6815 if (!(upl
->flags
& UPL_KERNEL_OBJECT
) && m
== VM_PAGE_NULL
)
6816 m
= vm_page_lookup(shadow_object
, target_offset
+ (upl
->offset
- shadow_object
->paging_offset
));
6820 if (upl
->flags
& UPL_SHADOWED
) {
6821 if ((t
= vm_page_lookup(object
, target_offset
)) != VM_PAGE_NULL
) {
6823 t
->free_when_done
= FALSE
;
6827 if (!(upl
->flags
& UPL_KERNEL_OBJECT
) && m
== VM_PAGE_NULL
)
6828 m
= vm_page_lookup(shadow_object
, target_offset
+ object
->vo_shadow_offset
);
6831 if (m
== VM_PAGE_NULL
)
6832 goto commit_next_page
;
6834 m_object
= VM_PAGE_OBJECT(m
);
6836 if (m
->vm_page_q_state
== VM_PAGE_USED_BY_COMPRESSOR
) {
6839 dwp
->dw_mask
|= (DW_clear_busy
| DW_PAGE_WAKEUP
);
6840 goto commit_next_page
;
6843 if (flags
& UPL_COMMIT_CS_VALIDATED
) {
6846 * Set the code signing bits according to
6847 * what the UPL says they should be.
6849 m
->cs_validated
= page_list
[entry
].cs_validated
;
6850 m
->cs_tainted
= page_list
[entry
].cs_tainted
;
6851 m
->cs_nx
= page_list
[entry
].cs_nx
;
6853 if (flags
& UPL_COMMIT_WRITTEN_BY_KERNEL
)
6854 m
->written_by_kernel
= TRUE
;
6856 if (upl
->flags
& UPL_IO_WIRE
) {
6859 page_list
[entry
].phys_addr
= 0;
6861 if (flags
& UPL_COMMIT_SET_DIRTY
) {
6862 SET_PAGE_DIRTY(m
, FALSE
);
6863 } else if (flags
& UPL_COMMIT_CLEAR_DIRTY
) {
6866 if (! (flags
& UPL_COMMIT_CS_VALIDATED
) &&
6867 m
->cs_validated
&& !m
->cs_tainted
) {
6870 * This page is no longer dirty
6871 * but could have been modified,
6872 * so it will need to be
6876 panic("upl_commit_range(%p): page %p was slid\n",
6880 m
->cs_validated
= FALSE
;
6881 #if DEVELOPMENT || DEBUG
6882 vm_cs_validated_resets
++;
6884 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m
));
6886 clear_refmod
|= VM_MEM_MODIFIED
;
6888 if (upl
->flags
& UPL_ACCESS_BLOCKED
) {
6890 * We blocked access to the pages in this UPL.
6891 * Clear the "busy" bit and wake up any waiter
6894 dwp
->dw_mask
|= (DW_clear_busy
| DW_PAGE_WAKEUP
);
6896 if (fast_path_possible
) {
6897 assert(m_object
->purgable
!= VM_PURGABLE_EMPTY
);
6898 assert(m_object
->purgable
!= VM_PURGABLE_VOLATILE
);
6900 assert(m
->vm_page_q_state
== VM_PAGE_NOT_ON_Q
);
6901 assert(m
->wire_count
== 0);
6905 dwp
->dw_mask
|= (DW_clear_busy
| DW_PAGE_WAKEUP
);
6907 if (m
->wire_count
== 0)
6908 panic("wire_count == 0, m = %p, obj = %p\n", m
, shadow_object
);
6909 assert(m
->vm_page_q_state
== VM_PAGE_IS_WIRED
);
6912 * XXX FBDP need to update some other
6913 * counters here (purgeable_wired_count)
6916 assert(m
->wire_count
> 0);
6919 if (m
->wire_count
== 0) {
6920 m
->vm_page_q_state
= VM_PAGE_NOT_ON_Q
;
6924 if (m
->wire_count
== 0) {
6925 assert(m
->pageq
.next
== 0 && m
->pageq
.prev
== 0);
6927 if (last_local
== VM_PAGE_NULL
) {
6928 assert(first_local
== VM_PAGE_NULL
);
6933 assert(first_local
!= VM_PAGE_NULL
);
6935 m
->pageq
.next
= VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local
);
6936 first_local
->pageq
.prev
= VM_PAGE_CONVERT_TO_QUEUE_ENTRY(m
);
6939 local_queue_count
++;
6941 if (throttle_page
) {
6942 m
->vm_page_q_state
= VM_PAGE_ON_THROTTLED_Q
;
6944 if (flags
& UPL_COMMIT_INACTIVATE
) {
6945 if (shadow_object
->internal
)
6946 m
->vm_page_q_state
= VM_PAGE_ON_INACTIVE_INTERNAL_Q
;
6948 m
->vm_page_q_state
= VM_PAGE_ON_INACTIVE_EXTERNAL_Q
;
6950 m
->vm_page_q_state
= VM_PAGE_ON_ACTIVE_Q
;
6954 if (flags
& UPL_COMMIT_INACTIVATE
) {
6955 dwp
->dw_mask
|= DW_vm_page_deactivate_internal
;
6956 clear_refmod
|= VM_MEM_REFERENCED
;
6959 if (flags
& UPL_COMMIT_FREE_ABSENT
)
6960 dwp
->dw_mask
|= DW_vm_page_free
;
6963 dwp
->dw_mask
|= (DW_clear_busy
| DW_PAGE_WAKEUP
);
6965 if ( !(dwp
->dw_mask
& DW_vm_page_deactivate_internal
))
6966 dwp
->dw_mask
|= DW_vm_page_activate
;
6969 dwp
->dw_mask
|= DW_vm_page_unwire
;
6971 goto commit_next_page
;
6973 assert(m
->vm_page_q_state
!= VM_PAGE_USED_BY_COMPRESSOR
);
6976 page_list
[entry
].phys_addr
= 0;
6979 * make sure to clear the hardware
6980 * modify or reference bits before
6981 * releasing the BUSY bit on this page
6982 * otherwise we risk losing a legitimate
6985 if (flags
& UPL_COMMIT_CLEAR_DIRTY
) {
6988 clear_refmod
|= VM_MEM_MODIFIED
;
6991 dwp
->dw_mask
|= DW_vm_pageout_throttle_up
;
6993 if (VM_PAGE_WIRED(m
))
6994 m
->free_when_done
= FALSE
;
6996 if (! (flags
& UPL_COMMIT_CS_VALIDATED
) &&
6997 m
->cs_validated
&& !m
->cs_tainted
) {
7000 * This page is no longer dirty
7001 * but could have been modified,
7002 * so it will need to be
7006 panic("upl_commit_range(%p): page %p was slid\n",
7010 m
->cs_validated
= FALSE
;
7011 #if DEVELOPMENT || DEBUG
7012 vm_cs_validated_resets
++;
7014 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m
));
7016 if (m
->overwriting
) {
7018 * the (COPY_OUT_FROM == FALSE) request_page_list case
7021 #if CONFIG_PHANTOM_CACHE
7022 if (m
->absent
&& !m_object
->internal
)
7023 dwp
->dw_mask
|= DW_vm_phantom_cache_update
;
7027 dwp
->dw_mask
|= DW_clear_busy
;
7030 * alternate (COPY_OUT_FROM == FALSE) page_list case
7031 * Occurs when the original page was wired
7032 * at the time of the list request
7034 assert(VM_PAGE_WIRED(m
));
7036 dwp
->dw_mask
|= DW_vm_page_unwire
; /* reactivates */
7038 m
->overwriting
= FALSE
;
7040 m
->cleaning
= FALSE
;
7042 if (m
->free_when_done
) {
				 * With the clean queue enabled, UPL_PAGEOUT should
				 * no longer set the pageout bit. Its pages now go
				 * to the clean queue.
7048 assert(!(flags
& UPL_PAGEOUT
));
7049 assert(!m_object
->internal
);
7051 m
->free_when_done
= FALSE
;
7052 #if MACH_CLUSTER_STATS
7053 if (m
->wanted
) vm_pageout_target_collisions
++;
7055 if ((flags
& UPL_COMMIT_SET_DIRTY
) ||
7056 (m
->pmapped
&& (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m
)) & VM_MEM_MODIFIED
))) {
7058 * page was re-dirtied after we started
7059 * the pageout... reactivate it since
7060 * we don't know whether the on-disk
7061 * copy matches what is now in memory
7063 SET_PAGE_DIRTY(m
, FALSE
);
7065 dwp
->dw_mask
|= DW_vm_page_activate
| DW_PAGE_WAKEUP
;
7067 if (upl
->flags
& UPL_PAGEOUT
) {
7068 CLUSTER_STAT(vm_pageout_target_page_dirtied
++;)
7069 VM_STAT_INCR(reactivations
);
7070 DTRACE_VM2(pgrec
, int, 1, (uint64_t *), NULL
);
7074 * page has been successfully cleaned
7075 * go ahead and free it for other use
7077 if (m_object
->internal
) {
7078 DTRACE_VM2(anonpgout
, int, 1, (uint64_t *), NULL
);
7080 DTRACE_VM2(fspgout
, int, 1, (uint64_t *), NULL
);
7085 dwp
->dw_mask
|= DW_vm_page_free
;
7087 goto commit_next_page
;
7089 #if MACH_CLUSTER_STATS
7091 m
->dirty
= pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m
));
7093 if (m
->dirty
) vm_pageout_cluster_dirtied
++;
7094 else vm_pageout_cluster_cleaned
++;
7095 if (m
->wanted
) vm_pageout_cluster_collisions
++;
7098 * It is a part of the semantic of COPYOUT_FROM
7099 * UPLs that a commit implies cache sync
7100 * between the vm page and the backing store
7101 * this can be used to strip the precious bit
7104 if ((upl
->flags
& UPL_PAGE_SYNC_DONE
) || (flags
& UPL_COMMIT_CLEAR_PRECIOUS
))
7105 m
->precious
= FALSE
;
7107 if (flags
& UPL_COMMIT_SET_DIRTY
) {
7108 SET_PAGE_DIRTY(m
, FALSE
);
7113 /* with the clean queue on, move *all* cleaned pages to the clean queue */
7114 if (hibernate_cleaning_in_progress
== FALSE
&& !m
->dirty
&& (upl
->flags
& UPL_PAGEOUT
)) {
7117 VM_STAT_INCR(pageouts
);
7118 DTRACE_VM2(pgout
, int, 1, (uint64_t *), NULL
);
7120 dwp
->dw_mask
|= DW_enqueue_cleaned
;
7121 vm_pageout_enqueued_cleaned_from_inactive_dirty
++;
7122 } else if (should_be_throttled
== TRUE
&& (m
->vm_page_q_state
== VM_PAGE_NOT_ON_Q
)) {
7124 * page coming back in from being 'frozen'...
7125 * it was dirty before it was frozen, so keep it so
7126 * the vm_page_activate will notice that it really belongs
7127 * on the throttle queue and put it there
7129 SET_PAGE_DIRTY(m
, FALSE
);
7130 dwp
->dw_mask
|= DW_vm_page_activate
;
7133 if ((flags
& UPL_COMMIT_INACTIVATE
) && !m
->clustered
&& (m
->vm_page_q_state
!= VM_PAGE_ON_SPECULATIVE_Q
)) {
7134 dwp
->dw_mask
|= DW_vm_page_deactivate_internal
;
7135 clear_refmod
|= VM_MEM_REFERENCED
;
7136 } else if ( !VM_PAGE_PAGEABLE(m
)) {
7138 if (m
->clustered
|| (flags
& UPL_COMMIT_SPECULATE
))
7139 dwp
->dw_mask
|= DW_vm_page_speculate
;
7140 else if (m
->reference
)
7141 dwp
->dw_mask
|= DW_vm_page_activate
;
7143 dwp
->dw_mask
|= DW_vm_page_deactivate_internal
;
7144 clear_refmod
|= VM_MEM_REFERENCED
;
7148 if (upl
->flags
& UPL_ACCESS_BLOCKED
) {
			 * We blocked access to the pages in this UPL.
7151 * Clear the "busy" bit on this page before we
7152 * wake up any waiter.
7154 dwp
->dw_mask
|= DW_clear_busy
;
7157 * Wakeup any thread waiting for the page to be un-cleaning.
7159 dwp
->dw_mask
|= DW_PAGE_WAKEUP
;
7163 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(m
), clear_refmod
);
7165 target_offset
+= PAGE_SIZE_64
;
7166 xfer_size
-= PAGE_SIZE
;
7170 if (dwp
->dw_mask
& ~(DW_clear_busy
| DW_PAGE_WAKEUP
)) {
7171 VM_PAGE_ADD_DELAYED_WORK(dwp
, m
, dw_count
);
7173 if (dw_count
>= dw_limit
) {
7174 vm_page_do_delayed_work(shadow_object
, VM_KERN_MEMORY_NONE
, &dw_array
[0], dw_count
);
7180 if (dwp
->dw_mask
& DW_clear_busy
)
7183 if (dwp
->dw_mask
& DW_PAGE_WAKEUP
)
7189 vm_page_do_delayed_work(shadow_object
, VM_KERN_MEMORY_NONE
, &dw_array
[0], dw_count
);
7191 if (fast_path_possible
) {
7193 assert(shadow_object
->purgable
!= VM_PURGABLE_VOLATILE
);
7194 assert(shadow_object
->purgable
!= VM_PURGABLE_EMPTY
);
7196 if (local_queue_count
|| unwired_count
) {
7198 if (local_queue_count
) {
7199 vm_page_t first_target
;
7200 vm_page_queue_head_t
*target_queue
;
7203 target_queue
= &vm_page_queue_throttled
;
7205 if (flags
& UPL_COMMIT_INACTIVATE
) {
7206 if (shadow_object
->internal
)
7207 target_queue
= &vm_page_queue_anonymous
;
7209 target_queue
= &vm_page_queue_inactive
;
7211 target_queue
= &vm_page_queue_active
;
7214 * Transfer the entire local queue to a regular LRU page queues.
7216 vm_page_lockspin_queues();
7218 first_target
= (vm_page_t
) vm_page_queue_first(target_queue
);
7220 if (vm_page_queue_empty(target_queue
))
7221 target_queue
->prev
= VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local
);
7223 first_target
->pageq
.prev
= VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local
);
7225 target_queue
->next
= VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local
);
7226 first_local
->pageq
.prev
= VM_PAGE_CONVERT_TO_QUEUE_ENTRY(target_queue
);
7227 last_local
->pageq
.next
= VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_target
);
7230 * Adjust the global page counts.
7232 if (throttle_page
) {
7233 vm_page_throttled_count
+= local_queue_count
;
7235 if (flags
& UPL_COMMIT_INACTIVATE
) {
7236 if (shadow_object
->internal
)
7237 vm_page_anonymous_count
+= local_queue_count
;
7238 vm_page_inactive_count
+= local_queue_count
;
7240 token_new_pagecount
+= local_queue_count
;
7242 vm_page_active_count
+= local_queue_count
;
7244 if (shadow_object
->internal
)
7245 vm_page_pageable_internal_count
+= local_queue_count
;
7247 vm_page_pageable_external_count
+= local_queue_count
;
7250 vm_page_lockspin_queues();
7252 if (unwired_count
) {
7253 vm_page_wire_count
-= unwired_count
;
7254 VM_CHECK_MEMORYSTATUS
;
7256 vm_page_unlock_queues();
7258 VM_OBJECT_WIRED_PAGE_COUNT(shadow_object
, -unwired_count
);
7263 if (upl
->flags
& UPL_DEVICE_MEMORY
) {
7265 } else if (upl
->flags
& UPL_LITE
) {
7271 if (!fast_path_full_commit
) {
7272 pg_num
= upl
->size
/PAGE_SIZE
;
7273 pg_num
= (pg_num
+ 31) >> 5;
7275 for (i
= 0; i
< pg_num
; i
++) {
7276 if (lite_list
[i
] != 0) {
7283 if (vm_page_queue_empty(&upl
->map_object
->memq
))
7286 if (occupied
== 0) {
7288 * If this UPL element belongs to a Vector UPL and is
7289 * empty, then this is the right function to deallocate
7290 * it. So go ahead set the *empty variable. The flag
7291 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view
7292 * should be considered relevant for the Vector UPL and not
7293 * the internal UPLs.
7295 if ((upl
->flags
& UPL_COMMIT_NOTIFY_EMPTY
) || isVectorUPL
)
7298 if (object
== shadow_object
&& !(upl
->flags
& UPL_KERNEL_OBJECT
)) {
7300 * this is not a paging object
7301 * so we need to drop the paging reference
7302 * that was taken when we created the UPL
7303 * against this object
7305 vm_object_activity_end(shadow_object
);
7306 vm_object_collapse(shadow_object
, 0, TRUE
);
		 * we donated the paging reference to
7310 * the map object... vm_pageout_object_terminate
7311 * will drop this reference
7315 VM_OBJECT_WIRED_PAGE_UPDATE_END(shadow_object
, shadow_object
->wire_tag
);
7316 vm_object_unlock(shadow_object
);
7317 if (object
!= shadow_object
)
7318 vm_object_unlock(object
);
7324 * If we completed our operations on an UPL that is
7325 * part of a Vectored UPL and if empty is TRUE, then
7326 * we should go ahead and deallocate this UPL element.
7327 * Then we check if this was the last of the UPL elements
7328 * within that Vectored UPL. If so, set empty to TRUE
7329 * so that in ubc_upl_commit_range or ubc_upl_commit, we
7330 * can go ahead and deallocate the Vector UPL too.
7333 *empty
= vector_upl_set_subupl(vector_upl
, upl
, 0);
7334 upl_deallocate(upl
);
7336 goto process_upl_to_commit
;
7338 if (pgpgout_count
) {
7339 DTRACE_VM2(pgpgout
, int, pgpgout_count
, (uint64_t *), NULL
);
7342 return KERN_SUCCESS
;
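/*
 * Summary of the commit loop above: each page named by the UPL has its
 * lite-list bit cleared and its state (busy, dirty, precious, wired, queue
 * placement) updated according to the UPL flags and the commit flags, with
 * the page-queue work batched through the delayed-work array so the queue
 * lock is taken once per batch rather than once per page.
 */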
7348 upl_offset_t offset
,
7353 upl_page_info_t
*user_page_list
= NULL
;
7354 upl_size_t xfer_size
, subupl_size
= size
;
7355 vm_object_t shadow_object
;
7357 vm_object_offset_t target_offset
;
7358 upl_offset_t subupl_offset
= offset
;
7360 wpl_array_t lite_list
;
7362 struct vm_page_delayed_work dw_array
[DEFAULT_DELAYED_WORK_LIMIT
];
7363 struct vm_page_delayed_work
*dwp
;
7366 int isVectorUPL
= 0;
7367 upl_t vector_upl
= NULL
;
7371 if (upl
== UPL_NULL
)
7372 return KERN_INVALID_ARGUMENT
;
7374 if ( (upl
->flags
& UPL_IO_WIRE
) && !(error
& UPL_ABORT_DUMP_PAGES
) )
7375 return upl_commit_range(upl
, offset
, size
, UPL_COMMIT_FREE_ABSENT
, NULL
, 0, empty
);
7377 if((isVectorUPL
= vector_upl_is_valid(upl
))) {
7379 upl_lock(vector_upl
);
7384 process_upl_to_abort
:
7387 offset
= subupl_offset
;
7389 upl_unlock(vector_upl
);
7390 return KERN_SUCCESS
;
7392 upl
= vector_upl_subupl_byoffset(vector_upl
, &offset
, &size
);
7394 upl_unlock(vector_upl
);
7395 return KERN_FAILURE
;
7397 subupl_size
-= size
;
7398 subupl_offset
+= size
;
7404 if (upl
->upl_commit_index
< UPL_DEBUG_COMMIT_RECORDS
) {
7405 (void) OSBacktrace(&upl
->upl_commit_records
[upl
->upl_commit_index
].c_retaddr
[0], UPL_DEBUG_STACK_FRAMES
);
7407 upl
->upl_commit_records
[upl
->upl_commit_index
].c_beg
= offset
;
7408 upl
->upl_commit_records
[upl
->upl_commit_index
].c_end
= (offset
+ size
);
7409 upl
->upl_commit_records
[upl
->upl_commit_index
].c_aborted
= 1;
7411 upl
->upl_commit_index
++;
7414 if (upl
->flags
& UPL_DEVICE_MEMORY
)
7416 else if ((offset
+ size
) <= upl
->size
)
7422 upl_unlock(vector_upl
);
7425 return KERN_FAILURE
;
7427 if (upl
->flags
& UPL_INTERNAL
) {
7428 lite_list
= (wpl_array_t
)
7429 ((((uintptr_t)upl
) + sizeof(struct upl
))
7430 + ((upl
->size
/PAGE_SIZE
) * sizeof(upl_page_info_t
)));
7432 user_page_list
= (upl_page_info_t
*) (((uintptr_t)upl
) + sizeof(struct upl
));
7434 lite_list
= (wpl_array_t
)
7435 (((uintptr_t)upl
) + sizeof(struct upl
));
7437 object
= upl
->map_object
;
7439 if (upl
->flags
& UPL_SHADOWED
) {
7440 vm_object_lock(object
);
7441 shadow_object
= object
->shadow
;
7443 shadow_object
= object
;
7445 entry
= offset
/PAGE_SIZE
;
7446 target_offset
= (vm_object_offset_t
)offset
;
7448 assert(!(target_offset
& PAGE_MASK
));
7449 assert(!(xfer_size
& PAGE_MASK
));
7451 if (upl
->flags
& UPL_KERNEL_OBJECT
)
7452 vm_object_lock_shared(shadow_object
);
7454 vm_object_lock(shadow_object
);
7456 if (upl
->flags
& UPL_ACCESS_BLOCKED
) {
7457 assert(shadow_object
->blocked_access
);
7458 shadow_object
->blocked_access
= FALSE
;
7459 vm_object_wakeup(object
, VM_OBJECT_EVENT_UNBLOCKED
);
7464 dw_limit
= DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT
);
7466 if ((error
& UPL_ABORT_DUMP_PAGES
) && (upl
->flags
& UPL_KERNEL_OBJECT
))
7467 panic("upl_abort_range: kernel_object being DUMPED");
7471 unsigned int pg_num
;
7474 pg_num
= (unsigned int) (target_offset
/PAGE_SIZE
);
7475 assert(pg_num
== target_offset
/PAGE_SIZE
);
7480 needed
= user_page_list
[pg_num
].needed
;
7485 if (upl
->flags
& UPL_LITE
) {
7487 if (lite_list
[pg_num
>>5] & (1 << (pg_num
& 31))) {
7488 lite_list
[pg_num
>>5] &= ~(1 << (pg_num
& 31));
7490 if ( !(upl
->flags
& UPL_KERNEL_OBJECT
))
7491 m
= vm_page_lookup(shadow_object
, target_offset
+
7492 (upl
->offset
- shadow_object
->paging_offset
));
7495 if (upl
->flags
& UPL_SHADOWED
) {
7496 if ((t
= vm_page_lookup(object
, target_offset
)) != VM_PAGE_NULL
) {
7497 t
->free_when_done
= FALSE
;
7501 if (m
== VM_PAGE_NULL
)
7502 m
= vm_page_lookup(shadow_object
, target_offset
+ object
->vo_shadow_offset
);
7505 if ((upl
->flags
& UPL_KERNEL_OBJECT
))
7506 goto abort_next_page
;
7508 if (m
!= VM_PAGE_NULL
) {
7510 assert(m
->vm_page_q_state
!= VM_PAGE_USED_BY_COMPRESSOR
);
7513 boolean_t must_free
= TRUE
;
7516 * COPYOUT = FALSE case
7517 * check for error conditions which must
7518 * be passed back to the pages customer
7520 if (error
& UPL_ABORT_RESTART
) {
7525 } else if (error
& UPL_ABORT_UNAVAILABLE
) {
7529 } else if (error
& UPL_ABORT_ERROR
) {
7536 if (m
->clustered
&& needed
== FALSE
) {
7538 * This page was a part of a speculative
7539 * read-ahead initiated by the kernel
7540 * itself. No one is expecting this
7541 * page and no one will clean up its
7542 * error state if it ever becomes valid
7544 * We have to free it here.
7548 m
->cleaning
= FALSE
;
7550 if (m
->overwriting
&& !m
->busy
) {
7552 * this shouldn't happen since
7553 * this is an 'absent' page, but
7554 * it doesn't hurt to check for
7555 * the 'alternate' method of
7556 * stabilizing the page...
7557 * we will mark 'busy' to be cleared
7558 * in the following code which will
					 * take care of the primary stabilization
7560 * method (i.e. setting 'busy' to TRUE)
7562 dwp
->dw_mask
|= DW_vm_page_unwire
;
7564 m
->overwriting
= FALSE
;
7566 dwp
->dw_mask
|= (DW_clear_busy
| DW_PAGE_WAKEUP
);
7568 if (must_free
== TRUE
)
7569 dwp
->dw_mask
|= DW_vm_page_free
;
7571 dwp
->dw_mask
|= DW_vm_page_activate
;
7574 * Handle the trusted pager throttle.
7577 dwp
->dw_mask
|= DW_vm_pageout_throttle_up
;
7579 if (upl
->flags
& UPL_ACCESS_BLOCKED
) {
7581 * We blocked access to the pages in this UPL.
7582 * Clear the "busy" bit and wake up any waiter
7585 dwp
->dw_mask
|= DW_clear_busy
;
7587 if (m
->overwriting
) {
7589 dwp
->dw_mask
|= DW_clear_busy
;
7592 * deal with the 'alternate' method
7593 * of stabilizing the page...
7594 * we will either free the page
7595 * or mark 'busy' to be cleared
7596 * in the following code which will
					 * take care of the primary stabilization
7598 * method (i.e. setting 'busy' to TRUE)
7600 dwp
->dw_mask
|= DW_vm_page_unwire
;
7602 m
->overwriting
= FALSE
;
7604 m
->free_when_done
= FALSE
;
7605 m
->cleaning
= FALSE
;
7607 if (error
& UPL_ABORT_DUMP_PAGES
) {
7608 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m
));
7610 dwp
->dw_mask
|= DW_vm_page_free
;
7612 if (!(dwp
->dw_mask
& DW_vm_page_unwire
)) {
7613 if (error
& UPL_ABORT_REFERENCE
) {
					 * we've been told to explicitly
7616 * reference this page... for
7617 * file I/O, this is done by
7618 * implementing an LRU on the inactive q
7620 dwp
->dw_mask
|= DW_vm_page_lru
;
7622 } else if ( !VM_PAGE_PAGEABLE(m
))
7623 dwp
->dw_mask
|= DW_vm_page_deactivate_internal
;
7625 dwp
->dw_mask
|= DW_PAGE_WAKEUP
;
7630 target_offset
+= PAGE_SIZE_64
;
7631 xfer_size
-= PAGE_SIZE
;
7635 if (dwp
->dw_mask
& ~(DW_clear_busy
| DW_PAGE_WAKEUP
)) {
7636 VM_PAGE_ADD_DELAYED_WORK(dwp
, m
, dw_count
);
7638 if (dw_count
>= dw_limit
) {
7639 vm_page_do_delayed_work(shadow_object
, VM_KERN_MEMORY_NONE
, &dw_array
[0], dw_count
);
7645 if (dwp
->dw_mask
& DW_clear_busy
)
7648 if (dwp
->dw_mask
& DW_PAGE_WAKEUP
)
7654 vm_page_do_delayed_work(shadow_object
, VM_KERN_MEMORY_NONE
, &dw_array
[0], dw_count
);
7658 if (upl
->flags
& UPL_DEVICE_MEMORY
) {
7660 } else if (upl
->flags
& UPL_LITE
) {
7664 pg_num
= upl
->size
/PAGE_SIZE
;
7665 pg_num
= (pg_num
+ 31) >> 5;
7668 for (i
= 0; i
< pg_num
; i
++) {
7669 if (lite_list
[i
] != 0) {
7675 if (vm_page_queue_empty(&upl
->map_object
->memq
))
7678 if (occupied
== 0) {
7680 * If this UPL element belongs to a Vector UPL and is
7681 * empty, then this is the right function to deallocate
7682 * it. So go ahead set the *empty variable. The flag
7683 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view
7684 * should be considered relevant for the Vector UPL and
7685 * not the internal UPLs.
7687 if ((upl
->flags
& UPL_COMMIT_NOTIFY_EMPTY
) || isVectorUPL
)
7690 if (object
== shadow_object
&& !(upl
->flags
& UPL_KERNEL_OBJECT
)) {
7692 * this is not a paging object
7693 * so we need to drop the paging reference
7694 * that was taken when we created the UPL
7695 * against this object
7697 vm_object_activity_end(shadow_object
);
7698 vm_object_collapse(shadow_object
, 0, TRUE
);
		 * we donated the paging reference to
7702 * the map object... vm_pageout_object_terminate
7703 * will drop this reference
7707 vm_object_unlock(shadow_object
);
7708 if (object
!= shadow_object
)
7709 vm_object_unlock(object
);
7715 * If we completed our operations on an UPL that is
7716 * part of a Vectored UPL and if empty is TRUE, then
7717 * we should go ahead and deallocate this UPL element.
7718 * Then we check if this was the last of the UPL elements
7719 * within that Vectored UPL. If so, set empty to TRUE
7720 * so that in ubc_upl_abort_range or ubc_upl_abort, we
7721 * can go ahead and deallocate the Vector UPL too.
7723 if(*empty
== TRUE
) {
7724 *empty
= vector_upl_set_subupl(vector_upl
, upl
,0);
7725 upl_deallocate(upl
);
7727 goto process_upl_to_abort
;
7730 return KERN_SUCCESS
;
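/*
 * The abort path above mirrors the commit path: pages are un-busied and,
 * depending on the UPL_ABORT_* error bits, freed, restarted, marked
 * unavailable or simply released, using the same delayed-work batching as
 * the commit loop.
 */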
7741 if (upl
== UPL_NULL
)
7742 return KERN_INVALID_ARGUMENT
;
7744 return upl_abort_range(upl
, 0, upl
->size
, error
, &empty
);
7748 /* an option on commit should be wire */
7752 upl_page_info_t
*page_list
,
7753 mach_msg_type_number_t count
)
7757 if (upl
== UPL_NULL
)
7758 return KERN_INVALID_ARGUMENT
;
7760 return upl_commit_range(upl
, 0, upl
->size
, 0, page_list
, count
, &empty
);
7771 vm_page_t m
, nxt_page
= VM_PAGE_NULL
;
7773 int wired_count
= 0;
7776 panic("iopl_valid_data: NULL upl");
7777 if (vector_upl_is_valid(upl
))
7778 panic("iopl_valid_data: vector upl");
7779 if ((upl
->flags
& (UPL_DEVICE_MEMORY
|UPL_SHADOWED
|UPL_ACCESS_BLOCKED
|UPL_IO_WIRE
|UPL_INTERNAL
)) != UPL_IO_WIRE
)
7780 panic("iopl_valid_data: unsupported upl, flags = %x", upl
->flags
);
7782 object
= upl
->map_object
;
7784 if (object
== kernel_object
|| object
== compressor_object
)
7785 panic("iopl_valid_data: object == kernel or compressor");
7787 if (object
->purgable
== VM_PURGABLE_VOLATILE
||
7788 object
->purgable
== VM_PURGABLE_EMPTY
)
7789 panic("iopl_valid_data: object %p purgable %d",
7790 object
, object
->purgable
);
7794 vm_object_lock(object
);
7795 VM_OBJECT_WIRED_PAGE_UPDATE_START(object
);
7797 if (object
->vo_size
== size
&& object
->resident_page_count
== (size
/ PAGE_SIZE
))
7798 nxt_page
= (vm_page_t
)vm_page_queue_first(&object
->memq
);
7800 offset
= 0 + upl
->offset
- object
->paging_offset
;
7804 if (nxt_page
!= VM_PAGE_NULL
) {
7806 nxt_page
= (vm_page_t
)vm_page_queue_next(&nxt_page
->listq
);
7808 m
= vm_page_lookup(object
, offset
);
7809 offset
+= PAGE_SIZE
;
7811 if (m
== VM_PAGE_NULL
)
7812 panic("iopl_valid_data: missing expected page at offset %lx", (long)offset
);
7816 panic("iopl_valid_data: busy page w/o absent");
7818 if (m
->pageq
.next
|| m
->pageq
.prev
)
7819 panic("iopl_valid_data: busy+absent page on page queue");
7821 panic("iopl_valid_data: %p is reusable", m
);
7826 assert(m
->vm_page_q_state
== VM_PAGE_NOT_ON_Q
);
7827 assert(m
->wire_count
== 0);
7829 assert(m
->wire_count
);
7830 if (m
->wire_count
== 1) {
7831 m
->vm_page_q_state
= VM_PAGE_IS_WIRED
;
7834 panic("iopl_valid_data: %p already wired\n", m
);
7837 PAGE_WAKEUP_DONE(m
);
7843 VM_OBJECT_WIRED_PAGE_COUNT(object
, wired_count
);
7844 assert(object
->resident_page_count
>= object
->wired_page_count
);
7846 /* no need to adjust purgeable accounting for this object: */
7847 assert(object
->purgable
!= VM_PURGABLE_VOLATILE
);
7848 assert(object
->purgable
!= VM_PURGABLE_EMPTY
);
7850 vm_page_lockspin_queues();
7851 vm_page_wire_count
+= wired_count
;
7852 vm_page_unlock_queues();
7854 VM_OBJECT_WIRED_PAGE_UPDATE_END(object
, tag
);
7855 vm_object_unlock(object
);
7860 vm_object_set_pmap_cache_attr(
7862 upl_page_info_array_t user_page_list
,
7863 unsigned int num_pages
,
7864 boolean_t batch_pmap_op
)
7866 unsigned int cache_attr
= 0;
7868 cache_attr
= object
->wimg_bits
& VM_WIMG_MASK
;
7869 assert(user_page_list
);
7870 if (cache_attr
!= VM_WIMG_USE_DEFAULT
) {
7871 PMAP_BATCH_SET_CACHE_ATTR(object
, user_page_list
, cache_attr
, num_pages
, batch_pmap_op
);
7876 boolean_t
vm_object_iopl_wire_full(vm_object_t
, upl_t
, upl_page_info_array_t
, wpl_array_t
, upl_control_flags_t
, vm_tag_t
);
7877 kern_return_t
vm_object_iopl_wire_empty(vm_object_t
, upl_t
, upl_page_info_array_t
, wpl_array_t
, upl_control_flags_t
, vm_tag_t
, vm_object_offset_t
*, int);
7882 vm_object_iopl_wire_full(vm_object_t object
, upl_t upl
, upl_page_info_array_t user_page_list
,
7883 wpl_array_t lite_list
, upl_control_flags_t cntrl_flags
, vm_tag_t tag
)
7888 int delayed_unlock
= 0;
7889 boolean_t retval
= TRUE
;
7892 vm_object_lock_assert_exclusive(object
);
7893 assert(object
->purgable
!= VM_PURGABLE_VOLATILE
);
7894 assert(object
->purgable
!= VM_PURGABLE_EMPTY
);
7895 assert(object
->pager
== NULL
);
7896 assert(object
->copy
== NULL
);
7897 assert(object
->shadow
== NULL
);
7899 page_count
= object
->resident_page_count
;
7900 dst_page
= (vm_page_t
)vm_page_queue_first(&object
->memq
);
7902 vm_page_lock_queues();
7904 while (page_count
--) {
7906 if (dst_page
->busy
||
7907 dst_page
->fictitious
||
7910 dst_page
->cleaning
||
7911 dst_page
->restart
||
7912 dst_page
->laundry
) {
7916 if ((cntrl_flags
& UPL_REQUEST_FORCE_COHERENCY
) && dst_page
->written_by_kernel
== TRUE
) {
7920 dst_page
->reference
= TRUE
;
7922 vm_page_wire(dst_page
, tag
, FALSE
);
7924 if (!(cntrl_flags
& UPL_COPYOUT_FROM
)) {
7925 SET_PAGE_DIRTY(dst_page
, FALSE
);
7927 entry
= (unsigned int)(dst_page
->offset
/ PAGE_SIZE
);
7928 assert(entry
>= 0 && entry
< object
->resident_page_count
);
7929 lite_list
[entry
>>5] |= 1 << (entry
& 31);
7931 phys_page
= VM_PAGE_GET_PHYS_PAGE(dst_page
);
7933 if (phys_page
> upl
->highest_page
)
7934 upl
->highest_page
= phys_page
;
7936 if (user_page_list
) {
7937 user_page_list
[entry
].phys_addr
= phys_page
;
7938 user_page_list
[entry
].absent
= dst_page
->absent
;
7939 user_page_list
[entry
].dirty
= dst_page
->dirty
;
7940 user_page_list
[entry
].free_when_done
= dst_page
->free_when_done
;
7941 user_page_list
[entry
].precious
= dst_page
->precious
;
7942 user_page_list
[entry
].device
= FALSE
;
7943 user_page_list
[entry
].speculative
= FALSE
;
7944 user_page_list
[entry
].cs_validated
= FALSE
;
7945 user_page_list
[entry
].cs_tainted
= FALSE
;
7946 user_page_list
[entry
].cs_nx
= FALSE
;
7947 user_page_list
[entry
].needed
= FALSE
;
7948 user_page_list
[entry
].mark
= FALSE
;
7950 if (delayed_unlock
++ > 256) {
7952 lck_mtx_yield(&vm_page_queue_lock
);
7954 VM_CHECK_MEMORYSTATUS
;
7956 dst_page
= (vm_page_t
)vm_page_queue_next(&dst_page
->listq
);
7959 vm_page_unlock_queues();
7961 VM_CHECK_MEMORYSTATUS
;
7968 vm_object_iopl_wire_empty(vm_object_t object
, upl_t upl
, upl_page_info_array_t user_page_list
,
7969 wpl_array_t lite_list
, upl_control_flags_t cntrl_flags
, vm_tag_t tag
, vm_object_offset_t
*dst_offset
, int page_count
)
7972 boolean_t no_zero_fill
= FALSE
;
7974 int pages_wired
= 0;
7975 int pages_inserted
= 0;
7977 uint64_t delayed_ledger_update
= 0;
7978 kern_return_t ret
= KERN_SUCCESS
;
7982 vm_object_lock_assert_exclusive(object
);
7983 assert(object
->purgable
!= VM_PURGABLE_VOLATILE
);
7984 assert(object
->purgable
!= VM_PURGABLE_EMPTY
);
7985 assert(object
->pager
== NULL
);
7986 assert(object
->copy
== NULL
);
7987 assert(object
->shadow
== NULL
);
7989 if (cntrl_flags
& UPL_SET_INTERRUPTIBLE
)
7990 interruptible
= THREAD_ABORTSAFE
;
7992 interruptible
= THREAD_UNINT
;
7994 if (cntrl_flags
& (UPL_NOZEROFILL
| UPL_NOZEROFILLIO
))
7995 no_zero_fill
= TRUE
;
7998 #if CONFIG_SECLUDED_MEMORY
7999 if (object
->can_grab_secluded
) {
8000 grab_options
|= VM_PAGE_GRAB_SECLUDED
;
8002 #endif /* CONFIG_SECLUDED_MEMORY */
8004 while (page_count
--) {
8006 while ((dst_page
= vm_page_grab_options(grab_options
))
8009 OSAddAtomic(page_count
, &vm_upl_wait_for_pages
);
8011 VM_DEBUG_EVENT(vm_iopl_page_wait
, VM_IOPL_PAGE_WAIT
, DBG_FUNC_START
, vm_upl_wait_for_pages
, 0, 0, 0);
8013 if (vm_page_wait(interruptible
) == FALSE
) {
8017 OSAddAtomic(-page_count
, &vm_upl_wait_for_pages
);
8019 VM_DEBUG_EVENT(vm_iopl_page_wait
, VM_IOPL_PAGE_WAIT
, DBG_FUNC_END
, vm_upl_wait_for_pages
, 0, 0, -1);
8021 ret
= MACH_SEND_INTERRUPTED
;
8024 OSAddAtomic(-page_count
, &vm_upl_wait_for_pages
);
8026 VM_DEBUG_EVENT(vm_iopl_page_wait
, VM_IOPL_PAGE_WAIT
, DBG_FUNC_END
, vm_upl_wait_for_pages
, 0, 0, 0);
8028 if (no_zero_fill
== FALSE
)
8029 vm_page_zero_fill(dst_page
);
8031 dst_page
->absent
= TRUE
;
8033 dst_page
->reference
= TRUE
;
8035 if (!(cntrl_flags
& UPL_COPYOUT_FROM
)) {
8036 SET_PAGE_DIRTY(dst_page
, FALSE
);
8038 if (dst_page
->absent
== FALSE
) {
8039 assert(dst_page
->vm_page_q_state
== VM_PAGE_NOT_ON_Q
);
8040 assert(dst_page
->wire_count
== 0);
8041 dst_page
->wire_count
++;
8042 dst_page
->vm_page_q_state
= VM_PAGE_IS_WIRED
;
8043 assert(dst_page
->wire_count
);
8045 PAGE_WAKEUP_DONE(dst_page
);
8049 vm_page_insert_internal(dst_page
, object
, *dst_offset
, tag
, FALSE
, TRUE
, TRUE
, TRUE
, &delayed_ledger_update
);
8051 lite_list
[entry
>>5] |= 1 << (entry
& 31);
8053 phys_page
= VM_PAGE_GET_PHYS_PAGE(dst_page
);
8055 if (phys_page
> upl
->highest_page
)
8056 upl
->highest_page
= phys_page
;
8058 if (user_page_list
) {
8059 user_page_list
[entry
].phys_addr
= phys_page
;
8060 user_page_list
[entry
].absent
= dst_page
->absent
;
8061 user_page_list
[entry
].dirty
= dst_page
->dirty
;
8062 user_page_list
[entry
].free_when_done
= FALSE
;
8063 user_page_list
[entry
].precious
= FALSE
;
8064 user_page_list
[entry
].device
= FALSE
;
8065 user_page_list
[entry
].speculative
= FALSE
;
8066 user_page_list
[entry
].cs_validated
= FALSE
;
8067 user_page_list
[entry
].cs_tainted
= FALSE
;
8068 user_page_list
[entry
].cs_nx
= FALSE
;
8069 user_page_list
[entry
].needed
= FALSE
;
8070 user_page_list
[entry
].mark
= FALSE
;
8073 *dst_offset
+= PAGE_SIZE_64
;
8077 vm_page_lockspin_queues();
8078 vm_page_wire_count
+= pages_wired
;
8079 vm_page_unlock_queues();
8081 if (pages_inserted
) {
8082 if (object
->internal
) {
8083 OSAddAtomic(pages_inserted
, &vm_page_internal_count
);
8085 OSAddAtomic(pages_inserted
, &vm_page_external_count
);
8088 if (delayed_ledger_update
) {
8091 owner
= object
->vo_purgeable_owner
;
8094 /* more non-volatile bytes */
8095 ledger_credit(owner
->ledger
,
8096 task_ledgers
.purgeable_nonvolatile
,
8097 delayed_ledger_update
);
8098 /* more footprint */
8099 ledger_credit(owner
->ledger
,
8100 task_ledgers
.phys_footprint
,
8101 delayed_ledger_update
);
8107 unsigned int vm_object_iopl_request_sleep_for_cleaning
= 0;
8111 vm_object_iopl_request(
8113 vm_object_offset_t offset
,
8116 upl_page_info_array_t user_page_list
,
8117 unsigned int *page_list_count
,
8118 upl_control_flags_t cntrl_flags
,
8122 vm_object_offset_t dst_offset
;
8123 upl_size_t xfer_size
;
8126 wpl_array_t lite_list
= NULL
;
8127 int no_zero_fill
= FALSE
;
8128 unsigned int size_in_pages
;
8132 struct vm_object_fault_info fault_info
;
8133 struct vm_page_delayed_work dw_array
[DEFAULT_DELAYED_WORK_LIMIT
];
8134 struct vm_page_delayed_work
*dwp
;
8138 boolean_t caller_lookup
;
8139 int io_tracking_flag
= 0;
8143 boolean_t set_cache_attr_needed
= FALSE
;
8144 boolean_t free_wired_pages
= FALSE
;
8145 boolean_t fast_path_empty_req
= FALSE
;
8146 boolean_t fast_path_full_req
= FALSE
;
8148 if (cntrl_flags
& ~UPL_VALID_FLAGS
) {
8150 * For forward compatibility's sake,
8151 * reject any unknown flag.
8153 return KERN_INVALID_VALUE
;
8155 if (vm_lopage_needed
== FALSE
)
8156 cntrl_flags
&= ~UPL_NEED_32BIT_ADDR
;
8158 if (cntrl_flags
& UPL_NEED_32BIT_ADDR
) {
8159 if ( (cntrl_flags
& (UPL_SET_IO_WIRE
| UPL_SET_LITE
)) != (UPL_SET_IO_WIRE
| UPL_SET_LITE
))
8160 return KERN_INVALID_VALUE
;
8162 if (object
->phys_contiguous
) {
8163 if ((offset
+ object
->vo_shadow_offset
) >= (vm_object_offset_t
)max_valid_dma_address
)
8164 return KERN_INVALID_ADDRESS
;
8166 if (((offset
+ object
->vo_shadow_offset
) + size
) >= (vm_object_offset_t
)max_valid_dma_address
)
8167 return KERN_INVALID_ADDRESS
;
8170 if (cntrl_flags
& (UPL_NOZEROFILL
| UPL_NOZEROFILLIO
))
8171 no_zero_fill
= TRUE
;
8173 if (cntrl_flags
& UPL_COPYOUT_FROM
)
8174 prot
= VM_PROT_READ
;
8176 prot
= VM_PROT_READ
| VM_PROT_WRITE
;
8178 if ((!object
->internal
) && (object
->paging_offset
!= 0))
8179 panic("vm_object_iopl_request: external object with non-zero paging offset\n");
8181 #if CONFIG_IOSCHED || UPL_DEBUG
8182 if ((object
->io_tracking
&& object
!= kernel_object
) || upl_debug_enabled
)
8183 io_tracking_flag
|= UPL_CREATE_IO_TRACKING
;
8187 if (object
->io_tracking
) {
8188 /* Check if we're dealing with the kernel object. We do not support expedite on kernel object UPLs */
8189 if (object
!= kernel_object
)
8190 io_tracking_flag
|= UPL_CREATE_EXPEDITE_SUP
;
8194 if (object
->phys_contiguous
)
8199 if (cntrl_flags
& UPL_SET_INTERNAL
) {
8200 upl
= upl_create(UPL_CREATE_INTERNAL
| UPL_CREATE_LITE
| io_tracking_flag
, UPL_IO_WIRE
, psize
);
8202 user_page_list
= (upl_page_info_t
*) (((uintptr_t)upl
) + sizeof(struct upl
));
8203 lite_list
= (wpl_array_t
) (((uintptr_t)user_page_list
) +
8204 ((psize
/ PAGE_SIZE
) * sizeof(upl_page_info_t
)));
8206 user_page_list
= NULL
;
8210 upl
= upl_create(UPL_CREATE_LITE
| io_tracking_flag
, UPL_IO_WIRE
, psize
);
8212 lite_list
= (wpl_array_t
) (((uintptr_t)upl
) + sizeof(struct upl
));
8218 user_page_list
[0].device
= FALSE
;
8221 upl
->map_object
= object
;
8224 size_in_pages
= size
/ PAGE_SIZE
;
8226 if (object
== kernel_object
&&
8227 !(cntrl_flags
& (UPL_NEED_32BIT_ADDR
| UPL_BLOCK_ACCESS
))) {
8228 upl
->flags
|= UPL_KERNEL_OBJECT
;
8230 vm_object_lock(object
);
8232 vm_object_lock_shared(object
);
8235 vm_object_lock(object
);
8236 vm_object_activity_begin(object
);
8239 * paging in progress also protects the paging_offset
8241 upl
->offset
= offset
+ object
->paging_offset
;
8243 if (cntrl_flags
& UPL_BLOCK_ACCESS
) {
8245 * The user requested that access to the pages in this UPL
		 * be blocked until the UPL is committed or aborted.
8248 upl
->flags
|= UPL_ACCESS_BLOCKED
;
8251 #if CONFIG_IOSCHED || UPL_DEBUG
8252 if (upl
->flags
& UPL_TRACKED_BY_OBJECT
) {
8253 vm_object_activity_begin(object
);
8254 queue_enter(&object
->uplq
, upl
, upl_t
, uplq
);
8258 if (object
->phys_contiguous
) {
8260 if (upl
->flags
& UPL_ACCESS_BLOCKED
) {
8261 assert(!object
->blocked_access
);
8262 object
->blocked_access
= TRUE
;
8265 vm_object_unlock(object
);
8268 * don't need any shadow mappings for this one
8269 * since it is already I/O memory
8271 upl
->flags
|= UPL_DEVICE_MEMORY
;
8273 upl
->highest_page
= (ppnum_t
) ((offset
+ object
->vo_shadow_offset
+ size
- 1)>>PAGE_SHIFT
);
8275 if (user_page_list
) {
8276 user_page_list
[0].phys_addr
= (ppnum_t
) ((offset
+ object
->vo_shadow_offset
)>>PAGE_SHIFT
);
8277 user_page_list
[0].device
= TRUE
;
8279 if (page_list_count
!= NULL
) {
8280 if (upl
->flags
& UPL_INTERNAL
)
8281 *page_list_count
= 0;
8283 *page_list_count
= 1;
8285 return KERN_SUCCESS
;
8287 if (object
!= kernel_object
&& object
!= compressor_object
) {
8289 * Protect user space from future COW operations
8291 #if VM_OBJECT_TRACKING_OP_TRUESHARE
8292 if (!object
->true_share
&&
8293 vm_object_tracking_inited
) {
8294 void *bt
[VM_OBJECT_TRACKING_BTDEPTH
];
8297 num
= OSBacktrace(bt
,
8298 VM_OBJECT_TRACKING_BTDEPTH
);
8299 btlog_add_entry(vm_object_tracking_btlog
,
8301 VM_OBJECT_TRACKING_OP_TRUESHARE
,
8305 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
8307 vm_object_lock_assert_exclusive(object
);
8308 object
->true_share
= TRUE
;
8310 if (object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
)
8311 object
->copy_strategy
= MEMORY_OBJECT_COPY_DELAY
;
8314 if (!(cntrl_flags
& UPL_COPYOUT_FROM
) &&
8315 object
->copy
!= VM_OBJECT_NULL
) {
8317 * Honor copy-on-write obligations
8319 * The caller is gathering these pages and
8320 * might modify their contents. We need to
8321 * make sure that the copy object has its own
8322 * private copies of these pages before we let
8323 * the caller modify them.
8325 * NOTE: someone else could map the original object
8326 * after we've done this copy-on-write here, and they
8327 * could then see an inconsistent picture of the memory
8328 * while it's being modified via the UPL. To prevent this,
8329 * we would have to block access to these pages until the
8330 * UPL is released. We could use the UPL_BLOCK_ACCESS
8331 * code path for that...
8333 vm_object_update(object
,
8338 FALSE
, /* should_return */
8339 MEMORY_OBJECT_COPY_SYNC
,
8341 #if DEVELOPMENT || DEBUG
8343 iopl_cow_pages
+= size
>> PAGE_SHIFT
;
8346 if (!(cntrl_flags
& (UPL_NEED_32BIT_ADDR
| UPL_BLOCK_ACCESS
)) &&
8347 object
->purgable
!= VM_PURGABLE_VOLATILE
&&
8348 object
->purgable
!= VM_PURGABLE_EMPTY
&&
8349 object
->copy
== NULL
&&
8350 size
== object
->vo_size
&&
8352 object
->shadow
== NULL
&&
8353 object
->pager
== NULL
)
8355 if (object
->resident_page_count
== size_in_pages
)
8357 assert(object
!= compressor_object
);
8358 assert(object
!= kernel_object
);
8359 fast_path_full_req
= TRUE
;
8361 else if (object
->resident_page_count
== 0)
8363 assert(object
!= compressor_object
);
8364 assert(object
!= kernel_object
);
8365 fast_path_empty_req
= TRUE
;
8366 set_cache_attr_needed
= TRUE
;
8370 if (cntrl_flags
& UPL_SET_INTERRUPTIBLE
)
8371 interruptible
= THREAD_ABORTSAFE
;
8373 interruptible
= THREAD_UNINT
;
8378 dst_offset
= offset
;
8381 if (fast_path_full_req
) {
8383 if (vm_object_iopl_wire_full(object
, upl
, user_page_list
, lite_list
, cntrl_flags
, tag
) == TRUE
)
8386 * we couldn't complete the processing of this request on the fast path
8387 * so fall through to the slow path and finish up
8390 } else if (fast_path_empty_req
) {
8392 if (cntrl_flags
& UPL_REQUEST_NO_FAULT
) {
8393 ret
= KERN_MEMORY_ERROR
;
8396 ret
= vm_object_iopl_wire_empty(object
, upl
, user_page_list
, lite_list
, cntrl_flags
, tag
, &dst_offset
, size_in_pages
);
8399 free_wired_pages
= TRUE
;
	fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
	fault_info.user_tag = 0;
	fault_info.lo_offset = offset;
	fault_info.hi_offset = offset + xfer_size;
	fault_info.no_cache = FALSE;
	fault_info.stealth = FALSE;
	fault_info.io_sync = FALSE;
	fault_info.cs_bypass = FALSE;
	fault_info.mark_zf_absent = TRUE;
	fault_info.interruptible = interruptible;
	fault_info.batch_pmap_op = TRUE;

	dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
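	/*
	 * Illustration (not part of the original source): DELAYED_WORK_LIMIT()
	 * caps how many per-page operations get queued before
	 * vm_page_do_delayed_work() flushes them in one pass under the page
	 * queues lock.  A minimal sketch of that batch-and-flush pattern;
	 * the types, the limit value and the flush body are stand-ins, not
	 * the kernel's dw_array machinery.
	 */
#if 0	/* standalone sketch only */
#include <stdio.h>

#define EX_DW_LIMIT 32			/* stand-in for the delayed-work limit */

struct ex_work { int page_index; unsigned mask; };

static struct ex_work	ex_array[EX_DW_LIMIT];
static int		ex_count;

static void
ex_flush(void)
{
	/* the kernel would take the page-queues lock once and apply all masks here */
	printf("flushing %d queued page operations\n", ex_count);
	ex_count = 0;
}

static void
ex_add_delayed_work(int page_index, unsigned mask)
{
	ex_array[ex_count].page_index = page_index;
	ex_array[ex_count].mask = mask;
	if (++ex_count >= EX_DW_LIMIT)
		ex_flush();		/* batch is full: do the real work now */
}

int
main(void)
{
	for (int i = 0; i < 100; i++)
		ex_add_delayed_work(i, 0x1);
	if (ex_count)
		ex_flush();		/* flush the partial final batch, as the pageout code does */
	return 0;
}
#endif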
	while (xfer_size) {
		vm_fault_return_t	result;

		dwp->dw_mask = 0;

		if (fast_path_full_req) {
			/*
			 * if we get here, it means that we ran into a page
			 * state we couldn't handle in the fast path and
			 * bailed out to the slow path... since the order
			 * we look at pages is different between the 2 paths,
			 * the following check is needed to determine whether
			 * this page was already processed in the fast path
			 */
			if (lite_list[entry>>5] & (1 << (entry & 31)))
				goto skip_page;
		}
		dst_page = vm_page_lookup(object, dst_offset);

		if (dst_page == VM_PAGE_NULL ||
		    dst_page->busy ||
		    dst_page->error ||
		    dst_page->restart ||
		    dst_page->absent ||
		    dst_page->fictitious) {

			if (object == kernel_object)
				panic("vm_object_iopl_request: missing/bad page in kernel object\n");
			if (object == compressor_object)
				panic("vm_object_iopl_request: missing/bad page in compressor object\n");

			if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
				ret = KERN_MEMORY_ERROR;
				goto return_err;
			}
			set_cache_attr_needed = TRUE;

			/*
			 * We just looked up the page and the result remains valid
			 * until the object lock is released, so send it to
			 * vm_fault_page() (as "dst_page"), to avoid having to
			 * look it up again there.
			 */
			caller_lookup = TRUE;
8467 kern_return_t error_code
;
8469 fault_info
.cluster_size
= xfer_size
;
8471 vm_object_paging_begin(object
);
8473 result
= vm_fault_page(object
, dst_offset
,
8474 prot
| VM_PROT_WRITE
, FALSE
,
8476 &prot
, &dst_page
, &top_page
,
8478 &error_code
, no_zero_fill
,
8479 FALSE
, &fault_info
);
8481 /* our lookup is no longer valid at this point */
8482 caller_lookup
= FALSE
;
8486 case VM_FAULT_SUCCESS
:
8488 if ( !dst_page
->absent
) {
8489 PAGE_WAKEUP_DONE(dst_page
);
8492 * we only get back an absent page if we
8493 * requested that it not be zero-filled
8494 * because we are about to fill it via I/O
8496 * absent pages should be left BUSY
8497 * to prevent them from being faulted
8498 * into an address space before we've
8499 * had a chance to complete the I/O on
8500 * them since they may contain info that
8501 * shouldn't be seen by the faulting task
8505 * Release paging references and
8506 * top-level placeholder page, if any.
8508 if (top_page
!= VM_PAGE_NULL
) {
8509 vm_object_t local_object
;
8511 local_object
= VM_PAGE_OBJECT(top_page
);
8514 * comparing 2 packed pointers
8516 if (top_page
->vm_page_object
!= dst_page
->vm_page_object
) {
8517 vm_object_lock(local_object
);
8518 VM_PAGE_FREE(top_page
);
8519 vm_object_paging_end(local_object
);
8520 vm_object_unlock(local_object
);
8522 VM_PAGE_FREE(top_page
);
8523 vm_object_paging_end(local_object
);
8526 vm_object_paging_end(object
);
8529 case VM_FAULT_RETRY
:
8530 vm_object_lock(object
);
8533 case VM_FAULT_MEMORY_SHORTAGE
:
8534 OSAddAtomic((size_in_pages
- entry
), &vm_upl_wait_for_pages
);
8536 VM_DEBUG_EVENT(vm_iopl_page_wait
, VM_IOPL_PAGE_WAIT
, DBG_FUNC_START
, vm_upl_wait_for_pages
, 0, 0, 0);
8538 if (vm_page_wait(interruptible
)) {
8539 OSAddAtomic(-(size_in_pages
- entry
), &vm_upl_wait_for_pages
);
8541 VM_DEBUG_EVENT(vm_iopl_page_wait
, VM_IOPL_PAGE_WAIT
, DBG_FUNC_END
, vm_upl_wait_for_pages
, 0, 0, 0);
8542 vm_object_lock(object
);
8546 OSAddAtomic(-(size_in_pages
- entry
), &vm_upl_wait_for_pages
);
8548 VM_DEBUG_EVENT(vm_iopl_page_wait
, VM_IOPL_PAGE_WAIT
, DBG_FUNC_END
, vm_upl_wait_for_pages
, 0, 0, -1);
8552 case VM_FAULT_INTERRUPTED
:
8553 error_code
= MACH_SEND_INTERRUPTED
;
8554 case VM_FAULT_MEMORY_ERROR
:
8556 ret
= (error_code
? error_code
: KERN_MEMORY_ERROR
);
8558 vm_object_lock(object
);
8561 case VM_FAULT_SUCCESS_NO_VM_PAGE
:
8562 /* success but no page: fail */
8563 vm_object_paging_end(object
);
8564 vm_object_unlock(object
);
8568 panic("vm_object_iopl_request: unexpected error"
8569 " 0x%x from vm_fault_page()\n", result
);
8571 } while (result
!= VM_FAULT_SUCCESS
);
8574 phys_page
= VM_PAGE_GET_PHYS_PAGE(dst_page
);
8576 if (upl
->flags
& UPL_KERNEL_OBJECT
)
8577 goto record_phys_addr
;
8579 if (dst_page
->vm_page_q_state
== VM_PAGE_USED_BY_COMPRESSOR
) {
8580 dst_page
->busy
= TRUE
;
8581 goto record_phys_addr
;
8584 if (dst_page
->cleaning
) {
8586 * Someone else is cleaning this page in place.
8587 * In theory, we should be able to proceed and use this
8588 * page but they'll probably end up clearing the "busy"
8589 * bit on it in upl_commit_range() but they didn't set
8590 * it, so they would clear our "busy" bit and open
8591 * us to race conditions.
8592 * We'd better wait for the cleaning to complete and
8595 vm_object_iopl_request_sleep_for_cleaning
++;
8596 PAGE_SLEEP(object
, dst_page
, THREAD_UNINT
);
8599 if (dst_page
->laundry
)
8600 vm_pageout_steal_laundry(dst_page
, FALSE
);
		if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) &&
		     phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) {
			vm_page_t	low_page;
			int		refmod;

			/*
			 * support devices that can't DMA above 32 bits
			 * by substituting pages from a pool of low address
			 * memory for any pages we find above the 4G mark
			 * can't substitute if the page is already wired because
			 * we don't know whether that physical address has been
			 * handed out to some other 64 bit capable DMA device to use
			 */
			if (VM_PAGE_WIRED(dst_page)) {
				ret = KERN_PROTECTION_FAILURE;
				goto return_err;
			}
			low_page = vm_page_grablo();

			if (low_page == VM_PAGE_NULL) {
				ret = KERN_RESOURCE_SHORTAGE;
				goto return_err;
			}
			/*
			 * from here until the vm_page_replace completes
			 * we mustn't drop the object lock... we don't
			 * want anyone refaulting this page in and using
			 * it after we disconnect it... we want the fault
			 * to find the new page being substituted.
			 */
			if (dst_page->pmapped)
				refmod = pmap_disconnect(phys_page);
			else
				refmod = 0;

			if (!dst_page->absent)
				vm_page_copy(dst_page, low_page);

			low_page->reference = dst_page->reference;
			low_page->dirty     = dst_page->dirty;
			low_page->absent    = dst_page->absent;

			if (refmod & VM_MEM_REFERENCED)
				low_page->reference = TRUE;
			if (refmod & VM_MEM_MODIFIED) {
				SET_PAGE_DIRTY(low_page, FALSE);
			}

			vm_page_replace(low_page, object, dst_offset);

			dst_page = low_page;
			/*
			 * vm_page_grablo returned the page marked
			 * BUSY... we don't need a PAGE_WAKEUP_DONE
			 * here, because we've never dropped the object lock
			 */
			if ( !dst_page->absent)
				dst_page->busy = FALSE;

			phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
		}
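		/*
		 * Illustration (not part of the original source): the
		 * UPL_NEED_32BIT_ADDR branch above swaps any page whose
		 * physical address sits at or above max_valid_dma_address
		 * for one grabbed from the low-memory pool.  A standalone
		 * sketch of just the boundary test; the 4 GiB limit and the
		 * names are assumptions for the example.
		 */
#if 0	/* standalone sketch only */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SHIFT		12
#define EX_MAX_DMA_ADDRESS	(1ULL << 32)	/* assume a 4 GiB DMA limit for the example */

/* true when the page's physical address is unreachable by a 32-bit DMA engine */
static bool
ex_needs_low_page(uint32_t phys_page)
{
	return phys_page >= (uint32_t)(EX_MAX_DMA_ADDRESS >> EX_PAGE_SHIFT);
}

int
main(void)
{
	printf("%d %d\n",
	    ex_needs_low_page(0x000FFFFF),	/* just under 4 GiB: 0 */
	    ex_needs_low_page(0x00100000));	/* at 4 GiB: 1, must substitute */
	return 0;
}
#endif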
		if ( !dst_page->busy)
			dwp->dw_mask |= DW_vm_page_wire;

		if (cntrl_flags & UPL_BLOCK_ACCESS) {
			/*
			 * Mark the page "busy" to block any future page fault
			 * on this page in addition to wiring it.
			 * We'll also remove the mapping
			 * of all these pages before leaving this routine.
			 */
			assert(!dst_page->fictitious);
			dst_page->busy = TRUE;
		}
		/*
		 * expect the page to be used
		 * page queues lock must be held to set 'reference'
		 */
		dwp->dw_mask |= DW_set_reference;

		if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
			SET_PAGE_DIRTY(dst_page, TRUE);
		}
		if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->written_by_kernel == TRUE) {
			pmap_sync_page_attributes_phys(phys_page);
			dst_page->written_by_kernel = FALSE;
		}

record_phys_addr:
		if (dst_page->busy)
			upl->flags |= UPL_HAS_BUSY;

		lite_list[entry>>5] |= 1 << (entry & 31);

		if (phys_page > upl->highest_page)
			upl->highest_page = phys_page;
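		/*
		 * Illustration (not part of the original source): lite_list
		 * above is a bitmap with 32 entries per word -- entry>>5
		 * selects the word and 1 << (entry & 31) the bit.  A tiny
		 * standalone demonstration of the same indexing.
		 */
#if 0	/* standalone sketch only */
#include <stdint.h>
#include <stdio.h>

#define EX_PAGES 100

static uint32_t ex_lite_list[(EX_PAGES + 31) / 32];

static void
ex_mark(unsigned entry)
{
	ex_lite_list[entry >> 5] |= 1U << (entry & 31);
}

static int
ex_is_marked(unsigned entry)
{
	return (ex_lite_list[entry >> 5] & (1U << (entry & 31))) != 0;
}

int
main(void)
{
	ex_mark(0);
	ex_mark(33);	/* word 1, bit 1 */
	printf("%d %d %d\n", ex_is_marked(0), ex_is_marked(1), ex_is_marked(33));	/* 1 0 1 */
	return 0;
}
#endif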
		if (user_page_list) {
			user_page_list[entry].phys_addr	= phys_page;
			user_page_list[entry].free_when_done	= dst_page->free_when_done;
			user_page_list[entry].absent	= dst_page->absent;
			user_page_list[entry].dirty	= dst_page->dirty;
			user_page_list[entry].precious	= dst_page->precious;
			user_page_list[entry].device	= FALSE;
			user_page_list[entry].needed	= FALSE;
			if (dst_page->clustered == TRUE)
				user_page_list[entry].speculative = (dst_page->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE;
			else
				user_page_list[entry].speculative = FALSE;
			user_page_list[entry].cs_validated = dst_page->cs_validated;
			user_page_list[entry].cs_tainted = dst_page->cs_tainted;
			user_page_list[entry].cs_nx = dst_page->cs_nx;
			user_page_list[entry].mark      = FALSE;
		}
		if (object != kernel_object && object != compressor_object) {
			/*
			 * someone is explicitly grabbing this page...
			 * update clustered and speculative state
			 */
			if (dst_page->clustered)
				VM_PAGE_CONSUME_CLUSTERED(dst_page);
		}
skip_page:
		entry++;
		dst_offset += PAGE_SIZE_64;
		xfer_size -= PAGE_SIZE;

		if (dwp->dw_mask) {
			VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);

			if (dw_count >= dw_limit) {
				vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);

				dwp = &dw_array[0];
				dw_count = 0;
			}
		}
	}
	assert(entry == size_in_pages);

	if (dw_count)
		vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
finish:
	if (user_page_list && set_cache_attr_needed == TRUE)
		vm_object_set_pmap_cache_attr(object, user_page_list, size_in_pages, TRUE);
	if (page_list_count != NULL) {
		if (upl->flags & UPL_INTERNAL)
			*page_list_count = 0;
		else if (*page_list_count > size_in_pages)
			*page_list_count = size_in_pages;
	}
	vm_object_unlock(object);

	if (cntrl_flags & UPL_BLOCK_ACCESS) {
		/*
		 * We've marked all the pages "busy" so that future
		 * page faults will block.
		 * Now remove the mapping for these pages, so that they
		 * can't be accessed without causing a page fault.
		 */
		vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
				       PMAP_NULL, 0, VM_PROT_NONE);
		assert(!object->blocked_access);
		object->blocked_access = TRUE;
	}
	return KERN_SUCCESS;
8775 for (; offset
< dst_offset
; offset
+= PAGE_SIZE
) {
8776 boolean_t need_unwire
;
8778 dst_page
= vm_page_lookup(object
, offset
);
8780 if (dst_page
== VM_PAGE_NULL
)
8781 panic("vm_object_iopl_request: Wired page missing. \n");
8784 * if we've already processed this page in an earlier
8785 * dw_do_work, we need to undo the wiring... we will
8786 * leave the dirty and reference bits on if they
8787 * were set, since we don't have a good way of knowing
8788 * what the previous state was and we won't get here
8789 * under any normal circumstances... we will always
8790 * clear BUSY and wakeup any waiters via vm_page_free
8791 * or PAGE_WAKEUP_DONE
8796 if (dw_array
[dw_index
].dw_m
== dst_page
) {
8798 * still in the deferred work list
8799 * which means we haven't yet called
8800 * vm_page_wire on this page
8802 need_unwire
= FALSE
;
8808 vm_page_lock_queues();
8810 if (dst_page
->absent
|| free_wired_pages
== TRUE
) {
8811 vm_page_free(dst_page
);
8813 need_unwire
= FALSE
;
8815 if (need_unwire
== TRUE
)
8816 vm_page_unwire(dst_page
, TRUE
);
8818 PAGE_WAKEUP_DONE(dst_page
);
8820 vm_page_unlock_queues();
8822 if (need_unwire
== TRUE
)
8823 VM_STAT_INCR(reactivations
);
8828 if (! (upl
->flags
& UPL_KERNEL_OBJECT
)) {
8829 vm_object_activity_end(object
);
8830 vm_object_collapse(object
, 0, TRUE
);
8832 vm_object_unlock(object
);
kern_return_t
upl_transpose(
	upl_t		upl1,
	upl_t		upl2)
{
	kern_return_t		retval;
	boolean_t		upls_locked;
	vm_object_t		object1, object2;

	if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2 || ((upl1->flags & UPL_VECTOR)==UPL_VECTOR) || ((upl2->flags & UPL_VECTOR)==UPL_VECTOR)) {
		return KERN_INVALID_ARGUMENT;
	}

	upls_locked = FALSE;

	/*
	 * Since we need to lock both UPLs at the same time,
	 * avoid deadlocks by always taking locks in the same order.
	 */
	if (upl1 < upl2) {
		upl_lock(upl1);
		upl_lock(upl2);
	} else {
		upl_lock(upl2);
		upl_lock(upl1);
	}
	upls_locked = TRUE;		/* the UPLs will need to be unlocked */

	object1 = upl1->map_object;
	object2 = upl2->map_object;

	if (upl1->offset != 0 || upl2->offset != 0 ||
	    upl1->size != upl2->size) {
		/*
		 * We deal only with full objects, not subsets.
		 * That's because we exchange the entire backing store info
		 * for the objects: pager, resident pages, etc... We can't do
		 * only part of it.
		 */
		retval = KERN_INVALID_VALUE;
		goto done;
	}

	/*
	 * Transpose the VM objects' backing store.
	 */
	retval = vm_object_transpose(object1, object2,
				     (vm_object_size_t) upl1->size);

	if (retval == KERN_SUCCESS) {
		/*
		 * Make each UPL point to the correct VM object, i.e. the
		 * object holding the pages that the UPL refers to...
		 */
#if CONFIG_IOSCHED || UPL_DEBUG
		if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
			vm_object_lock(object1);
			vm_object_lock(object2);
		}
		if (upl1->flags & UPL_TRACKED_BY_OBJECT)
			queue_remove(&object1->uplq, upl1, upl_t, uplq);
		if (upl2->flags & UPL_TRACKED_BY_OBJECT)
			queue_remove(&object2->uplq, upl2, upl_t, uplq);
#endif
		upl1->map_object = object2;
		upl2->map_object = object1;

#if CONFIG_IOSCHED || UPL_DEBUG
		if (upl1->flags & UPL_TRACKED_BY_OBJECT)
			queue_enter(&object2->uplq, upl1, upl_t, uplq);
		if (upl2->flags & UPL_TRACKED_BY_OBJECT)
			queue_enter(&object1->uplq, upl2, upl_t, uplq);
		if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
			vm_object_unlock(object2);
			vm_object_unlock(object1);
		}
#endif
	}

done:
	/*
	 * The UPLs are unlocked if needed...
	 */
	upl_unlock(upl1);
	upl_unlock(upl2);
	upls_locked = FALSE;

	return retval;
}
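/*
 * Illustration (not part of the original source): upl_transpose() above
 * avoids deadlock by always locking the two UPLs in the same global order
 * (lowest pointer first).  The same discipline in a standalone pthread
 * sketch; the helper names are made up for the example.
 */
#if 0	/* standalone sketch only */
#include <pthread.h>

/*
 * Lock two mutexes in a globally consistent order so that two threads
 * transposing (a,b) and (b,a) can never deadlock each other.
 */
static void
ex_lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	if (a < b) {
		pthread_mutex_lock(a);
		pthread_mutex_lock(b);
	} else {
		pthread_mutex_lock(b);
		pthread_mutex_lock(a);
	}
}

static void
ex_unlock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	pthread_mutex_unlock(a);
	pthread_mutex_unlock(b);
}
#endif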
void
upl_range_needed(
	upl_t		upl,
	int		index,
	int		count)
{
	upl_page_info_t	*user_page_list;
	int		size_in_pages;

	if ( !(upl->flags & UPL_INTERNAL) || count <= 0)
		return;

	size_in_pages = upl->size / PAGE_SIZE;

	user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));

	while (count-- && index < size_in_pages)
		user_page_list[index++].needed = TRUE;
}
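/*
 * Illustration (not part of the original source): upl_range_needed() above
 * walks the internal upl_page_info array and flags a sub-range as needed,
 * clamped to the size of the UPL.  The same walk in a standalone sketch
 * with an illustrative struct.
 */
#if 0	/* standalone sketch only */
#include <stdbool.h>
#include <stdio.h>

struct ex_page_info { bool needed; };

static void
ex_range_needed(struct ex_page_info *list, int size_in_pages, int index, int count)
{
	/* stop at whichever comes first: the requested count or the end of the UPL */
	while (count-- && index < size_in_pages)
		list[index++].needed = true;
}

int
main(void)
{
	struct ex_page_info pages[8] = { { false } };

	ex_range_needed(pages, 8, 6, 5);	/* only pages 6 and 7 get marked */
	for (int i = 0; i < 8; i++)
		printf("%d", pages[i].needed);
	printf("\n");
	return 0;
}
#endif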
/*
 * Reserve of virtual addresses in the kernel address space.
 * We need to map the physical pages in the kernel, so that we
 * can call the code-signing or slide routines with a kernel
 * virtual address.  We keep this pool of pre-allocated kernel
 * virtual addresses so that we don't have to scan the kernel's
 * virtual address space each time we need to work with
 * a physical page.
 */
decl_simple_lock_data(,vm_paging_lock)
#define VM_PAGING_NUM_PAGES	64
vm_map_offset_t	vm_paging_base_address = 0;
boolean_t	vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
int		vm_paging_max_index = 0;
int		vm_paging_page_waiter = 0;
int		vm_paging_page_waiter_total = 0;
unsigned long	vm_paging_no_kernel_page = 0;
unsigned long	vm_paging_objects_mapped = 0;
unsigned long	vm_paging_pages_mapped = 0;
unsigned long	vm_paging_objects_mapped_slow = 0;
unsigned long	vm_paging_pages_mapped_slow = 0;
8974 vm_paging_map_init(void)
8977 vm_map_offset_t page_map_offset
;
8978 vm_map_entry_t map_entry
;
8980 assert(vm_paging_base_address
== 0);
8983 * Initialize our pool of pre-allocated kernel
8984 * virtual addresses.
8986 page_map_offset
= 0;
8987 kr
= vm_map_find_space(kernel_map
,
8989 VM_PAGING_NUM_PAGES
* PAGE_SIZE
,
8992 VM_MAP_KERNEL_FLAGS_NONE
,
8993 VM_KERN_MEMORY_NONE
,
8995 if (kr
!= KERN_SUCCESS
) {
8996 panic("vm_paging_map_init: kernel_map full\n");
8998 VME_OBJECT_SET(map_entry
, kernel_object
);
8999 VME_OFFSET_SET(map_entry
, page_map_offset
);
9000 map_entry
->protection
= VM_PROT_NONE
;
9001 map_entry
->max_protection
= VM_PROT_NONE
;
9002 map_entry
->permanent
= TRUE
;
9003 vm_object_reference(kernel_object
);
9004 vm_map_unlock(kernel_map
);
9006 assert(vm_paging_base_address
== 0);
9007 vm_paging_base_address
= page_map_offset
;
9011 * vm_paging_map_object:
9012 * Maps part of a VM object's pages in the kernel
9013 * virtual address space, using the pre-allocated
9014 * kernel virtual addresses, if possible.
9016 * The VM object is locked. This lock will get
9017 * dropped and re-acquired though, so the caller
9018 * must make sure the VM object is kept alive
9019 * (by holding a VM map that has a reference
9020 * on it, for example, or taking an extra reference).
9021 * The page should also be kept busy to prevent
9022 * it from being reclaimed.
9025 vm_paging_map_object(
9028 vm_object_offset_t offset
,
9029 vm_prot_t protection
,
9030 boolean_t can_unlock_object
,
9031 vm_map_size_t
*size
, /* IN/OUT */
9032 vm_map_offset_t
*address
, /* OUT */
9033 boolean_t
*need_unmap
) /* OUT */
9036 vm_map_offset_t page_map_offset
;
9037 vm_map_size_t map_size
;
9038 vm_object_offset_t object_offset
;
9041 if (page
!= VM_PAGE_NULL
&& *size
== PAGE_SIZE
) {
9042 /* use permanent 1-to-1 kernel mapping of physical memory ? */
9044 *address
= (vm_map_offset_t
)
9045 PHYSMAP_PTOV((pmap_paddr_t
)VM_PAGE_GET_PHYS_PAGE(page
) <<
9047 *need_unmap
= FALSE
;
9048 return KERN_SUCCESS
;
9049 #elif __arm__ || __arm64__
9050 *address
= (vm_map_offset_t
)
9051 phystokv((pmap_paddr_t
)VM_PAGE_GET_PHYS_PAGE(page
) << PAGE_SHIFT
);
9052 *need_unmap
= FALSE
;
9053 return KERN_SUCCESS
;
9055 #warn "vm_paging_map_object: no 1-to-1 kernel mapping of physical memory..."
9060 * Use one of the pre-allocated kernel virtual addresses
9061 * and just enter the VM page in the kernel address space
9062 * at that virtual address.
9064 simple_lock(&vm_paging_lock
);
9067 * Try and find an available kernel virtual address
9068 * from our pre-allocated pool.
9070 page_map_offset
= 0;
9072 for (i
= 0; i
< VM_PAGING_NUM_PAGES
; i
++) {
9073 if (vm_paging_page_inuse
[i
] == FALSE
) {
9075 vm_paging_base_address
+
9080 if (page_map_offset
!= 0) {
9081 /* found a space to map our page ! */
9085 if (can_unlock_object
) {
9087 * If we can afford to unlock the VM object,
9088 * let's take the slow path now...
9093 * We can't afford to unlock the VM object, so
9094 * let's wait for a space to become available...
9096 vm_paging_page_waiter_total
++;
9097 vm_paging_page_waiter
++;
9098 kr
= assert_wait((event_t
)&vm_paging_page_waiter
, THREAD_UNINT
);
9099 if (kr
== THREAD_WAITING
) {
9100 simple_unlock(&vm_paging_lock
);
9101 kr
= thread_block(THREAD_CONTINUE_NULL
);
9102 simple_lock(&vm_paging_lock
);
9104 vm_paging_page_waiter
--;
9105 /* ... and try again */
9108 if (page_map_offset
!= 0) {
9110 * We found a kernel virtual address;
9111 * map the physical page to that virtual address.
9113 if (i
> vm_paging_max_index
) {
9114 vm_paging_max_index
= i
;
9116 vm_paging_page_inuse
[i
] = TRUE
;
9117 simple_unlock(&vm_paging_lock
);
9119 page
->pmapped
= TRUE
;
9122 * Keep the VM object locked over the PMAP_ENTER
9123 * and the actual use of the page by the kernel,
9124 * or this pmap mapping might get undone by a
9125 * vm_object_pmap_protect() call...
9127 PMAP_ENTER(kernel_pmap
,
9135 assert(kr
== KERN_SUCCESS
);
9136 vm_paging_objects_mapped
++;
9137 vm_paging_pages_mapped
++;
9138 *address
= page_map_offset
;
9142 kasan_notify_address(page_map_offset
, PAGE_SIZE
);
9145 /* all done and mapped, ready to use ! */
9146 return KERN_SUCCESS
;
9150 * We ran out of pre-allocated kernel virtual
9151 * addresses. Just map the page in the kernel
9152 * the slow and regular way.
9154 vm_paging_no_kernel_page
++;
9155 simple_unlock(&vm_paging_lock
);
9158 if (! can_unlock_object
) {
9161 *need_unmap
= FALSE
;
9162 return KERN_NOT_SUPPORTED
;
9165 object_offset
= vm_object_trunc_page(offset
);
9166 map_size
= vm_map_round_page(*size
,
9167 VM_MAP_PAGE_MASK(kernel_map
));
9170 * Try and map the required range of the object
9174 vm_object_reference_locked(object
); /* for the map entry */
9175 vm_object_unlock(object
);
9177 kr
= vm_map_enter(kernel_map
,
9182 VM_MAP_KERNEL_FLAGS_NONE
,
9183 VM_KERN_MEMORY_NONE
,
9190 if (kr
!= KERN_SUCCESS
) {
9193 *need_unmap
= FALSE
;
9194 vm_object_deallocate(object
); /* for the map entry */
9195 vm_object_lock(object
);
9202 * Enter the mapped pages in the page table now.
9204 vm_object_lock(object
);
9206 * VM object must be kept locked from before PMAP_ENTER()
9207 * until after the kernel is done accessing the page(s).
9208 * Otherwise, the pmap mappings in the kernel could be
9209 * undone by a call to vm_object_pmap_protect().
9212 for (page_map_offset
= 0;
9214 map_size
-= PAGE_SIZE_64
, page_map_offset
+= PAGE_SIZE_64
) {
9216 page
= vm_page_lookup(object
, offset
+ page_map_offset
);
9217 if (page
== VM_PAGE_NULL
) {
9218 printf("vm_paging_map_object: no page !?");
9219 vm_object_unlock(object
);
9220 kr
= vm_map_remove(kernel_map
, *address
, *size
,
9222 assert(kr
== KERN_SUCCESS
);
9225 *need_unmap
= FALSE
;
9226 vm_object_lock(object
);
9227 return KERN_MEMORY_ERROR
;
9229 page
->pmapped
= TRUE
;
9231 //assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(page)));
9232 PMAP_ENTER(kernel_pmap
,
9233 *address
+ page_map_offset
,
9240 assert(kr
== KERN_SUCCESS
);
9242 kasan_notify_address(*address
+ page_map_offset
, PAGE_SIZE
);
9246 vm_paging_objects_mapped_slow
++;
9247 vm_paging_pages_mapped_slow
+= (unsigned long) (map_size
/ PAGE_SIZE_64
);
9251 return KERN_SUCCESS
;
/*
 * vm_paging_unmap_object:
 *	Unmaps part of a VM object's pages from the kernel
 *	virtual address space.
 * Context:
 *	The VM object is locked.  This lock will get
 *	dropped and re-acquired though.
 */
void
vm_paging_unmap_object(
	vm_object_t	object,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	kern_return_t	kr;
	int		i;

	if ((vm_paging_base_address == 0) ||
	    (start < vm_paging_base_address) ||
	    (end > (vm_paging_base_address
		    + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) {
		/*
		 * We didn't use our pre-allocated pool of
		 * kernel virtual addresses.  Deallocate the
		 * virtual memory.
		 */
		if (object != VM_OBJECT_NULL) {
			vm_object_unlock(object);
		}
		kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS);
		if (object != VM_OBJECT_NULL) {
			vm_object_lock(object);
		}
		assert(kr == KERN_SUCCESS);
	} else {
		/*
		 * We used a kernel virtual address from our
		 * pre-allocated pool.  Put it back in the pool
		 * for next time.
		 */
		assert(end - start == PAGE_SIZE);
		i = (int) ((start - vm_paging_base_address) >> PAGE_SHIFT);
		assert(i >= 0 && i < VM_PAGING_NUM_PAGES);

		/* undo the pmap mapping */
		pmap_remove(kernel_pmap, start, end);

		simple_lock(&vm_paging_lock);
		vm_paging_page_inuse[i] = FALSE;
		if (vm_paging_page_waiter) {
			thread_wakeup(&vm_paging_page_waiter);
		}
		simple_unlock(&vm_paging_lock);
	}
}
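/*
 * Illustration (not part of the original source): the map/unmap pair above
 * treats the pre-allocated region as VM_PAGING_NUM_PAGES fixed slots --
 * slot i lives at base + i*PAGE_SIZE and the slot index is recovered from
 * the address on the way back.  A standalone sketch of that slot
 * arithmetic; the pool size mirrors VM_PAGING_NUM_PAGES, the base address
 * and names are made up for the example.
 */
#if 0	/* standalone sketch only */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define EX_PAGE_SHIFT	12
#define EX_PAGE_SIZE	(1u << EX_PAGE_SHIFT)
#define EX_NUM_PAGES	64			/* mirrors VM_PAGING_NUM_PAGES */

static uint64_t	ex_base_address = 0xffffff8000100000ULL;	/* illustrative base */
static bool	ex_inuse[EX_NUM_PAGES];

/* grab a free slot, or 0 if the pool is exhausted */
static uint64_t
ex_alloc_slot(void)
{
	for (int i = 0; i < EX_NUM_PAGES; i++) {
		if (!ex_inuse[i]) {
			ex_inuse[i] = true;
			return ex_base_address + (uint64_t)i * EX_PAGE_SIZE;
		}
	}
	return 0;
}

/* recover the slot index from the address, exactly as the unmap path does */
static void
ex_free_slot(uint64_t addr)
{
	int i = (int)((addr - ex_base_address) >> EX_PAGE_SHIFT);

	assert(i >= 0 && i < EX_NUM_PAGES);
	ex_inuse[i] = false;
}

int
main(void)
{
	uint64_t a = ex_alloc_slot();
	uint64_t b = ex_alloc_slot();

	ex_free_slot(a);
	ex_free_slot(b);
	return 0;
}
#endif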
9312 * page->object must be locked
9315 vm_pageout_steal_laundry(vm_page_t page
, boolean_t queues_locked
)
9317 if (!queues_locked
) {
9318 vm_page_lockspin_queues();
9321 page
->free_when_done
= FALSE
;
9323 * need to drop the laundry count...
9324 * we may also need to remove it
9325 * from the I/O paging queue...
9326 * vm_pageout_throttle_up handles both cases
9328 * the laundry and pageout_queue flags are cleared...
9330 vm_pageout_throttle_up(page
);
9332 vm_page_steal_pageout_page
++;
9334 if (!queues_locked
) {
9335 vm_page_unlock_queues();
9340 vector_upl_create(vm_offset_t upl_offset
)
9342 int vector_upl_size
= sizeof(struct _vector_upl
);
9345 vector_upl_t vector_upl
= (vector_upl_t
)kalloc(vector_upl_size
);
9347 upl
= upl_create(0,UPL_VECTOR
,0);
9348 upl
->vector_upl
= vector_upl
;
9349 upl
->offset
= upl_offset
;
9350 vector_upl
->size
= 0;
9351 vector_upl
->offset
= upl_offset
;
9352 vector_upl
->invalid_upls
=0;
9353 vector_upl
->num_upls
=0;
9354 vector_upl
->pagelist
= NULL
;
9356 for(i
=0; i
< MAX_VECTOR_UPL_ELEMENTS
; i
++) {
9357 vector_upl
->upl_iostates
[i
].size
= 0;
9358 vector_upl
->upl_iostates
[i
].offset
= 0;
9365 vector_upl_deallocate(upl_t upl
)
9368 vector_upl_t vector_upl
= upl
->vector_upl
;
9370 if(vector_upl
->invalid_upls
!= vector_upl
->num_upls
)
9371 panic("Deallocating non-empty Vectored UPL\n");
9372 kfree(vector_upl
->pagelist
,(sizeof(struct upl_page_info
)*(vector_upl
->size
/PAGE_SIZE
)));
9373 vector_upl
->invalid_upls
=0;
9374 vector_upl
->num_upls
= 0;
9375 vector_upl
->pagelist
= NULL
;
9376 vector_upl
->size
= 0;
9377 vector_upl
->offset
= 0;
9378 kfree(vector_upl
, sizeof(struct _vector_upl
));
9379 vector_upl
= (vector_upl_t
)0xfeedfeed;
9382 panic("vector_upl_deallocate was passed a non-vectored upl\n");
9385 panic("vector_upl_deallocate was passed a NULL upl\n");
9389 vector_upl_is_valid(upl_t upl
)
9391 if(upl
&& ((upl
->flags
& UPL_VECTOR
)==UPL_VECTOR
)) {
9392 vector_upl_t vector_upl
= upl
->vector_upl
;
9393 if(vector_upl
== NULL
|| vector_upl
== (vector_upl_t
)0xfeedfeed || vector_upl
== (vector_upl_t
)0xfeedbeef)
9402 vector_upl_set_subupl(upl_t upl
,upl_t subupl
, uint32_t io_size
)
9404 if(vector_upl_is_valid(upl
)) {
9405 vector_upl_t vector_upl
= upl
->vector_upl
;
9410 if(io_size
< PAGE_SIZE
)
9411 io_size
= PAGE_SIZE
;
9412 subupl
->vector_upl
= (void*)vector_upl
;
9413 vector_upl
->upl_elems
[vector_upl
->num_upls
++] = subupl
;
9414 vector_upl
->size
+= io_size
;
9415 upl
->size
+= io_size
;
9418 uint32_t i
=0,invalid_upls
=0;
9419 for(i
= 0; i
< vector_upl
->num_upls
; i
++) {
9420 if(vector_upl
->upl_elems
[i
] == subupl
)
9423 if(i
== vector_upl
->num_upls
)
9424 panic("Trying to remove sub-upl when none exists");
9426 vector_upl
->upl_elems
[i
] = NULL
;
9427 invalid_upls
= hw_atomic_add(&(vector_upl
)->invalid_upls
, 1);
9428 if(invalid_upls
== vector_upl
->num_upls
)
9435 panic("vector_upl_set_subupl was passed a NULL upl element\n");
9438 panic("vector_upl_set_subupl was passed a non-vectored upl\n");
9441 panic("vector_upl_set_subupl was passed a NULL upl\n");
9447 vector_upl_set_pagelist(upl_t upl
)
9449 if(vector_upl_is_valid(upl
)) {
9451 vector_upl_t vector_upl
= upl
->vector_upl
;
9454 vm_offset_t pagelist_size
=0, cur_upl_pagelist_size
=0;
9456 vector_upl
->pagelist
= (upl_page_info_array_t
)kalloc(sizeof(struct upl_page_info
)*(vector_upl
->size
/PAGE_SIZE
));
9458 for(i
=0; i
< vector_upl
->num_upls
; i
++) {
9459 cur_upl_pagelist_size
= sizeof(struct upl_page_info
) * vector_upl
->upl_elems
[i
]->size
/PAGE_SIZE
;
9460 bcopy(UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(vector_upl
->upl_elems
[i
]), (char*)vector_upl
->pagelist
+ pagelist_size
, cur_upl_pagelist_size
);
9461 pagelist_size
+= cur_upl_pagelist_size
;
9462 if(vector_upl
->upl_elems
[i
]->highest_page
> upl
->highest_page
)
9463 upl
->highest_page
= vector_upl
->upl_elems
[i
]->highest_page
;
9465 assert( pagelist_size
== (sizeof(struct upl_page_info
)*(vector_upl
->size
/PAGE_SIZE
)) );
9468 panic("vector_upl_set_pagelist was passed a non-vectored upl\n");
9471 panic("vector_upl_set_pagelist was passed a NULL upl\n");
9476 vector_upl_subupl_byindex(upl_t upl
, uint32_t index
)
9478 if(vector_upl_is_valid(upl
)) {
9479 vector_upl_t vector_upl
= upl
->vector_upl
;
9481 if(index
< vector_upl
->num_upls
)
9482 return vector_upl
->upl_elems
[index
];
9485 panic("vector_upl_subupl_byindex was passed a non-vectored upl\n");
9491 vector_upl_subupl_byoffset(upl_t upl
, upl_offset_t
*upl_offset
, upl_size_t
*upl_size
)
9493 if(vector_upl_is_valid(upl
)) {
9495 vector_upl_t vector_upl
= upl
->vector_upl
;
9498 upl_t subupl
= NULL
;
9499 vector_upl_iostates_t subupl_state
;
9501 for(i
=0; i
< vector_upl
->num_upls
; i
++) {
9502 subupl
= vector_upl
->upl_elems
[i
];
9503 subupl_state
= vector_upl
->upl_iostates
[i
];
9504 if( *upl_offset
<= (subupl_state
.offset
+ subupl_state
.size
- 1)) {
9505 /* We could have been passed an offset/size pair that belongs
9506 * to an UPL element that has already been committed/aborted.
9507 * If so, return NULL.
9511 if((subupl_state
.offset
+ subupl_state
.size
) < (*upl_offset
+ *upl_size
)) {
9512 *upl_size
= (subupl_state
.offset
+ subupl_state
.size
) - *upl_offset
;
9513 if(*upl_size
> subupl_state
.size
)
9514 *upl_size
= subupl_state
.size
;
9516 if(*upl_offset
>= subupl_state
.offset
)
9517 *upl_offset
-= subupl_state
.offset
;
9519 panic("Vector UPL offset miscalculation\n");
9525 panic("vector_upl_subupl_byoffset was passed a non-vectored UPL\n");
9531 vector_upl_get_submap(upl_t upl
, vm_map_t
*v_upl_submap
, vm_offset_t
*submap_dst_addr
)
9533 *v_upl_submap
= NULL
;
9535 if(vector_upl_is_valid(upl
)) {
9536 vector_upl_t vector_upl
= upl
->vector_upl
;
9538 *v_upl_submap
= vector_upl
->submap
;
9539 *submap_dst_addr
= vector_upl
->submap_dst_addr
;
9542 panic("vector_upl_get_submap was passed a non-vectored UPL\n");
9545 panic("vector_upl_get_submap was passed a null UPL\n");
9549 vector_upl_set_submap(upl_t upl
, vm_map_t submap
, vm_offset_t submap_dst_addr
)
9551 if(vector_upl_is_valid(upl
)) {
9552 vector_upl_t vector_upl
= upl
->vector_upl
;
9554 vector_upl
->submap
= submap
;
9555 vector_upl
->submap_dst_addr
= submap_dst_addr
;
9558 panic("vector_upl_get_submap was passed a non-vectored UPL\n");
9561 panic("vector_upl_get_submap was passed a NULL UPL\n");
9565 vector_upl_set_iostate(upl_t upl
, upl_t subupl
, upl_offset_t offset
, upl_size_t size
)
9567 if(vector_upl_is_valid(upl
)) {
9569 vector_upl_t vector_upl
= upl
->vector_upl
;
9572 for(i
= 0; i
< vector_upl
->num_upls
; i
++) {
9573 if(vector_upl
->upl_elems
[i
] == subupl
)
9577 if(i
== vector_upl
->num_upls
)
9578 panic("setting sub-upl iostate when none exists");
9580 vector_upl
->upl_iostates
[i
].offset
= offset
;
9581 if(size
< PAGE_SIZE
)
9583 vector_upl
->upl_iostates
[i
].size
= size
;
9586 panic("vector_upl_set_iostate was passed a non-vectored UPL\n");
9589 panic("vector_upl_set_iostate was passed a NULL UPL\n");
9593 vector_upl_get_iostate(upl_t upl
, upl_t subupl
, upl_offset_t
*offset
, upl_size_t
*size
)
9595 if(vector_upl_is_valid(upl
)) {
9597 vector_upl_t vector_upl
= upl
->vector_upl
;
9600 for(i
= 0; i
< vector_upl
->num_upls
; i
++) {
9601 if(vector_upl
->upl_elems
[i
] == subupl
)
9605 if(i
== vector_upl
->num_upls
)
9606 panic("getting sub-upl iostate when none exists");
9608 *offset
= vector_upl
->upl_iostates
[i
].offset
;
9609 *size
= vector_upl
->upl_iostates
[i
].size
;
9612 panic("vector_upl_get_iostate was passed a non-vectored UPL\n");
9615 panic("vector_upl_get_iostate was passed a NULL UPL\n");
9619 vector_upl_get_iostate_byindex(upl_t upl
, uint32_t index
, upl_offset_t
*offset
, upl_size_t
*size
)
9621 if(vector_upl_is_valid(upl
)) {
9622 vector_upl_t vector_upl
= upl
->vector_upl
;
9624 if(index
< vector_upl
->num_upls
) {
9625 *offset
= vector_upl
->upl_iostates
[index
].offset
;
9626 *size
= vector_upl
->upl_iostates
[index
].size
;
9629 *offset
= *size
= 0;
9632 panic("vector_upl_get_iostate_byindex was passed a non-vectored UPL\n");
9635 panic("vector_upl_get_iostate_byindex was passed a NULL UPL\n");
9639 upl_get_internal_vectorupl_pagelist(upl_t upl
)
9641 return ((vector_upl_t
)(upl
->vector_upl
))->pagelist
;
9645 upl_get_internal_vectorupl(upl_t upl
)
9647 return upl
->vector_upl
;
9651 upl_get_internal_pagelist_offset(void)
9653 return sizeof(struct upl
);
9662 upl
->flags
|= UPL_CLEAR_DIRTY
;
9664 upl
->flags
&= ~UPL_CLEAR_DIRTY
;
9675 upl
->ext_ref_count
++;
9677 if (!upl
->ext_ref_count
) {
9678 panic("upl_set_referenced not %p\n", upl
);
9680 upl
->ext_ref_count
--;
9689 vm_offset_t upl_offset
,
9694 if ((upl
->flags
& UPL_EXPEDITE_SUPPORTED
) == 0)
9697 assert(upl
->upl_reprio_info
!= 0);
9698 for(i
= (int)(upl_offset
/ PAGE_SIZE
), j
= 0; j
< io_size
; i
++, j
+= PAGE_SIZE
) {
9699 UPL_SET_REPRIO_INFO(upl
, i
, blkno
, io_size
);
9705 vm_page_is_slideable(vm_page_t m
)
9707 boolean_t result
= FALSE
;
9708 vm_shared_region_slide_info_t si
;
9709 vm_object_t m_object
;
9711 m_object
= VM_PAGE_OBJECT(m
);
9713 vm_object_lock_assert_held(m_object
);
9715 /* make sure our page belongs to the one object allowed to do this */
9716 if (!m_object
->object_slid
) {
9720 si
= m_object
->vo_slide_info
;
9725 if(!m
->slid
&& (si
->start
<= m
->offset
&& si
->end
> m
->offset
)) {
9733 int vm_page_slide_counter
= 0;
9734 int vm_page_slide_errors
= 0;
9738 vm_map_offset_t kernel_mapping_offset
)
9741 vm_map_size_t kernel_mapping_size
;
9742 boolean_t kernel_mapping_needs_unmap
;
9743 vm_offset_t kernel_vaddr
;
9745 uint32_t slide_chunk
;
9746 vm_object_t page_object
;
9748 page_object
= VM_PAGE_OBJECT(page
);
9750 assert(!page
->slid
);
9751 assert(page_object
->object_slid
);
9752 vm_object_lock_assert_exclusive(page_object
);
9755 return KERN_FAILURE
;
9758 * Take a paging-in-progress reference to keep the object
9759 * alive even if we have to unlock it (in vm_paging_map_object()
9762 vm_object_paging_begin(page_object
);
9764 if (kernel_mapping_offset
== 0) {
9766 * The page hasn't already been mapped in kernel space
9767 * by the caller. Map it now, so that we can access
9768 * its contents and decrypt them.
9770 kernel_mapping_size
= PAGE_SIZE
;
9771 kernel_mapping_needs_unmap
= FALSE
;
9772 kr
= vm_paging_map_object(page
,
9775 VM_PROT_READ
| VM_PROT_WRITE
,
9777 &kernel_mapping_size
,
9778 &kernel_mapping_offset
,
9779 &kernel_mapping_needs_unmap
);
9780 if (kr
!= KERN_SUCCESS
) {
9781 panic("vm_page_slide: "
9782 "could not map page in kernel: 0x%x\n",
9786 kernel_mapping_size
= 0;
9787 kernel_mapping_needs_unmap
= FALSE
;
9789 kernel_vaddr
= CAST_DOWN(vm_offset_t
, kernel_mapping_offset
);
9792 * Slide the pointers on the page.
9795 /*assert that slide_file_info.start/end are page-aligned?*/
9797 assert(!page
->slid
);
9798 assert(page_object
->object_slid
);
9800 pageIndex
= (uint32_t)((page
->offset
-
9801 page_object
->vo_slide_info
->start
) /
9802 PAGE_SIZE_FOR_SR_SLIDE
);
9803 for (slide_chunk
= 0;
9804 slide_chunk
< PAGE_SIZE
/ PAGE_SIZE_FOR_SR_SLIDE
;
9806 kr
= vm_shared_region_slide_page(page_object
->vo_slide_info
,
9809 PAGE_SIZE_FOR_SR_SLIDE
)),
9810 (pageIndex
+ slide_chunk
));
9811 if (kr
!= KERN_SUCCESS
) {
9816 vm_page_slide_counter
++;
9819 * Unmap the page from the kernel's address space,
9821 if (kernel_mapping_needs_unmap
) {
9822 vm_paging_unmap_object(page_object
,
9824 kernel_vaddr
+ PAGE_SIZE
);
9827 page
->dirty
= FALSE
;
9828 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(page
), VM_MEM_MODIFIED
| VM_MEM_REFERENCED
);
9830 if (kr
!= KERN_SUCCESS
|| cs_debug
> 1) {
9831 printf("vm_page_slide(%p): "
9832 "obj %p off 0x%llx mobj %p moff 0x%llx\n",
9834 page_object
, page
->offset
,
9836 page
->offset
+ page_object
->paging_offset
);
9839 if (kr
== KERN_SUCCESS
) {
9843 vm_page_slide_errors
++;
9846 vm_object_paging_end(page_object
);
9851 void inline memoryshot(unsigned int event
, unsigned int control
)
9853 if (vm_debug_events
) {
9854 KERNEL_DEBUG_CONSTANT1((MACHDBG_CODE(DBG_MACH_VM_PRESSURE
, event
)) | control
,
9855 vm_page_active_count
, vm_page_inactive_count
,
9856 vm_page_free_count
, vm_page_speculative_count
,
9857 vm_page_throttled_count
);
9867 boolean_t
upl_device_page(upl_page_info_t
*upl
)
9869 return(UPL_DEVICE_PAGE(upl
));
9871 boolean_t
upl_page_present(upl_page_info_t
*upl
, int index
)
9873 return(UPL_PAGE_PRESENT(upl
, index
));
9875 boolean_t
upl_speculative_page(upl_page_info_t
*upl
, int index
)
9877 return(UPL_SPECULATIVE_PAGE(upl
, index
));
9879 boolean_t
upl_dirty_page(upl_page_info_t
*upl
, int index
)
9881 return(UPL_DIRTY_PAGE(upl
, index
));
9883 boolean_t
upl_valid_page(upl_page_info_t
*upl
, int index
)
9885 return(UPL_VALID_PAGE(upl
, index
));
9887 ppnum_t
upl_phys_page(upl_page_info_t
*upl
, int index
)
9889 return(UPL_PHYS_PAGE(upl
, index
));
9892 void upl_page_set_mark(upl_page_info_t
*upl
, int index
, boolean_t v
)
9894 upl
[index
].mark
= v
;
9897 boolean_t
upl_page_get_mark(upl_page_info_t
*upl
, int index
)
9899 return upl
[index
].mark
;
9903 vm_countdirtypages(void)
9915 vm_page_lock_queues();
9916 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_inactive
);
9918 if (m
==(vm_page_t
)0) break;
9920 if(m
->dirty
) dpages
++;
9921 if(m
->free_when_done
) pgopages
++;
9922 if(m
->precious
) precpages
++;
9924 assert(VM_PAGE_OBJECT(m
) != kernel_object
);
9925 m
= (vm_page_t
) vm_page_queue_next(&m
->pageq
);
9926 if (m
==(vm_page_t
)0) break;
9928 } while (!vm_page_queue_end(&vm_page_queue_inactive
, (vm_page_queue_entry_t
) m
));
9929 vm_page_unlock_queues();
9931 vm_page_lock_queues();
9932 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_throttled
);
9934 if (m
==(vm_page_t
)0) break;
9938 assert(!m
->free_when_done
);
9939 assert(VM_PAGE_OBJECT(m
) != kernel_object
);
9940 m
= (vm_page_t
) vm_page_queue_next(&m
->pageq
);
9941 if (m
==(vm_page_t
)0) break;
9943 } while (!vm_page_queue_end(&vm_page_queue_throttled
, (vm_page_queue_entry_t
) m
));
9944 vm_page_unlock_queues();
9946 vm_page_lock_queues();
9947 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_anonymous
);
9949 if (m
==(vm_page_t
)0) break;
9951 if(m
->dirty
) dpages
++;
9952 if(m
->free_when_done
) pgopages
++;
9953 if(m
->precious
) precpages
++;
9955 assert(VM_PAGE_OBJECT(m
) != kernel_object
);
9956 m
= (vm_page_t
) vm_page_queue_next(&m
->pageq
);
9957 if (m
==(vm_page_t
)0) break;
9959 } while (!vm_page_queue_end(&vm_page_queue_anonymous
, (vm_page_queue_entry_t
) m
));
9960 vm_page_unlock_queues();
9962 printf("IN Q: %d : %d : %d\n", dpages
, pgopages
, precpages
);
9968 vm_page_lock_queues();
9969 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_active
);
9972 if(m
== (vm_page_t
)0) break;
9973 if(m
->dirty
) dpages
++;
9974 if(m
->free_when_done
) pgopages
++;
9975 if(m
->precious
) precpages
++;
9977 assert(VM_PAGE_OBJECT(m
) != kernel_object
);
9978 m
= (vm_page_t
) vm_page_queue_next(&m
->pageq
);
9979 if(m
== (vm_page_t
)0) break;
9981 } while (!vm_page_queue_end(&vm_page_queue_active
, (vm_page_queue_entry_t
) m
));
9982 vm_page_unlock_queues();
9984 printf("AC Q: %d : %d : %d\n", dpages
, pgopages
, precpages
);
9987 #endif /* MACH_BSD */
9991 int upl_get_cached_tier(upl_t upl
)
9994 if (upl
->flags
& UPL_TRACKED_BY_OBJECT
)
9995 return (upl
->upl_priority
);
9998 #endif /* CONFIG_IOSCHED */
10000 ppnum_t
upl_get_highest_page(
10003 return upl
->highest_page
;
10006 upl_size_t
upl_get_size(
10012 upl_t
upl_associated_upl(upl_t upl
)
10014 return upl
->associated_upl
;
10017 void upl_set_associated_upl(upl_t upl
, upl_t associated_upl
)
10019 upl
->associated_upl
= associated_upl
;
10022 struct vnode
* upl_lookup_vnode(upl_t upl
)
10024 if (!upl
->map_object
->internal
)
10025 return vnode_pager_lookup_vnode(upl
->map_object
->pager
);
10031 kern_return_t
upl_ubc_alias_set(upl_t upl
, uintptr_t alias1
, uintptr_t alias2
)
10033 upl
->ubc_alias1
= alias1
;
10034 upl
->ubc_alias2
= alias2
;
10035 return KERN_SUCCESS
;
10037 int upl_ubc_alias_get(upl_t upl
, uintptr_t * al
, uintptr_t * al2
)
10040 *al
= upl
->ubc_alias1
;
10042 *al2
= upl
->ubc_alias2
;
10043 return KERN_SUCCESS
;
10045 #endif /* UPL_DEBUG */
#if VM_PRESSURE_EVENTS
/*
 * Upward trajectory.
 */
extern boolean_t vm_compressor_low_on_space(void);

boolean_t
VM_PRESSURE_NORMAL_TO_WARNING(void) {

	if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {

		/* Available pages below our threshold */
		if (memorystatus_available_pages < memorystatus_available_pages_pressure) {
			/* No frozen processes to kill */
			if (memorystatus_frozen_count == 0) {
				/* Not enough suspended processes available. */
				if (memorystatus_suspended_count < MEMORYSTATUS_SUSPENDED_THRESHOLD) {
					return TRUE;
				}
			}
		}
		return FALSE;

	} else {
		return ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 1 : 0);
	}
}

boolean_t
VM_PRESSURE_WARNING_TO_CRITICAL(void) {

	if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {

		/* Available pages below our threshold */
		if (memorystatus_available_pages < memorystatus_available_pages_critical) {
			return TRUE;
		}
		return FALSE;
	} else {
		return (vm_compressor_low_on_space() || (AVAILABLE_NON_COMPRESSED_MEMORY < ((12 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
	}
}

/*
 * Downward trajectory.
 */
boolean_t
VM_PRESSURE_WARNING_TO_NORMAL(void) {

	if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {

		/* Available pages above our threshold */
		unsigned int target_threshold = (unsigned int) (memorystatus_available_pages_pressure + ((15 * memorystatus_available_pages_pressure) / 100));
		if (memorystatus_available_pages > target_threshold) {
			return TRUE;
		}
		return FALSE;
	} else {
		return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((12 * VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) / 10)) ? 1 : 0);
	}
}

boolean_t
VM_PRESSURE_CRITICAL_TO_WARNING(void) {

	if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {

		/* Available pages above our threshold */
		unsigned int target_threshold = (unsigned int)(memorystatus_available_pages_critical + ((15 * memorystatus_available_pages_critical) / 100));
		if (memorystatus_available_pages > target_threshold) {
			return TRUE;
		}
		return FALSE;
	} else {
		return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((14 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
	}
}
#endif /* VM_PRESSURE_EVENTS */
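/*
 * Illustration (not part of the original source): the transitions above use
 * asymmetric thresholds so the pressure state doesn't flap -- dropping back
 * to a lower level requires available pages to exceed the trigger threshold
 * by 15% (and the compressor variants use 12/10 and 14/10 ratios).  A
 * standalone sketch of that hysteresis arithmetic with made-up sample
 * numbers.
 */
#if 0	/* standalone sketch only */
#include <stdbool.h>
#include <stdio.h>

/* upward: we are in trouble as soon as we fall below the raw threshold */
static bool
ex_normal_to_warning(unsigned available, unsigned pressure_threshold)
{
	return available < pressure_threshold;
}

/*
 * downward: require 15% headroom above the same threshold before relaxing,
 * mirroring target_threshold = threshold + (15 * threshold) / 100 above
 */
static bool
ex_warning_to_normal(unsigned available, unsigned pressure_threshold)
{
	unsigned target = pressure_threshold + (15 * pressure_threshold) / 100;

	return available > target;
}

int
main(void)
{
	unsigned threshold = 1000;

	printf("%d\n", ex_normal_to_warning(990, threshold));	/* 1: below 1000 */
	printf("%d\n", ex_warning_to_normal(1100, threshold));	/* 0: still inside the 1150 band */
	printf("%d\n", ex_warning_to_normal(1200, threshold));	/* 1: above 1150, back to normal */
	return 0;
}
#endif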
10128 #define VM_TEST_COLLAPSE_COMPRESSOR 0
10129 #define VM_TEST_WIRE_AND_EXTRACT 0
10130 #define VM_TEST_PAGE_WIRE_OVERFLOW_PANIC 0
10132 #define VM_TEST_KERNEL_OBJECT_FAULT 0
10133 #endif /* __arm64__ */
10134 #define VM_TEST_DEVICE_PAGER_TRANSPOSE (DEVELOPMENT || DEBUG)
10136 #if VM_TEST_COLLAPSE_COMPRESSOR
10137 extern boolean_t vm_object_collapse_compressor_allowed
;
10138 #include <IOKit/IOLib.h>
10140 vm_test_collapse_compressor(void)
10142 vm_object_size_t backing_size
, top_size
;
10143 vm_object_t backing_object
, top_object
;
10144 vm_map_offset_t backing_offset
, top_offset
;
10145 unsigned char *backing_address
, *top_address
;
10148 printf("VM_TEST_COLLAPSE_COMPRESSOR:\n");
10150 /* create backing object */
10151 backing_size
= 15 * PAGE_SIZE
;
10152 backing_object
= vm_object_allocate(backing_size
);
10153 assert(backing_object
!= VM_OBJECT_NULL
);
10154 printf("VM_TEST_COLLAPSE_COMPRESSOR: created backing object %p\n",
10156 /* map backing object */
10157 backing_offset
= 0;
10158 kr
= vm_map_enter(kernel_map
, &backing_offset
, backing_size
, 0,
10159 VM_FLAGS_ANYWHERE
, VM_MAP_KERNEL_FLAGS_NONE
,
10160 backing_object
, 0, FALSE
,
10161 VM_PROT_DEFAULT
, VM_PROT_DEFAULT
, VM_INHERIT_DEFAULT
);
10162 assert(kr
== KERN_SUCCESS
);
10163 backing_address
= (unsigned char *) backing_offset
;
10164 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10165 "mapped backing object %p at 0x%llx\n",
10166 backing_object
, (uint64_t) backing_offset
);
10167 /* populate with pages to be compressed in backing object */
10168 backing_address
[0x1*PAGE_SIZE
] = 0xB1;
10169 backing_address
[0x4*PAGE_SIZE
] = 0xB4;
10170 backing_address
[0x7*PAGE_SIZE
] = 0xB7;
10171 backing_address
[0xa*PAGE_SIZE
] = 0xBA;
10172 backing_address
[0xd*PAGE_SIZE
] = 0xBD;
10173 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10174 "populated pages to be compressed in "
10175 "backing_object %p\n", backing_object
);
10176 /* compress backing object */
10177 vm_object_pageout(backing_object
);
10178 printf("VM_TEST_COLLAPSE_COMPRESSOR: compressing backing_object %p\n",
10180 /* wait for all the pages to be gone */
10181 while (*(volatile int *)&backing_object
->resident_page_count
!= 0)
10183 printf("VM_TEST_COLLAPSE_COMPRESSOR: backing_object %p compressed\n",
10185 /* populate with pages to be resident in backing object */
10186 backing_address
[0x0*PAGE_SIZE
] = 0xB0;
10187 backing_address
[0x3*PAGE_SIZE
] = 0xB3;
10188 backing_address
[0x6*PAGE_SIZE
] = 0xB6;
10189 backing_address
[0x9*PAGE_SIZE
] = 0xB9;
10190 backing_address
[0xc*PAGE_SIZE
] = 0xBC;
10191 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10192 "populated pages to be resident in "
10193 "backing_object %p\n", backing_object
);
10194 /* leave the other pages absent */
10195 /* mess with the paging_offset of the backing_object */
10196 assert(backing_object
->paging_offset
== 0);
10197 backing_object
->paging_offset
= 0x3000;
10199 /* create top object */
10200 top_size
= 9 * PAGE_SIZE
;
10201 top_object
= vm_object_allocate(top_size
);
10202 assert(top_object
!= VM_OBJECT_NULL
);
10203 printf("VM_TEST_COLLAPSE_COMPRESSOR: created top object %p\n",
10205 /* map top object */
10207 kr
= vm_map_enter(kernel_map
, &top_offset
, top_size
, 0,
10208 VM_FLAGS_ANYWHERE
, VM_MAP_KERNEL_FLAGS_NONE
,
10209 top_object
, 0, FALSE
,
10210 VM_PROT_DEFAULT
, VM_PROT_DEFAULT
, VM_INHERIT_DEFAULT
);
10211 assert(kr
== KERN_SUCCESS
);
10212 top_address
= (unsigned char *) top_offset
;
10213 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10214 "mapped top object %p at 0x%llx\n",
10215 top_object
, (uint64_t) top_offset
);
10216 /* populate with pages to be compressed in top object */
10217 top_address
[0x3*PAGE_SIZE
] = 0xA3;
10218 top_address
[0x4*PAGE_SIZE
] = 0xA4;
10219 top_address
[0x5*PAGE_SIZE
] = 0xA5;
10220 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10221 "populated pages to be compressed in "
10222 "top_object %p\n", top_object
);
10223 /* compress top object */
10224 vm_object_pageout(top_object
);
10225 printf("VM_TEST_COLLAPSE_COMPRESSOR: compressing top_object %p\n",
10227 /* wait for all the pages to be gone */
10228 while (top_object
->resident_page_count
!= 0)
10230 printf("VM_TEST_COLLAPSE_COMPRESSOR: top_object %p compressed\n",
10232 /* populate with pages to be resident in top object */
10233 top_address
[0x0*PAGE_SIZE
] = 0xA0;
10234 top_address
[0x1*PAGE_SIZE
] = 0xA1;
10235 top_address
[0x2*PAGE_SIZE
] = 0xA2;
10236 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10237 "populated pages to be resident in "
10238 "top_object %p\n", top_object
);
10239 /* leave the other pages absent */
10241 /* link the 2 objects */
10242 vm_object_reference(backing_object
);
10243 top_object
->shadow
= backing_object
;
10244 top_object
->vo_shadow_offset
= 0x3000;
10245 printf("VM_TEST_COLLAPSE_COMPRESSOR: linked %p and %p\n",
10246 top_object
, backing_object
);
10248 /* unmap backing object */
10249 vm_map_remove(kernel_map
,
10251 backing_offset
+ backing_size
,
10253 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10254 "unmapped backing_object %p [0x%llx:0x%llx]\n",
10256 (uint64_t) backing_offset
,
10257 (uint64_t) (backing_offset
+ backing_size
));
10260 printf("VM_TEST_COLLAPSE_COMPRESSOR: collapsing %p\n", top_object
);
10261 vm_object_lock(top_object
);
10262 vm_object_collapse(top_object
, 0, FALSE
);
10263 vm_object_unlock(top_object
);
10264 printf("VM_TEST_COLLAPSE_COMPRESSOR: collapsed %p\n", top_object
);
10267 if (top_object
->shadow
!= VM_OBJECT_NULL
) {
10268 printf("VM_TEST_COLLAPSE_COMPRESSOR: not collapsed\n");
10269 printf("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
10270 if (vm_object_collapse_compressor_allowed
) {
10271 panic("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
10274 /* check the contents of the mapping */
10275 unsigned char expect
[9] =
10276 { 0xA0, 0xA1, 0xA2, /* resident in top */
10277 0xA3, 0xA4, 0xA5, /* compressed in top */
10278 0xB9, /* resident in backing + shadow_offset */
10279 0xBD, /* compressed in backing + shadow_offset + paging_offset */
10280 0x00 }; /* absent in both */
10281 unsigned char actual
[9];
10282 unsigned int i
, errors
;
10285 for (i
= 0; i
< sizeof (actual
); i
++) {
10286 actual
[i
] = (unsigned char) top_address
[i
*PAGE_SIZE
];
10287 if (actual
[i
] != expect
[i
]) {
10291 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10292 "actual [%x %x %x %x %x %x %x %x %x] "
10293 "expect [%x %x %x %x %x %x %x %x %x] "
10295 actual
[0], actual
[1], actual
[2], actual
[3],
10296 actual
[4], actual
[5], actual
[6], actual
[7],
10298 expect
[0], expect
[1], expect
[2], expect
[3],
10299 expect
[4], expect
[5], expect
[6], expect
[7],
10303 panic("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
10305 printf("VM_TEST_COLLAPSE_COMPRESSOR: PASS\n");
10309 #else /* VM_TEST_COLLAPSE_COMPRESSOR */
10310 #define vm_test_collapse_compressor()
10311 #endif /* VM_TEST_COLLAPSE_COMPRESSOR */
10313 #if VM_TEST_WIRE_AND_EXTRACT
10314 extern ledger_template_t task_ledger_template
;
10315 #include <mach/mach_vm.h>
10316 extern ppnum_t
vm_map_get_phys_page(vm_map_t map
,
10317 vm_offset_t offset
);
10319 vm_test_wire_and_extract(void)
10322 vm_map_t user_map
, wire_map
;
10323 mach_vm_address_t user_addr
, wire_addr
;
10324 mach_vm_size_t user_size
, wire_size
;
10325 mach_vm_offset_t cur_offset
;
10326 vm_prot_t cur_prot
, max_prot
;
10327 ppnum_t user_ppnum
, wire_ppnum
;
10330 ledger
= ledger_instantiate(task_ledger_template
,
10331 LEDGER_CREATE_ACTIVE_ENTRIES
);
10332 user_map
= vm_map_create(pmap_create(ledger
, 0, PMAP_CREATE_64BIT
),
10336 wire_map
= vm_map_create(NULL
,
10341 user_size
= 0x10000;
10342 kr
= mach_vm_allocate(user_map
,
10345 VM_FLAGS_ANYWHERE
);
10346 assert(kr
== KERN_SUCCESS
);
10348 wire_size
= user_size
;
10349 kr
= mach_vm_remap(wire_map
,
10360 assert(kr
== KERN_SUCCESS
);
10361 for (cur_offset
= 0;
10362 cur_offset
< wire_size
;
10363 cur_offset
+= PAGE_SIZE
) {
10364 kr
= vm_map_wire_and_extract(wire_map
,
10365 wire_addr
+ cur_offset
,
10366 VM_PROT_DEFAULT
| VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK
),
10369 assert(kr
== KERN_SUCCESS
);
10370 user_ppnum
= vm_map_get_phys_page(user_map
,
10371 user_addr
+ cur_offset
);
10372 printf("VM_TEST_WIRE_AND_EXTRACT: kr=0x%x "
10373 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
10375 user_map
, user_addr
+ cur_offset
, user_ppnum
,
10376 wire_map
, wire_addr
+ cur_offset
, wire_ppnum
);
10377 if (kr
!= KERN_SUCCESS
||
10379 wire_ppnum
!= user_ppnum
) {
10380 panic("VM_TEST_WIRE_AND_EXTRACT: FAIL\n");
10383 cur_offset
-= PAGE_SIZE
;
10384 kr
= vm_map_wire_and_extract(wire_map
,
10385 wire_addr
+ cur_offset
,
10389 assert(kr
== KERN_SUCCESS
);
10390 printf("VM_TEST_WIRE_AND_EXTRACT: re-wire kr=0x%x "
10391 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
10393 user_map
, user_addr
+ cur_offset
, user_ppnum
,
10394 wire_map
, wire_addr
+ cur_offset
, wire_ppnum
);
10395 if (kr
!= KERN_SUCCESS
||
10397 wire_ppnum
!= user_ppnum
) {
10398 panic("VM_TEST_WIRE_AND_EXTRACT: FAIL\n");
10401 printf("VM_TEST_WIRE_AND_EXTRACT: PASS\n");
10403 #else /* VM_TEST_WIRE_AND_EXTRACT */
10404 #define vm_test_wire_and_extract()
10405 #endif /* VM_TEST_WIRE_AND_EXTRACT */
10407 #if VM_TEST_PAGE_WIRE_OVERFLOW_PANIC
10409 vm_test_page_wire_overflow_panic(void)
10411 vm_object_t object
;
10414 printf("VM_TEST_PAGE_WIRE_OVERFLOW_PANIC: starting...\n");
10416 object
= vm_object_allocate(PAGE_SIZE
);
10417 vm_object_lock(object
);
10418 page
= vm_page_alloc(object
, 0x0);
10419 vm_page_lock_queues();
10421 vm_page_wire(page
, 1, FALSE
);
10422 } while (page
->wire_count
!= 0);
10423 vm_page_unlock_queues();
10424 vm_object_unlock(object
);
10425 panic("FBDP(%p,%p): wire_count overflow not detected\n",
10428 #else /* VM_TEST_PAGE_WIRE_OVERFLOW_PANIC */
10429 #define vm_test_page_wire_overflow_panic()
10430 #endif /* VM_TEST_PAGE_WIRE_OVERFLOW_PANIC */
10432 #if __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT
10433 extern int copyinframe(vm_address_t fp
, char *frame
, boolean_t is64bit
);
10435 vm_test_kernel_object_fault(void)
10439 uintptr_t frameb
[2];
10442 kr
= kernel_memory_allocate(kernel_map
, &stack
,
10443 kernel_stack_size
+ (2*PAGE_SIZE
),
10445 (KMA_KSTACK
| KMA_KOBJECT
|
10446 KMA_GUARD_FIRST
| KMA_GUARD_LAST
),
10447 VM_KERN_MEMORY_STACK
);
10448 if (kr
!= KERN_SUCCESS
) {
10449 panic("VM_TEST_KERNEL_OBJECT_FAULT: kernel_memory_allocate kr 0x%x\n", kr
);
10451 ret
= copyinframe((uintptr_t)stack
, (char *)frameb
, TRUE
);
10453 printf("VM_TEST_KERNEL_OBJECT_FAULT: PASS\n");
10455 printf("VM_TEST_KERNEL_OBJECT_FAULT: FAIL\n");
10457 vm_map_remove(kernel_map
,
10459 stack
+ kernel_stack_size
+ (2*PAGE_SIZE
),
10460 VM_MAP_REMOVE_KUNWIRE
);
10463 #else /* __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT */
10464 #define vm_test_kernel_object_fault()
10465 #endif /* __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT */
10467 #if VM_TEST_DEVICE_PAGER_TRANSPOSE
10469 vm_test_device_pager_transpose(void)
10471 memory_object_t device_pager
;
10472 vm_object_t anon_object
, device_object
;
10474 vm_map_offset_t anon_mapping
, device_mapping
;
10477 size
= 3 * PAGE_SIZE
;
10478 anon_object
= vm_object_allocate(size
);
10479 assert(anon_object
!= VM_OBJECT_NULL
);
10480 device_pager
= device_pager_setup(NULL
, 0, size
, 0);
10481 assert(device_pager
!= NULL
);
10482 device_object
= memory_object_to_vm_object(device_pager
);
10483 assert(device_object
!= VM_OBJECT_NULL
);
10485 kr
= vm_map_enter(kernel_map
, &anon_mapping
, size
, 0,
10486 VM_FLAGS_ANYWHERE
, VM_MAP_KERNEL_FLAGS_NONE
, VM_KERN_MEMORY_NONE
,
10487 anon_object
, 0, FALSE
, VM_PROT_DEFAULT
, VM_PROT_ALL
,
10488 VM_INHERIT_DEFAULT
);
10489 assert(kr
== KERN_SUCCESS
);
10490 device_mapping
= 0;
10491 kr
= vm_map_enter_mem_object(kernel_map
, &device_mapping
, size
, 0,
10493 VM_MAP_KERNEL_FLAGS_NONE
,
10494 VM_KERN_MEMORY_NONE
,
10495 (void *)device_pager
, 0, FALSE
,
10496 VM_PROT_DEFAULT
, VM_PROT_ALL
,
10497 VM_INHERIT_DEFAULT
);
10498 assert(kr
== KERN_SUCCESS
);
10499 memory_object_deallocate(device_pager
);
10501 vm_object_lock(anon_object
);
10502 vm_object_activity_begin(anon_object
);
10503 anon_object
->blocked_access
= TRUE
;
10504 vm_object_unlock(anon_object
);
10505 vm_object_lock(device_object
);
10506 vm_object_activity_begin(device_object
);
10507 device_object
->blocked_access
= TRUE
;
10508 vm_object_unlock(device_object
);
10510 assert(anon_object
->ref_count
== 1);
10511 assert(!anon_object
->named
);
10512 assert(device_object
->ref_count
== 2);
10513 assert(device_object
->named
);
10515 kr
= vm_object_transpose(device_object
, anon_object
, size
);
10516 assert(kr
== KERN_SUCCESS
);
10518 vm_object_lock(anon_object
);
10519 vm_object_activity_end(anon_object
);
10520 anon_object
->blocked_access
= FALSE
;
10521 vm_object_unlock(anon_object
);
10522 vm_object_lock(device_object
);
10523 vm_object_activity_end(device_object
);
10524 device_object
->blocked_access
= FALSE
;
10525 vm_object_unlock(device_object
);
10527 assert(anon_object
->ref_count
== 2);
10528 assert(anon_object
->named
);
10529 kr
= vm_deallocate(kernel_map
, anon_mapping
, size
);
10530 assert(kr
== KERN_SUCCESS
);
10531 assert(device_object
->ref_count
== 1);
10532 assert(!device_object
->named
);
10533 kr
= vm_deallocate(kernel_map
, device_mapping
, size
);
10534 assert(kr
== KERN_SUCCESS
);
10536 printf("VM_TEST_DEVICE_PAGER_TRANSPOSE: PASS\n");
10538 #else /* VM_TEST_DEVICE_PAGER_TRANSPOSE */
10539 #define vm_test_device_pager_transpose()
10540 #endif /* VM_TEST_DEVICE_PAGER_TRANSPOSE */
10545 vm_test_collapse_compressor();
10546 vm_test_wire_and_extract();
10547 vm_test_page_wire_overflow_panic();
10548 vm_test_kernel_object_fault();
10549 vm_test_device_pager_transpose();