1 /*
2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_pageout.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * The proverbial page-out daemon.
64 */
65
66 #include <stdint.h>
67
68 #include <debug.h>
69 #include <mach_pagemap.h>
70 #include <mach_cluster_stats.h>
71
72 #include <mach/mach_types.h>
73 #include <mach/memory_object.h>
74 #include <mach/memory_object_default.h>
75 #include <mach/memory_object_control_server.h>
76 #include <mach/mach_host_server.h>
77 #include <mach/upl.h>
78 #include <mach/vm_map.h>
79 #include <mach/vm_param.h>
80 #include <mach/vm_statistics.h>
81 #include <mach/sdt.h>
82
83 #include <kern/kern_types.h>
84 #include <kern/counters.h>
85 #include <kern/host_statistics.h>
86 #include <kern/machine.h>
87 #include <kern/misc_protos.h>
88 #include <kern/sched.h>
89 #include <kern/thread.h>
90 #include <kern/xpr.h>
91 #include <kern/kalloc.h>
92 #include <kern/policy_internal.h>
93 #include <kern/thread_group.h>
94
95 #include <machine/vm_tuning.h>
96 #include <machine/commpage.h>
97
98 #include <vm/pmap.h>
99 #include <vm/vm_compressor_pager.h>
100 #include <vm/vm_fault.h>
101 #include <vm/vm_map.h>
102 #include <vm/vm_object.h>
103 #include <vm/vm_page.h>
104 #include <vm/vm_pageout.h>
105 #include <vm/vm_protos.h> /* must be last */
106 #include <vm/memory_object.h>
107 #include <vm/vm_purgeable_internal.h>
108 #include <vm/vm_shared_region.h>
109 #include <vm/vm_compressor.h>
110
111 #include <san/kasan.h>
112
113 #if CONFIG_PHANTOM_CACHE
114 #include <vm/vm_phantom_cache.h>
115 #endif
116
117 extern int cs_debug;
118
119 #if UPL_DEBUG
120 #include <libkern/OSDebug.h>
121 #endif
122
123 extern void m_drain(void);
124
125 #if VM_PRESSURE_EVENTS
126 #if CONFIG_JETSAM
127 extern unsigned int memorystatus_available_pages;
128 extern unsigned int memorystatus_available_pages_pressure;
129 extern unsigned int memorystatus_available_pages_critical;
130 #else /* CONFIG_JETSAM */
131 extern uint64_t memorystatus_available_pages;
132 extern uint64_t memorystatus_available_pages_pressure;
133 extern uint64_t memorystatus_available_pages_critical;
134 #endif /* CONFIG_JETSAM */
135
136 extern unsigned int memorystatus_frozen_count;
137 extern unsigned int memorystatus_suspended_count;
138
139 extern vm_pressure_level_t memorystatus_vm_pressure_level;
140 int memorystatus_purge_on_warning = 2;
141 int memorystatus_purge_on_urgent = 5;
142 int memorystatus_purge_on_critical = 8;
143
144 void vm_pressure_response(void);
145 boolean_t vm_pressure_thread_running = FALSE;
146 extern void consider_vm_pressure_events(void);
147
148 #define MEMORYSTATUS_SUSPENDED_THRESHOLD 4
149 #endif /* VM_PRESSURE_EVENTS */
150
151 boolean_t vm_pressure_changed = FALSE;
152
153 #ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE /* maximum iterations of the active queue to move pages to inactive */
154 #define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 100
155 #endif
156
157 #ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
158 #ifdef CONFIG_EMBEDDED
159 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 1024
160 #else
161 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096
162 #endif
163 #endif
164
165 #ifndef VM_PAGEOUT_DEADLOCK_RELIEF
166 #define VM_PAGEOUT_DEADLOCK_RELIEF 100 /* number of pages to move to break deadlock */
167 #endif
168
169 #ifndef VM_PAGEOUT_INACTIVE_RELIEF
170 #define VM_PAGEOUT_INACTIVE_RELIEF 50 /* minimum number of pages to move to the inactive q */
171 #endif
172
173 #ifndef VM_PAGE_LAUNDRY_MAX
174 #define VM_PAGE_LAUNDRY_MAX 128UL /* maximum pageouts on a given pageout queue */
175 #endif  /* VM_PAGE_LAUNDRY_MAX */
176
177 #ifndef VM_PAGEOUT_BURST_WAIT
178 #define VM_PAGEOUT_BURST_WAIT 10 /* milliseconds */
179 #endif /* VM_PAGEOUT_BURST_WAIT */
180
181 #ifndef VM_PAGEOUT_EMPTY_WAIT
182 #define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */
183 #endif /* VM_PAGEOUT_EMPTY_WAIT */
184
185 #ifndef VM_PAGEOUT_DEADLOCK_WAIT
186 #define VM_PAGEOUT_DEADLOCK_WAIT 300 /* milliseconds */
187 #endif /* VM_PAGEOUT_DEADLOCK_WAIT */
188
189 #ifndef VM_PAGEOUT_IDLE_WAIT
190 #define VM_PAGEOUT_IDLE_WAIT 10 /* milliseconds */
191 #endif /* VM_PAGEOUT_IDLE_WAIT */
192
193 #ifndef VM_PAGEOUT_SWAP_WAIT
194 #define VM_PAGEOUT_SWAP_WAIT 50 /* milliseconds */
195 #endif /* VM_PAGEOUT_SWAP_WAIT */
196
197 #ifndef VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED
198 #define VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED 1000 /* maximum pages considered before we issue a pressure event */
199 #endif /* VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED */
200
201 #ifndef VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS
202 #define VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS 5 /* seconds */
203 #endif /* VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS */
204
205 unsigned int vm_page_speculative_q_age_ms = VM_PAGE_SPECULATIVE_Q_AGE_MS;
206 unsigned int vm_page_speculative_percentage = 5;
207
208 #ifndef VM_PAGE_SPECULATIVE_TARGET
209 #define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / (100 / vm_page_speculative_percentage))
210 #endif /* VM_PAGE_SPECULATIVE_TARGET */
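/*
 * Worked example: with the default vm_page_speculative_percentage of 5,
 * the divisor is 100 / 5 == 20, so
 *	VM_PAGE_SPECULATIVE_TARGET(1000000) == 1000000 / 20 == 50000 pages,
 * i.e. roughly 5% of the supplied page count.  Because the divisor is
 * computed with integer division, percentages that do not divide 100
 * evenly are effectively rounded in the divisor first.
 */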
211
212
213 /*
214 * To obtain a reasonable LRU approximation, the inactive queue
215 * needs to be large enough to give pages on it a chance to be
216 * referenced a second time. This macro defines the fraction
217 * of active+inactive pages that should be inactive.
218 * The pageout daemon uses it to update vm_page_inactive_target.
219 *
220 * If vm_page_free_count falls below vm_page_free_target and
221 * vm_page_inactive_count is below vm_page_inactive_target,
222 * then the pageout daemon starts running.
223 */
224
225 #ifndef VM_PAGE_INACTIVE_TARGET
226 #ifdef CONFIG_EMBEDDED
227 #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 3)
228 #else
229 #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 2)
230 #endif
231 #endif /* VM_PAGE_INACTIVE_TARGET */
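/*
 * Worked example: with 900000 available pages, the inactive target is
 *	900000 / 3 == 300000 pages on CONFIG_EMBEDDED, or
 *	900000 / 2 == 450000 pages otherwise.
 */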
232
233 /*
234 * Once the pageout daemon starts running, it keeps going
235 * until vm_page_free_count meets or exceeds vm_page_free_target.
236 */
237
238 #ifndef VM_PAGE_FREE_TARGET
239 #ifdef CONFIG_EMBEDDED
240 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 100)
241 #else
242 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
243 #endif
244 #endif /* VM_PAGE_FREE_TARGET */
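/*
 * Worked example: VM_PAGE_FREE_TARGET(100000) evaluates to
 *	15 + 100000 / 100 == 1015 pages on CONFIG_EMBEDDED, or
 *	15 + 100000 / 80  == 1265 pages otherwise.
 */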
245
246
247 /*
248 * The pageout daemon always starts running once vm_page_free_count
249 * falls below vm_page_free_min.
250 */
251
252 #ifndef VM_PAGE_FREE_MIN
253 #ifdef CONFIG_EMBEDDED
254 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 200)
255 #else
256 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
257 #endif
258 #endif /* VM_PAGE_FREE_MIN */
259
260 #ifdef CONFIG_EMBEDDED
261 #define VM_PAGE_FREE_RESERVED_LIMIT 100
262 #define VM_PAGE_FREE_MIN_LIMIT 1500
263 #define VM_PAGE_FREE_TARGET_LIMIT 2000
264 #else
265 #define VM_PAGE_FREE_RESERVED_LIMIT 1700
266 #define VM_PAGE_FREE_MIN_LIMIT 3500
267 #define VM_PAGE_FREE_TARGET_LIMIT 4000
268 #endif
269
270 /*
271 * When vm_page_free_count falls below vm_page_free_reserved,
272 * only vm-privileged threads can allocate pages. vm-privilege
273 * allows the pageout daemon and default pager (and any other
274 * associated threads needed for default pageout) to continue
275 * operation by dipping into the reserved pool of pages.
276 */
277
278 #ifndef VM_PAGE_FREE_RESERVED
279 #define VM_PAGE_FREE_RESERVED(n) \
280 ((unsigned) (6 * VM_PAGE_LAUNDRY_MAX) + (n))
281 #endif /* VM_PAGE_FREE_RESERVED */
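/*
 * Worked example: with VM_PAGE_LAUNDRY_MAX at its default of 128,
 *	VM_PAGE_FREE_RESERVED(n) == 6 * 128 + (n) == 768 + (n),
 * e.g. VM_PAGE_FREE_RESERVED(4) == 772 reserved pages.
 */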
282
283 /*
284 * When we dequeue pages from the inactive list, they are
285  * reactivated (i.e., put back on the active queue) if referenced.
286 * However, it is possible to starve the free list if other
287 * processors are referencing pages faster than we can turn off
288 * the referenced bit. So we limit the number of reactivations
289 * we will make per call of vm_pageout_scan().
290 */
291 #define VM_PAGE_REACTIVATE_LIMIT_MAX 20000
292 #ifndef VM_PAGE_REACTIVATE_LIMIT
293 #ifdef CONFIG_EMBEDDED
294 #define VM_PAGE_REACTIVATE_LIMIT(avail) (VM_PAGE_INACTIVE_TARGET(avail) / 2)
295 #else
296 #define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
297 #endif
298 #endif /* VM_PAGE_REACTIVATE_LIMIT */
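/*
 * Worked example: with 600000 available pages,
 *	CONFIG_EMBEDDED: VM_PAGE_INACTIVE_TARGET(600000) / 2 == 200000 / 2 == 100000
 *	otherwise:       MAX(600000 / 20, 20000) == MAX(30000, 20000) == 30000
 */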
299 #define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM 1000
300
301
302 extern boolean_t hibernate_cleaning_in_progress;
303
304 /*
305  * Exported variable used to broadcast the activation of the pageout scan.
306 * Working Set uses this to throttle its use of pmap removes. In this
307 * way, code which runs within memory in an uncontested context does
308 * not keep encountering soft faults.
309 */
310
311 unsigned int vm_pageout_scan_event_counter = 0;
312
313 /*
314 * Forward declarations for internal routines.
315 */
316 struct cq {
317 struct vm_pageout_queue *q;
318 void *current_chead;
319 char *scratch_buf;
320 int id;
321 };
322
323 struct cq ciq[MAX_COMPRESSOR_THREAD_COUNT];
324
325
326 #if VM_PRESSURE_EVENTS
327 void vm_pressure_thread(void);
328
329 boolean_t VM_PRESSURE_NORMAL_TO_WARNING(void);
330 boolean_t VM_PRESSURE_WARNING_TO_CRITICAL(void);
331
332 boolean_t VM_PRESSURE_WARNING_TO_NORMAL(void);
333 boolean_t VM_PRESSURE_CRITICAL_TO_WARNING(void);
334 #endif
335 void vm_pageout_garbage_collect(int);
336 static void vm_pageout_iothread_external(void);
337 static void vm_pageout_iothread_internal(struct cq *cq);
338 static void vm_pageout_adjust_eq_iothrottle(struct vm_pageout_queue *, boolean_t);
339
340 extern void vm_pageout_continue(void);
341 extern void vm_pageout_scan(void);
342 void vm_tests(void); /* forward */
343
344 boolean_t vm_restricted_to_single_processor = FALSE;
345 #if !CONFIG_EMBEDDED
346 static boolean_t vm_pageout_waiter = FALSE;
347 static boolean_t vm_pageout_running = FALSE;
348 #endif /* !CONFIG_EMBEDDED */
349
350
351 static thread_t vm_pageout_external_iothread = THREAD_NULL;
352 static thread_t vm_pageout_internal_iothread = THREAD_NULL;
353
354 unsigned int vm_pageout_reserved_internal = 0;
355 unsigned int vm_pageout_reserved_really = 0;
356
357 unsigned int vm_pageout_swap_wait = 0;
358 unsigned int vm_pageout_idle_wait = 0; /* milliseconds */
359 unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
360 unsigned int vm_pageout_burst_wait = 0; /* milliseconds */
361 unsigned int vm_pageout_deadlock_wait = 0; /* milliseconds */
362 unsigned int vm_pageout_deadlock_relief = 0;
363 unsigned int vm_pageout_inactive_relief = 0;
364 unsigned int vm_pageout_burst_active_throttle = 0;
365 unsigned int vm_pageout_burst_inactive_throttle = 0;
366
367 int vm_upl_wait_for_pages = 0;
368
369
370 /*
371 * These variables record the pageout daemon's actions:
372 * how many pages it looks at and what happens to those pages.
373 * No locking needed because only one thread modifies the variables.
374 */
375
376 unsigned int vm_pageout_active = 0; /* debugging */
377 unsigned int vm_pageout_inactive = 0; /* debugging */
378 unsigned int vm_pageout_inactive_throttled = 0; /* debugging */
379 unsigned int vm_pageout_inactive_forced = 0; /* debugging */
380 unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
381 unsigned int vm_pageout_inactive_avoid = 0; /* debugging */
382 unsigned int vm_pageout_inactive_busy = 0; /* debugging */
383 unsigned int vm_pageout_inactive_error = 0; /* debugging */
384 unsigned int vm_pageout_inactive_absent = 0; /* debugging */
385 unsigned int vm_pageout_inactive_notalive = 0; /* debugging */
386 unsigned int vm_pageout_inactive_used = 0; /* debugging */
387 unsigned int vm_pageout_cache_evicted = 0; /* debugging */
388 unsigned int vm_pageout_inactive_clean = 0; /* debugging */
389 unsigned int vm_pageout_speculative_clean = 0; /* debugging */
390 unsigned int vm_pageout_speculative_dirty = 0; /* debugging */
391
392 unsigned int vm_pageout_freed_from_cleaned = 0;
393 unsigned int vm_pageout_freed_from_speculative = 0;
394 unsigned int vm_pageout_freed_from_inactive_clean = 0;
395 unsigned int vm_pageout_freed_after_compression = 0;
396
397 extern uint32_t vm_compressor_pages_grabbed;
398 extern uint32_t c_segment_pages_compressed;
399
400 unsigned int vm_pageout_enqueued_cleaned_from_inactive_dirty = 0;
401
402 unsigned int vm_pageout_cleaned_reclaimed = 0; /* debugging; how many cleaned pages are reclaimed by the pageout scan */
403 unsigned int vm_pageout_cleaned_reactivated = 0; /* debugging; how many cleaned pages are found to be referenced on pageout (and are therefore reactivated) */
404 unsigned int vm_pageout_cleaned_reference_reactivated = 0;
405 unsigned int vm_pageout_cleaned_volatile_reactivated = 0;
406 unsigned int vm_pageout_cleaned_fault_reactivated = 0;
407 unsigned int vm_pageout_cleaned_commit_reactivated = 0; /* debugging; how many cleaned pages are found to be referenced on commit (and are therefore reactivated) */
408 unsigned int vm_pageout_cleaned_busy = 0;
409 unsigned int vm_pageout_cleaned_nolock = 0;
410
411 unsigned int vm_pageout_inactive_dirty_internal = 0; /* debugging */
412 unsigned int vm_pageout_inactive_dirty_external = 0; /* debugging */
413 unsigned int vm_pageout_inactive_deactivated = 0; /* debugging */
414 unsigned int vm_pageout_inactive_anonymous = 0; /* debugging */
415 unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */
416 unsigned int vm_pageout_purged_objects = 0; /* used for sysctl vm stats */
417 unsigned int vm_stat_discard = 0; /* debugging */
418 unsigned int vm_stat_discard_sent = 0; /* debugging */
419 unsigned int vm_stat_discard_failure = 0; /* debugging */
420 unsigned int vm_stat_discard_throttle = 0; /* debugging */
421 unsigned int vm_pageout_reactivation_limit_exceeded = 0; /* debugging */
422 unsigned int vm_pageout_inactive_force_reclaim = 0; /* debugging */
423 unsigned int vm_pageout_skipped_external = 0; /* debugging */
424
425 unsigned int vm_pageout_scan_reclaimed_throttled = 0;
426 unsigned int vm_pageout_scan_active_throttled = 0;
427 unsigned int vm_pageout_scan_inactive_throttled_internal = 0;
428 unsigned int vm_pageout_scan_inactive_throttled_external = 0;
429 unsigned int vm_pageout_scan_throttle = 0; /* debugging */
430 unsigned int vm_pageout_scan_burst_throttle = 0; /* debugging */
431 unsigned int vm_pageout_scan_empty_throttle = 0; /* debugging */
432 unsigned int vm_pageout_scan_swap_throttle = 0; /* debugging */
433 unsigned int vm_pageout_scan_deadlock_detected = 0; /* debugging */
434 unsigned int vm_pageout_scan_active_throttle_success = 0; /* debugging */
435 unsigned int vm_pageout_scan_inactive_throttle_success = 0; /* debugging */
436 unsigned int vm_pageout_inactive_external_forced_jetsam_count = 0; /* debugging */
437 unsigned int vm_pageout_scan_throttle_deferred = 0; /* debugging */
438 unsigned int vm_pageout_scan_yield_unthrottled = 0; /* debugging */
439 unsigned int vm_page_speculative_count_drifts = 0;
440 unsigned int vm_page_speculative_count_drift_max = 0;
441
442 uint32_t vm_compressor_failed;
443
444 /*
445 * Backing store throttle when BS is exhausted
446 */
447 unsigned int vm_backing_store_low = 0;
448
449 unsigned int vm_pageout_out_of_line = 0;
450 unsigned int vm_pageout_in_place = 0;
451
452 unsigned int vm_page_steal_pageout_page = 0;
453
454 struct vm_config vm_config;
455
456 struct vm_pageout_queue vm_pageout_queue_internal __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
457 struct vm_pageout_queue vm_pageout_queue_external __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
458
459 unsigned int vm_page_speculative_target = 0;
460
461 vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL;
462
463 boolean_t (* volatile consider_buffer_cache_collect)(int) = NULL;
464
465 #if DEVELOPMENT || DEBUG
466 unsigned long vm_cs_validated_resets = 0;
467 #endif
468
469 int vm_debug_events = 0;
470
471 #if CONFIG_MEMORYSTATUS
472 #if !CONFIG_JETSAM
473 extern boolean_t memorystatus_idle_exit_from_VM(void);
474 #endif
475 extern boolean_t memorystatus_kill_on_VM_page_shortage(boolean_t async);
476 extern void memorystatus_on_pageout_scan_end(void);
477
478 uint32_t vm_pageout_memorystatus_fb_factor_nr = 5;
479 uint32_t vm_pageout_memorystatus_fb_factor_dr = 2;
480 #if DEVELOPMENT || DEBUG
481 uint32_t vm_grab_anon_overrides = 0;
482 uint32_t vm_grab_anon_nops = 0;
483 #endif
484
485 #endif
486
487 #if MACH_CLUSTER_STATS
488 unsigned long vm_pageout_cluster_dirtied = 0;
489 unsigned long vm_pageout_cluster_cleaned = 0;
490 unsigned long vm_pageout_cluster_collisions = 0;
491 unsigned long vm_pageout_cluster_clusters = 0;
492 unsigned long vm_pageout_cluster_conversions = 0;
493 unsigned long vm_pageout_target_collisions = 0;
494 unsigned long vm_pageout_target_page_dirtied = 0;
495 unsigned long vm_pageout_target_page_freed = 0;
496 #define CLUSTER_STAT(clause) clause
497 #else /* MACH_CLUSTER_STATS */
498 #define CLUSTER_STAT(clause)
499 #endif /* MACH_CLUSTER_STATS */
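/*
 * CLUSTER_STAT() lets the cluster-statistics counters above be updated
 * without sprinkling #if MACH_CLUSTER_STATS throughout the code: a call such as
 *	CLUSTER_STAT(vm_pageout_target_page_freed++;)
 * expands to the increment when MACH_CLUSTER_STATS is configured and to
 * nothing otherwise.
 */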
500
501
502 #if DEVELOPMENT || DEBUG
503 vmct_stats_t vmct_stats;
504 #endif
505
506 /*
507 * Routine: vm_pageout_object_terminate
508 * Purpose:
509 * Destroy the pageout_object, and perform all of the
510 * required cleanup actions.
511 *
512 * In/Out conditions:
513 * The object must be locked, and will be returned locked.
514 */
515 void
516 vm_pageout_object_terminate(
517 vm_object_t object)
518 {
519 vm_object_t shadow_object;
520
521 /*
522 * Deal with the deallocation (last reference) of a pageout object
523 * (used for cleaning-in-place) by dropping the paging references/
524 * freeing pages in the original object.
525 */
526
527 assert(object->pageout);
528 shadow_object = object->shadow;
529 vm_object_lock(shadow_object);
530
531 while (!vm_page_queue_empty(&object->memq)) {
532 vm_page_t p, m;
533 vm_object_offset_t offset;
534
535 p = (vm_page_t) vm_page_queue_first(&object->memq);
536
537 assert(p->private);
538 assert(p->free_when_done);
539 p->free_when_done = FALSE;
540 assert(!p->cleaning);
541 assert(!p->laundry);
542
543 offset = p->offset;
544 VM_PAGE_FREE(p);
545 p = VM_PAGE_NULL;
546
547 m = vm_page_lookup(shadow_object,
548 offset + object->vo_shadow_offset);
549
550 if(m == VM_PAGE_NULL)
551 continue;
552
553 assert((m->dirty) || (m->precious) ||
554 (m->busy && m->cleaning));
555
556 /*
557 * Handle the trusted pager throttle.
558 * Also decrement the burst throttle (if external).
559 */
560 vm_page_lock_queues();
561 if (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)
562 vm_pageout_throttle_up(m);
563
564 /*
565 * Handle the "target" page(s). These pages are to be freed if
566 * successfully cleaned. Target pages are always busy, and are
567 * wired exactly once. The initial target pages are not mapped,
568 * (so cannot be referenced or modified) but converted target
569 * pages may have been modified between the selection as an
570 * adjacent page and conversion to a target.
571 */
572 if (m->free_when_done) {
573 assert(m->busy);
574 assert(m->vm_page_q_state == VM_PAGE_IS_WIRED);
575 assert(m->wire_count == 1);
576 m->cleaning = FALSE;
577 m->free_when_done = FALSE;
578 #if MACH_CLUSTER_STATS
579 if (m->wanted) vm_pageout_target_collisions++;
580 #endif
581 /*
582 * Revoke all access to the page. Since the object is
583 * locked, and the page is busy, this prevents the page
584 * from being dirtied after the pmap_disconnect() call
585 * returns.
586 *
587  * Since the page is left "dirty" but "not modified", we
588 * can detect whether the page was redirtied during
589 * pageout by checking the modify state.
590 */
591 if (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED) {
592 SET_PAGE_DIRTY(m, FALSE);
593 } else {
594 m->dirty = FALSE;
595 }
596
597 if (m->dirty) {
598 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
599 vm_page_unwire(m, TRUE); /* reactivates */
600 VM_STAT_INCR(reactivations);
601 PAGE_WAKEUP_DONE(m);
602 } else {
603 CLUSTER_STAT(vm_pageout_target_page_freed++;)
604 vm_page_free(m);/* clears busy, etc. */
605 }
606 vm_page_unlock_queues();
607 continue;
608 }
609 /*
610 * Handle the "adjacent" pages. These pages were cleaned in
611 * place, and should be left alone.
612 * If prep_pin_count is nonzero, then someone is using the
613 * page, so make it active.
614 */
615 if ((m->vm_page_q_state == VM_PAGE_NOT_ON_Q) && !m->private) {
616 if (m->reference)
617 vm_page_activate(m);
618 else
619 vm_page_deactivate(m);
620 }
621 if (m->overwriting) {
622 /*
623 * the (COPY_OUT_FROM == FALSE) request_page_list case
624 */
625 if (m->busy) {
626 /*
627 * We do not re-set m->dirty !
628 * The page was busy so no extraneous activity
629 * could have occurred. COPY_INTO is a read into the
630 * new pages. CLEAN_IN_PLACE does actually write
631 * out the pages but handling outside of this code
632 * will take care of resetting dirty. We clear the
633 * modify however for the Programmed I/O case.
634 */
635 pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
636
637 m->busy = FALSE;
638 m->absent = FALSE;
639 } else {
640 /*
641 * alternate (COPY_OUT_FROM == FALSE) request_page_list case
642 * Occurs when the original page was wired
643 * at the time of the list request
644 */
645 assert(VM_PAGE_WIRED(m));
646 vm_page_unwire(m, TRUE); /* reactivates */
647 }
648 m->overwriting = FALSE;
649 } else {
650 /*
651 * Set the dirty state according to whether or not the page was
652 * modified during the pageout. Note that we purposefully do
653 * NOT call pmap_clear_modify since the page is still mapped.
654  * If the page were to be dirtied between the 2 calls,
655 * this fact would be lost. This code is only necessary to
656 * maintain statistics, since the pmap module is always
657 * consulted if m->dirty is false.
658 */
659 #if MACH_CLUSTER_STATS
660 m->dirty = pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m));
661
662 if (m->dirty) vm_pageout_cluster_dirtied++;
663 else vm_pageout_cluster_cleaned++;
664 if (m->wanted) vm_pageout_cluster_collisions++;
665 #else
666 m->dirty = FALSE;
667 #endif
668 }
669 m->cleaning = FALSE;
670
671 /*
672 * Wakeup any thread waiting for the page to be un-cleaning.
673 */
674 PAGE_WAKEUP(m);
675 vm_page_unlock_queues();
676 }
677 /*
678 * Account for the paging reference taken in vm_paging_object_allocate.
679 */
680 vm_object_activity_end(shadow_object);
681 vm_object_unlock(shadow_object);
682
683 assert(object->ref_count == 0);
684 assert(object->paging_in_progress == 0);
685 assert(object->activity_in_progress == 0);
686 assert(object->resident_page_count == 0);
687 return;
688 }
689
690 /*
691 * Routine: vm_pageclean_setup
692 *
693 * Purpose: setup a page to be cleaned (made non-dirty), but not
694 * necessarily flushed from the VM page cache.
695 * This is accomplished by cleaning in place.
696 *
697 * The page must not be busy, and new_object
698 * must be locked.
699 *
700 */
701 static void
702 vm_pageclean_setup(
703 vm_page_t m,
704 vm_page_t new_m,
705 vm_object_t new_object,
706 vm_object_offset_t new_offset)
707 {
708 assert(!m->busy);
709 #if 0
710 assert(!m->cleaning);
711 #endif
712
713 XPR(XPR_VM_PAGEOUT,
714 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
715 VM_PAGE_OBJECT(m), m->offset, m,
716 new_m, new_offset);
717
718 pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
719
720 /*
721 * Mark original page as cleaning in place.
722 */
723 m->cleaning = TRUE;
724 SET_PAGE_DIRTY(m, FALSE);
725 m->precious = FALSE;
726
727 /*
728 * Convert the fictitious page to a private shadow of
729 * the real page.
730 */
731 assert(new_m->fictitious);
732 assert(VM_PAGE_GET_PHYS_PAGE(new_m) == vm_page_fictitious_addr);
733 new_m->fictitious = FALSE;
734 new_m->private = TRUE;
735 new_m->free_when_done = TRUE;
736 VM_PAGE_SET_PHYS_PAGE(new_m, VM_PAGE_GET_PHYS_PAGE(m));
737
738 vm_page_lockspin_queues();
739 vm_page_wire(new_m, VM_KERN_MEMORY_NONE, TRUE);
740 vm_page_unlock_queues();
741
742 vm_page_insert_wired(new_m, new_object, new_offset, VM_KERN_MEMORY_NONE);
743 assert(!new_m->wanted);
744 new_m->busy = FALSE;
745 }
746
747 /*
748 * Routine: vm_pageout_initialize_page
749 * Purpose:
750 * Causes the specified page to be initialized in
751 * the appropriate memory object. This routine is used to push
752 * pages into a copy-object when they are modified in the
753 * permanent object.
754 *
755 * The page is moved to a temporary object and paged out.
756 *
757 * In/out conditions:
758 * The page in question must not be on any pageout queues.
759 * The object to which it belongs must be locked.
760 * The page must be busy, but not hold a paging reference.
761 *
762 * Implementation:
763 * Move this page to a completely new object.
764 */
765 void
766 vm_pageout_initialize_page(
767 vm_page_t m)
768 {
769 vm_object_t object;
770 vm_object_offset_t paging_offset;
771 memory_object_t pager;
772
773 XPR(XPR_VM_PAGEOUT,
774 "vm_pageout_initialize_page, page 0x%X\n",
775 m, 0, 0, 0, 0);
776
777 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
778
779 object = VM_PAGE_OBJECT(m);
780
781 assert(m->busy);
782 assert(object->internal);
783
784 /*
785 * Verify that we really want to clean this page
786 */
787 assert(!m->absent);
788 assert(!m->error);
789 assert(m->dirty);
790
791 /*
792 * Create a paging reference to let us play with the object.
793 */
794 paging_offset = m->offset + object->paging_offset;
795
796 if (m->absent || m->error || m->restart || (!m->dirty && !m->precious)) {
797 panic("reservation without pageout?"); /* alan */
798
799 VM_PAGE_FREE(m);
800 vm_object_unlock(object);
801
802 return;
803 }
804
805 /*
806 * If there's no pager, then we can't clean the page. This should
807 * never happen since this should be a copy object and therefore not
808 * an external object, so the pager should always be there.
809 */
810
811 pager = object->pager;
812
813 if (pager == MEMORY_OBJECT_NULL) {
814 panic("missing pager for copy object");
815
816 VM_PAGE_FREE(m);
817 return;
818 }
819
820 /*
821 * set the page for future call to vm_fault_list_request
822 */
823 pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
824 SET_PAGE_DIRTY(m, FALSE);
825
826 /*
827 * keep the object from collapsing or terminating
828 */
829 vm_object_paging_begin(object);
830 vm_object_unlock(object);
831
832 /*
833 * Write the data to its pager.
834 * Note that the data is passed by naming the new object,
835 * not a virtual address; the pager interface has been
836 * manipulated to use the "internal memory" data type.
837 * [The object reference from its allocation is donated
838 * to the eventual recipient.]
839 */
840 memory_object_data_initialize(pager, paging_offset, PAGE_SIZE);
841
842 vm_object_lock(object);
843 vm_object_paging_end(object);
844 }
845
846 #if MACH_CLUSTER_STATS
847 #define MAXCLUSTERPAGES 16
848 struct {
849 unsigned long pages_in_cluster;
850 unsigned long pages_at_higher_offsets;
851 unsigned long pages_at_lower_offsets;
852 } cluster_stats[MAXCLUSTERPAGES];
853 #endif /* MACH_CLUSTER_STATS */
854
855
856 /*
857 * vm_pageout_cluster:
858 *
859 * Given a page, queue it to the appropriate I/O thread,
860 * which will page it out and attempt to clean adjacent pages
861 * in the same operation.
862 *
863 * The object and queues must be locked. We will take a
864 * paging reference to prevent deallocation or collapse when we
865 * release the object lock back at the call site. The I/O thread
866  * is responsible for consuming this reference.
867 *
868 * The page must not be on any pageout queue.
869 */
870 int32_t vmct_active = 0;
871 typedef enum vmct_state_t {
872 VMCT_IDLE,
873 VMCT_AWAKENED,
874 VMCT_ACTIVE,
875 } vmct_state_t;
876 vmct_state_t vmct_state[MAX_COMPRESSOR_THREAD_COUNT];
877
878 void
879 vm_pageout_cluster(vm_page_t m)
880 {
881 vm_object_t object = VM_PAGE_OBJECT(m);
882 struct vm_pageout_queue *q;
883
884
885 XPR(XPR_VM_PAGEOUT,
886 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
887 object, m->offset, m, 0, 0);
888
889 VM_PAGE_CHECK(m);
890 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
891 vm_object_lock_assert_exclusive(object);
892
893 /*
894 * Only a certain kind of page is appreciated here.
895 */
896 assert((m->dirty || m->precious) && (!VM_PAGE_WIRED(m)));
897 assert(!m->cleaning && !m->laundry);
898 assert(m->vm_page_q_state == VM_PAGE_NOT_ON_Q);
899
900 /*
901 * protect the object from collapse or termination
902 */
903 vm_object_activity_begin(object);
904
905 if (object->internal == TRUE) {
906 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
907
908 m->busy = TRUE;
909
910 q = &vm_pageout_queue_internal;
911 } else
912 q = &vm_pageout_queue_external;
913
914 /*
915 * pgo_laundry count is tied to the laundry bit
916 */
917 m->laundry = TRUE;
918 q->pgo_laundry++;
919
920 m->vm_page_q_state = VM_PAGE_ON_PAGEOUT_Q;
921 vm_page_queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
922
923 if (q->pgo_idle == TRUE) {
924 q->pgo_idle = FALSE;
925 thread_wakeup((event_t) &q->pgo_pending);
926 }
927 VM_PAGE_CHECK(m);
928 }
929
930
931 unsigned long vm_pageout_throttle_up_count = 0;
932
933 /*
934 * A page is back from laundry or we are stealing it back from
935 * the laundering state. See if there are some pages waiting to
936 * go to laundry and if we can let some of them go now.
937 *
938 * Object and page queues must be locked.
939 */
940 void
941 vm_pageout_throttle_up(
942 vm_page_t m)
943 {
944 struct vm_pageout_queue *q;
945 vm_object_t m_object;
946
947 m_object = VM_PAGE_OBJECT(m);
948
949 assert(m_object != VM_OBJECT_NULL);
950 assert(m_object != kernel_object);
951
952 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
953 vm_object_lock_assert_exclusive(m_object);
954
955 vm_pageout_throttle_up_count++;
956
957 if (m_object->internal == TRUE)
958 q = &vm_pageout_queue_internal;
959 else
960 q = &vm_pageout_queue_external;
961
962 if (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) {
963
964 vm_page_queue_remove(&q->pgo_pending, m, vm_page_t, pageq);
965 m->vm_page_q_state = VM_PAGE_NOT_ON_Q;
966
967 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
968
969 vm_object_activity_end(m_object);
970 }
971 if (m->laundry == TRUE) {
972
973 m->laundry = FALSE;
974 q->pgo_laundry--;
975
976 if (q->pgo_throttled == TRUE) {
977 q->pgo_throttled = FALSE;
978 thread_wakeup((event_t) &q->pgo_laundry);
979 }
980 if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
981 q->pgo_draining = FALSE;
982 thread_wakeup((event_t) (&q->pgo_laundry+1));
983 }
984 }
985 }
986
987
988 static void
989 vm_pageout_throttle_up_batch(
990 struct vm_pageout_queue *q,
991 int batch_cnt)
992 {
993 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
994
995 vm_pageout_throttle_up_count += batch_cnt;
996
997 q->pgo_laundry -= batch_cnt;
998
999 if (q->pgo_throttled == TRUE) {
1000 q->pgo_throttled = FALSE;
1001 thread_wakeup((event_t) &q->pgo_laundry);
1002 }
1003 if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
1004 q->pgo_draining = FALSE;
1005 thread_wakeup((event_t) (&q->pgo_laundry+1));
1006 }
1007 }
1008
1009
1010
1011 /*
1012 * VM memory pressure monitoring.
1013 *
1014 * vm_pageout_scan() keeps track of the number of pages it considers and
1015 * reclaims, in the currently active vm_pageout_stat[vm_pageout_stat_now].
1016 *
1017 * compute_memory_pressure() is called every second from compute_averages()
1018 * and moves "vm_pageout_stat_now" forward, to start accumulating the number
1019  * of reclaimed pages in a new vm_pageout_stat[] bucket.
1020 *
1021 * mach_vm_pressure_monitor() collects past statistics about memory pressure.
1022 * The caller provides the number of seconds ("nsecs") worth of statistics
1023 * it wants, up to 30 seconds.
1024 * It computes the number of pages reclaimed in the past "nsecs" seconds and
1025 * also returns the number of pages the system still needs to reclaim at this
1026 * moment in time.
1027 */
1028 #define VM_PAGEOUT_STAT_SIZE 31
1029 struct vm_pageout_stat {
1030 unsigned int considered;
1031 unsigned int reclaimed_clean;
1032 unsigned int pages_compressed;
1033 unsigned int pages_grabbed_by_compressor;
1034 unsigned int cleaned_dirty_external;
1035 unsigned int throttled_internal_q;
1036 unsigned int throttled_external_q;
1037 unsigned int failed_compressions;
1038 } vm_pageout_stats[VM_PAGEOUT_STAT_SIZE] = {{0,0,0,0,0,0,0,0}, };
1039
1040 unsigned int vm_pageout_stat_now = 0;
1041 unsigned int vm_memory_pressure = 0;
1042
1043 #define VM_PAGEOUT_STAT_BEFORE(i) \
1044 (((i) == 0) ? VM_PAGEOUT_STAT_SIZE - 1 : (i) - 1)
1045 #define VM_PAGEOUT_STAT_AFTER(i) \
1046 (((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 0 : (i) + 1)
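/*
 * vm_pageout_stats[] is used as a ring of VM_PAGEOUT_STAT_SIZE (31) one-second
 * buckets, so BEFORE/AFTER simply step the index with wraparound, e.g.
 *	VM_PAGEOUT_STAT_BEFORE(0) == 30
 *	VM_PAGEOUT_STAT_AFTER(30) == 0
 * compute_memory_pressure() advances vm_pageout_stat_now once per second, and
 * mach_vm_pressure_monitor() walks backwards through up to 30 completed buckets.
 */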
1047
1048 #if VM_PAGE_BUCKETS_CHECK
1049 int vm_page_buckets_check_interval = 10; /* in seconds */
1050 #endif /* VM_PAGE_BUCKETS_CHECK */
1051
1052 /*
1053 * Called from compute_averages().
1054 */
1055 void
1056 compute_memory_pressure(
1057 __unused void *arg)
1058 {
1059 unsigned int vm_pageout_next;
1060
1061 #if VM_PAGE_BUCKETS_CHECK
1062 /* check the consistency of VM page buckets at regular interval */
1063 static int counter = 0;
1064 if ((++counter % vm_page_buckets_check_interval) == 0) {
1065 vm_page_buckets_check();
1066 }
1067 #endif /* VM_PAGE_BUCKETS_CHECK */
1068
1069 vm_memory_pressure =
1070 vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].reclaimed_clean;
1071
1072 commpage_set_memory_pressure( vm_memory_pressure );
1073
1074 /* move "now" forward */
1075 vm_pageout_next = VM_PAGEOUT_STAT_AFTER(vm_pageout_stat_now);
1076 vm_pageout_stats[vm_pageout_next].considered = 0;
1077 vm_pageout_stats[vm_pageout_next].reclaimed_clean = 0;
1078 vm_pageout_stats[vm_pageout_next].throttled_internal_q = 0;
1079 vm_pageout_stats[vm_pageout_next].throttled_external_q = 0;
1080 vm_pageout_stats[vm_pageout_next].cleaned_dirty_external = 0;
1081 vm_pageout_stats[vm_pageout_next].pages_compressed = 0;
1082 vm_pageout_stats[vm_pageout_next].pages_grabbed_by_compressor = 0;
1083 vm_pageout_stats[vm_pageout_next].failed_compressions = 0;
1084
1085 vm_pageout_stat_now = vm_pageout_next;
1086 }
1087
1088
1089 /*
1090 * IMPORTANT
1091 * mach_vm_ctl_page_free_wanted() is called indirectly, via
1092 * mach_vm_pressure_monitor(), when taking a stackshot. Therefore,
1093 * it must be safe in the restricted stackshot context. Locks and/or
1094 * blocking are not allowable.
1095 */
1096 unsigned int
1097 mach_vm_ctl_page_free_wanted(void)
1098 {
1099 unsigned int page_free_target, page_free_count, page_free_wanted;
1100
1101 page_free_target = vm_page_free_target;
1102 page_free_count = vm_page_free_count;
1103 if (page_free_target > page_free_count) {
1104 page_free_wanted = page_free_target - page_free_count;
1105 } else {
1106 page_free_wanted = 0;
1107 }
1108
1109 return page_free_wanted;
1110 }
1111
1112
1113 /*
1114 * IMPORTANT:
1115 * mach_vm_pressure_monitor() is called when taking a stackshot, with
1116 * wait_for_pressure FALSE, so that code path must remain safe in the
1117 * restricted stackshot context. No blocking or locks are allowable.
1118 * on that code path.
1119 */
1120
1121 kern_return_t
1122 mach_vm_pressure_monitor(
1123 boolean_t wait_for_pressure,
1124 unsigned int nsecs_monitored,
1125 unsigned int *pages_reclaimed_p,
1126 unsigned int *pages_wanted_p)
1127 {
1128 wait_result_t wr;
1129 unsigned int vm_pageout_then, vm_pageout_now;
1130 unsigned int pages_reclaimed;
1131
1132 /*
1133 * We don't take the vm_page_queue_lock here because we don't want
1134 * vm_pressure_monitor() to get in the way of the vm_pageout_scan()
1135 * thread when it's trying to reclaim memory. We don't need fully
1136 * accurate monitoring anyway...
1137 */
1138
1139 if (wait_for_pressure) {
1140 /* wait until there's memory pressure */
1141 while (vm_page_free_count >= vm_page_free_target) {
1142 wr = assert_wait((event_t) &vm_page_free_wanted,
1143 THREAD_INTERRUPTIBLE);
1144 if (wr == THREAD_WAITING) {
1145 wr = thread_block(THREAD_CONTINUE_NULL);
1146 }
1147 if (wr == THREAD_INTERRUPTED) {
1148 return KERN_ABORTED;
1149 }
1150 if (wr == THREAD_AWAKENED) {
1151 /*
1152 * The memory pressure might have already
1153 * been relieved but let's not block again
1154 * and let's report that there was memory
1155 * pressure at some point.
1156 */
1157 break;
1158 }
1159 }
1160 }
1161
1162 /* provide the number of pages the system wants to reclaim */
1163 if (pages_wanted_p != NULL) {
1164 *pages_wanted_p = mach_vm_ctl_page_free_wanted();
1165 }
1166
1167 if (pages_reclaimed_p == NULL) {
1168 return KERN_SUCCESS;
1169 }
1170
1171 /* provide number of pages reclaimed in the last "nsecs_monitored" */
1172 vm_pageout_now = vm_pageout_stat_now;
1173 pages_reclaimed = 0;
1174 for (vm_pageout_then =
1175 VM_PAGEOUT_STAT_BEFORE(vm_pageout_now);
1176 vm_pageout_then != vm_pageout_now &&
1177 nsecs_monitored-- != 0;
1178 vm_pageout_then =
1179 VM_PAGEOUT_STAT_BEFORE(vm_pageout_then)) {
1180 pages_reclaimed += vm_pageout_stats[vm_pageout_then].reclaimed_clean;
1181 }
1182 *pages_reclaimed_p = pages_reclaimed;
1183
1184 return KERN_SUCCESS;
1185 }
1186
1187
1188
1189 #if DEVELOPMENT || DEBUG
1190
1191 static void
1192 vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t *, int);
1193
1194 /*
1195 * condition variable used to make sure there is
1196 * only a single sweep going on at a time
1197 */
1198 boolean_t vm_pageout_disconnect_all_pages_active = FALSE;
1199
1200
1201 void
1202 vm_pageout_disconnect_all_pages()
1203 {
1204 vm_page_lock_queues();
1205
1206 if (vm_pageout_disconnect_all_pages_active == TRUE) {
1207 vm_page_unlock_queues();
1208 return;
1209 }
1210 vm_pageout_disconnect_all_pages_active = TRUE;
1211 vm_page_unlock_queues();
1212
1213 vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_throttled, vm_page_throttled_count);
1214 vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_anonymous, vm_page_anonymous_count);
1215 vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_active, vm_page_active_count);
1216
1217 vm_pageout_disconnect_all_pages_active = FALSE;
1218 }
1219
1220
1221 void
1222 vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t *q, int qcount)
1223 {
1224 vm_page_t m;
1225 vm_object_t t_object = NULL;
1226 vm_object_t l_object = NULL;
1227 vm_object_t m_object = NULL;
1228 int delayed_unlock = 0;
1229 int try_failed_count = 0;
1230 int disconnected_count = 0;
1231 int paused_count = 0;
1232 int object_locked_count = 0;
1233
1234 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_ALL_PAGE_MAPPINGS)) | DBG_FUNC_START,
1235 q, qcount, 0, 0, 0);
1236
1237 vm_page_lock_queues();
1238
1239 while (qcount && !vm_page_queue_empty(q)) {
1240
1241 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1242
1243 m = (vm_page_t) vm_page_queue_first(q);
1244 m_object = VM_PAGE_OBJECT(m);
1245
1246 /*
1247 * check to see if we currently are working
1248 * with the same object... if so, we've
1249 * already got the lock
1250 */
1251 if (m_object != l_object) {
1252 /*
1253 * the object associated with candidate page is
1254 * different from the one we were just working
1255 * with... dump the lock if we still own it
1256 */
1257 if (l_object != NULL) {
1258 vm_object_unlock(l_object);
1259 l_object = NULL;
1260 }
1261 if (m_object != t_object)
1262 try_failed_count = 0;
1263
1264 /*
1265  * Try to lock object; since we've already got the
1266 * page queues lock, we can only 'try' for this one.
1267 * if the 'try' fails, we need to do a mutex_pause
1268 * to allow the owner of the object lock a chance to
1269 * run...
1270 */
1271 if ( !vm_object_lock_try_scan(m_object)) {
1272
1273 if (try_failed_count > 20) {
1274 goto reenter_pg_on_q;
1275 }
1276 vm_page_unlock_queues();
1277 mutex_pause(try_failed_count++);
1278 vm_page_lock_queues();
1279 delayed_unlock = 0;
1280
1281 paused_count++;
1282
1283 t_object = m_object;
1284 continue;
1285 }
1286 object_locked_count++;
1287
1288 l_object = m_object;
1289 }
1290 if ( !m_object->alive || m->cleaning || m->laundry || m->busy || m->absent || m->error || m->free_when_done) {
1291 /*
1292 * put it back on the head of its queue
1293 */
1294 goto reenter_pg_on_q;
1295 }
1296 if (m->pmapped == TRUE) {
1297
1298 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
1299
1300 disconnected_count++;
1301 }
1302 reenter_pg_on_q:
1303 vm_page_queue_remove(q, m, vm_page_t, pageq);
1304 vm_page_queue_enter(q, m, vm_page_t, pageq);
1305
1306 qcount--;
1307 try_failed_count = 0;
1308
1309 if (delayed_unlock++ > 128) {
1310
1311 if (l_object != NULL) {
1312 vm_object_unlock(l_object);
1313 l_object = NULL;
1314 }
1315 lck_mtx_yield(&vm_page_queue_lock);
1316 delayed_unlock = 0;
1317 }
1318 }
1319 if (l_object != NULL) {
1320 vm_object_unlock(l_object);
1321 l_object = NULL;
1322 }
1323 vm_page_unlock_queues();
1324
1325 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_ALL_PAGE_MAPPINGS)) | DBG_FUNC_END,
1326 q, disconnected_count, object_locked_count, paused_count, 0);
1327 }
1328
1329 #endif
1330
1331
1332 static void
1333 vm_pageout_page_queue(vm_page_queue_head_t *, int);
1334
1335 /*
1336 * condition variable used to make sure there is
1337 * only a single sweep going on at a time
1338 */
1339 boolean_t vm_pageout_anonymous_pages_active = FALSE;
1340
1341
1342 void
1343 vm_pageout_anonymous_pages()
1344 {
1345 if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
1346
1347 vm_page_lock_queues();
1348
1349 if (vm_pageout_anonymous_pages_active == TRUE) {
1350 vm_page_unlock_queues();
1351 return;
1352 }
1353 vm_pageout_anonymous_pages_active = TRUE;
1354 vm_page_unlock_queues();
1355
1356 vm_pageout_page_queue(&vm_page_queue_throttled, vm_page_throttled_count);
1357 vm_pageout_page_queue(&vm_page_queue_anonymous, vm_page_anonymous_count);
1358 vm_pageout_page_queue(&vm_page_queue_active, vm_page_active_count);
1359
1360 if (VM_CONFIG_SWAP_IS_PRESENT)
1361 vm_consider_swapping();
1362
1363 vm_page_lock_queues();
1364 vm_pageout_anonymous_pages_active = FALSE;
1365 vm_page_unlock_queues();
1366 }
1367 }
1368
1369
1370 void
1371 vm_pageout_page_queue(vm_page_queue_head_t *q, int qcount)
1372 {
1373 vm_page_t m;
1374 vm_object_t t_object = NULL;
1375 vm_object_t l_object = NULL;
1376 vm_object_t m_object = NULL;
1377 int delayed_unlock = 0;
1378 int try_failed_count = 0;
1379 int refmod_state;
1380 int pmap_options;
1381 struct vm_pageout_queue *iq;
1382 ppnum_t phys_page;
1383
1384
1385 iq = &vm_pageout_queue_internal;
1386
1387 vm_page_lock_queues();
1388
1389 while (qcount && !vm_page_queue_empty(q)) {
1390
1391 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1392
1393 if (VM_PAGE_Q_THROTTLED(iq)) {
1394
1395 if (l_object != NULL) {
1396 vm_object_unlock(l_object);
1397 l_object = NULL;
1398 }
1399 iq->pgo_draining = TRUE;
1400
1401 assert_wait((event_t) (&iq->pgo_laundry + 1), THREAD_INTERRUPTIBLE);
1402 vm_page_unlock_queues();
1403
1404 thread_block(THREAD_CONTINUE_NULL);
1405
1406 vm_page_lock_queues();
1407 delayed_unlock = 0;
1408 continue;
1409 }
1410 m = (vm_page_t) vm_page_queue_first(q);
1411 m_object = VM_PAGE_OBJECT(m);
1412
1413 /*
1414 * check to see if we currently are working
1415 * with the same object... if so, we've
1416 * already got the lock
1417 */
1418 if (m_object != l_object) {
1419 if ( !m_object->internal)
1420 goto reenter_pg_on_q;
1421
1422 /*
1423 * the object associated with candidate page is
1424 * different from the one we were just working
1425 * with... dump the lock if we still own it
1426 */
1427 if (l_object != NULL) {
1428 vm_object_unlock(l_object);
1429 l_object = NULL;
1430 }
1431 if (m_object != t_object)
1432 try_failed_count = 0;
1433
1434 /*
1435  * Try to lock object; since we've already got the
1436 * page queues lock, we can only 'try' for this one.
1437 * if the 'try' fails, we need to do a mutex_pause
1438 * to allow the owner of the object lock a chance to
1439 * run...
1440 */
1441 if ( !vm_object_lock_try_scan(m_object)) {
1442
1443 if (try_failed_count > 20) {
1444 goto reenter_pg_on_q;
1445 }
1446 vm_page_unlock_queues();
1447 mutex_pause(try_failed_count++);
1448 vm_page_lock_queues();
1449 delayed_unlock = 0;
1450
1451 t_object = m_object;
1452 continue;
1453 }
1454 l_object = m_object;
1455 }
1456 if ( !m_object->alive || m->cleaning || m->laundry || m->busy || m->absent || m->error || m->free_when_done) {
1457 /*
1458 * page is not to be cleaned
1459 * put it back on the head of its queue
1460 */
1461 goto reenter_pg_on_q;
1462 }
1463 phys_page = VM_PAGE_GET_PHYS_PAGE(m);
1464
1465 if (m->reference == FALSE && m->pmapped == TRUE) {
1466 refmod_state = pmap_get_refmod(phys_page);
1467
1468 if (refmod_state & VM_MEM_REFERENCED)
1469 m->reference = TRUE;
1470 if (refmod_state & VM_MEM_MODIFIED) {
1471 SET_PAGE_DIRTY(m, FALSE);
1472 }
1473 }
1474 if (m->reference == TRUE) {
1475 m->reference = FALSE;
1476 pmap_clear_refmod_options(phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
1477 goto reenter_pg_on_q;
1478 }
1479 if (m->pmapped == TRUE) {
1480 if (m->dirty || m->precious) {
1481 pmap_options = PMAP_OPTIONS_COMPRESSOR;
1482 } else {
1483 pmap_options = PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
1484 }
1485 refmod_state = pmap_disconnect_options(phys_page, pmap_options, NULL);
1486 if (refmod_state & VM_MEM_MODIFIED) {
1487 SET_PAGE_DIRTY(m, FALSE);
1488 }
1489 }
1490 if ( !m->dirty && !m->precious) {
1491 vm_page_unlock_queues();
1492 VM_PAGE_FREE(m);
1493 vm_page_lock_queues();
1494 delayed_unlock = 0;
1495
1496 goto next_pg;
1497 }
1498 if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL) {
1499
1500 if (!m_object->pager_initialized) {
1501
1502 vm_page_unlock_queues();
1503
1504 vm_object_collapse(m_object, (vm_object_offset_t) 0, TRUE);
1505
1506 if (!m_object->pager_initialized)
1507 vm_object_compressor_pager_create(m_object);
1508
1509 vm_page_lock_queues();
1510 delayed_unlock = 0;
1511 }
1512 if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL)
1513 goto reenter_pg_on_q;
1514 /*
1515 * vm_object_compressor_pager_create will drop the object lock
1516 * which means 'm' may no longer be valid to use
1517 */
1518 continue;
1519 }
1520 /*
1521 * we've already factored out pages in the laundry which
1522 * means this page can't be on the pageout queue so it's
1523 * safe to do the vm_page_queues_remove
1524 */
1525 vm_page_queues_remove(m, TRUE);
1526
1527 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1528
1529 vm_pageout_cluster(m);
1530
1531 goto next_pg;
1532
1533 reenter_pg_on_q:
1534 vm_page_queue_remove(q, m, vm_page_t, pageq);
1535 vm_page_queue_enter(q, m, vm_page_t, pageq);
1536 next_pg:
1537 qcount--;
1538 try_failed_count = 0;
1539
1540 if (delayed_unlock++ > 128) {
1541
1542 if (l_object != NULL) {
1543 vm_object_unlock(l_object);
1544 l_object = NULL;
1545 }
1546 lck_mtx_yield(&vm_page_queue_lock);
1547 delayed_unlock = 0;
1548 }
1549 }
1550 if (l_object != NULL) {
1551 vm_object_unlock(l_object);
1552 l_object = NULL;
1553 }
1554 vm_page_unlock_queues();
1555 }
1556
1557
1558
1559 /*
1560 * function in BSD to apply I/O throttle to the pageout thread
1561 */
1562 extern void vm_pageout_io_throttle(void);
1563
1564 #define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, obj) \
1565 MACRO_BEGIN \
1566 /* \
1567 * If a "reusable" page somehow made it back into \
1568 * the active queue, it's been re-used and is not \
1569 * quite re-usable. \
1570 * If the VM object was "all_reusable", consider it \
1571 * as "all re-used" instead of converting it to \
1572 * "partially re-used", which could be expensive. \
1573 */ \
1574 assert(VM_PAGE_OBJECT((m)) == (obj)); \
1575 if ((m)->reusable || \
1576 (obj)->all_reusable) { \
1577 vm_object_reuse_pages((obj), \
1578 (m)->offset, \
1579 (m)->offset + PAGE_SIZE_64, \
1580 FALSE); \
1581 } \
1582 MACRO_END
1583
1584
1585 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT 64
1586 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX 1024
1587
1588 #define FCS_IDLE 0
1589 #define FCS_DELAYED 1
1590 #define FCS_DEADLOCK_DETECTED 2
1591
1592 struct flow_control {
1593 int state;
1594 mach_timespec_t ts;
1595 };
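/*
 * flow_control tracks vm_pageout_scan()'s throttling state when the pageout
 * queues back up: FCS_IDLE (no wait in progress), FCS_DELAYED (waiting, with
 * "ts" holding the deadline), and FCS_DEADLOCK_DETECTED (the wait ran past
 * VM_PAGEOUT_DEADLOCK_WAIT, so the scan falls back to the
 * VM_PAGEOUT_DEADLOCK_RELIEF path to get pages moving again).
 */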
1596
1597 #if CONFIG_BACKGROUND_QUEUE
1598 uint64_t vm_pageout_skipped_bq_internal = 0;
1599 uint64_t vm_pageout_considered_bq_internal = 0;
1600 uint64_t vm_pageout_considered_bq_external = 0;
1601 uint64_t vm_pageout_rejected_bq_internal = 0;
1602 uint64_t vm_pageout_rejected_bq_external = 0;
1603 #endif
1604
1605 uint32_t vm_pageout_no_victim = 0;
1606 uint32_t vm_pageout_considered_page = 0;
1607 uint32_t vm_page_filecache_min = 0;
1608
1609 #define ANONS_GRABBED_LIMIT 2
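/*
 * ANONS_GRABBED_LIMIT caps how many anonymous (internal) pages
 * vm_pageout_scan() will take consecutively before it reconsiders
 * file-backed pages, so the anonymous and file-backed inactive queues
 * are drained in an interleaved fashion.
 */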
1610
1611 #if CONFIG_SECLUDED_MEMORY
1612 extern vm_page_t vm_page_grab_secluded(void);
1613 uint64_t vm_pageout_secluded_burst_count = 0;
1614 #endif /* CONFIG_SECLUDED_MEMORY */
1615
1616
1617 static void vm_pageout_delayed_unlock(int *, int *, vm_page_t *);
1618 static void vm_pageout_prepare_to_block(vm_object_t *, int *, vm_page_t *, int *, int);
1619
1620 #define VM_PAGEOUT_PB_NO_ACTION 0
1621 #define VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER 1
1622 #define VM_PAGEOUT_PB_THREAD_YIELD 2
1623
1624
1625 static void
1626 vm_pageout_delayed_unlock(int *delayed_unlock, int *local_freed, vm_page_t *local_freeq)
1627 {
1628 if (*local_freeq) {
1629 vm_page_unlock_queues();
1630
1631 VM_DEBUG_EVENT(
1632 vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
1633 vm_page_free_count, *local_freed, 0, 1);
1634
1635 vm_page_free_list(*local_freeq, TRUE);
1636
1637 VM_DEBUG_EVENT(vm_pageout_freelist,VM_PAGEOUT_FREELIST, DBG_FUNC_END,
1638 vm_page_free_count, 0, 0, 1);
1639
1640 *local_freeq = NULL;
1641 *local_freed = 0;
1642
1643 vm_page_lock_queues();
1644 } else {
1645 lck_mtx_yield(&vm_page_queue_lock);
1646 }
1647 *delayed_unlock = 1;
1648 }
1649
1650
1651 static void
1652 vm_pageout_prepare_to_block(vm_object_t *object, int *delayed_unlock,
1653 vm_page_t *local_freeq, int *local_freed, int action)
1654 {
1655 vm_page_unlock_queues();
1656
1657 if (*object != NULL) {
1658 vm_object_unlock(*object);
1659 *object = NULL;
1660 }
1661 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1662
1663 if (*local_freeq) {
1664
1665 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
1666 vm_page_free_count, *local_freed, 0, 2);
1667
1668 vm_page_free_list(*local_freeq, TRUE);
1669
1670 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
1671 vm_page_free_count, 0, 0, 2);
1672
1673 *local_freeq = NULL;
1674 *local_freed = 0;
1675 }
1676 *delayed_unlock = 1;
1677
1678 switch (action) {
1679
1680 case VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER:
1681 vm_consider_waking_compactor_swapper();
1682 break;
1683 case VM_PAGEOUT_PB_THREAD_YIELD:
1684 thread_yield_internal(1);
1685 break;
1686 case VM_PAGEOUT_PB_NO_ACTION:
1687 default:
1688 break;
1689 }
1690 vm_page_lock_queues();
1691 }
1692
1693
1694 int last_vm_pageout_freed_from_inactive_clean = 0;
1695 int last_vm_pageout_freed_from_cleaned = 0;
1696 int last_vm_pageout_freed_from_speculative = 0;
1697 int last_vm_pageout_freed_after_compression = 0;
1698 int last_vm_pageout_enqueued_cleaned_from_inactive_dirty = 0;
1699 int last_vm_pageout_inactive_force_reclaim = 0;
1700 int last_vm_pageout_scan_inactive_throttled_external = 0;
1701 int last_vm_pageout_scan_inactive_throttled_internal = 0;
1702 int last_vm_pageout_reactivation_limit_exceeded = 0;
1703 int last_vm_pageout_considered_page = 0;
1704 int last_vm_compressor_pages_grabbed = 0;
1705 int last_vm_compressor_failed = 0;
1706 int last_vm_pageout_skipped_external = 0;
1707
1708
1709 void update_vm_info(void)
1710 {
1711 int tmp1, tmp2, tmp3, tmp4;
1712
1713 if (!kdebug_enable)
1714 return;
1715
1716 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO1)) | DBG_FUNC_NONE,
1717 vm_page_active_count,
1718 vm_page_speculative_count,
1719 vm_page_inactive_count,
1720 vm_page_anonymous_count,
1721 0);
1722
1723 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO2)) | DBG_FUNC_NONE,
1724 vm_page_free_count,
1725 vm_page_wire_count,
1726 VM_PAGE_COMPRESSOR_COUNT,
1727 0, 0);
1728
1729 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO3)) | DBG_FUNC_NONE,
1730 c_segment_pages_compressed,
1731 vm_page_internal_count,
1732 vm_page_external_count,
1733 vm_page_xpmapped_external_count,
1734 0);
1735
1736
1737 if ((vm_pageout_considered_page - last_vm_pageout_considered_page) == 0 &&
1738 (vm_pageout_enqueued_cleaned_from_inactive_dirty - last_vm_pageout_enqueued_cleaned_from_inactive_dirty == 0) &&
1739 (vm_pageout_freed_after_compression - last_vm_pageout_freed_after_compression == 0))
1740 return;
1741
1742
1743 tmp1 = vm_pageout_considered_page;
1744 tmp2 = vm_pageout_freed_from_speculative;
1745 tmp3 = vm_pageout_freed_from_inactive_clean;
1746
1747 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO4)) | DBG_FUNC_NONE,
1748 tmp1 - last_vm_pageout_considered_page,
1749 tmp2 - last_vm_pageout_freed_from_speculative,
1750 tmp3 - last_vm_pageout_freed_from_inactive_clean,
1751 0, 0);
1752
1753 last_vm_pageout_considered_page = tmp1;
1754 last_vm_pageout_freed_from_speculative = tmp2;
1755 last_vm_pageout_freed_from_inactive_clean = tmp3;
1756
1757
1758 tmp1 = vm_pageout_scan_inactive_throttled_external;
1759 tmp2 = vm_pageout_enqueued_cleaned_from_inactive_dirty;
1760 tmp3 = vm_pageout_freed_from_cleaned;
1761
1762 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO5)) | DBG_FUNC_NONE,
1763 tmp1 - last_vm_pageout_scan_inactive_throttled_external,
1764 tmp2 - last_vm_pageout_enqueued_cleaned_from_inactive_dirty,
1765 tmp3 - last_vm_pageout_freed_from_cleaned,
1766 0, 0);
1767
1768 vm_pageout_stats[vm_pageout_stat_now].throttled_external_q += (tmp1 - last_vm_pageout_scan_inactive_throttled_external);
1769 vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_external += (tmp2 - last_vm_pageout_enqueued_cleaned_from_inactive_dirty);
1770
1771 last_vm_pageout_scan_inactive_throttled_external = tmp1;
1772 last_vm_pageout_enqueued_cleaned_from_inactive_dirty = tmp2;
1773 last_vm_pageout_freed_from_cleaned = tmp3;
1774
1775
1776 tmp1 = vm_pageout_scan_inactive_throttled_internal;
1777 tmp2 = vm_pageout_freed_after_compression;
1778 tmp3 = vm_compressor_pages_grabbed;
1779 tmp4 = vm_pageout_skipped_external;
1780
1781 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO6)) | DBG_FUNC_NONE,
1782 tmp1 - last_vm_pageout_scan_inactive_throttled_internal,
1783 tmp2 - last_vm_pageout_freed_after_compression,
1784 tmp3 - last_vm_compressor_pages_grabbed,
1785 tmp4 - last_vm_pageout_skipped_external,
1786 0);
1787
1788 vm_pageout_stats[vm_pageout_stat_now].throttled_internal_q += (tmp1 - last_vm_pageout_scan_inactive_throttled_internal);
1789 vm_pageout_stats[vm_pageout_stat_now].pages_compressed += (tmp2 - last_vm_pageout_freed_after_compression);
1790 vm_pageout_stats[vm_pageout_stat_now].pages_grabbed_by_compressor += (tmp3 - last_vm_compressor_pages_grabbed);
1791
1792 last_vm_pageout_scan_inactive_throttled_internal = tmp1;
1793 last_vm_pageout_freed_after_compression = tmp2;
1794 last_vm_compressor_pages_grabbed = tmp3;
1795 last_vm_pageout_skipped_external = tmp4;
1796
1797
1798 if ((vm_pageout_reactivation_limit_exceeded - last_vm_pageout_reactivation_limit_exceeded) == 0 &&
1799 (vm_pageout_inactive_force_reclaim - last_vm_pageout_inactive_force_reclaim) == 0 &&
1800 (vm_compressor_failed - last_vm_compressor_failed) == 0)
1801 return;
1802
1803 tmp1 = vm_pageout_reactivation_limit_exceeded;
1804 tmp2 = vm_pageout_inactive_force_reclaim;
1805 tmp3 = vm_compressor_failed;
1806
1807 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO7)) | DBG_FUNC_NONE,
1808 tmp1 - last_vm_pageout_reactivation_limit_exceeded,
1809 tmp2 - last_vm_pageout_inactive_force_reclaim,
1810 tmp3 - last_vm_compressor_failed,
1811 0, 0);
1812
1813 vm_pageout_stats[vm_pageout_stat_now].failed_compressions += (tmp3 - last_vm_compressor_failed);
1814
1815 last_vm_pageout_reactivation_limit_exceeded = tmp1;
1816 last_vm_pageout_inactive_force_reclaim = tmp2;
1817 last_vm_compressor_failed = tmp3;
1818 }
1819
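/*
 * A minimal sketch (comment only, not compiled) of the snapshot/delta
 * pattern update_vm_info() applies to every counter it reports; all of
 * the names below are the ones defined in this file:
 *
 *	tmp1 = vm_pageout_considered_page;            sample the running counter
 *	KERNEL_DEBUG_CONSTANT(...,
 *	    tmp1 - last_vm_pageout_considered_page,   emit only the delta since
 *	    ...);                                     the previous invocation
 *	last_vm_pageout_considered_page = tmp1;       remember the sample
 *
 * The two early returns in update_vm_info() skip the VM_INFO4..VM_INFO7
 * and the VM_INFO7 tracepoints, respectively, when their watched counters
 * have not moved since the last call.
 */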
1820
1821 /*
1822 * vm_pageout_scan does the dirty work for the pageout daemon.
1823 * It returns with both vm_page_queue_free_lock and vm_page_queue_lock
1824 * held and vm_page_free_wanted == 0.
1825 */
1826 void
1827 vm_pageout_scan(void)
1828 {
1829 unsigned int loop_count = 0;
1830 unsigned int inactive_burst_count = 0;
1831 unsigned int active_burst_count = 0;
1832 unsigned int reactivated_this_call;
1833 unsigned int reactivate_limit;
1834 vm_page_t local_freeq = NULL;
1835 int local_freed = 0;
1836 int delayed_unlock;
1837 int delayed_unlock_limit = 0;
1838 int refmod_state = 0;
1839 int vm_pageout_deadlock_target = 0;
1840 struct vm_pageout_queue *iq;
1841 struct vm_pageout_queue *eq;
1842 struct vm_speculative_age_q *sq;
1843 struct flow_control flow_control = { 0, { 0, 0 } };
1844 boolean_t inactive_throttled = FALSE;
1845 boolean_t try_failed;
1846 mach_timespec_t ts;
1847 unsigned int msecs = 0;
1848 vm_object_t object = NULL;
1849 uint32_t inactive_reclaim_run;
1850 boolean_t exceeded_burst_throttle;
1851 boolean_t grab_anonymous = FALSE;
1852 boolean_t force_anonymous = FALSE;
1853 boolean_t force_speculative_aging = FALSE;
1854 int anons_grabbed = 0;
1855 int page_prev_q_state = 0;
1856 #if CONFIG_BACKGROUND_QUEUE
1857 boolean_t page_from_bg_q = FALSE;
1858 #endif
1859 int cache_evict_throttle = 0;
1860 uint32_t vm_pageout_inactive_external_forced_reactivate_limit = 0;
1861 int force_purge = 0;
1862 #define DELAY_SPECULATIVE_AGE 1000
1863 int delay_speculative_age = 0;
1864 vm_object_t m_object = VM_OBJECT_NULL;
1865
1866 #if VM_PRESSURE_EVENTS
1867 vm_pressure_level_t pressure_level;
1868 #endif /* VM_PRESSURE_EVENTS */
1869
1870 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START,
1871 vm_pageout_speculative_clean, vm_pageout_inactive_clean,
1872 vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
1873
1874 flow_control.state = FCS_IDLE;
1875 iq = &vm_pageout_queue_internal;
1876 eq = &vm_pageout_queue_external;
1877 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
1878
1879
1880 XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
1881
1882 /* Ask the pmap layer to return any pages it no longer needs. */
1883 pmap_release_pages_fast();
1884
1885 vm_page_lock_queues();
1886 delayed_unlock = 1;
1887
1888 /*
1889 * Calculate the max number of referenced pages on the inactive
1890 * queue that we will reactivate.
1891 */
1892 reactivated_this_call = 0;
1893 reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count +
1894 vm_page_inactive_count);
1895 inactive_reclaim_run = 0;
1896
1897 vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
1898
1899 /*
1900 * We want to gradually dribble pages from the active queue
1901 * to the inactive queue. If we let the inactive queue get
1902 * very small, and then suddenly dump many pages into it,
1903 * those pages won't get a sufficient chance to be referenced
1904 * before we start taking them from the inactive queue.
1905 *
1906 * We must limit the rate at which we send pages to the pagers
1907 * so that we don't tie up too many pages in the I/O queues.
1908 * We implement a throttling mechanism using the laundry count
1909 * to limit the number of pages outstanding to the default
1910 * and external pagers. We can bypass the throttles and look
1911 * for clean pages if the pageout queues don't drain in a timely
1912 * fashion since this may indicate that the pageout paths are
1913 * stalled waiting for memory, which only we can provide.
1914 */
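	/*
	 * A sketch of the throttle test applied below; the real check is the
	 * VM_PAGE_Q_THROTTLED() macro (defined in the pageout headers), which
	 * boils down to comparing a queue's outstanding laundry against its
	 * limit, roughly:
	 *
	 *	if (iq->pgo_laundry >= iq->pgo_maxlaundry)
	 *		... the internal (compressor) queue is throttled, so
	 *		    enter the flow-control path instead of pushing
	 *		    more dirty internal pages at it ...
	 *
	 * pgo_laundry and pgo_maxlaundry are the same fields reported by the
	 * VM_PAGEOUT_THREAD_BLOCK tracepoints further down in this routine.
	 */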
1915
1916
1917 Restart:
1918
1919 assert(object == NULL);
1920 assert(delayed_unlock != 0);
1921
1922 /*
1923 * Recalculate vm_page_inactive_target.
1924 */
1925 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1926 vm_page_inactive_count +
1927 vm_page_speculative_count);
1928
1929 vm_page_anonymous_min = vm_page_inactive_target / 20;
1930
1931
1932 /*
1933 * don't want to wake the pageout_scan thread up every time we fall below
1934 * the targets... set a low water mark at 0.25% below the target
1935 */
1936 vm_page_inactive_min = vm_page_inactive_target - (vm_page_inactive_target / 400);
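	/*
	 * For example, with vm_page_inactive_target at 100000 pages the line
	 * above yields vm_page_inactive_min == 100000 - 250 == 99750, i.e.
	 * 0.25% of slack before pageout_scan needs to be woken again.
	 */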
1937
1938 if (vm_page_speculative_percentage > 50)
1939 vm_page_speculative_percentage = 50;
1940 else if (vm_page_speculative_percentage <= 0)
1941 vm_page_speculative_percentage = 1;
1942
1943 vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
1944 vm_page_inactive_count);
1945
1946 try_failed = FALSE;
1947
1948 for (;;) {
1949 vm_page_t m;
1950
1951 DTRACE_VM2(rev, int, 1, (uint64_t *), NULL);
1952
1953 if (vm_upl_wait_for_pages < 0)
1954 vm_upl_wait_for_pages = 0;
1955
1956 delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT + vm_upl_wait_for_pages;
1957
1958 if (delayed_unlock_limit > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX)
1959 delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX;
1960
1961 #if CONFIG_SECLUDED_MEMORY
1962 /*
1963 * Deal with secluded_q overflow.
1964 */
1965 if (vm_page_secluded_count > vm_page_secluded_target) {
1966 unsigned int secluded_overflow;
1967 vm_page_t secluded_page;
1968
1969 if (object != NULL) {
1970 vm_object_unlock(object);
1971 object = NULL;
1972 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1973 }
1974 /*
1975 * SECLUDED_AGING_BEFORE_ACTIVE:
1976 * Excess secluded pages go to the active queue and
1977 * will later go to the inactive queue.
1978 */
1979 active_burst_count = MIN(vm_pageout_burst_active_throttle,
1980 vm_page_secluded_count_inuse);
1981 secluded_overflow = (vm_page_secluded_count -
1982 vm_page_secluded_target);
1983 while (secluded_overflow-- > 0 &&
1984 vm_page_secluded_count > vm_page_secluded_target) {
1985 assert((vm_page_secluded_count_free +
1986 vm_page_secluded_count_inuse) ==
1987 vm_page_secluded_count);
1988 secluded_page = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
1989 assert(secluded_page->vm_page_q_state ==
1990 VM_PAGE_ON_SECLUDED_Q);
1991 vm_page_queues_remove(secluded_page, FALSE);
1992 assert(!secluded_page->fictitious);
1993 assert(!VM_PAGE_WIRED(secluded_page));
1994 if (secluded_page->vm_page_object == 0) {
1995 /* transfer to free queue */
1996 assert(secluded_page->busy);
1997 secluded_page->snext = local_freeq;
1998 local_freeq = secluded_page;
1999 local_freed++;
2000 } else {
2001 /* transfer to head of active queue */
2002 vm_page_enqueue_active(secluded_page, FALSE);
2003 if (active_burst_count-- == 0) {
2004 vm_pageout_secluded_burst_count++;
2005 break;
2006 }
2007 }
2008 secluded_page = VM_PAGE_NULL;
2009
2010 if (delayed_unlock++ > delayed_unlock_limit) {
2011 vm_pageout_delayed_unlock(&delayed_unlock, &local_freed, &local_freeq);
2012 }
2013 }
2014 }
2015 #endif /* CONFIG_SECLUDED_MEMORY */
2016
2017 assert(delayed_unlock);
2018
2019 /*
2020 * Move pages from active to inactive if we're below the target
2021 */
2022 if ((vm_page_inactive_count + vm_page_speculative_count) >= vm_page_inactive_target)
2023 goto done_moving_active_pages;
2024
2025 if (object != NULL) {
2026 vm_object_unlock(object);
2027 object = NULL;
2028 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2029 }
2030 /*
2031 * Don't sweep through active queue more than the throttle
2032 * which should be kept relatively low
2033 */
2034 active_burst_count = MIN(vm_pageout_burst_active_throttle, vm_page_active_count);
2035
2036 VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_START,
2037 vm_pageout_inactive, vm_pageout_inactive_used, vm_page_free_count, local_freed);
2038
2039 VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_NONE,
2040 vm_pageout_speculative_clean, vm_pageout_inactive_clean,
2041 vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
2042 memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_START);
2043
2044
2045 while (!vm_page_queue_empty(&vm_page_queue_active) && active_burst_count--) {
2046
2047 vm_pageout_active++;
2048
2049 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
2050
2051 assert(m->vm_page_q_state == VM_PAGE_ON_ACTIVE_Q);
2052 assert(!m->laundry);
2053 assert(VM_PAGE_OBJECT(m) != kernel_object);
2054 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
2055
2056 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
2057
2058 /*
2059 * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise...
2060 *
2061 * a TLB flush isn't really needed here since at worst we'll miss the reference bit being
2062 * updated in the PTE if a remote processor still has this mapping cached in its TLB when the
2063 * new reference happens. If no further references happen on the page after that remote TLB flushes
2064 * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue
2065 * by pageout_scan, which is just fine since the last reference would have happened quite far
2066 * in the past (TLB caches don't hang around for very long), and of course could just as easily
2067 * have happened before we moved the page
2068 */
2069 pmap_clear_refmod_options(VM_PAGE_GET_PHYS_PAGE(m), VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
2070
2071 /*
2072 * The page might be absent or busy,
2073 * but vm_page_deactivate can handle that.
2074 * FALSE indicates that we don't want a H/W clear reference
2075 */
2076 vm_page_deactivate_internal(m, FALSE);
2077
2078 if (delayed_unlock++ > delayed_unlock_limit) {
2079 vm_pageout_delayed_unlock(&delayed_unlock, &local_freed, &local_freeq);
2080 }
2081 }
2082
2083 VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_END,
2084 vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count, vm_page_inactive_target);
2085 memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_END);
2086
2087 /**********************************************************************
2088 * above this point we're playing with the active and secluded queues
2089 * below this point we're playing with the throttling mechanisms
2090 * and the inactive queue
2091 **********************************************************************/
2092
2093 done_moving_active_pages:
2094
2095 if (vm_page_free_count + local_freed >= vm_page_free_target)
2096 {
2097 vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
2098 VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
2099 /*
2100 * make sure the pageout I/O threads are running
2101 * throttled in case there are still requests
2102 * in the laundry... since we have met our targets
2103 * we don't need the laundry to be cleaned in a timely
2104 * fashion... so let's avoid interfering with foreground
2105 * activity
2106 */
2107 vm_pageout_adjust_eq_iothrottle(eq, TRUE);
2108
2109 /*
2110 * recalculate vm_page_inactive_target
2111 */
2112 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
2113 vm_page_inactive_count +
2114 vm_page_speculative_count);
2115 #ifndef CONFIG_EMBEDDED
2116 if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) &&
2117 !vm_page_queue_empty(&vm_page_queue_active)) {
2118 /*
2119 * inactive target still not met... keep going
2120 * until we get the queues balanced...
2121 */
2122 continue;
2123 }
2124 #endif
2125 lck_mtx_lock(&vm_page_queue_free_lock);
2126
2127 if ((vm_page_free_count >= vm_page_free_target) &&
2128 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
2129 /*
2130 * done - we have met our target *and*
2131 * there is no one waiting for a page.
2132 */
2133 return_from_scan:
2134 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
2135
2136 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE,
2137 vm_pageout_inactive, vm_pageout_inactive_used, 0, 0);
2138 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END,
2139 vm_pageout_speculative_clean, vm_pageout_inactive_clean,
2140 vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
2141
2142 return;
2143 }
2144 lck_mtx_unlock(&vm_page_queue_free_lock);
2145 }
2146
2147 /*
2148 * Before anything, we check if we have any ripe volatile
2149 * objects around. If so, try to purge the first object.
2150 * If the purge fails, fall through to reclaim a page instead.
2151 * If the purge succeeds, go back to the top and re-evaluate
2152 * the new memory situation.
2153 */
2154
2155 assert(available_for_purge >= 0);
2156 force_purge = 0; /* no force-purging */
2157
2158 #if VM_PRESSURE_EVENTS
2159 pressure_level = memorystatus_vm_pressure_level;
2160
2161 if (pressure_level > kVMPressureNormal) {
2162
2163 if (pressure_level >= kVMPressureCritical) {
2164 force_purge = memorystatus_purge_on_critical;
2165 } else if (pressure_level >= kVMPressureUrgent) {
2166 force_purge = memorystatus_purge_on_urgent;
2167 } else if (pressure_level >= kVMPressureWarning) {
2168 force_purge = memorystatus_purge_on_warning;
2169 }
2170 }
2171 #endif /* VM_PRESSURE_EVENTS */
2172
2173 if (available_for_purge || force_purge) {
2174
2175 if (object != NULL) {
2176 vm_object_unlock(object);
2177 object = NULL;
2178 }
2179
2180 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_START);
2181
2182 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0);
2183 if (vm_purgeable_object_purge_one(force_purge, C_DONT_BLOCK)) {
2184 vm_pageout_purged_objects++;
2185 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0);
2186 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
2187 continue;
2188 }
2189 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1);
2190 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
2191 }
2192
2193 if (vm_page_queue_empty(&sq->age_q) && vm_page_speculative_count) {
2194 /*
2195 * try to pull pages from the aging bins...
2196 * see vm_page.h for an explanation of how
2197 * this mechanism works
2198 */
2199 struct vm_speculative_age_q *aq;
2200 boolean_t can_steal = FALSE;
2201 int num_scanned_queues;
2202
2203 aq = &vm_page_queue_speculative[speculative_steal_index];
2204
2205 num_scanned_queues = 0;
2206 while (vm_page_queue_empty(&aq->age_q) &&
2207 num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
2208
2209 speculative_steal_index++;
2210
2211 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2212 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2213
2214 aq = &vm_page_queue_speculative[speculative_steal_index];
2215 }
2216
2217 if (num_scanned_queues == VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) {
2218 /*
2219 * XXX We've scanned all the speculative
2220 * queues but still haven't found one
2221 * that is not empty, even though
2222 * vm_page_speculative_count is not 0.
2223 *
2224 * report the anomaly...
2225 */
2226 printf("vm_pageout_scan: "
2227 "all speculative queues empty "
2228 "but count=%d. Re-adjusting.\n",
2229 vm_page_speculative_count);
2230 if (vm_page_speculative_count > vm_page_speculative_count_drift_max)
2231 vm_page_speculative_count_drift_max = vm_page_speculative_count;
2232 vm_page_speculative_count_drifts++;
2233 #if DEVELOPMENT || DEBUG
2234 panic("vm_pageout_scan: vm_page_speculative_count=%d but queues are empty", vm_page_speculative_count);
2235 #endif /* DEVELOPMENT || DEBUG */
2236 /* readjust... */
2237 vm_page_speculative_count = 0;
2238 /* ... and continue */
2239 continue;
2240 }
2241
2242 if (vm_page_speculative_count > vm_page_speculative_target || force_speculative_aging == TRUE)
2243 can_steal = TRUE;
2244 else {
2245 if (!delay_speculative_age) {
2246 mach_timespec_t ts_fully_aged;
2247
2248 ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) / 1000;
2249 ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) % 1000)
2250 * 1000 * NSEC_PER_USEC;
2251
2252 ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);
2253
2254 clock_sec_t sec;
2255 clock_nsec_t nsec;
2256 clock_get_system_nanotime(&sec, &nsec);
2257 ts.tv_sec = (unsigned int) sec;
2258 ts.tv_nsec = nsec;
2259
2260 if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0)
2261 can_steal = TRUE;
2262 else
2263 delay_speculative_age++;
2264 } else {
2265 delay_speculative_age++;
2266 if (delay_speculative_age == DELAY_SPECULATIVE_AGE)
2267 delay_speculative_age = 0;
2268 }
2269 }
2270 if (can_steal == TRUE)
2271 vm_page_speculate_ageit(aq);
2272 }
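		/*
		 * The aging math above, with illustrative (not authoritative)
		 * values: if vm_page_speculative_q_age_ms were 500 and
		 * VM_PAGE_MAX_SPECULATIVE_AGE_Q were 10, ts_fully_aged works
		 * out to aq->age_ts + 5 seconds, so a non-empty aging bin is
		 * only stolen from once it has sat for the full aging interval
		 * (or once vm_page_speculative_count exceeds its target, or a
		 * caller forced aging via force_speculative_aging).
		 */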
2273 force_speculative_aging = FALSE;
2274
2275 #if CONFIG_BACKGROUND_QUEUE
2276 if (vm_page_queue_empty(&sq->age_q) && cache_evict_throttle == 0 &&
2277 ((vm_page_background_mode == VM_PAGE_BG_DISABLED) || (vm_page_background_count <= vm_page_background_target)))
2278 #else
2279 if (vm_page_queue_empty(&sq->age_q) && cache_evict_throttle == 0)
2280 #endif
2281 {
2282 int pages_evicted;
2283
2284 if (object != NULL) {
2285 vm_object_unlock(object);
2286 object = NULL;
2287 }
2288 pages_evicted = vm_object_cache_evict(100, 10);
2289
2290 if (pages_evicted) {
2291
2292 vm_pageout_cache_evicted += pages_evicted;
2293
2294 VM_DEBUG_EVENT(vm_pageout_cache_evict, VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE,
2295 vm_page_free_count, pages_evicted, vm_pageout_cache_evicted, 0);
2296 memoryshot(VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE);
2297
2298 /*
2299 * we just freed up to 100 pages,
2300 * so go back to the top of the main loop
2301 * and re-evaluate the memory situation
2302 */
2303 continue;
2304 } else
2305 cache_evict_throttle = 1000;
2306 }
2307 if (cache_evict_throttle)
2308 cache_evict_throttle--;
2309
2310 #if CONFIG_JETSAM
2311 /*
2312 * don't let the filecache_min fall below 15% of available memory
2313 * on systems with an active compressor that isn't nearing its
2314 * limits w/r to accepting new data
2315 *
2316 * on systems w/o the compressor/swapper, the filecache is always
2317 * a very large percentage of the AVAILABLE_NON_COMPRESSED_MEMORY
2318 * since most (if not all) of the anonymous pages are in the
2319 * throttled queue (which isn't counted as available) which
2320 * effectively disables this filter
2321 */
2322 if (vm_compressor_low_on_space())
2323 vm_page_filecache_min = 0;
2324 else
2325 vm_page_filecache_min = (AVAILABLE_NON_COMPRESSED_MEMORY / 7);
2326 #else
2327 if (vm_compressor_out_of_space())
2328 vm_page_filecache_min = 0;
2329 else {
2330 /*
2331 * don't let the filecache_min fall below 33% of available memory...
2332 */
2333 vm_page_filecache_min = (AVAILABLE_NON_COMPRESSED_MEMORY / 3);
2334 }
2335 #endif
2336 if (vm_page_free_count < (vm_page_free_reserved / 4))
2337 vm_page_filecache_min = 0;
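		/*
		 * Net effect of the computation above: with jetsam the file
		 * cache floor is 1/7 (~14%) of AVAILABLE_NON_COMPRESSED_MEMORY,
		 * without jetsam it is 1/3 (~33%), and in either configuration
		 * the floor drops to 0 when the compressor is low on space
		 * (with jetsam) or out of space (without), or when the free
		 * list falls below a quarter of vm_page_free_reserved.
		 */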
2338
2339 exceeded_burst_throttle = FALSE;
2340 /*
2341 * Sometimes we have to pause:
2342 * 1) No inactive pages - nothing to do.
2343 * 2) Loop control - no acceptable pages found on the inactive queue
2344 * within the last vm_pageout_burst_inactive_throttle iterations
2345 * 3) Flow control - default pageout queue is full
2346 */
2347 if (vm_page_queue_empty(&vm_page_queue_inactive) &&
2348 vm_page_queue_empty(&vm_page_queue_anonymous) &&
2349 vm_page_queue_empty(&sq->age_q)) {
2350 vm_pageout_scan_empty_throttle++;
2351 msecs = vm_pageout_empty_wait;
2352 goto vm_pageout_scan_delay;
2353
2354 } else if (inactive_burst_count >=
2355 MIN(vm_pageout_burst_inactive_throttle,
2356 (vm_page_inactive_count +
2357 vm_page_speculative_count))) {
2358 vm_pageout_scan_burst_throttle++;
2359 msecs = vm_pageout_burst_wait;
2360
2361 exceeded_burst_throttle = TRUE;
2362 goto vm_pageout_scan_delay;
2363
2364 } else if (vm_page_free_count > (vm_page_free_reserved / 4) &&
2365 VM_PAGEOUT_SCAN_NEEDS_TO_THROTTLE()) {
2366 vm_pageout_scan_swap_throttle++;
2367 msecs = vm_pageout_swap_wait;
2368 goto vm_pageout_scan_delay;
2369
2370 } else if (VM_PAGE_Q_THROTTLED(iq) &&
2371 VM_DYNAMIC_PAGING_ENABLED()) {
2372 clock_sec_t sec;
2373 clock_nsec_t nsec;
2374
2375 switch (flow_control.state) {
2376
2377 case FCS_IDLE:
2378 if ((vm_page_free_count + local_freed) < vm_page_free_target) {
2379
2380 vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
2381 VM_PAGEOUT_PB_THREAD_YIELD);
2382 if (!VM_PAGE_Q_THROTTLED(iq)) {
2383 vm_pageout_scan_yield_unthrottled++;
2384 continue;
2385 }
2386 if (vm_page_pageable_external_count > vm_page_filecache_min &&
2387 !vm_page_queue_empty(&vm_page_queue_inactive)) {
2388 anons_grabbed = ANONS_GRABBED_LIMIT;
2389 vm_pageout_scan_throttle_deferred++;
2390 goto consider_inactive;
2391 }
2392 if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) && vm_page_active_count)
2393 continue;
2394 }
2395 reset_deadlock_timer:
2396 ts.tv_sec = vm_pageout_deadlock_wait / 1000;
2397 ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
2398 clock_get_system_nanotime(&sec, &nsec);
2399 flow_control.ts.tv_sec = (unsigned int) sec;
2400 flow_control.ts.tv_nsec = nsec;
2401 ADD_MACH_TIMESPEC(&flow_control.ts, &ts);
2402
2403 flow_control.state = FCS_DELAYED;
2404 msecs = vm_pageout_deadlock_wait;
2405
2406 break;
2407
2408 case FCS_DELAYED:
2409 clock_get_system_nanotime(&sec, &nsec);
2410 ts.tv_sec = (unsigned int) sec;
2411 ts.tv_nsec = nsec;
2412
2413 if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
2414 /*
2415 * the pageout thread for the default pager is potentially
2416 * deadlocked since the
2417 * default pager queue has been throttled for more than the
2418 * allowable time... we need to move some clean pages or dirty
2419 * pages belonging to the external pagers if they aren't throttled
2420 * vm_page_free_wanted represents the number of threads currently
2421 * blocked waiting for pages... we'll move one page for each of
2422 * these plus a fixed amount to break the logjam... once we're done
2423 * moving this number of pages, we'll re-enter the FCS_DELAYED state
2424 * with a new timeout target since we have no way of knowing
2425 * whether we've broken the deadlock except through observation
2426 * of the queue associated with the default pager... we need to
2427 * stop moving pages and allow the system to run to see what
2428 * state it settles into.
2429 */
2430 vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted + vm_page_free_wanted_privileged;
2431 vm_pageout_scan_deadlock_detected++;
2432 flow_control.state = FCS_DEADLOCK_DETECTED;
2433 thread_wakeup((event_t) &vm_pageout_garbage_collect);
2434 goto consider_inactive;
2435 }
2436 /*
2437 * just resniff instead of trying
2438 * to compute a new delay time... we're going to be
2439 * awakened immediately upon a laundry completion,
2440 * so we won't wait any longer than necessary
2441 */
2442 msecs = vm_pageout_idle_wait;
2443 break;
2444
2445 case FCS_DEADLOCK_DETECTED:
2446 if (vm_pageout_deadlock_target)
2447 goto consider_inactive;
2448 goto reset_deadlock_timer;
2449
2450 }
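		/*
		 * Flow-control recap: FCS_IDLE arms a deadlock timer of
		 * vm_pageout_deadlock_wait milliseconds and moves to
		 * FCS_DELAYED when the internal queue remains throttled.
		 * FCS_DELAYED promotes to FCS_DEADLOCK_DETECTED when that
		 * timer expires while the queue is still throttled, setting a
		 * relief target of vm_pageout_deadlock_relief plus the current
		 * page waiters and waking vm_pageout_garbage_collect.  Once
		 * the relief target has been consumed, FCS_DEADLOCK_DETECTED
		 * re-arms the timer via reset_deadlock_timer.  If the internal
		 * queue unthrottles at any point, the check just below drops
		 * the state back to FCS_IDLE.
		 */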
2451 vm_pageout_scan_delay:
2452 vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
2453 VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
2454
2455 if (flow_control.state == FCS_DELAYED &&
2456 !VM_PAGE_Q_THROTTLED(iq)) {
2457 flow_control.state = FCS_IDLE;
2458 goto consider_inactive;
2459 }
2460
2461 if (vm_page_free_count >= vm_page_free_target) {
2462 /*
2463 * we're here because
2464 * someone else freed up some pages while we had
2465 * the queues unlocked above,
2466 * and we've hit one of the 3 conditions that
2467 * cause us to pause the pageout scan thread
2468 *
2469 * since we already have enough free pages,
2470 * let's avoid stalling and return normally
2471 *
2472 * before we return, make sure the pageout I/O threads
2473 * are running throttled in case there are still requests
2474 * in the laundry... since we have enough free pages
2475 * we don't need the laundry to be cleaned in a timely
2476 * fashion... so let's avoid interfering with foreground
2477 * activity
2478 *
2479 * we don't want to hold vm_page_queue_free_lock when
2480 * calling vm_pageout_adjust_eq_iothrottle (since it
2481 * may cause other locks to be taken), so we do the initial
2482 * check outside of the lock. Once we take the lock,
2483 * we recheck the condition since it may have changed.
2484 * if it has, no problem, we will make the threads
2485 * non-throttled before actually blocking
2486 */
2487 vm_pageout_adjust_eq_iothrottle(eq, TRUE);
2488 }
2489 lck_mtx_lock(&vm_page_queue_free_lock);
2490
2491 if (vm_page_free_count >= vm_page_free_target &&
2492 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
2493 goto return_from_scan;
2494 }
2495 lck_mtx_unlock(&vm_page_queue_free_lock);
2496
2497 if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) {
2498 /*
2499 * we're most likely about to block due to one of
2500 * the 3 conditions that cause vm_pageout_scan to
2501 * not be able to make forward progress w/r
2502 * to providing new pages to the free queue,
2503 * so unthrottle the I/O threads in case we
2504 * have laundry to be cleaned... it needs
2505 * to be completed ASAP.
2506 *
2507 * even if we don't block, we want the io threads
2508 * running unthrottled since the sum of free +
2509 * clean pages is still under our free target
2510 */
2511 vm_pageout_adjust_eq_iothrottle(eq, FALSE);
2512 }
2513 if (vm_page_cleaned_count > 0 && exceeded_burst_throttle == FALSE) {
2514 /*
2515 * if we get here we're below our free target and
2516 * we're stalling due to a full laundry queue or
2517 * we don't have any inactive pages other than
2518 * those in the clean queue...
2519 * however, we have pages on the clean queue that
2520 * can be moved to the free queue, so let's not
2521 * stall the pageout scan
2522 */
2523 flow_control.state = FCS_IDLE;
2524 goto consider_inactive;
2525 }
2526 VM_CHECK_MEMORYSTATUS;
2527
2528 if (flow_control.state != FCS_IDLE)
2529 vm_pageout_scan_throttle++;
2530 iq->pgo_throttled = TRUE;
2531
2532 assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);
2533 counter(c_vm_pageout_scan_block++);
2534
2535 vm_page_unlock_queues();
2536
2537 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
2538
2539 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START,
2540 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
2541 memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START);
2542
2543 thread_block(THREAD_CONTINUE_NULL);
2544
2545 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END,
2546 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
2547 memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END);
2548
2549 vm_page_lock_queues();
2550
2551 iq->pgo_throttled = FALSE;
2552
2553 if (loop_count >= vm_page_inactive_count)
2554 loop_count = 0;
2555 inactive_burst_count = 0;
2556
2557 goto Restart;
2558 /*NOTREACHED*/
2559 }
2560
2561
2562 flow_control.state = FCS_IDLE;
2563 consider_inactive:
2564 vm_pageout_inactive_external_forced_reactivate_limit = MIN((vm_page_active_count + vm_page_inactive_count),
2565 vm_pageout_inactive_external_forced_reactivate_limit);
2566 loop_count++;
2567 inactive_burst_count++;
2568 vm_pageout_inactive++;
2569
2570
2571 /*
2572 * Choose a victim.
2573 */
2574 while (1) {
2575 uint32_t inactive_external_count;
2576
2577 #if CONFIG_BACKGROUND_QUEUE
2578 page_from_bg_q = FALSE;
2579 #endif /* CONFIG_BACKGROUND_QUEUE */
2580
2581 m = NULL;
2582 m_object = VM_OBJECT_NULL;
2583
2584 if (VM_DYNAMIC_PAGING_ENABLED()) {
2585 assert(vm_page_throttled_count == 0);
2586 assert(vm_page_queue_empty(&vm_page_queue_throttled));
2587 }
2588
2589 /*
2590 * Try for a clean-queue inactive page.
2591 * These are pages that vm_pageout_scan tried to steal earlier, but
2592 * were dirty and had to be cleaned. Pick them up now that they are clean.
2593 */
2594 if (!vm_page_queue_empty(&vm_page_queue_cleaned)) {
2595 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
2596
2597 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
2598
2599 break;
2600 }
2601
2602 /*
2603 * The next most eligible pages are ones we paged in speculatively,
2604 * but which have not yet been touched and have been aged out.
2605 */
2606 if (!vm_page_queue_empty(&sq->age_q)) {
2607 m = (vm_page_t) vm_page_queue_first(&sq->age_q);
2608
2609 assert(m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q);
2610
2611 if (!m->dirty || force_anonymous == FALSE)
2612 break;
2613 else
2614 m = NULL;
2615 }
2616
2617 #if CONFIG_BACKGROUND_QUEUE
2618 if (vm_page_background_mode != VM_PAGE_BG_DISABLED && (vm_page_background_count > vm_page_background_target)) {
2619 vm_object_t bg_m_object = NULL;
2620
2621 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_background);
2622
2623 bg_m_object = VM_PAGE_OBJECT(m);
2624
2625 if (!VM_PAGE_PAGEABLE(m)) {
2626 /*
2627 * This page is on the background queue
2628 * but not on a pageable queue. This is
2629 * likely a transient state and whoever
2630 * took it out of its pageable queue
2631 * will likely put it back on a pageable
2632 * queue soon but we can't deal with it
2633 * at this point, so let's ignore this
2634 * page.
2635 */
2636 } else if (force_anonymous == FALSE || bg_m_object->internal) {
2637
2638 if (bg_m_object->internal &&
2639 ((vm_compressor_out_of_space() == TRUE) ||
2640 (vm_page_free_count < (vm_page_free_reserved / 4)))) {
2641
2642 vm_pageout_skipped_bq_internal++;
2643 } else {
2644 page_from_bg_q = TRUE;
2645
2646 if (bg_m_object->internal)
2647 vm_pageout_considered_bq_internal++;
2648 else
2649 vm_pageout_considered_bq_external++;
2650
2651 break;
2652 }
2653 }
2654 }
2655 #endif
2656
2657 grab_anonymous = (vm_page_anonymous_count > vm_page_anonymous_min);
2658 inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
2659
2660 if ((vm_page_pageable_external_count < vm_page_filecache_min || force_anonymous == TRUE) ||
2661 ((inactive_external_count < vm_page_anonymous_count) && (inactive_external_count < (vm_page_pageable_external_count / 3)))) {
2662 grab_anonymous = TRUE;
2663 anons_grabbed = 0;
2664
2665 vm_pageout_skipped_external++;
2666 goto want_anonymous;
2667 }
2668 #if CONFIG_JETSAM
2669 /* If the file-backed pool has accumulated
2670 * significantly more pages than the jetsam
2671 * threshold, prefer to reclaim those
2672 * inline to minimise compute overhead of reclaiming
2673 * anonymous pages.
2674 * This calculation does not account for the CPU local
2675 * external page queues, as those are expected to be
2676 * much smaller relative to the global pools.
2677 */
2678 if (grab_anonymous == TRUE && !VM_PAGE_Q_THROTTLED(eq)) {
2679 if (vm_page_pageable_external_count >
2680 vm_page_filecache_min) {
2681 if ((vm_page_pageable_external_count *
2682 vm_pageout_memorystatus_fb_factor_dr) >
2683 (memorystatus_available_pages_critical *
2684 vm_pageout_memorystatus_fb_factor_nr)) {
2685 grab_anonymous = FALSE;
2686 #if DEVELOPMENT || DEBUG
2687 vm_grab_anon_overrides++;
2688 #endif
2689 }
2690 }
2691 #if DEVELOPMENT || DEBUG
2692 if (grab_anonymous) {
2693 vm_grab_anon_nops++;
2694 }
2695 #endif
2696 }
2697 #endif /* CONFIG_JETSAM */
2698
2699 want_anonymous:
2700 if (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT || vm_page_queue_empty(&vm_page_queue_anonymous)) {
2701
2702 if ( !vm_page_queue_empty(&vm_page_queue_inactive) ) {
2703 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
2704
2705 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
2706 anons_grabbed = 0;
2707
2708 if (vm_page_pageable_external_count < vm_page_filecache_min) {
2709 if ((++reactivated_this_call % 100))
2710 goto must_activate_page;
2711 /*
2712 * steal 1% of the file backed pages even if
2713 * we are under the limit that has been set
2714 * for a healthy filecache
2715 */
2716 }
2717 break;
2718 }
2719 }
2720 if ( !vm_page_queue_empty(&vm_page_queue_anonymous) ) {
2721 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
2722
2723 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
2724 anons_grabbed++;
2725
2726 break;
2727 }
2728
2729 /*
2730 * if we've gotten here, we have no victim page.
2731 * check to see if we've not finished balancing the queues
2732 * or we have a page on the aged speculative queue that we
2733 * skipped due to force_anonymous == TRUE... or we have
2734 * speculative pages that we can prematurely age... in
2735 * one of these cases we'll keep going, else panic
2736 */
2737 force_anonymous = FALSE;
2738 vm_pageout_no_victim++;
2739
2740 if ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target)
2741 goto done_with_inactivepage;
2742
2743 if (!vm_page_queue_empty(&sq->age_q))
2744 goto done_with_inactivepage;
2745
2746 if (vm_page_speculative_count) {
2747 force_speculative_aging = TRUE;
2748 goto done_with_inactivepage;
2749 }
2750 panic("vm_pageout: no victim");
2751
2752 /* NOTREACHED */
2753 }
2754 assert(VM_PAGE_PAGEABLE(m));
2755 m_object = VM_PAGE_OBJECT(m);
2756 force_anonymous = FALSE;
2757
2758 page_prev_q_state = m->vm_page_q_state;
2759 /*
2760 * we just found this page on one of our queues...
2761 * it can't also be on the pageout queue, so safe
2762 * to call vm_page_queues_remove
2763 */
2764 vm_page_queues_remove(m, TRUE);
2765
2766 assert(!m->laundry);
2767 assert(!m->private);
2768 assert(!m->fictitious);
2769 assert(m_object != kernel_object);
2770 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
2771
2772 vm_pageout_stats[vm_pageout_stat_now].considered++;
2773 vm_pageout_considered_page++;
2774
2775 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
2776
2777 /*
2778 * check to see if we currently are working
2779 * with the same object... if so, we've
2780 * already got the lock
2781 */
2782 if (m_object != object) {
2783 /*
2784 * the object associated with candidate page is
2785 * different from the one we were just working
2786 * with... dump the lock if we still own it
2787 */
2788 if (object != NULL) {
2789 vm_object_unlock(object);
2790 object = NULL;
2791 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2792 }
2793 /*
2794 * Try to lock object; since we've already got the
2795 * page queues lock, we can only 'try' for this one.
2796 * if the 'try' fails, we need to do a mutex_pause
2797 * to allow the owner of the object lock a chance to
2798 * run... otherwise, we're likely to trip over this
2799 * object in the same state as we work our way through
2800 * the queue... clumps of pages associated with the same
2801 * object are fairly typical on the inactive and active queues
2802 */
2803 if (!vm_object_lock_try_scan(m_object)) {
2804 vm_page_t m_want = NULL;
2805
2806 vm_pageout_inactive_nolock++;
2807
2808 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2809 vm_pageout_cleaned_nolock++;
2810
2811 pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));
2812 m->reference = FALSE;
2813
2814 #if !CONFIG_EMBEDDED
2815 /*
2816 * m->object must be stable since we hold the page queues lock...
2817 * we can update the scan_collisions field sans the object lock
2818 * since it is a separate field and this is the only spot that does
2819 * a read-modify-write operation and it is never executed concurrently...
2820 * we can asynchronously set this field to 0 when creating a UPL, so it
2821 * is possible for the value to be a bit non-deterministic, but that's ok
2822 * since it's only used as a hint
2823 */
2824
2825 /*
2826 * This is not used on EMBEDDED because having this variable set *could* lead
2827 * us to self-cannibalize pages from m_object to fill a UPL for a pagein.
2828 * And, there's a high probability that the object that vm_pageout_scan
2829 * wants and collides on is a very popular object e.g. the shared cache on EMBEDDED.
2830 * The older pages that we cannibalize from the shared cache could be really
2831 * important text pages e.g. the system call stubs.
2832 */
2833 m_object->scan_collisions = 1;
2834 #endif /* !CONFIG_EMBEDDED */
2835
2836 if ( !vm_page_queue_empty(&sq->age_q) )
2837 m_want = (vm_page_t) vm_page_queue_first(&sq->age_q);
2838 else if ( !vm_page_queue_empty(&vm_page_queue_cleaned))
2839 m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
2840 else if ( !vm_page_queue_empty(&vm_page_queue_inactive) &&
2841 (anons_grabbed >= ANONS_GRABBED_LIMIT || vm_page_queue_empty(&vm_page_queue_anonymous)))
2842 m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
2843 else if ( !vm_page_queue_empty(&vm_page_queue_anonymous))
2844 m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
2845
2846 /*
2847 * this is the next object we're going to be interested in
2848 * try to make sure it's available after the mutex_yield
2849 * returns control
2850 */
2851 if (m_want)
2852 vm_pageout_scan_wants_object = VM_PAGE_OBJECT(m_want);
2853
2854 /*
2855 * force us to dump any collected free pages
2856 * and to pause before moving on
2857 */
2858 try_failed = TRUE;
2859
2860 goto requeue_page;
2861 }
2862 object = m_object;
2863 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2864
2865 try_failed = FALSE;
2866 }
2867 assert(m_object == object);
2868 assert(VM_PAGE_OBJECT(m) == m_object);
2869
2870 if (m->busy) {
2871 /*
2872 * Somebody is already playing with this page.
2873 * Put it back on the appropriate queue
2874 *
2875 */
2876 vm_pageout_inactive_busy++;
2877
2878 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2879 vm_pageout_cleaned_busy++;
2880 requeue_page:
2881 if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
2882 vm_page_enqueue_inactive(m, FALSE);
2883 else
2884 vm_page_activate(m);
2885 #if CONFIG_BACKGROUND_QUEUE
2886 if (page_from_bg_q == TRUE) {
2887 if (m_object->internal)
2888 vm_pageout_rejected_bq_internal++;
2889 else
2890 vm_pageout_rejected_bq_external++;
2891 }
2892 #endif
2893 goto done_with_inactivepage;
2894 }
2895
2896
2897 /*
2898 * If it's absent, in error or the object is no longer alive,
2899 * we can reclaim the page... in the no longer alive case,
2900 * there are 2 states the page can be in that preclude us
2901 * from reclaiming it - busy or cleaning - that we've already
2902 * dealt with
2903 */
2904 if (m->absent || m->error || !object->alive) {
2905
2906 if (m->absent)
2907 vm_pageout_inactive_absent++;
2908 else if (!object->alive)
2909 vm_pageout_inactive_notalive++;
2910 else
2911 vm_pageout_inactive_error++;
2912 reclaim_page:
2913 if (vm_pageout_deadlock_target) {
2914 vm_pageout_scan_inactive_throttle_success++;
2915 vm_pageout_deadlock_target--;
2916 }
2917
2918 DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL);
2919
2920 if (object->internal) {
2921 DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL);
2922 } else {
2923 DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL);
2924 }
2925 assert(!m->cleaning);
2926 assert(!m->laundry);
2927
2928 m->busy = TRUE;
2929
2930 /*
2931 * remove page from object here since we're already
2932 * behind the object lock... defer the rest of the work
2933 * we'd normally do in vm_page_free_prepare_object
2934 * until 'vm_page_free_list' is called
2935 */
2936 if (m->tabled)
2937 vm_page_remove(m, TRUE);
2938
2939 assert(m->pageq.next == 0 && m->pageq.prev == 0);
2940 m->snext = local_freeq;
2941 local_freeq = m;
2942 local_freed++;
2943
2944 if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
2945 vm_pageout_freed_from_speculative++;
2946 else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2947 vm_pageout_freed_from_cleaned++;
2948 else
2949 vm_pageout_freed_from_inactive_clean++;
2950
2951 vm_pageout_stats[vm_pageout_stat_now].reclaimed_clean++;
2952
2953 inactive_burst_count = 0;
2954 goto done_with_inactivepage;
2955 }
2956 /*
2957 * If the object is empty, the page must be reclaimed even
2958 * if dirty or used.
2959 * If the page belongs to a volatile object, we stick it back
2960 * on.
2961 */
2962 if (object->copy == VM_OBJECT_NULL) {
2963 if (object->purgable == VM_PURGABLE_EMPTY) {
2964 if (m->pmapped == TRUE) {
2965 /* unmap the page */
2966 refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
2967 if (refmod_state & VM_MEM_MODIFIED) {
2968 SET_PAGE_DIRTY(m, FALSE);
2969 }
2970 }
2971 if (m->dirty || m->precious) {
2972 /* we saved the cost of cleaning this page ! */
2973 vm_page_purged_count++;
2974 }
2975 goto reclaim_page;
2976 }
2977
2978 if (VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
2979 /*
2980 * With the VM compressor, the cost of
2981 * reclaiming a page is much lower (no I/O),
2982 * so if we find a "volatile" page, it's better
2983 * to let it get compressed rather than letting
2984 * it occupy a full page until it gets purged.
2985 * So no need to check for "volatile" here.
2986 */
2987 } else if (object->purgable == VM_PURGABLE_VOLATILE) {
2988 /*
2989 * Avoid cleaning a "volatile" page which might
2990 * be purged soon.
2991 */
2992
2993 /* if it's wired, we can't put it on our queue */
2994 assert(!VM_PAGE_WIRED(m));
2995
2996 /* just stick it back on! */
2997 reactivated_this_call++;
2998
2999 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
3000 vm_pageout_cleaned_volatile_reactivated++;
3001
3002 goto reactivate_page;
3003 }
3004 }
3005 /*
3006 * If it's being used, reactivate.
3007 * (Fictitious pages are either busy or absent.)
3008 * First, update the reference and dirty bits
3009 * to make sure the page is unreferenced.
3010 */
3011 refmod_state = -1;
3012
3013 if (m->reference == FALSE && m->pmapped == TRUE) {
3014 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
3015
3016 if (refmod_state & VM_MEM_REFERENCED)
3017 m->reference = TRUE;
3018 if (refmod_state & VM_MEM_MODIFIED) {
3019 SET_PAGE_DIRTY(m, FALSE);
3020 }
3021 }
3022
3023 /*
3024 * if (m->cleaning && !m->free_when_done)
3025 * If already cleaning this page in place and it hasn't
3026 * been recently referenced, just pull off the queue.
3027 * We can leave the page mapped, and upl_commit_range
3028 * will put it on the clean queue.
3029 *
3030 * if (m->free_when_done && !m->cleaning)
3031 * an msync INVALIDATE is in progress...
3032 * this page has been marked for destruction
3033 * after it has been cleaned,
3034 * but not yet gathered into a UPL
3035 * where 'cleaning' will be set...
3036 * just leave it off the paging queues
3037 *
3038 * if (m->free_when_done && m->cleaning)
3039 * an msync INVALIDATE is in progress
3040 * and the UPL has already gathered this page...
3041 * just leave it off the paging queues
3042 */
3043
3044 /*
3045 * page with m->free_when_done and still on the queues means that an
3046 * MS_INVALIDATE is in progress on this page... leave it alone
3047 */
3048 if (m->free_when_done) {
3049 goto done_with_inactivepage;
3050 }
3051
3052 /* if cleaning, reactivate if referenced. otherwise, just pull off queue */
3053 if (m->cleaning) {
3054 if (m->reference == TRUE) {
3055 reactivated_this_call++;
3056 goto reactivate_page;
3057 } else {
3058 goto done_with_inactivepage;
3059 }
3060 }
3061
3062 if (m->reference || m->dirty) {
3063 /* deal with a rogue "reusable" page */
3064 VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, m_object);
3065 }
3066
3067 if (!m->no_cache &&
3068 #if CONFIG_BACKGROUND_QUEUE
3069 page_from_bg_q == FALSE &&
3070 #endif
3071 (m->reference ||
3072 (m->xpmapped && !object->internal && (vm_page_xpmapped_external_count < (vm_page_external_count / 4))))) {
3073 /*
3074 * The page we pulled off the inactive list has
3075 * been referenced. It is possible for other
3076 * processors to be touching pages faster than we
3077 * can clear the referenced bit and traverse the
3078 * inactive queue, so we limit the number of
3079 * reactivations.
3080 */
3081 if (++reactivated_this_call >= reactivate_limit) {
3082 vm_pageout_reactivation_limit_exceeded++;
3083 } else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) {
3084 vm_pageout_inactive_force_reclaim++;
3085 } else {
3086 uint32_t isinuse;
3087
3088 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
3089 vm_pageout_cleaned_reference_reactivated++;
3090 reactivate_page:
3091 if ( !object->internal && object->pager != MEMORY_OBJECT_NULL &&
3092 vnode_pager_get_isinuse(object->pager, &isinuse) == KERN_SUCCESS && !isinuse) {
3093 /*
3094 * no explicit mappings of this object exist
3095 * and it's not open via the filesystem
3096 */
3097 vm_page_deactivate(m);
3098 vm_pageout_inactive_deactivated++;
3099 } else {
3100 must_activate_page:
3101 /*
3102 * The page was/is being used, so put back on active list.
3103 */
3104 vm_page_activate(m);
3105 VM_STAT_INCR(reactivations);
3106 inactive_burst_count = 0;
3107 }
3108 #if CONFIG_BACKGROUND_QUEUE
3109 if (page_from_bg_q == TRUE) {
3110 if (m_object->internal)
3111 vm_pageout_rejected_bq_internal++;
3112 else
3113 vm_pageout_rejected_bq_external++;
3114 }
3115 #endif
3116 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
3117 vm_pageout_cleaned_reactivated++;
3118 vm_pageout_inactive_used++;
3119
3120 goto done_with_inactivepage;
3121 }
3122 /*
3123 * Make sure we call pmap_get_refmod() if it
3124 * wasn't already called just above, to update
3125 * the dirty bit.
3126 */
3127 if ((refmod_state == -1) && !m->dirty && m->pmapped) {
3128 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
3129 if (refmod_state & VM_MEM_MODIFIED) {
3130 SET_PAGE_DIRTY(m, FALSE);
3131 }
3132 }
3133 }
3134
3135 XPR(XPR_VM_PAGEOUT,
3136 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
3137 object, m->offset, m, 0,0);
3138
3139 /*
3140 * we've got a candidate page to steal...
3141 *
3142 * m->dirty is up to date courtesy of the
3143 * preceding check for m->reference... if
3144 * we get here, then m->reference had to be
3145 * FALSE (or possibly "reactivate_limit" was
3146 * exceeded), but in either case we called
3147 * pmap_get_refmod() and updated both
3148 * m->reference and m->dirty
3149 *
3150 * if it's dirty or precious we need to
3151 * see if the target queue is throttled...
3152 * if it is, we need to skip over it by moving it back
3153 * to the end of the inactive queue
3154 */
3155
3156 inactive_throttled = FALSE;
3157
3158 if (m->dirty || m->precious) {
3159 if (object->internal) {
3160 if (VM_PAGE_Q_THROTTLED(iq))
3161 inactive_throttled = TRUE;
3162 } else if (VM_PAGE_Q_THROTTLED(eq)) {
3163 inactive_throttled = TRUE;
3164 }
3165 }
3166 throttle_inactive:
3167 if (!VM_DYNAMIC_PAGING_ENABLED() &&
3168 object->internal && m->dirty &&
3169 (object->purgable == VM_PURGABLE_DENY ||
3170 object->purgable == VM_PURGABLE_NONVOLATILE ||
3171 object->purgable == VM_PURGABLE_VOLATILE)) {
3172 vm_page_check_pageable_safe(m);
3173 assert(m->vm_page_q_state == VM_PAGE_NOT_ON_Q);
3174 vm_page_queue_enter(&vm_page_queue_throttled, m,
3175 vm_page_t, pageq);
3176 m->vm_page_q_state = VM_PAGE_ON_THROTTLED_Q;
3177 vm_page_throttled_count++;
3178
3179 vm_pageout_scan_reclaimed_throttled++;
3180
3181 inactive_burst_count = 0;
3182 goto done_with_inactivepage;
3183 }
3184 if (inactive_throttled == TRUE) {
3185
3186 if (object->internal == FALSE) {
3187 /*
3188 * we need to break up the following potential deadlock case...
3189 * a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written.
3190 * b) The thread doing the writing is waiting for pages while holding the truncate lock
3191 * c) Most of the pages in the inactive queue belong to this file.
3192 *
3193 * we are potentially in this deadlock because...
3194 * a) the external pageout queue is throttled
3195 * b) we're done with the active queue and moved on to the inactive queue
3196 * c) we've got a dirty external page
3197 *
3198 * since we don't know the reason for the external pageout queue being throttled we
3199 * must suspect that we are deadlocked, so move the current page onto the active queue
3200 * in an effort to cause a page from the active queue to 'age' to the inactive queue
3201 *
3202 * if we don't have jetsam configured (i.e. we have a dynamic pager), set
3203 * 'force_anonymous' to TRUE to cause us to grab a page from the cleaned/anonymous
3204 * pool the next time we select a victim page... if we can make enough new free pages,
3205 * the deadlock will break, the external pageout queue will empty and it will no longer
3206 * be throttled
3207 *
3208 * if we have jetsam configured, keep a count of the pages reactivated this way so
3209 * that we can try to find clean pages in the active/inactive queues before
3210 * deciding to jetsam a process
3211 */
3212 vm_pageout_scan_inactive_throttled_external++;
3213
3214 vm_page_check_pageable_safe(m);
3215 assert(m->vm_page_q_state == VM_PAGE_NOT_ON_Q);
3216 vm_page_queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
3217 m->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
3218 vm_page_active_count++;
3219 vm_page_pageable_external_count++;
3220
3221 vm_pageout_adjust_eq_iothrottle(eq, FALSE);
3222
3223 #if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
3224 vm_pageout_inactive_external_forced_reactivate_limit--;
3225
3226 if (vm_pageout_inactive_external_forced_reactivate_limit <= 0) {
3227 vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
3228 /*
3229 * Possible deadlock scenario so request jetsam action
3230 */
3231 assert(object);
3232 vm_object_unlock(object);
3233 object = VM_OBJECT_NULL;
3234 vm_page_unlock_queues();
3235
3236 VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START,
3237 vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
3238
3239 /* Kill first suitable process. If this call returned FALSE, we might have simply purged a process instead. */
3240 if (memorystatus_kill_on_VM_page_shortage(FALSE) == TRUE) {
3241 vm_pageout_inactive_external_forced_jetsam_count++;
3242 }
3243
3244 VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END,
3245 vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
3246
3247 vm_page_lock_queues();
3248 delayed_unlock = 1;
3249 }
3250 #else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
3251 force_anonymous = TRUE;
3252 #endif
3253 inactive_burst_count = 0;
3254 goto done_with_inactivepage;
3255 } else {
3256 vm_pageout_scan_inactive_throttled_internal++;
3257 goto must_activate_page;
3258 }
3259 }
3260
3261 /*
3262 * we've got a page that we can steal...
3263 * eliminate all mappings and make sure
3264 * we have the up-to-date modified state
3265 *
3266 * if we need to do a pmap_disconnect then we
3267 * need to re-evaluate m->dirty since the pmap_disconnect
3268 * provides the true state atomically... the
3269 * page was still mapped up to the pmap_disconnect
3270 * and may have been dirtied at the last microsecond
3271 *
3272 * Note that if 'pmapped' is FALSE then the page is not
3273 * and has not been in any map, so there is no point calling
3274 * pmap_disconnect(). m->dirty could have been set in anticipation
3275 * of likely usage of the page.
3276 */
3277 if (m->pmapped == TRUE) {
3278 int pmap_options;
3279
3280 /*
3281 * Don't count this page as going into the compressor
3282 * if any of these are true:
3283 * 1) compressed pager isn't enabled
3284 * 2) Freezer enabled device with compressed pager
3285 * backend (exclusive use) i.e. most of the VM system
3286 * (including vm_pageout_scan) has no knowledge of
3287 * the compressor
3288 * 3) This page belongs to a file and hence will not be
3289 * sent into the compressor
3290 */
3291 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE ||
3292 object->internal == FALSE) {
3293 pmap_options = 0;
3294 } else if (m->dirty || m->precious) {
3295 /*
3296 * VM knows that this page is dirty (or
3297 * precious) and needs to be compressed
3298 * rather than freed.
3299 * Tell the pmap layer to count this page
3300 * as "compressed".
3301 */
3302 pmap_options = PMAP_OPTIONS_COMPRESSOR;
3303 } else {
3304 /*
3305 * VM does not know if the page needs to
3306 * be preserved but the pmap layer might tell
3307 * us if any mapping has "modified" it.
3308 * Let the pmap layer count this page
3309 * as compressed if and only if it has been
3310 * modified.
3311 */
3312 pmap_options =
3313 PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
3314 }
3315 refmod_state = pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m),
3316 pmap_options,
3317 NULL);
3318 if (refmod_state & VM_MEM_MODIFIED) {
3319 SET_PAGE_DIRTY(m, FALSE);
3320 }
3321 }
3322 /*
3323 * reset our count of pages that have been reclaimed
3324 * since the last page was 'stolen'
3325 */
3326 inactive_reclaim_run = 0;
3327
3328 /*
3329 * If it's clean and not precious, we can free the page.
3330 */
3331 if (!m->dirty && !m->precious) {
3332
3333 if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
3334 vm_pageout_speculative_clean++;
3335 else {
3336 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q)
3337 vm_pageout_inactive_anonymous++;
3338 else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
3339 vm_pageout_cleaned_reclaimed++;
3340
3341 vm_pageout_inactive_clean++;
3342 }
3343 /*
3344 * OK, at this point we have found a page we are going to free.
3345 */
3346 #if CONFIG_PHANTOM_CACHE
3347 if (!object->internal)
3348 vm_phantom_cache_add_ghost(m);
3349 #endif
3350 goto reclaim_page;
3351 }
3352
3353 /*
3354 * The page may have been dirtied since the last check
3355 * for a throttled target queue (which may have been skipped
3356 * if the page was clean then). With the dirty page
3357 * disconnected here, we can make one final check.
3358 */
3359 if (object->internal) {
3360 if (VM_PAGE_Q_THROTTLED(iq))
3361 inactive_throttled = TRUE;
3362 } else if (VM_PAGE_Q_THROTTLED(eq)) {
3363 inactive_throttled = TRUE;
3364 }
3365
3366 if (inactive_throttled == TRUE)
3367 goto throttle_inactive;
3368
3369 #if VM_PRESSURE_EVENTS
3370 #if CONFIG_JETSAM
3371
3372 /*
3373 * If Jetsam is enabled, then the sending
3374 * of memory pressure notifications is handled
3375 * from the same thread that takes care of high-water
3376 * and other jetsams i.e. the memorystatus_thread.
3377 */
3378
3379 #else /* CONFIG_JETSAM */
3380
3381 vm_pressure_response();
3382
3383 #endif /* CONFIG_JETSAM */
3384 #endif /* VM_PRESSURE_EVENTS */
3385
3386 if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
3387 vm_pageout_speculative_dirty++;
3388 else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q)
3389 vm_pageout_inactive_anonymous++;
3390
3391 if (object->internal)
3392 vm_pageout_inactive_dirty_internal++;
3393 else
3394 vm_pageout_inactive_dirty_external++;
3395
3396 /*
3397 * do NOT set the pageout bit!
3398 * sure, we might need free pages, but this page is going to take time to become free
3399 * anyway, so we may as well put it on the clean queue first and take it from there later
3400 * if necessary. that way, we'll ensure we don't free up too much. -mj
3401 */
3402 vm_pageout_cluster(m);
3403
3404 done_with_inactivepage:
3405
3406 if (delayed_unlock++ > delayed_unlock_limit || try_failed == TRUE) {
3407
3408 vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
3409 VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
3410 if (try_failed == TRUE)
3411 lck_mtx_yield(&vm_page_queue_lock);
3412 }
3413
3414 /*
3415 * back to top of pageout scan loop
3416 */
3417 }
3418 }
3419
3420
3421 int vm_page_free_count_init;
3422
3423 void
3424 vm_page_free_reserve(
3425 int pages)
3426 {
3427 int free_after_reserve;
3428
3429 if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
3430
3431 if ((vm_page_free_reserved + pages + COMPRESSOR_FREE_RESERVED_LIMIT) >= (VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT))
3432 vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT;
3433 else
3434 vm_page_free_reserved += (pages + COMPRESSOR_FREE_RESERVED_LIMIT);
3435
3436 } else {
3437 if ((vm_page_free_reserved + pages) >= VM_PAGE_FREE_RESERVED_LIMIT)
3438 vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT;
3439 else
3440 vm_page_free_reserved += pages;
3441 }
3442 free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;
3443
3444 vm_page_free_min = vm_page_free_reserved +
3445 VM_PAGE_FREE_MIN(free_after_reserve);
3446
3447 if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT)
3448 vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT;
3449
3450 vm_page_free_target = vm_page_free_reserved +
3451 VM_PAGE_FREE_TARGET(free_after_reserve);
3452
3453 if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT)
3454 vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT;
3455
3456 if (vm_page_free_target < vm_page_free_min + 5)
3457 vm_page_free_target = vm_page_free_min + 5;
3458
3459 vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 2);
3460 }
3461
3462 /*
3463 * vm_pageout is the high level pageout daemon.
3464 */
3465
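/*
 * vm_pageout_continue:
 *
 * Continuation for the pageout daemon: run vm_pageout_scan(),
 * wake any vm_pageout_wait() waiters, then block on
 * vm_page_free_wanted with this routine as the continuation,
 * so each wakeup starts a fresh scan.
 */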
3466 void
3467 vm_pageout_continue(void)
3468 {
3469 DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL);
3470 vm_pageout_scan_event_counter++;
3471
3472 #if !CONFIG_EMBEDDED
3473 lck_mtx_lock(&vm_page_queue_free_lock);
3474 vm_pageout_running = TRUE;
3475 lck_mtx_unlock(&vm_page_queue_free_lock);
3476 #endif /* CONFIG_EMBEDDED */
3477
3478 vm_pageout_scan();
3479 /*
3480 * we hold both the vm_page_queue_free_lock
3481 * and the vm_page_queues_lock at this point
3482 */
3483 assert(vm_page_free_wanted == 0);
3484 assert(vm_page_free_wanted_privileged == 0);
3485 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
3486
3487 #if !CONFIG_EMBEDDED
3488 vm_pageout_running = FALSE;
3489 if (vm_pageout_waiter) {
3490 vm_pageout_waiter = FALSE;
3491 thread_wakeup((event_t)&vm_pageout_waiter);
3492 }
3493 #endif /* !CONFIG_EMBEDDED */
3494
3495 lck_mtx_unlock(&vm_page_queue_free_lock);
3496 vm_page_unlock_queues();
3497
3498 counter(c_vm_pageout_block++);
3499 thread_block((thread_continue_t)vm_pageout_continue);
3500 /*NOTREACHED*/
3501 }
3502
3503 #if !CONFIG_EMBEDDED
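/*
 * vm_pageout_wait:
 *
 * Block until the in-progress vm_pageout_scan() pass completes
 * or the given deadline expires; returns KERN_OPERATION_TIMED_OUT
 * if the deadline is reached while the scan is still running.
 */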
3504 kern_return_t
3505 vm_pageout_wait(uint64_t deadline)
3506 {
3507 kern_return_t kr;
3508
3509 lck_mtx_lock(&vm_page_queue_free_lock);
3510 for (kr = KERN_SUCCESS; vm_pageout_running && (KERN_SUCCESS == kr); ) {
3511 vm_pageout_waiter = TRUE;
3512 if (THREAD_AWAKENED != lck_mtx_sleep_deadline(
3513 &vm_page_queue_free_lock, LCK_SLEEP_DEFAULT,
3514 (event_t) &vm_pageout_waiter, THREAD_UNINT, deadline)) {
3515 kr = KERN_OPERATION_TIMED_OUT;
3516 }
3517 }
3518 lck_mtx_unlock(&vm_page_queue_free_lock);
3519
3520 return (kr);
3521 }
3522 #endif /* !CONFIG_EMBEDDED */
3523
3524
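/*
 * vm_pageout_iothread_external_continue:
 *
 * Continuation for the external pageout thread: drain the external
 * pageout queue, re-looking up each page under its object lock, and
 * push the data to the backing pager via memory_object_data_return(),
 * throttling between requests.  When the queue is empty, block with
 * this routine as the continuation.
 */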
3525 static void
3526 vm_pageout_iothread_external_continue(struct vm_pageout_queue *q)
3527 {
3528 vm_page_t m = NULL;
3529 vm_object_t object;
3530 vm_object_offset_t offset;
3531 memory_object_t pager;
3532
3533 /* On systems without a compressor, the external IO thread clears its
3534 * VM privileged bit to accommodate large allocations (e.g. bulk UPL
3535 * creation)
3536 */
3537 if (vm_pageout_internal_iothread != THREAD_NULL)
3538 current_thread()->options &= ~TH_OPT_VMPRIV;
3539
3540 vm_page_lockspin_queues();
3541
3542 while ( !vm_page_queue_empty(&q->pgo_pending) ) {
3543
3544 q->pgo_busy = TRUE;
3545 vm_page_queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
3546
3547 assert(m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q);
3548 VM_PAGE_CHECK(m);
3549 /*
3550 * grab a snapshot of the object and offset this
3551 * page is tabled in so that we can relookup this
3552 * page after we've taken the object lock - these
3553 * fields are stable while we hold the page queues lock
3554 * but as soon as we drop it, there is nothing to keep
3555 * this page in this object... we hold an activity_in_progress
3556 * on this object which will keep it from terminating
3557 */
3558 object = VM_PAGE_OBJECT(m);
3559 offset = m->offset;
3560
3561 if (object->object_slid) {
3562 panic("slid page %p not allowed on this path\n", m);
3563 }
3564 m->vm_page_q_state = VM_PAGE_NOT_ON_Q;
3565 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
3566
3567 vm_page_unlock_queues();
3568
3569 vm_object_lock(object);
3570
3571 m = vm_page_lookup(object, offset);
3572
3573 if (m == NULL ||
3574 m->busy || m->cleaning || !m->laundry || (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
3575 /*
3576 * it's either the same page that someone else has
3577 * started cleaning (or it's finished cleaning or
3578 * been put back on the pageout queue), or
3579 * the page has been freed or we have found a
3580 * new page at this offset... in all of these cases
3581 * we merely need to release the activity_in_progress
3582 * we took when we put the page on the pageout queue
3583 */
3584 vm_object_activity_end(object);
3585 vm_object_unlock(object);
3586
3587 vm_page_lockspin_queues();
3588 continue;
3589 }
3590 pager = object->pager;
3591
3592 if (pager == MEMORY_OBJECT_NULL) {
3593 /*
3594 * This pager has been destroyed by either
3595 * memory_object_destroy or vm_object_destroy, and
3596 * so there is nowhere for the page to go.
3597 */
3598 if (m->free_when_done) {
3599 /*
3600 * Just free the page... VM_PAGE_FREE takes
3601 * care of cleaning up all the state...
3602 * including doing the vm_pageout_throttle_up
3603 */
3604 VM_PAGE_FREE(m);
3605 } else {
3606 vm_page_lockspin_queues();
3607
3608 vm_pageout_throttle_up(m);
3609 vm_page_activate(m);
3610
3611 vm_page_unlock_queues();
3612
3613 /*
3614 * And we are done with it.
3615 */
3616 }
3617 vm_object_activity_end(object);
3618 vm_object_unlock(object);
3619
3620 vm_page_lockspin_queues();
3621 continue;
3622 }
3623 #if 0
3624 /*
3625 * we don't hold the page queue lock
3626 * so this check isn't safe to make
3627 */
3628 VM_PAGE_CHECK(m);
3629 #endif
3630 /*
3631 * give back the activity_in_progress reference we
3632 * took when we queued up this page and replace it
3633 * with a paging_in_progress reference that will
3634 * also keep the paging offset from changing and
3635 * prevent the object from terminating
3636 */
3637 vm_object_activity_end(object);
3638 vm_object_paging_begin(object);
3639 vm_object_unlock(object);
3640
3641 /*
3642 * Send the data to the pager.
3643 * any pageout clustering happens there
3644 */
3645 memory_object_data_return(pager,
3646 m->offset + object->paging_offset,
3647 PAGE_SIZE,
3648 NULL,
3649 NULL,
3650 FALSE,
3651 FALSE,
3652 0);
3653
3654 vm_object_lock(object);
3655 vm_object_paging_end(object);
3656 vm_object_unlock(object);
3657
3658 vm_pageout_io_throttle();
3659
3660 vm_page_lockspin_queues();
3661 }
3662 q->pgo_busy = FALSE;
3663 q->pgo_idle = TRUE;
3664
3665 assert_wait((event_t) &q->pgo_pending, THREAD_UNINT);
3666 vm_page_unlock_queues();
3667
3668 thread_block_parameter((thread_continue_t)vm_pageout_iothread_external_continue, (void *) q);
3669 /*NOTREACHED*/
3670 }
3671
3672
3673 #define MAX_FREE_BATCH 32
3674 uint32_t vm_compressor_time_thread; /* Set via sysctl to record time accrued by
3675 * this thread.
3676 */
3677
3678
3679 #if DEVELOPMENT || DEBUG
3680 uint64_t compressor_epoch_start, compressor_epoch_stop, compressor_threads_runtime;
3681 #endif
3682
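/*
 * vm_pageout_iothread_internal_continue:
 *
 * Continuation for each compressor thread: pull a batch of pages off
 * the internal pageout queue, compress them via
 * vm_pageout_compress_page(), and free the compressed pages in
 * MAX_FREE_BATCH sized chunks.  On non-jetsam configurations the
 * thread waits whenever the free list drops below
 * COMPRESSOR_FREE_RESERVED_LIMIT.
 */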
3683 void
3684 vm_pageout_iothread_internal_continue(struct cq *);
3685 void
3686 vm_pageout_iothread_internal_continue(struct cq *cq)
3687 {
3688 struct vm_pageout_queue *q;
3689 vm_page_t m = NULL;
3690 boolean_t pgo_draining;
3691 vm_page_t local_q;
3692 int local_cnt;
3693 vm_page_t local_freeq = NULL;
3694 int local_freed = 0;
3695 int local_batch_size;
3696 int ncomps = 0;
3697 #if DEVELOPMENT || DEBUG
3698 boolean_t marked_active = FALSE;
3699 #endif
3700 KERNEL_DEBUG(0xe040000c | DBG_FUNC_END, 0, 0, 0, 0, 0);
3701
3702 q = cq->q;
3703 local_batch_size = q->pgo_maxlaundry / (vm_compressor_thread_count * 2);
3704
3705 #if RECORD_THE_COMPRESSED_DATA
3706 if (q->pgo_laundry)
3707 c_compressed_record_init();
3708 #endif
3709 while (TRUE) {
3710 int pages_left_on_q = 0;
3711
3712 local_cnt = 0;
3713 local_q = NULL;
3714
3715 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_START, 0, 0, 0, 0, 0);
3716
3717 vm_page_lock_queues();
3718 #if DEVELOPMENT || DEBUG
3719 if (marked_active == FALSE) {
3720 vmct_active++;
3721 vmct_state[cq->id] = VMCT_ACTIVE;
3722 marked_active = TRUE;
3723 if (vmct_active == 1) {
3724 compressor_epoch_start = mach_absolute_time();
3725 }
3726 }
3727 #endif
3728 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_END, 0, 0, 0, 0, 0);
3729
3730 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_START, q->pgo_laundry, 0, 0, 0, 0);
3731
3732 while ( !vm_page_queue_empty(&q->pgo_pending) && local_cnt < local_batch_size) {
3733
3734 vm_page_queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
3735 assert(m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q);
3736 VM_PAGE_CHECK(m);
3737
3738 m->vm_page_q_state = VM_PAGE_NOT_ON_Q;
3739 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
3740 m->laundry = FALSE;
3741
3742 m->snext = local_q;
3743 local_q = m;
3744 local_cnt++;
3745 }
3746 if (local_q == NULL)
3747 break;
3748
3749 q->pgo_busy = TRUE;
3750
3751 if ((pgo_draining = q->pgo_draining) == FALSE) {
3752 vm_pageout_throttle_up_batch(q, local_cnt);
3753 pages_left_on_q = q->pgo_laundry;
3754 } else
3755 pages_left_on_q = q->pgo_laundry - local_cnt;
3756
3757 vm_page_unlock_queues();
3758
3759 #if !RECORD_THE_COMPRESSED_DATA
3760 if (pages_left_on_q >= local_batch_size && cq->id < (vm_compressor_thread_count - 1)) {
3761 thread_wakeup((event_t) ((uintptr_t)&q->pgo_pending + cq->id + 1));
3762 }
3763 #endif
3764 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, q->pgo_laundry, 0, 0, 0, 0);
3765
3766 while (local_q) {
3767
3768 KERNEL_DEBUG(0xe0400024 | DBG_FUNC_START, local_cnt, 0, 0, 0, 0);
3769
3770 m = local_q;
3771 local_q = m->snext;
3772 m->snext = NULL;
3773
3774 if (vm_pageout_compress_page(&cq->current_chead, cq->scratch_buf, m, FALSE) == KERN_SUCCESS) {
3775 ncomps++;
3776 m->snext = local_freeq;
3777 local_freeq = m;
3778 local_freed++;
3779
3780 if (local_freed >= MAX_FREE_BATCH) {
3781 vm_pageout_freed_after_compression += local_freed;
3782
3783 vm_page_free_list(local_freeq, TRUE);
3784 local_freeq = NULL;
3785 local_freed = 0;
3786 }
3787 }
3788 #if !CONFIG_JETSAM
3789 while (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
3790 kern_return_t wait_result;
3791 int need_wakeup = 0;
3792
3793 if (local_freeq) {
3794 vm_pageout_freed_after_compression += local_freed;
3795
3796 vm_page_free_list(local_freeq, TRUE);
3797 local_freeq = NULL;
3798 local_freed = 0;
3799
3800 continue;
3801 }
3802 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3803
3804 if (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
3805
3806 if (vm_page_free_wanted_privileged++ == 0)
3807 need_wakeup = 1;
3808 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, THREAD_UNINT);
3809
3810 lck_mtx_unlock(&vm_page_queue_free_lock);
3811
3812 if (need_wakeup)
3813 thread_wakeup((event_t)&vm_page_free_wanted);
3814
3815 if (wait_result == THREAD_WAITING)
3816
3817 thread_block(THREAD_CONTINUE_NULL);
3818 } else
3819 lck_mtx_unlock(&vm_page_queue_free_lock);
3820 }
3821 #endif
3822 }
3823 if (local_freeq) {
3824 vm_pageout_freed_after_compression += local_freed;
3825
3826 vm_page_free_list(local_freeq, TRUE);
3827 local_freeq = NULL;
3828 local_freed = 0;
3829 }
3830 if (pgo_draining == TRUE) {
3831 vm_page_lockspin_queues();
3832 vm_pageout_throttle_up_batch(q, local_cnt);
3833 vm_page_unlock_queues();
3834 }
3835 }
3836 KERNEL_DEBUG(0xe040000c | DBG_FUNC_START, 0, 0, 0, 0, 0);
3837
3838 /*
3839 * queue lock is held and our q is empty
3840 */
3841 q->pgo_busy = FALSE;
3842 q->pgo_idle = TRUE;
3843
3844 assert_wait((event_t) ((uintptr_t)&q->pgo_pending + cq->id), THREAD_UNINT);
3845 #if DEVELOPMENT || DEBUG
3846 if (marked_active == TRUE) {
3847 vmct_active--;
3848 vmct_state[cq->id] = VMCT_IDLE;
3849
3850 if (vmct_active == 0) {
3851 compressor_epoch_stop = mach_absolute_time();
3852 assert(compressor_epoch_stop > compressor_epoch_start);
3853 /* This interval includes intervals where one or more
3854 * compressor threads were pre-empted
3855 */
3856 vmct_stats.vmct_cthreads_total += compressor_epoch_stop - compressor_epoch_start;
3857 }
3858
3859 }
3860 #endif
3861 vm_page_unlock_queues();
3862 #if DEVELOPMENT || DEBUG
3863 if (__improbable(vm_compressor_time_thread)) {
3864 vmct_stats.vmct_runtimes[cq->id] = thread_get_runtime_self();
3865 vmct_stats.vmct_pages[cq->id] += ncomps;
3866 vmct_stats.vmct_iterations[cq->id]++;
3867 if (ncomps > vmct_stats.vmct_maxpages[cq->id]) {
3868 vmct_stats.vmct_maxpages[cq->id] = ncomps;
3869 }
3870 if (ncomps < vmct_stats.vmct_minpages[cq->id]) {
3871 vmct_stats.vmct_minpages[cq->id] = ncomps;
3872 }
3873 }
3874 #endif
3875
3876 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, 0, 0, 0, 0, 0);
3877
3878 thread_block_parameter((thread_continue_t)vm_pageout_iothread_internal_continue, (void *) cq);
3879 /*NOTREACHED*/
3880 }
3881
3882
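/*
 * vm_pageout_compress_page:
 *
 * Compress a single page into its object's compressor pager via
 * vm_compressor_pager_put(), creating the compressor pager on demand
 * if necessary.  On success the page is removed from its object; on
 * failure it is reactivated.  'object_locked_by_caller' indicates
 * whether the caller already holds the object lock.
 */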
3883 kern_return_t
3884 vm_pageout_compress_page(void **current_chead, char *scratch_buf, vm_page_t m, boolean_t object_locked_by_caller)
3885 {
3886 vm_object_t object;
3887 memory_object_t pager;
3888 int compressed_count_delta;
3889 kern_return_t retval;
3890
3891 object = VM_PAGE_OBJECT(m);
3892
3893 if (object->object_slid) {
3894 panic("slid page %p not allowed on this path\n", m);
3895 }
3896 assert(!m->free_when_done);
3897 assert(!m->laundry);
3898
3899 pager = object->pager;
3900
3901 if (object_locked_by_caller == FALSE && (!object->pager_initialized || pager == MEMORY_OBJECT_NULL)) {
3902
3903 KERNEL_DEBUG(0xe0400010 | DBG_FUNC_START, object, pager, 0, 0, 0);
3904
3905 vm_object_lock(object);
3906
3907 /*
3908 * If there is no memory object for the page, create
3909 * one and hand it to the compression pager.
3910 */
3911
3912 if (!object->pager_initialized)
3913 vm_object_collapse(object, (vm_object_offset_t) 0, TRUE);
3914 if (!object->pager_initialized)
3915 vm_object_compressor_pager_create(object);
3916
3917 pager = object->pager;
3918
3919 if (!object->pager_initialized || pager == MEMORY_OBJECT_NULL) {
3920 /*
3921 * Still no pager for the object,
3922 * or the pager has been destroyed.
3923 * Reactivate the page.
3924 *
3925 * Should only happen if there is no
3926 * compression pager
3927 */
3928 PAGE_WAKEUP_DONE(m);
3929
3930 vm_page_lockspin_queues();
3931 vm_page_activate(m);
3932 vm_pageout_dirty_no_pager++;
3933 vm_page_unlock_queues();
3934
3935 /*
3936 * And we are done with it.
3937 */
3938 vm_object_activity_end(object);
3939 vm_object_unlock(object);
3940
3941 return KERN_FAILURE;
3942 }
3943 vm_object_unlock(object);
3944
3945 KERNEL_DEBUG(0xe0400010 | DBG_FUNC_END, object, pager, 0, 0, 0);
3946 }
3947 assert(object->pager_initialized && pager != MEMORY_OBJECT_NULL);
3948
3949 if (object_locked_by_caller == FALSE)
3950 assert(object->activity_in_progress > 0);
3951
3952 retval = vm_compressor_pager_put(
3953 pager,
3954 m->offset + object->paging_offset,
3955 VM_PAGE_GET_PHYS_PAGE(m),
3956 current_chead,
3957 scratch_buf,
3958 &compressed_count_delta);
3959
3960 if (object_locked_by_caller == FALSE) {
3961 vm_object_lock(object);
3962
3963 assert(object->activity_in_progress > 0);
3964 assert(VM_PAGE_OBJECT(m) == object);
3965 }
3966
3967 vm_compressor_pager_count(pager,
3968 compressed_count_delta,
3969 FALSE, /* shared_lock */
3970 object);
3971
3972 assert( !VM_PAGE_WIRED(m));
3973
3974 if (retval == KERN_SUCCESS) {
3975 /*
3976 * If the object is purgeable, its owner's
3977 * purgeable ledgers will be updated in
3978 * vm_page_remove() but the page still
3979 * contributes to the owner's memory footprint,
3980 * so account for it as such.
3981 */
3982 if (object->purgable != VM_PURGABLE_DENY &&
3983 object->vo_purgeable_owner != NULL) {
3984 /* one more compressed purgeable page */
3985 vm_purgeable_compressed_update(object,
3986 +1);
3987 }
3988 VM_STAT_INCR(compressions);
3989
3990 if (m->tabled)
3991 vm_page_remove(m, TRUE);
3992
3993 } else {
3994 PAGE_WAKEUP_DONE(m);
3995
3996 vm_page_lockspin_queues();
3997
3998 vm_page_activate(m);
3999 vm_compressor_failed++;
4000
4001 vm_page_unlock_queues();
4002 }
4003 if (object_locked_by_caller == FALSE) {
4004 vm_object_activity_end(object);
4005 vm_object_unlock(object);
4006 }
4007 return retval;
4008 }
4009
4010
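/*
 * vm_pageout_adjust_eq_iothrottle:
 *
 * Switch the external pageout thread between the throttled and
 * unthrottled I/O tiers (forced to unthrottled while hibernation
 * cleaning is in progress).  Called with the page queues locked;
 * the lock is dropped around the policy update.
 */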
4011 static void
4012 vm_pageout_adjust_eq_iothrottle(struct vm_pageout_queue *eq, boolean_t req_lowpriority)
4013 {
4014 uint32_t policy;
4015
4016 if (hibernate_cleaning_in_progress == TRUE)
4017 req_lowpriority = FALSE;
4018
4019 if (eq->pgo_inited == TRUE && eq->pgo_lowpriority != req_lowpriority) {
4020
4021 vm_page_unlock_queues();
4022
4023 if (req_lowpriority == TRUE) {
4024 policy = THROTTLE_LEVEL_PAGEOUT_THROTTLED;
4025 DTRACE_VM(laundrythrottle);
4026 } else {
4027 policy = THROTTLE_LEVEL_PAGEOUT_UNTHROTTLED;
4028 DTRACE_VM(laundryunthrottle);
4029 }
4030 proc_set_thread_policy_with_tid(kernel_task, eq->pgo_tid,
4031 TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy);
4032
4033 eq->pgo_lowpriority = req_lowpriority;
4034
4035 vm_page_lock_queues();
4036 }
4037 }
4038
4039
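/*
 * vm_pageout_iothread_external:
 *
 * Entry point for the external pageout thread: mark the thread
 * VM-privileged, start it in the throttled I/O tier, record its
 * tid in the external pageout queue, then enter the continuation.
 */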
4040 static void
4041 vm_pageout_iothread_external(void)
4042 {
4043 thread_t self = current_thread();
4044
4045 self->options |= TH_OPT_VMPRIV;
4046
4047 DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL);
4048
4049 proc_set_thread_policy(self, TASK_POLICY_EXTERNAL,
4050 TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED);
4051
4052 vm_page_lock_queues();
4053
4054 vm_pageout_queue_external.pgo_tid = self->thread_id;
4055 vm_pageout_queue_external.pgo_lowpriority = TRUE;
4056 vm_pageout_queue_external.pgo_inited = TRUE;
4057
4058 vm_page_unlock_queues();
4059
4060 vm_pageout_iothread_external_continue(&vm_pageout_queue_external);
4061
4062 /*NOTREACHED*/
4063 }
4064
4065
4066 static void
4067 vm_pageout_iothread_internal(struct cq *cq)
4068 {
4069 thread_t self = current_thread();
4070
4071 self->options |= TH_OPT_VMPRIV;
4072
4073 vm_page_lock_queues();
4074
4075 vm_pageout_queue_internal.pgo_tid = self->thread_id;
4076 vm_pageout_queue_internal.pgo_lowpriority = TRUE;
4077 vm_pageout_queue_internal.pgo_inited = TRUE;
4078
4079 vm_page_unlock_queues();
4080
4081 if (vm_restricted_to_single_processor == TRUE)
4082 thread_vm_bind_group_add();
4083
4084
4085 thread_set_thread_name(current_thread(), "VM_compressor");
4086 #if DEVELOPMENT || DEBUG
4087 vmct_stats.vmct_minpages[cq->id] = INT32_MAX;
4088 #endif
4089 vm_pageout_iothread_internal_continue(cq);
4090
4091 /*NOTREACHED*/
4092 }
4093
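/*
 * vm_set_buffer_cleanup_callout:
 *
 * Register the buffer-cache collection callout used by
 * vm_pageout_garbage_collect().  Only the first registration
 * succeeds; subsequent attempts return KERN_FAILURE.
 */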
4094 kern_return_t
4095 vm_set_buffer_cleanup_callout(boolean_t (*func)(int))
4096 {
4097 if (OSCompareAndSwapPtr(NULL, func, (void * volatile *) &consider_buffer_cache_collect)) {
4098 return KERN_SUCCESS;
4099 } else {
4100 return KERN_FAILURE; /* Already set */
4101 }
4102 }
4103
4104 extern boolean_t memorystatus_manual_testing_on;
4105 extern unsigned int memorystatus_level;
4106
4107
4108 #if VM_PRESSURE_EVENTS
4109
4110 boolean_t vm_pressure_events_enabled = FALSE;
4111
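/*
 * vm_pressure_response:
 *
 * Recompute memorystatus_level as the percentage of available
 * (non-compressed) memory and advance the pressure-level state
 * machine; on a transition, wake the pressure thread and any
 * threads waiting for a level change.
 */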
4112 void
4113 vm_pressure_response(void)
4114 {
4115
4116 vm_pressure_level_t old_level = kVMPressureNormal;
4117 int new_level = -1;
4118 unsigned int total_pages;
4119 uint64_t available_memory = 0;
4120
4121 if (vm_pressure_events_enabled == FALSE)
4122 return;
4123
4124 #if CONFIG_EMBEDDED
4125
4126 available_memory = (uint64_t) memorystatus_available_pages;
4127
4128 #else /* CONFIG_EMBEDDED */
4129
4130 available_memory = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY;
4131 memorystatus_available_pages = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY;
4132
4133 #endif /* CONFIG_EMBEDDED */
4134
4135 total_pages = (unsigned int) atop_64(max_mem);
4136 #if CONFIG_SECLUDED_MEMORY
4137 total_pages -= vm_page_secluded_count;
4138 #endif /* CONFIG_SECLUDED_MEMORY */
4139 memorystatus_level = (unsigned int) ((available_memory * 100) / total_pages);
4140
4141 if (memorystatus_manual_testing_on) {
4142 return;
4143 }
4144
4145 old_level = memorystatus_vm_pressure_level;
4146
4147 switch (memorystatus_vm_pressure_level) {
4148
4149 case kVMPressureNormal:
4150 {
4151 if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
4152 new_level = kVMPressureCritical;
4153 } else if (VM_PRESSURE_NORMAL_TO_WARNING()) {
4154 new_level = kVMPressureWarning;
4155 }
4156 break;
4157 }
4158
4159 case kVMPressureWarning:
4160 case kVMPressureUrgent:
4161 {
4162 if (VM_PRESSURE_WARNING_TO_NORMAL()) {
4163 new_level = kVMPressureNormal;
4164 } else if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
4165 new_level = kVMPressureCritical;
4166 }
4167 break;
4168 }
4169
4170 case kVMPressureCritical:
4171 {
4172 if (VM_PRESSURE_WARNING_TO_NORMAL()) {
4173 new_level = kVMPressureNormal;
4174 } else if (VM_PRESSURE_CRITICAL_TO_WARNING()) {
4175 new_level = kVMPressureWarning;
4176 }
4177 break;
4178 }
4179
4180 default:
4181 return;
4182 }
4183
4184 if (new_level != -1) {
4185 memorystatus_vm_pressure_level = (vm_pressure_level_t) new_level;
4186
4187 if ((memorystatus_vm_pressure_level != kVMPressureNormal) || (old_level != new_level)) {
4188 if (vm_pressure_thread_running == FALSE) {
4189 thread_wakeup(&vm_pressure_thread);
4190 }
4191
4192 if (old_level != new_level) {
4193 thread_wakeup(&vm_pressure_changed);
4194 }
4195 }
4196 }
4197
4198 }
4199 #endif /* VM_PRESSURE_EVENTS */
4200
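/*
 * mach_vm_pressure_level_monitor:
 *
 * Return the current VM pressure level in *pressure_level; if
 * wait_for_pressure is TRUE, block (interruptibly) until the level
 * changes from the value passed in.  Not supported on embedded
 * configurations or when VM_PRESSURE_EVENTS is disabled.
 */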
4201 kern_return_t
4202 mach_vm_pressure_level_monitor(__unused boolean_t wait_for_pressure, __unused unsigned int *pressure_level) {
4203
4204 #if CONFIG_EMBEDDED
4205
4206 return KERN_FAILURE;
4207
4208 #elif !VM_PRESSURE_EVENTS
4209
4210 return KERN_FAILURE;
4211
4212 #else /* VM_PRESSURE_EVENTS */
4213
4214 kern_return_t kr = KERN_SUCCESS;
4215
4216 if (pressure_level != NULL) {
4217
4218 vm_pressure_level_t old_level = memorystatus_vm_pressure_level;
4219
4220 if (wait_for_pressure == TRUE) {
4221 wait_result_t wr = 0;
4222
4223 while (old_level == *pressure_level) {
4224 wr = assert_wait((event_t) &vm_pressure_changed,
4225 THREAD_INTERRUPTIBLE);
4226 if (wr == THREAD_WAITING) {
4227 wr = thread_block(THREAD_CONTINUE_NULL);
4228 }
4229 if (wr == THREAD_INTERRUPTED) {
4230 return KERN_ABORTED;
4231 }
4232 if (wr == THREAD_AWAKENED) {
4233
4234 old_level = memorystatus_vm_pressure_level;
4235
4236 if (old_level != *pressure_level) {
4237 break;
4238 }
4239 }
4240 }
4241 }
4242
4243 *pressure_level = old_level;
4244 kr = KERN_SUCCESS;
4245 } else {
4246 kr = KERN_INVALID_ARGUMENT;
4247 }
4248
4249 return kr;
4250 #endif /* VM_PRESSURE_EVENTS */
4251 }
4252
4253 #if VM_PRESSURE_EVENTS
4254 void
4255 vm_pressure_thread(void) {
4256 static boolean_t thread_initialized = FALSE;
4257
4258 if (thread_initialized == TRUE) {
4259 vm_pressure_thread_running = TRUE;
4260 consider_vm_pressure_events();
4261 vm_pressure_thread_running = FALSE;
4262 }
4263
4264 thread_initialized = TRUE;
4265 assert_wait((event_t) &vm_pressure_thread, THREAD_UNINT);
4266 thread_block((thread_continue_t)vm_pressure_thread);
4267 }
4268 #endif /* VM_PRESSURE_EVENTS */
4269
4270
4271 uint32_t vm_pageout_considered_page_last = 0;
4272
4273 /*
4274 * called once per-second via "compute_averages"
4275 */
4276 void
4277 compute_pageout_gc_throttle(__unused void *arg)
4278 {
4279 if (vm_pageout_considered_page != vm_pageout_considered_page_last) {
4280
4281 vm_pageout_considered_page_last = vm_pageout_considered_page;
4282
4283 thread_wakeup((event_t) &vm_pageout_garbage_collect);
4284 }
4285 }
4286
4287 /*
4288 * vm_pageout_garbage_collect can also be called when the zone allocator needs
4289 * to call zone_gc on a different thread in order to trigger zone-map-exhaustion
4290 * jetsams. We need to check if the zone map size is above its jetsam limit to
4291 * decide if this was indeed the case.
4292 *
4293 * We need to do this on a different thread because of the following reasons:
4294 *
4295 * 1. In the case of synchronous jetsams, the leaking process can try to jetsam
4296 * itself causing the system to hang. We perform synchronous jetsams if we're
4297 * leaking in the VM map entries zone, so the leaking process could be doing a
4298 * zalloc for a VM map entry while holding its vm_map lock, when it decides to
4299 * jetsam itself. We also need the vm_map lock on the process termination path,
4300 * which would now lead the dying process to deadlock against itself.
4301 *
4302 * 2. The jetsam path might need to allocate zone memory itself. We could try
4303 * using the non-blocking variant of zalloc for this path, but we can still
4304 * end up trying to do a kernel_memory_allocate when the zone_map is almost
4305 * full.
4306 */
4307
4308 extern boolean_t is_zone_map_nearing_exhaustion(void);
4309
4310 void
4311 vm_pageout_garbage_collect(int collect)
4312 {
4313 if (collect) {
4314 if (is_zone_map_nearing_exhaustion()) {
4315 /*
4316 * Woken up by the zone allocator for zone-map-exhaustion jetsams.
4317 *
4318 * Bail out after calling zone_gc (which triggers the
4319 * zone-map-exhaustion jetsams). If we fall through, the subsequent
4320 * operations that clear out a bunch of caches might allocate zone
4321 * memory themselves (e.g. vm_map operations would need VM map
4322 * entries). Since the zone map is almost full at this point, we
4323 * could end up with a panic. We just need to quickly jetsam a
4324 * process and exit here.
4325 *
4326 * It could so happen that we were woken up to relieve memory
4327 * pressure and the zone map also happened to be near its limit at
4328 * the time, in which case we'll skip out early. But that should be
4329 * ok; if memory pressure persists, the thread will simply be woken
4330 * up again.
4331 */
4332 consider_zone_gc(TRUE);
4333
4334 } else {
4335 /* Woken up by vm_pageout_scan or compute_pageout_gc_throttle. */
4336 boolean_t buf_large_zfree = FALSE;
4337 boolean_t first_try = TRUE;
4338
4339 stack_collect();
4340
4341 consider_machine_collect();
4342 m_drain();
4343
4344 do {
4345 if (consider_buffer_cache_collect != NULL) {
4346 buf_large_zfree = (*consider_buffer_cache_collect)(0);
4347 }
4348 if (first_try == TRUE || buf_large_zfree == TRUE) {
4349 /*
4350 * consider_zone_gc should be last, because the other operations
4351 * might return memory to zones.
4352 */
4353 consider_zone_gc(FALSE);
4354 }
4355 first_try = FALSE;
4356
4357 } while (buf_large_zfree == TRUE && vm_page_free_count < vm_page_free_target);
4358
4359 consider_machine_adjust();
4360 }
4361 }
4362
4363 assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);
4364
4365 thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
4366 /*NOTREACHED*/
4367 }
4368
4369
4370 #if VM_PAGE_BUCKETS_CHECK
4371 #if VM_PAGE_FAKE_BUCKETS
4372 extern vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
4373 #endif /* VM_PAGE_FAKE_BUCKETS */
4374 #endif /* VM_PAGE_BUCKETS_CHECK */
4375
4376
4377
4378 void
4379 vm_set_restrictions()
4380 {
4381 host_basic_info_data_t hinfo;
4382 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
4383
4384 #define BSD_HOST 1
4385 host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
4386
4387 assert(hinfo.max_cpus > 0);
4388
4389 if (hinfo.max_cpus <= 3) {
4390 /*
4391 * on systems with a limited number of CPUs, bind the
4392 * 4 major threads that can free memory and that tend to use
4393 * a fair bit of CPU under pressured conditions to a single processor.
4394 * This ensures that these threads don't hog all of the available CPUs
4395 * (important for camera launch), while allowing them to run independently
4396 * with respect to locks... the 4 threads are
4397 * vm_pageout_scan, vm_pageout_iothread_internal (compressor),
4398 * vm_compressor_swap_trigger_thread (minor and major compactions),
4399 * memorystatus_thread (jetsams).
4400 *
4401 * the first time the thread is run, it is responsible for checking the
4402 * state of vm_restricted_to_single_processor, and if TRUE it calls
4403 * thread_bind_master... someday this should be replaced with a group
4404 * scheduling mechanism and KPI.
4405 */
4406 vm_restricted_to_single_processor = TRUE;
4407 }
4408 }
4409
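/*
 * vm_pageout:
 *
 * Bootstrap for the pageout subsystem: set up the daemon thread's
 * priority and privileges, initialize the pageout tunables and the
 * internal/external pageout queues, start the external iothread,
 * garbage collector and pressure threads, derive vm_config from
 * vm_compressor_mode, and finally enter vm_pageout_continue(),
 * which never returns.
 */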
4410 void
4411 vm_pageout(void)
4412 {
4413 thread_t self = current_thread();
4414 thread_t thread;
4415 kern_return_t result;
4416 spl_t s;
4417
4418 /*
4419 * Set thread privileges.
4420 */
4421 s = splsched();
4422
4423 thread_lock(self);
4424 self->options |= TH_OPT_VMPRIV;
4425 sched_set_thread_base_priority(self, BASEPRI_VM);
4426 thread_unlock(self);
4427
4428 if (!self->reserved_stack)
4429 self->reserved_stack = self->kernel_stack;
4430
4431 if (vm_restricted_to_single_processor == TRUE)
4432 thread_vm_bind_group_add();
4433
4434 splx(s);
4435
4436 thread_set_thread_name(current_thread(), "VM_pageout_scan");
4437
4438 /*
4439 * Initialize some paging parameters.
4440 */
4441
4442 if (vm_pageout_swap_wait == 0)
4443 vm_pageout_swap_wait = VM_PAGEOUT_SWAP_WAIT;
4444
4445 if (vm_pageout_idle_wait == 0)
4446 vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;
4447
4448 if (vm_pageout_burst_wait == 0)
4449 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
4450
4451 if (vm_pageout_empty_wait == 0)
4452 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
4453
4454 if (vm_pageout_deadlock_wait == 0)
4455 vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;
4456
4457 if (vm_pageout_deadlock_relief == 0)
4458 vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;
4459
4460 if (vm_pageout_inactive_relief == 0)
4461 vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF;
4462
4463 if (vm_pageout_burst_active_throttle == 0)
4464 vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE;
4465
4466 if (vm_pageout_burst_inactive_throttle == 0)
4467 vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;
4468
4469 /*
4470 * Set kernel task to low backing store privileged
4471 * status
4472 */
4473 task_lock(kernel_task);
4474 kernel_task->priv_flags |= VM_BACKING_STORE_PRIV;
4475 task_unlock(kernel_task);
4476
4477 vm_page_free_count_init = vm_page_free_count;
4478
4479 /*
4480 * even if we've already called vm_page_free_reserve,
4481 * call it again here to ensure that the targets are
4482 * accurately calculated (it uses vm_page_free_count_init)...
4483 * calling it with an arg of 0 will not change the reserve
4484 * but will re-calculate free_min and free_target
4485 */
4486 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
4487 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
4488 } else
4489 vm_page_free_reserve(0);
4490
4491
4492 vm_page_queue_init(&vm_pageout_queue_external.pgo_pending);
4493 vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
4494 vm_pageout_queue_external.pgo_laundry = 0;
4495 vm_pageout_queue_external.pgo_idle = FALSE;
4496 vm_pageout_queue_external.pgo_busy = FALSE;
4497 vm_pageout_queue_external.pgo_throttled = FALSE;
4498 vm_pageout_queue_external.pgo_draining = FALSE;
4499 vm_pageout_queue_external.pgo_lowpriority = FALSE;
4500 vm_pageout_queue_external.pgo_tid = -1;
4501 vm_pageout_queue_external.pgo_inited = FALSE;
4502
4503 vm_page_queue_init(&vm_pageout_queue_internal.pgo_pending);
4504 vm_pageout_queue_internal.pgo_maxlaundry = 0;
4505 vm_pageout_queue_internal.pgo_laundry = 0;
4506 vm_pageout_queue_internal.pgo_idle = FALSE;
4507 vm_pageout_queue_internal.pgo_busy = FALSE;
4508 vm_pageout_queue_internal.pgo_throttled = FALSE;
4509 vm_pageout_queue_internal.pgo_draining = FALSE;
4510 vm_pageout_queue_internal.pgo_lowpriority = FALSE;
4511 vm_pageout_queue_internal.pgo_tid = -1;
4512 vm_pageout_queue_internal.pgo_inited = FALSE;
4513
4514 /* internal pageout thread started when default pager registered first time */
4515 /* external pageout and garbage collection threads started here */
4516
4517 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL,
4518 BASEPRI_VM,
4519 &vm_pageout_external_iothread);
4520 if (result != KERN_SUCCESS)
4521 panic("vm_pageout_iothread_external: create failed");
4522
4523 thread_deallocate(vm_pageout_external_iothread);
4524
4525 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL,
4526 BASEPRI_DEFAULT,
4527 &thread);
4528 if (result != KERN_SUCCESS)
4529 panic("vm_pageout_garbage_collect: create failed");
4530
4531 thread_deallocate(thread);
4532
4533 #if VM_PRESSURE_EVENTS
4534 result = kernel_thread_start_priority((thread_continue_t)vm_pressure_thread, NULL,
4535 BASEPRI_DEFAULT,
4536 &thread);
4537
4538 if (result != KERN_SUCCESS)
4539 panic("vm_pressure_thread: create failed");
4540
4541 thread_deallocate(thread);
4542 #endif
4543
4544 vm_object_reaper_init();
4545
4546
4547 bzero(&vm_config, sizeof(vm_config));
4548
4549 switch(vm_compressor_mode) {
4550
4551 case VM_PAGER_DEFAULT:
4552 printf("mapping deprecated VM_PAGER_DEFAULT to VM_PAGER_COMPRESSOR_WITH_SWAP\n");
4553
4554 case VM_PAGER_COMPRESSOR_WITH_SWAP:
4555 vm_config.compressor_is_present = TRUE;
4556 vm_config.swap_is_present = TRUE;
4557 vm_config.compressor_is_active = TRUE;
4558 vm_config.swap_is_active = TRUE;
4559 break;
4560
4561 case VM_PAGER_COMPRESSOR_NO_SWAP:
4562 vm_config.compressor_is_present = TRUE;
4563 vm_config.swap_is_present = TRUE;
4564 vm_config.compressor_is_active = TRUE;
4565 break;
4566
4567 case VM_PAGER_FREEZER_DEFAULT:
4568 printf("mapping deprecated VM_PAGER_FREEZER_DEFAULT to VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP\n");
4569
4570 case VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP:
4571 vm_config.compressor_is_present = TRUE;
4572 vm_config.swap_is_present = TRUE;
4573 break;
4574
4575 case VM_PAGER_COMPRESSOR_NO_SWAP_PLUS_FREEZER_COMPRESSOR_WITH_SWAP:
4576 vm_config.compressor_is_present = TRUE;
4577 vm_config.swap_is_present = TRUE;
4578 vm_config.compressor_is_active = TRUE;
4579 vm_config.freezer_swap_is_active = TRUE;
4580 break;
4581
4582 case VM_PAGER_NOT_CONFIGURED:
4583 break;
4584
4585 default:
4586 printf("unknown compressor mode - %x\n", vm_compressor_mode);
4587 break;
4588 }
4589 if (VM_CONFIG_COMPRESSOR_IS_PRESENT)
4590 vm_compressor_pager_init();
4591
4592 #if VM_PRESSURE_EVENTS
4593 vm_pressure_events_enabled = TRUE;
4594 #endif /* VM_PRESSURE_EVENTS */
4595
4596 #if CONFIG_PHANTOM_CACHE
4597 vm_phantom_cache_init();
4598 #endif
4599 #if VM_PAGE_BUCKETS_CHECK
4600 #if VM_PAGE_FAKE_BUCKETS
4601 printf("**** DEBUG: protecting fake buckets [0x%llx:0x%llx]\n",
4602 (uint64_t) vm_page_fake_buckets_start,
4603 (uint64_t) vm_page_fake_buckets_end);
4604 pmap_protect(kernel_pmap,
4605 vm_page_fake_buckets_start,
4606 vm_page_fake_buckets_end,
4607 VM_PROT_READ);
4608 // *(char *) vm_page_fake_buckets_start = 'x'; /* panic! */
4609 #endif /* VM_PAGE_FAKE_BUCKETS */
4610 #endif /* VM_PAGE_BUCKETS_CHECK */
4611
4612 #if VM_OBJECT_TRACKING
4613 vm_object_tracking_init();
4614 #endif /* VM_OBJECT_TRACKING */
4615
4616 vm_tests();
4617
4618 vm_pageout_continue();
4619
4620 /*
4621 * Unreached code!
4622 *
4623 * The vm_pageout_continue() call above never returns, so the code below is never
4624 * executed. We take advantage of this to declare several DTrace VM related probe
4625 * points that our kernel doesn't have an analog for. These are probe points that
4626 * exist in Solaris and are in the DTrace documentation, so people may have written
4627 * scripts that use them. Declaring the probe points here means their scripts will
4628 * compile and execute which we want for portability of the scripts, but since this
4629 * section of code is never reached, the probe points will simply never fire. Yes,
4630 * this is basically a hack. The problem is the DTrace probe points were chosen with
4631 * Solaris specific VM events in mind, not portability to different VM implementations.
4632 */
4633
4634 DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL);
4635 DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL);
4636 DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL);
4637 DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL);
4638 DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL);
4639 DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL);
4640 DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL);
4641 /*NOTREACHED*/
4642 }
4643
4644
4645
4646 #if CONFIG_EMBEDDED
4647 int vm_compressor_thread_count = 1;
4648 #else
4649 int vm_compressor_thread_count = 2;
4650 #endif
4651
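/*
 * vm_pageout_internal_start:
 *
 * Size the compressor thread pool (overridable via the
 * "vmcomp_threads" and "vmpgoi_maxlaundry" boot-args, clamped to
 * [1, MAX_COMPRESSOR_THREAD_COUNT] and below the CPU count) and
 * start one vm_pageout_iothread_internal thread per slot, each
 * with its own scratch buffer.
 */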
4652 kern_return_t
4653 vm_pageout_internal_start(void)
4654 {
4655 kern_return_t result;
4656 int i;
4657 host_basic_info_data_t hinfo;
4658
4659 assert (VM_CONFIG_COMPRESSOR_IS_PRESENT);
4660
4661 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
4662 #define BSD_HOST 1
4663 host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
4664
4665 assert(hinfo.max_cpus > 0);
4666
4667 PE_parse_boot_argn("vmcomp_threads", &vm_compressor_thread_count, sizeof(vm_compressor_thread_count));
4668 if (vm_compressor_thread_count >= hinfo.max_cpus)
4669 vm_compressor_thread_count = hinfo.max_cpus - 1;
4670 if (vm_compressor_thread_count <= 0)
4671 vm_compressor_thread_count = 1;
4672 else if (vm_compressor_thread_count > MAX_COMPRESSOR_THREAD_COUNT)
4673 vm_compressor_thread_count = MAX_COMPRESSOR_THREAD_COUNT;
4674
4675 vm_pageout_queue_internal.pgo_maxlaundry = (vm_compressor_thread_count * 4) * VM_PAGE_LAUNDRY_MAX;
4676
4677 PE_parse_boot_argn("vmpgoi_maxlaundry", &vm_pageout_queue_internal.pgo_maxlaundry, sizeof(vm_pageout_queue_internal.pgo_maxlaundry));
4678
4679 for (i = 0; i < vm_compressor_thread_count; i++) {
4680 ciq[i].id = i;
4681 ciq[i].q = &vm_pageout_queue_internal;
4682 ciq[i].current_chead = NULL;
4683 ciq[i].scratch_buf = kalloc(COMPRESSOR_SCRATCH_BUF_SIZE);
4684
4685 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, (void *)&ciq[i], BASEPRI_VM, &vm_pageout_internal_iothread);
4686
4687 if (result == KERN_SUCCESS)
4688 thread_deallocate(vm_pageout_internal_iothread);
4689 else
4690 break;
4691 }
4692 return result;
4693 }
4694
4695 #if CONFIG_IOSCHED
4696 /*
4697 * To support I/O Expedite for compressed files we mark the upls with special flags.
4698 * The way decmpfs works is that we create a big upl which marks all the pages needed to
4699 * represent the compressed file as busy. We tag this upl with the flag UPL_DECMP_REQ. Decmpfs
4700 * then issues smaller I/Os for compressed I/Os, deflates them and puts the data into the pages
4701 * being held in the big original UPL. We mark each of these smaller UPLs with the flag
4702 * UPL_DECMP_REAL_IO. Any outstanding real I/O UPL is tracked by the big req upl using the
4703 * decmp_io_upl field (in the upl structure). This link is protected in the forward direction
4704 * by the req upl lock (the reverse link doesnt need synch. since we never inspect this link
4705 * unless the real I/O upl is being destroyed).
4706 */
4707
4708
4709 static void
4710 upl_set_decmp_info(upl_t upl, upl_t src_upl)
4711 {
4712 assert((src_upl->flags & UPL_DECMP_REQ) != 0);
4713
4714 upl_lock(src_upl);
4715 if (src_upl->decmp_io_upl) {
4716 /*
4717 * If there is already an alive real I/O UPL, ignore this new UPL.
4718 * This case should rarely happen and even if it does, it just means
4719 * that we might issue a spurious expedite which the driver is expected
4720 * to handle.
4721 */
4722 upl_unlock(src_upl);
4723 return;
4724 }
4725 src_upl->decmp_io_upl = (void *)upl;
4726 src_upl->ref_count++;
4727
4728 upl->flags |= UPL_DECMP_REAL_IO;
4729 upl->decmp_io_upl = (void *)src_upl;
4730 upl_unlock(src_upl);
4731 }
4732 #endif /* CONFIG_IOSCHED */
4733
4734 #if UPL_DEBUG
4735 int upl_debug_enabled = 1;
4736 #else
4737 int upl_debug_enabled = 0;
4738 #endif
4739
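/*
 * upl_create:
 *
 * Allocate and initialize a UPL.  UPL_CREATE_LITE appends a bitmap
 * with one bit per page, UPL_CREATE_INTERNAL appends a
 * upl_page_info array, and UPL_CREATE_IO_TRACKING /
 * UPL_CREATE_EXPEDITE_SUP wire up the I/O scheduling and decmpfs
 * expedite bookkeeping.
 */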
4740 static upl_t
4741 upl_create(int type, int flags, upl_size_t size)
4742 {
4743 upl_t upl;
4744 vm_size_t page_field_size = 0;
4745 int upl_flags = 0;
4746 vm_size_t upl_size = sizeof(struct upl);
4747
4748 size = round_page_32(size);
4749
4750 if (type & UPL_CREATE_LITE) {
4751 page_field_size = (atop(size) + 7) >> 3;
4752 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
4753
4754 upl_flags |= UPL_LITE;
4755 }
4756 if (type & UPL_CREATE_INTERNAL) {
4757 upl_size += sizeof(struct upl_page_info) * atop(size);
4758
4759 upl_flags |= UPL_INTERNAL;
4760 }
4761 upl = (upl_t)kalloc(upl_size + page_field_size);
4762
4763 if (page_field_size)
4764 bzero((char *)upl + upl_size, page_field_size);
4765
4766 upl->flags = upl_flags | flags;
4767 upl->kaddr = (vm_offset_t)0;
4768 upl->size = 0;
4769 upl->map_object = NULL;
4770 upl->ref_count = 1;
4771 upl->ext_ref_count = 0;
4772 upl->highest_page = 0;
4773 upl_lock_init(upl);
4774 upl->vector_upl = NULL;
4775 upl->associated_upl = NULL;
4776 #if CONFIG_IOSCHED
4777 if (type & UPL_CREATE_IO_TRACKING) {
4778 upl->upl_priority = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO);
4779 }
4780
4781 upl->upl_reprio_info = 0;
4782 upl->decmp_io_upl = 0;
4783 if ((type & UPL_CREATE_INTERNAL) && (type & UPL_CREATE_EXPEDITE_SUP)) {
4784 /* Only support expedite on internal UPLs */
4785 thread_t curthread = current_thread();
4786 upl->upl_reprio_info = (uint64_t *)kalloc(sizeof(uint64_t) * atop(size));
4787 bzero(upl->upl_reprio_info, (sizeof(uint64_t) * atop(size)));
4788 upl->flags |= UPL_EXPEDITE_SUPPORTED;
4789 if (curthread->decmp_upl != NULL)
4790 upl_set_decmp_info(upl, curthread->decmp_upl);
4791 }
4792 #endif
4793 #if CONFIG_IOSCHED || UPL_DEBUG
4794 if ((type & UPL_CREATE_IO_TRACKING) || upl_debug_enabled) {
4795 upl->upl_creator = current_thread();
4796 upl->uplq.next = 0;
4797 upl->uplq.prev = 0;
4798 upl->flags |= UPL_TRACKED_BY_OBJECT;
4799 }
4800 #endif
4801
4802 #if UPL_DEBUG
4803 upl->ubc_alias1 = 0;
4804 upl->ubc_alias2 = 0;
4805
4806 upl->upl_state = 0;
4807 upl->upl_commit_index = 0;
4808 bzero(&upl->upl_commit_records[0], sizeof(upl->upl_commit_records));
4809
4810 (void) OSBacktrace(&upl->upl_create_retaddr[0], UPL_DEBUG_STACK_FRAMES);
4811 #endif /* UPL_DEBUG */
4812
4813 return(upl);
4814 }
4815
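/*
 * upl_destroy:
 *
 * Tear down a UPL once its last reference is gone: detach any
 * decmpfs real-I/O link, remove it from its object's UPL queue if
 * it was tracked, drop the shadow object reference if one was
 * taken, and free the upl structure along with its page list and
 * lite-list storage.
 */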
4816 static void
4817 upl_destroy(upl_t upl)
4818 {
4819 int page_field_size; /* bit field in word size buf */
4820 int size;
4821
4822 if (upl->ext_ref_count) {
4823 panic("upl(%p) ext_ref_count", upl);
4824 }
4825
4826 #if CONFIG_IOSCHED
4827 if ((upl->flags & UPL_DECMP_REAL_IO) && upl->decmp_io_upl) {
4828 upl_t src_upl;
4829 src_upl = upl->decmp_io_upl;
4830 assert((src_upl->flags & UPL_DECMP_REQ) != 0);
4831 upl_lock(src_upl);
4832 src_upl->decmp_io_upl = NULL;
4833 upl_unlock(src_upl);
4834 upl_deallocate(src_upl);
4835 }
4836 #endif /* CONFIG_IOSCHED */
4837
4838 #if CONFIG_IOSCHED || UPL_DEBUG
4839 if ((upl->flags & UPL_TRACKED_BY_OBJECT) && !(upl->flags & UPL_VECTOR)) {
4840 vm_object_t object;
4841
4842 if (upl->flags & UPL_SHADOWED) {
4843 object = upl->map_object->shadow;
4844 } else {
4845 object = upl->map_object;
4846 }
4847
4848 vm_object_lock(object);
4849 queue_remove(&object->uplq, upl, upl_t, uplq);
4850 vm_object_activity_end(object);
4851 vm_object_collapse(object, 0, TRUE);
4852 vm_object_unlock(object);
4853 }
4854 #endif
4855 /*
4856 * drop a reference on the map_object whether or
4857 * not a pageout object is inserted
4858 */
4859 if (upl->flags & UPL_SHADOWED)
4860 vm_object_deallocate(upl->map_object);
4861
4862 if (upl->flags & UPL_DEVICE_MEMORY)
4863 size = PAGE_SIZE;
4864 else
4865 size = upl->size;
4866 page_field_size = 0;
4867
4868 if (upl->flags & UPL_LITE) {
4869 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
4870 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
4871 }
4872 upl_lock_destroy(upl);
4873 upl->vector_upl = (vector_upl_t) 0xfeedbeef;
4874
4875 #if CONFIG_IOSCHED
4876 if (upl->flags & UPL_EXPEDITE_SUPPORTED)
4877 kfree(upl->upl_reprio_info, sizeof(uint64_t) * (size/PAGE_SIZE));
4878 #endif
4879
4880 if (upl->flags & UPL_INTERNAL) {
4881 kfree(upl,
4882 sizeof(struct upl) +
4883 (sizeof(struct upl_page_info) * (size/PAGE_SIZE))
4884 + page_field_size);
4885 } else {
4886 kfree(upl, sizeof(struct upl) + page_field_size);
4887 }
4888 }
4889
4890 void
4891 upl_deallocate(upl_t upl)
4892 {
4893 upl_lock(upl);
4894 if (--upl->ref_count == 0) {
4895 if(vector_upl_is_valid(upl))
4896 vector_upl_deallocate(upl);
4897 upl_unlock(upl);
4898 upl_destroy(upl);
4899 }
4900 else
4901 upl_unlock(upl);
4902 }
4903
4904 #if CONFIG_IOSCHED
4905 void
4906 upl_mark_decmp(upl_t upl)
4907 {
4908 if (upl->flags & UPL_TRACKED_BY_OBJECT) {
4909 upl->flags |= UPL_DECMP_REQ;
4910 upl->upl_creator->decmp_upl = (void *)upl;
4911 }
4912 }
4913
4914 void
4915 upl_unmark_decmp(upl_t upl)
4916 {
4917 if(upl && (upl->flags & UPL_DECMP_REQ)) {
4918 upl->upl_creator->decmp_upl = NULL;
4919 }
4920 }
4921
4922 #endif /* CONFIG_IOSCHED */
4923
4924 #define VM_PAGE_Q_BACKING_UP(q) \
4925 ((q)->pgo_laundry >= (((q)->pgo_maxlaundry * 8) / 10))
4926
4927 boolean_t must_throttle_writes(void);
4928
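/*
 * must_throttle_writes:
 *
 * Returns TRUE when the external pageout queue is backing up
 * (laundry at or above 80% of pgo_maxlaundry, per
 * VM_PAGE_Q_BACKING_UP) and pageable external pages exceed 60% of
 * the available non-compressed memory; used to delay new
 * file-backed writes under pressure.
 */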
4929 boolean_t
4930 must_throttle_writes()
4931 {
4932 if (VM_PAGE_Q_BACKING_UP(&vm_pageout_queue_external) &&
4933 vm_page_pageable_external_count > (AVAILABLE_NON_COMPRESSED_MEMORY * 6) / 10)
4934 return (TRUE);
4935
4936 return (FALSE);
4937 }
4938
4939
4940 #if DEVELOPMENT || DEBUG
4941 /*
4942 * Statistics about UPL enforcement of copy-on-write obligations.
4943 */
4944 unsigned long upl_cow = 0;
4945 unsigned long upl_cow_again = 0;
4946 unsigned long upl_cow_pages = 0;
4947 unsigned long upl_cow_again_pages = 0;
4948
4949 unsigned long iopl_cow = 0;
4950 unsigned long iopl_cow_pages = 0;
4951 #endif
4952
4953 /*
4954 * Routine: vm_object_upl_request
4955 * Purpose:
4956 * Cause the population of a portion of a vm_object.
4957 * Depending on the nature of the request, the pages
4958 * returned may contain valid data or be uninitialized.
4959 * A page list structure, listing the physical pages,
4960 * will be returned upon request.
4961 * This function is called by the file system or any other
4962 * supplier of backing store to a pager.
4963 * IMPORTANT NOTE: The caller must still respect the relationship
4964 * between the vm_object and its backing memory object. The
4965 * caller MUST NOT substitute changes in the backing file
4966 * without first doing a memory_object_lock_request on the
4967 * target range unless it is know that the pages are not
4968 * shared with another entity at the pager level.
4969 * Copy_in_to:
4970 * if a page list structure is present
4971 * return the mapped physical pages; where a
4972 * page is not present, return a non-initialized
4973 * one. If the no_sync bit is turned on, don't
4974 * call the pager unlock to synchronize with other
4975 * possible copies of the page. Leave pages busy
4976 * in the original object, if a page list structure
4977 * was specified. When a commit of the page list
4978 * pages is done, the dirty bit will be set for each one.
4979 * Copy_out_from:
4980 * If a page list structure is present, return
4981 * all mapped pages. Where a page does not exist
4982 * map a zero filled one. Leave pages busy in
4983 * the original object. If a page list structure
4984 * is not specified, this call is a no-op.
4985 *
4986 * Note: access of default pager objects has a rather interesting
4987 * twist. The caller of this routine, presumably the file system
4988 * page cache handling code, will never actually make a request
4989 * against a default pager backed object. Only the default
4990 * pager will make requests on backing store related vm_objects
4991 * In this way the default pager can maintain the relationship
4992 * between backing store files (abstract memory objects) and
4993 * the vm_objects (cache objects), they support.
4994 *
4995 */
4996
4997 __private_extern__ kern_return_t
4998 vm_object_upl_request(
4999 vm_object_t object,
5000 vm_object_offset_t offset,
5001 upl_size_t size,
5002 upl_t *upl_ptr,
5003 upl_page_info_array_t user_page_list,
5004 unsigned int *page_list_count,
5005 upl_control_flags_t cntrl_flags,
5006 vm_tag_t tag)
5007 {
5008 vm_page_t dst_page = VM_PAGE_NULL;
5009 vm_object_offset_t dst_offset;
5010 upl_size_t xfer_size;
5011 unsigned int size_in_pages;
5012 boolean_t dirty;
5013 boolean_t hw_dirty;
5014 upl_t upl = NULL;
5015 unsigned int entry;
5016 #if MACH_CLUSTER_STATS
5017 boolean_t encountered_lrp = FALSE;
5018 #endif
5019 vm_page_t alias_page = NULL;
5020 int refmod_state = 0;
5021 wpl_array_t lite_list = NULL;
5022 vm_object_t last_copy_object;
5023 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
5024 struct vm_page_delayed_work *dwp;
5025 int dw_count;
5026 int dw_limit;
5027 int io_tracking_flag = 0;
5028 int grab_options;
5029 ppnum_t phys_page;
5030
5031 if (cntrl_flags & ~UPL_VALID_FLAGS) {
5032 /*
5033 * For forward compatibility's sake,
5034 * reject any unknown flag.
5035 */
5036 return KERN_INVALID_VALUE;
5037 }
5038 if ( (!object->internal) && (object->paging_offset != 0) )
5039 panic("vm_object_upl_request: external object with non-zero paging offset\n");
5040 if (object->phys_contiguous)
5041 panic("vm_object_upl_request: contiguous object specified\n");
5042
5043
5044 if (size > MAX_UPL_SIZE_BYTES)
5045 size = MAX_UPL_SIZE_BYTES;
5046
5047 if ( (cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL)
5048 *page_list_count = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT;
5049
5050 #if CONFIG_IOSCHED || UPL_DEBUG
5051 if (object->io_tracking || upl_debug_enabled)
5052 io_tracking_flag |= UPL_CREATE_IO_TRACKING;
5053 #endif
5054 #if CONFIG_IOSCHED
5055 if (object->io_tracking)
5056 io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
5057 #endif
5058
5059 if (cntrl_flags & UPL_SET_INTERNAL) {
5060 if (cntrl_flags & UPL_SET_LITE) {
5061
5062 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
5063
5064 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
5065 lite_list = (wpl_array_t)
5066 (((uintptr_t)user_page_list) +
5067 ((size/PAGE_SIZE) * sizeof(upl_page_info_t)));
5068 if (size == 0) {
5069 user_page_list = NULL;
5070 lite_list = NULL;
5071 }
5072 } else {
5073 upl = upl_create(UPL_CREATE_INTERNAL | io_tracking_flag, 0, size);
5074
5075 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
5076 if (size == 0) {
5077 user_page_list = NULL;
5078 }
5079 }
5080 } else {
5081 if (cntrl_flags & UPL_SET_LITE) {
5082
5083 upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
5084
5085 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
5086 if (size == 0) {
5087 lite_list = NULL;
5088 }
5089 } else {
5090 upl = upl_create(UPL_CREATE_EXTERNAL | io_tracking_flag, 0, size);
5091 }
5092 }
5093 *upl_ptr = upl;
5094
5095 if (user_page_list)
5096 user_page_list[0].device = FALSE;
5097
5098 if (cntrl_flags & UPL_SET_LITE) {
5099 upl->map_object = object;
5100 } else {
5101 upl->map_object = vm_object_allocate(size);
5102 /*
5103 * No need to lock the new object: nobody else knows
5104 * about it yet, so it's all ours so far.
5105 */
5106 upl->map_object->shadow = object;
5107 upl->map_object->pageout = TRUE;
5108 upl->map_object->can_persist = FALSE;
5109 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
5110 upl->map_object->vo_shadow_offset = offset;
5111 upl->map_object->wimg_bits = object->wimg_bits;
5112
5113 VM_PAGE_GRAB_FICTITIOUS(alias_page);
5114
5115 upl->flags |= UPL_SHADOWED;
5116 }
5117 if (cntrl_flags & UPL_FOR_PAGEOUT)
5118 upl->flags |= UPL_PAGEOUT;
5119
5120 vm_object_lock(object);
5121 vm_object_activity_begin(object);
5122
5123 grab_options = 0;
5124 #if CONFIG_SECLUDED_MEMORY
5125 if (object->can_grab_secluded) {
5126 grab_options |= VM_PAGE_GRAB_SECLUDED;
5127 }
5128 #endif /* CONFIG_SECLUDED_MEMORY */
5129
5130 /*
5131 * we can lock in the paging_offset once paging_in_progress is set
5132 */
5133 upl->size = size;
5134 upl->offset = offset + object->paging_offset;
5135
5136 #if CONFIG_IOSCHED || UPL_DEBUG
5137 if (object->io_tracking || upl_debug_enabled) {
5138 vm_object_activity_begin(object);
5139 queue_enter(&object->uplq, upl, upl_t, uplq);
5140 }
5141 #endif
5142 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != VM_OBJECT_NULL) {
5143 /*
5144 * Honor copy-on-write obligations
5145 *
5146 * The caller is gathering these pages and
5147 * might modify their contents. We need to
5148 * make sure that the copy object has its own
5149 * private copies of these pages before we let
5150 * the caller modify them.
5151 */
5152 vm_object_update(object,
5153 offset,
5154 size,
5155 NULL,
5156 NULL,
5157 FALSE, /* should_return */
5158 MEMORY_OBJECT_COPY_SYNC,
5159 VM_PROT_NO_CHANGE);
5160 #if DEVELOPMENT || DEBUG
5161 upl_cow++;
5162 upl_cow_pages += size >> PAGE_SHIFT;
5163 #endif
5164 }
5165 /*
5166 * remember which copy object we synchronized with
5167 */
5168 last_copy_object = object->copy;
5169 entry = 0;
5170
5171 xfer_size = size;
5172 dst_offset = offset;
5173 size_in_pages = size / PAGE_SIZE;
5174
5175 dwp = &dw_array[0];
5176 dw_count = 0;
5177 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
5178
5179 if (vm_page_free_count > (vm_page_free_target + size_in_pages) ||
5180 object->resident_page_count < ((MAX_UPL_SIZE_BYTES * 2) >> PAGE_SHIFT))
5181 object->scan_collisions = 0;
5182
5183 if ((cntrl_flags & UPL_WILL_MODIFY) && must_throttle_writes() == TRUE) {
5184 boolean_t isSSD = FALSE;
5185
5186 #if CONFIG_EMBEDDED
5187 isSSD = TRUE;
5188 #else
5189 vnode_pager_get_isSSD(object->pager, &isSSD);
5190 #endif
5191 vm_object_unlock(object);
5192
5193 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
5194
5195 if (isSSD == TRUE)
5196 delay(1000 * size_in_pages);
5197 else
5198 delay(5000 * size_in_pages);
5199 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
5200
5201 vm_object_lock(object);
5202 }
5203
5204 while (xfer_size) {
5205
5206 dwp->dw_mask = 0;
5207
5208 if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
5209 vm_object_unlock(object);
5210 VM_PAGE_GRAB_FICTITIOUS(alias_page);
5211 vm_object_lock(object);
5212 }
5213 if (cntrl_flags & UPL_COPYOUT_FROM) {
5214 upl->flags |= UPL_PAGE_SYNC_DONE;
5215
5216 if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
5217 dst_page->fictitious ||
5218 dst_page->absent ||
5219 dst_page->error ||
5220 dst_page->cleaning ||
5221 (VM_PAGE_WIRED(dst_page))) {
5222
5223 if (user_page_list)
5224 user_page_list[entry].phys_addr = 0;
5225
5226 goto try_next_page;
5227 }
5228 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
5229
5230 /*
5231 * grab this up front...
5232 * a high percentage of the time we're going to
5233 * need the hardware modification state a bit later
5234 * anyway... so we can eliminate an extra call into
5235 * the pmap layer by grabbing it here and recording it
5236 */
5237 if (dst_page->pmapped)
5238 refmod_state = pmap_get_refmod(phys_page);
5239 else
5240 refmod_state = 0;
5241
5242 if ( (refmod_state & VM_MEM_REFERENCED) && VM_PAGE_INACTIVE(dst_page)) {
5243 /*
5244 * page is on inactive list and referenced...
5245 * reactivate it now... this gets it out of the
5246 * way of vm_pageout_scan which would have to
5247 * reactivate it upon tripping over it
5248 */
5249 dwp->dw_mask |= DW_vm_page_activate;
5250 }
5251 if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
5252 /*
5253 * we're only asking for DIRTY pages to be returned
5254 */
5255 if (dst_page->laundry || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
5256 /*
5257 * if we were the page stolen by vm_pageout_scan to be
5258 * cleaned (as opposed to a buddy being clustered in),
5259 * or this request is not being driven by a PAGEOUT cluster,
5260 * then we only need to check for the page being dirty or
5261 * precious to decide whether to return it
5262 */
5263 if (dst_page->dirty || dst_page->precious || (refmod_state & VM_MEM_MODIFIED))
5264 goto check_busy;
5265 goto dont_return;
5266 }
5267 /*
5268 * this is a request for a PAGEOUT cluster and this page
5269 * is merely along for the ride as a 'buddy'... not only
5270 * does it have to be dirty to be returned, but it also
5271 * can't have been referenced recently...
5272 */
5273 if ( (hibernate_cleaning_in_progress == TRUE ||
5274 (!((refmod_state & VM_MEM_REFERENCED) || dst_page->reference) ||
5275 (dst_page->vm_page_q_state == VM_PAGE_ON_THROTTLED_Q))) &&
5276 ((refmod_state & VM_MEM_MODIFIED) || dst_page->dirty || dst_page->precious) ) {
5277 goto check_busy;
5278 }
5279 dont_return:
5280 /*
5281 * if we reach here, we're not to return
5282 * the page... go on to the next one
5283 */
5284 if (dst_page->laundry == TRUE) {
5285 /*
5286 * if we get here, the page is not 'cleaning' (filtered out above).
5287 * since it has been referenced, remove it from the laundry
5288 * so we don't pay the cost of an I/O to clean a page
5289 * we're just going to take back
5290 */
5291 vm_page_lockspin_queues();
5292
5293 vm_pageout_steal_laundry(dst_page, TRUE);
5294 vm_page_activate(dst_page);
5295
5296 vm_page_unlock_queues();
5297 }
5298 if (user_page_list)
5299 user_page_list[entry].phys_addr = 0;
5300
5301 goto try_next_page;
5302 }
5303 check_busy:
5304 if (dst_page->busy) {
5305 if (cntrl_flags & UPL_NOBLOCK) {
5306 if (user_page_list)
5307 user_page_list[entry].phys_addr = 0;
5308 dwp->dw_mask = 0;
5309
5310 goto try_next_page;
5311 }
5312 /*
5313 * someone else is playing with the
5314 * page. We will have to wait.
5315 */
5316 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
5317
5318 continue;
5319 }
5320 if (dst_page->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) {
5321
5322 vm_page_lockspin_queues();
5323
5324 if (dst_page->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) {
5325 /*
5326 * we've buddied up a page for a clustered pageout
5327 * that has already been moved to the pageout
5328 * queue by pageout_scan... we need to remove
5329 * it from the queue and drop the laundry count
5330 * on that queue
5331 */
5332 vm_pageout_throttle_up(dst_page);
5333 }
5334 vm_page_unlock_queues();
5335 }
5336 #if MACH_CLUSTER_STATS
5337 /*
5338 * pageout statistics gathering. count
5339 * all the pages we will page out that
5340 * were not counted in the initial
5341 * vm_pageout_scan work
5342 */
5343 if (dst_page->pageout)
5344 encountered_lrp = TRUE;
5345 if ((dst_page->dirty || (object->internal && dst_page->precious))) {
5346 if (encountered_lrp)
5347 CLUSTER_STAT(pages_at_higher_offsets++;)
5348 else
5349 CLUSTER_STAT(pages_at_lower_offsets++;)
5350 }
5351 #endif
5352 hw_dirty = refmod_state & VM_MEM_MODIFIED;
5353 dirty = hw_dirty ? TRUE : dst_page->dirty;
5354
5355 if (phys_page > upl->highest_page)
5356 upl->highest_page = phys_page;
5357
5358 assert (!pmap_is_noencrypt(phys_page));
5359
5360 if (cntrl_flags & UPL_SET_LITE) {
5361 unsigned int pg_num;
5362
5363 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
5364 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
5365 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
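/*
 * lite_list is a bitmap with one bit per page of the UPL:
 * pg_num >> 5 selects the 32-bit word and pg_num & 31 the bit
 * within it.  For example, page 37 of the UPL lands in word 1,
 * bit 5 (37 >> 5 == 1, 37 & 31 == 5).
 */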
5366
5367 if (hw_dirty)
5368 pmap_clear_modify(phys_page);
5369
5370 /*
5371 * Mark original page as cleaning
5372 * in place.
5373 */
5374 dst_page->cleaning = TRUE;
5375 dst_page->precious = FALSE;
5376 } else {
5377 /*
5378 * use pageclean setup; it is more
5379 * convenient even for the pageout
5380 * cases here
5381 */
5382 vm_object_lock(upl->map_object);
5383 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
5384 vm_object_unlock(upl->map_object);
5385
5386 alias_page->absent = FALSE;
5387 alias_page = NULL;
5388 }
5389 if (dirty) {
5390 SET_PAGE_DIRTY(dst_page, FALSE);
5391 } else {
5392 dst_page->dirty = FALSE;
5393 }
5394
5395 if (!dirty)
5396 dst_page->precious = TRUE;
5397
5398 if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) {
5399 if ( !VM_PAGE_WIRED(dst_page))
5400 dst_page->free_when_done = TRUE;
5401 }
5402 } else {
5403 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != last_copy_object) {
5404 /*
5405 * Honor copy-on-write obligations
5406 *
5407 * The copy object has changed since we
5408 * last synchronized for copy-on-write.
5409 * Another copy object might have been
5410 * inserted while we released the object's
5411 * lock. Since someone could have seen the
5412 * original contents of the remaining pages
5413 * through that new object, we have to
5414 * synchronize with it again for the remaining
5415 * pages only. The previous pages are "busy"
5416 * so they can not be seen through the new
5417 * mapping. The new mapping will see our
5418 * upcoming changes for those previous pages,
5419 * but that's OK since they couldn't see what
5420 * was there before. It's just a race anyway
5421 * and there's no guarantee of consistency or
5422 * atomicity. We just don't want new mappings
5423 * to see both the *before* and *after* pages.
5424 */
5425 if (object->copy != VM_OBJECT_NULL) {
5426 vm_object_update(
5427 object,
5428 dst_offset,/* current offset */
5429 xfer_size, /* remaining size */
5430 NULL,
5431 NULL,
5432 FALSE, /* should_return */
5433 MEMORY_OBJECT_COPY_SYNC,
5434 VM_PROT_NO_CHANGE);
5435
5436 #if DEVELOPMENT || DEBUG
5437 upl_cow_again++;
5438 upl_cow_again_pages += xfer_size >> PAGE_SHIFT;
5439 #endif
5440 }
5441 /*
5442 * remember the copy object we synced with
5443 */
5444 last_copy_object = object->copy;
5445 }
5446 dst_page = vm_page_lookup(object, dst_offset);
5447
5448 if (dst_page != VM_PAGE_NULL) {
5449
5450 if ((cntrl_flags & UPL_RET_ONLY_ABSENT)) {
5451 /*
5452 * skip over pages already present in the cache
5453 */
5454 if (user_page_list)
5455 user_page_list[entry].phys_addr = 0;
5456
5457 goto try_next_page;
5458 }
5459 if (dst_page->fictitious) {
5460 panic("need corner case for fictitious page");
5461 }
5462
5463 if (dst_page->busy || dst_page->cleaning) {
5464 /*
5465 * someone else is playing with the
5466 * page. We will have to wait.
5467 */
5468 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
5469
5470 continue;
5471 }
5472 if (dst_page->laundry)
5473 vm_pageout_steal_laundry(dst_page, FALSE);
5474 } else {
5475 if (object->private) {
5476 /*
5477 * This is a nasty wrinkle for users
5478 * of upl who encounter device or
5479 * private memory; however, it is
5480 * unavoidable, since only a fault can
5481 * resolve the actual backing
5482 * physical page by asking the
5483 * backing device.
5484 */
5485 if (user_page_list)
5486 user_page_list[entry].phys_addr = 0;
5487
5488 goto try_next_page;
5489 }
5490 if (object->scan_collisions) {
5491 /*
5492 * the pageout_scan thread is trying to steal
5493 * pages from this object, but has run into our
5494 * lock... grab 2 pages from the head of the object...
5495 * the first is freed on behalf of pageout_scan, the
5496 * 2nd is for our own use... we use vm_object_page_grab
5497 * in both cases to avoid taking pages from the free
5498 * list since we are under memory pressure and our
5499 * lock on this object is getting in the way of
5500 * relieving it
5501 */
5502 dst_page = vm_object_page_grab(object);
5503
5504 if (dst_page != VM_PAGE_NULL)
5505 vm_page_release(dst_page,
5506 FALSE);
5507
5508 dst_page = vm_object_page_grab(object);
5509 }
5510 if (dst_page == VM_PAGE_NULL) {
5511 /*
5512 * need to allocate a page
5513 */
5514 dst_page = vm_page_grab_options(grab_options);
5515 }
5516 if (dst_page == VM_PAGE_NULL) {
5517 if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) {
5518 /*
5519 * we don't want to stall waiting for pages to come onto the free list
5520 * while we're already holding absent pages in this UPL...
5521 * the caller will deal with the empty slots
5522 */
5523 if (user_page_list)
5524 user_page_list[entry].phys_addr = 0;
5525
5526 goto try_next_page;
5527 }
5528 /*
5529 * no pages available... wait
5530 * then try again for the same
5531 * offset...
5532 */
5533 vm_object_unlock(object);
5534
5535 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
5536
5537 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
5538
5539 VM_PAGE_WAIT();
5540 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
5541
5542 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
5543
5544 vm_object_lock(object);
5545
5546 continue;
5547 }
5548 vm_page_insert(dst_page, object, dst_offset);
5549
5550 dst_page->absent = TRUE;
5551 dst_page->busy = FALSE;
5552
5553 if (cntrl_flags & UPL_RET_ONLY_ABSENT) {
5554 /*
5555 * if UPL_RET_ONLY_ABSENT was specified,
5556 * then we're definitely setting up a
5557 * UPL for a clustered read/pagein
5558 * operation... mark the pages as clustered
5559 * so upl_commit_range can put them on the
5560 * speculative list
5561 */
5562 dst_page->clustered = TRUE;
5563
5564 if ( !(cntrl_flags & UPL_FILE_IO))
5565 VM_STAT_INCR(pageins);
5566 }
5567 }
5568 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
5569
5570 dst_page->overwriting = TRUE;
5571
5572 if (dst_page->pmapped) {
5573 if ( !(cntrl_flags & UPL_FILE_IO))
5574 /*
5575 * eliminate all mappings from the
5576 * original object and its progeny
5577 */
5578 refmod_state = pmap_disconnect(phys_page);
5579 else
5580 refmod_state = pmap_get_refmod(phys_page);
5581 } else
5582 refmod_state = 0;
5583
5584 hw_dirty = refmod_state & VM_MEM_MODIFIED;
5585 dirty = hw_dirty ? TRUE : dst_page->dirty;
5586
5587 if (cntrl_flags & UPL_SET_LITE) {
5588 unsigned int pg_num;
5589
5590 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
5591 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
5592 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
5593
5594 if (hw_dirty)
5595 pmap_clear_modify(phys_page);
5596
5597 /*
5598 * Mark original page as cleaning
5599 * in place.
5600 */
5601 dst_page->cleaning = TRUE;
5602 dst_page->precious = FALSE;
5603 } else {
5604 /*
5605 * use pageclean setup; it is more
5606 * convenient even for the pageout
5607 * cases here
5608 */
5609 vm_object_lock(upl->map_object);
5610 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
5611 vm_object_unlock(upl->map_object);
5612
5613 alias_page->absent = FALSE;
5614 alias_page = NULL;
5615 }
5616
5617 if (cntrl_flags & UPL_REQUEST_SET_DIRTY) {
5618 upl->flags &= ~UPL_CLEAR_DIRTY;
5619 upl->flags |= UPL_SET_DIRTY;
5620 dirty = TRUE;
5622 } else if (cntrl_flags & UPL_CLEAN_IN_PLACE) {
5623 /*
5624 * clean in place for read implies
5625 * that a write will be done on all
5626 * the pages that are dirty before
5627 * a upl commit is done. The caller
5628 * is obligated to preserve the
5629 * contents of all pages marked dirty
5630 */
5631 upl->flags |= UPL_CLEAR_DIRTY;
5632 }
5633 dst_page->dirty = dirty;
5634
5635 if (!dirty)
5636 dst_page->precious = TRUE;
5637
5638 if ( !VM_PAGE_WIRED(dst_page)) {
5639 /*
5640 * deny access to the target page while
5641 * it is being worked on
5642 */
5643 dst_page->busy = TRUE;
5644 } else
5645 dwp->dw_mask |= DW_vm_page_wire;
5646
5647 /*
5648 * We might be about to satisfy a fault which has been
5649 * requested. So no need for the "restart" bit.
5650 */
5651 dst_page->restart = FALSE;
5652 if (!dst_page->absent && !(cntrl_flags & UPL_WILL_MODIFY)) {
5653 /*
5654 * expect the page to be used
5655 */
5656 dwp->dw_mask |= DW_set_reference;
5657 }
5658 if (cntrl_flags & UPL_PRECIOUS) {
5659 if (object->internal) {
5660 SET_PAGE_DIRTY(dst_page, FALSE);
5661 dst_page->precious = FALSE;
5662 } else {
5663 dst_page->precious = TRUE;
5664 }
5665 } else {
5666 dst_page->precious = FALSE;
5667 }
5668 }
5669 if (dst_page->busy)
5670 upl->flags |= UPL_HAS_BUSY;
5671
5672 if (phys_page > upl->highest_page)
5673 upl->highest_page = phys_page;
5674 assert (!pmap_is_noencrypt(phys_page));
5675 if (user_page_list) {
5676 user_page_list[entry].phys_addr = phys_page;
5677 user_page_list[entry].free_when_done = dst_page->free_when_done;
5678 user_page_list[entry].absent = dst_page->absent;
5679 user_page_list[entry].dirty = dst_page->dirty;
5680 user_page_list[entry].precious = dst_page->precious;
5681 user_page_list[entry].device = FALSE;
5682 user_page_list[entry].needed = FALSE;
5683 if (dst_page->clustered == TRUE)
5684 user_page_list[entry].speculative = (dst_page->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE;
5685 else
5686 user_page_list[entry].speculative = FALSE;
5687 user_page_list[entry].cs_validated = dst_page->cs_validated;
5688 user_page_list[entry].cs_tainted = dst_page->cs_tainted;
5689 user_page_list[entry].cs_nx = dst_page->cs_nx;
5690 user_page_list[entry].mark = FALSE;
5691 }
5692 /*
5693 * if UPL_RET_ONLY_ABSENT is set, then
5694 * we are working with a fresh page and we've
5695 * just set the clustered flag on it to
5696 * indicate that it was dragged in as part of a
5697 * speculative cluster... so leave it alone
5698 */
5699 if ( !(cntrl_flags & UPL_RET_ONLY_ABSENT)) {
5700 /*
5701 * someone is explicitly grabbing this page...
5702 * update clustered and speculative state
5703 *
5704 */
5705 if (dst_page->clustered)
5706 VM_PAGE_CONSUME_CLUSTERED(dst_page);
5707 }
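/*
 * try_next_page: any queue manipulation decided on for this page is
 * recorded in dwp->dw_mask and batched through the delayed-work array,
 * so vm_page_do_delayed_work() can apply up to dw_limit entries under a
 * single page-queues lock hold instead of locking once per page.
 */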
5708 try_next_page:
5709 if (dwp->dw_mask) {
5710 if (dwp->dw_mask & DW_vm_page_activate)
5711 VM_STAT_INCR(reactivations);
5712
5713 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
5714
5715 if (dw_count >= dw_limit) {
5716 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
5717
5718 dwp = &dw_array[0];
5719 dw_count = 0;
5720 }
5721 }
5722 entry++;
5723 dst_offset += PAGE_SIZE_64;
5724 xfer_size -= PAGE_SIZE;
5725 }
5726 if (dw_count)
5727 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
5728
5729 if (alias_page != NULL) {
5730 VM_PAGE_FREE(alias_page);
5731 }
5732
5733 if (page_list_count != NULL) {
5734 if (upl->flags & UPL_INTERNAL)
5735 *page_list_count = 0;
5736 else if (*page_list_count > entry)
5737 *page_list_count = entry;
5738 }
5739 #if UPL_DEBUG
5740 upl->upl_state = 1;
5741 #endif
5742 vm_object_unlock(object);
5743
5744 return KERN_SUCCESS;
5745 }
5746
5747 /*
5748 * Routine: vm_object_super_upl_request
5749 * Purpose:
5750 * Cause the population of a portion of a vm_object
5751 * in much the same way as memory_object_upl_request.
5752 * Depending on the nature of the request, the pages
5753 * returned may contain valid data or be uninitialized.
5754 * However, the region may be expanded up to the super
5755 * cluster size provided.
5756 */
5757
5758 __private_extern__ kern_return_t
5759 vm_object_super_upl_request(
5760 vm_object_t object,
5761 vm_object_offset_t offset,
5762 upl_size_t size,
5763 upl_size_t super_cluster,
5764 upl_t *upl,
5765 upl_page_info_t *user_page_list,
5766 unsigned int *page_list_count,
5767 upl_control_flags_t cntrl_flags,
5768 vm_tag_t tag)
5769 {
5770 if (object->paging_offset > offset || ((cntrl_flags & UPL_VECTOR)==UPL_VECTOR))
5771 return KERN_FAILURE;
5772
5773 assert(object->paging_in_progress);
5774 offset = offset - object->paging_offset;
5775
5776 if (super_cluster > size) {
5777
5778 vm_object_offset_t base_offset;
5779 upl_size_t super_size;
5780 vm_object_size_t super_size_64;
5781
5782 base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1));
5783 super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster;
5784 super_size_64 = ((base_offset + super_size) > object->vo_size) ? (object->vo_size - base_offset) : super_size;
5785 super_size = (upl_size_t) super_size_64;
5786 assert(super_size == super_size_64);
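/*
 * Illustrative example (assumes 4K pages and a 64K super_cluster, which
 * must be a power of two): a request for offset 0x11000, size 0x2000
 * gives base_offset = 0x10000; since 0x13000 does not extend past
 * base_offset + super_cluster (0x20000), the cluster is not doubled and
 * super_size stays 0x10000, so the UPL is built for [0x10000, 0x20000)
 * rather than just the two requested pages (subject to the object-size
 * clamp above).
 */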
5787
5788 if (offset > (base_offset + super_size)) {
5789 panic("vm_object_super_upl_request: Missed target pageout"
5790 " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
5791 offset, base_offset, super_size, super_cluster,
5792 size, object->paging_offset);
5793 }
5794 /*
5795 * apparently there is a case where the vm requests a
5796 * page to be written out whose offset is beyond the
5797 * object size
5798 */
5799 if ((offset + size) > (base_offset + super_size)) {
5800 super_size_64 = (offset + size) - base_offset;
5801 super_size = (upl_size_t) super_size_64;
5802 assert(super_size == super_size_64);
5803 }
5804
5805 offset = base_offset;
5806 size = super_size;
5807 }
5808 return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags, tag);
5809 }
5810
5811 #if CONFIG_EMBEDDED
5812 int cs_executable_create_upl = 0;
5813 extern int proc_selfpid(void);
5814 extern char *proc_name_address(void *p);
5815 #endif /* CONFIG_EMBEDDED */
5816
5817 kern_return_t
5818 vm_map_create_upl(
5819 vm_map_t map,
5820 vm_map_address_t offset,
5821 upl_size_t *upl_size,
5822 upl_t *upl,
5823 upl_page_info_array_t page_list,
5824 unsigned int *count,
5825 upl_control_flags_t *flags,
5826 vm_tag_t tag)
5827 {
5828 vm_map_entry_t entry;
5829 upl_control_flags_t caller_flags;
5830 int force_data_sync;
5831 int sync_cow_data;
5832 vm_object_t local_object;
5833 vm_map_offset_t local_offset;
5834 vm_map_offset_t local_start;
5835 kern_return_t ret;
5836
5837 assert(page_aligned(offset));
5838
5839 caller_flags = *flags;
5840
5841 if (caller_flags & ~UPL_VALID_FLAGS) {
5842 /*
5843 * For forward compatibility's sake,
5844 * reject any unknown flag.
5845 */
5846 return KERN_INVALID_VALUE;
5847 }
5848 force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
5849 sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);
5850
5851 if (upl == NULL)
5852 return KERN_INVALID_ARGUMENT;
5853
5854 REDISCOVER_ENTRY:
5855 vm_map_lock_read(map);
5856
5857 if (!vm_map_lookup_entry(map, offset, &entry)) {
5858 vm_map_unlock_read(map);
5859 return KERN_FAILURE;
5860 }
5861
5862 if ((entry->vme_end - offset) < *upl_size) {
5863 *upl_size = (upl_size_t) (entry->vme_end - offset);
5864 assert(*upl_size == entry->vme_end - offset);
5865 }
5866
5867 if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
5868 *flags = 0;
5869
5870 if (!entry->is_sub_map &&
5871 VME_OBJECT(entry) != VM_OBJECT_NULL) {
5872 if (VME_OBJECT(entry)->private)
5873 *flags = UPL_DEV_MEMORY;
5874
5875 if (VME_OBJECT(entry)->phys_contiguous)
5876 *flags |= UPL_PHYS_CONTIG;
5877 }
5878 vm_map_unlock_read(map);
5879 return KERN_SUCCESS;
5880 }
5881
5882 if (VME_OBJECT(entry) == VM_OBJECT_NULL ||
5883 !VME_OBJECT(entry)->phys_contiguous) {
5884 if (*upl_size > MAX_UPL_SIZE_BYTES)
5885 *upl_size = MAX_UPL_SIZE_BYTES;
5886 }
5887
5888 /*
5889 * Create an object if necessary.
5890 */
5891 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
5892
5893 if (vm_map_lock_read_to_write(map))
5894 goto REDISCOVER_ENTRY;
5895
5896 VME_OBJECT_SET(entry,
5897 vm_object_allocate((vm_size_t)
5898 (entry->vme_end -
5899 entry->vme_start)));
5900 VME_OFFSET_SET(entry, 0);
5901 assert(entry->use_pmap);
5902
5903 vm_map_lock_write_to_read(map);
5904 }
5905
5906 if (!(caller_flags & UPL_COPYOUT_FROM) &&
5907 !(entry->protection & VM_PROT_WRITE)) {
5908 vm_map_unlock_read(map);
5909 return KERN_PROTECTION_FAILURE;
5910 }
5911
5912 #if CONFIG_EMBEDDED
5913 if (map->pmap != kernel_pmap &&
5914 (caller_flags & UPL_COPYOUT_FROM) &&
5915 (entry->protection & VM_PROT_EXECUTE) &&
5916 !(entry->protection & VM_PROT_WRITE)) {
5917 vm_offset_t kaddr;
5918 vm_size_t ksize;
5919
5920 /*
5921 * We're about to create a read-only UPL backed by
5922 * memory from an executable mapping.
5923 * Wiring the pages would result in the pages being copied
5924 * (due to the "MAP_PRIVATE" mapping) and no longer
5925 * code-signed, so no longer eligible for execution.
5926 * Instead, let's copy the data into a kernel buffer and
5927 * create the UPL from this kernel buffer.
5928 * The kernel buffer is then freed, leaving the UPL holding
5929 * the last reference on the VM object, so the memory will
5930 * be released when the UPL is committed.
5931 */
5932
5933 vm_map_unlock_read(map);
5934 /* allocate kernel buffer */
5935 ksize = round_page(*upl_size);
5936 kaddr = 0;
5937 ret = kmem_alloc_pageable(kernel_map,
5938 &kaddr,
5939 ksize,
5940 tag);
5941 if (ret == KERN_SUCCESS) {
5942 /* copyin the user data */
5943 assert(page_aligned(offset));
5944 ret = copyinmap(map, offset, (void *)kaddr, *upl_size);
5945 }
5946 if (ret == KERN_SUCCESS) {
5947 if (ksize > *upl_size) {
5948 /* zero out the extra space in kernel buffer */
5949 memset((void *)(kaddr + *upl_size),
5950 0,
5951 ksize - *upl_size);
5952 }
5953 /* create the UPL from the kernel buffer */
5954 ret = vm_map_create_upl(kernel_map, kaddr, upl_size,
5955 upl, page_list, count, flags, tag);
5956 }
5957 if (kaddr != 0) {
5958 /* free the kernel buffer */
5959 kmem_free(kernel_map, kaddr, ksize);
5960 kaddr = 0;
5961 ksize = 0;
5962 }
5963 #if DEVELOPMENT || DEBUG
5964 DTRACE_VM4(create_upl_from_executable,
5965 vm_map_t, map,
5966 vm_map_address_t, offset,
5967 upl_size_t, *upl_size,
5968 kern_return_t, ret);
5969 #endif /* DEVELOPMENT || DEBUG */
5970 return ret;
5971 }
5972 #endif /* CONFIG_EMBEDDED */
5973
5974 local_object = VME_OBJECT(entry);
5975 assert(local_object != VM_OBJECT_NULL);
5976
5977 if (!entry->is_sub_map &&
5978 !entry->needs_copy &&
5979 *upl_size != 0 &&
5980 local_object->vo_size > *upl_size && /* partial UPL */
5981 entry->wired_count == 0 && /* No COW for entries that are wired */
5982 (map->pmap != kernel_pmap) && /* alias checks */
5983 (vm_map_entry_should_cow_for_true_share(entry) /* case 1 */
5984 ||
5985 (/* case 2 */
5986 local_object->internal &&
5987 (local_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) &&
5988 local_object->ref_count > 1))) {
5989 vm_prot_t prot;
5990
5991 /*
5992 * Case 1:
5993 * Set up the targeted range for copy-on-write to avoid
5994 * applying true_share/copy_delay to the entire object.
5995 *
5996 * Case 2:
5997 * This map entry covers only part of an internal
5998 * object. There could be other map entries covering
5999 * other areas of this object and some of these map
6000 * entries could be marked as "needs_copy", which
6001 * assumes that the object is COPY_SYMMETRIC.
6002 * To avoid marking this object as COPY_DELAY and
6003 * "true_share", let's shadow it and mark the new
6004 * (smaller) object as "true_share" and COPY_DELAY.
6005 */
6006
6007 if (vm_map_lock_read_to_write(map)) {
6008 goto REDISCOVER_ENTRY;
6009 }
6010 vm_map_lock_assert_exclusive(map);
6011 assert(VME_OBJECT(entry) == local_object);
6012
6013 vm_map_clip_start(map,
6014 entry,
6015 vm_map_trunc_page(offset,
6016 VM_MAP_PAGE_MASK(map)));
6017 vm_map_clip_end(map,
6018 entry,
6019 vm_map_round_page(offset + *upl_size,
6020 VM_MAP_PAGE_MASK(map)));
6021 if ((entry->vme_end - offset) < *upl_size) {
6022 *upl_size = (upl_size_t) (entry->vme_end - offset);
6023 assert(*upl_size == entry->vme_end - offset);
6024 }
6025
6026 prot = entry->protection & ~VM_PROT_WRITE;
6027 if (override_nx(map, VME_ALIAS(entry)) && prot)
6028 prot |= VM_PROT_EXECUTE;
6029 vm_object_pmap_protect(local_object,
6030 VME_OFFSET(entry),
6031 entry->vme_end - entry->vme_start,
6032 ((entry->is_shared ||
6033 map->mapped_in_other_pmaps)
6034 ? PMAP_NULL
6035 : map->pmap),
6036 entry->vme_start,
6037 prot);
6038
6039 assert(entry->wired_count == 0);
6040
6041 /*
6042 * Lock the VM object and re-check its status: if it's mapped
6043 * in another address space, we could still be racing with
6044 * another thread holding that other VM map exclusively.
6045 */
6046 vm_object_lock(local_object);
6047 if (local_object->true_share) {
6048 /* object is already in proper state: no COW needed */
6049 assert(local_object->copy_strategy !=
6050 MEMORY_OBJECT_COPY_SYMMETRIC);
6051 } else {
6052 /* not true_share: ask for copy-on-write below */
6053 assert(local_object->copy_strategy ==
6054 MEMORY_OBJECT_COPY_SYMMETRIC);
6055 entry->needs_copy = TRUE;
6056 }
6057 vm_object_unlock(local_object);
6058
6059 vm_map_lock_write_to_read(map);
6060 }
6061
6062 if (entry->needs_copy) {
6063 /*
6064 * Honor copy-on-write for COPY_SYMMETRIC
6065 * strategy.
6066 */
6067 vm_map_t local_map;
6068 vm_object_t object;
6069 vm_object_offset_t new_offset;
6070 vm_prot_t prot;
6071 boolean_t wired;
6072 vm_map_version_t version;
6073 vm_map_t real_map;
6074 vm_prot_t fault_type;
6075
6076 local_map = map;
6077
6078 if (caller_flags & UPL_COPYOUT_FROM) {
6079 fault_type = VM_PROT_READ | VM_PROT_COPY;
6080 vm_counters.create_upl_extra_cow++;
6081 vm_counters.create_upl_extra_cow_pages +=
6082 (entry->vme_end - entry->vme_start) / PAGE_SIZE;
6083 } else {
6084 fault_type = VM_PROT_WRITE;
6085 }
6086 if (vm_map_lookup_locked(&local_map,
6087 offset, fault_type,
6088 OBJECT_LOCK_EXCLUSIVE,
6089 &version, &object,
6090 &new_offset, &prot, &wired,
6091 NULL,
6092 &real_map) != KERN_SUCCESS) {
6093 if (fault_type == VM_PROT_WRITE) {
6094 vm_counters.create_upl_lookup_failure_write++;
6095 } else {
6096 vm_counters.create_upl_lookup_failure_copy++;
6097 }
6098 vm_map_unlock_read(local_map);
6099 return KERN_FAILURE;
6100 }
6101 if (real_map != map)
6102 vm_map_unlock(real_map);
6103 vm_map_unlock_read(local_map);
6104
6105 vm_object_unlock(object);
6106
6107 goto REDISCOVER_ENTRY;
6108 }
6109
6110 if (entry->is_sub_map) {
6111 vm_map_t submap;
6112
6113 submap = VME_SUBMAP(entry);
6114 local_start = entry->vme_start;
6115 local_offset = VME_OFFSET(entry);
6116
6117 vm_map_reference(submap);
6118 vm_map_unlock_read(map);
6119
6120 ret = vm_map_create_upl(submap,
6121 local_offset + (offset - local_start),
6122 upl_size, upl, page_list, count, flags, tag);
6123 vm_map_deallocate(submap);
6124
6125 return ret;
6126 }
6127
6128 if (sync_cow_data &&
6129 (VME_OBJECT(entry)->shadow ||
6130 VME_OBJECT(entry)->copy)) {
6131 local_object = VME_OBJECT(entry);
6132 local_start = entry->vme_start;
6133 local_offset = VME_OFFSET(entry);
6134
6135 vm_object_reference(local_object);
6136 vm_map_unlock_read(map);
6137
6138 if (local_object->shadow && local_object->copy) {
6139 vm_object_lock_request(local_object->shadow,
6140 ((vm_object_offset_t)
6141 ((offset - local_start) +
6142 local_offset) +
6143 local_object->vo_shadow_offset),
6144 *upl_size, FALSE,
6145 MEMORY_OBJECT_DATA_SYNC,
6146 VM_PROT_NO_CHANGE);
6147 }
6148 sync_cow_data = FALSE;
6149 vm_object_deallocate(local_object);
6150
6151 goto REDISCOVER_ENTRY;
6152 }
6153 if (force_data_sync) {
6154 local_object = VME_OBJECT(entry);
6155 local_start = entry->vme_start;
6156 local_offset = VME_OFFSET(entry);
6157
6158 vm_object_reference(local_object);
6159 vm_map_unlock_read(map);
6160
6161 vm_object_lock_request(local_object,
6162 ((vm_object_offset_t)
6163 ((offset - local_start) +
6164 local_offset)),
6165 (vm_object_size_t)*upl_size,
6166 FALSE,
6167 MEMORY_OBJECT_DATA_SYNC,
6168 VM_PROT_NO_CHANGE);
6169
6170 force_data_sync = FALSE;
6171 vm_object_deallocate(local_object);
6172
6173 goto REDISCOVER_ENTRY;
6174 }
6175 if (VME_OBJECT(entry)->private)
6176 *flags = UPL_DEV_MEMORY;
6177 else
6178 *flags = 0;
6179
6180 if (VME_OBJECT(entry)->phys_contiguous)
6181 *flags |= UPL_PHYS_CONTIG;
6182
6183 local_object = VME_OBJECT(entry);
6184 local_offset = VME_OFFSET(entry);
6185 local_start = entry->vme_start;
6186
6187 #if CONFIG_EMBEDDED
6188 /*
6189 * Wiring will copy the pages to the shadow object.
6190 * The shadow object will not be code-signed so
6191 * attempting to execute code from these copied pages
6192 * would trigger a code-signing violation.
6193 */
6194 if (entry->protection & VM_PROT_EXECUTE) {
6195 #if MACH_ASSERT
6196 printf("pid %d[%s] create_upl out of executable range from "
6197 "0x%llx to 0x%llx: side effects may include "
6198 "code-signing violations later on\n",
6199 proc_selfpid(),
6200 (current_task()->bsd_info
6201 ? proc_name_address(current_task()->bsd_info)
6202 : "?"),
6203 (uint64_t) entry->vme_start,
6204 (uint64_t) entry->vme_end);
6205 #endif /* MACH_ASSERT */
6206 DTRACE_VM2(cs_executable_create_upl,
6207 uint64_t, (uint64_t)entry->vme_start,
6208 uint64_t, (uint64_t)entry->vme_end);
6209 cs_executable_create_upl++;
6210 }
6211 #endif /* CONFIG_EMBEDDED */
6212
6213 vm_object_lock(local_object);
6214
6215 /*
6216 * Ensure that this object is "true_share" and "copy_delay" now,
6217 * while we're still holding the VM map lock. After we unlock the map,
6218 * anything could happen to that mapping, including some copy-on-write
6219 * activity. We need to make sure that the IOPL will point at the
6220 * same memory as the mapping.
6221 */
6222 if (local_object->true_share) {
6223 assert(local_object->copy_strategy !=
6224 MEMORY_OBJECT_COPY_SYMMETRIC);
6225 } else if (local_object != kernel_object &&
6226 local_object != compressor_object &&
6227 !local_object->phys_contiguous) {
6228 #if VM_OBJECT_TRACKING_OP_TRUESHARE
6229 if (!local_object->true_share &&
6230 vm_object_tracking_inited) {
6231 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
6232 int num = 0;
6233 num = OSBacktrace(bt,
6234 VM_OBJECT_TRACKING_BTDEPTH);
6235 btlog_add_entry(vm_object_tracking_btlog,
6236 local_object,
6237 VM_OBJECT_TRACKING_OP_TRUESHARE,
6238 bt,
6239 num);
6240 }
6241 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
6242 local_object->true_share = TRUE;
6243 if (local_object->copy_strategy ==
6244 MEMORY_OBJECT_COPY_SYMMETRIC) {
6245 local_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
6246 }
6247 }
6248
6249 vm_object_reference_locked(local_object);
6250 vm_object_unlock(local_object);
6251
6252 vm_map_unlock_read(map);
6253
6254 ret = vm_object_iopl_request(local_object,
6255 ((vm_object_offset_t)
6256 ((offset - local_start) + local_offset)),
6257 *upl_size,
6258 upl,
6259 page_list,
6260 count,
6261 caller_flags,
6262 tag);
6263 vm_object_deallocate(local_object);
6264
6265 return ret;
6266 }
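/*
 * Sketch of the typical life cycle of a UPL created through
 * vm_map_create_upl (illustrative only; the exact flags, tag and error
 * handling depend on the caller):
 *
 *	kr = vm_map_create_upl(map, offset, &upl_size, &upl, page_list,
 *			       &count, &flags, tag);
 *	kr = vm_map_enter_upl(kernel_map, upl, &kaddr);	   optional mapping
 *	   ... operate on the pages through kaddr ...
 *	kr = vm_map_remove_upl(kernel_map, upl);
 *	kr = upl_commit_range(upl, 0, upl_size, 0, page_list, count, &empty);
 *	if (empty)
 *		upl_deallocate(upl);
 */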
6267
6268 /*
6269 * Internal routine to enter a UPL into a VM map.
6270 *
6271 * JMM - This should just be doable through the standard
6272 * vm_map_enter() API.
6273 */
6274 kern_return_t
6275 vm_map_enter_upl(
6276 vm_map_t map,
6277 upl_t upl,
6278 vm_map_offset_t *dst_addr)
6279 {
6280 vm_map_size_t size;
6281 vm_object_offset_t offset;
6282 vm_map_offset_t addr;
6283 vm_page_t m;
6284 kern_return_t kr;
6285 int isVectorUPL = 0, curr_upl=0;
6286 upl_t vector_upl = NULL;
6287 vm_offset_t vector_upl_dst_addr = 0;
6288 vm_map_t vector_upl_submap = NULL;
6289 upl_offset_t subupl_offset = 0;
6290 upl_size_t subupl_size = 0;
6291
6292 if (upl == UPL_NULL)
6293 return KERN_INVALID_ARGUMENT;
6294
6295 if((isVectorUPL = vector_upl_is_valid(upl))) {
6296 int mapped=0,valid_upls=0;
6297 vector_upl = upl;
6298
6299 upl_lock(vector_upl);
6300 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
6301 upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
6302 if(upl == NULL)
6303 continue;
6304 valid_upls++;
6305 if (UPL_PAGE_LIST_MAPPED & upl->flags)
6306 mapped++;
6307 }
6308
6309 if(mapped) {
6310 if(mapped != valid_upls)
6311 panic("Only %d of the %d sub-upls within the Vector UPL are alread mapped\n", mapped, valid_upls);
6312 else {
6313 upl_unlock(vector_upl);
6314 return KERN_FAILURE;
6315 }
6316 }
6317
6318 kr = kmem_suballoc(map, &vector_upl_dst_addr, vector_upl->size, FALSE,
6319 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
6320 &vector_upl_submap);
6321 if( kr != KERN_SUCCESS )
6322 panic("Vector UPL submap allocation failed\n");
6323 map = vector_upl_submap;
6324 vector_upl_set_submap(vector_upl, vector_upl_submap, vector_upl_dst_addr);
6325 curr_upl=0;
6326 }
6327 else
6328 upl_lock(upl);
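/*
 * A vector UPL aggregates a number of sub-UPLs; each sub-UPL is entered
 * at its recorded offset within the single submap allocated above, so
 * the caller ends up with one contiguous mapping covering the whole
 * vector.  The label below is revisited once per valid sub-UPL.
 */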
6329
6330 process_upl_to_enter:
6331 if(isVectorUPL){
6332 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
6333 *dst_addr = vector_upl_dst_addr;
6334 upl_unlock(vector_upl);
6335 return KERN_SUCCESS;
6336 }
6337 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
6338 if(upl == NULL)
6339 goto process_upl_to_enter;
6340
6341 vector_upl_get_iostate(vector_upl, upl, &subupl_offset, &subupl_size);
6342 *dst_addr = (vm_map_offset_t)(vector_upl_dst_addr + (vm_map_offset_t)subupl_offset);
6343 } else {
6344 /*
6345 * check to see if already mapped
6346 */
6347 if (UPL_PAGE_LIST_MAPPED & upl->flags) {
6348 upl_unlock(upl);
6349 return KERN_FAILURE;
6350 }
6351 }
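/*
 * If the UPL still has busy pages, or is not a device-memory / IO-wire /
 * physically-contiguous UPL, build a shadow object for the mapping:
 * each page named in the lite list gets a private alias page that
 * shares the underlying physical page, is wired, and is inserted into
 * the new map_object.  The vm_map_enter() below then maps this shadow,
 * leaving the original pages' state untouched while the UPL is mapped.
 */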
6352 if ((!(upl->flags & UPL_SHADOWED)) &&
6353 ((upl->flags & UPL_HAS_BUSY) ||
6354 !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || (upl->map_object->phys_contiguous)))) {
6355
6356 vm_object_t object;
6357 vm_page_t alias_page;
6358 vm_object_offset_t new_offset;
6359 unsigned int pg_num;
6360 wpl_array_t lite_list;
6361
6362 if (upl->flags & UPL_INTERNAL) {
6363 lite_list = (wpl_array_t)
6364 ((((uintptr_t)upl) + sizeof(struct upl))
6365 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
6366 } else {
6367 lite_list = (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl));
6368 }
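/*
 * Inline layout of an internal UPL (the external case keeps only the
 * lite list behind the header):
 *
 *	struct upl | upl_page_info_t[upl->size / PAGE_SIZE] | lite list
 */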
6369 object = upl->map_object;
6370 upl->map_object = vm_object_allocate(upl->size);
6371
6372 vm_object_lock(upl->map_object);
6373
6374 upl->map_object->shadow = object;
6375 upl->map_object->pageout = TRUE;
6376 upl->map_object->can_persist = FALSE;
6377 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
6378 upl->map_object->vo_shadow_offset = upl->offset - object->paging_offset;
6379 upl->map_object->wimg_bits = object->wimg_bits;
6380 offset = upl->map_object->vo_shadow_offset;
6381 new_offset = 0;
6382 size = upl->size;
6383
6384 upl->flags |= UPL_SHADOWED;
6385
6386 while (size) {
6387 pg_num = (unsigned int) (new_offset / PAGE_SIZE);
6388 assert(pg_num == new_offset / PAGE_SIZE);
6389
6390 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
6391
6392 VM_PAGE_GRAB_FICTITIOUS(alias_page);
6393
6394 vm_object_lock(object);
6395
6396 m = vm_page_lookup(object, offset);
6397 if (m == VM_PAGE_NULL) {
6398 panic("vm_upl_map: page missing\n");
6399 }
6400
6401 /*
6402 * Convert the fictitious page to a private
6403 * shadow of the real page.
6404 */
6405 assert(alias_page->fictitious);
6406 alias_page->fictitious = FALSE;
6407 alias_page->private = TRUE;
6408 alias_page->free_when_done = TRUE;
6409 /*
6410 * since m is a page in the upl it must
6411 * already be wired or BUSY, so it's
6412 * safe to assign the underlying physical
6413 * page to the alias
6414 */
6415 VM_PAGE_SET_PHYS_PAGE(alias_page, VM_PAGE_GET_PHYS_PAGE(m));
6416
6417 vm_object_unlock(object);
6418
6419 vm_page_lockspin_queues();
6420 vm_page_wire(alias_page, VM_KERN_MEMORY_NONE, TRUE);
6421 vm_page_unlock_queues();
6422
6423 vm_page_insert_wired(alias_page, upl->map_object, new_offset, VM_KERN_MEMORY_NONE);
6424
6425 assert(!alias_page->wanted);
6426 alias_page->busy = FALSE;
6427 alias_page->absent = FALSE;
6428 }
6429 size -= PAGE_SIZE;
6430 offset += PAGE_SIZE_64;
6431 new_offset += PAGE_SIZE_64;
6432 }
6433 vm_object_unlock(upl->map_object);
6434 }
6435 if (upl->flags & UPL_SHADOWED)
6436 offset = 0;
6437 else
6438 offset = upl->offset - upl->map_object->paging_offset;
6439
6440 size = upl->size;
6441
6442 vm_object_reference(upl->map_object);
6443
6444 if(!isVectorUPL) {
6445 *dst_addr = 0;
6446 /*
6447 * NEED A UPL_MAP ALIAS
6448 */
6449 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
6450 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_OSFMK,
6451 upl->map_object, offset, FALSE,
6452 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
6453
6454 if (kr != KERN_SUCCESS) {
6455 vm_object_deallocate(upl->map_object);
6456 upl_unlock(upl);
6457 return(kr);
6458 }
6459 }
6460 else {
6461 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
6462 VM_FLAGS_FIXED, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_OSFMK,
6463 upl->map_object, offset, FALSE,
6464 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
6465 if(kr)
6466 panic("vm_map_enter failed for a Vector UPL\n");
6467 }
6468 vm_object_lock(upl->map_object);
6469
6470 for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) {
6471 m = vm_page_lookup(upl->map_object, offset);
6472
6473 if (m) {
6474 m->pmapped = TRUE;
6475
6476 /* CODE SIGNING ENFORCEMENT: page has been wpmapped,
6477 * but only in kernel space. If this was on a user map,
6478 * we'd have to set the wpmapped bit. */
6479 /* m->wpmapped = TRUE; */
6480 assert(map->pmap == kernel_pmap);
6481
6482 PMAP_ENTER(map->pmap, addr, m, VM_PROT_DEFAULT, VM_PROT_NONE, 0, TRUE, kr);
6483
6484 assert(kr == KERN_SUCCESS);
6485 #if KASAN
6486 kasan_notify_address(addr, PAGE_SIZE_64);
6487 #endif
6488 }
6489 offset += PAGE_SIZE_64;
6490 }
6491 vm_object_unlock(upl->map_object);
6492
6493 /*
6494 * hold a reference for the mapping
6495 */
6496 upl->ref_count++;
6497 upl->flags |= UPL_PAGE_LIST_MAPPED;
6498 upl->kaddr = (vm_offset_t) *dst_addr;
6499 assert(upl->kaddr == *dst_addr);
6500
6501 if(isVectorUPL)
6502 goto process_upl_to_enter;
6503
6504 upl_unlock(upl);
6505
6506 return KERN_SUCCESS;
6507 }
6508
6509 /*
6510 * Internal routine to remove a UPL mapping from a VM map.
6511 *
6512 * XXX - This should just be doable through a standard
6513 * vm_map_remove() operation. Otherwise, implicit clean-up
6514 * of the target map won't be able to correctly remove
6515 * these (and release the reference on the UPL). Having
6516 * to do this means we can't map these into user-space
6517 * maps yet.
6518 */
6519 kern_return_t
6520 vm_map_remove_upl(
6521 vm_map_t map,
6522 upl_t upl)
6523 {
6524 vm_address_t addr;
6525 upl_size_t size;
6526 int isVectorUPL = 0, curr_upl = 0;
6527 upl_t vector_upl = NULL;
6528
6529 if (upl == UPL_NULL)
6530 return KERN_INVALID_ARGUMENT;
6531
6532 if((isVectorUPL = vector_upl_is_valid(upl))) {
6533 int unmapped=0, valid_upls=0;
6534 vector_upl = upl;
6535 upl_lock(vector_upl);
6536 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
6537 upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
6538 if(upl == NULL)
6539 continue;
6540 valid_upls++;
6541 if (!(UPL_PAGE_LIST_MAPPED & upl->flags))
6542 unmapped++;
6543 }
6544
6545 if(unmapped) {
6546 if(unmapped != valid_upls)
6547 panic("%d of the %d sub-upls within the Vector UPL is/are not mapped\n", unmapped, valid_upls);
6548 else {
6549 upl_unlock(vector_upl);
6550 return KERN_FAILURE;
6551 }
6552 }
6553 curr_upl=0;
6554 }
6555 else
6556 upl_lock(upl);
6557
6558 process_upl_to_remove:
6559 if(isVectorUPL) {
6560 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
6561 vm_map_t v_upl_submap;
6562 vm_offset_t v_upl_submap_dst_addr;
6563 vector_upl_get_submap(vector_upl, &v_upl_submap, &v_upl_submap_dst_addr);
6564
6565 vm_map_remove(map, v_upl_submap_dst_addr, v_upl_submap_dst_addr + vector_upl->size, VM_MAP_NO_FLAGS);
6566 vm_map_deallocate(v_upl_submap);
6567 upl_unlock(vector_upl);
6568 return KERN_SUCCESS;
6569 }
6570
6571 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
6572 if(upl == NULL)
6573 goto process_upl_to_remove;
6574 }
6575
6576 if (upl->flags & UPL_PAGE_LIST_MAPPED) {
6577 addr = upl->kaddr;
6578 size = upl->size;
6579
6580 assert(upl->ref_count > 1);
6581 upl->ref_count--; /* removing mapping ref */
6582
6583 upl->flags &= ~UPL_PAGE_LIST_MAPPED;
6584 upl->kaddr = (vm_offset_t) 0;
6585
6586 if(!isVectorUPL) {
6587 upl_unlock(upl);
6588
6589 vm_map_remove(
6590 map,
6591 vm_map_trunc_page(addr,
6592 VM_MAP_PAGE_MASK(map)),
6593 vm_map_round_page(addr + size,
6594 VM_MAP_PAGE_MASK(map)),
6595 VM_MAP_NO_FLAGS);
6596
6597 return KERN_SUCCESS;
6598 }
6599 else {
6600 /*
6601 * If it's a Vectored UPL, we'll be removing the entire
6602 * submap anyway, so no need to remove individual UPL
6603 * element mappings from within the submap
6604 */
6605 goto process_upl_to_remove;
6606 }
6607 }
6608 upl_unlock(upl);
6609
6610 return KERN_FAILURE;
6611 }
6612
6613
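/*
 * Routine: upl_commit_range
 * Purpose:
 * Commit a page-aligned range of a UPL: apply the requested final page
 * state (dirty/clean, code-signing state, queue placement), clear the
 * busy state and/or wiring set up when the UPL was constructed, and
 * wake any waiters.  *empty is set when the commit leaves no occupied
 * pages in the UPL and the caller asked to be notified (or the UPL is
 * part of a vector UPL), telling the caller it may deallocate it.
 */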
6614 kern_return_t
6615 upl_commit_range(
6616 upl_t upl,
6617 upl_offset_t offset,
6618 upl_size_t size,
6619 int flags,
6620 upl_page_info_t *page_list,
6621 mach_msg_type_number_t count,
6622 boolean_t *empty)
6623 {
6624 upl_size_t xfer_size, subupl_size = size;
6625 vm_object_t shadow_object;
6626 vm_object_t object;
6627 vm_object_t m_object;
6628 vm_object_offset_t target_offset;
6629 upl_offset_t subupl_offset = offset;
6630 int entry;
6631 wpl_array_t lite_list;
6632 int occupied;
6633 int clear_refmod = 0;
6634 int pgpgout_count = 0;
6635 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
6636 struct vm_page_delayed_work *dwp;
6637 int dw_count;
6638 int dw_limit;
6639 int isVectorUPL = 0;
6640 upl_t vector_upl = NULL;
6641 boolean_t should_be_throttled = FALSE;
6642
6643 vm_page_t nxt_page = VM_PAGE_NULL;
6644 int fast_path_possible = 0;
6645 int fast_path_full_commit = 0;
6646 int throttle_page = 0;
6647 int unwired_count = 0;
6648 int local_queue_count = 0;
6649 vm_page_t first_local, last_local;
6650
6651 *empty = FALSE;
6652
6653 if (upl == UPL_NULL)
6654 return KERN_INVALID_ARGUMENT;
6655
6656 if (count == 0)
6657 page_list = NULL;
6658
6659 if((isVectorUPL = vector_upl_is_valid(upl))) {
6660 vector_upl = upl;
6661 upl_lock(vector_upl);
6662 }
6663 else
6664 upl_lock(upl);
6665
6666 process_upl_to_commit:
6667
6668 if(isVectorUPL) {
6669 size = subupl_size;
6670 offset = subupl_offset;
6671 if(size == 0) {
6672 upl_unlock(vector_upl);
6673 return KERN_SUCCESS;
6674 }
6675 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
6676 if(upl == NULL) {
6677 upl_unlock(vector_upl);
6678 return KERN_FAILURE;
6679 }
6680 page_list = UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(upl);
6681 subupl_size -= size;
6682 subupl_offset += size;
6683 }
6684
6685 #if UPL_DEBUG
6686 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
6687 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
6688
6689 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
6690 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
6691
6692 upl->upl_commit_index++;
6693 }
6694 #endif
6695 if (upl->flags & UPL_DEVICE_MEMORY)
6696 xfer_size = 0;
6697 else if ((offset + size) <= upl->size)
6698 xfer_size = size;
6699 else {
6700 if(!isVectorUPL)
6701 upl_unlock(upl);
6702 else {
6703 upl_unlock(vector_upl);
6704 }
6705 return KERN_FAILURE;
6706 }
6707 if (upl->flags & UPL_SET_DIRTY)
6708 flags |= UPL_COMMIT_SET_DIRTY;
6709 if (upl->flags & UPL_CLEAR_DIRTY)
6710 flags |= UPL_COMMIT_CLEAR_DIRTY;
6711
6712 if (upl->flags & UPL_INTERNAL)
6713 lite_list = (wpl_array_t) ((((uintptr_t)upl) + sizeof(struct upl))
6714 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
6715 else
6716 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
6717
6718 object = upl->map_object;
6719
6720 if (upl->flags & UPL_SHADOWED) {
6721 vm_object_lock(object);
6722 shadow_object = object->shadow;
6723 } else {
6724 shadow_object = object;
6725 }
6726 entry = offset/PAGE_SIZE;
6727 target_offset = (vm_object_offset_t)offset;
6728
6729 assert(!(target_offset & PAGE_MASK));
6730 assert(!(xfer_size & PAGE_MASK));
6731
6732 if (upl->flags & UPL_KERNEL_OBJECT)
6733 vm_object_lock_shared(shadow_object);
6734 else
6735 vm_object_lock(shadow_object);
6736
6737 VM_OBJECT_WIRED_PAGE_UPDATE_START(shadow_object);
6738
6739 if (upl->flags & UPL_ACCESS_BLOCKED) {
6740 assert(shadow_object->blocked_access);
6741 shadow_object->blocked_access = FALSE;
6742 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
6743 }
6744
6745 if (shadow_object->code_signed) {
6746 /*
6747 * CODE SIGNING:
6748 * If the object is code-signed, do not let this UPL tell
6749 * us if the pages are valid or not. Let the pages be
6750 * validated by VM the normal way (when they get mapped or
6751 * copied).
6752 */
6753 flags &= ~UPL_COMMIT_CS_VALIDATED;
6754 }
6755 if (! page_list) {
6756 /*
6757 * No page list to get the code-signing info from !?
6758 */
6759 flags &= ~UPL_COMMIT_CS_VALIDATED;
6760 }
6761 if (!VM_DYNAMIC_PAGING_ENABLED() && shadow_object->internal)
6762 should_be_throttled = TRUE;
6763
6764 dwp = &dw_array[0];
6765 dw_count = 0;
6766 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
6767
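/*
 * Fast-path detection: for a plain (non-vector) IO-wire UPL against an
 * object that is neither volatile nor empty purgeable, and when absent
 * pages are not being freed, committed pages are collected on a local
 * list (first_local/last_local) and spliced into the appropriate global
 * page queue in one operation after the loop, rather than being
 * requeued one at a time through the delayed-work entries.
 */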
6768 if ((upl->flags & UPL_IO_WIRE) &&
6769 !(flags & UPL_COMMIT_FREE_ABSENT) &&
6770 !isVectorUPL &&
6771 shadow_object->purgable != VM_PURGABLE_VOLATILE &&
6772 shadow_object->purgable != VM_PURGABLE_EMPTY) {
6773
6774 if (!vm_page_queue_empty(&shadow_object->memq)) {
6775
6776 if (size == shadow_object->vo_size) {
6777 nxt_page = (vm_page_t)vm_page_queue_first(&shadow_object->memq);
6778 fast_path_full_commit = 1;
6779 }
6780 fast_path_possible = 1;
6781
6782 if (!VM_DYNAMIC_PAGING_ENABLED() && shadow_object->internal &&
6783 (shadow_object->purgable == VM_PURGABLE_DENY ||
6784 shadow_object->purgable == VM_PURGABLE_NONVOLATILE ||
6785 shadow_object->purgable == VM_PURGABLE_VOLATILE)) {
6786 throttle_page = 1;
6787 }
6788 }
6789 }
6790 first_local = VM_PAGE_NULL;
6791 last_local = VM_PAGE_NULL;
6792
6793 while (xfer_size) {
6794 vm_page_t t, m;
6795
6796 dwp->dw_mask = 0;
6797 clear_refmod = 0;
6798
6799 m = VM_PAGE_NULL;
6800
6801 if (upl->flags & UPL_LITE) {
6802 unsigned int pg_num;
6803
6804 if (nxt_page != VM_PAGE_NULL) {
6805 m = nxt_page;
6806 nxt_page = (vm_page_t)vm_page_queue_next(&nxt_page->listq);
6807 target_offset = m->offset;
6808 }
6809 pg_num = (unsigned int) (target_offset/PAGE_SIZE);
6810 assert(pg_num == target_offset/PAGE_SIZE);
6811
6812 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
6813 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
6814
6815 if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL)
6816 m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset));
6817 } else
6818 m = NULL;
6819 }
6820 if (upl->flags & UPL_SHADOWED) {
6821 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
6822
6823 t->free_when_done = FALSE;
6824
6825 VM_PAGE_FREE(t);
6826
6827 if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL)
6828 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
6829 }
6830 }
6831 if (m == VM_PAGE_NULL)
6832 goto commit_next_page;
6833
6834 m_object = VM_PAGE_OBJECT(m);
6835
6836 if (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
6837 assert(m->busy);
6838
6839 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6840 goto commit_next_page;
6841 }
6842
6843 if (flags & UPL_COMMIT_CS_VALIDATED) {
6844 /*
6845 * CODE SIGNING:
6846 * Set the code signing bits according to
6847 * what the UPL says they should be.
6848 */
6849 m->cs_validated = page_list[entry].cs_validated;
6850 m->cs_tainted = page_list[entry].cs_tainted;
6851 m->cs_nx = page_list[entry].cs_nx;
6852 }
6853 if (flags & UPL_COMMIT_WRITTEN_BY_KERNEL)
6854 m->written_by_kernel = TRUE;
6855
6856 if (upl->flags & UPL_IO_WIRE) {
6857
6858 if (page_list)
6859 page_list[entry].phys_addr = 0;
6860
6861 if (flags & UPL_COMMIT_SET_DIRTY) {
6862 SET_PAGE_DIRTY(m, FALSE);
6863 } else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
6864 m->dirty = FALSE;
6865
6866 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
6867 m->cs_validated && !m->cs_tainted) {
6868 /*
6869 * CODE SIGNING:
6870 * This page is no longer dirty
6871 * but could have been modified,
6872 * so it will need to be
6873 * re-validated.
6874 */
6875 if (m->slid) {
6876 panic("upl_commit_range(%p): page %p was slid\n",
6877 upl, m);
6878 }
6879 assert(!m->slid);
6880 m->cs_validated = FALSE;
6881 #if DEVELOPMENT || DEBUG
6882 vm_cs_validated_resets++;
6883 #endif
6884 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
6885 }
6886 clear_refmod |= VM_MEM_MODIFIED;
6887 }
6888 if (upl->flags & UPL_ACCESS_BLOCKED) {
6889 /*
6890 * We blocked access to the pages in this UPL.
6891 * Clear the "busy" bit and wake up any waiter
6892 * for this page.
6893 */
6894 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6895 }
6896 if (fast_path_possible) {
6897 assert(m_object->purgable != VM_PURGABLE_EMPTY);
6898 assert(m_object->purgable != VM_PURGABLE_VOLATILE);
6899 if (m->absent) {
6900 assert(m->vm_page_q_state == VM_PAGE_NOT_ON_Q);
6901 assert(m->wire_count == 0);
6902 assert(m->busy);
6903
6904 m->absent = FALSE;
6905 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6906 } else {
6907 if (m->wire_count == 0)
6908 panic("wire_count == 0, m = %p, obj = %p\n", m, shadow_object);
6909 assert(m->vm_page_q_state == VM_PAGE_IS_WIRED);
6910
6911 /*
6912 * XXX FBDP need to update some other
6913 * counters here (purgeable_wired_count)
6914 * (ledgers), ...
6915 */
6916 assert(m->wire_count > 0);
6917 m->wire_count--;
6918
6919 if (m->wire_count == 0) {
6920 m->vm_page_q_state = VM_PAGE_NOT_ON_Q;
6921 unwired_count++;
6922 }
6923 }
6924 if (m->wire_count == 0) {
6925 assert(m->pageq.next == 0 && m->pageq.prev == 0);
6926
6927 if (last_local == VM_PAGE_NULL) {
6928 assert(first_local == VM_PAGE_NULL);
6929
6930 last_local = m;
6931 first_local = m;
6932 } else {
6933 assert(first_local != VM_PAGE_NULL);
6934
6935 m->pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
6936 first_local->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(m);
6937 first_local = m;
6938 }
6939 local_queue_count++;
6940
6941 if (throttle_page) {
6942 m->vm_page_q_state = VM_PAGE_ON_THROTTLED_Q;
6943 } else {
6944 if (flags & UPL_COMMIT_INACTIVATE) {
6945 if (shadow_object->internal)
6946 m->vm_page_q_state = VM_PAGE_ON_INACTIVE_INTERNAL_Q;
6947 else
6948 m->vm_page_q_state = VM_PAGE_ON_INACTIVE_EXTERNAL_Q;
6949 } else
6950 m->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
6951 }
6952 }
6953 } else {
6954 if (flags & UPL_COMMIT_INACTIVATE) {
6955 dwp->dw_mask |= DW_vm_page_deactivate_internal;
6956 clear_refmod |= VM_MEM_REFERENCED;
6957 }
6958 if (m->absent) {
6959 if (flags & UPL_COMMIT_FREE_ABSENT)
6960 dwp->dw_mask |= DW_vm_page_free;
6961 else {
6962 m->absent = FALSE;
6963 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6964
6965 if ( !(dwp->dw_mask & DW_vm_page_deactivate_internal))
6966 dwp->dw_mask |= DW_vm_page_activate;
6967 }
6968 } else
6969 dwp->dw_mask |= DW_vm_page_unwire;
6970 }
6971 goto commit_next_page;
6972 }
6973 assert(m->vm_page_q_state != VM_PAGE_USED_BY_COMPRESSOR);
6974
6975 if (page_list)
6976 page_list[entry].phys_addr = 0;
6977
6978 /*
6979 * make sure to clear the hardware
6980 * modify or reference bits before
6981 * releasing the BUSY bit on this page;
6982 * otherwise we risk losing a legitimate
6983 * change of state
6984 */
6985 if (flags & UPL_COMMIT_CLEAR_DIRTY) {
6986 m->dirty = FALSE;
6987
6988 clear_refmod |= VM_MEM_MODIFIED;
6989 }
6990 if (m->laundry)
6991 dwp->dw_mask |= DW_vm_pageout_throttle_up;
6992
6993 if (VM_PAGE_WIRED(m))
6994 m->free_when_done = FALSE;
6995
6996 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
6997 m->cs_validated && !m->cs_tainted) {
6998 /*
6999 * CODE SIGNING:
7000 * This page is no longer dirty
7001 * but could have been modified,
7002 * so it will need to be
7003 * re-validated.
7004 */
7005 if (m->slid) {
7006 panic("upl_commit_range(%p): page %p was slid\n",
7007 upl, m);
7008 }
7009 assert(!m->slid);
7010 m->cs_validated = FALSE;
7011 #if DEVELOPMENT || DEBUG
7012 vm_cs_validated_resets++;
7013 #endif
7014 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
7015 }
7016 if (m->overwriting) {
7017 /*
7018 * the (COPY_OUT_FROM == FALSE) request_page_list case
7019 */
7020 if (m->busy) {
7021 #if CONFIG_PHANTOM_CACHE
7022 if (m->absent && !m_object->internal)
7023 dwp->dw_mask |= DW_vm_phantom_cache_update;
7024 #endif
7025 m->absent = FALSE;
7026
7027 dwp->dw_mask |= DW_clear_busy;
7028 } else {
7029 /*
7030 * alternate (COPY_OUT_FROM == FALSE) page_list case
7031 * Occurs when the original page was wired
7032 * at the time of the list request
7033 */
7034 assert(VM_PAGE_WIRED(m));
7035
7036 dwp->dw_mask |= DW_vm_page_unwire; /* reactivates */
7037 }
7038 m->overwriting = FALSE;
7039 }
7040 m->cleaning = FALSE;
7041
7042 if (m->free_when_done) {
7043 /*
7044 * With the clean queue enabled, UPL_PAGEOUT should
7045 * no longer set the pageout bit. Its pages now go
7046 * to the clean queue.
7047 */
7048 assert(!(flags & UPL_PAGEOUT));
7049 assert(!m_object->internal);
7050
7051 m->free_when_done = FALSE;
7052 #if MACH_CLUSTER_STATS
7053 if (m->wanted) vm_pageout_target_collisions++;
7054 #endif
7055 if ((flags & UPL_COMMIT_SET_DIRTY) ||
7056 (m->pmapped && (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED))) {
7057 /*
7058 * page was re-dirtied after we started
7059 * the pageout... reactivate it since
7060 * we don't know whether the on-disk
7061 * copy matches what is now in memory
7062 */
7063 SET_PAGE_DIRTY(m, FALSE);
7064
7065 dwp->dw_mask |= DW_vm_page_activate | DW_PAGE_WAKEUP;
7066
7067 if (upl->flags & UPL_PAGEOUT) {
7068 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
7069 VM_STAT_INCR(reactivations);
7070 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
7071 }
7072 } else {
7073 /*
7074 * page has been successfully cleaned
7075 * go ahead and free it for other use
7076 */
7077 if (m_object->internal) {
7078 DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL);
7079 } else {
7080 DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL);
7081 }
7082 m->dirty = FALSE;
7083 m->busy = TRUE;
7084
7085 dwp->dw_mask |= DW_vm_page_free;
7086 }
7087 goto commit_next_page;
7088 }
7089 #if MACH_CLUSTER_STATS
7090 if (m->wpmapped)
7091 m->dirty = pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m));
7092
7093 if (m->dirty) vm_pageout_cluster_dirtied++;
7094 else vm_pageout_cluster_cleaned++;
7095 if (m->wanted) vm_pageout_cluster_collisions++;
7096 #endif
7097 /*
7098 * It is part of the semantics of COPYOUT_FROM
7099 * UPLs that a commit implies cache sync
7100 * between the vm page and the backing store;
7101 * this can be used to strip the precious bit
7102 * as well as clean
7103 */
7104 if ((upl->flags & UPL_PAGE_SYNC_DONE) || (flags & UPL_COMMIT_CLEAR_PRECIOUS))
7105 m->precious = FALSE;
7106
7107 if (flags & UPL_COMMIT_SET_DIRTY) {
7108 SET_PAGE_DIRTY(m, FALSE);
7109 } else {
7110 m->dirty = FALSE;
7111 }
7112
7113 /* with the clean queue on, move *all* cleaned pages to the clean queue */
7114 if (hibernate_cleaning_in_progress == FALSE && !m->dirty && (upl->flags & UPL_PAGEOUT)) {
7115 pgpgout_count++;
7116
7117 VM_STAT_INCR(pageouts);
7118 DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL);
7119
7120 dwp->dw_mask |= DW_enqueue_cleaned;
7121 vm_pageout_enqueued_cleaned_from_inactive_dirty++;
7122 } else if (should_be_throttled == TRUE && (m->vm_page_q_state == VM_PAGE_NOT_ON_Q)) {
7123 /*
7124 * page coming back in from being 'frozen'...
7125 * it was dirty before it was frozen, so keep it so
7126 * the vm_page_activate will notice that it really belongs
7127 * on the throttle queue and put it there
7128 */
7129 SET_PAGE_DIRTY(m, FALSE);
7130 dwp->dw_mask |= DW_vm_page_activate;
7131
7132 } else {
7133 if ((flags & UPL_COMMIT_INACTIVATE) && !m->clustered && (m->vm_page_q_state != VM_PAGE_ON_SPECULATIVE_Q)) {
7134 dwp->dw_mask |= DW_vm_page_deactivate_internal;
7135 clear_refmod |= VM_MEM_REFERENCED;
7136 } else if ( !VM_PAGE_PAGEABLE(m)) {
7137
7138 if (m->clustered || (flags & UPL_COMMIT_SPECULATE))
7139 dwp->dw_mask |= DW_vm_page_speculate;
7140 else if (m->reference)
7141 dwp->dw_mask |= DW_vm_page_activate;
7142 else {
7143 dwp->dw_mask |= DW_vm_page_deactivate_internal;
7144 clear_refmod |= VM_MEM_REFERENCED;
7145 }
7146 }
7147 }
7148 if (upl->flags & UPL_ACCESS_BLOCKED) {
7149 /*
7150 * We blocked access to the pages in this UPL.
7151 * Clear the "busy" bit on this page before we
7152 * wake up any waiter.
7153 */
7154 dwp->dw_mask |= DW_clear_busy;
7155 }
7156 /*
7157 * Wake up any thread waiting for this page's cleaning to be done.
7158 */
7159 dwp->dw_mask |= DW_PAGE_WAKEUP;
7160
7161 commit_next_page:
7162 if (clear_refmod)
7163 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(m), clear_refmod);
7164
7165 target_offset += PAGE_SIZE_64;
7166 xfer_size -= PAGE_SIZE;
7167 entry++;
7168
7169 if (dwp->dw_mask) {
7170 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
7171 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
7172
7173 if (dw_count >= dw_limit) {
7174 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7175
7176 dwp = &dw_array[0];
7177 dw_count = 0;
7178 }
7179 } else {
7180 if (dwp->dw_mask & DW_clear_busy)
7181 m->busy = FALSE;
7182
7183 if (dwp->dw_mask & DW_PAGE_WAKEUP)
7184 PAGE_WAKEUP(m);
7185 }
7186 }
7187 }
7188 if (dw_count)
7189 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7190
7191 if (fast_path_possible) {
7192
7193 assert(shadow_object->purgable != VM_PURGABLE_VOLATILE);
7194 assert(shadow_object->purgable != VM_PURGABLE_EMPTY);
7195
7196 if (local_queue_count || unwired_count) {
7197
7198 if (local_queue_count) {
7199 vm_page_t first_target;
7200 vm_page_queue_head_t *target_queue;
7201
7202 if (throttle_page)
7203 target_queue = &vm_page_queue_throttled;
7204 else {
7205 if (flags & UPL_COMMIT_INACTIVATE) {
7206 if (shadow_object->internal)
7207 target_queue = &vm_page_queue_anonymous;
7208 else
7209 target_queue = &vm_page_queue_inactive;
7210 } else
7211 target_queue = &vm_page_queue_active;
7212 }
7213 /*
7214 * Transfer the entire local queue to a regular LRU page queue.
7215 */
7216 vm_page_lockspin_queues();
7217
7218 first_target = (vm_page_t) vm_page_queue_first(target_queue);
7219
7220 if (vm_page_queue_empty(target_queue))
7221 target_queue->prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
7222 else
7223 first_target->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
7224
7225 target_queue->next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
7226 first_local->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(target_queue);
7227 last_local->pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_target);
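/*
 * The pointer updates above splice the entire local list in at the
 * head of target_queue in constant time, under a single page-queues
 * lock hold; the global page counts are then adjusted in bulk below.
 */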
7228
7229 /*
7230 * Adjust the global page counts.
7231 */
7232 if (throttle_page) {
7233 vm_page_throttled_count += local_queue_count;
7234 } else {
7235 if (flags & UPL_COMMIT_INACTIVATE) {
7236 if (shadow_object->internal)
7237 vm_page_anonymous_count += local_queue_count;
7238 vm_page_inactive_count += local_queue_count;
7239
7240 token_new_pagecount += local_queue_count;
7241 } else
7242 vm_page_active_count += local_queue_count;
7243
7244 if (shadow_object->internal)
7245 vm_page_pageable_internal_count += local_queue_count;
7246 else
7247 vm_page_pageable_external_count += local_queue_count;
7248 }
7249 } else {
7250 vm_page_lockspin_queues();
7251 }
7252 if (unwired_count) {
7253 vm_page_wire_count -= unwired_count;
7254 VM_CHECK_MEMORYSTATUS;
7255 }
7256 vm_page_unlock_queues();
7257
7258 VM_OBJECT_WIRED_PAGE_COUNT(shadow_object, -unwired_count);
7259 }
7260 }
7261 occupied = 1;
7262
7263 if (upl->flags & UPL_DEVICE_MEMORY) {
7264 occupied = 0;
7265 } else if (upl->flags & UPL_LITE) {
7266 int pg_num;
7267 int i;
7268
7269 occupied = 0;
7270
7271 if (!fast_path_full_commit) {
7272 pg_num = upl->size/PAGE_SIZE;
7273 pg_num = (pg_num + 31) >> 5;
7274
7275 for (i = 0; i < pg_num; i++) {
7276 if (lite_list[i] != 0) {
7277 occupied = 1;
7278 break;
7279 }
7280 }
7281 }
7282 } else {
7283 if (vm_page_queue_empty(&upl->map_object->memq))
7284 occupied = 0;
7285 }
7286 if (occupied == 0) {
7287 /*
7288 * If this UPL element belongs to a Vector UPL and is
7289 * empty, then this is the right function to deallocate
7290 * it. So go ahead and set the *empty variable. The flag
7291 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view,
7292 * should be considered relevant for the Vector UPL and not
7293 * the internal UPLs.
7294 */
7295 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
7296 *empty = TRUE;
7297
7298 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
7299 /*
7300 * this is not a paging object
7301 * so we need to drop the paging reference
7302 * that was taken when we created the UPL
7303 * against this object
7304 */
7305 vm_object_activity_end(shadow_object);
7306 vm_object_collapse(shadow_object, 0, TRUE);
7307 } else {
7308 /*
7309 * we donated the paging reference to
7310 * the map object... vm_pageout_object_terminate
7311 * will drop this reference
7312 */
7313 }
7314 }
7315 VM_OBJECT_WIRED_PAGE_UPDATE_END(shadow_object, shadow_object->wire_tag);
7316 vm_object_unlock(shadow_object);
7317 if (object != shadow_object)
7318 vm_object_unlock(object);
7319
7320 if(!isVectorUPL)
7321 upl_unlock(upl);
7322 else {
7323 /*
7324 * If we completed our operations on a UPL that is
7325 * part of a Vectored UPL and if empty is TRUE, then
7326 * we should go ahead and deallocate this UPL element.
7327 * Then we check if this was the last of the UPL elements
7328 * within that Vectored UPL. If so, set empty to TRUE
7329 * so that in ubc_upl_commit_range or ubc_upl_commit, we
7330 * can go ahead and deallocate the Vector UPL too.
7331 */
7332 if(*empty==TRUE) {
7333 *empty = vector_upl_set_subupl(vector_upl, upl, 0);
7334 upl_deallocate(upl);
7335 }
7336 goto process_upl_to_commit;
7337 }
7338 if (pgpgout_count) {
7339 DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL);
7340 }
7341
7342 return KERN_SUCCESS;
7343 }
7344
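/*
 * Illustrative sketch only (not compiled): a minimal caller-side commit loop
 * for a plain (non-vector) UPL.  It walks the UPL in fixed, page-aligned
 * chunks; a NULL page list is legal (upl_abort_range() below does the same
 * when it redirects to upl_commit_range()).  The chunk size and the trailing
 * upl_deallocate() are assumptions made for the example, not a statement
 * about what any particular real caller does.
 */
#if 0
static kern_return_t
example_commit_whole_upl(upl_t upl)
{
	upl_offset_t	offset = 0;
	upl_size_t	chunk = 32 * PAGE_SIZE;		/* arbitrary chunk size */
	boolean_t	empty = FALSE;
	kern_return_t	kr = KERN_SUCCESS;

	while (offset < upl->size) {
		upl_size_t	this_chunk = chunk;

		if (offset + this_chunk > upl->size)
			this_chunk = upl->size - offset;

		kr = upl_commit_range(upl, offset, this_chunk,
				      0,		/* no UPL_COMMIT_* modifiers */
				      NULL, 0,		/* no page list */
				      &empty);
		if (kr != KERN_SUCCESS)
			break;
		offset += this_chunk;
	}
	upl_deallocate(upl);		/* drop the UPL once we're done with it */
	return kr;
}
#endif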
7345 kern_return_t
7346 upl_abort_range(
7347 upl_t upl,
7348 upl_offset_t offset,
7349 upl_size_t size,
7350 int error,
7351 boolean_t *empty)
7352 {
7353 upl_page_info_t *user_page_list = NULL;
7354 upl_size_t xfer_size, subupl_size = size;
7355 vm_object_t shadow_object;
7356 vm_object_t object;
7357 vm_object_offset_t target_offset;
7358 upl_offset_t subupl_offset = offset;
7359 int entry;
7360 wpl_array_t lite_list;
7361 int occupied;
7362 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
7363 struct vm_page_delayed_work *dwp;
7364 int dw_count;
7365 int dw_limit;
7366 int isVectorUPL = 0;
7367 upl_t vector_upl = NULL;
7368
7369 *empty = FALSE;
7370
7371 if (upl == UPL_NULL)
7372 return KERN_INVALID_ARGUMENT;
7373
7374 if ( (upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES) )
7375 return upl_commit_range(upl, offset, size, UPL_COMMIT_FREE_ABSENT, NULL, 0, empty);
7376
7377 if((isVectorUPL = vector_upl_is_valid(upl))) {
7378 vector_upl = upl;
7379 upl_lock(vector_upl);
7380 }
7381 else
7382 upl_lock(upl);
7383
7384 process_upl_to_abort:
7385 if(isVectorUPL) {
7386 size = subupl_size;
7387 offset = subupl_offset;
7388 if(size == 0) {
7389 upl_unlock(vector_upl);
7390 return KERN_SUCCESS;
7391 }
7392 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
7393 if(upl == NULL) {
7394 upl_unlock(vector_upl);
7395 return KERN_FAILURE;
7396 }
7397 subupl_size -= size;
7398 subupl_offset += size;
7399 }
7400
7401 *empty = FALSE;
7402
7403 #if UPL_DEBUG
7404 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
7405 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
7406
7407 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
7408 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
7409 upl->upl_commit_records[upl->upl_commit_index].c_aborted = 1;
7410
7411 upl->upl_commit_index++;
7412 }
7413 #endif
7414 if (upl->flags & UPL_DEVICE_MEMORY)
7415 xfer_size = 0;
7416 else if ((offset + size) <= upl->size)
7417 xfer_size = size;
7418 else {
7419 if(!isVectorUPL)
7420 upl_unlock(upl);
7421 else {
7422 upl_unlock(vector_upl);
7423 }
7424
7425 return KERN_FAILURE;
7426 }
7427 if (upl->flags & UPL_INTERNAL) {
7428 lite_list = (wpl_array_t)
7429 ((((uintptr_t)upl) + sizeof(struct upl))
7430 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
7431
7432 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
7433 } else {
7434 lite_list = (wpl_array_t)
7435 (((uintptr_t)upl) + sizeof(struct upl));
7436 }
7437 object = upl->map_object;
7438
7439 if (upl->flags & UPL_SHADOWED) {
7440 vm_object_lock(object);
7441 shadow_object = object->shadow;
7442 } else
7443 shadow_object = object;
7444
7445 entry = offset/PAGE_SIZE;
7446 target_offset = (vm_object_offset_t)offset;
7447
7448 assert(!(target_offset & PAGE_MASK));
7449 assert(!(xfer_size & PAGE_MASK));
7450
7451 if (upl->flags & UPL_KERNEL_OBJECT)
7452 vm_object_lock_shared(shadow_object);
7453 else
7454 vm_object_lock(shadow_object);
7455
7456 if (upl->flags & UPL_ACCESS_BLOCKED) {
7457 assert(shadow_object->blocked_access);
7458 shadow_object->blocked_access = FALSE;
7459 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
7460 }
7461
7462 dwp = &dw_array[0];
7463 dw_count = 0;
7464 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
7465
7466 if ((error & UPL_ABORT_DUMP_PAGES) && (upl->flags & UPL_KERNEL_OBJECT))
7467 panic("upl_abort_range: kernel_object being DUMPED");
7468
7469 while (xfer_size) {
7470 vm_page_t t, m;
7471 unsigned int pg_num;
7472 boolean_t needed;
7473
7474 pg_num = (unsigned int) (target_offset/PAGE_SIZE);
7475 assert(pg_num == target_offset/PAGE_SIZE);
7476
7477 needed = FALSE;
7478
7479 if (user_page_list)
7480 needed = user_page_list[pg_num].needed;
7481
7482 dwp->dw_mask = 0;
7483 m = VM_PAGE_NULL;
7484
7485 if (upl->flags & UPL_LITE) {
7486
7487 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
7488 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
7489
7490 if ( !(upl->flags & UPL_KERNEL_OBJECT))
7491 m = vm_page_lookup(shadow_object, target_offset +
7492 (upl->offset - shadow_object->paging_offset));
7493 }
7494 }
7495 if (upl->flags & UPL_SHADOWED) {
7496 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
7497 t->free_when_done = FALSE;
7498
7499 VM_PAGE_FREE(t);
7500
7501 if (m == VM_PAGE_NULL)
7502 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
7503 }
7504 }
7505 if ((upl->flags & UPL_KERNEL_OBJECT))
7506 goto abort_next_page;
7507
7508 if (m != VM_PAGE_NULL) {
7509
7510 assert(m->vm_page_q_state != VM_PAGE_USED_BY_COMPRESSOR);
7511
7512 if (m->absent) {
7513 boolean_t must_free = TRUE;
7514
7515 /*
7516 * COPYOUT = FALSE case
7517 * check for error conditions which must
7518 * be passed back to the page's customer
7519 */
7520 if (error & UPL_ABORT_RESTART) {
7521 m->restart = TRUE;
7522 m->absent = FALSE;
7523 m->unusual = TRUE;
7524 must_free = FALSE;
7525 } else if (error & UPL_ABORT_UNAVAILABLE) {
7526 m->restart = FALSE;
7527 m->unusual = TRUE;
7528 must_free = FALSE;
7529 } else if (error & UPL_ABORT_ERROR) {
7530 m->restart = FALSE;
7531 m->absent = FALSE;
7532 m->error = TRUE;
7533 m->unusual = TRUE;
7534 must_free = FALSE;
7535 }
7536 if (m->clustered && needed == FALSE) {
7537 /*
7538 * This page was a part of a speculative
7539 * read-ahead initiated by the kernel
7540 * itself. No one is expecting this
7541 * page and no one will clean up its
7542 * error state if it ever becomes valid
7543 * in the future.
7544 * We have to free it here.
7545 */
7546 must_free = TRUE;
7547 }
7548 m->cleaning = FALSE;
7549
7550 if (m->overwriting && !m->busy) {
7551 /*
7552 * this shouldn't happen since
7553 * this is an 'absent' page, but
7554 * it doesn't hurt to check for
7555 * the 'alternate' method of
7556 * stabilizing the page...
7557 * we will mark 'busy' to be cleared
7558 * in the following code which will
7559 * take care of the primary stabilization
7560 * method (i.e. setting 'busy' to TRUE)
7561 */
7562 dwp->dw_mask |= DW_vm_page_unwire;
7563 }
7564 m->overwriting = FALSE;
7565
7566 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
7567
7568 if (must_free == TRUE)
7569 dwp->dw_mask |= DW_vm_page_free;
7570 else
7571 dwp->dw_mask |= DW_vm_page_activate;
7572 } else {
7573 /*
7574 * Handle the trusted pager throttle.
7575 */
7576 if (m->laundry)
7577 dwp->dw_mask |= DW_vm_pageout_throttle_up;
7578
7579 if (upl->flags & UPL_ACCESS_BLOCKED) {
7580 /*
7581 * We blocked access to the pages in this UPL.
7582 * Clear the "busy" bit and wake up any waiter
7583 * for this page.
7584 */
7585 dwp->dw_mask |= DW_clear_busy;
7586 }
7587 if (m->overwriting) {
7588 if (m->busy)
7589 dwp->dw_mask |= DW_clear_busy;
7590 else {
7591 /*
7592 * deal with the 'alternate' method
7593 * of stabilizing the page...
7594 * we will either free the page
7595 * or mark 'busy' to be cleared
7596 * in the following code which will
7597 * take care of the primary stabilization
7598 * method (i.e. setting 'busy' to TRUE)
7599 */
7600 dwp->dw_mask |= DW_vm_page_unwire;
7601 }
7602 m->overwriting = FALSE;
7603 }
7604 m->free_when_done = FALSE;
7605 m->cleaning = FALSE;
7606
7607 if (error & UPL_ABORT_DUMP_PAGES) {
7608 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
7609
7610 dwp->dw_mask |= DW_vm_page_free;
7611 } else {
7612 if (!(dwp->dw_mask & DW_vm_page_unwire)) {
7613 if (error & UPL_ABORT_REFERENCE) {
7614 /*
7615 * we've been told to explicitly
7616 * reference this page... for
7617 * file I/O, this is done by
7618 * implementing an LRU on the inactive q
7619 */
7620 dwp->dw_mask |= DW_vm_page_lru;
7621
7622 } else if ( !VM_PAGE_PAGEABLE(m))
7623 dwp->dw_mask |= DW_vm_page_deactivate_internal;
7624 }
7625 dwp->dw_mask |= DW_PAGE_WAKEUP;
7626 }
7627 }
7628 }
7629 abort_next_page:
7630 target_offset += PAGE_SIZE_64;
7631 xfer_size -= PAGE_SIZE;
7632 entry++;
7633
7634 if (dwp->dw_mask) {
7635 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
7636 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
7637
7638 if (dw_count >= dw_limit) {
7639 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7640
7641 dwp = &dw_array[0];
7642 dw_count = 0;
7643 }
7644 } else {
7645 if (dwp->dw_mask & DW_clear_busy)
7646 m->busy = FALSE;
7647
7648 if (dwp->dw_mask & DW_PAGE_WAKEUP)
7649 PAGE_WAKEUP(m);
7650 }
7651 }
7652 }
7653 if (dw_count)
7654 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7655
7656 occupied = 1;
7657
7658 if (upl->flags & UPL_DEVICE_MEMORY) {
7659 occupied = 0;
7660 } else if (upl->flags & UPL_LITE) {
7661 int pg_num;
7662 int i;
7663
7664 pg_num = upl->size/PAGE_SIZE;
7665 pg_num = (pg_num + 31) >> 5;
7666 occupied = 0;
7667
7668 for (i = 0; i < pg_num; i++) {
7669 if (lite_list[i] != 0) {
7670 occupied = 1;
7671 break;
7672 }
7673 }
7674 } else {
7675 if (vm_page_queue_empty(&upl->map_object->memq))
7676 occupied = 0;
7677 }
7678 if (occupied == 0) {
7679 /*
7680 * If this UPL element belongs to a Vector UPL and is
7681 * empty, then this is the right function to deallocate
7682 * it. So go ahead and set the *empty variable. The flag
7683 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view,
7684 * should be considered relevant for the Vector UPL and
7685 * not the internal UPLs.
7686 */
7687 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
7688 *empty = TRUE;
7689
7690 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
7691 /*
7692 * this is not a paging object
7693 * so we need to drop the paging reference
7694 * that was taken when we created the UPL
7695 * against this object
7696 */
7697 vm_object_activity_end(shadow_object);
7698 vm_object_collapse(shadow_object, 0, TRUE);
7699 } else {
7700 /*
7701 * we donated the paging reference to
7702 * the map object... vm_pageout_object_terminate
7703 * will drop this reference
7704 */
7705 }
7706 }
7707 vm_object_unlock(shadow_object);
7708 if (object != shadow_object)
7709 vm_object_unlock(object);
7710
7711 if(!isVectorUPL)
7712 upl_unlock(upl);
7713 else {
7714 /*
7715 * If we completed our operations on a UPL that is
7716 * part of a Vectored UPL and if empty is TRUE, then
7717 * we should go ahead and deallocate this UPL element.
7718 * Then we check if this was the last of the UPL elements
7719 * within that Vectored UPL. If so, set empty to TRUE
7720 * so that in ubc_upl_abort_range or ubc_upl_abort, we
7721 * can go ahead and deallocate the Vector UPL too.
7722 */
7723 if(*empty == TRUE) {
7724 *empty = vector_upl_set_subupl(vector_upl, upl,0);
7725 upl_deallocate(upl);
7726 }
7727 goto process_upl_to_abort;
7728 }
7729
7730 return KERN_SUCCESS;
7731 }
7732
7733
7734 kern_return_t
7735 upl_abort(
7736 upl_t upl,
7737 int error)
7738 {
7739 boolean_t empty;
7740
7741 if (upl == UPL_NULL)
7742 return KERN_INVALID_ARGUMENT;
7743
7744 return upl_abort_range(upl, 0, upl->size, error, &empty);
7745 }
7746
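/*
 * Illustrative sketch only (not compiled): how a caller might abort a UPL
 * whose backing I/O failed.  UPL_ABORT_ERROR marks any still-absent pages
 * as being in error so that waiting faulters see the failure (see the
 * m->absent handling in upl_abort_range() above); pages that were already
 * valid are simply unbusied, woken and requeued.  Whether to deallocate the
 * UPL afterwards is the caller's choice; the upl_deallocate() here is an
 * assumption of the example.
 */
#if 0
static void
example_abort_failed_io(upl_t upl)
{
	(void) upl_abort(upl, UPL_ABORT_ERROR);
	upl_deallocate(upl);
}
#endif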
7747
7748 /* an option on commit should be wire */
7749 kern_return_t
7750 upl_commit(
7751 upl_t upl,
7752 upl_page_info_t *page_list,
7753 mach_msg_type_number_t count)
7754 {
7755 boolean_t empty;
7756
7757 if (upl == UPL_NULL)
7758 return KERN_INVALID_ARGUMENT;
7759
7760 return upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty);
7761 }
7762
7763
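/*
 * iopl_valid_data:
 *
 * Once the I/O that backs an IO-wired UPL has filled its pages, this routine
 * turns them into ordinary valid pages.  Pages grabbed without zero-fill are
 * still busy+absent at this point; for each of them it clears "absent", marks
 * the page dirty and wired, wakes any waiters, and finally updates the object
 * and global wired-page counts.  Only plain UPL_IO_WIRE UPLs are supported,
 * as the panics below enforce.
 */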
7764 void
7765 iopl_valid_data(
7766 upl_t upl,
7767 vm_tag_t tag)
7768 {
7769 vm_object_t object;
7770 vm_offset_t offset;
7771 vm_page_t m, nxt_page = VM_PAGE_NULL;
7772 upl_size_t size;
7773 int wired_count = 0;
7774
7775 if (upl == NULL)
7776 panic("iopl_valid_data: NULL upl");
7777 if (vector_upl_is_valid(upl))
7778 panic("iopl_valid_data: vector upl");
7779 if ((upl->flags & (UPL_DEVICE_MEMORY|UPL_SHADOWED|UPL_ACCESS_BLOCKED|UPL_IO_WIRE|UPL_INTERNAL)) != UPL_IO_WIRE)
7780 panic("iopl_valid_data: unsupported upl, flags = %x", upl->flags);
7781
7782 object = upl->map_object;
7783
7784 if (object == kernel_object || object == compressor_object)
7785 panic("iopl_valid_data: object == kernel or compressor");
7786
7787 if (object->purgable == VM_PURGABLE_VOLATILE ||
7788 object->purgable == VM_PURGABLE_EMPTY)
7789 panic("iopl_valid_data: object %p purgable %d",
7790 object, object->purgable);
7791
7792 size = upl->size;
7793
7794 vm_object_lock(object);
7795 VM_OBJECT_WIRED_PAGE_UPDATE_START(object);
7796
7797 if (object->vo_size == size && object->resident_page_count == (size / PAGE_SIZE))
7798 nxt_page = (vm_page_t)vm_page_queue_first(&object->memq);
7799 else
7800 offset = 0 + upl->offset - object->paging_offset;
7801
7802 while (size) {
7803
7804 if (nxt_page != VM_PAGE_NULL) {
7805 m = nxt_page;
7806 nxt_page = (vm_page_t)vm_page_queue_next(&nxt_page->listq);
7807 } else {
7808 m = vm_page_lookup(object, offset);
7809 offset += PAGE_SIZE;
7810
7811 if (m == VM_PAGE_NULL)
7812 panic("iopl_valid_data: missing expected page at offset %lx", (long)offset);
7813 }
7814 if (m->busy) {
7815 if (!m->absent)
7816 panic("iopl_valid_data: busy page w/o absent");
7817
7818 if (m->pageq.next || m->pageq.prev)
7819 panic("iopl_valid_data: busy+absent page on page queue");
7820 if (m->reusable) {
7821 panic("iopl_valid_data: %p is reusable", m);
7822 }
7823
7824 m->absent = FALSE;
7825 m->dirty = TRUE;
7826 assert(m->vm_page_q_state == VM_PAGE_NOT_ON_Q);
7827 assert(m->wire_count == 0);
7828 m->wire_count++;
7829 assert(m->wire_count);
7830 if (m->wire_count == 1) {
7831 m->vm_page_q_state = VM_PAGE_IS_WIRED;
7832 wired_count++;
7833 } else {
7834 panic("iopl_valid_data: %p already wired\n", m);
7835 }
7836
7837 PAGE_WAKEUP_DONE(m);
7838 }
7839 size -= PAGE_SIZE;
7840 }
7841 if (wired_count) {
7842
7843 VM_OBJECT_WIRED_PAGE_COUNT(object, wired_count);
7844 assert(object->resident_page_count >= object->wired_page_count);
7845
7846 /* no need to adjust purgeable accounting for this object: */
7847 assert(object->purgable != VM_PURGABLE_VOLATILE);
7848 assert(object->purgable != VM_PURGABLE_EMPTY);
7849
7850 vm_page_lockspin_queues();
7851 vm_page_wire_count += wired_count;
7852 vm_page_unlock_queues();
7853 }
7854 VM_OBJECT_WIRED_PAGE_UPDATE_END(object, tag);
7855 vm_object_unlock(object);
7856 }
7857
7858
7859 void
7860 vm_object_set_pmap_cache_attr(
7861 vm_object_t object,
7862 upl_page_info_array_t user_page_list,
7863 unsigned int num_pages,
7864 boolean_t batch_pmap_op)
7865 {
7866 unsigned int cache_attr = 0;
7867
7868 cache_attr = object->wimg_bits & VM_WIMG_MASK;
7869 assert(user_page_list);
7870 if (cache_attr != VM_WIMG_USE_DEFAULT) {
7871 PMAP_BATCH_SET_CACHE_ATTR(object, user_page_list, cache_attr, num_pages, batch_pmap_op);
7872 }
7873 }
7874
7875
7876 boolean_t vm_object_iopl_wire_full(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t, vm_tag_t);
7877 kern_return_t vm_object_iopl_wire_empty(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t, vm_tag_t, vm_object_offset_t *, int);
7878
7879
7880
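/*
 * Fast paths for vm_object_iopl_request():
 *
 * vm_object_iopl_wire_full() handles the case where the object's resident
 * pages exactly cover the request: it walks the object's page list once,
 * wiring each page and filling in the lite list / page list, and returns
 * FALSE (sending the caller down the slow path) as soon as it meets a page
 * it can't take as-is (busy, absent, in error, being cleaned, etc.).
 *
 * vm_object_iopl_wire_empty() handles an object with no resident pages at
 * all: it grabs fresh pages (zero-filled unless UPL_NOZEROFILL/UPL_NOZEROFILLIO
 * was passed), inserts them, wires the ones that were actually zero-filled
 * (pages left absent for I/O are wired later, typically in iopl_valid_data()),
 * and defers the purgeable-ledger credit until the end.
 */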
7881 boolean_t
7882 vm_object_iopl_wire_full(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list,
7883 wpl_array_t lite_list, upl_control_flags_t cntrl_flags, vm_tag_t tag)
7884 {
7885 vm_page_t dst_page;
7886 unsigned int entry;
7887 int page_count;
7888 int delayed_unlock = 0;
7889 boolean_t retval = TRUE;
7890 ppnum_t phys_page;
7891
7892 vm_object_lock_assert_exclusive(object);
7893 assert(object->purgable != VM_PURGABLE_VOLATILE);
7894 assert(object->purgable != VM_PURGABLE_EMPTY);
7895 assert(object->pager == NULL);
7896 assert(object->copy == NULL);
7897 assert(object->shadow == NULL);
7898
7899 page_count = object->resident_page_count;
7900 dst_page = (vm_page_t)vm_page_queue_first(&object->memq);
7901
7902 vm_page_lock_queues();
7903
7904 while (page_count--) {
7905
7906 if (dst_page->busy ||
7907 dst_page->fictitious ||
7908 dst_page->absent ||
7909 dst_page->error ||
7910 dst_page->cleaning ||
7911 dst_page->restart ||
7912 dst_page->laundry) {
7913 retval = FALSE;
7914 goto done;
7915 }
7916 if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->written_by_kernel == TRUE) {
7917 retval = FALSE;
7918 goto done;
7919 }
7920 dst_page->reference = TRUE;
7921
7922 vm_page_wire(dst_page, tag, FALSE);
7923
7924 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
7925 SET_PAGE_DIRTY(dst_page, FALSE);
7926 }
7927 entry = (unsigned int)(dst_page->offset / PAGE_SIZE);
7928 assert(entry >= 0 && entry < object->resident_page_count);
7929 lite_list[entry>>5] |= 1 << (entry & 31);
7930
7931 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
7932
7933 if (phys_page > upl->highest_page)
7934 upl->highest_page = phys_page;
7935
7936 if (user_page_list) {
7937 user_page_list[entry].phys_addr = phys_page;
7938 user_page_list[entry].absent = dst_page->absent;
7939 user_page_list[entry].dirty = dst_page->dirty;
7940 user_page_list[entry].free_when_done = dst_page->free_when_done;
7941 user_page_list[entry].precious = dst_page->precious;
7942 user_page_list[entry].device = FALSE;
7943 user_page_list[entry].speculative = FALSE;
7944 user_page_list[entry].cs_validated = FALSE;
7945 user_page_list[entry].cs_tainted = FALSE;
7946 user_page_list[entry].cs_nx = FALSE;
7947 user_page_list[entry].needed = FALSE;
7948 user_page_list[entry].mark = FALSE;
7949 }
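		/*
		 * Periodically yield the page-queue lock so this walk over a
		 * potentially large object doesn't hold it for too long;
		 * 256 pages is the batch processed between yields here.
		 */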
7950 if (delayed_unlock++ > 256) {
7951 delayed_unlock = 0;
7952 lck_mtx_yield(&vm_page_queue_lock);
7953
7954 VM_CHECK_MEMORYSTATUS;
7955 }
7956 dst_page = (vm_page_t)vm_page_queue_next(&dst_page->listq);
7957 }
7958 done:
7959 vm_page_unlock_queues();
7960
7961 VM_CHECK_MEMORYSTATUS;
7962
7963 return (retval);
7964 }
7965
7966
7967 kern_return_t
7968 vm_object_iopl_wire_empty(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list,
7969 wpl_array_t lite_list, upl_control_flags_t cntrl_flags, vm_tag_t tag, vm_object_offset_t *dst_offset, int page_count)
7970 {
7971 vm_page_t dst_page;
7972 boolean_t no_zero_fill = FALSE;
7973 int interruptible;
7974 int pages_wired = 0;
7975 int pages_inserted = 0;
7976 int entry = 0;
7977 uint64_t delayed_ledger_update = 0;
7978 kern_return_t ret = KERN_SUCCESS;
7979 int grab_options;
7980 ppnum_t phys_page;
7981
7982 vm_object_lock_assert_exclusive(object);
7983 assert(object->purgable != VM_PURGABLE_VOLATILE);
7984 assert(object->purgable != VM_PURGABLE_EMPTY);
7985 assert(object->pager == NULL);
7986 assert(object->copy == NULL);
7987 assert(object->shadow == NULL);
7988
7989 if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
7990 interruptible = THREAD_ABORTSAFE;
7991 else
7992 interruptible = THREAD_UNINT;
7993
7994 if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO))
7995 no_zero_fill = TRUE;
7996
7997 grab_options = 0;
7998 #if CONFIG_SECLUDED_MEMORY
7999 if (object->can_grab_secluded) {
8000 grab_options |= VM_PAGE_GRAB_SECLUDED;
8001 }
8002 #endif /* CONFIG_SECLUDED_MEMORY */
8003
8004 while (page_count--) {
8005
8006 while ((dst_page = vm_page_grab_options(grab_options))
8007 == VM_PAGE_NULL) {
8008
8009 OSAddAtomic(page_count, &vm_upl_wait_for_pages);
8010
8011 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
8012
8013 if (vm_page_wait(interruptible) == FALSE) {
8014 /*
8015 * interrupted case
8016 */
8017 OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
8018
8019 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
8020
8021 ret = MACH_SEND_INTERRUPTED;
8022 goto done;
8023 }
8024 OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
8025
8026 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
8027 }
8028 if (no_zero_fill == FALSE)
8029 vm_page_zero_fill(dst_page);
8030 else
8031 dst_page->absent = TRUE;
8032
8033 dst_page->reference = TRUE;
8034
8035 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
8036 SET_PAGE_DIRTY(dst_page, FALSE);
8037 }
8038 if (dst_page->absent == FALSE) {
8039 assert(dst_page->vm_page_q_state == VM_PAGE_NOT_ON_Q);
8040 assert(dst_page->wire_count == 0);
8041 dst_page->wire_count++;
8042 dst_page->vm_page_q_state = VM_PAGE_IS_WIRED;
8043 assert(dst_page->wire_count);
8044 pages_wired++;
8045 PAGE_WAKEUP_DONE(dst_page);
8046 }
8047 pages_inserted++;
8048
8049 vm_page_insert_internal(dst_page, object, *dst_offset, tag, FALSE, TRUE, TRUE, TRUE, &delayed_ledger_update);
8050
8051 lite_list[entry>>5] |= 1 << (entry & 31);
8052
8053 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8054
8055 if (phys_page > upl->highest_page)
8056 upl->highest_page = phys_page;
8057
8058 if (user_page_list) {
8059 user_page_list[entry].phys_addr = phys_page;
8060 user_page_list[entry].absent = dst_page->absent;
8061 user_page_list[entry].dirty = dst_page->dirty;
8062 user_page_list[entry].free_when_done = FALSE;
8063 user_page_list[entry].precious = FALSE;
8064 user_page_list[entry].device = FALSE;
8065 user_page_list[entry].speculative = FALSE;
8066 user_page_list[entry].cs_validated = FALSE;
8067 user_page_list[entry].cs_tainted = FALSE;
8068 user_page_list[entry].cs_nx = FALSE;
8069 user_page_list[entry].needed = FALSE;
8070 user_page_list[entry].mark = FALSE;
8071 }
8072 entry++;
8073 *dst_offset += PAGE_SIZE_64;
8074 }
8075 done:
8076 if (pages_wired) {
8077 vm_page_lockspin_queues();
8078 vm_page_wire_count += pages_wired;
8079 vm_page_unlock_queues();
8080 }
8081 if (pages_inserted) {
8082 if (object->internal) {
8083 OSAddAtomic(pages_inserted, &vm_page_internal_count);
8084 } else {
8085 OSAddAtomic(pages_inserted, &vm_page_external_count);
8086 }
8087 }
8088 if (delayed_ledger_update) {
8089 task_t owner;
8090
8091 owner = object->vo_purgeable_owner;
8092 assert(owner);
8093
8094 /* more non-volatile bytes */
8095 ledger_credit(owner->ledger,
8096 task_ledgers.purgeable_nonvolatile,
8097 delayed_ledger_update);
8098 /* more footprint */
8099 ledger_credit(owner->ledger,
8100 task_ledgers.phys_footprint,
8101 delayed_ledger_update);
8102 }
8103 return (ret);
8104 }
8105
8106
8107 unsigned int vm_object_iopl_request_sleep_for_cleaning = 0;
8108
8109
8110 kern_return_t
8111 vm_object_iopl_request(
8112 vm_object_t object,
8113 vm_object_offset_t offset,
8114 upl_size_t size,
8115 upl_t *upl_ptr,
8116 upl_page_info_array_t user_page_list,
8117 unsigned int *page_list_count,
8118 upl_control_flags_t cntrl_flags,
8119 vm_tag_t tag)
8120 {
8121 vm_page_t dst_page;
8122 vm_object_offset_t dst_offset;
8123 upl_size_t xfer_size;
8124 upl_t upl = NULL;
8125 unsigned int entry;
8126 wpl_array_t lite_list = NULL;
8127 int no_zero_fill = FALSE;
8128 unsigned int size_in_pages;
8129 u_int32_t psize;
8130 kern_return_t ret;
8131 vm_prot_t prot;
8132 struct vm_object_fault_info fault_info;
8133 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
8134 struct vm_page_delayed_work *dwp;
8135 int dw_count;
8136 int dw_limit;
8137 int dw_index;
8138 boolean_t caller_lookup;
8139 int io_tracking_flag = 0;
8140 int interruptible;
8141 ppnum_t phys_page;
8142
8143 boolean_t set_cache_attr_needed = FALSE;
8144 boolean_t free_wired_pages = FALSE;
8145 boolean_t fast_path_empty_req = FALSE;
8146 boolean_t fast_path_full_req = FALSE;
8147
8148 if (cntrl_flags & ~UPL_VALID_FLAGS) {
8149 /*
8150 * For forward compatibility's sake,
8151 * reject any unknown flag.
8152 */
8153 return KERN_INVALID_VALUE;
8154 }
8155 if (vm_lopage_needed == FALSE)
8156 cntrl_flags &= ~UPL_NEED_32BIT_ADDR;
8157
8158 if (cntrl_flags & UPL_NEED_32BIT_ADDR) {
8159 if ( (cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE))
8160 return KERN_INVALID_VALUE;
8161
8162 if (object->phys_contiguous) {
8163 if ((offset + object->vo_shadow_offset) >= (vm_object_offset_t)max_valid_dma_address)
8164 return KERN_INVALID_ADDRESS;
8165
8166 if (((offset + object->vo_shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address)
8167 return KERN_INVALID_ADDRESS;
8168 }
8169 }
8170 if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO))
8171 no_zero_fill = TRUE;
8172
8173 if (cntrl_flags & UPL_COPYOUT_FROM)
8174 prot = VM_PROT_READ;
8175 else
8176 prot = VM_PROT_READ | VM_PROT_WRITE;
8177
8178 if ((!object->internal) && (object->paging_offset != 0))
8179 panic("vm_object_iopl_request: external object with non-zero paging offset\n");
8180
8181 #if CONFIG_IOSCHED || UPL_DEBUG
8182 if ((object->io_tracking && object != kernel_object) || upl_debug_enabled)
8183 io_tracking_flag |= UPL_CREATE_IO_TRACKING;
8184 #endif
8185
8186 #if CONFIG_IOSCHED
8187 if (object->io_tracking) {
8188 /* Check if we're dealing with the kernel object. We do not support expedite on kernel object UPLs */
8189 if (object != kernel_object)
8190 io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
8191 }
8192 #endif
8193
8194 if (object->phys_contiguous)
8195 psize = PAGE_SIZE;
8196 else
8197 psize = size;
8198
8199 if (cntrl_flags & UPL_SET_INTERNAL) {
8200 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
8201
8202 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
8203 lite_list = (wpl_array_t) (((uintptr_t)user_page_list) +
8204 ((psize / PAGE_SIZE) * sizeof(upl_page_info_t)));
8205 if (size == 0) {
8206 user_page_list = NULL;
8207 lite_list = NULL;
8208 }
8209 } else {
8210 upl = upl_create(UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
8211
8212 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
8213 if (size == 0) {
8214 lite_list = NULL;
8215 }
8216 }
8217 if (user_page_list)
8218 user_page_list[0].device = FALSE;
8219 *upl_ptr = upl;
8220
8221 upl->map_object = object;
8222 upl->size = size;
8223
8224 size_in_pages = size / PAGE_SIZE;
8225
8226 if (object == kernel_object &&
8227 !(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS))) {
8228 upl->flags |= UPL_KERNEL_OBJECT;
8229 #if UPL_DEBUG
8230 vm_object_lock(object);
8231 #else
8232 vm_object_lock_shared(object);
8233 #endif
8234 } else {
8235 vm_object_lock(object);
8236 vm_object_activity_begin(object);
8237 }
8238 /*
8239 * paging in progress also protects the paging_offset
8240 */
8241 upl->offset = offset + object->paging_offset;
8242
8243 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8244 /*
8245 * The user requested that access to the pages in this UPL
8246 * be blocked until the UPL is committed or aborted.
8247 */
8248 upl->flags |= UPL_ACCESS_BLOCKED;
8249 }
8250
8251 #if CONFIG_IOSCHED || UPL_DEBUG
8252 if (upl->flags & UPL_TRACKED_BY_OBJECT) {
8253 vm_object_activity_begin(object);
8254 queue_enter(&object->uplq, upl, upl_t, uplq);
8255 }
8256 #endif
8257
8258 if (object->phys_contiguous) {
8259
8260 if (upl->flags & UPL_ACCESS_BLOCKED) {
8261 assert(!object->blocked_access);
8262 object->blocked_access = TRUE;
8263 }
8264
8265 vm_object_unlock(object);
8266
8267 /*
8268 * don't need any shadow mappings for this one
8269 * since it is already I/O memory
8270 */
8271 upl->flags |= UPL_DEVICE_MEMORY;
8272
8273 upl->highest_page = (ppnum_t) ((offset + object->vo_shadow_offset + size - 1)>>PAGE_SHIFT);
8274
8275 if (user_page_list) {
8276 user_page_list[0].phys_addr = (ppnum_t) ((offset + object->vo_shadow_offset)>>PAGE_SHIFT);
8277 user_page_list[0].device = TRUE;
8278 }
8279 if (page_list_count != NULL) {
8280 if (upl->flags & UPL_INTERNAL)
8281 *page_list_count = 0;
8282 else
8283 *page_list_count = 1;
8284 }
8285 return KERN_SUCCESS;
8286 }
8287 if (object != kernel_object && object != compressor_object) {
8288 /*
8289 * Protect user space from future COW operations
8290 */
8291 #if VM_OBJECT_TRACKING_OP_TRUESHARE
8292 if (!object->true_share &&
8293 vm_object_tracking_inited) {
8294 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
8295 int num = 0;
8296
8297 num = OSBacktrace(bt,
8298 VM_OBJECT_TRACKING_BTDEPTH);
8299 btlog_add_entry(vm_object_tracking_btlog,
8300 object,
8301 VM_OBJECT_TRACKING_OP_TRUESHARE,
8302 bt,
8303 num);
8304 }
8305 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
8306
8307 vm_object_lock_assert_exclusive(object);
8308 object->true_share = TRUE;
8309
8310 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
8311 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
8312 }
8313
8314 if (!(cntrl_flags & UPL_COPYOUT_FROM) &&
8315 object->copy != VM_OBJECT_NULL) {
8316 /*
8317 * Honor copy-on-write obligations
8318 *
8319 * The caller is gathering these pages and
8320 * might modify their contents. We need to
8321 * make sure that the copy object has its own
8322 * private copies of these pages before we let
8323 * the caller modify them.
8324 *
8325 * NOTE: someone else could map the original object
8326 * after we've done this copy-on-write here, and they
8327 * could then see an inconsistent picture of the memory
8328 * while it's being modified via the UPL. To prevent this,
8329 * we would have to block access to these pages until the
8330 * UPL is released. We could use the UPL_BLOCK_ACCESS
8331 * code path for that...
8332 */
8333 vm_object_update(object,
8334 offset,
8335 size,
8336 NULL,
8337 NULL,
8338 FALSE, /* should_return */
8339 MEMORY_OBJECT_COPY_SYNC,
8340 VM_PROT_NO_CHANGE);
8341 #if DEVELOPMENT || DEBUG
8342 iopl_cow++;
8343 iopl_cow_pages += size >> PAGE_SHIFT;
8344 #endif
8345 }
8346 if (!(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS)) &&
8347 object->purgable != VM_PURGABLE_VOLATILE &&
8348 object->purgable != VM_PURGABLE_EMPTY &&
8349 object->copy == NULL &&
8350 size == object->vo_size &&
8351 offset == 0 &&
8352 object->shadow == NULL &&
8353 object->pager == NULL)
8354 {
8355 if (object->resident_page_count == size_in_pages)
8356 {
8357 assert(object != compressor_object);
8358 assert(object != kernel_object);
8359 fast_path_full_req = TRUE;
8360 }
8361 else if (object->resident_page_count == 0)
8362 {
8363 assert(object != compressor_object);
8364 assert(object != kernel_object);
8365 fast_path_empty_req = TRUE;
8366 set_cache_attr_needed = TRUE;
8367 }
8368 }
8369
8370 if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
8371 interruptible = THREAD_ABORTSAFE;
8372 else
8373 interruptible = THREAD_UNINT;
8374
8375 entry = 0;
8376
8377 xfer_size = size;
8378 dst_offset = offset;
8379 dw_count = 0;
8380
8381 if (fast_path_full_req) {
8382
8383 if (vm_object_iopl_wire_full(object, upl, user_page_list, lite_list, cntrl_flags, tag) == TRUE)
8384 goto finish;
8385 /*
8386 * we couldn't complete the processing of this request on the fast path
8387 * so fall through to the slow path and finish up
8388 */
8389
8390 } else if (fast_path_empty_req) {
8391
8392 if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8393 ret = KERN_MEMORY_ERROR;
8394 goto return_err;
8395 }
8396 ret = vm_object_iopl_wire_empty(object, upl, user_page_list, lite_list, cntrl_flags, tag, &dst_offset, size_in_pages);
8397
8398 if (ret) {
8399 free_wired_pages = TRUE;
8400 goto return_err;
8401 }
8402 goto finish;
8403 }
8404
8405 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
8406 fault_info.user_tag = 0;
8407 fault_info.lo_offset = offset;
8408 fault_info.hi_offset = offset + xfer_size;
8409 fault_info.no_cache = FALSE;
8410 fault_info.stealth = FALSE;
8411 fault_info.io_sync = FALSE;
8412 fault_info.cs_bypass = FALSE;
8413 fault_info.mark_zf_absent = TRUE;
8414 fault_info.interruptible = interruptible;
8415 fault_info.batch_pmap_op = TRUE;
8416
8417 dwp = &dw_array[0];
8418 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
8419
8420 while (xfer_size) {
8421 vm_fault_return_t result;
8422
8423 dwp->dw_mask = 0;
8424
8425 if (fast_path_full_req) {
8426 /*
8427 * if we get here, it means that we ran into a page
8428 * state we couldn't handle in the fast path and
8429 * bailed out to the slow path... since the order
8430 * we look at pages is different between the 2 paths,
8431 * the following check is needed to determine whether
8432 * this page was already processed in the fast path
8433 */
8434 if (lite_list[entry>>5] & (1 << (entry & 31)))
8435 goto skip_page;
8436 }
8437 dst_page = vm_page_lookup(object, dst_offset);
8438
8439 if (dst_page == VM_PAGE_NULL ||
8440 dst_page->busy ||
8441 dst_page->error ||
8442 dst_page->restart ||
8443 dst_page->absent ||
8444 dst_page->fictitious) {
8445
8446 if (object == kernel_object)
8447 panic("vm_object_iopl_request: missing/bad page in kernel object\n");
8448 if (object == compressor_object)
8449 panic("vm_object_iopl_request: missing/bad page in compressor object\n");
8450
8451 if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8452 ret = KERN_MEMORY_ERROR;
8453 goto return_err;
8454 }
8455 set_cache_attr_needed = TRUE;
8456
8457 /*
8458 * We just looked up the page and the result remains valid
8459 * until the object lock is released, so send it to
8460 * vm_fault_page() (as "dst_page"), to avoid having to
8461 * look it up again there.
8462 */
8463 caller_lookup = TRUE;
8464
8465 do {
8466 vm_page_t top_page;
8467 kern_return_t error_code;
8468
8469 fault_info.cluster_size = xfer_size;
8470
8471 vm_object_paging_begin(object);
8472
8473 result = vm_fault_page(object, dst_offset,
8474 prot | VM_PROT_WRITE, FALSE,
8475 caller_lookup,
8476 &prot, &dst_page, &top_page,
8477 (int *)0,
8478 &error_code, no_zero_fill,
8479 FALSE, &fault_info);
8480
8481 /* our lookup is no longer valid at this point */
8482 caller_lookup = FALSE;
8483
8484 switch (result) {
8485
8486 case VM_FAULT_SUCCESS:
8487
8488 if ( !dst_page->absent) {
8489 PAGE_WAKEUP_DONE(dst_page);
8490 } else {
8491 /*
8492 * we only get back an absent page if we
8493 * requested that it not be zero-filled
8494 * because we are about to fill it via I/O
8495 *
8496 * absent pages should be left BUSY
8497 * to prevent them from being faulted
8498 * into an address space before we've
8499 * had a chance to complete the I/O on
8500 * them since they may contain info that
8501 * shouldn't be seen by the faulting task
8502 */
8503 }
8504 /*
8505 * Release paging references and
8506 * top-level placeholder page, if any.
8507 */
8508 if (top_page != VM_PAGE_NULL) {
8509 vm_object_t local_object;
8510
8511 local_object = VM_PAGE_OBJECT(top_page);
8512
8513 /*
8514 * comparing 2 packed pointers
8515 */
8516 if (top_page->vm_page_object != dst_page->vm_page_object) {
8517 vm_object_lock(local_object);
8518 VM_PAGE_FREE(top_page);
8519 vm_object_paging_end(local_object);
8520 vm_object_unlock(local_object);
8521 } else {
8522 VM_PAGE_FREE(top_page);
8523 vm_object_paging_end(local_object);
8524 }
8525 }
8526 vm_object_paging_end(object);
8527 break;
8528
8529 case VM_FAULT_RETRY:
8530 vm_object_lock(object);
8531 break;
8532
8533 case VM_FAULT_MEMORY_SHORTAGE:
8534 OSAddAtomic((size_in_pages - entry), &vm_upl_wait_for_pages);
8535
8536 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
8537
8538 if (vm_page_wait(interruptible)) {
8539 OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
8540
8541 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
8542 vm_object_lock(object);
8543
8544 break;
8545 }
8546 OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
8547
8548 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
8549
8550 /* fall thru */
8551
8552 case VM_FAULT_INTERRUPTED:
8553 error_code = MACH_SEND_INTERRUPTED;
8554 case VM_FAULT_MEMORY_ERROR:
8555 memory_error:
8556 ret = (error_code ? error_code: KERN_MEMORY_ERROR);
8557
8558 vm_object_lock(object);
8559 goto return_err;
8560
8561 case VM_FAULT_SUCCESS_NO_VM_PAGE:
8562 /* success but no page: fail */
8563 vm_object_paging_end(object);
8564 vm_object_unlock(object);
8565 goto memory_error;
8566
8567 default:
8568 panic("vm_object_iopl_request: unexpected error"
8569 " 0x%x from vm_fault_page()\n", result);
8570 }
8571 } while (result != VM_FAULT_SUCCESS);
8572
8573 }
8574 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8575
8576 if (upl->flags & UPL_KERNEL_OBJECT)
8577 goto record_phys_addr;
8578
8579 if (dst_page->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
8580 dst_page->busy = TRUE;
8581 goto record_phys_addr;
8582 }
8583
8584 if (dst_page->cleaning) {
8585 /*
8586 * Someone else is cleaning this page in place.
8587 * In theory, we should be able to proceed and use this
8588 * page, but they'll probably end up clearing the "busy"
8589 * bit on it in upl_commit_range() even though they didn't
8590 * set it, which would clear our "busy" bit and open
8591 * us to race conditions.
8592 * We'd better wait for the cleaning to complete and
8593 * then try again.
8594 */
8595 vm_object_iopl_request_sleep_for_cleaning++;
8596 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
8597 continue;
8598 }
8599 if (dst_page->laundry)
8600 vm_pageout_steal_laundry(dst_page, FALSE);
8601
8602 if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) &&
8603 phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) {
8604 vm_page_t low_page;
8605 int refmod;
8606
8607 /*
8608 * support devices that can't DMA above 32 bits
8609 * by substituting pages from a pool of low address
8610 * memory for any pages we find above the 4G mark
8611 * can't substitute if the page is already wired because
8612 * we don't know whether that physical address has been
8613 * handed out to some other 64 bit capable DMA device to use
8614 */
8615 if (VM_PAGE_WIRED(dst_page)) {
8616 ret = KERN_PROTECTION_FAILURE;
8617 goto return_err;
8618 }
8619 low_page = vm_page_grablo();
8620
8621 if (low_page == VM_PAGE_NULL) {
8622 ret = KERN_RESOURCE_SHORTAGE;
8623 goto return_err;
8624 }
8625 /*
8626 * from here until the vm_page_replace completes
8627 * we mustn't drop the object lock... we don't
8628 * want anyone refaulting this page in and using
8629 * it after we disconnect it... we want the fault
8630 * to find the new page being substituted.
8631 */
8632 if (dst_page->pmapped)
8633 refmod = pmap_disconnect(phys_page);
8634 else
8635 refmod = 0;
8636
8637 if (!dst_page->absent)
8638 vm_page_copy(dst_page, low_page);
8639
8640 low_page->reference = dst_page->reference;
8641 low_page->dirty = dst_page->dirty;
8642 low_page->absent = dst_page->absent;
8643
8644 if (refmod & VM_MEM_REFERENCED)
8645 low_page->reference = TRUE;
8646 if (refmod & VM_MEM_MODIFIED) {
8647 SET_PAGE_DIRTY(low_page, FALSE);
8648 }
8649
8650 vm_page_replace(low_page, object, dst_offset);
8651
8652 dst_page = low_page;
8653 /*
8654 * vm_page_grablo returned the page marked
8655 * BUSY... we don't need a PAGE_WAKEUP_DONE
8656 * here, because we've never dropped the object lock
8657 */
8658 if ( !dst_page->absent)
8659 dst_page->busy = FALSE;
8660
8661 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8662 }
8663 if ( !dst_page->busy)
8664 dwp->dw_mask |= DW_vm_page_wire;
8665
8666 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8667 /*
8668 * Mark the page "busy" to block any future page fault
8669 * on this page in addition to wiring it.
8670 * We'll also remove the mapping
8671 * of all these pages before leaving this routine.
8672 */
8673 assert(!dst_page->fictitious);
8674 dst_page->busy = TRUE;
8675 }
8676 /*
8677 * expect the page to be used
8678 * page queues lock must be held to set 'reference'
8679 */
8680 dwp->dw_mask |= DW_set_reference;
8681
8682 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
8683 SET_PAGE_DIRTY(dst_page, TRUE);
8684 }
8685 if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->written_by_kernel == TRUE) {
8686 pmap_sync_page_attributes_phys(phys_page);
8687 dst_page->written_by_kernel = FALSE;
8688 }
8689
8690 record_phys_addr:
8691 if (dst_page->busy)
8692 upl->flags |= UPL_HAS_BUSY;
8693
8694 lite_list[entry>>5] |= 1 << (entry & 31);
8695
8696 if (phys_page > upl->highest_page)
8697 upl->highest_page = phys_page;
8698
8699 if (user_page_list) {
8700 user_page_list[entry].phys_addr = phys_page;
8701 user_page_list[entry].free_when_done = dst_page->free_when_done;
8702 user_page_list[entry].absent = dst_page->absent;
8703 user_page_list[entry].dirty = dst_page->dirty;
8704 user_page_list[entry].precious = dst_page->precious;
8705 user_page_list[entry].device = FALSE;
8706 user_page_list[entry].needed = FALSE;
8707 if (dst_page->clustered == TRUE)
8708 user_page_list[entry].speculative = (dst_page->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE;
8709 else
8710 user_page_list[entry].speculative = FALSE;
8711 user_page_list[entry].cs_validated = dst_page->cs_validated;
8712 user_page_list[entry].cs_tainted = dst_page->cs_tainted;
8713 user_page_list[entry].cs_nx = dst_page->cs_nx;
8714 user_page_list[entry].mark = FALSE;
8715 }
8716 if (object != kernel_object && object != compressor_object) {
8717 /*
8718 * someone is explicitly grabbing this page...
8719 * update clustered and speculative state
8720 *
8721 */
8722 if (dst_page->clustered)
8723 VM_PAGE_CONSUME_CLUSTERED(dst_page);
8724 }
8725 skip_page:
8726 entry++;
8727 dst_offset += PAGE_SIZE_64;
8728 xfer_size -= PAGE_SIZE;
8729
8730 if (dwp->dw_mask) {
8731 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
8732
8733 if (dw_count >= dw_limit) {
8734 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
8735
8736 dwp = &dw_array[0];
8737 dw_count = 0;
8738 }
8739 }
8740 }
8741 assert(entry == size_in_pages);
8742
8743 if (dw_count)
8744 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
8745 finish:
8746 if (user_page_list && set_cache_attr_needed == TRUE)
8747 vm_object_set_pmap_cache_attr(object, user_page_list, size_in_pages, TRUE);
8748
8749 if (page_list_count != NULL) {
8750 if (upl->flags & UPL_INTERNAL)
8751 *page_list_count = 0;
8752 else if (*page_list_count > size_in_pages)
8753 *page_list_count = size_in_pages;
8754 }
8755 vm_object_unlock(object);
8756
8757 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8758 /*
8759 * We've marked all the pages "busy" so that future
8760 * page faults will block.
8761 * Now remove the mapping for these pages, so that they
8762 * can't be accessed without causing a page fault.
8763 */
8764 vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
8765 PMAP_NULL, 0, VM_PROT_NONE);
8766 assert(!object->blocked_access);
8767 object->blocked_access = TRUE;
8768 }
8769
8770 return KERN_SUCCESS;
8771
8772 return_err:
8773 dw_index = 0;
8774
8775 for (; offset < dst_offset; offset += PAGE_SIZE) {
8776 boolean_t need_unwire;
8777
8778 dst_page = vm_page_lookup(object, offset);
8779
8780 if (dst_page == VM_PAGE_NULL)
8781 panic("vm_object_iopl_request: Wired page missing. \n");
8782
8783 /*
8784 * if we've already processed this page in an earlier
8785 * dw_do_work, we need to undo the wiring... we will
8786 * leave the dirty and reference bits on if they
8787 * were set, since we don't have a good way of knowing
8788 * what the previous state was and we won't get here
8789 * under any normal circumstances... we will always
8790 * clear BUSY and wakeup any waiters via vm_page_free
8791 * or PAGE_WAKEUP_DONE
8792 */
8793 need_unwire = TRUE;
8794
8795 if (dw_count) {
8796 if (dw_array[dw_index].dw_m == dst_page) {
8797 /*
8798 * still in the deferred work list
8799 * which means we haven't yet called
8800 * vm_page_wire on this page
8801 */
8802 need_unwire = FALSE;
8803
8804 dw_index++;
8805 dw_count--;
8806 }
8807 }
8808 vm_page_lock_queues();
8809
8810 if (dst_page->absent || free_wired_pages == TRUE) {
8811 vm_page_free(dst_page);
8812
8813 need_unwire = FALSE;
8814 } else {
8815 if (need_unwire == TRUE)
8816 vm_page_unwire(dst_page, TRUE);
8817
8818 PAGE_WAKEUP_DONE(dst_page);
8819 }
8820 vm_page_unlock_queues();
8821
8822 if (need_unwire == TRUE)
8823 VM_STAT_INCR(reactivations);
8824 }
8825 #if UPL_DEBUG
8826 upl->upl_state = 2;
8827 #endif
8828 if (! (upl->flags & UPL_KERNEL_OBJECT)) {
8829 vm_object_activity_end(object);
8830 vm_object_collapse(object, 0, TRUE);
8831 }
8832 vm_object_unlock(object);
8833 upl_destroy(upl);
8834
8835 return ret;
8836 }
8837
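/*
 * Illustrative sketch only (not compiled): one way a caller might use
 * vm_object_iopl_request() to wire a range of an object for I/O and then
 * release it.  The flag combination and the commit-then-deallocate teardown
 * are assumptions made for the example; real callers pick flags to match
 * their needs.  The caller is assumed to hold a reference on the object and
 * must not hold its lock (this routine takes the lock itself).
 */
#if 0
static kern_return_t
example_wire_for_io(vm_object_t object, vm_object_offset_t offset,
		    upl_size_t size, vm_tag_t tag)
{
	upl_t		upl = NULL;
	unsigned int	page_list_count = 0;
	kern_return_t	kr;

	kr = vm_object_iopl_request(object, offset, size, &upl,
				    NULL,	/* UPL_SET_INTERNAL keeps the page list inside the UPL */
				    &page_list_count,
				    UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE,
				    tag);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... perform the I/O against the wired pages described by the UPL ... */

	(void) upl_commit(upl, NULL, 0);	/* committing the full range drops the wiring */
	upl_deallocate(upl);
	return KERN_SUCCESS;
}
#endif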
8838 kern_return_t
8839 upl_transpose(
8840 upl_t upl1,
8841 upl_t upl2)
8842 {
8843 kern_return_t retval;
8844 boolean_t upls_locked;
8845 vm_object_t object1, object2;
8846
8847 if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2 || ((upl1->flags & UPL_VECTOR)==UPL_VECTOR) || ((upl2->flags & UPL_VECTOR)==UPL_VECTOR)) {
8848 return KERN_INVALID_ARGUMENT;
8849 }
8850
8851 upls_locked = FALSE;
8852
8853 /*
8854 * Since we need to lock both UPLs at the same time,
8855 * avoid deadlocks by always taking locks in the same order.
8856 */
8857 if (upl1 < upl2) {
8858 upl_lock(upl1);
8859 upl_lock(upl2);
8860 } else {
8861 upl_lock(upl2);
8862 upl_lock(upl1);
8863 }
8864 upls_locked = TRUE; /* the UPLs will need to be unlocked */
8865
8866 object1 = upl1->map_object;
8867 object2 = upl2->map_object;
8868
8869 if (upl1->offset != 0 || upl2->offset != 0 ||
8870 upl1->size != upl2->size) {
8871 /*
8872 * We deal only with full objects, not subsets.
8873 * That's because we exchange the entire backing store info
8874 * for the objects: pager, resident pages, etc... We can't do
8875 * only part of it.
8876 */
8877 retval = KERN_INVALID_VALUE;
8878 goto done;
8879 }
8880
8881 /*
8882 * Transpose the VM objects' backing store.
8883 */
8884 retval = vm_object_transpose(object1, object2,
8885 (vm_object_size_t) upl1->size);
8886
8887 if (retval == KERN_SUCCESS) {
8888 /*
8889 * Make each UPL point to the correct VM object, i.e. the
8890 * object holding the pages that the UPL refers to...
8891 */
8892 #if CONFIG_IOSCHED || UPL_DEBUG
8893 if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8894 vm_object_lock(object1);
8895 vm_object_lock(object2);
8896 }
8897 if (upl1->flags & UPL_TRACKED_BY_OBJECT)
8898 queue_remove(&object1->uplq, upl1, upl_t, uplq);
8899 if (upl2->flags & UPL_TRACKED_BY_OBJECT)
8900 queue_remove(&object2->uplq, upl2, upl_t, uplq);
8901 #endif
8902 upl1->map_object = object2;
8903 upl2->map_object = object1;
8904
8905 #if CONFIG_IOSCHED || UPL_DEBUG
8906 if (upl1->flags & UPL_TRACKED_BY_OBJECT)
8907 queue_enter(&object2->uplq, upl1, upl_t, uplq);
8908 if (upl2->flags & UPL_TRACKED_BY_OBJECT)
8909 queue_enter(&object1->uplq, upl2, upl_t, uplq);
8910 if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8911 vm_object_unlock(object2);
8912 vm_object_unlock(object1);
8913 }
8914 #endif
8915 }
8916
8917 done:
8918 /*
8919 * Cleanup.
8920 */
8921 if (upls_locked) {
8922 upl_unlock(upl1);
8923 upl_unlock(upl2);
8924 upls_locked = FALSE;
8925 }
8926
8927 return retval;
8928 }
8929
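/*
 * upl_range_needed() lets a caller flag the pages of an internal UPL that it
 * actually asked for, as opposed to pages pulled in only by speculative
 * read-ahead.  upl_abort_range() consults this "needed" bit: an absent page
 * that was merely clustered in (clustered && !needed) is freed on abort,
 * since nobody is waiting for it and nobody would ever clear its error state.
 */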
8930 void
8931 upl_range_needed(
8932 upl_t upl,
8933 int index,
8934 int count)
8935 {
8936 upl_page_info_t *user_page_list;
8937 int size_in_pages;
8938
8939 if ( !(upl->flags & UPL_INTERNAL) || count <= 0)
8940 return;
8941
8942 size_in_pages = upl->size / PAGE_SIZE;
8943
8944 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
8945
8946 while (count-- && index < size_in_pages)
8947 user_page_list[index++].needed = TRUE;
8948 }
8949
8950
8951 /*
8952 * Reserve of virtual addresses in the kernel address space.
8953 * We need to map the physical pages in the kernel, so that we
8954 * can call the code-signing or slide routines with a kernel
8955 * virtual address. We keep this pool of pre-allocated kernel
8956 * virtual addresses so that we don't have to scan the kernel's
8957 * virtual address space each time we need to work with
8958 * a physical page.
8959 */
8960 decl_simple_lock_data(,vm_paging_lock)
8961 #define VM_PAGING_NUM_PAGES 64
8962 vm_map_offset_t vm_paging_base_address = 0;
8963 boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
8964 int vm_paging_max_index = 0;
8965 int vm_paging_page_waiter = 0;
8966 int vm_paging_page_waiter_total = 0;
8967 unsigned long vm_paging_no_kernel_page = 0;
8968 unsigned long vm_paging_objects_mapped = 0;
8969 unsigned long vm_paging_pages_mapped = 0;
8970 unsigned long vm_paging_objects_mapped_slow = 0;
8971 unsigned long vm_paging_pages_mapped_slow = 0;
8972
8973 void
8974 vm_paging_map_init(void)
8975 {
8976 kern_return_t kr;
8977 vm_map_offset_t page_map_offset;
8978 vm_map_entry_t map_entry;
8979
8980 assert(vm_paging_base_address == 0);
8981
8982 /*
8983 * Initialize our pool of pre-allocated kernel
8984 * virtual addresses.
8985 */
8986 page_map_offset = 0;
8987 kr = vm_map_find_space(kernel_map,
8988 &page_map_offset,
8989 VM_PAGING_NUM_PAGES * PAGE_SIZE,
8990 0,
8991 0,
8992 VM_MAP_KERNEL_FLAGS_NONE,
8993 VM_KERN_MEMORY_NONE,
8994 &map_entry);
8995 if (kr != KERN_SUCCESS) {
8996 panic("vm_paging_map_init: kernel_map full\n");
8997 }
8998 VME_OBJECT_SET(map_entry, kernel_object);
8999 VME_OFFSET_SET(map_entry, page_map_offset);
9000 map_entry->protection = VM_PROT_NONE;
9001 map_entry->max_protection = VM_PROT_NONE;
9002 map_entry->permanent = TRUE;
9003 vm_object_reference(kernel_object);
9004 vm_map_unlock(kernel_map);
9005
9006 assert(vm_paging_base_address == 0);
9007 vm_paging_base_address = page_map_offset;
9008 }
9009
9010 /*
9011 * vm_paging_map_object:
9012 * Maps part of a VM object's pages in the kernel
9013 * virtual address space, using the pre-allocated
9014 * kernel virtual addresses, if possible.
9015 * Context:
9016 * The VM object is locked. This lock will get
9017 * dropped and re-acquired though, so the caller
9018 * must make sure the VM object is kept alive
9019 * (by holding a VM map that has a reference
9020 * on it, for example, or taking an extra reference).
9021 * The page should also be kept busy to prevent
9022 * it from being reclaimed.
9023 */
9024 kern_return_t
9025 vm_paging_map_object(
9026 vm_page_t page,
9027 vm_object_t object,
9028 vm_object_offset_t offset,
9029 vm_prot_t protection,
9030 boolean_t can_unlock_object,
9031 vm_map_size_t *size, /* IN/OUT */
9032 vm_map_offset_t *address, /* OUT */
9033 boolean_t *need_unmap) /* OUT */
9034 {
9035 kern_return_t kr;
9036 vm_map_offset_t page_map_offset;
9037 vm_map_size_t map_size;
9038 vm_object_offset_t object_offset;
9039 int i;
9040
9041 if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
9042 /* use permanent 1-to-1 kernel mapping of physical memory ? */
9043 #if __x86_64__
9044 *address = (vm_map_offset_t)
9045 PHYSMAP_PTOV((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(page) <<
9046 PAGE_SHIFT);
9047 *need_unmap = FALSE;
9048 return KERN_SUCCESS;
9049 #elif __arm__ || __arm64__
9050 *address = (vm_map_offset_t)
9051 phystokv((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(page) << PAGE_SHIFT);
9052 *need_unmap = FALSE;
9053 return KERN_SUCCESS;
9054 #else
9055 #warn "vm_paging_map_object: no 1-to-1 kernel mapping of physical memory..."
9056 #endif
9057
9058 assert(page->busy);
9059 /*
9060 * Use one of the pre-allocated kernel virtual addresses
9061 * and just enter the VM page in the kernel address space
9062 * at that virtual address.
9063 */
9064 simple_lock(&vm_paging_lock);
9065
9066 /*
9067 * Try and find an available kernel virtual address
9068 * from our pre-allocated pool.
9069 */
9070 page_map_offset = 0;
9071 for (;;) {
9072 for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
9073 if (vm_paging_page_inuse[i] == FALSE) {
9074 page_map_offset =
9075 vm_paging_base_address +
9076 (i * PAGE_SIZE);
9077 break;
9078 }
9079 }
9080 if (page_map_offset != 0) {
9081 /* found a space to map our page ! */
9082 break;
9083 }
9084
9085 if (can_unlock_object) {
9086 /*
9087 * If we can afford to unlock the VM object,
9088 * let's take the slow path now...
9089 */
9090 break;
9091 }
9092 /*
9093 * We can't afford to unlock the VM object, so
9094 * let's wait for a space to become available...
9095 */
9096 vm_paging_page_waiter_total++;
9097 vm_paging_page_waiter++;
9098 kr = assert_wait((event_t)&vm_paging_page_waiter, THREAD_UNINT);
9099 if (kr == THREAD_WAITING) {
9100 simple_unlock(&vm_paging_lock);
9101 kr = thread_block(THREAD_CONTINUE_NULL);
9102 simple_lock(&vm_paging_lock);
9103 }
9104 vm_paging_page_waiter--;
9105 /* ... and try again */
9106 }
9107
9108 if (page_map_offset != 0) {
9109 /*
9110 * We found a kernel virtual address;
9111 * map the physical page to that virtual address.
9112 */
9113 if (i > vm_paging_max_index) {
9114 vm_paging_max_index = i;
9115 }
9116 vm_paging_page_inuse[i] = TRUE;
9117 simple_unlock(&vm_paging_lock);
9118
9119 page->pmapped = TRUE;
9120
9121 /*
9122 * Keep the VM object locked over the PMAP_ENTER
9123 * and the actual use of the page by the kernel,
9124 * or this pmap mapping might get undone by a
9125 * vm_object_pmap_protect() call...
9126 */
9127 PMAP_ENTER(kernel_pmap,
9128 page_map_offset,
9129 page,
9130 protection,
9131 VM_PROT_NONE,
9132 0,
9133 TRUE,
9134 kr);
9135 assert(kr == KERN_SUCCESS);
9136 vm_paging_objects_mapped++;
9137 vm_paging_pages_mapped++;
9138 *address = page_map_offset;
9139 *need_unmap = TRUE;
9140
9141 #if KASAN
9142 kasan_notify_address(page_map_offset, PAGE_SIZE);
9143 #endif
9144
9145 /* all done and mapped, ready to use ! */
9146 return KERN_SUCCESS;
9147 }
9148
9149 /*
9150 * We ran out of pre-allocated kernel virtual
9151 * addresses. Just map the page in the kernel
9152 * the slow and regular way.
9153 */
9154 vm_paging_no_kernel_page++;
9155 simple_unlock(&vm_paging_lock);
9156 }
9157
9158 if (! can_unlock_object) {
9159 *address = 0;
9160 *size = 0;
9161 *need_unmap = FALSE;
9162 return KERN_NOT_SUPPORTED;
9163 }
9164
9165 object_offset = vm_object_trunc_page(offset);
9166 map_size = vm_map_round_page(*size,
9167 VM_MAP_PAGE_MASK(kernel_map));
9168
9169 /*
9170 * Try and map the required range of the object
9171 * in the kernel_map
9172 */
9173
9174 vm_object_reference_locked(object); /* for the map entry */
9175 vm_object_unlock(object);
9176
9177 kr = vm_map_enter(kernel_map,
9178 address,
9179 map_size,
9180 0,
9181 VM_FLAGS_ANYWHERE,
9182 VM_MAP_KERNEL_FLAGS_NONE,
9183 VM_KERN_MEMORY_NONE,
9184 object,
9185 object_offset,
9186 FALSE,
9187 protection,
9188 VM_PROT_ALL,
9189 VM_INHERIT_NONE);
9190 if (kr != KERN_SUCCESS) {
9191 *address = 0;
9192 *size = 0;
9193 *need_unmap = FALSE;
9194 vm_object_deallocate(object); /* for the map entry */
9195 vm_object_lock(object);
9196 return kr;
9197 }
9198
9199 *size = map_size;
9200
9201 /*
9202 * Enter the mapped pages in the page table now.
9203 */
9204 vm_object_lock(object);
9205 /*
9206 * VM object must be kept locked from before PMAP_ENTER()
9207 * until after the kernel is done accessing the page(s).
9208 * Otherwise, the pmap mappings in the kernel could be
9209 * undone by a call to vm_object_pmap_protect().
9210 */
9211
9212 for (page_map_offset = 0;
9213 map_size != 0;
9214 map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
9215
9216 page = vm_page_lookup(object, offset + page_map_offset);
9217 if (page == VM_PAGE_NULL) {
9218 printf("vm_paging_map_object: no page !?");
9219 vm_object_unlock(object);
9220 kr = vm_map_remove(kernel_map, *address, *size,
9221 VM_MAP_NO_FLAGS);
9222 assert(kr == KERN_SUCCESS);
9223 *address = 0;
9224 *size = 0;
9225 *need_unmap = FALSE;
9226 vm_object_lock(object);
9227 return KERN_MEMORY_ERROR;
9228 }
9229 page->pmapped = TRUE;
9230
9231 //assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(page)));
9232 PMAP_ENTER(kernel_pmap,
9233 *address + page_map_offset,
9234 page,
9235 protection,
9236 VM_PROT_NONE,
9237 0,
9238 TRUE,
9239 kr);
9240 assert(kr == KERN_SUCCESS);
9241 #if KASAN
9242 kasan_notify_address(*address + page_map_offset, PAGE_SIZE);
9243 #endif
9244 }
9245
9246 vm_paging_objects_mapped_slow++;
9247 vm_paging_pages_mapped_slow += (unsigned long) (map_size / PAGE_SIZE_64);
9248
9249 *need_unmap = TRUE;
9250
9251 return KERN_SUCCESS;
9252 }
9253
9254 /*
9255 * vm_paging_unmap_object:
9256 * Unmaps part of a VM object's pages from the kernel
9257 * virtual address space.
9258 * Context:
9259 * The VM object is locked. This lock may get
9260 * dropped and re-acquired, though.
9261 */
9262 void
9263 vm_paging_unmap_object(
9264 vm_object_t object,
9265 vm_map_offset_t start,
9266 vm_map_offset_t end)
9267 {
9268 kern_return_t kr;
9269 int i;
9270
9271 if ((vm_paging_base_address == 0) ||
9272 (start < vm_paging_base_address) ||
9273 (end > (vm_paging_base_address
9274 + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) {
9275 /*
9276 * We didn't use our pre-allocated pool of
9277 * kernel virtual addresses. Deallocate the
9278 * virtual memory.
9279 */
9280 if (object != VM_OBJECT_NULL) {
9281 vm_object_unlock(object);
9282 }
9283 kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS);
9284 if (object != VM_OBJECT_NULL) {
9285 vm_object_lock(object);
9286 }
9287 assert(kr == KERN_SUCCESS);
9288 } else {
9289 /*
9290 * We used a kernel virtual address from our
9291 * pre-allocated pool. Put it back in the pool
9292 * for next time.
9293 */
9294 assert(end - start == PAGE_SIZE);
9295 i = (int) ((start - vm_paging_base_address) >> PAGE_SHIFT);
9296 assert(i >= 0 && i < VM_PAGING_NUM_PAGES);
9297
9298 /* undo the pmap mapping */
9299 pmap_remove(kernel_pmap, start, end);
9300
9301 simple_lock(&vm_paging_lock);
9302 vm_paging_page_inuse[i] = FALSE;
9303 if (vm_paging_page_waiter) {
9304 thread_wakeup(&vm_paging_page_waiter);
9305 }
9306 simple_unlock(&vm_paging_lock);
9307 }
9308 }
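
/*
 * Illustrative only -- a minimal sketch of how a caller is expected to
 * pair vm_paging_map_object() with vm_paging_unmap_object(), following
 * the locking rules documented above (the object stays locked across
 * PMAP_ENTER and across the kernel's access to the page).  The "page",
 * "object" and size/address locals are hypothetical; vm_page_slide()
 * below is a real in-tree caller.
 *
 *	vm_map_size_t	size = PAGE_SIZE;
 *	vm_map_offset_t	kaddr = 0;
 *	boolean_t	needs_unmap = FALSE;
 *	kern_return_t	kr;
 *
 *	vm_object_lock(object);
 *	kr = vm_paging_map_object(page, object, page->offset,
 *				  VM_PROT_READ | VM_PROT_WRITE,
 *				  FALSE,	// not allowed to unlock "object"
 *				  &size, &kaddr, &needs_unmap);
 *	if (kr == KERN_SUCCESS) {
 *		// ... access the page through "kaddr", object still locked ...
 *		if (needs_unmap)
 *			vm_paging_unmap_object(object, kaddr, kaddr + size);
 *	}
 *	vm_object_unlock(object);
 */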
9309
9310
9311 /*
9312 * page->object must be locked
9313 */
9314 void
9315 vm_pageout_steal_laundry(vm_page_t page, boolean_t queues_locked)
9316 {
9317 if (!queues_locked) {
9318 vm_page_lockspin_queues();
9319 }
9320
9321 page->free_when_done = FALSE;
9322 /*
9323 * need to drop the laundry count...
9324 * we may also need to remove it
9325 * from the I/O paging queue...
9326 * vm_pageout_throttle_up handles both cases
9327 *
9328 * the laundry and pageout_queue flags are cleared...
9329 */
9330 vm_pageout_throttle_up(page);
9331
9332 vm_page_steal_pageout_page++;
9333
9334 if (!queues_locked) {
9335 vm_page_unlock_queues();
9336 }
9337 }
9338
9339 upl_t
9340 vector_upl_create(vm_offset_t upl_offset)
9341 {
9342 int vector_upl_size = sizeof(struct _vector_upl);
9343 int i=0;
9344 upl_t upl;
9345 vector_upl_t vector_upl = (vector_upl_t)kalloc(vector_upl_size);
9346
9347 upl = upl_create(0,UPL_VECTOR,0);
9348 upl->vector_upl = vector_upl;
9349 upl->offset = upl_offset;
9350 vector_upl->size = 0;
9351 vector_upl->offset = upl_offset;
9352 vector_upl->invalid_upls=0;
9353 vector_upl->num_upls=0;
9354 vector_upl->pagelist = NULL;
9355
9356 for(i=0; i < MAX_VECTOR_UPL_ELEMENTS ; i++) {
9357 vector_upl->upl_iostates[i].size = 0;
9358 vector_upl->upl_iostates[i].offset = 0;
9359
9360 }
9361 return upl;
9362 }
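
/*
 * Illustrative only -- a rough sketch of the vector-UPL life cycle
 * implemented by the routines in this block; the real user is the
 * cluster I/O code.  "vupl", "sub_upl[]", "offsets[]" and "io_sizes[]"
 * are hypothetical placeholders for UPLs created elsewhere.
 *
 *	upl_t	vupl = vector_upl_create(base_offset);
 *
 *	for (i = 0; i < n; i++) {
 *		// add each sub-UPL and record where its I/O lives
 *		vector_upl_set_subupl(vupl, sub_upl[i], io_sizes[i]);
 *		vector_upl_set_iostate(vupl, sub_upl[i],
 *				       offsets[i], io_sizes[i]);
 *	}
 *	vector_upl_set_pagelist(vupl);	// build the combined page list
 *
 *	// ... issue the I/O ...
 *
 *	// As each sub-UPL is committed or aborted it is detached by
 *	// passing io_size == 0; when that call returns TRUE the last
 *	// sub-UPL is gone and the vector UPL itself can be torn down.
 *	if (vector_upl_set_subupl(vupl, sub_upl[k], 0))
 *		vector_upl_deallocate(vupl);
 */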
9363
9364 void
9365 vector_upl_deallocate(upl_t upl)
9366 {
9367 if(upl) {
9368 vector_upl_t vector_upl = upl->vector_upl;
9369 if(vector_upl) {
9370 if(vector_upl->invalid_upls != vector_upl->num_upls)
9371 panic("Deallocating non-empty Vectored UPL\n");
9372 kfree(vector_upl->pagelist,(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)));
9373 vector_upl->invalid_upls=0;
9374 vector_upl->num_upls = 0;
9375 vector_upl->pagelist = NULL;
9376 vector_upl->size = 0;
9377 vector_upl->offset = 0;
9378 kfree(vector_upl, sizeof(struct _vector_upl));
9379 vector_upl = (vector_upl_t)0xfeedfeed;
9380 }
9381 else
9382 panic("vector_upl_deallocate was passed a non-vectored upl\n");
9383 }
9384 else
9385 panic("vector_upl_deallocate was passed a NULL upl\n");
9386 }
9387
9388 boolean_t
9389 vector_upl_is_valid(upl_t upl)
9390 {
9391 if(upl && ((upl->flags & UPL_VECTOR)==UPL_VECTOR)) {
9392 vector_upl_t vector_upl = upl->vector_upl;
9393 if(vector_upl == NULL || vector_upl == (vector_upl_t)0xfeedfeed || vector_upl == (vector_upl_t)0xfeedbeef)
9394 return FALSE;
9395 else
9396 return TRUE;
9397 }
9398 return FALSE;
9399 }
9400
9401 boolean_t
9402 vector_upl_set_subupl(upl_t upl,upl_t subupl, uint32_t io_size)
9403 {
9404 if(vector_upl_is_valid(upl)) {
9405 vector_upl_t vector_upl = upl->vector_upl;
9406
9407 if(vector_upl) {
9408 if(subupl) {
9409 if(io_size) {
9410 if(io_size < PAGE_SIZE)
9411 io_size = PAGE_SIZE;
9412 subupl->vector_upl = (void*)vector_upl;
9413 vector_upl->upl_elems[vector_upl->num_upls++] = subupl;
9414 vector_upl->size += io_size;
9415 upl->size += io_size;
9416 }
9417 else {
9418 uint32_t i=0,invalid_upls=0;
9419 for(i = 0; i < vector_upl->num_upls; i++) {
9420 if(vector_upl->upl_elems[i] == subupl)
9421 break;
9422 }
9423 if(i == vector_upl->num_upls)
9424 panic("Trying to remove sub-upl when none exists");
9425
9426 vector_upl->upl_elems[i] = NULL;
9427 invalid_upls = hw_atomic_add(&(vector_upl)->invalid_upls, 1);
9428 if(invalid_upls == vector_upl->num_upls)
9429 return TRUE;
9430 else
9431 return FALSE;
9432 }
9433 }
9434 else
9435 panic("vector_upl_set_subupl was passed a NULL upl element\n");
9436 }
9437 else
9438 panic("vector_upl_set_subupl was passed a non-vectored upl\n");
9439 }
9440 else
9441 panic("vector_upl_set_subupl was passed a NULL upl\n");
9442
9443 return FALSE;
9444 }
9445
9446 void
9447 vector_upl_set_pagelist(upl_t upl)
9448 {
9449 if(vector_upl_is_valid(upl)) {
9450 uint32_t i=0;
9451 vector_upl_t vector_upl = upl->vector_upl;
9452
9453 if(vector_upl) {
9454 vm_offset_t pagelist_size=0, cur_upl_pagelist_size=0;
9455
9456 vector_upl->pagelist = (upl_page_info_array_t)kalloc(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE));
9457
9458 for(i=0; i < vector_upl->num_upls; i++) {
9459 cur_upl_pagelist_size = sizeof(struct upl_page_info) * vector_upl->upl_elems[i]->size/PAGE_SIZE;
9460 bcopy(UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(vector_upl->upl_elems[i]), (char*)vector_upl->pagelist + pagelist_size, cur_upl_pagelist_size);
9461 pagelist_size += cur_upl_pagelist_size;
9462 if(vector_upl->upl_elems[i]->highest_page > upl->highest_page)
9463 upl->highest_page = vector_upl->upl_elems[i]->highest_page;
9464 }
9465 assert( pagelist_size == (sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)) );
9466 }
9467 else
9468 panic("vector_upl_set_pagelist was passed a non-vectored upl\n");
9469 }
9470 else
9471 panic("vector_upl_set_pagelist was passed a NULL upl\n");
9472
9473 }
9474
9475 upl_t
9476 vector_upl_subupl_byindex(upl_t upl, uint32_t index)
9477 {
9478 if(vector_upl_is_valid(upl)) {
9479 vector_upl_t vector_upl = upl->vector_upl;
9480 if(vector_upl) {
9481 if(index < vector_upl->num_upls)
9482 return vector_upl->upl_elems[index];
9483 }
9484 else
9485 panic("vector_upl_subupl_byindex was passed a non-vectored upl\n");
9486 }
9487 return NULL;
9488 }
9489
9490 upl_t
9491 vector_upl_subupl_byoffset(upl_t upl, upl_offset_t *upl_offset, upl_size_t *upl_size)
9492 {
9493 if(vector_upl_is_valid(upl)) {
9494 uint32_t i=0;
9495 vector_upl_t vector_upl = upl->vector_upl;
9496
9497 if(vector_upl) {
9498 upl_t subupl = NULL;
9499 vector_upl_iostates_t subupl_state;
9500
9501 for(i=0; i < vector_upl->num_upls; i++) {
9502 subupl = vector_upl->upl_elems[i];
9503 subupl_state = vector_upl->upl_iostates[i];
9504 if( *upl_offset <= (subupl_state.offset + subupl_state.size - 1)) {
9505 /* We could have been passed an offset/size pair that belongs
9506 * to a UPL element that has already been committed/aborted.
9507 * If so, return NULL.
9508 */
9509 if(subupl == NULL)
9510 return NULL;
9511 if((subupl_state.offset + subupl_state.size) < (*upl_offset + *upl_size)) {
9512 *upl_size = (subupl_state.offset + subupl_state.size) - *upl_offset;
9513 if(*upl_size > subupl_state.size)
9514 *upl_size = subupl_state.size;
9515 }
9516 if(*upl_offset >= subupl_state.offset)
9517 *upl_offset -= subupl_state.offset;
9518 else if(i)
9519 panic("Vector UPL offset miscalculation\n");
9520 return subupl;
9521 }
9522 }
9523 }
9524 else
9525 panic("vector_upl_subupl_byoffset was passed a non-vectored UPL\n");
9526 }
9527 return NULL;
9528 }
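
/*
 * Worked example for vector_upl_subupl_byoffset() above (numbers are
 * hypothetical): with two sub-UPLs whose iostates are
 * {offset 0, size 0x10000} and {offset 0x10000, size 0x10000}, a lookup
 * with *upl_offset = 0x14000 and *upl_size = 0x10000 skips sub-UPL 0,
 * matches sub-UPL 1, clips *upl_size to 0xC000 so the request does not
 * run past that iostate, and rebases *upl_offset to 0x4000, i.e. the
 * offset relative to the returned sub-UPL.
 */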
9529
9530 void
9531 vector_upl_get_submap(upl_t upl, vm_map_t *v_upl_submap, vm_offset_t *submap_dst_addr)
9532 {
9533 *v_upl_submap = NULL;
9534
9535 if(vector_upl_is_valid(upl)) {
9536 vector_upl_t vector_upl = upl->vector_upl;
9537 if(vector_upl) {
9538 *v_upl_submap = vector_upl->submap;
9539 *submap_dst_addr = vector_upl->submap_dst_addr;
9540 }
9541 else
9542 panic("vector_upl_get_submap was passed a non-vectored UPL\n");
9543 }
9544 else
9545 panic("vector_upl_get_submap was passed a null UPL\n");
9546 }
9547
9548 void
9549 vector_upl_set_submap(upl_t upl, vm_map_t submap, vm_offset_t submap_dst_addr)
9550 {
9551 if(vector_upl_is_valid(upl)) {
9552 vector_upl_t vector_upl = upl->vector_upl;
9553 if(vector_upl) {
9554 vector_upl->submap = submap;
9555 vector_upl->submap_dst_addr = submap_dst_addr;
9556 }
9557 else
9558 panic("vector_upl_get_submap was passed a non-vectored UPL\n");
9559 }
9560 else
9561 panic("vector_upl_get_submap was passed a NULL UPL\n");
9562 }
9563
9564 void
9565 vector_upl_set_iostate(upl_t upl, upl_t subupl, upl_offset_t offset, upl_size_t size)
9566 {
9567 if(vector_upl_is_valid(upl)) {
9568 uint32_t i = 0;
9569 vector_upl_t vector_upl = upl->vector_upl;
9570
9571 if(vector_upl) {
9572 for(i = 0; i < vector_upl->num_upls; i++) {
9573 if(vector_upl->upl_elems[i] == subupl)
9574 break;
9575 }
9576
9577 if(i == vector_upl->num_upls)
9578 panic("setting sub-upl iostate when none exists");
9579
9580 vector_upl->upl_iostates[i].offset = offset;
9581 if(size < PAGE_SIZE)
9582 size = PAGE_SIZE;
9583 vector_upl->upl_iostates[i].size = size;
9584 }
9585 else
9586 panic("vector_upl_set_iostate was passed a non-vectored UPL\n");
9587 }
9588 else
9589 panic("vector_upl_set_iostate was passed a NULL UPL\n");
9590 }
9591
9592 void
9593 vector_upl_get_iostate(upl_t upl, upl_t subupl, upl_offset_t *offset, upl_size_t *size)
9594 {
9595 if(vector_upl_is_valid(upl)) {
9596 uint32_t i = 0;
9597 vector_upl_t vector_upl = upl->vector_upl;
9598
9599 if(vector_upl) {
9600 for(i = 0; i < vector_upl->num_upls; i++) {
9601 if(vector_upl->upl_elems[i] == subupl)
9602 break;
9603 }
9604
9605 if(i == vector_upl->num_upls)
9606 panic("getting sub-upl iostate when none exists");
9607
9608 *offset = vector_upl->upl_iostates[i].offset;
9609 *size = vector_upl->upl_iostates[i].size;
9610 }
9611 else
9612 panic("vector_upl_get_iostate was passed a non-vectored UPL\n");
9613 }
9614 else
9615 panic("vector_upl_get_iostate was passed a NULL UPL\n");
9616 }
9617
9618 void
9619 vector_upl_get_iostate_byindex(upl_t upl, uint32_t index, upl_offset_t *offset, upl_size_t *size)
9620 {
9621 if(vector_upl_is_valid(upl)) {
9622 vector_upl_t vector_upl = upl->vector_upl;
9623 if(vector_upl) {
9624 if(index < vector_upl->num_upls) {
9625 *offset = vector_upl->upl_iostates[index].offset;
9626 *size = vector_upl->upl_iostates[index].size;
9627 }
9628 else
9629 *offset = *size = 0;
9630 }
9631 else
9632 panic("vector_upl_get_iostate_byindex was passed a non-vectored UPL\n");
9633 }
9634 else
9635 panic("vector_upl_get_iostate_byindex was passed a NULL UPL\n");
9636 }
9637
9638 upl_page_info_t *
9639 upl_get_internal_vectorupl_pagelist(upl_t upl)
9640 {
9641 return ((vector_upl_t)(upl->vector_upl))->pagelist;
9642 }
9643
9644 void *
9645 upl_get_internal_vectorupl(upl_t upl)
9646 {
9647 return upl->vector_upl;
9648 }
9649
9650 vm_size_t
9651 upl_get_internal_pagelist_offset(void)
9652 {
9653 return sizeof(struct upl);
9654 }
9655
9656 void
9657 upl_clear_dirty(
9658 upl_t upl,
9659 boolean_t value)
9660 {
9661 if (value) {
9662 upl->flags |= UPL_CLEAR_DIRTY;
9663 } else {
9664 upl->flags &= ~UPL_CLEAR_DIRTY;
9665 }
9666 }
9667
9668 void
9669 upl_set_referenced(
9670 upl_t upl,
9671 boolean_t value)
9672 {
9673 upl_lock(upl);
9674 if (value) {
9675 upl->ext_ref_count++;
9676 } else {
9677 if (!upl->ext_ref_count) {
9678 panic("upl_set_referenced not %p\n", upl);
9679 }
9680 upl->ext_ref_count--;
9681 }
9682 upl_unlock(upl);
9683 }
9684
9685 #if CONFIG_IOSCHED
9686 void
9687 upl_set_blkno(
9688 upl_t upl,
9689 vm_offset_t upl_offset,
9690 int io_size,
9691 int64_t blkno)
9692 {
9693 int i,j;
9694 if ((upl->flags & UPL_EXPEDITE_SUPPORTED) == 0)
9695 return;
9696
9697 assert(upl->upl_reprio_info != 0);
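	/*
	 * "i" indexes the UPL page backing byte offset upl_offset + j, so
	 * every page covered by the io_size byte range gets stamped with
	 * the same (blkno, io_size) reprioritization record.
	 */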
9698 for(i = (int)(upl_offset / PAGE_SIZE), j = 0; j < io_size; i++, j += PAGE_SIZE) {
9699 UPL_SET_REPRIO_INFO(upl, i, blkno, io_size);
9700 }
9701 }
9702 #endif
9703
9704 boolean_t
9705 vm_page_is_slideable(vm_page_t m)
9706 {
9707 boolean_t result = FALSE;
9708 vm_shared_region_slide_info_t si;
9709 vm_object_t m_object;
9710
9711 m_object = VM_PAGE_OBJECT(m);
9712
9713 vm_object_lock_assert_held(m_object);
9714
9715 /* make sure our page belongs to the one object allowed to do this */
9716 if (!m_object->object_slid) {
9717 goto done;
9718 }
9719
9720 si = m_object->vo_slide_info;
9721 if (si == NULL) {
9722 goto done;
9723 }
9724
9725 if(!m->slid && (si->start <= m->offset && si->end > m->offset)) {
9726 result = TRUE;
9727 }
9728
9729 done:
9730 return result;
9731 }
9732
9733 int vm_page_slide_counter = 0;
9734 int vm_page_slide_errors = 0;
9735 kern_return_t
9736 vm_page_slide(
9737 vm_page_t page,
9738 vm_map_offset_t kernel_mapping_offset)
9739 {
9740 kern_return_t kr;
9741 vm_map_size_t kernel_mapping_size;
9742 boolean_t kernel_mapping_needs_unmap;
9743 vm_offset_t kernel_vaddr;
9744 uint32_t pageIndex;
9745 uint32_t slide_chunk;
9746 vm_object_t page_object;
9747
9748 page_object = VM_PAGE_OBJECT(page);
9749
9750 assert(!page->slid);
9751 assert(page_object->object_slid);
9752 vm_object_lock_assert_exclusive(page_object);
9753
9754 if (page->error)
9755 return KERN_FAILURE;
9756
9757 /*
9758 * Take a paging-in-progress reference to keep the object
9759 * alive even if we have to unlock it (in vm_paging_map_object()
9760 * for example)...
9761 */
9762 vm_object_paging_begin(page_object);
9763
9764 if (kernel_mapping_offset == 0) {
9765 /*
9766 * The page hasn't already been mapped in kernel space
9767 * by the caller. Map it now, so that we can access
9768 * its contents and slide them.
9769 */
9770 kernel_mapping_size = PAGE_SIZE;
9771 kernel_mapping_needs_unmap = FALSE;
9772 kr = vm_paging_map_object(page,
9773 page_object,
9774 page->offset,
9775 VM_PROT_READ | VM_PROT_WRITE,
9776 FALSE,
9777 &kernel_mapping_size,
9778 &kernel_mapping_offset,
9779 &kernel_mapping_needs_unmap);
9780 if (kr != KERN_SUCCESS) {
9781 panic("vm_page_slide: "
9782 "could not map page in kernel: 0x%x\n",
9783 kr);
9784 }
9785 } else {
9786 kernel_mapping_size = 0;
9787 kernel_mapping_needs_unmap = FALSE;
9788 }
9789 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
9790
9791 /*
9792 * Slide the pointers on the page.
9793 */
9794
9795 /* assert that slide_file_info.start/end are page-aligned? */
9796
9797 assert(!page->slid);
9798 assert(page_object->object_slid);
9799
9800 pageIndex = (uint32_t)((page->offset -
9801 page_object->vo_slide_info->start) /
9802 PAGE_SIZE_FOR_SR_SLIDE);
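	/*
	 * Worked example (assuming PAGE_SIZE_FOR_SR_SLIDE is 4KB and
	 * PAGE_SIZE is 16KB -- both values are assumptions made only for
	 * illustration): a page at slide_info->start + 0x8000 gets
	 * pageIndex = 8, and the loop below slides the four 4KB chunks
	 * covering it, passing sub-page indices 8..11 and kernel addresses
	 * kernel_vaddr + 0x0/0x1000/0x2000/0x3000 to
	 * vm_shared_region_slide_page().
	 */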
9803 for (slide_chunk = 0;
9804 slide_chunk < PAGE_SIZE / PAGE_SIZE_FOR_SR_SLIDE;
9805 slide_chunk++) {
9806 kr = vm_shared_region_slide_page(page_object->vo_slide_info,
9807 (kernel_vaddr +
9808 (slide_chunk *
9809 PAGE_SIZE_FOR_SR_SLIDE)),
9810 (pageIndex + slide_chunk));
9811 if (kr != KERN_SUCCESS) {
9812 break;
9813 }
9814 }
9815
9816 vm_page_slide_counter++;
9817
9818 /*
9819 * Unmap the page from the kernel's address space.
9820 */
9821 if (kernel_mapping_needs_unmap) {
9822 vm_paging_unmap_object(page_object,
9823 kernel_vaddr,
9824 kernel_vaddr + PAGE_SIZE);
9825 }
9826
9827 page->dirty = FALSE;
9828 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(page), VM_MEM_MODIFIED | VM_MEM_REFERENCED);
9829
9830 if (kr != KERN_SUCCESS || cs_debug > 1) {
9831 printf("vm_page_slide(%p): "
9832 "obj %p off 0x%llx mobj %p moff 0x%llx\n",
9833 page,
9834 page_object, page->offset,
9835 page_object->pager,
9836 page->offset + page_object->paging_offset);
9837 }
9838
9839 if (kr == KERN_SUCCESS) {
9840 page->slid = TRUE;
9841 } else {
9842 page->error = TRUE;
9843 vm_page_slide_errors++;
9844 }
9845
9846 vm_object_paging_end(page_object);
9847
9848 return kr;
9849 }
9850
9851 inline void memoryshot(unsigned int event, unsigned int control)
9852 {
9853 if (vm_debug_events) {
9854 KERNEL_DEBUG_CONSTANT1((MACHDBG_CODE(DBG_MACH_VM_PRESSURE, event)) | control,
9855 vm_page_active_count, vm_page_inactive_count,
9856 vm_page_free_count, vm_page_speculative_count,
9857 vm_page_throttled_count);
9858 } else {
9859 (void) event;
9860 (void) control;
9861 }
9862
9863 }
9864
9865 #ifdef MACH_BSD
9866
9867 boolean_t upl_device_page(upl_page_info_t *upl)
9868 {
9869 return(UPL_DEVICE_PAGE(upl));
9870 }
9871 boolean_t upl_page_present(upl_page_info_t *upl, int index)
9872 {
9873 return(UPL_PAGE_PRESENT(upl, index));
9874 }
9875 boolean_t upl_speculative_page(upl_page_info_t *upl, int index)
9876 {
9877 return(UPL_SPECULATIVE_PAGE(upl, index));
9878 }
9879 boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
9880 {
9881 return(UPL_DIRTY_PAGE(upl, index));
9882 }
9883 boolean_t upl_valid_page(upl_page_info_t *upl, int index)
9884 {
9885 return(UPL_VALID_PAGE(upl, index));
9886 }
9887 ppnum_t upl_phys_page(upl_page_info_t *upl, int index)
9888 {
9889 return(UPL_PHYS_PAGE(upl, index));
9890 }
9891
9892 void upl_page_set_mark(upl_page_info_t *upl, int index, boolean_t v)
9893 {
9894 upl[index].mark = v;
9895 }
9896
9897 boolean_t upl_page_get_mark(upl_page_info_t *upl, int index)
9898 {
9899 return upl[index].mark;
9900 }
9901
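/*
 * Debug helper: walks the inactive, throttled and anonymous queues and
 * then the active queue, counting dirty, free_when_done (pageout) and
 * precious pages, and prints the "dirty : pageout : precious" triples
 * ("IN Q" for the inactive-type queues, "AC Q" for the active queue).
 */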
9902 void
9903 vm_countdirtypages(void)
9904 {
9905 vm_page_t m;
9906 int dpages;
9907 int pgopages;
9908 int precpages;
9909
9910
9911 dpages=0;
9912 pgopages=0;
9913 precpages=0;
9914
9915 vm_page_lock_queues();
9916 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
9917 do {
9918 if (m ==(vm_page_t )0) break;
9919
9920 if(m->dirty) dpages++;
9921 if(m->free_when_done) pgopages++;
9922 if(m->precious) precpages++;
9923
9924 assert(VM_PAGE_OBJECT(m) != kernel_object);
9925 m = (vm_page_t) vm_page_queue_next(&m->pageq);
9926 if (m ==(vm_page_t )0) break;
9927
9928 } while (!vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t) m));
9929 vm_page_unlock_queues();
9930
9931 vm_page_lock_queues();
9932 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
9933 do {
9934 if (m ==(vm_page_t )0) break;
9935
9936 dpages++;
9937 assert(m->dirty);
9938 assert(!m->free_when_done);
9939 assert(VM_PAGE_OBJECT(m) != kernel_object);
9940 m = (vm_page_t) vm_page_queue_next(&m->pageq);
9941 if (m ==(vm_page_t )0) break;
9942
9943 } while (!vm_page_queue_end(&vm_page_queue_throttled, (vm_page_queue_entry_t) m));
9944 vm_page_unlock_queues();
9945
9946 vm_page_lock_queues();
9947 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
9948 do {
9949 if (m ==(vm_page_t )0) break;
9950
9951 if(m->dirty) dpages++;
9952 if(m->free_when_done) pgopages++;
9953 if(m->precious) precpages++;
9954
9955 assert(VM_PAGE_OBJECT(m) != kernel_object);
9956 m = (vm_page_t) vm_page_queue_next(&m->pageq);
9957 if (m ==(vm_page_t )0) break;
9958
9959 } while (!vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t) m));
9960 vm_page_unlock_queues();
9961
9962 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
9963
9964 dpages=0;
9965 pgopages=0;
9966 precpages=0;
9967
9968 vm_page_lock_queues();
9969 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
9970
9971 do {
9972 if(m == (vm_page_t )0) break;
9973 if(m->dirty) dpages++;
9974 if(m->free_when_done) pgopages++;
9975 if(m->precious) precpages++;
9976
9977 assert(VM_PAGE_OBJECT(m) != kernel_object);
9978 m = (vm_page_t) vm_page_queue_next(&m->pageq);
9979 if(m == (vm_page_t )0) break;
9980
9981 } while (!vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t) m));
9982 vm_page_unlock_queues();
9983
9984 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
9985
9986 }
9987 #endif /* MACH_BSD */
9988
9989
9990 #if CONFIG_IOSCHED
9991 int upl_get_cached_tier(upl_t upl)
9992 {
9993 assert(upl);
9994 if (upl->flags & UPL_TRACKED_BY_OBJECT)
9995 return (upl->upl_priority);
9996 return (-1);
9997 }
9998 #endif /* CONFIG_IOSCHED */
9999
10000 ppnum_t upl_get_highest_page(
10001 upl_t upl)
10002 {
10003 return upl->highest_page;
10004 }
10005
10006 upl_size_t upl_get_size(
10007 upl_t upl)
10008 {
10009 return upl->size;
10010 }
10011
10012 upl_t upl_associated_upl(upl_t upl)
10013 {
10014 return upl->associated_upl;
10015 }
10016
10017 void upl_set_associated_upl(upl_t upl, upl_t associated_upl)
10018 {
10019 upl->associated_upl = associated_upl;
10020 }
10021
10022 struct vnode * upl_lookup_vnode(upl_t upl)
10023 {
10024 if (!upl->map_object->internal)
10025 return vnode_pager_lookup_vnode(upl->map_object->pager);
10026 else
10027 return NULL;
10028 }
10029
10030 #if UPL_DEBUG
10031 kern_return_t upl_ubc_alias_set(upl_t upl, uintptr_t alias1, uintptr_t alias2)
10032 {
10033 upl->ubc_alias1 = alias1;
10034 upl->ubc_alias2 = alias2;
10035 return KERN_SUCCESS;
10036 }
10037 int upl_ubc_alias_get(upl_t upl, uintptr_t * al, uintptr_t * al2)
10038 {
10039 if(al)
10040 *al = upl->ubc_alias1;
10041 if(al2)
10042 *al2 = upl->ubc_alias2;
10043 return KERN_SUCCESS;
10044 }
10045 #endif /* UPL_DEBUG */
10046
10047 #if VM_PRESSURE_EVENTS
10048 /*
10049 * Upward trajectory.
10050 */
10051 extern boolean_t vm_compressor_low_on_space(void);
10052
10053 boolean_t
10054 VM_PRESSURE_NORMAL_TO_WARNING(void) {
10055
10056 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
10057
10058 /* Available pages below our threshold */
10059 if (memorystatus_available_pages < memorystatus_available_pages_pressure) {
10060 /* No frozen processes to kill */
10061 if (memorystatus_frozen_count == 0) {
10062 /* Not enough suspended processes available. */
10063 if (memorystatus_suspended_count < MEMORYSTATUS_SUSPENDED_THRESHOLD) {
10064 return TRUE;
10065 }
10066 }
10067 }
10068 return FALSE;
10069
10070 } else {
10071 return ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 1 : 0);
10072 }
10073 }
10074
10075 boolean_t
10076 VM_PRESSURE_WARNING_TO_CRITICAL(void) {
10077
10078 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
10079
10080 /* Available pages below our threshold */
10081 if (memorystatus_available_pages < memorystatus_available_pages_critical) {
10082 return TRUE;
10083 }
10084 return FALSE;
10085 } else {
10086 return (vm_compressor_low_on_space() || (AVAILABLE_NON_COMPRESSED_MEMORY < ((12 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
10087 }
10088 }
10089
10090 /*
10091 * Downward trajectory.
10092 */
10093 boolean_t
10094 VM_PRESSURE_WARNING_TO_NORMAL(void) {
10095
10096 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
10097
10098 /* Available pages above our threshold */
10099 unsigned int target_threshold = (unsigned int) (memorystatus_available_pages_pressure + ((15 * memorystatus_available_pages_pressure) / 100));
10100 if (memorystatus_available_pages > target_threshold) {
10101 return TRUE;
10102 }
10103 return FALSE;
10104 } else {
10105 return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((12 * VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) / 10)) ? 1 : 0);
10106 }
10107 }
10108
10109 boolean_t
10110 VM_PRESSURE_CRITICAL_TO_WARNING(void) {
10111
10112 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
10113
10114 /* Available pages above our threshold */
10115 unsigned int target_threshold = (unsigned int)(memorystatus_available_pages_critical + ((15 * memorystatus_available_pages_critical) / 100));
10116 if (memorystatus_available_pages > target_threshold) {
10117 return TRUE;
10118 }
10119 return FALSE;
10120 } else {
10121 return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((14 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
10122 }
10123 }
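
/*
 * Worked example of the non-compressor hysteresis above (the page count
 * is hypothetical): with memorystatus_available_pages_pressure = 4000,
 * NORMAL_TO_WARNING can fire once available pages drop below 4000 (and
 * there are no frozen processes and too few suspended ones to reclaim),
 * but WARNING_TO_NORMAL requires climbing back above 4000 + 15% = 4600
 * pages, so the system does not flap between states around a single
 * threshold.  The critical threshold pair behaves the same way with
 * memorystatus_available_pages_critical.
 */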
10124 #endif /* VM_PRESSURE_EVENTS */
10125
10126
10127
10128 #define VM_TEST_COLLAPSE_COMPRESSOR 0
10129 #define VM_TEST_WIRE_AND_EXTRACT 0
10130 #define VM_TEST_PAGE_WIRE_OVERFLOW_PANIC 0
10131 #if __arm64__
10132 #define VM_TEST_KERNEL_OBJECT_FAULT 0
10133 #endif /* __arm64__ */
10134 #define VM_TEST_DEVICE_PAGER_TRANSPOSE (DEVELOPMENT || DEBUG)
10135
10136 #if VM_TEST_COLLAPSE_COMPRESSOR
10137 extern boolean_t vm_object_collapse_compressor_allowed;
10138 #include <IOKit/IOLib.h>
10139 static void
10140 vm_test_collapse_compressor(void)
10141 {
10142 vm_object_size_t backing_size, top_size;
10143 vm_object_t backing_object, top_object;
10144 vm_map_offset_t backing_offset, top_offset;
10145 unsigned char *backing_address, *top_address;
10146 kern_return_t kr;
10147
10148 printf("VM_TEST_COLLAPSE_COMPRESSOR:\n");
10149
10150 /* create backing object */
10151 backing_size = 15 * PAGE_SIZE;
10152 backing_object = vm_object_allocate(backing_size);
10153 assert(backing_object != VM_OBJECT_NULL);
10154 printf("VM_TEST_COLLAPSE_COMPRESSOR: created backing object %p\n",
10155 backing_object);
10156 /* map backing object */
10157 backing_offset = 0;
10158 kr = vm_map_enter(kernel_map, &backing_offset, backing_size, 0,
10159 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
10160 backing_object, 0, FALSE,
10161 VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
10162 assert(kr == KERN_SUCCESS);
10163 backing_address = (unsigned char *) backing_offset;
10164 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10165 "mapped backing object %p at 0x%llx\n",
10166 backing_object, (uint64_t) backing_offset);
10167 /* populate with pages to be compressed in backing object */
10168 backing_address[0x1*PAGE_SIZE] = 0xB1;
10169 backing_address[0x4*PAGE_SIZE] = 0xB4;
10170 backing_address[0x7*PAGE_SIZE] = 0xB7;
10171 backing_address[0xa*PAGE_SIZE] = 0xBA;
10172 backing_address[0xd*PAGE_SIZE] = 0xBD;
10173 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10174 "populated pages to be compressed in "
10175 "backing_object %p\n", backing_object);
10176 /* compress backing object */
10177 vm_object_pageout(backing_object);
10178 printf("VM_TEST_COLLAPSE_COMPRESSOR: compressing backing_object %p\n",
10179 backing_object);
10180 /* wait for all the pages to be gone */
10181 while (*(volatile int *)&backing_object->resident_page_count != 0)
10182 IODelay(10);
10183 printf("VM_TEST_COLLAPSE_COMPRESSOR: backing_object %p compressed\n",
10184 backing_object);
10185 /* populate with pages to be resident in backing object */
10186 backing_address[0x0*PAGE_SIZE] = 0xB0;
10187 backing_address[0x3*PAGE_SIZE] = 0xB3;
10188 backing_address[0x6*PAGE_SIZE] = 0xB6;
10189 backing_address[0x9*PAGE_SIZE] = 0xB9;
10190 backing_address[0xc*PAGE_SIZE] = 0xBC;
10191 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10192 "populated pages to be resident in "
10193 "backing_object %p\n", backing_object);
10194 /* leave the other pages absent */
10195 /* mess with the paging_offset of the backing_object */
10196 assert(backing_object->paging_offset == 0);
10197 backing_object->paging_offset = 0x3000;
10198
10199 /* create top object */
10200 top_size = 9 * PAGE_SIZE;
10201 top_object = vm_object_allocate(top_size);
10202 assert(top_object != VM_OBJECT_NULL);
10203 printf("VM_TEST_COLLAPSE_COMPRESSOR: created top object %p\n",
10204 top_object);
10205 /* map top object */
10206 top_offset = 0;
10207 kr = vm_map_enter(kernel_map, &top_offset, top_size, 0,
10208 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
10209 top_object, 0, FALSE,
10210 VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
10211 assert(kr == KERN_SUCCESS);
10212 top_address = (unsigned char *) top_offset;
10213 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10214 "mapped top object %p at 0x%llx\n",
10215 top_object, (uint64_t) top_offset);
10216 /* populate with pages to be compressed in top object */
10217 top_address[0x3*PAGE_SIZE] = 0xA3;
10218 top_address[0x4*PAGE_SIZE] = 0xA4;
10219 top_address[0x5*PAGE_SIZE] = 0xA5;
10220 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10221 "populated pages to be compressed in "
10222 "top_object %p\n", top_object);
10223 /* compress top object */
10224 vm_object_pageout(top_object);
10225 printf("VM_TEST_COLLAPSE_COMPRESSOR: compressing top_object %p\n",
10226 top_object);
10227 /* wait for all the pages to be gone */
10228 while (top_object->resident_page_count != 0)
10229 IODelay(10);
10230 printf("VM_TEST_COLLAPSE_COMPRESSOR: top_object %p compressed\n",
10231 top_object);
10232 /* populate with pages to be resident in top object */
10233 top_address[0x0*PAGE_SIZE] = 0xA0;
10234 top_address[0x1*PAGE_SIZE] = 0xA1;
10235 top_address[0x2*PAGE_SIZE] = 0xA2;
10236 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10237 "populated pages to be resident in "
10238 "top_object %p\n", top_object);
10239 /* leave the other pages absent */
10240
10241 /* link the 2 objects */
10242 vm_object_reference(backing_object);
10243 top_object->shadow = backing_object;
10244 top_object->vo_shadow_offset = 0x3000;
10245 printf("VM_TEST_COLLAPSE_COMPRESSOR: linked %p and %p\n",
10246 top_object, backing_object);
10247
10248 /* unmap backing object */
10249 vm_map_remove(kernel_map,
10250 backing_offset,
10251 backing_offset + backing_size,
10252 0);
10253 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10254 "unmapped backing_object %p [0x%llx:0x%llx]\n",
10255 backing_object,
10256 (uint64_t) backing_offset,
10257 (uint64_t) (backing_offset + backing_size));
10258
10259 /* collapse */
10260 printf("VM_TEST_COLLAPSE_COMPRESSOR: collapsing %p\n", top_object);
10261 vm_object_lock(top_object);
10262 vm_object_collapse(top_object, 0, FALSE);
10263 vm_object_unlock(top_object);
10264 printf("VM_TEST_COLLAPSE_COMPRESSOR: collapsed %p\n", top_object);
10265
10266 /* did it work? */
10267 if (top_object->shadow != VM_OBJECT_NULL) {
10268 printf("VM_TEST_COLLAPSE_COMPRESSOR: not collapsed\n");
10269 printf("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
10270 if (vm_object_collapse_compressor_allowed) {
10271 panic("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
10272 }
10273 } else {
10274 /* check the contents of the mapping */
10275 unsigned char expect[9] =
10276 { 0xA0, 0xA1, 0xA2, /* resident in top */
10277 0xA3, 0xA4, 0xA5, /* compressed in top */
10278 0xB9, /* resident in backing + shadow_offset */
10279 0xBD, /* compressed in backing + shadow_offset + paging_offset */
10280 0x00 }; /* absent in both */
10281 unsigned char actual[9];
10282 unsigned int i, errors;
10283
10284 errors = 0;
10285 for (i = 0; i < sizeof (actual); i++) {
10286 actual[i] = (unsigned char) top_address[i*PAGE_SIZE];
10287 if (actual[i] != expect[i]) {
10288 errors++;
10289 }
10290 }
10291 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10292 "actual [%x %x %x %x %x %x %x %x %x] "
10293 "expect [%x %x %x %x %x %x %x %x %x] "
10294 "%d errors\n",
10295 actual[0], actual[1], actual[2], actual[3],
10296 actual[4], actual[5], actual[6], actual[7],
10297 actual[8],
10298 expect[0], expect[1], expect[2], expect[3],
10299 expect[4], expect[5], expect[6], expect[7],
10300 expect[8],
10301 errors);
10302 if (errors) {
10303 panic("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
10304 } else {
10305 printf("VM_TEST_COLLAPSE_COMPRESSOR: PASS\n");
10306 }
10307 }
10308 }
10309 #else /* VM_TEST_COLLAPSE_COMPRESSOR */
10310 #define vm_test_collapse_compressor()
10311 #endif /* VM_TEST_COLLAPSE_COMPRESSOR */
10312
10313 #if VM_TEST_WIRE_AND_EXTRACT
10314 extern ledger_template_t task_ledger_template;
10315 #include <mach/mach_vm.h>
10316 extern ppnum_t vm_map_get_phys_page(vm_map_t map,
10317 vm_offset_t offset);
10318 static void
10319 vm_test_wire_and_extract(void)
10320 {
10321 ledger_t ledger;
10322 vm_map_t user_map, wire_map;
10323 mach_vm_address_t user_addr, wire_addr;
10324 mach_vm_size_t user_size, wire_size;
10325 mach_vm_offset_t cur_offset;
10326 vm_prot_t cur_prot, max_prot;
10327 ppnum_t user_ppnum, wire_ppnum;
10328 kern_return_t kr;
10329
10330 ledger = ledger_instantiate(task_ledger_template,
10331 LEDGER_CREATE_ACTIVE_ENTRIES);
10332 user_map = vm_map_create(pmap_create(ledger, 0, PMAP_CREATE_64BIT),
10333 0x100000000ULL,
10334 0x200000000ULL,
10335 TRUE);
10336 wire_map = vm_map_create(NULL,
10337 0x100000000ULL,
10338 0x200000000ULL,
10339 TRUE);
10340 user_addr = 0;
10341 user_size = 0x10000;
10342 kr = mach_vm_allocate(user_map,
10343 &user_addr,
10344 user_size,
10345 VM_FLAGS_ANYWHERE);
10346 assert(kr == KERN_SUCCESS);
10347 wire_addr = 0;
10348 wire_size = user_size;
10349 kr = mach_vm_remap(wire_map,
10350 &wire_addr,
10351 wire_size,
10352 0,
10353 VM_FLAGS_ANYWHERE,
10354 user_map,
10355 user_addr,
10356 FALSE,
10357 &cur_prot,
10358 &max_prot,
10359 VM_INHERIT_NONE);
10360 assert(kr == KERN_SUCCESS);
10361 for (cur_offset = 0;
10362 cur_offset < wire_size;
10363 cur_offset += PAGE_SIZE) {
10364 kr = vm_map_wire_and_extract(wire_map,
10365 wire_addr + cur_offset,
10366 VM_PROT_DEFAULT | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK),
10367 TRUE,
10368 &wire_ppnum);
10369 assert(kr == KERN_SUCCESS);
10370 user_ppnum = vm_map_get_phys_page(user_map,
10371 user_addr + cur_offset);
10372 printf("VM_TEST_WIRE_AND_EXTRACT: kr=0x%x "
10373 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
10374 kr,
10375 user_map, user_addr + cur_offset, user_ppnum,
10376 wire_map, wire_addr + cur_offset, wire_ppnum);
10377 if (kr != KERN_SUCCESS ||
10378 wire_ppnum == 0 ||
10379 wire_ppnum != user_ppnum) {
10380 panic("VM_TEST_WIRE_AND_EXTRACT: FAIL\n");
10381 }
10382 }
10383 cur_offset -= PAGE_SIZE;
10384 kr = vm_map_wire_and_extract(wire_map,
10385 wire_addr + cur_offset,
10386 VM_PROT_DEFAULT,
10387 TRUE,
10388 &wire_ppnum);
10389 assert(kr == KERN_SUCCESS);
10390 printf("VM_TEST_WIRE_AND_EXTRACT: re-wire kr=0x%x "
10391 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
10392 kr,
10393 user_map, user_addr + cur_offset, user_ppnum,
10394 wire_map, wire_addr + cur_offset, wire_ppnum);
10395 if (kr != KERN_SUCCESS ||
10396 wire_ppnum == 0 ||
10397 wire_ppnum != user_ppnum) {
10398 panic("VM_TEST_WIRE_AND_EXTRACT: FAIL\n");
10399 }
10400
10401 printf("VM_TEST_WIRE_AND_EXTRACT: PASS\n");
10402 }
10403 #else /* VM_TEST_WIRE_AND_EXTRACT */
10404 #define vm_test_wire_and_extract()
10405 #endif /* VM_TEST_WIRE_AND_EXTRACT */
10406
10407 #if VM_TEST_PAGE_WIRE_OVERFLOW_PANIC
10408 static void
10409 vm_test_page_wire_overflow_panic(void)
10410 {
10411 vm_object_t object;
10412 vm_page_t page;
10413
10414 printf("VM_TEST_PAGE_WIRE_OVERFLOW_PANIC: starting...\n");
10415
10416 object = vm_object_allocate(PAGE_SIZE);
10417 vm_object_lock(object);
10418 page = vm_page_alloc(object, 0x0);
10419 vm_page_lock_queues();
10420 do {
10421 vm_page_wire(page, 1, FALSE);
10422 } while (page->wire_count != 0);
10423 vm_page_unlock_queues();
10424 vm_object_unlock(object);
10425 panic("FBDP(%p,%p): wire_count overflow not detected\n",
10426 object, page);
10427 }
10428 #else /* VM_TEST_PAGE_WIRE_OVERFLOW_PANIC */
10429 #define vm_test_page_wire_overflow_panic()
10430 #endif /* VM_TEST_PAGE_WIRE_OVERFLOW_PANIC */
10431
10432 #if __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT
10433 extern int copyinframe(vm_address_t fp, char *frame, boolean_t is64bit);
10434 static void
10435 vm_test_kernel_object_fault(void)
10436 {
10437 kern_return_t kr;
10438 vm_offset_t stack;
10439 uintptr_t frameb[2];
10440 int ret;
10441
10442 kr = kernel_memory_allocate(kernel_map, &stack,
10443 kernel_stack_size + (2*PAGE_SIZE),
10444 0,
10445 (KMA_KSTACK | KMA_KOBJECT |
10446 KMA_GUARD_FIRST | KMA_GUARD_LAST),
10447 VM_KERN_MEMORY_STACK);
10448 if (kr != KERN_SUCCESS) {
10449 panic("VM_TEST_KERNEL_OBJECT_FAULT: kernel_memory_allocate kr 0x%x\n", kr);
10450 }
10451 ret = copyinframe((uintptr_t)stack, (char *)frameb, TRUE);
10452 if (ret != 0) {
10453 printf("VM_TEST_KERNEL_OBJECT_FAULT: PASS\n");
10454 } else {
10455 printf("VM_TEST_KERNEL_OBJECT_FAULT: FAIL\n");
10456 }
10457 vm_map_remove(kernel_map,
10458 stack,
10459 stack + kernel_stack_size + (2*PAGE_SIZE),
10460 VM_MAP_REMOVE_KUNWIRE);
10461 stack = 0;
10462 }
10463 #else /* __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT */
10464 #define vm_test_kernel_object_fault()
10465 #endif /* __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT */
10466
10467 #if VM_TEST_DEVICE_PAGER_TRANSPOSE
10468 static void
10469 vm_test_device_pager_transpose(void)
10470 {
10471 memory_object_t device_pager;
10472 vm_object_t anon_object, device_object;
10473 vm_size_t size;
10474 vm_map_offset_t anon_mapping, device_mapping;
10475 kern_return_t kr;
10476
10477 size = 3 * PAGE_SIZE;
10478 anon_object = vm_object_allocate(size);
10479 assert(anon_object != VM_OBJECT_NULL);
10480 device_pager = device_pager_setup(NULL, 0, size, 0);
10481 assert(device_pager != NULL);
10482 device_object = memory_object_to_vm_object(device_pager);
10483 assert(device_object != VM_OBJECT_NULL);
10484 anon_mapping = 0;
10485 kr = vm_map_enter(kernel_map, &anon_mapping, size, 0,
10486 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
10487 anon_object, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
10488 VM_INHERIT_DEFAULT);
10489 assert(kr == KERN_SUCCESS);
10490 device_mapping = 0;
10491 kr = vm_map_enter_mem_object(kernel_map, &device_mapping, size, 0,
10492 VM_FLAGS_ANYWHERE,
10493 VM_MAP_KERNEL_FLAGS_NONE,
10494 VM_KERN_MEMORY_NONE,
10495 (void *)device_pager, 0, FALSE,
10496 VM_PROT_DEFAULT, VM_PROT_ALL,
10497 VM_INHERIT_DEFAULT);
10498 assert(kr == KERN_SUCCESS);
10499 memory_object_deallocate(device_pager);
10500
10501 vm_object_lock(anon_object);
10502 vm_object_activity_begin(anon_object);
10503 anon_object->blocked_access = TRUE;
10504 vm_object_unlock(anon_object);
10505 vm_object_lock(device_object);
10506 vm_object_activity_begin(device_object);
10507 device_object->blocked_access = TRUE;
10508 vm_object_unlock(device_object);
10509
10510 assert(anon_object->ref_count == 1);
10511 assert(!anon_object->named);
10512 assert(device_object->ref_count == 2);
10513 assert(device_object->named);
10514
10515 kr = vm_object_transpose(device_object, anon_object, size);
10516 assert(kr == KERN_SUCCESS);
10517
10518 vm_object_lock(anon_object);
10519 vm_object_activity_end(anon_object);
10520 anon_object->blocked_access = FALSE;
10521 vm_object_unlock(anon_object);
10522 vm_object_lock(device_object);
10523 vm_object_activity_end(device_object);
10524 device_object->blocked_access = FALSE;
10525 vm_object_unlock(device_object);
10526
10527 assert(anon_object->ref_count == 2);
10528 assert(anon_object->named);
10529 kr = vm_deallocate(kernel_map, anon_mapping, size);
10530 assert(kr == KERN_SUCCESS);
10531 assert(device_object->ref_count == 1);
10532 assert(!device_object->named);
10533 kr = vm_deallocate(kernel_map, device_mapping, size);
10534 assert(kr == KERN_SUCCESS);
10535
10536 printf("VM_TEST_DEVICE_PAGER_TRANSPOSE: PASS\n");
10537 }
10538 #else /* VM_TEST_DEVICE_PAGER_TRANSPOSE */
10539 #define vm_test_device_pager_transpose()
10540 #endif /* VM_TEST_DEVICE_PAGER_TRANSPOSE */
10541
10542 void
10543 vm_tests(void)
10544 {
10545 vm_test_collapse_compressor();
10546 vm_test_wire_and_extract();
10547 vm_test_page_wire_overflow_panic();
10548 vm_test_kernel_object_fault();
10549 vm_test_device_pager_transpose();
10550 }