/* apple/xnu xnu-4570.1.46 - osfmk/vm/vm_pageout.c */
1 /*
2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_pageout.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * The proverbial page-out daemon.
64 */
65
66 #include <stdint.h>
67
68 #include <debug.h>
69 #include <mach_pagemap.h>
70 #include <mach_cluster_stats.h>
71
72 #include <mach/mach_types.h>
73 #include <mach/memory_object.h>
74 #include <mach/memory_object_default.h>
75 #include <mach/memory_object_control_server.h>
76 #include <mach/mach_host_server.h>
77 #include <mach/upl.h>
78 #include <mach/vm_map.h>
79 #include <mach/vm_param.h>
80 #include <mach/vm_statistics.h>
81 #include <mach/sdt.h>
82
83 #include <kern/kern_types.h>
84 #include <kern/counters.h>
85 #include <kern/host_statistics.h>
86 #include <kern/machine.h>
87 #include <kern/misc_protos.h>
88 #include <kern/sched.h>
89 #include <kern/thread.h>
90 #include <kern/xpr.h>
91 #include <kern/kalloc.h>
92 #include <kern/policy_internal.h>
93 #include <kern/thread_group.h>
94
95 #include <machine/vm_tuning.h>
96 #include <machine/commpage.h>
97
98 #include <vm/pmap.h>
99 #include <vm/vm_compressor_pager.h>
100 #include <vm/vm_fault.h>
101 #include <vm/vm_map.h>
102 #include <vm/vm_object.h>
103 #include <vm/vm_page.h>
104 #include <vm/vm_pageout.h>
105 #include <vm/vm_protos.h> /* must be last */
106 #include <vm/memory_object.h>
107 #include <vm/vm_purgeable_internal.h>
108 #include <vm/vm_shared_region.h>
109 #include <vm/vm_compressor.h>
110
111 #include <san/kasan.h>
112
113 #if CONFIG_PHANTOM_CACHE
114 #include <vm/vm_phantom_cache.h>
115 #endif
116
117 extern int cs_debug;
118
119 #if UPL_DEBUG
120 #include <libkern/OSDebug.h>
121 #endif
122
123 extern void m_drain(void);
124
125 #if VM_PRESSURE_EVENTS
126 #if CONFIG_JETSAM
127 extern unsigned int memorystatus_available_pages;
128 extern unsigned int memorystatus_available_pages_pressure;
129 extern unsigned int memorystatus_available_pages_critical;
130 #else /* CONFIG_JETSAM */
131 extern uint64_t memorystatus_available_pages;
132 extern uint64_t memorystatus_available_pages_pressure;
133 extern uint64_t memorystatus_available_pages_critical;
134 #endif /* CONFIG_JETSAM */
135
136 extern unsigned int memorystatus_frozen_count;
137 extern unsigned int memorystatus_suspended_count;
138
139 extern vm_pressure_level_t memorystatus_vm_pressure_level;
140 int memorystatus_purge_on_warning = 2;
141 int memorystatus_purge_on_urgent = 5;
142 int memorystatus_purge_on_critical = 8;
143
144 void vm_pressure_response(void);
145 boolean_t vm_pressure_thread_running = FALSE;
146 extern void consider_vm_pressure_events(void);
147
148 #define MEMORYSTATUS_SUSPENDED_THRESHOLD 4
149 #endif /* VM_PRESSURE_EVENTS */
150
151 boolean_t vm_pressure_changed = FALSE;
152
153 #ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE /* maximum iterations of the active queue to move pages to inactive */
154 #define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 100
155 #endif
156
157 #ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
158 #ifdef CONFIG_EMBEDDED
159 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 1024
160 #else
161 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096
162 #endif
163 #endif
164
165 #ifndef VM_PAGEOUT_DEADLOCK_RELIEF
166 #define VM_PAGEOUT_DEADLOCK_RELIEF 100 /* number of pages to move to break deadlock */
167 #endif
168
169 #ifndef VM_PAGEOUT_INACTIVE_RELIEF
170 #define VM_PAGEOUT_INACTIVE_RELIEF 50 /* minimum number of pages to move to the inactive q */
171 #endif
172
173 #ifndef VM_PAGE_LAUNDRY_MAX
174 #define VM_PAGE_LAUNDRY_MAX 128UL /* maximum pageouts on a given pageout queue */
175 #endif /* VM_PAGE_LAUNDRY_MAX */
176
177 #ifndef VM_PAGEOUT_BURST_WAIT
178 #define VM_PAGEOUT_BURST_WAIT 10 /* milliseconds */
179 #endif /* VM_PAGEOUT_BURST_WAIT */
180
181 #ifndef VM_PAGEOUT_EMPTY_WAIT
182 #define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */
183 #endif /* VM_PAGEOUT_EMPTY_WAIT */
184
185 #ifndef VM_PAGEOUT_DEADLOCK_WAIT
186 #define VM_PAGEOUT_DEADLOCK_WAIT 300 /* milliseconds */
187 #endif /* VM_PAGEOUT_DEADLOCK_WAIT */
188
189 #ifndef VM_PAGEOUT_IDLE_WAIT
190 #define VM_PAGEOUT_IDLE_WAIT 10 /* milliseconds */
191 #endif /* VM_PAGEOUT_IDLE_WAIT */
192
193 #ifndef VM_PAGEOUT_SWAP_WAIT
194 #define VM_PAGEOUT_SWAP_WAIT 50 /* milliseconds */
195 #endif /* VM_PAGEOUT_SWAP_WAIT */
196
197 #ifndef VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED
198 #define VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED 1000 /* maximum pages considered before we issue a pressure event */
199 #endif /* VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED */
200
201 #ifndef VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS
202 #define VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS 5 /* seconds */
203 #endif /* VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS */
204
205 unsigned int vm_page_speculative_q_age_ms = VM_PAGE_SPECULATIVE_Q_AGE_MS;
206 unsigned int vm_page_speculative_percentage = 5;
207
208 #ifndef VM_PAGE_SPECULATIVE_TARGET
209 #define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / (100 / vm_page_speculative_percentage))
210 #endif /* VM_PAGE_SPECULATIVE_TARGET */
211
212
213 /*
214 * To obtain a reasonable LRU approximation, the inactive queue
215 * needs to be large enough to give pages on it a chance to be
216 * referenced a second time. This macro defines the fraction
217 * of active+inactive pages that should be inactive.
218 * The pageout daemon uses it to update vm_page_inactive_target.
219 *
220 * If vm_page_free_count falls below vm_page_free_target and
221 * vm_page_inactive_count is below vm_page_inactive_target,
222 * then the pageout daemon starts running.
223 */
224
225 #ifndef VM_PAGE_INACTIVE_TARGET
226 #ifdef CONFIG_EMBEDDED
227 #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 3)
228 #else
229 #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 2)
230 #endif
231 #endif /* VM_PAGE_INACTIVE_TARGET */
232
233 /*
234 * Once the pageout daemon starts running, it keeps going
235 * until vm_page_free_count meets or exceeds vm_page_free_target.
236 */
237
238 #ifndef VM_PAGE_FREE_TARGET
239 #ifdef CONFIG_EMBEDDED
240 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 100)
241 #else
242 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
243 #endif
244 #endif /* VM_PAGE_FREE_TARGET */
245
246
247 /*
248 * The pageout daemon always starts running once vm_page_free_count
249 * falls below vm_page_free_min.
250 */
251
252 #ifndef VM_PAGE_FREE_MIN
253 #ifdef CONFIG_EMBEDDED
254 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 200)
255 #else
256 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
257 #endif
258 #endif /* VM_PAGE_FREE_MIN */
259
260 #ifdef CONFIG_EMBEDDED
261 #define VM_PAGE_FREE_RESERVED_LIMIT 100
262 #define VM_PAGE_FREE_MIN_LIMIT 1500
263 #define VM_PAGE_FREE_TARGET_LIMIT 2000
264 #else
265 #define VM_PAGE_FREE_RESERVED_LIMIT 1700
266 #define VM_PAGE_FREE_MIN_LIMIT 3500
267 #define VM_PAGE_FREE_TARGET_LIMIT 4000
268 #endif
269
270 /*
271 * When vm_page_free_count falls below vm_page_free_reserved,
272 * only vm-privileged threads can allocate pages. vm-privilege
273 * allows the pageout daemon and default pager (and any other
274 * associated threads needed for default pageout) to continue
275 * operation by dipping into the reserved pool of pages.
276 */
277
278 #ifndef VM_PAGE_FREE_RESERVED
279 #define VM_PAGE_FREE_RESERVED(n) \
280 ((unsigned) (6 * VM_PAGE_LAUNDRY_MAX) + (n))
281 #endif /* VM_PAGE_FREE_RESERVED */
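/*
 * Illustrative sketch (not compiled): plug a made-up page count into
 * the watermark macros above to show how the thresholds relate on a
 * non-CONFIG_EMBEDDED configuration.  The function name and the page
 * count are hypothetical, the real arguments fed to each macro differ,
 * and the VM_PAGE_FREE_*_LIMIT values above are not applied here.
 */
#if 0
static void
vm_pageout_watermark_sketch(void)
{
	unsigned int pages = 1000000;	/* hypothetical page count */

	unsigned int inactive_target = VM_PAGE_INACTIVE_TARGET(pages);	/* pages / 2      = 500000 */
	unsigned int free_target     = VM_PAGE_FREE_TARGET(pages);	/* 15 + pages/80  = 12515  */
	unsigned int free_min        = VM_PAGE_FREE_MIN(pages);	/* 10 + pages/100 = 10010  */
	unsigned int free_reserved   = VM_PAGE_FREE_RESERVED(100);	/* 6*128 + 100    = 868    */

	/*
	 * Per the comments above: the pageout daemon starts when
	 * vm_page_free_count drops below free_target while
	 * vm_page_inactive_count is below inactive_target, always starts
	 * once free count drops below free_min, and keeps running until
	 * vm_page_free_count is back at or above free_target.  Below
	 * free_reserved, only vm-privileged threads may allocate pages.
	 */
	(void)inactive_target; (void)free_target;
	(void)free_min; (void)free_reserved;
}
#endif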
282
283 /*
284 * When we dequeue pages from the inactive list, they are
285 * reactivated (ie, put back on the active queue) if referenced.
286 * However, it is possible to starve the free list if other
287 * processors are referencing pages faster than we can turn off
288 * the referenced bit. So we limit the number of reactivations
289 * we will make per call of vm_pageout_scan().
290 */
291 #define VM_PAGE_REACTIVATE_LIMIT_MAX 20000
292 #ifndef VM_PAGE_REACTIVATE_LIMIT
293 #ifdef CONFIG_EMBEDDED
294 #define VM_PAGE_REACTIVATE_LIMIT(avail) (VM_PAGE_INACTIVE_TARGET(avail) / 2)
295 #else
296 #define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
297 #endif
298 #endif /* VM_PAGE_REACTIVATE_LIMIT */
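/*
 * Illustrative arithmetic (hypothetical count): with the
 * non-CONFIG_EMBEDDED definition above and avail = 1,000,000 pages,
 * VM_PAGE_REACTIVATE_LIMIT(avail) = MAX(1000000 / 20, 20000) = 50000,
 * i.e. a single call of vm_pageout_scan() will reactivate at most
 * 50,000 referenced inactive pages.
 */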
299 #define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM 1000
300
301
302 extern boolean_t hibernate_cleaning_in_progress;
303
304 /*
305 * Exported variable used to broadcast the activation of the pageout scan.
306 * Working Set uses this to throttle its use of pmap removes. In this
307 * way, code which runs within memory in an uncontested context does
308 * not keep encountering soft faults.
309 */
310
311 unsigned int vm_pageout_scan_event_counter = 0;
312
313 /*
314 * Forward declarations for internal routines.
315 */
316 struct cq {
317 struct vm_pageout_queue *q;
318 void *current_chead;
319 char *scratch_buf;
320 int id;
321 };
322
323 struct cq ciq[MAX_COMPRESSOR_THREAD_COUNT];
324
325
326 #if VM_PRESSURE_EVENTS
327 void vm_pressure_thread(void);
328
329 boolean_t VM_PRESSURE_NORMAL_TO_WARNING(void);
330 boolean_t VM_PRESSURE_WARNING_TO_CRITICAL(void);
331
332 boolean_t VM_PRESSURE_WARNING_TO_NORMAL(void);
333 boolean_t VM_PRESSURE_CRITICAL_TO_WARNING(void);
334 #endif
335 void vm_pageout_garbage_collect(int);
336 static void vm_pageout_iothread_external(void);
337 static void vm_pageout_iothread_internal(struct cq *cq);
338 static void vm_pageout_adjust_eq_iothrottle(struct vm_pageout_queue *, boolean_t);
339
340 extern void vm_pageout_continue(void);
341 extern void vm_pageout_scan(void);
342 void vm_tests(void); /* forward */
343
344 boolean_t vm_restricted_to_single_processor = FALSE;
345 #if !CONFIG_EMBEDDED
346 static boolean_t vm_pageout_waiter = FALSE;
347 static boolean_t vm_pageout_running = FALSE;
348 #endif /* !CONFIG_EMBEDDED */
349
350
351 static thread_t vm_pageout_external_iothread = THREAD_NULL;
352 static thread_t vm_pageout_internal_iothread = THREAD_NULL;
353
354 unsigned int vm_pageout_reserved_internal = 0;
355 unsigned int vm_pageout_reserved_really = 0;
356
357 unsigned int vm_pageout_swap_wait = 0;
358 unsigned int vm_pageout_idle_wait = 0; /* milliseconds */
359 unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
360 unsigned int vm_pageout_burst_wait = 0; /* milliseconds */
361 unsigned int vm_pageout_deadlock_wait = 0; /* milliseconds */
362 unsigned int vm_pageout_deadlock_relief = 0;
363 unsigned int vm_pageout_inactive_relief = 0;
364 unsigned int vm_pageout_burst_active_throttle = 0;
365 unsigned int vm_pageout_burst_inactive_throttle = 0;
366
367 int vm_upl_wait_for_pages = 0;
368
369
370 /*
371 * These variables record the pageout daemon's actions:
372 * how many pages it looks at and what happens to those pages.
373 * No locking needed because only one thread modifies the variables.
374 */
375
376 unsigned int vm_pageout_active = 0; /* debugging */
377 unsigned int vm_pageout_inactive = 0; /* debugging */
378 unsigned int vm_pageout_inactive_throttled = 0; /* debugging */
379 unsigned int vm_pageout_inactive_forced = 0; /* debugging */
380 unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
381 unsigned int vm_pageout_inactive_avoid = 0; /* debugging */
382 unsigned int vm_pageout_inactive_busy = 0; /* debugging */
383 unsigned int vm_pageout_inactive_error = 0; /* debugging */
384 unsigned int vm_pageout_inactive_absent = 0; /* debugging */
385 unsigned int vm_pageout_inactive_notalive = 0; /* debugging */
386 unsigned int vm_pageout_inactive_used = 0; /* debugging */
387 unsigned int vm_pageout_cache_evicted = 0; /* debugging */
388 unsigned int vm_pageout_inactive_clean = 0; /* debugging */
389 unsigned int vm_pageout_speculative_clean = 0; /* debugging */
390 unsigned int vm_pageout_speculative_dirty = 0; /* debugging */
391
392 unsigned int vm_pageout_freed_from_cleaned = 0;
393 unsigned int vm_pageout_freed_from_speculative = 0;
394 unsigned int vm_pageout_freed_from_inactive_clean = 0;
395 unsigned int vm_pageout_freed_after_compression = 0;
396
397 extern uint32_t vm_compressor_pages_grabbed;
398 extern uint32_t c_segment_pages_compressed;
399
400 unsigned int vm_pageout_enqueued_cleaned_from_inactive_dirty = 0;
401
402 unsigned int vm_pageout_cleaned_reclaimed = 0; /* debugging; how many cleaned pages are reclaimed by the pageout scan */
403 unsigned int vm_pageout_cleaned_reactivated = 0; /* debugging; how many cleaned pages are found to be referenced on pageout (and are therefore reactivated) */
404 unsigned int vm_pageout_cleaned_reference_reactivated = 0;
405 unsigned int vm_pageout_cleaned_volatile_reactivated = 0;
406 unsigned int vm_pageout_cleaned_fault_reactivated = 0;
407 unsigned int vm_pageout_cleaned_commit_reactivated = 0; /* debugging; how many cleaned pages are found to be referenced on commit (and are therefore reactivated) */
408 unsigned int vm_pageout_cleaned_busy = 0;
409 unsigned int vm_pageout_cleaned_nolock = 0;
410
411 unsigned int vm_pageout_inactive_dirty_internal = 0; /* debugging */
412 unsigned int vm_pageout_inactive_dirty_external = 0; /* debugging */
413 unsigned int vm_pageout_inactive_deactivated = 0; /* debugging */
414 unsigned int vm_pageout_inactive_anonymous = 0; /* debugging */
415 unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */
416 unsigned int vm_pageout_purged_objects = 0; /* used for sysctl vm stats */
417 unsigned int vm_stat_discard = 0; /* debugging */
418 unsigned int vm_stat_discard_sent = 0; /* debugging */
419 unsigned int vm_stat_discard_failure = 0; /* debugging */
420 unsigned int vm_stat_discard_throttle = 0; /* debugging */
421 unsigned int vm_pageout_reactivation_limit_exceeded = 0; /* debugging */
422 unsigned int vm_pageout_inactive_force_reclaim = 0; /* debugging */
423
424 unsigned int vm_pageout_scan_reclaimed_throttled = 0;
425 unsigned int vm_pageout_scan_active_throttled = 0;
426 unsigned int vm_pageout_scan_inactive_throttled_internal = 0;
427 unsigned int vm_pageout_scan_inactive_throttled_external = 0;
428 unsigned int vm_pageout_scan_throttle = 0; /* debugging */
429 unsigned int vm_pageout_scan_burst_throttle = 0; /* debugging */
430 unsigned int vm_pageout_scan_empty_throttle = 0; /* debugging */
431 unsigned int vm_pageout_scan_swap_throttle = 0; /* debugging */
432 unsigned int vm_pageout_scan_deadlock_detected = 0; /* debugging */
433 unsigned int vm_pageout_scan_active_throttle_success = 0; /* debugging */
434 unsigned int vm_pageout_scan_inactive_throttle_success = 0; /* debugging */
435 unsigned int vm_pageout_inactive_external_forced_jetsam_count = 0; /* debugging */
436 unsigned int vm_pageout_scan_throttle_deferred = 0; /* debugging */
437 unsigned int vm_pageout_scan_yield_unthrottled = 0; /* debugging */
438 unsigned int vm_page_speculative_count_drifts = 0;
439 unsigned int vm_page_speculative_count_drift_max = 0;
440
441 uint32_t vm_compressor_failed;
442
443 /*
444 * Backing store throttle when BS is exhausted
445 */
446 unsigned int vm_backing_store_low = 0;
447
448 unsigned int vm_pageout_out_of_line = 0;
449 unsigned int vm_pageout_in_place = 0;
450
451 unsigned int vm_page_steal_pageout_page = 0;
452
453 struct vm_config vm_config;
454
455 struct vm_pageout_queue vm_pageout_queue_internal __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
456 struct vm_pageout_queue vm_pageout_queue_external __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
457
458 unsigned int vm_page_speculative_target = 0;
459
460 vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL;
461
462 boolean_t (* volatile consider_buffer_cache_collect)(int) = NULL;
463
464 #if DEVELOPMENT || DEBUG
465 unsigned long vm_cs_validated_resets = 0;
466 #endif
467
468 int vm_debug_events = 0;
469
470 #if CONFIG_MEMORYSTATUS
471 #if !CONFIG_JETSAM
472 extern boolean_t memorystatus_idle_exit_from_VM(void);
473 #endif
474 extern boolean_t memorystatus_kill_on_VM_page_shortage(boolean_t async);
475 extern void memorystatus_on_pageout_scan_end(void);
476
477 uint32_t vm_pageout_memorystatus_fb_factor_nr = 5;
478 uint32_t vm_pageout_memorystatus_fb_factor_dr = 2;
479 #if DEVELOPMENT || DEBUG
480 uint32_t vm_grab_anon_overrides = 0;
481 uint32_t vm_grab_anon_nops = 0;
482 #endif
483
484 #endif
485
486 #if MACH_CLUSTER_STATS
487 unsigned long vm_pageout_cluster_dirtied = 0;
488 unsigned long vm_pageout_cluster_cleaned = 0;
489 unsigned long vm_pageout_cluster_collisions = 0;
490 unsigned long vm_pageout_cluster_clusters = 0;
491 unsigned long vm_pageout_cluster_conversions = 0;
492 unsigned long vm_pageout_target_collisions = 0;
493 unsigned long vm_pageout_target_page_dirtied = 0;
494 unsigned long vm_pageout_target_page_freed = 0;
495 #define CLUSTER_STAT(clause) clause
496 #else /* MACH_CLUSTER_STATS */
497 #define CLUSTER_STAT(clause)
498 #endif /* MACH_CLUSTER_STATS */
499
500
501 #if DEVELOPMENT || DEBUG
502 vmct_stats_t vmct_stats;
503 #endif
504
505 /*
506 * Routine: vm_pageout_object_terminate
507 * Purpose:
508 * Destroy the pageout_object, and perform all of the
509 * required cleanup actions.
510 *
511 * In/Out conditions:
512 * The object must be locked, and will be returned locked.
513 */
514 void
515 vm_pageout_object_terminate(
516 vm_object_t object)
517 {
518 vm_object_t shadow_object;
519
520 /*
521 * Deal with the deallocation (last reference) of a pageout object
522 * (used for cleaning-in-place) by dropping the paging references/
523 * freeing pages in the original object.
524 */
525
526 assert(object->pageout);
527 shadow_object = object->shadow;
528 vm_object_lock(shadow_object);
529
530 while (!vm_page_queue_empty(&object->memq)) {
531 vm_page_t p, m;
532 vm_object_offset_t offset;
533
534 p = (vm_page_t) vm_page_queue_first(&object->memq);
535
536 assert(p->private);
537 assert(p->free_when_done);
538 p->free_when_done = FALSE;
539 assert(!p->cleaning);
540 assert(!p->laundry);
541
542 offset = p->offset;
543 VM_PAGE_FREE(p);
544 p = VM_PAGE_NULL;
545
546 m = vm_page_lookup(shadow_object,
547 offset + object->vo_shadow_offset);
548
549 if(m == VM_PAGE_NULL)
550 continue;
551
552 assert((m->dirty) || (m->precious) ||
553 (m->busy && m->cleaning));
554
555 /*
556 * Handle the trusted pager throttle.
557 * Also decrement the burst throttle (if external).
558 */
559 vm_page_lock_queues();
560 if (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)
561 vm_pageout_throttle_up(m);
562
563 /*
564 * Handle the "target" page(s). These pages are to be freed if
565 * successfully cleaned. Target pages are always busy, and are
566 * wired exactly once. The initial target pages are not mapped,
567 * (so cannot be referenced or modified) but converted target
568 * pages may have been modified between the selection as an
569 * adjacent page and conversion to a target.
570 */
571 if (m->free_when_done) {
572 assert(m->busy);
573 assert(m->vm_page_q_state == VM_PAGE_IS_WIRED);
574 assert(m->wire_count == 1);
575 m->cleaning = FALSE;
576 m->free_when_done = FALSE;
577 #if MACH_CLUSTER_STATS
578 if (m->wanted) vm_pageout_target_collisions++;
579 #endif
580 /*
581 * Revoke all access to the page. Since the object is
582 * locked, and the page is busy, this prevents the page
583 * from being dirtied after the pmap_disconnect() call
584 * returns.
585 *
586 * Since the page is left "dirty" but "not modified", we
587 * can detect whether the page was redirtied during
588 * pageout by checking the modify state.
589 */
590 if (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED) {
591 SET_PAGE_DIRTY(m, FALSE);
592 } else {
593 m->dirty = FALSE;
594 }
595
596 if (m->dirty) {
597 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
598 vm_page_unwire(m, TRUE); /* reactivates */
599 VM_STAT_INCR(reactivations);
600 PAGE_WAKEUP_DONE(m);
601 } else {
602 CLUSTER_STAT(vm_pageout_target_page_freed++;)
603 vm_page_free(m);/* clears busy, etc. */
604 }
605 vm_page_unlock_queues();
606 continue;
607 }
608 /*
609 * Handle the "adjacent" pages. These pages were cleaned in
610 * place, and should be left alone.
611 * If the page was referenced, make it active; otherwise
612 * deactivate it.
613 */
614 if ((m->vm_page_q_state == VM_PAGE_NOT_ON_Q) && !m->private) {
615 if (m->reference)
616 vm_page_activate(m);
617 else
618 vm_page_deactivate(m);
619 }
620 if (m->overwriting) {
621 /*
622 * the (COPY_OUT_FROM == FALSE) request_page_list case
623 */
624 if (m->busy) {
625 /*
626 * We do not re-set m->dirty !
627 * The page was busy so no extraneous activity
628 * could have occurred. COPY_INTO is a read into the
629 * new pages. CLEAN_IN_PLACE does actually write
630 * out the pages but handling outside of this code
631 * will take care of resetting dirty. We clear the
632 * modify however for the Programmed I/O case.
633 */
634 pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
635
636 m->busy = FALSE;
637 m->absent = FALSE;
638 } else {
639 /*
640 * alternate (COPY_OUT_FROM == FALSE) request_page_list case
641 * Occurs when the original page was wired
642 * at the time of the list request
643 */
644 assert(VM_PAGE_WIRED(m));
645 vm_page_unwire(m, TRUE); /* reactivates */
646 }
647 m->overwriting = FALSE;
648 } else {
649 /*
650 * Set the dirty state according to whether or not the page was
651 * modified during the pageout. Note that we purposefully do
652 * NOT call pmap_clear_modify since the page is still mapped.
653 * If the page were to be dirtied between the 2 calls, this
654 * fact would be lost. This code is only necessary to
655 * maintain statistics, since the pmap module is always
656 * consulted if m->dirty is false.
657 */
658 #if MACH_CLUSTER_STATS
659 m->dirty = pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m));
660
661 if (m->dirty) vm_pageout_cluster_dirtied++;
662 else vm_pageout_cluster_cleaned++;
663 if (m->wanted) vm_pageout_cluster_collisions++;
664 #else
665 m->dirty = FALSE;
666 #endif
667 }
668 m->cleaning = FALSE;
669
670 /*
671 * Wakeup any thread waiting for the page to come out of the cleaning state.
672 */
673 PAGE_WAKEUP(m);
674 vm_page_unlock_queues();
675 }
676 /*
677 * Account for the paging reference taken in vm_paging_object_allocate.
678 */
679 vm_object_activity_end(shadow_object);
680 vm_object_unlock(shadow_object);
681
682 assert(object->ref_count == 0);
683 assert(object->paging_in_progress == 0);
684 assert(object->activity_in_progress == 0);
685 assert(object->resident_page_count == 0);
686 return;
687 }
688
689 /*
690 * Routine: vm_pageclean_setup
691 *
692 * Purpose: setup a page to be cleaned (made non-dirty), but not
693 * necessarily flushed from the VM page cache.
694 * This is accomplished by cleaning in place.
695 *
696 * The page must not be busy, and new_object
697 * must be locked.
698 *
699 */
700 static void
701 vm_pageclean_setup(
702 vm_page_t m,
703 vm_page_t new_m,
704 vm_object_t new_object,
705 vm_object_offset_t new_offset)
706 {
707 assert(!m->busy);
708 #if 0
709 assert(!m->cleaning);
710 #endif
711
712 XPR(XPR_VM_PAGEOUT,
713 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
714 VM_PAGE_OBJECT(m), m->offset, m,
715 new_m, new_offset);
716
717 pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
718
719 /*
720 * Mark original page as cleaning in place.
721 */
722 m->cleaning = TRUE;
723 SET_PAGE_DIRTY(m, FALSE);
724 m->precious = FALSE;
725
726 /*
727 * Convert the fictitious page to a private shadow of
728 * the real page.
729 */
730 assert(new_m->fictitious);
731 assert(VM_PAGE_GET_PHYS_PAGE(new_m) == vm_page_fictitious_addr);
732 new_m->fictitious = FALSE;
733 new_m->private = TRUE;
734 new_m->free_when_done = TRUE;
735 VM_PAGE_SET_PHYS_PAGE(new_m, VM_PAGE_GET_PHYS_PAGE(m));
736
737 vm_page_lockspin_queues();
738 vm_page_wire(new_m, VM_KERN_MEMORY_NONE, TRUE);
739 vm_page_unlock_queues();
740
741 vm_page_insert_wired(new_m, new_object, new_offset, VM_KERN_MEMORY_NONE);
742 assert(!new_m->wanted);
743 new_m->busy = FALSE;
744 }
745
746 /*
747 * Routine: vm_pageout_initialize_page
748 * Purpose:
749 * Causes the specified page to be initialized in
750 * the appropriate memory object. This routine is used to push
751 * pages into a copy-object when they are modified in the
752 * permanent object.
753 *
754 * The page is moved to a temporary object and paged out.
755 *
756 * In/out conditions:
757 * The page in question must not be on any pageout queues.
758 * The object to which it belongs must be locked.
759 * The page must be busy, but not hold a paging reference.
760 *
761 * Implementation:
762 * Move this page to a completely new object.
763 */
764 void
765 vm_pageout_initialize_page(
766 vm_page_t m)
767 {
768 vm_object_t object;
769 vm_object_offset_t paging_offset;
770 memory_object_t pager;
771
772 XPR(XPR_VM_PAGEOUT,
773 "vm_pageout_initialize_page, page 0x%X\n",
774 m, 0, 0, 0, 0);
775
776 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
777
778 object = VM_PAGE_OBJECT(m);
779
780 assert(m->busy);
781 assert(object->internal);
782
783 /*
784 * Verify that we really want to clean this page
785 */
786 assert(!m->absent);
787 assert(!m->error);
788 assert(m->dirty);
789
790 /*
791 * Create a paging reference to let us play with the object.
792 */
793 paging_offset = m->offset + object->paging_offset;
794
795 if (m->absent || m->error || m->restart || (!m->dirty && !m->precious)) {
796 panic("reservation without pageout?"); /* alan */
797
798 VM_PAGE_FREE(m);
799 vm_object_unlock(object);
800
801 return;
802 }
803
804 /*
805 * If there's no pager, then we can't clean the page. This should
806 * never happen since this should be a copy object and therefore not
807 * an external object, so the pager should always be there.
808 */
809
810 pager = object->pager;
811
812 if (pager == MEMORY_OBJECT_NULL) {
813 panic("missing pager for copy object");
814
815 VM_PAGE_FREE(m);
816 return;
817 }
818
819 /*
820 * set the page for future call to vm_fault_list_request
821 */
822 pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
823 SET_PAGE_DIRTY(m, FALSE);
824
825 /*
826 * keep the object from collapsing or terminating
827 */
828 vm_object_paging_begin(object);
829 vm_object_unlock(object);
830
831 /*
832 * Write the data to its pager.
833 * Note that the data is passed by naming the new object,
834 * not a virtual address; the pager interface has been
835 * manipulated to use the "internal memory" data type.
836 * [The object reference from its allocation is donated
837 * to the eventual recipient.]
838 */
839 memory_object_data_initialize(pager, paging_offset, PAGE_SIZE);
840
841 vm_object_lock(object);
842 vm_object_paging_end(object);
843 }
844
845 #if MACH_CLUSTER_STATS
846 #define MAXCLUSTERPAGES 16
847 struct {
848 unsigned long pages_in_cluster;
849 unsigned long pages_at_higher_offsets;
850 unsigned long pages_at_lower_offsets;
851 } cluster_stats[MAXCLUSTERPAGES];
852 #endif /* MACH_CLUSTER_STATS */
853
854
855 /*
856 * vm_pageout_cluster:
857 *
858 * Given a page, queue it to the appropriate I/O thread,
859 * which will page it out and attempt to clean adjacent pages
860 * in the same operation.
861 *
862 * The object and queues must be locked. We will take a
863 * paging reference to prevent deallocation or collapse when we
864 * release the object lock back at the call site. The I/O thread
865 * is responsible for consuming this reference.
866 *
867 * The page must not be on any pageout queue.
868 */
869 int32_t vmct_active = 0;
870 typedef enum vmct_state_t {
871 VMCT_IDLE,
872 VMCT_AWAKENED,
873 VMCT_ACTIVE,
874 } vmct_state_t;
875 vmct_state_t vmct_state[MAX_COMPRESSOR_THREAD_COUNT];
876
877 void
878 vm_pageout_cluster(vm_page_t m)
879 {
880 vm_object_t object = VM_PAGE_OBJECT(m);
881 struct vm_pageout_queue *q;
882
883
884 XPR(XPR_VM_PAGEOUT,
885 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
886 object, m->offset, m, 0, 0);
887
888 VM_PAGE_CHECK(m);
889 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
890 vm_object_lock_assert_exclusive(object);
891
892 /*
893 * Only a certain kind of page is appreciated here.
894 */
895 assert((m->dirty || m->precious) && (!VM_PAGE_WIRED(m)));
896 assert(!m->cleaning && !m->laundry);
897 assert(m->vm_page_q_state == VM_PAGE_NOT_ON_Q);
898
899 /*
900 * protect the object from collapse or termination
901 */
902 vm_object_activity_begin(object);
903
904 if (object->internal == TRUE) {
905 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
906
907 m->busy = TRUE;
908
909 q = &vm_pageout_queue_internal;
910 } else
911 q = &vm_pageout_queue_external;
912
913 /*
914 * pgo_laundry count is tied to the laundry bit
915 */
916 m->laundry = TRUE;
917 q->pgo_laundry++;
918
919 m->vm_page_q_state = VM_PAGE_ON_PAGEOUT_Q;
920 vm_page_queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
921
922 if (q->pgo_idle == TRUE) {
923 q->pgo_idle = FALSE;
924 thread_wakeup((event_t) &q->pgo_pending);
925 }
926 VM_PAGE_CHECK(m);
927 }
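/*
 * Illustrative sketch (not compiled) of the calling convention
 * documented above vm_pageout_cluster(): both the page's object and
 * the page queues must be locked, and the page must not already be on
 * a pageout queue.  The helper name is hypothetical; real callers
 * (e.g. vm_pageout_page_queue() further down) do considerably more
 * bookkeeping.
 */
#if 0
static void
vm_pageout_cluster_usage_sketch(vm_page_t m)
{
	vm_object_t object = VM_PAGE_OBJECT(m);

	vm_object_lock(object);		/* exclusive object lock */
	vm_page_lock_queues();		/* page queues lock */

	assert(m->dirty || m->precious);
	assert(m->vm_page_q_state == VM_PAGE_NOT_ON_Q);

	vm_pageout_cluster(m);		/* takes a paging reference that the
					 * I/O thread later consumes */

	vm_page_unlock_queues();
	vm_object_unlock(object);
}
#endif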
928
929
930 unsigned long vm_pageout_throttle_up_count = 0;
931
932 /*
933 * A page is back from laundry or we are stealing it back from
934 * the laundering state. See if there are some pages waiting to
935 * go to laundry and if we can let some of them go now.
936 *
937 * Object and page queues must be locked.
938 */
939 void
940 vm_pageout_throttle_up(
941 vm_page_t m)
942 {
943 struct vm_pageout_queue *q;
944 vm_object_t m_object;
945
946 m_object = VM_PAGE_OBJECT(m);
947
948 assert(m_object != VM_OBJECT_NULL);
949 assert(m_object != kernel_object);
950
951 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
952 vm_object_lock_assert_exclusive(m_object);
953
954 vm_pageout_throttle_up_count++;
955
956 if (m_object->internal == TRUE)
957 q = &vm_pageout_queue_internal;
958 else
959 q = &vm_pageout_queue_external;
960
961 if (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) {
962
963 vm_page_queue_remove(&q->pgo_pending, m, vm_page_t, pageq);
964 m->vm_page_q_state = VM_PAGE_NOT_ON_Q;
965
966 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
967
968 vm_object_activity_end(m_object);
969 }
970 if (m->laundry == TRUE) {
971
972 m->laundry = FALSE;
973 q->pgo_laundry--;
974
975 if (q->pgo_throttled == TRUE) {
976 q->pgo_throttled = FALSE;
977 thread_wakeup((event_t) &q->pgo_laundry);
978 }
979 if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
980 q->pgo_draining = FALSE;
981 thread_wakeup((event_t) (&q->pgo_laundry+1));
982 }
983 }
984 }
985
986
987 static void
988 vm_pageout_throttle_up_batch(
989 struct vm_pageout_queue *q,
990 int batch_cnt)
991 {
992 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
993
994 vm_pageout_throttle_up_count += batch_cnt;
995
996 q->pgo_laundry -= batch_cnt;
997
998 if (q->pgo_throttled == TRUE) {
999 q->pgo_throttled = FALSE;
1000 thread_wakeup((event_t) &q->pgo_laundry);
1001 }
1002 if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
1003 q->pgo_draining = FALSE;
1004 thread_wakeup((event_t) (&q->pgo_laundry+1));
1005 }
1006 }
1007
1008
1009
1010 /*
1011 * VM memory pressure monitoring.
1012 *
1013 * vm_pageout_scan() keeps track of the number of pages it considers and
1014 * reclaims, in the currently active vm_pageout_stat[vm_pageout_stat_now].
1015 *
1016 * compute_memory_pressure() is called every second from compute_averages()
1017 * and moves "vm_pageout_stat_now" forward, to start accumulating the number
1018 * of reclaimed pages in a new vm_pageout_stat[] bucket.
1019 *
1020 * mach_vm_pressure_monitor() collects past statistics about memory pressure.
1021 * The caller provides the number of seconds ("nsecs") worth of statistics
1022 * it wants, up to 30 seconds.
1023 * It computes the number of pages reclaimed in the past "nsecs" seconds and
1024 * also returns the number of pages the system still needs to reclaim at this
1025 * moment in time.
1026 */
1027 #define VM_PAGEOUT_STAT_SIZE 31
1028 struct vm_pageout_stat {
1029 unsigned int considered;
1030 unsigned int reclaimed_clean;
1031 unsigned int pages_compressed;
1032 unsigned int pages_grabbed_by_compressor;
1033 unsigned int cleaned_dirty_external;
1034 unsigned int throttled_internal_q;
1035 unsigned int throttled_external_q;
1036 unsigned int failed_compressions;
1037 } vm_pageout_stats[VM_PAGEOUT_STAT_SIZE] = {{0,0,0,0,0,0,0,0}, };
1038
1039 unsigned int vm_pageout_stat_now = 0;
1040 unsigned int vm_memory_pressure = 0;
1041
1042 #define VM_PAGEOUT_STAT_BEFORE(i) \
1043 (((i) == 0) ? VM_PAGEOUT_STAT_SIZE - 1 : (i) - 1)
1044 #define VM_PAGEOUT_STAT_AFTER(i) \
1045 (((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 0 : (i) + 1)
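/*
 * Illustrative sketch (not compiled): how the ring buffer above is
 * read back.  Walk backwards from the live bucket with
 * VM_PAGEOUT_STAT_BEFORE(), summing one bucket per second of history
 * wanted; this mirrors the loop in mach_vm_pressure_monitor() further
 * down.  The helper name is hypothetical.
 */
#if 0
static unsigned int
vm_pageout_stats_reclaimed_last(unsigned int nsecs)
{
	unsigned int now = vm_pageout_stat_now;
	unsigned int then;
	unsigned int total = 0;

	for (then = VM_PAGEOUT_STAT_BEFORE(now);
	     then != now && nsecs-- != 0;
	     then = VM_PAGEOUT_STAT_BEFORE(then)) {
		total += vm_pageout_stats[then].reclaimed_clean;
	}
	return total;
}
#endif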
1046
1047 #if VM_PAGE_BUCKETS_CHECK
1048 int vm_page_buckets_check_interval = 10; /* in seconds */
1049 #endif /* VM_PAGE_BUCKETS_CHECK */
1050
1051 /*
1052 * Called from compute_averages().
1053 */
1054 void
1055 compute_memory_pressure(
1056 __unused void *arg)
1057 {
1058 unsigned int vm_pageout_next;
1059
1060 #if VM_PAGE_BUCKETS_CHECK
1061 /* check the consistency of VM page buckets at regular interval */
1062 static int counter = 0;
1063 if ((++counter % vm_page_buckets_check_interval) == 0) {
1064 vm_page_buckets_check();
1065 }
1066 #endif /* VM_PAGE_BUCKETS_CHECK */
1067
1068 vm_memory_pressure =
1069 vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].reclaimed_clean;
1070
1071 commpage_set_memory_pressure( vm_memory_pressure );
1072
1073 /* move "now" forward */
1074 vm_pageout_next = VM_PAGEOUT_STAT_AFTER(vm_pageout_stat_now);
1075 vm_pageout_stats[vm_pageout_next].considered = 0;
1076 vm_pageout_stats[vm_pageout_next].reclaimed_clean = 0;
1077 vm_pageout_stats[vm_pageout_next].throttled_internal_q = 0;
1078 vm_pageout_stats[vm_pageout_next].throttled_external_q = 0;
1079 vm_pageout_stats[vm_pageout_next].cleaned_dirty_external = 0;
1080 vm_pageout_stats[vm_pageout_next].pages_compressed = 0;
1081 vm_pageout_stats[vm_pageout_next].pages_grabbed_by_compressor = 0;
1082 vm_pageout_stats[vm_pageout_next].failed_compressions = 0;
1083
1084 vm_pageout_stat_now = vm_pageout_next;
1085 }
1086
1087
1088 /*
1089 * IMPORTANT
1090 * mach_vm_ctl_page_free_wanted() is called indirectly, via
1091 * mach_vm_pressure_monitor(), when taking a stackshot. Therefore,
1092 * it must be safe in the restricted stackshot context. Locks and/or
1093 * blocking are not allowable.
1094 */
1095 unsigned int
1096 mach_vm_ctl_page_free_wanted(void)
1097 {
1098 unsigned int page_free_target, page_free_count, page_free_wanted;
1099
1100 page_free_target = vm_page_free_target;
1101 page_free_count = vm_page_free_count;
1102 if (page_free_target > page_free_count) {
1103 page_free_wanted = page_free_target - page_free_count;
1104 } else {
1105 page_free_wanted = 0;
1106 }
1107
1108 return page_free_wanted;
1109 }
1110
1111
1112 /*
1113 * IMPORTANT:
1114 * mach_vm_pressure_monitor() is called when taking a stackshot, with
1115 * wait_for_pressure FALSE, so that code path must remain safe in the
1116 * restricted stackshot context. No blocking or locks are allowable.
1117 * on that code path.
1118 */
1119
1120 kern_return_t
1121 mach_vm_pressure_monitor(
1122 boolean_t wait_for_pressure,
1123 unsigned int nsecs_monitored,
1124 unsigned int *pages_reclaimed_p,
1125 unsigned int *pages_wanted_p)
1126 {
1127 wait_result_t wr;
1128 unsigned int vm_pageout_then, vm_pageout_now;
1129 unsigned int pages_reclaimed;
1130
1131 /*
1132 * We don't take the vm_page_queue_lock here because we don't want
1133 * vm_pressure_monitor() to get in the way of the vm_pageout_scan()
1134 * thread when it's trying to reclaim memory. We don't need fully
1135 * accurate monitoring anyway...
1136 */
1137
1138 if (wait_for_pressure) {
1139 /* wait until there's memory pressure */
1140 while (vm_page_free_count >= vm_page_free_target) {
1141 wr = assert_wait((event_t) &vm_page_free_wanted,
1142 THREAD_INTERRUPTIBLE);
1143 if (wr == THREAD_WAITING) {
1144 wr = thread_block(THREAD_CONTINUE_NULL);
1145 }
1146 if (wr == THREAD_INTERRUPTED) {
1147 return KERN_ABORTED;
1148 }
1149 if (wr == THREAD_AWAKENED) {
1150 /*
1151 * The memory pressure might have already
1152 * been relieved but let's not block again
1153 * and let's report that there was memory
1154 * pressure at some point.
1155 */
1156 break;
1157 }
1158 }
1159 }
1160
1161 /* provide the number of pages the system wants to reclaim */
1162 if (pages_wanted_p != NULL) {
1163 *pages_wanted_p = mach_vm_ctl_page_free_wanted();
1164 }
1165
1166 if (pages_reclaimed_p == NULL) {
1167 return KERN_SUCCESS;
1168 }
1169
1170 /* provide number of pages reclaimed in the last "nsecs_monitored" */
1171 vm_pageout_now = vm_pageout_stat_now;
1172 pages_reclaimed = 0;
1173 for (vm_pageout_then =
1174 VM_PAGEOUT_STAT_BEFORE(vm_pageout_now);
1175 vm_pageout_then != vm_pageout_now &&
1176 nsecs_monitored-- != 0;
1177 vm_pageout_then =
1178 VM_PAGEOUT_STAT_BEFORE(vm_pageout_then)) {
1179 pages_reclaimed += vm_pageout_stats[vm_pageout_then].reclaimed_clean;
1180 }
1181 *pages_reclaimed_p = pages_reclaimed;
1182
1183 return KERN_SUCCESS;
1184 }
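/*
 * Illustrative sketch (not compiled): minimal in-kernel usage of the
 * interface above, asking for the last 10 seconds of reclaim history
 * without blocking.  The helper and local variable names are
 * hypothetical.
 */
#if 0
static void
vm_pressure_monitor_usage_sketch(void)
{
	unsigned int pages_reclaimed = 0;
	unsigned int pages_wanted = 0;
	kern_return_t kr;

	kr = mach_vm_pressure_monitor(FALSE,	/* wait_for_pressure */
				      10,	/* nsecs_monitored */
				      &pages_reclaimed,
				      &pages_wanted);
	if (kr == KERN_SUCCESS) {
		/* pages_wanted == 0 means the free target is currently met */
	}
}
#endif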
1185
1186
1187
1188 #if DEVELOPMENT || DEBUG
1189
1190 static void
1191 vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t *, int);
1192
1193 /*
1194 * condition variable used to make sure there is
1195 * only a single sweep going on at a time
1196 */
1197 boolean_t vm_pageout_disconnect_all_pages_active = FALSE;
1198
1199
1200 void
1201 vm_pageout_disconnect_all_pages()
1202 {
1203 vm_page_lock_queues();
1204
1205 if (vm_pageout_disconnect_all_pages_active == TRUE) {
1206 vm_page_unlock_queues();
1207 return;
1208 }
1209 vm_pageout_disconnect_all_pages_active = TRUE;
1210 vm_page_unlock_queues();
1211
1212 vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_throttled, vm_page_throttled_count);
1213 vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_anonymous, vm_page_anonymous_count);
1214 vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_active, vm_page_active_count);
1215
1216 vm_pageout_disconnect_all_pages_active = FALSE;
1217 }
1218
1219
1220 void
1221 vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t *q, int qcount)
1222 {
1223 vm_page_t m;
1224 vm_object_t t_object = NULL;
1225 vm_object_t l_object = NULL;
1226 vm_object_t m_object = NULL;
1227 int delayed_unlock = 0;
1228 int try_failed_count = 0;
1229 int disconnected_count = 0;
1230 int paused_count = 0;
1231 int object_locked_count = 0;
1232
1233 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_ALL_PAGE_MAPPINGS)) | DBG_FUNC_START,
1234 q, qcount, 0, 0, 0);
1235
1236 vm_page_lock_queues();
1237
1238 while (qcount && !vm_page_queue_empty(q)) {
1239
1240 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1241
1242 m = (vm_page_t) vm_page_queue_first(q);
1243 m_object = VM_PAGE_OBJECT(m);
1244
1245 /*
1246 * check to see if we currently are working
1247 * with the same object... if so, we've
1248 * already got the lock
1249 */
1250 if (m_object != l_object) {
1251 /*
1252 * the object associated with candidate page is
1253 * different from the one we were just working
1254 * with... dump the lock if we still own it
1255 */
1256 if (l_object != NULL) {
1257 vm_object_unlock(l_object);
1258 l_object = NULL;
1259 }
1260 if (m_object != t_object)
1261 try_failed_count = 0;
1262
1263 /*
1264 * Try to lock object; since we've already got the
1265 * page queues lock, we can only 'try' for this one.
1266 * if the 'try' fails, we need to do a mutex_pause
1267 * to allow the owner of the object lock a chance to
1268 * run...
1269 */
1270 if ( !vm_object_lock_try_scan(m_object)) {
1271
1272 if (try_failed_count > 20) {
1273 goto reenter_pg_on_q;
1274 }
1275 vm_page_unlock_queues();
1276 mutex_pause(try_failed_count++);
1277 vm_page_lock_queues();
1278 delayed_unlock = 0;
1279
1280 paused_count++;
1281
1282 t_object = m_object;
1283 continue;
1284 }
1285 object_locked_count++;
1286
1287 l_object = m_object;
1288 }
1289 if ( !m_object->alive || m->cleaning || m->laundry || m->busy || m->absent || m->error || m->free_when_done) {
1290 /*
1291 * put it back on the head of its queue
1292 */
1293 goto reenter_pg_on_q;
1294 }
1295 if (m->pmapped == TRUE) {
1296
1297 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
1298
1299 disconnected_count++;
1300 }
1301 reenter_pg_on_q:
1302 vm_page_queue_remove(q, m, vm_page_t, pageq);
1303 vm_page_queue_enter(q, m, vm_page_t, pageq);
1304
1305 qcount--;
1306 try_failed_count = 0;
1307
1308 if (delayed_unlock++ > 128) {
1309
1310 if (l_object != NULL) {
1311 vm_object_unlock(l_object);
1312 l_object = NULL;
1313 }
1314 lck_mtx_yield(&vm_page_queue_lock);
1315 delayed_unlock = 0;
1316 }
1317 }
1318 if (l_object != NULL) {
1319 vm_object_unlock(l_object);
1320 l_object = NULL;
1321 }
1322 vm_page_unlock_queues();
1323
1324 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_ALL_PAGE_MAPPINGS)) | DBG_FUNC_END,
1325 q, disconnected_count, object_locked_count, paused_count, 0);
1326 }
1327
1328 #endif
1329
1330
1331 static void
1332 vm_pageout_page_queue(vm_page_queue_head_t *, int);
1333
1334 /*
1335 * condition variable used to make sure there is
1336 * only a single sweep going on at a time
1337 */
1338 boolean_t vm_pageout_anonymous_pages_active = FALSE;
1339
1340
1341 void
1342 vm_pageout_anonymous_pages()
1343 {
1344 if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
1345
1346 vm_page_lock_queues();
1347
1348 if (vm_pageout_anonymous_pages_active == TRUE) {
1349 vm_page_unlock_queues();
1350 return;
1351 }
1352 vm_pageout_anonymous_pages_active = TRUE;
1353 vm_page_unlock_queues();
1354
1355 vm_pageout_page_queue(&vm_page_queue_throttled, vm_page_throttled_count);
1356 vm_pageout_page_queue(&vm_page_queue_anonymous, vm_page_anonymous_count);
1357 vm_pageout_page_queue(&vm_page_queue_active, vm_page_active_count);
1358
1359 if (VM_CONFIG_SWAP_IS_PRESENT)
1360 vm_consider_swapping();
1361
1362 vm_page_lock_queues();
1363 vm_pageout_anonymous_pages_active = FALSE;
1364 vm_page_unlock_queues();
1365 }
1366 }
1367
1368
1369 void
1370 vm_pageout_page_queue(vm_page_queue_head_t *q, int qcount)
1371 {
1372 vm_page_t m;
1373 vm_object_t t_object = NULL;
1374 vm_object_t l_object = NULL;
1375 vm_object_t m_object = NULL;
1376 int delayed_unlock = 0;
1377 int try_failed_count = 0;
1378 int refmod_state;
1379 int pmap_options;
1380 struct vm_pageout_queue *iq;
1381 ppnum_t phys_page;
1382
1383
1384 iq = &vm_pageout_queue_internal;
1385
1386 vm_page_lock_queues();
1387
1388 while (qcount && !vm_page_queue_empty(q)) {
1389
1390 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1391
1392 if (VM_PAGE_Q_THROTTLED(iq)) {
1393
1394 if (l_object != NULL) {
1395 vm_object_unlock(l_object);
1396 l_object = NULL;
1397 }
1398 iq->pgo_draining = TRUE;
1399
1400 assert_wait((event_t) (&iq->pgo_laundry + 1), THREAD_INTERRUPTIBLE);
1401 vm_page_unlock_queues();
1402
1403 thread_block(THREAD_CONTINUE_NULL);
1404
1405 vm_page_lock_queues();
1406 delayed_unlock = 0;
1407 continue;
1408 }
1409 m = (vm_page_t) vm_page_queue_first(q);
1410 m_object = VM_PAGE_OBJECT(m);
1411
1412 /*
1413 * check to see if we currently are working
1414 * with the same object... if so, we've
1415 * already got the lock
1416 */
1417 if (m_object != l_object) {
1418 if ( !m_object->internal)
1419 goto reenter_pg_on_q;
1420
1421 /*
1422 * the object associated with candidate page is
1423 * different from the one we were just working
1424 * with... dump the lock if we still own it
1425 */
1426 if (l_object != NULL) {
1427 vm_object_unlock(l_object);
1428 l_object = NULL;
1429 }
1430 if (m_object != t_object)
1431 try_failed_count = 0;
1432
1433 /*
1434 * Try to lock object; since we've already got the
1435 * page queues lock, we can only 'try' for this one.
1436 * if the 'try' fails, we need to do a mutex_pause
1437 * to allow the owner of the object lock a chance to
1438 * run...
1439 */
1440 if ( !vm_object_lock_try_scan(m_object)) {
1441
1442 if (try_failed_count > 20) {
1443 goto reenter_pg_on_q;
1444 }
1445 vm_page_unlock_queues();
1446 mutex_pause(try_failed_count++);
1447 vm_page_lock_queues();
1448 delayed_unlock = 0;
1449
1450 t_object = m_object;
1451 continue;
1452 }
1453 l_object = m_object;
1454 }
1455 if ( !m_object->alive || m->cleaning || m->laundry || m->busy || m->absent || m->error || m->free_when_done) {
1456 /*
1457 * page is not to be cleaned
1458 * put it back on the head of its queue
1459 */
1460 goto reenter_pg_on_q;
1461 }
1462 phys_page = VM_PAGE_GET_PHYS_PAGE(m);
1463
1464 if (m->reference == FALSE && m->pmapped == TRUE) {
1465 refmod_state = pmap_get_refmod(phys_page);
1466
1467 if (refmod_state & VM_MEM_REFERENCED)
1468 m->reference = TRUE;
1469 if (refmod_state & VM_MEM_MODIFIED) {
1470 SET_PAGE_DIRTY(m, FALSE);
1471 }
1472 }
1473 if (m->reference == TRUE) {
1474 m->reference = FALSE;
1475 pmap_clear_refmod_options(phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
1476 goto reenter_pg_on_q;
1477 }
1478 if (m->pmapped == TRUE) {
1479 if (m->dirty || m->precious) {
1480 pmap_options = PMAP_OPTIONS_COMPRESSOR;
1481 } else {
1482 pmap_options = PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
1483 }
1484 refmod_state = pmap_disconnect_options(phys_page, pmap_options, NULL);
1485 if (refmod_state & VM_MEM_MODIFIED) {
1486 SET_PAGE_DIRTY(m, FALSE);
1487 }
1488 }
1489 if ( !m->dirty && !m->precious) {
1490 vm_page_unlock_queues();
1491 VM_PAGE_FREE(m);
1492 vm_page_lock_queues();
1493 delayed_unlock = 0;
1494
1495 goto next_pg;
1496 }
1497 if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL) {
1498
1499 if (!m_object->pager_initialized) {
1500
1501 vm_page_unlock_queues();
1502
1503 vm_object_collapse(m_object, (vm_object_offset_t) 0, TRUE);
1504
1505 if (!m_object->pager_initialized)
1506 vm_object_compressor_pager_create(m_object);
1507
1508 vm_page_lock_queues();
1509 delayed_unlock = 0;
1510 }
1511 if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL)
1512 goto reenter_pg_on_q;
1513 /*
1514 * vm_object_compressor_pager_create will drop the object lock
1515 * which means 'm' may no longer be valid to use
1516 */
1517 continue;
1518 }
1519 /*
1520 * we've already factored out pages in the laundry which
1521 * means this page can't be on the pageout queue so it's
1522 * safe to do the vm_page_queues_remove
1523 */
1524 vm_page_queues_remove(m, TRUE);
1525
1526 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1527
1528 vm_pageout_cluster(m);
1529
1530 goto next_pg;
1531
1532 reenter_pg_on_q:
1533 vm_page_queue_remove(q, m, vm_page_t, pageq);
1534 vm_page_queue_enter(q, m, vm_page_t, pageq);
1535 next_pg:
1536 qcount--;
1537 try_failed_count = 0;
1538
1539 if (delayed_unlock++ > 128) {
1540
1541 if (l_object != NULL) {
1542 vm_object_unlock(l_object);
1543 l_object = NULL;
1544 }
1545 lck_mtx_yield(&vm_page_queue_lock);
1546 delayed_unlock = 0;
1547 }
1548 }
1549 if (l_object != NULL) {
1550 vm_object_unlock(l_object);
1551 l_object = NULL;
1552 }
1553 vm_page_unlock_queues();
1554 }
1555
1556
1557
1558 /*
1559 * function in BSD to apply I/O throttle to the pageout thread
1560 */
1561 extern void vm_pageout_io_throttle(void);
1562
1563 #define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, obj) \
1564 MACRO_BEGIN \
1565 /* \
1566 * If a "reusable" page somehow made it back into \
1567 * the active queue, it's been re-used and is not \
1568 * quite re-usable. \
1569 * If the VM object was "all_reusable", consider it \
1570 * as "all re-used" instead of converting it to \
1571 * "partially re-used", which could be expensive. \
1572 */ \
1573 assert(VM_PAGE_OBJECT((m)) == (obj)); \
1574 if ((m)->reusable || \
1575 (obj)->all_reusable) { \
1576 vm_object_reuse_pages((obj), \
1577 (m)->offset, \
1578 (m)->offset + PAGE_SIZE_64, \
1579 FALSE); \
1580 } \
1581 MACRO_END
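/*
 * Illustrative sketch (not compiled) of how the macro above is used:
 * with the page queues lock and the page's object lock held, it is
 * applied to a candidate page so that a stray "reusable" page is
 * accounted as re-used again.  The helper name is hypothetical.
 */
#if 0
static void
vm_pageout_reusable_page_sketch(vm_page_t m)
{
	vm_object_t m_object = VM_PAGE_OBJECT(m);

	/* caller assumed to hold the page queues lock and m_object's lock */
	VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, m_object);
}
#endif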
1582
1583
1584 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT 64
1585 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX 1024
1586
1587 #define FCS_IDLE 0
1588 #define FCS_DELAYED 1
1589 #define FCS_DEADLOCK_DETECTED 2
1590
1591 struct flow_control {
1592 int state;
1593 mach_timespec_t ts;
1594 };
1595
1596 #if CONFIG_BACKGROUND_QUEUE
1597 uint64_t vm_pageout_skipped_bq_internal = 0;
1598 uint64_t vm_pageout_considered_bq_internal = 0;
1599 uint64_t vm_pageout_considered_bq_external = 0;
1600 uint64_t vm_pageout_rejected_bq_internal = 0;
1601 uint64_t vm_pageout_rejected_bq_external = 0;
1602 #endif
1603
1604 uint32_t vm_pageout_no_victim = 0;
1605 uint32_t vm_pageout_considered_page = 0;
1606 uint32_t vm_page_filecache_min = 0;
1607
1608 #define ANONS_GRABBED_LIMIT 2
1609
1610 #if CONFIG_SECLUDED_MEMORY
1611 extern vm_page_t vm_page_grab_secluded(void);
1612 uint64_t vm_pageout_secluded_burst_count = 0;
1613 #endif /* CONFIG_SECLUDED_MEMORY */
1614
1615
1616 static void vm_pageout_delayed_unlock(int *, int *, vm_page_t *);
1617 static void vm_pageout_prepare_to_block(vm_object_t *, int *, vm_page_t *, int *, int);
1618
1619 #define VM_PAGEOUT_PB_NO_ACTION 0
1620 #define VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER 1
1621 #define VM_PAGEOUT_PB_THREAD_YIELD 2
1622
1623
1624 static void
1625 vm_pageout_delayed_unlock(int *delayed_unlock, int *local_freed, vm_page_t *local_freeq)
1626 {
1627 if (*local_freeq) {
1628 vm_page_unlock_queues();
1629
1630 VM_DEBUG_EVENT(
1631 vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
1632 vm_page_free_count, *local_freed, 0, 1);
1633
1634 vm_page_free_list(*local_freeq, TRUE);
1635
1636 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
1637 vm_page_free_count, 0, 0, 1);
1638
1639 *local_freeq = NULL;
1640 *local_freed = 0;
1641
1642 vm_page_lock_queues();
1643 } else {
1644 lck_mtx_yield(&vm_page_queue_lock);
1645 }
1646 *delayed_unlock = 1;
1647 }
1648
1649
1650 static void
1651 vm_pageout_prepare_to_block(vm_object_t *object, int *delayed_unlock,
1652 vm_page_t *local_freeq, int *local_freed, int action)
1653 {
1654 vm_page_unlock_queues();
1655
1656 if (*object != NULL) {
1657 vm_object_unlock(*object);
1658 *object = NULL;
1659 }
1660 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1661
1662 if (*local_freeq) {
1663
1664 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
1665 vm_page_free_count, *local_freed, 0, 2);
1666
1667 vm_page_free_list(*local_freeq, TRUE);
1668
1669 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
1670 vm_page_free_count, 0, 0, 2);
1671
1672 *local_freeq = NULL;
1673 *local_freed = 0;
1674 }
1675 *delayed_unlock = 1;
1676
1677 switch (action) {
1678
1679 case VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER:
1680 vm_consider_waking_compactor_swapper();
1681 break;
1682 case VM_PAGEOUT_PB_THREAD_YIELD:
1683 thread_yield_internal(1);
1684 break;
1685 case VM_PAGEOUT_PB_NO_ACTION:
1686 default:
1687 break;
1688 }
1689 vm_page_lock_queues();
1690 }
1691
1692
1693 int last_vm_pageout_freed_from_inactive_clean = 0;
1694 int last_vm_pageout_freed_from_cleaned = 0;
1695 int last_vm_pageout_freed_from_speculative = 0;
1696 int last_vm_pageout_freed_after_compression = 0;
1697 int last_vm_pageout_enqueued_cleaned_from_inactive_dirty = 0;
1698 int last_vm_pageout_inactive_force_reclaim = 0;
1699 int last_vm_pageout_scan_inactive_throttled_external = 0;
1700 int last_vm_pageout_scan_inactive_throttled_internal = 0;
1701 int last_vm_pageout_reactivation_limit_exceeded = 0;
1702 int last_vm_pageout_considered_page = 0;
1703 int last_vm_compressor_pages_grabbed = 0;
1704 int last_vm_compressor_failed = 0;
1705
1706 void update_vm_info(void)
1707 {
1708 int tmp1, tmp2, tmp3;
1709
1710 if (!kdebug_enable)
1711 return;
1712
1713 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO1)) | DBG_FUNC_NONE,
1714 vm_page_active_count,
1715 vm_page_speculative_count,
1716 vm_page_inactive_count,
1717 vm_page_anonymous_count,
1718 0);
1719
1720 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO2)) | DBG_FUNC_NONE,
1721 vm_page_free_count,
1722 vm_page_wire_count,
1723 VM_PAGE_COMPRESSOR_COUNT,
1724 0, 0);
1725
1726 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO3)) | DBG_FUNC_NONE,
1727 c_segment_pages_compressed,
1728 vm_page_internal_count,
1729 vm_page_external_count,
1730 vm_page_xpmapped_external_count,
1731 0);
1732
1733
1734 if ((vm_pageout_considered_page - last_vm_pageout_considered_page) == 0 &&
1735 (vm_pageout_enqueued_cleaned_from_inactive_dirty - last_vm_pageout_enqueued_cleaned_from_inactive_dirty == 0) &&
1736 (vm_pageout_freed_after_compression - last_vm_pageout_freed_after_compression == 0))
1737 return;
1738
1739
1740 tmp1 = vm_pageout_considered_page;
1741 tmp2 = vm_pageout_freed_from_speculative;
1742 tmp3 = vm_pageout_freed_from_inactive_clean;
1743
1744 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO4)) | DBG_FUNC_NONE,
1745 tmp1 - last_vm_pageout_considered_page,
1746 tmp2 - last_vm_pageout_freed_from_speculative,
1747 tmp3 - last_vm_pageout_freed_from_inactive_clean,
1748 0, 0);
1749
1750 last_vm_pageout_considered_page = tmp1;
1751 last_vm_pageout_freed_from_speculative = tmp2;
1752 last_vm_pageout_freed_from_inactive_clean = tmp3;
1753
1754
1755 tmp1 = vm_pageout_scan_inactive_throttled_external;
1756 tmp2 = vm_pageout_enqueued_cleaned_from_inactive_dirty;
1757 tmp3 = vm_pageout_freed_from_cleaned;
1758
1759 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO5)) | DBG_FUNC_NONE,
1760 tmp1 - last_vm_pageout_scan_inactive_throttled_external,
1761 tmp2 - last_vm_pageout_enqueued_cleaned_from_inactive_dirty,
1762 tmp3 - last_vm_pageout_freed_from_cleaned,
1763 0, 0);
1764
1765 vm_pageout_stats[vm_pageout_stat_now].throttled_external_q += (tmp1 - last_vm_pageout_scan_inactive_throttled_external);
1766 vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_external += (tmp2 - last_vm_pageout_enqueued_cleaned_from_inactive_dirty);
1767
1768 last_vm_pageout_scan_inactive_throttled_external = tmp1;
1769 last_vm_pageout_enqueued_cleaned_from_inactive_dirty = tmp2;
1770 last_vm_pageout_freed_from_cleaned = tmp3;
1771
1772
1773 tmp1 = vm_pageout_scan_inactive_throttled_internal;
1774 tmp2 = vm_pageout_freed_after_compression;
1775 tmp3 = vm_compressor_pages_grabbed;
1776
1777 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO6)) | DBG_FUNC_NONE,
1778 tmp1 - last_vm_pageout_scan_inactive_throttled_internal,
1779 tmp2 - last_vm_pageout_freed_after_compression,
1780 tmp3 - last_vm_compressor_pages_grabbed,
1781 0, 0);
1782
1783 vm_pageout_stats[vm_pageout_stat_now].throttled_internal_q += (tmp1 - last_vm_pageout_scan_inactive_throttled_internal);
1784 vm_pageout_stats[vm_pageout_stat_now].pages_compressed += (tmp2 - last_vm_pageout_freed_after_compression);
1785 vm_pageout_stats[vm_pageout_stat_now].pages_grabbed_by_compressor += (tmp3 - last_vm_compressor_pages_grabbed);
1786
1787 last_vm_pageout_scan_inactive_throttled_internal = tmp1;
1788 last_vm_pageout_freed_after_compression = tmp2;
1789 last_vm_compressor_pages_grabbed = tmp3;
1790
1791
1792 if ((vm_pageout_reactivation_limit_exceeded - last_vm_pageout_reactivation_limit_exceeded) == 0 &&
1793 (vm_pageout_inactive_force_reclaim - last_vm_pageout_inactive_force_reclaim) == 0 &&
1794 (vm_compressor_failed - last_vm_compressor_failed) == 0)
1795 return;
1796
1797 tmp1 = vm_pageout_reactivation_limit_exceeded;
1798 tmp2 = vm_pageout_inactive_force_reclaim;
1799 tmp3 = vm_compressor_failed;
1800
1801 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO7)) | DBG_FUNC_NONE,
1802 tmp1 - last_vm_pageout_reactivation_limit_exceeded,
1803 tmp2 - last_vm_pageout_inactive_force_reclaim,
1804 tmp3 - last_vm_compressor_failed,
1805 0, 0);
1806
1807 vm_pageout_stats[vm_pageout_stat_now].failed_compressions += (tmp3 - last_vm_compressor_failed);
1808
1809 last_vm_pageout_reactivation_limit_exceeded = tmp1;
1810 last_vm_pageout_inactive_force_reclaim = tmp2;
1811 last_vm_compressor_failed = tmp3;
1812 }
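/*
 * Illustrative sketch (not compiled): the snapshot/delta pattern used by
 * update_vm_info() above, reduced to a single counter.  Each call samples
 * the running counter once, reports only the change since the previous
 * call, and saves the sample for next time.  The names counter_now and
 * emit_delta are placeholders, not kernel interfaces.
 */
#if 0
static int counter_last;

static void
emit_counter_delta(int counter_now, void (*emit_delta)(int))
{
	int tmp;

	tmp = counter_now;			/* sample once so the delta and the snapshot agree */
	emit_delta(tmp - counter_last);		/* report only the change since the last call */
	counter_last = tmp;			/* remember this sample for the next call */
}
#endif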
1813
1814
1815 /*
1816 * vm_pageout_scan does the dirty work for the pageout daemon.
1817 * It returns with both vm_page_queue_free_lock and vm_page_queue_lock
1818 * held and vm_page_free_wanted == 0.
1819 */
1820 void
1821 vm_pageout_scan(void)
1822 {
1823 unsigned int loop_count = 0;
1824 unsigned int inactive_burst_count = 0;
1825 unsigned int active_burst_count = 0;
1826 unsigned int reactivated_this_call;
1827 unsigned int reactivate_limit;
1828 vm_page_t local_freeq = NULL;
1829 int local_freed = 0;
1830 int delayed_unlock;
1831 int delayed_unlock_limit = 0;
1832 int refmod_state = 0;
1833 int vm_pageout_deadlock_target = 0;
1834 struct vm_pageout_queue *iq;
1835 struct vm_pageout_queue *eq;
1836 struct vm_speculative_age_q *sq;
1837 struct flow_control flow_control = { 0, { 0, 0 } };
1838 boolean_t inactive_throttled = FALSE;
1839 boolean_t try_failed;
1840 mach_timespec_t ts;
1841 unsigned int msecs = 0;
1842 vm_object_t object = NULL;
1843 uint32_t inactive_reclaim_run;
1844 boolean_t exceeded_burst_throttle;
1845 boolean_t grab_anonymous = FALSE;
1846 boolean_t force_anonymous = FALSE;
1847 boolean_t force_speculative_aging = FALSE;
1848 int anons_grabbed = 0;
1849 int page_prev_q_state = 0;
1850 #if CONFIG_BACKGROUND_QUEUE
1851 boolean_t page_from_bg_q = FALSE;
1852 #endif
1853 int cache_evict_throttle = 0;
1854 uint32_t vm_pageout_inactive_external_forced_reactivate_limit = 0;
1855 int force_purge = 0;
1856 #define DELAY_SPECULATIVE_AGE 1000
1857 int delay_speculative_age = 0;
1858 vm_object_t m_object = VM_OBJECT_NULL;
1859
1860 #if VM_PRESSURE_EVENTS
1861 vm_pressure_level_t pressure_level;
1862 #endif /* VM_PRESSURE_EVENTS */
1863
1864 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START,
1865 vm_pageout_speculative_clean, vm_pageout_inactive_clean,
1866 vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
1867
1868 flow_control.state = FCS_IDLE;
1869 iq = &vm_pageout_queue_internal;
1870 eq = &vm_pageout_queue_external;
1871 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
1872
1873
1874 XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
1875
1876 /* Ask the pmap layer to return any pages it no longer needs. */
1877 pmap_release_pages_fast();
1878
1879 vm_page_lock_queues();
1880 delayed_unlock = 1;
1881
1882 /*
1883 * Calculate the max number of referenced pages on the inactive
1884 * queue that we will reactivate.
1885 */
1886 reactivated_this_call = 0;
1887 reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count +
1888 vm_page_inactive_count);
1889 inactive_reclaim_run = 0;
1890
1891 vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
1892
1893 /*
1894 * We want to gradually dribble pages from the active queue
1895 * to the inactive queue. If we let the inactive queue get
1896 * very small, and then suddenly dump many pages into it,
1897 * those pages won't get a sufficient chance to be referenced
1898 * before we start taking them from the inactive queue.
1899 *
1900 * We must limit the rate at which we send pages to the pagers
1901 * so that we don't tie up too many pages in the I/O queues.
1902 * We implement a throttling mechanism using the laundry count
1903 * to limit the number of pages outstanding to the default
1904 * and external pagers. We can bypass the throttles and look
1905 * for clean pages if the pageout queues don't drain in a timely
1906 * fashion since this may indicate that the pageout paths are
1907 * stalled waiting for memory, which only we can provide.
1908 */
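/*
 * Minimal sketch (not compiled) of the laundry-count throttle described
 * above: a pageout queue is treated as throttled once the number of pages
 * it already has in flight (its laundry) reaches the maximum it is allowed
 * to keep outstanding.  The real test is the VM_PAGE_Q_THROTTLED() macro;
 * the helper below is hypothetical, though pgo_laundry and pgo_maxlaundry
 * are the fields used elsewhere in this file.
 */
#if 0
static boolean_t
pageout_queue_is_throttled(struct vm_pageout_queue *q)
{
	/* too many pages already handed to this pager? */
	return (q->pgo_laundry >= q->pgo_maxlaundry) ? TRUE : FALSE;
}
#endif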
1909
1910
1911 Restart:
1912
1913 assert(object == NULL);
1914 assert(delayed_unlock != 0);
1915
1916 /*
1917 * Recalculate vm_page_inactive_target.
1918 */
1919 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1920 vm_page_inactive_count +
1921 vm_page_speculative_count);
1922
1923 vm_page_anonymous_min = vm_page_inactive_target / 20;
1924
1925
1926 /*
1927 * don't want to wake the pageout_scan thread up every time we fall below
1928 * the targets... set a low water mark at 0.25% below the target
1929 */
1930 vm_page_inactive_min = vm_page_inactive_target - (vm_page_inactive_target / 400);
1931
1932 if (vm_page_speculative_percentage > 50)
1933 vm_page_speculative_percentage = 50;
1934 else if (vm_page_speculative_percentage <= 0)
1935 vm_page_speculative_percentage = 1;
1936
1937 vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
1938 vm_page_inactive_count);
1939
1940 try_failed = FALSE;
1941
1942 for (;;) {
1943 vm_page_t m;
1944
1945 DTRACE_VM2(rev, int, 1, (uint64_t *), NULL);
1946
1947 if (vm_upl_wait_for_pages < 0)
1948 vm_upl_wait_for_pages = 0;
1949
1950 delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT + vm_upl_wait_for_pages;
1951
1952 if (delayed_unlock_limit > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX)
1953 delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX;
1954
1955 #if CONFIG_SECLUDED_MEMORY
1956 /*
1957 * Deal with secluded_q overflow.
1958 */
1959 if (vm_page_secluded_count > vm_page_secluded_target) {
1960 unsigned int secluded_overflow;
1961 vm_page_t secluded_page;
1962
1963 if (object != NULL) {
1964 vm_object_unlock(object);
1965 object = NULL;
1966 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1967 }
1968 /*
1969 * SECLUDED_AGING_BEFORE_ACTIVE:
1970 * Excess secluded pages go to the active queue and
1971 * will later go to the inactive queue.
1972 */
1973 active_burst_count = MIN(vm_pageout_burst_active_throttle,
1974 vm_page_secluded_count_inuse);
1975 secluded_overflow = (vm_page_secluded_count -
1976 vm_page_secluded_target);
1977 while (secluded_overflow-- > 0 &&
1978 vm_page_secluded_count > vm_page_secluded_target) {
1979 assert((vm_page_secluded_count_free +
1980 vm_page_secluded_count_inuse) ==
1981 vm_page_secluded_count);
1982 secluded_page = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
1983 assert(secluded_page->vm_page_q_state ==
1984 VM_PAGE_ON_SECLUDED_Q);
1985 vm_page_queues_remove(secluded_page, FALSE);
1986 assert(!secluded_page->fictitious);
1987 assert(!VM_PAGE_WIRED(secluded_page));
1988 if (secluded_page->vm_page_object == 0) {
1989 /* transfer to free queue */
1990 assert(secluded_page->busy);
1991 secluded_page->snext = local_freeq;
1992 local_freeq = secluded_page;
1993 local_freed++;
1994 } else {
1995 /* transfer to head of active queue */
1996 vm_page_enqueue_active(secluded_page, FALSE);
1997 if (active_burst_count-- == 0) {
1998 vm_pageout_secluded_burst_count++;
1999 break;
2000 }
2001 }
2002 secluded_page = VM_PAGE_NULL;
2003
2004 if (delayed_unlock++ > delayed_unlock_limit) {
2005 vm_pageout_delayed_unlock(&delayed_unlock, &local_freed, &local_freeq);
2006 }
2007 }
2008 }
2009 #endif /* CONFIG_SECLUDED_MEMORY */
2010
2011 assert(delayed_unlock);
2012
2013 /*
2014 * Move pages from active to inactive if we're below the target
2015 */
2016 if ((vm_page_inactive_count + vm_page_speculative_count) >= vm_page_inactive_target)
2017 goto done_moving_active_pages;
2018
2019 if (object != NULL) {
2020 vm_object_unlock(object);
2021 object = NULL;
2022 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2023 }
2024 /*
2025 * Don't sweep through active queue more than the throttle
2026 * which should be kept relatively low
2027 */
2028 active_burst_count = MIN(vm_pageout_burst_active_throttle, vm_page_active_count);
2029
2030 VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_START,
2031 vm_pageout_inactive, vm_pageout_inactive_used, vm_page_free_count, local_freed);
2032
2033 VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_NONE,
2034 vm_pageout_speculative_clean, vm_pageout_inactive_clean,
2035 vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
2036 memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_START);
2037
2038
2039 while (!vm_page_queue_empty(&vm_page_queue_active) && active_burst_count--) {
2040
2041 vm_pageout_active++;
2042
2043 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
2044
2045 assert(m->vm_page_q_state == VM_PAGE_ON_ACTIVE_Q);
2046 assert(!m->laundry);
2047 assert(VM_PAGE_OBJECT(m) != kernel_object);
2048 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
2049
2050 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
2051
2052 /*
2053 * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise...
2054 *
2055 * a TLB flush isn't really needed here since at worst we'll miss the reference bit being
2056 * updated in the PTE if a remote processor still has this mapping cached in its TLB when the
2057 * new reference happens. If no further references happen on the page after that remote TLB flushes
2058 * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue
2059 * by pageout_scan, which is just fine since the last reference would have happened quite far
2060 * in the past (TLB caches don't hang around for very long), and of course could just as easily
2061 * have happened before we moved the page
2062 */
2063 pmap_clear_refmod_options(VM_PAGE_GET_PHYS_PAGE(m), VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
2064
2065 /*
2066 * The page might be absent or busy,
2067 * but vm_page_deactivate can handle that.
2068 * FALSE indicates that we don't want a H/W clear reference
2069 */
2070 vm_page_deactivate_internal(m, FALSE);
2071
2072 if (delayed_unlock++ > delayed_unlock_limit) {
2073 vm_pageout_delayed_unlock(&delayed_unlock, &local_freed, &local_freeq);
2074 }
2075 }
2076
2077 VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_END,
2078 vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count, vm_page_inactive_target);
2079 memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_END);
2080
2081 /**********************************************************************
2082 * above this point we're playing with the active and secluded queues
2083 * below this point we're playing with the throttling mechanisms
2084 * and the inactive queue
2085 **********************************************************************/
2086
2087 done_moving_active_pages:
2088
2089 if (vm_page_free_count + local_freed >= vm_page_free_target)
2090 {
2091 vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
2092 VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
2093 /*
2094 * make sure the pageout I/O threads are running
2095 * throttled in case there are still requests
2096 * in the laundry... since we have met our targets
2097 * we don't need the laundry to be cleaned in a timely
2098 * fashion... so let's avoid interfering with foreground
2099 * activity
2100 */
2101 vm_pageout_adjust_eq_iothrottle(eq, TRUE);
2102
2103 /*
2104 * recalculate vm_page_inactive_target
2105 */
2106 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
2107 vm_page_inactive_count +
2108 vm_page_speculative_count);
2109 #ifndef CONFIG_EMBEDDED
2110 if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) &&
2111 !vm_page_queue_empty(&vm_page_queue_active)) {
2112 /*
2113 * inactive target still not met... keep going
2114 * until we get the queues balanced...
2115 */
2116 continue;
2117 }
2118 #endif
2119 lck_mtx_lock(&vm_page_queue_free_lock);
2120
2121 if ((vm_page_free_count >= vm_page_free_target) &&
2122 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
2123 /*
2124 * done - we have met our target *and*
2125 * there is no one waiting for a page.
2126 */
2127 return_from_scan:
2128 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
2129
2130 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE,
2131 vm_pageout_inactive, vm_pageout_inactive_used, 0, 0);
2132 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END,
2133 vm_pageout_speculative_clean, vm_pageout_inactive_clean,
2134 vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
2135
2136 return;
2137 }
2138 lck_mtx_unlock(&vm_page_queue_free_lock);
2139 }
2140
2141 /*
2142 * Before anything, we check if we have any ripe volatile
2143 * objects around. If so, try to purge the first object.
2144 * If the purge fails, fall through to reclaim a page instead.
2145 * If the purge succeeds, go back to the top and reevaluate
2146 * the new memory situation.
2147 */
2148
2149 assert(available_for_purge >= 0);
2150 force_purge = 0; /* no force-purging */
2151
2152 #if VM_PRESSURE_EVENTS
2153 pressure_level = memorystatus_vm_pressure_level;
2154
2155 if (pressure_level > kVMPressureNormal) {
2156
2157 if (pressure_level >= kVMPressureCritical) {
2158 force_purge = memorystatus_purge_on_critical;
2159 } else if (pressure_level >= kVMPressureUrgent) {
2160 force_purge = memorystatus_purge_on_urgent;
2161 } else if (pressure_level >= kVMPressureWarning) {
2162 force_purge = memorystatus_purge_on_warning;
2163 }
2164 }
2165 #endif /* VM_PRESSURE_EVENTS */
2166
2167 if (available_for_purge || force_purge) {
2168
2169 if (object != NULL) {
2170 vm_object_unlock(object);
2171 object = NULL;
2172 }
2173
2174 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_START);
2175
2176 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0);
2177 if (vm_purgeable_object_purge_one(force_purge, C_DONT_BLOCK)) {
2178 vm_pageout_purged_objects++;
2179 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0);
2180 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
2181 continue;
2182 }
2183 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1);
2184 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
2185 }
2186
2187 if (vm_page_queue_empty(&sq->age_q) && vm_page_speculative_count) {
2188 /*
2189 * try to pull pages from the aging bins...
2190 * see vm_page.h for an explanation of how
2191 * this mechanism works
2192 */
2193 struct vm_speculative_age_q *aq;
2194 boolean_t can_steal = FALSE;
2195 int num_scanned_queues;
2196
2197 aq = &vm_page_queue_speculative[speculative_steal_index];
2198
2199 num_scanned_queues = 0;
2200 while (vm_page_queue_empty(&aq->age_q) &&
2201 num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
2202
2203 speculative_steal_index++;
2204
2205 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2206 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2207
2208 aq = &vm_page_queue_speculative[speculative_steal_index];
2209 }
2210
2211 if (num_scanned_queues == VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) {
2212 /*
2213 * XXX We've scanned all the speculative
2214 * queues but still haven't found one
2215 * that is not empty, even though
2216 * vm_page_speculative_count is not 0.
2217 *
2218 * report the anomaly...
2219 */
2220 printf("vm_pageout_scan: "
2221 "all speculative queues empty "
2222 "but count=%d. Re-adjusting.\n",
2223 vm_page_speculative_count);
2224 if (vm_page_speculative_count > vm_page_speculative_count_drift_max)
2225 vm_page_speculative_count_drift_max = vm_page_speculative_count;
2226 vm_page_speculative_count_drifts++;
2227 #if DEVELOPMENT || DEBUG
2228 panic("vm_pageout_scan: vm_page_speculative_count=%d but queues are empty", vm_page_speculative_count);
2229 #endif /* DEVELOPMENT || DEBUG */
2230 /* readjust... */
2231 vm_page_speculative_count = 0;
2232 /* ... and continue */
2233 continue;
2234 }
2235
2236 if (vm_page_speculative_count > vm_page_speculative_target || force_speculative_aging == TRUE)
2237 can_steal = TRUE;
2238 else {
2239 if (!delay_speculative_age) {
2240 mach_timespec_t ts_fully_aged;
2241
2242 ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) / 1000;
2243 ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) % 1000)
2244 * 1000 * NSEC_PER_USEC;
2245
2246 ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);
2247
2248 clock_sec_t sec;
2249 clock_nsec_t nsec;
2250 clock_get_system_nanotime(&sec, &nsec);
2251 ts.tv_sec = (unsigned int) sec;
2252 ts.tv_nsec = nsec;
2253
2254 if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0)
2255 can_steal = TRUE;
2256 else
2257 delay_speculative_age++;
2258 } else {
2259 delay_speculative_age++;
2260 if (delay_speculative_age == DELAY_SPECULATIVE_AGE)
2261 delay_speculative_age = 0;
2262 }
2263 }
2264 if (can_steal == TRUE)
2265 vm_page_speculate_ageit(aq);
2266 }
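/*
 * Worked sketch (not compiled) of the millisecond-to-mach_timespec split
 * used for ts_fully_aged above: whole seconds go in tv_sec and the
 * millisecond remainder is scaled up to nanoseconds.  The helper name is
 * hypothetical; the arithmetic mirrors the code above.
 */
#if 0
static void
ms_to_mach_timespec(unsigned int ms, mach_timespec_t *ts)
{
	ts->tv_sec  = ms / 1000;				/* whole seconds */
	ts->tv_nsec = (ms % 1000) * 1000 * NSEC_PER_USEC;	/* leftover milliseconds as ns */
}
#endif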
2267 force_speculative_aging = FALSE;
2268
2269 #if CONFIG_BACKGROUND_QUEUE
2270 if (vm_page_queue_empty(&sq->age_q) && cache_evict_throttle == 0 &&
2271 ((vm_page_background_mode == VM_PAGE_BG_DISABLED) || (vm_page_background_count <= vm_page_background_target)))
2272 #else
2273 if (vm_page_queue_empty(&sq->age_q) && cache_evict_throttle == 0)
2274 #endif
2275 {
2276 int pages_evicted;
2277
2278 if (object != NULL) {
2279 vm_object_unlock(object);
2280 object = NULL;
2281 }
2282 pages_evicted = vm_object_cache_evict(100, 10);
2283
2284 if (pages_evicted) {
2285
2286 vm_pageout_cache_evicted += pages_evicted;
2287
2288 VM_DEBUG_EVENT(vm_pageout_cache_evict, VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE,
2289 vm_page_free_count, pages_evicted, vm_pageout_cache_evicted, 0);
2290 memoryshot(VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE);
2291
2292 /*
2293 * we just freed up to 100 pages,
2294 * so go back to the top of the main loop
2295 * and re-evaluate the memory situation
2296 */
2297 continue;
2298 } else
2299 cache_evict_throttle = 1000;
2300 }
2301 if (cache_evict_throttle)
2302 cache_evict_throttle--;
2303
2304 #if CONFIG_JETSAM
2305 /*
2306 * don't let the filecache_min fall below ~14% (1/7) of available memory
2307 * on systems with an active compressor that isn't nearing its
2308 * limits w/r to accepting new data
2309 *
2310 * on systems w/o the compressor/swapper, the filecache is always
2311 * a very large percentage of the AVAILABLE_NON_COMPRESSED_MEMORY
2312 * since most (if not all) of the anonymous pages are in the
2313 * throttled queue (which isn't counted as available) which
2314 * effectively disables this filter
2315 */
2316 if (vm_compressor_low_on_space())
2317 vm_page_filecache_min = 0;
2318 else
2319 vm_page_filecache_min = (AVAILABLE_NON_COMPRESSED_MEMORY / 7);
2320 #else
2321 if (vm_compressor_out_of_space())
2322 vm_page_filecache_min = 0;
2323 else {
2324 /*
2325 * don't let the filecache_min fall below 33% of available memory...
2326 */
2327 vm_page_filecache_min = (AVAILABLE_NON_COMPRESSED_MEMORY / 3);
2328 }
2329 #endif
2330 if (vm_page_free_count < (vm_page_free_reserved / 4))
2331 vm_page_filecache_min = 0;
2332
2333 exceeded_burst_throttle = FALSE;
2334 /*
2335 * Sometimes we have to pause:
2336 * 1) No inactive pages - nothing to do.
2337 * 2) Loop control - no acceptable pages found on the inactive queue
2338 * within the last vm_pageout_burst_inactive_throttle iterations
2339 * 3) Flow control - default pageout queue is full
2340 */
2341 if (vm_page_queue_empty(&vm_page_queue_inactive) &&
2342 vm_page_queue_empty(&vm_page_queue_anonymous) &&
2343 vm_page_queue_empty(&sq->age_q)) {
2344 vm_pageout_scan_empty_throttle++;
2345 msecs = vm_pageout_empty_wait;
2346 goto vm_pageout_scan_delay;
2347
2348 } else if (inactive_burst_count >=
2349 MIN(vm_pageout_burst_inactive_throttle,
2350 (vm_page_inactive_count +
2351 vm_page_speculative_count))) {
2352 vm_pageout_scan_burst_throttle++;
2353 msecs = vm_pageout_burst_wait;
2354
2355 exceeded_burst_throttle = TRUE;
2356 goto vm_pageout_scan_delay;
2357
2358 } else if (vm_page_free_count > (vm_page_free_reserved / 4) &&
2359 VM_PAGEOUT_SCAN_NEEDS_TO_THROTTLE()) {
2360 vm_pageout_scan_swap_throttle++;
2361 msecs = vm_pageout_swap_wait;
2362 goto vm_pageout_scan_delay;
2363
2364 } else if (VM_PAGE_Q_THROTTLED(iq) &&
2365 VM_DYNAMIC_PAGING_ENABLED()) {
2366 clock_sec_t sec;
2367 clock_nsec_t nsec;
2368
2369 switch (flow_control.state) {
2370
2371 case FCS_IDLE:
2372 if ((vm_page_free_count + local_freed) < vm_page_free_target) {
2373
2374 vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
2375 VM_PAGEOUT_PB_THREAD_YIELD);
2376 if (!VM_PAGE_Q_THROTTLED(iq)) {
2377 vm_pageout_scan_yield_unthrottled++;
2378 continue;
2379 }
2380 if (vm_page_pageable_external_count > vm_page_filecache_min &&
2381 !vm_page_queue_empty(&vm_page_queue_inactive)) {
2382 anons_grabbed = ANONS_GRABBED_LIMIT;
2383 vm_pageout_scan_throttle_deferred++;
2384 goto consider_inactive;
2385 }
2386 if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) && vm_page_active_count)
2387 continue;
2388 }
2389 reset_deadlock_timer:
2390 ts.tv_sec = vm_pageout_deadlock_wait / 1000;
2391 ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
2392 clock_get_system_nanotime(&sec, &nsec);
2393 flow_control.ts.tv_sec = (unsigned int) sec;
2394 flow_control.ts.tv_nsec = nsec;
2395 ADD_MACH_TIMESPEC(&flow_control.ts, &ts);
2396
2397 flow_control.state = FCS_DELAYED;
2398 msecs = vm_pageout_deadlock_wait;
2399
2400 break;
2401
2402 case FCS_DELAYED:
2403 clock_get_system_nanotime(&sec, &nsec);
2404 ts.tv_sec = (unsigned int) sec;
2405 ts.tv_nsec = nsec;
2406
2407 if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
2408 /*
2409 * the pageout thread for the default pager is potentially
2410 * deadlocked since the
2411 * default pager queue has been throttled for more than the
2412 * allowable time... we need to move some clean pages or dirty
2413 * pages belonging to the external pagers if they aren't throttled
2414 * vm_page_free_wanted represents the number of threads currently
2415 * blocked waiting for pages... we'll move one page for each of
2416 * these plus a fixed amount to break the logjam... once we're done
2417 * moving this number of pages, we'll re-enter the FCS_DELAYED state
2418 * with a new timeout target since we have no way of knowing
2419 * whether we've broken the deadlock except through observation
2420 * of the queue associated with the default pager... we need to
2421 * stop moving pages and allow the system to run to see what
2422 * state it settles into.
2423 */
2424 vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted + vm_page_free_wanted_privileged;
2425 vm_pageout_scan_deadlock_detected++;
2426 flow_control.state = FCS_DEADLOCK_DETECTED;
2427 thread_wakeup((event_t) &vm_pageout_garbage_collect);
2428 goto consider_inactive;
2429 }
2430 /*
2431 * just resniff instead of trying
2432 * to compute a new delay time... we're going to be
2433 * awakened immediately upon a laundry completion,
2434 * so we won't wait any longer than necessary
2435 */
2436 msecs = vm_pageout_idle_wait;
2437 break;
2438
2439 case FCS_DEADLOCK_DETECTED:
2440 if (vm_pageout_deadlock_target)
2441 goto consider_inactive;
2442 goto reset_deadlock_timer;
2443
2444 }
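/*
 * Sketch (not compiled) of the flow-control state machine driven by the
 * switch above.  FCS_IDLE arms the deadlock timer and moves to FCS_DELAYED;
 * FCS_DELAYED keeps re-sniffing the laundry until the timer expires, at
 * which point FCS_DEADLOCK_DETECTED is entered; once the relief pages have
 * been moved, the timer is re-armed and we drop back to FCS_DELAYED.  The
 * helper and its parameters are hypothetical.
 */
#if 0
static void
flow_control_step(struct flow_control *fc, boolean_t timer_expired, boolean_t relief_done)
{
	switch (fc->state) {
	case FCS_IDLE:
		fc->state = FCS_DELAYED;			/* arm the deadlock timer */
		break;
	case FCS_DELAYED:
		if (timer_expired)
			fc->state = FCS_DEADLOCK_DETECTED;	/* queue stuck too long */
		break;
	case FCS_DEADLOCK_DETECTED:
		if (relief_done)
			fc->state = FCS_DELAYED;		/* re-arm and keep watching */
		break;
	}
}
#endif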
2445 vm_pageout_scan_delay:
2446 vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
2447 VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
2448
2449 if (flow_control.state == FCS_DELAYED &&
2450 !VM_PAGE_Q_THROTTLED(iq)) {
2451 flow_control.state = FCS_IDLE;
2452 goto consider_inactive;
2453 }
2454
2455 if (vm_page_free_count >= vm_page_free_target) {
2456 /*
2457 * we're here because
2458 * 1) someone else freed up some pages while we had
2459 * the queues unlocked above
2460 * and we've hit one of the 3 conditions that
2461 * cause us to pause the pageout scan thread
2462 *
2463 * since we already have enough free pages,
2464 * let's avoid stalling and return normally
2465 *
2466 * before we return, make sure the pageout I/O threads
2467 * are running throttled in case there are still requests
2468 * in the laundry... since we have enough free pages
2469 * we don't need the laundry to be cleaned in a timely
2470 * fashion... so let's avoid interfering with foreground
2471 * activity
2472 *
2473 * we don't want to hold vm_page_queue_free_lock when
2474 * calling vm_pageout_adjust_eq_iothrottle (since it
2475 * may cause other locks to be taken), we do the initial
2476 * check outside of the lock. Once we take the lock,
2477 * we recheck the condition since it may have changed.
2478 * if it has, no problem, we will make the threads
2479 * non-throttled before actually blocking
2480 */
2481 vm_pageout_adjust_eq_iothrottle(eq, TRUE);
2482 }
2483 lck_mtx_lock(&vm_page_queue_free_lock);
2484
2485 if (vm_page_free_count >= vm_page_free_target &&
2486 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
2487 goto return_from_scan;
2488 }
2489 lck_mtx_unlock(&vm_page_queue_free_lock);
2490
2491 if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) {
2492 /*
2493 * we're most likely about to block due to one of
2494 * the 3 conditions that cause vm_pageout_scan to
2495 * not be able to make forward progress w/r
2496 * to providing new pages to the free queue,
2497 * so unthrottle the I/O threads in case we
2498 * have laundry to be cleaned... it needs
2499 * to be completed ASAP.
2500 *
2501 * even if we don't block, we want the io threads
2502 * running unthrottled since the sum of free +
2503 * clean pages is still under our free target
2504 */
2505 vm_pageout_adjust_eq_iothrottle(eq, FALSE);
2506 }
2507 if (vm_page_cleaned_count > 0 && exceeded_burst_throttle == FALSE) {
2508 /*
2509 * if we get here we're below our free target and
2510 * we're stalling due to a full laundry queue or
2511 * we don't have any inactive pages other than
2512 * those in the clean queue...
2513 * however, we have pages on the clean queue that
2514 * can be moved to the free queue, so let's not
2515 * stall the pageout scan
2516 */
2517 flow_control.state = FCS_IDLE;
2518 goto consider_inactive;
2519 }
2520 VM_CHECK_MEMORYSTATUS;
2521
2522 if (flow_control.state != FCS_IDLE)
2523 vm_pageout_scan_throttle++;
2524 iq->pgo_throttled = TRUE;
2525
2526 assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);
2527 counter(c_vm_pageout_scan_block++);
2528
2529 vm_page_unlock_queues();
2530
2531 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
2532
2533 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START,
2534 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
2535 memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START);
2536
2537 thread_block(THREAD_CONTINUE_NULL);
2538
2539 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END,
2540 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
2541 memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END);
2542
2543 vm_page_lock_queues();
2544
2545 iq->pgo_throttled = FALSE;
2546
2547 if (loop_count >= vm_page_inactive_count)
2548 loop_count = 0;
2549 inactive_burst_count = 0;
2550
2551 goto Restart;
2552 /*NOTREACHED*/
2553 }
2554
2555
2556 flow_control.state = FCS_IDLE;
2557 consider_inactive:
2558 vm_pageout_inactive_external_forced_reactivate_limit = MIN((vm_page_active_count + vm_page_inactive_count),
2559 vm_pageout_inactive_external_forced_reactivate_limit);
2560 loop_count++;
2561 inactive_burst_count++;
2562 vm_pageout_inactive++;
2563
2564
2565 /*
2566 * Choose a victim.
2567 */
2568 while (1) {
2569 uint32_t inactive_external_count;
2570
2571 #if CONFIG_BACKGROUND_QUEUE
2572 page_from_bg_q = FALSE;
2573 #endif /* CONFIG_BACKGROUND_QUEUE */
2574
2575 m = NULL;
2576 m_object = VM_OBJECT_NULL;
2577
2578 if (VM_DYNAMIC_PAGING_ENABLED()) {
2579 assert(vm_page_throttled_count == 0);
2580 assert(vm_page_queue_empty(&vm_page_queue_throttled));
2581 }
2582
2583 /*
2584 * Try for a clean-queue inactive page.
2585 * These are pages that vm_pageout_scan tried to steal earlier, but
2586 * were dirty and had to be cleaned. Pick them up now that they are clean.
2587 */
2588 if (!vm_page_queue_empty(&vm_page_queue_cleaned)) {
2589 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
2590
2591 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
2592
2593 break;
2594 }
2595
2596 /*
2597 * The next most eligible pages are ones we paged in speculatively,
2598 * but which have not yet been touched and have been aged out.
2599 */
2600 if (!vm_page_queue_empty(&sq->age_q)) {
2601 m = (vm_page_t) vm_page_queue_first(&sq->age_q);
2602
2603 assert(m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q);
2604
2605 if (!m->dirty || force_anonymous == FALSE)
2606 break;
2607 else
2608 m = NULL;
2609 }
2610
2611 #if CONFIG_BACKGROUND_QUEUE
2612 if (vm_page_background_mode != VM_PAGE_BG_DISABLED && (vm_page_background_count > vm_page_background_target)) {
2613 vm_object_t bg_m_object = NULL;
2614
2615 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_background);
2616
2617 bg_m_object = VM_PAGE_OBJECT(m);
2618
2619 if (!VM_PAGE_PAGEABLE(m)) {
2620 /*
2621 * This page is on the background queue
2622 * but not on a pageable queue. This is
2623 * likely a transient state and whoever
2624 * took it out of its pageable queue
2625 * will likely put it back on a pageable
2626 * queue soon but we can't deal with it
2627 * at this point, so let's ignore this
2628 * page.
2629 */
2630 } else if (force_anonymous == FALSE || bg_m_object->internal) {
2631
2632 if (bg_m_object->internal &&
2633 ((vm_compressor_out_of_space() == TRUE) ||
2634 (vm_page_free_count < (vm_page_free_reserved / 4)))) {
2635
2636 vm_pageout_skipped_bq_internal++;
2637 } else {
2638 page_from_bg_q = TRUE;
2639
2640 if (bg_m_object->internal)
2641 vm_pageout_considered_bq_internal++;
2642 else
2643 vm_pageout_considered_bq_external++;
2644
2645 break;
2646 }
2647 }
2648 }
2649 #endif
2650
2651 grab_anonymous = (vm_page_anonymous_count > vm_page_anonymous_min);
2652 inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
2653
2654 if ((vm_page_pageable_external_count < vm_page_filecache_min || force_anonymous == TRUE) ||
2655 ((inactive_external_count < vm_page_anonymous_count) && (inactive_external_count < (vm_page_pageable_external_count / 3)))) {
2656 grab_anonymous = TRUE;
2657 anons_grabbed = 0;
2658 }
2659 #if CONFIG_JETSAM
2660 /* If the file-backed pool has accumulated
2661 * significantly more pages than the jetsam
2662 * threshold, prefer to reclaim those
2663 * inline to minimise compute overhead of reclaiming
2664 * anonymous pages.
2665 * This calculation does not account for the CPU local
2666 * external page queues, as those are expected to be
2667 * much smaller relative to the global pools.
2668 */
2669 if (grab_anonymous == TRUE && !VM_PAGE_Q_THROTTLED(eq)) {
2670 if (vm_page_pageable_external_count >
2671 vm_page_filecache_min) {
2672 if ((vm_page_pageable_external_count *
2673 vm_pageout_memorystatus_fb_factor_dr) >
2674 (memorystatus_available_pages_critical *
2675 vm_pageout_memorystatus_fb_factor_nr)) {
2676 grab_anonymous = FALSE;
2677 #if DEVELOPMENT || DEBUG
2678 vm_grab_anon_overrides++;
2679 #endif
2680 }
2681 }
2682 #if DEVELOPMENT || DEBUG
2683 if (grab_anonymous) {
2684 vm_grab_anon_nops++;
2685 }
2686 #endif
2687 }
2688 #endif /* CONFIG_JETSAM */
2689
2690 if (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT || vm_page_queue_empty(&vm_page_queue_anonymous)) {
2691
2692 if ( !vm_page_queue_empty(&vm_page_queue_inactive) ) {
2693 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
2694
2695 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
2696 anons_grabbed = 0;
2697
2698 if (vm_page_pageable_external_count < vm_page_filecache_min) {
2699 if ((++reactivated_this_call % 100))
2700 goto must_activate_page;
2701 /*
2702 * steal 1% of the file backed pages even if
2703 * we are under the limit that has been set
2704 * for a healthy filecache
2705 */
2706 }
2707 break;
2708 }
2709 }
2710 if ( !vm_page_queue_empty(&vm_page_queue_anonymous) ) {
2711 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
2712
2713 assert(m->vm_page_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
2714 anons_grabbed++;
2715
2716 break;
2717 }
2718
2719 /*
2720 * if we've gotten here, we have no victim page.
2721 * check to see if we've not finished balancing the queues
2722 * or we have a page on the aged speculative queue that we
2723 * skipped due to force_anonymous == TRUE.. or we have
2724 * speculative pages that we can prematurely age... if
2725 * one of these cases we'll keep going, else panic
2726 */
2727 force_anonymous = FALSE;
2728 vm_pageout_no_victim++;
2729
2730 if ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target)
2731 goto done_with_inactivepage;
2732
2733 if (!vm_page_queue_empty(&sq->age_q))
2734 goto done_with_inactivepage;
2735
2736 if (vm_page_speculative_count) {
2737 force_speculative_aging = TRUE;
2738 goto done_with_inactivepage;
2739 }
2740 panic("vm_pageout: no victim");
2741
2742 /* NOTREACHED */
2743 }
2744 assert(VM_PAGE_PAGEABLE(m));
2745 m_object = VM_PAGE_OBJECT(m);
2746 force_anonymous = FALSE;
2747
2748 page_prev_q_state = m->vm_page_q_state;
2749 /*
2750 * we just found this page on one of our queues...
2751 * it can't also be on the pageout queue, so safe
2752 * to call vm_page_queues_remove
2753 */
2754 vm_page_queues_remove(m, TRUE);
2755
2756 assert(!m->laundry);
2757 assert(!m->private);
2758 assert(!m->fictitious);
2759 assert(m_object != kernel_object);
2760 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
2761
2762 vm_pageout_stats[vm_pageout_stat_now].considered++;
2763 vm_pageout_considered_page++;
2764
2765 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
2766
2767 /*
2768 * check to see if we currently are working
2769 * with the same object... if so, we've
2770 * already got the lock
2771 */
2772 if (m_object != object) {
2773 /*
2774 * the object associated with candidate page is
2775 * different from the one we were just working
2776 * with... dump the lock if we still own it
2777 */
2778 if (object != NULL) {
2779 vm_object_unlock(object);
2780 object = NULL;
2781 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2782 }
2783 /*
2784 * Try to lock object; since we've already got the
2785 * page queues lock, we can only 'try' for this one.
2786 * if the 'try' fails, we need to do a mutex_pause
2787 * to allow the owner of the object lock a chance to
2788 * run... otherwise, we're likely to trip over this
2789 * object in the same state as we work our way through
2790 * the queue... clumps of pages associated with the same
2791 * object are fairly typical on the inactive and active queues
2792 */
2793 if (!vm_object_lock_try_scan(m_object)) {
2794 vm_page_t m_want = NULL;
2795
2796 vm_pageout_inactive_nolock++;
2797
2798 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2799 vm_pageout_cleaned_nolock++;
2800
2801 pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));
2802 m->reference = FALSE;
2803
2804 /*
2805 * m->object must be stable since we hold the page queues lock...
2806 * we can update the scan_collisions field sans the object lock
2807 * since it is a separate field and this is the only spot that does
2808 * a read-modify-write operation and it is never executed concurrently...
2809 * we can asynchronously set this field to 0 when creating a UPL, so it
2810 * is possible for the value to be a bit non-deterministic, but that's ok
2811 * since it's only used as a hint
2812 */
2813 m_object->scan_collisions = 1;
2814
2815 if ( !vm_page_queue_empty(&sq->age_q) )
2816 m_want = (vm_page_t) vm_page_queue_first(&sq->age_q);
2817 else if ( !vm_page_queue_empty(&vm_page_queue_cleaned))
2818 m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
2819 else if ( !vm_page_queue_empty(&vm_page_queue_inactive) &&
2820 (anons_grabbed >= ANONS_GRABBED_LIMIT || vm_page_queue_empty(&vm_page_queue_anonymous)))
2821 m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
2822 else if ( !vm_page_queue_empty(&vm_page_queue_anonymous))
2823 m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
2824
2825 /*
2826 * this is the next object we're going to be interested in
2827 * try to make sure it's available after the mutex_yield
2828 * returns control
2829 */
2830 if (m_want)
2831 vm_pageout_scan_wants_object = VM_PAGE_OBJECT(m_want);
2832
2833 /*
2834 * force us to dump any collected free pages
2835 * and to pause before moving on
2836 */
2837 try_failed = TRUE;
2838
2839 goto requeue_page;
2840 }
2841 object = m_object;
2842 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2843
2844 try_failed = FALSE;
2845 }
2846 assert(m_object == object);
2847 assert(VM_PAGE_OBJECT(m) == m_object);
2848
2849 if (m->busy) {
2850 /*
2851 * Somebody is already playing with this page.
2852 * Put it back on the appropriate queue
2853 *
2854 */
2855 vm_pageout_inactive_busy++;
2856
2857 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2858 vm_pageout_cleaned_busy++;
2859 requeue_page:
2860 if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
2861 vm_page_enqueue_inactive(m, FALSE);
2862 else
2863 vm_page_activate(m);
2864 #if CONFIG_BACKGROUND_QUEUE
2865 if (page_from_bg_q == TRUE) {
2866 if (m_object->internal)
2867 vm_pageout_rejected_bq_internal++;
2868 else
2869 vm_pageout_rejected_bq_external++;
2870 }
2871 #endif
2872 goto done_with_inactivepage;
2873 }
2874
2875
2876 /*
2877 * If it's absent, in error or the object is no longer alive,
2878 * we can reclaim the page... in the no longer alive case,
2879 * there are 2 states the page can be in that preclude us
2880 * from reclaiming it - busy or cleaning - that we've already
2881 * dealt with
2882 */
2883 if (m->absent || m->error || !object->alive) {
2884
2885 if (m->absent)
2886 vm_pageout_inactive_absent++;
2887 else if (!object->alive)
2888 vm_pageout_inactive_notalive++;
2889 else
2890 vm_pageout_inactive_error++;
2891 reclaim_page:
2892 if (vm_pageout_deadlock_target) {
2893 vm_pageout_scan_inactive_throttle_success++;
2894 vm_pageout_deadlock_target--;
2895 }
2896
2897 DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL);
2898
2899 if (object->internal) {
2900 DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL);
2901 } else {
2902 DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL);
2903 }
2904 assert(!m->cleaning);
2905 assert(!m->laundry);
2906
2907 m->busy = TRUE;
2908
2909 /*
2910 * remove page from object here since we're already
2911 * behind the object lock... defer the rest of the work
2912 * we'd normally do in vm_page_free_prepare_object
2913 * until 'vm_page_free_list' is called
2914 */
2915 if (m->tabled)
2916 vm_page_remove(m, TRUE);
2917
2918 assert(m->pageq.next == 0 && m->pageq.prev == 0);
2919 m->snext = local_freeq;
2920 local_freeq = m;
2921 local_freed++;
2922
2923 if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
2924 vm_pageout_freed_from_speculative++;
2925 else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2926 vm_pageout_freed_from_cleaned++;
2927 else
2928 vm_pageout_freed_from_inactive_clean++;
2929
2930 vm_pageout_stats[vm_pageout_stat_now].reclaimed_clean++;
2931
2932 inactive_burst_count = 0;
2933 goto done_with_inactivepage;
2934 }
2935 /*
2936 * If the object is empty, the page must be reclaimed even
2937 * if dirty or used.
2938 * If the page belongs to a volatile object, we stick it back
2939 * on.
2940 */
2941 if (object->copy == VM_OBJECT_NULL) {
2942 if (object->purgable == VM_PURGABLE_EMPTY) {
2943 if (m->pmapped == TRUE) {
2944 /* unmap the page */
2945 refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
2946 if (refmod_state & VM_MEM_MODIFIED) {
2947 SET_PAGE_DIRTY(m, FALSE);
2948 }
2949 }
2950 if (m->dirty || m->precious) {
2951 /* we saved the cost of cleaning this page ! */
2952 vm_page_purged_count++;
2953 }
2954 goto reclaim_page;
2955 }
2956
2957 if (VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
2958 /*
2959 * With the VM compressor, the cost of
2960 * reclaiming a page is much lower (no I/O),
2961 * so if we find a "volatile" page, it's better
2962 * to let it get compressed rather than letting
2963 * it occupy a full page until it gets purged.
2964 * So no need to check for "volatile" here.
2965 */
2966 } else if (object->purgable == VM_PURGABLE_VOLATILE) {
2967 /*
2968 * Avoid cleaning a "volatile" page which might
2969 * be purged soon.
2970 */
2971
2972 /* if it's wired, we can't put it on our queue */
2973 assert(!VM_PAGE_WIRED(m));
2974
2975 /* just stick it back on! */
2976 reactivated_this_call++;
2977
2978 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2979 vm_pageout_cleaned_volatile_reactivated++;
2980
2981 goto reactivate_page;
2982 }
2983 }
2984 /*
2985 * If it's being used, reactivate.
2986 * (Fictitious pages are either busy or absent.)
2987 * First, update the reference and dirty bits
2988 * to make sure the page is unreferenced.
2989 */
2990 refmod_state = -1;
2991
2992 if (m->reference == FALSE && m->pmapped == TRUE) {
2993 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
2994
2995 if (refmod_state & VM_MEM_REFERENCED)
2996 m->reference = TRUE;
2997 if (refmod_state & VM_MEM_MODIFIED) {
2998 SET_PAGE_DIRTY(m, FALSE);
2999 }
3000 }
3001
3002 /*
3003 * if (m->cleaning && !m->free_when_done)
3004 * If already cleaning this page in place and it hasn't
3005 * been recently referenced, just pull off the queue.
3006 * We can leave the page mapped, and upl_commit_range
3007 * will put it on the clean queue.
3008 *
3009 * if (m->free_when_done && !m->cleaning)
3010 * an msync INVALIDATE is in progress...
3011 * this page has been marked for destruction
3012 * after it has been cleaned,
3013 * but not yet gathered into a UPL
3014 * where 'cleaning' will be set...
3015 * just leave it off the paging queues
3016 *
3017 * if (m->free_when_done && m->cleaning)
3018 * an msync INVALIDATE is in progress
3019 * and the UPL has already gathered this page...
3020 * just leave it off the paging queues
3021 */
3022
3023 /*
3024 * page with m->free_when_done and still on the queues means that an
3025 * MS_INVALIDATE is in progress on this page... leave it alone
3026 */
3027 if (m->free_when_done) {
3028 goto done_with_inactivepage;
3029 }
3030
3031 /* if cleaning, reactivate if referenced. otherwise, just pull off queue */
3032 if (m->cleaning) {
3033 if (m->reference == TRUE) {
3034 reactivated_this_call++;
3035 goto reactivate_page;
3036 } else {
3037 goto done_with_inactivepage;
3038 }
3039 }
3040
3041 if (m->reference || m->dirty) {
3042 /* deal with a rogue "reusable" page */
3043 VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, m_object);
3044 }
3045
3046 if (!m->no_cache &&
3047 #if CONFIG_BACKGROUND_QUEUE
3048 page_from_bg_q == FALSE &&
3049 #endif
3050 (m->reference ||
3051 (m->xpmapped && !object->internal && (vm_page_xpmapped_external_count < (vm_page_external_count / 4))))) {
3052 /*
3053 * The page we pulled off the inactive list has
3054 * been referenced. It is possible for other
3055 * processors to be touching pages faster than we
3056 * can clear the referenced bit and traverse the
3057 * inactive queue, so we limit the number of
3058 * reactivations.
3059 */
3060 if (++reactivated_this_call >= reactivate_limit) {
3061 vm_pageout_reactivation_limit_exceeded++;
3062 } else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) {
3063 vm_pageout_inactive_force_reclaim++;
3064 } else {
3065 uint32_t isinuse;
3066
3067 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
3068 vm_pageout_cleaned_reference_reactivated++;
3069 reactivate_page:
3070 if ( !object->internal && object->pager != MEMORY_OBJECT_NULL &&
3071 vnode_pager_get_isinuse(object->pager, &isinuse) == KERN_SUCCESS && !isinuse) {
3072 /*
3073 * no explicit mappings of this object exist
3074 * and it's not open via the filesystem
3075 */
3076 vm_page_deactivate(m);
3077 vm_pageout_inactive_deactivated++;
3078 } else {
3079 must_activate_page:
3080 /*
3081 * The page was/is being used, so put back on active list.
3082 */
3083 vm_page_activate(m);
3084 VM_STAT_INCR(reactivations);
3085 inactive_burst_count = 0;
3086 }
3087 #if CONFIG_BACKGROUND_QUEUE
3088 if (page_from_bg_q == TRUE) {
3089 if (m_object->internal)
3090 vm_pageout_rejected_bq_internal++;
3091 else
3092 vm_pageout_rejected_bq_external++;
3093 }
3094 #endif
3095 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
3096 vm_pageout_cleaned_reactivated++;
3097 vm_pageout_inactive_used++;
3098
3099 goto done_with_inactivepage;
3100 }
3101 /*
3102 * Make sure we call pmap_get_refmod() if it
3103 * wasn't already called just above, to update
3104 * the dirty bit.
3105 */
3106 if ((refmod_state == -1) && !m->dirty && m->pmapped) {
3107 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
3108 if (refmod_state & VM_MEM_MODIFIED) {
3109 SET_PAGE_DIRTY(m, FALSE);
3110 }
3111 }
3112 }
3113
3114 XPR(XPR_VM_PAGEOUT,
3115 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
3116 object, m->offset, m, 0,0);
3117
3118 /*
3119 * we've got a candidate page to steal...
3120 *
3121 * m->dirty is up to date courtesy of the
3122 * preceding check for m->reference... if
3123 * we get here, then m->reference had to be
3124 * FALSE (or possibly "reactivate_limit" was
3125 * exceeded), but in either case we called
3126 * pmap_get_refmod() and updated both
3127 * m->reference and m->dirty
3128 *
3129 * if it's dirty or precious we need to
3130 * see if the target queue is throttled
3131 * if it is, we need to skip over it by moving it back
3132 * to the end of the inactive queue
3133 */
3134
3135 inactive_throttled = FALSE;
3136
3137 if (m->dirty || m->precious) {
3138 if (object->internal) {
3139 if (VM_PAGE_Q_THROTTLED(iq))
3140 inactive_throttled = TRUE;
3141 } else if (VM_PAGE_Q_THROTTLED(eq)) {
3142 inactive_throttled = TRUE;
3143 }
3144 }
3145 throttle_inactive:
3146 if (!VM_DYNAMIC_PAGING_ENABLED() &&
3147 object->internal && m->dirty &&
3148 (object->purgable == VM_PURGABLE_DENY ||
3149 object->purgable == VM_PURGABLE_NONVOLATILE ||
3150 object->purgable == VM_PURGABLE_VOLATILE)) {
3151 vm_page_check_pageable_safe(m);
3152 assert(m->vm_page_q_state == VM_PAGE_NOT_ON_Q);
3153 vm_page_queue_enter(&vm_page_queue_throttled, m,
3154 vm_page_t, pageq);
3155 m->vm_page_q_state = VM_PAGE_ON_THROTTLED_Q;
3156 vm_page_throttled_count++;
3157
3158 vm_pageout_scan_reclaimed_throttled++;
3159
3160 inactive_burst_count = 0;
3161 goto done_with_inactivepage;
3162 }
3163 if (inactive_throttled == TRUE) {
3164
3165 if (object->internal == FALSE) {
3166 /*
3167 * we need to break up the following potential deadlock case...
3168 * a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written.
3169 * b) The thread doing the writing is waiting for pages while holding the truncate lock
3170 * c) Most of the pages in the inactive queue belong to this file.
3171 *
3172 * we are potentially in this deadlock because...
3173 * a) the external pageout queue is throttled
3174 * b) we're done with the active queue and moved on to the inactive queue
3175 * c) we've got a dirty external page
3176 *
3177 * since we don't know the reason for the external pageout queue being throttled we
3178 * must suspect that we are deadlocked, so move the current page onto the active queue
3179 * in an effort to cause a page from the active queue to 'age' to the inactive queue
3180 *
3181 * if we don't have jetsam configured (i.e. we have a dynamic pager), set
3182 * 'force_anonymous' to TRUE to cause us to grab a page from the cleaned/anonymous
3183 * pool the next time we select a victim page... if we can make enough new free pages,
3184 * the deadlock will break, the external pageout queue will empty and it will no longer
3185 * be throttled
3186 *
3187 * if we have jetsam configured, keep a count of the pages reactivated this way so
3188 * that we can try to find clean pages in the active/inactive queues before
3189 * deciding to jetsam a process
3190 */
3191 vm_pageout_scan_inactive_throttled_external++;
3192
3193 vm_page_check_pageable_safe(m);
3194 assert(m->vm_page_q_state == VM_PAGE_NOT_ON_Q);
3195 vm_page_queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
3196 m->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
3197 vm_page_active_count++;
3198 vm_page_pageable_external_count++;
3199
3200 vm_pageout_adjust_eq_iothrottle(eq, FALSE);
3201
3202 #if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
3203 vm_pageout_inactive_external_forced_reactivate_limit--;
3204
3205 if (vm_pageout_inactive_external_forced_reactivate_limit <= 0) {
3206 vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
3207 /*
3208 * Possible deadlock scenario so request jetsam action
3209 */
3210 assert(object);
3211 vm_object_unlock(object);
3212 object = VM_OBJECT_NULL;
3213 vm_page_unlock_queues();
3214
3215 VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START,
3216 vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
3217
3218 /* Kill first suitable process */
3219 if (memorystatus_kill_on_VM_page_shortage(FALSE) == FALSE) {
3220 panic("vm_pageout_scan: Jetsam request failed\n");
3221 }
3222
3223 VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END, 0, 0, 0, 0);
3224
3225 vm_pageout_inactive_external_forced_jetsam_count++;
3226 vm_page_lock_queues();
3227 delayed_unlock = 1;
3228 }
3229 #else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
3230 force_anonymous = TRUE;
3231 #endif
3232 inactive_burst_count = 0;
3233 goto done_with_inactivepage;
3234 } else {
3235 vm_pageout_scan_inactive_throttled_internal++;
3236 goto must_activate_page;
3237 }
3238 }
3239
3240 /*
3241 * we've got a page that we can steal...
3242 * eliminate all mappings and make sure
3243 * we have the up-to-date modified state
3244 *
3245 * if we need to do a pmap_disconnect then we
3246 * need to re-evaluate m->dirty since the pmap_disconnect
3247 * provides the true state atomically... the
3248 * page was still mapped up to the pmap_disconnect
3249 * and may have been dirtied at the last microsecond
3250 *
3251 * Note that if 'pmapped' is FALSE then the page is not
3252 * and has not been in any map, so there is no point calling
3253 * pmap_disconnect(). m->dirty could have been set in anticipation
3254 * of likely usage of the page.
3255 */
3256 if (m->pmapped == TRUE) {
3257 int pmap_options;
3258
3259 /*
3260 * Don't count this page as going into the compressor
3261 * if any of these are true:
3262 * 1) compressed pager isn't enabled
3263 * 2) Freezer enabled device with compressed pager
3264 * backend (exclusive use) i.e. most of the VM system
3265 * (including vm_pageout_scan) has no knowledge of
3266 * the compressor
3267 * 3) This page belongs to a file and hence will not be
3268 * sent into the compressor
3269 */
3270 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE ||
3271 object->internal == FALSE) {
3272 pmap_options = 0;
3273 } else if (m->dirty || m->precious) {
3274 /*
3275 * VM knows that this page is dirty (or
3276 * precious) and needs to be compressed
3277 * rather than freed.
3278 * Tell the pmap layer to count this page
3279 * as "compressed".
3280 */
3281 pmap_options = PMAP_OPTIONS_COMPRESSOR;
3282 } else {
3283 /*
3284 * VM does not know if the page needs to
3285 * be preserved but the pmap layer might tell
3286 * us if any mapping has "modified" it.
3287 * Let the pmap layer count this page
3288 * as compressed if and only if it has been
3289 * modified.
3290 */
3291 pmap_options =
3292 PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
3293 }
3294 refmod_state = pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m),
3295 pmap_options,
3296 NULL);
3297 if (refmod_state & VM_MEM_MODIFIED) {
3298 SET_PAGE_DIRTY(m, FALSE);
3299 }
3300 }
3301 /*
3302 * reset our count of pages that have been reclaimed
3303 * since the last page was 'stolen'
3304 */
3305 inactive_reclaim_run = 0;
3306
3307 /*
3308 * If it's clean and not precious, we can free the page.
3309 */
3310 if (!m->dirty && !m->precious) {
3311
3312 if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
3313 vm_pageout_speculative_clean++;
3314 else {
3315 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q)
3316 vm_pageout_inactive_anonymous++;
3317 else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
3318 vm_pageout_cleaned_reclaimed++;
3319
3320 vm_pageout_inactive_clean++;
3321 }
3322 /*
3323 * OK, at this point we have found a page we are going to free.
3324 */
3325 #if CONFIG_PHANTOM_CACHE
3326 if (!object->internal)
3327 vm_phantom_cache_add_ghost(m);
3328 #endif
3329 goto reclaim_page;
3330 }
3331
3332 /*
3333 * The page may have been dirtied since the last check
3334 * for a throttled target queue (which may have been skipped
3335 * if the page was clean then). With the dirty page
3336 * disconnected here, we can make one final check.
3337 */
3338 if (object->internal) {
3339 if (VM_PAGE_Q_THROTTLED(iq))
3340 inactive_throttled = TRUE;
3341 } else if (VM_PAGE_Q_THROTTLED(eq)) {
3342 inactive_throttled = TRUE;
3343 }
3344
3345 if (inactive_throttled == TRUE)
3346 goto throttle_inactive;
3347
3348 #if VM_PRESSURE_EVENTS
3349 #if CONFIG_JETSAM
3350
3351 /*
3352 * If Jetsam is enabled, then the sending
3353 * of memory pressure notifications is handled
3354 * from the same thread that takes care of high-water
3355 * and other jetsams i.e. the memorystatus_thread.
3356 */
3357
3358 #else /* CONFIG_JETSAM */
3359
3360 vm_pressure_response();
3361
3362 #endif /* CONFIG_JETSAM */
3363 #endif /* VM_PRESSURE_EVENTS */
3364
3365 if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
3366 vm_pageout_speculative_dirty++;
3367 else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q)
3368 vm_pageout_inactive_anonymous++;
3369
3370 if (object->internal)
3371 vm_pageout_inactive_dirty_internal++;
3372 else
3373 vm_pageout_inactive_dirty_external++;
3374
3375 /*
3376 * do NOT set the pageout bit!
3377 * sure, we might need free pages, but this page is going to take time to become free
3378 * anyway, so we may as well put it on the clean queue first and take it from there later
3379 * if necessary. that way, we'll ensure we don't free up too much. -mj
3380 */
3381 vm_pageout_cluster(m);
3382
3383 done_with_inactivepage:
3384
3385 if (delayed_unlock++ > delayed_unlock_limit || try_failed == TRUE) {
3386
3387 vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
3388 VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
3389 if (try_failed == TRUE)
3390 lck_mtx_yield(&vm_page_queue_lock);
3391 }
3392
3393 /*
3394 * back to top of pageout scan loop
3395 */
3396 }
3397 }
3398
3399
3400 int vm_page_free_count_init;
3401
3402 void
3403 vm_page_free_reserve(
3404 int pages)
3405 {
3406 int free_after_reserve;
3407
3408 if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
3409
3410 if ((vm_page_free_reserved + pages + COMPRESSOR_FREE_RESERVED_LIMIT) >= (VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT))
3411 vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT;
3412 else
3413 vm_page_free_reserved += (pages + COMPRESSOR_FREE_RESERVED_LIMIT);
3414
3415 } else {
3416 if ((vm_page_free_reserved + pages) >= VM_PAGE_FREE_RESERVED_LIMIT)
3417 vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT;
3418 else
3419 vm_page_free_reserved += pages;
3420 }
3421 free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;
3422
3423 vm_page_free_min = vm_page_free_reserved +
3424 VM_PAGE_FREE_MIN(free_after_reserve);
3425
3426 if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT)
3427 vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT;
3428
3429 vm_page_free_target = vm_page_free_reserved +
3430 VM_PAGE_FREE_TARGET(free_after_reserve);
3431
3432 if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT)
3433 vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT;
3434
3435 if (vm_page_free_target < vm_page_free_min + 5)
3436 vm_page_free_target = vm_page_free_min + 5;
3437
3438 vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 2);
3439 }
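/*
 * Hypothetical sanity check (not compiled) over the thresholds established
 * by vm_page_free_reserve() above, assuming the *_LIMIT clamps have not
 * inverted the intended ordering: the reserve sits below the wakeup
 * minimum, the minimum sits at least 5 pages below the target, and the
 * throttle limit is half of the target.
 */
#if 0
static void
check_free_threshold_ordering(void)
{
	assert(vm_page_free_reserved <= vm_page_free_min);
	assert(vm_page_free_min + 5 <= vm_page_free_target);
	assert(vm_page_throttle_limit == vm_page_free_target - (vm_page_free_target / 2));
}
#endif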
3440
3441 /*
3442 * vm_pageout is the high level pageout daemon.
3443 */
3444
3445 void
3446 vm_pageout_continue(void)
3447 {
3448 DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL);
3449 vm_pageout_scan_event_counter++;
3450
3451 #if !CONFIG_EMBEDDED
3452 lck_mtx_lock(&vm_page_queue_free_lock);
3453 vm_pageout_running = TRUE;
3454 lck_mtx_unlock(&vm_page_queue_free_lock);
3455 #endif /* CONFIG_EMBEDDED */
3456
3457 vm_pageout_scan();
3458 /*
3459 * we hold both the vm_page_queue_free_lock
3460 * and the vm_page_queues_lock at this point
3461 */
3462 assert(vm_page_free_wanted == 0);
3463 assert(vm_page_free_wanted_privileged == 0);
3464 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
3465
3466 #if !CONFIG_EMBEDDED
3467 vm_pageout_running = FALSE;
3468 if (vm_pageout_waiter) {
3469 vm_pageout_waiter = FALSE;
3470 thread_wakeup((event_t)&vm_pageout_waiter);
3471 }
3472 #endif /* !CONFIG_EMBEDDED */
3473
3474 lck_mtx_unlock(&vm_page_queue_free_lock);
3475 vm_page_unlock_queues();
3476
3477 counter(c_vm_pageout_block++);
3478 thread_block((thread_continue_t)vm_pageout_continue);
3479 /*NOTREACHED*/
3480 }
3481
3482 #if !CONFIG_EMBEDDED
3483 kern_return_t
3484 vm_pageout_wait(uint64_t deadline)
3485 {
3486 kern_return_t kr;
3487
3488 lck_mtx_lock(&vm_page_queue_free_lock);
3489 for (kr = KERN_SUCCESS; vm_pageout_running && (KERN_SUCCESS == kr); ) {
3490 vm_pageout_waiter = TRUE;
3491 if (THREAD_AWAKENED != lck_mtx_sleep_deadline(
3492 &vm_page_queue_free_lock, LCK_SLEEP_DEFAULT,
3493 (event_t) &vm_pageout_waiter, THREAD_UNINT, deadline)) {
3494 kr = KERN_OPERATION_TIMED_OUT;
3495 }
3496 }
3497 lck_mtx_unlock(&vm_page_queue_free_lock);
3498
3499 return (kr);
3500 }
3501 #endif /* !CONFIG_EMBEDDED */
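
/*
 * Usage sketch (hypothetical caller, not part of the original source): wait
 * up to 100ms for the in-flight pageout pass to finish, using the absolute
 * deadline convention that lck_mtx_sleep_deadline() expects above.
 */
#if 0
static void
example_wait_for_pageout_pass(void)
{
	uint64_t deadline;

	clock_interval_to_deadline(100, NSEC_PER_MSEC, &deadline);

	if (vm_pageout_wait(deadline) == KERN_OPERATION_TIMED_OUT) {
		/* the scan is still running; give up rather than block longer */
	}
}
#endif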
3502
3503
3504 static void
3505 vm_pageout_iothread_external_continue(struct vm_pageout_queue *q)
3506 {
3507 vm_page_t m = NULL;
3508 vm_object_t object;
3509 vm_object_offset_t offset;
3510 memory_object_t pager;
3511
3512 /* If an internal (compressor) pageout thread exists, the external IO
3513 * thread clears its VM privileged bit to accommodate large allocations
3514 * (e.g. bulk UPL creation)
3515 */
3516 if (vm_pageout_internal_iothread != THREAD_NULL)
3517 current_thread()->options &= ~TH_OPT_VMPRIV;
3518
3519 vm_page_lockspin_queues();
3520
3521 while ( !vm_page_queue_empty(&q->pgo_pending) ) {
3522
3523 q->pgo_busy = TRUE;
3524 vm_page_queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
3525
3526 assert(m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q);
3527 VM_PAGE_CHECK(m);
3528 /*
3529 * grab a snapshot of the object and offset this
3530 * page is tabled in so that we can relookup this
3531 * page after we've taken the object lock - these
3532 * fields are stable while we hold the page queues lock
3533 * but as soon as we drop it, there is nothing to keep
3534 * this page in this object... we hold an activity_in_progress
3535 * on this object which will keep it from terminating
3536 */
3537 object = VM_PAGE_OBJECT(m);
3538 offset = m->offset;
3539
3540 if (object->object_slid) {
3541 panic("slid page %p not allowed on this path\n", m);
3542 }
3543 m->vm_page_q_state = VM_PAGE_NOT_ON_Q;
3544 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
3545
3546 vm_page_unlock_queues();
3547
3548 vm_object_lock(object);
3549
3550 m = vm_page_lookup(object, offset);
3551
3552 if (m == NULL ||
3553 m->busy || m->cleaning || !m->laundry || (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
3554 /*
3555 * it's either the same page that someone else has
3556 * started cleaning (or it's finished cleaning or
3557 * been put back on the pageout queue), or
3558 * the page has been freed or we have found a
3559 * new page at this offset... in all of these cases
3560 * we merely need to release the activity_in_progress
3561 * we took when we put the page on the pageout queue
3562 */
3563 vm_object_activity_end(object);
3564 vm_object_unlock(object);
3565
3566 vm_page_lockspin_queues();
3567 continue;
3568 }
3569 pager = object->pager;
3570
3571 if (pager == MEMORY_OBJECT_NULL) {
3572 /*
3573 * This pager has been destroyed by either
3574 * memory_object_destroy or vm_object_destroy, and
3575 * so there is nowhere for the page to go.
3576 */
3577 if (m->free_when_done) {
3578 /*
3579 * Just free the page... VM_PAGE_FREE takes
3580 * care of cleaning up all the state...
3581 * including doing the vm_pageout_throttle_up
3582 */
3583 VM_PAGE_FREE(m);
3584 } else {
3585 vm_page_lockspin_queues();
3586
3587 vm_pageout_throttle_up(m);
3588 vm_page_activate(m);
3589
3590 vm_page_unlock_queues();
3591
3592 /*
3593 * And we are done with it.
3594 */
3595 }
3596 vm_object_activity_end(object);
3597 vm_object_unlock(object);
3598
3599 vm_page_lockspin_queues();
3600 continue;
3601 }
3602 #if 0
3603 /*
3604 * we don't hold the page queue lock
3605 * so this check isn't safe to make
3606 */
3607 VM_PAGE_CHECK(m);
3608 #endif
3609 /*
3610 * give back the activity_in_progress reference we
3611 * took when we queued up this page and replace it
3612 * with a paging_in_progress reference that will
3613 * also keep the paging offset from changing and
3614 * prevent the object from terminating
3615 */
3616 vm_object_activity_end(object);
3617 vm_object_paging_begin(object);
3618 vm_object_unlock(object);
3619
3620 /*
3621 * Send the data to the pager.
3622 * any pageout clustering happens there
3623 */
3624 memory_object_data_return(pager,
3625 m->offset + object->paging_offset,
3626 PAGE_SIZE,
3627 NULL,
3628 NULL,
3629 FALSE,
3630 FALSE,
3631 0);
3632
3633 vm_object_lock(object);
3634 vm_object_paging_end(object);
3635 vm_object_unlock(object);
3636
3637 vm_pageout_io_throttle();
3638
3639 vm_page_lockspin_queues();
3640 }
3641 q->pgo_busy = FALSE;
3642 q->pgo_idle = TRUE;
3643
3644 assert_wait((event_t) &q->pgo_pending, THREAD_UNINT);
3645 vm_page_unlock_queues();
3646
3647 thread_block_parameter((thread_continue_t)vm_pageout_iothread_external_continue, (void *) q);
3648 /*NOTREACHED*/
3649 }
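
/*
 * Pattern sketch (illustrative only): the snapshot-and-relookup dance used
 * above.  The page queues lock keeps (object, offset) stable; once it is
 * dropped, only the activity_in_progress reference keeps the object from
 * terminating, so the page must be looked up again under the object lock
 * and may turn out to be gone or replaced.
 */
#if 0
static vm_page_t
example_relookup_after_dropping_queues_lock(vm_page_t m)
{
	vm_object_t		object = VM_PAGE_OBJECT(m);
	vm_object_offset_t	offset = m->offset;

	vm_page_unlock_queues();
	vm_object_lock(object);

	return vm_page_lookup(object, offset);	/* NULL or a different page is possible */
}
#endif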
3650
3651
3652 #define MAX_FREE_BATCH 32
3653 uint32_t vm_compressor_time_thread; /* Set via sysctl to record time accrued by
3654 * this thread.
3655 */
3656
3657
3658 #if DEVELOPMENT || DEBUG
3659 uint64_t compressor_epoch_start, compressor_epoch_stop, compressor_threads_runtime;
3660 #endif
3661
3662 void
3663 vm_pageout_iothread_internal_continue(struct cq *);
3664 void
3665 vm_pageout_iothread_internal_continue(struct cq *cq)
3666 {
3667 struct vm_pageout_queue *q;
3668 vm_page_t m = NULL;
3669 boolean_t pgo_draining;
3670 vm_page_t local_q;
3671 int local_cnt;
3672 vm_page_t local_freeq = NULL;
3673 int local_freed = 0;
3674 int local_batch_size;
3675 int ncomps = 0;
3676 #if DEVELOPMENT || DEBUG
3677 boolean_t marked_active = FALSE;
3678 #endif
3679 KERNEL_DEBUG(0xe040000c | DBG_FUNC_END, 0, 0, 0, 0, 0);
3680
3681 q = cq->q;
3682 local_batch_size = q->pgo_maxlaundry / (vm_compressor_thread_count * 2);
3683
3684 #if RECORD_THE_COMPRESSED_DATA
3685 if (q->pgo_laundry)
3686 c_compressed_record_init();
3687 #endif
3688 while (TRUE) {
3689 int pages_left_on_q = 0;
3690
3691 local_cnt = 0;
3692 local_q = NULL;
3693
3694 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_START, 0, 0, 0, 0, 0);
3695
3696 vm_page_lock_queues();
3697 #if DEVELOPMENT || DEBUG
3698 if (marked_active == FALSE) {
3699 vmct_active++;
3700 vmct_state[cq->id] = VMCT_ACTIVE;
3701 marked_active = TRUE;
3702 if (vmct_active == 1) {
3703 compressor_epoch_start = mach_absolute_time();
3704 }
3705 }
3706 #endif
3707 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_END, 0, 0, 0, 0, 0);
3708
3709 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_START, q->pgo_laundry, 0, 0, 0, 0);
3710
3711 while ( !vm_page_queue_empty(&q->pgo_pending) && local_cnt < local_batch_size) {
3712
3713 vm_page_queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
3714 assert(m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q);
3715 VM_PAGE_CHECK(m);
3716
3717 m->vm_page_q_state = VM_PAGE_NOT_ON_Q;
3718 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
3719 m->laundry = FALSE;
3720
3721 m->snext = local_q;
3722 local_q = m;
3723 local_cnt++;
3724 }
3725 if (local_q == NULL)
3726 break;
3727
3728 q->pgo_busy = TRUE;
3729
3730 if ((pgo_draining = q->pgo_draining) == FALSE) {
3731 vm_pageout_throttle_up_batch(q, local_cnt);
3732 pages_left_on_q = q->pgo_laundry;
3733 } else
3734 pages_left_on_q = q->pgo_laundry - local_cnt;
3735
3736 vm_page_unlock_queues();
3737
3738 #if !RECORD_THE_COMPRESSED_DATA
3739 if (pages_left_on_q >= local_batch_size && cq->id < (vm_compressor_thread_count - 1)) {
3740 thread_wakeup((event_t) ((uintptr_t)&q->pgo_pending + cq->id + 1));
3741 }
3742 #endif
3743 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, q->pgo_laundry, 0, 0, 0, 0);
3744
3745 while (local_q) {
3746
3747 KERNEL_DEBUG(0xe0400024 | DBG_FUNC_START, local_cnt, 0, 0, 0, 0);
3748
3749 m = local_q;
3750 local_q = m->snext;
3751 m->snext = NULL;
3752
3753 if (vm_pageout_compress_page(&cq->current_chead, cq->scratch_buf, m, FALSE) == KERN_SUCCESS) {
3754 ncomps++;
3755 m->snext = local_freeq;
3756 local_freeq = m;
3757 local_freed++;
3758
3759 if (local_freed >= MAX_FREE_BATCH) {
3760 vm_pageout_freed_after_compression += local_freed;
3761
3762 vm_page_free_list(local_freeq, TRUE);
3763 local_freeq = NULL;
3764 local_freed = 0;
3765 }
3766 }
3767 #if !CONFIG_JETSAM
3768 while (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
3769 kern_return_t wait_result;
3770 int need_wakeup = 0;
3771
3772 if (local_freeq) {
3773 vm_pageout_freed_after_compression += local_freed;
3774
3775 vm_page_free_list(local_freeq, TRUE);
3776 local_freeq = NULL;
3777 local_freed = 0;
3778
3779 continue;
3780 }
3781 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3782
3783 if (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
3784
3785 if (vm_page_free_wanted_privileged++ == 0)
3786 need_wakeup = 1;
3787 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, THREAD_UNINT);
3788
3789 lck_mtx_unlock(&vm_page_queue_free_lock);
3790
3791 if (need_wakeup)
3792 thread_wakeup((event_t)&vm_page_free_wanted);
3793
3794 if (wait_result == THREAD_WAITING)
3795
3796 thread_block(THREAD_CONTINUE_NULL);
3797 } else
3798 lck_mtx_unlock(&vm_page_queue_free_lock);
3799 }
3800 #endif
3801 }
3802 if (local_freeq) {
3803 vm_pageout_freed_after_compression += local_freed;
3804
3805 vm_page_free_list(local_freeq, TRUE);
3806 local_freeq = NULL;
3807 local_freed = 0;
3808 }
3809 if (pgo_draining == TRUE) {
3810 vm_page_lockspin_queues();
3811 vm_pageout_throttle_up_batch(q, local_cnt);
3812 vm_page_unlock_queues();
3813 }
3814 }
3815 KERNEL_DEBUG(0xe040000c | DBG_FUNC_START, 0, 0, 0, 0, 0);
3816
3817 /*
3818 * queue lock is held and our q is empty
3819 */
3820 q->pgo_busy = FALSE;
3821 q->pgo_idle = TRUE;
3822
3823 assert_wait((event_t) ((uintptr_t)&q->pgo_pending + cq->id), THREAD_UNINT);
3824 #if DEVELOPMENT || DEBUG
3825 if (marked_active == TRUE) {
3826 vmct_active--;
3827 vmct_state[cq->id] = VMCT_IDLE;
3828
3829 if (vmct_active == 0) {
3830 compressor_epoch_stop = mach_absolute_time();
3831 assert(compressor_epoch_stop > compressor_epoch_start);
3832 /* This interval includes intervals where one or more
3833 * compressor threads were pre-empted
3834 */
3835 vmct_stats.vmct_cthreads_total += compressor_epoch_stop - compressor_epoch_start;
3836 }
3837
3838 }
3839 #endif
3840 vm_page_unlock_queues();
3841 #if DEVELOPMENT || DEBUG
3842 if (__improbable(vm_compressor_time_thread)) {
3843 vmct_stats.vmct_runtimes[cq->id] = thread_get_runtime_self();
3844 vmct_stats.vmct_pages[cq->id] += ncomps;
3845 vmct_stats.vmct_iterations[cq->id]++;
3846 if (ncomps > vmct_stats.vmct_maxpages[cq->id]) {
3847 vmct_stats.vmct_maxpages[cq->id] = ncomps;
3848 }
3849 if (ncomps < vmct_stats.vmct_minpages[cq->id]) {
3850 vmct_stats.vmct_minpages[cq->id] = ncomps;
3851 }
3852 }
3853 #endif
3854
3855 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, 0, 0, 0, 0, 0);
3856
3857 thread_block_parameter((thread_continue_t)vm_pageout_iothread_internal_continue, (void *) cq);
3858 /*NOTREACHED*/
3859 }
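
/*
 * Event-addressing sketch (illustrative only): each compressor thread sleeps
 * on a distinct event derived from the shared queue address plus its id,
 * which is how the wakeup in the loop above can target exactly one sibling
 * thread (cq->id + 1) without disturbing the others.
 */
#if 0
static void
example_wake_one_compressor_thread(struct vm_pageout_queue *q, int id)
{
	thread_wakeup((event_t)((uintptr_t)&q->pgo_pending + id));
}
#endif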
3860
3861
3862 kern_return_t
3863 vm_pageout_compress_page(void **current_chead, char *scratch_buf, vm_page_t m, boolean_t object_locked_by_caller)
3864 {
3865 vm_object_t object;
3866 memory_object_t pager;
3867 int compressed_count_delta;
3868 kern_return_t retval;
3869
3870 object = VM_PAGE_OBJECT(m);
3871
3872 if (object->object_slid) {
3873 panic("slid page %p not allowed on this path\n", m);
3874 }
3875 assert(!m->free_when_done);
3876 assert(!m->laundry);
3877
3878 pager = object->pager;
3879
3880 if (object_locked_by_caller == FALSE && (!object->pager_initialized || pager == MEMORY_OBJECT_NULL)) {
3881
3882 KERNEL_DEBUG(0xe0400010 | DBG_FUNC_START, object, pager, 0, 0, 0);
3883
3884 vm_object_lock(object);
3885
3886 /*
3887 * If there is no memory object for the page, create
3888 * one and hand it to the compression pager.
3889 */
3890
3891 if (!object->pager_initialized)
3892 vm_object_collapse(object, (vm_object_offset_t) 0, TRUE);
3893 if (!object->pager_initialized)
3894 vm_object_compressor_pager_create(object);
3895
3896 pager = object->pager;
3897
3898 if (!object->pager_initialized || pager == MEMORY_OBJECT_NULL) {
3899 /*
3900 * Still no pager for the object,
3901 * or the pager has been destroyed.
3902 * Reactivate the page.
3903 *
3904 * Should only happen if there is no
3905 * compression pager
3906 */
3907 PAGE_WAKEUP_DONE(m);
3908
3909 vm_page_lockspin_queues();
3910 vm_page_activate(m);
3911 vm_pageout_dirty_no_pager++;
3912 vm_page_unlock_queues();
3913
3914 /*
3915 * And we are done with it.
3916 */
3917 vm_object_activity_end(object);
3918 vm_object_unlock(object);
3919
3920 return KERN_FAILURE;
3921 }
3922 vm_object_unlock(object);
3923
3924 KERNEL_DEBUG(0xe0400010 | DBG_FUNC_END, object, pager, 0, 0, 0);
3925 }
3926 assert(object->pager_initialized && pager != MEMORY_OBJECT_NULL);
3927
3928 if (object_locked_by_caller == FALSE)
3929 assert(object->activity_in_progress > 0);
3930
3931 retval = vm_compressor_pager_put(
3932 pager,
3933 m->offset + object->paging_offset,
3934 VM_PAGE_GET_PHYS_PAGE(m),
3935 current_chead,
3936 scratch_buf,
3937 &compressed_count_delta);
3938
3939 if (object_locked_by_caller == FALSE) {
3940 vm_object_lock(object);
3941
3942 assert(object->activity_in_progress > 0);
3943 assert(VM_PAGE_OBJECT(m) == object);
3944 }
3945
3946 vm_compressor_pager_count(pager,
3947 compressed_count_delta,
3948 FALSE, /* shared_lock */
3949 object);
3950
3951 assert( !VM_PAGE_WIRED(m));
3952
3953 if (retval == KERN_SUCCESS) {
3954 /*
3955 * If the object is purgeable, its owner's
3956 * purgeable ledgers will be updated in
3957 * vm_page_remove() but the page still
3958 * contributes to the owner's memory footprint,
3959 * so account for it as such.
3960 */
3961 if (object->purgable != VM_PURGABLE_DENY &&
3962 object->vo_purgeable_owner != NULL) {
3963 /* one more compressed purgeable page */
3964 vm_purgeable_compressed_update(object,
3965 +1);
3966 }
3967 VM_STAT_INCR(compressions);
3968
3969 if (m->tabled)
3970 vm_page_remove(m, TRUE);
3971
3972 } else {
3973 PAGE_WAKEUP_DONE(m);
3974
3975 vm_page_lockspin_queues();
3976
3977 vm_page_activate(m);
3978 vm_compressor_failed++;
3979
3980 vm_page_unlock_queues();
3981 }
3982 if (object_locked_by_caller == FALSE) {
3983 vm_object_activity_end(object);
3984 vm_object_unlock(object);
3985 }
3986 return retval;
3987 }
3988
3989
3990 static void
3991 vm_pageout_adjust_eq_iothrottle(struct vm_pageout_queue *eq, boolean_t req_lowpriority)
3992 {
3993 uint32_t policy;
3994
3995 if (hibernate_cleaning_in_progress == TRUE)
3996 req_lowpriority = FALSE;
3997
3998 if (eq->pgo_inited == TRUE && eq->pgo_lowpriority != req_lowpriority) {
3999
4000 vm_page_unlock_queues();
4001
4002 if (req_lowpriority == TRUE) {
4003 policy = THROTTLE_LEVEL_PAGEOUT_THROTTLED;
4004 DTRACE_VM(laundrythrottle);
4005 } else {
4006 policy = THROTTLE_LEVEL_PAGEOUT_UNTHROTTLED;
4007 DTRACE_VM(laundryunthrottle);
4008 }
4009 proc_set_thread_policy_with_tid(kernel_task, eq->pgo_tid,
4010 TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy);
4011
4012 eq->pgo_lowpriority = req_lowpriority;
4013
4014 vm_page_lock_queues();
4015 }
4016 }
4017
4018
4019 static void
4020 vm_pageout_iothread_external(void)
4021 {
4022 thread_t self = current_thread();
4023
4024 self->options |= TH_OPT_VMPRIV;
4025
4026 DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL);
4027
4028 proc_set_thread_policy(self, TASK_POLICY_EXTERNAL,
4029 TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED);
4030
4031 vm_page_lock_queues();
4032
4033 vm_pageout_queue_external.pgo_tid = self->thread_id;
4034 vm_pageout_queue_external.pgo_lowpriority = TRUE;
4035 vm_pageout_queue_external.pgo_inited = TRUE;
4036
4037 vm_page_unlock_queues();
4038
4039 vm_pageout_iothread_external_continue(&vm_pageout_queue_external);
4040
4041 /*NOTREACHED*/
4042 }
4043
4044
4045 static void
4046 vm_pageout_iothread_internal(struct cq *cq)
4047 {
4048 thread_t self = current_thread();
4049
4050 self->options |= TH_OPT_VMPRIV;
4051
4052 vm_page_lock_queues();
4053
4054 vm_pageout_queue_internal.pgo_tid = self->thread_id;
4055 vm_pageout_queue_internal.pgo_lowpriority = TRUE;
4056 vm_pageout_queue_internal.pgo_inited = TRUE;
4057
4058 vm_page_unlock_queues();
4059
4060 if (vm_restricted_to_single_processor == TRUE)
4061 thread_vm_bind_group_add();
4062
4063
4064 thread_set_thread_name(current_thread(), "VM_compressor");
4065 #if DEVELOPMENT || DEBUG
4066 vmct_stats.vmct_minpages[cq->id] = INT32_MAX;
4067 #endif
4068 vm_pageout_iothread_internal_continue(cq);
4069
4070 /*NOTREACHED*/
4071 }
4072
4073 kern_return_t
4074 vm_set_buffer_cleanup_callout(boolean_t (*func)(int))
4075 {
4076 if (OSCompareAndSwapPtr(NULL, func, (void * volatile *) &consider_buffer_cache_collect)) {
4077 return KERN_SUCCESS;
4078 } else {
4079 return KERN_FAILURE; /* Already set */
4080 }
4081 }
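
/*
 * Registration sketch (hypothetical callback, not the real BSD-side code):
 * the callout slot is set with a single compare-and-swap, so only the first
 * registration succeeds and later ones get KERN_FAILURE.
 */
#if 0
static boolean_t
example_buffer_cache_collect(int all)
{
	/* return TRUE if a large zone-backed buffer was released */
	return (all != 0);
}

static void
example_register_buffer_cleanup(void)
{
	if (vm_set_buffer_cleanup_callout(example_buffer_cache_collect) != KERN_SUCCESS) {
		/* somebody registered a callout before us; nothing to do */
	}
}
#endif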
4082
4083 extern boolean_t memorystatus_manual_testing_on;
4084 extern unsigned int memorystatus_level;
4085
4086
4087 #if VM_PRESSURE_EVENTS
4088
4089 boolean_t vm_pressure_events_enabled = FALSE;
4090
4091 void
4092 vm_pressure_response(void)
4093 {
4094
4095 vm_pressure_level_t old_level = kVMPressureNormal;
4096 int new_level = -1;
4097 unsigned int total_pages;
4098 uint64_t available_memory = 0;
4099
4100 if (vm_pressure_events_enabled == FALSE)
4101 return;
4102
4103 #if CONFIG_EMBEDDED
4104
4105 available_memory = (uint64_t) memorystatus_available_pages;
4106
4107 #else /* CONFIG_EMBEDDED */
4108
4109 available_memory = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY;
4110 memorystatus_available_pages = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY;
4111
4112 #endif /* CONFIG_EMBEDDED */
4113
4114 total_pages = (unsigned int) atop_64(max_mem);
4115 #if CONFIG_SECLUDED_MEMORY
4116 total_pages -= vm_page_secluded_count;
4117 #endif /* CONFIG_SECLUDED_MEMORY */
4118 memorystatus_level = (unsigned int) ((available_memory * 100) / total_pages);
4119
4120 if (memorystatus_manual_testing_on) {
4121 return;
4122 }
4123
4124 old_level = memorystatus_vm_pressure_level;
4125
4126 switch (memorystatus_vm_pressure_level) {
4127
4128 case kVMPressureNormal:
4129 {
4130 if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
4131 new_level = kVMPressureCritical;
4132 } else if (VM_PRESSURE_NORMAL_TO_WARNING()) {
4133 new_level = kVMPressureWarning;
4134 }
4135 break;
4136 }
4137
4138 case kVMPressureWarning:
4139 case kVMPressureUrgent:
4140 {
4141 if (VM_PRESSURE_WARNING_TO_NORMAL()) {
4142 new_level = kVMPressureNormal;
4143 } else if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
4144 new_level = kVMPressureCritical;
4145 }
4146 break;
4147 }
4148
4149 case kVMPressureCritical:
4150 {
4151 if (VM_PRESSURE_WARNING_TO_NORMAL()) {
4152 new_level = kVMPressureNormal;
4153 } else if (VM_PRESSURE_CRITICAL_TO_WARNING()) {
4154 new_level = kVMPressureWarning;
4155 }
4156 break;
4157 }
4158
4159 default:
4160 return;
4161 }
4162
4163 if (new_level != -1) {
4164 memorystatus_vm_pressure_level = (vm_pressure_level_t) new_level;
4165
4166 if ((memorystatus_vm_pressure_level != kVMPressureNormal) || (old_level != new_level)) {
4167 if (vm_pressure_thread_running == FALSE) {
4168 thread_wakeup(&vm_pressure_thread);
4169 }
4170
4171 if (old_level != new_level) {
4172 thread_wakeup(&vm_pressure_changed);
4173 }
4174 }
4175 }
4176
4177 }
4178 #endif /* VM_PRESSURE_EVENTS */
4179
4180 kern_return_t
4181 mach_vm_pressure_level_monitor(__unused boolean_t wait_for_pressure, __unused unsigned int *pressure_level) {
4182
4183 #if CONFIG_EMBEDDED
4184
4185 return KERN_FAILURE;
4186
4187 #elif !VM_PRESSURE_EVENTS
4188
4189 return KERN_FAILURE;
4190
4191 #else /* VM_PRESSURE_EVENTS */
4192
4193 kern_return_t kr = KERN_SUCCESS;
4194
4195 if (pressure_level != NULL) {
4196
4197 vm_pressure_level_t old_level = memorystatus_vm_pressure_level;
4198
4199 if (wait_for_pressure == TRUE) {
4200 wait_result_t wr = 0;
4201
4202 while (old_level == *pressure_level) {
4203 wr = assert_wait((event_t) &vm_pressure_changed,
4204 THREAD_INTERRUPTIBLE);
4205 if (wr == THREAD_WAITING) {
4206 wr = thread_block(THREAD_CONTINUE_NULL);
4207 }
4208 if (wr == THREAD_INTERRUPTED) {
4209 return KERN_ABORTED;
4210 }
4211 if (wr == THREAD_AWAKENED) {
4212
4213 old_level = memorystatus_vm_pressure_level;
4214
4215 if (old_level != *pressure_level) {
4216 break;
4217 }
4218 }
4219 }
4220 }
4221
4222 *pressure_level = old_level;
4223 kr = KERN_SUCCESS;
4224 } else {
4225 kr = KERN_INVALID_ARGUMENT;
4226 }
4227
4228 return kr;
4229 #endif /* VM_PRESSURE_EVENTS */
4230 }
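
/*
 * Usage sketch (hypothetical caller): poll the current pressure level once,
 * then block until it changes.  The value comes back as a vm_pressure_level_t
 * (kVMPressureNormal and friends); KERN_ABORTED means the wait was
 * interrupted and the caller should retry or bail out.
 */
#if 0
static void
example_watch_pressure_level(void)
{
	unsigned int level = 0;

	/* non-blocking query of the current level */
	if (mach_vm_pressure_level_monitor(FALSE, &level) != KERN_SUCCESS)
		return;

	/* block until the level moves away from what we last saw */
	if (mach_vm_pressure_level_monitor(TRUE, &level) == KERN_SUCCESS) {
		/* `level' now holds the new pressure level */
	}
}
#endif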
4231
4232 #if VM_PRESSURE_EVENTS
4233 void
4234 vm_pressure_thread(void) {
4235 static boolean_t thread_initialized = FALSE;
4236
4237 if (thread_initialized == TRUE) {
4238 vm_pressure_thread_running = TRUE;
4239 consider_vm_pressure_events();
4240 vm_pressure_thread_running = FALSE;
4241 }
4242
4243 thread_initialized = TRUE;
4244 assert_wait((event_t) &vm_pressure_thread, THREAD_UNINT);
4245 thread_block((thread_continue_t)vm_pressure_thread);
4246 }
4247 #endif /* VM_PRESSURE_EVENTS */
4248
4249
4250 uint32_t vm_pageout_considered_page_last = 0;
4251
4252 /*
4253 * called once per-second via "compute_averages"
4254 */
4255 void
4256 compute_pageout_gc_throttle(__unused void *arg)
4257 {
4258 if (vm_pageout_considered_page != vm_pageout_considered_page_last) {
4259
4260 vm_pageout_considered_page_last = vm_pageout_considered_page;
4261
4262 thread_wakeup((event_t) &vm_pageout_garbage_collect);
4263 }
4264 }
4265
4266 /*
4267 * vm_pageout_garbage_collect can also be called when the zone allocator needs
4268 * to call zone_gc on a different thread in order to trigger zone-map-exhaustion
4269 * jetsams. We need to check if the zone map size is above its jetsam limit to
4270 * decide if this was indeed the case.
4271 *
4272 * We need to do this on a different thread because of the following reasons:
4273 *
4274 * 1. In the case of synchronous jetsams, the leaking process can try to jetsam
4275 * itself causing the system to hang. We perform synchronous jetsams if we're
4276 * leaking in the VM map entries zone, so the leaking process could be doing a
4277 * zalloc for a VM map entry while holding its vm_map lock, when it decides to
4278 * jetsam itself. We also need the vm_map lock on the process termination path,
4279 * which would now lead the dying process to deadlock against itself.
4280 *
4281 * 2. The jetsam path might need to allocate zone memory itself. We could try
4282 * using the non-blocking variant of zalloc for this path, but we can still
4283 * end up trying to do a kernel_memory_allocate when the zone_map is almost
4284 * full.
4285 */
4286
4287 extern boolean_t is_zone_map_nearing_exhaustion(void);
4288
4289 void
4290 vm_pageout_garbage_collect(int collect)
4291 {
4292 if (collect) {
4293 if (is_zone_map_nearing_exhaustion()) {
4294 /*
4295 * Woken up by the zone allocator for zone-map-exhaustion jetsams.
4296 *
4297 * Bail out after calling zone_gc (which triggers the
4298 * zone-map-exhaustion jetsams). If we fall through, the subsequent
4299 * operations that clear out a bunch of caches might allocate zone
4300 * memory themselves (for eg. vm_map operations would need VM map
4301 * entries). Since the zone map is almost full at this point, we
4302 * could end up with a panic. We just need to quickly jetsam a
4303 * process and exit here.
4304 *
4305 * It could so happen that we were woken up to relieve memory
4306 * pressure and the zone map also happened to be near its limit at
4307 * the time, in which case we'll skip out early. But that should be
4308 * ok; if memory pressure persists, the thread will simply be woken
4309 * up again.
4310 */
4311 consider_zone_gc(TRUE);
4312
4313 } else {
4314 /* Woken up by vm_pageout_scan or compute_pageout_gc_throttle. */
4315 boolean_t buf_large_zfree = FALSE;
4316 boolean_t first_try = TRUE;
4317
4318 stack_collect();
4319
4320 consider_machine_collect();
4321 m_drain();
4322
4323 do {
4324 if (consider_buffer_cache_collect != NULL) {
4325 buf_large_zfree = (*consider_buffer_cache_collect)(0);
4326 }
4327 if (first_try == TRUE || buf_large_zfree == TRUE) {
4328 /*
4329 * consider_zone_gc should be last, because the other operations
4330 * might return memory to zones.
4331 */
4332 consider_zone_gc(FALSE);
4333 }
4334 first_try = FALSE;
4335
4336 } while (buf_large_zfree == TRUE && vm_page_free_count < vm_page_free_target);
4337
4338 consider_machine_adjust();
4339 }
4340 }
4341
4342 assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);
4343
4344 thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
4345 /*NOTREACHED*/
4346 }
4347
4348
4349 #if VM_PAGE_BUCKETS_CHECK
4350 #if VM_PAGE_FAKE_BUCKETS
4351 extern vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
4352 #endif /* VM_PAGE_FAKE_BUCKETS */
4353 #endif /* VM_PAGE_BUCKETS_CHECK */
4354
4355
4356
4357 void
4358 vm_set_restrictions()
4359 {
4360 host_basic_info_data_t hinfo;
4361 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
4362
4363 #define BSD_HOST 1
4364 host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
4365
4366 assert(hinfo.max_cpus > 0);
4367
4368 if (hinfo.max_cpus <= 3) {
4369 /*
4370 * on systems with a limited number of CPUs, bind the
4371 * 4 major threads that can free memory and that tend to use
4372 * a fair bit of CPU under pressured conditions to a single processor.
4373 * This ensures that these threads don't hog all of the available CPUs
4374 * (important for camera launch), while allowing them to run independently
4375 * with respect to locks... the 4 threads are
4376 * vm_pageout_scan, vm_pageout_iothread_internal (compressor),
4377 * vm_compressor_swap_trigger_thread (minor and major compactions),
4378 * memorystatus_thread (jetsams).
4379 *
4380 * the first time the thread is run, it is responsible for checking the
4381 * state of vm_restricted_to_single_processor, and if TRUE it calls
4382 * thread_bind_master... someday this should be replaced with a group
4383 * scheduling mechanism and KPI.
4384 */
4385 vm_restricted_to_single_processor = TRUE;
4386 }
4387 }
4388
4389 void
4390 vm_pageout(void)
4391 {
4392 thread_t self = current_thread();
4393 thread_t thread;
4394 kern_return_t result;
4395 spl_t s;
4396
4397 /*
4398 * Set thread privileges.
4399 */
4400 s = splsched();
4401
4402 thread_lock(self);
4403 self->options |= TH_OPT_VMPRIV;
4404 sched_set_thread_base_priority(self, BASEPRI_VM);
4405 thread_unlock(self);
4406
4407 if (!self->reserved_stack)
4408 self->reserved_stack = self->kernel_stack;
4409
4410 if (vm_restricted_to_single_processor == TRUE)
4411 thread_vm_bind_group_add();
4412
4413 splx(s);
4414
4415 thread_set_thread_name(current_thread(), "VM_pageout_scan");
4416
4417 /*
4418 * Initialize some paging parameters.
4419 */
4420
4421 if (vm_pageout_swap_wait == 0)
4422 vm_pageout_swap_wait = VM_PAGEOUT_SWAP_WAIT;
4423
4424 if (vm_pageout_idle_wait == 0)
4425 vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;
4426
4427 if (vm_pageout_burst_wait == 0)
4428 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
4429
4430 if (vm_pageout_empty_wait == 0)
4431 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
4432
4433 if (vm_pageout_deadlock_wait == 0)
4434 vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;
4435
4436 if (vm_pageout_deadlock_relief == 0)
4437 vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;
4438
4439 if (vm_pageout_inactive_relief == 0)
4440 vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF;
4441
4442 if (vm_pageout_burst_active_throttle == 0)
4443 vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE;
4444
4445 if (vm_pageout_burst_inactive_throttle == 0)
4446 vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;
4447
4448 /*
4449 * Set kernel task to low backing store privileged
4450 * status
4451 */
4452 task_lock(kernel_task);
4453 kernel_task->priv_flags |= VM_BACKING_STORE_PRIV;
4454 task_unlock(kernel_task);
4455
4456 vm_page_free_count_init = vm_page_free_count;
4457
4458 /*
4459 * even if we've already called vm_page_free_reserve,
4460 * call it again here to ensure that the targets are
4461 * accurately calculated (it uses vm_page_free_count_init)...
4462 * calling it with an arg of 0 will not change the reserve
4463 * but will re-calculate free_min and free_target
4464 */
4465 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
4466 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
4467 } else
4468 vm_page_free_reserve(0);
4469
4470
4471 vm_page_queue_init(&vm_pageout_queue_external.pgo_pending);
4472 vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
4473 vm_pageout_queue_external.pgo_laundry = 0;
4474 vm_pageout_queue_external.pgo_idle = FALSE;
4475 vm_pageout_queue_external.pgo_busy = FALSE;
4476 vm_pageout_queue_external.pgo_throttled = FALSE;
4477 vm_pageout_queue_external.pgo_draining = FALSE;
4478 vm_pageout_queue_external.pgo_lowpriority = FALSE;
4479 vm_pageout_queue_external.pgo_tid = -1;
4480 vm_pageout_queue_external.pgo_inited = FALSE;
4481
4482 vm_page_queue_init(&vm_pageout_queue_internal.pgo_pending);
4483 vm_pageout_queue_internal.pgo_maxlaundry = 0;
4484 vm_pageout_queue_internal.pgo_laundry = 0;
4485 vm_pageout_queue_internal.pgo_idle = FALSE;
4486 vm_pageout_queue_internal.pgo_busy = FALSE;
4487 vm_pageout_queue_internal.pgo_throttled = FALSE;
4488 vm_pageout_queue_internal.pgo_draining = FALSE;
4489 vm_pageout_queue_internal.pgo_lowpriority = FALSE;
4490 vm_pageout_queue_internal.pgo_tid = -1;
4491 vm_pageout_queue_internal.pgo_inited = FALSE;
4492
4493 /* internal pageout thread started when default pager registered first time */
4494 /* external pageout and garbage collection threads started here */
4495
4496 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL,
4497 BASEPRI_VM,
4498 &vm_pageout_external_iothread);
4499 if (result != KERN_SUCCESS)
4500 panic("vm_pageout_iothread_external: create failed");
4501
4502 thread_deallocate(vm_pageout_external_iothread);
4503
4504 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL,
4505 BASEPRI_DEFAULT,
4506 &thread);
4507 if (result != KERN_SUCCESS)
4508 panic("vm_pageout_garbage_collect: create failed");
4509
4510 thread_deallocate(thread);
4511
4512 #if VM_PRESSURE_EVENTS
4513 result = kernel_thread_start_priority((thread_continue_t)vm_pressure_thread, NULL,
4514 BASEPRI_DEFAULT,
4515 &thread);
4516
4517 if (result != KERN_SUCCESS)
4518 panic("vm_pressure_thread: create failed");
4519
4520 thread_deallocate(thread);
4521 #endif
4522
4523 vm_object_reaper_init();
4524
4525
4526 bzero(&vm_config, sizeof(vm_config));
4527
4528 switch(vm_compressor_mode) {
4529
4530 case VM_PAGER_DEFAULT:
4531 printf("mapping deprecated VM_PAGER_DEFAULT to VM_PAGER_COMPRESSOR_WITH_SWAP\n");
4532
4533 case VM_PAGER_COMPRESSOR_WITH_SWAP:
4534 vm_config.compressor_is_present = TRUE;
4535 vm_config.swap_is_present = TRUE;
4536 vm_config.compressor_is_active = TRUE;
4537 vm_config.swap_is_active = TRUE;
4538 break;
4539
4540 case VM_PAGER_COMPRESSOR_NO_SWAP:
4541 vm_config.compressor_is_present = TRUE;
4542 vm_config.swap_is_present = TRUE;
4543 vm_config.compressor_is_active = TRUE;
4544 break;
4545
4546 case VM_PAGER_FREEZER_DEFAULT:
4547 printf("mapping deprecated VM_PAGER_FREEZER_DEFAULT to VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP\n");
4548
4549 case VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP:
4550 vm_config.compressor_is_present = TRUE;
4551 vm_config.swap_is_present = TRUE;
4552 break;
4553
4554 case VM_PAGER_COMPRESSOR_NO_SWAP_PLUS_FREEZER_COMPRESSOR_WITH_SWAP:
4555 vm_config.compressor_is_present = TRUE;
4556 vm_config.swap_is_present = TRUE;
4557 vm_config.compressor_is_active = TRUE;
4558 vm_config.freezer_swap_is_active = TRUE;
4559 break;
4560
4561 case VM_PAGER_NOT_CONFIGURED:
4562 break;
4563
4564 default:
4565 printf("unknown compressor mode - %x\n", vm_compressor_mode);
4566 break;
4567 }
4568 if (VM_CONFIG_COMPRESSOR_IS_PRESENT)
4569 vm_compressor_pager_init();
4570
4571 #if VM_PRESSURE_EVENTS
4572 vm_pressure_events_enabled = TRUE;
4573 #endif /* VM_PRESSURE_EVENTS */
4574
4575 #if CONFIG_PHANTOM_CACHE
4576 vm_phantom_cache_init();
4577 #endif
4578 #if VM_PAGE_BUCKETS_CHECK
4579 #if VM_PAGE_FAKE_BUCKETS
4580 printf("**** DEBUG: protecting fake buckets [0x%llx:0x%llx]\n",
4581 (uint64_t) vm_page_fake_buckets_start,
4582 (uint64_t) vm_page_fake_buckets_end);
4583 pmap_protect(kernel_pmap,
4584 vm_page_fake_buckets_start,
4585 vm_page_fake_buckets_end,
4586 VM_PROT_READ);
4587 // *(char *) vm_page_fake_buckets_start = 'x'; /* panic! */
4588 #endif /* VM_PAGE_FAKE_BUCKETS */
4589 #endif /* VM_PAGE_BUCKETS_CHECK */
4590
4591 #if VM_OBJECT_TRACKING
4592 vm_object_tracking_init();
4593 #endif /* VM_OBJECT_TRACKING */
4594
4595 vm_tests();
4596
4597 vm_pageout_continue();
4598
4599 /*
4600 * Unreached code!
4601 *
4602 * The vm_pageout_continue() call above never returns, so the code below is never
4603 * executed. We take advantage of this to declare several DTrace VM related probe
4604 * points that our kernel doesn't have an analog for. These are probe points that
4605 * exist in Solaris and are in the DTrace documentation, so people may have written
4606 * scripts that use them. Declaring the probe points here means their scripts will
4607 * compile and execute which we want for portability of the scripts, but since this
4608 * section of code is never reached, the probe points will simply never fire. Yes,
4609 * this is basically a hack. The problem is the DTrace probe points were chosen with
4610 * Solaris specific VM events in mind, not portability to different VM implementations.
4611 */
4612
4613 DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL);
4614 DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL);
4615 DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL);
4616 DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL);
4617 DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL);
4618 DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL);
4619 DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL);
4620 /*NOTREACHED*/
4621 }
4622
4623
4624
4625 #if CONFIG_EMBEDDED
4626 int vm_compressor_thread_count = 1;
4627 #else
4628 int vm_compressor_thread_count = 2;
4629 #endif
4630
4631 kern_return_t
4632 vm_pageout_internal_start(void)
4633 {
4634 kern_return_t result;
4635 int i;
4636 host_basic_info_data_t hinfo;
4637
4638 assert (VM_CONFIG_COMPRESSOR_IS_PRESENT);
4639
4640 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
4641 #define BSD_HOST 1
4642 host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
4643
4644 assert(hinfo.max_cpus > 0);
4645
4646 PE_parse_boot_argn("vmcomp_threads", &vm_compressor_thread_count, sizeof(vm_compressor_thread_count));
4647 if (vm_compressor_thread_count >= hinfo.max_cpus)
4648 vm_compressor_thread_count = hinfo.max_cpus - 1;
4649 if (vm_compressor_thread_count <= 0)
4650 vm_compressor_thread_count = 1;
4651 else if (vm_compressor_thread_count > MAX_COMPRESSOR_THREAD_COUNT)
4652 vm_compressor_thread_count = MAX_COMPRESSOR_THREAD_COUNT;
4653
4654 vm_pageout_queue_internal.pgo_maxlaundry = (vm_compressor_thread_count * 4) * VM_PAGE_LAUNDRY_MAX;
4655
4656 PE_parse_boot_argn("vmpgoi_maxlaundry", &vm_pageout_queue_internal.pgo_maxlaundry, sizeof(vm_pageout_queue_internal.pgo_maxlaundry));
4657
4658 for (i = 0; i < vm_compressor_thread_count; i++) {
4659 ciq[i].id = i;
4660 ciq[i].q = &vm_pageout_queue_internal;
4661 ciq[i].current_chead = NULL;
4662 ciq[i].scratch_buf = kalloc(COMPRESSOR_SCRATCH_BUF_SIZE);
4663
4664 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, (void *)&ciq[i], BASEPRI_VM, &vm_pageout_internal_iothread);
4665
4666 if (result == KERN_SUCCESS)
4667 thread_deallocate(vm_pageout_internal_iothread);
4668 else
4669 break;
4670 }
4671 return result;
4672 }
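
/*
 * Worked example (hypothetical values): with hinfo.max_cpus == 4 and a
 * boot-arg of vmcomp_threads=8, the thread count is clamped to
 * max_cpus - 1 == 3 (and further capped by MAX_COMPRESSOR_THREAD_COUNT if
 * that is smaller), so the internal queue's pgo_maxlaundry becomes
 * (3 * 4) * VM_PAGE_LAUNDRY_MAX unless vmpgoi_maxlaundry overrides it.
 */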
4673
4674 #if CONFIG_IOSCHED
4675 /*
4676 * To support I/O Expedite for compressed files we mark the upls with special flags.
4677 * The way decmpfs works is that we create a big upl which marks all the pages needed to
4678 * represent the compressed file as busy. We tag this upl with the flag UPL_DECMP_REQ. Decmpfs
4679 * then issues smaller I/Os for compressed I/Os, deflates them and puts the data into the pages
4680 * being held in the big original UPL. We mark each of these smaller UPLs with the flag
4681 * UPL_DECMP_REAL_IO. Any outstanding real I/O UPL is tracked by the big req upl using the
4682 * decmp_io_upl field (in the upl structure). This link is protected in the forward direction
4683 * by the req upl lock (the reverse link doesnt need synch. since we never inspect this link
4684 * unless the real I/O upl is being destroyed).
4685 */
4686
4687
4688 static void
4689 upl_set_decmp_info(upl_t upl, upl_t src_upl)
4690 {
4691 assert((src_upl->flags & UPL_DECMP_REQ) != 0);
4692
4693 upl_lock(src_upl);
4694 if (src_upl->decmp_io_upl) {
4695 /*
4696 * If there is already an alive real I/O UPL, ignore this new UPL.
4697 * This case should rarely happen and even if it does, it just means
4698 * that we might issue a spurious expedite which the driver is expected
4699 * to handle.
4700 */
4701 upl_unlock(src_upl);
4702 return;
4703 }
4704 src_upl->decmp_io_upl = (void *)upl;
4705 src_upl->ref_count++;
4706
4707 upl->flags |= UPL_DECMP_REAL_IO;
4708 upl->decmp_io_upl = (void *)src_upl;
4709 upl_unlock(src_upl);
4710 }
4711 #endif /* CONFIG_IOSCHED */
4712
4713 #if UPL_DEBUG
4714 int upl_debug_enabled = 1;
4715 #else
4716 int upl_debug_enabled = 0;
4717 #endif
4718
4719 static upl_t
4720 upl_create(int type, int flags, upl_size_t size)
4721 {
4722 upl_t upl;
4723 vm_size_t page_field_size = 0;
4724 int upl_flags = 0;
4725 vm_size_t upl_size = sizeof(struct upl);
4726
4727 size = round_page_32(size);
4728
4729 if (type & UPL_CREATE_LITE) {
4730 page_field_size = (atop(size) + 7) >> 3;
4731 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
4732
4733 upl_flags |= UPL_LITE;
4734 }
4735 if (type & UPL_CREATE_INTERNAL) {
4736 upl_size += sizeof(struct upl_page_info) * atop(size);
4737
4738 upl_flags |= UPL_INTERNAL;
4739 }
4740 upl = (upl_t)kalloc(upl_size + page_field_size);
4741
4742 if (page_field_size)
4743 bzero((char *)upl + upl_size, page_field_size);
4744
4745 upl->flags = upl_flags | flags;
4746 upl->kaddr = (vm_offset_t)0;
4747 upl->size = 0;
4748 upl->map_object = NULL;
4749 upl->ref_count = 1;
4750 upl->ext_ref_count = 0;
4751 upl->highest_page = 0;
4752 upl_lock_init(upl);
4753 upl->vector_upl = NULL;
4754 upl->associated_upl = NULL;
4755 #if CONFIG_IOSCHED
4756 if (type & UPL_CREATE_IO_TRACKING) {
4757 upl->upl_priority = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO);
4758 }
4759
4760 upl->upl_reprio_info = 0;
4761 upl->decmp_io_upl = 0;
4762 if ((type & UPL_CREATE_INTERNAL) && (type & UPL_CREATE_EXPEDITE_SUP)) {
4763 /* Only support expedite on internal UPLs */
4764 thread_t curthread = current_thread();
4765 upl->upl_reprio_info = (uint64_t *)kalloc(sizeof(uint64_t) * atop(size));
4766 bzero(upl->upl_reprio_info, (sizeof(uint64_t) * atop(size)));
4767 upl->flags |= UPL_EXPEDITE_SUPPORTED;
4768 if (curthread->decmp_upl != NULL)
4769 upl_set_decmp_info(upl, curthread->decmp_upl);
4770 }
4771 #endif
4772 #if CONFIG_IOSCHED || UPL_DEBUG
4773 if ((type & UPL_CREATE_IO_TRACKING) || upl_debug_enabled) {
4774 upl->upl_creator = current_thread();
4775 upl->uplq.next = 0;
4776 upl->uplq.prev = 0;
4777 upl->flags |= UPL_TRACKED_BY_OBJECT;
4778 }
4779 #endif
4780
4781 #if UPL_DEBUG
4782 upl->ubc_alias1 = 0;
4783 upl->ubc_alias2 = 0;
4784
4785 upl->upl_state = 0;
4786 upl->upl_commit_index = 0;
4787 bzero(&upl->upl_commit_records[0], sizeof(upl->upl_commit_records));
4788
4789 (void) OSBacktrace(&upl->upl_create_retaddr[0], UPL_DEBUG_STACK_FRAMES);
4790 #endif /* UPL_DEBUG */
4791
4792 return(upl);
4793 }
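
/*
 * Layout sketch (illustrative only): for an INTERNAL | LITE UPL, the single
 * kalloc() block built above is carved up exactly the way
 * vm_object_upl_request() below recomputes the pointers: the struct upl,
 * then the upl_page_info_t array, then the lite bitmap.
 */
#if 0
static void
example_internal_lite_upl_layout(upl_t upl, upl_size_t size)
{
	upl_page_info_t	*page_list;
	wpl_array_t	lite_list;

	page_list = (upl_page_info_t *)(((uintptr_t)upl) + sizeof(struct upl));
	lite_list = (wpl_array_t)(((uintptr_t)page_list) +
	    ((size / PAGE_SIZE) * sizeof(upl_page_info_t)));

	(void)page_list;
	(void)lite_list;
}
#endif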
4794
4795 static void
4796 upl_destroy(upl_t upl)
4797 {
4798 int page_field_size; /* bit field in word size buf */
4799 int size;
4800
4801 if (upl->ext_ref_count) {
4802 panic("upl(%p) ext_ref_count", upl);
4803 }
4804
4805 #if CONFIG_IOSCHED
4806 if ((upl->flags & UPL_DECMP_REAL_IO) && upl->decmp_io_upl) {
4807 upl_t src_upl;
4808 src_upl = upl->decmp_io_upl;
4809 assert((src_upl->flags & UPL_DECMP_REQ) != 0);
4810 upl_lock(src_upl);
4811 src_upl->decmp_io_upl = NULL;
4812 upl_unlock(src_upl);
4813 upl_deallocate(src_upl);
4814 }
4815 #endif /* CONFIG_IOSCHED */
4816
4817 #if CONFIG_IOSCHED || UPL_DEBUG
4818 if ((upl->flags & UPL_TRACKED_BY_OBJECT) && !(upl->flags & UPL_VECTOR)) {
4819 vm_object_t object;
4820
4821 if (upl->flags & UPL_SHADOWED) {
4822 object = upl->map_object->shadow;
4823 } else {
4824 object = upl->map_object;
4825 }
4826
4827 vm_object_lock(object);
4828 queue_remove(&object->uplq, upl, upl_t, uplq);
4829 vm_object_activity_end(object);
4830 vm_object_collapse(object, 0, TRUE);
4831 vm_object_unlock(object);
4832 }
4833 #endif
4834 /*
4835 * drop a reference on the map_object whether or
4836 * not a pageout object is inserted
4837 */
4838 if (upl->flags & UPL_SHADOWED)
4839 vm_object_deallocate(upl->map_object);
4840
4841 if (upl->flags & UPL_DEVICE_MEMORY)
4842 size = PAGE_SIZE;
4843 else
4844 size = upl->size;
4845 page_field_size = 0;
4846
4847 if (upl->flags & UPL_LITE) {
4848 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
4849 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
4850 }
4851 upl_lock_destroy(upl);
4852 upl->vector_upl = (vector_upl_t) 0xfeedbeef;
4853
4854 #if CONFIG_IOSCHED
4855 if (upl->flags & UPL_EXPEDITE_SUPPORTED)
4856 kfree(upl->upl_reprio_info, sizeof(uint64_t) * (size/PAGE_SIZE));
4857 #endif
4858
4859 if (upl->flags & UPL_INTERNAL) {
4860 kfree(upl,
4861 sizeof(struct upl) +
4862 (sizeof(struct upl_page_info) * (size/PAGE_SIZE))
4863 + page_field_size);
4864 } else {
4865 kfree(upl, sizeof(struct upl) + page_field_size);
4866 }
4867 }
4868
4869 void
4870 upl_deallocate(upl_t upl)
4871 {
4872 upl_lock(upl);
4873 if (--upl->ref_count == 0) {
4874 if(vector_upl_is_valid(upl))
4875 vector_upl_deallocate(upl);
4876 upl_unlock(upl);
4877 upl_destroy(upl);
4878 }
4879 else
4880 upl_unlock(upl);
4881 }
4882
4883 #if CONFIG_IOSCHED
4884 void
4885 upl_mark_decmp(upl_t upl)
4886 {
4887 if (upl->flags & UPL_TRACKED_BY_OBJECT) {
4888 upl->flags |= UPL_DECMP_REQ;
4889 upl->upl_creator->decmp_upl = (void *)upl;
4890 }
4891 }
4892
4893 void
4894 upl_unmark_decmp(upl_t upl)
4895 {
4896 if(upl && (upl->flags & UPL_DECMP_REQ)) {
4897 upl->upl_creator->decmp_upl = NULL;
4898 }
4899 }
4900
4901 #endif /* CONFIG_IOSCHED */
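
/*
 * Flow sketch (hypothetical decmpfs-side caller): the big request UPL is
 * marked on the creating thread, any real-I/O UPLs created by that thread
 * while the mark is in place get linked back to it by upl_create(), and
 * the mark is cleared once the compressed reads have been issued.
 */
#if 0
static void
example_decmp_request(upl_t req_upl)
{
	upl_mark_decmp(req_upl);	/* subsequent UPLs from this thread are real I/O for req_upl */

	/* ... create and issue the smaller real-I/O UPLs here ... */

	upl_unmark_decmp(req_upl);	/* done issuing I/O on behalf of req_upl */
}
#endif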
4902
4903 #define VM_PAGE_Q_BACKING_UP(q) \
4904 ((q)->pgo_laundry >= (((q)->pgo_maxlaundry * 8) / 10))
4905
4906 boolean_t must_throttle_writes(void);
4907
4908 boolean_t
4909 must_throttle_writes()
4910 {
4911 if (VM_PAGE_Q_BACKING_UP(&vm_pageout_queue_external) &&
4912 vm_page_pageable_external_count > (AVAILABLE_NON_COMPRESSED_MEMORY * 6) / 10)
4913 return (TRUE);
4914
4915 return (FALSE);
4916 }
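
/*
 * Usage sketch (illustrative only): vm_object_upl_request() below is the real
 * consumer.  "Backing up" means the external queue's laundry count has
 * reached 80% of pgo_maxlaundry; when that happens and external pageable
 * pages exceed roughly 60% of available non-compressed memory, a writer
 * simply stalls, with per-page delays mirroring the values used below.
 */
#if 0
static void
example_throttle_writer(unsigned int size_in_pages, boolean_t isSSD)
{
	if (must_throttle_writes() == TRUE)
		delay((isSSD ? 1000 : 5000) * size_in_pages);	/* microseconds per page */
}
#endif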
4917
4918
4919 #if DEVELOPMENT || DEBUG
4920 /*
4921 * Statistics about UPL enforcement of copy-on-write obligations.
4922 */
4923 unsigned long upl_cow = 0;
4924 unsigned long upl_cow_again = 0;
4925 unsigned long upl_cow_pages = 0;
4926 unsigned long upl_cow_again_pages = 0;
4927
4928 unsigned long iopl_cow = 0;
4929 unsigned long iopl_cow_pages = 0;
4930 #endif
4931
4932 /*
4933 * Routine: vm_object_upl_request
4934 * Purpose:
4935 * Cause the population of a portion of a vm_object.
4936 * Depending on the nature of the request, the pages
4937 * returned may contain valid data or be uninitialized.
4938 * A page list structure, listing the physical pages,
4939 * will be returned upon request.
4940 * This function is called by the file system or any other
4941 * supplier of backing store to a pager.
4942 * IMPORTANT NOTE: The caller must still respect the relationship
4943 * between the vm_object and its backing memory object. The
4944 * caller MUST NOT substitute changes in the backing file
4945 * without first doing a memory_object_lock_request on the
4946 * target range unless it is known that the pages are not
4947 * shared with another entity at the pager level.
4948 * Copy_in_to:
4949 * if a page list structure is present
4950 * return the mapped physical pages, where a
4951 * page is not present, return a non-initialized
4952 * one. If the no_sync bit is turned on, don't
4953 * call the pager unlock to synchronize with other
4954 * possible copies of the page. Leave pages busy
4955 * in the original object, if a page list structure
4956 * was specified. When a commit of the page list
4957 * pages is done, the dirty bit will be set for each one.
4958 * Copy_out_from:
4959 * If a page list structure is present, return
4960 * all mapped pages. Where a page does not exist
4961 * map a zero filled one. Leave pages busy in
4962 * the original object. If a page list structure
4963 * is not specified, this call is a no-op.
4964 *
4965 * Note: access of default pager objects has a rather interesting
4966 * twist. The caller of this routine, presumably the file system
4967 * page cache handling code, will never actually make a request
4968 * against a default pager backed object. Only the default
4969 * pager will make requests on backing store related vm_objects.
4970 * In this way the default pager can maintain the relationship
4971 * between backing store files (abstract memory objects) and
4972 * the vm_objects (cache objects) they support.
4973 *
4974 */
4975
4976 __private_extern__ kern_return_t
4977 vm_object_upl_request(
4978 vm_object_t object,
4979 vm_object_offset_t offset,
4980 upl_size_t size,
4981 upl_t *upl_ptr,
4982 upl_page_info_array_t user_page_list,
4983 unsigned int *page_list_count,
4984 upl_control_flags_t cntrl_flags,
4985 vm_tag_t tag)
4986 {
4987 vm_page_t dst_page = VM_PAGE_NULL;
4988 vm_object_offset_t dst_offset;
4989 upl_size_t xfer_size;
4990 unsigned int size_in_pages;
4991 boolean_t dirty;
4992 boolean_t hw_dirty;
4993 upl_t upl = NULL;
4994 unsigned int entry;
4995 #if MACH_CLUSTER_STATS
4996 boolean_t encountered_lrp = FALSE;
4997 #endif
4998 vm_page_t alias_page = NULL;
4999 int refmod_state = 0;
5000 wpl_array_t lite_list = NULL;
5001 vm_object_t last_copy_object;
5002 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
5003 struct vm_page_delayed_work *dwp;
5004 int dw_count;
5005 int dw_limit;
5006 int io_tracking_flag = 0;
5007 int grab_options;
5008 ppnum_t phys_page;
5009
5010 if (cntrl_flags & ~UPL_VALID_FLAGS) {
5011 /*
5012 * For forward compatibility's sake,
5013 * reject any unknown flag.
5014 */
5015 return KERN_INVALID_VALUE;
5016 }
5017 if ( (!object->internal) && (object->paging_offset != 0) )
5018 panic("vm_object_upl_request: external object with non-zero paging offset\n");
5019 if (object->phys_contiguous)
5020 panic("vm_object_upl_request: contiguous object specified\n");
5021
5022
5023 if (size > MAX_UPL_SIZE_BYTES)
5024 size = MAX_UPL_SIZE_BYTES;
5025
5026 if ( (cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL)
5027 *page_list_count = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT;
5028
5029 #if CONFIG_IOSCHED || UPL_DEBUG
5030 if (object->io_tracking || upl_debug_enabled)
5031 io_tracking_flag |= UPL_CREATE_IO_TRACKING;
5032 #endif
5033 #if CONFIG_IOSCHED
5034 if (object->io_tracking)
5035 io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
5036 #endif
5037
5038 if (cntrl_flags & UPL_SET_INTERNAL) {
5039 if (cntrl_flags & UPL_SET_LITE) {
5040
5041 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
5042
5043 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
5044 lite_list = (wpl_array_t)
5045 (((uintptr_t)user_page_list) +
5046 ((size/PAGE_SIZE) * sizeof(upl_page_info_t)));
5047 if (size == 0) {
5048 user_page_list = NULL;
5049 lite_list = NULL;
5050 }
5051 } else {
5052 upl = upl_create(UPL_CREATE_INTERNAL | io_tracking_flag, 0, size);
5053
5054 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
5055 if (size == 0) {
5056 user_page_list = NULL;
5057 }
5058 }
5059 } else {
5060 if (cntrl_flags & UPL_SET_LITE) {
5061
5062 upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
5063
5064 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
5065 if (size == 0) {
5066 lite_list = NULL;
5067 }
5068 } else {
5069 upl = upl_create(UPL_CREATE_EXTERNAL | io_tracking_flag, 0, size);
5070 }
5071 }
5072 *upl_ptr = upl;
5073
5074 if (user_page_list)
5075 user_page_list[0].device = FALSE;
5076
5077 if (cntrl_flags & UPL_SET_LITE) {
5078 upl->map_object = object;
5079 } else {
5080 upl->map_object = vm_object_allocate(size);
5081 /*
5082 * No need to lock the new object: nobody else knows
5083 * about it yet, so it's all ours so far.
5084 */
5085 upl->map_object->shadow = object;
5086 upl->map_object->pageout = TRUE;
5087 upl->map_object->can_persist = FALSE;
5088 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
5089 upl->map_object->vo_shadow_offset = offset;
5090 upl->map_object->wimg_bits = object->wimg_bits;
5091
5092 VM_PAGE_GRAB_FICTITIOUS(alias_page);
5093
5094 upl->flags |= UPL_SHADOWED;
5095 }
5096 if (cntrl_flags & UPL_FOR_PAGEOUT)
5097 upl->flags |= UPL_PAGEOUT;
5098
5099 vm_object_lock(object);
5100 vm_object_activity_begin(object);
5101
5102 grab_options = 0;
5103 #if CONFIG_SECLUDED_MEMORY
5104 if (object->can_grab_secluded) {
5105 grab_options |= VM_PAGE_GRAB_SECLUDED;
5106 }
5107 #endif /* CONFIG_SECLUDED_MEMORY */
5108
5109 /*
5110 * we can lock in the paging_offset once paging_in_progress is set
5111 */
5112 upl->size = size;
5113 upl->offset = offset + object->paging_offset;
5114
5115 #if CONFIG_IOSCHED || UPL_DEBUG
5116 if (object->io_tracking || upl_debug_enabled) {
5117 vm_object_activity_begin(object);
5118 queue_enter(&object->uplq, upl, upl_t, uplq);
5119 }
5120 #endif
5121 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != VM_OBJECT_NULL) {
5122 /*
5123 * Honor copy-on-write obligations
5124 *
5125 * The caller is gathering these pages and
5126 * might modify their contents. We need to
5127 * make sure that the copy object has its own
5128 * private copies of these pages before we let
5129 * the caller modify them.
5130 */
5131 vm_object_update(object,
5132 offset,
5133 size,
5134 NULL,
5135 NULL,
5136 FALSE, /* should_return */
5137 MEMORY_OBJECT_COPY_SYNC,
5138 VM_PROT_NO_CHANGE);
5139 #if DEVELOPMENT || DEBUG
5140 upl_cow++;
5141 upl_cow_pages += size >> PAGE_SHIFT;
5142 #endif
5143 }
5144 /*
5145 * remember which copy object we synchronized with
5146 */
5147 last_copy_object = object->copy;
5148 entry = 0;
5149
5150 xfer_size = size;
5151 dst_offset = offset;
5152 size_in_pages = size / PAGE_SIZE;
5153
5154 dwp = &dw_array[0];
5155 dw_count = 0;
5156 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
5157
5158 if (vm_page_free_count > (vm_page_free_target + size_in_pages) ||
5159 object->resident_page_count < ((MAX_UPL_SIZE_BYTES * 2) >> PAGE_SHIFT))
5160 object->scan_collisions = 0;
5161
5162 if ((cntrl_flags & UPL_WILL_MODIFY) && must_throttle_writes() == TRUE) {
5163 boolean_t isSSD = FALSE;
5164
5165 #if CONFIG_EMBEDDED
5166 isSSD = TRUE;
5167 #else
5168 vnode_pager_get_isSSD(object->pager, &isSSD);
5169 #endif
5170 vm_object_unlock(object);
5171
5172 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
5173
5174 if (isSSD == TRUE)
5175 delay(1000 * size_in_pages);
5176 else
5177 delay(5000 * size_in_pages);
5178 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
5179
5180 vm_object_lock(object);
5181 }
5182
5183 while (xfer_size) {
5184
5185 dwp->dw_mask = 0;
5186
5187 if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
5188 vm_object_unlock(object);
5189 VM_PAGE_GRAB_FICTITIOUS(alias_page);
5190 vm_object_lock(object);
5191 }
5192 if (cntrl_flags & UPL_COPYOUT_FROM) {
5193 upl->flags |= UPL_PAGE_SYNC_DONE;
5194
5195 if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
5196 dst_page->fictitious ||
5197 dst_page->absent ||
5198 dst_page->error ||
5199 dst_page->cleaning ||
5200 (VM_PAGE_WIRED(dst_page))) {
5201
5202 if (user_page_list)
5203 user_page_list[entry].phys_addr = 0;
5204
5205 goto try_next_page;
5206 }
5207 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
5208
5209 /*
5210 * grab this up front...
5211 * a high percentage of the time we're going to
5212 * need the hardware modification state a bit later
5213 * anyway... so we can eliminate an extra call into
5214 * the pmap layer by grabbing it here and recording it
5215 */
5216 if (dst_page->pmapped)
5217 refmod_state = pmap_get_refmod(phys_page);
5218 else
5219 refmod_state = 0;
5220
5221 if ( (refmod_state & VM_MEM_REFERENCED) && VM_PAGE_INACTIVE(dst_page)) {
5222 /*
5223 * page is on inactive list and referenced...
5224 * reactivate it now... this gets it out of the
5225 * way of vm_pageout_scan which would have to
5226 * reactivate it upon tripping over it
5227 */
5228 dwp->dw_mask |= DW_vm_page_activate;
5229 }
5230 if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
5231 /*
5232 * we're only asking for DIRTY pages to be returned
5233 */
5234 if (dst_page->laundry || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
5235 /*
5236 * if we were the page stolen by vm_pageout_scan to be
5237 * cleaned (as opposed to a buddy being clustered in),
5238 * or this request is not being driven by a PAGEOUT cluster,
5239 * then we only need to check for the page being dirty or
5240 * precious to decide whether to return it
5241 */
5242 if (dst_page->dirty || dst_page->precious || (refmod_state & VM_MEM_MODIFIED))
5243 goto check_busy;
5244 goto dont_return;
5245 }
5246 /*
5247 * this is a request for a PAGEOUT cluster and this page
5248 * is merely along for the ride as a 'buddy'... not only
5249 * does it have to be dirty to be returned, but it also
5250 * can't have been referenced recently...
5251 */
5252 if ( (hibernate_cleaning_in_progress == TRUE ||
5253 (!((refmod_state & VM_MEM_REFERENCED) || dst_page->reference) ||
5254 (dst_page->vm_page_q_state == VM_PAGE_ON_THROTTLED_Q))) &&
5255 ((refmod_state & VM_MEM_MODIFIED) || dst_page->dirty || dst_page->precious) ) {
5256 goto check_busy;
5257 }
5258 dont_return:
5259 /*
5260 * if we reach here, we're not to return
5261 * the page... go on to the next one
5262 */
5263 if (dst_page->laundry == TRUE) {
5264 /*
5265 * if we get here, the page is not 'cleaning' (filtered out above).
5266 * since it has been referenced, remove it from the laundry
5267 * so we don't pay the cost of an I/O to clean a page
5268 * we're just going to take back
5269 */
5270 vm_page_lockspin_queues();
5271
5272 vm_pageout_steal_laundry(dst_page, TRUE);
5273 vm_page_activate(dst_page);
5274
5275 vm_page_unlock_queues();
5276 }
5277 if (user_page_list)
5278 user_page_list[entry].phys_addr = 0;
5279
5280 goto try_next_page;
5281 }
5282 check_busy:
5283 if (dst_page->busy) {
5284 if (cntrl_flags & UPL_NOBLOCK) {
5285 if (user_page_list)
5286 user_page_list[entry].phys_addr = 0;
5287 dwp->dw_mask = 0;
5288
5289 goto try_next_page;
5290 }
5291 /*
5292 * someone else is playing with the
5293 * page. We will have to wait.
5294 */
5295 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
5296
5297 continue;
5298 }
5299 if (dst_page->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) {
5300
5301 vm_page_lockspin_queues();
5302
5303 if (dst_page->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) {
5304 /*
5305 * we've buddied up a page for a clustered pageout
5306 * that has already been moved to the pageout
5307 * queue by pageout_scan... we need to remove
5308 * it from the queue and drop the laundry count
5309 * on that queue
5310 */
5311 vm_pageout_throttle_up(dst_page);
5312 }
5313 vm_page_unlock_queues();
5314 }
5315 #if MACH_CLUSTER_STATS
5316 /*
5317 * pageout statistics gathering. count
5318 * all the pages we will page out that
5319 * were not counted in the initial
5320 * vm_pageout_scan work
5321 */
5322 if (dst_page->pageout)
5323 encountered_lrp = TRUE;
5324 if ((dst_page->dirty || (object->internal && dst_page->precious))) {
5325 if (encountered_lrp)
5326 CLUSTER_STAT(pages_at_higher_offsets++;)
5327 else
5328 CLUSTER_STAT(pages_at_lower_offsets++;)
5329 }
5330 #endif
5331 hw_dirty = refmod_state & VM_MEM_MODIFIED;
5332 dirty = hw_dirty ? TRUE : dst_page->dirty;
5333
5334 if (phys_page > upl->highest_page)
5335 upl->highest_page = phys_page;
5336
5337 assert (!pmap_is_noencrypt(phys_page));
5338
5339 if (cntrl_flags & UPL_SET_LITE) {
5340 unsigned int pg_num;
5341
5342 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
5343 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
5344 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
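				/*
				 * Editor's note on the indexing just above (illustration only,
				 * no new behavior): lite_list is a bitmap with one bit per page
				 * of the UPL.  pg_num >> 5 selects the 32-bit word and
				 * pg_num & 31 selects the bit within it, e.g. pg_num == 37
				 * sets bit 5 of lite_list[1].
				 */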
5345
5346 if (hw_dirty)
5347 pmap_clear_modify(phys_page);
5348
5349 /*
5350 * Mark original page as cleaning
5351 * in place.
5352 */
5353 dst_page->cleaning = TRUE;
5354 dst_page->precious = FALSE;
5355 } else {
5356 /*
5357 				 * use pageclean setup; it is more
5358 * convenient even for the pageout
5359 * cases here
5360 */
5361 vm_object_lock(upl->map_object);
5362 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
5363 vm_object_unlock(upl->map_object);
5364
5365 alias_page->absent = FALSE;
5366 alias_page = NULL;
5367 }
5368 if (dirty) {
5369 SET_PAGE_DIRTY(dst_page, FALSE);
5370 } else {
5371 dst_page->dirty = FALSE;
5372 }
5373
5374 if (!dirty)
5375 dst_page->precious = TRUE;
5376
5377 if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) {
5378 if ( !VM_PAGE_WIRED(dst_page))
5379 dst_page->free_when_done = TRUE;
5380 }
5381 } else {
5382 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != last_copy_object) {
5383 /*
5384 * Honor copy-on-write obligations
5385 *
5386 * The copy object has changed since we
5387 * last synchronized for copy-on-write.
5388 * Another copy object might have been
5389 * inserted while we released the object's
5390 * lock. Since someone could have seen the
5391 * original contents of the remaining pages
5392 * through that new object, we have to
5393 * synchronize with it again for the remaining
5394 * pages only. The previous pages are "busy"
5395 * so they can not be seen through the new
5396 * mapping. The new mapping will see our
5397 * upcoming changes for those previous pages,
5398 * but that's OK since they couldn't see what
5399 * was there before. It's just a race anyway
5400 * and there's no guarantee of consistency or
5401 * atomicity. We just don't want new mappings
5402 * to see both the *before* and *after* pages.
5403 */
5404 if (object->copy != VM_OBJECT_NULL) {
5405 vm_object_update(
5406 object,
5407 dst_offset,/* current offset */
5408 xfer_size, /* remaining size */
5409 NULL,
5410 NULL,
5411 FALSE, /* should_return */
5412 MEMORY_OBJECT_COPY_SYNC,
5413 VM_PROT_NO_CHANGE);
5414
5415 #if DEVELOPMENT || DEBUG
5416 upl_cow_again++;
5417 upl_cow_again_pages += xfer_size >> PAGE_SHIFT;
5418 #endif
5419 }
5420 /*
5421 * remember the copy object we synced with
5422 */
5423 last_copy_object = object->copy;
5424 }
5425 dst_page = vm_page_lookup(object, dst_offset);
5426
5427 if (dst_page != VM_PAGE_NULL) {
5428
5429 if ((cntrl_flags & UPL_RET_ONLY_ABSENT)) {
5430 /*
5431 * skip over pages already present in the cache
5432 */
5433 if (user_page_list)
5434 user_page_list[entry].phys_addr = 0;
5435
5436 goto try_next_page;
5437 }
5438 if (dst_page->fictitious) {
5439 panic("need corner case for fictitious page");
5440 }
5441
5442 if (dst_page->busy || dst_page->cleaning) {
5443 /*
5444 * someone else is playing with the
5445 * page. We will have to wait.
5446 */
5447 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
5448
5449 continue;
5450 }
5451 if (dst_page->laundry)
5452 vm_pageout_steal_laundry(dst_page, FALSE);
5453 } else {
5454 if (object->private) {
5455 /*
5456 * This is a nasty wrinkle for users
5457 * of upl who encounter device or
5458 				 * private memory; however, it is
5459 				 * unavoidable: only a fault can
5460 * resolve the actual backing
5461 * physical page by asking the
5462 * backing device.
5463 */
5464 if (user_page_list)
5465 user_page_list[entry].phys_addr = 0;
5466
5467 goto try_next_page;
5468 }
5469 if (object->scan_collisions) {
5470 /*
5471 * the pageout_scan thread is trying to steal
5472 * pages from this object, but has run into our
5473 * lock... grab 2 pages from the head of the object...
5474 * the first is freed on behalf of pageout_scan, the
5475 * 2nd is for our own use... we use vm_object_page_grab
5476 * in both cases to avoid taking pages from the free
5477 * list since we are under memory pressure and our
5478 * lock on this object is getting in the way of
5479 * relieving it
5480 */
5481 dst_page = vm_object_page_grab(object);
5482
5483 if (dst_page != VM_PAGE_NULL)
5484 vm_page_release(dst_page,
5485 FALSE);
5486
5487 dst_page = vm_object_page_grab(object);
5488 }
5489 if (dst_page == VM_PAGE_NULL) {
5490 /*
5491 * need to allocate a page
5492 */
5493 dst_page = vm_page_grab_options(grab_options);
5494 }
5495 if (dst_page == VM_PAGE_NULL) {
5496 if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) {
5497 /*
5498 * we don't want to stall waiting for pages to come onto the free list
5499 					 * while we're already holding absent pages in this UPL...
5500 * the caller will deal with the empty slots
5501 */
5502 if (user_page_list)
5503 user_page_list[entry].phys_addr = 0;
5504
5505 goto try_next_page;
5506 }
5507 /*
5508 * no pages available... wait
5509 * then try again for the same
5510 * offset...
5511 */
5512 vm_object_unlock(object);
5513
5514 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
5515
5516 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
5517
5518 VM_PAGE_WAIT();
5519 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
5520
5521 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
5522
5523 vm_object_lock(object);
5524
5525 continue;
5526 }
5527 vm_page_insert(dst_page, object, dst_offset);
5528
5529 dst_page->absent = TRUE;
5530 dst_page->busy = FALSE;
5531
5532 if (cntrl_flags & UPL_RET_ONLY_ABSENT) {
5533 /*
5534 * if UPL_RET_ONLY_ABSENT was specified,
5535 					 * then we're definitely setting up a
5536 					 * UPL for a clustered read/pagein
5537 * operation... mark the pages as clustered
5538 * so upl_commit_range can put them on the
5539 * speculative list
5540 */
5541 dst_page->clustered = TRUE;
5542
5543 if ( !(cntrl_flags & UPL_FILE_IO))
5544 VM_STAT_INCR(pageins);
5545 }
5546 }
5547 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
5548
5549 dst_page->overwriting = TRUE;
5550
5551 if (dst_page->pmapped) {
5552 if ( !(cntrl_flags & UPL_FILE_IO))
5553 /*
5554 * eliminate all mappings from the
5555 					 * original object and its progeny
5556 */
5557 refmod_state = pmap_disconnect(phys_page);
5558 else
5559 refmod_state = pmap_get_refmod(phys_page);
5560 } else
5561 refmod_state = 0;
5562
5563 hw_dirty = refmod_state & VM_MEM_MODIFIED;
5564 dirty = hw_dirty ? TRUE : dst_page->dirty;
5565
5566 if (cntrl_flags & UPL_SET_LITE) {
5567 unsigned int pg_num;
5568
5569 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
5570 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
5571 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
5572
5573 if (hw_dirty)
5574 pmap_clear_modify(phys_page);
5575
5576 /*
5577 * Mark original page as cleaning
5578 * in place.
5579 */
5580 dst_page->cleaning = TRUE;
5581 dst_page->precious = FALSE;
5582 } else {
5583 /*
5584 				 * use pageclean setup; it is more
5585 * convenient even for the pageout
5586 * cases here
5587 */
5588 vm_object_lock(upl->map_object);
5589 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
5590 vm_object_unlock(upl->map_object);
5591
5592 alias_page->absent = FALSE;
5593 alias_page = NULL;
5594 }
5595
5596 if (cntrl_flags & UPL_REQUEST_SET_DIRTY) {
5597 upl->flags &= ~UPL_CLEAR_DIRTY;
5598 upl->flags |= UPL_SET_DIRTY;
5599 dirty = TRUE;
5601 } else if (cntrl_flags & UPL_CLEAN_IN_PLACE) {
5602 /*
5603 * clean in place for read implies
5604 * that a write will be done on all
5605 * the pages that are dirty before
5606 * a upl commit is done. The caller
5607 * is obligated to preserve the
5608 * contents of all pages marked dirty
5609 */
5610 upl->flags |= UPL_CLEAR_DIRTY;
5611 }
5612 dst_page->dirty = dirty;
5613
5614 if (!dirty)
5615 dst_page->precious = TRUE;
5616
5617 if ( !VM_PAGE_WIRED(dst_page)) {
5618 /*
5619 * deny access to the target page while
5620 * it is being worked on
5621 */
5622 dst_page->busy = TRUE;
5623 } else
5624 dwp->dw_mask |= DW_vm_page_wire;
5625
5626 /*
5627 * We might be about to satisfy a fault which has been
5628 * requested. So no need for the "restart" bit.
5629 */
5630 dst_page->restart = FALSE;
5631 if (!dst_page->absent && !(cntrl_flags & UPL_WILL_MODIFY)) {
5632 /*
5633 * expect the page to be used
5634 */
5635 dwp->dw_mask |= DW_set_reference;
5636 }
5637 if (cntrl_flags & UPL_PRECIOUS) {
5638 if (object->internal) {
5639 SET_PAGE_DIRTY(dst_page, FALSE);
5640 dst_page->precious = FALSE;
5641 } else {
5642 dst_page->precious = TRUE;
5643 }
5644 } else {
5645 dst_page->precious = FALSE;
5646 }
5647 }
5648 if (dst_page->busy)
5649 upl->flags |= UPL_HAS_BUSY;
5650
5651 if (phys_page > upl->highest_page)
5652 upl->highest_page = phys_page;
5653 assert (!pmap_is_noencrypt(phys_page));
5654 if (user_page_list) {
5655 user_page_list[entry].phys_addr = phys_page;
5656 user_page_list[entry].free_when_done = dst_page->free_when_done;
5657 user_page_list[entry].absent = dst_page->absent;
5658 user_page_list[entry].dirty = dst_page->dirty;
5659 user_page_list[entry].precious = dst_page->precious;
5660 user_page_list[entry].device = FALSE;
5661 user_page_list[entry].needed = FALSE;
5662 if (dst_page->clustered == TRUE)
5663 user_page_list[entry].speculative = (dst_page->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE;
5664 else
5665 user_page_list[entry].speculative = FALSE;
5666 user_page_list[entry].cs_validated = dst_page->cs_validated;
5667 user_page_list[entry].cs_tainted = dst_page->cs_tainted;
5668 user_page_list[entry].cs_nx = dst_page->cs_nx;
5669 user_page_list[entry].mark = FALSE;
5670 }
5671 /*
5672 * if UPL_RET_ONLY_ABSENT is set, then
5673 * we are working with a fresh page and we've
5674 * just set the clustered flag on it to
5675 		 * indicate that it was dragged in as part of a
5676 * speculative cluster... so leave it alone
5677 */
5678 if ( !(cntrl_flags & UPL_RET_ONLY_ABSENT)) {
5679 /*
5680 * someone is explicitly grabbing this page...
5681 * update clustered and speculative state
5682 *
5683 */
5684 if (dst_page->clustered)
5685 VM_PAGE_CONSUME_CLUSTERED(dst_page);
5686 }
5687 try_next_page:
5688 if (dwp->dw_mask) {
5689 if (dwp->dw_mask & DW_vm_page_activate)
5690 VM_STAT_INCR(reactivations);
5691
5692 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
5693
5694 if (dw_count >= dw_limit) {
5695 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
5696
5697 dwp = &dw_array[0];
5698 dw_count = 0;
5699 }
5700 }
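		/*
		 * Editor's note: the delayed-work array batches page state changes
		 * (activate, wire, set-reference, ...) so that
		 * vm_page_do_delayed_work() can apply up to dw_limit of them under
		 * a single acquisition of the page-queues lock, instead of taking
		 * that lock once per page.
		 */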
5701 entry++;
5702 dst_offset += PAGE_SIZE_64;
5703 xfer_size -= PAGE_SIZE;
5704 }
5705 if (dw_count)
5706 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
5707
5708 if (alias_page != NULL) {
5709 VM_PAGE_FREE(alias_page);
5710 }
5711
5712 if (page_list_count != NULL) {
5713 if (upl->flags & UPL_INTERNAL)
5714 *page_list_count = 0;
5715 else if (*page_list_count > entry)
5716 *page_list_count = entry;
5717 }
5718 #if UPL_DEBUG
5719 upl->upl_state = 1;
5720 #endif
5721 vm_object_unlock(object);
5722
5723 return KERN_SUCCESS;
5724 }
5725
5726 /*
5727 * Routine: vm_object_super_upl_request
5728 * Purpose:
5729 * Cause the population of a portion of a vm_object
5730 * in much the same way as memory_object_upl_request.
5731 * Depending on the nature of the request, the pages
5732  * returned may contain valid data or be uninitialized.
5733 * However, the region may be expanded up to the super
5734 * cluster size provided.
5735 */
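/*
 * Worked example (editor's sketch; the numbers are hypothetical):
 * with super_cluster = 0x10000 (64KB), a request for offset 0x1a000 and
 * size 0x3000 expands as follows in the code below:
 *
 *	base_offset = 0x1a000 & ~(0x10000 - 1)             = 0x10000
 *	super_size  = (0x1a000 + 0x3000) > 0x20000 ? 0x20000 : 0x10000
 *	            = 0x10000
 *
 * so the UPL covers 64KB starting at 0x10000, assuming the object is large
 * enough that no clipping against vo_size is required.
 */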
5736
5737 __private_extern__ kern_return_t
5738 vm_object_super_upl_request(
5739 vm_object_t object,
5740 vm_object_offset_t offset,
5741 upl_size_t size,
5742 upl_size_t super_cluster,
5743 upl_t *upl,
5744 upl_page_info_t *user_page_list,
5745 unsigned int *page_list_count,
5746 upl_control_flags_t cntrl_flags,
5747 vm_tag_t tag)
5748 {
5749 if (object->paging_offset > offset || ((cntrl_flags & UPL_VECTOR)==UPL_VECTOR))
5750 return KERN_FAILURE;
5751
5752 assert(object->paging_in_progress);
5753 offset = offset - object->paging_offset;
5754
5755 if (super_cluster > size) {
5756
5757 vm_object_offset_t base_offset;
5758 upl_size_t super_size;
5759 vm_object_size_t super_size_64;
5760
5761 base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1));
5762 super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster;
5763 super_size_64 = ((base_offset + super_size) > object->vo_size) ? (object->vo_size - base_offset) : super_size;
5764 super_size = (upl_size_t) super_size_64;
5765 assert(super_size == super_size_64);
5766
5767 if (offset > (base_offset + super_size)) {
5768 panic("vm_object_super_upl_request: Missed target pageout"
5769 " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
5770 offset, base_offset, super_size, super_cluster,
5771 size, object->paging_offset);
5772 }
5773 /*
5774 * apparently there is a case where the vm requests a
5775 		 * page to be written out whose offset is beyond the
5776 * object size
5777 */
5778 if ((offset + size) > (base_offset + super_size)) {
5779 super_size_64 = (offset + size) - base_offset;
5780 super_size = (upl_size_t) super_size_64;
5781 assert(super_size == super_size_64);
5782 }
5783
5784 offset = base_offset;
5785 size = super_size;
5786 }
5787 return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags, tag);
5788 }
5789
5790 #if CONFIG_EMBEDDED
5791 int cs_executable_create_upl = 0;
5792 extern int proc_selfpid(void);
5793 extern char *proc_name_address(void *p);
5794 #endif /* CONFIG_EMBEDDED */
5795
5796 kern_return_t
5797 vm_map_create_upl(
5798 vm_map_t map,
5799 vm_map_address_t offset,
5800 upl_size_t *upl_size,
5801 upl_t *upl,
5802 upl_page_info_array_t page_list,
5803 unsigned int *count,
5804 upl_control_flags_t *flags,
5805 vm_tag_t tag)
5806 {
5807 vm_map_entry_t entry;
5808 upl_control_flags_t caller_flags;
5809 int force_data_sync;
5810 int sync_cow_data;
5811 vm_object_t local_object;
5812 vm_map_offset_t local_offset;
5813 vm_map_offset_t local_start;
5814 kern_return_t ret;
5815
5816 assert(page_aligned(offset));
5817
5818 caller_flags = *flags;
5819
5820 if (caller_flags & ~UPL_VALID_FLAGS) {
5821 /*
5822 * For forward compatibility's sake,
5823 * reject any unknown flag.
5824 */
5825 return KERN_INVALID_VALUE;
5826 }
5827 force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
5828 sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);
5829
5830 if (upl == NULL)
5831 return KERN_INVALID_ARGUMENT;
5832
5833 REDISCOVER_ENTRY:
5834 vm_map_lock_read(map);
5835
5836 if (!vm_map_lookup_entry(map, offset, &entry)) {
5837 vm_map_unlock_read(map);
5838 return KERN_FAILURE;
5839 }
5840
5841 if ((entry->vme_end - offset) < *upl_size) {
5842 *upl_size = (upl_size_t) (entry->vme_end - offset);
5843 assert(*upl_size == entry->vme_end - offset);
5844 }
5845
5846 if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
5847 *flags = 0;
5848
5849 if (!entry->is_sub_map &&
5850 VME_OBJECT(entry) != VM_OBJECT_NULL) {
5851 if (VME_OBJECT(entry)->private)
5852 *flags = UPL_DEV_MEMORY;
5853
5854 if (VME_OBJECT(entry)->phys_contiguous)
5855 *flags |= UPL_PHYS_CONTIG;
5856 }
5857 vm_map_unlock_read(map);
5858 return KERN_SUCCESS;
5859 }
5860
5861 if (VME_OBJECT(entry) == VM_OBJECT_NULL ||
5862 !VME_OBJECT(entry)->phys_contiguous) {
5863 if (*upl_size > MAX_UPL_SIZE_BYTES)
5864 *upl_size = MAX_UPL_SIZE_BYTES;
5865 }
5866
5867 /*
5868 * Create an object if necessary.
5869 */
5870 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
5871
5872 if (vm_map_lock_read_to_write(map))
5873 goto REDISCOVER_ENTRY;
5874
5875 VME_OBJECT_SET(entry,
5876 vm_object_allocate((vm_size_t)
5877 (entry->vme_end -
5878 entry->vme_start)));
5879 VME_OFFSET_SET(entry, 0);
5880
5881 vm_map_lock_write_to_read(map);
5882 }
5883
5884 if (!(caller_flags & UPL_COPYOUT_FROM) &&
5885 !(entry->protection & VM_PROT_WRITE)) {
5886 vm_map_unlock_read(map);
5887 return KERN_PROTECTION_FAILURE;
5888 }
5889
5890 #if CONFIG_EMBEDDED
5891 if (map->pmap != kernel_pmap &&
5892 (caller_flags & UPL_COPYOUT_FROM) &&
5893 (entry->protection & VM_PROT_EXECUTE) &&
5894 !(entry->protection & VM_PROT_WRITE)) {
5895 vm_offset_t kaddr;
5896 vm_size_t ksize;
5897
5898 /*
5899 * We're about to create a read-only UPL backed by
5900 * memory from an executable mapping.
5901 * Wiring the pages would result in the pages being copied
5902 * (due to the "MAP_PRIVATE" mapping) and no longer
5903 * code-signed, so no longer eligible for execution.
5904 * Instead, let's copy the data into a kernel buffer and
5905 * create the UPL from this kernel buffer.
5906 * The kernel buffer is then freed, leaving the UPL holding
5907 * the last reference on the VM object, so the memory will
5908 * be released when the UPL is committed.
5909 */
5910
5911 vm_map_unlock_read(map);
5912 /* allocate kernel buffer */
5913 ksize = round_page(*upl_size);
5914 kaddr = 0;
5915 ret = kmem_alloc_pageable(kernel_map,
5916 &kaddr,
5917 ksize,
5918 tag);
5919 if (ret == KERN_SUCCESS) {
5920 /* copyin the user data */
5921 assert(page_aligned(offset));
5922 ret = copyinmap(map, offset, (void *)kaddr, *upl_size);
5923 }
5924 if (ret == KERN_SUCCESS) {
5925 if (ksize > *upl_size) {
5926 /* zero out the extra space in kernel buffer */
5927 memset((void *)(kaddr + *upl_size),
5928 0,
5929 ksize - *upl_size);
5930 }
5931 /* create the UPL from the kernel buffer */
5932 ret = vm_map_create_upl(kernel_map, kaddr, upl_size,
5933 upl, page_list, count, flags, tag);
5934 }
5935 if (kaddr != 0) {
5936 /* free the kernel buffer */
5937 kmem_free(kernel_map, kaddr, ksize);
5938 kaddr = 0;
5939 ksize = 0;
5940 }
5941 #if DEVELOPMENT || DEBUG
5942 DTRACE_VM4(create_upl_from_executable,
5943 vm_map_t, map,
5944 vm_map_address_t, offset,
5945 upl_size_t, *upl_size,
5946 kern_return_t, ret);
5947 #endif /* DEVELOPMENT || DEBUG */
5948 return ret;
5949 }
5950 #endif /* CONFIG_EMBEDDED */
5951
5952 local_object = VME_OBJECT(entry);
5953 assert(local_object != VM_OBJECT_NULL);
5954
5955 if (!entry->is_sub_map &&
5956 !entry->needs_copy &&
5957 *upl_size != 0 &&
5958 local_object->vo_size > *upl_size && /* partial UPL */
5959 entry->wired_count == 0 && /* No COW for entries that are wired */
5960 (map->pmap != kernel_pmap) && /* alias checks */
5961 (vm_map_entry_should_cow_for_true_share(entry) /* case 1 */
5962 ||
5963 (/* case 2 */
5964 local_object->internal &&
5965 (local_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) &&
5966 local_object->ref_count > 1))) {
5967 vm_prot_t prot;
5968
5969 /*
5970 * Case 1:
5971 * Set up the targeted range for copy-on-write to avoid
5972 * applying true_share/copy_delay to the entire object.
5973 *
5974 * Case 2:
5975 * This map entry covers only part of an internal
5976 * object. There could be other map entries covering
5977 * other areas of this object and some of these map
5978 * entries could be marked as "needs_copy", which
5979 * assumes that the object is COPY_SYMMETRIC.
5980 * To avoid marking this object as COPY_DELAY and
5981 * "true_share", let's shadow it and mark the new
5982 * (smaller) object as "true_share" and COPY_DELAY.
5983 */
5984
5985 if (vm_map_lock_read_to_write(map)) {
5986 goto REDISCOVER_ENTRY;
5987 }
5988 vm_map_lock_assert_exclusive(map);
5989 assert(VME_OBJECT(entry) == local_object);
5990
5991 vm_map_clip_start(map,
5992 entry,
5993 vm_map_trunc_page(offset,
5994 VM_MAP_PAGE_MASK(map)));
5995 vm_map_clip_end(map,
5996 entry,
5997 vm_map_round_page(offset + *upl_size,
5998 VM_MAP_PAGE_MASK(map)));
5999 if ((entry->vme_end - offset) < *upl_size) {
6000 *upl_size = (upl_size_t) (entry->vme_end - offset);
6001 assert(*upl_size == entry->vme_end - offset);
6002 }
6003
6004 prot = entry->protection & ~VM_PROT_WRITE;
6005 if (override_nx(map, VME_ALIAS(entry)) && prot)
6006 prot |= VM_PROT_EXECUTE;
6007 vm_object_pmap_protect(local_object,
6008 VME_OFFSET(entry),
6009 entry->vme_end - entry->vme_start,
6010 ((entry->is_shared ||
6011 map->mapped_in_other_pmaps)
6012 ? PMAP_NULL
6013 : map->pmap),
6014 entry->vme_start,
6015 prot);
6016
6017 assert(entry->wired_count == 0);
6018
6019 /*
6020 * Lock the VM object and re-check its status: if it's mapped
6021 * in another address space, we could still be racing with
6022 * another thread holding that other VM map exclusively.
6023 */
6024 vm_object_lock(local_object);
6025 if (local_object->true_share) {
6026 /* object is already in proper state: no COW needed */
6027 assert(local_object->copy_strategy !=
6028 MEMORY_OBJECT_COPY_SYMMETRIC);
6029 } else {
6030 /* not true_share: ask for copy-on-write below */
6031 assert(local_object->copy_strategy ==
6032 MEMORY_OBJECT_COPY_SYMMETRIC);
6033 entry->needs_copy = TRUE;
6034 }
6035 vm_object_unlock(local_object);
6036
6037 vm_map_lock_write_to_read(map);
6038 }
6039
6040 if (entry->needs_copy) {
6041 /*
6042 * Honor copy-on-write for COPY_SYMMETRIC
6043 * strategy.
6044 */
6045 vm_map_t local_map;
6046 vm_object_t object;
6047 vm_object_offset_t new_offset;
6048 vm_prot_t prot;
6049 boolean_t wired;
6050 vm_map_version_t version;
6051 vm_map_t real_map;
6052 vm_prot_t fault_type;
6053
6054 local_map = map;
6055
6056 if (caller_flags & UPL_COPYOUT_FROM) {
6057 fault_type = VM_PROT_READ | VM_PROT_COPY;
6058 vm_counters.create_upl_extra_cow++;
6059 vm_counters.create_upl_extra_cow_pages +=
6060 (entry->vme_end - entry->vme_start) / PAGE_SIZE;
6061 } else {
6062 fault_type = VM_PROT_WRITE;
6063 }
6064 if (vm_map_lookup_locked(&local_map,
6065 offset, fault_type,
6066 OBJECT_LOCK_EXCLUSIVE,
6067 &version, &object,
6068 &new_offset, &prot, &wired,
6069 NULL,
6070 &real_map) != KERN_SUCCESS) {
6071 if (fault_type == VM_PROT_WRITE) {
6072 vm_counters.create_upl_lookup_failure_write++;
6073 } else {
6074 vm_counters.create_upl_lookup_failure_copy++;
6075 }
6076 vm_map_unlock_read(local_map);
6077 return KERN_FAILURE;
6078 }
6079 if (real_map != map)
6080 vm_map_unlock(real_map);
6081 vm_map_unlock_read(local_map);
6082
6083 vm_object_unlock(object);
6084
6085 goto REDISCOVER_ENTRY;
6086 }
6087
6088 if (entry->is_sub_map) {
6089 vm_map_t submap;
6090
6091 submap = VME_SUBMAP(entry);
6092 local_start = entry->vme_start;
6093 local_offset = VME_OFFSET(entry);
6094
6095 vm_map_reference(submap);
6096 vm_map_unlock_read(map);
6097
6098 ret = vm_map_create_upl(submap,
6099 local_offset + (offset - local_start),
6100 upl_size, upl, page_list, count, flags, tag);
6101 vm_map_deallocate(submap);
6102
6103 return ret;
6104 }
6105
6106 if (sync_cow_data &&
6107 (VME_OBJECT(entry)->shadow ||
6108 VME_OBJECT(entry)->copy)) {
6109 local_object = VME_OBJECT(entry);
6110 local_start = entry->vme_start;
6111 local_offset = VME_OFFSET(entry);
6112
6113 vm_object_reference(local_object);
6114 vm_map_unlock_read(map);
6115
6116 if (local_object->shadow && local_object->copy) {
6117 vm_object_lock_request(local_object->shadow,
6118 ((vm_object_offset_t)
6119 ((offset - local_start) +
6120 local_offset) +
6121 local_object->vo_shadow_offset),
6122 *upl_size, FALSE,
6123 MEMORY_OBJECT_DATA_SYNC,
6124 VM_PROT_NO_CHANGE);
6125 }
6126 sync_cow_data = FALSE;
6127 vm_object_deallocate(local_object);
6128
6129 goto REDISCOVER_ENTRY;
6130 }
6131 if (force_data_sync) {
6132 local_object = VME_OBJECT(entry);
6133 local_start = entry->vme_start;
6134 local_offset = VME_OFFSET(entry);
6135
6136 vm_object_reference(local_object);
6137 vm_map_unlock_read(map);
6138
6139 vm_object_lock_request(local_object,
6140 ((vm_object_offset_t)
6141 ((offset - local_start) +
6142 local_offset)),
6143 (vm_object_size_t)*upl_size,
6144 FALSE,
6145 MEMORY_OBJECT_DATA_SYNC,
6146 VM_PROT_NO_CHANGE);
6147
6148 force_data_sync = FALSE;
6149 vm_object_deallocate(local_object);
6150
6151 goto REDISCOVER_ENTRY;
6152 }
6153 if (VME_OBJECT(entry)->private)
6154 *flags = UPL_DEV_MEMORY;
6155 else
6156 *flags = 0;
6157
6158 if (VME_OBJECT(entry)->phys_contiguous)
6159 *flags |= UPL_PHYS_CONTIG;
6160
6161 local_object = VME_OBJECT(entry);
6162 local_offset = VME_OFFSET(entry);
6163 local_start = entry->vme_start;
6164
6165 #if CONFIG_EMBEDDED
6166 /*
6167 * Wiring will copy the pages to the shadow object.
6168 * The shadow object will not be code-signed so
6169 * attempting to execute code from these copied pages
6170 * would trigger a code-signing violation.
6171 */
6172 if (entry->protection & VM_PROT_EXECUTE) {
6173 #if MACH_ASSERT
6174 printf("pid %d[%s] create_upl out of executable range from "
6175 "0x%llx to 0x%llx: side effects may include "
6176 "code-signing violations later on\n",
6177 proc_selfpid(),
6178 (current_task()->bsd_info
6179 ? proc_name_address(current_task()->bsd_info)
6180 : "?"),
6181 (uint64_t) entry->vme_start,
6182 (uint64_t) entry->vme_end);
6183 #endif /* MACH_ASSERT */
6184 DTRACE_VM2(cs_executable_create_upl,
6185 uint64_t, (uint64_t)entry->vme_start,
6186 uint64_t, (uint64_t)entry->vme_end);
6187 cs_executable_create_upl++;
6188 }
6189 #endif /* CONFIG_EMBEDDED */
6190
6191 vm_object_lock(local_object);
6192
6193 /*
6194 * Ensure that this object is "true_share" and "copy_delay" now,
6195 * while we're still holding the VM map lock. After we unlock the map,
6196 * anything could happen to that mapping, including some copy-on-write
6197 * activity. We need to make sure that the IOPL will point at the
6198 * same memory as the mapping.
6199 */
6200 if (local_object->true_share) {
6201 assert(local_object->copy_strategy !=
6202 MEMORY_OBJECT_COPY_SYMMETRIC);
6203 } else if (local_object != kernel_object &&
6204 local_object != compressor_object &&
6205 !local_object->phys_contiguous) {
6206 #if VM_OBJECT_TRACKING_OP_TRUESHARE
6207 if (!local_object->true_share &&
6208 vm_object_tracking_inited) {
6209 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
6210 int num = 0;
6211 num = OSBacktrace(bt,
6212 VM_OBJECT_TRACKING_BTDEPTH);
6213 btlog_add_entry(vm_object_tracking_btlog,
6214 local_object,
6215 VM_OBJECT_TRACKING_OP_TRUESHARE,
6216 bt,
6217 num);
6218 }
6219 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
6220 local_object->true_share = TRUE;
6221 if (local_object->copy_strategy ==
6222 MEMORY_OBJECT_COPY_SYMMETRIC) {
6223 local_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
6224 }
6225 }
6226
6227 vm_object_reference_locked(local_object);
6228 vm_object_unlock(local_object);
6229
6230 vm_map_unlock_read(map);
6231
6232 ret = vm_object_iopl_request(local_object,
6233 ((vm_object_offset_t)
6234 ((offset - local_start) + local_offset)),
6235 *upl_size,
6236 upl,
6237 page_list,
6238 count,
6239 caller_flags,
6240 tag);
6241 vm_object_deallocate(local_object);
6242
6243 return ret;
6244 }
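/*
 * Minimal call sketch for the routine above (editor's illustration only;
 * 'map', 'uaddr', 'wanted_size' and the flag mix are hypothetical, and real
 * callers normally reach this through higher-level wrappers):
 *
 *	upl_t               upl = NULL;
 *	unsigned int        pl_count = 0;
 *	upl_size_t          upl_size = wanted_size;
 *	upl_control_flags_t upl_flags = UPL_COPYOUT_FROM | UPL_SET_INTERNAL |
 *	                                UPL_SET_LITE | UPL_FILE_IO;
 *	kern_return_t       kr;
 *
 *	kr = vm_map_create_upl(map, uaddr, &upl_size, &upl,
 *	                       NULL, &pl_count, &upl_flags, VM_KERN_MEMORY_FILE);
 *
 * On return, *upl_size may have been trimmed to the containing map entry and
 * *upl_flags may carry UPL_DEV_MEMORY / UPL_PHYS_CONTIG, as set in the body
 * above.
 */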
6245
6246 /*
6247 * Internal routine to enter a UPL into a VM map.
6248 *
6249 * JMM - This should just be doable through the standard
6250 * vm_map_enter() API.
6251 */
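/*
 * Hedged usage sketch (editor's illustration; in-tree callers such as
 * ubc_upl_map()/ubc_upl_unmap() are the definitive reference):
 *
 *	vm_map_offset_t kaddr = 0;
 *	kern_return_t   kr;
 *
 *	kr = vm_map_enter_upl(kernel_map, upl, &kaddr);
 *	if (kr == KERN_SUCCESS) {
 *		// access the UPL's pages through 'kaddr' ...
 *		(void) vm_map_remove_upl(kernel_map, upl);
 *	}
 *
 * The mapping takes an extra reference on the UPL (upl->ref_count++ below);
 * vm_map_remove_upl() drops it again when the mapping is torn down.
 */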
6252 kern_return_t
6253 vm_map_enter_upl(
6254 vm_map_t map,
6255 upl_t upl,
6256 vm_map_offset_t *dst_addr)
6257 {
6258 vm_map_size_t size;
6259 vm_object_offset_t offset;
6260 vm_map_offset_t addr;
6261 vm_page_t m;
6262 kern_return_t kr;
6263 int isVectorUPL = 0, curr_upl=0;
6264 upl_t vector_upl = NULL;
6265 vm_offset_t vector_upl_dst_addr = 0;
6266 vm_map_t vector_upl_submap = NULL;
6267 upl_offset_t subupl_offset = 0;
6268 upl_size_t subupl_size = 0;
6269
6270 if (upl == UPL_NULL)
6271 return KERN_INVALID_ARGUMENT;
6272
6273 if((isVectorUPL = vector_upl_is_valid(upl))) {
6274 int mapped=0,valid_upls=0;
6275 vector_upl = upl;
6276
6277 upl_lock(vector_upl);
6278 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
6279 upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
6280 if(upl == NULL)
6281 continue;
6282 valid_upls++;
6283 if (UPL_PAGE_LIST_MAPPED & upl->flags)
6284 mapped++;
6285 }
6286
6287 if(mapped) {
6288 if(mapped != valid_upls)
6289 				panic("Only %d of the %d sub-upls within the Vector UPL are already mapped\n", mapped, valid_upls);
6290 else {
6291 upl_unlock(vector_upl);
6292 return KERN_FAILURE;
6293 }
6294 }
6295
6296 kr = kmem_suballoc(map, &vector_upl_dst_addr, vector_upl->size, FALSE,
6297 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
6298 &vector_upl_submap);
6299 if( kr != KERN_SUCCESS )
6300 panic("Vector UPL submap allocation failed\n");
6301 map = vector_upl_submap;
6302 vector_upl_set_submap(vector_upl, vector_upl_submap, vector_upl_dst_addr);
6303 curr_upl=0;
6304 }
6305 else
6306 upl_lock(upl);
6307
6308 process_upl_to_enter:
6309 if(isVectorUPL){
6310 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
6311 *dst_addr = vector_upl_dst_addr;
6312 upl_unlock(vector_upl);
6313 return KERN_SUCCESS;
6314 }
6315 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
6316 if(upl == NULL)
6317 goto process_upl_to_enter;
6318
6319 vector_upl_get_iostate(vector_upl, upl, &subupl_offset, &subupl_size);
6320 *dst_addr = (vm_map_offset_t)(vector_upl_dst_addr + (vm_map_offset_t)subupl_offset);
6321 } else {
6322 /*
6323 * check to see if already mapped
6324 */
6325 if (UPL_PAGE_LIST_MAPPED & upl->flags) {
6326 upl_unlock(upl);
6327 return KERN_FAILURE;
6328 }
6329 }
6330 if ((!(upl->flags & UPL_SHADOWED)) &&
6331 ((upl->flags & UPL_HAS_BUSY) ||
6332 !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || (upl->map_object->phys_contiguous)))) {
6333
6334 vm_object_t object;
6335 vm_page_t alias_page;
6336 vm_object_offset_t new_offset;
6337 unsigned int pg_num;
6338 wpl_array_t lite_list;
6339
6340 if (upl->flags & UPL_INTERNAL) {
6341 lite_list = (wpl_array_t)
6342 ((((uintptr_t)upl) + sizeof(struct upl))
6343 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
6344 } else {
6345 lite_list = (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl));
6346 }
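		/*
		 * Layout note (editor's annotation, derived from the arithmetic
		 * above): an internal UPL is laid out as
		 *
		 *	[ struct upl | upl_page_info_t[size/PAGE_SIZE] | lite bitmap ]
		 *
		 * while an external UPL keeps only the lite bitmap immediately
		 * after the upl structure, hence the two different lite_list
		 * starting addresses.
		 */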
6347 object = upl->map_object;
6348 upl->map_object = vm_object_allocate(upl->size);
6349
6350 vm_object_lock(upl->map_object);
6351
6352 upl->map_object->shadow = object;
6353 upl->map_object->pageout = TRUE;
6354 upl->map_object->can_persist = FALSE;
6355 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
6356 upl->map_object->vo_shadow_offset = upl->offset - object->paging_offset;
6357 upl->map_object->wimg_bits = object->wimg_bits;
6358 offset = upl->map_object->vo_shadow_offset;
6359 new_offset = 0;
6360 size = upl->size;
6361
6362 upl->flags |= UPL_SHADOWED;
6363
6364 while (size) {
6365 pg_num = (unsigned int) (new_offset / PAGE_SIZE);
6366 assert(pg_num == new_offset / PAGE_SIZE);
6367
6368 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
6369
6370 VM_PAGE_GRAB_FICTITIOUS(alias_page);
6371
6372 vm_object_lock(object);
6373
6374 m = vm_page_lookup(object, offset);
6375 if (m == VM_PAGE_NULL) {
6376 panic("vm_upl_map: page missing\n");
6377 }
6378
6379 /*
6380 * Convert the fictitious page to a private
6381 * shadow of the real page.
6382 */
6383 assert(alias_page->fictitious);
6384 alias_page->fictitious = FALSE;
6385 alias_page->private = TRUE;
6386 alias_page->free_when_done = TRUE;
6387 /*
6388 * since m is a page in the upl it must
6389 * already be wired or BUSY, so it's
6390 * safe to assign the underlying physical
6391 * page to the alias
6392 */
6393 VM_PAGE_SET_PHYS_PAGE(alias_page, VM_PAGE_GET_PHYS_PAGE(m));
6394
6395 vm_object_unlock(object);
6396
6397 vm_page_lockspin_queues();
6398 vm_page_wire(alias_page, VM_KERN_MEMORY_NONE, TRUE);
6399 vm_page_unlock_queues();
6400
6401 vm_page_insert_wired(alias_page, upl->map_object, new_offset, VM_KERN_MEMORY_NONE);
6402
6403 assert(!alias_page->wanted);
6404 alias_page->busy = FALSE;
6405 alias_page->absent = FALSE;
6406 }
6407 size -= PAGE_SIZE;
6408 offset += PAGE_SIZE_64;
6409 new_offset += PAGE_SIZE_64;
6410 }
6411 vm_object_unlock(upl->map_object);
6412 }
6413 if (upl->flags & UPL_SHADOWED)
6414 offset = 0;
6415 else
6416 offset = upl->offset - upl->map_object->paging_offset;
6417
6418 size = upl->size;
6419
6420 vm_object_reference(upl->map_object);
6421
6422 if(!isVectorUPL) {
6423 *dst_addr = 0;
6424 /*
6425 * NEED A UPL_MAP ALIAS
6426 */
6427 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
6428 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_OSFMK,
6429 upl->map_object, offset, FALSE,
6430 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
6431
6432 if (kr != KERN_SUCCESS) {
6433 vm_object_deallocate(upl->map_object);
6434 upl_unlock(upl);
6435 return(kr);
6436 }
6437 }
6438 else {
6439 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
6440 VM_FLAGS_FIXED, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_OSFMK,
6441 upl->map_object, offset, FALSE,
6442 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
6443 if(kr)
6444 panic("vm_map_enter failed for a Vector UPL\n");
6445 }
6446 vm_object_lock(upl->map_object);
6447
6448 for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) {
6449 m = vm_page_lookup(upl->map_object, offset);
6450
6451 if (m) {
6452 m->pmapped = TRUE;
6453
6454 /* CODE SIGNING ENFORCEMENT: page has been wpmapped,
6455 * but only in kernel space. If this was on a user map,
6456 * we'd have to set the wpmapped bit. */
6457 /* m->wpmapped = TRUE; */
6458 assert(map->pmap == kernel_pmap);
6459
6460 PMAP_ENTER(map->pmap, addr, m, VM_PROT_DEFAULT, VM_PROT_NONE, 0, TRUE, kr);
6461
6462 assert(kr == KERN_SUCCESS);
6463 #if KASAN
6464 kasan_notify_address(addr, PAGE_SIZE_64);
6465 #endif
6466 }
6467 offset += PAGE_SIZE_64;
6468 }
6469 vm_object_unlock(upl->map_object);
6470
6471 /*
6472 * hold a reference for the mapping
6473 */
6474 upl->ref_count++;
6475 upl->flags |= UPL_PAGE_LIST_MAPPED;
6476 upl->kaddr = (vm_offset_t) *dst_addr;
6477 assert(upl->kaddr == *dst_addr);
6478
6479 if(isVectorUPL)
6480 goto process_upl_to_enter;
6481
6482 upl_unlock(upl);
6483
6484 return KERN_SUCCESS;
6485 }
6486
6487 /*
6488 * Internal routine to remove a UPL mapping from a VM map.
6489 *
6490 * XXX - This should just be doable through a standard
6491 * vm_map_remove() operation. Otherwise, implicit clean-up
6492 * of the target map won't be able to correctly remove
6493 * these (and release the reference on the UPL). Having
6494 * to do this means we can't map these into user-space
6495 * maps yet.
6496 */
6497 kern_return_t
6498 vm_map_remove_upl(
6499 vm_map_t map,
6500 upl_t upl)
6501 {
6502 vm_address_t addr;
6503 upl_size_t size;
6504 int isVectorUPL = 0, curr_upl = 0;
6505 upl_t vector_upl = NULL;
6506
6507 if (upl == UPL_NULL)
6508 return KERN_INVALID_ARGUMENT;
6509
6510 if((isVectorUPL = vector_upl_is_valid(upl))) {
6511 int unmapped=0, valid_upls=0;
6512 vector_upl = upl;
6513 upl_lock(vector_upl);
6514 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
6515 upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
6516 if(upl == NULL)
6517 continue;
6518 valid_upls++;
6519 if (!(UPL_PAGE_LIST_MAPPED & upl->flags))
6520 unmapped++;
6521 }
6522
6523 if(unmapped) {
6524 if(unmapped != valid_upls)
6525 panic("%d of the %d sub-upls within the Vector UPL is/are not mapped\n", unmapped, valid_upls);
6526 else {
6527 upl_unlock(vector_upl);
6528 return KERN_FAILURE;
6529 }
6530 }
6531 curr_upl=0;
6532 }
6533 else
6534 upl_lock(upl);
6535
6536 process_upl_to_remove:
6537 if(isVectorUPL) {
6538 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
6539 vm_map_t v_upl_submap;
6540 vm_offset_t v_upl_submap_dst_addr;
6541 vector_upl_get_submap(vector_upl, &v_upl_submap, &v_upl_submap_dst_addr);
6542
6543 vm_map_remove(map, v_upl_submap_dst_addr, v_upl_submap_dst_addr + vector_upl->size, VM_MAP_NO_FLAGS);
6544 vm_map_deallocate(v_upl_submap);
6545 upl_unlock(vector_upl);
6546 return KERN_SUCCESS;
6547 }
6548
6549 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
6550 if(upl == NULL)
6551 goto process_upl_to_remove;
6552 }
6553
6554 if (upl->flags & UPL_PAGE_LIST_MAPPED) {
6555 addr = upl->kaddr;
6556 size = upl->size;
6557
6558 assert(upl->ref_count > 1);
6559 upl->ref_count--; /* removing mapping ref */
6560
6561 upl->flags &= ~UPL_PAGE_LIST_MAPPED;
6562 upl->kaddr = (vm_offset_t) 0;
6563
6564 if(!isVectorUPL) {
6565 upl_unlock(upl);
6566
6567 vm_map_remove(
6568 map,
6569 vm_map_trunc_page(addr,
6570 VM_MAP_PAGE_MASK(map)),
6571 vm_map_round_page(addr + size,
6572 VM_MAP_PAGE_MASK(map)),
6573 VM_MAP_NO_FLAGS);
6574
6575 return KERN_SUCCESS;
6576 }
6577 else {
6578 /*
6579 * If it's a Vectored UPL, we'll be removing the entire
6580 		 * submap anyway, so no need to remove individual UPL
6581 * element mappings from within the submap
6582 */
6583 goto process_upl_to_remove;
6584 }
6585 }
6586 upl_unlock(upl);
6587
6588 return KERN_FAILURE;
6589 }
6590
6591
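/*
 * Typical commit pattern (editor's hedged sketch; 'chunk', 'io_size', 'pl'
 * and 'pl_count' are illustrative names, not taken from a specific caller):
 *
 *	upl_offset_t  off = 0;
 *	boolean_t     empty = FALSE;
 *	kern_return_t kr;
 *
 *	while (off < io_size) {
 *		kr = upl_commit_range(upl, off, chunk, 0, pl, pl_count, &empty);
 *		if (kr != KERN_SUCCESS)
 *			break;
 *		off += chunk;
 *	}
 *	if (empty)
 *		upl_deallocate(upl);
 *
 * Note that *empty is only reported for UPLs carrying UPL_COMMIT_NOTIFY_EMPTY
 * (or for sub-UPLs of a vector UPL) once no occupied pages remain; see the
 * 'occupied' handling near the end of this routine.
 */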
6592 kern_return_t
6593 upl_commit_range(
6594 upl_t upl,
6595 upl_offset_t offset,
6596 upl_size_t size,
6597 int flags,
6598 upl_page_info_t *page_list,
6599 mach_msg_type_number_t count,
6600 boolean_t *empty)
6601 {
6602 upl_size_t xfer_size, subupl_size = size;
6603 vm_object_t shadow_object;
6604 vm_object_t object;
6605 vm_object_t m_object;
6606 vm_object_offset_t target_offset;
6607 upl_offset_t subupl_offset = offset;
6608 int entry;
6609 wpl_array_t lite_list;
6610 int occupied;
6611 int clear_refmod = 0;
6612 int pgpgout_count = 0;
6613 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
6614 struct vm_page_delayed_work *dwp;
6615 int dw_count;
6616 int dw_limit;
6617 int isVectorUPL = 0;
6618 upl_t vector_upl = NULL;
6619 boolean_t should_be_throttled = FALSE;
6620
6621 vm_page_t nxt_page = VM_PAGE_NULL;
6622 int fast_path_possible = 0;
6623 int fast_path_full_commit = 0;
6624 int throttle_page = 0;
6625 int unwired_count = 0;
6626 int local_queue_count = 0;
6627 vm_page_t first_local, last_local;
6628
6629 *empty = FALSE;
6630
6631 if (upl == UPL_NULL)
6632 return KERN_INVALID_ARGUMENT;
6633
6634 if (count == 0)
6635 page_list = NULL;
6636
6637 if((isVectorUPL = vector_upl_is_valid(upl))) {
6638 vector_upl = upl;
6639 upl_lock(vector_upl);
6640 }
6641 else
6642 upl_lock(upl);
6643
6644 process_upl_to_commit:
6645
6646 if(isVectorUPL) {
6647 size = subupl_size;
6648 offset = subupl_offset;
6649 if(size == 0) {
6650 upl_unlock(vector_upl);
6651 return KERN_SUCCESS;
6652 }
6653 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
6654 if(upl == NULL) {
6655 upl_unlock(vector_upl);
6656 return KERN_FAILURE;
6657 }
6658 page_list = UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(upl);
6659 subupl_size -= size;
6660 subupl_offset += size;
6661 }
6662
6663 #if UPL_DEBUG
6664 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
6665 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
6666
6667 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
6668 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
6669
6670 upl->upl_commit_index++;
6671 }
6672 #endif
6673 if (upl->flags & UPL_DEVICE_MEMORY)
6674 xfer_size = 0;
6675 else if ((offset + size) <= upl->size)
6676 xfer_size = size;
6677 else {
6678 if(!isVectorUPL)
6679 upl_unlock(upl);
6680 else {
6681 upl_unlock(vector_upl);
6682 }
6683 return KERN_FAILURE;
6684 }
6685 if (upl->flags & UPL_SET_DIRTY)
6686 flags |= UPL_COMMIT_SET_DIRTY;
6687 if (upl->flags & UPL_CLEAR_DIRTY)
6688 flags |= UPL_COMMIT_CLEAR_DIRTY;
6689
6690 if (upl->flags & UPL_INTERNAL)
6691 lite_list = (wpl_array_t) ((((uintptr_t)upl) + sizeof(struct upl))
6692 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
6693 else
6694 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
6695
6696 object = upl->map_object;
6697
6698 if (upl->flags & UPL_SHADOWED) {
6699 vm_object_lock(object);
6700 shadow_object = object->shadow;
6701 } else {
6702 shadow_object = object;
6703 }
6704 entry = offset/PAGE_SIZE;
6705 target_offset = (vm_object_offset_t)offset;
6706
6707 assert(!(target_offset & PAGE_MASK));
6708 assert(!(xfer_size & PAGE_MASK));
6709
6710 if (upl->flags & UPL_KERNEL_OBJECT)
6711 vm_object_lock_shared(shadow_object);
6712 else
6713 vm_object_lock(shadow_object);
6714
6715 VM_OBJECT_WIRED_PAGE_UPDATE_START(shadow_object);
6716
6717 if (upl->flags & UPL_ACCESS_BLOCKED) {
6718 assert(shadow_object->blocked_access);
6719 shadow_object->blocked_access = FALSE;
6720 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
6721 }
6722
6723 if (shadow_object->code_signed) {
6724 /*
6725 * CODE SIGNING:
6726 * If the object is code-signed, do not let this UPL tell
6727 * us if the pages are valid or not. Let the pages be
6728 * validated by VM the normal way (when they get mapped or
6729 * copied).
6730 */
6731 flags &= ~UPL_COMMIT_CS_VALIDATED;
6732 }
6733 if (! page_list) {
6734 /*
6735 * No page list to get the code-signing info from !?
6736 */
6737 flags &= ~UPL_COMMIT_CS_VALIDATED;
6738 }
6739 if (!VM_DYNAMIC_PAGING_ENABLED() && shadow_object->internal)
6740 should_be_throttled = TRUE;
6741
6742 dwp = &dw_array[0];
6743 dw_count = 0;
6744 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
6745
6746 if ((upl->flags & UPL_IO_WIRE) &&
6747 !(flags & UPL_COMMIT_FREE_ABSENT) &&
6748 !isVectorUPL &&
6749 shadow_object->purgable != VM_PURGABLE_VOLATILE &&
6750 shadow_object->purgable != VM_PURGABLE_EMPTY) {
6751
6752 if (!vm_page_queue_empty(&shadow_object->memq)) {
6753
6754 if (size == shadow_object->vo_size) {
6755 nxt_page = (vm_page_t)vm_page_queue_first(&shadow_object->memq);
6756 fast_path_full_commit = 1;
6757 }
6758 fast_path_possible = 1;
6759
6760 if (!VM_DYNAMIC_PAGING_ENABLED() && shadow_object->internal &&
6761 (shadow_object->purgable == VM_PURGABLE_DENY ||
6762 shadow_object->purgable == VM_PURGABLE_NONVOLATILE ||
6763 shadow_object->purgable == VM_PURGABLE_VOLATILE)) {
6764 throttle_page = 1;
6765 }
6766 }
6767 }
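	/*
	 * Editor's note: 'fast_path_possible' marks the common I/O-wire commit
	 * case against a non-purgeable object, in which pages are unwired and
	 * collected on a local first_local/last_local list during the loop
	 * below, then spliced onto the appropriate global queue in a single
	 * pass afterwards, rather than being handed to the delayed-work
	 * machinery one at a time.
	 */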
6768 first_local = VM_PAGE_NULL;
6769 last_local = VM_PAGE_NULL;
6770
6771 while (xfer_size) {
6772 vm_page_t t, m;
6773
6774 dwp->dw_mask = 0;
6775 clear_refmod = 0;
6776
6777 m = VM_PAGE_NULL;
6778
6779 if (upl->flags & UPL_LITE) {
6780 unsigned int pg_num;
6781
6782 if (nxt_page != VM_PAGE_NULL) {
6783 m = nxt_page;
6784 nxt_page = (vm_page_t)vm_page_queue_next(&nxt_page->listq);
6785 target_offset = m->offset;
6786 }
6787 pg_num = (unsigned int) (target_offset/PAGE_SIZE);
6788 assert(pg_num == target_offset/PAGE_SIZE);
6789
6790 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
6791 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
6792
6793 if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL)
6794 m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset));
6795 } else
6796 m = NULL;
6797 }
6798 if (upl->flags & UPL_SHADOWED) {
6799 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
6800
6801 t->free_when_done = FALSE;
6802
6803 VM_PAGE_FREE(t);
6804
6805 if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL)
6806 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
6807 }
6808 }
6809 if (m == VM_PAGE_NULL)
6810 goto commit_next_page;
6811
6812 m_object = VM_PAGE_OBJECT(m);
6813
6814 if (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
6815 assert(m->busy);
6816
6817 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6818 goto commit_next_page;
6819 }
6820
6821 if (flags & UPL_COMMIT_CS_VALIDATED) {
6822 /*
6823 * CODE SIGNING:
6824 * Set the code signing bits according to
6825 * what the UPL says they should be.
6826 */
6827 m->cs_validated = page_list[entry].cs_validated;
6828 m->cs_tainted = page_list[entry].cs_tainted;
6829 m->cs_nx = page_list[entry].cs_nx;
6830 }
6831 if (flags & UPL_COMMIT_WRITTEN_BY_KERNEL)
6832 m->written_by_kernel = TRUE;
6833
6834 if (upl->flags & UPL_IO_WIRE) {
6835
6836 if (page_list)
6837 page_list[entry].phys_addr = 0;
6838
6839 if (flags & UPL_COMMIT_SET_DIRTY) {
6840 SET_PAGE_DIRTY(m, FALSE);
6841 } else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
6842 m->dirty = FALSE;
6843
6844 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
6845 m->cs_validated && !m->cs_tainted) {
6846 /*
6847 * CODE SIGNING:
6848 * This page is no longer dirty
6849 * but could have been modified,
6850 * so it will need to be
6851 * re-validated.
6852 */
6853 if (m->slid) {
6854 panic("upl_commit_range(%p): page %p was slid\n",
6855 upl, m);
6856 }
6857 assert(!m->slid);
6858 m->cs_validated = FALSE;
6859 #if DEVELOPMENT || DEBUG
6860 vm_cs_validated_resets++;
6861 #endif
6862 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
6863 }
6864 clear_refmod |= VM_MEM_MODIFIED;
6865 }
6866 if (upl->flags & UPL_ACCESS_BLOCKED) {
6867 /*
6868 * We blocked access to the pages in this UPL.
6869 * Clear the "busy" bit and wake up any waiter
6870 * for this page.
6871 */
6872 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6873 }
6874 if (fast_path_possible) {
6875 assert(m_object->purgable != VM_PURGABLE_EMPTY);
6876 assert(m_object->purgable != VM_PURGABLE_VOLATILE);
6877 if (m->absent) {
6878 assert(m->vm_page_q_state == VM_PAGE_NOT_ON_Q);
6879 assert(m->wire_count == 0);
6880 assert(m->busy);
6881
6882 m->absent = FALSE;
6883 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6884 } else {
6885 if (m->wire_count == 0)
6886 panic("wire_count == 0, m = %p, obj = %p\n", m, shadow_object);
6887 assert(m->vm_page_q_state == VM_PAGE_IS_WIRED);
6888
6889 /*
6890 * XXX FBDP need to update some other
6891 * counters here (purgeable_wired_count)
6892 * (ledgers), ...
6893 */
6894 assert(m->wire_count > 0);
6895 m->wire_count--;
6896
6897 if (m->wire_count == 0) {
6898 m->vm_page_q_state = VM_PAGE_NOT_ON_Q;
6899 unwired_count++;
6900 }
6901 }
6902 if (m->wire_count == 0) {
6903 assert(m->pageq.next == 0 && m->pageq.prev == 0);
6904
6905 if (last_local == VM_PAGE_NULL) {
6906 assert(first_local == VM_PAGE_NULL);
6907
6908 last_local = m;
6909 first_local = m;
6910 } else {
6911 assert(first_local != VM_PAGE_NULL);
6912
6913 m->pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
6914 first_local->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(m);
6915 first_local = m;
6916 }
6917 local_queue_count++;
6918
6919 if (throttle_page) {
6920 m->vm_page_q_state = VM_PAGE_ON_THROTTLED_Q;
6921 } else {
6922 if (flags & UPL_COMMIT_INACTIVATE) {
6923 if (shadow_object->internal)
6924 m->vm_page_q_state = VM_PAGE_ON_INACTIVE_INTERNAL_Q;
6925 else
6926 m->vm_page_q_state = VM_PAGE_ON_INACTIVE_EXTERNAL_Q;
6927 } else
6928 m->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;
6929 }
6930 }
6931 } else {
6932 if (flags & UPL_COMMIT_INACTIVATE) {
6933 dwp->dw_mask |= DW_vm_page_deactivate_internal;
6934 clear_refmod |= VM_MEM_REFERENCED;
6935 }
6936 if (m->absent) {
6937 if (flags & UPL_COMMIT_FREE_ABSENT)
6938 dwp->dw_mask |= DW_vm_page_free;
6939 else {
6940 m->absent = FALSE;
6941 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6942
6943 if ( !(dwp->dw_mask & DW_vm_page_deactivate_internal))
6944 dwp->dw_mask |= DW_vm_page_activate;
6945 }
6946 } else
6947 dwp->dw_mask |= DW_vm_page_unwire;
6948 }
6949 goto commit_next_page;
6950 }
6951 assert(m->vm_page_q_state != VM_PAGE_USED_BY_COMPRESSOR);
6952
6953 if (page_list)
6954 page_list[entry].phys_addr = 0;
6955
6956 /*
6957 * make sure to clear the hardware
6958 * modify or reference bits before
6959 * releasing the BUSY bit on this page
6960 * otherwise we risk losing a legitimate
6961 * change of state
6962 */
6963 if (flags & UPL_COMMIT_CLEAR_DIRTY) {
6964 m->dirty = FALSE;
6965
6966 clear_refmod |= VM_MEM_MODIFIED;
6967 }
6968 if (m->laundry)
6969 dwp->dw_mask |= DW_vm_pageout_throttle_up;
6970
6971 if (VM_PAGE_WIRED(m))
6972 m->free_when_done = FALSE;
6973
6974 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
6975 m->cs_validated && !m->cs_tainted) {
6976 /*
6977 * CODE SIGNING:
6978 * This page is no longer dirty
6979 * but could have been modified,
6980 * so it will need to be
6981 * re-validated.
6982 */
6983 if (m->slid) {
6984 panic("upl_commit_range(%p): page %p was slid\n",
6985 upl, m);
6986 }
6987 assert(!m->slid);
6988 m->cs_validated = FALSE;
6989 #if DEVELOPMENT || DEBUG
6990 vm_cs_validated_resets++;
6991 #endif
6992 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
6993 }
6994 if (m->overwriting) {
6995 /*
6996 * the (COPY_OUT_FROM == FALSE) request_page_list case
6997 */
6998 if (m->busy) {
6999 #if CONFIG_PHANTOM_CACHE
7000 if (m->absent && !m_object->internal)
7001 dwp->dw_mask |= DW_vm_phantom_cache_update;
7002 #endif
7003 m->absent = FALSE;
7004
7005 dwp->dw_mask |= DW_clear_busy;
7006 } else {
7007 /*
7008 * alternate (COPY_OUT_FROM == FALSE) page_list case
7009 * Occurs when the original page was wired
7010 * at the time of the list request
7011 */
7012 assert(VM_PAGE_WIRED(m));
7013
7014 dwp->dw_mask |= DW_vm_page_unwire; /* reactivates */
7015 }
7016 m->overwriting = FALSE;
7017 }
7018 m->cleaning = FALSE;
7019
7020 if (m->free_when_done) {
7021 /*
7022 * With the clean queue enabled, UPL_PAGEOUT should
7023 			 * no longer set the pageout bit. Its pages now go
7024 * to the clean queue.
7025 */
7026 assert(!(flags & UPL_PAGEOUT));
7027 assert(!m_object->internal);
7028
7029 m->free_when_done = FALSE;
7030 #if MACH_CLUSTER_STATS
7031 if (m->wanted) vm_pageout_target_collisions++;
7032 #endif
7033 if ((flags & UPL_COMMIT_SET_DIRTY) ||
7034 (m->pmapped && (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED))) {
7035 /*
7036 * page was re-dirtied after we started
7037 * the pageout... reactivate it since
7038 * we don't know whether the on-disk
7039 * copy matches what is now in memory
7040 */
7041 SET_PAGE_DIRTY(m, FALSE);
7042
7043 dwp->dw_mask |= DW_vm_page_activate | DW_PAGE_WAKEUP;
7044
7045 if (upl->flags & UPL_PAGEOUT) {
7046 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
7047 VM_STAT_INCR(reactivations);
7048 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
7049 }
7050 } else {
7051 /*
7052 * page has been successfully cleaned
7053 * go ahead and free it for other use
7054 */
7055 if (m_object->internal) {
7056 DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL);
7057 } else {
7058 DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL);
7059 }
7060 m->dirty = FALSE;
7061 m->busy = TRUE;
7062
7063 dwp->dw_mask |= DW_vm_page_free;
7064 }
7065 goto commit_next_page;
7066 }
7067 #if MACH_CLUSTER_STATS
7068 if (m->wpmapped)
7069 m->dirty = pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m));
7070
7071 if (m->dirty) vm_pageout_cluster_dirtied++;
7072 else vm_pageout_cluster_cleaned++;
7073 if (m->wanted) vm_pageout_cluster_collisions++;
7074 #endif
7075 /*
7076 		 * It is part of the semantics of COPYOUT_FROM
7077 		 * UPLs that a commit implies cache sync
7078 		 * between the vm page and the backing store;
7079 * this can be used to strip the precious bit
7080 * as well as clean
7081 */
7082 if ((upl->flags & UPL_PAGE_SYNC_DONE) || (flags & UPL_COMMIT_CLEAR_PRECIOUS))
7083 m->precious = FALSE;
7084
7085 if (flags & UPL_COMMIT_SET_DIRTY) {
7086 SET_PAGE_DIRTY(m, FALSE);
7087 } else {
7088 m->dirty = FALSE;
7089 }
7090
7091 /* with the clean queue on, move *all* cleaned pages to the clean queue */
7092 if (hibernate_cleaning_in_progress == FALSE && !m->dirty && (upl->flags & UPL_PAGEOUT)) {
7093 pgpgout_count++;
7094
7095 VM_STAT_INCR(pageouts);
7096 DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL);
7097
7098 dwp->dw_mask |= DW_enqueue_cleaned;
7099 vm_pageout_enqueued_cleaned_from_inactive_dirty++;
7100 } else if (should_be_throttled == TRUE && (m->vm_page_q_state == VM_PAGE_NOT_ON_Q)) {
7101 /*
7102 * page coming back in from being 'frozen'...
7103 * it was dirty before it was frozen, so keep it so
7104 * the vm_page_activate will notice that it really belongs
7105 * on the throttle queue and put it there
7106 */
7107 SET_PAGE_DIRTY(m, FALSE);
7108 dwp->dw_mask |= DW_vm_page_activate;
7109
7110 } else {
7111 if ((flags & UPL_COMMIT_INACTIVATE) && !m->clustered && (m->vm_page_q_state != VM_PAGE_ON_SPECULATIVE_Q)) {
7112 dwp->dw_mask |= DW_vm_page_deactivate_internal;
7113 clear_refmod |= VM_MEM_REFERENCED;
7114 } else if ( !VM_PAGE_PAGEABLE(m)) {
7115
7116 if (m->clustered || (flags & UPL_COMMIT_SPECULATE))
7117 dwp->dw_mask |= DW_vm_page_speculate;
7118 else if (m->reference)
7119 dwp->dw_mask |= DW_vm_page_activate;
7120 else {
7121 dwp->dw_mask |= DW_vm_page_deactivate_internal;
7122 clear_refmod |= VM_MEM_REFERENCED;
7123 }
7124 }
7125 }
7126 if (upl->flags & UPL_ACCESS_BLOCKED) {
7127 /*
7128 			 * We blocked access to the pages in this UPL.
7129 * Clear the "busy" bit on this page before we
7130 * wake up any waiter.
7131 */
7132 dwp->dw_mask |= DW_clear_busy;
7133 }
7134 /*
7135 		 * Wake up any thread waiting for the page to no longer be 'cleaning'.
7136 */
7137 dwp->dw_mask |= DW_PAGE_WAKEUP;
7138
7139 commit_next_page:
7140 if (clear_refmod)
7141 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(m), clear_refmod);
7142
7143 target_offset += PAGE_SIZE_64;
7144 xfer_size -= PAGE_SIZE;
7145 entry++;
7146
7147 if (dwp->dw_mask) {
7148 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
7149 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
7150
7151 if (dw_count >= dw_limit) {
7152 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7153
7154 dwp = &dw_array[0];
7155 dw_count = 0;
7156 }
7157 } else {
7158 if (dwp->dw_mask & DW_clear_busy)
7159 m->busy = FALSE;
7160
7161 if (dwp->dw_mask & DW_PAGE_WAKEUP)
7162 PAGE_WAKEUP(m);
7163 }
7164 }
7165 }
7166 if (dw_count)
7167 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7168
7169 if (fast_path_possible) {
7170
7171 assert(shadow_object->purgable != VM_PURGABLE_VOLATILE);
7172 assert(shadow_object->purgable != VM_PURGABLE_EMPTY);
7173
7174 if (local_queue_count || unwired_count) {
7175
7176 if (local_queue_count) {
7177 vm_page_t first_target;
7178 vm_page_queue_head_t *target_queue;
7179
7180 if (throttle_page)
7181 target_queue = &vm_page_queue_throttled;
7182 else {
7183 if (flags & UPL_COMMIT_INACTIVATE) {
7184 if (shadow_object->internal)
7185 target_queue = &vm_page_queue_anonymous;
7186 else
7187 target_queue = &vm_page_queue_inactive;
7188 } else
7189 target_queue = &vm_page_queue_active;
7190 }
7191 /*
7192 				 * Transfer the entire local queue to the appropriate regular LRU page queue.
7193 */
7194 vm_page_lockspin_queues();
7195
7196 first_target = (vm_page_t) vm_page_queue_first(target_queue);
7197
7198 if (vm_page_queue_empty(target_queue))
7199 target_queue->prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
7200 else
7201 first_target->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
7202
7203 target_queue->next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
7204 first_local->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(target_queue);
7205 last_local->pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_target);
7206
7207 /*
7208 * Adjust the global page counts.
7209 */
7210 if (throttle_page) {
7211 vm_page_throttled_count += local_queue_count;
7212 } else {
7213 if (flags & UPL_COMMIT_INACTIVATE) {
7214 if (shadow_object->internal)
7215 vm_page_anonymous_count += local_queue_count;
7216 vm_page_inactive_count += local_queue_count;
7217
7218 token_new_pagecount += local_queue_count;
7219 } else
7220 vm_page_active_count += local_queue_count;
7221
7222 if (shadow_object->internal)
7223 vm_page_pageable_internal_count += local_queue_count;
7224 else
7225 vm_page_pageable_external_count += local_queue_count;
7226 }
7227 } else {
7228 vm_page_lockspin_queues();
7229 }
7230 if (unwired_count) {
7231 vm_page_wire_count -= unwired_count;
7232 VM_CHECK_MEMORYSTATUS;
7233 }
7234 vm_page_unlock_queues();
7235
7236 VM_OBJECT_WIRED_PAGE_COUNT(shadow_object, -unwired_count);
7237 }
7238 }
7239 occupied = 1;
7240
7241 if (upl->flags & UPL_DEVICE_MEMORY) {
7242 occupied = 0;
7243 } else if (upl->flags & UPL_LITE) {
7244 int pg_num;
7245 int i;
7246
7247 occupied = 0;
7248
7249 if (!fast_path_full_commit) {
7250 pg_num = upl->size/PAGE_SIZE;
7251 pg_num = (pg_num + 31) >> 5;
7252
7253 for (i = 0; i < pg_num; i++) {
7254 if (lite_list[i] != 0) {
7255 occupied = 1;
7256 break;
7257 }
7258 }
7259 }
7260 } else {
7261 if (vm_page_queue_empty(&upl->map_object->memq))
7262 occupied = 0;
7263 }
7264 if (occupied == 0) {
7265 /*
7266 * If this UPL element belongs to a Vector UPL and is
7267 * empty, then this is the right function to deallocate
7268 		 * it. So go ahead and set the *empty variable. The flag
7269 		 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view,
7270 		 * should be considered relevant for the Vector UPL and not
7271 * the internal UPLs.
7272 */
7273 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
7274 *empty = TRUE;
7275
7276 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
7277 /*
7278 * this is not a paging object
7279 * so we need to drop the paging reference
7280 * that was taken when we created the UPL
7281 * against this object
7282 */
7283 vm_object_activity_end(shadow_object);
7284 vm_object_collapse(shadow_object, 0, TRUE);
7285 } else {
7286 /*
7287 			 * we donated the paging reference to
7288 * the map object... vm_pageout_object_terminate
7289 * will drop this reference
7290 */
7291 }
7292 }
7293 VM_OBJECT_WIRED_PAGE_UPDATE_END(shadow_object, shadow_object->wire_tag);
7294 vm_object_unlock(shadow_object);
7295 if (object != shadow_object)
7296 vm_object_unlock(object);
7297
7298 if(!isVectorUPL)
7299 upl_unlock(upl);
7300 else {
7301 /*
7302 * If we completed our operations on an UPL that is
7303 * part of a Vectored UPL and if empty is TRUE, then
7304 * we should go ahead and deallocate this UPL element.
7305 * Then we check if this was the last of the UPL elements
7306 * within that Vectored UPL. If so, set empty to TRUE
7307 * so that in ubc_upl_commit_range or ubc_upl_commit, we
7308 * can go ahead and deallocate the Vector UPL too.
7309 */
7310 if(*empty==TRUE) {
7311 *empty = vector_upl_set_subupl(vector_upl, upl, 0);
7312 upl_deallocate(upl);
7313 }
7314 goto process_upl_to_commit;
7315 }
7316 if (pgpgout_count) {
7317 DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL);
7318 }
7319
7320 return KERN_SUCCESS;
7321 }
7322
7323 kern_return_t
7324 upl_abort_range(
7325 upl_t upl,
7326 upl_offset_t offset,
7327 upl_size_t size,
7328 int error,
7329 boolean_t *empty)
7330 {
7331 upl_page_info_t *user_page_list = NULL;
7332 upl_size_t xfer_size, subupl_size = size;
7333 vm_object_t shadow_object;
7334 vm_object_t object;
7335 vm_object_offset_t target_offset;
7336 upl_offset_t subupl_offset = offset;
7337 int entry;
7338 wpl_array_t lite_list;
7339 int occupied;
7340 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
7341 struct vm_page_delayed_work *dwp;
7342 int dw_count;
7343 int dw_limit;
7344 int isVectorUPL = 0;
7345 upl_t vector_upl = NULL;
7346
7347 *empty = FALSE;
7348
7349 if (upl == UPL_NULL)
7350 return KERN_INVALID_ARGUMENT;
7351
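/*
 * Aborting an I/O-wired UPL without dumping its pages is treated
 * as a commit that frees any still-absent pages.
 */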
7352 if ( (upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES) )
7353 return upl_commit_range(upl, offset, size, UPL_COMMIT_FREE_ABSENT, NULL, 0, empty);
7354
7355 if((isVectorUPL = vector_upl_is_valid(upl))) {
7356 vector_upl = upl;
7357 upl_lock(vector_upl);
7358 }
7359 else
7360 upl_lock(upl);
7361
7362 process_upl_to_abort:
7363 if(isVectorUPL) {
7364 size = subupl_size;
7365 offset = subupl_offset;
7366 if(size == 0) {
7367 upl_unlock(vector_upl);
7368 return KERN_SUCCESS;
7369 }
7370 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
7371 if(upl == NULL) {
7372 upl_unlock(vector_upl);
7373 return KERN_FAILURE;
7374 }
7375 subupl_size -= size;
7376 subupl_offset += size;
7377 }
7378
7379 *empty = FALSE;
7380
7381 #if UPL_DEBUG
7382 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
7383 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
7384
7385 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
7386 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
7387 upl->upl_commit_records[upl->upl_commit_index].c_aborted = 1;
7388
7389 upl->upl_commit_index++;
7390 }
7391 #endif
7392 if (upl->flags & UPL_DEVICE_MEMORY)
7393 xfer_size = 0;
7394 else if ((offset + size) <= upl->size)
7395 xfer_size = size;
7396 else {
7397 if(!isVectorUPL)
7398 upl_unlock(upl);
7399 else {
7400 upl_unlock(vector_upl);
7401 }
7402
7403 return KERN_FAILURE;
7404 }
7405 if (upl->flags & UPL_INTERNAL) {
7406 lite_list = (wpl_array_t)
7407 ((((uintptr_t)upl) + sizeof(struct upl))
7408 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
7409
7410 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
7411 } else {
7412 lite_list = (wpl_array_t)
7413 (((uintptr_t)upl) + sizeof(struct upl));
7414 }
7415 object = upl->map_object;
7416
7417 if (upl->flags & UPL_SHADOWED) {
7418 vm_object_lock(object);
7419 shadow_object = object->shadow;
7420 } else
7421 shadow_object = object;
7422
7423 entry = offset/PAGE_SIZE;
7424 target_offset = (vm_object_offset_t)offset;
7425
7426 assert(!(target_offset & PAGE_MASK));
7427 assert(!(xfer_size & PAGE_MASK));
7428
7429 if (upl->flags & UPL_KERNEL_OBJECT)
7430 vm_object_lock_shared(shadow_object);
7431 else
7432 vm_object_lock(shadow_object);
7433
7434 if (upl->flags & UPL_ACCESS_BLOCKED) {
7435 assert(shadow_object->blocked_access);
7436 shadow_object->blocked_access = FALSE;
7437 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
7438 }
7439
7440 dwp = &dw_array[0];
7441 dw_count = 0;
7442 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
7443
7444 if ((error & UPL_ABORT_DUMP_PAGES) && (upl->flags & UPL_KERNEL_OBJECT))
7445 panic("upl_abort_range: kernel_object being DUMPED");
7446
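/*
 * Page-level updates are batched in dw_array and applied by
 * vm_page_do_delayed_work() once dw_limit entries accumulate,
 * so the page queues lock is taken per batch rather than per page.
 */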
7447 while (xfer_size) {
7448 vm_page_t t, m;
7449 unsigned int pg_num;
7450 boolean_t needed;
7451
7452 pg_num = (unsigned int) (target_offset/PAGE_SIZE);
7453 assert(pg_num == target_offset/PAGE_SIZE);
7454
7455 needed = FALSE;
7456
7457 if (user_page_list)
7458 needed = user_page_list[pg_num].needed;
7459
7460 dwp->dw_mask = 0;
7461 m = VM_PAGE_NULL;
7462
7463 if (upl->flags & UPL_LITE) {
7464
7465 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
7466 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
7467
7468 if ( !(upl->flags & UPL_KERNEL_OBJECT))
7469 m = vm_page_lookup(shadow_object, target_offset +
7470 (upl->offset - shadow_object->paging_offset));
7471 }
7472 }
7473 if (upl->flags & UPL_SHADOWED) {
7474 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
7475 t->free_when_done = FALSE;
7476
7477 VM_PAGE_FREE(t);
7478
7479 if (m == VM_PAGE_NULL)
7480 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
7481 }
7482 }
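/*
 * A UPL against the kernel object carries no page-level state to
 * undo here (only the lite_list bit, which was cleared above), so
 * skip the per-page work below.
 */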
7483 if ((upl->flags & UPL_KERNEL_OBJECT))
7484 goto abort_next_page;
7485
7486 if (m != VM_PAGE_NULL) {
7487
7488 assert(m->vm_page_q_state != VM_PAGE_USED_BY_COMPRESSOR);
7489
7490 if (m->absent) {
7491 boolean_t must_free = TRUE;
7492
7493 /*
7494 * COPYOUT = FALSE case
7495 * check for error conditions which must
7496 * be passed back to the page's customer
7497 */
7498 if (error & UPL_ABORT_RESTART) {
7499 m->restart = TRUE;
7500 m->absent = FALSE;
7501 m->unusual = TRUE;
7502 must_free = FALSE;
7503 } else if (error & UPL_ABORT_UNAVAILABLE) {
7504 m->restart = FALSE;
7505 m->unusual = TRUE;
7506 must_free = FALSE;
7507 } else if (error & UPL_ABORT_ERROR) {
7508 m->restart = FALSE;
7509 m->absent = FALSE;
7510 m->error = TRUE;
7511 m->unusual = TRUE;
7512 must_free = FALSE;
7513 }
7514 if (m->clustered && needed == FALSE) {
7515 /*
7516 * This page was a part of a speculative
7517 * read-ahead initiated by the kernel
7518 * itself. No one is expecting this
7519 * page and no one will clean up its
7520 * error state if it ever becomes valid
7521 * in the future.
7522 * We have to free it here.
7523 */
7524 must_free = TRUE;
7525 }
7526 m->cleaning = FALSE;
7527
7528 if (m->overwriting && !m->busy) {
7529 /*
7530 * this shouldn't happen since
7531 * this is an 'absent' page, but
7532 * it doesn't hurt to check for
7533 * the 'alternate' method of
7534 * stabilizing the page...
7535 * we will mark 'busy' to be cleared
7536 * in the following code which will
7537 * take care of the primary stabilization
7538 * method (i.e. setting 'busy' to TRUE)
7539 */
7540 dwp->dw_mask |= DW_vm_page_unwire;
7541 }
7542 m->overwriting = FALSE;
7543
7544 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
7545
7546 if (must_free == TRUE)
7547 dwp->dw_mask |= DW_vm_page_free;
7548 else
7549 dwp->dw_mask |= DW_vm_page_activate;
7550 } else {
7551 /*
7552 * Handle the trusted pager throttle.
7553 */
7554 if (m->laundry)
7555 dwp->dw_mask |= DW_vm_pageout_throttle_up;
7556
7557 if (upl->flags & UPL_ACCESS_BLOCKED) {
7558 /*
7559 * We blocked access to the pages in this UPL.
7560 * Clear the "busy" bit and wake up any waiter
7561 * for this page.
7562 */
7563 dwp->dw_mask |= DW_clear_busy;
7564 }
7565 if (m->overwriting) {
7566 if (m->busy)
7567 dwp->dw_mask |= DW_clear_busy;
7568 else {
7569 /*
7570 * deal with the 'alternate' method
7571 * of stabilizing the page...
7572 * we will either free the page
7573 * or mark 'busy' to be cleared
7574 * in the following code which will
7575 * take care of the primary stabilization
7576 * method (i.e. setting 'busy' to TRUE)
7577 */
7578 dwp->dw_mask |= DW_vm_page_unwire;
7579 }
7580 m->overwriting = FALSE;
7581 }
7582 m->free_when_done = FALSE;
7583 m->cleaning = FALSE;
7584
7585 if (error & UPL_ABORT_DUMP_PAGES) {
7586 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
7587
7588 dwp->dw_mask |= DW_vm_page_free;
7589 } else {
7590 if (!(dwp->dw_mask & DW_vm_page_unwire)) {
7591 if (error & UPL_ABORT_REFERENCE) {
7592 /*
7593 * we've been told to explicitly
7594 * reference this page... for
7595 * file I/O, this is done by
7596 * implementing an LRU on the inactive q
7597 */
7598 dwp->dw_mask |= DW_vm_page_lru;
7599
7600 } else if ( !VM_PAGE_PAGEABLE(m))
7601 dwp->dw_mask |= DW_vm_page_deactivate_internal;
7602 }
7603 dwp->dw_mask |= DW_PAGE_WAKEUP;
7604 }
7605 }
7606 }
7607 abort_next_page:
7608 target_offset += PAGE_SIZE_64;
7609 xfer_size -= PAGE_SIZE;
7610 entry++;
7611
7612 if (dwp->dw_mask) {
7613 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
7614 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
7615
7616 if (dw_count >= dw_limit) {
7617 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7618
7619 dwp = &dw_array[0];
7620 dw_count = 0;
7621 }
7622 } else {
7623 if (dwp->dw_mask & DW_clear_busy)
7624 m->busy = FALSE;
7625
7626 if (dwp->dw_mask & DW_PAGE_WAKEUP)
7627 PAGE_WAKEUP(m);
7628 }
7629 }
7630 }
7631 if (dw_count)
7632 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7633
7634 occupied = 1;
7635
7636 if (upl->flags & UPL_DEVICE_MEMORY) {
7637 occupied = 0;
7638 } else if (upl->flags & UPL_LITE) {
7639 int pg_num;
7640 int i;
7641
7642 pg_num = upl->size/PAGE_SIZE;
7643 pg_num = (pg_num + 31) >> 5;
7644 occupied = 0;
7645
7646 for (i = 0; i < pg_num; i++) {
7647 if (lite_list[i] != 0) {
7648 occupied = 1;
7649 break;
7650 }
7651 }
7652 } else {
7653 if (vm_page_queue_empty(&upl->map_object->memq))
7654 occupied = 0;
7655 }
7656 if (occupied == 0) {
7657 /*
7658 * If this UPL element belongs to a Vector UPL and is
7659 * empty, then this is the right function to deallocate
7660 * it. So go ahead and set the *empty variable. The flag
7661 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view
7662 * should be considered relevant for the Vector UPL and
7663 * not the internal UPLs.
7664 */
7665 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
7666 *empty = TRUE;
7667
7668 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
7669 /*
7670 * this is not a paging object
7671 * so we need to drop the paging reference
7672 * that was taken when we created the UPL
7673 * against this object
7674 */
7675 vm_object_activity_end(shadow_object);
7676 vm_object_collapse(shadow_object, 0, TRUE);
7677 } else {
7678 /*
7679 * we donated the paging reference to
7680 * the map object... vm_pageout_object_terminate
7681 * will drop this reference
7682 */
7683 }
7684 }
7685 vm_object_unlock(shadow_object);
7686 if (object != shadow_object)
7687 vm_object_unlock(object);
7688
7689 if(!isVectorUPL)
7690 upl_unlock(upl);
7691 else {
7692 /*
7693 * If we completed our operations on an UPL that is
7694 * part of a Vectored UPL and if empty is TRUE, then
7695 * we should go ahead and deallocate this UPL element.
7696 * Then we check if this was the last of the UPL elements
7697 * within that Vectored UPL. If so, set empty to TRUE
7698 * so that in ubc_upl_abort_range or ubc_upl_abort, we
7699 * can go ahead and deallocate the Vector UPL too.
7700 */
7701 if(*empty == TRUE) {
7702 *empty = vector_upl_set_subupl(vector_upl, upl,0);
7703 upl_deallocate(upl);
7704 }
7705 goto process_upl_to_abort;
7706 }
7707
7708 return KERN_SUCCESS;
7709 }
7710
7711
7712 kern_return_t
7713 upl_abort(
7714 upl_t upl,
7715 int error)
7716 {
7717 boolean_t empty;
7718
7719 if (upl == UPL_NULL)
7720 return KERN_INVALID_ARGUMENT;
7721
7722 return upl_abort_range(upl, 0, upl->size, error, &empty);
7723 }
7724
7725
7726 /* an option on commit should be wire */
7727 kern_return_t
7728 upl_commit(
7729 upl_t upl,
7730 upl_page_info_t *page_list,
7731 mach_msg_type_number_t count)
7732 {
7733 boolean_t empty;
7734
7735 if (upl == UPL_NULL)
7736 return KERN_INVALID_ARGUMENT;
7737
7738 return upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty);
7739 }
7740
7741
7742 void
7743 iopl_valid_data(
7744 upl_t upl,
7745 vm_tag_t tag)
7746 {
7747 vm_object_t object;
7748 vm_offset_t offset;
7749 vm_page_t m, nxt_page = VM_PAGE_NULL;
7750 upl_size_t size;
7751 int wired_count = 0;
7752
7753 if (upl == NULL)
7754 panic("iopl_valid_data: NULL upl");
7755 if (vector_upl_is_valid(upl))
7756 panic("iopl_valid_data: vector upl");
7757 if ((upl->flags & (UPL_DEVICE_MEMORY|UPL_SHADOWED|UPL_ACCESS_BLOCKED|UPL_IO_WIRE|UPL_INTERNAL)) != UPL_IO_WIRE)
7758 panic("iopl_valid_data: unsupported upl, flags = %x", upl->flags);
7759
7760 object = upl->map_object;
7761
7762 if (object == kernel_object || object == compressor_object)
7763 panic("iopl_valid_data: object == kernel or compressor");
7764
7765 if (object->purgable == VM_PURGABLE_VOLATILE ||
7766 object->purgable == VM_PURGABLE_EMPTY)
7767 panic("iopl_valid_data: object %p purgable %d",
7768 object, object->purgable);
7769
7770 size = upl->size;
7771
7772 vm_object_lock(object);
7773 VM_OBJECT_WIRED_PAGE_UPDATE_START(object);
7774
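/*
 * If the object is fully resident and exactly the size of this UPL,
 * walk its resident page list directly; otherwise look up each page
 * by its offset in the object.
 */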
7775 if (object->vo_size == size && object->resident_page_count == (size / PAGE_SIZE))
7776 nxt_page = (vm_page_t)vm_page_queue_first(&object->memq);
7777 else
7778 offset = 0 + upl->offset - object->paging_offset;
7779
7780 while (size) {
7781
7782 if (nxt_page != VM_PAGE_NULL) {
7783 m = nxt_page;
7784 nxt_page = (vm_page_t)vm_page_queue_next(&nxt_page->listq);
7785 } else {
7786 m = vm_page_lookup(object, offset);
7787 offset += PAGE_SIZE;
7788
7789 if (m == VM_PAGE_NULL)
7790 panic("iopl_valid_data: missing expected page at offset %lx", (long)offset);
7791 }
7792 if (m->busy) {
7793 if (!m->absent)
7794 panic("iopl_valid_data: busy page w/o absent");
7795
7796 if (m->pageq.next || m->pageq.prev)
7797 panic("iopl_valid_data: busy+absent page on page queue");
7798 if (m->reusable) {
7799 panic("iopl_valid_data: %p is reusable", m);
7800 }
7801
7802 m->absent = FALSE;
7803 m->dirty = TRUE;
7804 assert(m->vm_page_q_state == VM_PAGE_NOT_ON_Q);
7805 assert(m->wire_count == 0);
7806 m->wire_count++;
7807 assert(m->wire_count);
7808 if (m->wire_count == 1) {
7809 m->vm_page_q_state = VM_PAGE_IS_WIRED;
7810 wired_count++;
7811 } else {
7812 panic("iopl_valid_data: %p already wired\n", m);
7813 }
7814
7815 PAGE_WAKEUP_DONE(m);
7816 }
7817 size -= PAGE_SIZE;
7818 }
7819 if (wired_count) {
7820
7821 VM_OBJECT_WIRED_PAGE_COUNT(object, wired_count);
7822 assert(object->resident_page_count >= object->wired_page_count);
7823
7824 /* no need to adjust purgeable accounting for this object: */
7825 assert(object->purgable != VM_PURGABLE_VOLATILE);
7826 assert(object->purgable != VM_PURGABLE_EMPTY);
7827
7828 vm_page_lockspin_queues();
7829 vm_page_wire_count += wired_count;
7830 vm_page_unlock_queues();
7831 }
7832 VM_OBJECT_WIRED_PAGE_UPDATE_END(object, tag);
7833 vm_object_unlock(object);
7834 }
7835
7836
7837 void
7838 vm_object_set_pmap_cache_attr(
7839 vm_object_t object,
7840 upl_page_info_array_t user_page_list,
7841 unsigned int num_pages,
7842 boolean_t batch_pmap_op)
7843 {
7844 unsigned int cache_attr = 0;
7845
7846 cache_attr = object->wimg_bits & VM_WIMG_MASK;
7847 assert(user_page_list);
7848 if (cache_attr != VM_WIMG_USE_DEFAULT) {
7849 PMAP_BATCH_SET_CACHE_ATTR(object, user_page_list, cache_attr, num_pages, batch_pmap_op);
7850 }
7851 }
7852
7853
7854 boolean_t vm_object_iopl_wire_full(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t, vm_tag_t);
7855 kern_return_t vm_object_iopl_wire_empty(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t, vm_tag_t, vm_object_offset_t *, int);
7856
7857
7858
7859 boolean_t
7860 vm_object_iopl_wire_full(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list,
7861 wpl_array_t lite_list, upl_control_flags_t cntrl_flags, vm_tag_t tag)
7862 {
7863 vm_page_t dst_page;
7864 unsigned int entry;
7865 int page_count;
7866 int delayed_unlock = 0;
7867 boolean_t retval = TRUE;
7868 ppnum_t phys_page;
7869
7870 vm_object_lock_assert_exclusive(object);
7871 assert(object->purgable != VM_PURGABLE_VOLATILE);
7872 assert(object->purgable != VM_PURGABLE_EMPTY);
7873 assert(object->pager == NULL);
7874 assert(object->copy == NULL);
7875 assert(object->shadow == NULL);
7876
7877 page_count = object->resident_page_count;
7878 dst_page = (vm_page_t)vm_page_queue_first(&object->memq);
7879
7880 vm_page_lock_queues();
7881
7882 while (page_count--) {
7883
7884 if (dst_page->busy ||
7885 dst_page->fictitious ||
7886 dst_page->absent ||
7887 dst_page->error ||
7888 dst_page->cleaning ||
7889 dst_page->restart ||
7890 dst_page->laundry) {
7891 retval = FALSE;
7892 goto done;
7893 }
7894 if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->written_by_kernel == TRUE) {
7895 retval = FALSE;
7896 goto done;
7897 }
7898 dst_page->reference = TRUE;
7899
7900 vm_page_wire(dst_page, tag, FALSE);
7901
7902 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
7903 SET_PAGE_DIRTY(dst_page, FALSE);
7904 }
7905 entry = (unsigned int)(dst_page->offset / PAGE_SIZE);
7906 assert(entry >= 0 && entry < object->resident_page_count);
7907 lite_list[entry>>5] |= 1 << (entry & 31);
7908
7909 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
7910
7911 if (phys_page > upl->highest_page)
7912 upl->highest_page = phys_page;
7913
7914 if (user_page_list) {
7915 user_page_list[entry].phys_addr = phys_page;
7916 user_page_list[entry].absent = dst_page->absent;
7917 user_page_list[entry].dirty = dst_page->dirty;
7918 user_page_list[entry].free_when_done = dst_page->free_when_done;
7919 user_page_list[entry].precious = dst_page->precious;
7920 user_page_list[entry].device = FALSE;
7921 user_page_list[entry].speculative = FALSE;
7922 user_page_list[entry].cs_validated = FALSE;
7923 user_page_list[entry].cs_tainted = FALSE;
7924 user_page_list[entry].cs_nx = FALSE;
7925 user_page_list[entry].needed = FALSE;
7926 user_page_list[entry].mark = FALSE;
7927 }
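/*
 * Yield the page queues lock periodically so a large object
 * doesn't keep it held for the entire walk.
 */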
7928 if (delayed_unlock++ > 256) {
7929 delayed_unlock = 0;
7930 lck_mtx_yield(&vm_page_queue_lock);
7931
7932 VM_CHECK_MEMORYSTATUS;
7933 }
7934 dst_page = (vm_page_t)vm_page_queue_next(&dst_page->listq);
7935 }
7936 done:
7937 vm_page_unlock_queues();
7938
7939 VM_CHECK_MEMORYSTATUS;
7940
7941 return (retval);
7942 }
7943
7944
7945 kern_return_t
7946 vm_object_iopl_wire_empty(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list,
7947 wpl_array_t lite_list, upl_control_flags_t cntrl_flags, vm_tag_t tag, vm_object_offset_t *dst_offset, int page_count)
7948 {
7949 vm_page_t dst_page;
7950 boolean_t no_zero_fill = FALSE;
7951 int interruptible;
7952 int pages_wired = 0;
7953 int pages_inserted = 0;
7954 int entry = 0;
7955 uint64_t delayed_ledger_update = 0;
7956 kern_return_t ret = KERN_SUCCESS;
7957 int grab_options;
7958 ppnum_t phys_page;
7959
7960 vm_object_lock_assert_exclusive(object);
7961 assert(object->purgable != VM_PURGABLE_VOLATILE);
7962 assert(object->purgable != VM_PURGABLE_EMPTY);
7963 assert(object->pager == NULL);
7964 assert(object->copy == NULL);
7965 assert(object->shadow == NULL);
7966
7967 if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
7968 interruptible = THREAD_ABORTSAFE;
7969 else
7970 interruptible = THREAD_UNINT;
7971
7972 if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO))
7973 no_zero_fill = TRUE;
7974
7975 grab_options = 0;
7976 #if CONFIG_SECLUDED_MEMORY
7977 if (object->can_grab_secluded) {
7978 grab_options |= VM_PAGE_GRAB_SECLUDED;
7979 }
7980 #endif /* CONFIG_SECLUDED_MEMORY */
7981
7982 while (page_count--) {
7983
7984 while ((dst_page = vm_page_grab_options(grab_options))
7985 == VM_PAGE_NULL) {
7986
7987 OSAddAtomic(page_count, &vm_upl_wait_for_pages);
7988
7989 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
7990
7991 if (vm_page_wait(interruptible) == FALSE) {
7992 /*
7993 * interrupted case
7994 */
7995 OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
7996
7997 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
7998
7999 ret = MACH_SEND_INTERRUPTED;
8000 goto done;
8001 }
8002 OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
8003
8004 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
8005 }
8006 if (no_zero_fill == FALSE)
8007 vm_page_zero_fill(dst_page);
8008 else
8009 dst_page->absent = TRUE;
8010
8011 dst_page->reference = TRUE;
8012
8013 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
8014 SET_PAGE_DIRTY(dst_page, FALSE);
8015 }
8016 if (dst_page->absent == FALSE) {
8017 assert(dst_page->vm_page_q_state == VM_PAGE_NOT_ON_Q);
8018 assert(dst_page->wire_count == 0);
8019 dst_page->wire_count++;
8020 dst_page->vm_page_q_state = VM_PAGE_IS_WIRED;
8021 assert(dst_page->wire_count);
8022 pages_wired++;
8023 PAGE_WAKEUP_DONE(dst_page);
8024 }
8025 pages_inserted++;
8026
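/*
 * Purgeable ledger charges for these insertions are accumulated
 * in delayed_ledger_update and credited once after the loop
 * instead of per page.
 */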
8027 vm_page_insert_internal(dst_page, object, *dst_offset, tag, FALSE, TRUE, TRUE, TRUE, &delayed_ledger_update);
8028
8029 lite_list[entry>>5] |= 1 << (entry & 31);
8030
8031 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8032
8033 if (phys_page > upl->highest_page)
8034 upl->highest_page = phys_page;
8035
8036 if (user_page_list) {
8037 user_page_list[entry].phys_addr = phys_page;
8038 user_page_list[entry].absent = dst_page->absent;
8039 user_page_list[entry].dirty = dst_page->dirty;
8040 user_page_list[entry].free_when_done = FALSE;
8041 user_page_list[entry].precious = FALSE;
8042 user_page_list[entry].device = FALSE;
8043 user_page_list[entry].speculative = FALSE;
8044 user_page_list[entry].cs_validated = FALSE;
8045 user_page_list[entry].cs_tainted = FALSE;
8046 user_page_list[entry].cs_nx = FALSE;
8047 user_page_list[entry].needed = FALSE;
8048 user_page_list[entry].mark = FALSE;
8049 }
8050 entry++;
8051 *dst_offset += PAGE_SIZE_64;
8052 }
8053 done:
8054 if (pages_wired) {
8055 vm_page_lockspin_queues();
8056 vm_page_wire_count += pages_wired;
8057 vm_page_unlock_queues();
8058 }
8059 if (pages_inserted) {
8060 if (object->internal) {
8061 OSAddAtomic(pages_inserted, &vm_page_internal_count);
8062 } else {
8063 OSAddAtomic(pages_inserted, &vm_page_external_count);
8064 }
8065 }
8066 if (delayed_ledger_update) {
8067 task_t owner;
8068
8069 owner = object->vo_purgeable_owner;
8070 assert(owner);
8071
8072 /* more non-volatile bytes */
8073 ledger_credit(owner->ledger,
8074 task_ledgers.purgeable_nonvolatile,
8075 delayed_ledger_update);
8076 /* more footprint */
8077 ledger_credit(owner->ledger,
8078 task_ledgers.phys_footprint,
8079 delayed_ledger_update);
8080 }
8081 return (ret);
8082 }
8083
8084
8085 unsigned int vm_object_iopl_request_sleep_for_cleaning = 0;
8086
8087
8088 kern_return_t
8089 vm_object_iopl_request(
8090 vm_object_t object,
8091 vm_object_offset_t offset,
8092 upl_size_t size,
8093 upl_t *upl_ptr,
8094 upl_page_info_array_t user_page_list,
8095 unsigned int *page_list_count,
8096 upl_control_flags_t cntrl_flags,
8097 vm_tag_t tag)
8098 {
8099 vm_page_t dst_page;
8100 vm_object_offset_t dst_offset;
8101 upl_size_t xfer_size;
8102 upl_t upl = NULL;
8103 unsigned int entry;
8104 wpl_array_t lite_list = NULL;
8105 int no_zero_fill = FALSE;
8106 unsigned int size_in_pages;
8107 u_int32_t psize;
8108 kern_return_t ret;
8109 vm_prot_t prot;
8110 struct vm_object_fault_info fault_info;
8111 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
8112 struct vm_page_delayed_work *dwp;
8113 int dw_count;
8114 int dw_limit;
8115 int dw_index;
8116 boolean_t caller_lookup;
8117 int io_tracking_flag = 0;
8118 int interruptible;
8119 ppnum_t phys_page;
8120
8121 boolean_t set_cache_attr_needed = FALSE;
8122 boolean_t free_wired_pages = FALSE;
8123 boolean_t fast_path_empty_req = FALSE;
8124 boolean_t fast_path_full_req = FALSE;
8125
8126 if (cntrl_flags & ~UPL_VALID_FLAGS) {
8127 /*
8128 * For forward compatibility's sake,
8129 * reject any unknown flag.
8130 */
8131 return KERN_INVALID_VALUE;
8132 }
8133 if (vm_lopage_needed == FALSE)
8134 cntrl_flags &= ~UPL_NEED_32BIT_ADDR;
8135
8136 if (cntrl_flags & UPL_NEED_32BIT_ADDR) {
8137 if ( (cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE))
8138 return KERN_INVALID_VALUE;
8139
8140 if (object->phys_contiguous) {
8141 if ((offset + object->vo_shadow_offset) >= (vm_object_offset_t)max_valid_dma_address)
8142 return KERN_INVALID_ADDRESS;
8143
8144 if (((offset + object->vo_shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address)
8145 return KERN_INVALID_ADDRESS;
8146 }
8147 }
8148 if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO))
8149 no_zero_fill = TRUE;
8150
8151 if (cntrl_flags & UPL_COPYOUT_FROM)
8152 prot = VM_PROT_READ;
8153 else
8154 prot = VM_PROT_READ | VM_PROT_WRITE;
8155
8156 if ((!object->internal) && (object->paging_offset != 0))
8157 panic("vm_object_iopl_request: external object with non-zero paging offset\n");
8158
8159 #if CONFIG_IOSCHED || UPL_DEBUG
8160 if ((object->io_tracking && object != kernel_object) || upl_debug_enabled)
8161 io_tracking_flag |= UPL_CREATE_IO_TRACKING;
8162 #endif
8163
8164 #if CONFIG_IOSCHED
8165 if (object->io_tracking) {
8166 /* Check if we're dealing with the kernel object. We do not support expedite on kernel object UPLs */
8167 if (object != kernel_object)
8168 io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
8169 }
8170 #endif
8171
8172 if (object->phys_contiguous)
8173 psize = PAGE_SIZE;
8174 else
8175 psize = size;
8176
8177 if (cntrl_flags & UPL_SET_INTERNAL) {
8178 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
8179
8180 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
8181 lite_list = (wpl_array_t) (((uintptr_t)user_page_list) +
8182 ((psize / PAGE_SIZE) * sizeof(upl_page_info_t)));
8183 if (size == 0) {
8184 user_page_list = NULL;
8185 lite_list = NULL;
8186 }
8187 } else {
8188 upl = upl_create(UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
8189
8190 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
8191 if (size == 0) {
8192 lite_list = NULL;
8193 }
8194 }
8195 if (user_page_list)
8196 user_page_list[0].device = FALSE;
8197 *upl_ptr = upl;
8198
8199 upl->map_object = object;
8200 upl->size = size;
8201
8202 size_in_pages = size / PAGE_SIZE;
8203
8204 if (object == kernel_object &&
8205 !(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS))) {
8206 upl->flags |= UPL_KERNEL_OBJECT;
8207 #if UPL_DEBUG
8208 vm_object_lock(object);
8209 #else
8210 vm_object_lock_shared(object);
8211 #endif
8212 } else {
8213 vm_object_lock(object);
8214 vm_object_activity_begin(object);
8215 }
8216 /*
8217 * paging in progress also protects the paging_offset
8218 */
8219 upl->offset = offset + object->paging_offset;
8220
8221 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8222 /*
8223 * The user requested that access to the pages in this UPL
8224 * be blocked until the UPL is committed or aborted.
8225 */
8226 upl->flags |= UPL_ACCESS_BLOCKED;
8227 }
8228
8229 #if CONFIG_IOSCHED || UPL_DEBUG
8230 if (upl->flags & UPL_TRACKED_BY_OBJECT) {
8231 vm_object_activity_begin(object);
8232 queue_enter(&object->uplq, upl, upl_t, uplq);
8233 }
8234 #endif
8235
8236 if (object->phys_contiguous) {
8237
8238 if (upl->flags & UPL_ACCESS_BLOCKED) {
8239 assert(!object->blocked_access);
8240 object->blocked_access = TRUE;
8241 }
8242
8243 vm_object_unlock(object);
8244
8245 /*
8246 * don't need any shadow mappings for this one
8247 * since it is already I/O memory
8248 */
8249 upl->flags |= UPL_DEVICE_MEMORY;
8250
8251 upl->highest_page = (ppnum_t) ((offset + object->vo_shadow_offset + size - 1)>>PAGE_SHIFT);
8252
8253 if (user_page_list) {
8254 user_page_list[0].phys_addr = (ppnum_t) ((offset + object->vo_shadow_offset)>>PAGE_SHIFT);
8255 user_page_list[0].device = TRUE;
8256 }
8257 if (page_list_count != NULL) {
8258 if (upl->flags & UPL_INTERNAL)
8259 *page_list_count = 0;
8260 else
8261 *page_list_count = 1;
8262 }
8263 return KERN_SUCCESS;
8264 }
8265 if (object != kernel_object && object != compressor_object) {
8266 /*
8267 * Protect user space from future COW operations
8268 */
8269 #if VM_OBJECT_TRACKING_OP_TRUESHARE
8270 if (!object->true_share &&
8271 vm_object_tracking_inited) {
8272 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
8273 int num = 0;
8274
8275 num = OSBacktrace(bt,
8276 VM_OBJECT_TRACKING_BTDEPTH);
8277 btlog_add_entry(vm_object_tracking_btlog,
8278 object,
8279 VM_OBJECT_TRACKING_OP_TRUESHARE,
8280 bt,
8281 num);
8282 }
8283 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
8284
8285 vm_object_lock_assert_exclusive(object);
8286 object->true_share = TRUE;
8287
8288 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
8289 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
8290 }
8291
8292 if (!(cntrl_flags & UPL_COPYOUT_FROM) &&
8293 object->copy != VM_OBJECT_NULL) {
8294 /*
8295 * Honor copy-on-write obligations
8296 *
8297 * The caller is gathering these pages and
8298 * might modify their contents. We need to
8299 * make sure that the copy object has its own
8300 * private copies of these pages before we let
8301 * the caller modify them.
8302 *
8303 * NOTE: someone else could map the original object
8304 * after we've done this copy-on-write here, and they
8305 * could then see an inconsistent picture of the memory
8306 * while it's being modified via the UPL. To prevent this,
8307 * we would have to block access to these pages until the
8308 * UPL is released. We could use the UPL_BLOCK_ACCESS
8309 * code path for that...
8310 */
8311 vm_object_update(object,
8312 offset,
8313 size,
8314 NULL,
8315 NULL,
8316 FALSE, /* should_return */
8317 MEMORY_OBJECT_COPY_SYNC,
8318 VM_PROT_NO_CHANGE);
8319 #if DEVELOPMENT || DEBUG
8320 iopl_cow++;
8321 iopl_cow_pages += size >> PAGE_SHIFT;
8322 #endif
8323 }
8324 if (!(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS)) &&
8325 object->purgable != VM_PURGABLE_VOLATILE &&
8326 object->purgable != VM_PURGABLE_EMPTY &&
8327 object->copy == NULL &&
8328 size == object->vo_size &&
8329 offset == 0 &&
8330 object->shadow == NULL &&
8331 object->pager == NULL)
8332 {
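/*
 * A private, pager-less object covered exactly by this request can
 * take one of two fast paths: wire the pages in place if the object
 * is fully resident, or grab and wire fresh pages if it is empty.
 */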
8333 if (object->resident_page_count == size_in_pages)
8334 {
8335 assert(object != compressor_object);
8336 assert(object != kernel_object);
8337 fast_path_full_req = TRUE;
8338 }
8339 else if (object->resident_page_count == 0)
8340 {
8341 assert(object != compressor_object);
8342 assert(object != kernel_object);
8343 fast_path_empty_req = TRUE;
8344 set_cache_attr_needed = TRUE;
8345 }
8346 }
8347
8348 if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
8349 interruptible = THREAD_ABORTSAFE;
8350 else
8351 interruptible = THREAD_UNINT;
8352
8353 entry = 0;
8354
8355 xfer_size = size;
8356 dst_offset = offset;
8357 dw_count = 0;
8358
8359 if (fast_path_full_req) {
8360
8361 if (vm_object_iopl_wire_full(object, upl, user_page_list, lite_list, cntrl_flags, tag) == TRUE)
8362 goto finish;
8363 /*
8364 * we couldn't complete the processing of this request on the fast path
8365 * so fall through to the slow path and finish up
8366 */
8367
8368 } else if (fast_path_empty_req) {
8369
8370 if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8371 ret = KERN_MEMORY_ERROR;
8372 goto return_err;
8373 }
8374 ret = vm_object_iopl_wire_empty(object, upl, user_page_list, lite_list, cntrl_flags, tag, &dst_offset, size_in_pages);
8375
8376 if (ret) {
8377 free_wired_pages = TRUE;
8378 goto return_err;
8379 }
8380 goto finish;
8381 }
8382
8383 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
8384 fault_info.user_tag = 0;
8385 fault_info.lo_offset = offset;
8386 fault_info.hi_offset = offset + xfer_size;
8387 fault_info.no_cache = FALSE;
8388 fault_info.stealth = FALSE;
8389 fault_info.io_sync = FALSE;
8390 fault_info.cs_bypass = FALSE;
8391 fault_info.mark_zf_absent = TRUE;
8392 fault_info.interruptible = interruptible;
8393 fault_info.batch_pmap_op = TRUE;
8394
8395 dwp = &dw_array[0];
8396 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
8397
8398 while (xfer_size) {
8399 vm_fault_return_t result;
8400
8401 dwp->dw_mask = 0;
8402
8403 if (fast_path_full_req) {
8404 /*
8405 * if we get here, it means that we ran into a page
8406 * state we couldn't handle in the fast path and
8407 * bailed out to the slow path... since the order
8408 * we look at pages is different between the 2 paths,
8409 * the following check is needed to determine whether
8410 * this page was already processed in the fast path
8411 */
8412 if (lite_list[entry>>5] & (1 << (entry & 31)))
8413 goto skip_page;
8414 }
8415 dst_page = vm_page_lookup(object, dst_offset);
8416
8417 if (dst_page == VM_PAGE_NULL ||
8418 dst_page->busy ||
8419 dst_page->error ||
8420 dst_page->restart ||
8421 dst_page->absent ||
8422 dst_page->fictitious) {
8423
8424 if (object == kernel_object)
8425 panic("vm_object_iopl_request: missing/bad page in kernel object\n");
8426 if (object == compressor_object)
8427 panic("vm_object_iopl_request: missing/bad page in compressor object\n");
8428
8429 if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8430 ret = KERN_MEMORY_ERROR;
8431 goto return_err;
8432 }
8433 set_cache_attr_needed = TRUE;
8434
8435 /*
8436 * We just looked up the page and the result remains valid
8437 * until the object lock is released, so send it to
8438 * vm_fault_page() (as "dst_page"), to avoid having to
8439 * look it up again there.
8440 */
8441 caller_lookup = TRUE;
8442
8443 do {
8444 vm_page_t top_page;
8445 kern_return_t error_code;
8446
8447 fault_info.cluster_size = xfer_size;
8448
8449 vm_object_paging_begin(object);
8450
8451 result = vm_fault_page(object, dst_offset,
8452 prot | VM_PROT_WRITE, FALSE,
8453 caller_lookup,
8454 &prot, &dst_page, &top_page,
8455 (int *)0,
8456 &error_code, no_zero_fill,
8457 FALSE, &fault_info);
8458
8459 /* our lookup is no longer valid at this point */
8460 caller_lookup = FALSE;
8461
8462 switch (result) {
8463
8464 case VM_FAULT_SUCCESS:
8465
8466 if ( !dst_page->absent) {
8467 PAGE_WAKEUP_DONE(dst_page);
8468 } else {
8469 /*
8470 * we only get back an absent page if we
8471 * requested that it not be zero-filled
8472 * because we are about to fill it via I/O
8473 *
8474 * absent pages should be left BUSY
8475 * to prevent them from being faulted
8476 * into an address space before we've
8477 * had a chance to complete the I/O on
8478 * them since they may contain info that
8479 * shouldn't be seen by the faulting task
8480 */
8481 }
8482 /*
8483 * Release paging references and
8484 * top-level placeholder page, if any.
8485 */
8486 if (top_page != VM_PAGE_NULL) {
8487 vm_object_t local_object;
8488
8489 local_object = VM_PAGE_OBJECT(top_page);
8490
8491 /*
8492 * comparing 2 packed pointers
8493 */
8494 if (top_page->vm_page_object != dst_page->vm_page_object) {
8495 vm_object_lock(local_object);
8496 VM_PAGE_FREE(top_page);
8497 vm_object_paging_end(local_object);
8498 vm_object_unlock(local_object);
8499 } else {
8500 VM_PAGE_FREE(top_page);
8501 vm_object_paging_end(local_object);
8502 }
8503 }
8504 vm_object_paging_end(object);
8505 break;
8506
8507 case VM_FAULT_RETRY:
8508 vm_object_lock(object);
8509 break;
8510
8511 case VM_FAULT_MEMORY_SHORTAGE:
8512 OSAddAtomic((size_in_pages - entry), &vm_upl_wait_for_pages);
8513
8514 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
8515
8516 if (vm_page_wait(interruptible)) {
8517 OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
8518
8519 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
8520 vm_object_lock(object);
8521
8522 break;
8523 }
8524 OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
8525
8526 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
8527
8528 /* fall thru */
8529
8530 case VM_FAULT_INTERRUPTED:
8531 error_code = MACH_SEND_INTERRUPTED;
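/* fall thru to the memory error case */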
8532 case VM_FAULT_MEMORY_ERROR:
8533 memory_error:
8534 ret = (error_code ? error_code: KERN_MEMORY_ERROR);
8535
8536 vm_object_lock(object);
8537 goto return_err;
8538
8539 case VM_FAULT_SUCCESS_NO_VM_PAGE:
8540 /* success but no page: fail */
8541 vm_object_paging_end(object);
8542 vm_object_unlock(object);
8543 goto memory_error;
8544
8545 default:
8546 panic("vm_object_iopl_request: unexpected error"
8547 " 0x%x from vm_fault_page()\n", result);
8548 }
8549 } while (result != VM_FAULT_SUCCESS);
8550
8551 }
8552 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8553
8554 if (upl->flags & UPL_KERNEL_OBJECT)
8555 goto record_phys_addr;
8556
8557 if (dst_page->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
8558 dst_page->busy = TRUE;
8559 goto record_phys_addr;
8560 }
8561
8562 if (dst_page->cleaning) {
8563 /*
8564 * Someone else is cleaning this page in place.
8565 * In theory, we should be able to proceed and use this
8566 * page, but the cleaner will probably clear the "busy"
8567 * bit in upl_commit_range() even though it didn't set it,
8568 * which would clear our "busy" bit and open us to race
8569 * conditions.
8570 * We'd better wait for the cleaning to complete and
8571 * then try again.
8572 */
8573 vm_object_iopl_request_sleep_for_cleaning++;
8574 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
8575 continue;
8576 }
8577 if (dst_page->laundry)
8578 vm_pageout_steal_laundry(dst_page, FALSE);
8579
8580 if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) &&
8581 phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) {
8582 vm_page_t low_page;
8583 int refmod;
8584
8585 /*
8586 * support devices that can't DMA above 32 bits
8587 * by substituting pages from a pool of low address
8588 * memory for any pages we find above the 4G mark.
8589 * We can't substitute if the page is already wired, because
8590 * we don't know whether that physical address has been
8591 * handed out to some other 64-bit-capable DMA device to use.
8592 */
8593 if (VM_PAGE_WIRED(dst_page)) {
8594 ret = KERN_PROTECTION_FAILURE;
8595 goto return_err;
8596 }
8597 low_page = vm_page_grablo();
8598
8599 if (low_page == VM_PAGE_NULL) {
8600 ret = KERN_RESOURCE_SHORTAGE;
8601 goto return_err;
8602 }
8603 /*
8604 * from here until the vm_page_replace completes
8605 * we mustn't drop the object lock... we don't
8606 * want anyone refaulting this page in and using
8607 * it after we disconnect it... we want the fault
8608 * to find the new page being substituted.
8609 */
8610 if (dst_page->pmapped)
8611 refmod = pmap_disconnect(phys_page);
8612 else
8613 refmod = 0;
8614
8615 if (!dst_page->absent)
8616 vm_page_copy(dst_page, low_page);
8617
8618 low_page->reference = dst_page->reference;
8619 low_page->dirty = dst_page->dirty;
8620 low_page->absent = dst_page->absent;
8621
8622 if (refmod & VM_MEM_REFERENCED)
8623 low_page->reference = TRUE;
8624 if (refmod & VM_MEM_MODIFIED) {
8625 SET_PAGE_DIRTY(low_page, FALSE);
8626 }
8627
8628 vm_page_replace(low_page, object, dst_offset);
8629
8630 dst_page = low_page;
8631 /*
8632 * vm_page_grablo returned the page marked
8633 * BUSY... we don't need a PAGE_WAKEUP_DONE
8634 * here, because we've never dropped the object lock
8635 */
8636 if ( !dst_page->absent)
8637 dst_page->busy = FALSE;
8638
8639 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8640 }
8641 if ( !dst_page->busy)
8642 dwp->dw_mask |= DW_vm_page_wire;
8643
8644 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8645 /*
8646 * Mark the page "busy" to block any future page fault
8647 * on this page in addition to wiring it.
8648 * We'll also remove the mapping
8649 * of all these pages before leaving this routine.
8650 */
8651 assert(!dst_page->fictitious);
8652 dst_page->busy = TRUE;
8653 }
8654 /*
8655 * expect the page to be used
8656 * page queues lock must be held to set 'reference'
8657 */
8658 dwp->dw_mask |= DW_set_reference;
8659
8660 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
8661 SET_PAGE_DIRTY(dst_page, TRUE);
8662 }
8663 if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->written_by_kernel == TRUE) {
8664 pmap_sync_page_attributes_phys(phys_page);
8665 dst_page->written_by_kernel = FALSE;
8666 }
8667
8668 record_phys_addr:
8669 if (dst_page->busy)
8670 upl->flags |= UPL_HAS_BUSY;
8671
8672 lite_list[entry>>5] |= 1 << (entry & 31);
8673
8674 if (phys_page > upl->highest_page)
8675 upl->highest_page = phys_page;
8676
8677 if (user_page_list) {
8678 user_page_list[entry].phys_addr = phys_page;
8679 user_page_list[entry].free_when_done = dst_page->free_when_done;
8680 user_page_list[entry].absent = dst_page->absent;
8681 user_page_list[entry].dirty = dst_page->dirty;
8682 user_page_list[entry].precious = dst_page->precious;
8683 user_page_list[entry].device = FALSE;
8684 user_page_list[entry].needed = FALSE;
8685 if (dst_page->clustered == TRUE)
8686 user_page_list[entry].speculative = (dst_page->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE;
8687 else
8688 user_page_list[entry].speculative = FALSE;
8689 user_page_list[entry].cs_validated = dst_page->cs_validated;
8690 user_page_list[entry].cs_tainted = dst_page->cs_tainted;
8691 user_page_list[entry].cs_nx = dst_page->cs_nx;
8692 user_page_list[entry].mark = FALSE;
8693 }
8694 if (object != kernel_object && object != compressor_object) {
8695 /*
8696 * someone is explicitly grabbing this page...
8697 * update clustered and speculative state
8698 *
8699 */
8700 if (dst_page->clustered)
8701 VM_PAGE_CONSUME_CLUSTERED(dst_page);
8702 }
8703 skip_page:
8704 entry++;
8705 dst_offset += PAGE_SIZE_64;
8706 xfer_size -= PAGE_SIZE;
8707
8708 if (dwp->dw_mask) {
8709 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
8710
8711 if (dw_count >= dw_limit) {
8712 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
8713
8714 dwp = &dw_array[0];
8715 dw_count = 0;
8716 }
8717 }
8718 }
8719 assert(entry == size_in_pages);
8720
8721 if (dw_count)
8722 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
8723 finish:
8724 if (user_page_list && set_cache_attr_needed == TRUE)
8725 vm_object_set_pmap_cache_attr(object, user_page_list, size_in_pages, TRUE);
8726
8727 if (page_list_count != NULL) {
8728 if (upl->flags & UPL_INTERNAL)
8729 *page_list_count = 0;
8730 else if (*page_list_count > size_in_pages)
8731 *page_list_count = size_in_pages;
8732 }
8733 vm_object_unlock(object);
8734
8735 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8736 /*
8737 * We've marked all the pages "busy" so that future
8738 * page faults will block.
8739 * Now remove the mapping for these pages, so that they
8740 * can't be accessed without causing a page fault.
8741 */
8742 vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
8743 PMAP_NULL, 0, VM_PROT_NONE);
8744 assert(!object->blocked_access);
8745 object->blocked_access = TRUE;
8746 }
8747
8748 return KERN_SUCCESS;
8749
8750 return_err:
8751 dw_index = 0;
8752
8753 for (; offset < dst_offset; offset += PAGE_SIZE) {
8754 boolean_t need_unwire;
8755
8756 dst_page = vm_page_lookup(object, offset);
8757
8758 if (dst_page == VM_PAGE_NULL)
8759 panic("vm_object_iopl_request: Wired page missing. \n");
8760
8761 /*
8762 * if we've already processed this page in an earlier
8763 * dw_do_work, we need to undo the wiring... we will
8764 * leave the dirty and reference bits on if they
8765 * were set, since we don't have a good way of knowing
8766 * what the previous state was and we won't get here
8767 * under any normal circumstances... we will always
8768 * clear BUSY and wakeup any waiters via vm_page_free
8769 * or PAGE_WAKEUP_DONE
8770 */
8771 need_unwire = TRUE;
8772
8773 if (dw_count) {
8774 if (dw_array[dw_index].dw_m == dst_page) {
8775 /*
8776 * still in the deferred work list
8777 * which means we haven't yet called
8778 * vm_page_wire on this page
8779 */
8780 need_unwire = FALSE;
8781
8782 dw_index++;
8783 dw_count--;
8784 }
8785 }
8786 vm_page_lock_queues();
8787
8788 if (dst_page->absent || free_wired_pages == TRUE) {
8789 vm_page_free(dst_page);
8790
8791 need_unwire = FALSE;
8792 } else {
8793 if (need_unwire == TRUE)
8794 vm_page_unwire(dst_page, TRUE);
8795
8796 PAGE_WAKEUP_DONE(dst_page);
8797 }
8798 vm_page_unlock_queues();
8799
8800 if (need_unwire == TRUE)
8801 VM_STAT_INCR(reactivations);
8802 }
8803 #if UPL_DEBUG
8804 upl->upl_state = 2;
8805 #endif
8806 if (! (upl->flags & UPL_KERNEL_OBJECT)) {
8807 vm_object_activity_end(object);
8808 vm_object_collapse(object, 0, TRUE);
8809 }
8810 vm_object_unlock(object);
8811 upl_destroy(upl);
8812
8813 return ret;
8814 }
8815
8816 kern_return_t
8817 upl_transpose(
8818 upl_t upl1,
8819 upl_t upl2)
8820 {
8821 kern_return_t retval;
8822 boolean_t upls_locked;
8823 vm_object_t object1, object2;
8824
8825 if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2 || ((upl1->flags & UPL_VECTOR)==UPL_VECTOR) || ((upl2->flags & UPL_VECTOR)==UPL_VECTOR)) {
8826 return KERN_INVALID_ARGUMENT;
8827 }
8828
8829 upls_locked = FALSE;
8830
8831 /*
8832 * Since we need to lock both UPLs at the same time,
8833 * avoid deadlocks by always taking locks in the same order.
8834 */
8835 if (upl1 < upl2) {
8836 upl_lock(upl1);
8837 upl_lock(upl2);
8838 } else {
8839 upl_lock(upl2);
8840 upl_lock(upl1);
8841 }
8842 upls_locked = TRUE; /* the UPLs will need to be unlocked */
8843
8844 object1 = upl1->map_object;
8845 object2 = upl2->map_object;
8846
8847 if (upl1->offset != 0 || upl2->offset != 0 ||
8848 upl1->size != upl2->size) {
8849 /*
8850 * We deal only with full objects, not subsets.
8851 * That's because we exchange the entire backing store info
8852 * for the objects: pager, resident pages, etc... We can't do
8853 * only part of it.
8854 */
8855 retval = KERN_INVALID_VALUE;
8856 goto done;
8857 }
8858
8859 /*
8860 * Transpose the VM objects' backing store.
8861 */
8862 retval = vm_object_transpose(object1, object2,
8863 (vm_object_size_t) upl1->size);
8864
8865 if (retval == KERN_SUCCESS) {
8866 /*
8867 * Make each UPL point to the correct VM object, i.e. the
8868 * object holding the pages that the UPL refers to...
8869 */
8870 #if CONFIG_IOSCHED || UPL_DEBUG
8871 if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8872 vm_object_lock(object1);
8873 vm_object_lock(object2);
8874 }
8875 if (upl1->flags & UPL_TRACKED_BY_OBJECT)
8876 queue_remove(&object1->uplq, upl1, upl_t, uplq);
8877 if (upl2->flags & UPL_TRACKED_BY_OBJECT)
8878 queue_remove(&object2->uplq, upl2, upl_t, uplq);
8879 #endif
8880 upl1->map_object = object2;
8881 upl2->map_object = object1;
8882
8883 #if CONFIG_IOSCHED || UPL_DEBUG
8884 if (upl1->flags & UPL_TRACKED_BY_OBJECT)
8885 queue_enter(&object2->uplq, upl1, upl_t, uplq);
8886 if (upl2->flags & UPL_TRACKED_BY_OBJECT)
8887 queue_enter(&object1->uplq, upl2, upl_t, uplq);
8888 if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8889 vm_object_unlock(object2);
8890 vm_object_unlock(object1);
8891 }
8892 #endif
8893 }
8894
8895 done:
8896 /*
8897 * Cleanup.
8898 */
8899 if (upls_locked) {
8900 upl_unlock(upl1);
8901 upl_unlock(upl2);
8902 upls_locked = FALSE;
8903 }
8904
8905 return retval;
8906 }
8907
8908 void
8909 upl_range_needed(
8910 upl_t upl,
8911 int index,
8912 int count)
8913 {
8914 upl_page_info_t *user_page_list;
8915 int size_in_pages;
8916
8917 if ( !(upl->flags & UPL_INTERNAL) || count <= 0)
8918 return;
8919
8920 size_in_pages = upl->size / PAGE_SIZE;
8921
8922 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
8923
8924 while (count-- && index < size_in_pages)
8925 user_page_list[index++].needed = TRUE;
8926 }
8927
8928
8929 /*
8930 * Reserve of virtual addresses in the kernel address space.
8931 * We need to map the physical pages in the kernel, so that we
8932 * can call the code-signing or slide routines with a kernel
8933 * virtual address. We keep this pool of pre-allocated kernel
8934 * virtual addresses so that we don't have to scan the kernel's
8935 * virtual address space each time we need to work with
8936 * a physical page.
8937 */
8938 decl_simple_lock_data(,vm_paging_lock)
8939 #define VM_PAGING_NUM_PAGES 64
8940 vm_map_offset_t vm_paging_base_address = 0;
8941 boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
8942 int vm_paging_max_index = 0;
8943 int vm_paging_page_waiter = 0;
8944 int vm_paging_page_waiter_total = 0;
8945 unsigned long vm_paging_no_kernel_page = 0;
8946 unsigned long vm_paging_objects_mapped = 0;
8947 unsigned long vm_paging_pages_mapped = 0;
8948 unsigned long vm_paging_objects_mapped_slow = 0;
8949 unsigned long vm_paging_pages_mapped_slow = 0;
8950
8951 void
8952 vm_paging_map_init(void)
8953 {
8954 kern_return_t kr;
8955 vm_map_offset_t page_map_offset;
8956 vm_map_entry_t map_entry;
8957
8958 assert(vm_paging_base_address == 0);
8959
8960 /*
8961 * Initialize our pool of pre-allocated kernel
8962 * virtual addresses.
8963 */
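/*
 * The pool is a single permanent, VM_PROT_NONE reservation against
 * kernel_object; individual physical pages are entered into it
 * later with PMAP_ENTER as callers need them.
 */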
8964 page_map_offset = 0;
8965 kr = vm_map_find_space(kernel_map,
8966 &page_map_offset,
8967 VM_PAGING_NUM_PAGES * PAGE_SIZE,
8968 0,
8969 0,
8970 VM_MAP_KERNEL_FLAGS_NONE,
8971 VM_KERN_MEMORY_NONE,
8972 &map_entry);
8973 if (kr != KERN_SUCCESS) {
8974 panic("vm_paging_map_init: kernel_map full\n");
8975 }
8976 VME_OBJECT_SET(map_entry, kernel_object);
8977 VME_OFFSET_SET(map_entry, page_map_offset);
8978 map_entry->protection = VM_PROT_NONE;
8979 map_entry->max_protection = VM_PROT_NONE;
8980 map_entry->permanent = TRUE;
8981 vm_object_reference(kernel_object);
8982 vm_map_unlock(kernel_map);
8983
8984 assert(vm_paging_base_address == 0);
8985 vm_paging_base_address = page_map_offset;
8986 }
8987
8988 /*
8989 * vm_paging_map_object:
8990 * Maps part of a VM object's pages in the kernel
8991 * virtual address space, using the pre-allocated
8992 * kernel virtual addresses, if possible.
8993 * Context:
8994 * The VM object is locked. This lock will get
8995 * dropped and re-acquired though, so the caller
8996 * must make sure the VM object is kept alive
8997 * (by holding a VM map that has a reference
8998 * on it, for example, or taking an extra reference).
8999 * The page should also be kept busy to prevent
9000 * it from being reclaimed.
9001 */
9002 kern_return_t
9003 vm_paging_map_object(
9004 vm_page_t page,
9005 vm_object_t object,
9006 vm_object_offset_t offset,
9007 vm_prot_t protection,
9008 boolean_t can_unlock_object,
9009 vm_map_size_t *size, /* IN/OUT */
9010 vm_map_offset_t *address, /* OUT */
9011 boolean_t *need_unmap) /* OUT */
9012 {
9013 kern_return_t kr;
9014 vm_map_offset_t page_map_offset;
9015 vm_map_size_t map_size;
9016 vm_object_offset_t object_offset;
9017 int i;
9018
9019 if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
9020 /* use permanent 1-to-1 kernel mapping of physical memory ? */
9021 #if __x86_64__
9022 *address = (vm_map_offset_t)
9023 PHYSMAP_PTOV((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(page) <<
9024 PAGE_SHIFT);
9025 *need_unmap = FALSE;
9026 return KERN_SUCCESS;
9027 #elif __arm__ || __arm64__
9028 *address = (vm_map_offset_t)
9029 phystokv((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(page) << PAGE_SHIFT);
9030 *need_unmap = FALSE;
9031 return KERN_SUCCESS;
9032 #else
9033 #warning "vm_paging_map_object: no 1-to-1 kernel mapping of physical memory..."
9034 #endif
9035
9036 assert(page->busy);
9037 /*
9038 * Use one of the pre-allocated kernel virtual addresses
9039 * and just enter the VM page in the kernel address space
9040 * at that virtual address.
9041 */
9042 simple_lock(&vm_paging_lock);
9043
9044 /*
9045 * Try and find an available kernel virtual address
9046 * from our pre-allocated pool.
9047 */
9048 page_map_offset = 0;
9049 for (;;) {
9050 for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
9051 if (vm_paging_page_inuse[i] == FALSE) {
9052 page_map_offset =
9053 vm_paging_base_address +
9054 (i * PAGE_SIZE);
9055 break;
9056 }
9057 }
9058 if (page_map_offset != 0) {
9059 /* found a space to map our page ! */
9060 break;
9061 }
9062
9063 if (can_unlock_object) {
9064 /*
9065 * If we can afford to unlock the VM object,
9066 * let's take the slow path now...
9067 */
9068 break;
9069 }
9070 /*
9071 * We can't afford to unlock the VM object, so
9072 * let's wait for a space to become available...
9073 */
9074 vm_paging_page_waiter_total++;
9075 vm_paging_page_waiter++;
9076 kr = assert_wait((event_t)&vm_paging_page_waiter, THREAD_UNINT);
9077 if (kr == THREAD_WAITING) {
9078 simple_unlock(&vm_paging_lock);
9079 kr = thread_block(THREAD_CONTINUE_NULL);
9080 simple_lock(&vm_paging_lock);
9081 }
9082 vm_paging_page_waiter--;
9083 /* ... and try again */
9084 }
9085
9086 if (page_map_offset != 0) {
9087 /*
9088 * We found a kernel virtual address;
9089 * map the physical page to that virtual address.
9090 */
9091 if (i > vm_paging_max_index) {
9092 vm_paging_max_index = i;
9093 }
9094 vm_paging_page_inuse[i] = TRUE;
9095 simple_unlock(&vm_paging_lock);
9096
9097 page->pmapped = TRUE;
9098
9099 /*
9100 * Keep the VM object locked over the PMAP_ENTER
9101 * and the actual use of the page by the kernel,
9102 * or this pmap mapping might get undone by a
9103 * vm_object_pmap_protect() call...
9104 */
9105 PMAP_ENTER(kernel_pmap,
9106 page_map_offset,
9107 page,
9108 protection,
9109 VM_PROT_NONE,
9110 0,
9111 TRUE,
9112 kr);
9113 assert(kr == KERN_SUCCESS);
9114 vm_paging_objects_mapped++;
9115 vm_paging_pages_mapped++;
9116 *address = page_map_offset;
9117 *need_unmap = TRUE;
9118
9119 #if KASAN
9120 kasan_notify_address(page_map_offset, PAGE_SIZE);
9121 #endif
9122
9123 /* all done and mapped, ready to use ! */
9124 return KERN_SUCCESS;
9125 }
9126
9127 /*
9128 * We ran out of pre-allocated kernel virtual
9129 * addresses. Just map the page in the kernel
9130 * the slow and regular way.
9131 */
9132 vm_paging_no_kernel_page++;
9133 simple_unlock(&vm_paging_lock);
9134 }
9135
9136 if (! can_unlock_object) {
9137 *address = 0;
9138 *size = 0;
9139 *need_unmap = FALSE;
9140 return KERN_NOT_SUPPORTED;
9141 }
9142
9143 object_offset = vm_object_trunc_page(offset);
9144 map_size = vm_map_round_page(*size,
9145 VM_MAP_PAGE_MASK(kernel_map));
9146
9147 /*
9148 * Try and map the required range of the object
9149 * in the kernel_map
9150 */
9151
9152 vm_object_reference_locked(object); /* for the map entry */
9153 vm_object_unlock(object);
9154
9155 kr = vm_map_enter(kernel_map,
9156 address,
9157 map_size,
9158 0,
9159 VM_FLAGS_ANYWHERE,
9160 VM_MAP_KERNEL_FLAGS_NONE,
9161 VM_KERN_MEMORY_NONE,
9162 object,
9163 object_offset,
9164 FALSE,
9165 protection,
9166 VM_PROT_ALL,
9167 VM_INHERIT_NONE);
9168 if (kr != KERN_SUCCESS) {
9169 *address = 0;
9170 *size = 0;
9171 *need_unmap = FALSE;
9172 vm_object_deallocate(object); /* for the map entry */
9173 vm_object_lock(object);
9174 return kr;
9175 }
9176
9177 *size = map_size;
9178
9179 /*
9180 * Enter the mapped pages in the page table now.
9181 */
9182 vm_object_lock(object);
9183 /*
9184 * VM object must be kept locked from before PMAP_ENTER()
9185 * until after the kernel is done accessing the page(s).
9186 * Otherwise, the pmap mappings in the kernel could be
9187 * undone by a call to vm_object_pmap_protect().
9188 */
9189
9190 for (page_map_offset = 0;
9191 map_size != 0;
9192 map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
9193
9194 page = vm_page_lookup(object, offset + page_map_offset);
9195 if (page == VM_PAGE_NULL) {
9196 printf("vm_paging_map_object: no page !?");
9197 vm_object_unlock(object);
9198 kr = vm_map_remove(kernel_map, *address, *size,
9199 VM_MAP_NO_FLAGS);
9200 assert(kr == KERN_SUCCESS);
9201 *address = 0;
9202 *size = 0;
9203 *need_unmap = FALSE;
9204 vm_object_lock(object);
9205 return KERN_MEMORY_ERROR;
9206 }
9207 page->pmapped = TRUE;
9208
9209 //assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(page)));
9210 PMAP_ENTER(kernel_pmap,
9211 *address + page_map_offset,
9212 page,
9213 protection,
9214 VM_PROT_NONE,
9215 0,
9216 TRUE,
9217 kr);
9218 assert(kr == KERN_SUCCESS);
9219 #if KASAN
9220 kasan_notify_address(*address + page_map_offset, PAGE_SIZE);
9221 #endif
9222 }
9223
9224 vm_paging_objects_mapped_slow++;
9225 vm_paging_pages_mapped_slow += (unsigned long) (map_size / PAGE_SIZE_64);
9226
9227 *need_unmap = TRUE;
9228
9229 return KERN_SUCCESS;
9230 }
9231
9232 /*
9233 * vm_paging_unmap_object:
9234 * Unmaps part of a VM object's pages from the kernel
9235 * virtual address space.
9236 * Context:
9237 * The VM object is locked. This lock will get
9238 * dropped and re-acquired though.
9239 */
9240 void
9241 vm_paging_unmap_object(
9242 vm_object_t object,
9243 vm_map_offset_t start,
9244 vm_map_offset_t end)
9245 {
9246 kern_return_t kr;
9247 int i;
9248
9249 if ((vm_paging_base_address == 0) ||
9250 (start < vm_paging_base_address) ||
9251 (end > (vm_paging_base_address
9252 + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) {
9253 /*
9254 * We didn't use our pre-allocated pool of
9255 * kernel virtual addresses. Deallocate the
9256 * virtual memory.
9257 */
9258 if (object != VM_OBJECT_NULL) {
9259 vm_object_unlock(object);
9260 }
9261 kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS);
9262 if (object != VM_OBJECT_NULL) {
9263 vm_object_lock(object);
9264 }
9265 assert(kr == KERN_SUCCESS);
9266 } else {
9267 /*
9268 * We used a kernel virtual address from our
9269 * pre-allocated pool. Put it back in the pool
9270 * for next time.
9271 */
9272 assert(end - start == PAGE_SIZE);
9273 i = (int) ((start - vm_paging_base_address) >> PAGE_SHIFT);
9274 assert(i >= 0 && i < VM_PAGING_NUM_PAGES);
9275
9276 /* undo the pmap mapping */
9277 pmap_remove(kernel_pmap, start, end);
9278
9279 simple_lock(&vm_paging_lock);
9280 vm_paging_page_inuse[i] = FALSE;
9281 if (vm_paging_page_waiter) {
9282 thread_wakeup(&vm_paging_page_waiter);
9283 }
9284 simple_unlock(&vm_paging_lock);
9285 }
9286 }
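
/*
 * Illustrative sketch (editorial addition, not part of the kernel sources
 * and not compiled): a hypothetical caller pairing vm_paging_map_object()
 * with vm_paging_unmap_object(), modeled on the usage in vm_page_slide()
 * later in this file.  The caller is assumed to hold the object lock and a
 * paging-in-progress reference; the function name and the VM_PROT_READ
 * protection are illustrative assumptions.
 */
#if 0
static kern_return_t
example_touch_page_contents(vm_page_t page, vm_object_t object)
{
	kern_return_t	kr;
	vm_map_size_t	map_size = PAGE_SIZE;
	vm_map_offset_t	kernel_addr = 0;
	boolean_t	needs_unmap = FALSE;

	/* map this one page into the kernel's address space */
	kr = vm_paging_map_object(page,
				  object,
				  page->offset,
				  VM_PROT_READ,
				  FALSE,	/* can_unlock_object */
				  &map_size,
				  &kernel_addr,
				  &needs_unmap);
	if (kr != KERN_SUCCESS) {
		/* e.g. KERN_NOT_SUPPORTED when the pool is exhausted
		 * and we told it not to unlock the object */
		return kr;
	}

	/* ... read the page's contents through kernel_addr here ... */

	/* tear the mapping down if one was established on our behalf */
	if (needs_unmap) {
		vm_paging_unmap_object(object,
				       kernel_addr,
				       kernel_addr + PAGE_SIZE);
	}
	return kr;
}
#endif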
9287
9288
9289 /*
9290 * page->object must be locked
9291 */
9292 void
9293 vm_pageout_steal_laundry(vm_page_t page, boolean_t queues_locked)
9294 {
9295 if (!queues_locked) {
9296 vm_page_lockspin_queues();
9297 }
9298
9299 page->free_when_done = FALSE;
9300 /*
9301 * We need to drop the laundry count and we may
9302 * also need to remove the page from the I/O
9303 * paging queue.  vm_pageout_throttle_up() handles
9304 * both cases and clears the laundry and
9305 * pageout_queue flags.
9306 *
9307 */
9308 vm_pageout_throttle_up(page);
9309
9310 vm_page_steal_pageout_page++;
9311
9312 if (!queues_locked) {
9313 vm_page_unlock_queues();
9314 }
9315 }
9316
9317 upl_t
9318 vector_upl_create(vm_offset_t upl_offset)
9319 {
9320 int vector_upl_size = sizeof(struct _vector_upl);
9321 int i=0;
9322 upl_t upl;
9323 vector_upl_t vector_upl = (vector_upl_t)kalloc(vector_upl_size);
9324
9325 upl = upl_create(0,UPL_VECTOR,0);
9326 upl->vector_upl = vector_upl;
9327 upl->offset = upl_offset;
9328 vector_upl->size = 0;
9329 vector_upl->offset = upl_offset;
9330 vector_upl->invalid_upls=0;
9331 vector_upl->num_upls=0;
9332 vector_upl->pagelist = NULL;
9333
9334 for(i=0; i < MAX_VECTOR_UPL_ELEMENTS ; i++) {
9335 vector_upl->upl_iostates[i].size = 0;
9336 vector_upl->upl_iostates[i].offset = 0;
9337
9338 }
9339 return upl;
9340 }
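
/*
 * Illustrative sketch (editorial addition, not part of the kernel sources
 * and not compiled): a hypothetical helper that aggregates two already
 * created sub-UPLs into a vector UPL using the routines in this file.
 * The sub-UPLs are assumed to carry internal page lists; their creation,
 * eventual commit/abort and the final vector_upl_deallocate() happen
 * elsewhere.  The function name, the offset-0 anchor and the io_size
 * parameters are illustrative assumptions.
 */
#if 0
static upl_t
example_build_vector_upl(upl_t sub0, upl_size_t io_size0,
			 upl_t sub1, upl_size_t io_size1)
{
	/* create an empty vector UPL anchored at offset 0 */
	upl_t vupl = vector_upl_create(0);

	/* append each sub-UPL; the vector's size grows by each io_size */
	vector_upl_set_subupl(vupl, sub0, io_size0);
	vector_upl_set_subupl(vupl, sub1, io_size1);

	/* record where each sub-UPL's I/O lands within the vector */
	vector_upl_set_iostate(vupl, sub0, 0, io_size0);
	vector_upl_set_iostate(vupl, sub1, io_size0, io_size1);

	/* build the aggregate page list from the sub-UPLs' page lists */
	vector_upl_set_pagelist(vupl);

	return vupl;
}
#endif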
9341
9342 void
9343 vector_upl_deallocate(upl_t upl)
9344 {
9345 if(upl) {
9346 vector_upl_t vector_upl = upl->vector_upl;
9347 if(vector_upl) {
9348 if(vector_upl->invalid_upls != vector_upl->num_upls)
9349 panic("Deallocating non-empty Vectored UPL\n");
9350 kfree(vector_upl->pagelist,(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)));
9351 vector_upl->invalid_upls=0;
9352 vector_upl->num_upls = 0;
9353 vector_upl->pagelist = NULL;
9354 vector_upl->size = 0;
9355 vector_upl->offset = 0;
9356 kfree(vector_upl, sizeof(struct _vector_upl));
9357 vector_upl = (vector_upl_t)0xfeedfeed;
9358 }
9359 else
9360 panic("vector_upl_deallocate was passed a non-vectored upl\n");
9361 }
9362 else
9363 panic("vector_upl_deallocate was passed a NULL upl\n");
9364 }
9365
9366 boolean_t
9367 vector_upl_is_valid(upl_t upl)
9368 {
9369 if(upl && ((upl->flags & UPL_VECTOR)==UPL_VECTOR)) {
9370 vector_upl_t vector_upl = upl->vector_upl;
9371 if(vector_upl == NULL || vector_upl == (vector_upl_t)0xfeedfeed || vector_upl == (vector_upl_t)0xfeedbeef)
9372 return FALSE;
9373 else
9374 return TRUE;
9375 }
9376 return FALSE;
9377 }
9378
9379 boolean_t
9380 vector_upl_set_subupl(upl_t upl,upl_t subupl, uint32_t io_size)
9381 {
9382 if(vector_upl_is_valid(upl)) {
9383 vector_upl_t vector_upl = upl->vector_upl;
9384
9385 if(vector_upl) {
9386 if(subupl) {
9387 if(io_size) {
9388 if(io_size < PAGE_SIZE)
9389 io_size = PAGE_SIZE;
9390 subupl->vector_upl = (void*)vector_upl;
9391 vector_upl->upl_elems[vector_upl->num_upls++] = subupl;
9392 vector_upl->size += io_size;
9393 upl->size += io_size;
9394 }
9395 else {
9396 uint32_t i=0,invalid_upls=0;
9397 for(i = 0; i < vector_upl->num_upls; i++) {
9398 if(vector_upl->upl_elems[i] == subupl)
9399 break;
9400 }
9401 if(i == vector_upl->num_upls)
9402 panic("Trying to remove sub-upl when none exists");
9403
9404 vector_upl->upl_elems[i] = NULL;
9405 invalid_upls = hw_atomic_add(&(vector_upl)->invalid_upls, 1);
9406 if(invalid_upls == vector_upl->num_upls)
9407 return TRUE;
9408 else
9409 return FALSE;
9410 }
9411 }
9412 else
9413 panic("vector_upl_set_subupl was passed a NULL upl element\n");
9414 }
9415 else
9416 panic("vector_upl_set_subupl was passed a non-vectored upl\n");
9417 }
9418 else
9419 panic("vector_upl_set_subupl was passed a NULL upl\n");
9420
9421 return FALSE;
9422 }
9423
9424 void
9425 vector_upl_set_pagelist(upl_t upl)
9426 {
9427 if(vector_upl_is_valid(upl)) {
9428 uint32_t i=0;
9429 vector_upl_t vector_upl = upl->vector_upl;
9430
9431 if(vector_upl) {
9432 vm_offset_t pagelist_size=0, cur_upl_pagelist_size=0;
9433
9434 vector_upl->pagelist = (upl_page_info_array_t)kalloc(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE));
9435
9436 for(i=0; i < vector_upl->num_upls; i++) {
9437 cur_upl_pagelist_size = sizeof(struct upl_page_info) * vector_upl->upl_elems[i]->size/PAGE_SIZE;
9438 bcopy(UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(vector_upl->upl_elems[i]), (char*)vector_upl->pagelist + pagelist_size, cur_upl_pagelist_size);
9439 pagelist_size += cur_upl_pagelist_size;
9440 if(vector_upl->upl_elems[i]->highest_page > upl->highest_page)
9441 upl->highest_page = vector_upl->upl_elems[i]->highest_page;
9442 }
9443 assert( pagelist_size == (sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)) );
9444 }
9445 else
9446 panic("vector_upl_set_pagelist was passed a non-vectored upl\n");
9447 }
9448 else
9449 panic("vector_upl_set_pagelist was passed a NULL upl\n");
9450
9451 }
9452
9453 upl_t
9454 vector_upl_subupl_byindex(upl_t upl, uint32_t index)
9455 {
9456 if(vector_upl_is_valid(upl)) {
9457 vector_upl_t vector_upl = upl->vector_upl;
9458 if(vector_upl) {
9459 if(index < vector_upl->num_upls)
9460 return vector_upl->upl_elems[index];
9461 }
9462 else
9463 panic("vector_upl_subupl_byindex was passed a non-vectored upl\n");
9464 }
9465 return NULL;
9466 }
9467
9468 upl_t
9469 vector_upl_subupl_byoffset(upl_t upl, upl_offset_t *upl_offset, upl_size_t *upl_size)
9470 {
9471 if(vector_upl_is_valid(upl)) {
9472 uint32_t i=0;
9473 vector_upl_t vector_upl = upl->vector_upl;
9474
9475 if(vector_upl) {
9476 upl_t subupl = NULL;
9477 vector_upl_iostates_t subupl_state;
9478
9479 for(i=0; i < vector_upl->num_upls; i++) {
9480 subupl = vector_upl->upl_elems[i];
9481 subupl_state = vector_upl->upl_iostates[i];
9482 if( *upl_offset <= (subupl_state.offset + subupl_state.size - 1)) {
9483 /* We could have been passed an offset/size pair that belongs
9484 * to a UPL element that has already been committed/aborted.
9485 * If so, return NULL.
9486 */
9487 if(subupl == NULL)
9488 return NULL;
9489 if((subupl_state.offset + subupl_state.size) < (*upl_offset + *upl_size)) {
9490 *upl_size = (subupl_state.offset + subupl_state.size) - *upl_offset;
9491 if(*upl_size > subupl_state.size)
9492 *upl_size = subupl_state.size;
9493 }
9494 if(*upl_offset >= subupl_state.offset)
9495 *upl_offset -= subupl_state.offset;
9496 else if(i)
9497 panic("Vector UPL offset miscalculation\n");
9498 return subupl;
9499 }
9500 }
9501 }
9502 else
9503 panic("vector_upl_subupl_byoffset was passed a non-vectored UPL\n");
9504 }
9505 return NULL;
9506 }
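
/*
 * Worked example of the lookup above (sizes are hypothetical): with two
 * sub-UPLs whose iostates are (offset 0, size 64KB) and (offset 64KB,
 * size 64KB), a query for *upl_offset = 80KB, *upl_size = 32KB skips the
 * first element, matches the second, rewrites *upl_offset to 16KB
 * (80KB - 64KB), leaves *upl_size at 32KB and returns the second sub-UPL.
 * A query that runs past the matching element's end has *upl_size clipped
 * to the bytes that element actually covers.
 */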
9507
9508 void
9509 vector_upl_get_submap(upl_t upl, vm_map_t *v_upl_submap, vm_offset_t *submap_dst_addr)
9510 {
9511 *v_upl_submap = NULL;
9512
9513 if(vector_upl_is_valid(upl)) {
9514 vector_upl_t vector_upl = upl->vector_upl;
9515 if(vector_upl) {
9516 *v_upl_submap = vector_upl->submap;
9517 *submap_dst_addr = vector_upl->submap_dst_addr;
9518 }
9519 else
9520 panic("vector_upl_get_submap was passed a non-vectored UPL\n");
9521 }
9522 else
9523 panic("vector_upl_get_submap was passed a null UPL\n");
9524 }
9525
9526 void
9527 vector_upl_set_submap(upl_t upl, vm_map_t submap, vm_offset_t submap_dst_addr)
9528 {
9529 if(vector_upl_is_valid(upl)) {
9530 vector_upl_t vector_upl = upl->vector_upl;
9531 if(vector_upl) {
9532 vector_upl->submap = submap;
9533 vector_upl->submap_dst_addr = submap_dst_addr;
9534 }
9535 else
9536 panic("vector_upl_set_submap was passed a non-vectored UPL\n");
9537 }
9538 else
9539 panic("vector_upl_set_submap was passed a NULL UPL\n");
9540 }
9541
9542 void
9543 vector_upl_set_iostate(upl_t upl, upl_t subupl, upl_offset_t offset, upl_size_t size)
9544 {
9545 if(vector_upl_is_valid(upl)) {
9546 uint32_t i = 0;
9547 vector_upl_t vector_upl = upl->vector_upl;
9548
9549 if(vector_upl) {
9550 for(i = 0; i < vector_upl->num_upls; i++) {
9551 if(vector_upl->upl_elems[i] == subupl)
9552 break;
9553 }
9554
9555 if(i == vector_upl->num_upls)
9556 panic("setting sub-upl iostate when none exists");
9557
9558 vector_upl->upl_iostates[i].offset = offset;
9559 if(size < PAGE_SIZE)
9560 size = PAGE_SIZE;
9561 vector_upl->upl_iostates[i].size = size;
9562 }
9563 else
9564 panic("vector_upl_set_iostate was passed a non-vectored UPL\n");
9565 }
9566 else
9567 panic("vector_upl_set_iostate was passed a NULL UPL\n");
9568 }
9569
9570 void
9571 vector_upl_get_iostate(upl_t upl, upl_t subupl, upl_offset_t *offset, upl_size_t *size)
9572 {
9573 if(vector_upl_is_valid(upl)) {
9574 uint32_t i = 0;
9575 vector_upl_t vector_upl = upl->vector_upl;
9576
9577 if(vector_upl) {
9578 for(i = 0; i < vector_upl->num_upls; i++) {
9579 if(vector_upl->upl_elems[i] == subupl)
9580 break;
9581 }
9582
9583 if(i == vector_upl->num_upls)
9584 panic("getting sub-upl iostate when none exists");
9585
9586 *offset = vector_upl->upl_iostates[i].offset;
9587 *size = vector_upl->upl_iostates[i].size;
9588 }
9589 else
9590 panic("vector_upl_get_iostate was passed a non-vectored UPL\n");
9591 }
9592 else
9593 panic("vector_upl_get_iostate was passed a NULL UPL\n");
9594 }
9595
9596 void
9597 vector_upl_get_iostate_byindex(upl_t upl, uint32_t index, upl_offset_t *offset, upl_size_t *size)
9598 {
9599 if(vector_upl_is_valid(upl)) {
9600 vector_upl_t vector_upl = upl->vector_upl;
9601 if(vector_upl) {
9602 if(index < vector_upl->num_upls) {
9603 *offset = vector_upl->upl_iostates[index].offset;
9604 *size = vector_upl->upl_iostates[index].size;
9605 }
9606 else
9607 *offset = *size = 0;
9608 }
9609 else
9610 panic("vector_upl_get_iostate_byindex was passed a non-vectored UPL\n");
9611 }
9612 else
9613 panic("vector_upl_get_iostate_byindex was passed a NULL UPL\n");
9614 }
9615
9616 upl_page_info_t *
9617 upl_get_internal_vectorupl_pagelist(upl_t upl)
9618 {
9619 return ((vector_upl_t)(upl->vector_upl))->pagelist;
9620 }
9621
9622 void *
9623 upl_get_internal_vectorupl(upl_t upl)
9624 {
9625 return upl->vector_upl;
9626 }
9627
9628 vm_size_t
9629 upl_get_internal_pagelist_offset(void)
9630 {
9631 return sizeof(struct upl);
9632 }
9633
9634 void
9635 upl_clear_dirty(
9636 upl_t upl,
9637 boolean_t value)
9638 {
9639 if (value) {
9640 upl->flags |= UPL_CLEAR_DIRTY;
9641 } else {
9642 upl->flags &= ~UPL_CLEAR_DIRTY;
9643 }
9644 }
9645
9646 void
9647 upl_set_referenced(
9648 upl_t upl,
9649 boolean_t value)
9650 {
9651 upl_lock(upl);
9652 if (value) {
9653 upl->ext_ref_count++;
9654 } else {
9655 if (!upl->ext_ref_count) {
9656 panic("upl_set_referenced: %p has no external references to release\n", upl);
9657 }
9658 upl->ext_ref_count--;
9659 }
9660 upl_unlock(upl);
9661 }
9662
9663 #if CONFIG_IOSCHED
9664 void
9665 upl_set_blkno(
9666 upl_t upl,
9667 vm_offset_t upl_offset,
9668 int io_size,
9669 int64_t blkno)
9670 {
9671 int i,j;
9672 if ((upl->flags & UPL_EXPEDITE_SUPPORTED) == 0)
9673 return;
9674
9675 assert(upl->upl_reprio_info != 0);
9676 for(i = (int)(upl_offset / PAGE_SIZE), j = 0; j < io_size; i++, j += PAGE_SIZE) {
9677 UPL_SET_REPRIO_INFO(upl, i, blkno, io_size);
9678 }
9679 }
9680 #endif
9681
9682 boolean_t
9683 vm_page_is_slideable(vm_page_t m)
9684 {
9685 boolean_t result = FALSE;
9686 vm_shared_region_slide_info_t si;
9687 vm_object_t m_object;
9688
9689 m_object = VM_PAGE_OBJECT(m);
9690
9691 vm_object_lock_assert_held(m_object);
9692
9693 /* make sure our page belongs to the one object allowed to do this */
9694 if (!m_object->object_slid) {
9695 goto done;
9696 }
9697
9698 si = m_object->vo_slide_info;
9699 if (si == NULL) {
9700 goto done;
9701 }
9702
9703 if(!m->slid && (si->start <= m->offset && si->end > m->offset)) {
9704 result = TRUE;
9705 }
9706
9707 done:
9708 return result;
9709 }
9710
9711 int vm_page_slide_counter = 0;
9712 int vm_page_slide_errors = 0;
9713 kern_return_t
9714 vm_page_slide(
9715 vm_page_t page,
9716 vm_map_offset_t kernel_mapping_offset)
9717 {
9718 kern_return_t kr;
9719 vm_map_size_t kernel_mapping_size;
9720 boolean_t kernel_mapping_needs_unmap;
9721 vm_offset_t kernel_vaddr;
9722 uint32_t pageIndex;
9723 uint32_t slide_chunk;
9724 vm_object_t page_object;
9725
9726 page_object = VM_PAGE_OBJECT(page);
9727
9728 assert(!page->slid);
9729 assert(page_object->object_slid);
9730 vm_object_lock_assert_exclusive(page_object);
9731
9732 if (page->error)
9733 return KERN_FAILURE;
9734
9735 /*
9736 * Take a paging-in-progress reference to keep the object
9737 * alive even if we have to unlock it (in vm_paging_map_object()
9738 * for example)...
9739 */
9740 vm_object_paging_begin(page_object);
9741
9742 if (kernel_mapping_offset == 0) {
9743 /*
9744 * The page hasn't already been mapped in kernel space
9745 * by the caller. Map it now, so that we can access
9746 * its contents and slide them.
9747 */
9748 kernel_mapping_size = PAGE_SIZE;
9749 kernel_mapping_needs_unmap = FALSE;
9750 kr = vm_paging_map_object(page,
9751 page_object,
9752 page->offset,
9753 VM_PROT_READ | VM_PROT_WRITE,
9754 FALSE,
9755 &kernel_mapping_size,
9756 &kernel_mapping_offset,
9757 &kernel_mapping_needs_unmap);
9758 if (kr != KERN_SUCCESS) {
9759 panic("vm_page_slide: "
9760 "could not map page in kernel: 0x%x\n",
9761 kr);
9762 }
9763 } else {
9764 kernel_mapping_size = 0;
9765 kernel_mapping_needs_unmap = FALSE;
9766 }
9767 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
9768
9769 /*
9770 * Slide the pointers on the page.
9771 */
9772
9773 /* assert that slide_file_info.start/end are page-aligned? */
9774
9775 assert(!page->slid);
9776 assert(page_object->object_slid);
9777
9778 pageIndex = (uint32_t)((page->offset -
9779 page_object->vo_slide_info->start) /
9780 PAGE_SIZE_FOR_SR_SLIDE);
9781 for (slide_chunk = 0;
9782 slide_chunk < PAGE_SIZE / PAGE_SIZE_FOR_SR_SLIDE;
9783 slide_chunk++) {
9784 kr = vm_shared_region_slide_page(page_object->vo_slide_info,
9785 (kernel_vaddr +
9786 (slide_chunk *
9787 PAGE_SIZE_FOR_SR_SLIDE)),
9788 (pageIndex + slide_chunk));
9789 if (kr != KERN_SUCCESS) {
9790 break;
9791 }
9792 }
9793
9794 vm_page_slide_counter++;
9795
9796 /*
9797 * Unmap the page from the kernel's address space.
9798 */
9799 if (kernel_mapping_needs_unmap) {
9800 vm_paging_unmap_object(page_object,
9801 kernel_vaddr,
9802 kernel_vaddr + PAGE_SIZE);
9803 }
9804
9805 page->dirty = FALSE;
9806 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(page), VM_MEM_MODIFIED | VM_MEM_REFERENCED);
9807
9808 if (kr != KERN_SUCCESS || cs_debug > 1) {
9809 printf("vm_page_slide(%p): "
9810 "obj %p off 0x%llx mobj %p moff 0x%llx\n",
9811 page,
9812 page_object, page->offset,
9813 page_object->pager,
9814 page->offset + page_object->paging_offset);
9815 }
9816
9817 if (kr == KERN_SUCCESS) {
9818 page->slid = TRUE;
9819 } else {
9820 page->error = TRUE;
9821 vm_page_slide_errors++;
9822 }
9823
9824 vm_object_paging_end(page_object);
9825
9826 return kr;
9827 }
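
/*
 * Worked example of the chunking above (assuming a 4KB
 * PAGE_SIZE_FOR_SR_SLIDE and a 16KB kernel PAGE_SIZE; both values are
 * configuration-dependent): a page whose offset is 0x8000 bytes past
 * vo_slide_info->start gets pageIndex 8 and is slid in four 4KB chunks,
 * i.e. vm_shared_region_slide_page() is called for sub-page indices
 * 8 through 11.
 */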
9828
9829 void inline memoryshot(unsigned int event, unsigned int control)
9830 {
9831 if (vm_debug_events) {
9832 KERNEL_DEBUG_CONSTANT1((MACHDBG_CODE(DBG_MACH_VM_PRESSURE, event)) | control,
9833 vm_page_active_count, vm_page_inactive_count,
9834 vm_page_free_count, vm_page_speculative_count,
9835 vm_page_throttled_count);
9836 } else {
9837 (void) event;
9838 (void) control;
9839 }
9840
9841 }
9842
9843 #ifdef MACH_BSD
9844
9845 boolean_t upl_device_page(upl_page_info_t *upl)
9846 {
9847 return(UPL_DEVICE_PAGE(upl));
9848 }
9849 boolean_t upl_page_present(upl_page_info_t *upl, int index)
9850 {
9851 return(UPL_PAGE_PRESENT(upl, index));
9852 }
9853 boolean_t upl_speculative_page(upl_page_info_t *upl, int index)
9854 {
9855 return(UPL_SPECULATIVE_PAGE(upl, index));
9856 }
9857 boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
9858 {
9859 return(UPL_DIRTY_PAGE(upl, index));
9860 }
9861 boolean_t upl_valid_page(upl_page_info_t *upl, int index)
9862 {
9863 return(UPL_VALID_PAGE(upl, index));
9864 }
9865 ppnum_t upl_phys_page(upl_page_info_t *upl, int index)
9866 {
9867 return(UPL_PHYS_PAGE(upl, index));
9868 }
9869
9870 void upl_page_set_mark(upl_page_info_t *upl, int index, boolean_t v)
9871 {
9872 upl[index].mark = v;
9873 }
9874
9875 boolean_t upl_page_get_mark(upl_page_info_t *upl, int index)
9876 {
9877 return upl[index].mark;
9878 }
9879
9880 void
9881 vm_countdirtypages(void)
9882 {
9883 vm_page_t m;
9884 int dpages;
9885 int pgopages;
9886 int precpages;
9887
9888
9889 dpages=0;
9890 pgopages=0;
9891 precpages=0;
9892
9893 vm_page_lock_queues();
9894 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
9895 do {
9896 if (m ==(vm_page_t )0) break;
9897
9898 if(m->dirty) dpages++;
9899 if(m->free_when_done) pgopages++;
9900 if(m->precious) precpages++;
9901
9902 assert(VM_PAGE_OBJECT(m) != kernel_object);
9903 m = (vm_page_t) vm_page_queue_next(&m->pageq);
9904 if (m ==(vm_page_t )0) break;
9905
9906 } while (!vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t) m));
9907 vm_page_unlock_queues();
9908
9909 vm_page_lock_queues();
9910 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
9911 do {
9912 if (m ==(vm_page_t )0) break;
9913
9914 dpages++;
9915 assert(m->dirty);
9916 assert(!m->free_when_done);
9917 assert(VM_PAGE_OBJECT(m) != kernel_object);
9918 m = (vm_page_t) vm_page_queue_next(&m->pageq);
9919 if (m ==(vm_page_t )0) break;
9920
9921 } while (!vm_page_queue_end(&vm_page_queue_throttled, (vm_page_queue_entry_t) m));
9922 vm_page_unlock_queues();
9923
9924 vm_page_lock_queues();
9925 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
9926 do {
9927 if (m ==(vm_page_t )0) break;
9928
9929 if(m->dirty) dpages++;
9930 if(m->free_when_done) pgopages++;
9931 if(m->precious) precpages++;
9932
9933 assert(VM_PAGE_OBJECT(m) != kernel_object);
9934 m = (vm_page_t) vm_page_queue_next(&m->pageq);
9935 if (m ==(vm_page_t )0) break;
9936
9937 } while (!vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t) m));
9938 vm_page_unlock_queues();
9939
9940 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
9941
9942 dpages=0;
9943 pgopages=0;
9944 precpages=0;
9945
9946 vm_page_lock_queues();
9947 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
9948
9949 do {
9950 if(m == (vm_page_t )0) break;
9951 if(m->dirty) dpages++;
9952 if(m->free_when_done) pgopages++;
9953 if(m->precious) precpages++;
9954
9955 assert(VM_PAGE_OBJECT(m) != kernel_object);
9956 m = (vm_page_t) vm_page_queue_next(&m->pageq);
9957 if(m == (vm_page_t )0) break;
9958
9959 } while (!vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t) m));
9960 vm_page_unlock_queues();
9961
9962 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
9963
9964 }
9965 #endif /* MACH_BSD */
9966
9967
9968 #if CONFIG_IOSCHED
9969 int upl_get_cached_tier(upl_t upl)
9970 {
9971 assert(upl);
9972 if (upl->flags & UPL_TRACKED_BY_OBJECT)
9973 return (upl->upl_priority);
9974 return (-1);
9975 }
9976 #endif /* CONFIG_IOSCHED */
9977
9978 ppnum_t upl_get_highest_page(
9979 upl_t upl)
9980 {
9981 return upl->highest_page;
9982 }
9983
9984 upl_size_t upl_get_size(
9985 upl_t upl)
9986 {
9987 return upl->size;
9988 }
9989
9990 upl_t upl_associated_upl(upl_t upl)
9991 {
9992 return upl->associated_upl;
9993 }
9994
9995 void upl_set_associated_upl(upl_t upl, upl_t associated_upl)
9996 {
9997 upl->associated_upl = associated_upl;
9998 }
9999
10000 struct vnode * upl_lookup_vnode(upl_t upl)
10001 {
10002 if (!upl->map_object->internal)
10003 return vnode_pager_lookup_vnode(upl->map_object->pager);
10004 else
10005 return NULL;
10006 }
10007
10008 #if UPL_DEBUG
10009 kern_return_t upl_ubc_alias_set(upl_t upl, uintptr_t alias1, uintptr_t alias2)
10010 {
10011 upl->ubc_alias1 = alias1;
10012 upl->ubc_alias2 = alias2;
10013 return KERN_SUCCESS;
10014 }
10015 int upl_ubc_alias_get(upl_t upl, uintptr_t * al, uintptr_t * al2)
10016 {
10017 if(al)
10018 *al = upl->ubc_alias1;
10019 if(al2)
10020 *al2 = upl->ubc_alias2;
10021 return KERN_SUCCESS;
10022 }
10023 #endif /* UPL_DEBUG */
10024
10025 #if VM_PRESSURE_EVENTS
10026 /*
10027 * Upward trajectory.
10028 */
10029 extern boolean_t vm_compressor_low_on_space(void);
10030
10031 boolean_t
10032 VM_PRESSURE_NORMAL_TO_WARNING(void) {
10033
10034 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
10035
10036 /* Available pages below our threshold */
10037 if (memorystatus_available_pages < memorystatus_available_pages_pressure) {
10038 /* No frozen processes to kill */
10039 if (memorystatus_frozen_count == 0) {
10040 /* Not enough suspended processes available. */
10041 if (memorystatus_suspended_count < MEMORYSTATUS_SUSPENDED_THRESHOLD) {
10042 return TRUE;
10043 }
10044 }
10045 }
10046 return FALSE;
10047
10048 } else {
10049 return ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 1 : 0);
10050 }
10051 }
10052
10053 boolean_t
10054 VM_PRESSURE_WARNING_TO_CRITICAL(void) {
10055
10056 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
10057
10058 /* Available pages below our threshold */
10059 if (memorystatus_available_pages < memorystatus_available_pages_critical) {
10060 return TRUE;
10061 }
10062 return FALSE;
10063 } else {
10064 return (vm_compressor_low_on_space() || (AVAILABLE_NON_COMPRESSED_MEMORY < ((12 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
10065 }
10066 }
10067
10068 /*
10069 * Downward trajectory.
10070 */
10071 boolean_t
10072 VM_PRESSURE_WARNING_TO_NORMAL(void) {
10073
10074 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
10075
10076 /* Available pages above our threshold */
10077 unsigned int target_threshold = (unsigned int) (memorystatus_available_pages_pressure + ((15 * memorystatus_available_pages_pressure) / 100));
10078 if (memorystatus_available_pages > target_threshold) {
10079 return TRUE;
10080 }
10081 return FALSE;
10082 } else {
10083 return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((12 * VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) / 10)) ? 1 : 0);
10084 }
10085 }
10086
10087 boolean_t
10088 VM_PRESSURE_CRITICAL_TO_WARNING(void) {
10089
10090 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
10091
10092 /* Available pages above our threshold */
10093 unsigned int target_threshold = (unsigned int)(memorystatus_available_pages_critical + ((15 * memorystatus_available_pages_critical) / 100));
10094 if (memorystatus_available_pages > target_threshold) {
10095 return TRUE;
10096 }
10097 return FALSE;
10098 } else {
10099 return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((14 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
10100 }
10101 }
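
/*
 * Example of the hysteresis above in the non-compressor configuration
 * (page counts are hypothetical): with
 * memorystatus_available_pages_pressure == 4000, the WARNING level can
 * be entered once available pages drop below 4000 (provided there are
 * no frozen processes and too few suspended ones to reclaim), but a
 * return to NORMAL is not reported until they climb back above
 * 4000 + 15% == 4600.  The same 15% margin applies to the
 * CRITICAL <-> WARNING transitions around
 * memorystatus_available_pages_critical.
 */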
10102 #endif /* VM_PRESSURE_EVENTS */
10103
10104
10105
10106 #define VM_TEST_COLLAPSE_COMPRESSOR 0
10107 #define VM_TEST_WIRE_AND_EXTRACT 0
10108 #define VM_TEST_PAGE_WIRE_OVERFLOW_PANIC 0
10109 #if __arm64__
10110 #define VM_TEST_KERNEL_OBJECT_FAULT 0
10111 #endif /* __arm64__ */
10112 #define VM_TEST_DEVICE_PAGER_TRANSPOSE (DEVELOPMENT || DEBUG)
10113
10114 #if VM_TEST_COLLAPSE_COMPRESSOR
10115 extern boolean_t vm_object_collapse_compressor_allowed;
10116 #include <IOKit/IOLib.h>
10117 static void
10118 vm_test_collapse_compressor(void)
10119 {
10120 vm_object_size_t backing_size, top_size;
10121 vm_object_t backing_object, top_object;
10122 vm_map_offset_t backing_offset, top_offset;
10123 unsigned char *backing_address, *top_address;
10124 kern_return_t kr;
10125
10126 printf("VM_TEST_COLLAPSE_COMPRESSOR:\n");
10127
10128 /* create backing object */
10129 backing_size = 15 * PAGE_SIZE;
10130 backing_object = vm_object_allocate(backing_size);
10131 assert(backing_object != VM_OBJECT_NULL);
10132 printf("VM_TEST_COLLAPSE_COMPRESSOR: created backing object %p\n",
10133 backing_object);
10134 /* map backing object */
10135 backing_offset = 0;
10136 kr = vm_map_enter(kernel_map, &backing_offset, backing_size, 0,
10137 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
10138 backing_object, 0, FALSE,
10139 VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
10140 assert(kr == KERN_SUCCESS);
10141 backing_address = (unsigned char *) backing_offset;
10142 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10143 "mapped backing object %p at 0x%llx\n",
10144 backing_object, (uint64_t) backing_offset);
10145 /* populate with pages to be compressed in backing object */
10146 backing_address[0x1*PAGE_SIZE] = 0xB1;
10147 backing_address[0x4*PAGE_SIZE] = 0xB4;
10148 backing_address[0x7*PAGE_SIZE] = 0xB7;
10149 backing_address[0xa*PAGE_SIZE] = 0xBA;
10150 backing_address[0xd*PAGE_SIZE] = 0xBD;
10151 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10152 "populated pages to be compressed in "
10153 "backing_object %p\n", backing_object);
10154 /* compress backing object */
10155 vm_object_pageout(backing_object);
10156 printf("VM_TEST_COLLAPSE_COMPRESSOR: compressing backing_object %p\n",
10157 backing_object);
10158 /* wait for all the pages to be gone */
10159 while (*(volatile int *)&backing_object->resident_page_count != 0)
10160 IODelay(10);
10161 printf("VM_TEST_COLLAPSE_COMPRESSOR: backing_object %p compressed\n",
10162 backing_object);
10163 /* populate with pages to be resident in backing object */
10164 backing_address[0x0*PAGE_SIZE] = 0xB0;
10165 backing_address[0x3*PAGE_SIZE] = 0xB3;
10166 backing_address[0x6*PAGE_SIZE] = 0xB6;
10167 backing_address[0x9*PAGE_SIZE] = 0xB9;
10168 backing_address[0xc*PAGE_SIZE] = 0xBC;
10169 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10170 "populated pages to be resident in "
10171 "backing_object %p\n", backing_object);
10172 /* leave the other pages absent */
10173 /* mess with the paging_offset of the backing_object */
10174 assert(backing_object->paging_offset == 0);
10175 backing_object->paging_offset = 0x3000;
10176
10177 /* create top object */
10178 top_size = 9 * PAGE_SIZE;
10179 top_object = vm_object_allocate(top_size);
10180 assert(top_object != VM_OBJECT_NULL);
10181 printf("VM_TEST_COLLAPSE_COMPRESSOR: created top object %p\n",
10182 top_object);
10183 /* map top object */
10184 top_offset = 0;
10185 kr = vm_map_enter(kernel_map, &top_offset, top_size, 0,
10186 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
10187 top_object, 0, FALSE,
10188 VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
10189 assert(kr == KERN_SUCCESS);
10190 top_address = (unsigned char *) top_offset;
10191 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10192 "mapped top object %p at 0x%llx\n",
10193 top_object, (uint64_t) top_offset);
10194 /* populate with pages to be compressed in top object */
10195 top_address[0x3*PAGE_SIZE] = 0xA3;
10196 top_address[0x4*PAGE_SIZE] = 0xA4;
10197 top_address[0x5*PAGE_SIZE] = 0xA5;
10198 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10199 "populated pages to be compressed in "
10200 "top_object %p\n", top_object);
10201 /* compress top object */
10202 vm_object_pageout(top_object);
10203 printf("VM_TEST_COLLAPSE_COMPRESSOR: compressing top_object %p\n",
10204 top_object);
10205 /* wait for all the pages to be gone */
10206 while (top_object->resident_page_count != 0)
10207 IODelay(10);
10208 printf("VM_TEST_COLLAPSE_COMPRESSOR: top_object %p compressed\n",
10209 top_object);
10210 /* populate with pages to be resident in top object */
10211 top_address[0x0*PAGE_SIZE] = 0xA0;
10212 top_address[0x1*PAGE_SIZE] = 0xA1;
10213 top_address[0x2*PAGE_SIZE] = 0xA2;
10214 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10215 "populated pages to be resident in "
10216 "top_object %p\n", top_object);
10217 /* leave the other pages absent */
10218
10219 /* link the 2 objects */
10220 vm_object_reference(backing_object);
10221 top_object->shadow = backing_object;
10222 top_object->vo_shadow_offset = 0x3000;
10223 printf("VM_TEST_COLLAPSE_COMPRESSOR: linked %p and %p\n",
10224 top_object, backing_object);
10225
10226 /* unmap backing object */
10227 vm_map_remove(kernel_map,
10228 backing_offset,
10229 backing_offset + backing_size,
10230 0);
10231 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10232 "unmapped backing_object %p [0x%llx:0x%llx]\n",
10233 backing_object,
10234 (uint64_t) backing_offset,
10235 (uint64_t) (backing_offset + backing_size));
10236
10237 /* collapse */
10238 printf("VM_TEST_COLLAPSE_COMPRESSOR: collapsing %p\n", top_object);
10239 vm_object_lock(top_object);
10240 vm_object_collapse(top_object, 0, FALSE);
10241 vm_object_unlock(top_object);
10242 printf("VM_TEST_COLLAPSE_COMPRESSOR: collapsed %p\n", top_object);
10243
10244 /* did it work? */
10245 if (top_object->shadow != VM_OBJECT_NULL) {
10246 printf("VM_TEST_COLLAPSE_COMPRESSOR: not collapsed\n");
10247 printf("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
10248 if (vm_object_collapse_compressor_allowed) {
10249 panic("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
10250 }
10251 } else {
10252 /* check the contents of the mapping */
10253 unsigned char expect[9] =
10254 { 0xA0, 0xA1, 0xA2, /* resident in top */
10255 0xA3, 0xA4, 0xA5, /* compressed in top */
10256 0xB9, /* resident in backing + shadow_offset */
10257 0xBD, /* compressed in backing + shadow_offset + paging_offset */
10258 0x00 }; /* absent in both */
10259 unsigned char actual[9];
10260 unsigned int i, errors;
10261
10262 errors = 0;
10263 for (i = 0; i < sizeof (actual); i++) {
10264 actual[i] = (unsigned char) top_address[i*PAGE_SIZE];
10265 if (actual[i] != expect[i]) {
10266 errors++;
10267 }
10268 }
10269 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10270 "actual [%x %x %x %x %x %x %x %x %x] "
10271 "expect [%x %x %x %x %x %x %x %x %x] "
10272 "%d errors\n",
10273 actual[0], actual[1], actual[2], actual[3],
10274 actual[4], actual[5], actual[6], actual[7],
10275 actual[8],
10276 expect[0], expect[1], expect[2], expect[3],
10277 expect[4], expect[5], expect[6], expect[7],
10278 expect[8],
10279 errors);
10280 if (errors) {
10281 panic("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
10282 } else {
10283 printf("VM_TEST_COLLAPSE_COMPRESSOR: PASS\n");
10284 }
10285 }
10286 }
10287 #else /* VM_TEST_COLLAPSE_COMPRESSOR */
10288 #define vm_test_collapse_compressor()
10289 #endif /* VM_TEST_COLLAPSE_COMPRESSOR */
10290
10291 #if VM_TEST_WIRE_AND_EXTRACT
10292 extern ledger_template_t task_ledger_template;
10293 #include <mach/mach_vm.h>
10294 extern ppnum_t vm_map_get_phys_page(vm_map_t map,
10295 vm_offset_t offset);
10296 static void
10297 vm_test_wire_and_extract(void)
10298 {
10299 ledger_t ledger;
10300 vm_map_t user_map, wire_map;
10301 mach_vm_address_t user_addr, wire_addr;
10302 mach_vm_size_t user_size, wire_size;
10303 mach_vm_offset_t cur_offset;
10304 vm_prot_t cur_prot, max_prot;
10305 ppnum_t user_ppnum, wire_ppnum;
10306 kern_return_t kr;
10307
10308 ledger = ledger_instantiate(task_ledger_template,
10309 LEDGER_CREATE_ACTIVE_ENTRIES);
10310 user_map = vm_map_create(pmap_create(ledger, 0, PMAP_CREATE_64BIT),
10311 0x100000000ULL,
10312 0x200000000ULL,
10313 TRUE);
10314 wire_map = vm_map_create(NULL,
10315 0x100000000ULL,
10316 0x200000000ULL,
10317 TRUE);
10318 user_addr = 0;
10319 user_size = 0x10000;
10320 kr = mach_vm_allocate(user_map,
10321 &user_addr,
10322 user_size,
10323 VM_FLAGS_ANYWHERE);
10324 assert(kr == KERN_SUCCESS);
10325 wire_addr = 0;
10326 wire_size = user_size;
10327 kr = mach_vm_remap(wire_map,
10328 &wire_addr,
10329 wire_size,
10330 0,
10331 VM_FLAGS_ANYWHERE,
10332 user_map,
10333 user_addr,
10334 FALSE,
10335 &cur_prot,
10336 &max_prot,
10337 VM_INHERIT_NONE);
10338 assert(kr == KERN_SUCCESS);
10339 for (cur_offset = 0;
10340 cur_offset < wire_size;
10341 cur_offset += PAGE_SIZE) {
10342 kr = vm_map_wire_and_extract(wire_map,
10343 wire_addr + cur_offset,
10344 VM_PROT_DEFAULT | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK),
10345 TRUE,
10346 &wire_ppnum);
10347 assert(kr == KERN_SUCCESS);
10348 user_ppnum = vm_map_get_phys_page(user_map,
10349 user_addr + cur_offset);
10350 printf("VM_TEST_WIRE_AND_EXTRACT: kr=0x%x "
10351 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
10352 kr,
10353 user_map, user_addr + cur_offset, user_ppnum,
10354 wire_map, wire_addr + cur_offset, wire_ppnum);
10355 if (kr != KERN_SUCCESS ||
10356 wire_ppnum == 0 ||
10357 wire_ppnum != user_ppnum) {
10358 panic("VM_TEST_WIRE_AND_EXTRACT: FAIL\n");
10359 }
10360 }
10361 cur_offset -= PAGE_SIZE;
10362 kr = vm_map_wire_and_extract(wire_map,
10363 wire_addr + cur_offset,
10364 VM_PROT_DEFAULT,
10365 TRUE,
10366 &wire_ppnum);
10367 assert(kr == KERN_SUCCESS);
10368 printf("VM_TEST_WIRE_AND_EXTRACT: re-wire kr=0x%x "
10369 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
10370 kr,
10371 user_map, user_addr + cur_offset, user_ppnum,
10372 wire_map, wire_addr + cur_offset, wire_ppnum);
10373 if (kr != KERN_SUCCESS ||
10374 wire_ppnum == 0 ||
10375 wire_ppnum != user_ppnum) {
10376 panic("VM_TEST_WIRE_AND_EXTRACT: FAIL\n");
10377 }
10378
10379 printf("VM_TEST_WIRE_AND_EXTRACT: PASS\n");
10380 }
10381 #else /* VM_TEST_WIRE_AND_EXTRACT */
10382 #define vm_test_wire_and_extract()
10383 #endif /* VM_TEST_WIRE_AND_EXTRACT */
10384
10385 #if VM_TEST_PAGE_WIRE_OVERFLOW_PANIC
10386 static void
10387 vm_test_page_wire_overflow_panic(void)
10388 {
10389 vm_object_t object;
10390 vm_page_t page;
10391
10392 printf("VM_TEST_PAGE_WIRE_OVERFLOW_PANIC: starting...\n");
10393
10394 object = vm_object_allocate(PAGE_SIZE);
10395 vm_object_lock(object);
10396 page = vm_page_alloc(object, 0x0);
10397 vm_page_lock_queues();
10398 do {
10399 vm_page_wire(page, 1, FALSE);
10400 } while (page->wire_count != 0);
10401 vm_page_unlock_queues();
10402 vm_object_unlock(object);
10403 panic("FBDP(%p,%p): wire_count overflow not detected\n",
10404 object, page);
10405 }
10406 #else /* VM_TEST_PAGE_WIRE_OVERFLOW_PANIC */
10407 #define vm_test_page_wire_overflow_panic()
10408 #endif /* VM_TEST_PAGE_WIRE_OVERFLOW_PANIC */
10409
10410 #if __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT
10411 extern int copyinframe(vm_address_t fp, char *frame, boolean_t is64bit);
10412 static void
10413 vm_test_kernel_object_fault(void)
10414 {
10415 kern_return_t kr;
10416 vm_offset_t stack;
10417 uintptr_t frameb[2];
10418 int ret;
10419
10420 kr = kernel_memory_allocate(kernel_map, &stack,
10421 kernel_stack_size + (2*PAGE_SIZE),
10422 0,
10423 (KMA_KSTACK | KMA_KOBJECT |
10424 KMA_GUARD_FIRST | KMA_GUARD_LAST),
10425 VM_KERN_MEMORY_STACK);
10426 if (kr != KERN_SUCCESS) {
10427 panic("VM_TEST_KERNEL_OBJECT_FAULT: kernel_memory_allocate kr 0x%x\n", kr);
10428 }
10429 ret = copyinframe((uintptr_t)stack, (char *)frameb, TRUE);
10430 if (ret != 0) {
10431 printf("VM_TEST_KERNEL_OBJECT_FAULT: PASS\n");
10432 } else {
10433 printf("VM_TEST_KERNEL_OBJECT_FAULT: FAIL\n");
10434 }
10435 vm_map_remove(kernel_map,
10436 stack,
10437 stack + kernel_stack_size + (2*PAGE_SIZE),
10438 VM_MAP_REMOVE_KUNWIRE);
10439 stack = 0;
10440 }
10441 #else /* __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT */
10442 #define vm_test_kernel_object_fault()
10443 #endif /* __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT */
10444
10445 #if VM_TEST_DEVICE_PAGER_TRANSPOSE
10446 static void
10447 vm_test_device_pager_transpose(void)
10448 {
10449 memory_object_t device_pager;
10450 vm_object_t anon_object, device_object;
10451 vm_size_t size;
10452 vm_map_offset_t anon_mapping, device_mapping;
10453 kern_return_t kr;
10454
10455 size = 3 * PAGE_SIZE;
10456 anon_object = vm_object_allocate(size);
10457 assert(anon_object != VM_OBJECT_NULL);
10458 device_pager = device_pager_setup(NULL, 0, size, 0);
10459 assert(device_pager != NULL);
10460 device_object = memory_object_to_vm_object(device_pager);
10461 assert(device_object != VM_OBJECT_NULL);
10462 anon_mapping = 0;
10463 kr = vm_map_enter(kernel_map, &anon_mapping, size, 0,
10464 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
10465 anon_object, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
10466 VM_INHERIT_DEFAULT);
10467 assert(kr == KERN_SUCCESS);
10468 device_mapping = 0;
10469 kr = vm_map_enter_mem_object(kernel_map, &device_mapping, size, 0,
10470 VM_FLAGS_ANYWHERE,
10471 VM_MAP_KERNEL_FLAGS_NONE,
10472 VM_KERN_MEMORY_NONE,
10473 (void *)device_pager, 0, FALSE,
10474 VM_PROT_DEFAULT, VM_PROT_ALL,
10475 VM_INHERIT_DEFAULT);
10476 assert(kr == KERN_SUCCESS);
10477 memory_object_deallocate(device_pager);
10478
10479 vm_object_lock(anon_object);
10480 vm_object_activity_begin(anon_object);
10481 anon_object->blocked_access = TRUE;
10482 vm_object_unlock(anon_object);
10483 vm_object_lock(device_object);
10484 vm_object_activity_begin(device_object);
10485 device_object->blocked_access = TRUE;
10486 vm_object_unlock(device_object);
10487
10488 assert(anon_object->ref_count == 1);
10489 assert(!anon_object->named);
10490 assert(device_object->ref_count == 2);
10491 assert(device_object->named);
10492
10493 kr = vm_object_transpose(device_object, anon_object, size);
10494 assert(kr == KERN_SUCCESS);
10495
10496 vm_object_lock(anon_object);
10497 vm_object_activity_end(anon_object);
10498 anon_object->blocked_access = FALSE;
10499 vm_object_unlock(anon_object);
10500 vm_object_lock(device_object);
10501 vm_object_activity_end(device_object);
10502 device_object->blocked_access = FALSE;
10503 vm_object_unlock(device_object);
10504
10505 assert(anon_object->ref_count == 2);
10506 assert(anon_object->named);
10507 kr = vm_deallocate(kernel_map, anon_mapping, size);
10508 assert(kr == KERN_SUCCESS);
10509 assert(device_object->ref_count == 1);
10510 assert(!device_object->named);
10511 kr = vm_deallocate(kernel_map, device_mapping, size);
10512 assert(kr == KERN_SUCCESS);
10513
10514 printf("VM_TEST_DEVICE_PAGER_TRANSPOSE: PASS\n");
10515 }
10516 #else /* VM_TEST_DEVICE_PAGER_TRANSPOSE */
10517 #define vm_test_device_pager_transpose()
10518 #endif /* VM_TEST_DEVICE_PAGER_TRANSPOSE */
10519
10520 void
10521 vm_tests(void)
10522 {
10523 vm_test_collapse_compressor();
10524 vm_test_wire_and_extract();
10525 vm_test_page_wire_overflow_panic();
10526 vm_test_kernel_object_fault();
10527 vm_test_device_pager_transpose();
10528 }