1 /*
2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_pageout.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * The proverbial page-out daemon.
64 */
65
66 #include <stdint.h>
67
68 #include <debug.h>
69 #include <mach_pagemap.h>
70 #include <mach_cluster_stats.h>
71
72 #include <mach/mach_types.h>
73 #include <mach/memory_object.h>
74 #include <mach/memory_object_default.h>
75 #include <mach/memory_object_control_server.h>
76 #include <mach/mach_host_server.h>
77 #include <mach/upl.h>
78 #include <mach/vm_map.h>
79 #include <mach/vm_param.h>
80 #include <mach/vm_statistics.h>
81 #include <mach/sdt.h>
82
83 #include <kern/kern_types.h>
84 #include <kern/counters.h>
85 #include <kern/host_statistics.h>
86 #include <kern/machine.h>
87 #include <kern/misc_protos.h>
88 #include <kern/sched.h>
89 #include <kern/thread.h>
90 #include <kern/xpr.h>
91 #include <kern/kalloc.h>
92 #include <kern/policy_internal.h>
93 #include <kern/thread_group.h>
94
95 #include <machine/vm_tuning.h>
96 #include <machine/commpage.h>
97
98 #include <vm/pmap.h>
99 #include <vm/vm_compressor_pager.h>
100 #include <vm/vm_fault.h>
101 #include <vm/vm_map.h>
102 #include <vm/vm_object.h>
103 #include <vm/vm_page.h>
104 #include <vm/vm_pageout.h>
105 #include <vm/vm_protos.h> /* must be last */
106 #include <vm/memory_object.h>
107 #include <vm/vm_purgeable_internal.h>
108 #include <vm/vm_shared_region.h>
109 #include <vm/vm_compressor.h>
110
111 #include <san/kasan.h>
112
113 #if CONFIG_PHANTOM_CACHE
114 #include <vm/vm_phantom_cache.h>
115 #endif
116
117 #if UPL_DEBUG
118 #include <libkern/OSDebug.h>
119 #endif
120
121 extern int cs_debug;
122
123 extern void mbuf_drain(boolean_t);
124
125 #if VM_PRESSURE_EVENTS
126 #if CONFIG_JETSAM
127 extern unsigned int memorystatus_available_pages;
128 extern unsigned int memorystatus_available_pages_pressure;
129 extern unsigned int memorystatus_available_pages_critical;
130 #else /* CONFIG_JETSAM */
131 extern uint64_t memorystatus_available_pages;
132 extern uint64_t memorystatus_available_pages_pressure;
133 extern uint64_t memorystatus_available_pages_critical;
134 #endif /* CONFIG_JETSAM */
135
136 extern unsigned int memorystatus_frozen_count;
137 extern unsigned int memorystatus_suspended_count;
138 extern vm_pressure_level_t memorystatus_vm_pressure_level;
139
140 void vm_pressure_response(void);
141 extern void consider_vm_pressure_events(void);
142
143 #define MEMORYSTATUS_SUSPENDED_THRESHOLD 4
144 #endif /* VM_PRESSURE_EVENTS */
145
146
147 #ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
148 #ifdef CONFIG_EMBEDDED
149 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 1024
150 #else
151 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096
152 #endif
153 #endif
154
155 #ifndef VM_PAGEOUT_DEADLOCK_RELIEF
156 #define VM_PAGEOUT_DEADLOCK_RELIEF 100 /* number of pages to move to break deadlock */
157 #endif
158
159 #ifndef VM_PAGE_LAUNDRY_MAX
160 #define VM_PAGE_LAUNDRY_MAX 128UL /* maximum pageouts on a given pageout queue */
161 #endif	/* VM_PAGE_LAUNDRY_MAX */
162
163 #ifndef VM_PAGEOUT_BURST_WAIT
164 #define VM_PAGEOUT_BURST_WAIT 1 /* milliseconds */
165 #endif /* VM_PAGEOUT_BURST_WAIT */
166
167 #ifndef VM_PAGEOUT_EMPTY_WAIT
168 #define VM_PAGEOUT_EMPTY_WAIT 50 /* milliseconds */
169 #endif /* VM_PAGEOUT_EMPTY_WAIT */
170
171 #ifndef VM_PAGEOUT_DEADLOCK_WAIT
172 #define VM_PAGEOUT_DEADLOCK_WAIT 100 /* milliseconds */
173 #endif /* VM_PAGEOUT_DEADLOCK_WAIT */
174
175 #ifndef VM_PAGEOUT_IDLE_WAIT
176 #define VM_PAGEOUT_IDLE_WAIT 10 /* milliseconds */
177 #endif /* VM_PAGEOUT_IDLE_WAIT */
178
179 #ifndef VM_PAGEOUT_SWAP_WAIT
180 #define VM_PAGEOUT_SWAP_WAIT 10 /* milliseconds */
181 #endif /* VM_PAGEOUT_SWAP_WAIT */
182
183
184 #ifndef VM_PAGE_SPECULATIVE_TARGET
185 #define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / (100 / vm_pageout_state.vm_page_speculative_percentage))
186 #endif /* VM_PAGE_SPECULATIVE_TARGET */
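/*
 * Worked example (illustrative only, assuming a
 * vm_page_speculative_percentage of 5): for a total of 100,000 pages,
 * VM_PAGE_SPECULATIVE_TARGET(100000) = 100000 * 1 / (100 / 5)
 *                                    = 100000 / 20
 *                                    = 5,000 pages.
 * Note that the divisor (100 / percentage) uses integer division, so a
 * percentage of 3 gives a divisor of 33 and an effective fraction of
 * roughly 3.03% rather than exactly 3%.
 */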
187
188
189 /*
190 * To obtain a reasonable LRU approximation, the inactive queue
191 * needs to be large enough to give pages on it a chance to be
192 * referenced a second time. This macro defines the fraction
193 * of active+inactive pages that should be inactive.
194 * The pageout daemon uses it to update vm_page_inactive_target.
195 *
196 * If vm_page_free_count falls below vm_page_free_target and
197 * vm_page_inactive_count is below vm_page_inactive_target,
198 * then the pageout daemon starts running.
199 */
200
201 #ifndef VM_PAGE_INACTIVE_TARGET
202 #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 2)
203 #endif /* VM_PAGE_INACTIVE_TARGET */
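/*
 * Worked example (illustrative only): with 200,000 pageable pages
 * available, VM_PAGE_INACTIVE_TARGET(200000) = 200000 / 2 = 100,000
 * pages, i.e. about half of the active+inactive pages should sit on
 * the inactive queue so they get a chance at a second reference.
 */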
204
205 /*
206 * Once the pageout daemon starts running, it keeps going
207 * until vm_page_free_count meets or exceeds vm_page_free_target.
208 */
209
210 #ifndef VM_PAGE_FREE_TARGET
211 #ifdef CONFIG_EMBEDDED
212 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 100)
213 #else
214 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
215 #endif
216 #endif /* VM_PAGE_FREE_TARGET */
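/*
 * Worked example (illustrative only): on a non-embedded configuration
 * with an argument of 1,000,000 pages,
 * VM_PAGE_FREE_TARGET(1000000) = 15 + 1000000 / 80 = 12,515 pages;
 * the embedded variant divides by 100 instead, giving 10,015 pages.
 */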
217
218
219 /*
220 * The pageout daemon always starts running once vm_page_free_count
221 * falls below vm_page_free_min.
222 */
223
224 #ifndef VM_PAGE_FREE_MIN
225 #ifdef CONFIG_EMBEDDED
226 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 200)
227 #else
228 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
229 #endif
230 #endif /* VM_PAGE_FREE_MIN */
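/*
 * Worked example (illustrative only): with the same 1,000,000 page
 * argument, VM_PAGE_FREE_MIN(1000000) = 10 + 1000000 / 100 = 10,010
 * pages on a non-embedded configuration, or 10 + 1000000 / 200 = 5,010
 * pages on embedded.
 */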
231
232 #ifdef CONFIG_EMBEDDED
233 #define VM_PAGE_FREE_RESERVED_LIMIT 100
234 #define VM_PAGE_FREE_MIN_LIMIT 1500
235 #define VM_PAGE_FREE_TARGET_LIMIT 2000
236 #else
237 #define VM_PAGE_FREE_RESERVED_LIMIT 1700
238 #define VM_PAGE_FREE_MIN_LIMIT 3500
239 #define VM_PAGE_FREE_TARGET_LIMIT 4000
240 #endif
241
242 /*
243 * When vm_page_free_count falls below vm_page_free_reserved,
244 * only vm-privileged threads can allocate pages. vm-privilege
245 * allows the pageout daemon and default pager (and any other
246 * associated threads needed for default pageout) to continue
247 * operation by dipping into the reserved pool of pages.
248 */
249
250 #ifndef VM_PAGE_FREE_RESERVED
251 #define VM_PAGE_FREE_RESERVED(n) \
252 ((unsigned) (6 * VM_PAGE_LAUNDRY_MAX) + (n))
253 #endif /* VM_PAGE_FREE_RESERVED */
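/*
 * Worked example (illustrative only): with VM_PAGE_LAUNDRY_MAX of 128,
 * the base reservation is 6 * 128 = 768 pages; if the macro were
 * invoked with a hypothetical n = 4 (the value of n is an assumption
 * here, purely for illustration),
 * VM_PAGE_FREE_RESERVED(4) = 768 + 4 = 772 pages.
 */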
254
255 /*
256 * When we dequeue pages from the inactive list, they are
257  * reactivated (i.e., put back on the active queue) if referenced.
258 * However, it is possible to starve the free list if other
259 * processors are referencing pages faster than we can turn off
260 * the referenced bit. So we limit the number of reactivations
261 * we will make per call of vm_pageout_scan().
262 */
263 #define VM_PAGE_REACTIVATE_LIMIT_MAX 20000
264
265 #ifndef VM_PAGE_REACTIVATE_LIMIT
266 #ifdef CONFIG_EMBEDDED
267 #define VM_PAGE_REACTIVATE_LIMIT(avail) (VM_PAGE_INACTIVE_TARGET(avail) / 2)
268 #else
269 #define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
270 #endif
271 #endif /* VM_PAGE_REACTIVATE_LIMIT */
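/*
 * Worked example (illustrative only) of the non-embedded variant, as
 * written: with 1,000,000 available pages, (avail) / 20 = 50,000, so
 * VM_PAGE_REACTIVATE_LIMIT(1000000) = MAX(50000, 20000) = 50,000
 * reactivations per vm_pageout_scan() call; with 100,000 available
 * pages, (avail) / 20 = 5,000 and the MAX() keeps the limit at the
 * 20,000 floor.
 */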
272 #define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM 1000
273
274 extern boolean_t hibernate_cleaning_in_progress;
275
276 /*
277 * Forward declarations for internal routines.
278 */
279 struct cq {
280 struct vm_pageout_queue *q;
281 void *current_chead;
282 char *scratch_buf;
283 int id;
284 };
285
286 struct cq ciq[MAX_COMPRESSOR_THREAD_COUNT];
287
288
289 #if VM_PRESSURE_EVENTS
290 void vm_pressure_thread(void);
291
292 boolean_t VM_PRESSURE_NORMAL_TO_WARNING(void);
293 boolean_t VM_PRESSURE_WARNING_TO_CRITICAL(void);
294
295 boolean_t VM_PRESSURE_WARNING_TO_NORMAL(void);
296 boolean_t VM_PRESSURE_CRITICAL_TO_WARNING(void);
297 #endif
298
299 void vm_pageout_garbage_collect(int);
300 static void vm_pageout_iothread_external(void);
301 static void vm_pageout_iothread_internal(struct cq *cq);
302 static void vm_pageout_adjust_eq_iothrottle(struct vm_pageout_queue *, boolean_t);
303
304 extern void vm_pageout_continue(void);
305 extern void vm_pageout_scan(void);
306
307 void vm_tests(void); /* forward */
308
309 #if !CONFIG_EMBEDDED
310 static boolean_t vm_pageout_waiter = FALSE;
311 static boolean_t vm_pageout_running = FALSE;
312 #endif /* !CONFIG_EMBEDDED */
313
314
315 #if DEVELOPMENT || DEBUG
316 struct vm_pageout_debug vm_pageout_debug;
317 #endif
318 struct vm_pageout_vminfo vm_pageout_vminfo;
319 struct vm_pageout_state vm_pageout_state;
320 struct vm_config vm_config;
321
322 struct vm_pageout_queue vm_pageout_queue_internal __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
323 struct vm_pageout_queue vm_pageout_queue_external __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
324
325 int vm_upl_wait_for_pages = 0;
326 vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL;
327
328 boolean_t (* volatile consider_buffer_cache_collect)(int) = NULL;
329
330 int vm_debug_events = 0;
331
332 #if CONFIG_MEMORYSTATUS
333 extern boolean_t memorystatus_kill_on_VM_page_shortage(boolean_t async);
334
335 uint32_t vm_pageout_memorystatus_fb_factor_nr = 5;
336 uint32_t vm_pageout_memorystatus_fb_factor_dr = 2;
337
338 #endif
339
340
341
342 /*
343 * Routine: vm_pageout_object_terminate
344 * Purpose:
345 * Destroy the pageout_object, and perform all of the
346 * required cleanup actions.
347 *
348 * In/Out conditions:
349 * The object must be locked, and will be returned locked.
350 */
351 void
352 vm_pageout_object_terminate(
353 vm_object_t object)
354 {
355 vm_object_t shadow_object;
356
357 /*
358 * Deal with the deallocation (last reference) of a pageout object
359 * (used for cleaning-in-place) by dropping the paging references/
360 * freeing pages in the original object.
361 */
362
363 assert(object->pageout);
364 shadow_object = object->shadow;
365 vm_object_lock(shadow_object);
366
367 while (!vm_page_queue_empty(&object->memq)) {
368 vm_page_t p, m;
369 vm_object_offset_t offset;
370
371 p = (vm_page_t) vm_page_queue_first(&object->memq);
372
373 assert(p->vmp_private);
374 assert(p->vmp_free_when_done);
375 p->vmp_free_when_done = FALSE;
376 assert(!p->vmp_cleaning);
377 assert(!p->vmp_laundry);
378
379 offset = p->vmp_offset;
380 VM_PAGE_FREE(p);
381 p = VM_PAGE_NULL;
382
383 m = vm_page_lookup(shadow_object,
384 offset + object->vo_shadow_offset);
385
386 if(m == VM_PAGE_NULL)
387 continue;
388
389 assert((m->vmp_dirty) || (m->vmp_precious) ||
390 (m->vmp_busy && m->vmp_cleaning));
391
392 /*
393 * Handle the trusted pager throttle.
394 * Also decrement the burst throttle (if external).
395 */
396 vm_page_lock_queues();
397 if (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)
398 vm_pageout_throttle_up(m);
399
400 /*
401 * Handle the "target" page(s). These pages are to be freed if
402 * successfully cleaned. Target pages are always busy, and are
403 * wired exactly once. The initial target pages are not mapped,
404 * (so cannot be referenced or modified) but converted target
405 * pages may have been modified between the selection as an
406 * adjacent page and conversion to a target.
407 */
408 if (m->vmp_free_when_done) {
409 assert(m->vmp_busy);
410 assert(m->vmp_q_state == VM_PAGE_IS_WIRED);
411 assert(m->vmp_wire_count == 1);
412 m->vmp_cleaning = FALSE;
413 m->vmp_free_when_done = FALSE;
414 /*
415 * Revoke all access to the page. Since the object is
416 * locked, and the page is busy, this prevents the page
417 * from being dirtied after the pmap_disconnect() call
418 * returns.
419 *
420 			 * Since the page is left "dirty" but "not modified", we
421 * can detect whether the page was redirtied during
422 * pageout by checking the modify state.
423 */
424 if (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED) {
425 SET_PAGE_DIRTY(m, FALSE);
426 } else {
427 m->vmp_dirty = FALSE;
428 }
429
430 if (m->vmp_dirty) {
431 vm_page_unwire(m, TRUE); /* reactivates */
432 VM_STAT_INCR(reactivations);
433 PAGE_WAKEUP_DONE(m);
434 } else {
435 vm_page_free(m); /* clears busy, etc. */
436 }
437 vm_page_unlock_queues();
438 continue;
439 }
440 /*
441 * Handle the "adjacent" pages. These pages were cleaned in
442 * place, and should be left alone.
443 * If prep_pin_count is nonzero, then someone is using the
444 * page, so make it active.
445 */
446 if ((m->vmp_q_state == VM_PAGE_NOT_ON_Q) && !m->vmp_private) {
447 if (m->vmp_reference)
448 vm_page_activate(m);
449 else
450 vm_page_deactivate(m);
451 }
452 if (m->vmp_overwriting) {
453 /*
454 * the (COPY_OUT_FROM == FALSE) request_page_list case
455 */
456 if (m->vmp_busy) {
457 /*
458 * We do not re-set m->vmp_dirty !
459 * The page was busy so no extraneous activity
460 * could have occurred. COPY_INTO is a read into the
461 * new pages. CLEAN_IN_PLACE does actually write
462 * out the pages but handling outside of this code
463 * will take care of resetting dirty. We clear the
464 * modify however for the Programmed I/O case.
465 */
466 pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
467
468 m->vmp_busy = FALSE;
469 m->vmp_absent = FALSE;
470 } else {
471 /*
472 * alternate (COPY_OUT_FROM == FALSE) request_page_list case
473 * Occurs when the original page was wired
474 * at the time of the list request
475 */
476 assert(VM_PAGE_WIRED(m));
477 vm_page_unwire(m, TRUE); /* reactivates */
478 }
479 m->vmp_overwriting = FALSE;
480 } else {
481 m->vmp_dirty = FALSE;
482 }
483 m->vmp_cleaning = FALSE;
484
485 /*
486 		 * Wake up any thread waiting for the page to leave the "cleaning" state.
487 */
488 PAGE_WAKEUP(m);
489 vm_page_unlock_queues();
490 }
491 /*
492 * Account for the paging reference taken in vm_paging_object_allocate.
493 */
494 vm_object_activity_end(shadow_object);
495 vm_object_unlock(shadow_object);
496
497 assert(object->ref_count == 0);
498 assert(object->paging_in_progress == 0);
499 assert(object->activity_in_progress == 0);
500 assert(object->resident_page_count == 0);
501 return;
502 }
503
504 /*
505 * Routine: vm_pageclean_setup
506 *
507 * Purpose: setup a page to be cleaned (made non-dirty), but not
508 * necessarily flushed from the VM page cache.
509 * This is accomplished by cleaning in place.
510 *
511 * The page must not be busy, and new_object
512 * must be locked.
513 *
514 */
515 static void
516 vm_pageclean_setup(
517 vm_page_t m,
518 vm_page_t new_m,
519 vm_object_t new_object,
520 vm_object_offset_t new_offset)
521 {
522 assert(!m->vmp_busy);
523 #if 0
524 assert(!m->vmp_cleaning);
525 #endif
526
527 XPR(XPR_VM_PAGEOUT,
528 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
529 VM_PAGE_OBJECT(m), m->vmp_offset, m,
530 new_m, new_offset);
531
532 pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
533
534 /*
535 * Mark original page as cleaning in place.
536 */
537 m->vmp_cleaning = TRUE;
538 SET_PAGE_DIRTY(m, FALSE);
539 m->vmp_precious = FALSE;
540
541 /*
542 * Convert the fictitious page to a private shadow of
543 * the real page.
544 */
545 assert(new_m->vmp_fictitious);
546 assert(VM_PAGE_GET_PHYS_PAGE(new_m) == vm_page_fictitious_addr);
547 new_m->vmp_fictitious = FALSE;
548 new_m->vmp_private = TRUE;
549 new_m->vmp_free_when_done = TRUE;
550 VM_PAGE_SET_PHYS_PAGE(new_m, VM_PAGE_GET_PHYS_PAGE(m));
551
552 vm_page_lockspin_queues();
553 vm_page_wire(new_m, VM_KERN_MEMORY_NONE, TRUE);
554 vm_page_unlock_queues();
555
556 vm_page_insert_wired(new_m, new_object, new_offset, VM_KERN_MEMORY_NONE);
557 assert(!new_m->vmp_wanted);
558 new_m->vmp_busy = FALSE;
559 }
560
561 /*
562 * Routine: vm_pageout_initialize_page
563 * Purpose:
564 * Causes the specified page to be initialized in
565 * the appropriate memory object. This routine is used to push
566 * pages into a copy-object when they are modified in the
567 * permanent object.
568 *
569 * The page is moved to a temporary object and paged out.
570 *
571 * In/out conditions:
572 * The page in question must not be on any pageout queues.
573 * The object to which it belongs must be locked.
574 * The page must be busy, but not hold a paging reference.
575 *
576 * Implementation:
577 * Move this page to a completely new object.
578 */
579 void
580 vm_pageout_initialize_page(
581 vm_page_t m)
582 {
583 vm_object_t object;
584 vm_object_offset_t paging_offset;
585 memory_object_t pager;
586
587 XPR(XPR_VM_PAGEOUT,
588 "vm_pageout_initialize_page, page 0x%X\n",
589 m, 0, 0, 0, 0);
590
591 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
592
593 object = VM_PAGE_OBJECT(m);
594
595 assert(m->vmp_busy);
596 assert(object->internal);
597
598 /*
599 * Verify that we really want to clean this page
600 */
601 assert(!m->vmp_absent);
602 assert(!m->vmp_error);
603 assert(m->vmp_dirty);
604
605 /*
606 * Create a paging reference to let us play with the object.
607 */
608 paging_offset = m->vmp_offset + object->paging_offset;
609
610 if (m->vmp_absent || m->vmp_error || m->vmp_restart || (!m->vmp_dirty && !m->vmp_precious)) {
611 panic("reservation without pageout?"); /* alan */
612
613 VM_PAGE_FREE(m);
614 vm_object_unlock(object);
615
616 return;
617 }
618
619 /*
620 * If there's no pager, then we can't clean the page. This should
621 * never happen since this should be a copy object and therefore not
622 * an external object, so the pager should always be there.
623 */
624
625 pager = object->pager;
626
627 if (pager == MEMORY_OBJECT_NULL) {
628 panic("missing pager for copy object");
629
630 VM_PAGE_FREE(m);
631 return;
632 }
633
634 /*
635 * set the page for future call to vm_fault_list_request
636 */
637 pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
638 SET_PAGE_DIRTY(m, FALSE);
639
640 /*
641 * keep the object from collapsing or terminating
642 */
643 vm_object_paging_begin(object);
644 vm_object_unlock(object);
645
646 /*
647 * Write the data to its pager.
648 * Note that the data is passed by naming the new object,
649 * not a virtual address; the pager interface has been
650 * manipulated to use the "internal memory" data type.
651 * [The object reference from its allocation is donated
652 * to the eventual recipient.]
653 */
654 memory_object_data_initialize(pager, paging_offset, PAGE_SIZE);
655
656 vm_object_lock(object);
657 vm_object_paging_end(object);
658 }
659
660
661 /*
662 * vm_pageout_cluster:
663 *
664 * Given a page, queue it to the appropriate I/O thread,
665 * which will page it out and attempt to clean adjacent pages
666 * in the same operation.
667 *
668 * The object and queues must be locked. We will take a
669 * paging reference to prevent deallocation or collapse when we
670 * release the object lock back at the call site. The I/O thread
671  * is responsible for consuming this reference.
672 *
673 * The page must not be on any pageout queue.
674 */
675 #if DEVELOPMENT || DEBUG
676 vmct_stats_t vmct_stats;
677
678 int32_t vmct_active = 0;
679 uint64_t vm_compressor_epoch_start = 0;
680 uint64_t vm_compressor_epoch_stop = 0;
681
682 typedef enum vmct_state_t {
683 VMCT_IDLE,
684 VMCT_AWAKENED,
685 VMCT_ACTIVE,
686 } vmct_state_t;
687 vmct_state_t vmct_state[MAX_COMPRESSOR_THREAD_COUNT];
688 #endif
689
690
691 void
692 vm_pageout_cluster(vm_page_t m)
693 {
694 vm_object_t object = VM_PAGE_OBJECT(m);
695 struct vm_pageout_queue *q;
696
697
698 XPR(XPR_VM_PAGEOUT,
699 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
700 object, m->vmp_offset, m, 0, 0);
701
702 VM_PAGE_CHECK(m);
703 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
704 vm_object_lock_assert_exclusive(object);
705
706 /*
707 * Only a certain kind of page is appreciated here.
708 */
709 assert((m->vmp_dirty || m->vmp_precious) && (!VM_PAGE_WIRED(m)));
710 assert(!m->vmp_cleaning && !m->vmp_laundry);
711 assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
712
713 /*
714 * protect the object from collapse or termination
715 */
716 vm_object_activity_begin(object);
717
718 if (object->internal == TRUE) {
719 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
720
721 m->vmp_busy = TRUE;
722
723 q = &vm_pageout_queue_internal;
724 } else
725 q = &vm_pageout_queue_external;
726
727 /*
728 * pgo_laundry count is tied to the laundry bit
729 */
730 m->vmp_laundry = TRUE;
731 q->pgo_laundry++;
732
733 m->vmp_q_state = VM_PAGE_ON_PAGEOUT_Q;
734 vm_page_queue_enter(&q->pgo_pending, m, vm_page_t, vmp_pageq);
735
736 if (q->pgo_idle == TRUE) {
737 q->pgo_idle = FALSE;
738 thread_wakeup((event_t) &q->pgo_pending);
739 }
740 VM_PAGE_CHECK(m);
741 }
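
#if 0
/*
 * Minimal sketch (not part of the original source) of the locking
 * contract described in the block comment above vm_pageout_cluster():
 * a hypothetical caller takes the object lock exclusively and the page
 * queues lock, checks that the page is eligible per the asserts in
 * vm_pageout_cluster(), and lets the I/O thread consume the paging
 * reference taken inside vm_pageout_cluster().
 */
static void
example_hand_page_to_pageout_thread(vm_page_t m)
{
	vm_object_t	object = VM_PAGE_OBJECT(m);

	vm_object_lock(object);			/* exclusive object lock */
	vm_page_lock_queues();			/* vm_page_queue_lock */

	if ((m->vmp_dirty || m->vmp_precious) &&
	    !VM_PAGE_WIRED(m) &&
	    !m->vmp_cleaning && !m->vmp_laundry &&
	    m->vmp_q_state == VM_PAGE_NOT_ON_Q) {
		/* takes a paging reference on 'object' for the I/O thread */
		vm_pageout_cluster(m);
	}
	vm_page_unlock_queues();
	vm_object_unlock(object);
}
#endif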
742
743
744 /*
745 * A page is back from laundry or we are stealing it back from
746 * the laundering state. See if there are some pages waiting to
747 * go to laundry and if we can let some of them go now.
748 *
749 * Object and page queues must be locked.
750 */
751 void
752 vm_pageout_throttle_up(
753 vm_page_t m)
754 {
755 struct vm_pageout_queue *q;
756 vm_object_t m_object;
757
758 m_object = VM_PAGE_OBJECT(m);
759
760 assert(m_object != VM_OBJECT_NULL);
761 assert(m_object != kernel_object);
762
763 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
764 vm_object_lock_assert_exclusive(m_object);
765
766 if (m_object->internal == TRUE)
767 q = &vm_pageout_queue_internal;
768 else
769 q = &vm_pageout_queue_external;
770
771 if (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) {
772
773 vm_page_queue_remove(&q->pgo_pending, m, vm_page_t, vmp_pageq);
774 m->vmp_q_state = VM_PAGE_NOT_ON_Q;
775
776 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
777
778 vm_object_activity_end(m_object);
779
780 VM_PAGEOUT_DEBUG(vm_page_steal_pageout_page, 1);
781 }
782 if (m->vmp_laundry == TRUE) {
783
784 m->vmp_laundry = FALSE;
785 q->pgo_laundry--;
786
787 if (q->pgo_throttled == TRUE) {
788 q->pgo_throttled = FALSE;
789 thread_wakeup((event_t) &q->pgo_laundry);
790 }
791 if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
792 q->pgo_draining = FALSE;
793 thread_wakeup((event_t) (&q->pgo_laundry+1));
794 }
795 VM_PAGEOUT_DEBUG(vm_pageout_throttle_up_count, 1);
796 }
797 }
798
799
800 static void
801 vm_pageout_throttle_up_batch(
802 struct vm_pageout_queue *q,
803 int batch_cnt)
804 {
805 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
806
807 VM_PAGEOUT_DEBUG(vm_pageout_throttle_up_count, batch_cnt);
808
809 q->pgo_laundry -= batch_cnt;
810
811 if (q->pgo_throttled == TRUE) {
812 q->pgo_throttled = FALSE;
813 thread_wakeup((event_t) &q->pgo_laundry);
814 }
815 if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
816 q->pgo_draining = FALSE;
817 thread_wakeup((event_t) (&q->pgo_laundry+1));
818 }
819 }
820
821
822
823 /*
824 * VM memory pressure monitoring.
825 *
826 * vm_pageout_scan() keeps track of the number of pages it considers and
827 * reclaims, in the currently active vm_pageout_stat[vm_pageout_stat_now].
828 *
829 * compute_memory_pressure() is called every second from compute_averages()
830 * and moves "vm_pageout_stat_now" forward, to start accumulating the number
831  * of reclaimed pages in a new vm_pageout_stat[] bucket.
832 *
833 * mach_vm_pressure_monitor() collects past statistics about memory pressure.
834 * The caller provides the number of seconds ("nsecs") worth of statistics
835 * it wants, up to 30 seconds.
836 * It computes the number of pages reclaimed in the past "nsecs" seconds and
837 * also returns the number of pages the system still needs to reclaim at this
838 * moment in time.
839 */
840 #if DEVELOPMENT || DEBUG
841 #define VM_PAGEOUT_STAT_SIZE (30 * 8) + 1
842 #else
843 #define VM_PAGEOUT_STAT_SIZE (1 * 8) + 1
844 #endif
845 struct vm_pageout_stat {
846 unsigned long vm_page_active_count;
847 unsigned long vm_page_speculative_count;
848 unsigned long vm_page_inactive_count;
849 unsigned long vm_page_anonymous_count;
850
851 unsigned long vm_page_free_count;
852 unsigned long vm_page_wire_count;
853 unsigned long vm_page_compressor_count;
854
855 unsigned long vm_page_pages_compressed;
856 unsigned long vm_page_pageable_internal_count;
857 unsigned long vm_page_pageable_external_count;
858 unsigned long vm_page_xpmapped_external_count;
859
860 unsigned int pages_grabbed;
861 unsigned int pages_freed;
862
863 unsigned int pages_compressed;
864 unsigned int pages_grabbed_by_compressor;
865 unsigned int failed_compressions;
866
867 unsigned int pages_evicted;
868 unsigned int pages_purged;
869
870 unsigned int considered;
871 unsigned int considered_bq_internal;
872 unsigned int considered_bq_external;
873
874 unsigned int skipped_external;
875 unsigned int filecache_min_reactivations;
876
877 unsigned int freed_speculative;
878 unsigned int freed_cleaned;
879 unsigned int freed_internal;
880 unsigned int freed_external;
881
882 unsigned int cleaned_dirty_external;
883 unsigned int cleaned_dirty_internal;
884
885 unsigned int inactive_referenced;
886 unsigned int inactive_nolock;
887 unsigned int reactivation_limit_exceeded;
888 unsigned int forced_inactive_reclaim;
889
890 unsigned int throttled_internal_q;
891 unsigned int throttled_external_q;
892
893 unsigned int phantom_ghosts_found;
894 unsigned int phantom_ghosts_added;
895 } vm_pageout_stats[VM_PAGEOUT_STAT_SIZE] = {{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, };
896
897 unsigned int vm_pageout_stat_now = 0;
898
899 #define VM_PAGEOUT_STAT_BEFORE(i) \
900 (((i) == 0) ? VM_PAGEOUT_STAT_SIZE - 1 : (i) - 1)
901 #define VM_PAGEOUT_STAT_AFTER(i) \
902 (((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 0 : (i) + 1)
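
/*
 * Illustrative example (not part of the original source): with the
 * non-DEVELOPMENT VM_PAGEOUT_STAT_SIZE of (1 * 8) + 1 = 9 slots, the
 * vm_pageout_stats[] array behaves as a circular ring indexed by
 * vm_pageout_stat_now:
 *
 *	VM_PAGEOUT_STAT_BEFORE(0) == 8		(wraps to the last slot)
 *	VM_PAGEOUT_STAT_BEFORE(5) == 4
 *	VM_PAGEOUT_STAT_AFTER(8)  == 0		(wraps to the first slot)
 *
 * record_memory_pressure() advances "now" with VM_PAGEOUT_STAT_AFTER()
 * and zeroes the new slot, while mach_vm_pressure_monitor() walks
 * backwards through history with VM_PAGEOUT_STAT_BEFORE().
 */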
903
904 #if VM_PAGE_BUCKETS_CHECK
905 int vm_page_buckets_check_interval = 80; /* in eighths of a second */
906 #endif /* VM_PAGE_BUCKETS_CHECK */
907
908
909 void
910 record_memory_pressure(void);
911 void
912 record_memory_pressure(void)
913 {
914 unsigned int vm_pageout_next;
915
916 #if VM_PAGE_BUCKETS_CHECK
917 /* check the consistency of VM page buckets at regular interval */
918 static int counter = 0;
919 if ((++counter % vm_page_buckets_check_interval) == 0) {
920 vm_page_buckets_check();
921 }
922 #endif /* VM_PAGE_BUCKETS_CHECK */
923
924 vm_pageout_state.vm_memory_pressure =
925 vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_speculative +
926 vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_cleaned +
927 vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_internal +
928 vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_external;
929
930 commpage_set_memory_pressure( (unsigned int)vm_pageout_state.vm_memory_pressure );
931
932 /* move "now" forward */
933 vm_pageout_next = VM_PAGEOUT_STAT_AFTER(vm_pageout_stat_now);
934
935 bzero(&vm_pageout_stats[vm_pageout_next], sizeof(struct vm_pageout_stat));
936
937 vm_pageout_stat_now = vm_pageout_next;
938 }
939
940
941 /*
942 * IMPORTANT
943 * mach_vm_ctl_page_free_wanted() is called indirectly, via
944 * mach_vm_pressure_monitor(), when taking a stackshot. Therefore,
945 * it must be safe in the restricted stackshot context. Locks and/or
946 * blocking are not allowable.
947 */
948 unsigned int
949 mach_vm_ctl_page_free_wanted(void)
950 {
951 unsigned int page_free_target, page_free_count, page_free_wanted;
952
953 page_free_target = vm_page_free_target;
954 page_free_count = vm_page_free_count;
955 if (page_free_target > page_free_count) {
956 page_free_wanted = page_free_target - page_free_count;
957 } else {
958 page_free_wanted = 0;
959 }
960
961 return page_free_wanted;
962 }
963
964
965 /*
966 * IMPORTANT:
967 * mach_vm_pressure_monitor() is called when taking a stackshot, with
968 * wait_for_pressure FALSE, so that code path must remain safe in the
969  * restricted stackshot context. No blocking or locks are allowable
970  * on that code path.
971 */
972
973 kern_return_t
974 mach_vm_pressure_monitor(
975 boolean_t wait_for_pressure,
976 unsigned int nsecs_monitored,
977 unsigned int *pages_reclaimed_p,
978 unsigned int *pages_wanted_p)
979 {
980 wait_result_t wr;
981 unsigned int vm_pageout_then, vm_pageout_now;
982 unsigned int pages_reclaimed;
983 unsigned int units_of_monitor;
984
985 units_of_monitor = 8 * nsecs_monitored;
986 /*
987 * We don't take the vm_page_queue_lock here because we don't want
988 * vm_pressure_monitor() to get in the way of the vm_pageout_scan()
989 * thread when it's trying to reclaim memory. We don't need fully
990 * accurate monitoring anyway...
991 */
992
993 if (wait_for_pressure) {
994 /* wait until there's memory pressure */
995 while (vm_page_free_count >= vm_page_free_target) {
996 wr = assert_wait((event_t) &vm_page_free_wanted,
997 THREAD_INTERRUPTIBLE);
998 if (wr == THREAD_WAITING) {
999 wr = thread_block(THREAD_CONTINUE_NULL);
1000 }
1001 if (wr == THREAD_INTERRUPTED) {
1002 return KERN_ABORTED;
1003 }
1004 if (wr == THREAD_AWAKENED) {
1005 /*
1006 * The memory pressure might have already
1007 * been relieved but let's not block again
1008 * and let's report that there was memory
1009 * pressure at some point.
1010 */
1011 break;
1012 }
1013 }
1014 }
1015
1016 /* provide the number of pages the system wants to reclaim */
1017 if (pages_wanted_p != NULL) {
1018 *pages_wanted_p = mach_vm_ctl_page_free_wanted();
1019 }
1020
1021 if (pages_reclaimed_p == NULL) {
1022 return KERN_SUCCESS;
1023 }
1024
1025 /* provide number of pages reclaimed in the last "nsecs_monitored" */
1026 vm_pageout_now = vm_pageout_stat_now;
1027 pages_reclaimed = 0;
1028 for (vm_pageout_then =
1029 VM_PAGEOUT_STAT_BEFORE(vm_pageout_now);
1030 vm_pageout_then != vm_pageout_now &&
1031 units_of_monitor-- != 0;
1032 vm_pageout_then =
1033 VM_PAGEOUT_STAT_BEFORE(vm_pageout_then)) {
1034 pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_speculative;
1035 pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_cleaned;
1036 pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_internal;
1037 pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_external;
1038 }
1039 *pages_reclaimed_p = pages_reclaimed;
1040
1041 return KERN_SUCCESS;
1042 }
1043
1044
1045
1046 #if DEVELOPMENT || DEBUG
1047
1048 static void
1049 vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t *, int);
1050
1051 /*
1052 * condition variable used to make sure there is
1053 * only a single sweep going on at a time
1054 */
1055 boolean_t vm_pageout_disconnect_all_pages_active = FALSE;
1056
1057
1058 void
1059 vm_pageout_disconnect_all_pages()
1060 {
1061 vm_page_lock_queues();
1062
1063 if (vm_pageout_disconnect_all_pages_active == TRUE) {
1064 vm_page_unlock_queues();
1065 return;
1066 }
1067 vm_pageout_disconnect_all_pages_active = TRUE;
1068 vm_page_unlock_queues();
1069
1070 vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_throttled, vm_page_throttled_count);
1071 vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_anonymous, vm_page_anonymous_count);
1072 vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_active, vm_page_active_count);
1073
1074 vm_pageout_disconnect_all_pages_active = FALSE;
1075 }
1076
1077
1078 void
1079 vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t *q, int qcount)
1080 {
1081 vm_page_t m;
1082 vm_object_t t_object = NULL;
1083 vm_object_t l_object = NULL;
1084 vm_object_t m_object = NULL;
1085 int delayed_unlock = 0;
1086 int try_failed_count = 0;
1087 int disconnected_count = 0;
1088 int paused_count = 0;
1089 int object_locked_count = 0;
1090
1091 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_ALL_PAGE_MAPPINGS)) | DBG_FUNC_START,
1092 q, qcount, 0, 0, 0);
1093
1094 vm_page_lock_queues();
1095
1096 while (qcount && !vm_page_queue_empty(q)) {
1097
1098 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1099
1100 m = (vm_page_t) vm_page_queue_first(q);
1101 m_object = VM_PAGE_OBJECT(m);
1102
1103 /*
1104 * check to see if we currently are working
1105 * with the same object... if so, we've
1106 * already got the lock
1107 */
1108 if (m_object != l_object) {
1109 /*
1110 * the object associated with candidate page is
1111 * different from the one we were just working
1112 * with... dump the lock if we still own it
1113 */
1114 if (l_object != NULL) {
1115 vm_object_unlock(l_object);
1116 l_object = NULL;
1117 }
1118 if (m_object != t_object)
1119 try_failed_count = 0;
1120
1121 /*
1122 			 * Try to lock the object; since we've already got the
1123 			 * page queues lock, we can only 'try' for this one.
1124 			 * If the 'try' fails, we need to do a mutex_pause
1125 * to allow the owner of the object lock a chance to
1126 * run...
1127 */
1128 if ( !vm_object_lock_try_scan(m_object)) {
1129
1130 if (try_failed_count > 20) {
1131 goto reenter_pg_on_q;
1132 }
1133 vm_page_unlock_queues();
1134 mutex_pause(try_failed_count++);
1135 vm_page_lock_queues();
1136 delayed_unlock = 0;
1137
1138 paused_count++;
1139
1140 t_object = m_object;
1141 continue;
1142 }
1143 object_locked_count++;
1144
1145 l_object = m_object;
1146 }
1147 if ( !m_object->alive || m->vmp_cleaning || m->vmp_laundry || m->vmp_busy || m->vmp_absent || m->vmp_error || m->vmp_free_when_done) {
1148 /*
1149 * put it back on the head of its queue
1150 */
1151 goto reenter_pg_on_q;
1152 }
1153 if (m->vmp_pmapped == TRUE) {
1154
1155 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
1156
1157 disconnected_count++;
1158 }
1159 reenter_pg_on_q:
1160 vm_page_queue_remove(q, m, vm_page_t, vmp_pageq);
1161 vm_page_queue_enter(q, m, vm_page_t, vmp_pageq);
1162
1163 qcount--;
1164 try_failed_count = 0;
1165
1166 if (delayed_unlock++ > 128) {
1167
1168 if (l_object != NULL) {
1169 vm_object_unlock(l_object);
1170 l_object = NULL;
1171 }
1172 lck_mtx_yield(&vm_page_queue_lock);
1173 delayed_unlock = 0;
1174 }
1175 }
1176 if (l_object != NULL) {
1177 vm_object_unlock(l_object);
1178 l_object = NULL;
1179 }
1180 vm_page_unlock_queues();
1181
1182 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_ALL_PAGE_MAPPINGS)) | DBG_FUNC_END,
1183 q, disconnected_count, object_locked_count, paused_count, 0);
1184 }
1185
1186 #endif
1187
1188
1189 static void
1190 vm_pageout_page_queue(vm_page_queue_head_t *, int);
1191
1192 /*
1193 * condition variable used to make sure there is
1194 * only a single sweep going on at a time
1195 */
1196 boolean_t vm_pageout_anonymous_pages_active = FALSE;
1197
1198
1199 void
1200 vm_pageout_anonymous_pages()
1201 {
1202 if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
1203
1204 vm_page_lock_queues();
1205
1206 if (vm_pageout_anonymous_pages_active == TRUE) {
1207 vm_page_unlock_queues();
1208 return;
1209 }
1210 vm_pageout_anonymous_pages_active = TRUE;
1211 vm_page_unlock_queues();
1212
1213 vm_pageout_page_queue(&vm_page_queue_throttled, vm_page_throttled_count);
1214 vm_pageout_page_queue(&vm_page_queue_anonymous, vm_page_anonymous_count);
1215 vm_pageout_page_queue(&vm_page_queue_active, vm_page_active_count);
1216
1217 if (VM_CONFIG_SWAP_IS_PRESENT)
1218 vm_consider_swapping();
1219
1220 vm_page_lock_queues();
1221 vm_pageout_anonymous_pages_active = FALSE;
1222 vm_page_unlock_queues();
1223 }
1224 }
1225
1226
1227 void
1228 vm_pageout_page_queue(vm_page_queue_head_t *q, int qcount)
1229 {
1230 vm_page_t m;
1231 vm_object_t t_object = NULL;
1232 vm_object_t l_object = NULL;
1233 vm_object_t m_object = NULL;
1234 int delayed_unlock = 0;
1235 int try_failed_count = 0;
1236 int refmod_state;
1237 int pmap_options;
1238 struct vm_pageout_queue *iq;
1239 ppnum_t phys_page;
1240
1241
1242 iq = &vm_pageout_queue_internal;
1243
1244 vm_page_lock_queues();
1245
1246 while (qcount && !vm_page_queue_empty(q)) {
1247
1248 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1249
1250 if (VM_PAGE_Q_THROTTLED(iq)) {
1251
1252 if (l_object != NULL) {
1253 vm_object_unlock(l_object);
1254 l_object = NULL;
1255 }
1256 iq->pgo_draining = TRUE;
1257
1258 assert_wait((event_t) (&iq->pgo_laundry + 1), THREAD_INTERRUPTIBLE);
1259 vm_page_unlock_queues();
1260
1261 thread_block(THREAD_CONTINUE_NULL);
1262
1263 vm_page_lock_queues();
1264 delayed_unlock = 0;
1265 continue;
1266 }
1267 m = (vm_page_t) vm_page_queue_first(q);
1268 m_object = VM_PAGE_OBJECT(m);
1269
1270 /*
1271 * check to see if we currently are working
1272 * with the same object... if so, we've
1273 * already got the lock
1274 */
1275 if (m_object != l_object) {
1276 if ( !m_object->internal)
1277 goto reenter_pg_on_q;
1278
1279 /*
1280 * the object associated with candidate page is
1281 * different from the one we were just working
1282 * with... dump the lock if we still own it
1283 */
1284 if (l_object != NULL) {
1285 vm_object_unlock(l_object);
1286 l_object = NULL;
1287 }
1288 if (m_object != t_object)
1289 try_failed_count = 0;
1290
1291 /*
1292 			 * Try to lock the object; since we've already got the
1293 			 * page queues lock, we can only 'try' for this one.
1294 			 * If the 'try' fails, we need to do a mutex_pause
1295 * to allow the owner of the object lock a chance to
1296 * run...
1297 */
1298 if ( !vm_object_lock_try_scan(m_object)) {
1299
1300 if (try_failed_count > 20) {
1301 goto reenter_pg_on_q;
1302 }
1303 vm_page_unlock_queues();
1304 mutex_pause(try_failed_count++);
1305 vm_page_lock_queues();
1306 delayed_unlock = 0;
1307
1308 t_object = m_object;
1309 continue;
1310 }
1311 l_object = m_object;
1312 }
1313 if ( !m_object->alive || m->vmp_cleaning || m->vmp_laundry || m->vmp_busy || m->vmp_absent || m->vmp_error || m->vmp_free_when_done) {
1314 /*
1315 * page is not to be cleaned
1316 * put it back on the head of its queue
1317 */
1318 goto reenter_pg_on_q;
1319 }
1320 phys_page = VM_PAGE_GET_PHYS_PAGE(m);
1321
1322 if (m->vmp_reference == FALSE && m->vmp_pmapped == TRUE) {
1323 refmod_state = pmap_get_refmod(phys_page);
1324
1325 if (refmod_state & VM_MEM_REFERENCED)
1326 m->vmp_reference = TRUE;
1327 if (refmod_state & VM_MEM_MODIFIED) {
1328 SET_PAGE_DIRTY(m, FALSE);
1329 }
1330 }
1331 if (m->vmp_reference == TRUE) {
1332 m->vmp_reference = FALSE;
1333 pmap_clear_refmod_options(phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
1334 goto reenter_pg_on_q;
1335 }
1336 if (m->vmp_pmapped == TRUE) {
1337 if (m->vmp_dirty || m->vmp_precious) {
1338 pmap_options = PMAP_OPTIONS_COMPRESSOR;
1339 } else {
1340 pmap_options = PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
1341 }
1342 refmod_state = pmap_disconnect_options(phys_page, pmap_options, NULL);
1343 if (refmod_state & VM_MEM_MODIFIED) {
1344 SET_PAGE_DIRTY(m, FALSE);
1345 }
1346 }
1347
1348 if ( !m->vmp_dirty && !m->vmp_precious) {
1349 vm_page_unlock_queues();
1350 VM_PAGE_FREE(m);
1351 vm_page_lock_queues();
1352 delayed_unlock = 0;
1353
1354 goto next_pg;
1355 }
1356 if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL) {
1357
1358 if (!m_object->pager_initialized) {
1359
1360 vm_page_unlock_queues();
1361
1362 vm_object_collapse(m_object, (vm_object_offset_t) 0, TRUE);
1363
1364 if (!m_object->pager_initialized)
1365 vm_object_compressor_pager_create(m_object);
1366
1367 vm_page_lock_queues();
1368 delayed_unlock = 0;
1369 }
1370 if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL)
1371 goto reenter_pg_on_q;
1372 /*
1373 * vm_object_compressor_pager_create will drop the object lock
1374 * which means 'm' may no longer be valid to use
1375 */
1376 continue;
1377 }
1378 /*
1379 * we've already factored out pages in the laundry which
1380 * means this page can't be on the pageout queue so it's
1381 * safe to do the vm_page_queues_remove
1382 */
1383 vm_page_queues_remove(m, TRUE);
1384
1385 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1386
1387 vm_pageout_cluster(m);
1388
1389 goto next_pg;
1390
1391 reenter_pg_on_q:
1392 vm_page_queue_remove(q, m, vm_page_t, vmp_pageq);
1393 vm_page_queue_enter(q, m, vm_page_t, vmp_pageq);
1394 next_pg:
1395 qcount--;
1396 try_failed_count = 0;
1397
1398 if (delayed_unlock++ > 128) {
1399
1400 if (l_object != NULL) {
1401 vm_object_unlock(l_object);
1402 l_object = NULL;
1403 }
1404 lck_mtx_yield(&vm_page_queue_lock);
1405 delayed_unlock = 0;
1406 }
1407 }
1408 if (l_object != NULL) {
1409 vm_object_unlock(l_object);
1410 l_object = NULL;
1411 }
1412 vm_page_unlock_queues();
1413 }
1414
1415
1416
1417 /*
1418 * function in BSD to apply I/O throttle to the pageout thread
1419 */
1420 extern void vm_pageout_io_throttle(void);
1421
1422 #define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, obj) \
1423 MACRO_BEGIN \
1424 /* \
1425 * If a "reusable" page somehow made it back into \
1426 * the active queue, it's been re-used and is not \
1427 * quite re-usable. \
1428 * If the VM object was "all_reusable", consider it \
1429 * as "all re-used" instead of converting it to \
1430 * "partially re-used", which could be expensive. \
1431 */ \
1432 assert(VM_PAGE_OBJECT((m)) == (obj)); \
1433 if ((m)->vmp_reusable || \
1434 (obj)->all_reusable) { \
1435 vm_object_reuse_pages((obj), \
1436 (m)->vmp_offset, \
1437 (m)->vmp_offset + PAGE_SIZE_64, \
1438 FALSE); \
1439 } \
1440 MACRO_END
1441
1442
1443 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT 64
1444 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX 1024
1445
1446 #define FCS_IDLE 0
1447 #define FCS_DELAYED 1
1448 #define FCS_DEADLOCK_DETECTED 2
1449
1450 struct flow_control {
1451 int state;
1452 mach_timespec_t ts;
1453 };
1454
1455
1456 #if CONFIG_BACKGROUND_QUEUE
1457 uint64_t vm_pageout_rejected_bq_internal = 0;
1458 uint64_t vm_pageout_rejected_bq_external = 0;
1459 uint64_t vm_pageout_skipped_bq_internal = 0;
1460 #endif
1461
1462 #define ANONS_GRABBED_LIMIT 2
1463
1464
1465 #if 0
1466 static void vm_pageout_delayed_unlock(int *, int *, vm_page_t *);
1467 #endif
1468 static void vm_pageout_prepare_to_block(vm_object_t *, int *, vm_page_t *, int *, int);
1469
1470 #define VM_PAGEOUT_PB_NO_ACTION 0
1471 #define VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER 1
1472 #define VM_PAGEOUT_PB_THREAD_YIELD 2
1473
1474
1475 #if 0
1476 static void
1477 vm_pageout_delayed_unlock(int *delayed_unlock, int *local_freed, vm_page_t *local_freeq)
1478 {
1479 if (*local_freeq) {
1480 vm_page_unlock_queues();
1481
1482 VM_DEBUG_CONSTANT_EVENT(
1483 vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
1484 vm_page_free_count, 0, 0, 1);
1485
1486 vm_page_free_list(*local_freeq, TRUE);
1487
1488 VM_DEBUG_CONSTANT_EVENT(vm_pageout_freelist,VM_PAGEOUT_FREELIST, DBG_FUNC_END,
1489 vm_page_free_count, *local_freed, 0, 1);
1490
1491 *local_freeq = NULL;
1492 *local_freed = 0;
1493
1494 vm_page_lock_queues();
1495 } else {
1496 lck_mtx_yield(&vm_page_queue_lock);
1497 }
1498 *delayed_unlock = 1;
1499 }
1500 #endif
1501
1502
1503 static void
1504 vm_pageout_prepare_to_block(vm_object_t *object, int *delayed_unlock,
1505 vm_page_t *local_freeq, int *local_freed, int action)
1506 {
1507 vm_page_unlock_queues();
1508
1509 if (*object != NULL) {
1510 vm_object_unlock(*object);
1511 *object = NULL;
1512 }
1513 if (*local_freeq) {
1514
1515 vm_page_free_list(*local_freeq, TRUE);
1516
1517 *local_freeq = NULL;
1518 *local_freed = 0;
1519 }
1520 *delayed_unlock = 1;
1521
1522 switch (action) {
1523
1524 case VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER:
1525 vm_consider_waking_compactor_swapper();
1526 break;
1527 case VM_PAGEOUT_PB_THREAD_YIELD:
1528 thread_yield_internal(1);
1529 break;
1530 case VM_PAGEOUT_PB_NO_ACTION:
1531 default:
1532 break;
1533 }
1534 vm_page_lock_queues();
1535 }
1536
1537
1538 static struct vm_pageout_vminfo last;
1539
1540 uint64_t last_vm_page_pages_grabbed = 0;
1541
1542 extern uint32_t c_segment_pages_compressed;
1543
1544 extern uint64_t shared_region_pager_reclaimed;
1545 extern struct memory_object_pager_ops shared_region_pager_ops;
1546
1547 void update_vm_info(void)
1548 {
1549 uint64_t tmp;
1550
1551 vm_pageout_stats[vm_pageout_stat_now].vm_page_active_count = vm_page_active_count;
1552 vm_pageout_stats[vm_pageout_stat_now].vm_page_speculative_count = vm_page_speculative_count;
1553 vm_pageout_stats[vm_pageout_stat_now].vm_page_inactive_count = vm_page_inactive_count;
1554 vm_pageout_stats[vm_pageout_stat_now].vm_page_anonymous_count = vm_page_anonymous_count;
1555
1556 vm_pageout_stats[vm_pageout_stat_now].vm_page_free_count = vm_page_free_count;
1557 vm_pageout_stats[vm_pageout_stat_now].vm_page_wire_count = vm_page_wire_count;
1558 vm_pageout_stats[vm_pageout_stat_now].vm_page_compressor_count = VM_PAGE_COMPRESSOR_COUNT;
1559
1560 vm_pageout_stats[vm_pageout_stat_now].vm_page_pages_compressed = c_segment_pages_compressed;
1561 vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_internal_count = vm_page_pageable_internal_count;
1562 vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_external_count = vm_page_pageable_external_count;
1563 vm_pageout_stats[vm_pageout_stat_now].vm_page_xpmapped_external_count = vm_page_xpmapped_external_count;
1564
1565
1566 tmp = vm_pageout_vminfo.vm_pageout_considered_page;
1567 vm_pageout_stats[vm_pageout_stat_now].considered = (unsigned int)(tmp - last.vm_pageout_considered_page);
1568 last.vm_pageout_considered_page = tmp;
1569
1570 tmp = vm_pageout_vminfo.vm_pageout_compressions;
1571 vm_pageout_stats[vm_pageout_stat_now].pages_compressed = (unsigned int)(tmp - last.vm_pageout_compressions);
1572 last.vm_pageout_compressions = tmp;
1573
1574 tmp = vm_pageout_vminfo.vm_compressor_failed;
1575 vm_pageout_stats[vm_pageout_stat_now].failed_compressions = (unsigned int)(tmp - last.vm_compressor_failed);
1576 last.vm_compressor_failed = tmp;
1577
1578 tmp = vm_pageout_vminfo.vm_compressor_pages_grabbed;
1579 vm_pageout_stats[vm_pageout_stat_now].pages_grabbed_by_compressor = (unsigned int)(tmp - last.vm_compressor_pages_grabbed);
1580 last.vm_compressor_pages_grabbed = tmp;
1581
1582 tmp = vm_pageout_vminfo.vm_phantom_cache_found_ghost;
1583 vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_found = (unsigned int)(tmp - last.vm_phantom_cache_found_ghost);
1584 last.vm_phantom_cache_found_ghost = tmp;
1585
1586 tmp = vm_pageout_vminfo.vm_phantom_cache_added_ghost;
1587 vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_added = (unsigned int)(tmp - last.vm_phantom_cache_added_ghost);
1588 last.vm_phantom_cache_added_ghost = tmp;
1589
1590 tmp = get_pages_grabbed_count();
1591 vm_pageout_stats[vm_pageout_stat_now].pages_grabbed = (unsigned int)(tmp - last_vm_page_pages_grabbed);
1592 last_vm_page_pages_grabbed = tmp;
1593
1594 tmp = vm_pageout_vminfo.vm_page_pages_freed;
1595 vm_pageout_stats[vm_pageout_stat_now].pages_freed = (unsigned int)(tmp - last.vm_page_pages_freed);
1596 last.vm_page_pages_freed = tmp;
1597
1598
1599 if (vm_pageout_stats[vm_pageout_stat_now].considered) {
1600
1601 tmp = vm_pageout_vminfo.vm_pageout_pages_evicted;
1602 vm_pageout_stats[vm_pageout_stat_now].pages_evicted = (unsigned int)(tmp - last.vm_pageout_pages_evicted);
1603 last.vm_pageout_pages_evicted = tmp;
1604
1605 tmp = vm_pageout_vminfo.vm_pageout_pages_purged;
1606 vm_pageout_stats[vm_pageout_stat_now].pages_purged = (unsigned int)(tmp - last.vm_pageout_pages_purged);
1607 last.vm_pageout_pages_purged = tmp;
1608
1609 tmp = vm_pageout_vminfo.vm_pageout_freed_speculative;
1610 vm_pageout_stats[vm_pageout_stat_now].freed_speculative = (unsigned int)(tmp - last.vm_pageout_freed_speculative);
1611 last.vm_pageout_freed_speculative = tmp;
1612
1613 tmp = vm_pageout_vminfo.vm_pageout_freed_external;
1614 vm_pageout_stats[vm_pageout_stat_now].freed_external = (unsigned int)(tmp - last.vm_pageout_freed_external);
1615 last.vm_pageout_freed_external = tmp;
1616
1617 tmp = vm_pageout_vminfo.vm_pageout_inactive_referenced;
1618 vm_pageout_stats[vm_pageout_stat_now].inactive_referenced = (unsigned int)(tmp - last.vm_pageout_inactive_referenced);
1619 last.vm_pageout_inactive_referenced = tmp;
1620
1621 tmp = vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_external;
1622 vm_pageout_stats[vm_pageout_stat_now].throttled_external_q = (unsigned int)(tmp - last.vm_pageout_scan_inactive_throttled_external);
1623 last.vm_pageout_scan_inactive_throttled_external = tmp;
1624
1625 tmp = vm_pageout_vminfo.vm_pageout_inactive_dirty_external;
1626 vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_external = (unsigned int)(tmp - last.vm_pageout_inactive_dirty_external);
1627 last.vm_pageout_inactive_dirty_external = tmp;
1628
1629 tmp = vm_pageout_vminfo.vm_pageout_freed_cleaned;
1630 vm_pageout_stats[vm_pageout_stat_now].freed_cleaned = (unsigned int)(tmp - last.vm_pageout_freed_cleaned);
1631 last.vm_pageout_freed_cleaned = tmp;
1632
1633 tmp = vm_pageout_vminfo.vm_pageout_inactive_nolock;
1634 vm_pageout_stats[vm_pageout_stat_now].inactive_nolock = (unsigned int)(tmp - last.vm_pageout_inactive_nolock);
1635 last.vm_pageout_inactive_nolock = tmp;
1636
1637 tmp = vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_internal;
1638 vm_pageout_stats[vm_pageout_stat_now].throttled_internal_q = (unsigned int)(tmp - last.vm_pageout_scan_inactive_throttled_internal);
1639 last.vm_pageout_scan_inactive_throttled_internal = tmp;
1640
1641 tmp = vm_pageout_vminfo.vm_pageout_skipped_external;
1642 vm_pageout_stats[vm_pageout_stat_now].skipped_external = (unsigned int)(tmp - last.vm_pageout_skipped_external);
1643 last.vm_pageout_skipped_external = tmp;
1644
1645 tmp = vm_pageout_vminfo.vm_pageout_reactivation_limit_exceeded;
1646 vm_pageout_stats[vm_pageout_stat_now].reactivation_limit_exceeded = (unsigned int)(tmp - last.vm_pageout_reactivation_limit_exceeded);
1647 last.vm_pageout_reactivation_limit_exceeded = tmp;
1648
1649 tmp = vm_pageout_vminfo.vm_pageout_inactive_force_reclaim;
1650 vm_pageout_stats[vm_pageout_stat_now].forced_inactive_reclaim = (unsigned int)(tmp - last.vm_pageout_inactive_force_reclaim);
1651 last.vm_pageout_inactive_force_reclaim = tmp;
1652
1653 tmp = vm_pageout_vminfo.vm_pageout_freed_internal;
1654 vm_pageout_stats[vm_pageout_stat_now].freed_internal = (unsigned int)(tmp - last.vm_pageout_freed_internal);
1655 last.vm_pageout_freed_internal = tmp;
1656
1657 tmp = vm_pageout_vminfo.vm_pageout_considered_bq_internal;
1658 vm_pageout_stats[vm_pageout_stat_now].considered_bq_internal = (unsigned int)(tmp - last.vm_pageout_considered_bq_internal);
1659 last.vm_pageout_considered_bq_internal = tmp;
1660
1661 tmp = vm_pageout_vminfo.vm_pageout_considered_bq_external;
1662 vm_pageout_stats[vm_pageout_stat_now].considered_bq_external = (unsigned int)(tmp - last.vm_pageout_considered_bq_external);
1663 last.vm_pageout_considered_bq_external = tmp;
1664
1665 tmp = vm_pageout_vminfo.vm_pageout_filecache_min_reactivated;
1666 vm_pageout_stats[vm_pageout_stat_now].filecache_min_reactivations = (unsigned int)(tmp - last.vm_pageout_filecache_min_reactivated);
1667 last.vm_pageout_filecache_min_reactivated = tmp;
1668
1669 tmp = vm_pageout_vminfo.vm_pageout_inactive_dirty_internal;
1670 vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_internal = (unsigned int)(tmp - last.vm_pageout_inactive_dirty_internal);
1671 last.vm_pageout_inactive_dirty_internal = tmp;
1672 }
1673
1674 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO1)) | DBG_FUNC_NONE,
1675 vm_pageout_stats[vm_pageout_stat_now].vm_page_active_count,
1676 vm_pageout_stats[vm_pageout_stat_now].vm_page_speculative_count,
1677 vm_pageout_stats[vm_pageout_stat_now].vm_page_inactive_count,
1678 vm_pageout_stats[vm_pageout_stat_now].vm_page_anonymous_count,
1679 0);
1680
1681 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO2)) | DBG_FUNC_NONE,
1682 vm_pageout_stats[vm_pageout_stat_now].vm_page_free_count,
1683 vm_pageout_stats[vm_pageout_stat_now].vm_page_wire_count,
1684 vm_pageout_stats[vm_pageout_stat_now].vm_page_compressor_count,
1685 0,
1686 0);
1687
1688 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO3)) | DBG_FUNC_NONE,
1689 vm_pageout_stats[vm_pageout_stat_now].vm_page_pages_compressed,
1690 vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_internal_count,
1691 vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_external_count,
1692 vm_pageout_stats[vm_pageout_stat_now].vm_page_xpmapped_external_count,
1693 0);
1694
1695 if (vm_pageout_stats[vm_pageout_stat_now].considered ||
1696 vm_pageout_stats[vm_pageout_stat_now].pages_compressed ||
1697 vm_pageout_stats[vm_pageout_stat_now].failed_compressions) {
1698
1699 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO4)) | DBG_FUNC_NONE,
1700 vm_pageout_stats[vm_pageout_stat_now].considered,
1701 vm_pageout_stats[vm_pageout_stat_now].freed_speculative,
1702 vm_pageout_stats[vm_pageout_stat_now].freed_external,
1703 vm_pageout_stats[vm_pageout_stat_now].inactive_referenced,
1704 0);
1705
1706 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO5)) | DBG_FUNC_NONE,
1707 vm_pageout_stats[vm_pageout_stat_now].throttled_external_q,
1708 vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_external,
1709 vm_pageout_stats[vm_pageout_stat_now].freed_cleaned,
1710 vm_pageout_stats[vm_pageout_stat_now].inactive_nolock,
1711 0);
1712
1713 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO6)) | DBG_FUNC_NONE,
1714 vm_pageout_stats[vm_pageout_stat_now].throttled_internal_q,
1715 vm_pageout_stats[vm_pageout_stat_now].pages_compressed,
1716 vm_pageout_stats[vm_pageout_stat_now].pages_grabbed_by_compressor,
1717 vm_pageout_stats[vm_pageout_stat_now].skipped_external,
1718 0);
1719
1720 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO7)) | DBG_FUNC_NONE,
1721 vm_pageout_stats[vm_pageout_stat_now].reactivation_limit_exceeded,
1722 vm_pageout_stats[vm_pageout_stat_now].forced_inactive_reclaim,
1723 vm_pageout_stats[vm_pageout_stat_now].failed_compressions,
1724 vm_pageout_stats[vm_pageout_stat_now].freed_internal,
1725 0);
1726
1727 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO8)) | DBG_FUNC_NONE,
1728 vm_pageout_stats[vm_pageout_stat_now].considered_bq_internal,
1729 vm_pageout_stats[vm_pageout_stat_now].considered_bq_external,
1730 vm_pageout_stats[vm_pageout_stat_now].filecache_min_reactivations,
1731 vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_internal,
1732 0);
1733
1734 }
1735 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO9)) | DBG_FUNC_NONE,
1736 vm_pageout_stats[vm_pageout_stat_now].pages_grabbed,
1737 vm_pageout_stats[vm_pageout_stat_now].pages_freed,
1738 vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_found,
1739 vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_added,
1740 0);
1741
1742 record_memory_pressure();
1743 }
1744
1745 extern boolean_t hibernation_vmqueues_inspection;
1746
1747 void
1748 vm_page_balance_inactive(int max_to_move)
1749 {
1750 vm_page_t m;
1751
1752 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1753
1754 if (hibernation_vmqueues_inspection == TRUE) {
1755 /*
1756 * It is likely that the hibernation code path is
1757 * dealing with these very queues as we are about
1758 * to move pages around in/from them and completely
1759 * change the linkage of the pages.
1760 *
1761 * And so we skip the rebalancing of these queues.
1762 */
1763 return;
1764 }
1765 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1766 vm_page_inactive_count +
1767 vm_page_speculative_count);
1768
1769 while (max_to_move-- && (vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) {
1770
1771 VM_PAGEOUT_DEBUG(vm_pageout_balanced, 1);
1772
1773 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
1774
1775 assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q);
1776 assert(!m->vmp_laundry);
1777 assert(VM_PAGE_OBJECT(m) != kernel_object);
1778 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
1779
1780 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
1781
1782 /*
1783 * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise...
1784 *
1785 * a TLB flush isn't really needed here since at worst we'll miss the reference bit being
1786 * updated in the PTE if a remote processor still has this mapping cached in its TLB when the
1787 * new reference happens. If no further references happen on the page after that remote TLB is flushed
1788 * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue
1789 * by pageout_scan, which is just fine since the last reference would have happened quite far
1790 * in the past (TLB caches don't hang around for very long), and of course could just as easily
1791 * have happened before we moved the page
1792 */
1793 if (m->vmp_pmapped == TRUE)
1794 pmap_clear_refmod_options(VM_PAGE_GET_PHYS_PAGE(m), VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
1795
1796 /*
1797 * The page might be absent or busy,
1798 * but vm_page_deactivate can handle that.
1799 * FALSE indicates that we don't want a H/W clear reference
1800 */
1801 vm_page_deactivate_internal(m, FALSE);
1802 }
1803 }
1804
1805
1806 /*
1807 * vm_pageout_scan does the dirty work for the pageout daemon.
1808 * It returns with both vm_page_queue_free_lock and vm_page_queue_lock
1809 * held and vm_page_free_wanted == 0.
1810 */
1811 void
1812 vm_pageout_scan(void)
1813 {
1814 unsigned int loop_count = 0;
1815 unsigned int inactive_burst_count = 0;
1816 unsigned int reactivated_this_call;
1817 unsigned int reactivate_limit;
1818 vm_page_t local_freeq = NULL;
1819 int local_freed = 0;
1820 int delayed_unlock;
1821 int delayed_unlock_limit = 0;
1822 int refmod_state = 0;
1823 int vm_pageout_deadlock_target = 0;
1824 struct vm_pageout_queue *iq;
1825 struct vm_pageout_queue *eq;
1826 struct vm_speculative_age_q *sq;
1827 struct flow_control flow_control = { 0, { 0, 0 } };
1828 boolean_t inactive_throttled = FALSE;
1829 mach_timespec_t ts;
1830 unsigned int msecs = 0;
1831 vm_object_t object = NULL;
1832 uint32_t inactive_reclaim_run;
1833 boolean_t exceeded_burst_throttle;
1834 boolean_t grab_anonymous = FALSE;
1835 boolean_t force_anonymous = FALSE;
1836 boolean_t force_speculative_aging = FALSE;
1837 int anons_grabbed = 0;
1838 int page_prev_q_state = 0;
1839 #if CONFIG_BACKGROUND_QUEUE
1840 boolean_t page_from_bg_q = FALSE;
1841 #endif
1842 int cache_evict_throttle = 0;
1843 uint32_t vm_pageout_inactive_external_forced_reactivate_limit = 0;
1844 uint32_t inactive_external_count;
1845 int force_purge = 0;
1846 int divisor;
1847 #define DELAY_SPECULATIVE_AGE 1000
1848 int delay_speculative_age = 0;
1849 vm_object_t m_object = VM_OBJECT_NULL;
1850
1851 #if VM_PRESSURE_EVENTS
1852 vm_pressure_level_t pressure_level;
1853 #endif /* VM_PRESSURE_EVENTS */
1854
1855 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START,
1856 vm_pageout_vminfo.vm_pageout_freed_speculative,
1857 vm_pageout_state.vm_pageout_inactive_clean,
1858 vm_pageout_vminfo.vm_pageout_inactive_dirty_internal,
1859 vm_pageout_vminfo.vm_pageout_inactive_dirty_external);
1860
1861 flow_control.state = FCS_IDLE;
1862 iq = &vm_pageout_queue_internal;
1863 eq = &vm_pageout_queue_external;
1864 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
1865
1866
1867 XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
1868
1869 /* Ask the pmap layer to return any pages it no longer needs. */
1870 uint64_t pmap_wired_pages_freed = pmap_release_pages_fast();
1871
1872 vm_page_lock_queues();
1873
1874 vm_page_wire_count -= pmap_wired_pages_freed;
1875
1876 delayed_unlock = 1;
1877
1878 /*
1879 * Calculate the max number of referenced pages on the inactive
1880 * queue that we will reactivate.
1881 */
1882 reactivated_this_call = 0;
1883 reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count +
1884 vm_page_inactive_count);
1885 inactive_reclaim_run = 0;
1886
1887 vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
1888
1889 /*
1890 * We must limit the rate at which we send pages to the pagers
1891 * so that we don't tie up too many pages in the I/O queues.
1892 * We implement a throttling mechanism using the laundry count
1893 * to limit the number of pages outstanding to the default
1894 * and external pagers. We can bypass the throttles and look
1895 * for clean pages if the pageout queues don't drain in a timely
1896 * fashion since this may indicate that the pageout paths are
1897 * stalled waiting for memory, which only we can provide.
1898 */
1899
1900 Restart:
1901
1902 assert(object == NULL);
1903 assert(delayed_unlock != 0);
1904
1905 vm_page_anonymous_min = vm_page_inactive_target / 20;
1906
1907 if (vm_pageout_state.vm_page_speculative_percentage > 50)
1908 vm_pageout_state.vm_page_speculative_percentage = 50;
1909 else if (vm_pageout_state.vm_page_speculative_percentage <= 0)
1910 vm_pageout_state.vm_page_speculative_percentage = 1;
1911
1912 vm_pageout_state.vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
1913 vm_page_inactive_count);
1914
1915 for (;;) {
1916 vm_page_t m;
1917
1918 DTRACE_VM2(rev, int, 1, (uint64_t *), NULL);
1919
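/*
 * delayed_unlock_limit bounds how many pages we process per hold of
 * the page queues lock; it grows with the number of threads waiting
 * on UPL pages and is capped at VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX
 */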
1920 if (vm_upl_wait_for_pages < 0)
1921 vm_upl_wait_for_pages = 0;
1922
1923 delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT + vm_upl_wait_for_pages;
1924
1925 if (delayed_unlock_limit > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX)
1926 delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX;
1927
1928 #if CONFIG_SECLUDED_MEMORY
1929 /*
1930 * Deal with secluded_q overflow.
1931 */
1932 if (vm_page_secluded_count > vm_page_secluded_target) {
1933 vm_page_t secluded_page;
1934
1935 /*
1936 * SECLUDED_AGING_BEFORE_ACTIVE:
1937 * Excess secluded pages go to the active queue and
1938 * will later go to the inactive queue.
1939 */
1940 assert((vm_page_secluded_count_free +
1941 vm_page_secluded_count_inuse) ==
1942 vm_page_secluded_count);
1943 secluded_page = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
1944 assert(secluded_page->vmp_q_state == VM_PAGE_ON_SECLUDED_Q);
1945
1946 vm_page_queues_remove(secluded_page, FALSE);
1947 assert(!secluded_page->vmp_fictitious);
1948 assert(!VM_PAGE_WIRED(secluded_page));
1949
1950 if (secluded_page->vmp_object == 0) {
1951 /* transfer to free queue */
1952 assert(secluded_page->vmp_busy);
1953 secluded_page->vmp_snext = local_freeq;
1954 local_freeq = secluded_page;
1955 local_freed++;
1956 } else {
1957 /* transfer to head of active queue */
1958 vm_page_enqueue_active(secluded_page, FALSE);
1959 secluded_page = VM_PAGE_NULL;
1960 }
1961 }
1962 #endif /* CONFIG_SECLUDED_MEMORY */
1963
1964 assert(delayed_unlock);
1965
1966 /*
1967 * maintain our balance
1968 */
1969 vm_page_balance_inactive(1);
1970
1971
1972 /**********************************************************************
1973 * above this point we're playing with the active and secluded queues
1974 * below this point we're playing with the throttling mechanisms
1975 * and the inactive queue
1976 **********************************************************************/
1977
1978 if (vm_page_free_count + local_freed >= vm_page_free_target)
1979 {
1980 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1981
1982 vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
1983 VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
1984 /*
1985 * make sure the pageout I/O threads are running
1986 * throttled in case there are still requests
1987 * in the laundry... since we have met our targets
1988 * we don't need the laundry to be cleaned in a timely
1989 * fashion... so let's avoid interfering with foreground
1990 * activity
1991 */
1992 vm_pageout_adjust_eq_iothrottle(eq, TRUE);
1993
1994 lck_mtx_lock(&vm_page_queue_free_lock);
1995
1996 if ((vm_page_free_count >= vm_page_free_target) &&
1997 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
1998 /*
1999 * done - we have met our target *and*
2000 * there is no one waiting for a page.
2001 */
2002 return_from_scan:
2003 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
2004
2005 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE,
2006 vm_pageout_state.vm_pageout_inactive,
2007 vm_pageout_state.vm_pageout_inactive_used, 0, 0);
2008 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END,
2009 vm_pageout_vminfo.vm_pageout_freed_speculative,
2010 vm_pageout_state.vm_pageout_inactive_clean,
2011 vm_pageout_vminfo.vm_pageout_inactive_dirty_internal,
2012 vm_pageout_vminfo.vm_pageout_inactive_dirty_external);
2013
2014 return;
2015 }
2016 lck_mtx_unlock(&vm_page_queue_free_lock);
2017 }
2018
2019 /*
2020 * Before anything, we check if we have any ripe volatile
2021 * objects around. If so, try to purge the first object.
2022 * If the purge fails, fall through to reclaim a page instead.
2023 * If the purge succeeds, go back to the top and re-evaluate
2024 * the new memory situation.
2025 */
2026
2027 assert(available_for_purge >= 0);
2028 force_purge = 0; /* no force-purging */
2029
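/*
 * under memory pressure we allow volatile objects to be force-purged:
 * how aggressively we do so scales with the reported pressure level
 * (warning, urgent, critical) via the corresponding
 * memorystatus_purge_on_* setting
 */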
2030 #if VM_PRESSURE_EVENTS
2031 pressure_level = memorystatus_vm_pressure_level;
2032
2033 if (pressure_level > kVMPressureNormal) {
2034
2035 if (pressure_level >= kVMPressureCritical) {
2036 force_purge = vm_pageout_state.memorystatus_purge_on_critical;
2037 } else if (pressure_level >= kVMPressureUrgent) {
2038 force_purge = vm_pageout_state.memorystatus_purge_on_urgent;
2039 } else if (pressure_level >= kVMPressureWarning) {
2040 force_purge = vm_pageout_state.memorystatus_purge_on_warning;
2041 }
2042 }
2043 #endif /* VM_PRESSURE_EVENTS */
2044
2045 if (available_for_purge || force_purge) {
2046
2047 if (object != NULL) {
2048 vm_object_unlock(object);
2049 object = NULL;
2050 }
2051
2052 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_START);
2053
2054 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0);
2055 if (vm_purgeable_object_purge_one(force_purge, C_DONT_BLOCK)) {
2056 VM_PAGEOUT_DEBUG(vm_pageout_purged_objects, 1);
2057 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0);
2058 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
2059 continue;
2060 }
2061 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1);
2062 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
2063 }
2064
2065 if (vm_page_queue_empty(&sq->age_q) && vm_page_speculative_count) {
2066 /*
2067 * try to pull pages from the aging bins...
2068 * see vm_page.h for an explanation of how
2069 * this mechanism works
2070 */
2071 struct vm_speculative_age_q *aq;
2072 boolean_t can_steal = FALSE;
2073 int num_scanned_queues;
2074
2075 aq = &vm_page_queue_speculative[speculative_steal_index];
2076
2077 num_scanned_queues = 0;
2078 while (vm_page_queue_empty(&aq->age_q) &&
2079 num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
2080
2081 speculative_steal_index++;
2082
2083 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2084 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2085
2086 aq = &vm_page_queue_speculative[speculative_steal_index];
2087 }
2088
2089 if (num_scanned_queues == VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) {
2090 /*
2091 * XXX We've scanned all the speculative
2092 * queues but still haven't found one
2093 * that is not empty, even though
2094 * vm_page_speculative_count is not 0.
2095 */
2096 if (!vm_page_queue_empty(&sq->age_q))
2097 continue;
2098 #if DEVELOPMENT || DEBUG
2099 panic("vm_pageout_scan: vm_page_speculative_count=%d but queues are empty", vm_page_speculative_count);
2100 #endif
2101 /* readjust... */
2102 vm_page_speculative_count = 0;
2103 /* ... and continue */
2104 continue;
2105 }
2106
2107 if (vm_page_speculative_count > vm_pageout_state.vm_page_speculative_target || force_speculative_aging == TRUE)
2108 can_steal = TRUE;
2109 else {
2110 if (!delay_speculative_age) {
2111 mach_timespec_t ts_fully_aged;
2112
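/*
 * the oldest speculative bin is considered fully aged once
 * (number of bins * vm_page_speculative_q_age_ms) has elapsed
 * since it was stamped... only then can we steal from it while
 * still under the speculative target
 */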
2113 ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_pageout_state.vm_page_speculative_q_age_ms) / 1000;
2114 ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_pageout_state.vm_page_speculative_q_age_ms) % 1000)
2115 * 1000 * NSEC_PER_USEC;
2116
2117 ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);
2118
2119 clock_sec_t sec;
2120 clock_nsec_t nsec;
2121 clock_get_system_nanotime(&sec, &nsec);
2122 ts.tv_sec = (unsigned int) sec;
2123 ts.tv_nsec = nsec;
2124
2125 if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0)
2126 can_steal = TRUE;
2127 else
2128 delay_speculative_age++;
2129 } else {
2130 delay_speculative_age++;
2131 if (delay_speculative_age == DELAY_SPECULATIVE_AGE)
2132 delay_speculative_age = 0;
2133 }
2134 }
2135 if (can_steal == TRUE)
2136 vm_page_speculate_ageit(aq);
2137 }
2138 force_speculative_aging = FALSE;
2139
2140 if (vm_page_queue_empty(&sq->age_q) && cache_evict_throttle == 0) {
2141
2142 int pages_evicted;
2143
2144 if (object != NULL) {
2145 vm_object_unlock(object);
2146 object = NULL;
2147 }
2148 KERNEL_DEBUG_CONSTANT(0x13001ec | DBG_FUNC_START, 0, 0, 0, 0, 0);
2149
2150 pages_evicted = vm_object_cache_evict(100, 10);
2151
2152 KERNEL_DEBUG_CONSTANT(0x13001ec | DBG_FUNC_END, pages_evicted, 0, 0, 0, 0);
2153
2154 if (pages_evicted) {
2155
2156 vm_pageout_vminfo.vm_pageout_pages_evicted += pages_evicted;
2157
2158 VM_DEBUG_EVENT(vm_pageout_cache_evict, VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE,
2159 vm_page_free_count, pages_evicted, vm_pageout_vminfo.vm_pageout_pages_evicted, 0);
2160 memoryshot(VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE);
2161
2162 /*
2163 * we just freed up to 100 pages,
2164 * so go back to the top of the main loop
2165 * and re-evaluate the memory situation
2166 */
2167 continue;
2168 } else
2169 cache_evict_throttle = 1000;
2170 }
2171 if (cache_evict_throttle)
2172 cache_evict_throttle--;
2173
2174 divisor = vm_pageout_state.vm_page_filecache_min_divisor;
2175
2176 #if CONFIG_JETSAM
2177 /*
2178 * don't let the filecache_min fall below 15% of available memory
2179 * on systems with an active compressor that isn't nearing its
2180 * limits w/r to accepting new data
2181 *
2182 * on systems w/o the compressor/swapper, the filecache is always
2183 * a very large percentage of the AVAILABLE_NON_COMPRESSED_MEMORY
2184 * since most (if not all) of the anonymous pages are in the
2185 * throttled queue (which isn't counted as available) which
2186 * effectively disables this filter
2187 */
2188 if (vm_compressor_low_on_space() || divisor == 0)
2189 vm_pageout_state.vm_page_filecache_min = 0;
2190 else
2191 vm_pageout_state.vm_page_filecache_min =
2192 ((AVAILABLE_NON_COMPRESSED_MEMORY) * 10) / divisor;
2193 #else
2194 if (vm_compressor_out_of_space() || divisor == 0)
2195 vm_pageout_state.vm_page_filecache_min = 0;
2196 else {
2197 /*
2198 * don't let the filecache_min fall below the specified critical level
2199 */
2200 vm_pageout_state.vm_page_filecache_min =
2201 ((AVAILABLE_NON_COMPRESSED_MEMORY) * 10) / divisor;
2202 }
2203 #endif
2204 if (vm_page_free_count < (vm_page_free_reserved / 4))
2205 vm_pageout_state.vm_page_filecache_min = 0;
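/*
 * note: vm_page_filecache_min works out to (10 / divisor) of
 * AVAILABLE_NON_COMPRESSED_MEMORY... e.g. a divisor in the mid 60s
 * would yield the ~15% floor mentioned above (the exact divisor is
 * a tunable in vm_pageout_state)... once free memory drops below a
 * quarter of the reserved pool we stop protecting the filecache
 * entirely
 */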
2206
2207 exceeded_burst_throttle = FALSE;
2208 /*
2209 * Sometimes we have to pause:
2210 * 1) No inactive pages - nothing to do.
2211 * 2) Loop control - no acceptable pages found on the inactive queue
2212 * within the last vm_pageout_burst_inactive_throttle iterations
2213 * 3) Flow control - default pageout queue is full
2214 */
2215 if (vm_page_queue_empty(&vm_page_queue_inactive) &&
2216 vm_page_queue_empty(&vm_page_queue_anonymous) &&
2217 vm_page_queue_empty(&vm_page_queue_cleaned) &&
2218 vm_page_queue_empty(&sq->age_q)) {
2219 VM_PAGEOUT_DEBUG(vm_pageout_scan_empty_throttle, 1);
2220 msecs = vm_pageout_state.vm_pageout_empty_wait;
2221 goto vm_pageout_scan_delay;
2222
2223 } else if (inactive_burst_count >=
2224 MIN(vm_pageout_state.vm_pageout_burst_inactive_throttle,
2225 (vm_page_inactive_count +
2226 vm_page_speculative_count))) {
2227 VM_PAGEOUT_DEBUG(vm_pageout_scan_burst_throttle, 1);
2228 msecs = vm_pageout_state.vm_pageout_burst_wait;
2229
2230 exceeded_burst_throttle = TRUE;
2231 goto vm_pageout_scan_delay;
2232
2233 } else if (VM_PAGE_Q_THROTTLED(iq) &&
2234 VM_DYNAMIC_PAGING_ENABLED()) {
2235 clock_sec_t sec;
2236 clock_nsec_t nsec;
2237
2238 switch (flow_control.state) {
2239
2240 case FCS_IDLE:
2241 if ((vm_page_free_count + local_freed) < vm_page_free_target &&
2242 vm_pageout_state.vm_restricted_to_single_processor == FALSE) {
2243 /*
2244 * since the compressor is running independently of vm_pageout_scan
2245 * let's not wait for it just yet... as long as we have a healthy supply
2246 * of filecache pages to work with, let's keep stealing those.
2247 */
2248 inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
2249
2250 if (vm_page_pageable_external_count > vm_pageout_state.vm_page_filecache_min &&
2251 (inactive_external_count >= VM_PAGE_INACTIVE_TARGET(vm_page_pageable_external_count))) {
2252 anons_grabbed = ANONS_GRABBED_LIMIT;
2253 VM_PAGEOUT_DEBUG(vm_pageout_scan_throttle_deferred, 1);
2254 goto consider_inactive;
2255 }
2256 }
2257 reset_deadlock_timer:
2258 ts.tv_sec = vm_pageout_state.vm_pageout_deadlock_wait / 1000;
2259 ts.tv_nsec = (vm_pageout_state.vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
2260 clock_get_system_nanotime(&sec, &nsec);
2261 flow_control.ts.tv_sec = (unsigned int) sec;
2262 flow_control.ts.tv_nsec = nsec;
2263 ADD_MACH_TIMESPEC(&flow_control.ts, &ts);
2264
2265 flow_control.state = FCS_DELAYED;
2266 msecs = vm_pageout_state.vm_pageout_deadlock_wait;
2267
2268 vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_internal++;
2269 break;
2270
2271 case FCS_DELAYED:
2272 clock_get_system_nanotime(&sec, &nsec);
2273 ts.tv_sec = (unsigned int) sec;
2274 ts.tv_nsec = nsec;
2275
2276 if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
2277 /*
2278 * the pageout thread for the default pager is potentially
2279 * deadlocked since the
2280 * default pager queue has been throttled for more than the
2281 * allowable time... we need to move some clean pages or dirty
2282 * pages belonging to the external pagers if they aren't throttled
2283 * vm_page_free_wanted represents the number of threads currently
2284 * blocked waiting for pages... we'll move one page for each of
2285 * these plus a fixed amount to break the logjam... once we're done
2286 * moving this number of pages, we'll re-enter the FCS_DELAYED state
2287 * with a new timeout target since we have no way of knowing
2288 * whether we've broken the deadlock except through observation
2289 * of the queue associated with the default pager... we need to
2290 * stop moving pages and allow the system to run to see what
2291 * state it settles into.
2292 */
2293 vm_pageout_deadlock_target = vm_pageout_state.vm_pageout_deadlock_relief +
2294 vm_page_free_wanted + vm_page_free_wanted_privileged;
2295 VM_PAGEOUT_DEBUG(vm_pageout_scan_deadlock_detected, 1);
2296 flow_control.state = FCS_DEADLOCK_DETECTED;
2297 thread_wakeup((event_t) &vm_pageout_garbage_collect);
2298 goto consider_inactive;
2299 }
2300 /*
2301 * just resniff instead of trying
2302 * to compute a new delay time... we're going to be
2303 * awakened immediately upon a laundry completion,
2304 * so we won't wait any longer than necessary
2305 */
2306 msecs = vm_pageout_state.vm_pageout_idle_wait;
2307 break;
2308
2309 case FCS_DEADLOCK_DETECTED:
2310 if (vm_pageout_deadlock_target)
2311 goto consider_inactive;
2312 goto reset_deadlock_timer;
2313
2314 }
2315 vm_pageout_scan_delay:
2316 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2317
2318 vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
2319 VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
2320
2321 if (vm_page_free_count >= vm_page_free_target) {
2322 /*
2323 * we're here because
2324 * 1) someone else freed up some pages while we had
2325 * the queues unlocked above
2326 * and we've hit one of the 3 conditions that
2327 * cause us to pause the pageout scan thread
2328 *
2329 * since we already have enough free pages,
2330 * let's avoid stalling and return normally
2331 *
2332 * before we return, make sure the pageout I/O threads
2333 * are running throttled in case there are still requests
2334 * in the laundry... since we have enough free pages
2335 * we don't need the laundry to be cleaned in a timely
2336 * fashion... so let's avoid interfering with foreground
2337 * activity
2338 *
2339 * we don't want to hold vm_page_queue_free_lock when
2340 * calling vm_pageout_adjust_eq_iothrottle (since it
2341 * may cause other locks to be taken), we do the initial
2342 * check outside of the lock. Once we take the lock,
2343 * we recheck the condition since it may have changed.
2344 * if it has, no problem, we will make the threads
2345 * non-throttled before actually blocking
2346 */
2347 vm_pageout_adjust_eq_iothrottle(eq, TRUE);
2348 }
2349 lck_mtx_lock(&vm_page_queue_free_lock);
2350
2351 if (vm_page_free_count >= vm_page_free_target &&
2352 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
2353 goto return_from_scan;
2354 }
2355 lck_mtx_unlock(&vm_page_queue_free_lock);
2356
2357 if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) {
2358 /*
2359 * we're most likely about to block due to one of
2360 * the 3 conditions that cause vm_pageout_scan to
2361 * not be able to make forward progress w/r
2362 * to providing new pages to the free queue,
2363 * so unthrottle the I/O threads in case we
2364 * have laundry to be cleaned... it needs
2365 * to be completed ASAP.
2366 *
2367 * even if we don't block, we want the io threads
2368 * running unthrottled since the sum of free +
2369 * clean pages is still under our free target
2370 */
2371 vm_pageout_adjust_eq_iothrottle(eq, FALSE);
2372 }
2373 if (vm_page_cleaned_count > 0 && exceeded_burst_throttle == FALSE) {
2374 /*
2375 * if we get here we're below our free target and
2376 * we're stalling due to a full laundry queue or
2377 * we don't have any inactive pages other than
2378 * those in the clean queue...
2379 * however, we have pages on the clean queue that
2380 * can be moved to the free queue, so let's not
2381 * stall the pageout scan
2382 */
2383 flow_control.state = FCS_IDLE;
2384 goto consider_inactive;
2385 }
2386 if (flow_control.state == FCS_DELAYED && !VM_PAGE_Q_THROTTLED(iq)) {
2387 flow_control.state = FCS_IDLE;
2388 goto consider_inactive;
2389 }
2390
2391 VM_CHECK_MEMORYSTATUS;
2392
2393 if (flow_control.state != FCS_IDLE)
2394 VM_PAGEOUT_DEBUG(vm_pageout_scan_throttle, 1);
2395
2396 iq->pgo_throttled = TRUE;
2397 assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);
2398
2399 counter(c_vm_pageout_scan_block++);
2400
2401 vm_page_unlock_queues();
2402
2403 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
2404
2405 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START,
2406 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
2407 memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START);
2408
2409 thread_block(THREAD_CONTINUE_NULL);
2410
2411 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END,
2412 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
2413 memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END);
2414
2415 vm_page_lock_queues();
2416
2417 iq->pgo_throttled = FALSE;
2418
2419 if (loop_count >= vm_page_inactive_count)
2420 loop_count = 0;
2421 inactive_burst_count = 0;
2422
2423 goto Restart;
2424 /*NOTREACHED*/
2425 }
2426
2427
2428 flow_control.state = FCS_IDLE;
2429 consider_inactive:
2430 vm_pageout_inactive_external_forced_reactivate_limit = MIN((vm_page_active_count + vm_page_inactive_count),
2431 vm_pageout_inactive_external_forced_reactivate_limit);
2432 loop_count++;
2433 inactive_burst_count++;
2434 vm_pageout_state.vm_pageout_inactive++;
2435
2436 /*
2437 * Choose a victim.
2438 */
2439 while (1) {
2440
2441 #if CONFIG_BACKGROUND_QUEUE
2442 page_from_bg_q = FALSE;
2443 #endif /* CONFIG_BACKGROUND_QUEUE */
2444
2445 m = NULL;
2446 m_object = VM_OBJECT_NULL;
2447
2448 if (VM_DYNAMIC_PAGING_ENABLED()) {
2449 assert(vm_page_throttled_count == 0);
2450 assert(vm_page_queue_empty(&vm_page_queue_throttled));
2451 }
2452
2453 /*
2454 * Try for a clean-queue inactive page.
2455 * These are pages that vm_pageout_scan tried to steal earlier, but
2456 * were dirty and had to be cleaned. Pick them up now that they are clean.
2457 */
2458 if (!vm_page_queue_empty(&vm_page_queue_cleaned)) {
2459 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
2460
2461 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
2462
2463 break;
2464 }
2465
2466 /*
2467 * The next most eligible pages are ones we paged in speculatively,
2468 * but which have not yet been touched and have been aged out.
2469 */
2470 if (!vm_page_queue_empty(&sq->age_q)) {
2471 m = (vm_page_t) vm_page_queue_first(&sq->age_q);
2472
2473 assert(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q);
2474
2475 if (!m->vmp_dirty || force_anonymous == FALSE)
2476 break;
2477 else
2478 m = NULL;
2479 }
2480
2481 #if CONFIG_BACKGROUND_QUEUE
2482 if (vm_page_background_mode != VM_PAGE_BG_DISABLED && (vm_page_background_count > vm_page_background_target)) {
2483 vm_object_t bg_m_object = NULL;
2484
2485 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_background);
2486
2487 bg_m_object = VM_PAGE_OBJECT(m);
2488
2489 if (!VM_PAGE_PAGEABLE(m)) {
2490 /*
2491 * This page is on the background queue
2492 * but not on a pageable queue. This is
2493 * likely a transient state and whoever
2494 * took it out of its pageable queue
2495 * will likely put it back on a pageable
2496 * queue soon but we can't deal with it
2497 * at this point, so let's ignore this
2498 * page.
2499 */
2500 } else if (force_anonymous == FALSE || bg_m_object->internal) {
2501
2502 if (bg_m_object->internal &&
2503 (VM_PAGE_Q_THROTTLED(iq) ||
2504 vm_compressor_out_of_space() == TRUE ||
2505 vm_page_free_count < (vm_page_free_reserved / 4))) {
2506
2507 vm_pageout_skipped_bq_internal++;
2508 } else {
2509 page_from_bg_q = TRUE;
2510
2511 if (bg_m_object->internal)
2512 vm_pageout_vminfo.vm_pageout_considered_bq_internal++;
2513 else
2514 vm_pageout_vminfo.vm_pageout_considered_bq_external++;
2515 break;
2516 }
2517 }
2518 }
2519 #endif
2520 inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
2521
2522 if ((vm_page_pageable_external_count < vm_pageout_state.vm_page_filecache_min || force_anonymous == TRUE) ||
2523 (inactive_external_count < VM_PAGE_INACTIVE_TARGET(vm_page_pageable_external_count))) {
2524 grab_anonymous = TRUE;
2525 anons_grabbed = 0;
2526
2527 vm_pageout_vminfo.vm_pageout_skipped_external++;
2528 goto want_anonymous;
2529 }
2530 grab_anonymous = (vm_page_anonymous_count > vm_page_anonymous_min);
2531
2532 #if CONFIG_JETSAM
2533 /* If the file-backed pool has accumulated
2534 * significantly more pages than the jetsam
2535 * threshold, prefer to reclaim those
2536 * inline to minimise compute overhead of reclaiming
2537 * anonymous pages.
2538 * This calculation does not account for the CPU local
2539 * external page queues, as those are expected to be
2540 * much smaller relative to the global pools.
2541 */
2542 if (grab_anonymous == TRUE && !VM_PAGE_Q_THROTTLED(eq)) {
2543 if (vm_page_pageable_external_count >
2544 vm_pageout_state.vm_page_filecache_min) {
2545 if ((vm_page_pageable_external_count *
2546 vm_pageout_memorystatus_fb_factor_dr) >
2547 (memorystatus_available_pages_critical *
2548 vm_pageout_memorystatus_fb_factor_nr)) {
2549 grab_anonymous = FALSE;
2550
2551 VM_PAGEOUT_DEBUG(vm_grab_anon_overrides, 1);
2552 }
2553 }
2554 if (grab_anonymous) {
2555 VM_PAGEOUT_DEBUG(vm_grab_anon_nops, 1);
2556 }
2557 }
2558 #endif /* CONFIG_JETSAM */
2559
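/*
 * victim selection below alternates between the anonymous and
 * file-backed inactive queues... when grabbing anonymous pages,
 * at most ANONS_GRABBED_LIMIT of them are taken in a row before
 * a file-backed page is considered and the counter is reset
 */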
2560 want_anonymous:
2561 if (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT || vm_page_queue_empty(&vm_page_queue_anonymous)) {
2562
2563 if ( !vm_page_queue_empty(&vm_page_queue_inactive) ) {
2564 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
2565
2566 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
2567 anons_grabbed = 0;
2568
2569 if (vm_page_pageable_external_count < vm_pageout_state.vm_page_filecache_min) {
2570
2571 if ( !vm_page_queue_empty(&vm_page_queue_anonymous) ) {
2572 if ((++reactivated_this_call % 100)) {
2573 vm_pageout_vminfo.vm_pageout_filecache_min_reactivated++;
2574 goto must_activate_page;
2575 }
2576 /*
2577 * steal 1% of the file backed pages even if
2578 * we are under the limit that has been set
2579 * for a healthy filecache
2580 */
2581 }
2582 }
2583 break;
2584 }
2585 }
2586 if ( !vm_page_queue_empty(&vm_page_queue_anonymous) ) {
2587 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
2588
2589 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
2590 anons_grabbed++;
2591
2592 break;
2593 }
2594
2595 /*
2596 * if we've gotten here, we have no victim page.
2597 * check to see if we've not finished balancing the queues
2598 * or we have a page on the aged speculative queue that we
2599 * skipped due to force_anonymous == TRUE.. or we have
2600 * speculative pages that we can prematurely age... if
2601 * one of these cases we'll keep going, else panic
2602 */
2603 force_anonymous = FALSE;
2604 VM_PAGEOUT_DEBUG(vm_pageout_no_victim, 1);
2605
2606 if (!vm_page_queue_empty(&sq->age_q))
2607 goto done_with_inactivepage;
2608
2609 if (vm_page_speculative_count) {
2610 force_speculative_aging = TRUE;
2611 goto done_with_inactivepage;
2612 }
2613 panic("vm_pageout: no victim");
2614
2615 /* NOTREACHED */
2616 }
2617 assert(VM_PAGE_PAGEABLE(m));
2618 m_object = VM_PAGE_OBJECT(m);
2619 force_anonymous = FALSE;
2620
2621 page_prev_q_state = m->vmp_q_state;
2622 /*
2623 * we just found this page on one of our queues...
2624 * it can't also be on the pageout queue, so safe
2625 * to call vm_page_queues_remove
2626 */
2627 vm_page_queues_remove(m, TRUE);
2628
2629 assert(!m->vmp_laundry);
2630 assert(!m->vmp_private);
2631 assert(!m->vmp_fictitious);
2632 assert(m_object != kernel_object);
2633 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
2634
2635 vm_pageout_vminfo.vm_pageout_considered_page++;
2636
2637 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
2638
2639 /*
2640 * check to see if we currently are working
2641 * with the same object... if so, we've
2642 * already got the lock
2643 */
2644 if (m_object != object) {
2645 /*
2646 * the object associated with candidate page is
2647 * different from the one we were just working
2648 * with... dump the lock if we still own it
2649 */
2650 if (object != NULL) {
2651 vm_object_unlock(object);
2652 object = NULL;
2653 }
2654 /*
2655 * Try to lock object; since we've already got the
2656 * page queues lock, we can only 'try' for this one.
2657 * if the 'try' fails, we need to do a mutex_pause
2658 * to allow the owner of the object lock a chance to
2659 * run... otherwise, we're likely to trip over this
2660 * object in the same state as we work our way through
2661 * the queue... clumps of pages associated with the same
2662 * object are fairly typical on the inactive and active queues
2663 */
2664 if (!vm_object_lock_try_scan(m_object)) {
2665 vm_page_t m_want = NULL;
2666
2667 vm_pageout_vminfo.vm_pageout_inactive_nolock++;
2668
2669 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2670 VM_PAGEOUT_DEBUG(vm_pageout_cleaned_nolock, 1);
2671
2672 pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));
2673
2674 m->vmp_reference = FALSE;
2675
2676 if ( !m_object->object_is_shared_cache) {
2677 /*
2678 * don't apply this optimization if this is the shared cache
2679 * object, it's too easy to get rid of very hot and important
2680 * pages...
2681 * m->vmp_object must be stable since we hold the page queues lock...
2682 * we can update the scan_collisions field sans the object lock
2683 * since it is a separate field and this is the only spot that does
2684 * a read-modify-write operation and it is never executed concurrently...
2685 * we can asynchronously set this field to 0 when creating a UPL, so it
2686 * is possible for the value to be a bit non-deterministic, but that's ok
2687 * since it's only used as a hint
2688 */
2689 m_object->scan_collisions = 1;
2690 }
2691 if ( !vm_page_queue_empty(&vm_page_queue_cleaned))
2692 m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
2693 else if ( !vm_page_queue_empty(&sq->age_q))
2694 m_want = (vm_page_t) vm_page_queue_first(&sq->age_q);
2695 else if ( (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT ||
2696 vm_page_queue_empty(&vm_page_queue_anonymous)) &&
2697 !vm_page_queue_empty(&vm_page_queue_inactive))
2698 m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
2699 else if ( !vm_page_queue_empty(&vm_page_queue_anonymous))
2700 m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
2701
2702 /*
2703 * this is the next object we're going to be interested in
2704 * try to make sure it's available after the mutex_pause
2705 * returns control
2706 */
2707 if (m_want)
2708 vm_pageout_scan_wants_object = VM_PAGE_OBJECT(m_want);
2709
2710 goto requeue_page;
2711 }
2712 object = m_object;
2713 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2714 }
2715 assert(m_object == object);
2716 assert(VM_PAGE_OBJECT(m) == m_object);
2717
2718 if (m->vmp_busy) {
2719 /*
2720 * Somebody is already playing with this page.
2721 * Put it back on the appropriate queue
2722 *
2723 */
2724 VM_PAGEOUT_DEBUG(vm_pageout_inactive_busy, 1);
2725
2726 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2727 VM_PAGEOUT_DEBUG(vm_pageout_cleaned_busy, 1);
2728 requeue_page:
2729 if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
2730 vm_page_enqueue_inactive(m, FALSE);
2731 else
2732 vm_page_activate(m);
2733 #if CONFIG_BACKGROUND_QUEUE
2734 #if DEVELOPMENT || DEBUG
2735 if (page_from_bg_q == TRUE) {
2736 if (m_object->internal)
2737 vm_pageout_rejected_bq_internal++;
2738 else
2739 vm_pageout_rejected_bq_external++;
2740 }
2741 #endif
2742 #endif
2743 goto done_with_inactivepage;
2744 }
2745
2746 /*
2747 * if (m->vmp_cleaning && !m->vmp_free_when_done)
2748 * If already cleaning this page in place
2749 * just leave it off the paging queues.
2750 * We can leave the page mapped, and upl_commit_range
2751 * will put it on the clean queue.
2752 *
2753 * if (m->vmp_free_when_done && !m->vmp_cleaning)
2754 * an msync INVALIDATE is in progress...
2755 * this page has been marked for destruction
2756 * after it has been cleaned,
2757 * but not yet gathered into a UPL
2758 * where 'cleaning' will be set...
2759 * just leave it off the paging queues
2760 *
2761 * if (m->vmp_free_when_done && m->vmp_cleaning)
2762 * an msync INVALIDATE is in progress
2763 * and the UPL has already gathered this page...
2764 * just leave it off the paging queues
2765 */
2766 if (m->vmp_free_when_done || m->vmp_cleaning) {
2767 goto done_with_inactivepage;
2768 }
2769
2770
2771 /*
2772 * If it's absent, in error or the object is no longer alive,
2773 * we can reclaim the page... in the no longer alive case,
2774 * there are 2 states the page can be in that preclude us
2775 * from reclaiming it - busy or cleaning - that we've already
2776 * dealt with
2777 */
2778 if (m->vmp_absent || m->vmp_error || !object->alive) {
2779
2780 if (m->vmp_absent)
2781 VM_PAGEOUT_DEBUG(vm_pageout_inactive_absent, 1);
2782 else if (!object->alive)
2783 VM_PAGEOUT_DEBUG(vm_pageout_inactive_notalive, 1);
2784 else
2785 VM_PAGEOUT_DEBUG(vm_pageout_inactive_error, 1);
2786 reclaim_page:
2787 if (vm_pageout_deadlock_target) {
2788 VM_PAGEOUT_DEBUG(vm_pageout_scan_inactive_throttle_success, 1);
2789 vm_pageout_deadlock_target--;
2790 }
2791
2792 DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL);
2793
2794 if (object->internal) {
2795 DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL);
2796 } else {
2797 DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL);
2798 }
2799 assert(!m->vmp_cleaning);
2800 assert(!m->vmp_laundry);
2801
2802 if (!object->internal &&
2803 object->pager != NULL &&
2804 object->pager->mo_pager_ops == &shared_region_pager_ops) {
2805 shared_region_pager_reclaimed++;
2806 }
2807
2808 m->vmp_busy = TRUE;
2809
2810 /*
2811 * remove page from object here since we're already
2812 * behind the object lock... defer the rest of the work
2813 * we'd normally do in vm_page_free_prepare_object
2814 * until 'vm_page_free_list' is called
2815 */
2816 if (m->vmp_tabled)
2817 vm_page_remove(m, TRUE);
2818
2819 assert(m->vmp_pageq.next == 0 && m->vmp_pageq.prev == 0);
2820 m->vmp_snext = local_freeq;
2821 local_freeq = m;
2822 local_freed++;
2823
2824 if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
2825 vm_pageout_vminfo.vm_pageout_freed_speculative++;
2826 else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2827 vm_pageout_vminfo.vm_pageout_freed_cleaned++;
2828 else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q)
2829 vm_pageout_vminfo.vm_pageout_freed_internal++;
2830 else
2831 vm_pageout_vminfo.vm_pageout_freed_external++;
2832
2833 inactive_burst_count = 0;
2834 goto done_with_inactivepage;
2835 }
2836 if (object->copy == VM_OBJECT_NULL) {
2837 /*
2838 * No one else can have any interest in this page.
2839 * If this is an empty purgable object, the page can be
2840 * reclaimed even if dirty.
2841 * If the page belongs to a volatile purgable object, we
2842 * reactivate it if the compressor isn't active.
2843 */
2844 if (object->purgable == VM_PURGABLE_EMPTY) {
2845 if (m->vmp_pmapped == TRUE) {
2846 /* unmap the page */
2847 refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
2848 if (refmod_state & VM_MEM_MODIFIED) {
2849 SET_PAGE_DIRTY(m, FALSE);
2850 }
2851 }
2852 if (m->vmp_dirty || m->vmp_precious) {
2853 /* we saved the cost of cleaning this page ! */
2854 vm_page_purged_count++;
2855 }
2856 goto reclaim_page;
2857 }
2858
2859 if (VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
2860 /*
2861 * With the VM compressor, the cost of
2862 * reclaiming a page is much lower (no I/O),
2863 * so if we find a "volatile" page, it's better
2864 * to let it get compressed rather than letting
2865 * it occupy a full page until it gets purged.
2866 * So no need to check for "volatile" here.
2867 */
2868 } else if (object->purgable == VM_PURGABLE_VOLATILE) {
2869 /*
2870 * Avoid cleaning a "volatile" page which might
2871 * be purged soon.
2872 */
2873
2874 /* if it's wired, we can't put it on our queue */
2875 assert(!VM_PAGE_WIRED(m));
2876
2877 /* just stick it back on! */
2878 reactivated_this_call++;
2879
2880 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2881 VM_PAGEOUT_DEBUG(vm_pageout_cleaned_volatile_reactivated, 1);
2882
2883 goto reactivate_page;
2884 }
2885 }
2886 /*
2887 * If it's being used, reactivate.
2888 * (Fictitious pages are either busy or absent.)
2889 * First, update the reference and dirty bits
2890 * to make sure the page is unreferenced.
2891 */
2892 refmod_state = -1;
2893
2894 if (m->vmp_reference == FALSE && m->vmp_pmapped == TRUE) {
2895 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
2896
2897 if (refmod_state & VM_MEM_REFERENCED)
2898 m->vmp_reference = TRUE;
2899 if (refmod_state & VM_MEM_MODIFIED) {
2900 SET_PAGE_DIRTY(m, FALSE);
2901 }
2902 }
2903
2904 if (m->vmp_reference || m->vmp_dirty) {
2905 /* deal with a rogue "reusable" page */
2906 VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, m_object);
2907 }
2908 divisor = vm_pageout_state.vm_page_xpmapped_min_divisor;
2909
2910 if (divisor == 0)
2911 vm_pageout_state.vm_page_xpmapped_min = 0;
2912 else
2913 vm_pageout_state.vm_page_xpmapped_min = (vm_page_external_count * 10) / divisor;
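/*
 * vm_page_xpmapped_min works out to (10 / divisor) of the
 * file-backed (external) pool... a divisor of 0 disables the
 * xpmapped reactivation check below
 */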
2914
2915 if (!m->vmp_no_cache &&
2916 #if CONFIG_BACKGROUND_QUEUE
2917 page_from_bg_q == FALSE &&
2918 #endif
2919 (m->vmp_reference || (m->vmp_xpmapped && !object->internal &&
2920 (vm_page_xpmapped_external_count < vm_pageout_state.vm_page_xpmapped_min)))) {
2921 /*
2922 * The page we pulled off the inactive list has
2923 * been referenced. It is possible for other
2924 * processors to be touching pages faster than we
2925 * can clear the referenced bit and traverse the
2926 * inactive queue, so we limit the number of
2927 * reactivations.
2928 */
2929 if (++reactivated_this_call >= reactivate_limit) {
2930 vm_pageout_vminfo.vm_pageout_reactivation_limit_exceeded++;
2931 } else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) {
2932 vm_pageout_vminfo.vm_pageout_inactive_force_reclaim++;
2933 } else {
2934 uint32_t isinuse;
2935
2936 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2937 VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reference_reactivated, 1);
2938
2939 vm_pageout_vminfo.vm_pageout_inactive_referenced++;
2940 reactivate_page:
2941 if ( !object->internal && object->pager != MEMORY_OBJECT_NULL &&
2942 vnode_pager_get_isinuse(object->pager, &isinuse) == KERN_SUCCESS && !isinuse) {
2943 /*
2944 * no explicit mappings of this object exist
2945 * and it's not open via the filesystem
2946 */
2947 vm_page_deactivate(m);
2948 VM_PAGEOUT_DEBUG(vm_pageout_inactive_deactivated, 1);
2949 } else {
2950 must_activate_page:
2951 /*
2952 * The page was/is being used, so put back on active list.
2953 */
2954 vm_page_activate(m);
2955 VM_STAT_INCR(reactivations);
2956 inactive_burst_count = 0;
2957 }
2958 #if CONFIG_BACKGROUND_QUEUE
2959 #if DEVELOPMENT || DEBUG
2960 if (page_from_bg_q == TRUE) {
2961 if (m_object->internal)
2962 vm_pageout_rejected_bq_internal++;
2963 else
2964 vm_pageout_rejected_bq_external++;
2965 }
2966 #endif
2967 #endif
2968 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2969 VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reactivated, 1);
2970 vm_pageout_state.vm_pageout_inactive_used++;
2971
2972 goto done_with_inactivepage;
2973 }
2974 /*
2975 * Make sure we call pmap_get_refmod() if it
2976 * wasn't already called just above, to update
2977 * the dirty bit.
2978 */
2979 if ((refmod_state == -1) && !m->vmp_dirty && m->vmp_pmapped) {
2980 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
2981 if (refmod_state & VM_MEM_MODIFIED) {
2982 SET_PAGE_DIRTY(m, FALSE);
2983 }
2984 }
2985 }
2986
2987 XPR(XPR_VM_PAGEOUT,
2988 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
2989 object, m->vmp_offset, m, 0,0);
2990
2991 /*
2992 * we've got a candidate page to steal...
2993 *
2994 * m->vmp_dirty is up to date courtesy of the
2995 * preceding check for m->vmp_reference... if
2996 * we get here, then m->vmp_reference had to be
2997 * FALSE (or possibly "reactivate_limit" was
2998 * exceeded), but in either case we called
2999 * pmap_get_refmod() and updated both
3000 * m->vmp_reference and m->vmp_dirty
3001 *
3002 * if it's dirty or precious we need to
3003 * see if the target queue is throttled
3004 * if it is, we need to skip over it by moving it back
3005 * to the end of the inactive queue
3006 */
3007
3008 inactive_throttled = FALSE;
3009
3010 if (m->vmp_dirty || m->vmp_precious) {
3011 if (object->internal) {
3012 if (VM_PAGE_Q_THROTTLED(iq))
3013 inactive_throttled = TRUE;
3014 } else if (VM_PAGE_Q_THROTTLED(eq)) {
3015 inactive_throttled = TRUE;
3016 }
3017 }
3018 throttle_inactive:
3019 if (!VM_DYNAMIC_PAGING_ENABLED() &&
3020 object->internal && m->vmp_dirty &&
3021 (object->purgable == VM_PURGABLE_DENY ||
3022 object->purgable == VM_PURGABLE_NONVOLATILE ||
3023 object->purgable == VM_PURGABLE_VOLATILE)) {
3024 vm_page_check_pageable_safe(m);
3025 assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
3026 vm_page_queue_enter(&vm_page_queue_throttled, m,
3027 vm_page_t, vmp_pageq);
3028 m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;
3029 vm_page_throttled_count++;
3030
3031 VM_PAGEOUT_DEBUG(vm_pageout_scan_reclaimed_throttled, 1);
3032
3033 inactive_burst_count = 0;
3034 goto done_with_inactivepage;
3035 }
3036 if (inactive_throttled == TRUE) {
3037
3038 if (object->internal == FALSE) {
3039 /*
3040 * we need to break up the following potential deadlock case...
3041 * a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written.
3042 * b) The thread doing the writing is waiting for pages while holding the truncate lock
3043 * c) Most of the pages in the inactive queue belong to this file.
3044 *
3045 * we are potentially in this deadlock because...
3046 * a) the external pageout queue is throttled
3047 * b) we're done with the active queue and moved on to the inactive queue
3048 * c) we've got a dirty external page
3049 *
3050 * since we don't know the reason for the external pageout queue being throttled we
3051 * must suspect that we are deadlocked, so move the current page onto the active queue
3052 * in an effort to cause a page from the active queue to 'age' to the inactive queue
3053 *
3054 * if we don't have jetsam configured (i.e. we have a dynamic pager), set
3055 * 'force_anonymous' to TRUE to cause us to grab a page from the cleaned/anonymous
3056 * pool the next time we select a victim page... if we can make enough new free pages,
3057 * the deadlock will break, the external pageout queue will empty and it will no longer
3058 * be throttled
3059 *
3060 * if we have jetsam configured, keep a count of the pages reactivated this way so
3061 * that we can try to find clean pages in the active/inactive queues before
3062 * deciding to jetsam a process
3063 */
3064 vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_external++;
3065
3066 vm_page_check_pageable_safe(m);
3067 assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
3068 vm_page_queue_enter(&vm_page_queue_active, m, vm_page_t, vmp_pageq);
3069 m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
3070 vm_page_active_count++;
3071 vm_page_pageable_external_count++;
3072
3073 vm_pageout_adjust_eq_iothrottle(eq, FALSE);
3074
3075 #if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
3076 vm_pageout_inactive_external_forced_reactivate_limit--;
3077
3078 if (vm_pageout_inactive_external_forced_reactivate_limit <= 0) {
3079 vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
3080 /*
3081 * Possible deadlock scenario so request jetsam action
3082 */
3083 assert(object);
3084 vm_object_unlock(object);
3085 object = VM_OBJECT_NULL;
3086 vm_page_unlock_queues();
3087
3088 VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START,
3089 vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
3090
3091 /* Kill first suitable process. If this call returned FALSE, we might have simply purged a process instead. */
3092 if (memorystatus_kill_on_VM_page_shortage(FALSE) == TRUE) {
3093 VM_PAGEOUT_DEBUG(vm_pageout_inactive_external_forced_jetsam_count, 1);
3094 }
3095
3096 VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END,
3097 vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
3098
3099 vm_page_lock_queues();
3100 delayed_unlock = 1;
3101 }
3102 #else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
3103 force_anonymous = TRUE;
3104 #endif
3105 inactive_burst_count = 0;
3106 goto done_with_inactivepage;
3107 } else {
3108 goto must_activate_page;
3109 }
3110 }
3111
3112 /*
3113 * we've got a page that we can steal...
3114 * eliminate all mappings and make sure
3115 * we have the up-to-date modified state
3116 *
3117 * if we need to do a pmap_disconnect then we
3118 * need to re-evaluate m->vmp_dirty since the pmap_disconnect
3119 * provides the true state atomically... the
3120 * page was still mapped up to the pmap_disconnect
3121 * and may have been dirtied at the last microsecond
3122 *
3123 * Note that if 'pmapped' is FALSE then the page is not
3124 * and has not been in any map, so there is no point calling
3125 * pmap_disconnect(). m->vmp_dirty could have been set in anticipation
3126 * of likely usage of the page.
3127 */
3128 if (m->vmp_pmapped == TRUE) {
3129 int pmap_options;
3130
3131 /*
3132 * Don't count this page as going into the compressor
3133 * if any of these are true:
3134 * 1) compressed pager isn't enabled
3135 * 2) Freezer enabled device with compressed pager
3136 * backend (exclusive use) i.e. most of the VM system
3137 * (including vm_pageout_scan) has no knowledge of
3138 * the compressor
3139 * 3) This page belongs to a file and hence will not be
3140 * sent into the compressor
3141 */
3142 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE ||
3143 object->internal == FALSE) {
3144 pmap_options = 0;
3145 } else if (m->vmp_dirty || m->vmp_precious) {
3146 /*
3147 * VM knows that this page is dirty (or
3148 * precious) and needs to be compressed
3149 * rather than freed.
3150 * Tell the pmap layer to count this page
3151 * as "compressed".
3152 */
3153 pmap_options = PMAP_OPTIONS_COMPRESSOR;
3154 } else {
3155 /*
3156 * VM does not know if the page needs to
3157 * be preserved but the pmap layer might tell
3158 * us if any mapping has "modified" it.
3159 * Let the pmap layer count this page
3160 * as compressed if and only if it has been
3161 * modified.
3162 */
3163 pmap_options =
3164 PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
3165 }
3166 refmod_state = pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m),
3167 pmap_options,
3168 NULL);
3169 if (refmod_state & VM_MEM_MODIFIED) {
3170 SET_PAGE_DIRTY(m, FALSE);
3171 }
3172 }
3173
3174 /*
3175 * reset our count of pages that have been reclaimed
3176 * since the last page was 'stolen'
3177 */
3178 inactive_reclaim_run = 0;
3179
3180 /*
3181 * If it's clean and not precious, we can free the page.
3182 */
3183 if (!m->vmp_dirty && !m->vmp_precious) {
3184
3185 vm_pageout_state.vm_pageout_inactive_clean++;
3186
3187 /*
3188 * OK, at this point we have found a page we are going to free.
3189 */
3190 #if CONFIG_PHANTOM_CACHE
3191 if (!object->internal)
3192 vm_phantom_cache_add_ghost(m);
3193 #endif
3194 goto reclaim_page;
3195 }
3196
3197 /*
3198 * The page may have been dirtied since the last check
3199 * for a throttled target queue (which may have been skipped
3200 * if the page was clean then). With the dirty page
3201 * disconnected here, we can make one final check.
3202 */
3203 if (object->internal) {
3204 if (VM_PAGE_Q_THROTTLED(iq))
3205 inactive_throttled = TRUE;
3206 } else if (VM_PAGE_Q_THROTTLED(eq)) {
3207 inactive_throttled = TRUE;
3208 }
3209
3210 if (inactive_throttled == TRUE)
3211 goto throttle_inactive;
3212
3213 #if VM_PRESSURE_EVENTS
3214 #if CONFIG_JETSAM
3215
3216 /*
3217 * If Jetsam is enabled, then the sending
3218 * of memory pressure notifications is handled
3219 * from the same thread that takes care of high-water
3220 * and other jetsams i.e. the memorystatus_thread.
3221 */
3222
3223 #else /* CONFIG_JETSAM */
3224
3225 vm_pressure_response();
3226
3227 #endif /* CONFIG_JETSAM */
3228 #endif /* VM_PRESSURE_EVENTS */
3229
3230 if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
3231 VM_PAGEOUT_DEBUG(vm_pageout_speculative_dirty, 1);
3232
3233 if (object->internal)
3234 vm_pageout_vminfo.vm_pageout_inactive_dirty_internal++;
3235 else
3236 vm_pageout_vminfo.vm_pageout_inactive_dirty_external++;
3237
3238 /*
3239 * internal pages will go to the compressor...
3240 * external pages will go to the appropriate pager to be cleaned
3241 * and upon completion will end up on 'vm_page_queue_cleaned' which
3242 * is a preferred queue to steal from
3243 */
3244 vm_pageout_cluster(m);
3245 inactive_burst_count = 0;
3246
3247 done_with_inactivepage:
3248
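/*
 * once delayed_unlock_limit pages have been processed without
 * dropping the page queues lock, flush the locally batched free
 * pages and let other lock waiters run... otherwise, if
 * vm_pageout_scan_wants_object is set (we recently failed to take
 * an object lock), briefly drop the queues lock and pause to give
 * that lock's owner a chance to release it
 */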
3249 if (delayed_unlock++ > delayed_unlock_limit) {
3250 int freed = local_freed;
3251
3252 vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
3253 VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
3254 if (freed == 0)
3255 lck_mtx_yield(&vm_page_queue_lock);
3256 } else if (vm_pageout_scan_wants_object) {
3257 vm_page_unlock_queues();
3258 mutex_pause(0);
3259 vm_page_lock_queues();
3260 }
3261 /*
3262 * back to top of pageout scan loop
3263 */
3264 }
3265 }
3266
3267
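/*
 * vm_page_free_reserve:
 *
 * Grow the reserved free-page pool by 'pages' (plus a compressor
 * cushion when a compressor is configured), subject to the
 * VM_PAGE_FREE_RESERVED_LIMIT cap, then recompute the derived
 * watermarks: vm_page_free_min, vm_page_free_target (kept at least
 * 5 pages above the minimum) and vm_page_throttle_limit (roughly
 * half of the free target).
 */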
3268 void
3269 vm_page_free_reserve(
3270 int pages)
3271 {
3272 int free_after_reserve;
3273
3274 if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
3275
3276 if ((vm_page_free_reserved + pages + COMPRESSOR_FREE_RESERVED_LIMIT) >= (VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT))
3277 vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT;
3278 else
3279 vm_page_free_reserved += (pages + COMPRESSOR_FREE_RESERVED_LIMIT);
3280
3281 } else {
3282 if ((vm_page_free_reserved + pages) >= VM_PAGE_FREE_RESERVED_LIMIT)
3283 vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT;
3284 else
3285 vm_page_free_reserved += pages;
3286 }
3287 free_after_reserve = vm_pageout_state.vm_page_free_count_init - vm_page_free_reserved;
3288
3289 vm_page_free_min = vm_page_free_reserved +
3290 VM_PAGE_FREE_MIN(free_after_reserve);
3291
3292 if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT)
3293 vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT;
3294
3295 vm_page_free_target = vm_page_free_reserved +
3296 VM_PAGE_FREE_TARGET(free_after_reserve);
3297
3298 if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT)
3299 vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT;
3300
3301 if (vm_page_free_target < vm_page_free_min + 5)
3302 vm_page_free_target = vm_page_free_min + 5;
3303
3304 vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 2);
3305 }
3306
3307 /*
3308 * vm_pageout is the high level pageout daemon.
3309 */
3310
3311 void
3312 vm_pageout_continue(void)
3313 {
3314 DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL);
3315 VM_PAGEOUT_DEBUG(vm_pageout_scan_event_counter, 1);
3316
3317 #if !CONFIG_EMBEDDED
3318 lck_mtx_lock(&vm_page_queue_free_lock);
3319 vm_pageout_running = TRUE;
3320 lck_mtx_unlock(&vm_page_queue_free_lock);
3321 #endif /* CONFIG_EMBEDDED */
3322
3323 vm_pageout_scan();
3324 /*
3325 * we hold both the vm_page_queue_free_lock
3326 * and the vm_page_queues_lock at this point
3327 */
3328 assert(vm_page_free_wanted == 0);
3329 assert(vm_page_free_wanted_privileged == 0);
3330 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
3331
3332 #if !CONFIG_EMBEDDED
3333 vm_pageout_running = FALSE;
3334 if (vm_pageout_waiter) {
3335 vm_pageout_waiter = FALSE;
3336 thread_wakeup((event_t)&vm_pageout_waiter);
3337 }
3338 #endif /* !CONFIG_EMBEDDED */
3339
3340 lck_mtx_unlock(&vm_page_queue_free_lock);
3341 vm_page_unlock_queues();
3342
3343 counter(c_vm_pageout_block++);
3344 thread_block((thread_continue_t)vm_pageout_continue);
3345 /*NOTREACHED*/
3346 }
3347
3348 #if !CONFIG_EMBEDDED
3349 kern_return_t
3350 vm_pageout_wait(uint64_t deadline)
3351 {
3352 kern_return_t kr;
3353
3354 lck_mtx_lock(&vm_page_queue_free_lock);
3355 for (kr = KERN_SUCCESS; vm_pageout_running && (KERN_SUCCESS == kr); ) {
3356 vm_pageout_waiter = TRUE;
3357 if (THREAD_AWAKENED != lck_mtx_sleep_deadline(
3358 &vm_page_queue_free_lock, LCK_SLEEP_DEFAULT,
3359 (event_t) &vm_pageout_waiter, THREAD_UNINT, deadline)) {
3360 kr = KERN_OPERATION_TIMED_OUT;
3361 }
3362 }
3363 lck_mtx_unlock(&vm_page_queue_free_lock);
3364
3365 return (kr);
3366 }
3367 #endif /* !CONFIG_EMBEDDED */
3368
3369
3370 static void
3371 vm_pageout_iothread_external_continue(struct vm_pageout_queue *q)
3372 {
3373 vm_page_t m = NULL;
3374 vm_object_t object;
3375 vm_object_offset_t offset;
3376 memory_object_t pager;
3377
3378 /* On systems with a compressor, the external IO thread clears its
3379 * VM privileged bit to accommodate large allocations (e.g. bulk UPL
3380 * creation)
3381 */
3382 if (vm_pageout_state.vm_pageout_internal_iothread != THREAD_NULL)
3383 current_thread()->options &= ~TH_OPT_VMPRIV;
3384
3385 vm_page_lockspin_queues();
3386
3387 while ( !vm_page_queue_empty(&q->pgo_pending) ) {
3388
3389 q->pgo_busy = TRUE;
3390 vm_page_queue_remove_first(&q->pgo_pending, m, vm_page_t, vmp_pageq);
3391
3392 assert(m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q);
3393 VM_PAGE_CHECK(m);
3394 /*
3395 * grab a snapshot of the object and offset this
3396 * page is tabled in so that we can relookup this
3397 * page after we've taken the object lock - these
3398 * fields are stable while we hold the page queues lock
3399 * but as soon as we drop it, there is nothing to keep
3400 * this page in this object... we hold an activity_in_progress
3401 * on this object which will keep it from terminating
3402 */
3403 object = VM_PAGE_OBJECT(m);
3404 offset = m->vmp_offset;
3405
3406 m->vmp_q_state = VM_PAGE_NOT_ON_Q;
3407 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
3408
3409 vm_page_unlock_queues();
3410
3411 vm_object_lock(object);
3412
3413 m = vm_page_lookup(object, offset);
3414
3415 if (m == NULL ||
3416 m->vmp_busy || m->vmp_cleaning || !m->vmp_laundry || (m->vmp_q_state != VM_PAGE_NOT_ON_Q)) {
3417 /*
3418 * it's either the same page that someone else has
3419 * started cleaning (or it's finished cleaning or
3420 * been put back on the pageout queue), or
3421 * the page has been freed or we have found a
3422 * new page at this offset... in all of these cases
3423 * we merely need to release the activity_in_progress
3424 * we took when we put the page on the pageout queue
3425 */
3426 vm_object_activity_end(object);
3427 vm_object_unlock(object);
3428
3429 vm_page_lockspin_queues();
3430 continue;
3431 }
3432 pager = object->pager;
3433
3434 if (pager == MEMORY_OBJECT_NULL) {
3435 /*
3436 * This pager has been destroyed by either
3437 * memory_object_destroy or vm_object_destroy, and
3438 * so there is nowhere for the page to go.
3439 */
3440 if (m->vmp_free_when_done) {
3441 /*
3442 * Just free the page... VM_PAGE_FREE takes
3443 * care of cleaning up all the state...
3444 * including doing the vm_pageout_throttle_up
3445 */
3446 VM_PAGE_FREE(m);
3447 } else {
3448 vm_page_lockspin_queues();
3449
3450 vm_pageout_throttle_up(m);
3451 vm_page_activate(m);
3452
3453 vm_page_unlock_queues();
3454
3455 /*
3456 * And we are done with it.
3457 */
3458 }
3459 vm_object_activity_end(object);
3460 vm_object_unlock(object);
3461
3462 vm_page_lockspin_queues();
3463 continue;
3464 }
3465 #if 0
3466 /*
3467 * we don't hold the page queue lock
3468 * so this check isn't safe to make
3469 */
3470 VM_PAGE_CHECK(m);
3471 #endif
3472 /*
3473 * give back the activity_in_progress reference we
3474 * took when we queued up this page and replace it
3475 * with a paging_in_progress reference that will
3476 * also keep the paging offset from changing and
3477 * prevent the object from terminating
3478 */
3479 vm_object_activity_end(object);
3480 vm_object_paging_begin(object);
3481 vm_object_unlock(object);
3482
3483 /*
3484 * Send the data to the pager.
3485 * any pageout clustering happens there
3486 */
3487 memory_object_data_return(pager,
3488 m->vmp_offset + object->paging_offset,
3489 PAGE_SIZE,
3490 NULL,
3491 NULL,
3492 FALSE,
3493 FALSE,
3494 0);
3495
3496 vm_object_lock(object);
3497 vm_object_paging_end(object);
3498 vm_object_unlock(object);
3499
3500 vm_pageout_io_throttle();
3501
3502 vm_page_lockspin_queues();
3503 }
3504 q->pgo_busy = FALSE;
3505 q->pgo_idle = TRUE;
3506
3507 assert_wait((event_t) &q->pgo_pending, THREAD_UNINT);
3508 vm_page_unlock_queues();
3509
3510 thread_block_parameter((thread_continue_t)vm_pageout_iothread_external_continue, (void *) q);
3511 /*NOTREACHED*/
3512 }
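/*
 * Illustrative sketch, not compiled as part of this file: the
 * "snapshot, drop one lock, take the other, re-look-up and re-validate"
 * pattern used by the loop above, written against a hypothetical cache with
 * its own lock. All types and helpers are invented for illustration.
 */
#if 0
#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

struct sketch_entry {
	unsigned long key;
	bool          busy;
};

struct sketch_cache {
	pthread_mutex_t     lock;
	struct sketch_entry entries[64];
	size_t              nentries;
};

/* must be called with cache->lock held */
static struct sketch_entry *
sketch_lookup(struct sketch_cache *c, unsigned long key)
{
	for (size_t i = 0; i < c->nentries; i++) {
		if (c->entries[i].key == key)
			return &c->entries[i];
	}
	return NULL;
}

static bool
sketch_process(struct sketch_cache *cache, pthread_mutex_t *queue_lock,
    struct sketch_entry *e)
{
	/* snapshot the key while the queue lock still pins the entry */
	unsigned long key = e->key;

	pthread_mutex_unlock(queue_lock);
	pthread_mutex_lock(&cache->lock);

	/* re-look-up: the entry may have been freed or replaced meanwhile */
	e = sketch_lookup(cache, key);
	if (e == NULL || e->busy) {
		/* someone else got to it first; give up on this entry */
		pthread_mutex_unlock(&cache->lock);
		pthread_mutex_lock(queue_lock);
		return false;
	}

	/* ... safe to operate on the entry under cache->lock ... */

	pthread_mutex_unlock(&cache->lock);
	pthread_mutex_lock(queue_lock);
	return true;
}
#endif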
3513
3514
3515 #define MAX_FREE_BATCH 32
3516 uint32_t vm_compressor_time_thread; /* Set via sysctl to record time accrued by
3517 * this thread.
3518 */
3519
3520
3521 void
3522 vm_pageout_iothread_internal_continue(struct cq *);
3523 void
3524 vm_pageout_iothread_internal_continue(struct cq *cq)
3525 {
3526 struct vm_pageout_queue *q;
3527 vm_page_t m = NULL;
3528 boolean_t pgo_draining;
3529 vm_page_t local_q;
3530 int local_cnt;
3531 vm_page_t local_freeq = NULL;
3532 int local_freed = 0;
3533 int local_batch_size;
3534 #if DEVELOPMENT || DEBUG
3535 int ncomps = 0;
3536 boolean_t marked_active = FALSE;
3537 #endif
3538 KERNEL_DEBUG(0xe040000c | DBG_FUNC_END, 0, 0, 0, 0, 0);
3539
3540 q = cq->q;
3541 local_batch_size = q->pgo_maxlaundry / (vm_pageout_state.vm_compressor_thread_count * 2);
3542
3543 #if RECORD_THE_COMPRESSED_DATA
3544 if (q->pgo_laundry)
3545 c_compressed_record_init();
3546 #endif
3547 while (TRUE) {
3548 int pages_left_on_q = 0;
3549
3550 local_cnt = 0;
3551 local_q = NULL;
3552
3553 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_START, 0, 0, 0, 0, 0);
3554
3555 vm_page_lock_queues();
3556 #if DEVELOPMENT || DEBUG
3557 if (marked_active == FALSE) {
3558 vmct_active++;
3559 vmct_state[cq->id] = VMCT_ACTIVE;
3560 marked_active = TRUE;
3561 if (vmct_active == 1) {
3562 vm_compressor_epoch_start = mach_absolute_time();
3563 }
3564 }
3565 #endif
3566 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_END, 0, 0, 0, 0, 0);
3567
3568 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_START, q->pgo_laundry, 0, 0, 0, 0);
3569
3570 while ( !vm_page_queue_empty(&q->pgo_pending) && local_cnt < local_batch_size) {
3571
3572 vm_page_queue_remove_first(&q->pgo_pending, m, vm_page_t, vmp_pageq);
3573 assert(m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q);
3574 VM_PAGE_CHECK(m);
3575
3576 m->vmp_q_state = VM_PAGE_NOT_ON_Q;
3577 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
3578 m->vmp_laundry = FALSE;
3579
3580 m->vmp_snext = local_q;
3581 local_q = m;
3582 local_cnt++;
3583 }
3584 if (local_q == NULL)
3585 break;
3586
3587 q->pgo_busy = TRUE;
3588
3589 if ((pgo_draining = q->pgo_draining) == FALSE) {
3590 vm_pageout_throttle_up_batch(q, local_cnt);
3591 pages_left_on_q = q->pgo_laundry;
3592 } else
3593 pages_left_on_q = q->pgo_laundry - local_cnt;
3594
3595 vm_page_unlock_queues();
3596
3597 #if !RECORD_THE_COMPRESSED_DATA
3598 if (pages_left_on_q >= local_batch_size && cq->id < (vm_pageout_state.vm_compressor_thread_count - 1)) {
3599 thread_wakeup((event_t) ((uintptr_t)&q->pgo_pending + cq->id + 1));
3600 }
3601 #endif
3602 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, q->pgo_laundry, 0, 0, 0, 0);
3603
3604 while (local_q) {
3605
3606 KERNEL_DEBUG(0xe0400024 | DBG_FUNC_START, local_cnt, 0, 0, 0, 0);
3607
3608 m = local_q;
3609 local_q = m->vmp_snext;
3610 m->vmp_snext = NULL;
3611
3612 if (vm_pageout_compress_page(&cq->current_chead, cq->scratch_buf, m) == KERN_SUCCESS) {
3613 #if DEVELOPMENT || DEBUG
3614 ncomps++;
3615 #endif
3616 KERNEL_DEBUG(0xe0400024 | DBG_FUNC_END, local_cnt, 0, 0, 0, 0);
3617
3618 m->vmp_snext = local_freeq;
3619 local_freeq = m;
3620 local_freed++;
3621
3622 if (local_freed >= MAX_FREE_BATCH) {
3623
3624 OSAddAtomic64(local_freed, &vm_pageout_vminfo.vm_pageout_compressions);
3625
3626 vm_page_free_list(local_freeq, TRUE);
3627
3628 local_freeq = NULL;
3629 local_freed = 0;
3630 }
3631 }
3632 #if !CONFIG_JETSAM
3633 while (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
3634 kern_return_t wait_result;
3635 int need_wakeup = 0;
3636
3637 if (local_freeq) {
3638 OSAddAtomic64(local_freed, &vm_pageout_vminfo.vm_pageout_compressions);
3639
3640 vm_page_free_list(local_freeq, TRUE);
3641 local_freeq = NULL;
3642 local_freed = 0;
3643
3644 continue;
3645 }
3646 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3647
3648 if (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
3649
3650 if (vm_page_free_wanted_privileged++ == 0)
3651 need_wakeup = 1;
3652 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, THREAD_UNINT);
3653
3654 lck_mtx_unlock(&vm_page_queue_free_lock);
3655
3656 if (need_wakeup)
3657 thread_wakeup((event_t)&vm_page_free_wanted);
3658
3659 if (wait_result == THREAD_WAITING)
3660 thread_block(THREAD_CONTINUE_NULL);
3661
3662 } else
3663 lck_mtx_unlock(&vm_page_queue_free_lock);
3664 }
3665 #endif
3666 }
3667 if (local_freeq) {
3668 OSAddAtomic64(local_freed, &vm_pageout_vminfo.vm_pageout_compressions);
3669
3670 vm_page_free_list(local_freeq, TRUE);
3671 local_freeq = NULL;
3672 local_freed = 0;
3673 }
3674 if (pgo_draining == TRUE) {
3675 vm_page_lockspin_queues();
3676 vm_pageout_throttle_up_batch(q, local_cnt);
3677 vm_page_unlock_queues();
3678 }
3679 }
3680 KERNEL_DEBUG(0xe040000c | DBG_FUNC_START, 0, 0, 0, 0, 0);
3681
3682 /*
3683 * queue lock is held and our q is empty
3684 */
3685 q->pgo_busy = FALSE;
3686 q->pgo_idle = TRUE;
3687
3688 assert_wait((event_t) ((uintptr_t)&q->pgo_pending + cq->id), THREAD_UNINT);
3689 #if DEVELOPMENT || DEBUG
3690 if (marked_active == TRUE) {
3691 vmct_active--;
3692 vmct_state[cq->id] = VMCT_IDLE;
3693
3694 if (vmct_active == 0) {
3695 vm_compressor_epoch_stop = mach_absolute_time();
3696 assertf(vm_compressor_epoch_stop >= vm_compressor_epoch_start,
3697 "Compressor epoch non-monotonic: 0x%llx -> 0x%llx",
3698 vm_compressor_epoch_start, vm_compressor_epoch_stop);
3699 /* This interval includes intervals where one or more
3700 * compressor threads were pre-empted
3701 */
3702 vmct_stats.vmct_cthreads_total += vm_compressor_epoch_stop - vm_compressor_epoch_start;
3703 }
3704 }
3705 #endif
3706 vm_page_unlock_queues();
3707 #if DEVELOPMENT || DEBUG
3708 if (__improbable(vm_compressor_time_thread)) {
3709 vmct_stats.vmct_runtimes[cq->id] = thread_get_runtime_self();
3710 vmct_stats.vmct_pages[cq->id] += ncomps;
3711 vmct_stats.vmct_iterations[cq->id]++;
3712 if (ncomps > vmct_stats.vmct_maxpages[cq->id]) {
3713 vmct_stats.vmct_maxpages[cq->id] = ncomps;
3714 }
3715 if (ncomps < vmct_stats.vmct_minpages[cq->id]) {
3716 vmct_stats.vmct_minpages[cq->id] = ncomps;
3717 }
3718 }
3719 #endif
3720
3721 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, 0, 0, 0, 0, 0);
3722
3723 thread_block_parameter((thread_continue_t)vm_pageout_iothread_internal_continue, (void *) cq);
3724 /*NOTREACHED*/
3725 }
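/*
 * Illustrative sketch, not compiled as part of this file: the batching
 * pattern used by the compressor thread above - detach a bounded batch of
 * work under the queue lock, drop the lock, process locally, and release
 * results in bounded chunks. All names are hypothetical.
 */
#if 0
#include <pthread.h>
#include <stddef.h>

#define SKETCH_BATCH       16   /* detach at most this many per pass */
#define SKETCH_FREE_BATCH  32   /* flush completed items in chunks   */

struct sketch_item {
	struct sketch_item *next;
};

struct sketch_queue {
	pthread_mutex_t     lock;
	struct sketch_item *head;
};

void sketch_compress(struct sketch_item *);     /* hypothetical */
void sketch_free_list(struct sketch_item *);    /* hypothetical */

static void
sketch_drain(struct sketch_queue *q)
{
	struct sketch_item *local = NULL, *freeq = NULL;
	int freed = 0;

	/* 1. detach a bounded batch while holding the queue lock */
	pthread_mutex_lock(&q->lock);
	for (int n = 0; n < SKETCH_BATCH && q->head != NULL; n++) {
		struct sketch_item *it = q->head;
		q->head  = it->next;
		it->next = local;
		local    = it;
	}
	pthread_mutex_unlock(&q->lock);

	/* 2. process the batch without holding the queue lock */
	while (local != NULL) {
		struct sketch_item *it = local;
		local = it->next;

		sketch_compress(it);

		it->next = freeq;
		freeq    = it;
		if (++freed >= SKETCH_FREE_BATCH) {
			sketch_free_list(freeq);    /* bounded flush */
			freeq = NULL;
			freed = 0;
		}
	}
	if (freeq != NULL)
		sketch_free_list(freeq);
}
#endif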
3726
3727
3728 kern_return_t
3729 vm_pageout_compress_page(void **current_chead, char *scratch_buf, vm_page_t m)
3730 {
3731 vm_object_t object;
3732 memory_object_t pager;
3733 int compressed_count_delta;
3734 kern_return_t retval;
3735
3736 object = VM_PAGE_OBJECT(m);
3737
3738 assert(!m->vmp_free_when_done);
3739 assert(!m->vmp_laundry);
3740
3741 pager = object->pager;
3742
3743 if (!object->pager_initialized || pager == MEMORY_OBJECT_NULL) {
3744
3745 KERNEL_DEBUG(0xe0400010 | DBG_FUNC_START, object, pager, 0, 0, 0);
3746
3747 vm_object_lock(object);
3748
3749 /*
3750 * If there is no memory object for the page, create
3751 * one and hand it to the compression pager.
3752 */
3753
3754 if (!object->pager_initialized)
3755 vm_object_collapse(object, (vm_object_offset_t) 0, TRUE);
3756 if (!object->pager_initialized)
3757 vm_object_compressor_pager_create(object);
3758
3759 pager = object->pager;
3760
3761 if (!object->pager_initialized || pager == MEMORY_OBJECT_NULL) {
3762 /*
3763 * Still no pager for the object,
3764 * or the pager has been destroyed.
3765 * Reactivate the page.
3766 *
3767 * Should only happen if there is no
3768 * compression pager
3769 */
3770 PAGE_WAKEUP_DONE(m);
3771
3772 vm_page_lockspin_queues();
3773 vm_page_activate(m);
3774 VM_PAGEOUT_DEBUG(vm_pageout_dirty_no_pager, 1);
3775 vm_page_unlock_queues();
3776
3777 /*
3778 * And we are done with it.
3779 */
3780 vm_object_activity_end(object);
3781 vm_object_unlock(object);
3782
3783 return KERN_FAILURE;
3784 }
3785 vm_object_unlock(object);
3786
3787 KERNEL_DEBUG(0xe0400010 | DBG_FUNC_END, object, pager, 0, 0, 0);
3788 }
3789 assert(object->pager_initialized && pager != MEMORY_OBJECT_NULL);
3790 assert(object->activity_in_progress > 0);
3791
3792 retval = vm_compressor_pager_put(
3793 pager,
3794 m->vmp_offset + object->paging_offset,
3795 VM_PAGE_GET_PHYS_PAGE(m),
3796 current_chead,
3797 scratch_buf,
3798 &compressed_count_delta);
3799
3800 vm_object_lock(object);
3801
3802 assert(object->activity_in_progress > 0);
3803 assert(VM_PAGE_OBJECT(m) == object);
3804 assert( !VM_PAGE_WIRED(m));
3805
3806 vm_compressor_pager_count(pager,
3807 compressed_count_delta,
3808 FALSE, /* shared_lock */
3809 object);
3810
3811 if (retval == KERN_SUCCESS) {
3812 /*
3813 * If the object is purgeable, its owner's
3814 * purgeable ledgers will be updated in
3815 * vm_page_remove() but the page still
3816 * contributes to the owner's memory footprint,
3817 * so account for it as such.
3818 */
3819 if ((object->purgable != VM_PURGABLE_DENY ||
3820 object->vo_ledger_tag) &&
3821 object->vo_owner != NULL) {
3822 /* one more compressed purgeable/tagged page */
3823 vm_object_owner_compressed_update(object,
3824 +1);
3825 }
3826 VM_STAT_INCR(compressions);
3827
3828 if (m->vmp_tabled)
3829 vm_page_remove(m, TRUE);
3830
3831 } else {
3832 PAGE_WAKEUP_DONE(m);
3833
3834 vm_page_lockspin_queues();
3835
3836 vm_page_activate(m);
3837 vm_pageout_vminfo.vm_compressor_failed++;
3838
3839 vm_page_unlock_queues();
3840 }
3841 vm_object_activity_end(object);
3842 vm_object_unlock(object);
3843
3844 return retval;
3845 }
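/*
 * Illustrative sketch, not compiled as part of this file: the
 * success/failure shape of vm_pageout_compress_page() above, with
 * hypothetical helpers standing in for the pager and page-queue operations.
 */
#if 0
#include <stdbool.h>

struct sketch_page;

bool sketch_compressor_put(struct sketch_page *);    /* hypothetical: true on success */
void sketch_account_compression(void);               /* hypothetical */
void sketch_remove_from_object(struct sketch_page *);/* hypothetical */
void sketch_reactivate(struct sketch_page *);        /* hypothetical */

static bool
sketch_compress_page(struct sketch_page *p)
{
	if (sketch_compressor_put(p)) {
		/* page now lives in the compressor: account for it and unhook it */
		sketch_account_compression();
		sketch_remove_from_object(p);
		return true;
	}
	/* compression failed: put the page back on the active queue */
	sketch_reactivate(p);
	return false;
}
#endif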
3846
3847
3848 static void
3849 vm_pageout_adjust_eq_iothrottle(struct vm_pageout_queue *eq, boolean_t req_lowpriority)
3850 {
3851 uint32_t policy;
3852
3853 if (hibernate_cleaning_in_progress == TRUE)
3854 req_lowpriority = FALSE;
3855
3856 if (eq->pgo_inited == TRUE && eq->pgo_lowpriority != req_lowpriority) {
3857
3858 vm_page_unlock_queues();
3859
3860 if (req_lowpriority == TRUE) {
3861 policy = THROTTLE_LEVEL_PAGEOUT_THROTTLED;
3862 DTRACE_VM(laundrythrottle);
3863 } else {
3864 policy = THROTTLE_LEVEL_PAGEOUT_UNTHROTTLED;
3865 DTRACE_VM(laundryunthrottle);
3866 }
3867 proc_set_thread_policy_with_tid(kernel_task, eq->pgo_tid,
3868 TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy);
3869
3870 eq->pgo_lowpriority = req_lowpriority;
3871
3872 vm_page_lock_queues();
3873 }
3874 }
3875
3876
3877 static void
3878 vm_pageout_iothread_external(void)
3879 {
3880 thread_t self = current_thread();
3881
3882 self->options |= TH_OPT_VMPRIV;
3883
3884 DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL);
3885
3886 proc_set_thread_policy(self, TASK_POLICY_EXTERNAL,
3887 TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED);
3888
3889 vm_page_lock_queues();
3890
3891 vm_pageout_queue_external.pgo_tid = self->thread_id;
3892 vm_pageout_queue_external.pgo_lowpriority = TRUE;
3893 vm_pageout_queue_external.pgo_inited = TRUE;
3894
3895 vm_page_unlock_queues();
3896
3897 vm_pageout_iothread_external_continue(&vm_pageout_queue_external);
3898
3899 /*NOTREACHED*/
3900 }
3901
3902
3903 static void
3904 vm_pageout_iothread_internal(struct cq *cq)
3905 {
3906 thread_t self = current_thread();
3907
3908 self->options |= TH_OPT_VMPRIV;
3909
3910 vm_page_lock_queues();
3911
3912 vm_pageout_queue_internal.pgo_tid = self->thread_id;
3913 vm_pageout_queue_internal.pgo_lowpriority = TRUE;
3914 vm_pageout_queue_internal.pgo_inited = TRUE;
3915
3916 vm_page_unlock_queues();
3917
3918 if (vm_pageout_state.vm_restricted_to_single_processor == TRUE)
3919 thread_vm_bind_group_add();
3920
3921
3922 thread_set_thread_name(current_thread(), "VM_compressor");
3923 #if DEVELOPMENT || DEBUG
3924 vmct_stats.vmct_minpages[cq->id] = INT32_MAX;
3925 #endif
3926 vm_pageout_iothread_internal_continue(cq);
3927
3928 /*NOTREACHED*/
3929 }
3930
3931 kern_return_t
3932 vm_set_buffer_cleanup_callout(boolean_t (*func)(int))
3933 {
3934 if (OSCompareAndSwapPtr(NULL, func, (void * volatile *) &consider_buffer_cache_collect)) {
3935 return KERN_SUCCESS;
3936 } else {
3937 return KERN_FAILURE; /* Already set */
3938 }
3939 }
3940
3941 extern boolean_t memorystatus_manual_testing_on;
3942 extern unsigned int memorystatus_level;
3943
3944
3945 #if VM_PRESSURE_EVENTS
3946
3947 boolean_t vm_pressure_events_enabled = FALSE;
3948
3949 void
3950 vm_pressure_response(void)
3951 {
3952
3953 vm_pressure_level_t old_level = kVMPressureNormal;
3954 int new_level = -1;
3955 unsigned int total_pages;
3956 uint64_t available_memory = 0;
3957
3958 if (vm_pressure_events_enabled == FALSE)
3959 return;
3960
3961 #if CONFIG_EMBEDDED
3962
3963 available_memory = (uint64_t) memorystatus_available_pages;
3964
3965 #else /* CONFIG_EMBEDDED */
3966
3967 available_memory = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY;
3968 memorystatus_available_pages = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY;
3969
3970 #endif /* CONFIG_EMBEDDED */
3971
3972 total_pages = (unsigned int) atop_64(max_mem);
3973 #if CONFIG_SECLUDED_MEMORY
3974 total_pages -= vm_page_secluded_count;
3975 #endif /* CONFIG_SECLUDED_MEMORY */
3976 memorystatus_level = (unsigned int) ((available_memory * 100) / total_pages);
3977
3978 if (memorystatus_manual_testing_on) {
3979 return;
3980 }
3981
3982 old_level = memorystatus_vm_pressure_level;
3983
3984 switch (memorystatus_vm_pressure_level) {
3985
3986 case kVMPressureNormal:
3987 {
3988 if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
3989 new_level = kVMPressureCritical;
3990 } else if (VM_PRESSURE_NORMAL_TO_WARNING()) {
3991 new_level = kVMPressureWarning;
3992 }
3993 break;
3994 }
3995
3996 case kVMPressureWarning:
3997 case kVMPressureUrgent:
3998 {
3999 if (VM_PRESSURE_WARNING_TO_NORMAL()) {
4000 new_level = kVMPressureNormal;
4001 } else if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
4002 new_level = kVMPressureCritical;
4003 }
4004 break;
4005 }
4006
4007 case kVMPressureCritical:
4008 {
4009 if (VM_PRESSURE_WARNING_TO_NORMAL()) {
4010 new_level = kVMPressureNormal;
4011 } else if (VM_PRESSURE_CRITICAL_TO_WARNING()) {
4012 new_level = kVMPressureWarning;
4013 }
4014 break;
4015 }
4016
4017 default:
4018 return;
4019 }
4020
4021 if (new_level != -1) {
4022 memorystatus_vm_pressure_level = (vm_pressure_level_t) new_level;
4023
4024 if (new_level != old_level) {
4025 VM_DEBUG_CONSTANT_EVENT(vm_pressure_level_change, VM_PRESSURE_LEVEL_CHANGE, DBG_FUNC_NONE,
4026 new_level, old_level, 0, 0);
4027 }
4028
4029 if ((memorystatus_vm_pressure_level != kVMPressureNormal) || (old_level != memorystatus_vm_pressure_level)) {
4030 if (vm_pageout_state.vm_pressure_thread_running == FALSE) {
4031 thread_wakeup(&vm_pressure_thread);
4032 }
4033
4034 if (old_level != memorystatus_vm_pressure_level) {
4035 thread_wakeup(&vm_pageout_state.vm_pressure_changed);
4036 }
4037 }
4038 }
4039
4040 }
4041 #endif /* VM_PRESSURE_EVENTS */
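/*
 * Illustrative sketch, not compiled as part of this file: the
 * level-transition logic of vm_pressure_response() above as a standalone
 * state machine. The predicate functions stand in for the VM_PRESSURE_*()
 * macros and are hypothetical; kVMPressureUrgent, which the code above
 * treats like kVMPressureWarning, is omitted.
 */
#if 0
typedef enum {
	SKETCH_PRESSURE_NORMAL,
	SKETCH_PRESSURE_WARNING,
	SKETCH_PRESSURE_CRITICAL
} sketch_pressure_t;

int sketch_normal_to_warning(void);      /* hypothetical predicates */
int sketch_warning_to_critical(void);
int sketch_warning_to_normal(void);
int sketch_critical_to_warning(void);

static sketch_pressure_t
sketch_next_level(sketch_pressure_t cur)
{
	switch (cur) {
	case SKETCH_PRESSURE_NORMAL:
		if (sketch_warning_to_critical())
			return SKETCH_PRESSURE_CRITICAL;  /* normal can jump straight to critical */
		if (sketch_normal_to_warning())
			return SKETCH_PRESSURE_WARNING;
		break;
	case SKETCH_PRESSURE_WARNING:
		if (sketch_warning_to_normal())
			return SKETCH_PRESSURE_NORMAL;
		if (sketch_warning_to_critical())
			return SKETCH_PRESSURE_CRITICAL;
		break;
	case SKETCH_PRESSURE_CRITICAL:
		if (sketch_warning_to_normal())
			return SKETCH_PRESSURE_NORMAL;
		if (sketch_critical_to_warning())
			return SKETCH_PRESSURE_WARNING;
		break;
	}
	return cur;    /* no transition */
}
#endif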
4042
4043 kern_return_t
4044 mach_vm_pressure_level_monitor(__unused boolean_t wait_for_pressure, __unused unsigned int *pressure_level) {
4045
4046 #if CONFIG_EMBEDDED
4047
4048 return KERN_FAILURE;
4049
4050 #elif !VM_PRESSURE_EVENTS
4051
4052 return KERN_FAILURE;
4053
4054 #else /* VM_PRESSURE_EVENTS */
4055
4056 kern_return_t kr = KERN_SUCCESS;
4057
4058 if (pressure_level != NULL) {
4059
4060 vm_pressure_level_t old_level = memorystatus_vm_pressure_level;
4061
4062 if (wait_for_pressure == TRUE) {
4063 wait_result_t wr = 0;
4064
4065 while (old_level == *pressure_level) {
4066 wr = assert_wait((event_t) &vm_pageout_state.vm_pressure_changed,
4067 THREAD_INTERRUPTIBLE);
4068 if (wr == THREAD_WAITING) {
4069 wr = thread_block(THREAD_CONTINUE_NULL);
4070 }
4071 if (wr == THREAD_INTERRUPTED) {
4072 return KERN_ABORTED;
4073 }
4074 if (wr == THREAD_AWAKENED) {
4075
4076 old_level = memorystatus_vm_pressure_level;
4077
4078 if (old_level != *pressure_level) {
4079 break;
4080 }
4081 }
4082 }
4083 }
4084
4085 *pressure_level = old_level;
4086 kr = KERN_SUCCESS;
4087 } else {
4088 kr = KERN_INVALID_ARGUMENT;
4089 }
4090
4091 return kr;
4092 #endif /* VM_PRESSURE_EVENTS */
4093 }
4094
4095 #if VM_PRESSURE_EVENTS
4096 void
4097 vm_pressure_thread(void) {
4098 static boolean_t thread_initialized = FALSE;
4099
4100 if (thread_initialized == TRUE) {
4101 vm_pageout_state.vm_pressure_thread_running = TRUE;
4102 consider_vm_pressure_events();
4103 vm_pageout_state.vm_pressure_thread_running = FALSE;
4104 }
4105
4106 thread_set_thread_name(current_thread(), "VM_pressure");
4107 thread_initialized = TRUE;
4108 assert_wait((event_t) &vm_pressure_thread, THREAD_UNINT);
4109 thread_block((thread_continue_t)vm_pressure_thread);
4110 }
4111 #endif /* VM_PRESSURE_EVENTS */
4112
4113
4114 /*
4115 * called once per second via "compute_averages"
4116 */
4117 void
4118 compute_pageout_gc_throttle(__unused void *arg)
4119 {
4120 if (vm_pageout_vminfo.vm_pageout_considered_page != vm_pageout_state.vm_pageout_considered_page_last) {
4121
4122 vm_pageout_state.vm_pageout_considered_page_last = vm_pageout_vminfo.vm_pageout_considered_page;
4123
4124 thread_wakeup((event_t) &vm_pageout_garbage_collect);
4125 }
4126 }
4127
4128 /*
4129 * vm_pageout_garbage_collect can also be called when the zone allocator needs
4130 * to call zone_gc on a different thread in order to trigger zone-map-exhaustion
4131 * jetsams. We need to check if the zone map size is above its jetsam limit to
4132 * decide if this was indeed the case.
4133 *
4134 * We need to do this on a different thread because of the following reasons:
4135 *
4136 * 1. In the case of synchronous jetsams, the leaking process can try to jetsam
4137 * itself causing the system to hang. We perform synchronous jetsams if we're
4138 * leaking in the VM map entries zone, so the leaking process could be doing a
4139 * zalloc for a VM map entry while holding its vm_map lock, when it decides to
4140 * jetsam itself. We also need the vm_map lock on the process termination path,
4141 * which would now lead the dying process to deadlock against itself.
4142 *
4143 * 2. The jetsam path might need to allocate zone memory itself. We could try
4144 * using the non-blocking variant of zalloc for this path, but we can still
4145 * end up trying to do a kernel_memory_allocate when the zone_map is almost
4146 * full.
4147 */
4148
4149 extern boolean_t is_zone_map_nearing_exhaustion(void);
4150
4151 void
4152 vm_pageout_garbage_collect(int collect)
4153 {
4154 if (collect) {
4155 if (is_zone_map_nearing_exhaustion()) {
4156 /*
4157 * Woken up by the zone allocator for zone-map-exhaustion jetsams.
4158 *
4159 * Bail out after calling zone_gc (which triggers the
4160 * zone-map-exhaustion jetsams). If we fall through, the subsequent
4161 * operations that clear out a bunch of caches might allocate zone
4162 * memory themselves (e.g. vm_map operations would need VM map
4163 * entries). Since the zone map is almost full at this point, we
4164 * could end up with a panic. We just need to quickly jetsam a
4165 * process and exit here.
4166 *
4167 * It could so happen that we were woken up to relieve memory
4168 * pressure and the zone map also happened to be near its limit at
4169 * the time, in which case we'll skip out early. But that should be
4170 * ok; if memory pressure persists, the thread will simply be woken
4171 * up again.
4172 */
4173 consider_zone_gc(TRUE);
4174
4175 } else {
4176 /* Woken up by vm_pageout_scan or compute_pageout_gc_throttle. */
4177 boolean_t buf_large_zfree = FALSE;
4178 boolean_t first_try = TRUE;
4179
4180 stack_collect();
4181
4182 consider_machine_collect();
4183 mbuf_drain(FALSE);
4184
4185 do {
4186 if (consider_buffer_cache_collect != NULL) {
4187 buf_large_zfree = (*consider_buffer_cache_collect)(0);
4188 }
4189 if (first_try == TRUE || buf_large_zfree == TRUE) {
4190 /*
4191 * consider_zone_gc should be last, because the other operations
4192 * might return memory to zones.
4193 */
4194 consider_zone_gc(FALSE);
4195 }
4196 first_try = FALSE;
4197
4198 } while (buf_large_zfree == TRUE && vm_page_free_count < vm_page_free_target);
4199
4200 consider_machine_adjust();
4201 }
4202 }
4203
4204 assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);
4205
4206 thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
4207 /*NOTREACHED*/
4208 }
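/*
 * Illustrative sketch, not compiled as part of this file: the retry shape of
 * the cache-collection loop above - keep collecting while the buffer-cache
 * pass is still yielding memory and the free-page goal has not been met.
 * The helpers are hypothetical.
 */
#if 0
#include <stdbool.h>

bool sketch_buffer_cache_collect(void);  /* hypothetical: true if it freed large zone elements */
void sketch_zone_gc(void);               /* hypothetical */
unsigned int sketch_free_pages(void);    /* hypothetical */

static void
sketch_collect(unsigned int free_target)
{
	bool freed_large = false;
	bool first_try   = true;

	do {
		freed_large = sketch_buffer_cache_collect();

		/* zone GC last, since the other passes may return memory to zones */
		if (first_try || freed_large)
			sketch_zone_gc();

		first_try = false;
	} while (freed_large && sketch_free_pages() < free_target);
}
#endif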
4209
4210
4211 #if VM_PAGE_BUCKETS_CHECK
4212 #if VM_PAGE_FAKE_BUCKETS
4213 extern vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
4214 #endif /* VM_PAGE_FAKE_BUCKETS */
4215 #endif /* VM_PAGE_BUCKETS_CHECK */
4216
4217
4218
4219 void
4220 vm_set_restrictions()
4221 {
4222 host_basic_info_data_t hinfo;
4223 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
4224
4225 #define BSD_HOST 1
4226 host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
4227
4228 assert(hinfo.max_cpus > 0);
4229
4230 if (hinfo.max_cpus <= 3) {
4231 /*
4232 * on systems with a limited number of CPUS, bind the
4233 * 4 major threads that can free memory and that tend to use
4234 * a fair bit of CPU under pressured conditions to a single processor.
4235 * This insures that these threads don't hog all of the available CPUs
4236 * (important for camera launch), while allowing them to run independently
4237 * w/r to locks... the 4 threads are
4238 * vm_pageout_scan, vm_pageout_iothread_internal (compressor),
4239 * vm_compressor_swap_trigger_thread (minor and major compactions),
4240 * memorystatus_thread (jetsams).
4241 *
4242 * the first time the thread is run, it is responsible for checking the
4243 * state of vm_restricted_to_single_processor, and if TRUE it calls
4244 * thread_bind_master... someday this should be replaced with a group
4245 * scheduling mechanism and KPI.
4246 */
4247 vm_pageout_state.vm_restricted_to_single_processor = TRUE;
4248 } else
4249 vm_pageout_state.vm_restricted_to_single_processor = FALSE;
4250 }
4251
4252 void
4253 vm_pageout(void)
4254 {
4255 thread_t self = current_thread();
4256 thread_t thread;
4257 kern_return_t result;
4258 spl_t s;
4259
4260 /*
4261 * Set thread privileges.
4262 */
4263 s = splsched();
4264
4265 thread_lock(self);
4266 self->options |= TH_OPT_VMPRIV;
4267 sched_set_thread_base_priority(self, BASEPRI_VM);
4268 thread_unlock(self);
4269
4270 if (!self->reserved_stack)
4271 self->reserved_stack = self->kernel_stack;
4272
4273 if (vm_pageout_state.vm_restricted_to_single_processor == TRUE)
4274 thread_vm_bind_group_add();
4275
4276 splx(s);
4277
4278 thread_set_thread_name(current_thread(), "VM_pageout_scan");
4279
4280 /*
4281 * Initialize some paging parameters.
4282 */
4283
4284 vm_pageout_state.vm_pressure_thread_running = FALSE;
4285 vm_pageout_state.vm_pressure_changed = FALSE;
4286 vm_pageout_state.memorystatus_purge_on_warning = 2;
4287 vm_pageout_state.memorystatus_purge_on_urgent = 5;
4288 vm_pageout_state.memorystatus_purge_on_critical = 8;
4289 vm_pageout_state.vm_page_speculative_q_age_ms = VM_PAGE_SPECULATIVE_Q_AGE_MS;
4290 vm_pageout_state.vm_page_speculative_percentage = 5;
4291 vm_pageout_state.vm_page_speculative_target = 0;
4292
4293 vm_pageout_state.vm_pageout_external_iothread = THREAD_NULL;
4294 vm_pageout_state.vm_pageout_internal_iothread = THREAD_NULL;
4295
4296 vm_pageout_state.vm_pageout_swap_wait = 0;
4297 vm_pageout_state.vm_pageout_idle_wait = 0;
4298 vm_pageout_state.vm_pageout_empty_wait = 0;
4299 vm_pageout_state.vm_pageout_burst_wait = 0;
4300 vm_pageout_state.vm_pageout_deadlock_wait = 0;
4301 vm_pageout_state.vm_pageout_deadlock_relief = 0;
4302 vm_pageout_state.vm_pageout_burst_inactive_throttle = 0;
4303
4304 vm_pageout_state.vm_pageout_inactive = 0;
4305 vm_pageout_state.vm_pageout_inactive_used = 0;
4306 vm_pageout_state.vm_pageout_inactive_clean = 0;
4307
4308 vm_pageout_state.vm_memory_pressure = 0;
4309 vm_pageout_state.vm_page_filecache_min = 0;
4310 #if CONFIG_JETSAM
4311 vm_pageout_state.vm_page_filecache_min_divisor = 70;
4312 vm_pageout_state.vm_page_xpmapped_min_divisor = 40;
4313 #else
4314 vm_pageout_state.vm_page_filecache_min_divisor = 27;
4315 vm_pageout_state.vm_page_xpmapped_min_divisor = 36;
4316 #endif
4317 vm_pageout_state.vm_page_free_count_init = vm_page_free_count;
4318
4319 vm_pageout_state.vm_pageout_considered_page_last = 0;
4320
4321 if (vm_pageout_state.vm_pageout_swap_wait == 0)
4322 vm_pageout_state.vm_pageout_swap_wait = VM_PAGEOUT_SWAP_WAIT;
4323
4324 if (vm_pageout_state.vm_pageout_idle_wait == 0)
4325 vm_pageout_state.vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;
4326
4327 if (vm_pageout_state.vm_pageout_burst_wait == 0)
4328 vm_pageout_state.vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
4329
4330 if (vm_pageout_state.vm_pageout_empty_wait == 0)
4331 vm_pageout_state.vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
4332
4333 if (vm_pageout_state.vm_pageout_deadlock_wait == 0)
4334 vm_pageout_state.vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;
4335
4336 if (vm_pageout_state.vm_pageout_deadlock_relief == 0)
4337 vm_pageout_state.vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;
4338
4339 if (vm_pageout_state.vm_pageout_burst_inactive_throttle == 0)
4340 vm_pageout_state.vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;
4341 /*
4342 * even if we've already called vm_page_free_reserve,
4343 * call it again here to ensure that the targets are
4344 * accurately calculated (it uses vm_page_free_count_init);
4345 * calling it with an arg of 0 will not change the reserve,
4346 * but will re-calculate free_min and free_target
4347 */
4348 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
4349 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
4350 } else
4351 vm_page_free_reserve(0);
4352
4353
4354 vm_page_queue_init(&vm_pageout_queue_external.pgo_pending);
4355 vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
4356 vm_pageout_queue_external.pgo_laundry = 0;
4357 vm_pageout_queue_external.pgo_idle = FALSE;
4358 vm_pageout_queue_external.pgo_busy = FALSE;
4359 vm_pageout_queue_external.pgo_throttled = FALSE;
4360 vm_pageout_queue_external.pgo_draining = FALSE;
4361 vm_pageout_queue_external.pgo_lowpriority = FALSE;
4362 vm_pageout_queue_external.pgo_tid = -1;
4363 vm_pageout_queue_external.pgo_inited = FALSE;
4364
4365 vm_page_queue_init(&vm_pageout_queue_internal.pgo_pending);
4366 vm_pageout_queue_internal.pgo_maxlaundry = 0;
4367 vm_pageout_queue_internal.pgo_laundry = 0;
4368 vm_pageout_queue_internal.pgo_idle = FALSE;
4369 vm_pageout_queue_internal.pgo_busy = FALSE;
4370 vm_pageout_queue_internal.pgo_throttled = FALSE;
4371 vm_pageout_queue_internal.pgo_draining = FALSE;
4372 vm_pageout_queue_internal.pgo_lowpriority = FALSE;
4373 vm_pageout_queue_internal.pgo_tid = -1;
4374 vm_pageout_queue_internal.pgo_inited = FALSE;
4375
4376 /* internal pageout thread is started the first time the default pager is registered */
4377 /* external pageout and garbage collection threads started here */
4378
4379 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL,
4380 BASEPRI_VM,
4381 &vm_pageout_state.vm_pageout_external_iothread);
4382 if (result != KERN_SUCCESS)
4383 panic("vm_pageout_iothread_external: create failed");
4384
4385 thread_deallocate(vm_pageout_state.vm_pageout_external_iothread);
4386
4387 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL,
4388 BASEPRI_DEFAULT,
4389 &thread);
4390 if (result != KERN_SUCCESS)
4391 panic("vm_pageout_garbage_collect: create failed");
4392
4393 thread_deallocate(thread);
4394
4395 #if VM_PRESSURE_EVENTS
4396 result = kernel_thread_start_priority((thread_continue_t)vm_pressure_thread, NULL,
4397 BASEPRI_DEFAULT,
4398 &thread);
4399
4400 if (result != KERN_SUCCESS)
4401 panic("vm_pressure_thread: create failed");
4402
4403 thread_deallocate(thread);
4404 #endif
4405
4406 vm_object_reaper_init();
4407
4408
4409 bzero(&vm_config, sizeof(vm_config));
4410
4411 switch(vm_compressor_mode) {
4412
4413 case VM_PAGER_DEFAULT:
4414 printf("mapping deprecated VM_PAGER_DEFAULT to VM_PAGER_COMPRESSOR_WITH_SWAP\n");
4415
4416 case VM_PAGER_COMPRESSOR_WITH_SWAP:
4417 vm_config.compressor_is_present = TRUE;
4418 vm_config.swap_is_present = TRUE;
4419 vm_config.compressor_is_active = TRUE;
4420 vm_config.swap_is_active = TRUE;
4421 break;
4422
4423 case VM_PAGER_COMPRESSOR_NO_SWAP:
4424 vm_config.compressor_is_present = TRUE;
4425 vm_config.swap_is_present = TRUE;
4426 vm_config.compressor_is_active = TRUE;
4427 break;
4428
4429 case VM_PAGER_FREEZER_DEFAULT:
4430 printf("mapping deprecated VM_PAGER_FREEZER_DEFAULT to VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP\n");
4431
4432 case VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP:
4433 vm_config.compressor_is_present = TRUE;
4434 vm_config.swap_is_present = TRUE;
4435 break;
4436
4437 case VM_PAGER_COMPRESSOR_NO_SWAP_PLUS_FREEZER_COMPRESSOR_WITH_SWAP:
4438 vm_config.compressor_is_present = TRUE;
4439 vm_config.swap_is_present = TRUE;
4440 vm_config.compressor_is_active = TRUE;
4441 vm_config.freezer_swap_is_active = TRUE;
4442 break;
4443
4444 case VM_PAGER_NOT_CONFIGURED:
4445 break;
4446
4447 default:
4448 printf("unknown compressor mode - %x\n", vm_compressor_mode);
4449 break;
4450 }
4451 if (VM_CONFIG_COMPRESSOR_IS_PRESENT)
4452 vm_compressor_pager_init();
4453
4454 #if VM_PRESSURE_EVENTS
4455 vm_pressure_events_enabled = TRUE;
4456 #endif /* VM_PRESSURE_EVENTS */
4457
4458 #if CONFIG_PHANTOM_CACHE
4459 vm_phantom_cache_init();
4460 #endif
4461 #if VM_PAGE_BUCKETS_CHECK
4462 #if VM_PAGE_FAKE_BUCKETS
4463 printf("**** DEBUG: protecting fake buckets [0x%llx:0x%llx]\n",
4464 (uint64_t) vm_page_fake_buckets_start,
4465 (uint64_t) vm_page_fake_buckets_end);
4466 pmap_protect(kernel_pmap,
4467 vm_page_fake_buckets_start,
4468 vm_page_fake_buckets_end,
4469 VM_PROT_READ);
4470 // *(char *) vm_page_fake_buckets_start = 'x'; /* panic! */
4471 #endif /* VM_PAGE_FAKE_BUCKETS */
4472 #endif /* VM_PAGE_BUCKETS_CHECK */
4473
4474 #if VM_OBJECT_TRACKING
4475 vm_object_tracking_init();
4476 #endif /* VM_OBJECT_TRACKING */
4477
4478 vm_tests();
4479
4480 vm_pageout_continue();
4481
4482 /*
4483 * Unreached code!
4484 *
4485 * The vm_pageout_continue() call above never returns, so the code below is never
4486 * executed. We take advantage of this to declare several DTrace VM related probe
4487 * points that our kernel doesn't have an analog for. These are probe points that
4488 * exist in Solaris and are in the DTrace documentation, so people may have written
4489 * scripts that use them. Declaring the probe points here means their scripts will
4490 * compile and execute which we want for portability of the scripts, but since this
4491 * section of code is never reached, the probe points will simply never fire. Yes,
4492 * this is basically a hack. The problem is the DTrace probe points were chosen with
4493 * Solaris specific VM events in mind, not portability to different VM implementations.
4494 */
4495
4496 DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL);
4497 DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL);
4498 DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL);
4499 DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL);
4500 DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL);
4501 DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL);
4502 DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL);
4503 /*NOTREACHED*/
4504 }
4505
4506
4507
4508 kern_return_t
4509 vm_pageout_internal_start(void)
4510 {
4511 kern_return_t result;
4512 int i;
4513 host_basic_info_data_t hinfo;
4514
4515 assert (VM_CONFIG_COMPRESSOR_IS_PRESENT);
4516
4517 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
4518 #define BSD_HOST 1
4519 host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
4520
4521 assert(hinfo.max_cpus > 0);
4522
4523 #if CONFIG_EMBEDDED
4524 vm_pageout_state.vm_compressor_thread_count = 1;
4525 #else
4526 if (hinfo.max_cpus > 4)
4527 vm_pageout_state.vm_compressor_thread_count = 2;
4528 else
4529 vm_pageout_state.vm_compressor_thread_count = 1;
4530 #endif
4531 PE_parse_boot_argn("vmcomp_threads", &vm_pageout_state.vm_compressor_thread_count,
4532 sizeof(vm_pageout_state.vm_compressor_thread_count));
4533
4534 if (vm_pageout_state.vm_compressor_thread_count >= hinfo.max_cpus)
4535 vm_pageout_state.vm_compressor_thread_count = hinfo.max_cpus - 1;
4536 if (vm_pageout_state.vm_compressor_thread_count <= 0)
4537 vm_pageout_state.vm_compressor_thread_count = 1;
4538 else if (vm_pageout_state.vm_compressor_thread_count > MAX_COMPRESSOR_THREAD_COUNT)
4539 vm_pageout_state.vm_compressor_thread_count = MAX_COMPRESSOR_THREAD_COUNT;
4540
4541 vm_pageout_queue_internal.pgo_maxlaundry = (vm_pageout_state.vm_compressor_thread_count * 4) * VM_PAGE_LAUNDRY_MAX;
4542
4543 PE_parse_boot_argn("vmpgoi_maxlaundry", &vm_pageout_queue_internal.pgo_maxlaundry, sizeof(vm_pageout_queue_internal.pgo_maxlaundry));
4544
4545 for (i = 0; i < vm_pageout_state.vm_compressor_thread_count; i++) {
4546 ciq[i].id = i;
4547 ciq[i].q = &vm_pageout_queue_internal;
4548 ciq[i].current_chead = NULL;
4549 ciq[i].scratch_buf = kalloc(COMPRESSOR_SCRATCH_BUF_SIZE);
4550
4551 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, (void *)&ciq[i],
4552 BASEPRI_VM, &vm_pageout_state.vm_pageout_internal_iothread);
4553
4554 if (result == KERN_SUCCESS)
4555 thread_deallocate(vm_pageout_state.vm_pageout_internal_iothread);
4556 else
4557 break;
4558 }
4559 return result;
4560 }
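/*
 * Illustrative sketch, not compiled as part of this file: the clamping
 * applied above when sizing the compressor thread pool. The "vmcomp_threads"
 * boot-arg override is shown as a plain parameter, and
 * SKETCH_MAX_COMPRESSOR_THREADS is a hypothetical stand-in for
 * MAX_COMPRESSOR_THREAD_COUNT.
 */
#if 0
#define SKETCH_MAX_COMPRESSOR_THREADS 8   /* stand-in for MAX_COMPRESSOR_THREAD_COUNT */

static int
sketch_compressor_thread_count(int max_cpus, int embedded, int boot_arg /* 0 = unset */)
{
	int count = embedded ? 1 : (max_cpus > 4 ? 2 : 1);

	if (boot_arg > 0)
		count = boot_arg;                 /* boot-arg override */

	if (count >= max_cpus)                    /* always leave a CPU for other work */
		count = max_cpus - 1;
	if (count <= 0)
		count = 1;
	else if (count > SKETCH_MAX_COMPRESSOR_THREADS)
		count = SKETCH_MAX_COMPRESSOR_THREADS;

	return count;
}
#endif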
4561
4562 #if CONFIG_IOSCHED
4563 /*
4564 * To support I/O Expedite for compressed files we mark the upls with special flags.
4565 * The way decmpfs works is that we create a big upl which marks all the pages needed to
4566 * represent the compressed file as busy. We tag this upl with the flag UPL_DECMP_REQ. Decmpfs
4567 * then issues smaller I/Os to read the compressed data, decompresses it and puts the data into the pages
4568 * being held in the big original UPL. We mark each of these smaller UPLs with the flag
4569 * UPL_DECMP_REAL_IO. Any outstanding real I/O UPL is tracked by the big req upl using the
4570 * decmp_io_upl field (in the upl structure). This link is protected in the forward direction
4571 * by the req upl lock (the reverse link doesn't need synchronization since we never inspect this link
4572 * unless the real I/O upl is being destroyed).
4573 */
4574
4575
4576 static void
4577 upl_set_decmp_info(upl_t upl, upl_t src_upl)
4578 {
4579 assert((src_upl->flags & UPL_DECMP_REQ) != 0);
4580
4581 upl_lock(src_upl);
4582 if (src_upl->decmp_io_upl) {
4583 /*
4584 * If there is already an alive real I/O UPL, ignore this new UPL.
4585 * This case should rarely happen and even if it does, it just means
4586 * that we might issue a spurious expedite which the driver is expected
4587 * to handle.
4588 */
4589 upl_unlock(src_upl);
4590 return;
4591 }
4592 src_upl->decmp_io_upl = (void *)upl;
4593 src_upl->ref_count++;
4594
4595 upl->flags |= UPL_DECMP_REAL_IO;
4596 upl->decmp_io_upl = (void *)src_upl;
4597 upl_unlock(src_upl);
4598 }
4599 #endif /* CONFIG_IOSCHED */
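/*
 * Illustrative sketch, not compiled as part of this file: the cross-linking
 * done by upl_set_decmp_info() above, reduced to two generic objects
 * protected by the request object's lock. Types and fields are invented for
 * illustration.
 */
#if 0
#include <pthread.h>
#include <stddef.h>

struct sketch_io;

struct sketch_req {
	pthread_mutex_t    lock;
	int                ref_count;
	struct sketch_io  *io;      /* at most one live real-I/O object */
};

struct sketch_io {
	struct sketch_req *req;     /* back pointer, only read at teardown */
};

static void
sketch_link_io(struct sketch_req *req, struct sketch_io *io)
{
	pthread_mutex_lock(&req->lock);
	if (req->io != NULL) {
		/* an earlier real-I/O object is still alive; ignore this one */
		pthread_mutex_unlock(&req->lock);
		return;
	}
	req->io = io;
	req->ref_count++;           /* the link holds a reference on the request */
	io->req = req;
	pthread_mutex_unlock(&req->lock);
}
#endif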
4600
4601 #if UPL_DEBUG
4602 int upl_debug_enabled = 1;
4603 #else
4604 int upl_debug_enabled = 0;
4605 #endif
4606
4607 static upl_t
4608 upl_create(int type, int flags, upl_size_t size)
4609 {
4610 upl_t upl;
4611 vm_size_t page_field_size = 0;
4612 int upl_flags = 0;
4613 vm_size_t upl_size = sizeof(struct upl);
4614
4615 size = round_page_32(size);
4616
4617 if (type & UPL_CREATE_LITE) {
4618 page_field_size = (atop(size) + 7) >> 3;
4619 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
4620
4621 upl_flags |= UPL_LITE;
4622 }
4623 if (type & UPL_CREATE_INTERNAL) {
4624 upl_size += sizeof(struct upl_page_info) * atop(size);
4625
4626 upl_flags |= UPL_INTERNAL;
4627 }
4628 upl = (upl_t)kalloc(upl_size + page_field_size);
4629
4630 if (page_field_size)
4631 bzero((char *)upl + upl_size, page_field_size);
4632
4633 upl->flags = upl_flags | flags;
4634 upl->kaddr = (vm_offset_t)0;
4635 upl->size = 0;
4636 upl->map_object = NULL;
4637 upl->ref_count = 1;
4638 upl->ext_ref_count = 0;
4639 upl->highest_page = 0;
4640 upl_lock_init(upl);
4641 upl->vector_upl = NULL;
4642 upl->associated_upl = NULL;
4643 upl->upl_iodone = NULL;
4644 #if CONFIG_IOSCHED
4645 if (type & UPL_CREATE_IO_TRACKING) {
4646 upl->upl_priority = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO);
4647 }
4648
4649 upl->upl_reprio_info = 0;
4650 upl->decmp_io_upl = 0;
4651 if ((type & UPL_CREATE_INTERNAL) && (type & UPL_CREATE_EXPEDITE_SUP)) {
4652 /* Only support expedite on internal UPLs */
4653 thread_t curthread = current_thread();
4654 upl->upl_reprio_info = (uint64_t *)kalloc(sizeof(uint64_t) * atop(size));
4655 bzero(upl->upl_reprio_info, (sizeof(uint64_t) * atop(size)));
4656 upl->flags |= UPL_EXPEDITE_SUPPORTED;
4657 if (curthread->decmp_upl != NULL)
4658 upl_set_decmp_info(upl, curthread->decmp_upl);
4659 }
4660 #endif
4661 #if CONFIG_IOSCHED || UPL_DEBUG
4662 if ((type & UPL_CREATE_IO_TRACKING) || upl_debug_enabled) {
4663 upl->upl_creator = current_thread();
4664 upl->uplq.next = 0;
4665 upl->uplq.prev = 0;
4666 upl->flags |= UPL_TRACKED_BY_OBJECT;
4667 }
4668 #endif
4669
4670 #if UPL_DEBUG
4671 upl->ubc_alias1 = 0;
4672 upl->ubc_alias2 = 0;
4673
4674 upl->upl_state = 0;
4675 upl->upl_commit_index = 0;
4676 bzero(&upl->upl_commit_records[0], sizeof(upl->upl_commit_records));
4677
4678 (void) OSBacktrace(&upl->upl_create_retaddr[0], UPL_DEBUG_STACK_FRAMES);
4679 #endif /* UPL_DEBUG */
4680
4681 return(upl);
4682 }
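/*
 * Illustrative sketch, not compiled as part of this file: the
 * allocation-size arithmetic used by upl_create() above, with 4 KiB pages
 * assumed for the worked numbers in the comments and the structure sizes
 * left symbolic.
 */
#if 0
#include <stddef.h>

#define SKETCH_PAGE_SIZE 4096u

static size_t
sketch_upl_alloc_size(size_t upl_struct_size, size_t page_info_size,
    unsigned int bytes, int lite, int internal)
{
	unsigned int pages = (bytes + SKETCH_PAGE_SIZE - 1) / SKETCH_PAGE_SIZE;
	size_t alloc  = upl_struct_size;
	size_t bitmap = 0;

	if (lite) {
		/* one bit per page, rounded up to whole bytes ... */
		bitmap = (pages + 7) >> 3;
		/* ... then to a 4-byte boundary: e.g. 256 pages -> 32 bytes */
		bitmap = (bitmap + 3) & ~(size_t)3;
	}
	if (internal) {
		/* one upl_page_info entry per page follows the upl itself */
		alloc += page_info_size * pages;
	}
	return alloc + bitmap;
}
#endif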
4683
4684 static void
4685 upl_destroy(upl_t upl)
4686 {
4687 int page_field_size; /* bit field in word size buf */
4688 int size;
4689
4690 if (upl->ext_ref_count) {
4691 panic("upl(%p) ext_ref_count", upl);
4692 }
4693
4694 #if CONFIG_IOSCHED
4695 if ((upl->flags & UPL_DECMP_REAL_IO) && upl->decmp_io_upl) {
4696 upl_t src_upl;
4697 src_upl = upl->decmp_io_upl;
4698 assert((src_upl->flags & UPL_DECMP_REQ) != 0);
4699 upl_lock(src_upl);
4700 src_upl->decmp_io_upl = NULL;
4701 upl_unlock(src_upl);
4702 upl_deallocate(src_upl);
4703 }
4704 #endif /* CONFIG_IOSCHED */
4705
4706 #if CONFIG_IOSCHED || UPL_DEBUG
4707 if ((upl->flags & UPL_TRACKED_BY_OBJECT) && !(upl->flags & UPL_VECTOR)) {
4708 vm_object_t object;
4709
4710 if (upl->flags & UPL_SHADOWED) {
4711 object = upl->map_object->shadow;
4712 } else {
4713 object = upl->map_object;
4714 }
4715
4716 vm_object_lock(object);
4717 queue_remove(&object->uplq, upl, upl_t, uplq);
4718 vm_object_activity_end(object);
4719 vm_object_collapse(object, 0, TRUE);
4720 vm_object_unlock(object);
4721 }
4722 #endif
4723 /*
4724 * drop a reference on the map_object whether or
4725 * not a pageout object is inserted
4726 */
4727 if (upl->flags & UPL_SHADOWED)
4728 vm_object_deallocate(upl->map_object);
4729
4730 if (upl->flags & UPL_DEVICE_MEMORY)
4731 size = PAGE_SIZE;
4732 else
4733 size = upl->size;
4734 page_field_size = 0;
4735
4736 if (upl->flags & UPL_LITE) {
4737 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
4738 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
4739 }
4740 upl_lock_destroy(upl);
4741 upl->vector_upl = (vector_upl_t) 0xfeedbeef;
4742
4743 #if CONFIG_IOSCHED
4744 if (upl->flags & UPL_EXPEDITE_SUPPORTED)
4745 kfree(upl->upl_reprio_info, sizeof(uint64_t) * (size/PAGE_SIZE));
4746 #endif
4747
4748 if (upl->flags & UPL_INTERNAL) {
4749 kfree(upl,
4750 sizeof(struct upl) +
4751 (sizeof(struct upl_page_info) * (size/PAGE_SIZE))
4752 + page_field_size);
4753 } else {
4754 kfree(upl, sizeof(struct upl) + page_field_size);
4755 }
4756 }
4757
4758 void
4759 upl_deallocate(upl_t upl)
4760 {
4761 upl_lock(upl);
4762
4763 if (--upl->ref_count == 0) {
4764 if(vector_upl_is_valid(upl))
4765 vector_upl_deallocate(upl);
4766 upl_unlock(upl);
4767
4768 if (upl->upl_iodone)
4769 upl_callout_iodone(upl);
4770
4771 upl_destroy(upl);
4772 } else
4773 upl_unlock(upl);
4774 }
4775
4776 #if CONFIG_IOSCHED
4777 void
4778 upl_mark_decmp(upl_t upl)
4779 {
4780 if (upl->flags & UPL_TRACKED_BY_OBJECT) {
4781 upl->flags |= UPL_DECMP_REQ;
4782 upl->upl_creator->decmp_upl = (void *)upl;
4783 }
4784 }
4785
4786 void
4787 upl_unmark_decmp(upl_t upl)
4788 {
4789 if(upl && (upl->flags & UPL_DECMP_REQ)) {
4790 upl->upl_creator->decmp_upl = NULL;
4791 }
4792 }
4793
4794 #endif /* CONFIG_IOSCHED */
4795
4796 #define VM_PAGE_Q_BACKING_UP(q) \
4797 ((q)->pgo_laundry >= (((q)->pgo_maxlaundry * 8) / 10))
4798
4799 boolean_t must_throttle_writes(void);
4800
4801 boolean_t
4802 must_throttle_writes()
4803 {
4804 if (VM_PAGE_Q_BACKING_UP(&vm_pageout_queue_external) &&
4805 vm_page_pageable_external_count > (AVAILABLE_NON_COMPRESSED_MEMORY * 6) / 10)
4806 return (TRUE);
4807
4808 return (FALSE);
4809 }
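/*
 * Illustrative sketch, not compiled as part of this file: the two thresholds
 * checked by must_throttle_writes() above, with the 8/10 and 6/10 ratios
 * written out and the inputs shown as plain parameters.
 */
#if 0
#include <stdbool.h>
#include <stdint.h>

static bool
sketch_must_throttle_writes(uint32_t ext_laundry, uint32_t ext_maxlaundry,
    uint32_t pageable_external, uint64_t available_noncompressed)
{
	/* the external pageout queue is backing up: at or above 80% of its limit ... */
	bool queue_backed_up = ext_laundry >= (ext_maxlaundry * 8) / 10;

	/* ... and file-backed pages dominate: more than 60% of what is still pageable */
	bool external_heavy  = pageable_external > (available_noncompressed * 6) / 10;

	return queue_backed_up && external_heavy;
}
#endif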
4810
4811
4812 /*
4813 * Routine: vm_object_upl_request
4814 * Purpose:
4815 * Cause the population of a portion of a vm_object.
4816 * Depending on the nature of the request, the pages
4817 * returned may contain valid data or be uninitialized.
4818 * A page list structure, listing the physical pages,
4819 * will be returned upon request.
4820 * This function is called by the file system or any other
4821 * supplier of backing store to a pager.
4822 * IMPORTANT NOTE: The caller must still respect the relationship
4823 * between the vm_object and its backing memory object. The
4824 * caller MUST NOT substitute changes in the backing file
4825 * without first doing a memory_object_lock_request on the
4826 * target range unless it is known that the pages are not
4827 * shared with another entity at the pager level.
4828 * Copy_in_to:
4829 * if a page list structure is present,
4830 * return the mapped physical pages; where a
4831 * page is not present, return a non-initialized
4832 * one. If the no_sync bit is turned on, don't
4833 * call the pager unlock to synchronize with other
4834 * possible copies of the page. Leave pages busy
4835 * in the original object, if a page list structure
4836 * was specified. When a commit of the page list
4837 * pages is done, the dirty bit will be set for each one.
4838 * Copy_out_from:
4839 * If a page list structure is present, return
4840 * all mapped pages. Where a page does not exist
4841 * map a zero filled one. Leave pages busy in
4842 * the original object. If a page list structure
4843 * is not specified, this call is a no-op.
4844 *
4845 * Note: access of default pager objects has a rather interesting
4846 * twist. The caller of this routine, presumably the file system
4847 * page cache handling code, will never actually make a request
4848 * against a default pager backed object. Only the default
4849 * pager will make requests on backing store related vm_objects.
4850 * In this way the default pager can maintain the relationship
4851 * between backing store files (abstract memory objects) and
4852 * the vm_objects (cache objects) they support.
4853 *
4854 */
4855
4856 __private_extern__ kern_return_t
4857 vm_object_upl_request(
4858 vm_object_t object,
4859 vm_object_offset_t offset,
4860 upl_size_t size,
4861 upl_t *upl_ptr,
4862 upl_page_info_array_t user_page_list,
4863 unsigned int *page_list_count,
4864 upl_control_flags_t cntrl_flags,
4865 vm_tag_t tag)
4866 {
4867 vm_page_t dst_page = VM_PAGE_NULL;
4868 vm_object_offset_t dst_offset;
4869 upl_size_t xfer_size;
4870 unsigned int size_in_pages;
4871 boolean_t dirty;
4872 boolean_t hw_dirty;
4873 upl_t upl = NULL;
4874 unsigned int entry;
4875 vm_page_t alias_page = NULL;
4876 int refmod_state = 0;
4877 wpl_array_t lite_list = NULL;
4878 vm_object_t last_copy_object;
4879 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
4880 struct vm_page_delayed_work *dwp;
4881 int dw_count;
4882 int dw_limit;
4883 int io_tracking_flag = 0;
4884 int grab_options;
4885 int page_grab_count = 0;
4886 ppnum_t phys_page;
4887 pmap_flush_context pmap_flush_context_storage;
4888 boolean_t pmap_flushes_delayed = FALSE;
4889
4890 if (cntrl_flags & ~UPL_VALID_FLAGS) {
4891 /*
4892 * For forward compatibility's sake,
4893 * reject any unknown flag.
4894 */
4895 return KERN_INVALID_VALUE;
4896 }
4897 if ( (!object->internal) && (object->paging_offset != 0) )
4898 panic("vm_object_upl_request: external object with non-zero paging offset\n");
4899 if (object->phys_contiguous)
4900 panic("vm_object_upl_request: contiguous object specified\n");
4901
4902 VM_DEBUG_CONSTANT_EVENT(vm_object_upl_request, VM_UPL_REQUEST, DBG_FUNC_START, size, cntrl_flags, 0, 0);
4903
4904 if (size > MAX_UPL_SIZE_BYTES)
4905 size = MAX_UPL_SIZE_BYTES;
4906
4907 if ( (cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL)
4908 *page_list_count = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT;
4909
4910 #if CONFIG_IOSCHED || UPL_DEBUG
4911 if (object->io_tracking || upl_debug_enabled)
4912 io_tracking_flag |= UPL_CREATE_IO_TRACKING;
4913 #endif
4914 #if CONFIG_IOSCHED
4915 if (object->io_tracking)
4916 io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
4917 #endif
4918
4919 if (cntrl_flags & UPL_SET_INTERNAL) {
4920 if (cntrl_flags & UPL_SET_LITE) {
4921
4922 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
4923
4924 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
4925 lite_list = (wpl_array_t)
4926 (((uintptr_t)user_page_list) +
4927 ((size/PAGE_SIZE) * sizeof(upl_page_info_t)));
4928 if (size == 0) {
4929 user_page_list = NULL;
4930 lite_list = NULL;
4931 }
4932 } else {
4933 upl = upl_create(UPL_CREATE_INTERNAL | io_tracking_flag, 0, size);
4934
4935 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
4936 if (size == 0) {
4937 user_page_list = NULL;
4938 }
4939 }
4940 } else {
4941 if (cntrl_flags & UPL_SET_LITE) {
4942
4943 upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
4944
4945 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
4946 if (size == 0) {
4947 lite_list = NULL;
4948 }
4949 } else {
4950 upl = upl_create(UPL_CREATE_EXTERNAL | io_tracking_flag, 0, size);
4951 }
4952 }
4953 *upl_ptr = upl;
4954
4955 if (user_page_list)
4956 user_page_list[0].device = FALSE;
4957
4958 if (cntrl_flags & UPL_SET_LITE) {
4959 upl->map_object = object;
4960 } else {
4961 upl->map_object = vm_object_allocate(size);
4962 /*
4963 * No need to lock the new object: nobody else knows
4964 * about it yet, so it's all ours so far.
4965 */
4966 upl->map_object->shadow = object;
4967 upl->map_object->pageout = TRUE;
4968 upl->map_object->can_persist = FALSE;
4969 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
4970 upl->map_object->vo_shadow_offset = offset;
4971 upl->map_object->wimg_bits = object->wimg_bits;
4972
4973 VM_PAGE_GRAB_FICTITIOUS(alias_page);
4974
4975 upl->flags |= UPL_SHADOWED;
4976 }
4977 if (cntrl_flags & UPL_FOR_PAGEOUT)
4978 upl->flags |= UPL_PAGEOUT;
4979
4980 vm_object_lock(object);
4981 vm_object_activity_begin(object);
4982
4983 grab_options = 0;
4984 #if CONFIG_SECLUDED_MEMORY
4985 if (object->can_grab_secluded) {
4986 grab_options |= VM_PAGE_GRAB_SECLUDED;
4987 }
4988 #endif /* CONFIG_SECLUDED_MEMORY */
4989
4990 /*
4991 * we can lock in the paging_offset once paging_in_progress is set
4992 */
4993 upl->size = size;
4994 upl->offset = offset + object->paging_offset;
4995
4996 #if CONFIG_IOSCHED || UPL_DEBUG
4997 if (object->io_tracking || upl_debug_enabled) {
4998 vm_object_activity_begin(object);
4999 queue_enter(&object->uplq, upl, upl_t, uplq);
5000 }
5001 #endif
5002 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != VM_OBJECT_NULL) {
5003 /*
5004 * Honor copy-on-write obligations
5005 *
5006 * The caller is gathering these pages and
5007 * might modify their contents. We need to
5008 * make sure that the copy object has its own
5009 * private copies of these pages before we let
5010 * the caller modify them.
5011 */
5012 vm_object_update(object,
5013 offset,
5014 size,
5015 NULL,
5016 NULL,
5017 FALSE, /* should_return */
5018 MEMORY_OBJECT_COPY_SYNC,
5019 VM_PROT_NO_CHANGE);
5020
5021 VM_PAGEOUT_DEBUG(upl_cow, 1);
5022 VM_PAGEOUT_DEBUG(upl_cow_pages, (size >> PAGE_SHIFT));
5023 }
5024 /*
5025 * remember which copy object we synchronized with
5026 */
5027 last_copy_object = object->copy;
5028 entry = 0;
5029
5030 xfer_size = size;
5031 dst_offset = offset;
5032 size_in_pages = size / PAGE_SIZE;
5033
5034 dwp = &dw_array[0];
5035 dw_count = 0;
5036 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
5037
5038 if (vm_page_free_count > (vm_page_free_target + size_in_pages) ||
5039 object->resident_page_count < ((MAX_UPL_SIZE_BYTES * 2) >> PAGE_SHIFT))
5040 object->scan_collisions = 0;
5041
5042 if ((cntrl_flags & UPL_WILL_MODIFY) && must_throttle_writes() == TRUE) {
5043 boolean_t isSSD = FALSE;
5044
5045 #if CONFIG_EMBEDDED
5046 isSSD = TRUE;
5047 #else
5048 vnode_pager_get_isSSD(object->pager, &isSSD);
5049 #endif
5050 vm_object_unlock(object);
5051
5052 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
5053
5054 if (isSSD == TRUE)
5055 delay(1000 * size_in_pages);
5056 else
5057 delay(5000 * size_in_pages);
5058 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
5059
5060 vm_object_lock(object);
5061 }
5062
5063 while (xfer_size) {
5064
5065 dwp->dw_mask = 0;
5066
5067 if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
5068 vm_object_unlock(object);
5069 VM_PAGE_GRAB_FICTITIOUS(alias_page);
5070 vm_object_lock(object);
5071 }
5072 if (cntrl_flags & UPL_COPYOUT_FROM) {
5073 upl->flags |= UPL_PAGE_SYNC_DONE;
5074
5075 if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
5076 dst_page->vmp_fictitious ||
5077 dst_page->vmp_absent ||
5078 dst_page->vmp_error ||
5079 dst_page->vmp_cleaning ||
5080 (VM_PAGE_WIRED(dst_page))) {
5081
5082 if (user_page_list)
5083 user_page_list[entry].phys_addr = 0;
5084
5085 goto try_next_page;
5086 }
5087 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
5088
5089 /*
5090 * grab this up front...
5091 * a high percentage of the time we're going to
5092 * need the hardware modification state a bit later
5093 * anyway... so we can eliminate an extra call into
5094 * the pmap layer by grabbing it here and recording it
5095 */
5096 if (dst_page->vmp_pmapped)
5097 refmod_state = pmap_get_refmod(phys_page);
5098 else
5099 refmod_state = 0;
5100
5101 if ( (refmod_state & VM_MEM_REFERENCED) && VM_PAGE_INACTIVE(dst_page)) {
5102 /*
5103 * page is on inactive list and referenced...
5104 * reactivate it now... this gets it out of the
5105 * way of vm_pageout_scan which would have to
5106 * reactivate it upon tripping over it
5107 */
5108 dwp->dw_mask |= DW_vm_page_activate;
5109 }
5110 if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
5111 /*
5112 * we're only asking for DIRTY pages to be returned
5113 */
5114 if (dst_page->vmp_laundry || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
5115 /*
5116 * if this is the page stolen by vm_pageout_scan to be
5117 * cleaned (as opposed to a buddy being clustered in),
5118 * or this request is not being driven by a PAGEOUT cluster,
5119 * then we only need to check for the page being dirty or
5120 * precious to decide whether to return it
5121 */
5122 if (dst_page->vmp_dirty || dst_page->vmp_precious || (refmod_state & VM_MEM_MODIFIED))
5123 goto check_busy;
5124 goto dont_return;
5125 }
5126 /*
5127 * this is a request for a PAGEOUT cluster and this page
5128 * is merely along for the ride as a 'buddy'... not only
5129 * does it have to be dirty to be returned, but it also
5130 * can't have been referenced recently...
5131 */
5132 if ( (hibernate_cleaning_in_progress == TRUE ||
5133 (!((refmod_state & VM_MEM_REFERENCED) || dst_page->vmp_reference) ||
5134 (dst_page->vmp_q_state == VM_PAGE_ON_THROTTLED_Q))) &&
5135 ((refmod_state & VM_MEM_MODIFIED) || dst_page->vmp_dirty || dst_page->vmp_precious) ) {
5136 goto check_busy;
5137 }
5138 dont_return:
5139 /*
5140 * if we reach here, we're not to return
5141 * the page... go on to the next one
5142 */
5143 if (dst_page->vmp_laundry == TRUE) {
5144 /*
5145 * if we get here, the page is not 'cleaning' (filtered out above).
5146 * since it has been referenced, remove it from the laundry
5147 * so we don't pay the cost of an I/O to clean a page
5148 * we're just going to take back
5149 */
5150 vm_page_lockspin_queues();
5151
5152 vm_pageout_steal_laundry(dst_page, TRUE);
5153 vm_page_activate(dst_page);
5154
5155 vm_page_unlock_queues();
5156 }
5157 if (user_page_list)
5158 user_page_list[entry].phys_addr = 0;
5159
5160 goto try_next_page;
5161 }
5162 check_busy:
5163 if (dst_page->vmp_busy) {
5164 if (cntrl_flags & UPL_NOBLOCK) {
5165 if (user_page_list)
5166 user_page_list[entry].phys_addr = 0;
5167 dwp->dw_mask = 0;
5168
5169 goto try_next_page;
5170 }
5171 /*
5172 * someone else is playing with the
5173 * page. We will have to wait.
5174 */
5175 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
5176
5177 continue;
5178 }
5179 if (dst_page->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) {
5180
5181 vm_page_lockspin_queues();
5182
5183 if (dst_page->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) {
5184 /*
5185 * we've buddied up a page for a clustered pageout
5186 * that has already been moved to the pageout
5187 * queue by pageout_scan... we need to remove
5188 * it from the queue and drop the laundry count
5189 * on that queue
5190 */
5191 vm_pageout_throttle_up(dst_page);
5192 }
5193 vm_page_unlock_queues();
5194 }
5195 hw_dirty = refmod_state & VM_MEM_MODIFIED;
5196 dirty = hw_dirty ? TRUE : dst_page->vmp_dirty;
5197
5198 if (phys_page > upl->highest_page)
5199 upl->highest_page = phys_page;
5200
5201 assert (!pmap_is_noencrypt(phys_page));
5202
5203 if (cntrl_flags & UPL_SET_LITE) {
5204 unsigned int pg_num;
5205
5206 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
5207 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
5208 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
5209
5210 if (hw_dirty) {
5211 if (pmap_flushes_delayed == FALSE) {
5212 pmap_flush_context_init(&pmap_flush_context_storage);
5213 pmap_flushes_delayed = TRUE;
5214 }
5215 pmap_clear_refmod_options(phys_page,
5216 VM_MEM_MODIFIED,
5217 PMAP_OPTIONS_NOFLUSH | PMAP_OPTIONS_CLEAR_WRITE,
5218 &pmap_flush_context_storage);
5219 }
5220
5221 /*
5222 * Mark original page as cleaning
5223 * in place.
5224 */
5225 dst_page->vmp_cleaning = TRUE;
5226 dst_page->vmp_precious = FALSE;
5227 } else {
5228 /*
5229 * use pageclean setup, it is more
5230 * convenient even for the pageout
5231 * cases here
5232 */
5233 vm_object_lock(upl->map_object);
5234 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
5235 vm_object_unlock(upl->map_object);
5236
5237 alias_page->vmp_absent = FALSE;
5238 alias_page = NULL;
5239 }
5240 if (dirty) {
5241 SET_PAGE_DIRTY(dst_page, FALSE);
5242 } else {
5243 dst_page->vmp_dirty = FALSE;
5244 }
5245
5246 if (!dirty)
5247 dst_page->vmp_precious = TRUE;
5248
5249 if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) {
5250 if ( !VM_PAGE_WIRED(dst_page))
5251 dst_page->vmp_free_when_done = TRUE;
5252 }
5253 } else {
5254 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != last_copy_object) {
5255 /*
5256 * Honor copy-on-write obligations
5257 *
5258 * The copy object has changed since we
5259 * last synchronized for copy-on-write.
5260 * Another copy object might have been
5261 * inserted while we released the object's
5262 * lock. Since someone could have seen the
5263 * original contents of the remaining pages
5264 * through that new object, we have to
5265 * synchronize with it again for the remaining
5266 * pages only. The previous pages are "busy"
5267 * so they can not be seen through the new
5268 * mapping. The new mapping will see our
5269 * upcoming changes for those previous pages,
5270 * but that's OK since they couldn't see what
5271 * was there before. It's just a race anyway
5272 * and there's no guarantee of consistency or
5273 * atomicity. We just don't want new mappings
5274 * to see both the *before* and *after* pages.
5275 */
5276 if (object->copy != VM_OBJECT_NULL) {
5277 vm_object_update(
5278 object,
5279 dst_offset,/* current offset */
5280 xfer_size, /* remaining size */
5281 NULL,
5282 NULL,
5283 FALSE, /* should_return */
5284 MEMORY_OBJECT_COPY_SYNC,
5285 VM_PROT_NO_CHANGE);
5286
5287 VM_PAGEOUT_DEBUG(upl_cow_again, 1);
5288 VM_PAGEOUT_DEBUG(upl_cow_again_pages, (xfer_size >> PAGE_SHIFT));
5289 }
5290 /*
5291 * remember the copy object we synced with
5292 */
5293 last_copy_object = object->copy;
5294 }
5295 dst_page = vm_page_lookup(object, dst_offset);
5296
5297 if (dst_page != VM_PAGE_NULL) {
5298
5299 if ((cntrl_flags & UPL_RET_ONLY_ABSENT)) {
5300 /*
5301 * skip over pages already present in the cache
5302 */
5303 if (user_page_list)
5304 user_page_list[entry].phys_addr = 0;
5305
5306 goto try_next_page;
5307 }
5308 if (dst_page->vmp_fictitious) {
5309 panic("need corner case for fictitious page");
5310 }
5311
5312 if (dst_page->vmp_busy || dst_page->vmp_cleaning) {
5313 /*
5314 * someone else is playing with the
5315 * page. We will have to wait.
5316 */
5317 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
5318
5319 continue;
5320 }
5321 if (dst_page->vmp_laundry)
5322 vm_pageout_steal_laundry(dst_page, FALSE);
5323 } else {
5324 if (object->private) {
5325 /*
5326 * This is a nasty wrinkle for users
5327 * of upl who encounter device or
5328 * private memory; however, it is
5329 * unavoidable: only a fault can
5330 * resolve the actual backing
5331 * physical page by asking the
5332 * backing device.
5333 */
5334 if (user_page_list)
5335 user_page_list[entry].phys_addr = 0;
5336
5337 goto try_next_page;
5338 }
5339 if (object->scan_collisions) {
5340 /*
5341 * the pageout_scan thread is trying to steal
5342 * pages from this object, but has run into our
5343 * lock... grab 2 pages from the head of the object...
5344 * the first is freed on behalf of pageout_scan, the
5345 * 2nd is for our own use... we use vm_object_page_grab
5346 * in both cases to avoid taking pages from the free
5347 * list since we are under memory pressure and our
5348 * lock on this object is getting in the way of
5349 * relieving it
5350 */
5351 dst_page = vm_object_page_grab(object);
5352
5353 if (dst_page != VM_PAGE_NULL)
5354 vm_page_release(dst_page,
5355 FALSE);
5356
5357 dst_page = vm_object_page_grab(object);
5358 }
5359 if (dst_page == VM_PAGE_NULL) {
5360 /*
5361 * need to allocate a page
5362 */
5363 dst_page = vm_page_grab_options(grab_options);
5364 if (dst_page != VM_PAGE_NULL)
5365 page_grab_count++;
5366 }
5367 if (dst_page == VM_PAGE_NULL) {
5368 if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) {
5369 /*
5370 * we don't want to stall waiting for pages to come onto the free list
5371 * while we're already holding absent pages in this UPL;
5372 * the caller will deal with the empty slots
5373 */
5374 if (user_page_list)
5375 user_page_list[entry].phys_addr = 0;
5376
5377 goto try_next_page;
5378 }
5379 /*
5380 * no pages available... wait
5381 * then try again for the same
5382 * offset...
5383 */
5384 vm_object_unlock(object);
5385
5386 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
5387
5388 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
5389
5390 VM_PAGE_WAIT();
5391 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
5392
5393 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
5394
5395 vm_object_lock(object);
5396
5397 continue;
5398 }
5399 vm_page_insert(dst_page, object, dst_offset);
5400
5401 dst_page->vmp_absent = TRUE;
5402 dst_page->vmp_busy = FALSE;
5403
5404 if (cntrl_flags & UPL_RET_ONLY_ABSENT) {
5405 /*
5406 * if UPL_RET_ONLY_ABSENT was specified,
5407 * then we're definitely setting up a
5408 * upl for a clustered read/pagein
5409 * operation... mark the pages as clustered
5410 * so upl_commit_range can put them on the
5411 * speculative list
5412 */
5413 dst_page->vmp_clustered = TRUE;
5414
5415 if ( !(cntrl_flags & UPL_FILE_IO))
5416 VM_STAT_INCR(pageins);
5417 }
5418 }
5419 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
5420
5421 dst_page->vmp_overwriting = TRUE;
5422
5423 if (dst_page->vmp_pmapped) {
5424 if ( !(cntrl_flags & UPL_FILE_IO))
5425 /*
5426 * eliminate all mappings from the
5427 * original object and its progeny
5428 */
5429 refmod_state = pmap_disconnect(phys_page);
5430 else
5431 refmod_state = pmap_get_refmod(phys_page);
5432 } else
5433 refmod_state = 0;
5434
5435 hw_dirty = refmod_state & VM_MEM_MODIFIED;
5436 dirty = hw_dirty ? TRUE : dst_page->vmp_dirty;
5437
5438 if (cntrl_flags & UPL_SET_LITE) {
5439 unsigned int pg_num;
5440
5441 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
5442 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
5443 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
5444
5445 if (hw_dirty)
5446 pmap_clear_modify(phys_page);
5447
5448 /*
5449 * Mark original page as cleaning
5450 * in place.
5451 */
5452 dst_page->vmp_cleaning = TRUE;
5453 dst_page->vmp_precious = FALSE;
5454 } else {
5455 /*
5456 * use pageclean setup, it is more
5457 * convenient even for the pageout
5458 * cases here
5459 */
5460 vm_object_lock(upl->map_object);
5461 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
5462 vm_object_unlock(upl->map_object);
5463
5464 alias_page->vmp_absent = FALSE;
5465 alias_page = NULL;
5466 }
5467
5468 if (cntrl_flags & UPL_REQUEST_SET_DIRTY) {
5469 upl->flags &= ~UPL_CLEAR_DIRTY;
5470 upl->flags |= UPL_SET_DIRTY;
5471 dirty = TRUE;
5473 } else if (cntrl_flags & UPL_CLEAN_IN_PLACE) {
5474 /*
5475 * clean in place for read implies
5476 * that a write will be done on all
5477 * the pages that are dirty before
5478 * a upl commit is done. The caller
5479 * is obligated to preserve the
5480 * contents of all pages marked dirty
5481 */
5482 upl->flags |= UPL_CLEAR_DIRTY;
5483 }
5484 dst_page->vmp_dirty = dirty;
5485
5486 if (!dirty)
5487 dst_page->vmp_precious = TRUE;
5488
5489 if ( !VM_PAGE_WIRED(dst_page)) {
5490 /*
5491 * deny access to the target page while
5492 * it is being worked on
5493 */
5494 dst_page->vmp_busy = TRUE;
5495 } else
5496 dwp->dw_mask |= DW_vm_page_wire;
5497
5498 /*
5499 * We might be about to satisfy a fault which has been
5500 * requested. So no need for the "restart" bit.
5501 */
5502 dst_page->vmp_restart = FALSE;
5503 if (!dst_page->vmp_absent && !(cntrl_flags & UPL_WILL_MODIFY)) {
5504 /*
5505 * expect the page to be used
5506 */
5507 dwp->dw_mask |= DW_set_reference;
5508 }
5509 if (cntrl_flags & UPL_PRECIOUS) {
5510 if (object->internal) {
5511 SET_PAGE_DIRTY(dst_page, FALSE);
5512 dst_page->vmp_precious = FALSE;
5513 } else {
5514 dst_page->vmp_precious = TRUE;
5515 }
5516 } else {
5517 dst_page->vmp_precious = FALSE;
5518 }
5519 }
5520 if (dst_page->vmp_busy)
5521 upl->flags |= UPL_HAS_BUSY;
5522
5523 if (phys_page > upl->highest_page)
5524 upl->highest_page = phys_page;
5525 assert (!pmap_is_noencrypt(phys_page));
5526 if (user_page_list) {
5527 user_page_list[entry].phys_addr = phys_page;
5528 user_page_list[entry].free_when_done = dst_page->vmp_free_when_done;
5529 user_page_list[entry].absent = dst_page->vmp_absent;
5530 user_page_list[entry].dirty = dst_page->vmp_dirty;
5531 user_page_list[entry].precious = dst_page->vmp_precious;
5532 user_page_list[entry].device = FALSE;
5533 user_page_list[entry].needed = FALSE;
5534 if (dst_page->vmp_clustered == TRUE)
5535 user_page_list[entry].speculative = (dst_page->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE;
5536 else
5537 user_page_list[entry].speculative = FALSE;
5538 user_page_list[entry].cs_validated = dst_page->vmp_cs_validated;
5539 user_page_list[entry].cs_tainted = dst_page->vmp_cs_tainted;
5540 user_page_list[entry].cs_nx = dst_page->vmp_cs_nx;
5541 user_page_list[entry].mark = FALSE;
5542 }
5543 /*
5544 * if UPL_RET_ONLY_ABSENT is set, then
5545 * we are working with a fresh page and we've
5546 * just set the clustered flag on it to
5547 * indicate that it was dragged in as part of a
5548 * speculative cluster... so leave it alone
5549 */
5550 if ( !(cntrl_flags & UPL_RET_ONLY_ABSENT)) {
5551 /*
5552 * someone is explicitly grabbing this page...
5553 * update clustered and speculative state
5554 *
5555 */
5556 if (dst_page->vmp_clustered)
5557 VM_PAGE_CONSUME_CLUSTERED(dst_page);
5558 }
5559 try_next_page:
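/*
 * Delayed work requests (activate, wire, set-reference, ...) are
 * accumulated in dw_array and handed to vm_page_do_delayed_work()
 * in batches of up to dw_limit entries, so the queue manipulation
 * is applied one batch at a time rather than page by page.
 */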
5560 if (dwp->dw_mask) {
5561 if (dwp->dw_mask & DW_vm_page_activate)
5562 VM_STAT_INCR(reactivations);
5563
5564 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
5565
5566 if (dw_count >= dw_limit) {
5567 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
5568
5569 dwp = &dw_array[0];
5570 dw_count = 0;
5571 }
5572 }
5573 entry++;
5574 dst_offset += PAGE_SIZE_64;
5575 xfer_size -= PAGE_SIZE;
5576 }
5577 if (dw_count)
5578 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
5579
5580 if (alias_page != NULL) {
5581 VM_PAGE_FREE(alias_page);
5582 }
5583 if (pmap_flushes_delayed == TRUE)
5584 pmap_flush(&pmap_flush_context_storage);
5585
5586 if (page_list_count != NULL) {
5587 if (upl->flags & UPL_INTERNAL)
5588 *page_list_count = 0;
5589 else if (*page_list_count > entry)
5590 *page_list_count = entry;
5591 }
5592 #if UPL_DEBUG
5593 upl->upl_state = 1;
5594 #endif
5595 vm_object_unlock(object);
5596
5597 VM_DEBUG_CONSTANT_EVENT(vm_object_upl_request, VM_UPL_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
5598
5599 return KERN_SUCCESS;
5600 }
5601
5602 /*
5603 * Routine: vm_object_super_upl_request
5604 * Purpose:
5605 * Cause the population of a portion of a vm_object
5606 * in much the same way as memory_object_upl_request.
5607 * Depending on the nature of the request, the pages
5608 * returned may contain valid data or be uninitialized.
5609 * However, the region may be expanded up to the super
5610 * cluster size provided.
5611 */
5612
5613 __private_extern__ kern_return_t
5614 vm_object_super_upl_request(
5615 vm_object_t object,
5616 vm_object_offset_t offset,
5617 upl_size_t size,
5618 upl_size_t super_cluster,
5619 upl_t *upl,
5620 upl_page_info_t *user_page_list,
5621 unsigned int *page_list_count,
5622 upl_control_flags_t cntrl_flags,
5623 vm_tag_t tag)
5624 {
5625 if (object->paging_offset > offset || ((cntrl_flags & UPL_VECTOR)==UPL_VECTOR))
5626 return KERN_FAILURE;
5627
5628 assert(object->paging_in_progress);
5629 offset = offset - object->paging_offset;
5630
5631 if (super_cluster > size) {
5632
5633 vm_object_offset_t base_offset;
5634 upl_size_t super_size;
5635 vm_object_size_t super_size_64;
5636
5637 base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1));
5638 super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster;
5639 super_size_64 = ((base_offset + super_size) > object->vo_size) ? (object->vo_size - base_offset) : super_size;
5640 super_size = (upl_size_t) super_size_64;
5641 assert(super_size == super_size_64);
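/*
 * For example, with super_cluster == 0x10000 (64KB): a request
 * for offset 0x1f000, size 0x3000 rounds base_offset down to
 * 0x10000; since 0x1f000 + 0x3000 == 0x22000 spills past
 * base_offset + super_cluster == 0x20000, super_size is doubled
 * to 0x20000 (128KB) and then clamped to the object's size
 * (super_size_64) above.
 */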
5642
5643 if (offset > (base_offset + super_size)) {
5644 panic("vm_object_super_upl_request: Missed target pageout"
5645 " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
5646 offset, base_offset, super_size, super_cluster,
5647 size, object->paging_offset);
5648 }
5649 /*
5650 * apparently there is a case where the vm requests a
5651 * page to be written out whose offset is beyond the
5652 * object size
5653 */
5654 if ((offset + size) > (base_offset + super_size)) {
5655 super_size_64 = (offset + size) - base_offset;
5656 super_size = (upl_size_t) super_size_64;
5657 assert(super_size == super_size_64);
5658 }
5659
5660 offset = base_offset;
5661 size = super_size;
5662 }
5663 return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags, tag);
5664 }
5665
5666 #if CONFIG_EMBEDDED
5667 int cs_executable_create_upl = 0;
5668 extern int proc_selfpid(void);
5669 extern char *proc_name_address(void *p);
5670 #endif /* CONFIG_EMBEDDED */
5671
5672 kern_return_t
5673 vm_map_create_upl(
5674 vm_map_t map,
5675 vm_map_address_t offset,
5676 upl_size_t *upl_size,
5677 upl_t *upl,
5678 upl_page_info_array_t page_list,
5679 unsigned int *count,
5680 upl_control_flags_t *flags,
5681 vm_tag_t tag)
5682 {
5683 vm_map_entry_t entry;
5684 upl_control_flags_t caller_flags;
5685 int force_data_sync;
5686 int sync_cow_data;
5687 vm_object_t local_object;
5688 vm_map_offset_t local_offset;
5689 vm_map_offset_t local_start;
5690 kern_return_t ret;
5691
5692 assert(page_aligned(offset));
5693
5694 caller_flags = *flags;
5695
5696 if (caller_flags & ~UPL_VALID_FLAGS) {
5697 /*
5698 * For forward compatibility's sake,
5699 * reject any unknown flag.
5700 */
5701 return KERN_INVALID_VALUE;
5702 }
5703 force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
5704 sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);
5705
5706 if (upl == NULL)
5707 return KERN_INVALID_ARGUMENT;
5708
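/*
 * The map is only read-locked below.  Whenever we have to drop
 * that lock or fail to upgrade it (copy-object sync, data sync,
 * fault handling), the entry may have changed underneath us, so
 * we jump back to REDISCOVER_ENTRY and look the entry up again.
 */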
5709 REDISCOVER_ENTRY:
5710 vm_map_lock_read(map);
5711
5712 if (!vm_map_lookup_entry(map, offset, &entry)) {
5713 vm_map_unlock_read(map);
5714 return KERN_FAILURE;
5715 }
5716
5717 if ((entry->vme_end - offset) < *upl_size) {
5718 *upl_size = (upl_size_t) (entry->vme_end - offset);
5719 assert(*upl_size == entry->vme_end - offset);
5720 }
5721
5722 if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
5723 *flags = 0;
5724
5725 if (!entry->is_sub_map &&
5726 VME_OBJECT(entry) != VM_OBJECT_NULL) {
5727 if (VME_OBJECT(entry)->private)
5728 *flags = UPL_DEV_MEMORY;
5729
5730 if (VME_OBJECT(entry)->phys_contiguous)
5731 *flags |= UPL_PHYS_CONTIG;
5732 }
5733 vm_map_unlock_read(map);
5734 return KERN_SUCCESS;
5735 }
5736
5737 if (VME_OBJECT(entry) == VM_OBJECT_NULL ||
5738 !VME_OBJECT(entry)->phys_contiguous) {
5739 if (*upl_size > MAX_UPL_SIZE_BYTES)
5740 *upl_size = MAX_UPL_SIZE_BYTES;
5741 }
5742
5743 /*
5744 * Create an object if necessary.
5745 */
5746 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
5747
5748 if (vm_map_lock_read_to_write(map))
5749 goto REDISCOVER_ENTRY;
5750
5751 VME_OBJECT_SET(entry,
5752 vm_object_allocate((vm_size_t)
5753 (entry->vme_end -
5754 entry->vme_start)));
5755 VME_OFFSET_SET(entry, 0);
5756 assert(entry->use_pmap);
5757
5758 vm_map_lock_write_to_read(map);
5759 }
5760
5761 if (!(caller_flags & UPL_COPYOUT_FROM) &&
5762 !entry->is_sub_map &&
5763 !(entry->protection & VM_PROT_WRITE)) {
5764 vm_map_unlock_read(map);
5765 return KERN_PROTECTION_FAILURE;
5766 }
5767
5768 #if CONFIG_EMBEDDED
5769 if (map->pmap != kernel_pmap &&
5770 (caller_flags & UPL_COPYOUT_FROM) &&
5771 (entry->protection & VM_PROT_EXECUTE) &&
5772 !(entry->protection & VM_PROT_WRITE)) {
5773 vm_offset_t kaddr;
5774 vm_size_t ksize;
5775
5776 /*
5777 * We're about to create a read-only UPL backed by
5778 * memory from an executable mapping.
5779 * Wiring the pages would result in the pages being copied
5780 * (due to the "MAP_PRIVATE" mapping) and no longer
5781 * code-signed, so no longer eligible for execution.
5782 * Instead, let's copy the data into a kernel buffer and
5783 * create the UPL from this kernel buffer.
5784 * The kernel buffer is then freed, leaving the UPL holding
5785 * the last reference on the VM object, so the memory will
5786 * be released when the UPL is committed.
5787 */
5788
5789 vm_map_unlock_read(map);
5790 /* allocate kernel buffer */
5791 ksize = round_page(*upl_size);
5792 kaddr = 0;
5793 ret = kmem_alloc_pageable(kernel_map,
5794 &kaddr,
5795 ksize,
5796 tag);
5797 if (ret == KERN_SUCCESS) {
5798 /* copyin the user data */
5799 assert(page_aligned(offset));
5800 ret = copyinmap(map, offset, (void *)kaddr, *upl_size);
5801 }
5802 if (ret == KERN_SUCCESS) {
5803 if (ksize > *upl_size) {
5804 /* zero out the extra space in kernel buffer */
5805 memset((void *)(kaddr + *upl_size),
5806 0,
5807 ksize - *upl_size);
5808 }
5809 /* create the UPL from the kernel buffer */
5810 ret = vm_map_create_upl(kernel_map, kaddr, upl_size,
5811 upl, page_list, count, flags, tag);
5812 }
5813 if (kaddr != 0) {
5814 /* free the kernel buffer */
5815 kmem_free(kernel_map, kaddr, ksize);
5816 kaddr = 0;
5817 ksize = 0;
5818 }
5819 #if DEVELOPMENT || DEBUG
5820 DTRACE_VM4(create_upl_from_executable,
5821 vm_map_t, map,
5822 vm_map_address_t, offset,
5823 upl_size_t, *upl_size,
5824 kern_return_t, ret);
5825 #endif /* DEVELOPMENT || DEBUG */
5826 return ret;
5827 }
5828 #endif /* CONFIG_EMBEDDED */
5829
5830 local_object = VME_OBJECT(entry);
5831 assert(local_object != VM_OBJECT_NULL);
5832
5833 if (!entry->is_sub_map &&
5834 !entry->needs_copy &&
5835 *upl_size != 0 &&
5836 local_object->vo_size > *upl_size && /* partial UPL */
5837 entry->wired_count == 0 && /* No COW for entries that are wired */
5838 (map->pmap != kernel_pmap) && /* alias checks */
5839 (vm_map_entry_should_cow_for_true_share(entry) /* case 1 */
5840 ||
5841 (/* case 2 */
5842 local_object->internal &&
5843 (local_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) &&
5844 local_object->ref_count > 1))) {
5845 vm_prot_t prot;
5846
5847 /*
5848 * Case 1:
5849 * Set up the targeted range for copy-on-write to avoid
5850 * applying true_share/copy_delay to the entire object.
5851 *
5852 * Case 2:
5853 * This map entry covers only part of an internal
5854 * object. There could be other map entries covering
5855 * other areas of this object and some of these map
5856 * entries could be marked as "needs_copy", which
5857 * assumes that the object is COPY_SYMMETRIC.
5858 * To avoid marking this object as COPY_DELAY and
5859 * "true_share", let's shadow it and mark the new
5860 * (smaller) object as "true_share" and COPY_DELAY.
5861 */
5862
5863 if (vm_map_lock_read_to_write(map)) {
5864 goto REDISCOVER_ENTRY;
5865 }
5866 vm_map_lock_assert_exclusive(map);
5867 assert(VME_OBJECT(entry) == local_object);
5868
5869 vm_map_clip_start(map,
5870 entry,
5871 vm_map_trunc_page(offset,
5872 VM_MAP_PAGE_MASK(map)));
5873 vm_map_clip_end(map,
5874 entry,
5875 vm_map_round_page(offset + *upl_size,
5876 VM_MAP_PAGE_MASK(map)));
5877 if ((entry->vme_end - offset) < *upl_size) {
5878 *upl_size = (upl_size_t) (entry->vme_end - offset);
5879 assert(*upl_size == entry->vme_end - offset);
5880 }
5881
5882 prot = entry->protection & ~VM_PROT_WRITE;
5883 if (override_nx(map, VME_ALIAS(entry)) && prot)
5884 prot |= VM_PROT_EXECUTE;
5885 vm_object_pmap_protect(local_object,
5886 VME_OFFSET(entry),
5887 entry->vme_end - entry->vme_start,
5888 ((entry->is_shared ||
5889 map->mapped_in_other_pmaps)
5890 ? PMAP_NULL
5891 : map->pmap),
5892 entry->vme_start,
5893 prot);
5894
5895 assert(entry->wired_count == 0);
5896
5897 /*
5898 * Lock the VM object and re-check its status: if it's mapped
5899 * in another address space, we could still be racing with
5900 * another thread holding that other VM map exclusively.
5901 */
5902 vm_object_lock(local_object);
5903 if (local_object->true_share) {
5904 /* object is already in proper state: no COW needed */
5905 assert(local_object->copy_strategy !=
5906 MEMORY_OBJECT_COPY_SYMMETRIC);
5907 } else {
5908 /* not true_share: ask for copy-on-write below */
5909 assert(local_object->copy_strategy ==
5910 MEMORY_OBJECT_COPY_SYMMETRIC);
5911 entry->needs_copy = TRUE;
5912 }
5913 vm_object_unlock(local_object);
5914
5915 vm_map_lock_write_to_read(map);
5916 }
5917
5918 if (entry->needs_copy) {
5919 /*
5920 * Honor copy-on-write for COPY_SYMMETRIC
5921 * strategy.
5922 */
5923 vm_map_t local_map;
5924 vm_object_t object;
5925 vm_object_offset_t new_offset;
5926 vm_prot_t prot;
5927 boolean_t wired;
5928 vm_map_version_t version;
5929 vm_map_t real_map;
5930 vm_prot_t fault_type;
5931
5932 local_map = map;
5933
5934 if (caller_flags & UPL_COPYOUT_FROM) {
5935 fault_type = VM_PROT_READ | VM_PROT_COPY;
5936 vm_counters.create_upl_extra_cow++;
5937 vm_counters.create_upl_extra_cow_pages +=
5938 (entry->vme_end - entry->vme_start) / PAGE_SIZE;
5939 } else {
5940 fault_type = VM_PROT_WRITE;
5941 }
5942 if (vm_map_lookup_locked(&local_map,
5943 offset, fault_type,
5944 OBJECT_LOCK_EXCLUSIVE,
5945 &version, &object,
5946 &new_offset, &prot, &wired,
5947 NULL,
5948 &real_map) != KERN_SUCCESS) {
5949 if (fault_type == VM_PROT_WRITE) {
5950 vm_counters.create_upl_lookup_failure_write++;
5951 } else {
5952 vm_counters.create_upl_lookup_failure_copy++;
5953 }
5954 vm_map_unlock_read(local_map);
5955 return KERN_FAILURE;
5956 }
5957 if (real_map != map)
5958 vm_map_unlock(real_map);
5959 vm_map_unlock_read(local_map);
5960
5961 vm_object_unlock(object);
5962
5963 goto REDISCOVER_ENTRY;
5964 }
5965
5966 if (entry->is_sub_map) {
5967 vm_map_t submap;
5968
5969 submap = VME_SUBMAP(entry);
5970 local_start = entry->vme_start;
5971 local_offset = VME_OFFSET(entry);
5972
5973 vm_map_reference(submap);
5974 vm_map_unlock_read(map);
5975
5976 ret = vm_map_create_upl(submap,
5977 local_offset + (offset - local_start),
5978 upl_size, upl, page_list, count, flags, tag);
5979 vm_map_deallocate(submap);
5980
5981 return ret;
5982 }
5983
5984 if (sync_cow_data &&
5985 (VME_OBJECT(entry)->shadow ||
5986 VME_OBJECT(entry)->copy)) {
5987 local_object = VME_OBJECT(entry);
5988 local_start = entry->vme_start;
5989 local_offset = VME_OFFSET(entry);
5990
5991 vm_object_reference(local_object);
5992 vm_map_unlock_read(map);
5993
5994 if (local_object->shadow && local_object->copy) {
5995 vm_object_lock_request(local_object->shadow,
5996 ((vm_object_offset_t)
5997 ((offset - local_start) +
5998 local_offset) +
5999 local_object->vo_shadow_offset),
6000 *upl_size, FALSE,
6001 MEMORY_OBJECT_DATA_SYNC,
6002 VM_PROT_NO_CHANGE);
6003 }
6004 sync_cow_data = FALSE;
6005 vm_object_deallocate(local_object);
6006
6007 goto REDISCOVER_ENTRY;
6008 }
6009 if (force_data_sync) {
6010 local_object = VME_OBJECT(entry);
6011 local_start = entry->vme_start;
6012 local_offset = VME_OFFSET(entry);
6013
6014 vm_object_reference(local_object);
6015 vm_map_unlock_read(map);
6016
6017 vm_object_lock_request(local_object,
6018 ((vm_object_offset_t)
6019 ((offset - local_start) +
6020 local_offset)),
6021 (vm_object_size_t)*upl_size,
6022 FALSE,
6023 MEMORY_OBJECT_DATA_SYNC,
6024 VM_PROT_NO_CHANGE);
6025
6026 force_data_sync = FALSE;
6027 vm_object_deallocate(local_object);
6028
6029 goto REDISCOVER_ENTRY;
6030 }
6031 if (VME_OBJECT(entry)->private)
6032 *flags = UPL_DEV_MEMORY;
6033 else
6034 *flags = 0;
6035
6036 if (VME_OBJECT(entry)->phys_contiguous)
6037 *flags |= UPL_PHYS_CONTIG;
6038
6039 local_object = VME_OBJECT(entry);
6040 local_offset = VME_OFFSET(entry);
6041 local_start = entry->vme_start;
6042
6043 #if CONFIG_EMBEDDED
6044 /*
6045 * Wiring will copy the pages to the shadow object.
6046 * The shadow object will not be code-signed so
6047 * attempting to execute code from these copied pages
6048 * would trigger a code-signing violation.
6049 */
6050 if (entry->protection & VM_PROT_EXECUTE) {
6051 #if MACH_ASSERT
6052 printf("pid %d[%s] create_upl out of executable range from "
6053 "0x%llx to 0x%llx: side effects may include "
6054 "code-signing violations later on\n",
6055 proc_selfpid(),
6056 (current_task()->bsd_info
6057 ? proc_name_address(current_task()->bsd_info)
6058 : "?"),
6059 (uint64_t) entry->vme_start,
6060 (uint64_t) entry->vme_end);
6061 #endif /* MACH_ASSERT */
6062 DTRACE_VM2(cs_executable_create_upl,
6063 uint64_t, (uint64_t)entry->vme_start,
6064 uint64_t, (uint64_t)entry->vme_end);
6065 cs_executable_create_upl++;
6066 }
6067 #endif /* CONFIG_EMBEDDED */
6068
6069 vm_object_lock(local_object);
6070
6071 /*
6072 * Ensure that this object is "true_share" and "copy_delay" now,
6073 * while we're still holding the VM map lock. After we unlock the map,
6074 * anything could happen to that mapping, including some copy-on-write
6075 * activity. We need to make sure that the IOPL will point at the
6076 * same memory as the mapping.
6077 */
6078 if (local_object->true_share) {
6079 assert(local_object->copy_strategy !=
6080 MEMORY_OBJECT_COPY_SYMMETRIC);
6081 } else if (local_object != kernel_object &&
6082 local_object != compressor_object &&
6083 !local_object->phys_contiguous) {
6084 #if VM_OBJECT_TRACKING_OP_TRUESHARE
6085 if (!local_object->true_share &&
6086 vm_object_tracking_inited) {
6087 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
6088 int num = 0;
6089 num = OSBacktrace(bt,
6090 VM_OBJECT_TRACKING_BTDEPTH);
6091 btlog_add_entry(vm_object_tracking_btlog,
6092 local_object,
6093 VM_OBJECT_TRACKING_OP_TRUESHARE,
6094 bt,
6095 num);
6096 }
6097 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
6098 local_object->true_share = TRUE;
6099 if (local_object->copy_strategy ==
6100 MEMORY_OBJECT_COPY_SYMMETRIC) {
6101 local_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
6102 }
6103 }
6104
6105 vm_object_reference_locked(local_object);
6106 vm_object_unlock(local_object);
6107
6108 vm_map_unlock_read(map);
6109
6110 ret = vm_object_iopl_request(local_object,
6111 ((vm_object_offset_t)
6112 ((offset - local_start) + local_offset)),
6113 *upl_size,
6114 upl,
6115 page_list,
6116 count,
6117 caller_flags,
6118 tag);
6119 vm_object_deallocate(local_object);
6120
6121 return ret;
6122 }
6123
6124 /*
6125 * Internal routine to enter a UPL into a VM map.
6126 *
6127 * JMM - This should just be doable through the standard
6128 * vm_map_enter() API.
6129 */
6130 kern_return_t
6131 vm_map_enter_upl(
6132 vm_map_t map,
6133 upl_t upl,
6134 vm_map_offset_t *dst_addr)
6135 {
6136 vm_map_size_t size;
6137 vm_object_offset_t offset;
6138 vm_map_offset_t addr;
6139 vm_page_t m;
6140 kern_return_t kr;
6141 int isVectorUPL = 0, curr_upl=0;
6142 upl_t vector_upl = NULL;
6143 vm_offset_t vector_upl_dst_addr = 0;
6144 vm_map_t vector_upl_submap = NULL;
6145 upl_offset_t subupl_offset = 0;
6146 upl_size_t subupl_size = 0;
6147
6148 if (upl == UPL_NULL)
6149 return KERN_INVALID_ARGUMENT;
6150
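/*
 * A vector UPL is mapped by allocating a kernel submap large
 * enough for all of its sub-UPLs and entering each sub-UPL at
 * its recorded offset within that submap; the address returned
 * to the caller is the base of that submap.
 */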
6151 if((isVectorUPL = vector_upl_is_valid(upl))) {
6152 int mapped=0,valid_upls=0;
6153 vector_upl = upl;
6154
6155 upl_lock(vector_upl);
6156 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
6157 upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
6158 if(upl == NULL)
6159 continue;
6160 valid_upls++;
6161 if (UPL_PAGE_LIST_MAPPED & upl->flags)
6162 mapped++;
6163 }
6164
6165 if(mapped) {
6166 if(mapped != valid_upls)
6167 panic("Only %d of the %d sub-upls within the Vector UPL are alread mapped\n", mapped, valid_upls);
6168 else {
6169 upl_unlock(vector_upl);
6170 return KERN_FAILURE;
6171 }
6172 }
6173
6174 kr = kmem_suballoc(map, &vector_upl_dst_addr, vector_upl->size, FALSE,
6175 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
6176 &vector_upl_submap);
6177 if( kr != KERN_SUCCESS )
6178 panic("Vector UPL submap allocation failed\n");
6179 map = vector_upl_submap;
6180 vector_upl_set_submap(vector_upl, vector_upl_submap, vector_upl_dst_addr);
6181 curr_upl=0;
6182 }
6183 else
6184 upl_lock(upl);
6185
6186 process_upl_to_enter:
6187 if(isVectorUPL){
6188 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
6189 *dst_addr = vector_upl_dst_addr;
6190 upl_unlock(vector_upl);
6191 return KERN_SUCCESS;
6192 }
6193 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
6194 if(upl == NULL)
6195 goto process_upl_to_enter;
6196
6197 vector_upl_get_iostate(vector_upl, upl, &subupl_offset, &subupl_size);
6198 *dst_addr = (vm_map_offset_t)(vector_upl_dst_addr + (vm_map_offset_t)subupl_offset);
6199 } else {
6200 /*
6201 * check to see if already mapped
6202 */
6203 if (UPL_PAGE_LIST_MAPPED & upl->flags) {
6204 upl_unlock(upl);
6205 return KERN_FAILURE;
6206 }
6207 }
6208 if ((!(upl->flags & UPL_SHADOWED)) &&
6209 ((upl->flags & UPL_HAS_BUSY) ||
6210 !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || (upl->map_object->phys_contiguous)))) {
6211
6212 vm_object_t object;
6213 vm_page_t alias_page;
6214 vm_object_offset_t new_offset;
6215 unsigned int pg_num;
6216 wpl_array_t lite_list;
6217
6218 if (upl->flags & UPL_INTERNAL) {
6219 lite_list = (wpl_array_t)
6220 ((((uintptr_t)upl) + sizeof(struct upl))
6221 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
6222 } else {
6223 lite_list = (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl));
6224 }
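/*
 * Layout note: an internal UPL is followed in memory by its
 * upl_page_info_t array (one entry per page) and then by the
 * lite list bitmap; a non-internal UPL has only the bitmap
 * appended, which is why the two offsets above differ.
 */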
6225 object = upl->map_object;
6226 upl->map_object = vm_object_allocate(upl->size);
6227
6228 vm_object_lock(upl->map_object);
6229
6230 upl->map_object->shadow = object;
6231 upl->map_object->pageout = TRUE;
6232 upl->map_object->can_persist = FALSE;
6233 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
6234 upl->map_object->vo_shadow_offset = upl->offset - object->paging_offset;
6235 upl->map_object->wimg_bits = object->wimg_bits;
6236 offset = upl->map_object->vo_shadow_offset;
6237 new_offset = 0;
6238 size = upl->size;
6239
6240 upl->flags |= UPL_SHADOWED;
6241
6242 while (size) {
6243 pg_num = (unsigned int) (new_offset / PAGE_SIZE);
6244 assert(pg_num == new_offset / PAGE_SIZE);
6245
6246 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
6247
6248 VM_PAGE_GRAB_FICTITIOUS(alias_page);
6249
6250 vm_object_lock(object);
6251
6252 m = vm_page_lookup(object, offset);
6253 if (m == VM_PAGE_NULL) {
6254 panic("vm_upl_map: page missing\n");
6255 }
6256
6257 /*
6258 * Convert the fictitious page to a private
6259 * shadow of the real page.
6260 */
6261 assert(alias_page->vmp_fictitious);
6262 alias_page->vmp_fictitious = FALSE;
6263 alias_page->vmp_private = TRUE;
6264 alias_page->vmp_free_when_done = TRUE;
6265 /*
6266 * since m is a page in the upl it must
6267 * already be wired or BUSY, so it's
6268 * safe to assign the underlying physical
6269 * page to the alias
6270 */
6271 VM_PAGE_SET_PHYS_PAGE(alias_page, VM_PAGE_GET_PHYS_PAGE(m));
6272
6273 vm_object_unlock(object);
6274
6275 vm_page_lockspin_queues();
6276 vm_page_wire(alias_page, VM_KERN_MEMORY_NONE, TRUE);
6277 vm_page_unlock_queues();
6278
6279 vm_page_insert_wired(alias_page, upl->map_object, new_offset, VM_KERN_MEMORY_NONE);
6280
6281 assert(!alias_page->vmp_wanted);
6282 alias_page->vmp_busy = FALSE;
6283 alias_page->vmp_absent = FALSE;
6284 }
6285 size -= PAGE_SIZE;
6286 offset += PAGE_SIZE_64;
6287 new_offset += PAGE_SIZE_64;
6288 }
6289 vm_object_unlock(upl->map_object);
6290 }
6291 if (upl->flags & UPL_SHADOWED)
6292 offset = 0;
6293 else
6294 offset = upl->offset - upl->map_object->paging_offset;
6295
6296 size = upl->size;
6297
6298 vm_object_reference(upl->map_object);
6299
6300 if(!isVectorUPL) {
6301 *dst_addr = 0;
6302 /*
6303 * NEED A UPL_MAP ALIAS
6304 */
6305 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
6306 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_OSFMK,
6307 upl->map_object, offset, FALSE,
6308 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
6309
6310 if (kr != KERN_SUCCESS) {
6311 vm_object_deallocate(upl->map_object);
6312 upl_unlock(upl);
6313 return(kr);
6314 }
6315 }
6316 else {
6317 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
6318 VM_FLAGS_FIXED, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_OSFMK,
6319 upl->map_object, offset, FALSE,
6320 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
6321 if(kr)
6322 panic("vm_map_enter failed for a Vector UPL\n");
6323 }
6324 vm_object_lock(upl->map_object);
6325
6326 for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) {
6327 m = vm_page_lookup(upl->map_object, offset);
6328
6329 if (m) {
6330 m->vmp_pmapped = TRUE;
6331
6332 /* CODE SIGNING ENFORCEMENT: page has been wpmapped,
6333 * but only in kernel space. If this was on a user map,
6334 * we'd have to set the wpmapped bit. */
6335 /* m->vmp_wpmapped = TRUE; */
6336 assert(map->pmap == kernel_pmap);
6337
6338 PMAP_ENTER(map->pmap, addr, m, VM_PROT_DEFAULT, VM_PROT_NONE, 0, TRUE, kr);
6339
6340 assert(kr == KERN_SUCCESS);
6341 #if KASAN
6342 kasan_notify_address(addr, PAGE_SIZE_64);
6343 #endif
6344 }
6345 offset += PAGE_SIZE_64;
6346 }
6347 vm_object_unlock(upl->map_object);
6348
6349 /*
6350 * hold a reference for the mapping
6351 */
6352 upl->ref_count++;
6353 upl->flags |= UPL_PAGE_LIST_MAPPED;
6354 upl->kaddr = (vm_offset_t) *dst_addr;
6355 assert(upl->kaddr == *dst_addr);
6356
6357 if(isVectorUPL)
6358 goto process_upl_to_enter;
6359
6360 upl_unlock(upl);
6361
6362 return KERN_SUCCESS;
6363 }
6364
6365 /*
6366 * Internal routine to remove a UPL mapping from a VM map.
6367 *
6368 * XXX - This should just be doable through a standard
6369 * vm_map_remove() operation. Otherwise, implicit clean-up
6370 * of the target map won't be able to correctly remove
6371 * these (and release the reference on the UPL). Having
6372 * to do this means we can't map these into user-space
6373 * maps yet.
6374 */
6375 kern_return_t
6376 vm_map_remove_upl(
6377 vm_map_t map,
6378 upl_t upl)
6379 {
6380 vm_address_t addr;
6381 upl_size_t size;
6382 int isVectorUPL = 0, curr_upl = 0;
6383 upl_t vector_upl = NULL;
6384
6385 if (upl == UPL_NULL)
6386 return KERN_INVALID_ARGUMENT;
6387
6388 if((isVectorUPL = vector_upl_is_valid(upl))) {
6389 int unmapped=0, valid_upls=0;
6390 vector_upl = upl;
6391 upl_lock(vector_upl);
6392 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
6393 upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
6394 if(upl == NULL)
6395 continue;
6396 valid_upls++;
6397 if (!(UPL_PAGE_LIST_MAPPED & upl->flags))
6398 unmapped++;
6399 }
6400
6401 if(unmapped) {
6402 if(unmapped != valid_upls)
6403 panic("%d of the %d sub-upls within the Vector UPL is/are not mapped\n", unmapped, valid_upls);
6404 else {
6405 upl_unlock(vector_upl);
6406 return KERN_FAILURE;
6407 }
6408 }
6409 curr_upl=0;
6410 }
6411 else
6412 upl_lock(upl);
6413
6414 process_upl_to_remove:
6415 if(isVectorUPL) {
6416 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
6417 vm_map_t v_upl_submap;
6418 vm_offset_t v_upl_submap_dst_addr;
6419 vector_upl_get_submap(vector_upl, &v_upl_submap, &v_upl_submap_dst_addr);
6420
6421 vm_map_remove(map, v_upl_submap_dst_addr, v_upl_submap_dst_addr + vector_upl->size, VM_MAP_REMOVE_NO_FLAGS);
6422 vm_map_deallocate(v_upl_submap);
6423 upl_unlock(vector_upl);
6424 return KERN_SUCCESS;
6425 }
6426
6427 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
6428 if(upl == NULL)
6429 goto process_upl_to_remove;
6430 }
6431
6432 if (upl->flags & UPL_PAGE_LIST_MAPPED) {
6433 addr = upl->kaddr;
6434 size = upl->size;
6435
6436 assert(upl->ref_count > 1);
6437 upl->ref_count--; /* removing mapping ref */
6438
6439 upl->flags &= ~UPL_PAGE_LIST_MAPPED;
6440 upl->kaddr = (vm_offset_t) 0;
6441
6442 if(!isVectorUPL) {
6443 upl_unlock(upl);
6444
6445 vm_map_remove(
6446 map,
6447 vm_map_trunc_page(addr,
6448 VM_MAP_PAGE_MASK(map)),
6449 vm_map_round_page(addr + size,
6450 VM_MAP_PAGE_MASK(map)),
6451 VM_MAP_REMOVE_NO_FLAGS);
6452 return KERN_SUCCESS;
6453 }
6454 else {
6455 /*
6456 * If it's a Vectored UPL, we'll be removing the entire
6457 * submap anyway, so no need to remove individual UPL
6458 * element mappings from within the submap
6459 */
6460 goto process_upl_to_remove;
6461 }
6462 }
6463 upl_unlock(upl);
6464
6465 return KERN_FAILURE;
6466 }
6467
6468
6469 kern_return_t
6470 upl_commit_range(
6471 upl_t upl,
6472 upl_offset_t offset,
6473 upl_size_t size,
6474 int flags,
6475 upl_page_info_t *page_list,
6476 mach_msg_type_number_t count,
6477 boolean_t *empty)
6478 {
6479 upl_size_t xfer_size, subupl_size = size;
6480 vm_object_t shadow_object;
6481 vm_object_t object;
6482 vm_object_t m_object;
6483 vm_object_offset_t target_offset;
6484 upl_offset_t subupl_offset = offset;
6485 int entry;
6486 wpl_array_t lite_list;
6487 int occupied;
6488 int clear_refmod = 0;
6489 int pgpgout_count = 0;
6490 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
6491 struct vm_page_delayed_work *dwp;
6492 int dw_count;
6493 int dw_limit;
6494 int isVectorUPL = 0;
6495 upl_t vector_upl = NULL;
6496 boolean_t should_be_throttled = FALSE;
6497
6498 vm_page_t nxt_page = VM_PAGE_NULL;
6499 int fast_path_possible = 0;
6500 int fast_path_full_commit = 0;
6501 int throttle_page = 0;
6502 int unwired_count = 0;
6503 int local_queue_count = 0;
6504 vm_page_t first_local, last_local;
6505
6506 *empty = FALSE;
6507
6508 if (upl == UPL_NULL)
6509 return KERN_INVALID_ARGUMENT;
6510
6511 if (count == 0)
6512 page_list = NULL;
6513
6514 if((isVectorUPL = vector_upl_is_valid(upl))) {
6515 vector_upl = upl;
6516 upl_lock(vector_upl);
6517 }
6518 else
6519 upl_lock(upl);
6520
6521 process_upl_to_commit:
6522
6523 if(isVectorUPL) {
6524 size = subupl_size;
6525 offset = subupl_offset;
6526 if(size == 0) {
6527 upl_unlock(vector_upl);
6528 return KERN_SUCCESS;
6529 }
6530 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
6531 if(upl == NULL) {
6532 upl_unlock(vector_upl);
6533 return KERN_FAILURE;
6534 }
6535 page_list = UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(upl);
6536 subupl_size -= size;
6537 subupl_offset += size;
6538 }
6539
6540 #if UPL_DEBUG
6541 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
6542 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
6543
6544 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
6545 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
6546
6547 upl->upl_commit_index++;
6548 }
6549 #endif
6550 if (upl->flags & UPL_DEVICE_MEMORY)
6551 xfer_size = 0;
6552 else if ((offset + size) <= upl->size)
6553 xfer_size = size;
6554 else {
6555 if(!isVectorUPL)
6556 upl_unlock(upl);
6557 else {
6558 upl_unlock(vector_upl);
6559 }
6560 return KERN_FAILURE;
6561 }
6562 if (upl->flags & UPL_SET_DIRTY)
6563 flags |= UPL_COMMIT_SET_DIRTY;
6564 if (upl->flags & UPL_CLEAR_DIRTY)
6565 flags |= UPL_COMMIT_CLEAR_DIRTY;
6566
6567 if (upl->flags & UPL_INTERNAL)
6568 lite_list = (wpl_array_t) ((((uintptr_t)upl) + sizeof(struct upl))
6569 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
6570 else
6571 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
6572
6573 object = upl->map_object;
6574
6575 if (upl->flags & UPL_SHADOWED) {
6576 vm_object_lock(object);
6577 shadow_object = object->shadow;
6578 } else {
6579 shadow_object = object;
6580 }
6581 entry = offset/PAGE_SIZE;
6582 target_offset = (vm_object_offset_t)offset;
6583
6584 assert(!(target_offset & PAGE_MASK));
6585 assert(!(xfer_size & PAGE_MASK));
6586
6587 if (upl->flags & UPL_KERNEL_OBJECT)
6588 vm_object_lock_shared(shadow_object);
6589 else
6590 vm_object_lock(shadow_object);
6591
6592 VM_OBJECT_WIRED_PAGE_UPDATE_START(shadow_object);
6593
6594 if (upl->flags & UPL_ACCESS_BLOCKED) {
6595 assert(shadow_object->blocked_access);
6596 shadow_object->blocked_access = FALSE;
6597 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
6598 }
6599
6600 if (shadow_object->code_signed) {
6601 /*
6602 * CODE SIGNING:
6603 * If the object is code-signed, do not let this UPL tell
6604 * us if the pages are valid or not. Let the pages be
6605 * validated by VM the normal way (when they get mapped or
6606 * copied).
6607 */
6608 flags &= ~UPL_COMMIT_CS_VALIDATED;
6609 }
6610 if (! page_list) {
6611 /*
6612 * No page list to get the code-signing info from !?
6613 */
6614 flags &= ~UPL_COMMIT_CS_VALIDATED;
6615 }
6616 if (!VM_DYNAMIC_PAGING_ENABLED() && shadow_object->internal)
6617 should_be_throttled = TRUE;
6618
6619 dwp = &dw_array[0];
6620 dw_count = 0;
6621 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
6622
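/*
 * Fast path: for an IO_WIRE commit that is not freeing absent
 * pages, is not part of a vector UPL, and targets an object that
 * is neither volatile nor empty purgeable, walk the object's
 * resident page list directly, unwire pages onto a local list,
 * and splice that list into the appropriate page queue in one
 * operation after the main loop instead of queueing per-page
 * delayed work.
 */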
6623 if ((upl->flags & UPL_IO_WIRE) &&
6624 !(flags & UPL_COMMIT_FREE_ABSENT) &&
6625 !isVectorUPL &&
6626 shadow_object->purgable != VM_PURGABLE_VOLATILE &&
6627 shadow_object->purgable != VM_PURGABLE_EMPTY) {
6628
6629 if (!vm_page_queue_empty(&shadow_object->memq)) {
6630
6631 if (size == shadow_object->vo_size) {
6632 nxt_page = (vm_page_t)vm_page_queue_first(&shadow_object->memq);
6633 fast_path_full_commit = 1;
6634 }
6635 fast_path_possible = 1;
6636
6637 if (!VM_DYNAMIC_PAGING_ENABLED() && shadow_object->internal &&
6638 (shadow_object->purgable == VM_PURGABLE_DENY ||
6639 shadow_object->purgable == VM_PURGABLE_NONVOLATILE ||
6640 shadow_object->purgable == VM_PURGABLE_VOLATILE)) {
6641 throttle_page = 1;
6642 }
6643 }
6644 }
6645 first_local = VM_PAGE_NULL;
6646 last_local = VM_PAGE_NULL;
6647
6648 while (xfer_size) {
6649 vm_page_t t, m;
6650
6651 dwp->dw_mask = 0;
6652 clear_refmod = 0;
6653
6654 m = VM_PAGE_NULL;
6655
6656 if (upl->flags & UPL_LITE) {
6657 unsigned int pg_num;
6658
6659 if (nxt_page != VM_PAGE_NULL) {
6660 m = nxt_page;
6661 nxt_page = (vm_page_t)vm_page_queue_next(&nxt_page->vmp_listq);
6662 target_offset = m->vmp_offset;
6663 }
6664 pg_num = (unsigned int) (target_offset/PAGE_SIZE);
6665 assert(pg_num == target_offset/PAGE_SIZE);
6666
6667 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
6668 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
6669
6670 if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL)
6671 m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset));
6672 } else
6673 m = NULL;
6674 }
6675 if (upl->flags & UPL_SHADOWED) {
6676 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
6677
6678 t->vmp_free_when_done = FALSE;
6679
6680 VM_PAGE_FREE(t);
6681
6682 if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL)
6683 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
6684 }
6685 }
6686 if (m == VM_PAGE_NULL)
6687 goto commit_next_page;
6688
6689 m_object = VM_PAGE_OBJECT(m);
6690
6691 if (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
6692 assert(m->vmp_busy);
6693
6694 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6695 goto commit_next_page;
6696 }
6697
6698 if (flags & UPL_COMMIT_CS_VALIDATED) {
6699 /*
6700 * CODE SIGNING:
6701 * Set the code signing bits according to
6702 * what the UPL says they should be.
6703 */
6704 m->vmp_cs_validated = page_list[entry].cs_validated;
6705 m->vmp_cs_tainted = page_list[entry].cs_tainted;
6706 m->vmp_cs_nx = page_list[entry].cs_nx;
6707 }
6708 if (flags & UPL_COMMIT_WRITTEN_BY_KERNEL)
6709 m->vmp_written_by_kernel = TRUE;
6710
6711 if (upl->flags & UPL_IO_WIRE) {
6712
6713 if (page_list)
6714 page_list[entry].phys_addr = 0;
6715
6716 if (flags & UPL_COMMIT_SET_DIRTY) {
6717 SET_PAGE_DIRTY(m, FALSE);
6718 } else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
6719 m->vmp_dirty = FALSE;
6720
6721 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
6722 m->vmp_cs_validated && !m->vmp_cs_tainted) {
6723 /*
6724 * CODE SIGNING:
6725 * This page is no longer dirty
6726 * but could have been modified,
6727 * so it will need to be
6728 * re-validated.
6729 */
6730 m->vmp_cs_validated = FALSE;
6731
6732 VM_PAGEOUT_DEBUG(vm_cs_validated_resets, 1);
6733
6734 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
6735 }
6736 clear_refmod |= VM_MEM_MODIFIED;
6737 }
6738 if (upl->flags & UPL_ACCESS_BLOCKED) {
6739 /*
6740 * We blocked access to the pages in this UPL.
6741 * Clear the "busy" bit and wake up any waiter
6742 * for this page.
6743 */
6744 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6745 }
6746 if (fast_path_possible) {
6747 assert(m_object->purgable != VM_PURGABLE_EMPTY);
6748 assert(m_object->purgable != VM_PURGABLE_VOLATILE);
6749 if (m->vmp_absent) {
6750 assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
6751 assert(m->vmp_wire_count == 0);
6752 assert(m->vmp_busy);
6753
6754 m->vmp_absent = FALSE;
6755 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6756 } else {
6757 if (m->vmp_wire_count == 0)
6758 panic("wire_count == 0, m = %p, obj = %p\n", m, shadow_object);
6759 assert(m->vmp_q_state == VM_PAGE_IS_WIRED);
6760
6761 /*
6762 * XXX FBDP need to update some other
6763 * counters here (purgeable_wired_count)
6764 * (ledgers), ...
6765 */
6766 assert(m->vmp_wire_count > 0);
6767 m->vmp_wire_count--;
6768
6769 if (m->vmp_wire_count == 0) {
6770 m->vmp_q_state = VM_PAGE_NOT_ON_Q;
6771 unwired_count++;
6772 }
6773 }
6774 if (m->vmp_wire_count == 0) {
6775 assert(m->vmp_pageq.next == 0 && m->vmp_pageq.prev == 0);
6776
6777 if (last_local == VM_PAGE_NULL) {
6778 assert(first_local == VM_PAGE_NULL);
6779
6780 last_local = m;
6781 first_local = m;
6782 } else {
6783 assert(first_local != VM_PAGE_NULL);
6784
6785 m->vmp_pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
6786 first_local->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(m);
6787 first_local = m;
6788 }
6789 local_queue_count++;
6790
6791 if (throttle_page) {
6792 m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;
6793 } else {
6794 if (flags & UPL_COMMIT_INACTIVATE) {
6795 if (shadow_object->internal)
6796 m->vmp_q_state = VM_PAGE_ON_INACTIVE_INTERNAL_Q;
6797 else
6798 m->vmp_q_state = VM_PAGE_ON_INACTIVE_EXTERNAL_Q;
6799 } else
6800 m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
6801 }
6802 }
6803 } else {
6804 if (flags & UPL_COMMIT_INACTIVATE) {
6805 dwp->dw_mask |= DW_vm_page_deactivate_internal;
6806 clear_refmod |= VM_MEM_REFERENCED;
6807 }
6808 if (m->vmp_absent) {
6809 if (flags & UPL_COMMIT_FREE_ABSENT)
6810 dwp->dw_mask |= DW_vm_page_free;
6811 else {
6812 m->vmp_absent = FALSE;
6813 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6814
6815 if ( !(dwp->dw_mask & DW_vm_page_deactivate_internal))
6816 dwp->dw_mask |= DW_vm_page_activate;
6817 }
6818 } else
6819 dwp->dw_mask |= DW_vm_page_unwire;
6820 }
6821 goto commit_next_page;
6822 }
6823 assert(m->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR);
6824
6825 if (page_list)
6826 page_list[entry].phys_addr = 0;
6827
6828 /*
6829 * make sure to clear the hardware
6830 * modify or reference bits before
6831 * releasing the BUSY bit on this page;
6832 * otherwise we risk losing a legitimate
6833 * change of state
6834 */
6835 if (flags & UPL_COMMIT_CLEAR_DIRTY) {
6836 m->vmp_dirty = FALSE;
6837
6838 clear_refmod |= VM_MEM_MODIFIED;
6839 }
6840 if (m->vmp_laundry)
6841 dwp->dw_mask |= DW_vm_pageout_throttle_up;
6842
6843 if (VM_PAGE_WIRED(m))
6844 m->vmp_free_when_done = FALSE;
6845
6846 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
6847 m->vmp_cs_validated && !m->vmp_cs_tainted) {
6848 /*
6849 * CODE SIGNING:
6850 * This page is no longer dirty
6851 * but could have been modified,
6852 * so it will need to be
6853 * re-validated.
6854 */
6855 m->vmp_cs_validated = FALSE;
6856
6857 VM_PAGEOUT_DEBUG(vm_cs_validated_resets, 1);
6858
6859 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
6860 }
6861 if (m->vmp_overwriting) {
6862 /*
6863 * the (COPY_OUT_FROM == FALSE) request_page_list case
6864 */
6865 if (m->vmp_busy) {
6866 #if CONFIG_PHANTOM_CACHE
6867 if (m->vmp_absent && !m_object->internal)
6868 dwp->dw_mask |= DW_vm_phantom_cache_update;
6869 #endif
6870 m->vmp_absent = FALSE;
6871
6872 dwp->dw_mask |= DW_clear_busy;
6873 } else {
6874 /*
6875 * alternate (COPY_OUT_FROM == FALSE) page_list case
6876 * Occurs when the original page was wired
6877 * at the time of the list request
6878 */
6879 assert(VM_PAGE_WIRED(m));
6880
6881 dwp->dw_mask |= DW_vm_page_unwire; /* reactivates */
6882 }
6883 m->vmp_overwriting = FALSE;
6884 }
6885 m->vmp_cleaning = FALSE;
6886
6887 if (m->vmp_free_when_done) {
6888 /*
6889 * With the clean queue enabled, UPL_PAGEOUT should
6890 * no longer set the pageout bit. Its pages now go
6891 * to the clean queue.
6892 */
6893 assert(!(flags & UPL_PAGEOUT));
6894 assert(!m_object->internal);
6895
6896 m->vmp_free_when_done = FALSE;
6897
6898 if ((flags & UPL_COMMIT_SET_DIRTY) ||
6899 (m->vmp_pmapped && (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED))) {
6900 /*
6901 * page was re-dirtied after we started
6902 * the pageout... reactivate it since
6903 * we don't know whether the on-disk
6904 * copy matches what is now in memory
6905 */
6906 SET_PAGE_DIRTY(m, FALSE);
6907
6908 dwp->dw_mask |= DW_vm_page_activate | DW_PAGE_WAKEUP;
6909
6910 if (upl->flags & UPL_PAGEOUT) {
6911 VM_STAT_INCR(reactivations);
6912 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
6913 }
6914 } else {
6915 /*
6916 * page has been successfully cleaned
6917 * go ahead and free it for other use
6918 */
6919 if (m_object->internal) {
6920 DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL);
6921 } else {
6922 DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL);
6923 }
6924 m->vmp_dirty = FALSE;
6925 m->vmp_busy = TRUE;
6926
6927 dwp->dw_mask |= DW_vm_page_free;
6928 }
6929 goto commit_next_page;
6930 }
6931 /*
6932 * It is part of the semantics of COPYOUT_FROM
6933 * UPLs that a commit implies a cache sync
6934 * between the vm page and the backing store;
6935 * this can be used to strip the precious bit
6936 * as well as to clean
6937 */
6938 if ((upl->flags & UPL_PAGE_SYNC_DONE) || (flags & UPL_COMMIT_CLEAR_PRECIOUS))
6939 m->vmp_precious = FALSE;
6940
6941 if (flags & UPL_COMMIT_SET_DIRTY) {
6942 SET_PAGE_DIRTY(m, FALSE);
6943 } else {
6944 m->vmp_dirty = FALSE;
6945 }
6946
6947 /* with the clean queue on, move *all* cleaned pages to the clean queue */
6948 if (hibernate_cleaning_in_progress == FALSE && !m->vmp_dirty && (upl->flags & UPL_PAGEOUT)) {
6949 pgpgout_count++;
6950
6951 VM_STAT_INCR(pageouts);
6952 DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL);
6953
6954 dwp->dw_mask |= DW_enqueue_cleaned;
6955 } else if (should_be_throttled == TRUE && (m->vmp_q_state == VM_PAGE_NOT_ON_Q)) {
6956 /*
6957 * page coming back in from being 'frozen'...
6958 * it was dirty before it was frozen, so keep it dirty so
6959 * that vm_page_activate will notice that it really belongs
6960 * on the throttle queue and put it there
6961 */
6962 SET_PAGE_DIRTY(m, FALSE);
6963 dwp->dw_mask |= DW_vm_page_activate;
6964
6965 } else {
6966 if ((flags & UPL_COMMIT_INACTIVATE) && !m->vmp_clustered && (m->vmp_q_state != VM_PAGE_ON_SPECULATIVE_Q)) {
6967 dwp->dw_mask |= DW_vm_page_deactivate_internal;
6968 clear_refmod |= VM_MEM_REFERENCED;
6969 } else if ( !VM_PAGE_PAGEABLE(m)) {
6970
6971 if (m->vmp_clustered || (flags & UPL_COMMIT_SPECULATE))
6972 dwp->dw_mask |= DW_vm_page_speculate;
6973 else if (m->vmp_reference)
6974 dwp->dw_mask |= DW_vm_page_activate;
6975 else {
6976 dwp->dw_mask |= DW_vm_page_deactivate_internal;
6977 clear_refmod |= VM_MEM_REFERENCED;
6978 }
6979 }
6980 }
6981 if (upl->flags & UPL_ACCESS_BLOCKED) {
6982 /*
6983 * We blocked access to the pages in this UPL.
6984 * Clear the "busy" bit on this page before we
6985 * wake up any waiter.
6986 */
6987 dwp->dw_mask |= DW_clear_busy;
6988 }
6989 /*
6990 * Wake up any thread waiting for this page's cleaning to complete.
6991 */
6992 dwp->dw_mask |= DW_PAGE_WAKEUP;
6993
6994 commit_next_page:
6995 if (clear_refmod)
6996 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(m), clear_refmod);
6997
6998 target_offset += PAGE_SIZE_64;
6999 xfer_size -= PAGE_SIZE;
7000 entry++;
7001
7002 if (dwp->dw_mask) {
7003 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
7004 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
7005
7006 if (dw_count >= dw_limit) {
7007 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7008
7009 dwp = &dw_array[0];
7010 dw_count = 0;
7011 }
7012 } else {
7013 if (dwp->dw_mask & DW_clear_busy)
7014 m->vmp_busy = FALSE;
7015
7016 if (dwp->dw_mask & DW_PAGE_WAKEUP)
7017 PAGE_WAKEUP(m);
7018 }
7019 }
7020 }
7021 if (dw_count)
7022 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7023
7024 if (fast_path_possible) {
7025
7026 assert(shadow_object->purgable != VM_PURGABLE_VOLATILE);
7027 assert(shadow_object->purgable != VM_PURGABLE_EMPTY);
7028
7029 if (local_queue_count || unwired_count) {
7030
7031 if (local_queue_count) {
7032 vm_page_t first_target;
7033 vm_page_queue_head_t *target_queue;
7034
7035 if (throttle_page)
7036 target_queue = &vm_page_queue_throttled;
7037 else {
7038 if (flags & UPL_COMMIT_INACTIVATE) {
7039 if (shadow_object->internal)
7040 target_queue = &vm_page_queue_anonymous;
7041 else
7042 target_queue = &vm_page_queue_inactive;
7043 } else
7044 target_queue = &vm_page_queue_active;
7045 }
7046 /*
7047 * Transfer the entire local queue to the appropriate LRU page queue.
7048 */
7049 vm_page_lockspin_queues();
7050
7051 first_target = (vm_page_t) vm_page_queue_first(target_queue);
7052
7053 if (vm_page_queue_empty(target_queue))
7054 target_queue->prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
7055 else
7056 first_target->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
7057
7058 target_queue->next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
7059 first_local->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(target_queue);
7060 last_local->vmp_pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_target);
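/*
 * The local chain first_local ... last_local has just been
 * spliced in at the head of target_queue, ahead of the previous
 * first element, with the forward and back links of both ends
 * patched up.
 */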
7061
7062 /*
7063 * Adjust the global page counts.
7064 */
7065 if (throttle_page) {
7066 vm_page_throttled_count += local_queue_count;
7067 } else {
7068 if (flags & UPL_COMMIT_INACTIVATE) {
7069 if (shadow_object->internal)
7070 vm_page_anonymous_count += local_queue_count;
7071 vm_page_inactive_count += local_queue_count;
7072
7073 token_new_pagecount += local_queue_count;
7074 } else
7075 vm_page_active_count += local_queue_count;
7076
7077 if (shadow_object->internal)
7078 vm_page_pageable_internal_count += local_queue_count;
7079 else
7080 vm_page_pageable_external_count += local_queue_count;
7081 }
7082 } else {
7083 vm_page_lockspin_queues();
7084 }
7085 if (unwired_count) {
7086 vm_page_wire_count -= unwired_count;
7087 VM_CHECK_MEMORYSTATUS;
7088 }
7089 vm_page_unlock_queues();
7090
7091 VM_OBJECT_WIRED_PAGE_COUNT(shadow_object, -unwired_count);
7092 }
7093 }
7094 occupied = 1;
7095
7096 if (upl->flags & UPL_DEVICE_MEMORY) {
7097 occupied = 0;
7098 } else if (upl->flags & UPL_LITE) {
7099 int pg_num;
7100 int i;
7101
7102 occupied = 0;
7103
7104 if (!fast_path_full_commit) {
7105 pg_num = upl->size/PAGE_SIZE;
7106 pg_num = (pg_num + 31) >> 5;
7107
7108 for (i = 0; i < pg_num; i++) {
7109 if (lite_list[i] != 0) {
7110 occupied = 1;
7111 break;
7112 }
7113 }
7114 }
7115 } else {
7116 if (vm_page_queue_empty(&upl->map_object->memq))
7117 occupied = 0;
7118 }
7119 if (occupied == 0) {
7120 /*
7121 * If this UPL element belongs to a Vector UPL and is
7122 * empty, then this is the right function to deallocate
7123 * it. So go ahead and set the *empty variable. The flag
7124 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view,
7125 * should be considered relevant for the Vector UPL and not
7126 * the internal UPLs.
7127 */
7128 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
7129 *empty = TRUE;
7130
7131 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
7132 /*
7133 * this is not a paging object
7134 * so we need to drop the paging reference
7135 * that was taken when we created the UPL
7136 * against this object
7137 */
7138 vm_object_activity_end(shadow_object);
7139 vm_object_collapse(shadow_object, 0, TRUE);
7140 } else {
7141 /*
7142 * we donated the paging reference to
7143 * the map object... vm_pageout_object_terminate
7144 * will drop this reference
7145 */
7146 }
7147 }
7148 VM_OBJECT_WIRED_PAGE_UPDATE_END(shadow_object, shadow_object->wire_tag);
7149 vm_object_unlock(shadow_object);
7150 if (object != shadow_object)
7151 vm_object_unlock(object);
7152
7153 if(!isVectorUPL)
7154 upl_unlock(upl);
7155 else {
7156 /*
7157 * If we completed our operations on an UPL that is
7158 * part of a Vectored UPL and if empty is TRUE, then
7159 * we should go ahead and deallocate this UPL element.
7160 * Then we check if this was the last of the UPL elements
7161 * within that Vectored UPL. If so, set empty to TRUE
7162 * so that in ubc_upl_commit_range or ubc_upl_commit, we
7163 * can go ahead and deallocate the Vector UPL too.
7164 */
7165 if(*empty==TRUE) {
7166 *empty = vector_upl_set_subupl(vector_upl, upl, 0);
7167 upl_deallocate(upl);
7168 }
7169 goto process_upl_to_commit;
7170 }
7171 if (pgpgout_count) {
7172 DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL);
7173 }
7174
7175 return KERN_SUCCESS;
7176 }
7177
7178 kern_return_t
7179 upl_abort_range(
7180 upl_t upl,
7181 upl_offset_t offset,
7182 upl_size_t size,
7183 int error,
7184 boolean_t *empty)
7185 {
7186 upl_page_info_t *user_page_list = NULL;
7187 upl_size_t xfer_size, subupl_size = size;
7188 vm_object_t shadow_object;
7189 vm_object_t object;
7190 vm_object_offset_t target_offset;
7191 upl_offset_t subupl_offset = offset;
7192 int entry;
7193 wpl_array_t lite_list;
7194 int occupied;
7195 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
7196 struct vm_page_delayed_work *dwp;
7197 int dw_count;
7198 int dw_limit;
7199 int isVectorUPL = 0;
7200 upl_t vector_upl = NULL;
7201
7202 *empty = FALSE;
7203
7204 if (upl == UPL_NULL)
7205 return KERN_INVALID_ARGUMENT;
7206
7207 if ( (upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES) )
7208 return upl_commit_range(upl, offset, size, UPL_COMMIT_FREE_ABSENT, NULL, 0, empty);
7209
7210 if((isVectorUPL = vector_upl_is_valid(upl))) {
7211 vector_upl = upl;
7212 upl_lock(vector_upl);
7213 }
7214 else
7215 upl_lock(upl);
7216
7217 process_upl_to_abort:
7218 if(isVectorUPL) {
7219 size = subupl_size;
7220 offset = subupl_offset;
7221 if(size == 0) {
7222 upl_unlock(vector_upl);
7223 return KERN_SUCCESS;
7224 }
7225 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
7226 if(upl == NULL) {
7227 upl_unlock(vector_upl);
7228 return KERN_FAILURE;
7229 }
7230 subupl_size -= size;
7231 subupl_offset += size;
7232 }
7233
7234 *empty = FALSE;
7235
7236 #if UPL_DEBUG
7237 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
7238 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
7239
7240 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
7241 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
7242 upl->upl_commit_records[upl->upl_commit_index].c_aborted = 1;
7243
7244 upl->upl_commit_index++;
7245 }
7246 #endif
7247 if (upl->flags & UPL_DEVICE_MEMORY)
7248 xfer_size = 0;
7249 else if ((offset + size) <= upl->size)
7250 xfer_size = size;
7251 else {
7252 if(!isVectorUPL)
7253 upl_unlock(upl);
7254 else {
7255 upl_unlock(vector_upl);
7256 }
7257
7258 return KERN_FAILURE;
7259 }
7260 if (upl->flags & UPL_INTERNAL) {
7261 lite_list = (wpl_array_t)
7262 ((((uintptr_t)upl) + sizeof(struct upl))
7263 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
7264
7265 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
7266 } else {
7267 lite_list = (wpl_array_t)
7268 (((uintptr_t)upl) + sizeof(struct upl));
7269 }
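/*
 * Layout implied by the pointer math above: an INTERNAL upl is
 * immediately followed in memory by its upl_page_info_t array and
 * then by the "lite" bitmap; a non-internal upl is followed by the
 * bitmap alone.  Each 32-bit word of the bitmap covers 32 pages of
 * the UPL.
 */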
7270 object = upl->map_object;
7271
7272 if (upl->flags & UPL_SHADOWED) {
7273 vm_object_lock(object);
7274 shadow_object = object->shadow;
7275 } else
7276 shadow_object = object;
7277
7278 entry = offset/PAGE_SIZE;
7279 target_offset = (vm_object_offset_t)offset;
7280
7281 assert(!(target_offset & PAGE_MASK));
7282 assert(!(xfer_size & PAGE_MASK));
7283
7284 if (upl->flags & UPL_KERNEL_OBJECT)
7285 vm_object_lock_shared(shadow_object);
7286 else
7287 vm_object_lock(shadow_object);
7288
7289 if (upl->flags & UPL_ACCESS_BLOCKED) {
7290 assert(shadow_object->blocked_access);
7291 shadow_object->blocked_access = FALSE;
7292 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
7293 }
7294
7295 dwp = &dw_array[0];
7296 dw_count = 0;
7297 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
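/*
 * Page state changes are batched through dw_array: each iteration
 * below collects DW_* flags in dwp->dw_mask and a full batch
 * (dw_limit entries) is flushed via vm_page_do_delayed_work(), so
 * the page queues lock is taken once per batch rather than once per
 * page; pages needing only a busy-clear/wakeup are handled inline.
 */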
7298
7299 if ((error & UPL_ABORT_DUMP_PAGES) && (upl->flags & UPL_KERNEL_OBJECT))
7300 panic("upl_abort_range: kernel_object being DUMPED");
7301
7302 while (xfer_size) {
7303 vm_page_t t, m;
7304 unsigned int pg_num;
7305 boolean_t needed;
7306
7307 pg_num = (unsigned int) (target_offset/PAGE_SIZE);
7308 assert(pg_num == target_offset/PAGE_SIZE);
7309
7310 needed = FALSE;
7311
7312 if (user_page_list)
7313 needed = user_page_list[pg_num].needed;
7314
7315 dwp->dw_mask = 0;
7316 m = VM_PAGE_NULL;
7317
7318 if (upl->flags & UPL_LITE) {
7319
7320 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
7321 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
7322
7323 if ( !(upl->flags & UPL_KERNEL_OBJECT))
7324 m = vm_page_lookup(shadow_object, target_offset +
7325 (upl->offset - shadow_object->paging_offset));
7326 }
7327 }
7328 if (upl->flags & UPL_SHADOWED) {
7329 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
7330 t->vmp_free_when_done = FALSE;
7331
7332 VM_PAGE_FREE(t);
7333
7334 if (m == VM_PAGE_NULL)
7335 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
7336 }
7337 }
7338 if ((upl->flags & UPL_KERNEL_OBJECT))
7339 goto abort_next_page;
7340
7341 if (m != VM_PAGE_NULL) {
7342
7343 assert(m->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR);
7344
7345 if (m->vmp_absent) {
7346 boolean_t must_free = TRUE;
7347
7348 /*
7349 * COPYOUT = FALSE case
7350 * check for error conditions which must
7351 * be passed back to the page's customer
7352 */
7353 if (error & UPL_ABORT_RESTART) {
7354 m->vmp_restart = TRUE;
7355 m->vmp_absent = FALSE;
7356 m->vmp_unusual = TRUE;
7357 must_free = FALSE;
7358 } else if (error & UPL_ABORT_UNAVAILABLE) {
7359 m->vmp_restart = FALSE;
7360 m->vmp_unusual = TRUE;
7361 must_free = FALSE;
7362 } else if (error & UPL_ABORT_ERROR) {
7363 m->vmp_restart = FALSE;
7364 m->vmp_absent = FALSE;
7365 m->vmp_error = TRUE;
7366 m->vmp_unusual = TRUE;
7367 must_free = FALSE;
7368 }
7369 if (m->vmp_clustered && needed == FALSE) {
7370 /*
7371 * This page was a part of a speculative
7372 * read-ahead initiated by the kernel
7373 * itself. No one is expecting this
7374 * page and no one will clean up its
7375 * error state if it ever becomes valid
7376 * in the future.
7377 * We have to free it here.
7378 */
7379 must_free = TRUE;
7380 }
7381 m->vmp_cleaning = FALSE;
7382
7383 if (m->vmp_overwriting && !m->vmp_busy) {
7384 /*
7385 * this shouldn't happen since
7386 * this is an 'absent' page, but
7387 * it doesn't hurt to check for
7388 * the 'alternate' method of
7389 * stabilizing the page...
7390 * we will mark 'busy' to be cleared
7391 * in the following code which will
7392 * take care of the primary stabilization
7393 * method (i.e. setting 'busy' to TRUE)
7394 */
7395 dwp->dw_mask |= DW_vm_page_unwire;
7396 }
7397 m->vmp_overwriting = FALSE;
7398
7399 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
7400
7401 if (must_free == TRUE)
7402 dwp->dw_mask |= DW_vm_page_free;
7403 else
7404 dwp->dw_mask |= DW_vm_page_activate;
7405 } else {
7406 /*
7407 * Handle the trusted pager throttle.
7408 */
7409 if (m->vmp_laundry)
7410 dwp->dw_mask |= DW_vm_pageout_throttle_up;
7411
7412 if (upl->flags & UPL_ACCESS_BLOCKED) {
7413 /*
7414 * We blocked access to the pages in this UPL.
7415 * Clear the "busy" bit and wake up any waiter
7416 * for this page.
7417 */
7418 dwp->dw_mask |= DW_clear_busy;
7419 }
7420 if (m->vmp_overwriting) {
7421 if (m->vmp_busy)
7422 dwp->dw_mask |= DW_clear_busy;
7423 else {
7424 /*
7425 * deal with the 'alternate' method
7426 * of stabilizing the page...
7427 * we will either free the page
7428 * or mark 'busy' to be cleared
7429 * in the following code which will
7430 * take care of the primary stabilization
7431 * method (i.e. setting 'busy' to TRUE)
7432 */
7433 dwp->dw_mask |= DW_vm_page_unwire;
7434 }
7435 m->vmp_overwriting = FALSE;
7436 }
7437 m->vmp_free_when_done = FALSE;
7438 m->vmp_cleaning = FALSE;
7439
7440 if (error & UPL_ABORT_DUMP_PAGES) {
7441 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
7442
7443 dwp->dw_mask |= DW_vm_page_free;
7444 } else {
7445 if (!(dwp->dw_mask & DW_vm_page_unwire)) {
7446 if (error & UPL_ABORT_REFERENCE) {
7447 /*
7448 * we've been told to explicitly
7449 * reference this page... for
7450 * file I/O, this is done by
7451 * implementing an LRU on the inactive q
7452 */
7453 dwp->dw_mask |= DW_vm_page_lru;
7454
7455 } else if ( !VM_PAGE_PAGEABLE(m))
7456 dwp->dw_mask |= DW_vm_page_deactivate_internal;
7457 }
7458 dwp->dw_mask |= DW_PAGE_WAKEUP;
7459 }
7460 }
7461 }
7462 abort_next_page:
7463 target_offset += PAGE_SIZE_64;
7464 xfer_size -= PAGE_SIZE;
7465 entry++;
7466
7467 if (dwp->dw_mask) {
7468 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
7469 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
7470
7471 if (dw_count >= dw_limit) {
7472 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7473
7474 dwp = &dw_array[0];
7475 dw_count = 0;
7476 }
7477 } else {
7478 if (dwp->dw_mask & DW_clear_busy)
7479 m->vmp_busy = FALSE;
7480
7481 if (dwp->dw_mask & DW_PAGE_WAKEUP)
7482 PAGE_WAKEUP(m);
7483 }
7484 }
7485 }
7486 if (dw_count)
7487 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7488
7489 occupied = 1;
7490
7491 if (upl->flags & UPL_DEVICE_MEMORY) {
7492 occupied = 0;
7493 } else if (upl->flags & UPL_LITE) {
7494 int pg_num;
7495 int i;
7496
7497 pg_num = upl->size/PAGE_SIZE;
7498 pg_num = (pg_num + 31) >> 5;
7499 occupied = 0;
7500
7501 for (i = 0; i < pg_num; i++) {
7502 if (lite_list[i] != 0) {
7503 occupied = 1;
7504 break;
7505 }
7506 }
7507 } else {
7508 if (vm_page_queue_empty(&upl->map_object->memq))
7509 occupied = 0;
7510 }
7511 if (occupied == 0) {
7512 /*
7513 * If this UPL element belongs to a Vector UPL and is
7514 * empty, then this is the right function to deallocate
7515 * it. So go ahead and set the *empty variable. The flag
7516 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view
7517 * should be considered relevant for the Vector UPL and
7518 * not the internal UPLs.
7519 */
7520 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
7521 *empty = TRUE;
7522
7523 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
7524 /*
7525 * this is not a paging object
7526 * so we need to drop the paging reference
7527 * that was taken when we created the UPL
7528 * against this object
7529 */
7530 vm_object_activity_end(shadow_object);
7531 vm_object_collapse(shadow_object, 0, TRUE);
7532 } else {
7533 /*
7534 * we donated the paging reference to
7535 * the map object... vm_pageout_object_terminate
7536 * will drop this reference
7537 */
7538 }
7539 }
7540 vm_object_unlock(shadow_object);
7541 if (object != shadow_object)
7542 vm_object_unlock(object);
7543
7544 if(!isVectorUPL)
7545 upl_unlock(upl);
7546 else {
7547 /*
7548 * If we completed our operations on an UPL that is
7549 * part of a Vectored UPL and if empty is TRUE, then
7550 * we should go ahead and deallocate this UPL element.
7551 * Then we check if this was the last of the UPL elements
7552 * within that Vectored UPL. If so, set empty to TRUE
7553 * so that in ubc_upl_abort_range or ubc_upl_abort, we
7554 * can go ahead and deallocate the Vector UPL too.
7555 */
7556 if(*empty == TRUE) {
7557 *empty = vector_upl_set_subupl(vector_upl, upl,0);
7558 upl_deallocate(upl);
7559 }
7560 goto process_upl_to_abort;
7561 }
7562
7563 return KERN_SUCCESS;
7564 }
7565
7566
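/*
 * upl_abort: abort the entire UPL by calling upl_abort_range() over
 * [0, upl->size) with the given error flags; the "empty" result is
 * computed but not passed back to the caller.
 */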
7567 kern_return_t
7568 upl_abort(
7569 upl_t upl,
7570 int error)
7571 {
7572 boolean_t empty;
7573
7574 if (upl == UPL_NULL)
7575 return KERN_INVALID_ARGUMENT;
7576
7577 return upl_abort_range(upl, 0, upl->size, error, &empty);
7578 }
7579
7580
7581 /* an option on commit should be wire */
7582 kern_return_t
7583 upl_commit(
7584 upl_t upl,
7585 upl_page_info_t *page_list,
7586 mach_msg_type_number_t count)
7587 {
7588 boolean_t empty;
7589
7590 if (upl == UPL_NULL)
7591 return KERN_INVALID_ARGUMENT;
7592
7593 return upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty);
7594 }
7595
7596
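/*
 * iopl_valid_data: walk the pages backing a plain UPL_IO_WIRE upl
 * (no device memory, shadowing, blocked access or internal page
 * list -- anything else panics) and convert any still-busy, absent
 * pages into valid, dirty, wired pages, then update the object's
 * wired-page accounting and the global vm_page_wire_count.
 */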
7597 void
7598 iopl_valid_data(
7599 upl_t upl,
7600 vm_tag_t tag)
7601 {
7602 vm_object_t object;
7603 vm_offset_t offset;
7604 vm_page_t m, nxt_page = VM_PAGE_NULL;
7605 upl_size_t size;
7606 int wired_count = 0;
7607
7608 if (upl == NULL)
7609 panic("iopl_valid_data: NULL upl");
7610 if (vector_upl_is_valid(upl))
7611 panic("iopl_valid_data: vector upl");
7612 if ((upl->flags & (UPL_DEVICE_MEMORY|UPL_SHADOWED|UPL_ACCESS_BLOCKED|UPL_IO_WIRE|UPL_INTERNAL)) != UPL_IO_WIRE)
7613 panic("iopl_valid_data: unsupported upl, flags = %x", upl->flags);
7614
7615 object = upl->map_object;
7616
7617 if (object == kernel_object || object == compressor_object)
7618 panic("iopl_valid_data: object == kernel or compressor");
7619
7620 if (object->purgable == VM_PURGABLE_VOLATILE ||
7621 object->purgable == VM_PURGABLE_EMPTY)
7622 panic("iopl_valid_data: object %p purgable %d",
7623 object, object->purgable);
7624
7625 size = upl->size;
7626
7627 vm_object_lock(object);
7628 VM_OBJECT_WIRED_PAGE_UPDATE_START(object);
7629
7630 if (object->vo_size == size && object->resident_page_count == (size / PAGE_SIZE))
7631 nxt_page = (vm_page_t)vm_page_queue_first(&object->memq);
7632 else
7633 offset = 0 + upl->offset - object->paging_offset;
7634
7635 while (size) {
7636
7637 if (nxt_page != VM_PAGE_NULL) {
7638 m = nxt_page;
7639 nxt_page = (vm_page_t)vm_page_queue_next(&nxt_page->vmp_listq);
7640 } else {
7641 m = vm_page_lookup(object, offset);
7642 offset += PAGE_SIZE;
7643
7644 if (m == VM_PAGE_NULL)
7645 panic("iopl_valid_data: missing expected page at offset %lx", (long)offset);
7646 }
7647 if (m->vmp_busy) {
7648 if (!m->vmp_absent)
7649 panic("iopl_valid_data: busy page w/o absent");
7650
7651 if (m->vmp_pageq.next || m->vmp_pageq.prev)
7652 panic("iopl_valid_data: busy+absent page on page queue");
7653 if (m->vmp_reusable) {
7654 panic("iopl_valid_data: %p is reusable", m);
7655 }
7656
7657 m->vmp_absent = FALSE;
7658 m->vmp_dirty = TRUE;
7659 assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
7660 assert(m->vmp_wire_count == 0);
7661 m->vmp_wire_count++;
7662 assert(m->vmp_wire_count);
7663 if (m->vmp_wire_count == 1) {
7664 m->vmp_q_state = VM_PAGE_IS_WIRED;
7665 wired_count++;
7666 } else {
7667 panic("iopl_valid_data: %p already wired\n", m);
7668 }
7669
7670 PAGE_WAKEUP_DONE(m);
7671 }
7672 size -= PAGE_SIZE;
7673 }
7674 if (wired_count) {
7675
7676 VM_OBJECT_WIRED_PAGE_COUNT(object, wired_count);
7677 assert(object->resident_page_count >= object->wired_page_count);
7678
7679 /* no need to adjust purgeable accounting for this object: */
7680 assert(object->purgable != VM_PURGABLE_VOLATILE);
7681 assert(object->purgable != VM_PURGABLE_EMPTY);
7682
7683 vm_page_lockspin_queues();
7684 vm_page_wire_count += wired_count;
7685 vm_page_unlock_queues();
7686 }
7687 VM_OBJECT_WIRED_PAGE_UPDATE_END(object, tag);
7688 vm_object_unlock(object);
7689 }
7690
7691
7692 void
7693 vm_object_set_pmap_cache_attr(
7694 vm_object_t object,
7695 upl_page_info_array_t user_page_list,
7696 unsigned int num_pages,
7697 boolean_t batch_pmap_op)
7698 {
7699 unsigned int cache_attr = 0;
7700
7701 cache_attr = object->wimg_bits & VM_WIMG_MASK;
7702 assert(user_page_list);
7703 if (cache_attr != VM_WIMG_USE_DEFAULT) {
7704 PMAP_BATCH_SET_CACHE_ATTR(object, user_page_list, cache_attr, num_pages, batch_pmap_op);
7705 }
7706 }
7707
7708
7709 boolean_t vm_object_iopl_wire_full(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t, vm_tag_t);
7710 kern_return_t vm_object_iopl_wire_empty(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t, vm_tag_t, vm_object_offset_t *, int, int*);
7711
7712
7713
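/*
 * vm_object_iopl_wire_full: fast path for an object whose pages are
 * all resident.  Walks the object's memq once, wiring each page and
 * recording it in the lite bitmap / user_page_list; returns FALSE
 * (sending the caller to the slow path) as soon as it finds a page
 * that is busy, fictitious, absent, in error, being cleaned, marked
 * restart, in the laundry, or in need of coherency handling under
 * UPL_REQUEST_FORCE_COHERENCY.
 */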
7714 boolean_t
7715 vm_object_iopl_wire_full(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list,
7716 wpl_array_t lite_list, upl_control_flags_t cntrl_flags, vm_tag_t tag)
7717 {
7718 vm_page_t dst_page;
7719 unsigned int entry;
7720 int page_count;
7721 int delayed_unlock = 0;
7722 boolean_t retval = TRUE;
7723 ppnum_t phys_page;
7724
7725 vm_object_lock_assert_exclusive(object);
7726 assert(object->purgable != VM_PURGABLE_VOLATILE);
7727 assert(object->purgable != VM_PURGABLE_EMPTY);
7728 assert(object->pager == NULL);
7729 assert(object->copy == NULL);
7730 assert(object->shadow == NULL);
7731
7732 page_count = object->resident_page_count;
7733 dst_page = (vm_page_t)vm_page_queue_first(&object->memq);
7734
7735 vm_page_lock_queues();
7736
7737 while (page_count--) {
7738
7739 if (dst_page->vmp_busy ||
7740 dst_page->vmp_fictitious ||
7741 dst_page->vmp_absent ||
7742 dst_page->vmp_error ||
7743 dst_page->vmp_cleaning ||
7744 dst_page->vmp_restart ||
7745 dst_page->vmp_laundry) {
7746 retval = FALSE;
7747 goto done;
7748 }
7749 if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->vmp_written_by_kernel == TRUE) {
7750 retval = FALSE;
7751 goto done;
7752 }
7753 dst_page->vmp_reference = TRUE;
7754
7755 vm_page_wire(dst_page, tag, FALSE);
7756
7757 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
7758 SET_PAGE_DIRTY(dst_page, FALSE);
7759 }
7760 entry = (unsigned int)(dst_page->vmp_offset / PAGE_SIZE);
7761 assert(entry >= 0 && entry < object->resident_page_count);
7762 lite_list[entry>>5] |= 1 << (entry & 31);
7763
7764 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
7765
7766 if (phys_page > upl->highest_page)
7767 upl->highest_page = phys_page;
7768
7769 if (user_page_list) {
7770 user_page_list[entry].phys_addr = phys_page;
7771 user_page_list[entry].absent = dst_page->vmp_absent;
7772 user_page_list[entry].dirty = dst_page->vmp_dirty;
7773 user_page_list[entry].free_when_done = dst_page->vmp_free_when_done;
7774 user_page_list[entry].precious = dst_page->vmp_precious;
7775 user_page_list[entry].device = FALSE;
7776 user_page_list[entry].speculative = FALSE;
7777 user_page_list[entry].cs_validated = FALSE;
7778 user_page_list[entry].cs_tainted = FALSE;
7779 user_page_list[entry].cs_nx = FALSE;
7780 user_page_list[entry].needed = FALSE;
7781 user_page_list[entry].mark = FALSE;
7782 }
7783 if (delayed_unlock++ > 256) {
7784 delayed_unlock = 0;
7785 lck_mtx_yield(&vm_page_queue_lock);
7786
7787 VM_CHECK_MEMORYSTATUS;
7788 }
7789 dst_page = (vm_page_t)vm_page_queue_next(&dst_page->vmp_listq);
7790 }
7791 done:
7792 vm_page_unlock_queues();
7793
7794 VM_CHECK_MEMORYSTATUS;
7795
7796 return (retval);
7797 }
7798
7799
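/*
 * vm_object_iopl_wire_empty: fast path for an object with no
 * resident pages.  Grabs page_count fresh pages (waiting, possibly
 * interruptibly, when none are free), zero-fills them or marks them
 * absent, wires the non-absent ones, inserts them into the object,
 * and defers the owner's ledger/footprint credit until the end.
 */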
7800 kern_return_t
7801 vm_object_iopl_wire_empty(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list,
7802 wpl_array_t lite_list, upl_control_flags_t cntrl_flags, vm_tag_t tag, vm_object_offset_t *dst_offset,
7803 int page_count, int* page_grab_count)
7804 {
7805 vm_page_t dst_page;
7806 boolean_t no_zero_fill = FALSE;
7807 int interruptible;
7808 int pages_wired = 0;
7809 int pages_inserted = 0;
7810 int entry = 0;
7811 uint64_t delayed_ledger_update = 0;
7812 kern_return_t ret = KERN_SUCCESS;
7813 int grab_options;
7814 ppnum_t phys_page;
7815
7816 vm_object_lock_assert_exclusive(object);
7817 assert(object->purgable != VM_PURGABLE_VOLATILE);
7818 assert(object->purgable != VM_PURGABLE_EMPTY);
7819 assert(object->pager == NULL);
7820 assert(object->copy == NULL);
7821 assert(object->shadow == NULL);
7822
7823 if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
7824 interruptible = THREAD_ABORTSAFE;
7825 else
7826 interruptible = THREAD_UNINT;
7827
7828 if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO))
7829 no_zero_fill = TRUE;
7830
7831 grab_options = 0;
7832 #if CONFIG_SECLUDED_MEMORY
7833 if (object->can_grab_secluded) {
7834 grab_options |= VM_PAGE_GRAB_SECLUDED;
7835 }
7836 #endif /* CONFIG_SECLUDED_MEMORY */
7837
7838 while (page_count--) {
7839
7840 while ((dst_page = vm_page_grab_options(grab_options))
7841 == VM_PAGE_NULL) {
7842
7843 OSAddAtomic(page_count, &vm_upl_wait_for_pages);
7844
7845 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
7846
7847 if (vm_page_wait(interruptible) == FALSE) {
7848 /*
7849 * interrupted case
7850 */
7851 OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
7852
7853 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
7854
7855 ret = MACH_SEND_INTERRUPTED;
7856 goto done;
7857 }
7858 OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
7859
7860 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
7861 }
7862 if (no_zero_fill == FALSE)
7863 vm_page_zero_fill(dst_page);
7864 else
7865 dst_page->vmp_absent = TRUE;
7866
7867 dst_page->vmp_reference = TRUE;
7868
7869 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
7870 SET_PAGE_DIRTY(dst_page, FALSE);
7871 }
7872 if (dst_page->vmp_absent == FALSE) {
7873 assert(dst_page->vmp_q_state == VM_PAGE_NOT_ON_Q);
7874 assert(dst_page->vmp_wire_count == 0);
7875 dst_page->vmp_wire_count++;
7876 dst_page->vmp_q_state = VM_PAGE_IS_WIRED;
7877 assert(dst_page->vmp_wire_count);
7878 pages_wired++;
7879 PAGE_WAKEUP_DONE(dst_page);
7880 }
7881 pages_inserted++;
7882
7883 vm_page_insert_internal(dst_page, object, *dst_offset, tag, FALSE, TRUE, TRUE, TRUE, &delayed_ledger_update);
7884
7885 lite_list[entry>>5] |= 1 << (entry & 31);
7886
7887 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
7888
7889 if (phys_page > upl->highest_page)
7890 upl->highest_page = phys_page;
7891
7892 if (user_page_list) {
7893 user_page_list[entry].phys_addr = phys_page;
7894 user_page_list[entry].absent = dst_page->vmp_absent;
7895 user_page_list[entry].dirty = dst_page->vmp_dirty;
7896 user_page_list[entry].free_when_done = FALSE;
7897 user_page_list[entry].precious = FALSE;
7898 user_page_list[entry].device = FALSE;
7899 user_page_list[entry].speculative = FALSE;
7900 user_page_list[entry].cs_validated = FALSE;
7901 user_page_list[entry].cs_tainted = FALSE;
7902 user_page_list[entry].cs_nx = FALSE;
7903 user_page_list[entry].needed = FALSE;
7904 user_page_list[entry].mark = FALSE;
7905 }
7906 entry++;
7907 *dst_offset += PAGE_SIZE_64;
7908 }
7909 done:
7910 if (pages_wired) {
7911 vm_page_lockspin_queues();
7912 vm_page_wire_count += pages_wired;
7913 vm_page_unlock_queues();
7914 }
7915 if (pages_inserted) {
7916 if (object->internal) {
7917 OSAddAtomic(pages_inserted, &vm_page_internal_count);
7918 } else {
7919 OSAddAtomic(pages_inserted, &vm_page_external_count);
7920 }
7921 }
7922 if (delayed_ledger_update) {
7923 task_t owner;
7924 int ledger_idx_volatile;
7925 int ledger_idx_nonvolatile;
7926 int ledger_idx_volatile_compressed;
7927 int ledger_idx_nonvolatile_compressed;
7928 boolean_t do_footprint;
7929
7930 owner = VM_OBJECT_OWNER(object);
7931 assert(owner);
7932
7933 vm_object_ledger_tag_ledgers(object,
7934 &ledger_idx_volatile,
7935 &ledger_idx_nonvolatile,
7936 &ledger_idx_volatile_compressed,
7937 &ledger_idx_nonvolatile_compressed,
7938 &do_footprint);
7939
7940 /* more non-volatile bytes */
7941 ledger_credit(owner->ledger,
7942 ledger_idx_nonvolatile,
7943 delayed_ledger_update);
7944 if (do_footprint) {
7945 /* more footprint */
7946 ledger_credit(owner->ledger,
7947 task_ledgers.phys_footprint,
7948 delayed_ledger_update);
7949 }
7950 }
7951
7952 assert(page_grab_count);
7953 *page_grab_count = pages_inserted;
7954
7955 return (ret);
7956 }
7957
7958
7959
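/*
 * vm_object_iopl_request: build an IO-wire UPL directly against a VM
 * object.  Device (phys_contiguous) objects are described without any
 * page-level work; otherwise pages are wired via one of the two fast
 * paths above when the request covers the whole object, or located /
 * faulted in one at a time on the slow path below, with each page's
 * physical address and state recorded in the lite bitmap and the
 * optional user_page_list.
 */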
7960 kern_return_t
7961 vm_object_iopl_request(
7962 vm_object_t object,
7963 vm_object_offset_t offset,
7964 upl_size_t size,
7965 upl_t *upl_ptr,
7966 upl_page_info_array_t user_page_list,
7967 unsigned int *page_list_count,
7968 upl_control_flags_t cntrl_flags,
7969 vm_tag_t tag)
7970 {
7971 vm_page_t dst_page;
7972 vm_object_offset_t dst_offset;
7973 upl_size_t xfer_size;
7974 upl_t upl = NULL;
7975 unsigned int entry;
7976 wpl_array_t lite_list = NULL;
7977 int no_zero_fill = FALSE;
7978 unsigned int size_in_pages;
7979 int page_grab_count = 0;
7980 u_int32_t psize;
7981 kern_return_t ret;
7982 vm_prot_t prot;
7983 struct vm_object_fault_info fault_info = {};
7984 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
7985 struct vm_page_delayed_work *dwp;
7986 int dw_count;
7987 int dw_limit;
7988 int dw_index;
7989 boolean_t caller_lookup;
7990 int io_tracking_flag = 0;
7991 int interruptible;
7992 ppnum_t phys_page;
7993
7994 boolean_t set_cache_attr_needed = FALSE;
7995 boolean_t free_wired_pages = FALSE;
7996 boolean_t fast_path_empty_req = FALSE;
7997 boolean_t fast_path_full_req = FALSE;
7998
7999 if (cntrl_flags & ~UPL_VALID_FLAGS) {
8000 /*
8001 * For forward compatibility's sake,
8002 * reject any unknown flag.
8003 */
8004 return KERN_INVALID_VALUE;
8005 }
8006 if (vm_lopage_needed == FALSE)
8007 cntrl_flags &= ~UPL_NEED_32BIT_ADDR;
8008
8009 if (cntrl_flags & UPL_NEED_32BIT_ADDR) {
8010 if ( (cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE))
8011 return KERN_INVALID_VALUE;
8012
8013 if (object->phys_contiguous) {
8014 if ((offset + object->vo_shadow_offset) >= (vm_object_offset_t)max_valid_dma_address)
8015 return KERN_INVALID_ADDRESS;
8016
8017 if (((offset + object->vo_shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address)
8018 return KERN_INVALID_ADDRESS;
8019 }
8020 }
8021 if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO))
8022 no_zero_fill = TRUE;
8023
8024 if (cntrl_flags & UPL_COPYOUT_FROM)
8025 prot = VM_PROT_READ;
8026 else
8027 prot = VM_PROT_READ | VM_PROT_WRITE;
8028
8029 if ((!object->internal) && (object->paging_offset != 0))
8030 panic("vm_object_iopl_request: external object with non-zero paging offset\n");
8031
8032 VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, VM_IOPL_REQUEST, DBG_FUNC_START, size, cntrl_flags, prot, 0);
8033
8034 #if CONFIG_IOSCHED || UPL_DEBUG
8035 if ((object->io_tracking && object != kernel_object) || upl_debug_enabled)
8036 io_tracking_flag |= UPL_CREATE_IO_TRACKING;
8037 #endif
8038
8039 #if CONFIG_IOSCHED
8040 if (object->io_tracking) {
8041 /* Check if we're dealing with the kernel object. We do not support expedite on kernel object UPLs */
8042 if (object != kernel_object)
8043 io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
8044 }
8045 #endif
8046
8047 if (object->phys_contiguous)
8048 psize = PAGE_SIZE;
8049 else
8050 psize = size;
8051
8052 if (cntrl_flags & UPL_SET_INTERNAL) {
8053 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
8054
8055 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
8056 lite_list = (wpl_array_t) (((uintptr_t)user_page_list) +
8057 ((psize / PAGE_SIZE) * sizeof(upl_page_info_t)));
8058 if (size == 0) {
8059 user_page_list = NULL;
8060 lite_list = NULL;
8061 }
8062 } else {
8063 upl = upl_create(UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
8064
8065 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
8066 if (size == 0) {
8067 lite_list = NULL;
8068 }
8069 }
8070 if (user_page_list)
8071 user_page_list[0].device = FALSE;
8072 *upl_ptr = upl;
8073
8074 if (cntrl_flags & UPL_NOZEROFILLIO) {
8075 DTRACE_VM4(upl_nozerofillio,
8076 vm_object_t, object,
8077 vm_object_offset_t, offset,
8078 upl_size_t, size,
8079 upl_t, upl);
8080 }
8081
8082 upl->map_object = object;
8083 upl->size = size;
8084
8085 size_in_pages = size / PAGE_SIZE;
8086
8087 if (object == kernel_object &&
8088 !(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS))) {
8089 upl->flags |= UPL_KERNEL_OBJECT;
8090 #if UPL_DEBUG
8091 vm_object_lock(object);
8092 #else
8093 vm_object_lock_shared(object);
8094 #endif
8095 } else {
8096 vm_object_lock(object);
8097 vm_object_activity_begin(object);
8098 }
8099 /*
8100 * paging in progress also protects the paging_offset
8101 */
8102 upl->offset = offset + object->paging_offset;
8103
8104 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8105 /*
8106 * The user requested that access to the pages in this UPL
8107 * be blocked until the UPL is committed or aborted.
8108 */
8109 upl->flags |= UPL_ACCESS_BLOCKED;
8110 }
8111
8112 #if CONFIG_IOSCHED || UPL_DEBUG
8113 if (upl->flags & UPL_TRACKED_BY_OBJECT) {
8114 vm_object_activity_begin(object);
8115 queue_enter(&object->uplq, upl, upl_t, uplq);
8116 }
8117 #endif
8118
8119 if (object->phys_contiguous) {
8120
8121 if (upl->flags & UPL_ACCESS_BLOCKED) {
8122 assert(!object->blocked_access);
8123 object->blocked_access = TRUE;
8124 }
8125
8126 vm_object_unlock(object);
8127
8128 /*
8129 * don't need any shadow mappings for this one
8130 * since it is already I/O memory
8131 */
8132 upl->flags |= UPL_DEVICE_MEMORY;
8133
8134 upl->highest_page = (ppnum_t) ((offset + object->vo_shadow_offset + size - 1)>>PAGE_SHIFT);
8135
8136 if (user_page_list) {
8137 user_page_list[0].phys_addr = (ppnum_t) ((offset + object->vo_shadow_offset)>>PAGE_SHIFT);
8138 user_page_list[0].device = TRUE;
8139 }
8140 if (page_list_count != NULL) {
8141 if (upl->flags & UPL_INTERNAL)
8142 *page_list_count = 0;
8143 else
8144 *page_list_count = 1;
8145 }
8146
8147 VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, VM_IOPL_REQUEST, DBG_FUNC_END, page_grab_count, KERN_SUCCESS, 0, 0);
8148 return KERN_SUCCESS;
8149 }
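/*
 * Not device memory: from here on every page in the range has to be
 * located (or created) and wired individually.
 */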
8150 if (object != kernel_object && object != compressor_object) {
8151 /*
8152 * Protect user space from future COW operations
8153 */
8154 #if VM_OBJECT_TRACKING_OP_TRUESHARE
8155 if (!object->true_share &&
8156 vm_object_tracking_inited) {
8157 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
8158 int num = 0;
8159
8160 num = OSBacktrace(bt,
8161 VM_OBJECT_TRACKING_BTDEPTH);
8162 btlog_add_entry(vm_object_tracking_btlog,
8163 object,
8164 VM_OBJECT_TRACKING_OP_TRUESHARE,
8165 bt,
8166 num);
8167 }
8168 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
8169
8170 vm_object_lock_assert_exclusive(object);
8171 object->true_share = TRUE;
8172
8173 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
8174 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
8175 }
8176
8177 if (!(cntrl_flags & UPL_COPYOUT_FROM) &&
8178 object->copy != VM_OBJECT_NULL) {
8179 /*
8180 * Honor copy-on-write obligations
8181 *
8182 * The caller is gathering these pages and
8183 * might modify their contents. We need to
8184 * make sure that the copy object has its own
8185 * private copies of these pages before we let
8186 * the caller modify them.
8187 *
8188 * NOTE: someone else could map the original object
8189 * after we've done this copy-on-write here, and they
8190 * could then see an inconsistent picture of the memory
8191 * while it's being modified via the UPL. To prevent this,
8192 * we would have to block access to these pages until the
8193 * UPL is released. We could use the UPL_BLOCK_ACCESS
8194 * code path for that...
8195 */
8196 vm_object_update(object,
8197 offset,
8198 size,
8199 NULL,
8200 NULL,
8201 FALSE, /* should_return */
8202 MEMORY_OBJECT_COPY_SYNC,
8203 VM_PROT_NO_CHANGE);
8204 VM_PAGEOUT_DEBUG(iopl_cow, 1);
8205 VM_PAGEOUT_DEBUG(iopl_cow_pages, (size >> PAGE_SHIFT));
8206 }
8207 if (!(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS)) &&
8208 object->purgable != VM_PURGABLE_VOLATILE &&
8209 object->purgable != VM_PURGABLE_EMPTY &&
8210 object->copy == NULL &&
8211 size == object->vo_size &&
8212 offset == 0 &&
8213 object->shadow == NULL &&
8214 object->pager == NULL)
8215 {
8216 if (object->resident_page_count == size_in_pages)
8217 {
8218 assert(object != compressor_object);
8219 assert(object != kernel_object);
8220 fast_path_full_req = TRUE;
8221 }
8222 else if (object->resident_page_count == 0)
8223 {
8224 assert(object != compressor_object);
8225 assert(object != kernel_object);
8226 fast_path_empty_req = TRUE;
8227 set_cache_attr_needed = TRUE;
8228 }
8229 }
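/*
 * Fast-path eligibility, per the checks above: the request must cover
 * the whole object, neither UPL_NEED_32BIT_ADDR nor UPL_BLOCK_ACCESS
 * may be asked for, and the object must have no shadow, no pager, no
 * copy object and must not be volatile/empty purgeable.  A fully
 * resident object goes to vm_object_iopl_wire_full(), a completely
 * empty one to vm_object_iopl_wire_empty(); anything else takes the
 * per-page slow path.
 */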
8230
8231 if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
8232 interruptible = THREAD_ABORTSAFE;
8233 else
8234 interruptible = THREAD_UNINT;
8235
8236 entry = 0;
8237
8238 xfer_size = size;
8239 dst_offset = offset;
8240 dw_count = 0;
8241
8242 if (fast_path_full_req) {
8243
8244 if (vm_object_iopl_wire_full(object, upl, user_page_list, lite_list, cntrl_flags, tag) == TRUE)
8245 goto finish;
8246 /*
8247 * we couldn't complete the processing of this request on the fast path
8248 * so fall through to the slow path and finish up
8249 */
8250
8251 } else if (fast_path_empty_req) {
8252
8253 if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8254 ret = KERN_MEMORY_ERROR;
8255 goto return_err;
8256 }
8257 ret = vm_object_iopl_wire_empty(object, upl, user_page_list, lite_list, cntrl_flags, tag, &dst_offset, size_in_pages, &page_grab_count);
8258
8259 if (ret) {
8260 free_wired_pages = TRUE;
8261 goto return_err;
8262 }
8263 goto finish;
8264 }
8265
8266 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
8267 fault_info.lo_offset = offset;
8268 fault_info.hi_offset = offset + xfer_size;
8269 fault_info.mark_zf_absent = TRUE;
8270 fault_info.interruptible = interruptible;
8271 fault_info.batch_pmap_op = TRUE;
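/*
 * fault_info describes the whole remaining range to vm_fault_page():
 * sequential access, the caller's interruptibility, zero-filled pages
 * reported as absent, and pmap operations batched.
 */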
8272
8273 dwp = &dw_array[0];
8274 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
8275
8276 while (xfer_size) {
8277 vm_fault_return_t result;
8278
8279 dwp->dw_mask = 0;
8280
8281 if (fast_path_full_req) {
8282 /*
8283 * if we get here, it means that we ran into a page
8284 * state we couldn't handle in the fast path and
8285 * bailed out to the slow path... since the order
8286 * we look at pages is different between the 2 paths,
8287 * the following check is needed to determine whether
8288 * this page was already processed in the fast path
8289 */
8290 if (lite_list[entry>>5] & (1 << (entry & 31)))
8291 goto skip_page;
8292 }
8293 dst_page = vm_page_lookup(object, dst_offset);
8294
8295 if (dst_page == VM_PAGE_NULL ||
8296 dst_page->vmp_busy ||
8297 dst_page->vmp_error ||
8298 dst_page->vmp_restart ||
8299 dst_page->vmp_absent ||
8300 dst_page->vmp_fictitious) {
8301
8302 if (object == kernel_object)
8303 panic("vm_object_iopl_request: missing/bad page in kernel object\n");
8304 if (object == compressor_object)
8305 panic("vm_object_iopl_request: missing/bad page in compressor object\n");
8306
8307 if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8308 ret = KERN_MEMORY_ERROR;
8309 goto return_err;
8310 }
8311 set_cache_attr_needed = TRUE;
8312
8313 /*
8314 * We just looked up the page and the result remains valid
8315 * until the object lock is released, so send it to
8316 * vm_fault_page() (as "dst_page"), to avoid having to
8317 * look it up again there.
8318 */
8319 caller_lookup = TRUE;
8320
8321 do {
8322 vm_page_t top_page;
8323 kern_return_t error_code;
8324
8325 fault_info.cluster_size = xfer_size;
8326
8327 vm_object_paging_begin(object);
8328
8329 result = vm_fault_page(object, dst_offset,
8330 prot | VM_PROT_WRITE, FALSE,
8331 caller_lookup,
8332 &prot, &dst_page, &top_page,
8333 (int *)0,
8334 &error_code, no_zero_fill,
8335 FALSE, &fault_info);
8336
8337 /* our lookup is no longer valid at this point */
8338 caller_lookup = FALSE;
8339
8340 switch (result) {
8341
8342 case VM_FAULT_SUCCESS:
8343 page_grab_count++;
8344
8345 if ( !dst_page->vmp_absent) {
8346 PAGE_WAKEUP_DONE(dst_page);
8347 } else {
8348 /*
8349 * we only get back an absent page if we
8350 * requested that it not be zero-filled
8351 * because we are about to fill it via I/O
8352 *
8353 * absent pages should be left BUSY
8354 * to prevent them from being faulted
8355 * into an address space before we've
8356 * had a chance to complete the I/O on
8357 * them since they may contain info that
8358 * shouldn't be seen by the faulting task
8359 */
8360 }
8361 /*
8362 * Release paging references and
8363 * top-level placeholder page, if any.
8364 */
8365 if (top_page != VM_PAGE_NULL) {
8366 vm_object_t local_object;
8367
8368 local_object = VM_PAGE_OBJECT(top_page);
8369
8370 /*
8371 * comparing 2 packed pointers
8372 */
8373 if (top_page->vmp_object != dst_page->vmp_object) {
8374 vm_object_lock(local_object);
8375 VM_PAGE_FREE(top_page);
8376 vm_object_paging_end(local_object);
8377 vm_object_unlock(local_object);
8378 } else {
8379 VM_PAGE_FREE(top_page);
8380 vm_object_paging_end(local_object);
8381 }
8382 }
8383 vm_object_paging_end(object);
8384 break;
8385
8386 case VM_FAULT_RETRY:
8387 vm_object_lock(object);
8388 break;
8389
8390 case VM_FAULT_MEMORY_SHORTAGE:
8391 OSAddAtomic((size_in_pages - entry), &vm_upl_wait_for_pages);
8392
8393 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
8394
8395 if (vm_page_wait(interruptible)) {
8396 OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
8397
8398 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
8399 vm_object_lock(object);
8400
8401 break;
8402 }
8403 OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
8404
8405 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
8406
8407 /* fall thru */
8408
8409 case VM_FAULT_INTERRUPTED:
8410 error_code = MACH_SEND_INTERRUPTED;
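/* fall thru */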
8411 case VM_FAULT_MEMORY_ERROR:
8412 memory_error:
8413 ret = (error_code ? error_code: KERN_MEMORY_ERROR);
8414
8415 vm_object_lock(object);
8416 goto return_err;
8417
8418 case VM_FAULT_SUCCESS_NO_VM_PAGE:
8419 /* success but no page: fail */
8420 vm_object_paging_end(object);
8421 vm_object_unlock(object);
8422 goto memory_error;
8423
8424 default:
8425 panic("vm_object_iopl_request: unexpected error"
8426 " 0x%x from vm_fault_page()\n", result);
8427 }
8428 } while (result != VM_FAULT_SUCCESS);
8429
8430 }
8431 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8432
8433 if (upl->flags & UPL_KERNEL_OBJECT)
8434 goto record_phys_addr;
8435
8436 if (dst_page->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
8437 dst_page->vmp_busy = TRUE;
8438 goto record_phys_addr;
8439 }
8440
8441 if (dst_page->vmp_cleaning) {
8442 /*
8443 * Someone else is cleaning this page in place.
8444 * In theory, we should be able to proceed and use this
8445 * page, but they'll probably end up clearing the "busy"
8446 * bit on it in upl_commit_range(); since they didn't set
8447 * it, they would clear our "busy" bit and open
8448 * us to race conditions.
8449 * We'd better wait for the cleaning to complete and
8450 * then try again.
8451 */
8452 VM_PAGEOUT_DEBUG(vm_object_iopl_request_sleep_for_cleaning, 1);
8453 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
8454 continue;
8455 }
8456 if (dst_page->vmp_laundry)
8457 vm_pageout_steal_laundry(dst_page, FALSE);
8458
8459 if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) &&
8460 phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) {
8461 vm_page_t low_page;
8462 int refmod;
8463
8464 /*
8465 * support devices that can't DMA above 32 bits
8466 * by substituting pages from a pool of low address
8467 * memory for any pages we find above the 4G mark...
8468 * we can't substitute if the page is already wired because
8469 * we don't know whether that physical address has been
8470 * handed out to some other 64 bit capable DMA device to use
8471 */
8472 if (VM_PAGE_WIRED(dst_page)) {
8473 ret = KERN_PROTECTION_FAILURE;
8474 goto return_err;
8475 }
8476 low_page = vm_page_grablo();
8477
8478 if (low_page == VM_PAGE_NULL) {
8479 ret = KERN_RESOURCE_SHORTAGE;
8480 goto return_err;
8481 }
8482 /*
8483 * from here until the vm_page_replace completes
8484 * we mustn't drop the object lock... we don't
8485 * want anyone refaulting this page in and using
8486 * it after we disconnect it... we want the fault
8487 * to find the new page being substituted.
8488 */
8489 if (dst_page->vmp_pmapped)
8490 refmod = pmap_disconnect(phys_page);
8491 else
8492 refmod = 0;
8493
8494 if (!dst_page->vmp_absent)
8495 vm_page_copy(dst_page, low_page);
8496
8497 low_page->vmp_reference = dst_page->vmp_reference;
8498 low_page->vmp_dirty = dst_page->vmp_dirty;
8499 low_page->vmp_absent = dst_page->vmp_absent;
8500
8501 if (refmod & VM_MEM_REFERENCED)
8502 low_page->vmp_reference = TRUE;
8503 if (refmod & VM_MEM_MODIFIED) {
8504 SET_PAGE_DIRTY(low_page, FALSE);
8505 }
8506
8507 vm_page_replace(low_page, object, dst_offset);
8508
8509 dst_page = low_page;
8510 /*
8511 * vm_page_grablo returned the page marked
8512 * BUSY... we don't need a PAGE_WAKEUP_DONE
8513 * here, because we've never dropped the object lock
8514 */
8515 if ( !dst_page->vmp_absent)
8516 dst_page->vmp_busy = FALSE;
8517
8518 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8519 }
8520 if ( !dst_page->vmp_busy)
8521 dwp->dw_mask |= DW_vm_page_wire;
8522
8523 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8524 /*
8525 * Mark the page "busy" to block any future page fault
8526 * on this page in addition to wiring it.
8527 * We'll also remove the mapping
8528 * of all these pages before leaving this routine.
8529 */
8530 assert(!dst_page->vmp_fictitious);
8531 dst_page->vmp_busy = TRUE;
8532 }
8533 /*
8534 * expect the page to be used
8535 * page queues lock must be held to set 'reference'
8536 */
8537 dwp->dw_mask |= DW_set_reference;
8538
8539 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
8540 SET_PAGE_DIRTY(dst_page, TRUE);
8541 }
8542 if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->vmp_written_by_kernel == TRUE) {
8543 pmap_sync_page_attributes_phys(phys_page);
8544 dst_page->vmp_written_by_kernel = FALSE;
8545 }
8546
8547 record_phys_addr:
8548 if (dst_page->vmp_busy)
8549 upl->flags |= UPL_HAS_BUSY;
8550
8551 lite_list[entry>>5] |= 1 << (entry & 31);
8552
8553 if (phys_page > upl->highest_page)
8554 upl->highest_page = phys_page;
8555
8556 if (user_page_list) {
8557 user_page_list[entry].phys_addr = phys_page;
8558 user_page_list[entry].free_when_done = dst_page->vmp_free_when_done;
8559 user_page_list[entry].absent = dst_page->vmp_absent;
8560 user_page_list[entry].dirty = dst_page->vmp_dirty;
8561 user_page_list[entry].precious = dst_page->vmp_precious;
8562 user_page_list[entry].device = FALSE;
8563 user_page_list[entry].needed = FALSE;
8564 if (dst_page->vmp_clustered == TRUE)
8565 user_page_list[entry].speculative = (dst_page->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE;
8566 else
8567 user_page_list[entry].speculative = FALSE;
8568 user_page_list[entry].cs_validated = dst_page->vmp_cs_validated;
8569 user_page_list[entry].cs_tainted = dst_page->vmp_cs_tainted;
8570 user_page_list[entry].cs_nx = dst_page->vmp_cs_nx;
8571 user_page_list[entry].mark = FALSE;
8572 }
8573 if (object != kernel_object && object != compressor_object) {
8574 /*
8575 * someone is explicitly grabbing this page...
8576 * update clustered and speculative state
8577 *
8578 */
8579 if (dst_page->vmp_clustered)
8580 VM_PAGE_CONSUME_CLUSTERED(dst_page);
8581 }
8582 skip_page:
8583 entry++;
8584 dst_offset += PAGE_SIZE_64;
8585 xfer_size -= PAGE_SIZE;
8586
8587 if (dwp->dw_mask) {
8588 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
8589
8590 if (dw_count >= dw_limit) {
8591 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
8592
8593 dwp = &dw_array[0];
8594 dw_count = 0;
8595 }
8596 }
8597 }
8598 assert(entry == size_in_pages);
8599
8600 if (dw_count)
8601 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
8602 finish:
8603 if (user_page_list && set_cache_attr_needed == TRUE)
8604 vm_object_set_pmap_cache_attr(object, user_page_list, size_in_pages, TRUE);
8605
8606 if (page_list_count != NULL) {
8607 if (upl->flags & UPL_INTERNAL)
8608 *page_list_count = 0;
8609 else if (*page_list_count > size_in_pages)
8610 *page_list_count = size_in_pages;
8611 }
8612 vm_object_unlock(object);
8613
8614 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8615 /*
8616 * We've marked all the pages "busy" so that future
8617 * page faults will block.
8618 * Now remove the mapping for these pages, so that they
8619 * can't be accessed without causing a page fault.
8620 */
8621 vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
8622 PMAP_NULL, 0, VM_PROT_NONE);
8623 assert(!object->blocked_access);
8624 object->blocked_access = TRUE;
8625 }
8626
8627 VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, VM_IOPL_REQUEST, DBG_FUNC_END, page_grab_count, KERN_SUCCESS, 0, 0);
8628 return KERN_SUCCESS;
8629
8630 return_err:
8631 dw_index = 0;
8632
8633 for (; offset < dst_offset; offset += PAGE_SIZE) {
8634 boolean_t need_unwire;
8635
8636 dst_page = vm_page_lookup(object, offset);
8637
8638 if (dst_page == VM_PAGE_NULL)
8639 panic("vm_object_iopl_request: Wired page missing. \n");
8640
8641 /*
8642 * if we've already processed this page in an earlier
8643 * dw_do_work, we need to undo the wiring... we will
8644 * leave the dirty and reference bits on if they
8645 * were set, since we don't have a good way of knowing
8646 * what the previous state was and we won't get here
8647 * under any normal circumstances... we will always
8648 * clear BUSY and wakeup any waiters via vm_page_free
8649 * or PAGE_WAKEUP_DONE
8650 */
8651 need_unwire = TRUE;
8652
8653 if (dw_count) {
8654 if (dw_array[dw_index].dw_m == dst_page) {
8655 /*
8656 * still in the deferred work list
8657 * which means we haven't yet called
8658 * vm_page_wire on this page
8659 */
8660 need_unwire = FALSE;
8661
8662 dw_index++;
8663 dw_count--;
8664 }
8665 }
8666 vm_page_lock_queues();
8667
8668 if (dst_page->vmp_absent || free_wired_pages == TRUE) {
8669 vm_page_free(dst_page);
8670
8671 need_unwire = FALSE;
8672 } else {
8673 if (need_unwire == TRUE)
8674 vm_page_unwire(dst_page, TRUE);
8675
8676 PAGE_WAKEUP_DONE(dst_page);
8677 }
8678 vm_page_unlock_queues();
8679
8680 if (need_unwire == TRUE)
8681 VM_STAT_INCR(reactivations);
8682 }
8683 #if UPL_DEBUG
8684 upl->upl_state = 2;
8685 #endif
8686 if (! (upl->flags & UPL_KERNEL_OBJECT)) {
8687 vm_object_activity_end(object);
8688 vm_object_collapse(object, 0, TRUE);
8689 }
8690 vm_object_unlock(object);
8691 upl_destroy(upl);
8692
8693 VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, VM_IOPL_REQUEST, DBG_FUNC_END, page_grab_count, ret, 0, 0);
8694 return ret;
8695 }
8696
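/*
 * upl_transpose: exchange the backing store of the two (equal-size,
 * full-object, non-vector) UPLs' objects via vm_object_transpose(),
 * then repoint each UPL's map_object and fix up the objects' uplq
 * tracking lists where UPL_TRACKED_BY_OBJECT is set.
 */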
8697 kern_return_t
8698 upl_transpose(
8699 upl_t upl1,
8700 upl_t upl2)
8701 {
8702 kern_return_t retval;
8703 boolean_t upls_locked;
8704 vm_object_t object1, object2;
8705
8706 if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2 || ((upl1->flags & UPL_VECTOR)==UPL_VECTOR) || ((upl2->flags & UPL_VECTOR)==UPL_VECTOR)) {
8707 return KERN_INVALID_ARGUMENT;
8708 }
8709
8710 upls_locked = FALSE;
8711
8712 /*
8713 * Since we need to lock both UPLs at the same time,
8714 * avoid deadlocks by always taking locks in the same order.
8715 */
8716 if (upl1 < upl2) {
8717 upl_lock(upl1);
8718 upl_lock(upl2);
8719 } else {
8720 upl_lock(upl2);
8721 upl_lock(upl1);
8722 }
8723 upls_locked = TRUE; /* the UPLs will need to be unlocked */
8724
8725 object1 = upl1->map_object;
8726 object2 = upl2->map_object;
8727
8728 if (upl1->offset != 0 || upl2->offset != 0 ||
8729 upl1->size != upl2->size) {
8730 /*
8731 * We deal only with full objects, not subsets.
8732 * That's because we exchange the entire backing store info
8733 * for the objects: pager, resident pages, etc... We can't do
8734 * only part of it.
8735 */
8736 retval = KERN_INVALID_VALUE;
8737 goto done;
8738 }
8739
8740 /*
8741 * Transpose the VM objects' backing store.
8742 */
8743 retval = vm_object_transpose(object1, object2,
8744 (vm_object_size_t) upl1->size);
8745
8746 if (retval == KERN_SUCCESS) {
8747 /*
8748 * Make each UPL point to the correct VM object, i.e. the
8749 * object holding the pages that the UPL refers to...
8750 */
8751 #if CONFIG_IOSCHED || UPL_DEBUG
8752 if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8753 vm_object_lock(object1);
8754 vm_object_lock(object2);
8755 }
8756 if (upl1->flags & UPL_TRACKED_BY_OBJECT)
8757 queue_remove(&object1->uplq, upl1, upl_t, uplq);
8758 if (upl2->flags & UPL_TRACKED_BY_OBJECT)
8759 queue_remove(&object2->uplq, upl2, upl_t, uplq);
8760 #endif
8761 upl1->map_object = object2;
8762 upl2->map_object = object1;
8763
8764 #if CONFIG_IOSCHED || UPL_DEBUG
8765 if (upl1->flags & UPL_TRACKED_BY_OBJECT)
8766 queue_enter(&object2->uplq, upl1, upl_t, uplq);
8767 if (upl2->flags & UPL_TRACKED_BY_OBJECT)
8768 queue_enter(&object1->uplq, upl2, upl_t, uplq);
8769 if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8770 vm_object_unlock(object2);
8771 vm_object_unlock(object1);
8772 }
8773 #endif
8774 }
8775
8776 done:
8777 /*
8778 * Cleanup.
8779 */
8780 if (upls_locked) {
8781 upl_unlock(upl1);
8782 upl_unlock(upl2);
8783 upls_locked = FALSE;
8784 }
8785
8786 return retval;
8787 }
8788
8789 void
8790 upl_range_needed(
8791 upl_t upl,
8792 int index,
8793 int count)
8794 {
8795 upl_page_info_t *user_page_list;
8796 int size_in_pages;
8797
8798 if ( !(upl->flags & UPL_INTERNAL) || count <= 0)
8799 return;
8800
8801 size_in_pages = upl->size / PAGE_SIZE;
8802
8803 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
8804
8805 while (count-- && index < size_in_pages)
8806 user_page_list[index++].needed = TRUE;
8807 }
8808
8809
8810 /*
8811 * Reserve of virtual addresses in the kernel address space.
8812 * We need to map the physical pages in the kernel, so that we
8813 * can call the code-signing or slide routines with a kernel
8814 * virtual address. We keep this pool of pre-allocated kernel
8815 * virtual addresses so that we don't have to scan the kernel's
8816 * virtual address space each time we need to work with
8817 * a physical page.
8818 */
8819 decl_simple_lock_data(,vm_paging_lock)
8820 #define VM_PAGING_NUM_PAGES 64
8821 vm_map_offset_t vm_paging_base_address = 0;
8822 boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
8823 int vm_paging_max_index = 0;
8824 int vm_paging_page_waiter = 0;
8825 int vm_paging_page_waiter_total = 0;
8826
8827 unsigned long vm_paging_no_kernel_page = 0;
8828 unsigned long vm_paging_objects_mapped = 0;
8829 unsigned long vm_paging_pages_mapped = 0;
8830 unsigned long vm_paging_objects_mapped_slow = 0;
8831 unsigned long vm_paging_pages_mapped_slow = 0;
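/*
 * vm_paging_page_inuse[] tracks which slots of the pre-allocated
 * window are currently mapped, vm_paging_page_waiter counts threads
 * blocked waiting for a free slot, and the remaining counters record
 * how often the fast pool was used versus the slow vm_map_enter()
 * path.
 */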
8832
8833 void
8834 vm_paging_map_init(void)
8835 {
8836 kern_return_t kr;
8837 vm_map_offset_t page_map_offset;
8838 vm_map_entry_t map_entry;
8839
8840 assert(vm_paging_base_address == 0);
8841
8842 /*
8843 * Initialize our pool of pre-allocated kernel
8844 * virtual addresses.
8845 */
8846 page_map_offset = 0;
8847 kr = vm_map_find_space(kernel_map,
8848 &page_map_offset,
8849 VM_PAGING_NUM_PAGES * PAGE_SIZE,
8850 0,
8851 0,
8852 VM_MAP_KERNEL_FLAGS_NONE,
8853 VM_KERN_MEMORY_NONE,
8854 &map_entry);
8855 if (kr != KERN_SUCCESS) {
8856 panic("vm_paging_map_init: kernel_map full\n");
8857 }
8858 VME_OBJECT_SET(map_entry, kernel_object);
8859 VME_OFFSET_SET(map_entry, page_map_offset);
8860 map_entry->protection = VM_PROT_NONE;
8861 map_entry->max_protection = VM_PROT_NONE;
8862 map_entry->permanent = TRUE;
8863 vm_object_reference(kernel_object);
8864 vm_map_unlock(kernel_map);
8865
8866 assert(vm_paging_base_address == 0);
8867 vm_paging_base_address = page_map_offset;
8868 }
8869
8870 /*
8871 * vm_paging_map_object:
8872 * Maps part of a VM object's pages in the kernel
8873 * virtual address space, using the pre-allocated
8874 * kernel virtual addresses, if possible.
8875 * Context:
8876 * The VM object is locked. This lock will get
8877 * dropped and re-acquired though, so the caller
8878 * must make sure the VM object is kept alive
8879 * (by holding a VM map that has a reference
8880 * on it, for example, or taking an extra reference).
8881 * The page should also be kept busy to prevent
8882 * it from being reclaimed.
8883 */
8884 kern_return_t
8885 vm_paging_map_object(
8886 vm_page_t page,
8887 vm_object_t object,
8888 vm_object_offset_t offset,
8889 vm_prot_t protection,
8890 boolean_t can_unlock_object,
8891 vm_map_size_t *size, /* IN/OUT */
8892 vm_map_offset_t *address, /* OUT */
8893 boolean_t *need_unmap) /* OUT */
8894 {
8895 kern_return_t kr;
8896 vm_map_offset_t page_map_offset;
8897 vm_map_size_t map_size;
8898 vm_object_offset_t object_offset;
8899 int i;
8900
8901 if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
8902 /* use permanent 1-to-1 kernel mapping of physical memory ? */
8903 #if __x86_64__
8904 *address = (vm_map_offset_t)
8905 PHYSMAP_PTOV((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(page) <<
8906 PAGE_SHIFT);
8907 *need_unmap = FALSE;
8908 return KERN_SUCCESS;
8909 #elif __arm__ || __arm64__
8910 *address = (vm_map_offset_t)
8911 phystokv((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(page) << PAGE_SHIFT);
8912 *need_unmap = FALSE;
8913 return KERN_SUCCESS;
8914 #else
8915 #warn "vm_paging_map_object: no 1-to-1 kernel mapping of physical memory..."
8916 #endif
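/*
 * No 1-to-1 physical mapping on this configuration: fall through and
 * borrow a slot from the pre-allocated VM_PAGING_NUM_PAGES window
 * below.
 */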
8917
8918 assert(page->vmp_busy);
8919 /*
8920 * Use one of the pre-allocated kernel virtual addresses
8921 * and just enter the VM page in the kernel address space
8922 * at that virtual address.
8923 */
8924 simple_lock(&vm_paging_lock);
8925
8926 /*
8927 * Try and find an available kernel virtual address
8928 * from our pre-allocated pool.
8929 */
8930 page_map_offset = 0;
8931 for (;;) {
8932 for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
8933 if (vm_paging_page_inuse[i] == FALSE) {
8934 page_map_offset =
8935 vm_paging_base_address +
8936 (i * PAGE_SIZE);
8937 break;
8938 }
8939 }
8940 if (page_map_offset != 0) {
8941 /* found a space to map our page ! */
8942 break;
8943 }
8944
8945 if (can_unlock_object) {
8946 /*
8947 * If we can afford to unlock the VM object,
8948 * let's take the slow path now...
8949 */
8950 break;
8951 }
8952 /*
8953 * We can't afford to unlock the VM object, so
8954 * let's wait for a space to become available...
8955 */
8956 vm_paging_page_waiter_total++;
8957 vm_paging_page_waiter++;
8958 kr = assert_wait((event_t)&vm_paging_page_waiter, THREAD_UNINT);
8959 if (kr == THREAD_WAITING) {
8960 simple_unlock(&vm_paging_lock);
8961 kr = thread_block(THREAD_CONTINUE_NULL);
8962 simple_lock(&vm_paging_lock);
8963 }
8964 vm_paging_page_waiter--;
8965 /* ... and try again */
8966 }
8967
8968 if (page_map_offset != 0) {
8969 /*
8970 * We found a kernel virtual address;
8971 * map the physical page to that virtual address.
8972 */
8973 if (i > vm_paging_max_index) {
8974 vm_paging_max_index = i;
8975 }
8976 vm_paging_page_inuse[i] = TRUE;
8977 simple_unlock(&vm_paging_lock);
8978
8979 page->vmp_pmapped = TRUE;
8980
8981 /*
8982 * Keep the VM object locked over the PMAP_ENTER
8983 * and the actual use of the page by the kernel,
8984 * or this pmap mapping might get undone by a
8985 * vm_object_pmap_protect() call...
8986 */
8987 PMAP_ENTER(kernel_pmap,
8988 page_map_offset,
8989 page,
8990 protection,
8991 VM_PROT_NONE,
8992 0,
8993 TRUE,
8994 kr);
8995 assert(kr == KERN_SUCCESS);
8996 vm_paging_objects_mapped++;
8997 vm_paging_pages_mapped++;
8998 *address = page_map_offset;
8999 *need_unmap = TRUE;
9000
9001 #if KASAN
9002 kasan_notify_address(page_map_offset, PAGE_SIZE);
9003 #endif
9004
9005 /* all done and mapped, ready to use ! */
9006 return KERN_SUCCESS;
9007 }
9008
9009 /*
9010 * We ran out of pre-allocated kernel virtual
9011 * addresses. Just map the page in the kernel
9012 * the slow and regular way.
9013 */
9014 vm_paging_no_kernel_page++;
9015 simple_unlock(&vm_paging_lock);
9016 }
9017
9018 if (! can_unlock_object) {
9019 *address = 0;
9020 *size = 0;
9021 *need_unmap = FALSE;
9022 return KERN_NOT_SUPPORTED;
9023 }
9024
9025 object_offset = vm_object_trunc_page(offset);
9026 map_size = vm_map_round_page(*size,
9027 VM_MAP_PAGE_MASK(kernel_map));
9028
9029 /*
9030 * Try and map the required range of the object
9031 * in the kernel_map
9032 */
9033
9034 vm_object_reference_locked(object); /* for the map entry */
9035 vm_object_unlock(object);
9036
9037 kr = vm_map_enter(kernel_map,
9038 address,
9039 map_size,
9040 0,
9041 VM_FLAGS_ANYWHERE,
9042 VM_MAP_KERNEL_FLAGS_NONE,
9043 VM_KERN_MEMORY_NONE,
9044 object,
9045 object_offset,
9046 FALSE,
9047 protection,
9048 VM_PROT_ALL,
9049 VM_INHERIT_NONE);
9050 if (kr != KERN_SUCCESS) {
9051 *address = 0;
9052 *size = 0;
9053 *need_unmap = FALSE;
9054 vm_object_deallocate(object); /* for the map entry */
9055 vm_object_lock(object);
9056 return kr;
9057 }
9058
9059 *size = map_size;
9060
9061 /*
9062 * Enter the mapped pages in the page table now.
9063 */
9064 vm_object_lock(object);
9065 /*
9066 * VM object must be kept locked from before PMAP_ENTER()
9067 * until after the kernel is done accessing the page(s).
9068 * Otherwise, the pmap mappings in the kernel could be
9069 * undone by a call to vm_object_pmap_protect().
9070 */
9071
9072 for (page_map_offset = 0;
9073 map_size != 0;
9074 map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
9075
9076 page = vm_page_lookup(object, offset + page_map_offset);
9077 if (page == VM_PAGE_NULL) {
9078 printf("vm_paging_map_object: no page !?");
9079 vm_object_unlock(object);
9080 kr = vm_map_remove(kernel_map, *address, *size,
9081 VM_MAP_REMOVE_NO_FLAGS);
9082 assert(kr == KERN_SUCCESS);
9083 *address = 0;
9084 *size = 0;
9085 *need_unmap = FALSE;
9086 vm_object_lock(object);
9087 return KERN_MEMORY_ERROR;
9088 }
9089 page->vmp_pmapped = TRUE;
9090
9091 //assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(page)));
9092 PMAP_ENTER(kernel_pmap,
9093 *address + page_map_offset,
9094 page,
9095 protection,
9096 VM_PROT_NONE,
9097 0,
9098 TRUE,
9099 kr);
9100 assert(kr == KERN_SUCCESS);
9101 #if KASAN
9102 kasan_notify_address(*address + page_map_offset, PAGE_SIZE);
9103 #endif
9104 }
9105
9106 vm_paging_objects_mapped_slow++;
9107 vm_paging_pages_mapped_slow += (unsigned long) (map_size / PAGE_SIZE_64);
9108
9109 *need_unmap = TRUE;
9110
9111 return KERN_SUCCESS;
9112 }
9113
9114 /*
9115 * vm_paging_unmap_object:
9116 * Unmaps part of a VM object's pages from the kernel
9117 * virtual address space.
9118 * Context:
9119 * The VM object is locked. The lock may be
9120 * dropped and re-acquired during this call.
9121 */
9122 void
9123 vm_paging_unmap_object(
9124 vm_object_t object,
9125 vm_map_offset_t start,
9126 vm_map_offset_t end)
9127 {
9128 kern_return_t kr;
9129 int i;
9130
9131 if ((vm_paging_base_address == 0) ||
9132 (start < vm_paging_base_address) ||
9133 (end > (vm_paging_base_address
9134 + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) {
9135 /*
9136 * We didn't use our pre-allocated pool of
9137 * kernel virtual addresses. Deallocate the
9138 * virtual memory.
9139 */
9140 if (object != VM_OBJECT_NULL) {
9141 vm_object_unlock(object);
9142 }
9143 kr = vm_map_remove(kernel_map, start, end,
9144 VM_MAP_REMOVE_NO_FLAGS);
9145 if (object != VM_OBJECT_NULL) {
9146 vm_object_lock(object);
9147 }
9148 assert(kr == KERN_SUCCESS);
9149 } else {
9150 /*
9151 * We used a kernel virtual address from our
9152 * pre-allocated pool. Put it back in the pool
9153 * for next time.
9154 */
9155 assert(end - start == PAGE_SIZE);
9156 i = (int) ((start - vm_paging_base_address) >> PAGE_SHIFT);
9157 assert(i >= 0 && i < VM_PAGING_NUM_PAGES);
9158
9159 /* undo the pmap mapping */
9160 pmap_remove(kernel_pmap, start, end);
9161
9162 simple_lock(&vm_paging_lock);
9163 vm_paging_page_inuse[i] = FALSE;
9164 if (vm_paging_page_waiter) {
9165 thread_wakeup(&vm_paging_page_waiter);
9166 }
9167 simple_unlock(&vm_paging_lock);
9168 }
9169 }
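
/*
 * Illustrative sketch (not part of the build): the caller-side pattern
 * for vm_paging_map_object() / vm_paging_unmap_object().  The VM object
 * stays locked across the mapping and the actual access, per the locking
 * rules documented above.  "my_object", "my_page" and "my_offset" are
 * hypothetical, and the argument order follows the prototype earlier in
 * this file; treat this as a sketch, not a reference.
 */
#if 0
static kern_return_t
vm_paging_access_example(
	vm_object_t		my_object,
	vm_page_t		my_page,
	vm_object_offset_t	my_offset)
{
	vm_map_size_t	ksize;
	vm_map_offset_t	kaddr;
	boolean_t	need_unmap;
	kern_return_t	kr;

	vm_object_lock(my_object);

	ksize = PAGE_SIZE;
	kr = vm_paging_map_object(my_page,
				  my_object,
				  my_offset,
				  VM_PROT_READ | VM_PROT_WRITE,
				  FALSE,		/* can_unlock_object */
				  &ksize,		/* IN/OUT */
				  &kaddr,		/* OUT */
				  &need_unmap);		/* OUT */
	if (kr != KERN_SUCCESS) {
		vm_object_unlock(my_object);
		return kr;
	}

	/* ... access the page through "kaddr" while the object stays locked ... */

	if (need_unmap) {
		vm_paging_unmap_object(my_object, kaddr, kaddr + ksize);
	}
	vm_object_unlock(my_object);

	return kr;
}
#endif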
9170
9171
9172 /*
9173 * The page's VM object (page->vmp_object) must be locked by the caller.
9174 */
9175 void
9176 vm_pageout_steal_laundry(vm_page_t page, boolean_t queues_locked)
9177 {
9178 if (!queues_locked) {
9179 vm_page_lockspin_queues();
9180 }
9181
9182 page->vmp_free_when_done = FALSE;
9183 /*
9184 * We need to drop the laundry count and may also
9185 * need to remove the page from the I/O paging
9186 * queue; vm_pageout_throttle_up handles both
9187 * cases.
9188 *
9189 * The laundry and pageout_queue flags are cleared.
9190 */
9191 vm_pageout_throttle_up(page);
9192
9193 if (!queues_locked) {
9194 vm_page_unlock_queues();
9195 }
9196 }
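
/*
 * Illustrative sketch (not part of the build): the two ways a caller can
 * invoke vm_pageout_steal_laundry(), depending on whether it already
 * holds the page-queues lock.  "m" is a hypothetical page whose VM
 * object is locked, as required above.
 */
#if 0
	/* caller does not hold the page-queues lock: */
	vm_pageout_steal_laundry(m, FALSE);

	/* caller already holds the page-queues lock: */
	vm_page_lockspin_queues();
	vm_pageout_steal_laundry(m, TRUE);
	vm_page_unlock_queues();
#endif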
9197
9198 upl_t
9199 vector_upl_create(vm_offset_t upl_offset)
9200 {
9201 int vector_upl_size = sizeof(struct _vector_upl);
9202 int i=0;
9203 upl_t upl;
9204 vector_upl_t vector_upl = (vector_upl_t)kalloc(vector_upl_size);
9205
9206 upl = upl_create(0,UPL_VECTOR,0);
9207 upl->vector_upl = vector_upl;
9208 upl->offset = upl_offset;
9209 vector_upl->size = 0;
9210 vector_upl->offset = upl_offset;
9211 vector_upl->invalid_upls=0;
9212 vector_upl->num_upls=0;
9213 vector_upl->pagelist = NULL;
9214
9215 for(i=0; i < MAX_VECTOR_UPL_ELEMENTS ; i++) {
9216 vector_upl->upl_iostates[i].size = 0;
9217 vector_upl->upl_iostates[i].offset = 0;
9218
9219 }
9220 return upl;
9221 }
9222
9223 void
9224 vector_upl_deallocate(upl_t upl)
9225 {
9226 if(upl) {
9227 vector_upl_t vector_upl = upl->vector_upl;
9228 if(vector_upl) {
9229 if(vector_upl->invalid_upls != vector_upl->num_upls)
9230 panic("Deallocating non-empty Vectored UPL\n");
9231 kfree(vector_upl->pagelist,(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)));
9232 vector_upl->invalid_upls=0;
9233 vector_upl->num_upls = 0;
9234 vector_upl->pagelist = NULL;
9235 vector_upl->size = 0;
9236 vector_upl->offset = 0;
9237 kfree(vector_upl, sizeof(struct _vector_upl));
9238 vector_upl = (vector_upl_t)0xfeedfeed;
9239 }
9240 else
9241 panic("vector_upl_deallocate was passed a non-vectored upl\n");
9242 }
9243 else
9244 panic("vector_upl_deallocate was passed a NULL upl\n");
9245 }
9246
9247 boolean_t
9248 vector_upl_is_valid(upl_t upl)
9249 {
9250 if(upl && ((upl->flags & UPL_VECTOR)==UPL_VECTOR)) {
9251 vector_upl_t vector_upl = upl->vector_upl;
9252 if(vector_upl == NULL || vector_upl == (vector_upl_t)0xfeedfeed || vector_upl == (vector_upl_t)0xfeedbeef)
9253 return FALSE;
9254 else
9255 return TRUE;
9256 }
9257 return FALSE;
9258 }
9259
9260 boolean_t
9261 vector_upl_set_subupl(upl_t upl,upl_t subupl, uint32_t io_size)
9262 {
9263 if(vector_upl_is_valid(upl)) {
9264 vector_upl_t vector_upl = upl->vector_upl;
9265
9266 if(vector_upl) {
9267 if(subupl) {
9268 if(io_size) {
9269 if(io_size < PAGE_SIZE)
9270 io_size = PAGE_SIZE;
9271 subupl->vector_upl = (void*)vector_upl;
9272 vector_upl->upl_elems[vector_upl->num_upls++] = subupl;
9273 vector_upl->size += io_size;
9274 upl->size += io_size;
9275 }
9276 else {
9277 uint32_t i=0,invalid_upls=0;
9278 for(i = 0; i < vector_upl->num_upls; i++) {
9279 if(vector_upl->upl_elems[i] == subupl)
9280 break;
9281 }
9282 if(i == vector_upl->num_upls)
9283 panic("Trying to remove sub-upl when none exists");
9284
9285 vector_upl->upl_elems[i] = NULL;
9286 invalid_upls = hw_atomic_add(&(vector_upl)->invalid_upls, 1);
9287 if(invalid_upls == vector_upl->num_upls)
9288 return TRUE;
9289 else
9290 return FALSE;
9291 }
9292 }
9293 else
9294 panic("vector_upl_set_subupl was passed a NULL upl element\n");
9295 }
9296 else
9297 panic("vector_upl_set_subupl was passed a non-vectored upl\n");
9298 }
9299 else
9300 panic("vector_upl_set_subupl was passed a NULL upl\n");
9301
9302 return FALSE;
9303 }
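
/*
 * Illustrative sketch (not part of the build): the basic lifecycle of a
 * vectored UPL using the routines above and below.  The sub-UPLs
 * "subupl_a"/"subupl_b", their I/O sizes and "upl_offset" are
 * hypothetical; in practice they come from the cluster layer.
 */
#if 0
	upl_t		vupl;
	boolean_t	empty;

	/* allocate the vector UPL for a given offset */
	vupl = vector_upl_create(upl_offset);

	/* attach sub-UPLs; each one grows the vector UPL's size */
	vector_upl_set_subupl(vupl, subupl_a, io_size_a);
	vector_upl_set_subupl(vupl, subupl_b, io_size_b);

	/* record where each sub-UPL's I/O lives within the vector UPL */
	vector_upl_set_iostate(vupl, subupl_a, 0, io_size_a);
	vector_upl_set_iostate(vupl, subupl_b, io_size_a, io_size_b);

	/* build the flattened page list once all sub-UPLs are attached */
	vector_upl_set_pagelist(vupl);

	/*
	 * As each sub-UPL completes, passing io_size == 0 marks it
	 * invalid; when the last one is invalidated, TRUE is returned
	 * and the vector UPL can be torn down (in the real code this
	 * happens when the wrapping UPL is destroyed).
	 */
	(void) vector_upl_set_subupl(vupl, subupl_a, 0);
	empty = vector_upl_set_subupl(vupl, subupl_b, 0);
	if (empty) {
		vector_upl_deallocate(vupl);
	}
#endif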
9304
9305 void
9306 vector_upl_set_pagelist(upl_t upl)
9307 {
9308 if(vector_upl_is_valid(upl)) {
9309 uint32_t i=0;
9310 vector_upl_t vector_upl = upl->vector_upl;
9311
9312 if(vector_upl) {
9313 vm_offset_t pagelist_size=0, cur_upl_pagelist_size=0;
9314
9315 vector_upl->pagelist = (upl_page_info_array_t)kalloc(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE));
9316
9317 for(i=0; i < vector_upl->num_upls; i++) {
9318 cur_upl_pagelist_size = sizeof(struct upl_page_info) * vector_upl->upl_elems[i]->size/PAGE_SIZE;
9319 bcopy(UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(vector_upl->upl_elems[i]), (char*)vector_upl->pagelist + pagelist_size, cur_upl_pagelist_size);
9320 pagelist_size += cur_upl_pagelist_size;
9321 if(vector_upl->upl_elems[i]->highest_page > upl->highest_page)
9322 upl->highest_page = vector_upl->upl_elems[i]->highest_page;
9323 }
9324 assert( pagelist_size == (sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)) );
9325 }
9326 else
9327 panic("vector_upl_set_pagelist was passed a non-vectored upl\n");
9328 }
9329 else
9330 panic("vector_upl_set_pagelist was passed a NULL upl\n");
9331
9332 }
9333
9334 upl_t
9335 vector_upl_subupl_byindex(upl_t upl, uint32_t index)
9336 {
9337 if(vector_upl_is_valid(upl)) {
9338 vector_upl_t vector_upl = upl->vector_upl;
9339 if(vector_upl) {
9340 if(index < vector_upl->num_upls)
9341 return vector_upl->upl_elems[index];
9342 }
9343 else
9344 panic("vector_upl_subupl_byindex was passed a non-vectored upl\n");
9345 }
9346 return NULL;
9347 }
9348
9349 upl_t
9350 vector_upl_subupl_byoffset(upl_t upl, upl_offset_t *upl_offset, upl_size_t *upl_size)
9351 {
9352 if(vector_upl_is_valid(upl)) {
9353 uint32_t i=0;
9354 vector_upl_t vector_upl = upl->vector_upl;
9355
9356 if(vector_upl) {
9357 upl_t subupl = NULL;
9358 vector_upl_iostates_t subupl_state;
9359
9360 for(i=0; i < vector_upl->num_upls; i++) {
9361 subupl = vector_upl->upl_elems[i];
9362 subupl_state = vector_upl->upl_iostates[i];
9363 if( *upl_offset <= (subupl_state.offset + subupl_state.size - 1)) {
9364 /* We could have been passed an offset/size pair that belongs
9365 * to a UPL element that has already been committed/aborted.
9366 * If so, return NULL.
9367 */
9368 if(subupl == NULL)
9369 return NULL;
9370 if((subupl_state.offset + subupl_state.size) < (*upl_offset + *upl_size)) {
9371 *upl_size = (subupl_state.offset + subupl_state.size) - *upl_offset;
9372 if(*upl_size > subupl_state.size)
9373 *upl_size = subupl_state.size;
9374 }
9375 if(*upl_offset >= subupl_state.offset)
9376 *upl_offset -= subupl_state.offset;
9377 else if(i)
9378 panic("Vector UPL offset miscalculation\n");
9379 return subupl;
9380 }
9381 }
9382 }
9383 else
9384 panic("vector_upl_subupl_byoffset was passed a non-vectored UPL\n");
9385 }
9386 return NULL;
9387 }
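
/*
 * Illustrative sketch (not part of the build): translating a range
 * within a vectored UPL into the owning sub-UPL's coordinates with
 * vector_upl_subupl_byoffset().  "vupl", "io_offset" and "io_size" are
 * hypothetical inputs from an I/O completion path.
 */
#if 0
	upl_offset_t	offset;
	upl_size_t	size;
	upl_t		subupl;

	offset = io_offset;	/* range start, relative to the vector UPL */
	size = io_size;		/* range length */

	subupl = vector_upl_subupl_byoffset(vupl, &offset, &size);
	if (subupl != NULL) {
		/*
		 * "offset" is now relative to the sub-UPL and "size" has
		 * been clipped to that sub-UPL's recorded iostate, so the
		 * caller can commit or abort exactly that range on "subupl".
		 */
	}
#endif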
9388
9389 void
9390 vector_upl_get_submap(upl_t upl, vm_map_t *v_upl_submap, vm_offset_t *submap_dst_addr)
9391 {
9392 *v_upl_submap = NULL;
9393
9394 if(vector_upl_is_valid(upl)) {
9395 vector_upl_t vector_upl = upl->vector_upl;
9396 if(vector_upl) {
9397 *v_upl_submap = vector_upl->submap;
9398 *submap_dst_addr = vector_upl->submap_dst_addr;
9399 }
9400 else
9401 panic("vector_upl_get_submap was passed a non-vectored UPL\n");
9402 }
9403 else
9404 panic("vector_upl_get_submap was passed a null UPL\n");
9405 }
9406
9407 void
9408 vector_upl_set_submap(upl_t upl, vm_map_t submap, vm_offset_t submap_dst_addr)
9409 {
9410 if(vector_upl_is_valid(upl)) {
9411 vector_upl_t vector_upl = upl->vector_upl;
9412 if(vector_upl) {
9413 vector_upl->submap = submap;
9414 vector_upl->submap_dst_addr = submap_dst_addr;
9415 }
9416 else
9417 panic("vector_upl_get_submap was passed a non-vectored UPL\n");
9418 }
9419 else
9420 panic("vector_upl_get_submap was passed a NULL UPL\n");
9421 }
9422
9423 void
9424 vector_upl_set_iostate(upl_t upl, upl_t subupl, upl_offset_t offset, upl_size_t size)
9425 {
9426 if(vector_upl_is_valid(upl)) {
9427 uint32_t i = 0;
9428 vector_upl_t vector_upl = upl->vector_upl;
9429
9430 if(vector_upl) {
9431 for(i = 0; i < vector_upl->num_upls; i++) {
9432 if(vector_upl->upl_elems[i] == subupl)
9433 break;
9434 }
9435
9436 if(i == vector_upl->num_upls)
9437 panic("setting sub-upl iostate when none exists");
9438
9439 vector_upl->upl_iostates[i].offset = offset;
9440 if(size < PAGE_SIZE)
9441 size = PAGE_SIZE;
9442 vector_upl->upl_iostates[i].size = size;
9443 }
9444 else
9445 panic("vector_upl_set_iostate was passed a non-vectored UPL\n");
9446 }
9447 else
9448 panic("vector_upl_set_iostate was passed a NULL UPL\n");
9449 }
9450
9451 void
9452 vector_upl_get_iostate(upl_t upl, upl_t subupl, upl_offset_t *offset, upl_size_t *size)
9453 {
9454 if(vector_upl_is_valid(upl)) {
9455 uint32_t i = 0;
9456 vector_upl_t vector_upl = upl->vector_upl;
9457
9458 if(vector_upl) {
9459 for(i = 0; i < vector_upl->num_upls; i++) {
9460 if(vector_upl->upl_elems[i] == subupl)
9461 break;
9462 }
9463
9464 if(i == vector_upl->num_upls)
9465 panic("getting sub-upl iostate when none exists");
9466
9467 *offset = vector_upl->upl_iostates[i].offset;
9468 *size = vector_upl->upl_iostates[i].size;
9469 }
9470 else
9471 panic("vector_upl_get_iostate was passed a non-vectored UPL\n");
9472 }
9473 else
9474 panic("vector_upl_get_iostate was passed a NULL UPL\n");
9475 }
9476
9477 void
9478 vector_upl_get_iostate_byindex(upl_t upl, uint32_t index, upl_offset_t *offset, upl_size_t *size)
9479 {
9480 if(vector_upl_is_valid(upl)) {
9481 vector_upl_t vector_upl = upl->vector_upl;
9482 if(vector_upl) {
9483 if(index < vector_upl->num_upls) {
9484 *offset = vector_upl->upl_iostates[index].offset;
9485 *size = vector_upl->upl_iostates[index].size;
9486 }
9487 else
9488 *offset = *size = 0;
9489 }
9490 else
9491 panic("vector_upl_get_iostate_byindex was passed a non-vectored UPL\n");
9492 }
9493 else
9494 panic("vector_upl_get_iostate_byindex was passed a NULL UPL\n");
9495 }
9496
9497 upl_page_info_t *
9498 upl_get_internal_vectorupl_pagelist(upl_t upl)
9499 {
9500 return ((vector_upl_t)(upl->vector_upl))->pagelist;
9501 }
9502
9503 void *
9504 upl_get_internal_vectorupl(upl_t upl)
9505 {
9506 return upl->vector_upl;
9507 }
9508
9509 vm_size_t
9510 upl_get_internal_pagelist_offset(void)
9511 {
9512 return sizeof(struct upl);
9513 }
9514
9515 void
9516 upl_clear_dirty(
9517 upl_t upl,
9518 boolean_t value)
9519 {
9520 if (value) {
9521 upl->flags |= UPL_CLEAR_DIRTY;
9522 } else {
9523 upl->flags &= ~UPL_CLEAR_DIRTY;
9524 }
9525 }
9526
9527 void
9528 upl_set_referenced(
9529 upl_t upl,
9530 boolean_t value)
9531 {
9532 upl_lock(upl);
9533 if (value) {
9534 upl->ext_ref_count++;
9535 } else {
9536 if (!upl->ext_ref_count) {
9537 panic("upl_set_referenced not %p\n", upl);
9538 }
9539 upl->ext_ref_count--;
9540 }
9541 upl_unlock(upl);
9542 }
9543
9544 #if CONFIG_IOSCHED
9545 void
9546 upl_set_blkno(
9547 upl_t upl,
9548 vm_offset_t upl_offset,
9549 int io_size,
9550 int64_t blkno)
9551 {
9552 int i,j;
9553 if ((upl->flags & UPL_EXPEDITE_SUPPORTED) == 0)
9554 return;
9555
9556 assert(upl->upl_reprio_info != 0);
9557 for(i = (int)(upl_offset / PAGE_SIZE), j = 0; j < io_size; i++, j += PAGE_SIZE) {
9558 UPL_SET_REPRIO_INFO(upl, i, blkno, io_size);
9559 }
9560 }
9561 #endif
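
/*
 * Illustrative sketch (not part of the build, CONFIG_IOSCHED only): which
 * page slots upl_set_blkno() updates.  The loop above starts at page
 * index upl_offset / PAGE_SIZE and advances one page per PAGE_SIZE of
 * io_size, so with 4K pages the call below records the block number and
 * I/O size for page indices 3 and 4.  "upl" and "blkno" are hypothetical.
 */
#if 0
	upl_set_blkno(upl, 0x3000 /* upl_offset */, 0x2000 /* io_size */, blkno);
#endif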
9562
9563 void inline memoryshot(unsigned int event, unsigned int control)
9564 {
9565 if (vm_debug_events) {
9566 KERNEL_DEBUG_CONSTANT1((MACHDBG_CODE(DBG_MACH_VM_PRESSURE, event)) | control,
9567 vm_page_active_count, vm_page_inactive_count,
9568 vm_page_free_count, vm_page_speculative_count,
9569 vm_page_throttled_count);
9570 } else {
9571 (void) event;
9572 (void) control;
9573 }
9574
9575 }
9576
9577 #ifdef MACH_BSD
9578
9579 boolean_t upl_device_page(upl_page_info_t *upl)
9580 {
9581 return(UPL_DEVICE_PAGE(upl));
9582 }
9583 boolean_t upl_page_present(upl_page_info_t *upl, int index)
9584 {
9585 return(UPL_PAGE_PRESENT(upl, index));
9586 }
9587 boolean_t upl_speculative_page(upl_page_info_t *upl, int index)
9588 {
9589 return(UPL_SPECULATIVE_PAGE(upl, index));
9590 }
9591 boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
9592 {
9593 return(UPL_DIRTY_PAGE(upl, index));
9594 }
9595 boolean_t upl_valid_page(upl_page_info_t *upl, int index)
9596 {
9597 return(UPL_VALID_PAGE(upl, index));
9598 }
9599 ppnum_t upl_phys_page(upl_page_info_t *upl, int index)
9600 {
9601 return(UPL_PHYS_PAGE(upl, index));
9602 }
9603
9604 void upl_page_set_mark(upl_page_info_t *upl, int index, boolean_t v)
9605 {
9606 upl[index].mark = v;
9607 }
9608
9609 boolean_t upl_page_get_mark(upl_page_info_t *upl, int index)
9610 {
9611 return upl[index].mark;
9612 }
9613
9614 void
9615 vm_countdirtypages(void)
9616 {
9617 vm_page_t m;
9618 int dpages;
9619 int pgopages;
9620 int precpages;
9621
9622
9623 dpages=0;
9624 pgopages=0;
9625 precpages=0;
9626
9627 vm_page_lock_queues();
9628 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
9629 do {
9630 if (m ==(vm_page_t )0) break;
9631
9632 if(m->vmp_dirty) dpages++;
9633 if(m->vmp_free_when_done) pgopages++;
9634 if(m->vmp_precious) precpages++;
9635
9636 assert(VM_PAGE_OBJECT(m) != kernel_object);
9637 m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9638 if (m ==(vm_page_t )0) break;
9639
9640 } while (!vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t) m));
9641 vm_page_unlock_queues();
9642
9643 vm_page_lock_queues();
9644 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
9645 do {
9646 if (m ==(vm_page_t )0) break;
9647
9648 dpages++;
9649 assert(m->vmp_dirty);
9650 assert(!m->vmp_free_when_done);
9651 assert(VM_PAGE_OBJECT(m) != kernel_object);
9652 m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9653 if (m ==(vm_page_t )0) break;
9654
9655 } while (!vm_page_queue_end(&vm_page_queue_throttled, (vm_page_queue_entry_t) m));
9656 vm_page_unlock_queues();
9657
9658 vm_page_lock_queues();
9659 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
9660 do {
9661 if (m ==(vm_page_t )0) break;
9662
9663 if(m->vmp_dirty) dpages++;
9664 if(m->vmp_free_when_done) pgopages++;
9665 if(m->vmp_precious) precpages++;
9666
9667 assert(VM_PAGE_OBJECT(m) != kernel_object);
9668 m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9669 if (m ==(vm_page_t )0) break;
9670
9671 } while (!vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t) m));
9672 vm_page_unlock_queues();
9673
9674 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
9675
9676 dpages=0;
9677 pgopages=0;
9678 precpages=0;
9679
9680 vm_page_lock_queues();
9681 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
9682
9683 do {
9684 if(m == (vm_page_t )0) break;
9685 if(m->vmp_dirty) dpages++;
9686 if(m->vmp_free_when_done) pgopages++;
9687 if(m->vmp_precious) precpages++;
9688
9689 assert(VM_PAGE_OBJECT(m) != kernel_object);
9690 m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9691 if(m == (vm_page_t )0) break;
9692
9693 } while (!vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t) m));
9694 vm_page_unlock_queues();
9695
9696 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
9697
9698 }
9699 #endif /* MACH_BSD */
9700
9701
9702 #if CONFIG_IOSCHED
9703 int upl_get_cached_tier(upl_t upl)
9704 {
9705 assert(upl);
9706 if (upl->flags & UPL_TRACKED_BY_OBJECT)
9707 return (upl->upl_priority);
9708 return (-1);
9709 }
9710 #endif /* CONFIG_IOSCHED */
9711
9712
9713 void upl_callout_iodone(upl_t upl)
9714 {
9715 struct upl_io_completion *upl_ctx = upl->upl_iodone;
9716
9717 if (upl_ctx) {
9718 void (*iodone_func)(void *, int) = upl_ctx->io_done;
9719
9720 assert(upl_ctx->io_done);
9721
9722 (*iodone_func)(upl_ctx->io_context, upl_ctx->io_error);
9723 }
9724 }
9725
9726 void upl_set_iodone(upl_t upl, void *upl_iodone)
9727 {
9728 upl->upl_iodone = (struct upl_io_completion *)upl_iodone;
9729 }
9730
9731 void upl_set_iodone_error(upl_t upl, int error)
9732 {
9733 struct upl_io_completion *upl_ctx = upl->upl_iodone;
9734
9735 if (upl_ctx)
9736 upl_ctx->io_error = error;
9737 }
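
/*
 * Illustrative sketch (not part of the build): wiring up a UPL I/O
 * completion callback with the routines above.  "my_iodone",
 * "my_issue_io" and "my_context" are hypothetical, and the error value
 * is only an example.
 */
#if 0
static void
my_iodone(void *context, int error)
{
	/* hypothetical completion handler */
}

static void
my_issue_io(upl_t upl, void *my_context)
{
	struct upl_io_completion io_ctx = {
		.io_context = my_context,
		.io_done = my_iodone,
		.io_error = 0,
	};

	upl_set_iodone(upl, &io_ctx);

	/* ... issue the I/O; a failure is recorded on the UPL ... */
	upl_set_iodone_error(upl, EIO);

	/* when the I/O completes, the callback fires with the context and error */
	upl_callout_iodone(upl);
}
#endif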
9738
9739
9740 ppnum_t upl_get_highest_page(
9741 upl_t upl)
9742 {
9743 return upl->highest_page;
9744 }
9745
9746 upl_size_t upl_get_size(
9747 upl_t upl)
9748 {
9749 return upl->size;
9750 }
9751
9752 upl_t upl_associated_upl(upl_t upl)
9753 {
9754 return upl->associated_upl;
9755 }
9756
9757 void upl_set_associated_upl(upl_t upl, upl_t associated_upl)
9758 {
9759 upl->associated_upl = associated_upl;
9760 }
9761
9762 struct vnode * upl_lookup_vnode(upl_t upl)
9763 {
9764 if (!upl->map_object->internal)
9765 return vnode_pager_lookup_vnode(upl->map_object->pager);
9766 else
9767 return NULL;
9768 }
9769
9770 #if UPL_DEBUG
9771 kern_return_t upl_ubc_alias_set(upl_t upl, uintptr_t alias1, uintptr_t alias2)
9772 {
9773 upl->ubc_alias1 = alias1;
9774 upl->ubc_alias2 = alias2;
9775 return KERN_SUCCESS;
9776 }
9777 int upl_ubc_alias_get(upl_t upl, uintptr_t * al, uintptr_t * al2)
9778 {
9779 if(al)
9780 *al = upl->ubc_alias1;
9781 if(al2)
9782 *al2 = upl->ubc_alias2;
9783 return KERN_SUCCESS;
9784 }
9785 #endif /* UPL_DEBUG */
9786
9787 #if VM_PRESSURE_EVENTS
9788 /*
9789 * Upward trajectory.
9790 */
9791 extern boolean_t vm_compressor_low_on_space(void);
9792
9793 boolean_t
9794 VM_PRESSURE_NORMAL_TO_WARNING(void) {
9795
9796 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
9797
9798 /* Available pages below our threshold */
9799 if (memorystatus_available_pages < memorystatus_available_pages_pressure) {
9800 /* No frozen processes to kill */
9801 if (memorystatus_frozen_count == 0) {
9802 /* Not enough suspended processes available. */
9803 if (memorystatus_suspended_count < MEMORYSTATUS_SUSPENDED_THRESHOLD) {
9804 return TRUE;
9805 }
9806 }
9807 }
9808 return FALSE;
9809
9810 } else {
9811 return ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 1 : 0);
9812 }
9813 }
9814
9815 boolean_t
9816 VM_PRESSURE_WARNING_TO_CRITICAL(void) {
9817
9818 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
9819
9820 /* Available pages below our threshold */
9821 if (memorystatus_available_pages < memorystatus_available_pages_critical) {
9822 return TRUE;
9823 }
9824 return FALSE;
9825 } else {
9826 return (vm_compressor_low_on_space() || (AVAILABLE_NON_COMPRESSED_MEMORY < ((12 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
9827 }
9828 }
9829
9830 /*
9831 * Downward trajectory.
9832 */
9833 boolean_t
9834 VM_PRESSURE_WARNING_TO_NORMAL(void) {
9835
9836 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
9837
9838 /* Available pages above our threshold */
9839 unsigned int target_threshold = (unsigned int) (memorystatus_available_pages_pressure + ((15 * memorystatus_available_pages_pressure) / 100));
9840 if (memorystatus_available_pages > target_threshold) {
9841 return TRUE;
9842 }
9843 return FALSE;
9844 } else {
9845 return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((12 * VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) / 10)) ? 1 : 0);
9846 }
9847 }
9848
9849 boolean_t
9850 VM_PRESSURE_CRITICAL_TO_WARNING(void) {
9851
9852 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
9853
9854 /* Available pages above our threshold */
9855 unsigned int target_threshold = (unsigned int)(memorystatus_available_pages_critical + ((15 * memorystatus_available_pages_critical) / 100));
9856 if (memorystatus_available_pages > target_threshold) {
9857 return TRUE;
9858 }
9859 return FALSE;
9860 } else {
9861 return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((14 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
9862 }
9863 }
9864 #endif /* VM_PRESSURE_EVENTS */
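
/*
 * Illustrative sketch (not part of the build): the hysteresis arithmetic
 * used by the non-compressor pressure transitions above.  Returning from
 * WARNING to NORMAL requires 15% more available pages than the level
 * that triggered the WARNING transition, so the two checks cannot
 * oscillate around a single value.  The page count below is hypothetical.
 */
#if 0
	unsigned int pressure_level = 10000;	/* memorystatus_available_pages_pressure */
	unsigned int back_to_normal;

	/* NORMAL -> WARNING fires once available pages drop below 10000 */
	/* WARNING -> NORMAL needs 10000 + (15 * 10000) / 100 = 11500 pages */
	back_to_normal = pressure_level + ((15 * pressure_level) / 100);
#endif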
9865
9866
9867
9868 #define VM_TEST_COLLAPSE_COMPRESSOR 0
9869 #define VM_TEST_WIRE_AND_EXTRACT 0
9870 #define VM_TEST_PAGE_WIRE_OVERFLOW_PANIC 0
9871 #if __arm64__
9872 #define VM_TEST_KERNEL_OBJECT_FAULT 0
9873 #endif /* __arm64__ */
9874 #define VM_TEST_DEVICE_PAGER_TRANSPOSE (DEVELOPMENT || DEBUG)
9875
9876 #if VM_TEST_COLLAPSE_COMPRESSOR
9877 extern boolean_t vm_object_collapse_compressor_allowed;
9878 #include <IOKit/IOLib.h>
9879 static void
9880 vm_test_collapse_compressor(void)
9881 {
9882 vm_object_size_t backing_size, top_size;
9883 vm_object_t backing_object, top_object;
9884 vm_map_offset_t backing_offset, top_offset;
9885 unsigned char *backing_address, *top_address;
9886 kern_return_t kr;
9887
9888 printf("VM_TEST_COLLAPSE_COMPRESSOR:\n");
9889
9890 /* create backing object */
9891 backing_size = 15 * PAGE_SIZE;
9892 backing_object = vm_object_allocate(backing_size);
9893 assert(backing_object != VM_OBJECT_NULL);
9894 printf("VM_TEST_COLLAPSE_COMPRESSOR: created backing object %p\n",
9895 backing_object);
9896 /* map backing object */
9897 backing_offset = 0;
9898 kr = vm_map_enter(kernel_map, &backing_offset, backing_size, 0,
9899 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
9900 backing_object, 0, FALSE,
9901 VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
9902 assert(kr == KERN_SUCCESS);
9903 backing_address = (unsigned char *) backing_offset;
9904 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9905 "mapped backing object %p at 0x%llx\n",
9906 backing_object, (uint64_t) backing_offset);
9907 /* populate with pages to be compressed in backing object */
9908 backing_address[0x1*PAGE_SIZE] = 0xB1;
9909 backing_address[0x4*PAGE_SIZE] = 0xB4;
9910 backing_address[0x7*PAGE_SIZE] = 0xB7;
9911 backing_address[0xa*PAGE_SIZE] = 0xBA;
9912 backing_address[0xd*PAGE_SIZE] = 0xBD;
9913 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9914 "populated pages to be compressed in "
9915 "backing_object %p\n", backing_object);
9916 /* compress backing object */
9917 vm_object_pageout(backing_object);
9918 printf("VM_TEST_COLLAPSE_COMPRESSOR: compressing backing_object %p\n",
9919 backing_object);
9920 /* wait for all the pages to be gone */
9921 while (*(volatile int *)&backing_object->resident_page_count != 0)
9922 IODelay(10);
9923 printf("VM_TEST_COLLAPSE_COMPRESSOR: backing_object %p compressed\n",
9924 backing_object);
9925 /* populate with pages to be resident in backing object */
9926 backing_address[0x0*PAGE_SIZE] = 0xB0;
9927 backing_address[0x3*PAGE_SIZE] = 0xB3;
9928 backing_address[0x6*PAGE_SIZE] = 0xB6;
9929 backing_address[0x9*PAGE_SIZE] = 0xB9;
9930 backing_address[0xc*PAGE_SIZE] = 0xBC;
9931 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9932 "populated pages to be resident in "
9933 "backing_object %p\n", backing_object);
9934 /* leave the other pages absent */
9935 /* mess with the paging_offset of the backing_object */
9936 assert(backing_object->paging_offset == 0);
9937 backing_object->paging_offset = 0x3000;
9938
9939 /* create top object */
9940 top_size = 9 * PAGE_SIZE;
9941 top_object = vm_object_allocate(top_size);
9942 assert(top_object != VM_OBJECT_NULL);
9943 printf("VM_TEST_COLLAPSE_COMPRESSOR: created top object %p\n",
9944 top_object);
9945 /* map top object */
9946 top_offset = 0;
9947 kr = vm_map_enter(kernel_map, &top_offset, top_size, 0,
9948 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
9949 top_object, 0, FALSE,
9950 VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
9951 assert(kr == KERN_SUCCESS);
9952 top_address = (unsigned char *) top_offset;
9953 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9954 "mapped top object %p at 0x%llx\n",
9955 top_object, (uint64_t) top_offset);
9956 /* populate with pages to be compressed in top object */
9957 top_address[0x3*PAGE_SIZE] = 0xA3;
9958 top_address[0x4*PAGE_SIZE] = 0xA4;
9959 top_address[0x5*PAGE_SIZE] = 0xA5;
9960 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9961 "populated pages to be compressed in "
9962 "top_object %p\n", top_object);
9963 /* compress top object */
9964 vm_object_pageout(top_object);
9965 printf("VM_TEST_COLLAPSE_COMPRESSOR: compressing top_object %p\n",
9966 top_object);
9967 /* wait for all the pages to be gone */
9968 while (top_object->resident_page_count != 0)
9969 IODelay(10);
9970 printf("VM_TEST_COLLAPSE_COMPRESSOR: top_object %p compressed\n",
9971 top_object);
9972 /* populate with pages to be resident in top object */
9973 top_address[0x0*PAGE_SIZE] = 0xA0;
9974 top_address[0x1*PAGE_SIZE] = 0xA1;
9975 top_address[0x2*PAGE_SIZE] = 0xA2;
9976 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9977 "populated pages to be resident in "
9978 "top_object %p\n", top_object);
9979 /* leave the other pages absent */
9980
9981 /* link the 2 objects */
9982 vm_object_reference(backing_object);
9983 top_object->shadow = backing_object;
9984 top_object->vo_shadow_offset = 0x3000;
9985 printf("VM_TEST_COLLAPSE_COMPRESSOR: linked %p and %p\n",
9986 top_object, backing_object);
9987
9988 /* unmap backing object */
9989 vm_map_remove(kernel_map,
9990 backing_offset,
9991 backing_offset + backing_size,
9992 VM_MAP_REMOVE_NO_FLAGS);
9993 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9994 "unmapped backing_object %p [0x%llx:0x%llx]\n",
9995 backing_object,
9996 (uint64_t) backing_offset,
9997 (uint64_t) (backing_offset + backing_size));
9998
9999 /* collapse */
10000 printf("VM_TEST_COLLAPSE_COMPRESSOR: collapsing %p\n", top_object);
10001 vm_object_lock(top_object);
10002 vm_object_collapse(top_object, 0, FALSE);
10003 vm_object_unlock(top_object);
10004 printf("VM_TEST_COLLAPSE_COMPRESSOR: collapsed %p\n", top_object);
10005
10006 /* did it work? */
10007 if (top_object->shadow != VM_OBJECT_NULL) {
10008 printf("VM_TEST_COLLAPSE_COMPRESSOR: not collapsed\n");
10009 printf("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
10010 if (vm_object_collapse_compressor_allowed) {
10011 panic("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
10012 }
10013 } else {
10014 /* check the contents of the mapping */
10015 unsigned char expect[9] =
10016 { 0xA0, 0xA1, 0xA2, /* resident in top */
10017 0xA3, 0xA4, 0xA5, /* compressed in top */
10018 0xB9, /* resident in backing + shadow_offset */
10019 0xBD, /* compressed in backing + shadow_offset + paging_offset */
10020 0x00 }; /* absent in both */
10021 unsigned char actual[9];
10022 unsigned int i, errors;
10023
10024 errors = 0;
10025 for (i = 0; i < sizeof (actual); i++) {
10026 actual[i] = (unsigned char) top_address[i*PAGE_SIZE];
10027 if (actual[i] != expect[i]) {
10028 errors++;
10029 }
10030 }
10031 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10032 "actual [%x %x %x %x %x %x %x %x %x] "
10033 "expect [%x %x %x %x %x %x %x %x %x] "
10034 "%d errors\n",
10035 actual[0], actual[1], actual[2], actual[3],
10036 actual[4], actual[5], actual[6], actual[7],
10037 actual[8],
10038 expect[0], expect[1], expect[2], expect[3],
10039 expect[4], expect[5], expect[6], expect[7],
10040 expect[8],
10041 errors);
10042 if (errors) {
10043 panic("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
10044 } else {
10045 printf("VM_TEST_COLLAPSE_COMPRESSOR: PASS\n");
10046 }
10047 }
10048 }
10049 #else /* VM_TEST_COLLAPSE_COMPRESSOR */
10050 #define vm_test_collapse_compressor()
10051 #endif /* VM_TEST_COLLAPSE_COMPRESSOR */
10052
10053 #if VM_TEST_WIRE_AND_EXTRACT
10054 extern ledger_template_t task_ledger_template;
10055 #include <mach/mach_vm.h>
10056 extern ppnum_t vm_map_get_phys_page(vm_map_t map,
10057 vm_offset_t offset);
10058 static void
10059 vm_test_wire_and_extract(void)
10060 {
10061 ledger_t ledger;
10062 vm_map_t user_map, wire_map;
10063 mach_vm_address_t user_addr, wire_addr;
10064 mach_vm_size_t user_size, wire_size;
10065 mach_vm_offset_t cur_offset;
10066 vm_prot_t cur_prot, max_prot;
10067 ppnum_t user_ppnum, wire_ppnum;
10068 kern_return_t kr;
10069
10070 ledger = ledger_instantiate(task_ledger_template,
10071 LEDGER_CREATE_ACTIVE_ENTRIES);
10072 user_map = vm_map_create(pmap_create(ledger, 0, PMAP_CREATE_64BIT),
10073 0x100000000ULL,
10074 0x200000000ULL,
10075 TRUE);
10076 wire_map = vm_map_create(NULL,
10077 0x100000000ULL,
10078 0x200000000ULL,
10079 TRUE);
10080 user_addr = 0;
10081 user_size = 0x10000;
10082 kr = mach_vm_allocate(user_map,
10083 &user_addr,
10084 user_size,
10085 VM_FLAGS_ANYWHERE);
10086 assert(kr == KERN_SUCCESS);
10087 wire_addr = 0;
10088 wire_size = user_size;
10089 kr = mach_vm_remap(wire_map,
10090 &wire_addr,
10091 wire_size,
10092 0,
10093 VM_FLAGS_ANYWHERE,
10094 user_map,
10095 user_addr,
10096 FALSE,
10097 &cur_prot,
10098 &max_prot,
10099 VM_INHERIT_NONE);
10100 assert(kr == KERN_SUCCESS);
10101 for (cur_offset = 0;
10102 cur_offset < wire_size;
10103 cur_offset += PAGE_SIZE) {
10104 kr = vm_map_wire_and_extract(wire_map,
10105 wire_addr + cur_offset,
10106 VM_PROT_DEFAULT | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK),
10107 TRUE,
10108 &wire_ppnum);
10109 assert(kr == KERN_SUCCESS);
10110 user_ppnum = vm_map_get_phys_page(user_map,
10111 user_addr + cur_offset);
10112 printf("VM_TEST_WIRE_AND_EXTRACT: kr=0x%x "
10113 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
10114 kr,
10115 user_map, user_addr + cur_offset, user_ppnum,
10116 wire_map, wire_addr + cur_offset, wire_ppnum);
10117 if (kr != KERN_SUCCESS ||
10118 wire_ppnum == 0 ||
10119 wire_ppnum != user_ppnum) {
10120 panic("VM_TEST_WIRE_AND_EXTRACT: FAIL\n");
10121 }
10122 }
10123 cur_offset -= PAGE_SIZE;
10124 kr = vm_map_wire_and_extract(wire_map,
10125 wire_addr + cur_offset,
10126 VM_PROT_DEFAULT,
10127 TRUE,
10128 &wire_ppnum);
10129 assert(kr == KERN_SUCCESS);
10130 printf("VM_TEST_WIRE_AND_EXTRACT: re-wire kr=0x%x "
10131 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
10132 kr,
10133 user_map, user_addr + cur_offset, user_ppnum,
10134 wire_map, wire_addr + cur_offset, wire_ppnum);
10135 if (kr != KERN_SUCCESS ||
10136 wire_ppnum == 0 ||
10137 wire_ppnum != user_ppnum) {
10138 panic("VM_TEST_WIRE_AND_EXTRACT: FAIL\n");
10139 }
10140
10141 printf("VM_TEST_WIRE_AND_EXTRACT: PASS\n");
10142 }
10143 #else /* VM_TEST_WIRE_AND_EXTRACT */
10144 #define vm_test_wire_and_extract()
10145 #endif /* VM_TEST_WIRE_AND_EXTRACT */
10146
10147 #if VM_TEST_PAGE_WIRE_OVERFLOW_PANIC
10148 static void
10149 vm_test_page_wire_overflow_panic(void)
10150 {
10151 vm_object_t object;
10152 vm_page_t page;
10153
10154 printf("VM_TEST_PAGE_WIRE_OVERFLOW_PANIC: starting...\n");
10155
10156 object = vm_object_allocate(PAGE_SIZE);
10157 vm_object_lock(object);
10158 page = vm_page_alloc(object, 0x0);
10159 vm_page_lock_queues();
10160 do {
10161 vm_page_wire(page, 1, FALSE);
10162 } while (page->wire_count != 0);
10163 vm_page_unlock_queues();
10164 vm_object_unlock(object);
10165 panic("FBDP(%p,%p): wire_count overflow not detected\n",
10166 object, page);
10167 }
10168 #else /* VM_TEST_PAGE_WIRE_OVERFLOW_PANIC */
10169 #define vm_test_page_wire_overflow_panic()
10170 #endif /* VM_TEST_PAGE_WIRE_OVERFLOW_PANIC */
10171
10172 #if __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT
10173 extern int copyinframe(vm_address_t fp, char *frame, boolean_t is64bit);
10174 static void
10175 vm_test_kernel_object_fault(void)
10176 {
10177 kern_return_t kr;
10178 vm_offset_t stack;
10179 uintptr_t frameb[2];
10180 int ret;
10181
10182 kr = kernel_memory_allocate(kernel_map, &stack,
10183 kernel_stack_size + (2*PAGE_SIZE),
10184 0,
10185 (KMA_KSTACK | KMA_KOBJECT |
10186 KMA_GUARD_FIRST | KMA_GUARD_LAST),
10187 VM_KERN_MEMORY_STACK);
10188 if (kr != KERN_SUCCESS) {
10189 panic("VM_TEST_KERNEL_OBJECT_FAULT: kernel_memory_allocate kr 0x%x\n", kr);
10190 }
10191 ret = copyinframe((uintptr_t)stack, (char *)frameb, TRUE);
10192 if (ret != 0) {
10193 printf("VM_TEST_KERNEL_OBJECT_FAULT: PASS\n");
10194 } else {
10195 printf("VM_TEST_KERNEL_OBJECT_FAULT: FAIL\n");
10196 }
10197 vm_map_remove(kernel_map,
10198 stack,
10199 stack + kernel_stack_size + (2*PAGE_SIZE),
10200 VM_MAP_REMOVE_KUNWIRE);
10201 stack = 0;
10202 }
10203 #else /* __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT */
10204 #define vm_test_kernel_object_fault()
10205 #endif /* __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT */
10206
10207 #if VM_TEST_DEVICE_PAGER_TRANSPOSE
10208 static void
10209 vm_test_device_pager_transpose(void)
10210 {
10211 memory_object_t device_pager;
10212 vm_object_t anon_object, device_object;
10213 vm_size_t size;
10214 vm_map_offset_t anon_mapping, device_mapping;
10215 kern_return_t kr;
10216
10217 size = 3 * PAGE_SIZE;
10218 anon_object = vm_object_allocate(size);
10219 assert(anon_object != VM_OBJECT_NULL);
10220 device_pager = device_pager_setup(NULL, 0, size, 0);
10221 assert(device_pager != NULL);
10222 device_object = memory_object_to_vm_object(device_pager);
10223 assert(device_object != VM_OBJECT_NULL);
10224 anon_mapping = 0;
10225 kr = vm_map_enter(kernel_map, &anon_mapping, size, 0,
10226 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
10227 anon_object, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
10228 VM_INHERIT_DEFAULT);
10229 assert(kr == KERN_SUCCESS);
10230 device_mapping = 0;
10231 kr = vm_map_enter_mem_object(kernel_map, &device_mapping, size, 0,
10232 VM_FLAGS_ANYWHERE,
10233 VM_MAP_KERNEL_FLAGS_NONE,
10234 VM_KERN_MEMORY_NONE,
10235 (void *)device_pager, 0, FALSE,
10236 VM_PROT_DEFAULT, VM_PROT_ALL,
10237 VM_INHERIT_DEFAULT);
10238 assert(kr == KERN_SUCCESS);
10239 memory_object_deallocate(device_pager);
10240
10241 vm_object_lock(anon_object);
10242 vm_object_activity_begin(anon_object);
10243 anon_object->blocked_access = TRUE;
10244 vm_object_unlock(anon_object);
10245 vm_object_lock(device_object);
10246 vm_object_activity_begin(device_object);
10247 device_object->blocked_access = TRUE;
10248 vm_object_unlock(device_object);
10249
10250 assert(anon_object->ref_count == 1);
10251 assert(!anon_object->named);
10252 assert(device_object->ref_count == 2);
10253 assert(device_object->named);
10254
10255 kr = vm_object_transpose(device_object, anon_object, size);
10256 assert(kr == KERN_SUCCESS);
10257
10258 vm_object_lock(anon_object);
10259 vm_object_activity_end(anon_object);
10260 anon_object->blocked_access = FALSE;
10261 vm_object_unlock(anon_object);
10262 vm_object_lock(device_object);
10263 vm_object_activity_end(device_object);
10264 device_object->blocked_access = FALSE;
10265 vm_object_unlock(device_object);
10266
10267 assert(anon_object->ref_count == 2);
10268 assert(anon_object->named);
10269 kr = vm_deallocate(kernel_map, anon_mapping, size);
10270 assert(kr == KERN_SUCCESS);
10271 assert(device_object->ref_count == 1);
10272 assert(!device_object->named);
10273 kr = vm_deallocate(kernel_map, device_mapping, size);
10274 assert(kr == KERN_SUCCESS);
10275
10276 printf("VM_TEST_DEVICE_PAGER_TRANSPOSE: PASS\n");
10277 }
10278 #else /* VM_TEST_DEVICE_PAGER_TRANSPOSE */
10279 #define vm_test_device_pager_transpose()
10280 #endif /* VM_TEST_DEVICE_PAGER_TRANSPOSE */
10281
10282 void
10283 vm_tests(void)
10284 {
10285 vm_test_collapse_compressor();
10286 vm_test_wire_and_extract();
10287 vm_test_page_wire_overflow_panic();
10288 vm_test_kernel_object_fault();
10289 vm_test_device_pager_transpose();
10290 }