]> git.saurik.com Git - apple/xnu.git/blob - osfmk/vm/vm_pageout.c
b5c35da6214d06b7f15f6d03dcde9bb925a0ed51
[apple/xnu.git] / osfmk / vm / vm_pageout.c
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * @OSF_COPYRIGHT@
24 */
25 /*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50 /*
51 */
52 /*
53 * File: vm/vm_pageout.c
54 * Author: Avadis Tevanian, Jr., Michael Wayne Young
55 * Date: 1985
56 *
57 * The proverbial page-out daemon.
58 */
59
60 #include <mach_pagemap.h>
61 #include <mach_cluster_stats.h>
62 #include <mach_kdb.h>
63 #include <advisory_pageout.h>
64
65 #include <mach/mach_types.h>
66 #include <mach/memory_object.h>
67 #include <mach/memory_object_default.h>
68 #include <mach/memory_object_control_server.h>
69 #include <mach/mach_host_server.h>
70 #include <mach/vm_param.h>
71 #include <mach/vm_statistics.h>
72 #include <kern/host_statistics.h>
73 #include <kern/counters.h>
74 #include <kern/thread.h>
75 #include <kern/xpr.h>
76 #include <vm/pmap.h>
77 #include <vm/vm_map.h>
78 #include <vm/vm_object.h>
79 #include <vm/vm_page.h>
80 #include <vm/vm_pageout.h>
81 #include <machine/vm_tuning.h>
82 #include <kern/misc_protos.h>
83
84 extern ipc_port_t memory_manager_default;
85
/*
 *	Compile-time defaults for the pageout daemon.  Every value is wrapped
 *	in #ifndef, so a definition seen earlier in the translation unit
 *	takes precedence over the default given here.
 */

#ifndef	VM_PAGE_LAUNDRY_MAX
#define	VM_PAGE_LAUNDRY_MAX	6	/* outstanding DMM page cleans */
#endif	/* VM_PAGE_LAUNDRY_MAX */

#ifndef	VM_PAGEOUT_BURST_MAX
#define	VM_PAGEOUT_BURST_MAX	32	/* simultaneous EMM page cleans */
#endif	/* VM_PAGEOUT_BURST_MAX */

#ifndef	VM_PAGEOUT_DISCARD_MAX
/*
 * NOTE(review): the original comment here duplicated the BURST_MAX one;
 * presumably this bounds outstanding discard requests -- confirm
 * against the code that uses it.
 */
#define	VM_PAGEOUT_DISCARD_MAX	68
#endif	/* VM_PAGEOUT_DISCARD_MAX */

#ifndef	VM_PAGEOUT_BURST_WAIT
#define	VM_PAGEOUT_BURST_WAIT	30	/* milliseconds per page */
#endif	/* VM_PAGEOUT_BURST_WAIT */

#ifndef	VM_PAGEOUT_EMPTY_WAIT
#define	VM_PAGEOUT_EMPTY_WAIT	200	/* milliseconds */
#endif	/* VM_PAGEOUT_EMPTY_WAIT */

/*
 *	Fraction of the active+inactive pages that ought to sit on the
 *	inactive queue.  The queue has to be long enough that a page placed
 *	on it has a chance of being referenced again before it is reclaimed,
 *	which is what makes the scheme a reasonable LRU approximation.
 *	The pageout daemon uses this to compute vm_page_inactive_target.
 *
 *	The daemon starts running when vm_page_free_count drops below
 *	vm_page_free_target while vm_page_inactive_count is also below
 *	vm_page_inactive_target.
 */

#ifndef	VM_PAGE_INACTIVE_TARGET
#define	VM_PAGE_INACTIVE_TARGET(avail)	((avail) * 1 / 3)
#endif	/* VM_PAGE_INACTIVE_TARGET */

/*
 *	Having started, the daemon keeps going until vm_page_free_count
 *	reaches or exceeds vm_page_free_target.
 */

#ifndef	VM_PAGE_FREE_TARGET
#define	VM_PAGE_FREE_TARGET(free)	(15 + (free) / 80)
#endif	/* VM_PAGE_FREE_TARGET */

/*
 *	Falling below vm_page_free_min always starts the daemon.
 */

#ifndef	VM_PAGE_FREE_MIN
#define	VM_PAGE_FREE_MIN(free)	(10 + (free) / 100)
#endif	/* VM_PAGE_FREE_MIN */

/*
 *	Below vm_page_free_reserved only vm-privileged threads may allocate
 *	pages.  That privilege lets the pageout daemon and the default pager
 *	(plus any helper threads default pageout needs) keep working by
 *	drawing on the reserved pool.  NCPUS must be defined by the time this
 *	macro is expanded.
 */

#ifndef	VM_PAGE_FREE_RESERVED
#define	VM_PAGE_FREE_RESERVED	\
	((16 * VM_PAGE_LAUNDRY_MAX) + NCPUS)
#endif	/* VM_PAGE_FREE_RESERVED */
152
153 /*
154 * Exported variable used to broadcast the activation of the pageout scan
155 * Working Set uses this to throttle its use of pmap removes. In this
156 * way, code which runs within memory in an uncontested context does
157 * not keep encountering soft faults.
158 */
159
160 unsigned int vm_pageout_scan_event_counter = 0;
161
162 /*
163 * Forward declarations for internal routines.
164 */
165 extern void vm_pageout_continue(void);
166 extern void vm_pageout_scan(void);
167 extern void vm_pageout_throttle(vm_page_t m);
168 extern vm_page_t vm_pageout_cluster_page(
169 vm_object_t object,
170 vm_object_offset_t offset,
171 boolean_t precious_clean);
172
173 unsigned int vm_pageout_reserved_internal = 0;
174 unsigned int vm_pageout_reserved_really = 0;
175
176 unsigned int vm_page_laundry_max = 0; /* # of clusters outstanding */
177 unsigned int vm_page_laundry_min = 0;
178 unsigned int vm_pageout_burst_max = 0;
179 unsigned int vm_pageout_burst_wait = 0; /* milliseconds per page */
180 unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
181 unsigned int vm_pageout_burst_min = 0;
182 unsigned int vm_pageout_pause_count = 0;
183 unsigned int vm_pageout_pause_max = 0;
184 unsigned int vm_free_page_pause = 100; /* milliseconds */
185
186 /*
187 * These variables record the pageout daemon's actions:
188 * how many pages it looks at and what happens to those pages.
189 * No locking needed because only one thread modifies the variables.
190 */
191
192 unsigned int vm_pageout_active = 0; /* debugging */
193 unsigned int vm_pageout_inactive = 0; /* debugging */
194 unsigned int vm_pageout_inactive_throttled = 0; /* debugging */
195 unsigned int vm_pageout_inactive_forced = 0; /* debugging */
196 unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
197 unsigned int vm_pageout_inactive_avoid = 0; /* debugging */
198 unsigned int vm_pageout_inactive_busy = 0; /* debugging */
199 unsigned int vm_pageout_inactive_absent = 0; /* debugging */
200 unsigned int vm_pageout_inactive_used = 0; /* debugging */
201 unsigned int vm_pageout_inactive_clean = 0; /* debugging */
202 unsigned int vm_pageout_inactive_dirty = 0; /* debugging */
203 unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */
204 unsigned int vm_stat_discard = 0; /* debugging */
205 unsigned int vm_stat_discard_sent = 0; /* debugging */
206 unsigned int vm_stat_discard_failure = 0; /* debugging */
207 unsigned int vm_stat_discard_throttle = 0; /* debugging */
208 unsigned int vm_pageout_scan_active_emm_throttle = 0; /* debugging */
209 unsigned int vm_pageout_scan_active_emm_throttle_success = 0; /* debugging */
210 unsigned int vm_pageout_scan_active_emm_throttle_failure = 0; /* debugging */
211 unsigned int vm_pageout_scan_inactive_emm_throttle = 0; /* debugging */
212 unsigned int vm_pageout_scan_inactive_emm_throttle_success = 0; /* debugging */
213 unsigned int vm_pageout_scan_inactive_emm_throttle_failure = 0; /* debugging */
214
215
216 unsigned int vm_pageout_out_of_line = 0;
217 unsigned int vm_pageout_in_place = 0;
218 /*
219 * Routine: vm_pageout_object_allocate
220 * Purpose:
221 * Allocate an object for use as out-of-line memory in a
222 * data_return/data_initialize message.
223 * The page must be in an unlocked object.
224 *
225 * If the page belongs to a trusted pager, cleaning in place
226 * will be used, which utilizes a special "pageout object"
227 * containing private alias pages for the real page frames.
228 * Untrusted pagers use normal out-of-line memory.
229 */
230 vm_object_t
231 vm_pageout_object_allocate(
232 vm_page_t m,
233 vm_size_t size,
234 vm_object_offset_t offset)
235 {
236 vm_object_t object = m->object;
237 vm_object_t new_object;
238
239 assert(object->pager_ready);
240
241 if (object->pager_trusted || object->internal)
242 vm_pageout_throttle(m);
243
244 new_object = vm_object_allocate(size);
245
246 if (object->pager_trusted) {
247 assert (offset < object->size);
248
249 vm_object_lock(new_object);
250 new_object->pageout = TRUE;
251 new_object->shadow = object;
252 new_object->can_persist = FALSE;
253 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
254 new_object->shadow_offset = offset;
255 vm_object_unlock(new_object);
256
257 /*
258 * Take a paging reference on the object. This will be dropped
259 * in vm_pageout_object_terminate()
260 */
261 vm_object_lock(object);
262 vm_object_paging_begin(object);
263 vm_object_unlock(object);
264
265 vm_pageout_in_place++;
266 } else
267 vm_pageout_out_of_line++;
268 return(new_object);
269 }
270
/*
 *	Cluster pageout statistics.  When MACH_CLUSTER_STATS is enabled the
 *	counters below exist and CLUSTER_STAT(clause) expands to its
 *	argument; otherwise CLUSTER_STAT(clause) expands to nothing.
 */
#if	!MACH_CLUSTER_STATS
#define	CLUSTER_STAT(clause)
#else	/* MACH_CLUSTER_STATS */
unsigned long	vm_pageout_cluster_dirtied = 0;
unsigned long	vm_pageout_cluster_cleaned = 0;
unsigned long	vm_pageout_cluster_collisions = 0;
unsigned long	vm_pageout_cluster_clusters = 0;
unsigned long	vm_pageout_cluster_conversions = 0;
unsigned long	vm_pageout_target_collisions = 0;
unsigned long	vm_pageout_target_page_dirtied = 0;
unsigned long	vm_pageout_target_page_freed = 0;
#define	CLUSTER_STAT(clause)	clause
#endif	/* MACH_CLUSTER_STATS */
284
285 /*
286 * Routine: vm_pageout_object_terminate
287 * Purpose:
288 * Destroy the pageout_object allocated by
289 * vm_pageout_object_allocate(), and perform all of the
290 * required cleanup actions.
291 *
292 * In/Out conditions:
293 * The object must be locked, and will be returned locked.
294 */
295 void
296 vm_pageout_object_terminate(
297 vm_object_t object)
298 {
299 vm_object_t shadow_object;
300
301 /*
302 * Deal with the deallocation (last reference) of a pageout object
303 * (used for cleaning-in-place) by dropping the paging references/
304 * freeing pages in the original object.
305 */
306
307 assert(object->pageout);
308 shadow_object = object->shadow;
309 vm_object_lock(shadow_object);
310
311 while (!queue_empty(&object->memq)) {
312 vm_page_t p, m;
313 vm_object_offset_t offset;
314
315 p = (vm_page_t) queue_first(&object->memq);
316
317 assert(p->private);
318 assert(p->pageout);
319 p->pageout = FALSE;
320 assert(!p->cleaning);
321
322 offset = p->offset;
323 VM_PAGE_FREE(p);
324 p = VM_PAGE_NULL;
325
326 m = vm_page_lookup(shadow_object,
327 offset + object->shadow_offset);
328
329 if(m == VM_PAGE_NULL)
330 continue;
331 assert(m->cleaning);
332 /* used as a trigger on upl_commit etc to recognize the */
333 /* pageout daemon's subseqent desire to pageout a cleaning */
334 /* page. When the bit is on the upl commit code will */
335 /* respect the pageout bit in the target page over the */
336 /* caller's page list indication */
337 m->dump_cleaning = FALSE;
338
339 /*
340 * Account for the paging reference taken when
341 * m->cleaning was set on this page.
342 */
343 vm_object_paging_end(shadow_object);
344 assert((m->dirty) || (m->precious) ||
345 (m->busy && m->cleaning));
346
347 /*
348 * Handle the trusted pager throttle.
349 */
350 vm_page_lock_queues();
351 if (m->laundry) {
352 vm_page_laundry_count--;
353 m->laundry = FALSE;
354 if (vm_page_laundry_count < vm_page_laundry_min) {
355 vm_page_laundry_min = 0;
356 thread_wakeup((event_t) &vm_page_laundry_count);
357 }
358 }
359
360 /*
361 * Handle the "target" page(s). These pages are to be freed if
362 * successfully cleaned. Target pages are always busy, and are
363 * wired exactly once. The initial target pages are not mapped,
364 * (so cannot be referenced or modified) but converted target
365 * pages may have been modified between the selection as an
366 * adjacent page and conversion to a target.
367 */
368 if (m->pageout) {
369 assert(m->busy);
370 assert(m->wire_count == 1);
371 m->cleaning = FALSE;
372 m->pageout = FALSE;
373 #if MACH_CLUSTER_STATS
374 if (m->wanted) vm_pageout_target_collisions++;
375 #endif
376 /*
377 * Revoke all access to the page. Since the object is
378 * locked, and the page is busy, this prevents the page
379 * from being dirtied after the pmap_is_modified() call
380 * returns.
381 */
382 pmap_page_protect(m->phys_addr, VM_PROT_NONE);
383
384 /*
385 * Since the page is left "dirty" but "not modifed", we
386 * can detect whether the page was redirtied during
387 * pageout by checking the modify state.
388 */
389 m->dirty = pmap_is_modified(m->phys_addr);
390
391 if (m->dirty) {
392 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
393 vm_page_unwire(m);/* reactivates */
394 VM_STAT(reactivations++);
395 PAGE_WAKEUP_DONE(m);
396 } else {
397 CLUSTER_STAT(vm_pageout_target_page_freed++;)
398 vm_page_free(m);/* clears busy, etc. */
399 }
400 vm_page_unlock_queues();
401 continue;
402 }
403 /*
404 * Handle the "adjacent" pages. These pages were cleaned in
405 * place, and should be left alone.
406 * If prep_pin_count is nonzero, then someone is using the
407 * page, so make it active.
408 */
409 if (!m->active && !m->inactive && !m->private) {
410 if (m->reference)
411 vm_page_activate(m);
412 else
413 vm_page_deactivate(m);
414 }
415 if((m->busy) && (m->cleaning)) {
416
417 /* the request_page_list case, (COPY_OUT_FROM FALSE) */
418 m->busy = FALSE;
419
420 /* We do not re-set m->dirty ! */
421 /* The page was busy so no extraneous activity */
422 /* could have occured. COPY_INTO is a read into the */
423 /* new pages. CLEAN_IN_PLACE does actually write */
424 /* out the pages but handling outside of this code */
425 /* will take care of resetting dirty. We clear the */
426 /* modify however for the Programmed I/O case. */
427 pmap_clear_modify(m->phys_addr);
428 if(m->absent) {
429 m->absent = FALSE;
430 if(shadow_object->absent_count == 1)
431 vm_object_absent_release(shadow_object);
432 else
433 shadow_object->absent_count--;
434 }
435 m->overwriting = FALSE;
436 } else if (m->overwriting) {
437 /* alternate request page list, write to page_list */
438 /* case. Occurs when the original page was wired */
439 /* at the time of the list request */
440 assert(m->wire_count != 0);
441 vm_page_unwire(m);/* reactivates */
442 m->overwriting = FALSE;
443 } else {
444 /*
445 * Set the dirty state according to whether or not the page was
446 * modified during the pageout. Note that we purposefully do
447 * NOT call pmap_clear_modify since the page is still mapped.
448 * If the page were to be dirtied between the 2 calls, this
449 * this fact would be lost. This code is only necessary to
450 * maintain statistics, since the pmap module is always
451 * consulted if m->dirty is false.
452 */
453 #if MACH_CLUSTER_STATS
454 m->dirty = pmap_is_modified(m->phys_addr);
455
456 if (m->dirty) vm_pageout_cluster_dirtied++;
457 else vm_pageout_cluster_cleaned++;
458 if (m->wanted) vm_pageout_cluster_collisions++;
459 #else
460 m->dirty = 0;
461 #endif
462 }
463 m->cleaning = FALSE;
464
465
466 /*
467 * Wakeup any thread waiting for the page to be un-cleaning.
468 */
469 PAGE_WAKEUP(m);
470 vm_page_unlock_queues();
471 }
472 /*
473 * Account for the paging reference taken in vm_paging_object_allocate.
474 */
475 vm_object_paging_end(shadow_object);
476 vm_object_unlock(shadow_object);
477
478 assert(object->ref_count == 0);
479 assert(object->paging_in_progress == 0);
480 assert(object->resident_page_count == 0);
481 return;
482 }
483
484 /*
485 * Routine: vm_pageout_setup
486 * Purpose:
487 * Set up a page for pageout (clean & flush).
488 *
489 * Move the page to a new object, as part of which it will be
490 * sent to its memory manager in a memory_object_data_write or
491 * memory_object_initialize message.
492 *
493 * The "new_object" and "new_offset" arguments
494 * indicate where the page should be moved.
495 *
496 * In/Out conditions:
497 * The page in question must not be on any pageout queues,
498 * and must be busy. The object to which it belongs
499 * must be unlocked, and the caller must hold a paging
500 * reference to it. The new_object must not be locked.
501 *
502 * This routine returns a pointer to a place-holder page,
503 * inserted at the same offset, to block out-of-order
504 * requests for the page. The place-holder page must
505 * be freed after the data_write or initialize message
506 * has been sent.
507 *
508 * The original page is put on a paging queue and marked
509 * not busy on exit.
510 */
511 vm_page_t
512 vm_pageout_setup(
513 register vm_page_t m,
514 register vm_object_t new_object,
515 vm_object_offset_t new_offset)
516 {
517 register vm_object_t old_object = m->object;
518 vm_object_offset_t paging_offset;
519 vm_object_offset_t offset;
520 register vm_page_t holding_page;
521 register vm_page_t new_m;
522 register vm_page_t new_page;
523 boolean_t need_to_wire = FALSE;
524
525
526 XPR(XPR_VM_PAGEOUT,
527 "vm_pageout_setup, obj 0x%X off 0x%X page 0x%X new obj 0x%X offset 0x%X\n",
528 (integer_t)m->object, (integer_t)m->offset,
529 (integer_t)m, (integer_t)new_object,
530 (integer_t)new_offset);
531 assert(m && m->busy && !m->absent && !m->fictitious && !m->error &&
532 !m->restart);
533
534 assert(m->dirty || m->precious);
535
536 /*
537 * Create a place-holder page where the old one was, to prevent
538 * attempted pageins of this page while we're unlocked.
539 */
540 VM_PAGE_GRAB_FICTITIOUS(holding_page);
541
542 vm_object_lock(old_object);
543
544 offset = m->offset;
545 paging_offset = offset + old_object->paging_offset;
546
547 if (old_object->pager_trusted) {
548 /*
549 * This pager is trusted, so we can clean this page
550 * in place. Leave it in the old object, and mark it
551 * cleaning & pageout.
552 */
553 new_m = holding_page;
554 holding_page = VM_PAGE_NULL;
555
556 /*
557 * Set up new page to be private shadow of real page.
558 */
559 new_m->phys_addr = m->phys_addr;
560 new_m->fictitious = FALSE;
561 new_m->pageout = TRUE;
562
563 /*
564 * Mark real page as cleaning (indicating that we hold a
565 * paging reference to be released via m_o_d_r_c) and
566 * pageout (indicating that the page should be freed
567 * when the pageout completes).
568 */
569 pmap_clear_modify(m->phys_addr);
570 vm_page_lock_queues();
571 new_m->private = TRUE;
572 vm_page_wire(new_m);
573 m->cleaning = TRUE;
574 m->pageout = TRUE;
575
576 vm_page_wire(m);
577 assert(m->wire_count == 1);
578 vm_page_unlock_queues();
579
580 m->dirty = TRUE;
581 m->precious = FALSE;
582 m->page_lock = VM_PROT_NONE;
583 m->unusual = FALSE;
584 m->unlock_request = VM_PROT_NONE;
585 } else {
586 /*
587 * Cannot clean in place, so rip the old page out of the
588 * object, and stick the holding page in. Set new_m to the
589 * page in the new object.
590 */
591 vm_page_lock_queues();
592 VM_PAGE_QUEUES_REMOVE(m);
593 vm_page_remove(m);
594
595 vm_page_insert(holding_page, old_object, offset);
596 vm_page_unlock_queues();
597
598 m->dirty = TRUE;
599 m->precious = FALSE;
600 new_m = m;
601 new_m->page_lock = VM_PROT_NONE;
602 new_m->unlock_request = VM_PROT_NONE;
603
604 if (old_object->internal)
605 need_to_wire = TRUE;
606 }
607 /*
608 * Record that this page has been written out
609 */
610 #if MACH_PAGEMAP
611 vm_external_state_set(old_object->existence_map, offset);
612 #endif /* MACH_PAGEMAP */
613
614 vm_object_unlock(old_object);
615
616 vm_object_lock(new_object);
617
618 /*
619 * Put the page into the new object. If it is a not wired
620 * (if it's the real page) it will be activated.
621 */
622
623 vm_page_lock_queues();
624 vm_page_insert(new_m, new_object, new_offset);
625 if (need_to_wire)
626 vm_page_wire(new_m);
627 else
628 vm_page_activate(new_m);
629 PAGE_WAKEUP_DONE(new_m);
630 vm_page_unlock_queues();
631
632 vm_object_unlock(new_object);
633
634 /*
635 * Return the placeholder page to simplify cleanup.
636 */
637 return (holding_page);
638 }
639
640 /*
641 * Routine: vm_pageclean_setup
642 *
643 * Purpose: setup a page to be cleaned (made non-dirty), but not
644 * necessarily flushed from the VM page cache.
645 * This is accomplished by cleaning in place.
646 *
647 * The page must not be busy, and the object and page
648 * queues must be locked.
649 *
650 */
651 void
652 vm_pageclean_setup(
653 vm_page_t m,
654 vm_page_t new_m,
655 vm_object_t new_object,
656 vm_object_offset_t new_offset)
657 {
658 vm_object_t old_object = m->object;
659 assert(!m->busy);
660 assert(!m->cleaning);
661
662 XPR(XPR_VM_PAGEOUT,
663 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
664 (integer_t)old_object, m->offset, (integer_t)m,
665 (integer_t)new_m, new_offset);
666
667 pmap_clear_modify(m->phys_addr);
668 vm_object_paging_begin(old_object);
669
670 /*
671 * Record that this page has been written out
672 */
673 #if MACH_PAGEMAP
674 vm_external_state_set(old_object->existence_map, m->offset);
675 #endif /*MACH_PAGEMAP*/
676
677 /*
678 * Mark original page as cleaning in place.
679 */
680 m->cleaning = TRUE;
681 m->dirty = TRUE;
682 m->precious = FALSE;
683
684 /*
685 * Convert the fictitious page to a private shadow of
686 * the real page.
687 */
688 assert(new_m->fictitious);
689 new_m->fictitious = FALSE;
690 new_m->private = TRUE;
691 new_m->pageout = TRUE;
692 new_m->phys_addr = m->phys_addr;
693 vm_page_wire(new_m);
694
695 vm_page_insert(new_m, new_object, new_offset);
696 assert(!new_m->wanted);
697 new_m->busy = FALSE;
698 }
699
700 void
701 vm_pageclean_copy(
702 vm_page_t m,
703 vm_page_t new_m,
704 vm_object_t new_object,
705 vm_object_offset_t new_offset)
706 {
707 XPR(XPR_VM_PAGEOUT,
708 "vm_pageclean_copy, page 0x%X new_m 0x%X new_obj 0x%X offset 0x%X\n",
709 m, new_m, new_object, new_offset, 0);
710
711 assert((!m->busy) && (!m->cleaning));
712
713 assert(!new_m->private && !new_m->fictitious);
714
715 pmap_clear_modify(m->phys_addr);
716
717 m->busy = TRUE;
718 vm_object_paging_begin(m->object);
719 vm_page_unlock_queues();
720 vm_object_unlock(m->object);
721
722 /*
723 * Copy the original page to the new page.
724 */
725 vm_page_copy(m, new_m);
726
727 /*
728 * Mark the old page as clean. A request to pmap_is_modified
729 * will get the right answer.
730 */
731 vm_object_lock(m->object);
732 m->dirty = FALSE;
733
734 vm_object_paging_end(m->object);
735
736 vm_page_lock_queues();
737 if (!m->active && !m->inactive)
738 vm_page_activate(m);
739 PAGE_WAKEUP_DONE(m);
740
741 vm_page_insert(new_m, new_object, new_offset);
742 vm_page_activate(new_m);
743 new_m->busy = FALSE; /* No other thread can be waiting */
744 }
745
746
747 /*
748 * Routine: vm_pageout_initialize_page
749 * Purpose:
750 * Causes the specified page to be initialized in
751 * the appropriate memory object. This routine is used to push
752 * pages into a copy-object when they are modified in the
753 * permanent object.
754 *
755 * The page is moved to a temporary object and paged out.
756 *
757 * In/out conditions:
758 * The page in question must not be on any pageout queues.
759 * The object to which it belongs must be locked.
760 * The page must be busy, but not hold a paging reference.
761 *
762 * Implementation:
763 * Move this page to a completely new object.
764 */
765 void
766 vm_pageout_initialize_page(
767 vm_page_t m)
768 {
769 vm_map_copy_t copy;
770 vm_object_t new_object;
771 vm_object_t object;
772 vm_object_offset_t paging_offset;
773 vm_page_t holding_page;
774
775
776 XPR(XPR_VM_PAGEOUT,
777 "vm_pageout_initialize_page, page 0x%X\n",
778 (integer_t)m, 0, 0, 0, 0);
779 assert(m->busy);
780
781 /*
782 * Verify that we really want to clean this page
783 */
784 assert(!m->absent);
785 assert(!m->error);
786 assert(m->dirty);
787
788 /*
789 * Create a paging reference to let us play with the object.
790 */
791 object = m->object;
792 paging_offset = m->offset + object->paging_offset;
793 vm_object_paging_begin(object);
794 vm_object_unlock(object);
795 if (m->absent || m->error || m->restart ||
796 (!m->dirty && !m->precious)) {
797 VM_PAGE_FREE(m);
798 panic("reservation without pageout?"); /* alan */
799 return;
800 }
801
802 /* set the page for future call to vm_fault_list_request */
803 holding_page = NULL;
804 vm_object_lock(m->object);
805 vm_page_lock_queues();
806 pmap_clear_modify(m->phys_addr);
807 m->dirty = TRUE;
808 m->busy = TRUE;
809 m->list_req_pending = TRUE;
810 m->cleaning = TRUE;
811 m->pageout = TRUE;
812 vm_page_wire(m);
813 vm_page_unlock_queues();
814 vm_object_unlock(m->object);
815 vm_pageout_throttle(m);
816
817 /*
818 * Write the data to its pager.
819 * Note that the data is passed by naming the new object,
820 * not a virtual address; the pager interface has been
821 * manipulated to use the "internal memory" data type.
822 * [The object reference from its allocation is donated
823 * to the eventual recipient.]
824 */
825 memory_object_data_initialize(object->pager,
826 paging_offset,
827 PAGE_SIZE);
828
829 vm_object_lock(object);
830 }
831
832 #if MACH_CLUSTER_STATS
833 #define MAXCLUSTERPAGES 16
834 struct {
835 unsigned long pages_in_cluster;
836 unsigned long pages_at_higher_offsets;
837 unsigned long pages_at_lower_offsets;
838 } cluster_stats[MAXCLUSTERPAGES];
839 #endif /* MACH_CLUSTER_STATS */
840
841 boolean_t allow_clustered_pageouts = FALSE;
842
843 /*
844 * vm_pageout_cluster:
845 *
846 * Given a page, page it out, and attempt to clean adjacent pages
847 * in the same operation.
848 *
849 * The page must be busy, and the object unlocked w/ paging reference
850 * to prevent deallocation or collapse. The page must not be on any
851 * pageout queue.
852 */
853 void
854 vm_pageout_cluster(
855 vm_page_t m)
856 {
857 vm_object_t object = m->object;
858 vm_object_offset_t offset = m->offset; /* from vm_object start */
859 vm_object_offset_t paging_offset = m->offset + object->paging_offset;
860 vm_object_t new_object;
861 vm_object_offset_t new_offset;
862 vm_size_t cluster_size;
863 vm_object_offset_t cluster_offset; /* from memory_object start */
864 vm_object_offset_t cluster_lower_bound; /* from vm_object_start */
865 vm_object_offset_t cluster_upper_bound; /* from vm_object_start */
866 vm_object_offset_t cluster_start, cluster_end;/* from vm_object start */
867 vm_object_offset_t offset_within_cluster;
868 vm_size_t length_of_data;
869 vm_page_t friend, holding_page;
870 kern_return_t rc;
871 boolean_t precious_clean = TRUE;
872 int pages_in_cluster;
873
874 CLUSTER_STAT(int pages_at_higher_offsets = 0;)
875 CLUSTER_STAT(int pages_at_lower_offsets = 0;)
876
877 XPR(XPR_VM_PAGEOUT,
878 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
879 (integer_t)object, offset, (integer_t)m, 0, 0);
880
881 CLUSTER_STAT(vm_pageout_cluster_clusters++;)
882 /*
883 * Only a certain kind of page is appreciated here.
884 */
885 assert(m->busy && (m->dirty || m->precious) && (m->wire_count == 0));
886 assert(!m->cleaning && !m->pageout && !m->inactive && !m->active);
887
888 vm_object_lock(object);
889 cluster_size = object->cluster_size;
890
891 assert(cluster_size >= PAGE_SIZE);
892 if (cluster_size < PAGE_SIZE) cluster_size = PAGE_SIZE;
893 assert(object->pager_created && object->pager_initialized);
894 assert(object->internal || object->pager_ready);
895
896 if (m->precious && !m->dirty)
897 precious_clean = TRUE;
898
899 if (!object->pager_trusted || !allow_clustered_pageouts)
900 cluster_size = PAGE_SIZE;
901 vm_object_unlock(object);
902
903 cluster_offset = paging_offset & (vm_object_offset_t)(cluster_size - 1);
904 /* bytes from beginning of cluster */
905 /*
906 * Due to unaligned mappings, we have to be careful
907 * of negative offsets into the VM object. Clip the cluster
908 * boundary to the VM object, not the memory object.
909 */
910 if (offset > cluster_offset) {
911 cluster_lower_bound = offset - cluster_offset;
912 /* from vm_object */
913 } else {
914 cluster_lower_bound = 0;
915 }
916 cluster_upper_bound = (offset - cluster_offset) +
917 (vm_object_offset_t)cluster_size;
918
919 /* set the page for future call to vm_fault_list_request */
920 holding_page = NULL;
921 vm_object_lock(m->object);
922 vm_page_lock_queues();
923 m->busy = TRUE;
924 m->list_req_pending = TRUE;
925 m->cleaning = TRUE;
926 m->pageout = TRUE;
927 vm_page_wire(m);
928 vm_page_unlock_queues();
929 vm_object_unlock(m->object);
930 vm_pageout_throttle(m);
931
932 /*
933 * Search backward for adjacent eligible pages to clean in
934 * this operation.
935 */
936
937 cluster_start = offset;
938 if (offset) { /* avoid wrap-around at zero */
939 for (cluster_start = offset - PAGE_SIZE_64;
940 cluster_start >= cluster_lower_bound;
941 cluster_start -= PAGE_SIZE_64) {
942 assert(cluster_size > PAGE_SIZE);
943
944 vm_object_lock(object);
945 vm_page_lock_queues();
946
947 if ((friend = vm_pageout_cluster_page(object, cluster_start,
948 precious_clean)) == VM_PAGE_NULL) {
949 vm_page_unlock_queues();
950 vm_object_unlock(object);
951 break;
952 }
953 new_offset = (cluster_start + object->paging_offset)
954 & (cluster_size - 1);
955
956 assert(new_offset < cluster_offset);
957 m->list_req_pending = TRUE;
958 m->cleaning = TRUE;
959 /* do nothing except advance the write request, all we really need to */
960 /* do is push the target page and let the code at the other end decide */
961 /* what is really the right size */
962 if (vm_page_free_count <= vm_page_free_reserved) {
963 m->busy = TRUE;
964 m->pageout = TRUE;
965 vm_page_wire(m);
966 }
967
968 vm_page_unlock_queues();
969 vm_object_unlock(object);
970 if(m->dirty || m->object->internal) {
971 CLUSTER_STAT(pages_at_lower_offsets++;)
972 }
973
974 }
975 cluster_start += PAGE_SIZE_64;
976 }
977 assert(cluster_start >= cluster_lower_bound);
978 assert(cluster_start <= offset);
979 /*
980 * Search forward for adjacent eligible pages to clean in
981 * this operation.
982 */
983 for (cluster_end = offset + PAGE_SIZE_64;
984 cluster_end < cluster_upper_bound;
985 cluster_end += PAGE_SIZE_64) {
986 assert(cluster_size > PAGE_SIZE);
987
988 vm_object_lock(object);
989 vm_page_lock_queues();
990
991 if ((friend = vm_pageout_cluster_page(object, cluster_end,
992 precious_clean)) == VM_PAGE_NULL) {
993 vm_page_unlock_queues();
994 vm_object_unlock(object);
995 break;
996 }
997 new_offset = (cluster_end + object->paging_offset)
998 & (cluster_size - 1);
999
1000 assert(new_offset < cluster_size);
1001 m->list_req_pending = TRUE;
1002 m->cleaning = TRUE;
1003 /* do nothing except advance the write request, all we really need to */
1004 /* do is push the target page and let the code at the other end decide */
1005 /* what is really the right size */
1006 if (vm_page_free_count <= vm_page_free_reserved) {
1007 m->busy = TRUE;
1008 m->pageout = TRUE;
1009 vm_page_wire(m);
1010 }
1011
1012 vm_page_unlock_queues();
1013 vm_object_unlock(object);
1014
1015 if(m->dirty || m->object->internal) {
1016 CLUSTER_STAT(pages_at_higher_offsets++;)
1017 }
1018 }
1019 assert(cluster_end <= cluster_upper_bound);
1020 assert(cluster_end >= offset + PAGE_SIZE);
1021
1022 /*
1023 * (offset - cluster_offset) is beginning of cluster_object
1024 * relative to vm_object start.
1025 */
1026 offset_within_cluster = cluster_start - (offset - cluster_offset);
1027 length_of_data = cluster_end - cluster_start;
1028
1029 assert(offset_within_cluster < cluster_size);
1030 assert((offset_within_cluster + length_of_data) <= cluster_size);
1031
1032 rc = KERN_SUCCESS;
1033 assert(rc == KERN_SUCCESS);
1034
1035 pages_in_cluster = length_of_data/PAGE_SIZE;
1036
1037 #if MACH_CLUSTER_STATS
1038 (cluster_stats[pages_at_lower_offsets].pages_at_lower_offsets)++;
1039 (cluster_stats[pages_at_higher_offsets].pages_at_higher_offsets)++;
1040 (cluster_stats[pages_in_cluster].pages_in_cluster)++;
1041 #endif /* MACH_CLUSTER_STATS */
1042
1043 /*
1044 * Send the data to the pager.
1045 */
1046 paging_offset = cluster_start + object->paging_offset;
1047
1048 rc = memory_object_data_return(object->pager,
1049 paging_offset,
1050 length_of_data,
1051 !precious_clean,
1052 FALSE);
1053
1054 vm_object_lock(object);
1055 vm_object_paging_end(object);
1056
1057 if (holding_page) {
1058 assert(!object->pager_trusted);
1059 VM_PAGE_FREE(holding_page);
1060 vm_object_paging_end(object);
1061 }
1062
1063 vm_object_unlock(object);
1064 }
1065
1066 /*
1067 * Trusted pager throttle.
1068 * Object must be unlocked, page queues must be unlocked.
1069 */
1070 void
1071 vm_pageout_throttle(
1072 register vm_page_t m)
1073 {
1074 vm_page_lock_queues();
1075 assert(!m->laundry);
1076 m->laundry = TRUE;
1077 while (vm_page_laundry_count >= vm_page_laundry_max) {
1078 /*
1079 * Set the threshold for when vm_page_free()
1080 * should wake us up.
1081 */
1082 vm_page_laundry_min = vm_page_laundry_max/2;
1083
1084 assert_wait((event_t) &vm_page_laundry_count, THREAD_UNINT);
1085 vm_page_unlock_queues();
1086
1087 /*
1088 * Pause to let the default pager catch up.
1089 */
1090 thread_block((void (*)(void)) 0);
1091 vm_page_lock_queues();
1092 }
1093 vm_page_laundry_count++;
1094 vm_page_unlock_queues();
1095 }
1096
1097 /*
1098 * The global variable vm_pageout_clean_active_pages controls whether
1099 * active pages are considered valid to be cleaned in place during a
1100 * clustered pageout. Performance measurements are necessary to determine
1101 * the best policy.
 *
 * It is read by vm_pageout_cluster_page: when non-zero, a page found on
 * either the active or the inactive queue may be chosen as a cluster
 * member; when zero, only pages on the inactive queue are eligible.
1102 */
1103 int vm_pageout_clean_active_pages = 1;
1104 /*
1105 * vm_pageout_cluster_page: [Internal]
1106 *
1107 * return a vm_page_t to the page at (object,offset) if it is appropriate
1108 * to clean in place. Pages that are non-existent, busy, absent, already
1109 * cleaning, or not dirty are not eligible to be cleaned as an adjacent
1110 * page in a cluster.
1111 *
1112 * The object must be locked on entry, and remains locked throughout
1113 * this call.
1114 */
1115
1116 vm_page_t
1117 vm_pageout_cluster_page(
1118 vm_object_t object,
1119 vm_object_offset_t offset,
1120 boolean_t precious_clean)
1121 {
1122 vm_page_t m;
1123
1124 XPR(XPR_VM_PAGEOUT,
1125 "vm_pageout_cluster_page, object 0x%X offset 0x%X\n",
1126 (integer_t)object, offset, 0, 0, 0);
1127
1128 if ((m = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
1129 return(VM_PAGE_NULL);
1130
1131 if (m->busy || m->absent || m->cleaning ||
1132 (m->wire_count != 0) || m->error)
1133 return(VM_PAGE_NULL);
1134
1135 if (vm_pageout_clean_active_pages) {
1136 if (!m->active && !m->inactive) return(VM_PAGE_NULL);
1137 } else {
1138 if (!m->inactive) return(VM_PAGE_NULL);
1139 }
1140
1141 assert(!m->private);
1142 assert(!m->fictitious);
1143
1144 if (!m->dirty) m->dirty = pmap_is_modified(m->phys_addr);
1145
1146 if (precious_clean) {
1147 if (!m->precious || !m->dirty)
1148 return(VM_PAGE_NULL);
1149 } else {
1150 if (!m->dirty)
1151 return(VM_PAGE_NULL);
1152 }
1153 return(m);
1154 }
1155
1156 /*
1157 * vm_pageout_scan does the dirty work for the pageout daemon.
1158 * It returns with vm_page_queue_free_lock held and
1159 * vm_page_free_wanted == 0.
 *
 * Each pass of the main loop first moves pages from the active queue
 * to the inactive queue until vm_page_inactive_target is met, then
 * takes one page from the inactive queue and either frees it (absent,
 * in error, clean and not precious, or its pager was destroyed),
 * reactivates it (referenced, or no pager could be created for it),
 * converts it into a pageout target (already being cleaned), or hands
 * it to vm_pageout_cluster.
 *
 * The only way out of the loop is the break taken once
 * vm_page_free_count has reached vm_page_free_target and
 * vm_page_free_wanted is zero; vm_page_queue_free_lock is still held
 * at that point.  When there is nothing to do the thread sleeps with
 * a timeout, calls vm_pageout_scan_continue and starts over at Restart.
1160 */
1161 extern void vm_pageout_scan_continue(void); /* forward; */
1162
1163 void
1164 vm_pageout_scan(void)
1165 {
1166 unsigned int burst_count;
1167 boolean_t now = FALSE;
1168 unsigned int laundry_pages;
1169 boolean_t need_more_inactive_pages;
1170 unsigned int loop_detect;
/*
 * Note: laundry_pages is not referenced anywhere else in this function,
 * and "now" is only used when THREAD_SWAPPER is configured.
 */
1171
1172 XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
1173
1174 /*???*/ /*
1175 * We want to gradually dribble pages from the active queue
1176 * to the inactive queue. If we let the inactive queue get
1177 * very small, and then suddenly dump many pages into it,
1178 * those pages won't get a sufficient chance to be referenced
1179 * before we start taking them from the inactive queue.
1180 *
1181 * We must limit the rate at which we send pages to the pagers.
1182 * data_write messages consume memory, for message buffers and
1183 * for map-copy objects. If we get too far ahead of the pagers,
1184 * we can potentially run out of memory.
1185 *
1186 * We can use the laundry count to limit directly the number
1187 * of pages outstanding to the default pager. A similar
1188 * strategy for external pagers doesn't work, because
1189 * external pagers don't have to deallocate the pages sent them,
1190 * and because we might have to send pages to external pagers
1191 * even if they aren't processing writes. So we also
1192 * use a burst count to limit writes to external pagers.
1193 *
1194 * When memory is very tight, we can't rely on external pagers to
1195 * clean pages. They probably aren't running, because they
1196 * aren't vm-privileged. If we kept sending dirty pages to them,
1197 * we could exhaust the free list. However, we can't just ignore
1198 * pages belonging to external objects, because there might be no
1199 * pages belonging to internal objects. Hence, we get the page
1200 * into an internal object and then immediately double-page it,
1201 * sending it to the default pager.
1202 *
1203 * consider_zone_gc should be last, because the other operations
1204 * might return memory to zones.
1205 */
1206
1207
1208 Restart:
1209
1210 #if THREAD_SWAPPER
1211 mutex_lock(&vm_page_queue_free_lock);
1212 now = (vm_page_free_count < vm_page_free_min);
1213 mutex_unlock(&vm_page_queue_free_lock);
1214
1215 swapout_threads(now);
1216 #endif /* THREAD_SWAPPER */
1217
1218 stack_collect();
1219 consider_task_collect();
1220 consider_thread_collect();
1221 consider_zone_gc();
1222 consider_machine_collect();
1223
/*
 * loop_detect bounds the number of inactive pages examined per Restart:
 * it starts at the current number of active plus inactive pages and is
 * decremented at the pause check below.  When it reaches zero the thread
 * prints a warning and sleeps for vm_free_page_pause.
 */
1224 loop_detect = vm_page_active_count + vm_page_inactive_count;
1225 #if 0
1226 if (vm_page_free_count <= vm_page_free_reserved) {
1227 need_more_inactive_pages = TRUE;
1228 } else {
1229 need_more_inactive_pages = FALSE;
1230 }
1231 #else
1232 need_more_inactive_pages = FALSE;
1233 #endif
1234
/*
 * burst_count is set to 0 here and is not incremented anywhere in this
 * function (the only increment, near the call to vm_pageout_cluster, is
 * commented out), so it stays 0 for the whole pass.
 */
1235 for (burst_count = 0;;) {
1236 register vm_page_t m;
1237 register vm_object_t object;
1238
1239 /*
1240 * Recalculate vm_page_inactivate_target.
1241 */
1242
1243 vm_page_lock_queues();
1244 vm_page_inactive_target =
1245 VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1246 vm_page_inactive_count);
1247
1248 /*
1249 * Move pages from active to inactive.
1250 */
1251
1252 while ((vm_page_inactive_count < vm_page_inactive_target ||
1253 need_more_inactive_pages) &&
1254 !queue_empty(&vm_page_queue_active)) {
/* This declaration shadows the "object" of the enclosing for loop. */
1255 register vm_object_t object;
1256
1257 vm_pageout_active++;
1258 m = (vm_page_t) queue_first(&vm_page_queue_active);
1259
1260 /*
1261 * If we're getting really low on memory,
1262 * try selecting a page that will go
1263 * directly to the default_pager.
1264 * If there are no such pages, we have to
1265 * page out a page backed by an EMM,
1266 * so that the default_pager can recover
1267 * it eventually.
1268 */
1269 if (need_more_inactive_pages &&
1270 (IP_VALID(memory_manager_default))) {
1271 vm_pageout_scan_active_emm_throttle++;
1272 do {
1273 assert(m->active && !m->inactive);
1274 object = m->object;
1275
1276 if (vm_object_lock_try(object)) {
1277 #if 0
1278 if (object->pager_trusted ||
1279 object->internal) {
1280 /* found one ! */
1281 vm_pageout_scan_active_emm_throttle_success++;
1282 goto object_locked_active;
1283 }
1284 #else
1285 vm_pageout_scan_active_emm_throttle_success++;
1286 goto object_locked_active;
1287 #endif
/*
 * With the #else branch compiled in, the goto above is unconditional
 * and this unlock is never reached.
 */
1288 vm_object_unlock(object);
1289 }
1290 m = (vm_page_t) queue_next(&m->pageq);
1291 } while (!queue_end(&vm_page_queue_active,
1292 (queue_entry_t) m));
1293 if (queue_end(&vm_page_queue_active,
1294 (queue_entry_t) m)) {
1295 vm_pageout_scan_active_emm_throttle_failure++;
1296 m = (vm_page_t)
1297 queue_first(&vm_page_queue_active);
1298 }
1299 }
1300
1301 assert(m->active && !m->inactive);
1302
1303 object = m->object;
1304 if (!vm_object_lock_try(object)) {
1305 /*
1306 * Move page to end and continue.
1307 */
1308
1309 queue_remove(&vm_page_queue_active, m,
1310 vm_page_t, pageq);
1311 queue_enter(&vm_page_queue_active, m,
1312 vm_page_t, pageq);
1313 vm_page_unlock_queues();
1314
1315 mutex_pause();
1316 vm_page_lock_queues();
1317 continue;
1318 }
1319
1320 object_locked_active:
1321 /*
1322 * If the page is busy, then we pull it
1323 * off the active queue and leave it alone.
1324 */
1325
1326 if (m->busy) {
1327 vm_object_unlock(object);
1328 queue_remove(&vm_page_queue_active, m,
1329 vm_page_t, pageq);
1330 m->active = FALSE;
1331 if (!m->fictitious)
1332 vm_page_active_count--;
1333 continue;
1334 }
1335
1336 /*
1337 * Deactivate the page while holding the object
1338 * locked, so we know the page is still not busy.
1339 * This should prevent races between pmap_enter
1340 * and pmap_clear_reference. The page might be
1341 * absent or fictitious, but vm_page_deactivate
1342 * can handle that.
1343 */
1344
1345 vm_page_deactivate(m);
1346 vm_object_unlock(object);
1347 }
1348
1349 /*
1350 * We are done if we have met our target *and*
1351 * nobody is still waiting for a page.
 *
 * The count is first tested without vm_page_queue_free_lock and
 * then re-tested with it held.  On success the page queues lock
 * is dropped but vm_page_queue_free_lock stays held across the
 * break.
1352 */
1353 if (vm_page_free_count >= vm_page_free_target) {
1354 mutex_lock(&vm_page_queue_free_lock);
1355 if ((vm_page_free_count >= vm_page_free_target) &&
1356 (vm_page_free_wanted == 0)) {
1357 vm_page_unlock_queues();
1358 break;
1359 }
1360 mutex_unlock(&vm_page_queue_free_lock);
1361 }
1362 /*
1363 * Sometimes we have to pause:
1364 * 1) No inactive pages - nothing to do.
1365 * 2) Flow control - wait for untrusted pagers to catch up.
 * 3) loop_detect ran out - this many pages have been examined
 * since Restart without reaching the free target.
 *
 * loop_detect is only decremented when the inactive queue is
 * not empty, because of the short-circuit evaluation below.
1366 */
1367
1368 if (queue_empty(&vm_page_queue_inactive) ||
1369 ((--loop_detect) == 0) ||
1370 (burst_count >= vm_pageout_burst_max)) {
1371 unsigned int pages, msecs;
1372 int wait_result;
1373
1374 consider_machine_adjust();
1375 /*
1376 * vm_pageout_burst_wait is msecs/page.
1377 * If there is nothing for us to do, we wait
1378 * at least vm_pageout_empty_wait msecs.
1379 */
/* "pages" is assigned here but its value is not read afterwards. */
1380 pages = burst_count;
1381
1382 if (loop_detect == 0) {
1383 printf("Warning: No physical memory suitable for pageout or reclaim, pageout thread temporarily going to sleep\n");
1384 msecs = vm_free_page_pause;
1385 }
1386 else {
1387 msecs = burst_count * vm_pageout_burst_wait;
1388 }
1389
1390 if (queue_empty(&vm_page_queue_inactive) &&
1391 (msecs < vm_pageout_empty_wait))
1392 msecs = vm_pageout_empty_wait;
1393 vm_page_unlock_queues();
1394
1395 assert_wait_timeout(msecs, THREAD_INTERRUPTIBLE);
1396 counter(c_vm_pageout_scan_block++);
1397
1398 /*
1399 * Unfortunately, we don't have call_continuation
1400 * so we can't rely on tail-recursion.
1401 */
1402 wait_result = thread_block((void (*)(void)) 0);
1403 if (wait_result != THREAD_TIMED_OUT)
1404 thread_cancel_timer();
1405 vm_pageout_scan_continue();
1406
1407 goto Restart;
1408 /*NOTREACHED*/
1409 }
1410
1411 vm_pageout_inactive++;
1412 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
1413
1414 if ((vm_page_free_count <= vm_page_free_reserved) &&
1415 (IP_VALID(memory_manager_default))) {
1416 /*
1417 * We're really low on memory. Try to select a page that
1418 * would go directly to the default_pager.
1419 * If there are no such pages, we have to page out a
1420 * page backed by an EMM, so that the default_pager
1421 * can recover it eventually.
1422 */
1423 vm_pageout_scan_inactive_emm_throttle++;
1424 do {
1425 assert(!m->active && m->inactive);
1426 object = m->object;
1427
1428 if (vm_object_lock_try(object)) {
1429 #if 0
1430 if (object->pager_trusted ||
1431 object->internal) {
1432 /* found one ! */
1433 vm_pageout_scan_inactive_emm_throttle_success++;
1434 goto object_locked_inactive;
1435 }
1436 #else
1437 vm_pageout_scan_inactive_emm_throttle_success++;
1438 goto object_locked_inactive;
1439 #endif /* 0 */
/*
 * With the #else branch compiled in, the goto above is unconditional
 * and this unlock is never reached.
 */
1440 vm_object_unlock(object);
1441 }
1442 m = (vm_page_t) queue_next(&m->pageq);
1443 } while (!queue_end(&vm_page_queue_inactive,
1444 (queue_entry_t) m));
1445 if (queue_end(&vm_page_queue_inactive,
1446 (queue_entry_t) m)) {
1447 vm_pageout_scan_inactive_emm_throttle_failure++;
1448 /*
1449 * We should check the "active" queue
1450 * for good candidates to page out.
 *
 * This is the only place the flag is set; it
 * is cleared again at Restart.
1451 */
1452 need_more_inactive_pages = TRUE;
1453
1454 m = (vm_page_t)
1455 queue_first(&vm_page_queue_inactive);
1456 }
1457 }
1458
1459 assert(!m->active && m->inactive);
1460 object = m->object;
1461
1462 /*
1463 * Try to lock object; since we've got the
1464 * page queues lock, we can only try for this one.
1465 */
1466
1467 if (!vm_object_lock_try(object)) {
1468 /*
1469 * Move page to end and continue.
1470 * Don't re-issue ticket
 *
 * The page queues lock is dropped here and taken
 * again at the top of the for loop.
1471 */
1472 queue_remove(&vm_page_queue_inactive, m,
1473 vm_page_t, pageq);
1474 queue_enter(&vm_page_queue_inactive, m,
1475 vm_page_t, pageq);
1476 vm_page_unlock_queues();
1477
1478 mutex_pause();
1479 vm_pageout_inactive_nolock++;
1480 continue;
1481 }
1482
1483 object_locked_inactive:
1484 /*
1485 * Paging out pages of objects which pager is being
1486 * created by another thread must be avoided, because
1487 * this thread may claim for memory, thus leading to a
1488 * possible dead lock between it and the pageout thread
1489 * which will wait for pager creation, if such pages are
1490 * finally chosen. The remaining assumption is that there
1491 * will finally be enough available pages in the inactive
1492 * pool to page out in order to satisfy all memory claimed
1493 * by the thread which concurrently creates the pager.
1494 */
1495
1496 if (!object->pager_initialized && object->pager_created) {
1497 /*
1498 * Move page to end and continue, hoping that
1499 * there will be enough other inactive pages to
1500 * page out so that the thread which currently
1501 * initializes the pager will succeed.
1502 * Don't re-grant the ticket, the page should be
1503 * pulled from the queue and paged out whenever
1504 * one of its logically adjacent fellows is
1505 * targeted.
1506 */
1507 queue_remove(&vm_page_queue_inactive, m,
1508 vm_page_t, pageq);
1509 queue_enter(&vm_page_queue_inactive, m,
1510 vm_page_t, pageq);
1511 vm_page_unlock_queues();
1512 vm_object_unlock(object);
1513 vm_pageout_inactive_avoid++;
1514 continue;
1515 }
1516
1517 /*
1518 * Remove the page from the inactive list.
1519 */
1520
1521 queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
1522 m->inactive = FALSE;
1523 if (!m->fictitious)
1524 vm_page_inactive_count--;
1525
1526 if (m->busy || !object->alive) {
1527 /*
1528 * Somebody is already playing with this page.
1529 * Leave it off the pageout queues.
1530 */
1531
1532 vm_page_unlock_queues();
1533 vm_object_unlock(object);
1534 vm_pageout_inactive_busy++;
1535 continue;
1536 }
1537
1538 /*
1539 * If it's absent or in error, we can reclaim the page.
1540 */
1541
1542 if (m->absent || m->error) {
1543 vm_pageout_inactive_absent++;
/*
 * reclaim_page is also the target of the "clean and not precious"
 * path further down.  Both arrive with the page queues and the
 * object locked.
 */
1544 reclaim_page:
1545 vm_page_free(m);
1546 vm_page_unlock_queues();
1547 vm_object_unlock(object);
1548 continue;
1549 }
1550
1551 assert(!m->private);
1552 assert(!m->fictitious);
1553
1554 /*
1555 * If already cleaning this page in place, convert from
1556 * "adjacent" to "target". We can leave the page mapped,
1557 * and vm_pageout_object_terminate will determine whether
1558 * to free or reactivate.
1559 */
1560
1561 if (m->cleaning) {
1562 #if MACH_CLUSTER_STATS
1563 vm_pageout_cluster_conversions++;
1564 #endif
1565 m->busy = TRUE;
1566 m->pageout = TRUE;
1567 m->dump_cleaning = TRUE;
1568 vm_page_wire(m);
1569 vm_object_unlock(object);
1570 vm_page_unlock_queues();
1571 continue;
1572 }
1573
1574 /*
1575 * If it's being used, reactivate.
1576 * (Fictitious pages are either busy or absent.)
1577 */
1578
1579 if (m->reference || pmap_is_referenced(m->phys_addr)) {
1580 vm_pageout_inactive_used++;
/* No goto in this function targets the reactivate_page label. */
1581 reactivate_page:
1582 #if ADVISORY_PAGEOUT
1583 if (m->discard_request) {
1584 m->discard_request = FALSE;
1585 }
1586 #endif /* ADVISORY_PAGEOUT */
1587 vm_object_unlock(object);
1588 vm_page_activate(m);
1589 VM_STAT(reactivations++);
1590 vm_page_unlock_queues();
1591 continue;
1592 }
1593
1594 #if ADVISORY_PAGEOUT
1595 if (object->advisory_pageout) {
1596 boolean_t do_throttle;
1597 memory_object_t pager;
1598 vm_object_offset_t discard_offset;
1599
1600 if (m->discard_request) {
1601 vm_stat_discard_failure++;
1602 goto mandatory_pageout;
1603 }
1604
1605 assert(object->pager_initialized);
1606 m->discard_request = TRUE;
1607 pager = object->pager;
1608
1609 /* system-wide throttle */
1610 do_throttle = (vm_page_free_count <=
1611 vm_page_free_reserved);
1612
1613 #if 0
1614 /*
1615 * JMM - Do we need a replacement throttle
1616 * mechanism for pagers?
1617 */
1618 if (!do_throttle) {
1619 /* throttle on this pager */
1620 /* XXX lock ordering ? */
1621 ip_lock(port);
1622 do_throttle= imq_full(&port->ip_messages);
1623 ip_unlock(port);
1624 }
1625 #endif
1626
1627 if (do_throttle) {
1628 vm_stat_discard_throttle++;
1629 #if 0
1630 /* ignore this page and skip to next */
1631 vm_page_unlock_queues();
1632 vm_object_unlock(object);
1633 continue;
1634 #else
1635 /* force mandatory pageout */
1636 goto mandatory_pageout;
1637 #endif
1638 }
1639
1640 /* proceed with discard_request */
1641 vm_page_activate(m);
1642 vm_stat_discard++;
1643 VM_STAT(reactivations++);
1644 discard_offset = m->offset + object->paging_offset;
1645 vm_stat_discard_sent++;
1646 vm_page_unlock_queues();
1647 vm_object_unlock(object);
1648
/*
 * The discard request itself is commented out below, so in this
 * path the page is only reactivated and counted.
 */
1649 /*
1650 memory_object_discard_request(object->pager,
1651 discard_offset,
1652 PAGE_SIZE);
1653 */
1654 continue;
1655 }
1656 mandatory_pageout:
1657 #endif /* ADVISORY_PAGEOUT */
1658
1659 XPR(XPR_VM_PAGEOUT,
1660 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
1661 (integer_t)object, (integer_t)m->offset, (integer_t)m, 0,0);
1662
1663 /*
1664 * Eliminate all mappings.
1665 */
1666
1667 m->busy = TRUE;
1668 pmap_page_protect(m->phys_addr, VM_PROT_NONE);
1669
1670 if (!m->dirty)
1671 m->dirty = pmap_is_modified(m->phys_addr);
1672 /*
1673 * If it's clean and not precious, we can free the page.
1674 */
1675
1676 if (!m->dirty && !m->precious) {
1677 vm_pageout_inactive_clean++;
1678 goto reclaim_page;
1679 }
/*
 * From here on only the object lock is held; the page is marked busy.
 */
1680 vm_page_unlock_queues();
1681
1682 /*
1683 * If there is no memory object for the page, create
1684 * one and hand it to the default pager.
1685 */
1686
1687 if (!object->pager_initialized)
1688 vm_object_collapse(object);
1689 if (!object->pager_initialized)
1690 vm_object_pager_create(object);
1691 if (!object->pager_initialized) {
1692 /*
1693 * Still no pager for the object.
1694 * Reactivate the page.
1695 *
1696 * Should only happen if there is no
1697 * default pager.
1698 */
1699 vm_page_lock_queues();
1700 vm_page_activate(m);
1701 vm_page_unlock_queues();
1702
1703 /*
1704 * And we are done with it.
1705 */
1706 PAGE_WAKEUP_DONE(m);
1707 vm_object_unlock(object);
1708
1709 /*
1710 * break here to get back to the preemption
1711 * point in the outer loop so that we don't
1712 * spin forever if there is no default pager.
1713 */
1714 vm_pageout_dirty_no_pager++;
1715 /*
1716 * Well there's no pager, but we can still reclaim
1717 * free pages out of the inactive list. Go back
1718 * to top of loop and look for suitable pages.
1719 */
1720 continue;
1721 }
1722
1723 if ((object->pager_initialized) &&
1724 (object->pager == MEMORY_OBJECT_NULL)) {
1725 /*
1726 * This pager has been destroyed by either
1727 * memory_object_destroy or vm_object_destroy, and
1728 * so there is nowhere for the page to go.
1729 * Just free the page.
1730 */
1731 VM_PAGE_FREE(m);
1732 vm_object_unlock(object);
1733 continue;
1734 }
1735
1736 vm_pageout_inactive_dirty++;
1737 /*
1738 if (!object->internal)
1739 burst_count++;
1740 */
/*
 * A paging reference is taken on the object before its lock is dropped;
 * the page, still marked busy, is then passed to vm_pageout_cluster.
 */
1741 vm_object_paging_begin(object);
1742 vm_object_unlock(object);
1743 vm_pageout_cluster(m); /* flush it */
1744 }
/*
 * The loop is only left through the break above, so
 * vm_page_queue_free_lock is held from here until the caller releases it.
 */
1745 consider_machine_adjust();
1746 }
1747
1748 counter(unsigned int c_vm_pageout_scan_continue = 0;)
1749
1750 void
1751 vm_pageout_scan_continue(void)
1752 {
1753 /*
1754 * We just paused to let the pagers catch up.
1755 * If vm_page_laundry_count is still high,
1756 * then we aren't waiting long enough.
1757 * If we have paused some vm_pageout_pause_max times without
1758 * adjusting vm_pageout_burst_wait, it might be too big,
1759 * so we decrease it.
1760 */
1761
1762 vm_page_lock_queues();
1763 counter(++c_vm_pageout_scan_continue);
1764 if (vm_page_laundry_count > vm_pageout_burst_min) {
1765 vm_pageout_burst_wait++;
1766 vm_pageout_pause_count = 0;
1767 } else if (++vm_pageout_pause_count > vm_pageout_pause_max) {
1768 vm_pageout_burst_wait = (vm_pageout_burst_wait * 3) / 4;
1769 if (vm_pageout_burst_wait < 1)
1770 vm_pageout_burst_wait = 1;
1771 vm_pageout_pause_count = 0;
1772 }
1773 vm_page_unlock_queues();
1774 }
1775
1776 void vm_page_free_reserve(int pages);
1777 int vm_page_free_count_init;
1778
1779 void
1780 vm_page_free_reserve(
1781 int pages)
1782 {
1783 int free_after_reserve;
1784
1785 vm_page_free_reserved += pages;
1786
1787 free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;
1788
1789 vm_page_free_min = vm_page_free_reserved +
1790 VM_PAGE_FREE_MIN(free_after_reserve);
1791
1792 vm_page_free_target = vm_page_free_reserved +
1793 VM_PAGE_FREE_TARGET(free_after_reserve);
1794
1795 if (vm_page_free_target < vm_page_free_min + 5)
1796 vm_page_free_target = vm_page_free_min + 5;
1797 }
1798
1799 /*
1800 * vm_pageout is the high level pageout daemon.
1801 */
1802
1803
1804 void
1805 vm_pageout(void)
1806 {
1807 thread_t self = current_thread();
1808 spl_t s;
1809
1810 /*
1811 * Set thread privileges.
1812 */
1813 self->vm_privilege = TRUE;
1814 stack_privilege(self);
1815
1816 s = splsched();
1817 thread_lock(self);
1818
1819 self->priority = BASEPRI_PREEMPT - 1;
1820 self->sched_pri = self->priority;
1821
1822 thread_unlock(self);
1823 splx(s);
1824
1825 /*
1826 * Initialize some paging parameters.
1827 */
1828
1829 if (vm_page_laundry_max == 0)
1830 vm_page_laundry_max = VM_PAGE_LAUNDRY_MAX;
1831
1832 if (vm_pageout_burst_max == 0)
1833 vm_pageout_burst_max = VM_PAGEOUT_BURST_MAX;
1834
1835 if (vm_pageout_burst_wait == 0)
1836 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
1837
1838 if (vm_pageout_empty_wait == 0)
1839 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
1840
1841 vm_page_free_count_init = vm_page_free_count;
1842 /*
1843 * even if we've already called vm_page_free_reserve
1844 * call it again here to insure that the targets are
1845 * accurately calculated (it uses vm_page_free_count_init)
1846 * calling it with an arg of 0 will not change the reserve
1847 * but will re-calculate free_min and free_target
1848 */
1849 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED)
1850 vm_page_free_reserve(VM_PAGE_FREE_RESERVED - vm_page_free_reserved);
1851 else
1852 vm_page_free_reserve(0);
1853
1854 /*
1855 * vm_pageout_scan will set vm_page_inactive_target.
1856 *
1857 * The pageout daemon is never done, so loop forever.
1858 * We should call vm_pageout_scan at least once each
1859 * time we are woken, even if vm_page_free_wanted is
1860 * zero, to check vm_page_free_target and
1861 * vm_page_inactive_target.
1862 */
1863 for (;;) {
1864 vm_pageout_scan_event_counter++;
1865 vm_pageout_scan();
1866 /* we hold vm_page_queue_free_lock now */
1867 assert(vm_page_free_wanted == 0);
1868 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
1869 mutex_unlock(&vm_page_queue_free_lock);
1870 counter(c_vm_pageout_block++);
1871 thread_block((void (*)(void)) 0);
1872 }
1873 /*NOTREACHED*/
1874 }
1875
1876
1877 static upl_t
1878 upl_create(
1879 boolean_t internal)
1880 {
1881 upl_t upl;
1882
1883 if(internal) {
1884 upl = (upl_t)kalloc(sizeof(struct upl)
1885 + (sizeof(struct upl_page_info)*MAX_UPL_TRANSFER));
1886 } else {
1887 upl = (upl_t)kalloc(sizeof(struct upl));
1888 }
1889 upl->flags = 0;
1890 upl->src_object = NULL;
1891 upl->kaddr = (vm_offset_t)0;
1892 upl->size = 0;
1893 upl->map_object = NULL;
1894 upl->ref_count = 1;
1895 upl_lock_init(upl);
1896 #ifdef UBC_DEBUG
1897 upl->ubc_alias1 = 0;
1898 upl->ubc_alias2 = 0;
1899 #endif /* UBC_DEBUG */
1900 return(upl);
1901 }
1902
1903 static void
1904 upl_destroy(
1905 upl_t upl)
1906 {
1907
1908 #ifdef UBC_DEBUG
1909 {
1910 upl_t upl_ele;
1911 vm_object_lock(upl->map_object->shadow);
1912 queue_iterate(&upl->map_object->shadow->uplq,
1913 upl_ele, upl_t, uplq) {
1914 if(upl_ele == upl) {
1915 queue_remove(&upl->map_object->shadow->uplq,
1916 upl_ele, upl_t, uplq);
1917 break;
1918 }
1919 }
1920 vm_object_unlock(upl->map_object->shadow);
1921 }
1922 #endif /* UBC_DEBUG */
1923 #ifdef notdefcdy
1924 if(!(upl->flags & UPL_DEVICE_MEMORY))
1925 #endif
1926 vm_object_deallocate(upl->map_object);
1927 if(upl->flags & UPL_INTERNAL) {
1928 kfree((vm_offset_t)upl,
1929 sizeof(struct upl) +
1930 (sizeof(struct upl_page_info) * MAX_UPL_TRANSFER));
1931 } else {
1932 kfree((vm_offset_t)upl, sizeof(struct upl));
1933 }
1934 }
1935
1936 __private_extern__ void
1937 uc_upl_dealloc(
1938 upl_t upl)
1939 {
1940 upl->ref_count -= 1;
1941 if(upl->ref_count == 0) {
1942 upl_destroy(upl);
1943 }
1944 }
1945
1946 void
1947 upl_deallocate(
1948 upl_t upl)
1949 {
1950
1951 upl->ref_count -= 1;
1952 if(upl->ref_count == 0) {
1953 upl_destroy(upl);
1954 }
1955 }
1956
1957 /*
1958 * Routine: vm_object_upl_request
1959 * Purpose:
1960 * Cause the population of a portion of a vm_object.
1961 * Depending on the nature of the request, the pages
1962 * returned may contain valid data or be uninitialized.
1963 * A page list structure, listing the physical pages
1964 * will be returned upon request.
1965 * This function is called by the file system or any other
1966 * supplier of backing store to a pager.
1967 * IMPORTANT NOTE: The caller must still respect the relationship
1968 * between the vm_object and its backing memory object. The
1969 * caller MUST NOT substitute changes in the backing file
1970 * without first doing a memory_object_lock_request on the
1971 * target range unless it is known that the pages are not
1972 * shared with another entity at the pager level.
1973 * Copy_in_to:
1974 * if a page list structure is present
1975 * return the mapped physical pages, where a
1976 * page is not present, return a non-initialized
1977 * one. If the no_sync bit is turned on, don't
1978 * call the pager unlock to synchronize with other
1979 * possible copies of the page. Leave pages busy
1980 * in the original object, if a page list structure
1981 * was specified. When a commit of the page list
1982 * pages is done, the dirty bit will be set for each one.
1983 * Copy_out_from:
1984 * If a page list structure is present, return
1985 * all mapped pages. Where a page does not exist
1986 * map a zero filled one. Leave pages busy in
1987 * the original object. If a page list structure
1988 * is not specified, this call is a no-op.
1989 *
1990 * Note: access of default pager objects has a rather interesting
1991 * twist. The caller of this routine, presumably the file system
1992 * page cache handling code, will never actually make a request
1993 * against a default pager backed object. Only the default
1994 * pager will make requests on backing store related vm_objects
1995 * In this way the default pager can maintain the relationship
1996 * between backing store files (abstract memory objects) and
1997 * the vm_objects (cache objects), they support.
1998 *
1999 */
2000 __private_extern__ kern_return_t
2001 vm_object_upl_request(
2002 vm_object_t object,
2003 vm_object_offset_t offset,
2004 vm_size_t size,
2005 upl_t *upl_ptr,
2006 upl_page_info_array_t user_page_list,
2007 unsigned int *page_list_count,
2008 int cntrl_flags)
2009 {
2010 vm_page_t dst_page;
2011 vm_object_offset_t dst_offset = offset;
2012 vm_size_t xfer_size = size;
2013 boolean_t do_m_lock = FALSE;
2014 boolean_t dirty;
2015 upl_t upl = NULL;
2016 int entry;
2017 boolean_t encountered_lrp = FALSE;
2018
2019 vm_page_t alias_page = NULL;
2020 int page_ticket;
2021
2022
2023 page_ticket = (cntrl_flags & UPL_PAGE_TICKET_MASK)
2024 >> UPL_PAGE_TICKET_SHIFT;
2025
2026 if(((size/page_size) > MAX_UPL_TRANSFER) && !object->phys_contiguous) {
2027 size = MAX_UPL_TRANSFER * page_size;
2028 }
2029
2030 if(cntrl_flags & UPL_SET_INTERNAL)
2031 if(page_list_count != NULL)
2032 *page_list_count = MAX_UPL_TRANSFER;
2033 if(((cntrl_flags & UPL_SET_INTERNAL) && !(object->phys_contiguous)) &&
2034 ((page_list_count != NULL) && (*page_list_count != 0)
2035 && *page_list_count < (size/page_size)))
2036 return KERN_INVALID_ARGUMENT;
2037
2038 if((!object->internal) && (object->paging_offset != 0))
2039 panic("vm_object_upl_request: vnode object with non-zero paging offset\n");
2040
2041 if((cntrl_flags & UPL_COPYOUT_FROM) && (upl_ptr == NULL)) {
2042 return KERN_SUCCESS;
2043 }
2044 if(upl_ptr) {
2045 if(cntrl_flags & UPL_SET_INTERNAL) {
2046 upl = upl_create(TRUE);
2047 user_page_list = (upl_page_info_t *)
2048 (((vm_offset_t)upl) + sizeof(struct upl));
2049 upl->flags |= UPL_INTERNAL;
2050 } else {
2051 upl = upl_create(FALSE);
2052 }
2053 if(object->phys_contiguous) {
2054 upl->size = size;
2055 upl->offset = offset + object->paging_offset;
2056 *upl_ptr = upl;
2057 if(user_page_list) {
2058 user_page_list[0].phys_addr =
2059 offset + object->shadow_offset;
2060 user_page_list[0].device = TRUE;
2061 }
2062 upl->map_object = vm_object_allocate(size);
2063 vm_object_lock(upl->map_object);
2064 upl->map_object->shadow = object;
2065 upl->flags = UPL_DEVICE_MEMORY | UPL_INTERNAL;
2066 upl->map_object->pageout = TRUE;
2067 upl->map_object->can_persist = FALSE;
2068 upl->map_object->copy_strategy
2069 = MEMORY_OBJECT_COPY_NONE;
2070 upl->map_object->shadow_offset = offset;
2071 vm_object_unlock(upl->map_object);
2072 return KERN_SUCCESS;
2073 }
2074
2075
2076 upl->map_object = vm_object_allocate(size);
2077 vm_object_lock(upl->map_object);
2078 upl->map_object->shadow = object;
2079 upl->size = size;
2080 upl->offset = offset + object->paging_offset;
2081 upl->map_object->pageout = TRUE;
2082 upl->map_object->can_persist = FALSE;
2083 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2084 upl->map_object->shadow_offset = offset;
2085 vm_object_unlock(upl->map_object);
2086 *upl_ptr = upl;
2087 }
2088 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2089 vm_object_lock(object);
2090 #ifdef UBC_DEBUG
2091 if(upl_ptr)
2092 queue_enter(&object->uplq, upl, upl_t, uplq);
2093 #endif /* UBC_DEBUG */
2094 vm_object_paging_begin(object);
2095 entry = 0;
2096 if(cntrl_flags & UPL_COPYOUT_FROM) {
2097 upl->flags |= UPL_PAGE_SYNC_DONE;
2098 while (xfer_size) {
2099 if(alias_page == NULL) {
2100 vm_object_unlock(object);
2101 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2102 vm_object_lock(object);
2103 }
2104 if(((dst_page = vm_page_lookup(object,
2105 dst_offset)) == VM_PAGE_NULL) ||
2106 dst_page->fictitious ||
2107 dst_page->absent ||
2108 dst_page->error ||
2109 (dst_page->wire_count != 0 &&
2110 !dst_page->pageout) ||
2111 ((!(dst_page->dirty || dst_page->precious ||
2112 pmap_is_modified(dst_page->phys_addr)))
2113 && (cntrl_flags & UPL_RET_ONLY_DIRTY)) ||
2114 ((!(dst_page->inactive))
2115 && (dst_page->page_ticket != page_ticket)
2116 && ((dst_page->page_ticket+1) != page_ticket)
2117 && (cntrl_flags & UPL_PAGEOUT)) ||
2118 ((!dst_page->list_req_pending) &&
2119 (cntrl_flags & UPL_RET_ONLY_DIRTY) &&
2120 pmap_is_referenced(dst_page->phys_addr))) {
2121 if(user_page_list)
2122 user_page_list[entry].phys_addr = 0;
2123 } else {
2124
2125 if(dst_page->busy &&
2126 (!(dst_page->list_req_pending &&
2127 dst_page->pageout))) {
2128 if(cntrl_flags & UPL_NOBLOCK) {
2129 if(user_page_list)
2130 user_page_list[entry]
2131 .phys_addr = 0;
2132 entry++;
2133 dst_offset += PAGE_SIZE_64;
2134 xfer_size -= PAGE_SIZE;
2135 continue;
2136 }
2137 /*someone else is playing with the */
2138 /* page. We will have to wait. */
2139 PAGE_ASSERT_WAIT(
2140 dst_page, THREAD_UNINT);
2141 vm_object_unlock(object);
2142 thread_block((void(*)(void))0);
2143 vm_object_lock(object);
2144 continue;
2145 }
2146 /* Someone else already cleaning the page? */
2147 if((dst_page->cleaning || dst_page->absent ||
2148 dst_page->wire_count != 0) &&
2149 !dst_page->list_req_pending) {
2150 if(user_page_list)
2151 user_page_list[entry].phys_addr = 0;
2152 entry++;
2153 dst_offset += PAGE_SIZE_64;
2154 xfer_size -= PAGE_SIZE;
2155 continue;
2156 }
2157 /* eliminate all mappings from the */
2158 /* original object and its prodigy */
2159
2160 vm_page_lock_queues();
2161 pmap_page_protect(dst_page->phys_addr,
2162 VM_PROT_NONE);
2163
2164 /* pageout statistics gathering. count */
2165 /* all the pages we will page out that */
2166 /* were not counted in the initial */
2167 /* vm_pageout_scan work */
2168 if(dst_page->list_req_pending)
2169 encountered_lrp = TRUE;
2170 if((dst_page->dirty ||
2171 (dst_page->object->internal &&
2172 dst_page->precious)) &&
2173 (dst_page->list_req_pending
2174 == FALSE)) {
2175 if(encountered_lrp) {
2176 CLUSTER_STAT
2177 (pages_at_higher_offsets++;)
2178 } else {
2179 CLUSTER_STAT
2180 (pages_at_lower_offsets++;)
2181 }
2182 }
2183
2184 /* Turn off busy indication on pending */
2185 /* pageout. Note: we can only get here */
2186 /* in the request pending case. */
2187 dst_page->list_req_pending = FALSE;
2188 dst_page->busy = FALSE;
2189 dst_page->cleaning = FALSE;
2190
2191 dirty = pmap_is_modified(dst_page->phys_addr);
2192 dirty = dirty ? TRUE : dst_page->dirty;
2193
2194 /* use pageclean setup, it is more convenient */
2195 /* even for the pageout cases here */
2196 vm_pageclean_setup(dst_page, alias_page,
2197 upl->map_object, size - xfer_size);
2198
2199 if(!dirty) {
2200 dst_page->dirty = FALSE;
2201 dst_page->precious = TRUE;
2202 }
2203
2204 if(dst_page->pageout)
2205 dst_page->busy = TRUE;
2206
2207 alias_page->absent = FALSE;
2208 alias_page = NULL;
2209 if((!(cntrl_flags & UPL_CLEAN_IN_PLACE))
2210 || (cntrl_flags & UPL_PAGEOUT)) {
2211 /* deny access to the target page */
2212 /* while it is being worked on */
2213 if((!dst_page->pageout) &&
2214 (dst_page->wire_count == 0)) {
2215 dst_page->busy = TRUE;
2216 dst_page->pageout = TRUE;
2217 vm_page_wire(dst_page);
2218 }
2219 }
2220 if(user_page_list) {
2221 user_page_list[entry].phys_addr
2222 = dst_page->phys_addr;
2223 user_page_list[entry].dirty =
2224 dst_page->dirty;
2225 user_page_list[entry].pageout =
2226 dst_page->pageout;
2227 user_page_list[entry].absent =
2228 dst_page->absent;
2229 user_page_list[entry].precious =
2230 dst_page->precious;
2231 }
2232
2233 vm_page_unlock_queues();
2234 }
2235 entry++;
2236 dst_offset += PAGE_SIZE_64;
2237 xfer_size -= PAGE_SIZE;
2238 }
2239 } else {
2240 while (xfer_size) {
2241 if(alias_page == NULL) {
2242 vm_object_unlock(object);
2243 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2244 vm_object_lock(object);
2245 }
2246 dst_page = vm_page_lookup(object, dst_offset);
2247 if(dst_page != VM_PAGE_NULL) {
2248 if((dst_page->cleaning) &&
2249 !(dst_page->list_req_pending)) {
2250 /*someone else is writing to the */
2251 /* page. We will have to wait. */
2252 PAGE_ASSERT_WAIT(dst_page,THREAD_UNINT);
2253 vm_object_unlock(object);
2254 thread_block((void(*)(void))0);
2255 vm_object_lock(object);
2256 continue;
2257 }
2258 if ((dst_page->fictitious &&
2259 dst_page->list_req_pending)) {
2260 /* dump the fictitious page */
2261 dst_page->list_req_pending = FALSE;
2262 dst_page->clustered = FALSE;
2263 vm_page_lock_queues();
2264 vm_page_free(dst_page);
2265 vm_page_unlock_queues();
2266 } else if ((dst_page->absent &&
2267 dst_page->list_req_pending)) {
2268 /* the default_pager case */
2269 dst_page->list_req_pending = FALSE;
2270 dst_page->busy = FALSE;
2271 dst_page->clustered = FALSE;
2272 }
2273 }
2274 if((dst_page = vm_page_lookup(object, dst_offset)) ==
2275 VM_PAGE_NULL) {
2276 if(object->private) {
2277 /*
2278 * This is a nasty wrinkle for users
2279 * of upl who encounter device or
2280 * private memory however, it is
2281 * unavoidable, only a fault can
2282 * reslove the actual backing
2283 * physical page by asking the
2284 * backing device.
2285 */
2286 if(user_page_list)
2287 user_page_list[entry]
2288 .phys_addr = 0;
2289 entry++;
2290 dst_offset += PAGE_SIZE_64;
2291 xfer_size -= PAGE_SIZE;
2292 continue;
2293 }
2294 /* need to allocate a page */
2295 dst_page = vm_page_alloc(object, dst_offset);
2296 if (dst_page == VM_PAGE_NULL) {
2297 vm_object_unlock(object);
2298 VM_PAGE_WAIT();
2299 vm_object_lock(object);
2300 continue;
2301 }
2302 dst_page->busy = FALSE;
2303 #if 0
2304 if(cntrl_flags & UPL_NO_SYNC) {
2305 dst_page->page_lock = 0;
2306 dst_page->unlock_request = 0;
2307 }
2308 #endif
2309 dst_page->absent = TRUE;
2310 object->absent_count++;
2311 }
2312 #if 1
2313 if(cntrl_flags & UPL_NO_SYNC) {
2314 dst_page->page_lock = 0;
2315 dst_page->unlock_request = 0;
2316 }
2317 #endif /* 1 */
2318 dst_page->overwriting = TRUE;
2319 if(dst_page->fictitious) {
2320 panic("need corner case for fictitious page");
2321 }
2322 if(dst_page->page_lock) {
2323 do_m_lock = TRUE;
2324 }
2325 if(upl_ptr) {
2326
2327 /* eliminate all mappings from the */
2328 /* original object and its prodigy */
2329
2330 if(dst_page->busy) {
2331 /*someone else is playing with the */
2332 /* page. We will have to wait. */
2333 PAGE_ASSERT_WAIT(
2334 dst_page, THREAD_UNINT);
2335 vm_object_unlock(object);
2336 thread_block((void(*)(void))0);
2337 vm_object_lock(object);
2338 continue;
2339 }
2340
2341 vm_page_lock_queues();
2342 pmap_page_protect(dst_page->phys_addr,
2343 VM_PROT_NONE);
2344 dirty = pmap_is_modified(dst_page->phys_addr);
2345 dirty = dirty ? TRUE : dst_page->dirty;
2346
2347 vm_pageclean_setup(dst_page, alias_page,
2348 upl->map_object, size - xfer_size);
2349
2350 if(cntrl_flags & UPL_CLEAN_IN_PLACE) {
2351 /* clean in place for read implies */
2352 /* that a write will be done on all */
2353 /* the pages that are dirty before */
2354 /* a upl commit is done. The caller */
2355 /* is obligated to preserve the */
2356 /* contents of all pages marked */
2357 /* dirty. */
2358 upl->flags |= UPL_CLEAR_DIRTY;
2359 }
2360
2361 if(!dirty) {
2362 dst_page->dirty = FALSE;
2363 dst_page->precious = TRUE;
2364 }
2365
2366 if (dst_page->wire_count == 0) {
2367 /* deny access to the target page while */
2368 /* it is being worked on */
2369 dst_page->busy = TRUE;
2370 } else {
2371 vm_page_wire(dst_page);
2372 }
2373 /* expect the page to be used */
2374 dst_page->reference = TRUE;
2375 dst_page->precious =
2376 (cntrl_flags & UPL_PRECIOUS)
2377 ? TRUE : FALSE;
2378 alias_page->absent = FALSE;
2379 alias_page = NULL;
2380 if(user_page_list) {
2381 user_page_list[entry].phys_addr
2382 = dst_page->phys_addr;
2383 user_page_list[entry].dirty =
2384 dst_page->dirty;
2385 user_page_list[entry].pageout =
2386 dst_page->pageout;
2387 user_page_list[entry].absent =
2388 dst_page->absent;
2389 user_page_list[entry].precious =
2390 dst_page->precious;
2391 }
2392 vm_page_unlock_queues();
2393 }
2394 entry++;
2395 dst_offset += PAGE_SIZE_64;
2396 xfer_size -= PAGE_SIZE;
2397 }
2398 }
2399
2400 if (upl->flags & UPL_INTERNAL) {
2401 if(page_list_count != NULL)
2402 *page_list_count = 0;
2403 } else if (*page_list_count > entry) {
2404 if(page_list_count != NULL)
2405 *page_list_count = entry;
2406 }
2407
2408 if(alias_page != NULL) {
2409 vm_page_lock_queues();
2410 vm_page_free(alias_page);
2411 vm_page_unlock_queues();
2412 }
2413
2414 if(do_m_lock) {
2415 vm_prot_t access_required;
2416 /* call back all associated pages from other users of the pager */
2417 /* all future updates will be on data which is based on the */
2418 /* changes we are going to make here. Note: it is assumed that */
2419 /* we already hold copies of the data so we will not be seeing */
2420 /* an avalanche of incoming data from the pager */
2421 access_required = (cntrl_flags & UPL_COPYOUT_FROM)
2422 ? VM_PROT_READ : VM_PROT_WRITE;
2423 while (TRUE) {
2424 kern_return_t rc;
2425 thread_t thread;
2426
2427 if(!object->pager_ready) {
2428 thread = current_thread();
2429 vm_object_assert_wait(object,
2430 VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT);
2431 vm_object_unlock(object);
2432 thread_block((void (*)(void))0);
2433 if (thread->wait_result != THREAD_AWAKENED) {
2434 return(KERN_FAILURE);
2435 }
2436 vm_object_lock(object);
2437 continue;
2438 }
2439
2440 vm_object_unlock(object);
2441
2442 if (rc = memory_object_data_unlock(
2443 object->pager,
2444 dst_offset + object->paging_offset,
2445 size,
2446 access_required)) {
2447 if (rc == MACH_SEND_INTERRUPTED)
2448 continue;
2449 else
2450 return KERN_FAILURE;
2451 }
2452 break;
2453
2454 }
2455 /* lets wait on the last page requested */
2456 /* NOTE: we will have to update lock completed routine to signal */
2457 if(dst_page != VM_PAGE_NULL &&
2458 (access_required & dst_page->page_lock) != access_required) {
2459 PAGE_ASSERT_WAIT(dst_page, THREAD_UNINT);
2460 thread_block((void (*)(void))0);
2461 vm_object_lock(object);
2462 }
2463 }
2464 vm_object_unlock(object);
2465 return KERN_SUCCESS;
2466 }
2467
2468 /* JMM - Backward compatability for now */
2469 kern_return_t
2470 vm_fault_list_request(
2471 memory_object_control_t control,
2472 vm_object_offset_t offset,
2473 vm_size_t size,
2474 upl_t *upl_ptr,
2475 upl_page_info_t **user_page_list_ptr,
2476 int page_list_count,
2477 int cntrl_flags)
2478 {
2479 int local_list_count;
2480 upl_page_info_t *user_page_list;
2481 kern_return_t kr;
2482
2483 if (user_page_list_ptr != NULL) {
2484 local_list_count = page_list_count;
2485 user_page_list = *user_page_list_ptr;
2486 } else {
2487 local_list_count = 0;
2488 user_page_list = NULL;
2489 }
2490 kr = memory_object_upl_request(control,
2491 offset,
2492 size,
2493 upl_ptr,
2494 user_page_list,
2495 &local_list_count,
2496 cntrl_flags);
2497
2498 if(kr != KERN_SUCCESS)
2499 return kr;
2500
2501 if ((user_page_list_ptr != NULL) && (cntrl_flags & UPL_INTERNAL)) {
2502 *user_page_list_ptr = UPL_GET_INTERNAL_PAGE_LIST(*upl_ptr);
2503 }
2504
2505 return KERN_SUCCESS;
2506 }
2507
2508
2509
2510 /*
2511 * Routine: vm_object_super_upl_request
2512 * Purpose:
2513 * Cause the population of a portion of a vm_object
2514 * in much the same way as memory_object_upl_request.
2515 * Depending on the nature of the request, the pages
2516 * returned may be contain valid data or be uninitialized.
2517 * However, the region may be expanded up to the super
2518 * cluster size provided.
2519 */
2520
2521 __private_extern__ kern_return_t
2522 vm_object_super_upl_request(
2523 vm_object_t object,
2524 vm_object_offset_t offset,
2525 vm_size_t size,
2526 vm_size_t super_cluster,
2527 upl_t *upl,
2528 upl_page_info_t *user_page_list,
2529 unsigned int *page_list_count,
2530 int cntrl_flags)
2531 {
2532 vm_page_t target_page;
2533 int ticket;
2534
2535 if(object->paging_offset > offset)
2536 return KERN_FAILURE;
2537
2538 offset = offset - object->paging_offset;
2539 if(cntrl_flags & UPL_PAGEOUT) {
2540 if((target_page = vm_page_lookup(object, offset))
2541 != VM_PAGE_NULL) {
2542 ticket = target_page->page_ticket;
2543 cntrl_flags = cntrl_flags & ~(int)UPL_PAGE_TICKET_MASK;
2544 cntrl_flags = cntrl_flags |
2545 ((ticket << UPL_PAGE_TICKET_SHIFT)
2546 & UPL_PAGE_TICKET_MASK);
2547 }
2548 }
2549
2550
2551 /* turns off super cluster exercised by the default_pager */
2552 /*
2553 super_cluster = size;
2554 */
2555 if ((super_cluster > size) &&
2556 (vm_page_free_count > vm_page_free_reserved)) {
2557
2558 vm_object_offset_t base_offset;
2559 vm_size_t super_size;
2560
2561 base_offset = (offset &
2562 ~((vm_object_offset_t) super_cluster - 1));
2563 super_size = (offset+size) > (base_offset + super_cluster) ?
2564 super_cluster<<1 : super_cluster;
2565 super_size = ((base_offset + super_size) > object->size) ?
2566 (object->size - base_offset) : super_size;
2567 if(offset > (base_offset + super_size))
2568 panic("vm_object_super_upl_request: Missed target pageout 0x%x,0x%x, 0x%x, 0x%x, 0x%x, 0x%x\n", offset, base_offset, super_size, super_cluster, size, object->paging_offset);
2569 /* apparently there is a case where the vm requests a */
2570 /* page to be written out who's offset is beyond the */
2571 /* object size */
2572 if((offset + size) > (base_offset + super_size))
2573 super_size = (offset + size) - base_offset;
2574
2575 offset = base_offset;
2576 size = super_size;
2577 }
2578 vm_object_upl_request(object, offset, size,
2579 upl, user_page_list, page_list_count,
2580 cntrl_flags);
2581 }
2582
2583
2584 kern_return_t
2585 vm_upl_map(
2586 vm_map_t map,
2587 upl_t upl,
2588 vm_offset_t *dst_addr)
2589 {
2590 vm_size_t size;
2591 vm_object_offset_t offset;
2592 vm_offset_t addr;
2593 vm_page_t m;
2594 kern_return_t kr;
2595
2596 if (upl == UPL_NULL)
2597 return KERN_INVALID_ARGUMENT;
2598
2599 upl_lock(upl);
2600
2601 /* check to see if already mapped */
2602 if(UPL_PAGE_LIST_MAPPED & upl->flags) {
2603 upl_unlock(upl);
2604 return KERN_FAILURE;
2605 }
2606
2607 offset = 0; /* Always map the entire object */
2608 size = upl->size;
2609
2610 vm_object_lock(upl->map_object);
2611 upl->map_object->ref_count++;
2612 vm_object_res_reference(upl->map_object);
2613 vm_object_unlock(upl->map_object);
2614
2615 *dst_addr = 0;
2616
2617
2618 /* NEED A UPL_MAP ALIAS */
2619 kr = vm_map_enter(map, dst_addr, size, (vm_offset_t) 0, TRUE,
2620 upl->map_object, offset, FALSE,
2621 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
2622
2623 if (kr != KERN_SUCCESS) {
2624 upl_unlock(upl);
2625 return(kr);
2626 }
2627
2628 for(addr=*dst_addr; size > 0; size-=PAGE_SIZE,addr+=PAGE_SIZE) {
2629 m = vm_page_lookup(upl->map_object, offset);
2630 if(m) {
2631 PMAP_ENTER(map->pmap, addr, m, VM_PROT_ALL, TRUE);
2632 }
2633 offset+=PAGE_SIZE_64;
2634 }
2635 upl->ref_count++; /* hold a reference for the mapping */
2636 upl->flags |= UPL_PAGE_LIST_MAPPED;
2637 upl->kaddr = *dst_addr;
2638 upl_unlock(upl);
2639 return KERN_SUCCESS;
2640 }
2641
2642
2643 kern_return_t
2644 vm_upl_unmap(
2645 vm_map_t map,
2646 upl_t upl)
2647 {
2648 vm_address_t addr;
2649 vm_size_t size;
2650
2651 if (upl == UPL_NULL)
2652 return KERN_INVALID_ARGUMENT;
2653
2654 upl_lock(upl);
2655 if(upl->flags & UPL_PAGE_LIST_MAPPED) {
2656 addr = upl->kaddr;
2657 size = upl->size;
2658 assert(upl->ref_count > 1);
2659 upl->ref_count--; /* removing mapping ref */
2660 upl->flags &= ~UPL_PAGE_LIST_MAPPED;
2661 upl->kaddr = (vm_offset_t) 0;
2662 upl_unlock(upl);
2663
2664 vm_deallocate(map, addr, size);
2665 return KERN_SUCCESS;
2666 }
2667 upl_unlock(upl);
2668 return KERN_FAILURE;
2669 }
2670
2671 kern_return_t
2672 upl_commit_range(
2673 upl_t upl,
2674 vm_offset_t offset,
2675 vm_size_t size,
2676 int flags,
2677 upl_page_info_t *page_list,
2678 mach_msg_type_number_t count,
2679 boolean_t *empty)
2680 {
2681 vm_size_t xfer_size = size;
2682 vm_object_t shadow_object = upl->map_object->shadow;
2683 vm_object_t object = upl->map_object;
2684 vm_object_offset_t target_offset;
2685 vm_object_offset_t page_offset;
2686 int entry;
2687
2688 *empty = FALSE;
2689
2690 if (upl == UPL_NULL)
2691 return KERN_INVALID_ARGUMENT;
2692
2693 if (count == 0)
2694 page_list = NULL;
2695
2696 upl_lock(upl);
2697 if(upl->flags & UPL_DEVICE_MEMORY) {
2698 xfer_size = 0;
2699 } else if ((offset + size) > upl->size) {
2700 upl_unlock(upl);
2701 return KERN_FAILURE;
2702 }
2703
2704 vm_object_lock(shadow_object);
2705
2706 entry = offset/PAGE_SIZE;
2707 target_offset = (vm_object_offset_t)offset;
2708 while(xfer_size) {
2709 vm_page_t t,m;
2710 upl_page_info_t *p;
2711
2712 if((t = vm_page_lookup(object, target_offset)) != NULL) {
2713
2714 t->pageout = FALSE;
2715 page_offset = t->offset;
2716 VM_PAGE_FREE(t);
2717 t = VM_PAGE_NULL;
2718 m = vm_page_lookup(shadow_object,
2719 page_offset + object->shadow_offset);
2720 if(m != VM_PAGE_NULL) {
2721 vm_object_paging_end(shadow_object);
2722 vm_page_lock_queues();
2723 if ((upl->flags & UPL_CLEAR_DIRTY) ||
2724 (flags & UPL_COMMIT_CLEAR_DIRTY)) {
2725 pmap_clear_modify(m->phys_addr);
2726 m->dirty = FALSE;
2727 }
2728 if(page_list) {
2729 p = &(page_list[entry]);
2730 if(p->phys_addr && p->pageout && !m->pageout) {
2731 m->busy = TRUE;
2732 m->pageout = TRUE;
2733 vm_page_wire(m);
2734 } else if (page_list[entry].phys_addr &&
2735 !p->pageout && m->pageout &&
2736 !m->dump_cleaning) {
2737 m->pageout = FALSE;
2738 m->absent = FALSE;
2739 m->overwriting = FALSE;
2740 vm_page_unwire(m);
2741 PAGE_WAKEUP_DONE(m);
2742 }
2743 page_list[entry].phys_addr = 0;
2744 }
2745 m->dump_cleaning = FALSE;
2746 if(m->laundry) {
2747 vm_page_laundry_count--;
2748 m->laundry = FALSE;
2749 if (vm_page_laundry_count < vm_page_laundry_min) {
2750 vm_page_laundry_min = 0;
2751 thread_wakeup((event_t)
2752 &vm_page_laundry_count);
2753 }
2754 }
2755 if(m->pageout) {
2756 m->cleaning = FALSE;
2757 m->pageout = FALSE;
2758 #if MACH_CLUSTER_STATS
2759 if (m->wanted) vm_pageout_target_collisions++;
2760 #endif
2761 pmap_page_protect(m->phys_addr, VM_PROT_NONE);
2762 m->dirty = pmap_is_modified(m->phys_addr);
2763 if(m->dirty) {
2764 CLUSTER_STAT(
2765 vm_pageout_target_page_dirtied++;)
2766 vm_page_unwire(m);/* reactivates */
2767 VM_STAT(reactivations++);
2768 PAGE_WAKEUP_DONE(m);
2769 } else {
2770 CLUSTER_STAT(
2771 vm_pageout_target_page_freed++;)
2772 vm_page_free(m);/* clears busy, etc. */
2773 VM_STAT(pageouts++);
2774 }
2775 vm_page_unlock_queues();
2776 target_offset += PAGE_SIZE_64;
2777 xfer_size -= PAGE_SIZE;
2778 entry++;
2779 continue;
2780 }
2781 if (flags & UPL_COMMIT_INACTIVATE) {
2782 vm_page_deactivate(m);
2783 m->reference = FALSE;
2784 pmap_clear_reference(m->phys_addr);
2785 } else if (!m->active && !m->inactive) {
2786 if (m->reference)
2787 vm_page_activate(m);
2788 else
2789 vm_page_deactivate(m);
2790 }
2791 #if MACH_CLUSTER_STATS
2792 m->dirty = pmap_is_modified(m->phys_addr);
2793
2794 if (m->dirty) vm_pageout_cluster_dirtied++;
2795 else vm_pageout_cluster_cleaned++;
2796 if (m->wanted) vm_pageout_cluster_collisions++;
2797 #else
2798 m->dirty = 0;
2799 #endif
2800
2801 if((m->busy) && (m->cleaning)) {
2802 /* the request_page_list case */
2803 if(m->absent) {
2804 m->absent = FALSE;
2805 if(shadow_object->absent_count == 1)
2806 vm_object_absent_release(shadow_object);
2807 else
2808 shadow_object->absent_count--;
2809 }
2810 m->overwriting = FALSE;
2811 m->busy = FALSE;
2812 m->dirty = FALSE;
2813 }
2814 else if (m->overwriting) {
2815 /* alternate request page list, write to
2816 /* page_list case. Occurs when the original
2817 /* page was wired at the time of the list
2818 /* request */
2819 assert(m->wire_count != 0);
2820 vm_page_unwire(m);/* reactivates */
2821 m->overwriting = FALSE;
2822 }
2823 m->cleaning = FALSE;
2824 /* It is a part of the semantic of COPYOUT_FROM */
2825 /* UPLs that a commit implies cache sync */
2826 /* between the vm page and the backing store */
2827 /* this can be used to strip the precious bit */
2828 /* as well as clean */
2829 if (upl->flags & UPL_PAGE_SYNC_DONE)
2830 m->precious = FALSE;
2831
2832 if (flags & UPL_COMMIT_SET_DIRTY) {
2833 m->dirty = TRUE;
2834 }
2835 /*
2836 * Wakeup any thread waiting for the page to be un-cleaning.
2837 */
2838 PAGE_WAKEUP(m);
2839 vm_page_unlock_queues();
2840
2841 }
2842 }
2843 target_offset += PAGE_SIZE_64;
2844 xfer_size -= PAGE_SIZE;
2845 entry++;
2846 }
2847
2848 vm_object_unlock(shadow_object);
2849 if(flags & UPL_COMMIT_NOTIFY_EMPTY) {
2850 if((upl->flags & UPL_DEVICE_MEMORY)
2851 || (queue_empty(&upl->map_object->memq)))
2852 *empty = TRUE;
2853 }
2854 upl_unlock(upl);
2855
2856 return KERN_SUCCESS;
2857 }
2858
2859 kern_return_t
2860 upl_abort_range(
2861 upl_t upl,
2862 vm_offset_t offset,
2863 vm_size_t size,
2864 int error,
2865 boolean_t *empty)
2866 {
2867 vm_size_t xfer_size = size;
2868 vm_object_t shadow_object = upl->map_object->shadow;
2869 vm_object_t object = upl->map_object;
2870 vm_object_offset_t target_offset;
2871 vm_object_offset_t page_offset;
2872 int entry;
2873
2874 *empty = FALSE;
2875
2876 if (upl == UPL_NULL)
2877 return KERN_INVALID_ARGUMENT;
2878
2879 upl_lock(upl);
2880 if(upl->flags & UPL_DEVICE_MEMORY) {
2881 xfer_size = 0;
2882 } else if ((offset + size) > upl->size) {
2883 upl_unlock(upl);
2884 return KERN_FAILURE;
2885 }
2886
2887 vm_object_lock(shadow_object);
2888
2889 entry = offset/PAGE_SIZE;
2890 target_offset = (vm_object_offset_t)offset;
2891 while(xfer_size) {
2892 vm_page_t t,m;
2893 upl_page_info_t *p;
2894
2895 if((t = vm_page_lookup(object, target_offset)) != NULL) {
2896
2897 t->pageout = FALSE;
2898 page_offset = t->offset;
2899 VM_PAGE_FREE(t);
2900 t = VM_PAGE_NULL;
2901 m = vm_page_lookup(shadow_object,
2902 page_offset + object->shadow_offset);
2903 if(m != VM_PAGE_NULL) {
2904 vm_object_paging_end(m->object);
2905 vm_page_lock_queues();
2906 if(m->absent) {
2907 /* COPYOUT = FALSE case */
2908 /* check for error conditions which must */
2909 /* be passed back to the pages customer */
2910 if(error & UPL_ABORT_RESTART) {
2911 m->restart = TRUE;
2912 m->absent = FALSE;
2913 vm_object_absent_release(m->object);
2914 m->page_error = KERN_MEMORY_ERROR;
2915 m->error = TRUE;
2916 } else if(error & UPL_ABORT_UNAVAILABLE) {
2917 m->restart = FALSE;
2918 m->unusual = TRUE;
2919 m->clustered = FALSE;
2920 } else if(error & UPL_ABORT_ERROR) {
2921 m->restart = FALSE;
2922 m->absent = FALSE;
2923 vm_object_absent_release(m->object);
2924 m->page_error = KERN_MEMORY_ERROR;
2925 m->error = TRUE;
2926 } else if(error & UPL_ABORT_DUMP_PAGES) {
2927 m->clustered = TRUE;
2928 } else {
2929 m->clustered = TRUE;
2930 }
2931
2932
2933 m->cleaning = FALSE;
2934 m->overwriting = FALSE;
2935 PAGE_WAKEUP_DONE(m);
2936 if(m->clustered) {
2937 vm_page_free(m);
2938 } else {
2939 vm_page_activate(m);
2940 }
2941
2942 vm_page_unlock_queues();
2943 target_offset += PAGE_SIZE_64;
2944 xfer_size -= PAGE_SIZE;
2945 entry++;
2946 continue;
2947 }
2948 /*
2949 * Handle the trusted pager throttle.
2950 */
2951 if (m->laundry) {
2952 vm_page_laundry_count--;
2953 m->laundry = FALSE;
2954 if (vm_page_laundry_count
2955 < vm_page_laundry_min) {
2956 vm_page_laundry_min = 0;
2957 thread_wakeup((event_t)
2958 &vm_page_laundry_count);
2959 }
2960 }
2961 if(m->pageout) {
2962 assert(m->busy);
2963 assert(m->wire_count == 1);
2964 m->pageout = FALSE;
2965 vm_page_unwire(m);
2966 }
2967 m->dump_cleaning = FALSE;
2968 m->cleaning = FALSE;
2969 m->busy = FALSE;
2970 m->overwriting = FALSE;
2971 #if MACH_PAGEMAP
2972 vm_external_state_clr(
2973 m->object->existence_map, m->offset);
2974 #endif /* MACH_PAGEMAP */
2975 if(error & UPL_ABORT_DUMP_PAGES) {
2976 vm_page_free(m);
2977 pmap_page_protect(m->phys_addr, VM_PROT_NONE);
2978 } else {
2979 PAGE_WAKEUP(m);
2980 }
2981 vm_page_unlock_queues();
2982 }
2983 }
2984 target_offset += PAGE_SIZE_64;
2985 xfer_size -= PAGE_SIZE;
2986 entry++;
2987 }
2988 vm_object_unlock(shadow_object);
2989 if(error & UPL_ABORT_NOTIFY_EMPTY) {
2990 if((upl->flags & UPL_DEVICE_MEMORY)
2991 || (queue_empty(&upl->map_object->memq)))
2992 *empty = TRUE;
2993 }
2994 upl_unlock(upl);
2995 return KERN_SUCCESS;
2996 }
2997
2998 kern_return_t
2999 upl_abort(
3000 upl_t upl,
3001 int error)
3002 {
3003 vm_object_t object = NULL;
3004 vm_object_t shadow_object = NULL;
3005 vm_object_offset_t offset;
3006 vm_object_offset_t shadow_offset;
3007 vm_object_offset_t target_offset;
3008 int i;
3009 vm_page_t t,m;
3010
3011 if (upl == UPL_NULL)
3012 return KERN_INVALID_ARGUMENT;
3013
3014 upl_lock(upl);
3015 if(upl->flags & UPL_DEVICE_MEMORY) {
3016 upl_unlock(upl);
3017 return KERN_SUCCESS;
3018 }
3019
3020 object = upl->map_object;
3021
3022 if (object == NULL) {
3023 panic("upl_abort: upl object is not backed by an object");
3024 upl_unlock(upl);
3025 return KERN_INVALID_ARGUMENT;
3026 }
3027
3028 shadow_object = upl->map_object->shadow;
3029 shadow_offset = upl->map_object->shadow_offset;
3030 offset = 0;
3031 vm_object_lock(shadow_object);
3032 for(i = 0; i<(upl->size); i+=PAGE_SIZE, offset += PAGE_SIZE_64) {
3033 if((t = vm_page_lookup(object,offset)) != NULL) {
3034 target_offset = t->offset + shadow_offset;
3035 if((m = vm_page_lookup(shadow_object, target_offset)) != NULL) {
3036 vm_object_paging_end(m->object);
3037 vm_page_lock_queues();
3038 if(m->absent) {
3039 /* COPYOUT = FALSE case */
3040 /* check for error conditions which must */
3041 /* be passed back to the pages customer */
3042 if(error & UPL_ABORT_RESTART) {
3043 m->restart = TRUE;
3044 m->absent = FALSE;
3045 vm_object_absent_release(m->object);
3046 m->page_error = KERN_MEMORY_ERROR;
3047 m->error = TRUE;
3048 } else if(error & UPL_ABORT_UNAVAILABLE) {
3049 m->restart = FALSE;
3050 m->unusual = TRUE;
3051 m->clustered = FALSE;
3052 } else if(error & UPL_ABORT_ERROR) {
3053 m->restart = FALSE;
3054 m->absent = FALSE;
3055 vm_object_absent_release(m->object);
3056 m->page_error = KERN_MEMORY_ERROR;
3057 m->error = TRUE;
3058 } else if(error & UPL_ABORT_DUMP_PAGES) {
3059 m->clustered = TRUE;
3060 } else {
3061 m->clustered = TRUE;
3062 }
3063
3064 m->cleaning = FALSE;
3065 m->overwriting = FALSE;
3066 PAGE_WAKEUP_DONE(m);
3067 if(m->clustered) {
3068 vm_page_free(m);
3069 } else {
3070 vm_page_activate(m);
3071 }
3072 vm_page_unlock_queues();
3073 continue;
3074 }
3075 /*
3076 * Handle the trusted pager throttle.
3077 */
3078 if (m->laundry) {
3079 vm_page_laundry_count--;
3080 m->laundry = FALSE;
3081 if (vm_page_laundry_count
3082 < vm_page_laundry_min) {
3083 vm_page_laundry_min = 0;
3084 thread_wakeup((event_t)
3085 &vm_page_laundry_count);
3086 }
3087 }
3088 if(m->pageout) {
3089 assert(m->busy);
3090 assert(m->wire_count == 1);
3091 m->pageout = FALSE;
3092 vm_page_unwire(m);
3093 }
3094 m->dump_cleaning = FALSE;
3095 m->cleaning = FALSE;
3096 m->busy = FALSE;
3097 m->overwriting = FALSE;
3098 #if MACH_PAGEMAP
3099 vm_external_state_clr(
3100 m->object->existence_map, m->offset);
3101 #endif /* MACH_PAGEMAP */
3102 if(error & UPL_ABORT_DUMP_PAGES) {
3103 vm_page_free(m);
3104 pmap_page_protect(m->phys_addr, VM_PROT_NONE);
3105 } else {
3106 PAGE_WAKEUP(m);
3107 }
3108 vm_page_unlock_queues();
3109 }
3110 }
3111 }
3112 vm_object_unlock(shadow_object);
3113 /* Remove all the pages from the map object so */
3114 /* vm_pageout_object_terminate will work properly. */
3115 while (!queue_empty(&upl->map_object->memq)) {
3116 vm_page_t p;
3117
3118 p = (vm_page_t) queue_first(&upl->map_object->memq);
3119
3120 assert(p->private);
3121 assert(p->pageout);
3122 p->pageout = FALSE;
3123 assert(!p->cleaning);
3124
3125 VM_PAGE_FREE(p);
3126 }
3127 upl_unlock(upl);
3128 return KERN_SUCCESS;
3129 }
3130
3131 /* an option on commit should be wire */
3132 kern_return_t
3133 upl_commit(
3134 upl_t upl,
3135 upl_page_info_t *page_list,
3136 mach_msg_type_number_t count)
3137 {
3138 if (upl == UPL_NULL)
3139 return KERN_INVALID_ARGUMENT;
3140
3141 if (count == 0)
3142 page_list = NULL;
3143
3144 upl_lock(upl);
3145 if (upl->flags & UPL_DEVICE_MEMORY)
3146 page_list = NULL;
3147 if ((upl->flags & UPL_CLEAR_DIRTY) ||
3148 (upl->flags & UPL_PAGE_SYNC_DONE)) {
3149 vm_object_t shadow_object = upl->map_object->shadow;
3150 vm_object_t object = upl->map_object;
3151 vm_object_offset_t target_offset;
3152 vm_size_t xfer_end;
3153
3154 vm_page_t t,m;
3155
3156 vm_object_lock(shadow_object);
3157
3158 target_offset = object->shadow_offset;
3159 xfer_end = upl->size + object->shadow_offset;
3160
3161 while(target_offset < xfer_end) {
3162 if ((t = vm_page_lookup(object,
3163 target_offset - object->shadow_offset))
3164 != NULL) {
3165 m = vm_page_lookup(
3166 shadow_object, target_offset);
3167 if(m != VM_PAGE_NULL) {
3168 if (upl->flags & UPL_CLEAR_DIRTY) {
3169 pmap_clear_modify(m->phys_addr);
3170 m->dirty = FALSE;
3171 }
3172 /* It is a part of the semantic of */
3173 /* COPYOUT_FROM UPLs that a commit */
3174 /* implies cache sync between the */
3175 /* vm page and the backing store */
3176 /* this can be used to strip the */
3177 /* precious bit as well as clean */
3178 if (upl->flags & UPL_PAGE_SYNC_DONE)
3179 m->precious = FALSE;
3180 }
3181 }
3182 target_offset += PAGE_SIZE_64;
3183 }
3184 vm_object_unlock(shadow_object);
3185 }
3186 if (page_list) {
3187 vm_object_t shadow_object = upl->map_object->shadow;
3188 vm_object_t object = upl->map_object;
3189 vm_object_offset_t target_offset;
3190 vm_size_t xfer_end;
3191 int entry;
3192
3193 vm_page_t t, m;
3194 upl_page_info_t *p;
3195
3196 vm_object_lock(shadow_object);
3197
3198 entry = 0;
3199 target_offset = object->shadow_offset;
3200 xfer_end = upl->size + object->shadow_offset;
3201
3202 while(target_offset < xfer_end) {
3203
3204 if ((t = vm_page_lookup(object,
3205 target_offset - object->shadow_offset))
3206 == NULL) {
3207 target_offset += PAGE_SIZE_64;
3208 entry++;
3209 continue;
3210 }
3211
3212 m = vm_page_lookup(shadow_object, target_offset);
3213 if(m != VM_PAGE_NULL) {
3214 p = &(page_list[entry]);
3215 if(page_list[entry].phys_addr &&
3216 p->pageout && !m->pageout) {
3217 vm_page_lock_queues();
3218 m->busy = TRUE;
3219 m->pageout = TRUE;
3220 vm_page_wire(m);
3221 vm_page_unlock_queues();
3222 } else if (page_list[entry].phys_addr &&
3223 !p->pageout && m->pageout &&
3224 !m->dump_cleaning) {
3225 vm_page_lock_queues();
3226 m->pageout = FALSE;
3227 m->absent = FALSE;
3228 m->overwriting = FALSE;
3229 vm_page_unwire(m);
3230 PAGE_WAKEUP_DONE(m);
3231 vm_page_unlock_queues();
3232 }
3233 page_list[entry].phys_addr = 0;
3234 }
3235 target_offset += PAGE_SIZE_64;
3236 entry++;
3237 }
3238
3239 vm_object_unlock(shadow_object);
3240 }
3241 upl_unlock(upl);
3242 return KERN_SUCCESS;
3243 }
3244
3245 vm_size_t
3246 upl_get_internal_pagelist_offset()
3247 {
3248 return sizeof(struct upl);
3249 }
3250
3251 void
3252 upl_set_dirty(
3253 upl_t upl)
3254 {
3255 upl->flags |= UPL_CLEAR_DIRTY;
3256 }
3257
3258 void
3259 upl_clear_dirty(
3260 upl_t upl)
3261 {
3262 upl->flags &= ~UPL_CLEAR_DIRTY;
3263 }
3264
3265
3266 #ifdef MACH_BSD
3267
3268 boolean_t upl_page_present(upl_page_info_t *upl, int index)
3269 {
3270 return(UPL_PAGE_PRESENT(upl, index));
3271 }
3272 boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
3273 {
3274 return(UPL_DIRTY_PAGE(upl, index));
3275 }
3276 boolean_t upl_valid_page(upl_page_info_t *upl, int index)
3277 {
3278 return(UPL_VALID_PAGE(upl, index));
3279 }
3280 vm_offset_t upl_phys_page(upl_page_info_t *upl, int index)
3281 {
3282 return((vm_offset_t)UPL_PHYS_PAGE(upl, index));
3283 }
3284
3285 void
3286 vm_countdirtypages(void)
3287 {
3288 vm_page_t m;
3289 int dpages;
3290 int pgopages;
3291 int precpages;
3292
3293
3294 dpages=0;
3295 pgopages=0;
3296 precpages=0;
3297
3298 vm_page_lock_queues();
3299 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
3300 do {
3301 if (m ==(vm_page_t )0) break;
3302
3303 if(m->dirty) dpages++;
3304 if(m->pageout) pgopages++;
3305 if(m->precious) precpages++;
3306
3307 m = (vm_page_t) queue_next(&m->pageq);
3308 if (m ==(vm_page_t )0) break;
3309
3310 } while (!queue_end(&vm_page_queue_inactive,(queue_entry_t) m));
3311 vm_page_unlock_queues();
3312
3313 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
3314
3315 dpages=0;
3316 pgopages=0;
3317 precpages=0;
3318
3319 vm_page_lock_queues();
3320 m = (vm_page_t) queue_first(&vm_page_queue_active);
3321
3322 do {
3323 if(m == (vm_page_t )0) break;
3324 if(m->dirty) dpages++;
3325 if(m->pageout) pgopages++;
3326 if(m->precious) precpages++;
3327
3328 m = (vm_page_t) queue_next(&m->pageq);
3329 if(m == (vm_page_t )0) break;
3330
3331 } while (!queue_end(&vm_page_queue_active,(queue_entry_t) m));
3332 vm_page_unlock_queues();
3333
3334 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
3335
3336 }
3337 #endif /* MACH_BSD */
3338
3339 #ifdef UBC_DEBUG
3340 kern_return_t upl_ubc_alias_set(upl_t upl, unsigned int alias1, unsigned int alias2)
3341 {
3342 upl->ubc_alias1 = alias1;
3343 upl->ubc_alias2 = alias2;
3344 return KERN_SUCCESS;
3345 }
3346 int upl_ubc_alias_get(upl_t upl, unsigned int * al, unsigned int * al2)
3347 {
3348 if(al)
3349 *al = upl->ubc_alias1;
3350 if(al2)
3351 *al2 = upl->ubc_alias2;
3352 return KERN_SUCCESS;
3353 }
3354 #endif /* UBC_DEBUG */
3355
3356
3357
3358 #if MACH_KDB
3359 #include <ddb/db_output.h>
3360 #include <ddb/db_print.h>
3361 #include <vm/vm_print.h>
3362
3363 #define printf kdbprintf
3364 extern int db_indent;
3365 void db_pageout(void);
3366
3367 void
3368 db_vm(void)
3369 {
3370 extern int vm_page_gobble_count;
3371
3372 iprintf("VM Statistics:\n");
3373 db_indent += 2;
3374 iprintf("pages:\n");
3375 db_indent += 2;
3376 iprintf("activ %5d inact %5d free %5d",
3377 vm_page_active_count, vm_page_inactive_count,
3378 vm_page_free_count);
3379 printf(" wire %5d gobbl %5d\n",
3380 vm_page_wire_count, vm_page_gobble_count);
3381 iprintf("laund %5d\n",
3382 vm_page_laundry_count);
3383 db_indent -= 2;
3384 iprintf("target:\n");
3385 db_indent += 2;
3386 iprintf("min %5d inact %5d free %5d",
3387 vm_page_free_min, vm_page_inactive_target,
3388 vm_page_free_target);
3389 printf(" resrv %5d\n", vm_page_free_reserved);
3390 db_indent -= 2;
3391
3392 iprintf("burst:\n");
3393 db_indent += 2;
3394 iprintf("max %5d min %5d wait %5d empty %5d\n",
3395 vm_pageout_burst_max, vm_pageout_burst_min,
3396 vm_pageout_burst_wait, vm_pageout_empty_wait);
3397 db_indent -= 2;
3398 iprintf("pause:\n");
3399 db_indent += 2;
3400 iprintf("count %5d max %5d\n",
3401 vm_pageout_pause_count, vm_pageout_pause_max);
3402 #if MACH_COUNTERS
3403 iprintf("scan_continue called %8d\n", c_vm_pageout_scan_continue);
3404 #endif /* MACH_COUNTERS */
3405 db_indent -= 2;
3406 db_pageout();
3407 db_indent -= 2;
3408 }
3409
3410 void
3411 db_pageout(void)
3412 {
3413 #if MACH_COUNTERS
3414 extern int c_laundry_pages_freed;
3415 #endif /* MACH_COUNTERS */
3416
3417 iprintf("Pageout Statistics:\n");
3418 db_indent += 2;
3419 iprintf("active %5d inactv %5d\n",
3420 vm_pageout_active, vm_pageout_inactive);
3421 iprintf("nolock %5d avoid %5d busy %5d absent %5d\n",
3422 vm_pageout_inactive_nolock, vm_pageout_inactive_avoid,
3423 vm_pageout_inactive_busy, vm_pageout_inactive_absent);
3424 iprintf("used %5d clean %5d dirty %5d\n",
3425 vm_pageout_inactive_used, vm_pageout_inactive_clean,
3426 vm_pageout_inactive_dirty);
3427 #if MACH_COUNTERS
3428 iprintf("laundry_pages_freed %d\n", c_laundry_pages_freed);
3429 #endif /* MACH_COUNTERS */
3430 #if MACH_CLUSTER_STATS
3431 iprintf("Cluster Statistics:\n");
3432 db_indent += 2;
3433 iprintf("dirtied %5d cleaned %5d collisions %5d\n",
3434 vm_pageout_cluster_dirtied, vm_pageout_cluster_cleaned,
3435 vm_pageout_cluster_collisions);
3436 iprintf("clusters %5d conversions %5d\n",
3437 vm_pageout_cluster_clusters, vm_pageout_cluster_conversions);
3438 db_indent -= 2;
3439 iprintf("Target Statistics:\n");
3440 db_indent += 2;
3441 iprintf("collisions %5d page_dirtied %5d page_freed %5d\n",
3442 vm_pageout_target_collisions, vm_pageout_target_page_dirtied,
3443 vm_pageout_target_page_freed);
3444 db_indent -= 2;
3445 #endif /* MACH_CLUSTER_STATS */
3446 db_indent -= 2;
3447 }
3448
#if	MACH_CLUSTER_STATS
/*
 *	Cluster pageout counters; db_pageout() prints these under
 *	"Cluster Statistics".
 */
unsigned long	vm_pageout_cluster_dirtied = 0UL;
unsigned long	vm_pageout_cluster_cleaned = 0UL;
unsigned long	vm_pageout_cluster_collisions = 0UL;
unsigned long	vm_pageout_cluster_clusters = 0UL;
unsigned long	vm_pageout_cluster_conversions = 0UL;

/*
 *	Target page counters; db_pageout() prints these under
 *	"Target Statistics".
 */
unsigned long	vm_pageout_target_collisions = 0UL;
unsigned long	vm_pageout_target_page_dirtied = 0UL;
unsigned long	vm_pageout_target_page_freed = 0UL;

/* Statistics configured in: the wrapped clause is compiled as written. */
#define	CLUSTER_STAT(clause)	clause
#else	/* MACH_CLUSTER_STATS */
/* Statistics configured out: the wrapped clause expands to nothing. */
#define	CLUSTER_STAT(clause)
#endif	/* MACH_CLUSTER_STATS */
3462
3463 #endif /* MACH_KDB */