]> git.saurik.com Git - apple/xnu.git/blob - osfmk/vm/vm_pageout.c
57ecaa12bec12daa08c95694cf68d65551619bd9
[apple/xnu.git] / osfmk / vm / vm_pageout.c
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * @OSF_COPYRIGHT@
24 */
25 /*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50 /*
51 */
52 /*
53 * File: vm/vm_pageout.c
54 * Author: Avadis Tevanian, Jr., Michael Wayne Young
55 * Date: 1985
56 *
57 * The proverbial page-out daemon.
58 */
59
60 #include <mach_pagemap.h>
61 #include <mach_cluster_stats.h>
62 #include <mach_kdb.h>
63 #include <advisory_pageout.h>
64
65 #include <mach/mach_types.h>
66 #include <mach/memory_object.h>
67 #include <mach/memory_object_default.h>
68 #include <mach/memory_object_control_server.h>
69 #include <mach/mach_host_server.h>
70 #include <mach/vm_param.h>
71 #include <mach/vm_statistics.h>
72 #include <kern/host_statistics.h>
73 #include <kern/counters.h>
74 #include <kern/thread.h>
75 #include <kern/xpr.h>
76 #include <vm/pmap.h>
77 #include <vm/vm_map.h>
78 #include <vm/vm_object.h>
79 #include <vm/vm_page.h>
80 #include <vm/vm_pageout.h>
81 #include <machine/vm_tuning.h>
82 #include <kern/misc_protos.h>
83
84 extern ipc_port_t memory_manager_default; /* defined outside this file */
85
/*
 * Compile-time defaults for the pageout tunables below. Each definition
 * is wrapped in #ifndef, so a value defined earlier (presumably by
 * <machine/vm_tuning.h>, included above) takes precedence.
 */
86 #ifndef VM_PAGE_LAUNDRY_MAX
87 #define VM_PAGE_LAUNDRY_MAX 6 /* outstanding DMM page cleans */
88 #endif /* VM_PAGE_LAUNDRY_MAX */
89
90 #ifndef VM_PAGEOUT_BURST_MAX
91 #define VM_PAGEOUT_BURST_MAX 32 /* simultaneous EMM page cleans */
92 #endif /* VM_PAGEOUT_BURST_MAX */
93
94 #ifndef VM_PAGEOUT_DISCARD_MAX
95 #define VM_PAGEOUT_DISCARD_MAX 68 /* simultaneous EMM page cleans -- NOTE(review): wording is identical to VM_PAGEOUT_BURST_MAX and looks copied; confirm what this limit bounds */
96 #endif /* VM_PAGEOUT_DISCARD_MAX */
97
98 #ifndef VM_PAGEOUT_BURST_WAIT
99 #define VM_PAGEOUT_BURST_WAIT 30 /* milliseconds per page */
100 #endif /* VM_PAGEOUT_BURST_WAIT */
101
102 #ifndef VM_PAGEOUT_EMPTY_WAIT
103 #define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */
104 #endif /* VM_PAGEOUT_EMPTY_WAIT */
105
106 /*
107 * To obtain a reasonable LRU approximation, the inactive queue
108 * needs to be large enough to give pages on it a chance to be
109 * referenced a second time. This macro defines the fraction
110 * of active+inactive pages that should be inactive.
111 * The pageout daemon uses it to update vm_page_inactive_target.
112 *
113 * If vm_page_free_count falls below vm_page_free_target and
114 * vm_page_inactive_count is below vm_page_inactive_target,
115 * then the pageout daemon starts running.
116 *
 * Default: one third of "avail".
 */
117
118 #ifndef VM_PAGE_INACTIVE_TARGET
119 #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 3)
120 #endif /* VM_PAGE_INACTIVE_TARGET */
121
122 /*
123 * Once the pageout daemon starts running, it keeps going
124 * until vm_page_free_count meets or exceeds vm_page_free_target.
 *
 * Default: 15 pages plus 1/80th of "free".
125 */
126
127 #ifndef VM_PAGE_FREE_TARGET
128 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
129 #endif /* VM_PAGE_FREE_TARGET */
130
131 /*
132 * The pageout daemon always starts running once vm_page_free_count
133 * falls below vm_page_free_min.
 *
 * Default: 10 pages plus 1/100th of "free".
134 */
135
136 #ifndef VM_PAGE_FREE_MIN
137 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
138 #endif /* VM_PAGE_FREE_MIN */
139
140 /*
141 * When vm_page_free_count falls below vm_page_free_reserved,
142 * only vm-privileged threads can allocate pages. vm-privilege
143 * allows the pageout daemon and default pager (and any other
144 * associated threads needed for default pageout) to continue
145 * operation by dipping into the reserved pool of pages.
 *
 * Default: 16 pages for each of the VM_PAGE_LAUNDRY_MAX outstanding
 * cleans, plus NCPUS pages.
146 */
147
148 #ifndef VM_PAGE_FREE_RESERVED
149 #define VM_PAGE_FREE_RESERVED \
150 ((16 * VM_PAGE_LAUNDRY_MAX) + NCPUS)
151 #endif /* VM_PAGE_FREE_RESERVED */
152
153 /*
154 * Exported variable used to broadcast the activation of the pageout scan.
155 * Working Set uses this to throttle its use of pmap removes. In this
156 * way, code which runs within memory in an uncontested context does
157 * not keep encountering soft faults.
158 */
159
160 unsigned int vm_pageout_scan_event_counter = 0;
161
162 /*
163 * Forward declarations for internal routines.
164 */
165 extern void vm_pageout_continue(void);
166 extern void vm_pageout_scan(void);
167 extern void vm_pageout_throttle(vm_page_t m);
168 extern vm_page_t vm_pageout_cluster_page(
169 vm_object_t object,
170 vm_object_offset_t offset,
171 boolean_t precious_clean);
172
173 unsigned int vm_pageout_reserved_internal = 0;
174 unsigned int vm_pageout_reserved_really = 0;
175
/*
 * Run-time pageout tunables. They start at zero here; presumably they
 * are loaded from the VM_PAGE_xxx / VM_PAGEOUT_xxx defaults by the
 * pageout initialisation code, which is outside this part of the file
 * (TODO confirm).
 */
176 unsigned int vm_page_laundry_max = 0; /* # of clusters outstanding; vm_pageout_throttle() waits while vm_page_laundry_count >= this */
177 unsigned int vm_page_laundry_min = 0; /* wakeup threshold: set to laundry_max/2 by a throttled thread, reset to 0 by vm_pageout_object_terminate() when it issues the wakeup */
178 unsigned int vm_pageout_burst_max = 0;
179 unsigned int vm_pageout_burst_wait = 0; /* milliseconds per page */
180 unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
181 unsigned int vm_pageout_burst_min = 0;
182 unsigned int vm_pageout_pause_count = 0;
183 unsigned int vm_pageout_pause_max = 0;
184 unsigned int vm_free_page_pause = 100; /* milliseconds */
185
186 /*
187 * These variables record the pageout daemon's actions:
188 * how many pages it looks at and what happens to those pages.
189 * No locking needed because only one thread modifies the variables.
 * They are counters kept for debugging only.
190 */
191
192 unsigned int vm_pageout_active = 0; /* debugging */
193 unsigned int vm_pageout_inactive = 0; /* debugging */
194 unsigned int vm_pageout_inactive_throttled = 0; /* debugging */
195 unsigned int vm_pageout_inactive_forced = 0; /* debugging */
196 unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
197 unsigned int vm_pageout_inactive_avoid = 0; /* debugging */
198 unsigned int vm_pageout_inactive_busy = 0; /* debugging */
199 unsigned int vm_pageout_inactive_absent = 0; /* debugging */
200 unsigned int vm_pageout_inactive_used = 0; /* debugging */
201 unsigned int vm_pageout_inactive_clean = 0; /* debugging */
202 unsigned int vm_pageout_inactive_dirty = 0; /* debugging */
203 unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */
204 unsigned int vm_stat_discard = 0; /* debugging */
205 unsigned int vm_stat_discard_sent = 0; /* debugging */
206 unsigned int vm_stat_discard_failure = 0; /* debugging */
207 unsigned int vm_stat_discard_throttle = 0; /* debugging */
208 unsigned int vm_pageout_scan_active_emm_throttle = 0; /* debugging */
209 unsigned int vm_pageout_scan_active_emm_throttle_success = 0; /* debugging */
210 unsigned int vm_pageout_scan_active_emm_throttle_failure = 0; /* debugging */
211 unsigned int vm_pageout_scan_inactive_emm_throttle = 0; /* debugging */
212 unsigned int vm_pageout_scan_inactive_emm_throttle_success = 0; /* debugging */
213 unsigned int vm_pageout_scan_inactive_emm_throttle_failure = 0; /* debugging */
214
215
/*
 * Counts of pageout objects handed out by vm_pageout_object_allocate():
 * "in_place" for objects whose pager is trusted, "out_of_line" otherwise.
 */
216 unsigned int vm_pageout_out_of_line = 0;
217 unsigned int vm_pageout_in_place = 0;
218 /*
219 * Routine: vm_pageout_object_allocate
220 * Purpose:
221 * Allocate an object for use as out-of-line memory in a
222 * data_return/data_initialize message.
223 * The page must be in an unlocked object.
224 *
225 * If the page belongs to a trusted pager, cleaning in place
226 * will be used, which utilizes a special "pageout object"
227 * containing private alias pages for the real page frames.
228 * Untrusted pagers use normal out-of-line memory.
229 */
230 vm_object_t
231 vm_pageout_object_allocate(
232 vm_page_t m,
233 vm_size_t size,
234 vm_object_offset_t offset)
235 {
236 vm_object_t object = m->object;
237 vm_object_t new_object;
238
239 assert(object->pager_ready);
240
241 if (object->pager_trusted || object->internal)
242 vm_pageout_throttle(m);
243
244 new_object = vm_object_allocate(size);
245
246 if (object->pager_trusted) {
247 assert (offset < object->size);
248
249 vm_object_lock(new_object);
250 new_object->pageout = TRUE;
251 new_object->shadow = object;
252 new_object->can_persist = FALSE;
253 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
254 new_object->shadow_offset = offset;
255 vm_object_unlock(new_object);
256
257 /*
258 * Take a paging reference on the object. This will be dropped
259 * in vm_pageout_object_terminate()
260 */
261 vm_object_lock(object);
262 vm_object_paging_begin(object);
263 vm_object_unlock(object);
264
265 vm_pageout_in_place++;
266 } else
267 vm_pageout_out_of_line++;
268 return(new_object);
269 }
270
/*
 * Optional cluster statistics. With MACH_CLUSTER_STATS enabled the
 * counters below exist and CLUSTER_STAT(clause) expands to "clause";
 * otherwise CLUSTER_STAT() expands to nothing, so the statement passed
 * to it (including its trailing ';') disappears from the build.
 */
271 #if MACH_CLUSTER_STATS
272 unsigned long vm_pageout_cluster_dirtied = 0;
273 unsigned long vm_pageout_cluster_cleaned = 0;
274 unsigned long vm_pageout_cluster_collisions = 0;
275 unsigned long vm_pageout_cluster_clusters = 0;
276 unsigned long vm_pageout_cluster_conversions = 0;
277 unsigned long vm_pageout_target_collisions = 0;
278 unsigned long vm_pageout_target_page_dirtied = 0;
279 unsigned long vm_pageout_target_page_freed = 0;
280 #define CLUSTER_STAT(clause) clause
281 #else /* MACH_CLUSTER_STATS */
282 #define CLUSTER_STAT(clause)
283 #endif /* MACH_CLUSTER_STATS */
284
285 /*
286 * Routine: vm_pageout_object_terminate
287 * Purpose:
288 * Destroy the pageout_object allocated by
289 * vm_pageout_object_allocate(), and perform all of the
290 * required cleanup actions.
291 *
292 * In/Out conditions:
293 * The object must be locked, and will be returned locked.
294 */
295 void
296 vm_pageout_object_terminate(
297 vm_object_t object)
298 {
299 vm_object_t shadow_object;
300
301 /*
302 * Deal with the deallocation (last reference) of a pageout object
303 * (used for cleaning-in-place) by dropping the paging references/
304 * freeing pages in the original object.
305 */
306
307 assert(object->pageout);
308 shadow_object = object->shadow;
309 vm_object_lock(shadow_object);
310
311 while (!queue_empty(&object->memq)) {
312 vm_page_t p, m;
313 vm_object_offset_t offset;
314
315 p = (vm_page_t) queue_first(&object->memq);
316
317 assert(p->private);
318 assert(p->pageout);
319 p->pageout = FALSE;
320 assert(!p->cleaning);
321
322 offset = p->offset;
323 VM_PAGE_FREE(p);
324 p = VM_PAGE_NULL;
325
326 m = vm_page_lookup(shadow_object,
327 offset + object->shadow_offset);
328
329 if(m == VM_PAGE_NULL)
330 continue;
331 assert(m->cleaning);
332 /* used as a trigger on upl_commit etc to recognize the */
333 /* pageout daemon's subseqent desire to pageout a cleaning */
334 /* page. When the bit is on the upl commit code will */
335 /* respect the pageout bit in the target page over the */
336 /* caller's page list indication */
337 m->dump_cleaning = FALSE;
338
339 /*
340 * Account for the paging reference taken when
341 * m->cleaning was set on this page.
342 */
343 vm_object_paging_end(shadow_object);
344 assert((m->dirty) || (m->precious) ||
345 (m->busy && m->cleaning));
346
347 /*
348 * Handle the trusted pager throttle.
349 */
350 vm_page_lock_queues();
351 if (m->laundry) {
352 vm_page_laundry_count--;
353 m->laundry = FALSE;
354 if (vm_page_laundry_count < vm_page_laundry_min) {
355 vm_page_laundry_min = 0;
356 thread_wakeup((event_t) &vm_page_laundry_count);
357 }
358 }
359
360 /*
361 * Handle the "target" page(s). These pages are to be freed if
362 * successfully cleaned. Target pages are always busy, and are
363 * wired exactly once. The initial target pages are not mapped,
364 * (so cannot be referenced or modified) but converted target
365 * pages may have been modified between the selection as an
366 * adjacent page and conversion to a target.
367 */
368 if (m->pageout) {
369 assert(m->busy);
370 assert(m->wire_count == 1);
371 m->cleaning = FALSE;
372 m->pageout = FALSE;
373 #if MACH_CLUSTER_STATS
374 if (m->wanted) vm_pageout_target_collisions++;
375 #endif
376 /*
377 * Revoke all access to the page. Since the object is
378 * locked, and the page is busy, this prevents the page
379 * from being dirtied after the pmap_is_modified() call
380 * returns.
381 */
382 pmap_page_protect(m->phys_addr, VM_PROT_NONE);
383
384 /*
385 * Since the page is left "dirty" but "not modifed", we
386 * can detect whether the page was redirtied during
387 * pageout by checking the modify state.
388 */
389 m->dirty = pmap_is_modified(m->phys_addr);
390
391 if (m->dirty) {
392 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
393 vm_page_unwire(m);/* reactivates */
394 VM_STAT(reactivations++);
395 PAGE_WAKEUP_DONE(m);
396 } else {
397 CLUSTER_STAT(vm_pageout_target_page_freed++;)
398 vm_page_free(m);/* clears busy, etc. */
399 }
400 vm_page_unlock_queues();
401 continue;
402 }
403 /*
404 * Handle the "adjacent" pages. These pages were cleaned in
405 * place, and should be left alone.
406 * If prep_pin_count is nonzero, then someone is using the
407 * page, so make it active.
408 */
409 if (!m->active && !m->inactive && !m->private) {
410 if (m->reference)
411 vm_page_activate(m);
412 else
413 vm_page_deactivate(m);
414 }
415 if((m->busy) && (m->cleaning)) {
416
417 /* the request_page_list case, (COPY_OUT_FROM FALSE) */
418 m->busy = FALSE;
419
420 /* We do not re-set m->dirty ! */
421 /* The page was busy so no extraneous activity */
422 /* could have occured. COPY_INTO is a read into the */
423 /* new pages. CLEAN_IN_PLACE does actually write */
424 /* out the pages but handling outside of this code */
425 /* will take care of resetting dirty. We clear the */
426 /* modify however for the Programmed I/O case. */
427 pmap_clear_modify(m->phys_addr);
428 if(m->absent) {
429 m->absent = FALSE;
430 if(shadow_object->absent_count == 1)
431 vm_object_absent_release(shadow_object);
432 else
433 shadow_object->absent_count--;
434 }
435 m->overwriting = FALSE;
436 } else if (m->overwriting) {
437 /* alternate request page list, write to page_list */
438 /* case. Occurs when the original page was wired */
439 /* at the time of the list request */
440 assert(m->wire_count != 0);
441 vm_page_unwire(m);/* reactivates */
442 m->overwriting = FALSE;
443 } else {
444 /*
445 * Set the dirty state according to whether or not the page was
446 * modified during the pageout. Note that we purposefully do
447 * NOT call pmap_clear_modify since the page is still mapped.
448 * If the page were to be dirtied between the 2 calls, this
449 * this fact would be lost. This code is only necessary to
450 * maintain statistics, since the pmap module is always
451 * consulted if m->dirty is false.
452 */
453 #if MACH_CLUSTER_STATS
454 m->dirty = pmap_is_modified(m->phys_addr);
455
456 if (m->dirty) vm_pageout_cluster_dirtied++;
457 else vm_pageout_cluster_cleaned++;
458 if (m->wanted) vm_pageout_cluster_collisions++;
459 #else
460 m->dirty = 0;
461 #endif
462 }
463 m->cleaning = FALSE;
464
465
466 /*
467 * Wakeup any thread waiting for the page to be un-cleaning.
468 */
469 PAGE_WAKEUP(m);
470 vm_page_unlock_queues();
471 }
472 /*
473 * Account for the paging reference taken in vm_paging_object_allocate.
474 */
475 vm_object_paging_end(shadow_object);
476 vm_object_unlock(shadow_object);
477
478 assert(object->ref_count == 0);
479 assert(object->paging_in_progress == 0);
480 assert(object->resident_page_count == 0);
481 return;
482 }
483
484 /*
485 * Routine: vm_pageout_setup
486 * Purpose:
487 * Set up a page for pageout (clean & flush).
488 *
489 * Move the page to a new object, as part of which it will be
490 * sent to its memory manager in a memory_object_data_write or
491 * memory_object_initialize message.
492 *
493 * The "new_object" and "new_offset" arguments
494 * indicate where the page should be moved.
495 *
496 * In/Out conditions:
497 * The page in question must not be on any pageout queues,
498 * and must be busy. The object to which it belongs
499 * must be unlocked, and the caller must hold a paging
500 * reference to it. The new_object must not be locked.
501 *
502 * This routine returns a pointer to a place-holder page,
503 * inserted at the same offset, to block out-of-order
504 * requests for the page. The place-holder page must
505 * be freed after the data_write or initialize message
506 * has been sent.
507 *
508 * The original page is put on a paging queue and marked
509 * not busy on exit.
510 */
511 vm_page_t
512 vm_pageout_setup(
513 register vm_page_t m,
514 register vm_object_t new_object,
515 vm_object_offset_t new_offset)
516 {
517 register vm_object_t old_object = m->object;
518 vm_object_offset_t paging_offset;
519 vm_object_offset_t offset;
520 register vm_page_t holding_page;
521 register vm_page_t new_m;
522 register vm_page_t new_page;
523 boolean_t need_to_wire = FALSE;
524
525
526 XPR(XPR_VM_PAGEOUT,
527 "vm_pageout_setup, obj 0x%X off 0x%X page 0x%X new obj 0x%X offset 0x%X\n",
528 (integer_t)m->object, (integer_t)m->offset,
529 (integer_t)m, (integer_t)new_object,
530 (integer_t)new_offset);
531 assert(m && m->busy && !m->absent && !m->fictitious && !m->error &&
532 !m->restart);
533
534 assert(m->dirty || m->precious);
535
536 /*
537 * Create a place-holder page where the old one was, to prevent
538 * attempted pageins of this page while we're unlocked.
539 */
540 VM_PAGE_GRAB_FICTITIOUS(holding_page);
541
542 vm_object_lock(old_object);
543
544 offset = m->offset;
545 paging_offset = offset + old_object->paging_offset;
546
547 if (old_object->pager_trusted) {
548 /*
549 * This pager is trusted, so we can clean this page
550 * in place. Leave it in the old object, and mark it
551 * cleaning & pageout.
552 */
553 new_m = holding_page;
554 holding_page = VM_PAGE_NULL;
555
556 /*
557 * Set up new page to be private shadow of real page.
558 */
559 new_m->phys_addr = m->phys_addr;
560 new_m->fictitious = FALSE;
561 new_m->pageout = TRUE;
562
563 /*
564 * Mark real page as cleaning (indicating that we hold a
565 * paging reference to be released via m_o_d_r_c) and
566 * pageout (indicating that the page should be freed
567 * when the pageout completes).
568 */
569 pmap_clear_modify(m->phys_addr);
570 vm_page_lock_queues();
571 new_m->private = TRUE;
572 vm_page_wire(new_m);
573 m->cleaning = TRUE;
574 m->pageout = TRUE;
575
576 vm_page_wire(m);
577 assert(m->wire_count == 1);
578 vm_page_unlock_queues();
579
580 m->dirty = TRUE;
581 m->precious = FALSE;
582 m->page_lock = VM_PROT_NONE;
583 m->unusual = FALSE;
584 m->unlock_request = VM_PROT_NONE;
585 } else {
586 /*
587 * Cannot clean in place, so rip the old page out of the
588 * object, and stick the holding page in. Set new_m to the
589 * page in the new object.
590 */
591 vm_page_lock_queues();
592 VM_PAGE_QUEUES_REMOVE(m);
593 vm_page_remove(m);
594
595 vm_page_insert(holding_page, old_object, offset);
596 vm_page_unlock_queues();
597
598 m->dirty = TRUE;
599 m->precious = FALSE;
600 new_m = m;
601 new_m->page_lock = VM_PROT_NONE;
602 new_m->unlock_request = VM_PROT_NONE;
603
604 if (old_object->internal)
605 need_to_wire = TRUE;
606 }
607 /*
608 * Record that this page has been written out
609 */
610 #if MACH_PAGEMAP
611 vm_external_state_set(old_object->existence_map, offset);
612 #endif /* MACH_PAGEMAP */
613
614 vm_object_unlock(old_object);
615
616 vm_object_lock(new_object);
617
618 /*
619 * Put the page into the new object. If it is a not wired
620 * (if it's the real page) it will be activated.
621 */
622
623 vm_page_lock_queues();
624 vm_page_insert(new_m, new_object, new_offset);
625 if (need_to_wire)
626 vm_page_wire(new_m);
627 else
628 vm_page_activate(new_m);
629 PAGE_WAKEUP_DONE(new_m);
630 vm_page_unlock_queues();
631
632 vm_object_unlock(new_object);
633
634 /*
635 * Return the placeholder page to simplify cleanup.
636 */
637 return (holding_page);
638 }
639
640 /*
641 * Routine: vm_pageclean_setup
642 *
643 * Purpose: setup a page to be cleaned (made non-dirty), but not
644 * necessarily flushed from the VM page cache.
645 * This is accomplished by cleaning in place.
646 *
647 * The page must not be busy, and the object and page
648 * queues must be locked.
649 *
650 */
651 void
652 vm_pageclean_setup(
653 vm_page_t m,
654 vm_page_t new_m,
655 vm_object_t new_object,
656 vm_object_offset_t new_offset)
657 {
658 vm_object_t old_object = m->object;
659 assert(!m->busy);
660 assert(!m->cleaning);
661
662 XPR(XPR_VM_PAGEOUT,
663 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
664 (integer_t)old_object, m->offset, (integer_t)m,
665 (integer_t)new_m, new_offset);
666
667 pmap_clear_modify(m->phys_addr);
668 vm_object_paging_begin(old_object);
669
670 /*
671 * Record that this page has been written out
672 */
673 #if MACH_PAGEMAP
674 vm_external_state_set(old_object->existence_map, m->offset);
675 #endif /*MACH_PAGEMAP*/
676
677 /*
678 * Mark original page as cleaning in place.
679 */
680 m->cleaning = TRUE;
681 m->dirty = TRUE;
682 m->precious = FALSE;
683
684 /*
685 * Convert the fictitious page to a private shadow of
686 * the real page.
687 */
688 assert(new_m->fictitious);
689 new_m->fictitious = FALSE;
690 new_m->private = TRUE;
691 new_m->pageout = TRUE;
692 new_m->phys_addr = m->phys_addr;
693 vm_page_wire(new_m);
694
695 vm_page_insert(new_m, new_object, new_offset);
696 assert(!new_m->wanted);
697 new_m->busy = FALSE;
698 }
699
700 void
701 vm_pageclean_copy(
702 vm_page_t m,
703 vm_page_t new_m,
704 vm_object_t new_object,
705 vm_object_offset_t new_offset)
706 {
707 XPR(XPR_VM_PAGEOUT,
708 "vm_pageclean_copy, page 0x%X new_m 0x%X new_obj 0x%X offset 0x%X\n",
709 m, new_m, new_object, new_offset, 0);
710
711 assert((!m->busy) && (!m->cleaning));
712
713 assert(!new_m->private && !new_m->fictitious);
714
715 pmap_clear_modify(m->phys_addr);
716
717 m->busy = TRUE;
718 vm_object_paging_begin(m->object);
719 vm_page_unlock_queues();
720 vm_object_unlock(m->object);
721
722 /*
723 * Copy the original page to the new page.
724 */
725 vm_page_copy(m, new_m);
726
727 /*
728 * Mark the old page as clean. A request to pmap_is_modified
729 * will get the right answer.
730 */
731 vm_object_lock(m->object);
732 m->dirty = FALSE;
733
734 vm_object_paging_end(m->object);
735
736 vm_page_lock_queues();
737 if (!m->active && !m->inactive)
738 vm_page_activate(m);
739 PAGE_WAKEUP_DONE(m);
740
741 vm_page_insert(new_m, new_object, new_offset);
742 vm_page_activate(new_m);
743 new_m->busy = FALSE; /* No other thread can be waiting */
744 }
745
746
747 /*
748 * Routine: vm_pageout_initialize_page
749 * Purpose:
750 * Causes the specified page to be initialized in
751 * the appropriate memory object. This routine is used to push
752 * pages into a copy-object when they are modified in the
753 * permanent object.
754 *
755 * The page is moved to a temporary object and paged out.
756 *
757 * In/out conditions:
758 * The page in question must not be on any pageout queues.
759 * The object to which it belongs must be locked.
760 * The page must be busy, but not hold a paging reference.
761 *
762 * Implementation:
763 * Move this page to a completely new object.
764 */
765 void
766 vm_pageout_initialize_page(
767 vm_page_t m)
768 {
769 vm_map_copy_t copy;
770 vm_object_t new_object;
771 vm_object_t object;
772 vm_object_offset_t paging_offset;
773 vm_page_t holding_page;
774
775
776 XPR(XPR_VM_PAGEOUT,
777 "vm_pageout_initialize_page, page 0x%X\n",
778 (integer_t)m, 0, 0, 0, 0);
779 assert(m->busy);
780
781 /*
782 * Verify that we really want to clean this page
783 */
784 assert(!m->absent);
785 assert(!m->error);
786 assert(m->dirty);
787
788 /*
789 * Create a paging reference to let us play with the object.
790 */
791 object = m->object;
792 paging_offset = m->offset + object->paging_offset;
793 vm_object_paging_begin(object);
794 vm_object_unlock(object);
795 if (m->absent || m->error || m->restart ||
796 (!m->dirty && !m->precious)) {
797 VM_PAGE_FREE(m);
798 panic("reservation without pageout?"); /* alan */
799 return;
800 }
801
802 /* set the page for future call to vm_fault_list_request */
803 holding_page = NULL;
804 vm_object_lock(m->object);
805 vm_page_lock_queues();
806 pmap_clear_modify(m->phys_addr);
807 m->dirty = TRUE;
808 m->busy = TRUE;
809 m->list_req_pending = TRUE;
810 m->cleaning = TRUE;
811 m->pageout = TRUE;
812 vm_page_wire(m);
813 vm_page_unlock_queues();
814 vm_object_unlock(m->object);
815 vm_pageout_throttle(m);
816
817 /*
818 * Write the data to its pager.
819 * Note that the data is passed by naming the new object,
820 * not a virtual address; the pager interface has been
821 * manipulated to use the "internal memory" data type.
822 * [The object reference from its allocation is donated
823 * to the eventual recipient.]
824 */
825 memory_object_data_initialize(object->pager,
826 paging_offset,
827 PAGE_SIZE);
828
829 vm_object_lock(object);
830 }
831
/*
 * Histograms updated by vm_pageout_cluster() when MACH_CLUSTER_STATS is
 * enabled. Each field is indexed independently by a page count: the
 * entry at index k of pages_in_cluster counts clusters of k pages, and
 * likewise for the lower/higher neighbour counts.
 * NOTE(review): indices are not range-checked against MAXCLUSTERPAGES at
 * the point of use; confirm a cluster can never reach 16 pages.
 */
832 #if MACH_CLUSTER_STATS
833 #define MAXCLUSTERPAGES 16
834 struct {
835 unsigned long pages_in_cluster;
836 unsigned long pages_at_higher_offsets;
837 unsigned long pages_at_lower_offsets;
838 } cluster_stats[MAXCLUSTERPAGES];
839 #endif /* MACH_CLUSTER_STATS */
840
/*
 * When FALSE, vm_pageout_cluster() forces the cluster size down to a
 * single page even for trusted pagers.
 */
841 boolean_t allow_clustered_pageouts = FALSE;
842
843 /*
844 * vm_pageout_cluster:
845 *
846 * Given a page, page it out, and attempt to clean adjacent pages
847 * in the same operation.
848 *
849 * The page must be busy, and the object unlocked w/ paging reference
850 * to prevent deallocation or collapse. The page must not be on any
851 * pageout queue.
 *
 * The page is marked cleaning/pageout/list_req_pending and wired, passed
 * through vm_pageout_throttle(), and the range [cluster_start,
 * cluster_end) found by probing neighbouring offsets is handed to the
 * pager with memory_object_data_return(). The caller's paging reference
 * is released before returning. The object is locked and unlocked
 * internally and is unlocked on return.
852 */
853 void
854 vm_pageout_cluster(
855 vm_page_t m)
856 {
857 vm_object_t object = m->object;
858 vm_object_offset_t offset = m->offset; /* from vm_object start */
859 vm_object_offset_t paging_offset = m->offset + object->paging_offset;
860 vm_object_t new_object; /* not used in this routine */
861 vm_object_offset_t new_offset; /* only feeds the assertions in the search loops */
862 vm_size_t cluster_size;
863 vm_object_offset_t cluster_offset; /* from memory_object start */
864 vm_object_offset_t cluster_lower_bound; /* from vm_object_start */
865 vm_object_offset_t cluster_upper_bound; /* from vm_object_start */
866 vm_object_offset_t cluster_start, cluster_end;/* from vm_object start */
867 vm_object_offset_t offset_within_cluster;
868 vm_size_t length_of_data;
869 vm_page_t friend, holding_page;
870 kern_return_t rc;
871 boolean_t precious_clean = TRUE;
872 int pages_in_cluster;
873
874 CLUSTER_STAT(int pages_at_higher_offsets = 0;)
875 CLUSTER_STAT(int pages_at_lower_offsets = 0;)
876
877 XPR(XPR_VM_PAGEOUT,
878 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
879 (integer_t)object, offset, (integer_t)m, 0, 0);
880
881 CLUSTER_STAT(vm_pageout_cluster_clusters++;)
882 /*
883 * Only a certain kind of page is appreciated here.
884 */
885 assert(m->busy && (m->dirty || m->precious) && (m->wire_count == 0));
886 assert(!m->cleaning && !m->pageout && !m->inactive && !m->active);
887
888 vm_object_lock(object);
889 cluster_size = object->cluster_size;
890
/* The assertion is backed by a run-time clamp to one page (presumably for builds where assert() compiles away). */
891 assert(cluster_size >= PAGE_SIZE);
892 if (cluster_size < PAGE_SIZE) cluster_size = PAGE_SIZE;
893 assert(object->pager_created && object->pager_initialized);
894 assert(object->internal || object->pager_ready);
895
/*
 * NOTE(review): precious_clean is initialised to TRUE above, so this
 * assignment never changes it; the "dirty" argument passed to
 * memory_object_data_return() below (!precious_clean) is therefore
 * always FALSE. Confirm whether the initial value was meant to be FALSE.
 */
896 if (m->precious && !m->dirty)
897 precious_clean = TRUE;
898
/* Clustering only applies to trusted pagers, and only when globally enabled. */
899 if (!object->pager_trusted || !allow_clustered_pageouts)
900 cluster_size = PAGE_SIZE;
901 vm_object_unlock(object);
902
903 cluster_offset = paging_offset & (vm_object_offset_t)(cluster_size - 1);
904 /* bytes from beginning of cluster */
905 /*
906 * Due to unaligned mappings, we have to be careful
907 * of negative offsets into the VM object. Clip the cluster
908 * boundary to the VM object, not the memory object.
909 */
910 if (offset > cluster_offset) {
911 cluster_lower_bound = offset - cluster_offset;
912 /* from vm_object */
913 } else {
914 cluster_lower_bound = 0;
915 }
916 cluster_upper_bound = (offset - cluster_offset) +
917 (vm_object_offset_t)cluster_size;
918
919 /* set the page for future call to vm_fault_list_request */
/* holding_page is never assigned anything but NULL in this routine, so the cleanup branch near the end is not taken. */
920 holding_page = NULL;
921 vm_object_lock(m->object);
922 vm_page_lock_queues();
923 m->busy = TRUE;
924 m->list_req_pending = TRUE;
925 m->cleaning = TRUE;
926 m->pageout = TRUE;
927 vm_page_wire(m);
928 vm_page_unlock_queues();
929 vm_object_unlock(m->object);
930 vm_pageout_throttle(m);
931
932 /*
933 * Search backward for adjacent eligible pages to clean in
934 * this operation.
 *
 * NOTE(review): the loop body only uses "friend" as a found/not-found
 * result; every flag update and the vm_page_wire() call are applied to
 * the target page "m", not to the neighbour. The in-line comment says
 * this is intentional ("do nothing except advance the write request"),
 * but a repeated vm_page_wire(m) looks at odds with the wire_count == 1
 * assertion in vm_pageout_object_terminate() -- confirm.
 * NOTE(review): if vm_object_offset_t is unsigned and cluster_lower_bound
 * is 0, "cluster_start >= cluster_lower_bound" cannot become false; the
 * loop then ends only through the VM_PAGE_NULL break -- confirm.
935 */
936
937 cluster_start = offset;
938 if (offset) { /* avoid wrap-around at zero */
939 for (cluster_start = offset - PAGE_SIZE_64;
940 cluster_start >= cluster_lower_bound;
941 cluster_start -= PAGE_SIZE_64) {
942 assert(cluster_size > PAGE_SIZE);
943
944 vm_object_lock(object);
945 vm_page_lock_queues();
946
947 if ((friend = vm_pageout_cluster_page(object, cluster_start,
948 precious_clean)) == VM_PAGE_NULL) {
949 vm_page_unlock_queues();
950 vm_object_unlock(object);
951 break;
952 }
953 new_offset = (cluster_start + object->paging_offset)
954 & (cluster_size - 1);
955
956 assert(new_offset < cluster_offset);
957 m->list_req_pending = TRUE;
958 m->cleaning = TRUE;
959 /* do nothing except advance the write request, all we really need to */
960 /* do is push the target page and let the code at the other end decide */
961 /* what is really the right size */
962 if (vm_page_free_count <= vm_page_free_reserved) {
963 m->busy = TRUE;
964 m->pageout = TRUE;
965 vm_page_wire(m);
966 }
967
968 vm_page_unlock_queues();
969 vm_object_unlock(object);
970 if(m->dirty || m->object->internal) {
971 CLUSTER_STAT(pages_at_lower_offsets++;)
972 }
973
974 }
/* The loop leaves cluster_start one page below the first page of the cluster; step back up. */
975 cluster_start += PAGE_SIZE_64;
976 }
977 assert(cluster_start >= cluster_lower_bound);
978 assert(cluster_start <= offset);
979 /*
980 * Search forward for adjacent eligible pages to clean in
981 * this operation.
 * As in the backward search, the flag updates below are applied to "m".
 * On exit cluster_end is the first offset past the cluster.
982 */
983 for (cluster_end = offset + PAGE_SIZE_64;
984 cluster_end < cluster_upper_bound;
985 cluster_end += PAGE_SIZE_64) {
986 assert(cluster_size > PAGE_SIZE);
987
988 vm_object_lock(object);
989 vm_page_lock_queues();
990
991 if ((friend = vm_pageout_cluster_page(object, cluster_end,
992 precious_clean)) == VM_PAGE_NULL) {
993 vm_page_unlock_queues();
994 vm_object_unlock(object);
995 break;
996 }
997 new_offset = (cluster_end + object->paging_offset)
998 & (cluster_size - 1);
999
1000 assert(new_offset < cluster_size);
1001 m->list_req_pending = TRUE;
1002 m->cleaning = TRUE;
1003 /* do nothing except advance the write request, all we really need to */
1004 /* do is push the target page and let the code at the other end decide */
1005 /* what is really the right size */
1006 if (vm_page_free_count <= vm_page_free_reserved) {
1007 m->busy = TRUE;
1008 m->pageout = TRUE;
1009 vm_page_wire(m);
1010 }
1011
1012 vm_page_unlock_queues();
1013 vm_object_unlock(object);
1014
1015 if(m->dirty || m->object->internal) {
1016 CLUSTER_STAT(pages_at_higher_offsets++;)
1017 }
1018 }
1019 assert(cluster_end <= cluster_upper_bound);
1020 assert(cluster_end >= offset + PAGE_SIZE);
1021
1022 /*
1023 * (offset - cluster_offset) is beginning of cluster_object
1024 * relative to vm_object start.
1025 */
1026 offset_within_cluster = cluster_start - (offset - cluster_offset);
1027 length_of_data = cluster_end - cluster_start;
1028
1029 assert(offset_within_cluster < cluster_size);
1030 assert((offset_within_cluster + length_of_data) <= cluster_size);
1031
/* rc is set and asserted here without any intervening call; the pair has no effect. */
1032 rc = KERN_SUCCESS;
1033 assert(rc == KERN_SUCCESS);
1034
1035 pages_in_cluster = length_of_data/PAGE_SIZE;
1036
/* NOTE(review): the three indices below are not checked against MAXCLUSTERPAGES. */
1037 #if MACH_CLUSTER_STATS
1038 (cluster_stats[pages_at_lower_offsets].pages_at_lower_offsets)++;
1039 (cluster_stats[pages_at_higher_offsets].pages_at_higher_offsets)++;
1040 (cluster_stats[pages_in_cluster].pages_in_cluster)++;
1041 #endif /* MACH_CLUSTER_STATS */
1042
1043 /*
1044 * Send the data to the pager.
 * object->paging_offset and object->pager are read here with the
 * object unlocked. The result stored in rc is not examined.
1045 */
1046 paging_offset = cluster_start + object->paging_offset;
1047
1048 rc = memory_object_data_return(object->pager,
1049 paging_offset,
1050 length_of_data,
1051 !precious_clean,
1052 FALSE);
1053
/* Release the paging reference the caller was required to hold. */
1054 vm_object_lock(object);
1055 vm_object_paging_end(object);
1056
1057 if (holding_page) {
1058 assert(!object->pager_trusted);
1059 VM_PAGE_FREE(holding_page);
1060 vm_object_paging_end(object);
1061 }
1062
1063 vm_object_unlock(object);
1064 }
1065
1066 /*
1067 * Trusted pager throttle.
1068 * Object must be unlocked, page queues must be unlocked.
1069 */
1070 void
1071 vm_pageout_throttle(
1072 register vm_page_t m)
1073 {
1074 vm_page_lock_queues();
1075 assert(!m->laundry);
1076 m->laundry = TRUE;
1077 while (vm_page_laundry_count >= vm_page_laundry_max) {
1078 /*
1079 * Set the threshold for when vm_page_free()
1080 * should wake us up.
1081 */
1082 vm_page_laundry_min = vm_page_laundry_max/2;
1083
1084 assert_wait((event_t) &vm_page_laundry_count, THREAD_UNINT);
1085 vm_page_unlock_queues();
1086
1087 /*
1088 * Pause to let the default pager catch up.
1089 */
1090 thread_block((void (*)(void)) 0);
1091 vm_page_lock_queues();
1092 }
1093 vm_page_laundry_count++;
1094 vm_page_unlock_queues();
1095 }
1096
1097 /*
1098 * The global variable vm_pageout_clean_active_pages controls whether
1099 * active pages are considered valid to be cleaned in place during a
1100 * clustered pageout. Performance measurements are necessary to determine
1101 * the best policy.
1102 */
1103 int vm_pageout_clean_active_pages = 1;
1104 /*
1105 * vm_pageout_cluster_page: [Internal]
1106 *
1107 * return a vm_page_t to the page at (object,offset) if it is appropriate
1108 * to clean in place. Pages that are non-existent, busy, absent, already
1109 * cleaning, or not dirty are not eligible to be cleaned as an adjacent
1110 * page in a cluster.
1111 *
1112 * The object must be locked on entry, and remains locked throughout
1113 * this call.
1114 */
1115
1116 vm_page_t
1117 vm_pageout_cluster_page(
1118 vm_object_t object,
1119 vm_object_offset_t offset,
1120 boolean_t precious_clean)
1121 {
1122 vm_page_t m;
1123
1124 XPR(XPR_VM_PAGEOUT,
1125 "vm_pageout_cluster_page, object 0x%X offset 0x%X\n",
1126 (integer_t)object, offset, 0, 0, 0);
1127
1128 if ((m = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
1129 return(VM_PAGE_NULL);
1130
1131 if (m->busy || m->absent || m->cleaning ||
1132 (m->wire_count != 0) || m->error)
1133 return(VM_PAGE_NULL);
1134
1135 if (vm_pageout_clean_active_pages) {
1136 if (!m->active && !m->inactive) return(VM_PAGE_NULL);
1137 } else {
1138 if (!m->inactive) return(VM_PAGE_NULL);
1139 }
1140
1141 assert(!m->private);
1142 assert(!m->fictitious);
1143
1144 if (!m->dirty) m->dirty = pmap_is_modified(m->phys_addr);
1145
1146 if (precious_clean) {
1147 if (!m->precious || !m->dirty)
1148 return(VM_PAGE_NULL);
1149 } else {
1150 if (!m->dirty)
1151 return(VM_PAGE_NULL);
1152 }
1153 return(m);
1154 }
1155
1156 /*
1157 * vm_pageout_scan does the dirty work for the pageout daemon.
1158 * It returns with vm_page_queue_free_lock held and
1159 * vm_page_free_wanted == 0.
1160 */
/*
 * Overview of one call, as implemented below:
 *
 *  - At Restart the collection hooks (stack_collect, consider_task_collect,
 *    consider_thread_collect, consider_zone_gc, consider_machine_collect)
 *    are run and loop_detect is reloaded with the sum of the active and
 *    inactive page counts.
 *  - Every pass of the main for loop takes the page queues lock,
 *    recomputes vm_page_inactive_target and moves pages off the active
 *    queue while the inactive count is below the target (or while
 *    need_more_inactive_pages is set) and the active queue is not empty.
 *  - The loop is left, with vm_page_queue_free_lock still held and the
 *    page queues lock released, once vm_page_free_count has reached
 *    vm_page_free_target and vm_page_free_wanted is 0.
 *  - Otherwise the pass either sleeps and jumps back to Restart, or
 *    examines one inactive page and requeues it, leaves it off the
 *    queues, frees it, reactivates it, or hands it to vm_pageout_cluster().
 *
 * NOTE(review): within this function burst_count is initialised to 0 and
 * never incremented (its only increment is commented out near the end of
 * the loop), and the locals laundry_pages and pages are never read.
 */
1161 extern void vm_pageout_scan_continue(void); /* forward; */
1162
1163 void
1164 vm_pageout_scan(void)
1165 {
1166 unsigned int burst_count;
/* 'now' is only read inside the THREAD_SWAPPER section below. */
1167 boolean_t now = FALSE;
1168 unsigned int laundry_pages;
1169 boolean_t need_more_inactive_pages;
1170 unsigned int loop_detect;
1171
1172 XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
1173
1174 /*???*/ /*
1175 * We want to gradually dribble pages from the active queue
1176 * to the inactive queue. If we let the inactive queue get
1177 * very small, and then suddenly dump many pages into it,
1178 * those pages won't get a sufficient chance to be referenced
1179 * before we start taking them from the inactive queue.
1180 *
1181 * We must limit the rate at which we send pages to the pagers.
1182 * data_write messages consume memory, for message buffers and
1183 * for map-copy objects. If we get too far ahead of the pagers,
1184 * we can potentially run out of memory.
1185 *
1186 * We can use the laundry count to limit directly the number
1187 * of pages outstanding to the default pager. A similar
1188 * strategy for external pagers doesn't work, because
1189 * external pagers don't have to deallocate the pages sent them,
1190 * and because we might have to send pages to external pagers
1191 * even if they aren't processing writes. So we also
1192 * use a burst count to limit writes to external pagers.
1193 *
1194 * When memory is very tight, we can't rely on external pagers to
1195 * clean pages. They probably aren't running, because they
1196 * aren't vm-privileged. If we kept sending dirty pages to them,
1197 * we could exhaust the free list. However, we can't just ignore
1198 * pages belonging to external objects, because there might be no
1199 * pages belonging to internal objects. Hence, we get the page
1200 * into an internal object and then immediately double-page it,
1201 * sending it to the default pager.
1202 *
1203 * consider_zone_gc should be last, because the other operations
1204 * might return memory to zones.
1205 */
1206
1207
1208 Restart:
1209
1210 #if THREAD_SWAPPER
1211 mutex_lock(&vm_page_queue_free_lock);
1212 now = (vm_page_free_count < vm_page_free_min);
1213 mutex_unlock(&vm_page_queue_free_lock);
1214
1215 swapout_threads(now);
1216 #endif /* THREAD_SWAPPER */
1217
1218 stack_collect();
1219 consider_task_collect();
1220 consider_thread_collect();
1221 consider_zone_gc();
1222 consider_machine_collect();
1223
/*
 * loop_detect is a budget of scan passes: it is decremented in the pause
 * test below each time that test is reached with a non-empty inactive
 * queue, and reaching 0 forces a sleep followed by a Restart.
 */
1224 loop_detect = vm_page_active_count + vm_page_inactive_count;
1225 #if 0
1226 if (vm_page_free_count <= vm_page_free_reserved) {
1227 need_more_inactive_pages = TRUE;
1228 } else {
1229 need_more_inactive_pages = FALSE;
1230 }
1231 #else
1232 need_more_inactive_pages = FALSE;
1233 #endif
1234
1235 for (burst_count = 0;;) {
1236 register vm_page_t m;
1237 register vm_object_t object;
1238
1239 /*
1240 * Recalculate vm_page_inactive_target.
1241 */
1242
1243 vm_page_lock_queues();
1244 vm_page_inactive_target =
1245 VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1246 vm_page_inactive_count);
1247
1248 /*
1249 * Move pages from active to inactive.
1250 */
1251
1252 while ((vm_page_inactive_count < vm_page_inactive_target ||
1253 need_more_inactive_pages) &&
1254 !queue_empty(&vm_page_queue_active)) {
/* This declaration shadows the 'object' of the enclosing for body. */
1255 register vm_object_t object;
1256
1257 vm_pageout_active++;
1258 m = (vm_page_t) queue_first(&vm_page_queue_active);
1259
1260 /*
1261 * If we're getting really low on memory,
1262 * try selecting a page that will go
1263 * directly to the default_pager.
1264 * If there are no such pages, we have to
1265 * page out a page backed by an EMM,
1266 * so that the default_pager can recover
1267 * it eventually.
1268 */
1269 if (need_more_inactive_pages &&
1270 (IP_VALID(memory_manager_default))) {
1271 vm_pageout_scan_active_emm_throttle++;
1272 do {
1273 assert(m->active && !m->inactive);
1274 object = m->object;
1275
1276 if (vm_object_lock_try(object)) {
1277 #if 0
1278 if (object->pager_trusted ||
1279 object->internal) {
1280 /* found one ! */
1281 vm_pageout_scan_active_emm_throttle_success++;
1282 goto object_locked_active;
1283 }
1284 #else
1285 vm_pageout_scan_active_emm_throttle_success++;
1286 goto object_locked_active;
1287 #endif
/*
 * Not reached while the #else branch above is compiled in: the goto
 * always leaves with the object locked, so the first page whose object
 * can be locked is the one selected.
 */
1288 vm_object_unlock(object);
1289 }
1290 m = (vm_page_t) queue_next(&m->pageq);
1291 } while (!queue_end(&vm_page_queue_active,
1292 (queue_entry_t) m));
1293 if (queue_end(&vm_page_queue_active,
1294 (queue_entry_t) m)) {
1295 vm_pageout_scan_active_emm_throttle_failure++;
1296 m = (vm_page_t)
1297 queue_first(&vm_page_queue_active);
1298 }
1299 }
1300
1301 assert(m->active && !m->inactive);
1302
1303 object = m->object;
1304 if (!vm_object_lock_try(object)) {
1305 /*
1306 * Move page to end and continue.
1307 */
1308
1309 queue_remove(&vm_page_queue_active, m,
1310 vm_page_t, pageq);
1311 queue_enter(&vm_page_queue_active, m,
1312 vm_page_t, pageq);
1313 vm_page_unlock_queues();
1314
1315 mutex_pause();
1316 vm_page_lock_queues();
1317 continue;
1318 }
1319
1320 object_locked_active:
1321 /*
1322 * If the page is busy, then we pull it
1323 * off the active queue and leave it alone.
1324 */
1325
1326 if (m->busy) {
1327 vm_object_unlock(object);
1328 queue_remove(&vm_page_queue_active, m,
1329 vm_page_t, pageq);
1330 m->active = FALSE;
1331 if (!m->fictitious)
1332 vm_page_active_count--;
1333 continue;
1334 }
1335
1336 /*
1337 * Deactivate the page while holding the object
1338 * locked, so we know the page is still not busy.
1339 * This should prevent races between pmap_enter
1340 * and pmap_clear_reference. The page might be
1341 * absent or fictitious, but vm_page_deactivate
1342 * can handle that.
1343 */
1344
1345 vm_page_deactivate(m);
1346 vm_object_unlock(object);
1347 }
1348
1349 /*
1350 * We are done if we have met our target *and*
1351 * nobody is still waiting for a page.
1352 */
/*
 * The first test is made without vm_page_queue_free_lock; the condition
 * is re-evaluated once that lock is held.
 */
1353 if (vm_page_free_count >= vm_page_free_target) {
1354 mutex_lock(&vm_page_queue_free_lock);
1355 if ((vm_page_free_count >= vm_page_free_target) &&
1356 (vm_page_free_wanted == 0)) {
1357 vm_page_unlock_queues();
/* Leave the loop with vm_page_queue_free_lock still held. */
1358 break;
1359 }
1360 mutex_unlock(&vm_page_queue_free_lock);
1361 }
1362 /*
1363 * Sometimes we have to pause:
1364 * 1) No inactive pages - nothing to do.
1365 * 2) Flow control - wait for untrusted pagers to catch up.
1366 */
1367
/*
 * Because of short-circuit evaluation, loop_detect is only decremented
 * when the inactive queue is not empty.
 */
1368 if (queue_empty(&vm_page_queue_inactive) ||
1369 ((--loop_detect) == 0) ||
1370 (burst_count >= vm_pageout_burst_max)) {
1371 unsigned int pages, msecs;
1372 int wait_result;
1373
1374 consider_machine_adjust();
1375 /*
1376 * vm_pageout_burst_wait is msecs/page.
1377 * If there is nothing for us to do, we wait
1378 * at least vm_pageout_empty_wait msecs.
1379 */
/* NOTE(review): 'pages' is assigned here but not read afterwards. */
1380 pages = burst_count;
1381
1382 if (loop_detect == 0) {
1383 printf("Warning: No physical memory suitable for pageout or reclaim, pageout thread temporarily going to sleep\n");
1384 msecs = vm_free_page_pause;
1385 }
1386 else {
1387 msecs = burst_count * vm_pageout_burst_wait;
1388 }
1389
1390 if (queue_empty(&vm_page_queue_inactive) &&
1391 (msecs < vm_pageout_empty_wait))
1392 msecs = vm_pageout_empty_wait;
1393 vm_page_unlock_queues();
1394
1395 assert_wait_timeout(msecs, THREAD_INTERRUPTIBLE);
1396 counter(c_vm_pageout_scan_block++);
1397
1398 /*
1399 * Unfortunately, we don't have call_continuation
1400 * so we can't rely on tail-recursion.
1401 */
1402 wait_result = thread_block((void (*)(void)) 0);
1403 if (wait_result != THREAD_TIMED_OUT)
1404 thread_cancel_timer();
1405 vm_pageout_scan_continue();
1406
1407 goto Restart;
1408 /*NOTREACHED*/
1409 }
1410
1411 vm_pageout_inactive++;
1412 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
1413
1414 if ((vm_page_free_count <= vm_page_free_reserved) &&
1415 (IP_VALID(memory_manager_default))) {
1416 /*
1417 * We're really low on memory. Try to select a page that
1418 * would go directly to the default_pager.
1419 * If there are no such pages, we have to page out a
1420 * page backed by an EMM, so that the default_pager
1421 * can recover it eventually.
1422 */
1423 vm_pageout_scan_inactive_emm_throttle++;
1424 do {
1425 assert(!m->active && m->inactive);
1426 object = m->object;
1427
1428 if (vm_object_lock_try(object)) {
1429 #if 0
1430 if (object->pager_trusted ||
1431 object->internal) {
1432 /* found one ! */
1433 vm_pageout_scan_inactive_emm_throttle_success++;
1434 goto object_locked_inactive;
1435 }
1436 #else
1437 vm_pageout_scan_inactive_emm_throttle_success++;
1438 goto object_locked_inactive;
1439 #endif /* 0 */
/*
 * Not reached while the #else branch above is compiled in: the goto
 * always leaves with the object locked.
 */
1440 vm_object_unlock(object);
1441 }
1442 m = (vm_page_t) queue_next(&m->pageq);
1443 } while (!queue_end(&vm_page_queue_inactive,
1444 (queue_entry_t) m));
1445 if (queue_end(&vm_page_queue_inactive,
1446 (queue_entry_t) m)) {
1447 vm_pageout_scan_inactive_emm_throttle_failure++;
1448 /*
1449 * We should check the "active" queue
1450 * for good candidates to page out.
1451 */
1452 need_more_inactive_pages = TRUE;
1453
1454 m = (vm_page_t)
1455 queue_first(&vm_page_queue_inactive);
1456 }
1457 }
1458
1459 assert(!m->active && m->inactive);
1460 object = m->object;
1461
1462 /*
1463 * Try to lock object; since we've got the
1464 * page queues lock, we can only try for this one.
1465 */
1466
1467 if (!vm_object_lock_try(object)) {
1468 /*
1469 * Move page to end and continue.
1470 * Don't re-issue ticket
1471 */
1472 queue_remove(&vm_page_queue_inactive, m,
1473 vm_page_t, pageq);
1474 queue_enter(&vm_page_queue_inactive, m,
1475 vm_page_t, pageq);
1476 vm_page_unlock_queues();
1477
1478 mutex_pause();
1479 vm_pageout_inactive_nolock++;
/* The page queues lock is taken again at the top of the for loop. */
1480 continue;
1481 }
1482
1483 object_locked_inactive:
1484 /*
1485 * Paging out pages of objects which pager is being
1486 * created by another thread must be avoided, because
1487 * this thread may claim for memory, thus leading to a
1488 * possible dead lock between it and the pageout thread
1489 * which will wait for pager creation, if such pages are
1490 * finally chosen. The remaining assumption is that there
1491 * will finally be enough available pages in the inactive
1492 * pool to page out in order to satisfy all memory claimed
1493 * by the thread which concurrently creates the pager.
1494 */
1495
1496 if (!object->pager_initialized && object->pager_created) {
1497 /*
1498 * Move page to end and continue, hoping that
1499 * there will be enough other inactive pages to
1500 * page out so that the thread which currently
1501 * initializes the pager will succeed.
1502 * Don't re-grant the ticket, the page should be
1503 * pulled from the queue and paged out whenever
1504 * one of its logically adjacent fellows is
1505 * targeted.
1506 */
1507 queue_remove(&vm_page_queue_inactive, m,
1508 vm_page_t, pageq);
1509 queue_enter(&vm_page_queue_inactive, m,
1510 vm_page_t, pageq);
1511 vm_page_unlock_queues();
1512 vm_object_unlock(object);
1513 vm_pageout_inactive_avoid++;
1514 continue;
1515 }
1516
1517 /*
1518 * Remove the page from the inactive list.
1519 */
1520
1521 queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
1522 m->inactive = FALSE;
1523 if (!m->fictitious)
1524 vm_page_inactive_count--;
1525
1526 if (m->busy || !object->alive) {
1527 /*
1528 * Somebody is already playing with this page.
1529 * Leave it off the pageout queues.
1530 */
1531
1532 vm_page_unlock_queues();
1533 vm_object_unlock(object);
1534 vm_pageout_inactive_busy++;
1535 continue;
1536 }
1537
1538 /*
1539 * If it's absent or in error, we can reclaim the page.
1540 */
1541
1542 if (m->absent || m->error) {
1543 vm_pageout_inactive_absent++;
/*
 * reclaim_page is also the target of the "clean and not precious" goto
 * further down; both arrive with the object and page queues locked.
 */
1544 reclaim_page:
1545 vm_page_free(m);
1546 vm_page_unlock_queues();
1547 vm_object_unlock(object);
1548 continue;
1549 }
1550
1551 assert(!m->private);
1552 assert(!m->fictitious);
1553
1554 /*
1555 * If already cleaning this page in place, convert from
1556 * "adjacent" to "target". We can leave the page mapped,
1557 * and vm_pageout_object_terminate will determine whether
1558 * to free or reactivate.
1559 */
1560
1561 if (m->cleaning) {
1562 #if MACH_CLUSTER_STATS
1563 vm_pageout_cluster_conversions++;
1564 #endif
1565 m->busy = TRUE;
1566 m->pageout = TRUE;
1567 m->dump_cleaning = TRUE;
1568 vm_page_wire(m);
1569 vm_object_unlock(object);
1570 vm_page_unlock_queues();
1571 continue;
1572 }
1573
1574 /*
1575 * If it's being used, reactivate.
1576 * (Fictitious pages are either busy or absent.)
1577 */
1578
1579 if (m->reference || pmap_is_referenced(m->phys_addr)) {
1580 vm_pageout_inactive_used++;
/* NOTE(review): no goto to reactivate_page appears in this function. */
1581 reactivate_page:
1582 #if ADVISORY_PAGEOUT
1583 if (m->discard_request) {
1584 m->discard_request = FALSE;
1585 }
1586 #endif /* ADVISORY_PAGEOUT */
1587 vm_object_unlock(object);
1588 vm_page_activate(m);
1589 VM_STAT(reactivations++);
1590 vm_page_unlock_queues();
1591 continue;
1592 }
1593
1594 #if ADVISORY_PAGEOUT
1595 if (object->advisory_pageout) {
1596 boolean_t do_throttle;
1597 memory_object_t pager;
1598 vm_object_offset_t discard_offset;
1599
1600 if (m->discard_request) {
1601 vm_stat_discard_failure++;
1602 goto mandatory_pageout;
1603 }
1604
1605 assert(object->pager_initialized);
1606 m->discard_request = TRUE;
1607 pager = object->pager;
1608
1609 /* system-wide throttle */
1610 do_throttle = (vm_page_free_count <=
1611 vm_page_free_reserved);
1612
1613 #if 0
1614 /*
1615 * JMM - Do we need a replacement throttle
1616 * mechanism for pagers?
1617 */
1618 if (!do_throttle) {
1619 /* throttle on this pager */
1620 /* XXX lock ordering ? */
1621 ip_lock(port);
1622 do_throttle= imq_full(&port->ip_messages);
1623 ip_unlock(port);
1624 }
1625 #endif
1626
1627 if (do_throttle) {
1628 vm_stat_discard_throttle++;
1629 #if 0
1630 /* ignore this page and skip to next */
1631 vm_page_unlock_queues();
1632 vm_object_unlock(object);
1633 continue;
1634 #else
1635 /* force mandatory pageout */
1636 goto mandatory_pageout;
1637 #endif
1638 }
1639
1640 /* proceed with discard_request */
1641 vm_page_activate(m);
1642 vm_stat_discard++;
1643 VM_STAT(reactivations++);
1644 discard_offset = m->offset + object->paging_offset;
1645 vm_stat_discard_sent++;
1646 vm_page_unlock_queues();
1647 vm_object_unlock(object);
1648
/* The discard request itself is commented out; the page is only reactivated. */
1649 /*
1650 memory_object_discard_request(object->pager,
1651 discard_offset,
1652 PAGE_SIZE);
1653 */
1654 continue;
1655 }
1656 mandatory_pageout:
1657 #endif /* ADVISORY_PAGEOUT */
1658
1659 XPR(XPR_VM_PAGEOUT,
1660 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
1661 (integer_t)object, (integer_t)m->offset, (integer_t)m, 0,0);
1662
1663 /*
1664 * Eliminate all mappings.
1665 */
1666
1667 m->busy = TRUE;
1668 pmap_page_protect(m->phys_addr, VM_PROT_NONE);
1669
1670 if (!m->dirty)
1671 m->dirty = pmap_is_modified(m->phys_addr);
1672 /*
1673 * If it's clean and not precious, we can free the page.
1674 */
1675
1676 if (!m->dirty && !m->precious) {
1677 vm_pageout_inactive_clean++;
1678 goto reclaim_page;
1679 }
/* From here on only the object lock is held; the page stays marked busy. */
1680 vm_page_unlock_queues();
1681
1682 /*
1683 * If there is no memory object for the page, create
1684 * one and hand it to the default pager.
1685 */
1686
1687 if (!object->pager_initialized)
1688 vm_object_collapse(object);
1689 if (!object->pager_initialized)
1690 vm_object_pager_create(object);
1691 if (!object->pager_initialized) {
1692 /*
1693 * Still no pager for the object.
1694 * Reactivate the page.
1695 *
1696 * Should only happen if there is no
1697 * default pager.
1698 */
1699 vm_page_lock_queues();
1700 vm_page_activate(m);
1701 vm_page_unlock_queues();
1702
1703 /*
1704 * And we are done with it.
1705 */
1706 PAGE_WAKEUP_DONE(m);
1707 vm_object_unlock(object);
1708
1709 /*
1710 * break here to get back to the preemption
1711 * point in the outer loop so that we don't
1712 * spin forever if there is no default pager.
1713 */
1714 vm_pageout_dirty_no_pager++;
1715 /*
1716 * Well there's no pager, but we can still reclaim
1717 * free pages out of the inactive list. Go back
1718 * to top of loop and look for suitable pages.
1719 */
/*
 * NOTE(review): the "break here" comment above is stale; the code
 * continues the scan loop, as the comment directly above describes.
 */
1720 continue;
1721 }
1722
1723 if ((object->pager_initialized) &&
1724 (object->pager == MEMORY_OBJECT_NULL)) {
1725 /*
1726 * This pager has been destroyed by either
1727 * memory_object_destroy or vm_object_destroy, and
1728 * so there is nowhere for the page to go.
1729 * Just free the page.
1730 */
1731 VM_PAGE_FREE(m);
1732 vm_object_unlock(object);
1733 continue;
1734 }
1735
1736 vm_pageout_inactive_dirty++;
1737 /*
1738 if (!object->internal)
1739 burst_count++;
1740 */
/*
 * A paging reference is taken on the object before it is unlocked and
 * the busy page is handed to vm_pageout_cluster().
 */
1741 vm_object_paging_begin(object);
1742 vm_object_unlock(object);
1743 vm_pageout_cluster(m); /* flush it */
1744 }
/* Reached only through the break above, with vm_page_queue_free_lock held. */
1745 consider_machine_adjust();
1746 }
1747
1748 counter(unsigned int c_vm_pageout_scan_continue = 0;)
1749
1750 void
1751 vm_pageout_scan_continue(void)
1752 {
1753 /*
1754 * We just paused to let the pagers catch up.
1755 * If vm_page_laundry_count is still high,
1756 * then we aren't waiting long enough.
1757 * If we have paused some vm_pageout_pause_max times without
1758 * adjusting vm_pageout_burst_wait, it might be too big,
1759 * so we decrease it.
1760 */
1761
1762 vm_page_lock_queues();
1763 counter(++c_vm_pageout_scan_continue);
1764 if (vm_page_laundry_count > vm_pageout_burst_min) {
1765 vm_pageout_burst_wait++;
1766 vm_pageout_pause_count = 0;
1767 } else if (++vm_pageout_pause_count > vm_pageout_pause_max) {
1768 vm_pageout_burst_wait = (vm_pageout_burst_wait * 3) / 4;
1769 if (vm_pageout_burst_wait < 1)
1770 vm_pageout_burst_wait = 1;
1771 vm_pageout_pause_count = 0;
1772 }
1773 vm_page_unlock_queues();
1774 }
1775
1776 void vm_page_free_reserve(int pages);
1777 int vm_page_free_count_init;
1778
1779 void
1780 vm_page_free_reserve(
1781 int pages)
1782 {
1783 int free_after_reserve;
1784
1785 vm_page_free_reserved += pages;
1786
1787 free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;
1788
1789 vm_page_free_min = vm_page_free_reserved +
1790 VM_PAGE_FREE_MIN(free_after_reserve);
1791
1792 vm_page_free_target = vm_page_free_reserved +
1793 VM_PAGE_FREE_TARGET(free_after_reserve);
1794
1795 if (vm_page_free_target < vm_page_free_min + 5)
1796 vm_page_free_target = vm_page_free_min + 5;
1797 }
1798
1799 /*
1800 * vm_pageout is the high level pageout daemon.
1801 */
1802
1803
1804 void
1805 vm_pageout(void)
1806 {
1807 thread_t self = current_thread();
1808 spl_t s;
1809
1810 /*
1811 * Set thread privileges.
1812 */
1813 self->vm_privilege = TRUE;
1814 stack_privilege(self);
1815
1816 s = splsched();
1817 thread_lock(self);
1818
1819 self->priority = BASEPRI_PREEMPT - 1;
1820 self->sched_pri = self->priority;
1821
1822 thread_unlock(self);
1823 splx(s);
1824
1825 /*
1826 * Initialize some paging parameters.
1827 */
1828
1829 if (vm_page_laundry_max == 0)
1830 vm_page_laundry_max = VM_PAGE_LAUNDRY_MAX;
1831
1832 if (vm_pageout_burst_max == 0)
1833 vm_pageout_burst_max = VM_PAGEOUT_BURST_MAX;
1834
1835 if (vm_pageout_burst_wait == 0)
1836 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
1837
1838 if (vm_pageout_empty_wait == 0)
1839 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
1840
1841 vm_page_free_count_init = vm_page_free_count;
1842 /*
1843 * even if we've already called vm_page_free_reserve
1844 * call it again here to insure that the targets are
1845 * accurately calculated (it uses vm_page_free_count_init)
1846 * calling it with an arg of 0 will not change the reserve
1847 * but will re-calculate free_min and free_target
1848 */
1849 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED)
1850 vm_page_free_reserve(VM_PAGE_FREE_RESERVED - vm_page_free_reserved);
1851 else
1852 vm_page_free_reserve(0);
1853
1854 /*
1855 * vm_pageout_scan will set vm_page_inactive_target.
1856 *
1857 * The pageout daemon is never done, so loop forever.
1858 * We should call vm_pageout_scan at least once each
1859 * time we are woken, even if vm_page_free_wanted is
1860 * zero, to check vm_page_free_target and
1861 * vm_page_inactive_target.
1862 */
1863 for (;;) {
1864 vm_pageout_scan_event_counter++;
1865 vm_pageout_scan();
1866 /* we hold vm_page_queue_free_lock now */
1867 assert(vm_page_free_wanted == 0);
1868 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
1869 mutex_unlock(&vm_page_queue_free_lock);
1870 counter(c_vm_pageout_block++);
1871 thread_block((void (*)(void)) 0);
1872 }
1873 /*NOTREACHED*/
1874 }
1875
1876
1877 static upl_t
1878 upl_create(
1879 boolean_t internal,
1880 vm_size_t size)
1881 {
1882 upl_t upl;
1883
1884 if(internal) {
1885 upl = (upl_t)kalloc(sizeof(struct upl)
1886 + (sizeof(struct upl_page_info)*(size/page_size)));
1887 } else {
1888 upl = (upl_t)kalloc(sizeof(struct upl));
1889 }
1890 upl->flags = 0;
1891 upl->src_object = NULL;
1892 upl->kaddr = (vm_offset_t)0;
1893 upl->size = 0;
1894 upl->map_object = NULL;
1895 upl->ref_count = 1;
1896 upl_lock_init(upl);
1897 #ifdef UBC_DEBUG
1898 upl->ubc_alias1 = 0;
1899 upl->ubc_alias2 = 0;
1900 #endif /* UBC_DEBUG */
1901 return(upl);
1902 }
1903
1904 static void
1905 upl_destroy(
1906 upl_t upl)
1907 {
1908
1909 #ifdef UBC_DEBUG
1910 {
1911 upl_t upl_ele;
1912 vm_object_lock(upl->map_object->shadow);
1913 queue_iterate(&upl->map_object->shadow->uplq,
1914 upl_ele, upl_t, uplq) {
1915 if(upl_ele == upl) {
1916 queue_remove(&upl->map_object->shadow->uplq,
1917 upl_ele, upl_t, uplq);
1918 break;
1919 }
1920 }
1921 vm_object_unlock(upl->map_object->shadow);
1922 }
1923 #endif /* UBC_DEBUG */
1924 #ifdef notdefcdy
1925 if(!(upl->flags & UPL_DEVICE_MEMORY))
1926 #endif
1927 vm_object_deallocate(upl->map_object);
1928 if(upl->flags & UPL_INTERNAL) {
1929 kfree((vm_offset_t)upl,
1930 sizeof(struct upl) +
1931 (sizeof(struct upl_page_info) * (upl->size/page_size)));
1932 } else {
1933 kfree((vm_offset_t)upl, sizeof(struct upl));
1934 }
1935 }
1936
1937 __private_extern__ void
1938 uc_upl_dealloc(
1939 upl_t upl)
1940 {
1941 upl->ref_count -= 1;
1942 if(upl->ref_count == 0) {
1943 upl_destroy(upl);
1944 }
1945 }
1946
1947 void
1948 upl_deallocate(
1949 upl_t upl)
1950 {
1951
1952 upl->ref_count -= 1;
1953 if(upl->ref_count == 0) {
1954 upl_destroy(upl);
1955 }
1956 }
1957
1958 /*
1959 * Routine: vm_object_upl_request
1960 * Purpose:
1961 * Cause the population of a portion of a vm_object.
1962 * Depending on the nature of the request, the pages
1963 * returned may contain valid data or be uninitialized.
1964 * A page list structure, listing the physical pages
1965 * will be returned upon request.
1966 * This function is called by the file system or any other
1967 * supplier of backing store to a pager.
1968 * IMPORTANT NOTE: The caller must still respect the relationship
1969 * between the vm_object and its backing memory object. The
1970 * caller MUST NOT substitute changes in the backing file
1971 * without first doing a memory_object_lock_request on the
1972 * target range unless it is known that the pages are not
1973 * shared with another entity at the pager level.
1974 * Copy_in_to:
1975 * if a page list structure is present
1976 * return the mapped physical pages, where a
1977 * page is not present, return a non-initialized
1978 * one. If the no_sync bit is turned on, don't
1979 * call the pager unlock to synchronize with other
1980 * possible copies of the page. Leave pages busy
1981 * in the original object, if a page list structure
1982 * was specified. When a commit of the page list
1983 * pages is done, the dirty bit will be set for each one.
1984 * Copy_out_from:
1985 * If a page list structure is present, return
1986 * all mapped pages. Where a page does not exist
1987 * map a zero filled one. Leave pages busy in
1988 * the original object. If a page list structure
1989 * is not specified, this call is a no-op.
1990 *
1991 * Note: access of default pager objects has a rather interesting
1992 * twist. The caller of this routine, presumably the file system
1993 * page cache handling code, will never actually make a request
1994 * against a default pager backed object. Only the default
1995 * pager will make requests on backing store related vm_objects
1996 * In this way the default pager can maintain the relationship
1997 * between backing store files (abstract memory objects) and
1998 * the vm_objects (cache objects), they support.
1999 *
2000 */
2001 __private_extern__ kern_return_t
2002 vm_object_upl_request(
2003 vm_object_t object,
2004 vm_object_offset_t offset,
2005 vm_size_t size,
2006 upl_t *upl_ptr,
2007 upl_page_info_array_t user_page_list,
2008 unsigned int *page_list_count,
2009 int cntrl_flags)
2010 {
2011 vm_page_t dst_page;
2012 vm_object_offset_t dst_offset = offset;
2013 vm_size_t xfer_size = size;
2014 boolean_t do_m_lock = FALSE;
2015 boolean_t dirty;
2016 upl_t upl = NULL;
2017 int entry;
2018 boolean_t encountered_lrp = FALSE;
2019
2020 vm_page_t alias_page = NULL;
2021 int page_ticket;
2022
2023
2024 page_ticket = (cntrl_flags & UPL_PAGE_TICKET_MASK)
2025 >> UPL_PAGE_TICKET_SHIFT;
2026
2027 if(((size/page_size) > MAX_UPL_TRANSFER) && !object->phys_contiguous) {
2028 size = MAX_UPL_TRANSFER * page_size;
2029 }
2030
2031 if(cntrl_flags & UPL_SET_INTERNAL)
2032 if(page_list_count != NULL)
2033 *page_list_count = MAX_UPL_TRANSFER;
2034 if(((cntrl_flags & UPL_SET_INTERNAL) && !(object->phys_contiguous)) &&
2035 ((page_list_count != NULL) && (*page_list_count != 0)
2036 && *page_list_count < (size/page_size)))
2037 return KERN_INVALID_ARGUMENT;
2038
2039 if((!object->internal) && (object->paging_offset != 0))
2040 panic("vm_object_upl_request: vnode object with non-zero paging offset\n");
2041
2042 if((cntrl_flags & UPL_COPYOUT_FROM) && (upl_ptr == NULL)) {
2043 return KERN_SUCCESS;
2044 }
2045 if(upl_ptr) {
2046 if(cntrl_flags & UPL_SET_INTERNAL) {
2047 upl = upl_create(TRUE, size);
2048 user_page_list = (upl_page_info_t *)
2049 (((vm_offset_t)upl) + sizeof(struct upl));
2050 upl->flags |= UPL_INTERNAL;
2051 } else {
2052 upl = upl_create(FALSE, size);
2053 }
2054 if(object->phys_contiguous) {
2055 upl->size = size;
2056 upl->offset = offset + object->paging_offset;
2057 *upl_ptr = upl;
2058 if(user_page_list) {
2059 user_page_list[0].phys_addr =
2060 offset + object->shadow_offset;
2061 user_page_list[0].device = TRUE;
2062 }
2063 upl->map_object = vm_object_allocate(size);
2064 vm_object_lock(upl->map_object);
2065 upl->map_object->shadow = object;
2066 upl->flags = UPL_DEVICE_MEMORY | UPL_INTERNAL;
2067 upl->map_object->pageout = TRUE;
2068 upl->map_object->can_persist = FALSE;
2069 upl->map_object->copy_strategy
2070 = MEMORY_OBJECT_COPY_NONE;
2071 upl->map_object->shadow_offset = offset;
2072 vm_object_unlock(upl->map_object);
2073 return KERN_SUCCESS;
2074 }
2075
2076
2077 upl->map_object = vm_object_allocate(size);
2078 vm_object_lock(upl->map_object);
2079 upl->map_object->shadow = object;
2080 upl->size = size;
2081 upl->offset = offset + object->paging_offset;
2082 upl->map_object->pageout = TRUE;
2083 upl->map_object->can_persist = FALSE;
2084 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2085 upl->map_object->shadow_offset = offset;
2086 vm_object_unlock(upl->map_object);
2087 *upl_ptr = upl;
2088 }
2089 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2090 vm_object_lock(object);
2091 #ifdef UBC_DEBUG
2092 if(upl_ptr)
2093 queue_enter(&object->uplq, upl, upl_t, uplq);
2094 #endif /* UBC_DEBUG */
2095 vm_object_paging_begin(object);
2096 entry = 0;
2097 if(cntrl_flags & UPL_COPYOUT_FROM) {
2098 upl->flags |= UPL_PAGE_SYNC_DONE;
2099 while (xfer_size) {
2100 if(alias_page == NULL) {
2101 vm_object_unlock(object);
2102 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2103 vm_object_lock(object);
2104 }
2105 if(((dst_page = vm_page_lookup(object,
2106 dst_offset)) == VM_PAGE_NULL) ||
2107 dst_page->fictitious ||
2108 dst_page->absent ||
2109 dst_page->error ||
2110 (dst_page->wire_count != 0 &&
2111 !dst_page->pageout) ||
2112 ((!(dst_page->dirty || dst_page->precious ||
2113 pmap_is_modified(dst_page->phys_addr)))
2114 && (cntrl_flags & UPL_RET_ONLY_DIRTY)) ||
2115 ((!(dst_page->inactive))
2116 && (dst_page->page_ticket != page_ticket)
2117 && ((dst_page->page_ticket+1) != page_ticket)
2118 && (cntrl_flags & UPL_PAGEOUT)) ||
2119 ((!dst_page->list_req_pending) &&
2120 (cntrl_flags & UPL_RET_ONLY_DIRTY) &&
2121 pmap_is_referenced(dst_page->phys_addr))) {
2122 if(user_page_list)
2123 user_page_list[entry].phys_addr = 0;
2124 } else {
2125
2126 if(dst_page->busy &&
2127 (!(dst_page->list_req_pending &&
2128 dst_page->pageout))) {
2129 if(cntrl_flags & UPL_NOBLOCK) {
2130 if(user_page_list)
2131 user_page_list[entry]
2132 .phys_addr = 0;
2133 entry++;
2134 dst_offset += PAGE_SIZE_64;
2135 xfer_size -= PAGE_SIZE;
2136 continue;
2137 }
2138 /*someone else is playing with the */
2139 /* page. We will have to wait. */
2140 PAGE_ASSERT_WAIT(
2141 dst_page, THREAD_UNINT);
2142 vm_object_unlock(object);
2143 thread_block((void(*)(void))0);
2144 vm_object_lock(object);
2145 continue;
2146 }
2147 /* Someone else already cleaning the page? */
2148 if((dst_page->cleaning || dst_page->absent ||
2149 dst_page->wire_count != 0) &&
2150 !dst_page->list_req_pending) {
2151 if(user_page_list)
2152 user_page_list[entry].phys_addr = 0;
2153 entry++;
2154 dst_offset += PAGE_SIZE_64;
2155 xfer_size -= PAGE_SIZE;
2156 continue;
2157 }
2158 /* eliminate all mappings from the */
2159 /* original object and its prodigy */
2160
2161 vm_page_lock_queues();
2162 pmap_page_protect(dst_page->phys_addr,
2163 VM_PROT_NONE);
2164
2165 /* pageout statistics gathering. count */
2166 /* all the pages we will page out that */
2167 /* were not counted in the initial */
2168 /* vm_pageout_scan work */
2169 if(dst_page->list_req_pending)
2170 encountered_lrp = TRUE;
2171 if((dst_page->dirty ||
2172 (dst_page->object->internal &&
2173 dst_page->precious)) &&
2174 (dst_page->list_req_pending
2175 == FALSE)) {
2176 if(encountered_lrp) {
2177 CLUSTER_STAT
2178 (pages_at_higher_offsets++;)
2179 } else {
2180 CLUSTER_STAT
2181 (pages_at_lower_offsets++;)
2182 }
2183 }
2184
2185 /* Turn off busy indication on pending */
2186 /* pageout. Note: we can only get here */
2187 /* in the request pending case. */
2188 dst_page->list_req_pending = FALSE;
2189 dst_page->busy = FALSE;
2190 dst_page->cleaning = FALSE;
2191
2192 dirty = pmap_is_modified(dst_page->phys_addr);
2193 dirty = dirty ? TRUE : dst_page->dirty;
2194
2195 /* use pageclean setup, it is more convenient */
2196 /* even for the pageout cases here */
2197 vm_pageclean_setup(dst_page, alias_page,
2198 upl->map_object, size - xfer_size);
2199
2200 if(!dirty) {
2201 dst_page->dirty = FALSE;
2202 dst_page->precious = TRUE;
2203 }
2204
2205 if(dst_page->pageout)
2206 dst_page->busy = TRUE;
2207
2208 alias_page->absent = FALSE;
2209 alias_page = NULL;
2210 if((!(cntrl_flags & UPL_CLEAN_IN_PLACE))
2211 || (cntrl_flags & UPL_PAGEOUT)) {
2212 /* deny access to the target page */
2213 /* while it is being worked on */
2214 if((!dst_page->pageout) &&
2215 (dst_page->wire_count == 0)) {
2216 dst_page->busy = TRUE;
2217 dst_page->pageout = TRUE;
2218 vm_page_wire(dst_page);
2219 }
2220 }
2221 if(user_page_list) {
2222 user_page_list[entry].phys_addr
2223 = dst_page->phys_addr;
2224 user_page_list[entry].dirty =
2225 dst_page->dirty;
2226 user_page_list[entry].pageout =
2227 dst_page->pageout;
2228 user_page_list[entry].absent =
2229 dst_page->absent;
2230 user_page_list[entry].precious =
2231 dst_page->precious;
2232 }
2233
2234 vm_page_unlock_queues();
2235 }
2236 entry++;
2237 dst_offset += PAGE_SIZE_64;
2238 xfer_size -= PAGE_SIZE;
2239 }
2240 } else {
2241 while (xfer_size) {
2242 if(alias_page == NULL) {
2243 vm_object_unlock(object);
2244 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2245 vm_object_lock(object);
2246 }
2247 dst_page = vm_page_lookup(object, dst_offset);
2248 if(dst_page != VM_PAGE_NULL) {
2249 if((dst_page->cleaning) &&
2250 !(dst_page->list_req_pending)) {
2251 /*someone else is writing to the */
2252 /* page. We will have to wait. */
2253 PAGE_ASSERT_WAIT(dst_page,THREAD_UNINT);
2254 vm_object_unlock(object);
2255 thread_block((void(*)(void))0);
2256 vm_object_lock(object);
2257 continue;
2258 }
2259 if ((dst_page->fictitious &&
2260 dst_page->list_req_pending)) {
2261 /* dump the fictitious page */
2262 dst_page->list_req_pending = FALSE;
2263 dst_page->clustered = FALSE;
2264 vm_page_lock_queues();
2265 vm_page_free(dst_page);
2266 vm_page_unlock_queues();
2267 } else if ((dst_page->absent &&
2268 dst_page->list_req_pending)) {
2269 /* the default_pager case */
2270 dst_page->list_req_pending = FALSE;
2271 dst_page->busy = FALSE;
2272 dst_page->clustered = FALSE;
2273 }
2274 }
2275 if((dst_page = vm_page_lookup(object, dst_offset)) ==
2276 VM_PAGE_NULL) {
2277 if(object->private) {
2278 /*
2279 * This is a nasty wrinkle for users
2280 * of upl who encounter device or
2281 * private memory however, it is
2282 * unavoidable, only a fault can
2283 * reslove the actual backing
2284 * physical page by asking the
2285 * backing device.
2286 */
2287 if(user_page_list)
2288 user_page_list[entry]
2289 .phys_addr = 0;
2290 entry++;
2291 dst_offset += PAGE_SIZE_64;
2292 xfer_size -= PAGE_SIZE;
2293 continue;
2294 }
2295 /* need to allocate a page */
2296 dst_page = vm_page_alloc(object, dst_offset);
2297 if (dst_page == VM_PAGE_NULL) {
2298 vm_object_unlock(object);
2299 VM_PAGE_WAIT();
2300 vm_object_lock(object);
2301 continue;
2302 }
2303 dst_page->busy = FALSE;
2304 #if 0
2305 if(cntrl_flags & UPL_NO_SYNC) {
2306 dst_page->page_lock = 0;
2307 dst_page->unlock_request = 0;
2308 }
2309 #endif
2310 dst_page->absent = TRUE;
2311 object->absent_count++;
2312 }
2313 #if 1
2314 if(cntrl_flags & UPL_NO_SYNC) {
2315 dst_page->page_lock = 0;
2316 dst_page->unlock_request = 0;
2317 }
2318 #endif /* 1 */
2319 dst_page->overwriting = TRUE;
2320 if(dst_page->fictitious) {
2321 panic("need corner case for fictitious page");
2322 }
2323 if(dst_page->page_lock) {
2324 do_m_lock = TRUE;
2325 }
2326 if(upl_ptr) {
2327
2328 /* eliminate all mappings from the */
2329 /* original object and its prodigy */
2330
2331 if(dst_page->busy) {
2332 /*someone else is playing with the */
2333 /* page. We will have to wait. */
2334 PAGE_ASSERT_WAIT(
2335 dst_page, THREAD_UNINT);
2336 vm_object_unlock(object);
2337 thread_block((void(*)(void))0);
2338 vm_object_lock(object);
2339 continue;
2340 }
2341
2342 vm_page_lock_queues();
2343 pmap_page_protect(dst_page->phys_addr,
2344 VM_PROT_NONE);
2345 dirty = pmap_is_modified(dst_page->phys_addr);
2346 dirty = dirty ? TRUE : dst_page->dirty;
2347
2348 vm_pageclean_setup(dst_page, alias_page,
2349 upl->map_object, size - xfer_size);
2350
2351 if(cntrl_flags & UPL_CLEAN_IN_PLACE) {
2352 /* clean in place for read implies */
2353 /* that a write will be done on all */
2354 /* the pages that are dirty before */
2355 /* a upl commit is done. The caller */
2356 /* is obligated to preserve the */
2357 /* contents of all pages marked */
2358 /* dirty. */
2359 upl->flags |= UPL_CLEAR_DIRTY;
2360 }
2361
2362 if(!dirty) {
2363 dst_page->dirty = FALSE;
2364 dst_page->precious = TRUE;
2365 }
2366
2367 if (dst_page->wire_count == 0) {
2368 /* deny access to the target page while */
2369 /* it is being worked on */
2370 dst_page->busy = TRUE;
2371 } else {
2372 vm_page_wire(dst_page);
2373 }
2374 /* expect the page to be used */
2375 dst_page->reference = TRUE;
2376 dst_page->precious =
2377 (cntrl_flags & UPL_PRECIOUS)
2378 ? TRUE : FALSE;
2379 alias_page->absent = FALSE;
2380 alias_page = NULL;
2381 if(user_page_list) {
2382 user_page_list[entry].phys_addr
2383 = dst_page->phys_addr;
2384 user_page_list[entry].dirty =
2385 dst_page->dirty;
2386 user_page_list[entry].pageout =
2387 dst_page->pageout;
2388 user_page_list[entry].absent =
2389 dst_page->absent;
2390 user_page_list[entry].precious =
2391 dst_page->precious;
2392 }
2393 vm_page_unlock_queues();
2394 }
2395 entry++;
2396 dst_offset += PAGE_SIZE_64;
2397 xfer_size -= PAGE_SIZE;
2398 }
2399 }
2400
2401 if (upl->flags & UPL_INTERNAL) {
2402 if(page_list_count != NULL)
2403 *page_list_count = 0;
2404 } else if (*page_list_count > entry) {
2405 if(page_list_count != NULL)
2406 *page_list_count = entry;
2407 }
2408
2409 if(alias_page != NULL) {
2410 vm_page_lock_queues();
2411 vm_page_free(alias_page);
2412 vm_page_unlock_queues();
2413 }
2414
2415 if(do_m_lock) {
2416 vm_prot_t access_required;
2417 /* call back all associated pages from other users of the pager */
2418 /* all future updates will be on data which is based on the */
2419 /* changes we are going to make here. Note: it is assumed that */
2420 /* we already hold copies of the data so we will not be seeing */
2421 /* an avalanche of incoming data from the pager */
2422 access_required = (cntrl_flags & UPL_COPYOUT_FROM)
2423 ? VM_PROT_READ : VM_PROT_WRITE;
2424 while (TRUE) {
2425 kern_return_t rc;
2426 thread_t thread;
2427
2428 if(!object->pager_ready) {
2429 thread = current_thread();
2430 vm_object_assert_wait(object,
2431 VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT);
2432 vm_object_unlock(object);
2433 thread_block((void (*)(void))0);
2434 if (thread->wait_result != THREAD_AWAKENED) {
2435 return(KERN_FAILURE);
2436 }
2437 vm_object_lock(object);
2438 continue;
2439 }
2440
2441 vm_object_unlock(object);
2442
2443 if (rc = memory_object_data_unlock(
2444 object->pager,
2445 dst_offset + object->paging_offset,
2446 size,
2447 access_required)) {
2448 if (rc == MACH_SEND_INTERRUPTED)
2449 continue;
2450 else
2451 return KERN_FAILURE;
2452 }
2453 break;
2454
2455 }
2456 /* lets wait on the last page requested */
2457 /* NOTE: we will have to update lock completed routine to signal */
2458 if(dst_page != VM_PAGE_NULL &&
2459 (access_required & dst_page->page_lock) != access_required) {
2460 PAGE_ASSERT_WAIT(dst_page, THREAD_UNINT);
2461 thread_block((void (*)(void))0);
2462 vm_object_lock(object);
2463 }
2464 }
2465 vm_object_unlock(object);
2466 return KERN_SUCCESS;
2467 }
2468
2469 /* JMM - Backward compatability for now */
2470 kern_return_t
2471 vm_fault_list_request(
2472 memory_object_control_t control,
2473 vm_object_offset_t offset,
2474 vm_size_t size,
2475 upl_t *upl_ptr,
2476 upl_page_info_t **user_page_list_ptr,
2477 int page_list_count,
2478 int cntrl_flags)
2479 {
2480 int local_list_count;
2481 upl_page_info_t *user_page_list;
2482 kern_return_t kr;
2483
2484 if (user_page_list_ptr != NULL) {
2485 local_list_count = page_list_count;
2486 user_page_list = *user_page_list_ptr;
2487 } else {
2488 local_list_count = 0;
2489 user_page_list = NULL;
2490 }
2491 kr = memory_object_upl_request(control,
2492 offset,
2493 size,
2494 upl_ptr,
2495 user_page_list,
2496 &local_list_count,
2497 cntrl_flags);
2498
2499 if(kr != KERN_SUCCESS)
2500 return kr;
2501
2502 if ((user_page_list_ptr != NULL) && (cntrl_flags & UPL_INTERNAL)) {
2503 *user_page_list_ptr = UPL_GET_INTERNAL_PAGE_LIST(*upl_ptr);
2504 }
2505
2506 return KERN_SUCCESS;
2507 }
2508
2509
2510
2511 /*
2512 * Routine: vm_object_super_upl_request
2513 * Purpose:
2514 * Cause the population of a portion of a vm_object
2515 * in much the same way as memory_object_upl_request.
2516 * Depending on the nature of the request, the pages
2517 * returned may be contain valid data or be uninitialized.
2518 * However, the region may be expanded up to the super
2519 * cluster size provided.
2520 */
2521
2522 __private_extern__ kern_return_t
2523 vm_object_super_upl_request(
2524 vm_object_t object,
2525 vm_object_offset_t offset,
2526 vm_size_t size,
2527 vm_size_t super_cluster,
2528 upl_t *upl,
2529 upl_page_info_t *user_page_list,
2530 unsigned int *page_list_count,
2531 int cntrl_flags)
2532 {
2533 vm_page_t target_page;
2534 int ticket;
2535
2536 if(object->paging_offset > offset)
2537 return KERN_FAILURE;
2538
2539 offset = offset - object->paging_offset;
2540 if(cntrl_flags & UPL_PAGEOUT) {
2541 if((target_page = vm_page_lookup(object, offset))
2542 != VM_PAGE_NULL) {
2543 ticket = target_page->page_ticket;
2544 cntrl_flags = cntrl_flags & ~(int)UPL_PAGE_TICKET_MASK;
2545 cntrl_flags = cntrl_flags |
2546 ((ticket << UPL_PAGE_TICKET_SHIFT)
2547 & UPL_PAGE_TICKET_MASK);
2548 }
2549 }
2550
2551
2552 /* turns off super cluster exercised by the default_pager */
2553 /*
2554 super_cluster = size;
2555 */
2556 if ((super_cluster > size) &&
2557 (vm_page_free_count > vm_page_free_reserved)) {
2558
2559 vm_object_offset_t base_offset;
2560 vm_size_t super_size;
2561
2562 base_offset = (offset &
2563 ~((vm_object_offset_t) super_cluster - 1));
2564 super_size = (offset+size) > (base_offset + super_cluster) ?
2565 super_cluster<<1 : super_cluster;
2566 super_size = ((base_offset + super_size) > object->size) ?
2567 (object->size - base_offset) : super_size;
2568 if(offset > (base_offset + super_size))
2569 panic("vm_object_super_upl_request: Missed target pageout 0x%x,0x%x, 0x%x, 0x%x, 0x%x, 0x%x\n", offset, base_offset, super_size, super_cluster, size, object->paging_offset);
2570 /* apparently there is a case where the vm requests a */
2571 /* page to be written out who's offset is beyond the */
2572 /* object size */
2573 if((offset + size) > (base_offset + super_size))
2574 super_size = (offset + size) - base_offset;
2575
2576 offset = base_offset;
2577 size = super_size;
2578 }
2579 vm_object_upl_request(object, offset, size,
2580 upl, user_page_list, page_list_count,
2581 cntrl_flags);
2582 }
2583
2584
2585 kern_return_t
2586 vm_upl_map(
2587 vm_map_t map,
2588 upl_t upl,
2589 vm_offset_t *dst_addr)
2590 {
2591 vm_size_t size;
2592 vm_object_offset_t offset;
2593 vm_offset_t addr;
2594 vm_page_t m;
2595 kern_return_t kr;
2596
2597 if (upl == UPL_NULL)
2598 return KERN_INVALID_ARGUMENT;
2599
2600 upl_lock(upl);
2601
2602 /* check to see if already mapped */
2603 if(UPL_PAGE_LIST_MAPPED & upl->flags) {
2604 upl_unlock(upl);
2605 return KERN_FAILURE;
2606 }
2607
2608 offset = 0; /* Always map the entire object */
2609 size = upl->size;
2610
2611 vm_object_lock(upl->map_object);
2612 upl->map_object->ref_count++;
2613 vm_object_res_reference(upl->map_object);
2614 vm_object_unlock(upl->map_object);
2615
2616 *dst_addr = 0;
2617
2618
2619 /* NEED A UPL_MAP ALIAS */
2620 kr = vm_map_enter(map, dst_addr, size, (vm_offset_t) 0, TRUE,
2621 upl->map_object, offset, FALSE,
2622 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
2623
2624 if (kr != KERN_SUCCESS) {
2625 upl_unlock(upl);
2626 return(kr);
2627 }
2628
2629 for(addr=*dst_addr; size > 0; size-=PAGE_SIZE,addr+=PAGE_SIZE) {
2630 m = vm_page_lookup(upl->map_object, offset);
2631 if(m) {
2632 PMAP_ENTER(map->pmap, addr, m, VM_PROT_ALL, TRUE);
2633 }
2634 offset+=PAGE_SIZE_64;
2635 }
2636 upl->ref_count++; /* hold a reference for the mapping */
2637 upl->flags |= UPL_PAGE_LIST_MAPPED;
2638 upl->kaddr = *dst_addr;
2639 upl_unlock(upl);
2640 return KERN_SUCCESS;
2641 }
2642
2643
2644 kern_return_t
2645 vm_upl_unmap(
2646 vm_map_t map,
2647 upl_t upl)
2648 {
2649 vm_address_t addr;
2650 vm_size_t size;
2651
2652 if (upl == UPL_NULL)
2653 return KERN_INVALID_ARGUMENT;
2654
2655 upl_lock(upl);
2656 if(upl->flags & UPL_PAGE_LIST_MAPPED) {
2657 addr = upl->kaddr;
2658 size = upl->size;
2659 assert(upl->ref_count > 1);
2660 upl->ref_count--; /* removing mapping ref */
2661 upl->flags &= ~UPL_PAGE_LIST_MAPPED;
2662 upl->kaddr = (vm_offset_t) 0;
2663 upl_unlock(upl);
2664
2665 vm_deallocate(map, addr, size);
2666 return KERN_SUCCESS;
2667 }
2668 upl_unlock(upl);
2669 return KERN_FAILURE;
2670 }
2671
2672 kern_return_t
2673 upl_commit_range(
2674 upl_t upl,
2675 vm_offset_t offset,
2676 vm_size_t size,
2677 int flags,
2678 upl_page_info_t *page_list,
2679 mach_msg_type_number_t count,
2680 boolean_t *empty)
2681 {
2682 vm_size_t xfer_size = size;
2683 vm_object_t shadow_object = upl->map_object->shadow;
2684 vm_object_t object = upl->map_object;
2685 vm_object_offset_t target_offset;
2686 vm_object_offset_t page_offset;
2687 int entry;
2688
2689 *empty = FALSE;
2690
2691 if (upl == UPL_NULL)
2692 return KERN_INVALID_ARGUMENT;
2693
2694 if (count == 0)
2695 page_list = NULL;
2696
2697 upl_lock(upl);
2698 if(upl->flags & UPL_DEVICE_MEMORY) {
2699 xfer_size = 0;
2700 } else if ((offset + size) > upl->size) {
2701 upl_unlock(upl);
2702 return KERN_FAILURE;
2703 }
2704
2705 vm_object_lock(shadow_object);
2706
2707 entry = offset/PAGE_SIZE;
2708 target_offset = (vm_object_offset_t)offset;
2709 while(xfer_size) {
2710 vm_page_t t,m;
2711 upl_page_info_t *p;
2712
2713 if((t = vm_page_lookup(object, target_offset)) != NULL) {
2714
2715 t->pageout = FALSE;
2716 page_offset = t->offset;
2717 VM_PAGE_FREE(t);
2718 t = VM_PAGE_NULL;
2719 m = vm_page_lookup(shadow_object,
2720 page_offset + object->shadow_offset);
2721 if(m != VM_PAGE_NULL) {
2722 vm_object_paging_end(shadow_object);
2723 vm_page_lock_queues();
2724 if ((upl->flags & UPL_CLEAR_DIRTY) ||
2725 (flags & UPL_COMMIT_CLEAR_DIRTY)) {
2726 pmap_clear_modify(m->phys_addr);
2727 m->dirty = FALSE;
2728 }
2729 if(page_list) {
2730 p = &(page_list[entry]);
2731 if(p->phys_addr && p->pageout && !m->pageout) {
2732 m->busy = TRUE;
2733 m->pageout = TRUE;
2734 vm_page_wire(m);
2735 } else if (page_list[entry].phys_addr &&
2736 !p->pageout && m->pageout &&
2737 !m->dump_cleaning) {
2738 m->pageout = FALSE;
2739 m->absent = FALSE;
2740 m->overwriting = FALSE;
2741 vm_page_unwire(m);
2742 PAGE_WAKEUP_DONE(m);
2743 }
2744 page_list[entry].phys_addr = 0;
2745 }
2746 m->dump_cleaning = FALSE;
2747 if(m->laundry) {
2748 vm_page_laundry_count--;
2749 m->laundry = FALSE;
2750 if (vm_page_laundry_count < vm_page_laundry_min) {
2751 vm_page_laundry_min = 0;
2752 thread_wakeup((event_t)
2753 &vm_page_laundry_count);
2754 }
2755 }
2756 if(m->pageout) {
2757 m->cleaning = FALSE;
2758 m->pageout = FALSE;
2759 #if MACH_CLUSTER_STATS
2760 if (m->wanted) vm_pageout_target_collisions++;
2761 #endif
2762 pmap_page_protect(m->phys_addr, VM_PROT_NONE);
2763 m->dirty = pmap_is_modified(m->phys_addr);
2764 if(m->dirty) {
2765 CLUSTER_STAT(
2766 vm_pageout_target_page_dirtied++;)
2767 vm_page_unwire(m);/* reactivates */
2768 VM_STAT(reactivations++);
2769 PAGE_WAKEUP_DONE(m);
2770 } else {
2771 CLUSTER_STAT(
2772 vm_pageout_target_page_freed++;)
2773 vm_page_free(m);/* clears busy, etc. */
2774 VM_STAT(pageouts++);
2775 }
2776 vm_page_unlock_queues();
2777 target_offset += PAGE_SIZE_64;
2778 xfer_size -= PAGE_SIZE;
2779 entry++;
2780 continue;
2781 }
2782 if (flags & UPL_COMMIT_INACTIVATE) {
2783 vm_page_deactivate(m);
2784 m->reference = FALSE;
2785 pmap_clear_reference(m->phys_addr);
2786 } else if (!m->active && !m->inactive) {
2787 if (m->reference)
2788 vm_page_activate(m);
2789 else
2790 vm_page_deactivate(m);
2791 }
2792 #if MACH_CLUSTER_STATS
2793 m->dirty = pmap_is_modified(m->phys_addr);
2794
2795 if (m->dirty) vm_pageout_cluster_dirtied++;
2796 else vm_pageout_cluster_cleaned++;
2797 if (m->wanted) vm_pageout_cluster_collisions++;
2798 #else
2799 m->dirty = 0;
2800 #endif
2801
2802 if((m->busy) && (m->cleaning)) {
2803 /* the request_page_list case */
2804 if(m->absent) {
2805 m->absent = FALSE;
2806 if(shadow_object->absent_count == 1)
2807 vm_object_absent_release(shadow_object);
2808 else
2809 shadow_object->absent_count--;
2810 }
2811 m->overwriting = FALSE;
2812 m->busy = FALSE;
2813 m->dirty = FALSE;
2814 }
2815 else if (m->overwriting) {
2816 /* alternate request page list, write to
2817 /* page_list case. Occurs when the original
2818 /* page was wired at the time of the list
2819 /* request */
2820 assert(m->wire_count != 0);
2821 vm_page_unwire(m);/* reactivates */
2822 m->overwriting = FALSE;
2823 }
2824 m->cleaning = FALSE;
2825 /* It is a part of the semantic of COPYOUT_FROM */
2826 /* UPLs that a commit implies cache sync */
2827 /* between the vm page and the backing store */
2828 /* this can be used to strip the precious bit */
2829 /* as well as clean */
2830 if (upl->flags & UPL_PAGE_SYNC_DONE)
2831 m->precious = FALSE;
2832
2833 if (flags & UPL_COMMIT_SET_DIRTY) {
2834 m->dirty = TRUE;
2835 }
2836 /*
2837 * Wakeup any thread waiting for the page to be un-cleaning.
2838 */
2839 PAGE_WAKEUP(m);
2840 vm_page_unlock_queues();
2841
2842 }
2843 }
2844 target_offset += PAGE_SIZE_64;
2845 xfer_size -= PAGE_SIZE;
2846 entry++;
2847 }
2848
2849 vm_object_unlock(shadow_object);
2850 if(flags & UPL_COMMIT_NOTIFY_EMPTY) {
2851 if((upl->flags & UPL_DEVICE_MEMORY)
2852 || (queue_empty(&upl->map_object->memq)))
2853 *empty = TRUE;
2854 }
2855 upl_unlock(upl);
2856
2857 return KERN_SUCCESS;
2858 }
2859
2860 kern_return_t
2861 upl_abort_range(
2862 upl_t upl,
2863 vm_offset_t offset,
2864 vm_size_t size,
2865 int error,
2866 boolean_t *empty)
2867 {
2868 vm_size_t xfer_size = size;
2869 vm_object_t shadow_object = upl->map_object->shadow;
2870 vm_object_t object = upl->map_object;
2871 vm_object_offset_t target_offset;
2872 vm_object_offset_t page_offset;
2873 int entry;
2874
2875 *empty = FALSE;
2876
2877 if (upl == UPL_NULL)
2878 return KERN_INVALID_ARGUMENT;
2879
2880 upl_lock(upl);
2881 if(upl->flags & UPL_DEVICE_MEMORY) {
2882 xfer_size = 0;
2883 } else if ((offset + size) > upl->size) {
2884 upl_unlock(upl);
2885 return KERN_FAILURE;
2886 }
2887
2888 vm_object_lock(shadow_object);
2889
2890 entry = offset/PAGE_SIZE;
2891 target_offset = (vm_object_offset_t)offset;
2892 while(xfer_size) {
2893 vm_page_t t,m;
2894 upl_page_info_t *p;
2895
2896 if((t = vm_page_lookup(object, target_offset)) != NULL) {
2897
2898 t->pageout = FALSE;
2899 page_offset = t->offset;
2900 VM_PAGE_FREE(t);
2901 t = VM_PAGE_NULL;
2902 m = vm_page_lookup(shadow_object,
2903 page_offset + object->shadow_offset);
2904 if(m != VM_PAGE_NULL) {
2905 vm_object_paging_end(m->object);
2906 vm_page_lock_queues();
2907 if(m->absent) {
2908 /* COPYOUT = FALSE case */
2909 /* check for error conditions which must */
2910 /* be passed back to the pages customer */
2911 if(error & UPL_ABORT_RESTART) {
2912 m->restart = TRUE;
2913 m->absent = FALSE;
2914 vm_object_absent_release(m->object);
2915 m->page_error = KERN_MEMORY_ERROR;
2916 m->error = TRUE;
2917 } else if(error & UPL_ABORT_UNAVAILABLE) {
2918 m->restart = FALSE;
2919 m->unusual = TRUE;
2920 m->clustered = FALSE;
2921 } else if(error & UPL_ABORT_ERROR) {
2922 m->restart = FALSE;
2923 m->absent = FALSE;
2924 vm_object_absent_release(m->object);
2925 m->page_error = KERN_MEMORY_ERROR;
2926 m->error = TRUE;
2927 } else if(error & UPL_ABORT_DUMP_PAGES) {
2928 m->clustered = TRUE;
2929 } else {
2930 m->clustered = TRUE;
2931 }
2932
2933
2934 m->cleaning = FALSE;
2935 m->overwriting = FALSE;
2936 PAGE_WAKEUP_DONE(m);
2937 if(m->clustered) {
2938 vm_page_free(m);
2939 } else {
2940 vm_page_activate(m);
2941 }
2942
2943 vm_page_unlock_queues();
2944 target_offset += PAGE_SIZE_64;
2945 xfer_size -= PAGE_SIZE;
2946 entry++;
2947 continue;
2948 }
2949 /*
2950 * Handle the trusted pager throttle.
2951 */
2952 if (m->laundry) {
2953 vm_page_laundry_count--;
2954 m->laundry = FALSE;
2955 if (vm_page_laundry_count
2956 < vm_page_laundry_min) {
2957 vm_page_laundry_min = 0;
2958 thread_wakeup((event_t)
2959 &vm_page_laundry_count);
2960 }
2961 }
2962 if(m->pageout) {
2963 assert(m->busy);
2964 assert(m->wire_count == 1);
2965 m->pageout = FALSE;
2966 vm_page_unwire(m);
2967 }
2968 m->dump_cleaning = FALSE;
2969 m->cleaning = FALSE;
2970 m->busy = FALSE;
2971 m->overwriting = FALSE;
2972 #if MACH_PAGEMAP
2973 vm_external_state_clr(
2974 m->object->existence_map, m->offset);
2975 #endif /* MACH_PAGEMAP */
2976 if(error & UPL_ABORT_DUMP_PAGES) {
2977 vm_page_free(m);
2978 pmap_page_protect(m->phys_addr, VM_PROT_NONE);
2979 } else {
2980 PAGE_WAKEUP(m);
2981 }
2982 vm_page_unlock_queues();
2983 }
2984 }
2985 target_offset += PAGE_SIZE_64;
2986 xfer_size -= PAGE_SIZE;
2987 entry++;
2988 }
2989 vm_object_unlock(shadow_object);
2990 if(error & UPL_ABORT_NOTIFY_EMPTY) {
2991 if((upl->flags & UPL_DEVICE_MEMORY)
2992 || (queue_empty(&upl->map_object->memq)))
2993 *empty = TRUE;
2994 }
2995 upl_unlock(upl);
2996 return KERN_SUCCESS;
2997 }
2998
2999 kern_return_t
3000 upl_abort(
3001 upl_t upl,
3002 int error)
3003 {
3004 vm_object_t object = NULL;
3005 vm_object_t shadow_object = NULL;
3006 vm_object_offset_t offset;
3007 vm_object_offset_t shadow_offset;
3008 vm_object_offset_t target_offset;
3009 int i;
3010 vm_page_t t,m;
3011
3012 if (upl == UPL_NULL)
3013 return KERN_INVALID_ARGUMENT;
3014
3015 upl_lock(upl);
3016 if(upl->flags & UPL_DEVICE_MEMORY) {
3017 upl_unlock(upl);
3018 return KERN_SUCCESS;
3019 }
3020
3021 object = upl->map_object;
3022
3023 if (object == NULL) {
3024 panic("upl_abort: upl object is not backed by an object");
3025 upl_unlock(upl);
3026 return KERN_INVALID_ARGUMENT;
3027 }
3028
3029 shadow_object = upl->map_object->shadow;
3030 shadow_offset = upl->map_object->shadow_offset;
3031 offset = 0;
3032 vm_object_lock(shadow_object);
3033 for(i = 0; i<(upl->size); i+=PAGE_SIZE, offset += PAGE_SIZE_64) {
3034 if((t = vm_page_lookup(object,offset)) != NULL) {
3035 target_offset = t->offset + shadow_offset;
3036 if((m = vm_page_lookup(shadow_object, target_offset)) != NULL) {
3037 vm_object_paging_end(m->object);
3038 vm_page_lock_queues();
3039 if(m->absent) {
3040 /* COPYOUT = FALSE case */
3041 /* check for error conditions which must */
3042 /* be passed back to the pages customer */
3043 if(error & UPL_ABORT_RESTART) {
3044 m->restart = TRUE;
3045 m->absent = FALSE;
3046 vm_object_absent_release(m->object);
3047 m->page_error = KERN_MEMORY_ERROR;
3048 m->error = TRUE;
3049 } else if(error & UPL_ABORT_UNAVAILABLE) {
3050 m->restart = FALSE;
3051 m->unusual = TRUE;
3052 m->clustered = FALSE;
3053 } else if(error & UPL_ABORT_ERROR) {
3054 m->restart = FALSE;
3055 m->absent = FALSE;
3056 vm_object_absent_release(m->object);
3057 m->page_error = KERN_MEMORY_ERROR;
3058 m->error = TRUE;
3059 } else if(error & UPL_ABORT_DUMP_PAGES) {
3060 m->clustered = TRUE;
3061 } else {
3062 m->clustered = TRUE;
3063 }
3064
3065 m->cleaning = FALSE;
3066 m->overwriting = FALSE;
3067 PAGE_WAKEUP_DONE(m);
3068 if(m->clustered) {
3069 vm_page_free(m);
3070 } else {
3071 vm_page_activate(m);
3072 }
3073 vm_page_unlock_queues();
3074 continue;
3075 }
3076 /*
3077 * Handle the trusted pager throttle.
3078 */
3079 if (m->laundry) {
3080 vm_page_laundry_count--;
3081 m->laundry = FALSE;
3082 if (vm_page_laundry_count
3083 < vm_page_laundry_min) {
3084 vm_page_laundry_min = 0;
3085 thread_wakeup((event_t)
3086 &vm_page_laundry_count);
3087 }
3088 }
3089 if(m->pageout) {
3090 assert(m->busy);
3091 assert(m->wire_count == 1);
3092 m->pageout = FALSE;
3093 vm_page_unwire(m);
3094 }
3095 m->dump_cleaning = FALSE;
3096 m->cleaning = FALSE;
3097 m->busy = FALSE;
3098 m->overwriting = FALSE;
3099 #if MACH_PAGEMAP
3100 vm_external_state_clr(
3101 m->object->existence_map, m->offset);
3102 #endif /* MACH_PAGEMAP */
3103 if(error & UPL_ABORT_DUMP_PAGES) {
3104 vm_page_free(m);
3105 pmap_page_protect(m->phys_addr, VM_PROT_NONE);
3106 } else {
3107 PAGE_WAKEUP(m);
3108 }
3109 vm_page_unlock_queues();
3110 }
3111 }
3112 }
3113 vm_object_unlock(shadow_object);
3114 /* Remove all the pages from the map object so */
3115 /* vm_pageout_object_terminate will work properly. */
3116 while (!queue_empty(&upl->map_object->memq)) {
3117 vm_page_t p;
3118
3119 p = (vm_page_t) queue_first(&upl->map_object->memq);
3120
3121 assert(p->private);
3122 assert(p->pageout);
3123 p->pageout = FALSE;
3124 assert(!p->cleaning);
3125
3126 VM_PAGE_FREE(p);
3127 }
3128 upl_unlock(upl);
3129 return KERN_SUCCESS;
3130 }
3131
3132 /* an option on commit should be wire */
3133 kern_return_t
3134 upl_commit(
3135 upl_t upl,
3136 upl_page_info_t *page_list,
3137 mach_msg_type_number_t count)
3138 {
3139 if (upl == UPL_NULL)
3140 return KERN_INVALID_ARGUMENT;
3141
3142 if (count == 0)
3143 page_list = NULL;
3144
3145 upl_lock(upl);
3146 if (upl->flags & UPL_DEVICE_MEMORY)
3147 page_list = NULL;
3148 if ((upl->flags & UPL_CLEAR_DIRTY) ||
3149 (upl->flags & UPL_PAGE_SYNC_DONE)) {
3150 vm_object_t shadow_object = upl->map_object->shadow;
3151 vm_object_t object = upl->map_object;
3152 vm_object_offset_t target_offset;
3153 vm_size_t xfer_end;
3154
3155 vm_page_t t,m;
3156
3157 vm_object_lock(shadow_object);
3158
3159 target_offset = object->shadow_offset;
3160 xfer_end = upl->size + object->shadow_offset;
3161
3162 while(target_offset < xfer_end) {
3163 if ((t = vm_page_lookup(object,
3164 target_offset - object->shadow_offset))
3165 != NULL) {
3166 m = vm_page_lookup(
3167 shadow_object, target_offset);
3168 if(m != VM_PAGE_NULL) {
3169 if (upl->flags & UPL_CLEAR_DIRTY) {
3170 pmap_clear_modify(m->phys_addr);
3171 m->dirty = FALSE;
3172 }
3173 /* It is a part of the semantic of */
3174 /* COPYOUT_FROM UPLs that a commit */
3175 /* implies cache sync between the */
3176 /* vm page and the backing store */
3177 /* this can be used to strip the */
3178 /* precious bit as well as clean */
3179 if (upl->flags & UPL_PAGE_SYNC_DONE)
3180 m->precious = FALSE;
3181 }
3182 }
3183 target_offset += PAGE_SIZE_64;
3184 }
3185 vm_object_unlock(shadow_object);
3186 }
3187 if (page_list) {
3188 vm_object_t shadow_object = upl->map_object->shadow;
3189 vm_object_t object = upl->map_object;
3190 vm_object_offset_t target_offset;
3191 vm_size_t xfer_end;
3192 int entry;
3193
3194 vm_page_t t, m;
3195 upl_page_info_t *p;
3196
3197 vm_object_lock(shadow_object);
3198
3199 entry = 0;
3200 target_offset = object->shadow_offset;
3201 xfer_end = upl->size + object->shadow_offset;
3202
3203 while(target_offset < xfer_end) {
3204
3205 if ((t = vm_page_lookup(object,
3206 target_offset - object->shadow_offset))
3207 == NULL) {
3208 target_offset += PAGE_SIZE_64;
3209 entry++;
3210 continue;
3211 }
3212
3213 m = vm_page_lookup(shadow_object, target_offset);
3214 if(m != VM_PAGE_NULL) {
3215 p = &(page_list[entry]);
3216 if(page_list[entry].phys_addr &&
3217 p->pageout && !m->pageout) {
3218 vm_page_lock_queues();
3219 m->busy = TRUE;
3220 m->pageout = TRUE;
3221 vm_page_wire(m);
3222 vm_page_unlock_queues();
3223 } else if (page_list[entry].phys_addr &&
3224 !p->pageout && m->pageout &&
3225 !m->dump_cleaning) {
3226 vm_page_lock_queues();
3227 m->pageout = FALSE;
3228 m->absent = FALSE;
3229 m->overwriting = FALSE;
3230 vm_page_unwire(m);
3231 PAGE_WAKEUP_DONE(m);
3232 vm_page_unlock_queues();
3233 }
3234 page_list[entry].phys_addr = 0;
3235 }
3236 target_offset += PAGE_SIZE_64;
3237 entry++;
3238 }
3239
3240 vm_object_unlock(shadow_object);
3241 }
3242 upl_unlock(upl);
3243 return KERN_SUCCESS;
3244 }
3245
3246 vm_size_t
3247 upl_get_internal_pagelist_offset()
3248 {
3249 return sizeof(struct upl);
3250 }
3251
3252 void
3253 upl_set_dirty(
3254 upl_t upl)
3255 {
3256 upl->flags |= UPL_CLEAR_DIRTY;
3257 }
3258
3259 void
3260 upl_clear_dirty(
3261 upl_t upl)
3262 {
3263 upl->flags &= ~UPL_CLEAR_DIRTY;
3264 }
3265
3266
3267 #ifdef MACH_BSD
3268
3269 boolean_t upl_page_present(upl_page_info_t *upl, int index)
3270 {
3271 return(UPL_PAGE_PRESENT(upl, index));
3272 }
3273 boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
3274 {
3275 return(UPL_DIRTY_PAGE(upl, index));
3276 }
3277 boolean_t upl_valid_page(upl_page_info_t *upl, int index)
3278 {
3279 return(UPL_VALID_PAGE(upl, index));
3280 }
3281 vm_offset_t upl_phys_page(upl_page_info_t *upl, int index)
3282 {
3283 return((vm_offset_t)UPL_PHYS_PAGE(upl, index));
3284 }
3285
3286 void
3287 vm_countdirtypages(void)
3288 {
3289 vm_page_t m;
3290 int dpages;
3291 int pgopages;
3292 int precpages;
3293
3294
3295 dpages=0;
3296 pgopages=0;
3297 precpages=0;
3298
3299 vm_page_lock_queues();
3300 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
3301 do {
3302 if (m ==(vm_page_t )0) break;
3303
3304 if(m->dirty) dpages++;
3305 if(m->pageout) pgopages++;
3306 if(m->precious) precpages++;
3307
3308 m = (vm_page_t) queue_next(&m->pageq);
3309 if (m ==(vm_page_t )0) break;
3310
3311 } while (!queue_end(&vm_page_queue_inactive,(queue_entry_t) m));
3312 vm_page_unlock_queues();
3313
3314 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
3315
3316 dpages=0;
3317 pgopages=0;
3318 precpages=0;
3319
3320 vm_page_lock_queues();
3321 m = (vm_page_t) queue_first(&vm_page_queue_active);
3322
3323 do {
3324 if(m == (vm_page_t )0) break;
3325 if(m->dirty) dpages++;
3326 if(m->pageout) pgopages++;
3327 if(m->precious) precpages++;
3328
3329 m = (vm_page_t) queue_next(&m->pageq);
3330 if(m == (vm_page_t )0) break;
3331
3332 } while (!queue_end(&vm_page_queue_active,(queue_entry_t) m));
3333 vm_page_unlock_queues();
3334
3335 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
3336
3337 }
3338 #endif /* MACH_BSD */
3339
3340 #ifdef UBC_DEBUG
3341 kern_return_t upl_ubc_alias_set(upl_t upl, unsigned int alias1, unsigned int alias2)
3342 {
3343 upl->ubc_alias1 = alias1;
3344 upl->ubc_alias2 = alias2;
3345 return KERN_SUCCESS;
3346 }
3347 int upl_ubc_alias_get(upl_t upl, unsigned int * al, unsigned int * al2)
3348 {
3349 if(al)
3350 *al = upl->ubc_alias1;
3351 if(al2)
3352 *al2 = upl->ubc_alias2;
3353 return KERN_SUCCESS;
3354 }
3355 #endif /* UBC_DEBUG */
3356
3357
3358
3359 #if MACH_KDB
3360 #include <ddb/db_output.h>
3361 #include <ddb/db_print.h>
3362 #include <vm/vm_print.h>
3363
3364 #define printf kdbprintf
3365 extern int db_indent;
3366 void db_pageout(void);
3367
3368 void
3369 db_vm(void)
3370 {
3371 extern int vm_page_gobble_count;
3372
3373 iprintf("VM Statistics:\n");
3374 db_indent += 2;
3375 iprintf("pages:\n");
3376 db_indent += 2;
3377 iprintf("activ %5d inact %5d free %5d",
3378 vm_page_active_count, vm_page_inactive_count,
3379 vm_page_free_count);
3380 printf(" wire %5d gobbl %5d\n",
3381 vm_page_wire_count, vm_page_gobble_count);
3382 iprintf("laund %5d\n",
3383 vm_page_laundry_count);
3384 db_indent -= 2;
3385 iprintf("target:\n");
3386 db_indent += 2;
3387 iprintf("min %5d inact %5d free %5d",
3388 vm_page_free_min, vm_page_inactive_target,
3389 vm_page_free_target);
3390 printf(" resrv %5d\n", vm_page_free_reserved);
3391 db_indent -= 2;
3392
3393 iprintf("burst:\n");
3394 db_indent += 2;
3395 iprintf("max %5d min %5d wait %5d empty %5d\n",
3396 vm_pageout_burst_max, vm_pageout_burst_min,
3397 vm_pageout_burst_wait, vm_pageout_empty_wait);
3398 db_indent -= 2;
3399 iprintf("pause:\n");
3400 db_indent += 2;
3401 iprintf("count %5d max %5d\n",
3402 vm_pageout_pause_count, vm_pageout_pause_max);
3403 #if MACH_COUNTERS
3404 iprintf("scan_continue called %8d\n", c_vm_pageout_scan_continue);
3405 #endif /* MACH_COUNTERS */
3406 db_indent -= 2;
3407 db_pageout();
3408 db_indent -= 2;
3409 }
3410
3411 void
3412 db_pageout(void)
3413 {
3414 #if MACH_COUNTERS
3415 extern int c_laundry_pages_freed;
3416 #endif /* MACH_COUNTERS */
3417
3418 iprintf("Pageout Statistics:\n");
3419 db_indent += 2;
3420 iprintf("active %5d inactv %5d\n",
3421 vm_pageout_active, vm_pageout_inactive);
3422 iprintf("nolock %5d avoid %5d busy %5d absent %5d\n",
3423 vm_pageout_inactive_nolock, vm_pageout_inactive_avoid,
3424 vm_pageout_inactive_busy, vm_pageout_inactive_absent);
3425 iprintf("used %5d clean %5d dirty %5d\n",
3426 vm_pageout_inactive_used, vm_pageout_inactive_clean,
3427 vm_pageout_inactive_dirty);
3428 #if MACH_COUNTERS
3429 iprintf("laundry_pages_freed %d\n", c_laundry_pages_freed);
3430 #endif /* MACH_COUNTERS */
3431 #if MACH_CLUSTER_STATS
3432 iprintf("Cluster Statistics:\n");
3433 db_indent += 2;
3434 iprintf("dirtied %5d cleaned %5d collisions %5d\n",
3435 vm_pageout_cluster_dirtied, vm_pageout_cluster_cleaned,
3436 vm_pageout_cluster_collisions);
3437 iprintf("clusters %5d conversions %5d\n",
3438 vm_pageout_cluster_clusters, vm_pageout_cluster_conversions);
3439 db_indent -= 2;
3440 iprintf("Target Statistics:\n");
3441 db_indent += 2;
3442 iprintf("collisions %5d page_dirtied %5d page_freed %5d\n",
3443 vm_pageout_target_collisions, vm_pageout_target_page_dirtied,
3444 vm_pageout_target_page_freed);
3445 db_indent -= 2;
3446 #endif /* MACH_CLUSTER_STATS */
3447 db_indent -= 2;
3448 }
3449
/*
 * Cluster/target pageout statistics.
 *
 * With MACH_CLUSTER_STATS enabled, the counters below are defined (all
 * starting at zero) and CLUSTER_STAT(clause) expands to "clause", so the
 * statistics-updating code is compiled in.  With it disabled, no
 * counters exist and CLUSTER_STAT(clause) expands to nothing.
 */
#if	MACH_CLUSTER_STATS

/* counters printed under "Cluster Statistics" by db_pageout */
unsigned long	vm_pageout_cluster_dirtied = 0;
unsigned long	vm_pageout_cluster_cleaned = 0;
unsigned long	vm_pageout_cluster_collisions = 0;
unsigned long	vm_pageout_cluster_clusters = 0;
unsigned long	vm_pageout_cluster_conversions = 0;

/* counters printed under "Target Statistics" by db_pageout */
unsigned long	vm_pageout_target_collisions = 0;
unsigned long	vm_pageout_target_page_dirtied = 0;
unsigned long	vm_pageout_target_page_freed = 0;

#define CLUSTER_STAT(clause)	clause

#else	/* MACH_CLUSTER_STATS */

#define CLUSTER_STAT(clause)

#endif	/* MACH_CLUSTER_STATS */
3463
3464 #endif /* MACH_KDB */